examples/datasets/hh-rlhf-helpful-base.py,sha256=C4PNdI4xg4gcdp4C4xBPdnwU5xnmL8Bjd_mh42qp3VQ,5408
examples/datasets/lm-human-preferences-descriptiveness.py,sha256=11GZ7x2lBSF2Jz_4r1gIBtBztFpwyAuQIEcFCU7a9Sg,4943
examples/datasets/lm-human-preferences-sentiment.py,sha256=IzXYqJrkDJk2BPIgXLAoff7yMsFZSEYSleAfvm59IZg,4621
examples/datasets/math_shepherd.py,sha256=2CqbxaJRghO6NG6y4hyOs5mdyDbDGkw4kBDCFH6JWvo,6561
examples/datasets/prm800k.py,sha256=9mv6Kl7X-ZMtILPltOqRK_a4b1la6si-VdoRs8cTFXs,6078
examples/datasets/rlaif-v.py,sha256=elX5GJFNCK4KCknhmCZQ7T0zGU8P-NADY_1E1n7e1zo,4642
examples/datasets/tldr.py,sha256=jJV5_-gqhLAJ4_mreEEkO_fixiPZIkZoqujCN8dmdlo,4294
examples/datasets/tldr_preference.py,sha256=2IYpJF_ioSHBeeF97iztJ3MjpoG2UUaShE6P80ildkM,4458
examples/datasets/ultrafeedback-prompt.py,sha256=Nqh5gSD1-VTkA6o1-l58IUDFozvBEM8qK6S2RJlcreU,3531
examples/datasets/ultrafeedback.py,sha256=AeKecokY4tNGzGyhaODxUrcLRpx47KSJHEVeQoe2ujQ,5527
examples/research_projects/layer_skip/scripts/benchmark_layer_skip.py,sha256=VgPMvXr_KPMJ_2WgKFoATnA_nVCaBHSl9ezJyy4MLjs,2588
examples/research_projects/layer_skip/scripts/config.py,sha256=9U7jhkjG6n8HPFvNz_xRHzUEqwrHcnRBHmFqtCc32_E,1032
examples/research_projects/layer_skip/scripts/custom_trainer.py,sha256=ptZrlvWCAJMgwxYEkWL807JIqcc5KezbTKYP6X3ZFmA,2066
examples/research_projects/layer_skip/scripts/layer_skip_sft.py,sha256=GN2Ul35n0SOon7iRoTumE6amgizZq_i_oYRnbglVdMQ,3407
examples/research_projects/stack_llama/scripts/merge_peft_adapter.py,sha256=xlhCG9LGYg1QUbm5oal-8nvZxmm1JIP1lRakU3m7fMw,2610
examples/research_projects/stack_llama/scripts/reward_modeling.py,sha256=JTWWI-SmiC5TDT_-U1hLTU686IvpsLla5u4fRYAhG4A,11868
examples/research_projects/stack_llama/scripts/rl_training.py,sha256=5kQ37v9PyQwvThBRPVntIOZVd3-a0wtj5lwOpiZs0Gs,10313
examples/research_projects/stack_llama/scripts/supervised_finetuning.py,sha256=40wQBDd-hZcxGQwnbtHEMSBdXDfv-BAuSxoQhK7z5LU,7727
examples/research_projects/stack_llama_2/scripts/dpo_llama2.py,sha256=xdRbuUQroxzpLGQyx-Mw7sQmwrcGmZ3cVBDFZRiml2g,9958
examples/research_projects/stack_llama_2/scripts/sft_llama2.py,sha256=9-D51uZyHK9tQzVHpbGLiDbHxH8bXRtqO_4sunMZGnI,7725
examples/research_projects/toxicity/scripts/evaluate-toxicity.py,sha256=AR1O23hNQySxM_rd5sdnJYNULoCO-EnH45tnEkd_rjs,5442
examples/research_projects/toxicity/scripts/gpt-j-6b-toxicity.py,sha256=UVc17Jvb4yepT7nxCyNuBlZHqNCXxg_76mKyWgfE2pA,9129
examples/scripts/alignprop.py,sha256=dodf3-GLZ96KnnmbWmo12uwxKOP3bQzGjVfM_eCM8TU,5263
examples/scripts/bco.py,sha256=e2LUPUIddvXbiT_EmSE2LyUAlvu5edzdBG7ZUtofHCQ,5982
examples/scripts/cpo.py,sha256=c5WIU1Qe5T8blqEswdwyW20XT0QcGFqg2dh01zcMG88,3582
examples/scripts/ddpo.py,sha256=PlnAwgLSh1CoeOupo4PVpzcDs1hINQU2fJONNjBjC_I,7701
examples/scripts/dpo.py,sha256=AViXy3Gjt9XOr1WFihtCNavsRvzMk_A1g-NRx6jEqY0,900
examples/scripts/dpo_online.py,sha256=6qbiJbz6ZT7UODjR-oqJcIknCqqR8c4-_BWxKjmK-Lo,5470
examples/scripts/dpo_vlm.py,sha256=XV3eG9lJi_1HSR96hHy9kewwaF4cuNLmqKPjvO4CBjY,5841
examples/scripts/gkd.py,sha256=mCQx--3_ok_EzUw9Y9SuJuIX-YgAxAnjraVC-YoiiXc,4699
examples/scripts/grpo_vlm.py,sha256=y7NCo5DjWAQRLvslYXSuGH7hXkccdokNZOfBeOH_Rsc,7164
examples/scripts/kto.py,sha256=5oCRyDvN4UeA8JpjNL60iZOmD5akZELgCF9Sql-tjBk,3776
examples/scripts/nash_md.py,sha256=vmnQEbPqv_aHpn1HBesUelTpcSjnCgXbZmd8yHF_xi0,5318
examples/scripts/orpo.py,sha256=wZ3C0CZiR6XQxeSR4Gd7q-_xeABIfFDGdJhvCb-qY3I,3671
examples/scripts/prm.py,sha256=RNjb-7KJuBGj9PM2wgXGMKLfSrwTdjbGG4RfAKzC4XU,4462
examples/scripts/reward_modeling.py,sha256=aHjn9YlWug5vIOxYkhTHK42d0tHtZU8Nl0h-BOERKkw,4809
examples/scripts/sft.py,sha256=C1cJ0H9PoSFHi9p7RJB3GjWzRRyCog-I2gQQyIINslY,900
examples/scripts/sft_gemma3.py,sha256=PyY86sm9R_Z66mzriJAIxmJGV3OhSuHPI6IBsRQRx7I,2003
examples/scripts/sft_video_llm.py,sha256=si7EgVVBB_MFR2P__Q3ITt_ACm3cpmXth8XTRTeNvhY,8447
examples/scripts/sft_vlm.py,sha256=KsUF3oZZalJnUx1uJVFzZIqDtU_rNMXo08oNHhcgRH8,5084
examples/scripts/sft_vlm_gemma3.py,sha256=o9f_hRK-leovETTHRg_zwR3hAeVugBAYia6u0bqMwnM,8513
examples/scripts/sft_vlm_smol_vlm.py,sha256=X4t8x-O3aOEhBUhmk_bOXSz7xB8ydX1pCLRaFPiUI0s,5495
examples/scripts/xpo.py,sha256=fJjeLia3x1LiFo-2lQEM4AQNHUFehkBvY-b5CmtTZ6g,4747
examples/scripts/evals/judge_tldr.py,sha256=jI1chWPAIH0PHtSzwhHMvs0bpMmXR12J1Qw4b0ur5Ng,4077
examples/scripts/ppo/ppo.py,sha256=DuobzAhPc3aKDS-n8xCcJISDu4v3WdQNSw6rQjb4im8,6108
examples/scripts/ppo/ppo_tldr.py,sha256=RwuJfGHTe4ilEHZbU206DNUQveU2aKiSUH4vaj21FzM,6822
examples/scripts/rloo/rloo.py,sha256=_IyAzq8x1aevXitU9MoTFLhX_Np410R1B5OchARn1nk,5187
examples/scripts/rloo/rloo_tldr.py,sha256=lzZPY8hSi8TFOHUbtIIvKKo3tMjejGO-DJcH8Yw4Y9c,5695
scripts/add_copyrights.py,sha256=rWy8u4AX7z7fEzkImMLRbyGgu1gHqNIbxWPJLr0W_pQ,3360
scripts/generate_tiny_models.py,sha256=lM5Aq4OERsi-mYux22fDXY9gKRTii0TB19UAIGcU6IA,10460
scripts/generate_toolcall_dataset.py,sha256=KaqZXp1XFznnogifGhukbKzFAnIiLeM6Xi_e_3fqffY,9760
scripts/generate_zen_dataset.py,sha256=xXcK8fPqn1tKHnjCr1qlXEzjdLaaDdB71xlBq0Isl18,37892
scripts/generate_zen_image_dataset.py,sha256=bZAbuxYGeSe-gpI_-HZVXQL7_zeMED4HsCWsgIlS0cQ,41543
scripts/log_example_reports.py,sha256=mHmbflXohXONZD58iJJ_r5RKuTEdqHw4git54hYELEg,5591
scripts/log_reports.py,sha256=LrKQ6maOyZdOsNLx-l6QPDqc_eEovujEJ0LuZTFhDAg,5730
trl/__init__.py,sha256=h43K623bMdTLgs64NGUHsQgqw942iOACTA4aR0qg0_I,6121
trl/cli.py,sha256=W0ya7EWVSIO9e0ByAIKb_AzW4cv3WQU9OqDpVN2dqC8,6580
trl/core.py,sha256=vHZbFc_VxuLicw8fYiLRrvJbKlqy60YuJ1_ffMeOf1I,5978
trl/data_utils.py,sha256=uUYsQdmV7eQyJpTVGh5SCIfYPZBy5XAhrHg8X5zUcIc,31086
trl/import_utils.py,sha256=j4HAJbf3WKntxH2qbhBzz0stjEpGYQdcFzFjp4pimao,5405
trl/mergekit_utils.py,sha256=4vmWYpwFjU2VJQBye2WGC680spqBvFbiqy6IYPGikAw,11069
trl/accelerate_configs/fsdp1.yaml,sha256=FBw8i6diLwfcUI6Uu7T-UTpd7niY062tLsCFMLoT0c4,725
trl/accelerate_configs/fsdp2.yaml,sha256=zg1L3kVDJb7kvuxIPaqMN6mZQFD2mvG8IxSRmkY7dDk,627
trl/accelerate_configs/multi_gpu.yaml,sha256=yDX-SPo8qY3SW8jsf_53VJX1GwIV20wmRPBbCKf4_8I,321
trl/accelerate_configs/single_gpu.yaml,sha256=4MuOZIjHNir2adOzaoOgBwD9IXXRbPNPzBYVW-zkc7I,316
trl/accelerate_configs/zero1.yaml,sha256=fyUlyzQ_nhP6oamOgOBM7MDZMuHvG4xZfm_-2wnuYTo,441
trl/accelerate_configs/zero2.yaml,sha256=kW58rFIjO1Jz0-TWjg2zUHO5BUBHI4TPFIUHb2M-WiQ,470
trl/accelerate_configs/zero3.yaml,sha256=VN3HDSxEn_WCvYpML4thWO7cpr2N39VwAJjMvPtwfxE,498
trl/environment/__init__.py,sha256=JANVP5oKaZE9xPXVm-ikSEr4-11zIjiLY7M6K_VjjR4,989
trl/environment/base_environment.py,sha256=KM0NnnBClOADdJHDRao-WmKo4J-Aez44S9cDxricQr8,18265
trl/extras/__init__.py,sha256=5CIPkzvP9D6vpnWLNZz23RNU98MfjeBsxBFMHXKM09U,961
trl/extras/best_of_n_sampler.py,sha256=pke-6-dqnLMfFA5lVBB5_Iq6Z9varKFwFBhBqnjlsFQ,5802
trl/extras/dataset_formatting.py,sha256=4j1ZO_oPMiql6c0BzKXwNmijtJiFvT5WQ0SGUCQWCrY,4796
trl/extras/profiling.py,sha256=txitznUe6ISatuwTGzcbzkUzngEiouA2bCzNKJ77n-w,3258
trl/extras/vllm_client.py,sha256=Ibp0zkfP8PjHKWIFCxNguQt7anxEFT1wGO3_iD9nzEM,15266
trl/models/__init__.py,sha256=yBbzfOBH_aplrIQGFDAXHYom66hD8wAX0WmRGtziw9I,2602
trl/models/activation_offloading.py,sha256=vuVp3K4mT3KW2aNaFGUTDtEGlY4QF4PN8mCqQRxjz6Q,23556
trl/models/auxiliary_modules.py,sha256=2Qamjf5g8XrK6Xkz3iwgtL_NNBLND_dBM9P28OeVe3I,3344
trl/models/modeling_base.py,sha256=2Eb-bBnz65X2O6AfWiW3SICV0K5jlfIBi_o1k2q3fAQ,30786
trl/models/modeling_sd_base.py,sha256=mZLwNK7qZazr_BvwpG7WGu2mhZQ8inbiGTG6ZsxHwCg,42298
trl/models/modeling_value_head.py,sha256=Uj_vVHfJX3NzvlQ_KKwrqsVa2gQ2d7W6_bc3VsC3GHY,18833
trl/models/sd_utils.py,sha256=nnqSZL0eSJeX2RbceUhpWQdAz2L4cSuGWt_XEqvjBjs,5879
trl/models/utils.py,sha256=uyJu7y2WtjqMbIO7hjpikcB0xFe_TU4TfZok8VmhKgo,19813
trl/rewards/__init__.py,sha256=9qZueWsDNBlJpanpOoKo-eIExJK2hKFHWiwXDyjogtQ,951
trl/rewards/format_rewards.py,sha256=fCQhtw6lazljB5Kv20ZAaD5XraoxKqVDBvqbscmldYE,2211
trl/scripts/__init__.py,sha256=V7eYI1BLoXW4VVJP7Qe0avdLaRK-kVaiXEG4QVlP9xc,1003
trl/scripts/dpo.py,sha256=zG0LUiOyZJlaZ5jKYDRmtKklYoXadoU0u0bTi_UtDvM,5314
trl/scripts/env.py,sha256=b2AblbPA91lL54MxpW3NNw-1sTKa6QDSCPsheHr3gbY,3681
trl/scripts/grpo.py,sha256=_0EeADyZjzg8vqLaJa9f7PYNNpUI2mQ8xxUkDuH9GRQ,5280
trl/scripts/kto.py,sha256=nnlXMmeTWI3dSpEMMDnsgV9w0mHE53DiyUqnLibQAUI,4193
trl/scripts/sft.py,sha256=9OOrnB3rmpjYf4KdaUi7kLK59Rk_SSxTFYJZA-Sx_YE,5387
trl/scripts/utils.py,sha256=SRNLoSfJgjWT7LCbYuHI6unzKPbjoC0X-IBV4nwr2NQ,11295
trl/scripts/vllm_serve.py,sha256=Jdar8J-XrE7r_gBNKXCm5QLseGUw9JFCrtWS4RPWssM,28291
trl/templates/lm_model_card.md,sha256=zq0dXorqEOnjHJ8wxwhI3LS2R6qiHmlcgPcVzliwRmc,2158
trl/trainer/__init__.py,sha256=lRiWrBIJ7O18HbIahBjHy8urSxN3k-VDYn8OI7INPV0,5429
trl/trainer/alignprop_config.py,sha256=jLsZIkPRpyKZspVsk7gcn0JS7Mg2mCrccl-slpwOpIU,10196
trl/trainer/alignprop_trainer.py,sha256=bpttR2WTQKAyT6xgnex44-lu3Fbcq7iIev6Ki6WPvjs,19023
trl/trainer/bco_config.py,sha256=iHdh9qHIi-fA3tq-WVYTBsTZLMUEXKYSOR0EggQhBzE,9900
trl/trainer/bco_trainer.py,sha256=qLd377-HaoM00HJ-uFRW6ws7ekPNb4Sc6hn3SOKCu6U,71588
trl/trainer/callbacks.py,sha256=yWJQe5XthIaLGTB-E1bRivIVHIBs-D5zQairb7rW_0Y,24571
trl/trainer/cpo_config.py,sha256=fFYAaG7clIH5KU4c-2Bmq7AizMJ92toAsWdAcx8N0eA,9051
trl/trainer/cpo_trainer.py,sha256=A4R1Q3gHmHUuv0KeXAZO6z9NXNspDiioEcafZ6tUcTU,51705
trl/trainer/ddpo_config.py,sha256=1qL0nWhpEYFel7PUpaKXVWfc0PHvrYWiTcSiEc1rwdo,12241
trl/trainer/ddpo_trainer.py,sha256=Zfc_RqqVtHoGDtG884pdtaa9AzFRPxP5CKfoIlWt2jE,28731
trl/trainer/dpo_config.py,sha256=G-cf6M7IKdKzsRlbAAodygztZYsAtA0wev0ZZQpoAB0,24750
trl/trainer/dpo_trainer.py,sha256=1kqMJPi63vkHQFmKihqRoIPXqL1pNjkUko1kB_ujzIY,100243
trl/trainer/gkd_config.py,sha256=jBTXHa8xxFlpIKbYIDmwr69xpAzwiFs2DOCl3era878,4949
trl/trainer/gkd_trainer.py,sha256=w9ZdtC7B2BUZ6sRmpKsCOPyp8bc2vlmV2GvSDEivfEs,15260
trl/trainer/grpo_config.py,sha256=9llrwn16Y10rFyfWvxCoJk_w7nxzdeC_j0vdrtCFuD0,35129
trl/trainer/grpo_trainer.py,sha256=bJLQfTVsUX6XsXtjokyg8Akw-4cFM7UHiTB5s44vT44,106615
trl/trainer/iterative_sft_config.py,sha256=WFF1DuSp3D5GDp8Gk1blSAXl88ZY_-TqM9CoDU5Frtw,4333
trl/trainer/iterative_sft_trainer.py,sha256=9sSBif07Ouu8RXAIwGKjee0Jd2sSEtoE2zD2SiDMERU,21212
trl/trainer/judges.py,sha256=NZkSr0LBU858iyksX56uMq41iKTL2VcSGruEIZAz8sA,19570
trl/trainer/kto_config.py,sha256=mtCG0dF_iPhL7rNEP_ffa_mtd3GKsJn1MtpZCrvk4Tw,11205
trl/trainer/kto_trainer.py,sha256=C9wCh8-sg2OdNCQFsYVyDtlP62sgMdjZFmtkJR3dI5U,82344
trl/trainer/model_config.py,sha256=XMJkyXhifzXqd1k7-GXGorQqQPbRkrPTEAx2LAhnJBU,8660
trl/trainer/nash_md_config.py,sha256=T3004FbQP7A9mir_I-XvW9GkzoykRL4U_bMg52NLVFE,1844
trl/trainer/nash_md_trainer.py,sha256=SOy4ad3UTYBOXIcF6pXlmZaCK819pC6grfU1BtkgOV4,24151
trl/trainer/online_dpo_config.py,sha256=sLw7b6mX4IdUFD4uQYc5WtEXKN5eBfEHQOBU5UUq_sU,9425
trl/trainer/online_dpo_trainer.py,sha256=icIClZZdn4hekTpNQgX1Avu0pv3Dg530vUOFO3vaje8,40306
trl/trainer/orpo_config.py,sha256=ZojvzIeNX2QyvcQ8Vl385_ObZrGr9y9RBTIzeqReCd4,7621
trl/trainer/orpo_trainer.py,sha256=lNCBtjcystHlykxmmxjASJGeMvmkYuDZ2J2h65EUzKI,51316
trl/trainer/ppo_config.py,sha256=-_wtzoOV5FfUJOk1SERIosd3oPfkbkCGlOjpMm5jDUU,6081
trl/trainer/ppo_trainer.py,sha256=Hto6Xm5ZX5JhfTGABW1zaDG-tI--PNpkOgkK5kCQasw,40112
trl/trainer/prm_config.py,sha256=SRBZYvspZYaCjlRcGW0fVhtCj4d3fYjkFi57IyFQwjY,5175
trl/trainer/prm_trainer.py,sha256=5GLaf1GpcbfSlCBd3bD9nHjNoo3UByGOvIFnaG3-2Tw,16886
trl/trainer/reward_config.py,sha256=Otd1yd8GuaxYJyPTp4nXziQ6VKOhI3vpzDhcyIEiBwg,5089
trl/trainer/reward_trainer.py,sha256=NBK-ZKLjVyN7oQESQOT4-4LUFFTISinEDaFalZLhCa4,19508
trl/trainer/rloo_config.py,sha256=8ATE6HxKv893B2HLXFQC6QZ1nRSG6EqbkslIF4r5re8,4834
trl/trainer/rloo_trainer.py,sha256=j_pjWzLCIZINE94jnMJXwIrdTKnOrAQ-n4L2aAEoFeM,34597
trl/trainer/sft_config.py,sha256=Zjf1c640MvcpitXl75uPUtY7prNLaSzYM3zpL-Yu9kE,12819
trl/trainer/sft_trainer.py,sha256=MgE_IMzr1ctikZHEM_PT9m657KWE08YOAFj6b1d3SfY,50803
trl/trainer/utils.py,sha256=HGgFgnEoimQ_e2neutBQUI6kVUSPgibKc_R6H9h84Lg,64454
trl/trainer/xpo_config.py,sha256=nPzvCvvJpEWVBNnq-vv5CPpyTVZkQ47ZucVh_87fOPE,1667
trl/trainer/xpo_trainer.py,sha256=i-L6fsBZViewIOGvrLwk3wTzEx8DJ2qHOX1EwrmiI1k,26553
trl-0.20.0.dist-info/licenses/LICENSE,sha256=HZDsv3u4D_J-9nt3hfYhSAtlK4LkbHLYpXMLDbZb81U,11355
trl-0.20.0.dist-info/METADATA,sha256=5u_BOSYVpyWuJmOLnJNtwYdoXiMFldXVB3PgN-02VXE,11070
trl-0.20.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
trl-0.20.0.dist-info/entry_points.txt,sha256=EiubuQrZSGOohRB4xVoiVAyWsPsOHPSVpbzSk8YB0x0,37
trl-0.20.0.dist-info/top_level.txt,sha256=lphUqoPSFELZEDepHSNwOoZdSmPcI9EVdsv2QWHV9QM,21
trl-0.20.0.dist-info/RECORD,,
