examples/datasets/hh-rlhf-helpful-base.py,sha256=C4PNdI4xg4gcdp4C4xBPdnwU5xnmL8Bjd_mh42qp3VQ,5408
examples/datasets/llava_instruct_mix.py,sha256=o75R8EP24i0hm7J1KeW6e2EMcvaRTOiUZaRSoCCjaSQ,4443
examples/datasets/lm-human-preferences-descriptiveness.py,sha256=11GZ7x2lBSF2Jz_4r1gIBtBztFpwyAuQIEcFCU7a9Sg,4943
examples/datasets/lm-human-preferences-sentiment.py,sha256=IzXYqJrkDJk2BPIgXLAoff7yMsFZSEYSleAfvm59IZg,4621
examples/datasets/math_shepherd.py,sha256=2CqbxaJRghO6NG6y4hyOs5mdyDbDGkw4kBDCFH6JWvo,6561
examples/datasets/prm800k.py,sha256=9mv6Kl7X-ZMtILPltOqRK_a4b1la6si-VdoRs8cTFXs,6078
examples/datasets/rlaif-v.py,sha256=elX5GJFNCK4KCknhmCZQ7T0zGU8P-NADY_1E1n7e1zo,4642
examples/datasets/tldr.py,sha256=jJV5_-gqhLAJ4_mreEEkO_fixiPZIkZoqujCN8dmdlo,4294
examples/datasets/tldr_preference.py,sha256=2IYpJF_ioSHBeeF97iztJ3MjpoG2UUaShE6P80ildkM,4458
examples/datasets/ultrafeedback-prompt.py,sha256=Nqh5gSD1-VTkA6o1-l58IUDFozvBEM8qK6S2RJlcreU,3531
examples/datasets/ultrafeedback.py,sha256=AeKecokY4tNGzGyhaODxUrcLRpx47KSJHEVeQoe2ujQ,5527
examples/research_projects/layer_skip/scripts/benchmark_layer_skip.py,sha256=QGMlp4eC-qsSAJ_xWTJiWZwMarDSQRPSUozOlSxusZk,2582
examples/research_projects/layer_skip/scripts/config.py,sha256=9U7jhkjG6n8HPFvNz_xRHzUEqwrHcnRBHmFqtCc32_E,1032
examples/research_projects/layer_skip/scripts/custom_trainer.py,sha256=ptZrlvWCAJMgwxYEkWL807JIqcc5KezbTKYP6X3ZFmA,2066
examples/research_projects/layer_skip/scripts/layer_skip_sft.py,sha256=HoT71CB-Urd6CVa3QrQowBpJq0x-bCp3UujbCnynF74,3401
examples/research_projects/stack_llama/scripts/merge_peft_adapter.py,sha256=V-kY4g4uJZm73eb6B3soNCkLDGza-Ix3pHkWzqg3q5s,2584
examples/research_projects/stack_llama/scripts/reward_modeling.py,sha256=VX_ldsczL84AjjUa2NaQlFd_Fd97mFFxaFCsgB3JXck,11856
examples/research_projects/stack_llama/scripts/rl_training.py,sha256=U7kNyu4fAja3CFUzG9TmmiiW7k-d-DVCeuU3UQ8sHBE,10415
examples/research_projects/stack_llama/scripts/supervised_finetuning.py,sha256=40wQBDd-hZcxGQwnbtHEMSBdXDfv-BAuSxoQhK7z5LU,7727
examples/research_projects/stack_llama_2/scripts/dpo_llama2.py,sha256=Pbp4al3VrOF6zCrLojZPzI8kVIjcv4QVticei_QKSNM,9928
examples/research_projects/stack_llama_2/scripts/sft_llama2.py,sha256=8y41k7aOizMeknGXvHSbhYujDoW8ttMHgmSE-ibYFNg,7719
examples/research_projects/toxicity/scripts/evaluate-toxicity.py,sha256=UhrW2RZPCEHGdfe7RLPpdTc1nM0gAfLQXE7xXoEDslM,5436
examples/research_projects/toxicity/scripts/gpt-j-6b-toxicity.py,sha256=alVqwvmBQ6_-2YmxHZZmYsI2x1Mu_XBfHW68hlMVJpw,9407
examples/scripts/alignprop.py,sha256=rKz4ouMnI8H35raWRvsmTpjyPEd8BdKiHAjeduG8Ta0,5474
examples/scripts/bco.py,sha256=s2axzTdyS4rOboSY2VFiZAOdfuJJ5UByfUiOEr6RT1A,5990
examples/scripts/cpo.py,sha256=PriXcPpMkXfaHC0d8gUoRBYaYi0zX_Wr0lmBtGfrarI,3659
examples/scripts/ddpo.py,sha256=P2jHH6i6eU_Z7phDsNv868-T1n70kHB2tOhlrzphm2Y,7842
examples/scripts/dpo.py,sha256=AViXy3Gjt9XOr1WFihtCNavsRvzMk_A1g-NRx6jEqY0,900
examples/scripts/dpo_vlm.py,sha256=s016fi_K5GEA_TtwGjv0db2s4UMVaMzl6BDqP4yPYko,6052
examples/scripts/gkd.py,sha256=Scq9qxUHwre1xlgcTpDm-T_B4W3N7Jng9CXoWxIGeTg,5106
examples/scripts/grpo_vlm.py,sha256=3TntcuN2Vp-NNwNd_FVohAPHL1sIe9oTrErYzAqqSMU,7443
examples/scripts/gspo.py,sha256=02fDMGH_Fg4Sy10oSMFSYLV2lDZwQRJRBqb3JcukCvs,6585
examples/scripts/gspo_vlm.py,sha256=-yIi3KbVEQYSrS73vUImfgEBRqHYuuO74cRYqXY8hO8,7025
examples/scripts/kto.py,sha256=kAYr4aH8nhaPrBkjPxcF_vhh_LVtxPlCTDnvNGJkwTc,3618
examples/scripts/mpo_vlm.py,sha256=30xU5dyj3h7OfAS859i6HtUwDSDj06LzSFPGimzx0Ok,4752
examples/scripts/nash_md.py,sha256=jF8tx6CveNadu4Gi9YItFEsMPoXfzEOLWhcRWELae1A,5553
examples/scripts/online_dpo.py,sha256=AfSRqQSgWw-ljn1iZEI2l1jjkY-XpBu6SjCJuxgNl8I,5825
examples/scripts/online_dpo_vlm.py,sha256=3qaMwG_HWfdEKacy6ngjOGre7GIf2iXpwb9z4NG9u_4,9938
examples/scripts/orpo.py,sha256=9WRPSk_xdwzxMVciaWsczy7NP0BHluKXG20hTKUNh1g,3742
examples/scripts/prm.py,sha256=_0C5xeibh1bt2DPIciqF17mPvtfKGeTezMLNdKmjoHQ,4698
examples/scripts/reward_modeling.py,sha256=mRNQY8pPsFUinxmxZsMiO9n9gUBB5Ihbs169su37IuQ,5105
examples/scripts/rloo.py,sha256=WftmkUlYb0FwVntcy8myzBp9Akxo4CDlKAty7RB4tbQ,5426
examples/scripts/sft.py,sha256=C1cJ0H9PoSFHi9p7RJB3GjWzRRyCog-I2gQQyIINslY,900
examples/scripts/sft_gemma3.py,sha256=OUAj_UGwT7FD8CjHimj7fUXP7giQJwz_z-kWxakHaCw,2165
examples/scripts/sft_gpt_oss.py,sha256=YCmfBVoSjCSAbhE75K9G9kvzh_GYcS1MNeZgRcYgGrE,3430
examples/scripts/sft_video_llm.py,sha256=Z1Lmrv1tfrcdwKbDJ5_ZkfUAXtoQZ7Kn32CN5TBb-Bg,8325
examples/scripts/sft_vlm.py,sha256=ex-zQ0MTH4Cd10HX8W5lZZTyCYU7Cqb60qQZkfFk4nU,4065
examples/scripts/sft_vlm_gemma3.py,sha256=3l5ebsbyQdpXgJ8g2MbMYEiVLd4gFmt3LWHzg0AN8Ng,6753
examples/scripts/xpo.py,sha256=Za1EqVhEo4_BdQknIhpsOFubq499BsqNQcloEyvHIXo,4982
examples/scripts/evals/judge_tldr.py,sha256=oE1woi565fsvMaCCi-tmSVBR4AlO0zwykqceXi5yDPw,4200
examples/scripts/ppo/ppo.py,sha256=D0aU8KBklK98LeNvyqWO0QxUwQnKN-UQ3xxBHWJnxHg,6420
examples/scripts/ppo/ppo_tldr.py,sha256=oUQfddRleE8-079w4OG4Hpgfx2s64Tq3RumMvC_JtbM,7113
scripts/add_copyrights.py,sha256=rWy8u4AX7z7fEzkImMLRbyGgu1gHqNIbxWPJLr0W_pQ,3360
scripts/generate_harmony_dataset.py,sha256=Du_ZbQOq07pWcHWhEFnXRLUl181xe6Du2vLqaHxfHTc,14211
scripts/generate_tiny_models.py,sha256=zLR824Mschdt-4OnukdYPNaRborpWKRTKeCYlM9EXzA,11299
scripts/generate_toolcall_dataset.py,sha256=KaqZXp1XFznnogifGhukbKzFAnIiLeM6Xi_e_3fqffY,9760
scripts/generate_zen_dataset.py,sha256=xXcK8fPqn1tKHnjCr1qlXEzjdLaaDdB71xlBq0Isl18,37892
scripts/generate_zen_image_dataset.py,sha256=gSf3KLi38zh047NEnIvYx53KpSWIMWR6KE0DPg6ELuY,25283
scripts/generate_zen_multi_image_dataset.py,sha256=cw1vqVTJaOHjNhscHv9EJEw453aJBkahu6hlBLoT6kE,22288
scripts/log_example_reports.py,sha256=mHmbflXohXONZD58iJJ_r5RKuTEdqHw4git54hYELEg,5591
scripts/log_reports.py,sha256=LrKQ6maOyZdOsNLx-l6QPDqc_eEovujEJ0LuZTFhDAg,5730
trl/__init__.py,sha256=GbZoQ44sHszIyFXKXiVSQS0EgOMvGMZzgOySgLzziZ0,6386
trl/cli.py,sha256=y2czQI5fMomfFim2X_aAt-iH0N4N8h2JxP_0qNPKxFU,7115
trl/core.py,sha256=cj5h1UqHSd5ttpj07UzGcQrSLJQVCzrc7MRtfMXcP60,6034
trl/data_utils.py,sha256=pmy4-5HLX1DvL8YxZiKEy54vWEGnhc2epIzfqyx_op0,34027
trl/import_utils.py,sha256=j4HAJbf3WKntxH2qbhBzz0stjEpGYQdcFzFjp4pimao,5405
trl/mergekit_utils.py,sha256=OyJ0GyyoN1Yu0PcaPnfW7XOFYYAOWuU0PwbH9ElVRtw,11096
trl/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
trl/accelerate_configs/fsdp1.yaml,sha256=FBw8i6diLwfcUI6Uu7T-UTpd7niY062tLsCFMLoT0c4,725
trl/accelerate_configs/fsdp2.yaml,sha256=zg1L3kVDJb7kvuxIPaqMN6mZQFD2mvG8IxSRmkY7dDk,627
trl/accelerate_configs/multi_gpu.yaml,sha256=yDX-SPo8qY3SW8jsf_53VJX1GwIV20wmRPBbCKf4_8I,321
trl/accelerate_configs/single_gpu.yaml,sha256=4MuOZIjHNir2adOzaoOgBwD9IXXRbPNPzBYVW-zkc7I,316
trl/accelerate_configs/zero1.yaml,sha256=fyUlyzQ_nhP6oamOgOBM7MDZMuHvG4xZfm_-2wnuYTo,441
trl/accelerate_configs/zero2.yaml,sha256=kW58rFIjO1Jz0-TWjg2zUHO5BUBHI4TPFIUHb2M-WiQ,470
trl/accelerate_configs/zero3.yaml,sha256=VN3HDSxEn_WCvYpML4thWO7cpr2N39VwAJjMvPtwfxE,498
trl/extras/__init__.py,sha256=5CIPkzvP9D6vpnWLNZz23RNU98MfjeBsxBFMHXKM09U,961
trl/extras/best_of_n_sampler.py,sha256=EMpJgA1QaEaVJ2yr-pXc186XpIwQBPIz2EIK3GorHXI,5720
trl/extras/dataset_formatting.py,sha256=QzTX4Jy5o_dfmAo4VvNZTTrg9gBl7tAiocSno0l-S5c,5001
trl/extras/profiling.py,sha256=txitznUe6ISatuwTGzcbzkUzngEiouA2bCzNKJ77n-w,3258
trl/extras/vllm_client.py,sha256=YVdc-AWkbhUKBozVAtnACn3VXq1D2a_QZ1BVX7NtcoE,15559
trl/models/__init__.py,sha256=1F7UoZ7DAiXgLpsRkDVG04Tx-gOKssfVzwIGH3-8Mv8,2660
trl/models/activation_offloading.py,sha256=5vbcZWoRStXA2qf8tBCZ-xNbRoQePOLz0zyZxMA7ivg,23613
trl/models/auxiliary_modules.py,sha256=2Qamjf5g8XrK6Xkz3iwgtL_NNBLND_dBM9P28OeVe3I,3344
trl/models/modeling_base.py,sha256=meU6rIj_remUZt0CuhwZt3-3T8-kiX7ie-pMLZEpDDo,30786
trl/models/modeling_sd_base.py,sha256=r9GHUAFbmoO7G9EF0gNTzvTv5OY4XZbnMnWF-Qxqbk4,42248
trl/models/modeling_value_head.py,sha256=Uj_vVHfJX3NzvlQ_KKwrqsVa2gQ2d7W6_bc3VsC3GHY,18833
trl/models/sd_utils.py,sha256=nnqSZL0eSJeX2RbceUhpWQdAz2L4cSuGWt_XEqvjBjs,5879
trl/models/utils.py,sha256=ZAyVsuLOSErSEvpZI3c07L5-s0-Mtg2IgF1cSzSuZbo,24392
trl/rewards/__init__.py,sha256=A24vQvT1Pyv8g-TcodKeQyXayR0LsdIOi-QWL9eamYU,1066
trl/rewards/format_rewards.py,sha256=fCQhtw6lazljB5Kv20ZAaD5XraoxKqVDBvqbscmldYE,2211
trl/rewards/other_rewards.py,sha256=WhVSfVjOZ5cGaLSfmf9s3BGtnpsrtowY7PMFtcpU1JM,2634
trl/scripts/__init__.py,sha256=tDv5yBlfQh0ovYdrTF-xW7dLwbARJPhHT8JkgKs4KO4,1077
trl/scripts/dpo.py,sha256=DqQeZU03hB1WrwXcijYU4zzw4VLcRLLXBncky0yxUfQ,6439
trl/scripts/env.py,sha256=b2AblbPA91lL54MxpW3NNw-1sTKa6QDSCPsheHr3gbY,3681
trl/scripts/grpo.py,sha256=HMk_oClEHvMZQ78TO-7cqJEkLgKijxZu38t498bg0RE,6345
trl/scripts/kto.py,sha256=tj4nqUCSW7CUW2Q2pBgW7w51NbXVNQWpAqw0kGOb4wU,5181
trl/scripts/rloo.py,sha256=tepKYTze0-VzdIgdaYcXJw0YB8YsOHWOd_c9jw5vyCI,5907
trl/scripts/sft.py,sha256=5P368AwCdHtsTjZmzMgaHf7zX1UExKqXo-xEMXjgDg8,6011
trl/scripts/utils.py,sha256=-VRNfux1cPVWrI229IKZMFLmhdPT6wgHkFV1oUeN1zM,18351
trl/scripts/vllm_serve.py,sha256=ifccNRJW-M4eT9OlcXf_MyRLrZmNhZAObSxrqi0nKaI,29944
trl/templates/lm_model_card.md,sha256=zq0dXorqEOnjHJ8wxwhI3LS2R6qiHmlcgPcVzliwRmc,2158
trl/trainer/__init__.py,sha256=a4_hAFwa0cNzouQLmYfOyA6KzYWXsQNIZU2SKr6tKEY,5475
trl/trainer/alignprop_config.py,sha256=jLsZIkPRpyKZspVsk7gcn0JS7Mg2mCrccl-slpwOpIU,10196
trl/trainer/alignprop_trainer.py,sha256=CuQfgPPKe4VfQ8krLQtwIyc59Zhkqi7FJaiEZW9x3PY,19225
trl/trainer/bco_config.py,sha256=8aMCd52YlMOvz2v9tXfLMKOBoskDc7-OM60Dqhcd4fE,10113
trl/trainer/bco_trainer.py,sha256=a9v8UABvPhuYD8QwLbqc60pMhc8jhAOwXjZOWTRt_lk,72851
trl/trainer/callbacks.py,sha256=84k26cZXcVKWUm_ha6EXi_8yBblaTR69HRwK2lAzATI,32320
trl/trainer/cpo_config.py,sha256=PLW0AvTqXkxgfStF8HwMzZAAdqtcbS7iylgT0884X_M,10521
trl/trainer/cpo_trainer.py,sha256=MRm7Iag59PN__wkB6nldv8o3JSZN96ChXaQHtdKwve0,52756
trl/trainer/ddpo_config.py,sha256=1qL0nWhpEYFel7PUpaKXVWfc0PHvrYWiTcSiEc1rwdo,12241
trl/trainer/ddpo_trainer.py,sha256=iKAR2Sm2ZmH33TVAuBOYWJAqWPoWk4SNRS87rDo78HY,28822
trl/trainer/dpo_config.py,sha256=oS-UG8CAHt6LT1T0fIlOyJXZT8fFY_ZvwyygDuc55Z0,24963
trl/trainer/dpo_trainer.py,sha256=tE9DCNaggOEoTJpNN1Lzl7IzT2o3grXE_UaekLekEuo,102759
trl/trainer/gkd_config.py,sha256=jBTXHa8xxFlpIKbYIDmwr69xpAzwiFs2DOCl3era878,4949
trl/trainer/gkd_trainer.py,sha256=_bIe-N2kaC9hb5_PJYI2RiPn35kMteUhIxp1z_pBEHI,21837
trl/trainer/grpo_config.py,sha256=Uhs7b3bJvywpvyREf1xWt2zDbpnuO62KiphHgTfaSB0,39714
trl/trainer/grpo_trainer.py,sha256=8hcYPIS-mziyNFSSDrG5FHJqo8S53UwHbUVb2qZ78bg,102748
trl/trainer/iterative_sft_config.py,sha256=DgppfN5EPOY9qGXsx5QLC5CdQdVbZnYpHrcS8uJIrmM,4701
trl/trainer/iterative_sft_trainer.py,sha256=PvvIJJV2soOEMgJlxWA7nJpU3jUkHYmUYM0QxylQSng,21250
trl/trainer/judges.py,sha256=3dA9Ci95FTZ9NZx0jRyH2yNbeLV3tKdwb4a6NVXG3tM,19576
trl/trainer/kto_config.py,sha256=6GI72tTb-j-ibctOa0OCHA4U8VC-scndcNG1cybjoZ4,11418
trl/trainer/kto_trainer.py,sha256=zPvcJvxRgR1HlxuL9K4vTjf9Pgw16YuB4kWVRnE5ZUg,82919
trl/trainer/model_config.py,sha256=yXCtfrZ32A1AZj4FnxWPXLJ_-VSY2W6o5V-AX0URPM0,9529
trl/trainer/nash_md_config.py,sha256=T3004FbQP7A9mir_I-XvW9GkzoykRL4U_bMg52NLVFE,1844
trl/trainer/nash_md_trainer.py,sha256=gqU9dNax7CmWsPbMBmFD698l_atCdZT4Ka3HKRsixsk,24765
trl/trainer/online_dpo_config.py,sha256=RlrWg1apYRYrBOL1Furw51bl7jvxXsDtLCloi0S95Mc,21303
trl/trainer/online_dpo_trainer.py,sha256=k1wIbIF3asQuL6CVgt59tCebGzZNGLnxXtd4XjdIwow,79282
trl/trainer/orpo_config.py,sha256=NmKNplPgUx-9mCdRHEQtSaVFQHu0SAC64bvWGY9P_Xg,7834
trl/trainer/orpo_trainer.py,sha256=McI0pb_GIkggrPD-U-ufygYr8UanryXsHsGQ09Gc6es,51222
trl/trainer/ppo_config.py,sha256=-_wtzoOV5FfUJOk1SERIosd3oPfkbkCGlOjpMm5jDUU,6081
trl/trainer/ppo_trainer.py,sha256=g2-czmb8UYulMpqdjAVzbWRcewga-rttNnU4IXU_7Bw,42426
trl/trainer/prm_config.py,sha256=bjWhZXBvPCMfqzUQPGrZVTGUqaGfR8JZa42WKr8N2Vo,4739
trl/trainer/prm_trainer.py,sha256=nbXyBd_p-W5DckdvhXSTRzbH04ACvkslqi_X3itR0l0,15559
trl/trainer/reward_config.py,sha256=1Oac6GUphF32u-zjU0UXmVtVbc6qJzM669z5MDALfic,4653
trl/trainer/reward_trainer.py,sha256=LPE7V02saUtfoEKVbiF4xuNsRED8Jwnlfi0vhqeDunQ,18360
trl/trainer/rloo_config.py,sha256=mrE-59VIHKbkFwnGmJ6K5buIZqtsaJQmb7zA1x8ePXw,34858
trl/trainer/rloo_trainer.py,sha256=VYc5UzkMMhFSK07HhKIeiiyBivEuLHB7vyYF13qlY0Y,82332
trl/trainer/sft_config.py,sha256=3ynJ5qJ13Of7WVqMPHS4hWfjUZaEMCRHsPZQ5RxkAaU,13795
trl/trainer/sft_trainer.py,sha256=wjeL0YJ1HN_KeIA3kgh2RWzHcCKQ-eDq9M8lIUUKfV0,66637
trl/trainer/utils.py,sha256=B8zMkCPWBShUQ5DXDvwKUF0twekkcxM8BF0z9-2XY2A,77056
trl/trainer/xpo_config.py,sha256=nPzvCvvJpEWVBNnq-vv5CPpyTVZkQ47ZucVh_87fOPE,1667
trl/trainer/xpo_trainer.py,sha256=BPonndfWRNILIp-2-czot2Tzl0ZwMxh0DGqFj1mR6is,27411
trl-0.23.1.dist-info/licenses/LICENSE,sha256=HZDsv3u4D_J-9nt3hfYhSAtlK4LkbHLYpXMLDbZb81U,11355
trl-0.23.1.dist-info/METADATA,sha256=QQ0D_feXbRUXLxA46IPLuttCPG7ySWKi4-XOUeTyCNk,11846
trl-0.23.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
trl-0.23.1.dist-info/entry_points.txt,sha256=EiubuQrZSGOohRB4xVoiVAyWsPsOHPSVpbzSk8YB0x0,37
trl-0.23.1.dist-info/top_level.txt,sha256=lphUqoPSFELZEDepHSNwOoZdSmPcI9EVdsv2QWHV9QM,21
trl-0.23.1.dist-info/RECORD,,
