examples/datasets/hh-rlhf-helpful-base.py,sha256=C4PNdI4xg4gcdp4C4xBPdnwU5xnmL8Bjd_mh42qp3VQ,5408
examples/datasets/llava_instruct_mix.py,sha256=o75R8EP24i0hm7J1KeW6e2EMcvaRTOiUZaRSoCCjaSQ,4443
examples/datasets/lm-human-preferences-descriptiveness.py,sha256=11GZ7x2lBSF2Jz_4r1gIBtBztFpwyAuQIEcFCU7a9Sg,4943
examples/datasets/lm-human-preferences-sentiment.py,sha256=IzXYqJrkDJk2BPIgXLAoff7yMsFZSEYSleAfvm59IZg,4621
examples/datasets/math_shepherd.py,sha256=2CqbxaJRghO6NG6y4hyOs5mdyDbDGkw4kBDCFH6JWvo,6561
examples/datasets/prm800k.py,sha256=9mv6Kl7X-ZMtILPltOqRK_a4b1la6si-VdoRs8cTFXs,6078
examples/datasets/rlaif-v.py,sha256=elX5GJFNCK4KCknhmCZQ7T0zGU8P-NADY_1E1n7e1zo,4642
examples/datasets/tldr.py,sha256=jJV5_-gqhLAJ4_mreEEkO_fixiPZIkZoqujCN8dmdlo,4294
examples/datasets/tldr_preference.py,sha256=2IYpJF_ioSHBeeF97iztJ3MjpoG2UUaShE6P80ildkM,4458
examples/datasets/ultrafeedback-prompt.py,sha256=Nqh5gSD1-VTkA6o1-l58IUDFozvBEM8qK6S2RJlcreU,3531
examples/datasets/ultrafeedback.py,sha256=AeKecokY4tNGzGyhaODxUrcLRpx47KSJHEVeQoe2ujQ,5527
examples/research_projects/layer_skip/scripts/benchmark_layer_skip.py,sha256=VgPMvXr_KPMJ_2WgKFoATnA_nVCaBHSl9ezJyy4MLjs,2588
examples/research_projects/layer_skip/scripts/config.py,sha256=9U7jhkjG6n8HPFvNz_xRHzUEqwrHcnRBHmFqtCc32_E,1032
examples/research_projects/layer_skip/scripts/custom_trainer.py,sha256=ptZrlvWCAJMgwxYEkWL807JIqcc5KezbTKYP6X3ZFmA,2066
examples/research_projects/layer_skip/scripts/layer_skip_sft.py,sha256=GN2Ul35n0SOon7iRoTumE6amgizZq_i_oYRnbglVdMQ,3407
examples/research_projects/stack_llama/scripts/merge_peft_adapter.py,sha256=xlhCG9LGYg1QUbm5oal-8nvZxmm1JIP1lRakU3m7fMw,2610
examples/research_projects/stack_llama/scripts/reward_modeling.py,sha256=JTWWI-SmiC5TDT_-U1hLTU686IvpsLla5u4fRYAhG4A,11868
examples/research_projects/stack_llama/scripts/rl_training.py,sha256=5kQ37v9PyQwvThBRPVntIOZVd3-a0wtj5lwOpiZs0Gs,10313
examples/research_projects/stack_llama/scripts/supervised_finetuning.py,sha256=40wQBDd-hZcxGQwnbtHEMSBdXDfv-BAuSxoQhK7z5LU,7727
examples/research_projects/stack_llama_2/scripts/dpo_llama2.py,sha256=xdRbuUQroxzpLGQyx-Mw7sQmwrcGmZ3cVBDFZRiml2g,9958
examples/research_projects/stack_llama_2/scripts/sft_llama2.py,sha256=9-D51uZyHK9tQzVHpbGLiDbHxH8bXRtqO_4sunMZGnI,7725
examples/research_projects/toxicity/scripts/evaluate-toxicity.py,sha256=AR1O23hNQySxM_rd5sdnJYNULoCO-EnH45tnEkd_rjs,5442
examples/research_projects/toxicity/scripts/gpt-j-6b-toxicity.py,sha256=UVc17Jvb4yepT7nxCyNuBlZHqNCXxg_76mKyWgfE2pA,9129
examples/scripts/alignprop.py,sha256=T65hMZ5HVtkPwUt_DkJsS8p3ueP9Vxz8xbpuGF9uf1Y,5457
examples/scripts/bco.py,sha256=ia0tgeEl0zPw9VR9KSuR7ixTxZTobIsCEy7-CHphMz8,5979
examples/scripts/cpo.py,sha256=tSmAoOQ8v5wTM999XdvGNZwTkSqhIxOnPOKNGQ8yJdg,3642
examples/scripts/ddpo.py,sha256=ESsIFKSntlgUnLo9osf6BrdHOwU5A8y2KdNCnRPS0Y0,7825
examples/scripts/dpo.py,sha256=AViXy3Gjt9XOr1WFihtCNavsRvzMk_A1g-NRx6jEqY0,900
examples/scripts/dpo_online.py,sha256=9A6B862hd-XZemA0ZBDlzcyZ5VEZq47jmFhZYI2YHZ4,5716
examples/scripts/dpo_vlm.py,sha256=F7UIQ7dY-yowcMs0hHJGiLbA_vFOuq_l2tQw8392-UM,5956
examples/scripts/gkd.py,sha256=1ZcvoVZR1smoQVLysp4HCAsAJ_dJKLfDpeowPvnbqII,4827
examples/scripts/grpo_vlm.py,sha256=5PVh-ZAnXcE_ra4fIRbWcLy5Nn2g-UoX_6duzoU_RNA,7310
examples/scripts/gspo.py,sha256=paQE_qtjGsvyDQjun2Ipx0TYW3dVOFmOfTH6XemFPy8,6446
examples/scripts/gspo_vlm.py,sha256=71hIUnaBYq1-Rs5XjvXIMPAhkkilv9UEcb0tMIwV7K0,6886
examples/scripts/kto.py,sha256=9zFvGKkSvZ3rtygHAJz-Qam-8FfCz3k2kdKoDa82bN0,3601
examples/scripts/mpo_vlm.py,sha256=mQ7GJTnD4CHI7VeZQat1TRQVNsqjEnBOSGPdjDlCDfs,4650
examples/scripts/nash_md.py,sha256=DNNhYnCWr8sbSvqsDxWNlDiK0Jrc4ykGcaItw2_DkBM,5446
examples/scripts/orpo.py,sha256=YgiHPtN6o9gNuwsxIyC2ZtYpsS90GngdmPvgV9KfdrQ,3725
examples/scripts/prm.py,sha256=Taxuw8WSEudzhK2gSHH5ncK8miZRx9pMefE122dej2o,4620
examples/scripts/reward_modeling.py,sha256=bXXJzGxi7J34zQQRZDuHJZeqah53H3pr7bjS_b1JiPU,4997
examples/scripts/rloo.py,sha256=MVsOVjT0LmTAYrPwCvv3vY37l0OE4kZ8-_H0ETynxV0,5409
examples/scripts/sft.py,sha256=C1cJ0H9PoSFHi9p7RJB3GjWzRRyCog-I2gQQyIINslY,900
examples/scripts/sft_gemma3.py,sha256=BQ6wFP1ar9o9uraYbsewoI-0mjZKVR-AvsVfiINsZD4,2148
examples/scripts/sft_gpt_oss.py,sha256=llKdVNoEZTanoHb8rarDp7xIKBKFJ6h4BXV-HZWrj6g,3461
examples/scripts/sft_video_llm.py,sha256=OpAbftoCLsnm4aa76rMsr1Sdb57QdQckFXkjhACSIgc,8366
examples/scripts/sft_vlm.py,sha256=NAlrMhvMD5YSqIdmDSCWyb5uskVj3Avw4WaxFDucqGs,4032
examples/scripts/sft_vlm_gemma3.py,sha256=f9BQ0unw69vy2b9cx5WOaS8hu_IF-8CbhbEqVXKek7U,6657
examples/scripts/xpo.py,sha256=BM49rPv9QoLAiaCF6lHPesx7X3d8FXMp4EpBqnv3oiI,4875
examples/scripts/evals/judge_tldr.py,sha256=oE1woi565fsvMaCCi-tmSVBR4AlO0zwykqceXi5yDPw,4200
examples/scripts/ppo/ppo.py,sha256=O1jE7N_F1hmeiQ_BXi-TUHN_xcG0jIpMAUZtzssKdxM,6313
examples/scripts/ppo/ppo_tldr.py,sha256=XeGyIhyox3gjchse6CzmIpSd1oMDs3PosRxhPP-ZnDk,7006
scripts/add_copyrights.py,sha256=rWy8u4AX7z7fEzkImMLRbyGgu1gHqNIbxWPJLr0W_pQ,3360
scripts/generate_harmony_dataset.py,sha256=Du_ZbQOq07pWcHWhEFnXRLUl181xe6Du2vLqaHxfHTc,14211
scripts/generate_tiny_models.py,sha256=zLR824Mschdt-4OnukdYPNaRborpWKRTKeCYlM9EXzA,11299
scripts/generate_toolcall_dataset.py,sha256=KaqZXp1XFznnogifGhukbKzFAnIiLeM6Xi_e_3fqffY,9760
scripts/generate_zen_dataset.py,sha256=xXcK8fPqn1tKHnjCr1qlXEzjdLaaDdB71xlBq0Isl18,37892
scripts/generate_zen_image_dataset.py,sha256=gSf3KLi38zh047NEnIvYx53KpSWIMWR6KE0DPg6ELuY,25283
scripts/generate_zen_multi_image_dataset.py,sha256=cw1vqVTJaOHjNhscHv9EJEw453aJBkahu6hlBLoT6kE,22288
scripts/log_example_reports.py,sha256=mHmbflXohXONZD58iJJ_r5RKuTEdqHw4git54hYELEg,5591
scripts/log_reports.py,sha256=LrKQ6maOyZdOsNLx-l6QPDqc_eEovujEJ0LuZTFhDAg,5730
trl/__init__.py,sha256=GbZoQ44sHszIyFXKXiVSQS0EgOMvGMZzgOySgLzziZ0,6386
trl/cli.py,sha256=y2czQI5fMomfFim2X_aAt-iH0N4N8h2JxP_0qNPKxFU,7115
trl/core.py,sha256=cj5h1UqHSd5ttpj07UzGcQrSLJQVCzrc7MRtfMXcP60,6034
trl/data_utils.py,sha256=LfaowyHfruSli8pJjSR5bjHV7BB8vK7-xaJ3JmPXnrE,33985
trl/import_utils.py,sha256=j4HAJbf3WKntxH2qbhBzz0stjEpGYQdcFzFjp4pimao,5405
trl/mergekit_utils.py,sha256=OyJ0GyyoN1Yu0PcaPnfW7XOFYYAOWuU0PwbH9ElVRtw,11096
trl/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
trl/accelerate_configs/fsdp1.yaml,sha256=FBw8i6diLwfcUI6Uu7T-UTpd7niY062tLsCFMLoT0c4,725
trl/accelerate_configs/fsdp2.yaml,sha256=zg1L3kVDJb7kvuxIPaqMN6mZQFD2mvG8IxSRmkY7dDk,627
trl/accelerate_configs/multi_gpu.yaml,sha256=yDX-SPo8qY3SW8jsf_53VJX1GwIV20wmRPBbCKf4_8I,321
trl/accelerate_configs/single_gpu.yaml,sha256=4MuOZIjHNir2adOzaoOgBwD9IXXRbPNPzBYVW-zkc7I,316
trl/accelerate_configs/zero1.yaml,sha256=fyUlyzQ_nhP6oamOgOBM7MDZMuHvG4xZfm_-2wnuYTo,441
trl/accelerate_configs/zero2.yaml,sha256=kW58rFIjO1Jz0-TWjg2zUHO5BUBHI4TPFIUHb2M-WiQ,470
trl/accelerate_configs/zero3.yaml,sha256=VN3HDSxEn_WCvYpML4thWO7cpr2N39VwAJjMvPtwfxE,498
trl/extras/__init__.py,sha256=5CIPkzvP9D6vpnWLNZz23RNU98MfjeBsxBFMHXKM09U,961
trl/extras/best_of_n_sampler.py,sha256=pke-6-dqnLMfFA5lVBB5_Iq6Z9varKFwFBhBqnjlsFQ,5802
trl/extras/dataset_formatting.py,sha256=4j1ZO_oPMiql6c0BzKXwNmijtJiFvT5WQ0SGUCQWCrY,4796
trl/extras/profiling.py,sha256=txitznUe6ISatuwTGzcbzkUzngEiouA2bCzNKJ77n-w,3258
trl/extras/vllm_client.py,sha256=Ph8K23Jl9FxwOVm_sDHHE6ZRSHoiv6U1jDpaOS63yeY,15266
trl/models/__init__.py,sha256=1F7UoZ7DAiXgLpsRkDVG04Tx-gOKssfVzwIGH3-8Mv8,2660
trl/models/activation_offloading.py,sha256=5vbcZWoRStXA2qf8tBCZ-xNbRoQePOLz0zyZxMA7ivg,23613
trl/models/auxiliary_modules.py,sha256=2Qamjf5g8XrK6Xkz3iwgtL_NNBLND_dBM9P28OeVe3I,3344
trl/models/modeling_base.py,sha256=meU6rIj_remUZt0CuhwZt3-3T8-kiX7ie-pMLZEpDDo,30786
trl/models/modeling_sd_base.py,sha256=tES3skT-k5wrcnbsu-yXGMQaN2ryOMJHD3jYpuJHR7s,42321
trl/models/modeling_value_head.py,sha256=Uj_vVHfJX3NzvlQ_KKwrqsVa2gQ2d7W6_bc3VsC3GHY,18833
trl/models/sd_utils.py,sha256=nnqSZL0eSJeX2RbceUhpWQdAz2L4cSuGWt_XEqvjBjs,5879
trl/models/utils.py,sha256=roD9DdwvqACjc09k3VKk3TKNFDvoq_QCG5b0Yo7rksU,24405
trl/rewards/__init__.py,sha256=A24vQvT1Pyv8g-TcodKeQyXayR0LsdIOi-QWL9eamYU,1066
trl/rewards/format_rewards.py,sha256=fCQhtw6lazljB5Kv20ZAaD5XraoxKqVDBvqbscmldYE,2211
trl/rewards/other_rewards.py,sha256=WhVSfVjOZ5cGaLSfmf9s3BGtnpsrtowY7PMFtcpU1JM,2634
trl/scripts/__init__.py,sha256=tDv5yBlfQh0ovYdrTF-xW7dLwbARJPhHT8JkgKs4KO4,1077
trl/scripts/dpo.py,sha256=_BFSzOGOMpZ9GLxgUrYLo8KLyMFZ81C8TztcR6j8F6M,6331
trl/scripts/env.py,sha256=b2AblbPA91lL54MxpW3NNw-1sTKa6QDSCPsheHr3gbY,3681
trl/scripts/grpo.py,sha256=ljrj5pFgkPtAPWIhTZePIrOwTq9W-nbYGwFsbS4wwkw,6025
trl/scripts/kto.py,sha256=X9QnvmzZbe1A-AVotVg7icJE6eQsFPoUIHuafwPUMUg,5164
trl/scripts/rloo.py,sha256=tepKYTze0-VzdIgdaYcXJw0YB8YsOHWOd_c9jw5vyCI,5907
trl/scripts/sft.py,sha256=hyRWWAvbu7WCSSKtSPRaCq62JtL_IVdTzNSBkbuyVpI,6111
trl/scripts/utils.py,sha256=-VRNfux1cPVWrI229IKZMFLmhdPT6wgHkFV1oUeN1zM,18351
trl/scripts/vllm_serve.py,sha256=_8c4T22O0DM6xBOezXcamGdsbyYw6L_JcWODD2WXs2g,29192
trl/templates/lm_model_card.md,sha256=zq0dXorqEOnjHJ8wxwhI3LS2R6qiHmlcgPcVzliwRmc,2158
trl/trainer/__init__.py,sha256=a4_hAFwa0cNzouQLmYfOyA6KzYWXsQNIZU2SKr6tKEY,5475
trl/trainer/alignprop_config.py,sha256=jLsZIkPRpyKZspVsk7gcn0JS7Mg2mCrccl-slpwOpIU,10196
trl/trainer/alignprop_trainer.py,sha256=D_J7Aqh3C6gOq73Kwa54zAyiTJZbbSer7y_ZyEMmhSI,19123
trl/trainer/bco_config.py,sha256=8aMCd52YlMOvz2v9tXfLMKOBoskDc7-OM60Dqhcd4fE,10113
trl/trainer/bco_trainer.py,sha256=KD_tWvU-hqAKO9ukKtxB-BULGD0ti4SlIfLmbB0X2A4,72458
trl/trainer/callbacks.py,sha256=84k26cZXcVKWUm_ha6EXi_8yBblaTR69HRwK2lAzATI,32320
trl/trainer/cpo_config.py,sha256=PLW0AvTqXkxgfStF8HwMzZAAdqtcbS7iylgT0884X_M,10521
trl/trainer/cpo_trainer.py,sha256=EFRLCAOWg6t652Bn3xivawx9X5gUDqxizob_Sgx82kc,52834
trl/trainer/ddpo_config.py,sha256=1qL0nWhpEYFel7PUpaKXVWfc0PHvrYWiTcSiEc1rwdo,12241
trl/trainer/ddpo_trainer.py,sha256=GRxNT802fp5xFZgrAcqKJt2omNW6BzMVKWEYN23Qo_0,28831
trl/trainer/dpo_config.py,sha256=oS-UG8CAHt6LT1T0fIlOyJXZT8fFY_ZvwyygDuc55Z0,24963
trl/trainer/dpo_trainer.py,sha256=x99kUm21GOXJOQQ92JskYqpyIw1GU2h8mee2ysHYJxI,100745
trl/trainer/gkd_config.py,sha256=jBTXHa8xxFlpIKbYIDmwr69xpAzwiFs2DOCl3era878,4949
trl/trainer/gkd_trainer.py,sha256=xYxIxLDKj9ERv2KtTdL-ILgY7MrmD8OuDsc9-RjNxUY,19200
trl/trainer/grpo_config.py,sha256=NKXgkdsDP5aeEKZrVf6-tveUNcuDxDuSbspbM7GyyC4,37337
trl/trainer/grpo_trainer.py,sha256=MAQo27h-cZSWb5-4wzv9Li7FP5lFQPUXTHL_pvq8wOA,98065
trl/trainer/iterative_sft_config.py,sha256=DgppfN5EPOY9qGXsx5QLC5CdQdVbZnYpHrcS8uJIrmM,4701
trl/trainer/iterative_sft_trainer.py,sha256=PvvIJJV2soOEMgJlxWA7nJpU3jUkHYmUYM0QxylQSng,21250
trl/trainer/judges.py,sha256=jVFXZqXdxe4cnLkgUiiMX4RgN1UVpbdbrA-4tklYT-g,19572
trl/trainer/kto_config.py,sha256=6GI72tTb-j-ibctOa0OCHA4U8VC-scndcNG1cybjoZ4,11418
trl/trainer/kto_trainer.py,sha256=BhjwuA69mrVhymrBIVGXGsJ4NjX8pnb_4GAUh-JANQc,82609
trl/trainer/model_config.py,sha256=5TKHcmj_6otQ8npA62R44-KWGV-kZVOIY-9ktKX7OWk,8959
trl/trainer/nash_md_config.py,sha256=T3004FbQP7A9mir_I-XvW9GkzoykRL4U_bMg52NLVFE,1844
trl/trainer/nash_md_trainer.py,sha256=8h_b192-v8faUCESzCiayx4avg9c7X7_iLDHLymTNco,24245
trl/trainer/online_dpo_config.py,sha256=a-AjonGxQ4G3Ft7YcXc1MLRcBNqUWJ_5BG4-4ne1HRk,10313
trl/trainer/online_dpo_trainer.py,sha256=AY5cKgHauGiE1Z5tkMBEe-nKQcQfLpOtPLeadQnLWpk,39803
trl/trainer/orpo_config.py,sha256=NmKNplPgUx-9mCdRHEQtSaVFQHu0SAC64bvWGY9P_Xg,7834
trl/trainer/orpo_trainer.py,sha256=aOp1Gh3iUmrdL0_ekHltB-F5hxRt0xd4JkdUiqzJJOo,51300
trl/trainer/ppo_config.py,sha256=-_wtzoOV5FfUJOk1SERIosd3oPfkbkCGlOjpMm5jDUU,6081
trl/trainer/ppo_trainer.py,sha256=qAZAWt_OAUtwI-1AB5Qmlu9I2y4QVzedY9G-6m3E5A8,40280
trl/trainer/prm_config.py,sha256=Y6PIIQJeCvJajxyDzMpTI9seG8aNJ7RnacquQfK8VxM,5388
trl/trainer/prm_trainer.py,sha256=nbXyBd_p-W5DckdvhXSTRzbH04ACvkslqi_X3itR0l0,15559
trl/trainer/reward_config.py,sha256=BBXiY3pV6PdpY0I1OhJOuJ3a4vTj21cQfXUi3T8msVA,5302
trl/trainer/reward_trainer.py,sha256=JTvilVsY91ZJgYY5PgBsR1bneiW96N2R5RUG4E8Tp0k,18130
trl/trainer/rloo_config.py,sha256=LnLp675umRvNtxGn0UWTjh3ZI-fYUaQZcvuWFImbh50,34856
trl/trainer/rloo_trainer.py,sha256=n8uHs8dT2mEH8HfLdwEgfanCvsWlwSJ4WYVVShNnqKU,82426
trl/trainer/sft_config.py,sha256=H3e1WkypErgWOaZO5ZXl3puY-E_UxNUvJWTQR3Tr9bI,13406
trl/trainer/sft_trainer.py,sha256=tVB04CNdjzxy3uwc5RIKOVhQ9ChXvl_dbKzQYfPbjI0,63036
trl/trainer/utils.py,sha256=FxLYENeL2RuUxkYuE_9qDb3V99IeQcHiiYPQFZ9cFLM,76388
trl/trainer/xpo_config.py,sha256=nPzvCvvJpEWVBNnq-vv5CPpyTVZkQ47ZucVh_87fOPE,1667
trl/trainer/xpo_trainer.py,sha256=urLhcrjCStBTrDGosw97hwWcr0gRdEQHrgZejNBB_Wc,26647
trl-0.22.1.dist-info/licenses/LICENSE,sha256=HZDsv3u4D_J-9nt3hfYhSAtlK4LkbHLYpXMLDbZb81U,11355
trl-0.22.1.dist-info/METADATA,sha256=jFHONiKY3kN5efIQocURUaXmRrw27z4RwikJsFD6OHg,11638
trl-0.22.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
trl-0.22.1.dist-info/entry_points.txt,sha256=EiubuQrZSGOohRB4xVoiVAyWsPsOHPSVpbzSk8YB0x0,37
trl-0.22.1.dist-info/top_level.txt,sha256=lphUqoPSFELZEDepHSNwOoZdSmPcI9EVdsv2QWHV9QM,21
trl-0.22.1.dist-info/RECORD,,
