nanotron/__init__.py,sha256=Myupfp18EVZacE4Tp7qYqYpimE8TWJJr-zQjrBwIJiA,20
nanotron/constants.py,sha256=DB5cO2VtO5isOZ3aE7m4ilF3Vc-U-3YhDTYRduvBMbs,146
nanotron/dataloader.py,sha256=dMUaJAwbo6-KPZ9xtBhmbjgyVhdSZ4j7AZe0bIR0ZVk,22965
nanotron/distributed.py,sha256=feR5LHSSPBl7vATg1Hj8dVLQcsQYd9hbVcPu-RO5q9s,10600
nanotron/helpers.py,sha256=kNEEW5sv6qHDuGBlL476pmgQHfsd2c5g2Ri9YnxvmCI,20344
nanotron/logging.py,sha256=R3dqFDNZkNmQUQCOaowECVk5LUwOO6XXgXV8MPwrc8c,9652
nanotron/random.py,sha256=dOJ0yGmB3BiY3cHDQDul7W8e7QT4zceolocOya7XC78,4700
nanotron/sanity_checks.py,sha256=nFMTsJV-_lfFOlMuARHk3Mq__bJXMOtJR2WKbrF95oA,9805
nanotron/trainer.py,sha256=34pOAXEBySwCobmGF_E_mG86v1pcG8arSVbM6L3o2PY,34830
nanotron/utils.py,sha256=2t8NrDWTr6DSMew1nPVQ80nQyrvQU2usLOKOWyhdwMc,5779
nanotron/config/__init__.py,sha256=Xhj3aLhAazqW0vr3Ve7p7tcVXl4A09Zm-Nf8aPwu7TA,186
nanotron/config/config.py,sha256=v-4JkLzooNJA-4kbgNlxSArZfhqx6JtkRdwqP8Oxro0,13944
nanotron/config/lighteval_config.py,sha256=BCwDpDucUs-rMFTT5g2MnAyXdQtOfKjCmKIqc7q5W8w,2933
nanotron/config/models_config.py,sha256=OoyDkF5mYTdmtsXBmBUYkOdjEq8ex-nmPK1SjXPfujc,3543
nanotron/config/parallelism_config.py,sha256=KCXiz-4PfBMF-V6eY69psEj-mnkx0F8gD736aMq1RTc,2088
nanotron/config/utils_config.py,sha256=R0L0tiyJuoKnKY7ojh2Bv6DP2rQyjyuvQH4kYiMoVj4,3612
nanotron/fp8/__init__.py,sha256=kNNaMxeLtqdSPLBFUztvdnSdLpqvDgwiO87qLQwdIBw,419
nanotron/fp8/constants.py,sha256=Meby-gLCxOApcMDX5uPoU4U4y19FuZEa2dgdjv9Rkao,441
nanotron/fp8/dtypes.py,sha256=Tov3hkiaao1KDCo83WC7YUXHaypAXqSD26r43_uxkIo,114
nanotron/fp8/kernel.py,sha256=3_4owTaLnOPeimRJycDDK3ikcoXN3nJ1nOfeWGM2OcE,2057
nanotron/fp8/linear.py,sha256=zG4P_M6K2to5hjUcr4AlcxhAH-0WiIn7gLv4eykgaUQ,4781
nanotron/fp8/meta.py,sha256=V5VwZUdygJ5jHPjx_N1cmSdZ8up91zv9YBTJzYek6QQ,1198
nanotron/fp8/parameter.py,sha256=ogIwi-iBdDBUMUYyRmZdgmGEau2XyY4eQ3aXiyCXv5k,1632
nanotron/fp8/tensor.py,sha256=AGaTMinofW9f6SB-qS7o-aXdwuVrWTz86Q92wIO6NTk,4699
nanotron/fp8/utils.py,sha256=RaX1qjTdfuGESoJ2KmD5etIanAYcfj2a_-2l6gtHAbA,421
nanotron/generation/__init__.py,sha256=OarEPG2G_RDEsgepHESjnCEnXNm9LwBlnsCI4KzO1cA,198
nanotron/generation/decode.py,sha256=rvxYW8X88-KsRBnRjGdRHM-kYPsu0YQTNjMBN_H8Y_E,40888
nanotron/generation/generate_store.py,sha256=liHDVxfr_aiIDC0tEhs1torP2DXLIL_DjY5ADSXBxJc,1550
nanotron/generation/sampler.py,sha256=wVKRWEnXEwI8cj7VwlmROcVwzfJOinGff3IkqyJ--g0,13758
nanotron/models/__init__.py,sha256=EDAZQjt3yp1aYMV7c25VyuelEvQp-1wiB1xT6oFAj04,130
nanotron/models/base.py,sha256=IgbiodIf1q4e1r5nqHVqbCUrhJsXYTyuGDr87KaBfGI,11503
nanotron/models/llama.py,sha256=eYWeiSoCV_A2I2TaLZSF9tDHhjGa_tJNB9onsWRfmJo,51011
nanotron/models/starcoder2.py,sha256=jOQ13kUql_lIXua9A8S_4ii8i6nKhJhxy6nXlijrvkw,78488
nanotron/nn/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
nanotron/nn/activations.py,sha256=nIlj34cD9i16eOFBNIMqi0UjBbAhQBjXN4iEw6PZ0_M,8784
nanotron/nn/layer_norm.py,sha256=km-25m4KWL7ayel8cJEi6l1yDFstdu-CE_CJBGa7RHg,1568
nanotron/optim/__init__.py,sha256=JCYDRwV2S87-csom079HbKOBHFxCI5vuWCESgepTCN0,502
nanotron/optim/base.py,sha256=2-7sm5qmXJIjH7Z0FQ9T3pIPheViJhlKQcW1WVa2Tio,1087
nanotron/optim/clip_grads.py,sha256=9LzUWr0AJcH5FmTE1FeSb7wXvox4vY2RArbcklLEHWk,4147
nanotron/optim/gradient_accumulator.py,sha256=JGwhs9G_K4Rb9_GgTWBsSTvd59IdSYtsKGnVvEQyBOw,16937
nanotron/optim/inherit_from_other_optimizer.py,sha256=KwKXORwFP4nqwAE8vJllO-7HmGLn5kvTe-jnbJSm3Lc,1754
nanotron/optim/named_optimizer.py,sha256=1PTGewa4fVJWg56Tz89GbtvU1BHvlPnrPWEpMbIyfkI,2936
nanotron/optim/optimizer_from_gradient_accumulator.py,sha256=VLuO-jQUlMn9iYGzBBb0F1J3Vqi1MuBo6V2858zzNgo,3104
nanotron/optim/zero.py,sha256=0AB14nJI81IRetox-f1fy20g4bT1JIth_ky2Gr_94gM,23065
nanotron/parallel/__init__.py,sha256=s-UMh8WBjiAfSbxRgVHHL00EKeJN1eeWVXWCq0jsSX8,69
nanotron/parallel/context.py,sha256=h49jMmffnlTpwM-9Z7tNLsL3mvrxsnmpjCiLJhEbWvI,4955
nanotron/parallel/parameters.py,sha256=XGbPecgYyooboR8_f7uCtGZn_fl9XyRETemPOIPnmx0,7767
nanotron/parallel/sharded_parameters.py,sha256=rsMbn8OlIn_YZ1IEMxNrMTM7Q1G3UVBiZZqYWrPe9DI,5157
nanotron/parallel/tied_parameters.py,sha256=yVpYZah2asV7RBBG8Dl50KGeGnRReI4nh2NPw4F5e94,6551
nanotron/parallel/utils.py,sha256=PtTehpauzJ7HdZBDJY_qYoMZoT6XhuULprj1KQxS74w,1247
nanotron/parallel/data_parallel/utils.py,sha256=BRwMZiePda9lIxAb9U2ovQSgCyy3EX4IH7FqTSSzmp0,1968
nanotron/parallel/pipeline_parallel/block.py,sha256=UBryA5H0E3_Zj3mzQmp-nUnyd0jbnl5h9Yr0xYP33OM,8728
nanotron/parallel/pipeline_parallel/context_manager.py,sha256=7NQweGnxETknCYuCS4T-Pp-wLMkvZT6q3GVd-az7X7I,1013
nanotron/parallel/pipeline_parallel/engine.py,sha256=rw0Mdbk96WeuaOX0awu8Uo99NwJlAlHtp15ytjGVqmc,14227
nanotron/parallel/pipeline_parallel/functional.py,sha256=gwzVeYypQskKM17EFDIoMBAB9VliF-XDZA_n_NJSWWY,4190
nanotron/parallel/pipeline_parallel/p2p.py,sha256=vCf4Ta0fIah2udntB2Vzwz9GV_RYMekxMjKgZOf8nXM,20171
nanotron/parallel/pipeline_parallel/state.py,sha256=xmynF8uyAS5r6g1uYHS7JOvg_LMYWDiv1kDxuXB4VFI,12439
nanotron/parallel/pipeline_parallel/tensor_pointer.py,sha256=CgNxMlsCUCeB9HjkTf_ef3GriocP4PCw8jmp6UjBeo4,452
nanotron/parallel/pipeline_parallel/utils.py,sha256=QgoWFN4Gcf5Umn3wz4vAzo4GS5loM8SGs-TviO4CDxk,1407
nanotron/parallel/tensor_parallel/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
nanotron/parallel/tensor_parallel/distributed_differentiable_primitives.py,sha256=4-Ug_bnpyFhx8sYw-fxNQv71DkHtlw36RSuzGX_dJqY,4796
nanotron/parallel/tensor_parallel/enum.py,sha256=7CVJV5B-4s5Mt7mOCn4fDzMiA6HXoTFlkTNvZvStzCo,206
nanotron/parallel/tensor_parallel/functional.py,sha256=F_qhBXRcfHS0oY-FnfTOAYeXdS7-y0TKAqiPkVky4wA,20601
nanotron/parallel/tensor_parallel/nn.py,sha256=Ffda-gg3MMKGZlOIA1c2kQPe7V9_Kz74PjpBGMqADlQ,10295
nanotron/serialize/__init__.py,sha256=nlZfihiwtvm5S6GFXWMrhIojHLj-XR1omtY2SgYW2eA,136
nanotron/serialize/main.py,sha256=fNGCLhGO3rwP4soE_iC0x9k_QVLFN3lSz0VKrDKWtlw,10529
nanotron/serialize/metadata.py,sha256=r3n_7usMRW0IasgojyXTvyrYcoTEkmaQl8gJMni6eFU,4182
nanotron/serialize/optimizer.py,sha256=bMM7G7Na83M2pT51DOTXTCywjVSo0jHwel_ptu1Ua-I,17066
nanotron/serialize/random.py,sha256=lcGnjuZCk6xbtr9-5FN77XOrUHuUqL3ugq0ADAjzy4k,1551
nanotron/serialize/utils.py,sha256=BS0mXu8qErcc8cNQnR62nB60wZp69suOUCfPvZTAk2o,2382
nanotron/serialize/weights.py,sha256=OXagwN8gYSTjkL0w4LOJdrGLPGdd0SPeX8MuWdvhniM,16481
nanotron-0.2.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
nanotron-0.2.dist-info/METADATA,sha256=0uSTRGCLHZmcte24vZIYghPNZh3xHyi_ixADxuG0VFA,4255
nanotron-0.2.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
nanotron-0.2.dist-info/top_level.txt,sha256=4LTLLbCZoHkoAVCQ6jIC5qUFyQKoUjWJYGW23pXW40w,9
nanotron-0.2.dist-info/RECORD,,
