examples/blend_and_shuffle.py,sha256=d4xSfXJSgWQc7WITQFLp1iiwrDpKG876cJM0eeDsAuU,1696
examples/classifier_filtering.py,sha256=44jDsyMv9Fo2wFhpcr1C7zAzOkSojIVQs68I7wCUJnY,3424
examples/download_arxiv.py,sha256=TciJ7CRkwR7QJ09eehyfUTSVSzyafXrt6-4i-2-okS8,1479
examples/download_common_crawl.py,sha256=wDY1lbFPDk3O6bHJb9pVbTY-XOu4Jy287_7qMVu7LN0,1618
examples/download_wikipedia.py,sha256=kvRsoL47jAii7PlkNCkMmBSi6Ez22Bj4RlbUD0NGRLw,1561
examples/exact_deduplication.py,sha256=6hyvcv6lgKzC98mPabcsF9c599N_axB7DMhcLfegjok,2886
examples/find_pii_and_deidentify.py,sha256=Uj2RR_pbkeWcSpK2maMc5Sfr71ENIz4IET93Ju6YL-o,1766
examples/fuzzy_deduplication.py,sha256=9Lzyfa63r4U3-PaDm-iI1N4upwT4Gr8M2gTXvkscngw,3727
examples/identify_languages.py,sha256=NeWaZSZqjDC8KWAnx4fU1MgKk7IdTs_XSK072zKC73Q,2684
examples/raw_download_common_crawl.py,sha256=4g9Hmta8S4098lK449Tjqex30mqPAyxEFDKMfWWiizg,2008
examples/semdedup_example.py,sha256=MD9GG3PtHV-998OaymaaQPoZefmUrBV-Z42R_KmEM8k,2592
examples/task_decontamination.py,sha256=6pVsJLlswtveltjvGt1b2DxIbr6qwH7qM8ArfZaQk1Y,2641
examples/translation_example.py,sha256=9mgq9fYwtolsBUuTXSHLwMc4Cu3Qh7mqv6MMHv2ibUU,13934
examples/classifiers/aegis_example.py,sha256=ypphdtrW2svhGWDZb81ZW5ztSqGqcYhZtQUkUa0K0vY,2189
examples/classifiers/content_type_example.py,sha256=M4uhR1mXpWOEgzzgBbIzT303b5beXYOs0uVPB2w47u4,2010
examples/classifiers/domain_example.py,sha256=bk6DG-V2GRUvUaErEcVAv3c8LpY56f1LhcvEFJlEqzc,1984
examples/classifiers/fineweb_edu_example.py,sha256=oqjGgRSnaQUmCQvRKvESmDROlwAFQ3szD1_Ukl9COKo,1977
examples/classifiers/fineweb_mixtral_edu_example.py,sha256=wa0OlvcEEgxWqFPMgqYobwC8DPbxAmvT_oe6AV4ps70,2015
examples/classifiers/fineweb_nemotron_edu_example.py,sha256=3xG1gMFeJRdcsFsAeSwSCXqUDlBXxVzM_Sd5VDyfNT0,2022
examples/classifiers/instruction_data_guard_example.py,sha256=GIx9HBlBEbNFWBOrEB1B4H8iVVWG8MDImzVyt_-EufQ,2300
examples/classifiers/multilingual_domain_example.py,sha256=v6gFenMIWEKXdeHa6WwSihU7SElypMidpKUEAR8CC0k,2061
examples/classifiers/prompt_task_complexity_example.py,sha256=izH2CcrAUztAL-MYq4acB-HY2-QHVyLcKKjMi91LIUQ,2035
examples/classifiers/quality_example.py,sha256=qyaXacq-S72FbUn4mdz8MBgcM_uJCISQdg0STqfIjDo,1988
examples/k8s/create_dask_cluster.py,sha256=4x8r0uI4gakvMFNF3uUqa9SR9GmsBl_TN9ZbrP4pd14,4238
examples/nemo_run/launch_slurm.py,sha256=Cn5y7DpqTEB9wfv9an-9TkAnDdBEVnhQgFiMi-iMXB0,2009
nemo_curator/__init__.py,sha256=DbfJ8agEW9W3CYvDAmbWhPJiJZYMydF_niTFT5aIkQk,1618
nemo_curator/_compat.py,sha256=lGGedH4xcy7okZZt2MuUjVtwgsR9OPSn1GJJWJCxLxM,2537
nemo_curator/log.py,sha256=RoRApb4jGYC2TaIWZy8SnYi4v7CO6r31FYBCO15jCss,2937
nemo_curator/package_info.py,sha256=BExWf-O7Qhqh7DQhtoUCIi0wb20wm5GaRLpFCR3zXNY,1647
nemo_curator/classifiers/__init__.py,sha256=PYeRD6_xEp4Ne7VUTEg3mm1_5I5if9mBcRf7m7RiwRI,1400
nemo_curator/classifiers/aegis.py,sha256=jY0qTsITiXZ0Nyvgjr_KafwkH8n6vNH6H-I6RAEIg6U,20253
nemo_curator/classifiers/base.py,sha256=AJXmYP0MeSmbESy1XdGl7_0rrRQOaQY6_ZTmb5ih3HA,5038
nemo_curator/classifiers/content_type.py,sha256=3maiOLXfXyt-pWeslVTBy14eATL8pVl5faeFmOzaEY8,5569
nemo_curator/classifiers/domain.py,sha256=tlSJWNYfBoD63ykGcjYEa84yCW57EGGvty5vqsvH5jY,9411
nemo_curator/classifiers/fineweb_edu.py,sha256=XsdjNh3uqXNmk7SdVI7ULqML0viDExIgJ6H7fcoaWO8,13271
nemo_curator/classifiers/prompt_task_complexity.py,sha256=U0UEIQYgjLtcZ-CGnwlU9zV5GpYLozlMhTXP-OrEu8c,13549
nemo_curator/classifiers/quality.py,sha256=VZjgy5wobX7VYllkDpOe1Ql7bhualUqhK151ZgBP0hQ,5182
nemo_curator/datasets/__init__.py,sha256=RKN_DespHiwOSlBQ3-fsILTsnZKvnWHoEylq4iaSjek,966
nemo_curator/datasets/doc_dataset.py,sha256=KjnvULIHx1y6TZd0Ec5i3Qeh-34zqI2A_xaWsAy8pBA,13747
nemo_curator/datasets/image_text_pair_dataset.py,sha256=FLnmkN7UzwaPHDxXTiezhACrj983ZkaT410y0ELN2qI,11984
nemo_curator/datasets/parallel_dataset.py,sha256=-_TwixZc2xkH0LRKN0PXVPD-kjK06fx2jF31uJ-Ra-k,6652
nemo_curator/download/__init__.py,sha256=9lBlOthqw-Qd1P93Hn1VmjAfM57PStMfQ8teCf-V-Sk,1887
nemo_curator/download/arxiv.py,sha256=JuMl8WAWq8traWH1ZYyhZBWzUOyc6IanWzGkowUOYAg,15558
nemo_curator/download/commoncrawl.py,sha256=J83pjiAjMSYGNVCT0JYZydbBZn980yTixz2P_IKQr2I,15761
nemo_curator/download/doc_builder.py,sha256=BUoF89qUl0ucDEeuqAdQZ2gQq8CXhl7Bb0jxrso2Ygo,7666
nemo_curator/download/thai_stopwords.py,sha256=oIf0Y2NK9Ig1oSsk6rUcVb4ZHgYPWrtyT_PHf5akIws,2715
nemo_curator/download/wikipedia.py,sha256=_-EXJonq2W9MEisXwJ7c7ApG28P02jlafpQrJsodBss,31228
nemo_curator/filters/__init__.py,sha256=aWrZPCgUhkOngN2DD6J26dmk4PmC8_EFWRD5niZSKKw,2948
nemo_curator/filters/bitext_filter.py,sha256=HSaC1M9Dg3bYuUriEPQzjT6cwq3hfmWRtaqM6zVvA-U,6205
nemo_curator/filters/classifier_filter.py,sha256=PbLd6dcSRaAXkjDmORoYJPEmdwN5u6twpC-4Qu8pLZA,10080
nemo_curator/filters/code.py,sha256=w7cXk_7EGCpsEkuQLq34hRrLaKVrbZ_46rgCQ8tya7A,10645
nemo_curator/filters/doc_filter.py,sha256=qmz_pIivt4SDs515s8AA9kXl5dJWM_Ur7qLHElCaOYM,4759
nemo_curator/filters/heuristic_filter.py,sha256=QHtoAJnksJ0FDoxgn2Q8jAzp9enATwdUChIPyGf03i4,29975
nemo_curator/filters/synthetic.py,sha256=BNJtk983O9WvWR8F72jYZvwng50eh9wAb1XRfrVl_fo,7709
nemo_curator/filters/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
nemo_curator/filters/models/qe_models.py,sha256=oTK9Ug7e3zC_G0LVIqwYyXcms_YWv5AjouhrYZxfj2M,6831
nemo_curator/image/__init__.py,sha256=NVzEmeQhr33NGGaBYs_GTg-TgkVqEAgU8wYE_BWXMLQ,610
nemo_curator/image/classifiers/__init__.py,sha256=hzZMZAKaYrifBfy067gYW0zym2onTKAA4BAhS1EFKz8,792
nemo_curator/image/classifiers/aesthetic.py,sha256=fIHd522jT0aXey9Q-sQeaikhQckdJ1Dmd4aTTn9dBpw,4522
nemo_curator/image/classifiers/base.py,sha256=tT6hqFA6jf6DQug6LqnbDTeSYOuemU_SL3Og05pjVaU,5818
nemo_curator/image/classifiers/nsfw.py,sha256=ObhNpGzB0x6w4M49VL2tnxkBBX5MT3EKF4Nuw-Hj590,4940
nemo_curator/image/embedders/__init__.py,sha256=PJJYO1NFEqffgd2lMU4I84tJYXp8Px-flCP7-aQvnCw,728
nemo_curator/image/embedders/base.py,sha256=RhnwpZuLoqohzPi15_1YNucKcWGm9m67Uz1LCRgzhKg,7516
nemo_curator/image/embedders/timm.py,sha256=Sn8gRCZ-rrBlKU7rvisnWTPWNmTbZ98CXey0hqMZQ9s,8309
nemo_curator/modifiers/__init__.py,sha256=21LvMSj2EKcw5CqiuaAZqELijap3JeEUFhujyWtLDHs,1344
nemo_curator/modifiers/c4.py,sha256=U25SZoeVOcbMRJ4kRsb8JsI8S6IQv1bBt--Q3MfdqLk,3273
nemo_curator/modifiers/doc_modifier.py,sha256=GWnBE_8jdrs_G9N9z_N5FbYEUwcvpL5Tt0biCnVvH_c,1312
nemo_curator/modifiers/fasttext.py,sha256=aUg7jTcS1OvG_BKG1PUexzc0JcxEF4jDgeGoy0yx8XI,938
nemo_curator/modifiers/line_remover.py,sha256=tWkWVymsvLGQk5FzB2joWqgVOZ1M7WkNNOKzOWfvdRM,1223
nemo_curator/modifiers/markdown_remover.py,sha256=ztrAFPbzUPF-jtyJZnF9UVsYm7IvgnIhEGDqBM-A4X0,1536
nemo_curator/modifiers/newline_normalizer.py,sha256=7VI0VD_gFu00LUm9RRtKWjC6eQgd2YlQRHf8tqb2O8s,1187
nemo_curator/modifiers/pii_modifier.py,sha256=zZbsc3BkZjSuw-x3VMYUci7CSmdQ2YPjIC_z-fMWeck,3728
nemo_curator/modifiers/quotation_remover.py,sha256=9MadGemGAe5jQvcEdPosPU5ZP3wyFh91L5-MCjviKRo,1564
nemo_curator/modifiers/slicer.py,sha256=Rxx7UklgS16xjCIhEX_l24v-u4jD8MqRJPWU1VAH1YU,3453
nemo_curator/modifiers/unicode_reformatter.py,sha256=nz-vfzMqcOHpoCY6KSkwQ1nLmhA1UxOBGb0B7wgeYHE,846
nemo_curator/modifiers/url_remover.py,sha256=uXfRPg422648aOt5jp0wg8gcMpxyTiIzJZIghXq-_kI,963
nemo_curator/modules/__init__.py,sha256=WvVl6N1WFbFS6Ax9xT_dP7c_d8Zim53V9fY7KJgYjjs,3373
nemo_curator/modules/add_id.py,sha256=CNyA7BCgIOkqCeXdTvXGh8NsTZ_DfopSEnvBAUf5an4,3701
nemo_curator/modules/base.py,sha256=KR139qRjdHCKhMNj0h7PMIt9SDlXMsZBzSho3l8Wx3I,2929
nemo_curator/modules/config.py,sha256=EKM5S5M2_PnEXDJ0CqbI7F897CadN5tmnhe4H_uqqf8,8807
nemo_curator/modules/dataset_ops.py,sha256=OWWWo3IqlnktmMaQfaeyMa1hAo1nhZBtwhJI4GQigMc,7421
nemo_curator/modules/exact_dedup.py,sha256=6YRtX2RrtLqfQh2pIT6aNUapceqPWQe5bwRLIpRj-58,7920
nemo_curator/modules/filter.py,sha256=YyfNTD3S6eAE2Xbq6Z4tFYOQ8f9NKzI4mAC7vw8q_DU,11286
nemo_curator/modules/joiner.py,sha256=C5KJtmwraXAZAqDl6pKGGBuAD-DiLw83cT8hTpOG4A0,7541
nemo_curator/modules/meta.py,sha256=9q-4cd1pZQm4T5D-U_oydoBtavJDxR_ssAZy32KS0pI,825
nemo_curator/modules/modify.py,sha256=TerYNo6sQPhNa8F9H6JW5qGqOo08D5kmrF9fwnAAv5Y,1519
nemo_curator/modules/splitter.py,sha256=vNWpkrkREYntSukazJ76KNlKETL69dWNN7aASmWR6gE,3133
nemo_curator/modules/task.py,sha256=GHIHBN_UvT7_loDMO6pd4VlSUd8V4xf-XoVF6BEXsno,18872
nemo_curator/modules/to_backend.py,sha256=22ZYsYljgR2nyazHZmsdidGTtzb0PGpH5hKkfik5Rm0,1480
nemo_curator/modules/fuzzy_dedup/_mapbuckets.py,sha256=-xBWNZnupmRpcS6lgHm-U6WUQz2K0TI9j_daPl_frYE,10539
nemo_curator/modules/fuzzy_dedup/_shuffle.py,sha256=J1T1xmjvnnbUg6csW68Xl6930MvK9yoCBJXj11CM-zg,11105
nemo_curator/modules/fuzzy_dedup/bucketstoedges.py,sha256=IxnoDPjqwhyRwPwT9IzSDjNV_GzLCSLb_KCaiC-jGFw,5893
nemo_curator/modules/fuzzy_dedup/connectedcomponents.py,sha256=kVL4KB9XdQ9uM8VK3akbMvdtdxmQ8qzSpqJazU8BlPA,11471
nemo_curator/modules/fuzzy_dedup/fuzzyduplicates.py,sha256=H0dVn3yOfRIKTDGmpAqD5MShzrA8TlXm_l3WvREojAs,11410
nemo_curator/modules/fuzzy_dedup/jaccardsimilarity.py,sha256=uQaXcl4Vhu-1ZfRLmf0Ikws6ahiZHvMX4aEsc05wnVI,7679
nemo_curator/modules/fuzzy_dedup/lsh.py,sha256=eTcjWhagYzTgGKfEzW39u8fjx40R3gDBsjuyTv4AiSU,10550
nemo_curator/modules/fuzzy_dedup/minhash.py,sha256=vqu6ZewNbb_tcOWe41u9985GbSZGW4F7MzCkNn5HwfM,8622
nemo_curator/modules/semantic_dedup/clusteringmodel.py,sha256=6Jo8bowkcMVkxhuCmnfpqNT3uQIl2_CdO3iI3nYiB4Y,8920
nemo_curator/modules/semantic_dedup/embeddings.py,sha256=Aubrd3F8WA8-VBn9q1q4W60LyK9iOJuF2NxdwJHBmhw,10442
nemo_curator/modules/semantic_dedup/semanticclusterleveldedup.py,sha256=4VuR22qw8U6_nJZRE_z8Zf_oGJs9TRzPZ285p6Qv2jY,6935
nemo_curator/modules/semantic_dedup/semdedup.py,sha256=wNc8HzUMe63T1FvedMON1tYNp5qOQBEIPf0AOnsxrP4,3971
nemo_curator/nemo_run/__init__.py,sha256=SsipNxmMuUKT-P22M7z3i2K10gSybbAuHklWQgxl9Zs,675
nemo_curator/nemo_run/slurm.py,sha256=uIJS3o7kGGBDUolXrJK_ucnK6-oB5Qd-7FVxXh0VYPU,5298
nemo_curator/pii/__init__.py,sha256=NVzEmeQhr33NGGaBYs_GTg-TgkVqEAgU8wYE_BWXMLQ,610
nemo_curator/pii/algorithm.py,sha256=WzyCn7S5UE1_0VmmSdtcEY5yS7ase3NvZDPR134sa_E,9845
nemo_curator/pii/constants.py,sha256=FhrU0UmUUHE6LIxDNAP4WQuY3Dl48d0bdBwr5sM-TVg,369
nemo_curator/pii/custom_batch_analyzer_engine.py,sha256=bc5EmJ5BZs07C6Y2Z-rlX6FagC1R29hjxDyvisrbsFY,6668
nemo_curator/pii/custom_nlp_engine.py,sha256=1ZPR8gxCskO7pajgSFgeOz_eAERJjNV12V1Oqqz9rJ8,2686
nemo_curator/pii/recognizers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
nemo_curator/pii/recognizers/address_recognizer.py,sha256=Pc_lSuT-J5asHQQ9So5GW-2yoSDoIqoQzSsjtkyg9uY,1828
nemo_curator/scripts/__init__.py,sha256=NVzEmeQhr33NGGaBYs_GTg-TgkVqEAgU8wYE_BWXMLQ,610
nemo_curator/scripts/add_id.py,sha256=VHd-FYP8FA9uAUJNoVYEqckXExs18rdnQI1Rru_yiv0,4093
nemo_curator/scripts/blend_datasets.py,sha256=ARlamjhilcFgwGW_NJsEXgfO2AOGMIaE245M6P_uTnE,3718
nemo_curator/scripts/download_and_extract.py,sha256=9KDCAWTLhUY3NzW6K9SOGFCsGZZmUnR44AY9qNQp2GE,5819
nemo_curator/scripts/filter_documents.py,sha256=XPjTCRvLB8bhRsZteFc1f0eMj2_WS5Kw4INdHT7ZcGA,11064
nemo_curator/scripts/find_exact_duplicates.py,sha256=P0CGlC35z74NVbD1Fa3Y4CCgip_Zd5UrGvaohyaz1Vw,4065
nemo_curator/scripts/find_matching_ngrams.py,sha256=Z8lPn1_eGOwEupgY2-NWsbpI8C_XXfCQSZSqkPviVvA,3341
nemo_curator/scripts/find_pii_and_deidentify.py,sha256=ATwyeuSzge4S6cRF9dbSSi_X6_D9IB9LSAbiTxL0WD4,5551
nemo_curator/scripts/get_common_crawl_urls.py,sha256=-RYZNH08UTDn8ZGU0JElOuiZrh-PXjqD_JKL4FcFYI4,3531
nemo_curator/scripts/get_wikipedia_urls.py,sha256=fs2-F59KLyXyIvmW7syYFzFSVR4KGD50o4eG8qyfncA,1866
nemo_curator/scripts/make_data_shards.py,sha256=3-DnW_6_c5F52JQnBwKEC1SEcE9DZBhdTi0ELSHF_G4,2587
nemo_curator/scripts/prepare_fasttext_training_data.py,sha256=-GmYWM0K5Xn7LzQw5xUlv_AxmV_NCG1Gh8JYTzL2bv8,3528
nemo_curator/scripts/prepare_task_data.py,sha256=ZuPmS51xFYPCq3UeMx4Al23iqmaxGHIZkpRnHAALXWc,3035
nemo_curator/scripts/remove_matching_ngrams.py,sha256=lh2eCu7W5Zw0MGbSYNELjQ3RtygQ8EuJdO-I4j0MkQg,5309
nemo_curator/scripts/separate_by_metadata.py,sha256=mG3imMv0wNB4U_F-_89lNTpLbKjjzJDbxzBNpKYIRvg,4698
nemo_curator/scripts/text_cleaning.py,sha256=QAgBwVeujtx5FgsTupWT0UQk1XOlNk2CTK1_t9Ia3_Q,3614
nemo_curator/scripts/train_fasttext.py,sha256=w-jI921mBu6RSzS-I82YcHowdbh_vxnGIYBZIAjCr_k,6418
nemo_curator/scripts/verify_classification_results.py,sha256=tgLf_OV-hsV4Io3b4aMaNlded-1uClhLq1zVumdUoBo,6650
nemo_curator/scripts/classifiers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
nemo_curator/scripts/classifiers/aegis_classifier_inference.py,sha256=1SKQYflfRLkaIVWqqg7oRCxldOidhYosjkLB13Jj1Po,4375
nemo_curator/scripts/classifiers/content_type_classifier_inference.py,sha256=j7zAkqUY9_65ddgwxngKSWtPAeK85DOLhpUNrdUVCMU,3729
nemo_curator/scripts/classifiers/domain_classifier_inference.py,sha256=gbp6urk2wQ7JTXaUuE6WM7sl7zi6nxWw0PVY28Oy3U8,3689
nemo_curator/scripts/classifiers/fineweb_edu_classifier_inference.py,sha256=0SLTA9q46zeou5WcQJbrPMIL8bR722f2GUrHSkKAB8g,3723
nemo_curator/scripts/classifiers/fineweb_mixtral_edu_classifier_inference.py,sha256=G56wcjYys8cB01VPotcS-6DIKN2eRNPjrAYT1vVrzh0,3777
nemo_curator/scripts/classifiers/fineweb_nemotron_edu_classifier_inference.py,sha256=zfO9nIPOyN4gF5nSnnM2kF7y6COtwJZLdAa_93K3Tac,3790
nemo_curator/scripts/classifiers/instruction_data_guard_classifier_inference.py,sha256=fek7-qtWtV_Ox1f2OXesD7ehbqp_ovh_I4SKMrd7-8E,4070
nemo_curator/scripts/classifiers/multilingual_domain_classifier_inference.py,sha256=tM_9dWh5zovI4rmHt8nsCieX9WIRNHIrwzv8i1nRMgU,3778
nemo_curator/scripts/classifiers/prompt_task_complexity_classifier_inference.py,sha256=wsuIPAgWyX831GA56jp7LAsj2Kl1JrvjWTDRt7_jBZM,3809
nemo_curator/scripts/classifiers/quality_classifier_inference.py,sha256=RpCv2AyJiYHhsRB-NVPXQKMnrdbK0EUwKyX7QqXvZBk,3692
nemo_curator/scripts/fuzzy_deduplication/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
nemo_curator/scripts/fuzzy_deduplication/buckets_to_edges.py,sha256=qgBWnqVVLJMNtoXOUfCuqwi_DqkuqkHWwPa-z_59wHQ,2971
nemo_curator/scripts/fuzzy_deduplication/compute_minhashes.py,sha256=LE2NzD8d2BXIrVkGd2zeO3hXZ1JF7qlRr3vochD0ZGM,5457
nemo_curator/scripts/fuzzy_deduplication/connected_components.py,sha256=3yToZXgCWPR3uxksH87u4hB5kDTtGrGAN5-LVAcTOOU,2836
nemo_curator/scripts/fuzzy_deduplication/jaccard_compute.py,sha256=N0MoukF3b8sjdCsXbIQKhSqwfnEUI5fINhb3HeAk1aI,2899
nemo_curator/scripts/fuzzy_deduplication/jaccard_shuffle.py,sha256=KD4D-rDhnJRlFTgMOJKvfNbDs1Z6zlL8JPhBh-y--Tg,4216
nemo_curator/scripts/fuzzy_deduplication/map_buckets.py,sha256=r_qza_FasCnSs8nBBUj8yRfkK9GJgk_1Ws1ZjZAO0FM,5836
nemo_curator/scripts/fuzzy_deduplication/minhash_lsh.py,sha256=qP9Hs1o3GAUE7qdbPgKMz1rRyDGXjTEZzde_gV9w2uM,4173
nemo_curator/scripts/semdedup/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
nemo_curator/scripts/semdedup/clustering.py,sha256=sR81ImN92-xDEnabY1GRQFwTso0M7D0-1rOOZWB_MZw,3824
nemo_curator/scripts/semdedup/compute_embeddings.py,sha256=IaFqpy3ehE7I_gAkIhG5pz0SsAvkTmh_VSCmfNc7sNY,4950
nemo_curator/scripts/semdedup/extract_dedup_data.py,sha256=QybnV28i65QaukVszWDXwbc5pEF6C2WoVnum9h3Xdxw,3358
nemo_curator/services/__init__.py,sha256=B3D0AylSmuOOruKFvG_vqV78f9OuR52mDGTWa4npDoQ,973
nemo_curator/services/conversation_formatter.py,sha256=JRMOpMCMrCz2nN4BwHvcHigi_xJTrw6lo6MTlzGN2kQ,1022
nemo_curator/services/model_client.py,sha256=1OEgNbF9vgeso9d64G4cKiGL9ZfxU5Csue0hOOHwi_0,2994
nemo_curator/services/nemo_client.py,sha256=uVN8xhRVsKr7iD20xJ8j-ZfHWzlsOUSnIkvhf1xF0v4,3626
nemo_curator/services/openai_client.py,sha256=z4Kqvs1ItyvhnbPJJB-Idpvys6iNUyr5ai_IayjwQHw,6035
nemo_curator/synthetic/__init__.py,sha256=6swtfa9MuBm7iX80WZ675VJ_4Mk4FLBj-lXEIDtYQD0,3405
nemo_curator/synthetic/async_nemotron.py,sha256=Meg3F9-gTt3q0_n59dQoVMIJVsbhE2xdTDSe7sflroo,80632
nemo_curator/synthetic/async_nemotron_cc.py,sha256=s4kpbQOVjRIEIDEDr1EZfkrS6W6OyRzvGvYhWoWlnwc,8335
nemo_curator/synthetic/error.py,sha256=EWGJNnhanzilxsUrlMKOgcv2fJ_9YjunOFIpLpteOzQ,803
nemo_curator/synthetic/generator.py,sha256=Ej4DiBlqS369IeIwWvIUeGmQ9MYCGh0pRVFAlcAOoQQ,1242
nemo_curator/synthetic/mixtral.py,sha256=CUKR-bmD3qYayxtK08987A7AepAa5jbzvGfoltxzSZ4,1319
nemo_curator/synthetic/nemotron.py,sha256=m1S03olbsjYmNcv9XF5P2xUF5AtF_wk0IshAGtIgMzk,71704
nemo_curator/synthetic/nemotron_cc.py,sha256=vON5KoiQx72x1fYqEme8LPylDzEuGQgQFvqAwesU638,13476
nemo_curator/synthetic/no_format.py,sha256=TjJoa04f_VQTczgAYf67p5XnGB0oKATXFRQ6SXSYTAk,1194
nemo_curator/synthetic/prompts.py,sha256=vbyCYrU2c5FyzGVrZlcet-53mQILCcj7dMIPgqa39Hg,12936
nemo_curator/tasks/__init__.py,sha256=IfaRyarhNVd7icbFyiT_fBnWoY38iCcIqZoFR019Wis,1499
nemo_curator/tasks/downstream_task.py,sha256=krVCvrbuZy9ToNyImYXpxUl03hBCnPVlWcrOXWvCwPY,1968
nemo_curator/tasks/metrics.py,sha256=MgBEr9mC9KEeDRfmW32_k4a06f6ljWveyC9LBLtAG0Y,18930
nemo_curator/utils/__init__.py,sha256=NVzEmeQhr33NGGaBYs_GTg-TgkVqEAgU8wYE_BWXMLQ,610
nemo_curator/utils/aegis_utils.py,sha256=LEwVKVFFP7pzBciaWaAL5O1hzZ_yE4AI5ik7-DD70so,10140
nemo_curator/utils/config_utils.py,sha256=0_Uy16dEXqqTeqiQsBCRzD8E_wzMYOYvz0_jkC-ZGhU,3598
nemo_curator/utils/constants.py,sha256=Szr4RTF8hFFkTqz5dhh_IbTReZfBXFLlhyokggQ_bvI,3296
nemo_curator/utils/decorators.py,sha256=t0gGx4HhyEJntCk5XAAaoePmoahSyTNjhqtEpGeeHrY,864
nemo_curator/utils/distributed_utils.py,sha256=gXyiFLbaMZ2RUHn0XThoBABKWlzj6rpxNBzQ5d4hmos,44306
nemo_curator/utils/download_utils.py,sha256=i97IsKFyOCWrZa6JuZh84ugabIO9LHkKxIp7fzAR9ic,7458
nemo_curator/utils/duplicates_removal.py,sha256=UzE-v-MCtlvTtfQkCtdqVGN1JhKrQLQ43rbyyj7DUBk,2364
nemo_curator/utils/file_utils.py,sha256=pahv8S2-BwgOr-Gkui2FmQcPwavVwi8Dg6agZCBlNiU,15281
nemo_curator/utils/gpu_utils.py,sha256=3hZWSm_aQdxyIyOjf2vsuHo3QT9qzKNh63ZPzvuTohE,1113
nemo_curator/utils/import_utils.py,sha256=7hu0ztU42uvo_tpy_c2P5Yt6yiwD6jTIgS7GkBDMA1U,13261
nemo_curator/utils/module_utils.py,sha256=aaTOspYbMLpGby-gd-wcVKhVv_eBbtYpPd4mjMiRgSM,779
nemo_curator/utils/script_utils.py,sha256=oED7RCN42mwYgQv18EoSwmYG-aVoQIbeUHQ8lAQV-F0,20692
nemo_curator/utils/semdedup_utils.py,sha256=0kc4tXfA2chK_LNg8ryY8zXrmfqwaWYRlIZTsJ21uew,16635
nemo_curator/utils/text_utils.py,sha256=B1AJG4-DE-KB9HIlLAyRn3PVJchtrO40AibvZE18lZI,7125
nemo_curator/utils/fuzzy_dedup_utils/__init__.py,sha256=NVzEmeQhr33NGGaBYs_GTg-TgkVqEAgU8wYE_BWXMLQ,610
nemo_curator/utils/fuzzy_dedup_utils/id_mapping.py,sha256=05MwMpEDBycgB9rESP5yIZSbZDqAW4tb3t2zJLKSEtM,1862
nemo_curator/utils/fuzzy_dedup_utils/io_utils.py,sha256=b9ZwRyZ_R38nicDLSKkIG5yAxlqFfT5FNOejOqTD86s,6711
nemo_curator/utils/fuzzy_dedup_utils/merge_utils.py,sha256=HpOP6KaV1AJA5HR90QlGIypZcsSEebV_8lMO4VXFDBM,8027
nemo_curator/utils/fuzzy_dedup_utils/output_map_utils.py,sha256=FhmWlPCUZa9kZJdnSfGvbi6RQR0bg7uq0l9TzKaV9uQ,2906
nemo_curator/utils/fuzzy_dedup_utils/shuffle_utils.py,sha256=ZkcVElmeXmuF9ARK2as6fo1-tXjbp4F0u9JufGJyqVk,3986
nemo_curator/utils/image/__init__.py,sha256=NVzEmeQhr33NGGaBYs_GTg-TgkVqEAgU8wYE_BWXMLQ,610
nemo_curator/utils/image/transforms.py,sha256=piLOcV_iG48lCnfJkBstqqUSlDico0hNaxiMzJ9YeHQ,3561
tutorials/bitext_cleaning/docbuilder.py,sha256=5EDlR2UXDSTpUI_kT3hxOwfzsNEIYoxxAHGI6ZIn2gM,1385
tutorials/bitext_cleaning/main.py,sha256=VCK57Pk3NMQegLlwCTfpNud1urKvUsCGi19OA7AG0zs,4286
tutorials/dapt-curation/code/docbuilder.py,sha256=vI2myBkkwMzLwipREuLHn60M6BxQjrU8RfyH9jiTA2g,15237
tutorials/dapt-curation/code/downloaders.py,sha256=U17_zloT_YyHxZ8CsNAI9g90vMaUyD_gG7z9GwhGVgI,7387
tutorials/dapt-curation/code/main.py,sha256=gghQrMnjbKrEd7YylkI4ei79ZBCkFgUwA4p4Qs8LuGE,10482
tutorials/dapt-curation/code/utils.py,sha256=rFh3Gh_3RHtRYux_JSFwMtKQYXnVuj3u1SQv_0evV2M,12590
tutorials/image-curation/helper.py,sha256=u8xUI36wcULoeP-mpwDl2O5JE4KRUPQhr6OGp7j0Tq8,4132
tutorials/nemo-retriever-synthetic-data-generation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
tutorials/nemo-retriever-synthetic-data-generation/main.py,sha256=zmP_VBvSTW-dB_LqR4blyOX9wdOJAM7L_rjCGoPJMCo,8871
tutorials/nemo-retriever-synthetic-data-generation/retriever_evalset_generator.py,sha256=34y4XaP1uwYpJEqPh5IO0rSXGCuAEY0Qpd2PbmzAKa4,7823
tutorials/nemo-retriever-synthetic-data-generation/config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
tutorials/nemo-retriever-synthetic-data-generation/config/config.py,sha256=mGEcqYtQilzOfcpgVyJdrhLQMNObsDzZggaf1TWoeQI,4173
tutorials/peft-curation/docbuilder.py,sha256=PqsPKj0NjoibZkBhbkAq73WFMHs94Ti3yTTUmiuqDe4,3435
tutorials/peft-curation/filters.py,sha256=YWX0GumDSCqIY-_L6kOPUM_8WrhOPikahxttkWx5KVE,1536
tutorials/peft-curation/main.py,sha256=mUUkS35uvNlpD9B46A9JSugMS2itdNw0toskccf1Gng,6112
tutorials/peft-curation/modifiers.py,sha256=LPXCaZwoeY4Hur-I9DMAIP0ExsMaQMVhRrcXSl-wy5Y,2064
tutorials/peft-curation-with-sdg/docbuilder.py,sha256=f5c-tkkMrBUDoyxLJmnvE_N-x-i02UqBjurrp7rutHs,5068
tutorials/peft-curation-with-sdg/filters.py,sha256=_80wocMPm-8-XGxMEPOef7U3_u1ye4uAe5GyRIGGrz4,1097
tutorials/peft-curation-with-sdg/main.py,sha256=2rm3TnatxiinK8Rwqw6aE0Oeqof5AoiXMqXf3YuAZPA,14686
tutorials/peft-curation-with-sdg/modifiers.py,sha256=fNmB3q9i7VKiQz0-zhSu1-bKDennTmNDNtobFar78CU,1377
tutorials/peft-curation-with-sdg/synthetic_gen.py,sha256=mkJyhJCDOLJlhD6mM3qYUxDxgJ7wiweKEhY3OD5B-h8,11988
tutorials/pretraining-data-curation/helper.py,sha256=zuzRk9-fgG50Wdr7ErVd78WJa_4DL_xdw4995ljmZsk,2400
tutorials/synthetic-retrieval-evaluation/DeDup.py,sha256=tQ2fYk-uAf6YS9hVbOHaiGp4nvnYIWoUqtj16zy7RdE,1564
tutorials/synthetic-retrieval-evaluation/Endpoints.py,sha256=jC-4BGK62YVA0fqnzV1GvzdvqBObLmCWQuRsZUhth3g,1704
tutorials/synthetic-retrieval-evaluation/Generator.py,sha256=L8ebM5qhE-cEtZ9oRWi6GBOZ2IVQ1gcliXntP5KGPRw,6134
tutorials/synthetic-retrieval-evaluation/prompts.py,sha256=BklBCzxYWsae-3wiJ3HRDdn3HCDtAtJOwHXzpnJtl48,5245
tutorials/tinystories/docbuilder.py,sha256=53R6SkM7nQwqcu5hgm9HELpx9Gvcjd5UWxUQcct0Uko,2923
tutorials/tinystories/filters.py,sha256=mAzJCtzqR9ICDgOZbybwV5AEtsy-Ee928kimBtodiKA,1415
tutorials/tinystories/helpers.py,sha256=90YZ_zBumIN4xMyClYsM0tGrt_HCxIvXG0ypKt0_ig4,2700
tutorials/tinystories/main.py,sha256=l0yg8KsU-mYhUCi6dGSknz286ClxXhTZwgeTT85SFL4,7516
tutorials/tinystories/modifiers.py,sha256=JAzC1lb6eiw6mrGEbMxaWnAFceH4NJ4nlUfbEJ7Pyns,1307
tutorials/zyda2-tutorial/0_processing/helper.py,sha256=86Q022vpfY1K4-Teoiyui4ELaYZQiITSBETIgy9LVWg,744
tutorials/zyda2-tutorial/0_processing/process_dclm.py,sha256=J3TdnBT8H-knsBZydiN6pBdJURSw0X9rX1VXnNqGBP4,1500
tutorials/zyda2-tutorial/0_processing/process_dolma_cc.py,sha256=cWRQKbvSQmH7sfJQkosylV2Dpz7rg7rFTs566i7q29Y,826
tutorials/zyda2-tutorial/0_processing/process_fwe2.py,sha256=SUwWUH53fUn1vEKpd4I99v3ufyFlr6z1Z6FqCqV4jXI,2365
tutorials/zyda2-tutorial/0_processing/process_zyda.py,sha256=ABiqJoHGDrfPG8p_LkGr-Nfyc4A1hP39fN2mx51zo0k,1248
tutorials/zyda2-tutorial/1_fuzzy_dedup/0_minhash.py,sha256=FdNoXYDas0jxYBBlnDrCu-u5_5sJSGbG9ieXWGofFLQ,1928
tutorials/zyda2-tutorial/1_fuzzy_dedup/1_lsh.py,sha256=zfdT0sL9ZauCWGVVOxp0nTwCbDGTUNkcEtYaDuio-e0,1921
tutorials/zyda2-tutorial/1_fuzzy_dedup/2_buckets_to_edges.py,sha256=eQC3QdP4umXC8879Y1p58a68z08IK4mUR4Jchn7yRcQ,1217
tutorials/zyda2-tutorial/1_fuzzy_dedup/3_connected_components.py,sha256=moIrdc6RlHc66WvwoYzYcORlqyrJYPPUMq9Q6ECEFvI,1404
tutorials/zyda2-tutorial/2_dupes_removal/0_id_mapping.py,sha256=l8BxGdo55p0VrMCjGScZ9goye3AEUiWKnsdlhC217uw,2484
tutorials/zyda2-tutorial/2_dupes_removal/1_id_conversion.py,sha256=L92MR7t1wS8VkAgAPLLlL6H0HnynRFuOeB7T5F89Vqc,2289
tutorials/zyda2-tutorial/2_dupes_removal/2_compute_counts.py,sha256=0x5pSiR0umxKY3J6d4mcPoMAeUHgfbWKhr5utlsEciU,2973
tutorials/zyda2-tutorial/2_dupes_removal/3_prep_dupes.py,sha256=64P5Lxlsf2HQ-emHg2CZo9mMu_Kc22QKOlrO6vMEyA4,6346
tutorials/zyda2-tutorial/2_dupes_removal/4_get_dupes_dclm.py,sha256=A2JzsEe39BBkWJifaBF1VGyYrxVf-UZCjZb-4kxdn_U,4869
tutorials/zyda2-tutorial/2_dupes_removal/4_get_dupes_dolma-cc.py,sha256=jt4lIDXsUei5Xmq-_BGQ4c7DUd6XN0U1MF_KVWlALb0,4002
tutorials/zyda2-tutorial/2_dupes_removal/5_get_dupes_zyda.py,sha256=LDwlun4wwQlOdK9Nwd0jQQwd1v7WL_V45Ux0QLLSGQQ,4139
tutorials/zyda2-tutorial/2_dupes_removal/remove_dupes.py,sha256=_uNrjc-ElVkyoBJTHDStih3N0tRc0VL7bz8cpk0wIZo,2205
tutorials/zyda2-tutorial/3_quality_model/run_quality_classifier.py,sha256=TAvRGbly7DAkrqz7tzGiTZDoZDbeCZG4ElON8ep_KxU,1663
tutorials/zyda2-tutorial/4_filtering/filter_fwe.py,sha256=YYgyDlVYA8yZaBTY8ThV4BphvPb1JPSHO3nFBZ5nhYE,1122
tutorials/zyda2-tutorial/4_filtering/filter_quality.py,sha256=CqaknS5EUpL8Xpynb6oPlhsTtasao1dyGogmaV_haOM,1556
nemo_curator-0.7.0rc2.dev0.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
nemo_curator-0.7.0rc2.dev0.dist-info/METADATA,sha256=M4Nl1DSET8CrHiy0GNJO-MN658EnNUh-KkCtJAORmEo,17427
nemo_curator-0.7.0rc2.dev0.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
nemo_curator-0.7.0rc2.dev0.dist-info/entry_points.txt,sha256=M09nYHUxheQJ7NnLyFCuSnPjbvM8rscjkx1ybJJcZzM,3459
nemo_curator-0.7.0rc2.dev0.dist-info/top_level.txt,sha256=lE9bTBQ09Wn5G_wLHlO67rvyLjl7kq37AfOktW6C7hE,32
nemo_curator-0.7.0rc2.dev0.dist-info/RECORD,,
