examples/async_llm_pii_redaction.py,sha256=EXF9xpcSvz3jY8aEifyNEvfpun1uWSsK7Mc81d14fLA,1956
examples/blend_and_shuffle.py,sha256=d4xSfXJSgWQc7WITQFLp1iiwrDpKG876cJM0eeDsAuU,1696
examples/classifier_filtering.py,sha256=PJUhx8Mme5fVWtDZ0q6mATDSU-kAobBrO5LTgQnPwLA,3449
examples/download_arxiv.py,sha256=TciJ7CRkwR7QJ09eehyfUTSVSzyafXrt6-4i-2-okS8,1479
examples/download_common_crawl.py,sha256=wDY1lbFPDk3O6bHJb9pVbTY-XOu4Jy287_7qMVu7LN0,1618
examples/download_wikipedia.py,sha256=kvRsoL47jAii7PlkNCkMmBSi6Ez22Bj4RlbUD0NGRLw,1561
examples/exact_deduplication.py,sha256=p2qQtL1q0dKp3UO5tg8n910B1n4Qfqr8a8S60s5GUSg,2797
examples/find_pii_and_deidentify.py,sha256=eWava8-M1F6SbZ75Gs6fy9Jp6BOuQsn1QWXOWqcI9PU,1697
examples/fuzzy_deduplication.py,sha256=9Lzyfa63r4U3-PaDm-iI1N4upwT4Gr8M2gTXvkscngw,3727
examples/identify_languages.py,sha256=m9jFSBvr9tHRNPik0iW5h4UntL0XqY-ZUs4asHGyYQE,2709
examples/llm_pii_redaction.py,sha256=wOExwLsatNkoOWAg7-wCAlMhtBegrZjcUMNd_f-MyOI,1904
examples/raw_download_common_crawl.py,sha256=4g9Hmta8S4098lK449Tjqex30mqPAyxEFDKMfWWiizg,2008
examples/semdedup_example.py,sha256=4OqUTrcIYXSK-bW4w-ZpzGiOe9fTBV7EexnzUUa4v3A,3216
examples/task_decontamination.py,sha256=83V_QXgtpSNf9nkr-_oX1f7EMTA8cqG4kmfuYgpWYIQ,2666
examples/classifiers/aegis_example.py,sha256=ypphdtrW2svhGWDZb81ZW5ztSqGqcYhZtQUkUa0K0vY,2189
examples/classifiers/content_type_example.py,sha256=M4uhR1mXpWOEgzzgBbIzT303b5beXYOs0uVPB2w47u4,2010
examples/classifiers/domain_example.py,sha256=bk6DG-V2GRUvUaErEcVAv3c8LpY56f1LhcvEFJlEqzc,1984
examples/classifiers/fineweb_edu_example.py,sha256=oqjGgRSnaQUmCQvRKvESmDROlwAFQ3szD1_Ukl9COKo,1977
examples/classifiers/fineweb_mixtral_edu_example.py,sha256=wa0OlvcEEgxWqFPMgqYobwC8DPbxAmvT_oe6AV4ps70,2015
examples/classifiers/fineweb_nemotron_edu_example.py,sha256=3xG1gMFeJRdcsFsAeSwSCXqUDlBXxVzM_Sd5VDyfNT0,2022
examples/classifiers/instruction_data_guard_example.py,sha256=GIx9HBlBEbNFWBOrEB1B4H8iVVWG8MDImzVyt_-EufQ,2300
examples/classifiers/multilingual_domain_example.py,sha256=v6gFenMIWEKXdeHa6WwSihU7SElypMidpKUEAR8CC0k,2061
examples/classifiers/prompt_task_complexity_example.py,sha256=izH2CcrAUztAL-MYq4acB-HY2-QHVyLcKKjMi91LIUQ,2035
examples/classifiers/quality_example.py,sha256=qyaXacq-S72FbUn4mdz8MBgcM_uJCISQdg0STqfIjDo,1988
examples/k8s/create_dask_cluster.py,sha256=4x8r0uI4gakvMFNF3uUqa9SR9GmsBl_TN9ZbrP4pd14,4238
examples/nemo_run/launch_slurm.py,sha256=Cn5y7DpqTEB9wfv9an-9TkAnDdBEVnhQgFiMi-iMXB0,2009
nemo_curator/__init__.py,sha256=DbfJ8agEW9W3CYvDAmbWhPJiJZYMydF_niTFT5aIkQk,1618
nemo_curator/_compat.py,sha256=u9xBDLef-HdDkyVWzu-wXF4w5Y_9HFto_UfvYci7PdM,2642
nemo_curator/log.py,sha256=RoRApb4jGYC2TaIWZy8SnYi4v7CO6r31FYBCO15jCss,2937
nemo_curator/package_info.py,sha256=kNfkih8o6YM2G2Onn475dWMavxvHA-yYHwAc9TxwfCo,1647
nemo_curator/classifiers/__init__.py,sha256=PYeRD6_xEp4Ne7VUTEg3mm1_5I5if9mBcRf7m7RiwRI,1400
nemo_curator/classifiers/aegis.py,sha256=PGn0Ue2bSmXhLLwjfBoFgDsRIl81jY_Myo_xL1fDNzA,20450
nemo_curator/classifiers/base.py,sha256=RibaupyzRGKoKz6noeASR9dCumoPNvjL16dN3k-DKIM,5163
nemo_curator/classifiers/content_type.py,sha256=3maiOLXfXyt-pWeslVTBy14eATL8pVl5faeFmOzaEY8,5569
nemo_curator/classifiers/domain.py,sha256=tlSJWNYfBoD63ykGcjYEa84yCW57EGGvty5vqsvH5jY,9411
nemo_curator/classifiers/fineweb_edu.py,sha256=XsdjNh3uqXNmk7SdVI7ULqML0viDExIgJ6H7fcoaWO8,13271
nemo_curator/classifiers/prompt_task_complexity.py,sha256=bJz5jSxboR2_PWlHL5PAZbQPotTAw4tNa2oS6P6AC2o,13579
nemo_curator/classifiers/quality.py,sha256=VZjgy5wobX7VYllkDpOe1Ql7bhualUqhK151ZgBP0hQ,5182
nemo_curator/datasets/__init__.py,sha256=RKN_DespHiwOSlBQ3-fsILTsnZKvnWHoEylq4iaSjek,966
nemo_curator/datasets/doc_dataset.py,sha256=AW5m5_QZTkcESrBkL3I8VWHekfvuW8XI6k4F1PHVmes,17471
nemo_curator/datasets/image_text_pair_dataset.py,sha256=yrnWVOpoXj0TWIUjh0U6XYz0q-zvukV1-KKp-01ILgY,12095
nemo_curator/datasets/parallel_dataset.py,sha256=-_TwixZc2xkH0LRKN0PXVPD-kjK06fx2jF31uJ-Ra-k,6652
nemo_curator/download/__init__.py,sha256=99X1V6P6Aa2bcx4owzuYtEcuwDpkB431q5k-73S4Cg0,1941
nemo_curator/download/arxiv.py,sha256=aReph1xcDY105VmQS4V6lxk7uaSbOw3CiX9YIKIECJE,16962
nemo_curator/download/commoncrawl.py,sha256=EjHr508SkC9nVdP5oQCxPtA8dryriMTwCjjRXiSiVGI,28613
nemo_curator/download/doc_builder.py,sha256=pzCWy4fzhhLRp0oH5RTvs7RrcIY26EorQIheJysjEvs,10733
nemo_curator/download/ja_stopwords.py,sha256=lBhaaw_skEDrkT3k6XaqgdsJKhdhPpH6zKG1VWiBqLs,2521
nemo_curator/download/th_stopwords.py,sha256=xuy_9oWZWKGYliCj4P9PqLpu2IXJaoyDEnimlH_m63k,2713
nemo_curator/download/wikipedia.py,sha256=FJHN33hVAlkLhTsu7DxidA0cVcJEQLu8Sf7M4wQCWJM,32466
nemo_curator/download/zh_stopwords.py,sha256=kwwcQsi7aBkRDx1VPgx2d3LUZ1dNpSKOACcZpIqpINk,13735
nemo_curator/filters/__init__.py,sha256=aWrZPCgUhkOngN2DD6J26dmk4PmC8_EFWRD5niZSKKw,2948
nemo_curator/filters/bitext_filter.py,sha256=HSaC1M9Dg3bYuUriEPQzjT6cwq3hfmWRtaqM6zVvA-U,6205
nemo_curator/filters/classifier_filter.py,sha256=PbLd6dcSRaAXkjDmORoYJPEmdwN5u6twpC-4Qu8pLZA,10080
nemo_curator/filters/code.py,sha256=w7cXk_7EGCpsEkuQLq34hRrLaKVrbZ_46rgCQ8tya7A,10645
nemo_curator/filters/doc_filter.py,sha256=qmz_pIivt4SDs515s8AA9kXl5dJWM_Ur7qLHElCaOYM,4759
nemo_curator/filters/heuristic_filter.py,sha256=QHtoAJnksJ0FDoxgn2Q8jAzp9enATwdUChIPyGf03i4,29975
nemo_curator/filters/synthetic.py,sha256=BNJtk983O9WvWR8F72jYZvwng50eh9wAb1XRfrVl_fo,7709
nemo_curator/filters/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
nemo_curator/filters/models/qe_models.py,sha256=oTK9Ug7e3zC_G0LVIqwYyXcms_YWv5AjouhrYZxfj2M,6831
nemo_curator/image/__init__.py,sha256=NVzEmeQhr33NGGaBYs_GTg-TgkVqEAgU8wYE_BWXMLQ,610
nemo_curator/image/classifiers/__init__.py,sha256=hzZMZAKaYrifBfy067gYW0zym2onTKAA4BAhS1EFKz8,792
nemo_curator/image/classifiers/aesthetic.py,sha256=fIHd522jT0aXey9Q-sQeaikhQckdJ1Dmd4aTTn9dBpw,4522
nemo_curator/image/classifiers/base.py,sha256=tT6hqFA6jf6DQug6LqnbDTeSYOuemU_SL3Og05pjVaU,5818
nemo_curator/image/classifiers/nsfw.py,sha256=ObhNpGzB0x6w4M49VL2tnxkBBX5MT3EKF4Nuw-Hj590,4940
nemo_curator/image/embedders/__init__.py,sha256=PJJYO1NFEqffgd2lMU4I84tJYXp8Px-flCP7-aQvnCw,728
nemo_curator/image/embedders/base.py,sha256=RhnwpZuLoqohzPi15_1YNucKcWGm9m67Uz1LCRgzhKg,7516
nemo_curator/image/embedders/timm.py,sha256=Sn8gRCZ-rrBlKU7rvisnWTPWNmTbZ98CXey0hqMZQ9s,8309
nemo_curator/modifiers/__init__.py,sha256=21LvMSj2EKcw5CqiuaAZqELijap3JeEUFhujyWtLDHs,1344
nemo_curator/modifiers/async_llm_pii_modifier.py,sha256=fjaIdDod_nQM_pGw0I4IdvzShG7vPZ85Du4a6vdVdXA,8785
nemo_curator/modifiers/c4.py,sha256=U25SZoeVOcbMRJ4kRsb8JsI8S6IQv1bBt--Q3MfdqLk,3273
nemo_curator/modifiers/doc_modifier.py,sha256=GWnBE_8jdrs_G9N9z_N5FbYEUwcvpL5Tt0biCnVvH_c,1312
nemo_curator/modifiers/fasttext.py,sha256=aUg7jTcS1OvG_BKG1PUexzc0JcxEF4jDgeGoy0yx8XI,938
nemo_curator/modifiers/line_remover.py,sha256=tWkWVymsvLGQk5FzB2joWqgVOZ1M7WkNNOKzOWfvdRM,1223
nemo_curator/modifiers/llm_pii_modifier.py,sha256=zP2ygTcakZAbuQIUWiMIAacB3vUXscaj94KRgCTiyqw,7135
nemo_curator/modifiers/markdown_remover.py,sha256=ztrAFPbzUPF-jtyJZnF9UVsYm7IvgnIhEGDqBM-A4X0,1536
nemo_curator/modifiers/newline_normalizer.py,sha256=7VI0VD_gFu00LUm9RRtKWjC6eQgd2YlQRHf8tqb2O8s,1187
nemo_curator/modifiers/pii_modifier.py,sha256=zZbsc3BkZjSuw-x3VMYUci7CSmdQ2YPjIC_z-fMWeck,3728
nemo_curator/modifiers/quotation_remover.py,sha256=9MadGemGAe5jQvcEdPosPU5ZP3wyFh91L5-MCjviKRo,1564
nemo_curator/modifiers/slicer.py,sha256=Rxx7UklgS16xjCIhEX_l24v-u4jD8MqRJPWU1VAH1YU,3453
nemo_curator/modifiers/unicode_reformatter.py,sha256=uzjdDU7Sak0vR3LrQ-iNIFCXCflJHRd8ImtAUnn3MJM,7926
nemo_curator/modifiers/url_remover.py,sha256=uXfRPg422648aOt5jp0wg8gcMpxyTiIzJZIghXq-_kI,963
nemo_curator/modules/__init__.py,sha256=WvVl6N1WFbFS6Ax9xT_dP7c_d8Zim53V9fY7KJgYjjs,3373
nemo_curator/modules/add_id.py,sha256=CNyA7BCgIOkqCeXdTvXGh8NsTZ_DfopSEnvBAUf5an4,3701
nemo_curator/modules/base.py,sha256=Y7QfcAfEXoizXZlCUAJHrvcIQoTe98Ao7W8cLs3neIc,5518
nemo_curator/modules/config.py,sha256=fmjPmydUddF49_mEpJpopg107GiHVrZmZFvg6dO7zuE,10857
nemo_curator/modules/dataset_ops.py,sha256=OWWWo3IqlnktmMaQfaeyMa1hAo1nhZBtwhJI4GQigMc,7421
nemo_curator/modules/exact_dedup.py,sha256=QDzEQ4kBetOseH2rv5yEuGe_yciJ_Ur2CEhfM1jFqcw,7647
nemo_curator/modules/filter.py,sha256=BkiV3tZWyRspLx5G9rgrZQcMoQVtzLTy14HGidnVRJ0,12075
nemo_curator/modules/joiner.py,sha256=C5KJtmwraXAZAqDl6pKGGBuAD-DiLw83cT8hTpOG4A0,7541
nemo_curator/modules/meta.py,sha256=9q-4cd1pZQm4T5D-U_oydoBtavJDxR_ssAZy32KS0pI,825
nemo_curator/modules/modify.py,sha256=TerYNo6sQPhNa8F9H6JW5qGqOo08D5kmrF9fwnAAv5Y,1519
nemo_curator/modules/splitter.py,sha256=vNWpkrkREYntSukazJ76KNlKETL69dWNN7aASmWR6gE,3133
nemo_curator/modules/task.py,sha256=GHIHBN_UvT7_loDMO6pd4VlSUd8V4xf-XoVF6BEXsno,18872
nemo_curator/modules/to_backend.py,sha256=22ZYsYljgR2nyazHZmsdidGTtzb0PGpH5hKkfik5Rm0,1480
nemo_curator/modules/fuzzy_dedup/_mapbuckets.py,sha256=-xBWNZnupmRpcS6lgHm-U6WUQz2K0TI9j_daPl_frYE,10539
nemo_curator/modules/fuzzy_dedup/_shuffle.py,sha256=J1T1xmjvnnbUg6csW68Xl6930MvK9yoCBJXj11CM-zg,11105
nemo_curator/modules/fuzzy_dedup/bucketstoedges.py,sha256=IxnoDPjqwhyRwPwT9IzSDjNV_GzLCSLb_KCaiC-jGFw,5893
nemo_curator/modules/fuzzy_dedup/connectedcomponents.py,sha256=kVL4KB9XdQ9uM8VK3akbMvdtdxmQ8qzSpqJazU8BlPA,11471
nemo_curator/modules/fuzzy_dedup/fuzzyduplicates.py,sha256=iCDWRJMUiwGwQGp9rMZbW9k9D9PSteI_Gzm6otISdPo,11766
nemo_curator/modules/fuzzy_dedup/jaccardsimilarity.py,sha256=uQaXcl4Vhu-1ZfRLmf0Ikws6ahiZHvMX4aEsc05wnVI,7679
nemo_curator/modules/fuzzy_dedup/lsh.py,sha256=eTcjWhagYzTgGKfEzW39u8fjx40R3gDBsjuyTv4AiSU,10550
nemo_curator/modules/fuzzy_dedup/minhash.py,sha256=vqu6ZewNbb_tcOWe41u9985GbSZGW4F7MzCkNn5HwfM,8622
nemo_curator/modules/semantic_dedup/clusteringmodel.py,sha256=9Yn6eR8QTSOMYkLA5FYRoSdClMKboimfLmNqqs9gBKE,9977
nemo_curator/modules/semantic_dedup/embeddings.py,sha256=LsatJy3ibTo2PitUA1r02NknVRjhnftHqt5_h1bVvTI,10478
nemo_curator/modules/semantic_dedup/semanticclusterleveldedup.py,sha256=FWPoawqUqmrH9lw_7iqFRY84vJxgNTZa7y-dVvdPSzw,8496
nemo_curator/modules/semantic_dedup/semdedup.py,sha256=MXsiJ146regFVCdDbTZICwoaYtdxCqfexYCpOIiDNCI,5365
nemo_curator/nemo_run/__init__.py,sha256=SsipNxmMuUKT-P22M7z3i2K10gSybbAuHklWQgxl9Zs,675
nemo_curator/nemo_run/slurm.py,sha256=uIJS3o7kGGBDUolXrJK_ucnK6-oB5Qd-7FVxXh0VYPU,5298
nemo_curator/pii/__init__.py,sha256=NVzEmeQhr33NGGaBYs_GTg-TgkVqEAgU8wYE_BWXMLQ,610
nemo_curator/pii/algorithm.py,sha256=WzyCn7S5UE1_0VmmSdtcEY5yS7ase3NvZDPR134sa_E,9845
nemo_curator/pii/constants.py,sha256=FhrU0UmUUHE6LIxDNAP4WQuY3Dl48d0bdBwr5sM-TVg,369
nemo_curator/pii/custom_batch_analyzer_engine.py,sha256=bc5EmJ5BZs07C6Y2Z-rlX6FagC1R29hjxDyvisrbsFY,6668
nemo_curator/pii/custom_nlp_engine.py,sha256=1ZPR8gxCskO7pajgSFgeOz_eAERJjNV12V1Oqqz9rJ8,2686
nemo_curator/pii/recognizers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
nemo_curator/pii/recognizers/address_recognizer.py,sha256=Pc_lSuT-J5asHQQ9So5GW-2yoSDoIqoQzSsjtkyg9uY,1828
nemo_curator/scripts/__init__.py,sha256=NVzEmeQhr33NGGaBYs_GTg-TgkVqEAgU8wYE_BWXMLQ,610
nemo_curator/scripts/add_id.py,sha256=VHd-FYP8FA9uAUJNoVYEqckXExs18rdnQI1Rru_yiv0,4093
nemo_curator/scripts/async_llm_pii_redaction.py,sha256=NMg8AXb9dSYzz1PKmo0zr6A1fW3zY_ZHfanvgLfO5Kc,4967
nemo_curator/scripts/blend_datasets.py,sha256=ARlamjhilcFgwGW_NJsEXgfO2AOGMIaE245M6P_uTnE,3718
nemo_curator/scripts/download_and_extract.py,sha256=9r758TirZ91kKtmq6BIZA7EfyB_nTI-kDQ5TVOWiq5w,6113
nemo_curator/scripts/filter_documents.py,sha256=XPjTCRvLB8bhRsZteFc1f0eMj2_WS5Kw4INdHT7ZcGA,11064
nemo_curator/scripts/find_exact_duplicates.py,sha256=vknmO7cavoB-7A5wc_QRVZlIEEOwtq4sQwcraaXtNls,4053
nemo_curator/scripts/find_matching_ngrams.py,sha256=Z8lPn1_eGOwEupgY2-NWsbpI8C_XXfCQSZSqkPviVvA,3341
nemo_curator/scripts/find_pii_and_deidentify.py,sha256=rHVm1ZsUHD8METDWm-0cTbwjQ9ftwXfFS1NWu5-oM5o,5579
nemo_curator/scripts/get_common_crawl_urls.py,sha256=-RYZNH08UTDn8ZGU0JElOuiZrh-PXjqD_JKL4FcFYI4,3531
nemo_curator/scripts/get_wikipedia_urls.py,sha256=fs2-F59KLyXyIvmW7syYFzFSVR4KGD50o4eG8qyfncA,1866
nemo_curator/scripts/llm_pii_redaction.py,sha256=LPamF6mGftm5eyV7wRDbiD5HX9uaQrOUPtVmi0eSiko,4679
nemo_curator/scripts/make_data_shards.py,sha256=3-DnW_6_c5F52JQnBwKEC1SEcE9DZBhdTi0ELSHF_G4,2587
nemo_curator/scripts/prepare_fasttext_training_data.py,sha256=CNDLaWxP6-qyTcL55KqNfRxJOnkt6cIgTvTvbtI8Lw8,3567
nemo_curator/scripts/prepare_task_data.py,sha256=ZuPmS51xFYPCq3UeMx4Al23iqmaxGHIZkpRnHAALXWc,3035
nemo_curator/scripts/remove_matching_ngrams.py,sha256=lh2eCu7W5Zw0MGbSYNELjQ3RtygQ8EuJdO-I4j0MkQg,5309
nemo_curator/scripts/separate_by_metadata.py,sha256=mG3imMv0wNB4U_F-_89lNTpLbKjjzJDbxzBNpKYIRvg,4698
nemo_curator/scripts/text_cleaning.py,sha256=QAgBwVeujtx5FgsTupWT0UQk1XOlNk2CTK1_t9Ia3_Q,3614
nemo_curator/scripts/train_fasttext.py,sha256=w-jI921mBu6RSzS-I82YcHowdbh_vxnGIYBZIAjCr_k,6418
nemo_curator/scripts/verify_classification_results.py,sha256=tgLf_OV-hsV4Io3b4aMaNlded-1uClhLq1zVumdUoBo,6650
nemo_curator/scripts/classifiers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
nemo_curator/scripts/classifiers/aegis_classifier_inference.py,sha256=1SKQYflfRLkaIVWqqg7oRCxldOidhYosjkLB13Jj1Po,4375
nemo_curator/scripts/classifiers/content_type_classifier_inference.py,sha256=j7zAkqUY9_65ddgwxngKSWtPAeK85DOLhpUNrdUVCMU,3729
nemo_curator/scripts/classifiers/domain_classifier_inference.py,sha256=gbp6urk2wQ7JTXaUuE6WM7sl7zi6nxWw0PVY28Oy3U8,3689
nemo_curator/scripts/classifiers/fineweb_edu_classifier_inference.py,sha256=0SLTA9q46zeou5WcQJbrPMIL8bR722f2GUrHSkKAB8g,3723
nemo_curator/scripts/classifiers/fineweb_mixtral_edu_classifier_inference.py,sha256=fWnuTk6A-Q3Ys_NWKBftMkyLH7mjXb0h7xU4Hx00W9c,3777
nemo_curator/scripts/classifiers/fineweb_nemotron_edu_classifier_inference.py,sha256=R1v_OekboDfXdW_Bp8p02uZGtz_wQ9t8FUyoPjC5KSw,3790
nemo_curator/scripts/classifiers/instruction_data_guard_classifier_inference.py,sha256=fek7-qtWtV_Ox1f2OXesD7ehbqp_ovh_I4SKMrd7-8E,4070
nemo_curator/scripts/classifiers/multilingual_domain_classifier_inference.py,sha256=tM_9dWh5zovI4rmHt8nsCieX9WIRNHIrwzv8i1nRMgU,3778
nemo_curator/scripts/classifiers/prompt_task_complexity_classifier_inference.py,sha256=wsuIPAgWyX831GA56jp7LAsj2Kl1JrvjWTDRt7_jBZM,3809
nemo_curator/scripts/classifiers/quality_classifier_inference.py,sha256=RpCv2AyJiYHhsRB-NVPXQKMnrdbK0EUwKyX7QqXvZBk,3692
nemo_curator/scripts/fuzzy_deduplication/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
nemo_curator/scripts/fuzzy_deduplication/buckets_to_edges.py,sha256=qgBWnqVVLJMNtoXOUfCuqwi_DqkuqkHWwPa-z_59wHQ,2971
nemo_curator/scripts/fuzzy_deduplication/compute_minhashes.py,sha256=I59xSBe_qqbcgDTf3W5bcvXsnFuAN4qv5EpUex0Pf9M,5445
nemo_curator/scripts/fuzzy_deduplication/connected_components.py,sha256=3yToZXgCWPR3uxksH87u4hB5kDTtGrGAN5-LVAcTOOU,2836
nemo_curator/scripts/fuzzy_deduplication/jaccard_compute.py,sha256=N0MoukF3b8sjdCsXbIQKhSqwfnEUI5fINhb3HeAk1aI,2899
nemo_curator/scripts/fuzzy_deduplication/jaccard_shuffle.py,sha256=KD4D-rDhnJRlFTgMOJKvfNbDs1Z6zlL8JPhBh-y--Tg,4216
nemo_curator/scripts/fuzzy_deduplication/map_buckets.py,sha256=r_qza_FasCnSs8nBBUj8yRfkK9GJgk_1Ws1ZjZAO0FM,5836
nemo_curator/scripts/fuzzy_deduplication/minhash_lsh.py,sha256=qP9Hs1o3GAUE7qdbPgKMz1rRyDGXjTEZzde_gV9w2uM,4173
nemo_curator/scripts/semdedup/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
nemo_curator/scripts/semdedup/clustering.py,sha256=NzKZbJK4fJoohTskierIjWvtBj6VDPxSce6P4ytq2qM,3906
nemo_curator/scripts/semdedup/compute_embeddings.py,sha256=Uol_8mimk8U87MtmX8cEB3HDqjBhDjWxveQ8Y2EKmoE,5192
nemo_curator/scripts/semdedup/extract_dedup_data.py,sha256=qpGVkMkOL-Nk1wQaTNN56CT86ybAYwwTNpc86ZgMCuc,3766
nemo_curator/services/__init__.py,sha256=B3D0AylSmuOOruKFvG_vqV78f9OuR52mDGTWa4npDoQ,973
nemo_curator/services/conversation_formatter.py,sha256=JRMOpMCMrCz2nN4BwHvcHigi_xJTrw6lo6MTlzGN2kQ,1022
nemo_curator/services/model_client.py,sha256=1OEgNbF9vgeso9d64G4cKiGL9ZfxU5Csue0hOOHwi_0,2994
nemo_curator/services/nemo_client.py,sha256=pTj-w7Wv2foJqFb9oxVeXZEl7kynMo4yDnbf6VsEOfQ,3624
nemo_curator/services/openai_client.py,sha256=z4Kqvs1ItyvhnbPJJB-Idpvys6iNUyr5ai_IayjwQHw,6035
nemo_curator/synthetic/__init__.py,sha256=6swtfa9MuBm7iX80WZ675VJ_4Mk4FLBj-lXEIDtYQD0,3405
nemo_curator/synthetic/async_nemotron.py,sha256=yiHGLwbTbsJWjY0VIBS6w2kyRPftIpK23Dv1xOYDOwU,81270
nemo_curator/synthetic/async_nemotron_cc.py,sha256=s4kpbQOVjRIEIDEDr1EZfkrS6W6OyRzvGvYhWoWlnwc,8335
nemo_curator/synthetic/error.py,sha256=EWGJNnhanzilxsUrlMKOgcv2fJ_9YjunOFIpLpteOzQ,803
nemo_curator/synthetic/generator.py,sha256=Ej4DiBlqS369IeIwWvIUeGmQ9MYCGh0pRVFAlcAOoQQ,1242
nemo_curator/synthetic/mixtral.py,sha256=CUKR-bmD3qYayxtK08987A7AepAa5jbzvGfoltxzSZ4,1319
nemo_curator/synthetic/nemotron.py,sha256=pt_Hn55pvGrgRsUMy2uiLjaujQcltFv00GTD4kxmZrg,72340
nemo_curator/synthetic/nemotron_cc.py,sha256=vON5KoiQx72x1fYqEme8LPylDzEuGQgQFvqAwesU638,13476
nemo_curator/synthetic/no_format.py,sha256=TjJoa04f_VQTczgAYf67p5XnGB0oKATXFRQ6SXSYTAk,1194
nemo_curator/synthetic/prompts.py,sha256=vbyCYrU2c5FyzGVrZlcet-53mQILCcj7dMIPgqa39Hg,12936
nemo_curator/tasks/__init__.py,sha256=IfaRyarhNVd7icbFyiT_fBnWoY38iCcIqZoFR019Wis,1499
nemo_curator/tasks/downstream_task.py,sha256=krVCvrbuZy9ToNyImYXpxUl03hBCnPVlWcrOXWvCwPY,1968
nemo_curator/tasks/metrics.py,sha256=MgBEr9mC9KEeDRfmW32_k4a06f6ljWveyC9LBLtAG0Y,18930
nemo_curator/utils/__init__.py,sha256=NVzEmeQhr33NGGaBYs_GTg-TgkVqEAgU8wYE_BWXMLQ,610
nemo_curator/utils/aegis_utils.py,sha256=LEwVKVFFP7pzBciaWaAL5O1hzZ_yE4AI5ik7-DD70so,10140
nemo_curator/utils/config_utils.py,sha256=0_Uy16dEXqqTeqiQsBCRzD8E_wzMYOYvz0_jkC-ZGhU,3598
nemo_curator/utils/constants.py,sha256=Szr4RTF8hFFkTqz5dhh_IbTReZfBXFLlhyokggQ_bvI,3296
nemo_curator/utils/decorators.py,sha256=t0gGx4HhyEJntCk5XAAaoePmoahSyTNjhqtEpGeeHrY,864
nemo_curator/utils/distributed_utils.py,sha256=i-oXQ-FxQ9kmOm5ILFbG_i8GSHc60w0Yo3adf6vVhMw,48861
nemo_curator/utils/download_utils.py,sha256=i97IsKFyOCWrZa6JuZh84ugabIO9LHkKxIp7fzAR9ic,7458
nemo_curator/utils/duplicates_removal.py,sha256=k8JDN3xFT_qzxszO0DDFIVpUIDDrNzt53AXyZTLtILg,3393
nemo_curator/utils/file_utils.py,sha256=GP8BRlRZmFv8dvWm5WAQiRwUKnrYNturXU99auuJpD4,15503
nemo_curator/utils/gpu_utils.py,sha256=3hZWSm_aQdxyIyOjf2vsuHo3QT9qzKNh63ZPzvuTohE,1113
nemo_curator/utils/import_utils.py,sha256=obg-cP-X8QDk6I1GMeIIbO8hqLlAb-2qXgregIs1yrw,13703
nemo_curator/utils/llm_pii_utils.py,sha256=IwyIedzvSW2FmWqTbhdtwG5puHvQEpb9Z7mnhlGHQFA,6414
nemo_curator/utils/module_utils.py,sha256=aaTOspYbMLpGby-gd-wcVKhVv_eBbtYpPd4mjMiRgSM,779
nemo_curator/utils/script_utils.py,sha256=A0UHsXT9FJf28PhDhTCE02PqJgEcwiIr4QL7luVGFZE,20644
nemo_curator/utils/semdedup_utils.py,sha256=vhFStizzsorU5q-a2ahTtLaceEe_AfCtJRqdm80agiA,10089
nemo_curator/utils/text_utils.py,sha256=B1AJG4-DE-KB9HIlLAyRn3PVJchtrO40AibvZE18lZI,7125
nemo_curator/utils/fuzzy_dedup_utils/__init__.py,sha256=NVzEmeQhr33NGGaBYs_GTg-TgkVqEAgU8wYE_BWXMLQ,610
nemo_curator/utils/fuzzy_dedup_utils/id_mapping.py,sha256=05MwMpEDBycgB9rESP5yIZSbZDqAW4tb3t2zJLKSEtM,1862
nemo_curator/utils/fuzzy_dedup_utils/io_utils.py,sha256=b9ZwRyZ_R38nicDLSKkIG5yAxlqFfT5FNOejOqTD86s,6711
nemo_curator/utils/fuzzy_dedup_utils/merge_utils.py,sha256=HpOP6KaV1AJA5HR90QlGIypZcsSEebV_8lMO4VXFDBM,8027
nemo_curator/utils/fuzzy_dedup_utils/output_map_utils.py,sha256=FhmWlPCUZa9kZJdnSfGvbi6RQR0bg7uq0l9TzKaV9uQ,2906
nemo_curator/utils/fuzzy_dedup_utils/shuffle_utils.py,sha256=sYvUV1Bjp9bgFw-EHQ9WU1qOXD76P--g4VCWFPJu5BQ,4077
nemo_curator/utils/image/__init__.py,sha256=NVzEmeQhr33NGGaBYs_GTg-TgkVqEAgU8wYE_BWXMLQ,610
nemo_curator/utils/image/transforms.py,sha256=piLOcV_iG48lCnfJkBstqqUSlDico0hNaxiMzJ9YeHQ,3561
nemo_curator-0.8.0rc3.dev0.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
tutorials/bitext_cleaning/docbuilder.py,sha256=5EDlR2UXDSTpUI_kT3hxOwfzsNEIYoxxAHGI6ZIn2gM,1385
tutorials/bitext_cleaning/main.py,sha256=VCK57Pk3NMQegLlwCTfpNud1urKvUsCGi19OA7AG0zs,4286
tutorials/dapt-curation/code/docbuilder.py,sha256=FcMDBPZXjMOfxLHnzwOdFd_mkNb-KQuT7U_N61a53WA,15230
tutorials/dapt-curation/code/downloaders.py,sha256=-1zTdbo4IpNGgYfDuCM8ycEpDaxOW6Erj86F8Sj-lNg,7474
tutorials/dapt-curation/code/main.py,sha256=OtEMRdgOBUcaXRIpKYynaB-2T9hJWc032UVLNGY8-bQ,10565
tutorials/dapt-curation/code/utils.py,sha256=fw57bOBCwGl5oYLigDmYaoYVErtlhI-7mvUR7dUNsjo,12775
tutorials/image-curation/helper.py,sha256=u8xUI36wcULoeP-mpwDl2O5JE4KRUPQhr6OGp7j0Tq8,4132
tutorials/nemo-retriever-synthetic-data-generation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
tutorials/nemo-retriever-synthetic-data-generation/main.py,sha256=k_OMpMNteaKoabOejGc_zytxdWbLA8j0rqNrp8_CYPk,8975
tutorials/nemo-retriever-synthetic-data-generation/mine_hard_negatives.py,sha256=CxjTfldTIOTyDzz9cJERxcMAkAFN2H7EJoLd4YfIsHs,3514
tutorials/nemo-retriever-synthetic-data-generation/repartition.py,sha256=OiahsIx0KWy2WnO16508ZVTc5lITR97AMELl8I1M4YA,3699
tutorials/nemo-retriever-synthetic-data-generation/retriever_evalset_generator.py,sha256=cRQM5BZJNr9Zh9rxTbUrwb9RLaZOM7_lMQLdWt5RG7g,7339
tutorials/nemo-retriever-synthetic-data-generation/retriever_hardnegative_miner.py,sha256=-6eriGE2IPGhCM8yM1NqRuvBcXbo33X1XmT4nfVOOc0,11763
tutorials/nemo-retriever-synthetic-data-generation/config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
tutorials/nemo-retriever-synthetic-data-generation/config/config.py,sha256=wQbbBMFFJlGMNjEBHJe2ALaHIJXKGp-5T9ZCeE5j--I,4888
tutorials/peft-curation/docbuilder.py,sha256=PqsPKj0NjoibZkBhbkAq73WFMHs94Ti3yTTUmiuqDe4,3435
tutorials/peft-curation/filters.py,sha256=YWX0GumDSCqIY-_L6kOPUM_8WrhOPikahxttkWx5KVE,1536
tutorials/peft-curation/main.py,sha256=zc1YOVPXRSpuS42HEth3oRLhbqHgzK-DpH9e9BEim2c,6093
tutorials/peft-curation/modifiers.py,sha256=LPXCaZwoeY4Hur-I9DMAIP0ExsMaQMVhRrcXSl-wy5Y,2064
tutorials/peft-curation-with-sdg/docbuilder.py,sha256=f5c-tkkMrBUDoyxLJmnvE_N-x-i02UqBjurrp7rutHs,5068
tutorials/peft-curation-with-sdg/filters.py,sha256=_80wocMPm-8-XGxMEPOef7U3_u1ye4uAe5GyRIGGrz4,1097
tutorials/peft-curation-with-sdg/main.py,sha256=hj0ETnEdWYApluLf38tx0I-rYVKh5XHERxIwQK5tm_0,14420
tutorials/peft-curation-with-sdg/modifiers.py,sha256=fNmB3q9i7VKiQz0-zhSu1-bKDennTmNDNtobFar78CU,1377
tutorials/peft-curation-with-sdg/synthetic_gen.py,sha256=vpps6kYwCsF5EA9kZ22ReyJvUYozzDem1gwc447_oe0,11908
tutorials/pretraining-data-curation/helper.py,sha256=zuzRk9-fgG50Wdr7ErVd78WJa_4DL_xdw4995ljmZsk,2400
tutorials/synthetic-retrieval-evaluation/DeDup.py,sha256=AbOW294veLsMyiNsTFOw0v10MRWYUWW0hXAMetP69Us,1562
tutorials/synthetic-retrieval-evaluation/Endpoints.py,sha256=jC-4BGK62YVA0fqnzV1GvzdvqBObLmCWQuRsZUhth3g,1704
tutorials/synthetic-retrieval-evaluation/Generator.py,sha256=L8ebM5qhE-cEtZ9oRWi6GBOZ2IVQ1gcliXntP5KGPRw,6134
tutorials/synthetic-retrieval-evaluation/prompts.py,sha256=BklBCzxYWsae-3wiJ3HRDdn3HCDtAtJOwHXzpnJtl48,5245
tutorials/tinystories/docbuilder.py,sha256=53R6SkM7nQwqcu5hgm9HELpx9Gvcjd5UWxUQcct0Uko,2923
tutorials/tinystories/filters.py,sha256=mAzJCtzqR9ICDgOZbybwV5AEtsy-Ee928kimBtodiKA,1415
tutorials/tinystories/helpers.py,sha256=90YZ_zBumIN4xMyClYsM0tGrt_HCxIvXG0ypKt0_ig4,2700
tutorials/tinystories/main.py,sha256=9dGyRrvP1Zw5HgCatBXGyxKzO8OlycpnTxDo5_bqI1k,7485
tutorials/tinystories/modifiers.py,sha256=JAzC1lb6eiw6mrGEbMxaWnAFceH4NJ4nlUfbEJ7Pyns,1307
tutorials/zyda2-tutorial/0_processing/helper.py,sha256=86Q022vpfY1K4-Teoiyui4ELaYZQiITSBETIgy9LVWg,744
tutorials/zyda2-tutorial/0_processing/process_dclm.py,sha256=J3TdnBT8H-knsBZydiN6pBdJURSw0X9rX1VXnNqGBP4,1500
tutorials/zyda2-tutorial/0_processing/process_dolma_cc.py,sha256=cWRQKbvSQmH7sfJQkosylV2Dpz7rg7rFTs566i7q29Y,826
tutorials/zyda2-tutorial/0_processing/process_fwe2.py,sha256=SUwWUH53fUn1vEKpd4I99v3ufyFlr6z1Z6FqCqV4jXI,2365
tutorials/zyda2-tutorial/0_processing/process_zyda.py,sha256=ABiqJoHGDrfPG8p_LkGr-Nfyc4A1hP39fN2mx51zo0k,1248
tutorials/zyda2-tutorial/1_fuzzy_dedup/0_minhash.py,sha256=IyoYtj_jjHckgG2bBH7mGhOCo3wesqcxSiV2yeEc5zs,1888
tutorials/zyda2-tutorial/1_fuzzy_dedup/1_lsh.py,sha256=zfdT0sL9ZauCWGVVOxp0nTwCbDGTUNkcEtYaDuio-e0,1921
tutorials/zyda2-tutorial/1_fuzzy_dedup/2_buckets_to_edges.py,sha256=eQC3QdP4umXC8879Y1p58a68z08IK4mUR4Jchn7yRcQ,1217
tutorials/zyda2-tutorial/1_fuzzy_dedup/3_connected_components.py,sha256=moIrdc6RlHc66WvwoYzYcORlqyrJYPPUMq9Q6ECEFvI,1404
tutorials/zyda2-tutorial/2_dupes_removal/0_id_mapping.py,sha256=l8BxGdo55p0VrMCjGScZ9goye3AEUiWKnsdlhC217uw,2484
tutorials/zyda2-tutorial/2_dupes_removal/1_id_conversion.py,sha256=L92MR7t1wS8VkAgAPLLlL6H0HnynRFuOeB7T5F89Vqc,2289
tutorials/zyda2-tutorial/2_dupes_removal/2_compute_counts.py,sha256=0x5pSiR0umxKY3J6d4mcPoMAeUHgfbWKhr5utlsEciU,2973
tutorials/zyda2-tutorial/2_dupes_removal/3_prep_dupes.py,sha256=64P5Lxlsf2HQ-emHg2CZo9mMu_Kc22QKOlrO6vMEyA4,6346
tutorials/zyda2-tutorial/2_dupes_removal/4_get_dupes_dclm.py,sha256=A2JzsEe39BBkWJifaBF1VGyYrxVf-UZCjZb-4kxdn_U,4869
tutorials/zyda2-tutorial/2_dupes_removal/4_get_dupes_dolma-cc.py,sha256=jt4lIDXsUei5Xmq-_BGQ4c7DUd6XN0U1MF_KVWlALb0,4002
tutorials/zyda2-tutorial/2_dupes_removal/5_get_dupes_zyda.py,sha256=LDwlun4wwQlOdK9Nwd0jQQwd1v7WL_V45Ux0QLLSGQQ,4139
tutorials/zyda2-tutorial/2_dupes_removal/remove_dupes.py,sha256=_uNrjc-ElVkyoBJTHDStih3N0tRc0VL7bz8cpk0wIZo,2205
tutorials/zyda2-tutorial/3_quality_model/run_quality_classifier.py,sha256=TAvRGbly7DAkrqz7tzGiTZDoZDbeCZG4ElON8ep_KxU,1663
tutorials/zyda2-tutorial/4_filtering/filter_fwe.py,sha256=YYgyDlVYA8yZaBTY8ThV4BphvPb1JPSHO3nFBZ5nhYE,1122
tutorials/zyda2-tutorial/4_filtering/filter_quality.py,sha256=CqaknS5EUpL8Xpynb6oPlhsTtasao1dyGogmaV_haOM,1556
nemo_curator-0.8.0rc3.dev0.dist-info/METADATA,sha256=sAcrrKMcG3ApI0Z1zwPg1xnHdjpe254OkUsRRSA45l4,17472
nemo_curator-0.8.0rc3.dev0.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
nemo_curator-0.8.0rc3.dev0.dist-info/entry_points.txt,sha256=1SQxE7ZqEvmVUgKRT7HCu32JebX1CouoCwF5YIcnd2c,3619
nemo_curator-0.8.0rc3.dev0.dist-info/top_level.txt,sha256=lE9bTBQ09Wn5G_wLHlO67rvyLjl7kq37AfOktW6C7hE,32
nemo_curator-0.8.0rc3.dev0.dist-info/RECORD,,
