examples/blend_and_shuffle.py,sha256=d4xSfXJSgWQc7WITQFLp1iiwrDpKG876cJM0eeDsAuU,1696
examples/classifier_filtering.py,sha256=44jDsyMv9Fo2wFhpcr1C7zAzOkSojIVQs68I7wCUJnY,3424
examples/download_arxiv.py,sha256=TciJ7CRkwR7QJ09eehyfUTSVSzyafXrt6-4i-2-okS8,1479
examples/download_common_crawl.py,sha256=wDY1lbFPDk3O6bHJb9pVbTY-XOu4Jy287_7qMVu7LN0,1618
examples/download_wikipedia.py,sha256=kvRsoL47jAii7PlkNCkMmBSi6Ez22Bj4RlbUD0NGRLw,1561
examples/exact_deduplication.py,sha256=wOPcNRJ5t4V7weZLZYwZrmLf9eOIsFKyGBJWbxbYaXQ,2903
examples/find_pii_and_deidentify.py,sha256=Uj2RR_pbkeWcSpK2maMc5Sfr71ENIz4IET93Ju6YL-o,1766
examples/fuzzy_deduplication.py,sha256=D2-iSZOGntKY3s64qlWkCWMMiRcaVu6YpgLPSD-BEww,3821
examples/identify_languages_and_fix_unicode.py,sha256=JXsO0ZSgYT9F7J0zkRxnGQxXBwexsdTNwr2CGzU7530,3306
examples/raw_download_common_crawl.py,sha256=4g9Hmta8S4098lK449Tjqex30mqPAyxEFDKMfWWiizg,2008
examples/semdedup_example.py,sha256=MD9GG3PtHV-998OaymaaQPoZefmUrBV-Z42R_KmEM8k,2592
examples/task_decontamination.py,sha256=6pVsJLlswtveltjvGt1b2DxIbr6qwH7qM8ArfZaQk1Y,2641
examples/translation_example.py,sha256=xELNTS04fHhXbWBgV3ZOjjmNU92VnIBg-g2LIq6Mc-4,13938
examples/classifiers/aegis_example.py,sha256=Go24TsdNv8wqvi8-9XRcaMEOucIvb-X5mW7eiibIets,2193
examples/classifiers/domain_example.py,sha256=sOE7LDnwIQZJ7MYSy9wM7HrCdgb68yiAp_n1883Ous8,1988
examples/classifiers/fineweb_edu_example.py,sha256=ZFHw5m8-wj_48bmoImW02NLYIVwgJi19i2hduCUU7vM,1981
examples/classifiers/multilingual_domain_example.py,sha256=_zR_tItqN6bzWE8_q68rVLIKqciiMe64Gu24WSiRuS8,2065
examples/classifiers/quality_example.py,sha256=smsWwkBW1geoNi-PGYcRC5-4QxLBcx8ZJhSYpuz6C0I,1992
examples/k8s/create_dask_cluster.py,sha256=4x8r0uI4gakvMFNF3uUqa9SR9GmsBl_TN9ZbrP4pd14,4238
examples/nemo_run/launch_slurm.py,sha256=Cn5y7DpqTEB9wfv9an-9TkAnDdBEVnhQgFiMi-iMXB0,2009
nemo_curator/__init__.py,sha256=DbfJ8agEW9W3CYvDAmbWhPJiJZYMydF_niTFT5aIkQk,1618
nemo_curator/_compat.py,sha256=Op_Vlh2kT_0gm6iWrGoJh-nGEQgrIICfMlWHlh-9UEo,2021
nemo_curator/log.py,sha256=RoRApb4jGYC2TaIWZy8SnYi4v7CO6r31FYBCO15jCss,2937
nemo_curator/package_info.py,sha256=VHn6sSFwmzcSsPkhXSjfrxw08HlZmGvb0DZn-PehqsE,1647
nemo_curator/sample_dataframe.py,sha256=553_PeN7UlAjbcIZNDeZxmFmUJgqDB_0O86ML4SwUWA,2835
nemo_curator/classifiers/__init__.py,sha256=aXsWt7_4BEbWYLS340rcRHR7Q4sY8pOXdyunmmamTwU,1071
nemo_curator/classifiers/aegis.py,sha256=FuRZ261orFhnZWGjbU1G8c052oUjrJO3mXcd_IsMC-E,19962
nemo_curator/classifiers/base.py,sha256=V1On0eueO3BlJgH4ds3ycnxiFs6Koi8oZdu8cVNmYLc,5586
nemo_curator/classifiers/domain.py,sha256=KEpuXwC4U6YZBjKXTuGiiOVjq9Ih3lgeNfsslNgn6T8,9391
nemo_curator/classifiers/fineweb_edu.py,sha256=KVh13tsP4Zvhtd2EuNpiyERoqhrtdUOY0CGF6Wsv9sg,5755
nemo_curator/classifiers/quality.py,sha256=s7bulVmOZUTbGix-bhaJlgZmuNcDBK2OS5cuCk-x_-c,5164
nemo_curator/datasets/__init__.py,sha256=RKN_DespHiwOSlBQ3-fsILTsnZKvnWHoEylq4iaSjek,966
nemo_curator/datasets/doc_dataset.py,sha256=M5E8Ya5YQHK3Rj0Iu8SPd0Da_6e5wjG9AGZ8EjzwheM,10311
nemo_curator/datasets/image_text_pair_dataset.py,sha256=RjC86vHJATqz8G_S5j6WM6wi1Ox7RmgGTT6cBsgOwa0,11944
nemo_curator/datasets/parallel_dataset.py,sha256=38S5rHoeJqJp1Xe2WCUBzPMLmg2j2WXecWBqwiqO21s,6442
nemo_curator/download/__init__.py,sha256=9lBlOthqw-Qd1P93Hn1VmjAfM57PStMfQ8teCf-V-Sk,1887
nemo_curator/download/arxiv.py,sha256=12DUsllmKr2DIcurKRJYHNNzewjolrU3PzT2UjRe3ZM,15523
nemo_curator/download/commoncrawl.py,sha256=hDKLDqeblBo8vdCeyWiQjMhshfv0BEhL-AZuKvjcIbs,15726
nemo_curator/download/doc_builder.py,sha256=WRYD-vq9lDwhuOyv1o17oqcYkMafFfhhKTBs4HdrkDs,7372
nemo_curator/download/thai_stopwords.py,sha256=oIf0Y2NK9Ig1oSsk6rUcVb4ZHgYPWrtyT_PHf5akIws,2715
nemo_curator/download/wikipedia.py,sha256=nVSbpmW7QhNg9eGJAA4BOAHLptJuhctllEFaRDfZWLg,31193
nemo_curator/filters/__init__.py,sha256=9FPjeiMcSGMbKaF90M8ppXQWW4oTm_gaRgMUDa4LjWw,2858
nemo_curator/filters/bitext_filter.py,sha256=HSaC1M9Dg3bYuUriEPQzjT6cwq3hfmWRtaqM6zVvA-U,6205
nemo_curator/filters/classifier_filter.py,sha256=PbLd6dcSRaAXkjDmORoYJPEmdwN5u6twpC-4Qu8pLZA,10080
nemo_curator/filters/code.py,sha256=w7cXk_7EGCpsEkuQLq34hRrLaKVrbZ_46rgCQ8tya7A,10645
nemo_curator/filters/doc_filter.py,sha256=t8h-GClESUFgqW6ljLavg1BR62QcV_PNgPI7VMVcNGQ,4405
nemo_curator/filters/heuristic_filter.py,sha256=6LUzjSbOm1retp9jQbxmSxMlQfQ2ylhfPNj_BYd23sA,27664
nemo_curator/filters/synthetic.py,sha256=gk8OIvHyw_oRS3jWCUXfhIkU4zIRBoYDxH_8Gi6VcxY,7226
nemo_curator/filters/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
nemo_curator/filters/models/qe_models.py,sha256=oTK9Ug7e3zC_G0LVIqwYyXcms_YWv5AjouhrYZxfj2M,6831
nemo_curator/image/__init__.py,sha256=NVzEmeQhr33NGGaBYs_GTg-TgkVqEAgU8wYE_BWXMLQ,610
nemo_curator/image/classifiers/__init__.py,sha256=hzZMZAKaYrifBfy067gYW0zym2onTKAA4BAhS1EFKz8,792
nemo_curator/image/classifiers/aesthetic.py,sha256=fIHd522jT0aXey9Q-sQeaikhQckdJ1Dmd4aTTn9dBpw,4522
nemo_curator/image/classifiers/base.py,sha256=UCNsZWzZCS1Eu7gD9IaCrqO_ZJEVnbzWSFLsI-bzeKI,5819
nemo_curator/image/classifiers/nsfw.py,sha256=ObhNpGzB0x6w4M49VL2tnxkBBX5MT3EKF4Nuw-Hj590,4940
nemo_curator/image/embedders/__init__.py,sha256=PJJYO1NFEqffgd2lMU4I84tJYXp8Px-flCP7-aQvnCw,728
nemo_curator/image/embedders/base.py,sha256=peTsz7qLbAP1adsvoIg5jmsknlUnOwKUt1S6GtxHx6I,7517
nemo_curator/image/embedders/timm.py,sha256=Sn8gRCZ-rrBlKU7rvisnWTPWNmTbZ98CXey0hqMZQ9s,8309
nemo_curator/modifiers/__init__.py,sha256=S_fHYgbQOezDHVA65kV-AR2HMzPn9-G482Rc-_vqNN0,976
nemo_curator/modifiers/c4.py,sha256=U25SZoeVOcbMRJ4kRsb8JsI8S6IQv1bBt--Q3MfdqLk,3273
nemo_curator/modifiers/doc_modifier.py,sha256=i6lDpiCybvQP_u6ea2-dC1X8Ym2CTRImPmtQBlxA5HU,936
nemo_curator/modifiers/fasttext.py,sha256=aUg7jTcS1OvG_BKG1PUexzc0JcxEF4jDgeGoy0yx8XI,938
nemo_curator/modifiers/pii_modifier.py,sha256=zZbsc3BkZjSuw-x3VMYUci7CSmdQ2YPjIC_z-fMWeck,3728
nemo_curator/modifiers/unicode_reformatter.py,sha256=nz-vfzMqcOHpoCY6KSkwQ1nLmhA1UxOBGb0B7wgeYHE,846
nemo_curator/modules/__init__.py,sha256=559Y9ctR3l7OSIB8gI8yW2t3Y77GOvtUioN02ppnuqw,2732
nemo_curator/modules/add_id.py,sha256=sMUyt3c_puGsxqpu10QeNOXhXHprg1oDxOvDJkXY-lg,3377
nemo_curator/modules/config.py,sha256=gQ0m8oUnMW9gOjeXjOigpdDbV5CidyYeNaAy_PO_0kc,6918
nemo_curator/modules/dataset_ops.py,sha256=34SYVMJQTQkb7GoHyBidJ8aGEoSes6wzkfcHWyIqI_0,7171
nemo_curator/modules/exact_dedup.py,sha256=Yt3YztPsWoUp5HTdsZj0PNDrszVp9fejGS5d2DnBk_U,6409
nemo_curator/modules/filter.py,sha256=iL4Y51Cz27dVceDd23pW4h2uqmz8iYVEV2BkhNFzopg,10986
nemo_curator/modules/fuzzy_dedup.py,sha256=FYtfk6q5lKz75ZRZz_H-xa18eCWU221QJiUGF7osiPE,68718
nemo_curator/modules/meta.py,sha256=9q-4cd1pZQm4T5D-U_oydoBtavJDxR_ssAZy32KS0pI,825
nemo_curator/modules/modify.py,sha256=-vuGZy_hhIN963Eq3aIhhCVbHOYOV8nle-Ik8VGS8CQ,1406
nemo_curator/modules/semantic_dedup.py,sha256=k3gQoK5M3v57FXW4QzdLhJLeYZuZnItVYlEAeqi1u74,25665
nemo_curator/modules/task.py,sha256=4xpnykbH9qQiyODjwVMlR8GlT6xCEAgk9tWBtPv3OCo,18766
nemo_curator/nemo_run/__init__.py,sha256=SsipNxmMuUKT-P22M7z3i2K10gSybbAuHklWQgxl9Zs,675
nemo_curator/nemo_run/slurm.py,sha256=uIJS3o7kGGBDUolXrJK_ucnK6-oB5Qd-7FVxXh0VYPU,5298
nemo_curator/pii/__init__.py,sha256=NVzEmeQhr33NGGaBYs_GTg-TgkVqEAgU8wYE_BWXMLQ,610
nemo_curator/pii/algorithm.py,sha256=WzyCn7S5UE1_0VmmSdtcEY5yS7ase3NvZDPR134sa_E,9845
nemo_curator/pii/constants.py,sha256=FhrU0UmUUHE6LIxDNAP4WQuY3Dl48d0bdBwr5sM-TVg,369
nemo_curator/pii/custom_batch_analyzer_engine.py,sha256=bc5EmJ5BZs07C6Y2Z-rlX6FagC1R29hjxDyvisrbsFY,6668
nemo_curator/pii/custom_nlp_engine.py,sha256=1ZPR8gxCskO7pajgSFgeOz_eAERJjNV12V1Oqqz9rJ8,2686
nemo_curator/pii/recognizers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
nemo_curator/pii/recognizers/address_recognizer.py,sha256=Pc_lSuT-J5asHQQ9So5GW-2yoSDoIqoQzSsjtkyg9uY,1828
nemo_curator/scripts/__init__.py,sha256=NVzEmeQhr33NGGaBYs_GTg-TgkVqEAgU8wYE_BWXMLQ,610
nemo_curator/scripts/add_id.py,sha256=DxMXb2Pvq2ITklgN4e8eJTrTYA3Kqi_k4K6CCMeMKkE,4035
nemo_curator/scripts/blend_datasets.py,sha256=ARlamjhilcFgwGW_NJsEXgfO2AOGMIaE245M6P_uTnE,3718
nemo_curator/scripts/download_and_extract.py,sha256=9KDCAWTLhUY3NzW6K9SOGFCsGZZmUnR44AY9qNQp2GE,5819
nemo_curator/scripts/filter_documents.py,sha256=XPjTCRvLB8bhRsZteFc1f0eMj2_WS5Kw4INdHT7ZcGA,11064
nemo_curator/scripts/find_exact_duplicates.py,sha256=P0CGlC35z74NVbD1Fa3Y4CCgip_Zd5UrGvaohyaz1Vw,4065
nemo_curator/scripts/find_matching_ngrams.py,sha256=Z8lPn1_eGOwEupgY2-NWsbpI8C_XXfCQSZSqkPviVvA,3341
nemo_curator/scripts/find_pii_and_deidentify.py,sha256=ATwyeuSzge4S6cRF9dbSSi_X6_D9IB9LSAbiTxL0WD4,5551
nemo_curator/scripts/get_common_crawl_urls.py,sha256=-RYZNH08UTDn8ZGU0JElOuiZrh-PXjqD_JKL4FcFYI4,3531
nemo_curator/scripts/get_wikipedia_urls.py,sha256=fs2-F59KLyXyIvmW7syYFzFSVR4KGD50o4eG8qyfncA,1866
nemo_curator/scripts/make_data_shards.py,sha256=3-DnW_6_c5F52JQnBwKEC1SEcE9DZBhdTi0ELSHF_G4,2587
nemo_curator/scripts/prepare_fasttext_training_data.py,sha256=-GmYWM0K5Xn7LzQw5xUlv_AxmV_NCG1Gh8JYTzL2bv8,3528
nemo_curator/scripts/prepare_task_data.py,sha256=ZuPmS51xFYPCq3UeMx4Al23iqmaxGHIZkpRnHAALXWc,3035
nemo_curator/scripts/remove_matching_ngrams.py,sha256=lh2eCu7W5Zw0MGbSYNELjQ3RtygQ8EuJdO-I4j0MkQg,5309
nemo_curator/scripts/separate_by_metadata.py,sha256=mG3imMv0wNB4U_F-_89lNTpLbKjjzJDbxzBNpKYIRvg,4698
nemo_curator/scripts/text_cleaning.py,sha256=C0fNRVrXXXd6xwLT2Sz7w0pgnHtUC-fK81Zx3IHYZ7c,2983
nemo_curator/scripts/train_fasttext.py,sha256=w-jI921mBu6RSzS-I82YcHowdbh_vxnGIYBZIAjCr_k,6418
nemo_curator/scripts/verify_classification_results.py,sha256=tgLf_OV-hsV4Io3b4aMaNlded-1uClhLq1zVumdUoBo,6650
nemo_curator/scripts/classifiers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
nemo_curator/scripts/classifiers/aegis_classifier_inference.py,sha256=QiOt5JuN9txIrwBKA_TW3zhpD_4dnsxyR26wVNVBQDo,4379
nemo_curator/scripts/classifiers/domain_classifier_inference.py,sha256=o5aQlFdo8yIGWEvqSvjuT-qqethrNjX0pxdxIgiRk-g,3693
nemo_curator/scripts/classifiers/fineweb_edu_classifier_inference.py,sha256=7B6lK0zxxnGRSV6ydOD3svqfNpqJNgzYXG8vUgldtxQ,3727
nemo_curator/scripts/classifiers/multilingual_domain_classifier_inference.py,sha256=btUYD5mO16dL_bmfmhGczGc1JupXOi4YIiOj6GV0Rmk,3782
nemo_curator/scripts/classifiers/quality_classifier_inference.py,sha256=vZtJq6awKd0RY6527rQJmX640RnlLjH0pl9sM-F8Qg0,3696
nemo_curator/scripts/fuzzy_deduplication/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
nemo_curator/scripts/fuzzy_deduplication/buckets_to_edges.py,sha256=qgBWnqVVLJMNtoXOUfCuqwi_DqkuqkHWwPa-z_59wHQ,2971
nemo_curator/scripts/fuzzy_deduplication/compute_minhashes.py,sha256=alml4cpK2O7RL8YP-u5YeQMW6BXLe4uOncfmTls0rsA,5456
nemo_curator/scripts/fuzzy_deduplication/connected_components.py,sha256=mxYL144_cTqkVX65hheTgesaW1D20dxxfEOGtALCHBg,2856
nemo_curator/scripts/fuzzy_deduplication/jaccard_compute.py,sha256=9fdhIh2W-Lavc-i82RHZac9ep00lFj3AxD4rStCHKy0,2919
nemo_curator/scripts/fuzzy_deduplication/jaccard_shuffle.py,sha256=iOAsSKjndLR3fklJF2vbcjNn30ojjPU_2I1dNwPXGlA,4198
nemo_curator/scripts/fuzzy_deduplication/map_buckets.py,sha256=tFqjm0MQb9BMauSjNQJ7UjPz6PbHl_WFoG_7kOhXcgM,5824
nemo_curator/scripts/fuzzy_deduplication/minhash_lsh.py,sha256=qP9Hs1o3GAUE7qdbPgKMz1rRyDGXjTEZzde_gV9w2uM,4173
nemo_curator/scripts/semdedup/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
nemo_curator/scripts/semdedup/clustering.py,sha256=y8nuVTYAcjvkbyaS_oEwFQ5FDMQ4jX8tEVghmBdmc6w,3847
nemo_curator/scripts/semdedup/compute_embeddings.py,sha256=EcGSiWbbAbgdT1qYKwwPrS8NIeNJtHCtbN5Q1jwByPg,4898
nemo_curator/scripts/semdedup/extract_dedup_data.py,sha256=TbkwSQCvF1TKwtnubWf9qnTcbbryPZDthXiad0Qa7bg,3381
nemo_curator/services/__init__.py,sha256=B3D0AylSmuOOruKFvG_vqV78f9OuR52mDGTWa4npDoQ,973
nemo_curator/services/conversation_formatter.py,sha256=JRMOpMCMrCz2nN4BwHvcHigi_xJTrw6lo6MTlzGN2kQ,1022
nemo_curator/services/model_client.py,sha256=1OEgNbF9vgeso9d64G4cKiGL9ZfxU5Csue0hOOHwi_0,2994
nemo_curator/services/nemo_client.py,sha256=uVN8xhRVsKr7iD20xJ8j-ZfHWzlsOUSnIkvhf1xF0v4,3626
nemo_curator/services/openai_client.py,sha256=z4Kqvs1ItyvhnbPJJB-Idpvys6iNUyr5ai_IayjwQHw,6035
nemo_curator/synthetic/__init__.py,sha256=EskbOiR6d4VCMNgR50IIo-mgjc4SDnaQ2ru-4x7iw3E,3072
nemo_curator/synthetic/async_nemotron.py,sha256=Meg3F9-gTt3q0_n59dQoVMIJVsbhE2xdTDSe7sflroo,80632
nemo_curator/synthetic/error.py,sha256=EWGJNnhanzilxsUrlMKOgcv2fJ_9YjunOFIpLpteOzQ,803
nemo_curator/synthetic/generator.py,sha256=Ej4DiBlqS369IeIwWvIUeGmQ9MYCGh0pRVFAlcAOoQQ,1242
nemo_curator/synthetic/mixtral.py,sha256=CUKR-bmD3qYayxtK08987A7AepAa5jbzvGfoltxzSZ4,1319
nemo_curator/synthetic/nemotron.py,sha256=m1S03olbsjYmNcv9XF5P2xUF5AtF_wk0IshAGtIgMzk,71704
nemo_curator/synthetic/no_format.py,sha256=TjJoa04f_VQTczgAYf67p5XnGB0oKATXFRQ6SXSYTAk,1194
nemo_curator/synthetic/prompts.py,sha256=DjsQRoi4iqQX9FNeWFEVoKC0TAKGFurCqoEt-Gxgwyg,8198
nemo_curator/tasks/__init__.py,sha256=IfaRyarhNVd7icbFyiT_fBnWoY38iCcIqZoFR019Wis,1499
nemo_curator/tasks/downstream_task.py,sha256=krVCvrbuZy9ToNyImYXpxUl03hBCnPVlWcrOXWvCwPY,1968
nemo_curator/tasks/metrics.py,sha256=MgBEr9mC9KEeDRfmW32_k4a06f6ljWveyC9LBLtAG0Y,18930
nemo_curator/utils/__init__.py,sha256=NVzEmeQhr33NGGaBYs_GTg-TgkVqEAgU8wYE_BWXMLQ,610
nemo_curator/utils/aegis_utils.py,sha256=LEwVKVFFP7pzBciaWaAL5O1hzZ_yE4AI5ik7-DD70so,10140
nemo_curator/utils/config_utils.py,sha256=0_Uy16dEXqqTeqiQsBCRzD8E_wzMYOYvz0_jkC-ZGhU,3598
nemo_curator/utils/constants.py,sha256=Szr4RTF8hFFkTqz5dhh_IbTReZfBXFLlhyokggQ_bvI,3296
nemo_curator/utils/cudf_utils.py,sha256=tBF12by81jGwAx-lBEGYvGniJdYLIMDIN1GtROfkh90,1543
nemo_curator/utils/decorators.py,sha256=t0gGx4HhyEJntCk5XAAaoePmoahSyTNjhqtEpGeeHrY,864
nemo_curator/utils/distributed_utils.py,sha256=THtOlquQmXOkPkLD--T4Qu1ju4Nc2DcySPfzagZNp5I,33818
nemo_curator/utils/download_utils.py,sha256=i97IsKFyOCWrZa6JuZh84ugabIO9LHkKxIp7fzAR9ic,7458
nemo_curator/utils/file_utils.py,sha256=p_O3PjDZWAohjSN-sFS9XqBjh5GbXj3hOT00Pe8LyGQ,15008
nemo_curator/utils/gpu_utils.py,sha256=3hZWSm_aQdxyIyOjf2vsuHo3QT9qzKNh63ZPzvuTohE,1113
nemo_curator/utils/import_utils.py,sha256=7hu0ztU42uvo_tpy_c2P5Yt6yiwD6jTIgS7GkBDMA1U,13261
nemo_curator/utils/module_utils.py,sha256=aaTOspYbMLpGby-gd-wcVKhVv_eBbtYpPd4mjMiRgSM,779
nemo_curator/utils/script_utils.py,sha256=oED7RCN42mwYgQv18EoSwmYG-aVoQIbeUHQ8lAQV-F0,20692
nemo_curator/utils/semdedup_utils.py,sha256=0kc4tXfA2chK_LNg8ryY8zXrmfqwaWYRlIZTsJ21uew,16635
nemo_curator/utils/text_utils.py,sha256=B1AJG4-DE-KB9HIlLAyRn3PVJchtrO40AibvZE18lZI,7125
nemo_curator/utils/fuzzy_dedup_utils/__init__.py,sha256=NVzEmeQhr33NGGaBYs_GTg-TgkVqEAgU8wYE_BWXMLQ,610
nemo_curator/utils/fuzzy_dedup_utils/id_mapping.py,sha256=05MwMpEDBycgB9rESP5yIZSbZDqAW4tb3t2zJLKSEtM,1862
nemo_curator/utils/fuzzy_dedup_utils/io_utils.py,sha256=b9ZwRyZ_R38nicDLSKkIG5yAxlqFfT5FNOejOqTD86s,6711
nemo_curator/utils/fuzzy_dedup_utils/merge_utils.py,sha256=HpOP6KaV1AJA5HR90QlGIypZcsSEebV_8lMO4VXFDBM,8027
nemo_curator/utils/fuzzy_dedup_utils/output_map_utils.py,sha256=FhmWlPCUZa9kZJdnSfGvbi6RQR0bg7uq0l9TzKaV9uQ,2906
nemo_curator/utils/fuzzy_dedup_utils/shuffle_utils.py,sha256=ZkcVElmeXmuF9ARK2as6fo1-tXjbp4F0u9JufGJyqVk,3986
nemo_curator/utils/image/__init__.py,sha256=NVzEmeQhr33NGGaBYs_GTg-TgkVqEAgU8wYE_BWXMLQ,610
nemo_curator/utils/image/transforms.py,sha256=piLOcV_iG48lCnfJkBstqqUSlDico0hNaxiMzJ9YeHQ,3561
tutorials/bitext_cleaning/docbuilder.py,sha256=5EDlR2UXDSTpUI_kT3hxOwfzsNEIYoxxAHGI6ZIn2gM,1385
tutorials/bitext_cleaning/main.py,sha256=VCK57Pk3NMQegLlwCTfpNud1urKvUsCGi19OA7AG0zs,4286
tutorials/dapt-curation/code/docbuilder.py,sha256=vI2myBkkwMzLwipREuLHn60M6BxQjrU8RfyH9jiTA2g,15237
tutorials/dapt-curation/code/downloaders.py,sha256=U17_zloT_YyHxZ8CsNAI9g90vMaUyD_gG7z9GwhGVgI,7387
tutorials/dapt-curation/code/main.py,sha256=GP4h1fK37IDZSu_u8Het1CSdi8SMkRtI1VEDeIdQWLs,8604
tutorials/dapt-curation/code/utils.py,sha256=0vlDg7UPoWBF1oIHfCS3mYYKOQyksDwTTLaTYkdILsc,10611
tutorials/image-curation/helper.py,sha256=u8xUI36wcULoeP-mpwDl2O5JE4KRUPQhr6OGp7j0Tq8,4132
tutorials/nemo-retriever-synthetic-data-generation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
tutorials/nemo-retriever-synthetic-data-generation/main.py,sha256=lZvGqZsUYLBS-RBnGsqpnrp5Y0HHyHjx7TSNNfNx4bE,6491
tutorials/nemo-retriever-synthetic-data-generation/retriever_evalset_generator.py,sha256=n4a0rA0DihQwmRNZp_emVYd0GIRPdXOhDHOUgCGgvGM,6776
tutorials/nemo-retriever-synthetic-data-generation/config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
tutorials/nemo-retriever-synthetic-data-generation/config/config.py,sha256=mGEcqYtQilzOfcpgVyJdrhLQMNObsDzZggaf1TWoeQI,4173
tutorials/peft-curation/docbuilder.py,sha256=5Eojv6zINksQJjPNiyil84xrvik7asRnNLOauR5a8IE,3434
tutorials/peft-curation/filters.py,sha256=YWX0GumDSCqIY-_L6kOPUM_8WrhOPikahxttkWx5KVE,1536
tutorials/peft-curation/main.py,sha256=mUUkS35uvNlpD9B46A9JSugMS2itdNw0toskccf1Gng,6112
tutorials/peft-curation/modifiers.py,sha256=LPXCaZwoeY4Hur-I9DMAIP0ExsMaQMVhRrcXSl-wy5Y,2064
tutorials/peft-curation-with-sdg/docbuilder.py,sha256=-R8SZ_WO8zG_7ldKh7en5idlZkdwR5XItEyCtrxhwa0,5067
tutorials/peft-curation-with-sdg/filters.py,sha256=_80wocMPm-8-XGxMEPOef7U3_u1ye4uAe5GyRIGGrz4,1097
tutorials/peft-curation-with-sdg/main.py,sha256=2rm3TnatxiinK8Rwqw6aE0Oeqof5AoiXMqXf3YuAZPA,14686
tutorials/peft-curation-with-sdg/modifiers.py,sha256=fNmB3q9i7VKiQz0-zhSu1-bKDennTmNDNtobFar78CU,1377
tutorials/peft-curation-with-sdg/synthetic_gen.py,sha256=DrwEU6coScjIh2qeHClsZlvWQ2zuQFBZuQOAZgXNI8g,11986
tutorials/pretraining-data-curation/helper.py,sha256=zuzRk9-fgG50Wdr7ErVd78WJa_4DL_xdw4995ljmZsk,2400
tutorials/synthetic-retrieval-evaluation/DeDup.py,sha256=tQ2fYk-uAf6YS9hVbOHaiGp4nvnYIWoUqtj16zy7RdE,1564
tutorials/synthetic-retrieval-evaluation/Endpoints.py,sha256=jC-4BGK62YVA0fqnzV1GvzdvqBObLmCWQuRsZUhth3g,1704
tutorials/synthetic-retrieval-evaluation/Generator.py,sha256=L8ebM5qhE-cEtZ9oRWi6GBOZ2IVQ1gcliXntP5KGPRw,6134
tutorials/synthetic-retrieval-evaluation/prompts.py,sha256=BklBCzxYWsae-3wiJ3HRDdn3HCDtAtJOwHXzpnJtl48,5245
tutorials/tinystories/docbuilder.py,sha256=7EAN2PuETz4BHTrmo3VVsqgLvsSn4ZU2U-wouctJv3M,2922
tutorials/tinystories/filters.py,sha256=mAzJCtzqR9ICDgOZbybwV5AEtsy-Ee928kimBtodiKA,1415
tutorials/tinystories/helpers.py,sha256=90YZ_zBumIN4xMyClYsM0tGrt_HCxIvXG0ypKt0_ig4,2700
tutorials/tinystories/main.py,sha256=l0yg8KsU-mYhUCi6dGSknz286ClxXhTZwgeTT85SFL4,7516
tutorials/tinystories/modifiers.py,sha256=JAzC1lb6eiw6mrGEbMxaWnAFceH4NJ4nlUfbEJ7Pyns,1307
tutorials/zyda2-tutorial/0_processing/helper.py,sha256=86Q022vpfY1K4-Teoiyui4ELaYZQiITSBETIgy9LVWg,744
tutorials/zyda2-tutorial/0_processing/process_dclm.py,sha256=J3TdnBT8H-knsBZydiN6pBdJURSw0X9rX1VXnNqGBP4,1500
tutorials/zyda2-tutorial/0_processing/process_dolma_cc.py,sha256=cWRQKbvSQmH7sfJQkosylV2Dpz7rg7rFTs566i7q29Y,826
tutorials/zyda2-tutorial/0_processing/process_fwe2.py,sha256=SUwWUH53fUn1vEKpd4I99v3ufyFlr6z1Z6FqCqV4jXI,2365
tutorials/zyda2-tutorial/0_processing/process_zyda.py,sha256=ABiqJoHGDrfPG8p_LkGr-Nfyc4A1hP39fN2mx51zo0k,1248
tutorials/zyda2-tutorial/1_fuzzy_dedup/0_minhash.py,sha256=FdNoXYDas0jxYBBlnDrCu-u5_5sJSGbG9ieXWGofFLQ,1928
tutorials/zyda2-tutorial/1_fuzzy_dedup/1_lsh.py,sha256=zfdT0sL9ZauCWGVVOxp0nTwCbDGTUNkcEtYaDuio-e0,1921
tutorials/zyda2-tutorial/1_fuzzy_dedup/2_buckets_to_edges.py,sha256=s-AMXxNCCZN9P4GqNwcrZlM3djrSb0qPBeasKATy8cs,1237
tutorials/zyda2-tutorial/1_fuzzy_dedup/3_connected_components.py,sha256=XX9I470Ecj-cktKf-qe-a0mTghvrVy0Hi6Eh2ow0oqA,1424
tutorials/zyda2-tutorial/2_dupes_removal/0_id_mapping.py,sha256=l8BxGdo55p0VrMCjGScZ9goye3AEUiWKnsdlhC217uw,2484
tutorials/zyda2-tutorial/2_dupes_removal/1_id_conversion.py,sha256=L92MR7t1wS8VkAgAPLLlL6H0HnynRFuOeB7T5F89Vqc,2289
tutorials/zyda2-tutorial/2_dupes_removal/2_compute_counts.py,sha256=0x5pSiR0umxKY3J6d4mcPoMAeUHgfbWKhr5utlsEciU,2973
tutorials/zyda2-tutorial/2_dupes_removal/3_prep_dupes.py,sha256=64P5Lxlsf2HQ-emHg2CZo9mMu_Kc22QKOlrO6vMEyA4,6346
tutorials/zyda2-tutorial/2_dupes_removal/4_get_dupes_dclm.py,sha256=A2JzsEe39BBkWJifaBF1VGyYrxVf-UZCjZb-4kxdn_U,4869
tutorials/zyda2-tutorial/2_dupes_removal/4_get_dupes_dolma-cc.py,sha256=jt4lIDXsUei5Xmq-_BGQ4c7DUd6XN0U1MF_KVWlALb0,4002
tutorials/zyda2-tutorial/2_dupes_removal/5_get_dupes_zyda.py,sha256=LDwlun4wwQlOdK9Nwd0jQQwd1v7WL_V45Ux0QLLSGQQ,4139
tutorials/zyda2-tutorial/2_dupes_removal/remove_dupes.py,sha256=_uNrjc-ElVkyoBJTHDStih3N0tRc0VL7bz8cpk0wIZo,2205
tutorials/zyda2-tutorial/3_quality_model/run_quality_classifier.py,sha256=TAvRGbly7DAkrqz7tzGiTZDoZDbeCZG4ElON8ep_KxU,1663
tutorials/zyda2-tutorial/4_filtering/filter_fwe.py,sha256=YYgyDlVYA8yZaBTY8ThV4BphvPb1JPSHO3nFBZ5nhYE,1122
tutorials/zyda2-tutorial/4_filtering/filter_quality.py,sha256=CqaknS5EUpL8Xpynb6oPlhsTtasao1dyGogmaV_haOM,1556
nemo_curator-0.6.0rc2.dev1.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
nemo_curator-0.6.0rc2.dev1.dist-info/METADATA,sha256=vtyQvh6XeUEH-8viSTv0DDSasV3FhRhvQw1CR6spRpw,17004
nemo_curator-0.6.0rc2.dev1.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
nemo_curator-0.6.0rc2.dev1.dist-info/entry_points.txt,sha256=7OvzGqU8sF3yNi5l_3LJQdl4LFWEAOhjTqRF-0MZhXo,2799
nemo_curator-0.6.0rc2.dev1.dist-info/top_level.txt,sha256=lE9bTBQ09Wn5G_wLHlO67rvyLjl7kq37AfOktW6C7hE,32
nemo_curator-0.6.0rc2.dev1.dist-info/RECORD,,
