nemo_curator/__init__.py,sha256=ZPVqjreaCd1_SvLRqqenMcmaxNw5QWND7umucyWOhwg,899
nemo_curator/_compat.py,sha256=KD0x4kI45L0vTzlt2cV-kmRk0zA-jDQamjwvWurVLJg,967
nemo_curator/log.py,sha256=UdSY9zzNz1OxW7Bx_Qy43inmH7SXvX4CO2NZZ5njRJo,2770
nemo_curator/sample_dataframe.py,sha256=6yNHp7nOnIzFHv6-XfqxgqIfxPmOxERrDkAkFdQz4jE,2739
nemo_curator/datasets/__init__.py,sha256=jb_GcqbWfrisj9nqglR0faq0iaMNh0fhKJ-ZTw71_7k,683
nemo_curator/datasets/doc_dataset.py,sha256=bOuFR27AtkhUeTGB9YW8RGp8aOwu2KvmV7EbbIB-hQA,6087
nemo_curator/distributed_data_classification/__init__.py,sha256=NVzEmeQhr33NGGaBYs_GTg-TgkVqEAgU8wYE_BWXMLQ,610
nemo_curator/distributed_data_classification/arg_utils.py,sha256=74BRZ3vbdXUBa7LV61Hgkoo7sG7-CSrgWypH7mpDQ6Y,4754
nemo_curator/distributed_data_classification/domain_classifier_inference.py,sha256=GDxdtQGC1Kk0V9HKkvw_v-6pvemrqgSEbb_n-EvjCQY,8395
nemo_curator/distributed_data_classification/generate_statistics.py,sha256=NXcMQwJ5dj7DEQ7wGNl2yWNmw2lkanwij6AzXgiEQU4,2608
nemo_curator/distributed_data_classification/pytorch_utils.py,sha256=Ja3rpvDCzSMfwHBlISZWRM4CMSgbvRpGqA5LWdTf2gU,3678
nemo_curator/distributed_data_classification/quality_classifier_inference.py,sha256=og62uPJvhllDnV3jz_iZabPKE02AuYZxYOhHHrjrpf8,10020
nemo_curator/distributed_data_classification/quality_classifier_multiple_models_inference.py,sha256=YAk__hL-C7Fvb2Gkpp91-0pp96RCKvZqu0yupO3KHwU,5070
nemo_curator/distributed_data_classification/verify_results.py,sha256=hLh-p67CcZXM_qxb6cq4ZDxrajKCXdwG95rUCUPQ-4Q,6065
nemo_curator/download/__init__.py,sha256=npmLHdnWIt2CYnZjsI61OhVlyE3X0AnZ7mIhK6tCi-k,1787
nemo_curator/download/arxiv.py,sha256=3KaaccswHti-ZCQ2ev1FxCB0jf58xTCY70BX_L3Sw5g,16228
nemo_curator/download/commoncrawl.py,sha256=XpNRuru0Imgy5WCMkVI9ezIyKLSMfGST8s7DT8hCbk0,11715
nemo_curator/download/doc_builder.py,sha256=WCl9pCDCWApiWLOHJDP4BHOjUuEVem1XdfDd2Mw7SeE,6919
nemo_curator/download/wikipedia.py,sha256=nVSbpmW7QhNg9eGJAA4BOAHLptJuhctllEFaRDfZWLg,31193
nemo_curator/filters/__init__.py,sha256=2oFTNdkjy5pJboO23HpqhiDdjf2gTHcq-jIsbmeDZM4,2525
nemo_curator/filters/classifier_filter.py,sha256=La51m4_HfUTW4vtXku6AtirAzAuqdsxpkedsqpXGkWE,3436
nemo_curator/filters/code.py,sha256=Fx-8hHHL85VxsgcKZ2eX7Cai-5TCqgLOzfnH-_Hc7wE,10712
nemo_curator/filters/doc_filter.py,sha256=9T2yC0F1-g7B9m73_BpPVrqZXH3oSVePQdcoWC5Uak4,2030
nemo_curator/filters/heuristic_filter.py,sha256=bBBxfVALXBdZGmUGCNjPl_xROSybYNzL-Ct7nLHHmd8,19992
nemo_curator/modifiers/__init__.py,sha256=S_fHYgbQOezDHVA65kV-AR2HMzPn9-G482Rc-_vqNN0,976
nemo_curator/modifiers/c4.py,sha256=U25SZoeVOcbMRJ4kRsb8JsI8S6IQv1bBt--Q3MfdqLk,3273
nemo_curator/modifiers/doc_modifier.py,sha256=i6lDpiCybvQP_u6ea2-dC1X8Ym2CTRImPmtQBlxA5HU,936
nemo_curator/modifiers/fasttext.py,sha256=aUg7jTcS1OvG_BKG1PUexzc0JcxEF4jDgeGoy0yx8XI,938
nemo_curator/modifiers/pii_modifier.py,sha256=zZbsc3BkZjSuw-x3VMYUci7CSmdQ2YPjIC_z-fMWeck,3728
nemo_curator/modifiers/unicode_reformatter.py,sha256=nz-vfzMqcOHpoCY6KSkwQ1nLmhA1UxOBGb0B7wgeYHE,846
nemo_curator/modules/__init__.py,sha256=alWf9G0pbUnEwLAxtAqglbxCQwXx5p1oIpiSZ7Fv6tk,2143
nemo_curator/modules/add_id.py,sha256=3D5_xKM4LwCqPieAD8dE2h65GuvOG2z55MBs-a6W_Pw,3341
nemo_curator/modules/config.py,sha256=mMyk_L619fpOnbHIUEGlZixsLOk8Fmpu8xo7QLw_DCA,3906
nemo_curator/modules/dataset_ops.py,sha256=IMUPEE2kCQZduqPyozQwQvLSqA93JULjoOVC7XkMAro,7170
nemo_curator/modules/distributed_data_classifier.py,sha256=yyfK6amPqCVRd1WGH6wSRt6JpX_Dq3FfJT3qw8-_604,9906
nemo_curator/modules/exact_dedup.py,sha256=JpDbBsdlvgZ09iPfwjpUFhhXqNeLVw4K0uSAWZJYdLM,6406
nemo_curator/modules/filter.py,sha256=3cp1XK8aN-g7bGXIRI4dWyLU3IvcCwOnp7YyHL7Os6M,4529
nemo_curator/modules/fuzzy_dedup.py,sha256=-TIX3kEatOQSnf5ZWMtgGMKphZ_k9pZ6FZnVb2J8FCc,56938
nemo_curator/modules/meta.py,sha256=9q-4cd1pZQm4T5D-U_oydoBtavJDxR_ssAZy32KS0pI,825
nemo_curator/modules/modify.py,sha256=-vuGZy_hhIN963Eq3aIhhCVbHOYOV8nle-Ik8VGS8CQ,1406
nemo_curator/modules/task.py,sha256=4xpnykbH9qQiyODjwVMlR8GlT6xCEAgk9tWBtPv3OCo,18766
nemo_curator/pii/__init__.py,sha256=NVzEmeQhr33NGGaBYs_GTg-TgkVqEAgU8wYE_BWXMLQ,610
nemo_curator/pii/algorithm.py,sha256=vQyyewAniVQ0xpCPm61IdjQwfZWm9MLcs7etuT96ncI,9843
nemo_curator/pii/constants.py,sha256=FhrU0UmUUHE6LIxDNAP4WQuY3Dl48d0bdBwr5sM-TVg,369
nemo_curator/pii/custom_batch_analyzer_engine.py,sha256=bc5EmJ5BZs07C6Y2Z-rlX6FagC1R29hjxDyvisrbsFY,6668
nemo_curator/pii/custom_nlp_engine.py,sha256=1ZPR8gxCskO7pajgSFgeOz_eAERJjNV12V1Oqqz9rJ8,2686
nemo_curator/pii/recognizers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
nemo_curator/pii/recognizers/address_recognizer.py,sha256=Pc_lSuT-J5asHQQ9So5GW-2yoSDoIqoQzSsjtkyg9uY,1828
nemo_curator/scripts/__init__.py,sha256=NVzEmeQhr33NGGaBYs_GTg-TgkVqEAgU8wYE_BWXMLQ,610
nemo_curator/scripts/add_id.py,sha256=ADszuA5uPyD7hRK_rzxuzINDq1eOvDJR0j9ZZbPiGKU,4821
nemo_curator/scripts/blend_datasets.py,sha256=rrVxe1z2MY3XmKcRrdJ5ciNfRl6ZW6inbWqLEiiHlew,4496
nemo_curator/scripts/download_and_extract.py,sha256=3J8GY4uWmE0cFXdyNDwV6A-YnZAAiaOBIsLytgme5ss,5783
nemo_curator/scripts/filter_documents.py,sha256=uaM-E3OCMF2byEyKa63W_aehh-tPY3LCIQQJdZcZuio,11976
nemo_curator/scripts/find_exact_duplicates.py,sha256=DTJYUFw8zV1aTddf-Ti1YbZ07ATAPyES2XbCOXq-Nv0,4084
nemo_curator/scripts/find_matching_ngrams.py,sha256=klNBvj73fiuYOitKltJoNwR4svYLdBHL87viDfrf3OY,3848
nemo_curator/scripts/find_pii_and_deidentify.py,sha256=YwYMF85lb55xVLnI8xmcdxelA1pqguGu-WqwwvUGu8s,6361
nemo_curator/scripts/get_common_crawl_urls.py,sha256=cIvNyshCzxh0IkYVNU53mKZxN91RPIVah5QefJYFuJY,3468
nemo_curator/scripts/get_wikipedia_urls.py,sha256=TnDUNu02JXRC_jSbkmQO8ewUZGxabCWTfAf0vxgAHyU,1845
nemo_curator/scripts/make_data_shards.py,sha256=n2zve7CyUJK0dExYtaX-Qt6iR132GVrgDIy5ToLR-Hw,2670
nemo_curator/scripts/prepare_fasttext_training_data.py,sha256=b1drIo-HkYJhlArurLuiYz04sEQuBD2__cPRihkNcV0,4462
nemo_curator/scripts/prepare_task_data.py,sha256=ttJm2ozzoOX1-y5_TuDSaJKX1BiAwC0WZIpzUAqH368,3010
nemo_curator/scripts/remove_matching_ngrams.py,sha256=nuXCEilbINZft6oKMHcDGDgdjeIUz9S7fTR3uiQYNa4,6136
nemo_curator/scripts/separate_by_metadata.py,sha256=iv5bX4W_BwCbjNmPGM-s12SO381GcxM6uS7SyNnRXJ4,4327
nemo_curator/scripts/text_cleaning.py,sha256=VncuQw-fwDsHmdFi6GPiHBpvEr8sKWhQRsTPERN6Bj0,3835
nemo_curator/scripts/train_fasttext.py,sha256=gXbI_XjoKSSv8zqOpjwADyccVoR8BG_qbxFIKEwsZNo,6467
nemo_curator/scripts/fuzzy_deduplication/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
nemo_curator/scripts/fuzzy_deduplication/compute_minhashes.py,sha256=Sv4bDx5-CU3Zj9oiTemKhpD4sRz1GSHsPwYRqMaTDPw,5432
nemo_curator/scripts/fuzzy_deduplication/connected_components.py,sha256=49jy87i9dDzOWXBXeHgL5RG41pcPMK3bO-5VgOsitKA,2784
nemo_curator/scripts/fuzzy_deduplication/jaccard_compute.py,sha256=GuUmLxDp1WLjeF-HWNlaDNmkjk70LaQAFa52aB0NYgY,2818
nemo_curator/scripts/fuzzy_deduplication/jaccard_shuffle.py,sha256=XnZrHHIpb-5erEL77AswO38KYvkPu25zWD_CXceGIoU,4150
nemo_curator/scripts/fuzzy_deduplication/map_buckets.py,sha256=DObhkNH_9b_8xvA-RGb1mTlI0uyYiJpdgz43gV_mmp8,5675
nemo_curator/scripts/fuzzy_deduplication/minhash_lsh.py,sha256=Ya0uDz0ORZhoDTYBK_ZwS_XbeZ2Vmk2rB5r76qTM3Iw,3917
nemo_curator/tasks/__init__.py,sha256=IfaRyarhNVd7icbFyiT_fBnWoY38iCcIqZoFR019Wis,1499
nemo_curator/tasks/downstream_task.py,sha256=krVCvrbuZy9ToNyImYXpxUl03hBCnPVlWcrOXWvCwPY,1968
nemo_curator/tasks/metrics.py,sha256=MgBEr9mC9KEeDRfmW32_k4a06f6ljWveyC9LBLtAG0Y,18930
nemo_curator/utils/__init__.py,sha256=NVzEmeQhr33NGGaBYs_GTg-TgkVqEAgU8wYE_BWXMLQ,610
nemo_curator/utils/config_utils.py,sha256=0_Uy16dEXqqTeqiQsBCRzD8E_wzMYOYvz0_jkC-ZGhU,3598
nemo_curator/utils/constants.py,sha256=Szr4RTF8hFFkTqz5dhh_IbTReZfBXFLlhyokggQ_bvI,3296
nemo_curator/utils/decorators.py,sha256=t0gGx4HhyEJntCk5XAAaoePmoahSyTNjhqtEpGeeHrY,864
nemo_curator/utils/distributed_utils.py,sha256=FpxRBhga1gQGpxDywxUfNnKlaqEqHwRiNm1IW5ritzY,17735
nemo_curator/utils/download_utils.py,sha256=9s5I8QbntWFzsH5IkqV-uFjK9C1RqdCcbut7aEr76Ms,7156
nemo_curator/utils/file_utils.py,sha256=6L8dQtrVRXgROE0wPpCGI3ox1ESNdg4AMuYBMxH59-w,7957
nemo_curator/utils/gpu_utils.py,sha256=n--us0SxBXsor3vgbb7GYa8g3JQnt-oMZ2re-tM11fI,1113
nemo_curator/utils/import_utils.py,sha256=wfbo1yAr7fM6NLinCI4eihMCTe8rlZbVvUCflxWuNPc,11865
nemo_curator/utils/module_utils.py,sha256=aaTOspYbMLpGby-gd-wcVKhVv_eBbtYpPd4mjMiRgSM,779
nemo_curator/utils/script_utils.py,sha256=H8jiE1aNuUMr0TsKw36W7QkYFIJQDOjOZWcH_SHuZb0,7502
nemo_curator/utils/text_utils.py,sha256=OMFateEXEDtlsIrZov7J_6CXYI00M0AAfXs_m-wxiM0,5920
nemo_curator/utils/fuzzy_dedup_utils/__init__.py,sha256=NVzEmeQhr33NGGaBYs_GTg-TgkVqEAgU8wYE_BWXMLQ,610
nemo_curator/utils/fuzzy_dedup_utils/id_mapping.py,sha256=05MwMpEDBycgB9rESP5yIZSbZDqAW4tb3t2zJLKSEtM,1862
nemo_curator/utils/fuzzy_dedup_utils/io_utils.py,sha256=4QW-xktxkwHZrTLyRgD3zVSY5W9SkTem1G08MmFnzzI,5968
nemo_curator/utils/fuzzy_dedup_utils/merge_utils.py,sha256=PQpt26iyJQhBiBdjA1n3fVMM-HYxJrU1YeTGucInxUA,7800
nemo_curator/utils/fuzzy_dedup_utils/output_map_utils.py,sha256=XzhhT-dP885ZQW5Y37jCvMz4D0DKS_Lkg7V3UqQaoEg,2588
nemo_curator/utils/fuzzy_dedup_utils/shuffle_utils.py,sha256=u8YM-8RD8X7y_ZDVWbA2nm_HgZ6i3rug46dxpDv1RCo,5091
tests/__init__.py,sha256=NVzEmeQhr33NGGaBYs_GTg-TgkVqEAgU8wYE_BWXMLQ,610
tests/test_add_id.py,sha256=54wB9j5C79YGEuoMUBNW4e4gHHAwXNLJr4fET1-IpDk,4987
tests/test_blend_datasets.py,sha256=X4Xu9jTroU0dbu4WFkvHeVqePxj_ncASJZfLpIulyWA,3878
tests/test_config.py,sha256=5sx7G-ATi0JJ35UeLWWlOLIFHibuvpb3trpMC40GV5o,2716
tests/test_download.py,sha256=BUJOeHj83KkACfTudiX7hFXlfvw7Ta13y0UeaGEc-qg,218
tests/test_exact_dedup.py,sha256=hBLdCU7qBUxaePygjtT4QhsxK5Pcyf8x5b6mIBr3U_Y,1900
tests/test_filters.py,sha256=R-CTSCsFx8ZZlFLbGcv9K_wjT2ilgBfVwk_cnq1R2og,30378
tests/test_fuzzy_dedup.py,sha256=gLn8boiPfK17jzTQd5uhogYMXkfl__W1OlC8iEAXA_4,15154
tests/test_pii_accuracy.py,sha256=vsjKth8M73To0UkMJvhR0WpE6jsZLMZH4zsyXrHa9y0,6376
tests/test_shuffle.py,sha256=JaX3Psby4bEIe0-BNOCDvnrHP9rnXk2EbC-MY5x6hQo,7389
tests/test_task_decontamination.py,sha256=RFj0x7rkl_pQEGf3RI4KVB4T4gADjX4Xf3FOBHJnCyA,12881
tests/test_unicode_reformatter.py,sha256=iALTzl-G1ggy9ZjiDWqhw9ub1x3DqZCQro4S1UVR0YA,2082
invisible_rabbit-0.3.0.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
invisible_rabbit-0.3.0.dist-info/METADATA,sha256=4oNX5Fxi8O0IuHrFQJI_WAX7OiSK-JQzORf5Y_xihEk,11121
invisible_rabbit-0.3.0.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
invisible_rabbit-0.3.0.dist-info/entry_points.txt,sha256=V4_ZOwkDIKiPQat8vD_e5Q8lHuxyXBBz1t9Oqm4lZzY,2441
invisible_rabbit-0.3.0.dist-info/top_level.txt,sha256=NZpMgId9Qc8gKXBh5ITLq99W4VsNI3MN1cG7f0hm_n0,19
invisible_rabbit-0.3.0.dist-info/RECORD,,
