MANIFEST.in
README.md
pyproject.toml
requirements.txt
setup.py
clean_text_my/__init__.py
clean_text_my/deduplication.py
clean_text_my/download_dataset.py
clean_text_my/logging.py
clean_text_my/postprocessing.py
clean_text_my/utils.py
clean_text_my.egg-info/PKG-INFO
clean_text_my.egg-info/SOURCES.txt
clean_text_my.egg-info/dependency_links.txt
clean_text_my.egg-info/requires.txt
clean_text_my.egg-info/top_level.txt
clean_text_my/text_dedup/__init__.py
clean_text_my/text_dedup/minhash.py
clean_text_my/text_dedup/utils/__init__.py
clean_text_my/text_dedup/utils/add_args.py
clean_text_my/text_dedup/utils/analysis.py
clean_text_my/text_dedup/utils/hashfunc.py
clean_text_my/text_dedup/utils/preprocess.py
clean_text_my/text_dedup/utils/timer.py
clean_text_my/text_dedup/utils/tokenization.py
clean_text_my/text_dedup/utils/union_find.py