README.md
pyproject.toml
setup.py
classifiers/__init__.py
classifiers/classifier_base.py
classifiers/classifier_simple.py
classifiers/classifier_strict_ollama.py
classifiers/classifier_strict_openai.py
classifiers/cls_prompts.py
classifiers/config.py
datasets/__init__.py
datasets/dataset_reader.py
datasets/web_page_examples.py
lang_detectors/__init__.py
lang_detectors/fasttext_custom.py
lang_detectors/iso-639-3.tab
lang_detectors/json_loader.py
lang_detectors/lang_detector.py
tests/__init__.py
utils/__init__.py
utils/cleaner.py
utils/html2md.py
utils/image_cache_handler.py
utils/io.py
utils/metrics.py
utils/prompts.py
utils/url2html.py
utils/vllm_image.py
web_page_cls.egg-info/PKG-INFO
web_page_cls.egg-info/SOURCES.txt
web_page_cls.egg-info/dependency_links.txt
web_page_cls.egg-info/requires.txt
web_page_cls.egg-info/top_level.txt