.gitignore
.readthedocs.yml
CONTRIBUTING.md
DEVELOPMENT.md
LICENSE
MANIFEST.in
Makefile
README.md
make.bat
pyproject.toml
soda_logo.png
uv.lock
.github/workflows/ci.yml
.github/workflows/docs.yml
.github/workflows/pylint.yml
.github/workflows/python-publish.yml
addons/__init__.py
addons/firmbackbone_extractor.py
docs/DOCUMENTATION.md
docs/scripts/sync_featured_notebook.py
docs/source/conf.py
docs/source/contact.rst
docs/source/contribute.rst
docs/source/example_scraper_extractor.ipynb
docs/source/examples.rst
docs/source/index.rst
docs/source/installation.rst
docs/source/modules.rst
docs/source/userguide.rst
docs/source/websweep.consolidator.rst
docs/source/websweep.crawler.rst
docs/source/websweep.extractor.rst
docs/source/websweep.rst
docs/source/websweep.utils.rst
docs/source/_static/pipeline_workflow.svg
docs/source/_static/soda_logo.png
docs/source/_templates/contents.html
examples/example_scraper_extractor.ipynb
src/websweep/__init__.py
src/websweep/__main__.py
src/websweep/config.py
src/websweep/main.py
src/websweep.egg-info/PKG-INFO
src/websweep.egg-info/SOURCES.txt
src/websweep.egg-info/dependency_links.txt
src/websweep.egg-info/entry_points.txt
src/websweep.egg-info/requires.txt
src/websweep.egg-info/top_level.txt
src/websweep/consolidator/__init__.py
src/websweep/consolidator/consolidator.py
src/websweep/crawler/__init__.py
src/websweep/crawler/crawler.py
src/websweep/extractor/__init__.py
src/websweep/extractor/add_host.py
src/websweep/extractor/extractor.py
src/websweep/utils/__init__.py
src/websweep/utils/backend.py
src/websweep/utils/default_regex.json
src/websweep/utils/json_io.py
src/websweep/utils/public_suffix.py
src/websweep/utils/public_suffix_list.dat
src/websweep/utils/source_urls.py
src/websweep/utils/utils.py
tests/__init__.py
tests/conftest.py
tests/test.py
tests/test_config.py
tests/test_consolidator.py
tests/test_cpscraper.py
tests/test_crawler_backends.py
tests/test_crawler_complement.py
tests/test_crawler_duplicate_domains.py
tests/test_crawler_input_normalization.py
tests/test_extractor.py
tests/test_main_backend_selection.py
tests/test_main_crawled_data_guard.py
tests/test_main_extractor_addon_loader.py
tests/test_public_suffix.py
tests/test_source_urls.py
tests/test_url_classification.py
tests/units.txt
tests/assets/crawled_data/aaschroefpalen.nl.zip