.coveragerc
.git-blame-ignore-revs
.gitignore
.pre-commit-config.yaml
CHANGES.rst
CODE_OF_CONDUCT.md
CONTRIBUTING.rst
LICENSE.txt
README.rst
RELEASE_PROCESS.rst
codecov.yml
pixi.lock
pyproject.toml
.binder/postBuild
.binder/requirements.txt
.binder/runtime.txt
.circleci/config.yml
.github/dependabot.yml
.github/ISSUE_TEMPLATE/bug_report.yml
.github/ISSUE_TEMPLATE/config.yml
.github/ISSUE_TEMPLATE/doc_improvement.yml
.github/ISSUE_TEMPLATE/feature_request.yml
.github/workflows/changelog.yml
.github/workflows/main.yml
.github/workflows/run-code-format-checks.yaml
.github/workflows/test-javascript.yml
.github/workflows/testing.yml
.github/workflows/update_pixi_lock_files.yml
benchmarks/README.md
benchmarks/__init__.py
benchmarks/bench_fuzzy_join_count_vs_hash.py
benchmarks/bench_fuzzy_join_sparse_vs_dense.py
benchmarks/bench_fuzzy_join_vs_others.py
benchmarks/bench_gap_divergence.py
benchmarks/bench_gap_encoder_hp.py
benchmarks/bench_gap_es_score.py
benchmarks/bench_minhash_batch_number.py
benchmarks/bench_tablevectorizer_tuning.py
benchmarks/run_on_openml_datasets.py
benchmarks/results/README.md
benchmarks/results/bench_fuzzy_join_count_vs_hash-20230606.parquet
benchmarks/results/bench_fuzzy_join_sparse_vs_dense-20230609.parquet
benchmarks/results/bench_fuzzy_join_vs_others-20230607.parquet
benchmarks/results/bench_gap_divergence-20230701.parquet
benchmarks/results/bench_minhash_batch_number-20230608.parquet
benchmarks/results/bench_tablevectorizer_tuning-20230609.parquet
benchmarks/results/gap_encoder_benchmark_es_score-20230731.parquet
benchmarks/results/gap_encoder_benchmark_hp-20230731.parquet
benchmarks/utils/__init__.py
benchmarks/utils/_argparser.py
benchmarks/utils/_various.py
benchmarks/utils/join.py
benchmarks/utils/monitor.py
build_tools/circle/build_doc.sh
build_tools/circle/checkout_merge_commit.sh
build_tools/circle/push_doc.sh
doc/Makefile
doc/about.rst
doc/announcement.html
doc/assembling.rst
doc/cleaning.rst
doc/conf.py
doc/development.rst
doc/documentation.rst
doc/encoding.rst
doc/end_to_end_pipeline.rst
doc/index.rst
doc/install.rst
doc/jupyter-lite.json
doc/skrub.png
doc/skrub.svg
doc/skrub_unzoomed.png
doc/unlisted.rst
doc/version.json
doc/vision.rst
doc/_static/skrub-cover.svg
doc/_static/skrub.svg
doc/_static/skrub_pipeline.svg
doc/_static/css/custom.css
doc/_templates/class.rst
doc/_templates/function.rst
doc/_templates/index.html
doc/_templates/numpydoc_docstring.rst
doc/binder/requirements.txt
doc/includes/big_toc_css.rst
doc/reference/building_a_pipeline.rst
doc/reference/cleaning_a_dataframe.rst
doc/reference/downloading_a_dataset.rst
doc/reference/encoding_a_column.rst
doc/reference/generating_a_report.rst
doc/reference/index.rst
doc/reference/joining_dataframes.rst
doc/sphinxext/MANIFEST.in
doc/sphinxext/github_link.py
doc/sphinxext/sphinx_issues.py
examples/00_getting_started.py
examples/01_encodings.py
examples/02_feature_interpretation_with_gapencoder.py
examples/03_datetime_encoder.py
examples/04_fuzzy_joining.py
examples/05_deduplication.py
examples/06_ken_embeddings.py
examples/07_multiple_key_join.py
examples/08_join_aggregation.py
examples/09_interpolation_join.py
examples/README.txt
examples/FIXME/07_grid_searching_with_the_tablevectorizer.py
skrub/VERSION.txt
skrub/__init__.py
skrub/_agg_joiner.py
skrub/_check_dependencies.py
skrub/_check_input.py
skrub/_clean_categories.py
skrub/_clean_null_strings.py
skrub/_datetime_encoder.py
skrub/_deduplicate.py
skrub/_dispatch.py
skrub/_fast_hash.py
skrub/_fuzzy_join.py
skrub/_gap_encoder.py
skrub/_interpolation_joiner.py
skrub/_join_utils.py
skrub/_joiner.py
skrub/_matching.py
skrub/_minhash_encoder.py
skrub/_multi_agg_joiner.py
skrub/_on_each_column.py
skrub/_on_subframe.py
skrub/_select_cols.py
skrub/_similarity_encoder.py
skrub/_string_distances.py
skrub/_table_vectorizer.py
skrub/_tabular_learner.py
skrub/_to_categorical.py
skrub/_to_datetime.py
skrub/_to_float32.py
skrub/_to_str.py
skrub/_utils.py
skrub/_wrap_transformer.py
skrub/conftest.py
skrub.egg-info/PKG-INFO
skrub.egg-info/SOURCES.txt
skrub.egg-info/dependency_links.txt
skrub.egg-info/requires.txt
skrub.egg-info/top_level.txt
skrub/_dataframe/__init__.py
skrub/_dataframe/_common.py
skrub/_dataframe/_namespace.py
skrub/_dataframe/_pandas.py
skrub/_dataframe/_polars.py
skrub/_dataframe/tests/__init__.py
skrub/_dataframe/tests/test_common.py
skrub/_dataframe/tests/test_namespace.py
skrub/_dataframe/tests/test_pandas.py
skrub/_dataframe/tests/test_polars.py
skrub/_reporting/README.rst
skrub/_reporting/__init__.py
skrub/_reporting/_associations.py
skrub/_reporting/_html.py
skrub/_reporting/_plotting.py
skrub/_reporting/_serve.py
skrub/_reporting/_summarize.py
skrub/_reporting/_table_report.py
skrub/_reporting/_utils.py
skrub/_reporting/_data/templates/.gitignore
skrub/_reporting/_data/templates/base.css
skrub/_reporting/_data/templates/buttons.html
skrub/_reporting/_data/templates/column-associations.html
skrub/_reporting/_data/templates/column-filter.html
skrub/_reporting/_data/templates/column-summaries.css
skrub/_reporting/_data/templates/column-summaries.html
skrub/_reporting/_data/templates/column-summary.html
skrub/_reporting/_data/templates/copybutton.css
skrub/_reporting/_data/templates/dataframe-sample.css
skrub/_reporting/_data/templates/dataframe-sample.html
skrub/_reporting/_data/templates/inline-report.css
skrub/_reporting/_data/templates/inline-report.html
skrub/_reporting/_data/templates/no-filter-matches.html
skrub/_reporting/_data/templates/report.css
skrub/_reporting/_data/templates/report.html
skrub/_reporting/_data/templates/report.js
skrub/_reporting/_data/templates/standalone-report.html
skrub/_reporting/_data/templates/table-bar.html
skrub/_reporting/_data/templates/table-part.html
skrub/_reporting/_data/templates/tabs.css
skrub/_reporting/_data/templates/tabs.html
skrub/_reporting/_data/templates/tooltip.css
skrub/_reporting/_data/templates/icons/LICENSE
skrub/_reporting/_data/templates/icons/README.md
skrub/_reporting/_data/templates/icons/check-lg.svg
skrub/_reporting/_data/templates/icons/clipboard.svg
skrub/_reporting/_data/templates/icons/x-lg.svg
skrub/_reporting/_data/templates/pure-3.0.0/LICENSE
skrub/_reporting/_data/templates/pure-3.0.0/base-min.css
skrub/_reporting/_data/templates/pure-3.0.0/tables-min.css
skrub/_reporting/js_tests/.gitignore
skrub/_reporting/js_tests/cypress.config.js
skrub/_reporting/js_tests/make-reports
skrub/_reporting/js_tests/package-lock.json
skrub/_reporting/js_tests/package.json
skrub/_reporting/js_tests/cypress/e2e/column-filter.cy.js
skrub/_reporting/js_tests/cypress/e2e/column-summaries.cy.js
skrub/_reporting/js_tests/cypress/e2e/copybutton.cy.js
skrub/_reporting/js_tests/cypress/e2e/dataframe-sample.cy.js
skrub/_reporting/js_tests/cypress/e2e/tabs.cy.js
skrub/_reporting/js_tests/cypress/fixtures/example.json
skrub/_reporting/js_tests/cypress/support/commands.js
skrub/_reporting/js_tests/cypress/support/e2e.js
skrub/_reporting/tests/conftest.py
skrub/_reporting/tests/test_serve.py
skrub/_reporting/tests/test_summarize.py
skrub/_reporting/tests/test_table_report.py
skrub/_reporting/tests/test_utils.py
skrub/_reporting/tests/data/air_quality_tiny.csv
skrub/_reporting/tests/data/air_quality_tiny.parquet
skrub/_selectors/__init__.py
skrub/_selectors/_base.py
skrub/_selectors/_selectors.py
skrub/_selectors/tests/test_base.py
skrub/_selectors/tests/test_selectors.py
skrub/datasets/__init__.py
skrub/datasets/_fetching.py
skrub/datasets/_generating.py
skrub/datasets/_ken_embeddings.py
skrub/datasets/_utils.py
skrub/datasets/tests/__init__.py
skrub/datasets/tests/test_fetching.py
skrub/datasets/tests/test_generating.py
skrub/datasets/tests/test_ken_embeddings.py
skrub/datasets/tests/test_utils.py
skrub/tests/__init__.py
skrub/tests/test_agg_joiner.py
skrub/tests/test_check_input.py
skrub/tests/test_clean_categories.py
skrub/tests/test_clean_null_strings.py
skrub/tests/test_datetime_encoder.py
skrub/tests/test_deduplicate.py
skrub/tests/test_dispatch.py
skrub/tests/test_docstrings.py
skrub/tests/test_fast_hash.py
skrub/tests/test_fuzzy_join.py
skrub/tests/test_gap_encoder.py
skrub/tests/test_interpolation_joiner.py
skrub/tests/test_join_utils.py
skrub/tests/test_joiner.py
skrub/tests/test_matching.py
skrub/tests/test_minhash_encoder.py
skrub/tests/test_multi_agg_joiner.py
skrub/tests/test_on_each_column.py
skrub/tests/test_on_subframe.py
skrub/tests/test_select_cols.py
skrub/tests/test_similarity_encoder.py
skrub/tests/test_sklearn.py
skrub/tests/test_string_distances.py
skrub/tests/test_table_vectorizer.py
skrub/tests/test_tabular_learner.py
skrub/tests/test_to_categorical.py
skrub/tests/test_to_datetime.py
skrub/tests/test_to_float32.py
skrub/tests/test_to_str.py
skrub/tests/test_utils.py
skrub/tests/test_wrap_transformer.py
skrub/tests/utils.py