.gitignore
.pre-commit-config.yaml
CLAUDE.md
LICENSE
MANIFEST.in
README.md
mkdocs.yml
noxfile.py
pyproject.toml
sample-screen.png
uv.lock
.github/workflows/ci.yml
.github/workflows/docs.yml
.github/workflows/nightly-tutorials.yml
benchmark/__init__.py
benchmark/__main__.py
benchmark/cache.py
benchmark/config.py
benchmark/progress.py
benchmark/providers.py
benchmark/report_generator.py
benchmark/runner.py
benchmark/schemas.py
benchmark/utils.py
benchmark/configs/__init__.py
benchmark/configs/arkansas_state.py
benchmark/configs/atlanta_schools.py
benchmark/configs/guides_expenses.py
benchmark/configs/hebrew_table.py
benchmark/configs/oklahoma_license.py
benchmark/configs/pennsylvania_election.py
benchmark/configs/practice_01.py
benchmark/configs/prompts.py
datasets/__init__.py
datasets/checkbox/__init__.py
datasets/checkbox/eval_ffdnet_sahi.py
datasets/checkbox/pipeline/__init__.py
datasets/checkbox/pipeline/__main__.py
datasets/checkbox/pipeline/annotate.py
datasets/checkbox/pipeline/build_coco.py
datasets/checkbox/pipeline/config.py
datasets/checkbox/pipeline/download.py
datasets/checkbox/pipeline/prefilter.py
datasets/checkbox/pipeline/run.py
datasets/checkbox/pipeline/search.py
datasets/checkbox/pipeline/state.py
datasets/checkbox/pipeline/structural_filter.py
datasets/checkbox/pipeline/train.py
datasets/checkbox/pipeline/validate.py
datasets/checkbox/pipeline/visualize.py
datasets/checkbox/tools/build_yolo_dataset.py
datasets/checkbox/tools/curate_templates.py
datasets/checkbox/tools/download_commonforms.py
datasets/checkbox/tools/extract_check_templates.py
datasets/checkbox/tools/runpod_status.py
datasets/checkbox/tools/synthesize_checks.py
datasets/checkbox/tools/train.py
datasets/checkbox/tools/train_cloud.py
docs/index.md
docs/llms.txt
docs/api/index.md
docs/assets/favicon.png
docs/assets/favicon.svg
docs/assets/logo.svg
docs/assets/sample-screen.png
docs/assets/social-preview.png
docs/assets/social-preview.svg
docs/assets/javascripts/custom.js
docs/assets/stylesheets/custom.css
docs/cookbook/batch-processing.md
docs/cookbook/finding-sections.md
docs/cookbook/guides.md
docs/cookbook/label-value-extraction.md
docs/cookbook/messy-tables.md
docs/cookbook/multi-column-layouts.md
docs/cookbook/multipage-content.md
docs/cookbook/ocr-then-navigate.md
docs/cookbook/one-page-one-row.md
docs/cookbook/troubleshooting.md
docs/for-llms/common-patterns.md
docs/getting-started/choose-your-path.md
docs/getting-started/concepts.md
docs/getting-started/quickstart.md
docs/getting-started/selectors.md
docs/installation/index.md
docs/tutorials/01-loading-and-extraction.md
docs/tutorials/02-finding-elements.md
docs/tutorials/04-table-extraction.md
docs/tutorials/05-excluding-content.md
docs/tutorials/06-document-qa.md
docs/tutorials/07-layout-analysis.md
docs/tutorials/08-spatial-navigation.md
docs/tutorials/10-structured-extraction.md
docs/tutorials/12-ocr-integration.md
docs/tutorials/15-working-with-regions.md
docs/use-cases/idea-gallery.md
natural_pdf/__init__.py
natural_pdf/cli.py
natural_pdf/engine_provider.py
natural_pdf/exceptions.py
natural_pdf/judge.py
natural_pdf.egg-info/PKG-INFO
natural_pdf.egg-info/SOURCES.txt
natural_pdf.egg-info/dependency_links.txt
natural_pdf.egg-info/entry_points.txt
natural_pdf.egg-info/requires.txt
natural_pdf.egg-info/top_level.txt
natural_pdf/analyzers/__init__.py
natural_pdf/analyzers/shape_detection_mixin.py
natural_pdf/analyzers/text_options.py
natural_pdf/analyzers/text_structure.py
natural_pdf/analyzers/utils.py
natural_pdf/analyzers/checkbox/__init__.py
natural_pdf/analyzers/checkbox/base.py
natural_pdf/analyzers/checkbox/checkbox_analyzer.py
natural_pdf/analyzers/checkbox/checkbox_manager.py
natural_pdf/analyzers/checkbox/checkbox_options.py
natural_pdf/analyzers/checkbox/classifier.py
natural_pdf/analyzers/checkbox/default_detector.py
natural_pdf/analyzers/checkbox/onnx_engine.py
natural_pdf/analyzers/checkbox/vector.py
natural_pdf/analyzers/checkbox/vlm_detector.py
natural_pdf/analyzers/guides/__init__.py
natural_pdf/analyzers/guides/base.py
natural_pdf/analyzers/guides/flow_adapter.py
natural_pdf/analyzers/guides/grid_helpers.py
natural_pdf/analyzers/guides/helpers.py
natural_pdf/analyzers/guides/separators.py
natural_pdf/analyzers/guides/text_detect.py
natural_pdf/analyzers/layout/__init__.py
natural_pdf/analyzers/layout/base.py
natural_pdf/analyzers/layout/layout_analyzer.py
natural_pdf/analyzers/layout/layout_manager.py
natural_pdf/analyzers/layout/layout_options.py
natural_pdf/analyzers/layout/paddle.py
natural_pdf/analyzers/layout/pdfplumber_table_finder.py
natural_pdf/analyzers/layout/surya.py
natural_pdf/analyzers/layout/table_structure_utils.py
natural_pdf/analyzers/layout/tatr.py
natural_pdf/analyzers/layout/vlm.py
natural_pdf/analyzers/layout/yolo.py
natural_pdf/classification/__init__.py
natural_pdf/classification/accessors.py
natural_pdf/classification/classification_provider.py
natural_pdf/classification/pipelines.py
natural_pdf/classification/results.py
natural_pdf/collections/mixins.py
natural_pdf/core/__init__.py
natural_pdf/core/capabilities.py
natural_pdf/core/context.py
natural_pdf/core/crop_utils.py
natural_pdf/core/decoration_detector.py
natural_pdf/core/element_loader.py
natural_pdf/core/element_manager.py
natural_pdf/core/element_store.py
natural_pdf/core/exclusion_mixin.py
natural_pdf/core/geometry_mixin.py
natural_pdf/core/highlighter_utils.py
natural_pdf/core/highlighting_service.py
natural_pdf/core/interfaces.py
natural_pdf/core/mixins.py
natural_pdf/core/ocr_converter.py
natural_pdf/core/page.py
natural_pdf/core/page_collection.py
natural_pdf/core/page_groupby.py
natural_pdf/core/pdf.py
natural_pdf/core/pdf_collection.py
natural_pdf/core/qa_mixin.py
natural_pdf/core/render_spec.py
natural_pdf/core/selector_utils.py
natural_pdf/core/vlm_client.py
natural_pdf/core/vlm_prompts.py
natural_pdf/core/word_engine.py
natural_pdf/describe/__init__.py
natural_pdf/describe/base.py
natural_pdf/describe/elements.py
natural_pdf/describe/summary.py
natural_pdf/deskew/__init__.py
natural_pdf/deskew/deskew_provider.py
natural_pdf/elements/__init__.py
natural_pdf/elements/base.py
natural_pdf/elements/element_collection.py
natural_pdf/elements/image.py
natural_pdf/elements/line.py
natural_pdf/elements/rect.py
natural_pdf/elements/region.py
natural_pdf/elements/text.py
natural_pdf/elements/mixins/classification_batch_mixin.py
natural_pdf/engine_registry/__init__.py
natural_pdf/engine_registry/base.py
natural_pdf/engine_registry/checkbox.py
natural_pdf/engine_registry/classification.py
natural_pdf/engine_registry/deskew.py
natural_pdf/engine_registry/guides.py
natural_pdf/engine_registry/layout.py
natural_pdf/engine_registry/ocr.py
natural_pdf/engine_registry/selectors.py
natural_pdf/engine_registry/tables.py
natural_pdf/export/mixin.py
natural_pdf/exporters/__init__.py
natural_pdf/exporters/annotated_pdf.py
natural_pdf/exporters/hocr.py
natural_pdf/exporters/hocr_font.py
natural_pdf/exporters/original_pdf.py
natural_pdf/exporters/region_pdf.py
natural_pdf/exporters/searchable_pdf.py
natural_pdf/exporters/training_data.py
natural_pdf/exporters/data/__init__.py
natural_pdf/exporters/data/pdf.ttf
natural_pdf/exporters/data/sRGB.icc
natural_pdf/extraction/citations.py
natural_pdf/extraction/json_parser.py
natural_pdf/extraction/result.py
natural_pdf/extraction/structured_ops.py
natural_pdf/extraction/vlm_adapter.py
natural_pdf/flows/__init__.py
natural_pdf/flows/_utils.py
natural_pdf/flows/collections.py
natural_pdf/flows/element.py
natural_pdf/flows/flow.py
natural_pdf/flows/region.py
natural_pdf/guides/__init__.py
natural_pdf/guides/guides_provider.py
natural_pdf/guides/engines/content.py
natural_pdf/guides/engines/headers.py
natural_pdf/guides/engines/lines.py
natural_pdf/guides/engines/stripes.py
natural_pdf/guides/engines/whitespace.py
natural_pdf/ocr/__init__.py
natural_pdf/ocr/engine.py
natural_pdf/ocr/engine_doctr.py
natural_pdf/ocr/engine_easyocr.py
natural_pdf/ocr/engine_paddle.py
natural_pdf/ocr/engine_paddleocr_vl.py
natural_pdf/ocr/engine_rapidocr.py
natural_pdf/ocr/engine_surya.py
natural_pdf/ocr/ocr_factory.py
natural_pdf/ocr/ocr_manager.py
natural_pdf/ocr/ocr_options.py
natural_pdf/ocr/ocr_provider.py
natural_pdf/ocr/utils.py
natural_pdf/ocr/vlm_ocr.py
natural_pdf/qa/__init__.py
natural_pdf/qa/document_qa.py
natural_pdf/search/__init__.py
natural_pdf/search/search_service.py
natural_pdf/selectors/__init__.py
natural_pdf/selectors/_clauses.py
natural_pdf/selectors/host_mixin.py
natural_pdf/selectors/parser.py
natural_pdf/selectors/registry.py
natural_pdf/selectors/selector_provider.py
natural_pdf/services/__init__.py
natural_pdf/services/base.py
natural_pdf/services/checkbox_service.py
natural_pdf/services/classification_service.py
natural_pdf/services/conversion_service.py
natural_pdf/services/describe_service.py
natural_pdf/services/exclusion_service.py
natural_pdf/services/extraction_service.py
natural_pdf/services/guides_service.py
natural_pdf/services/layout_service.py
natural_pdf/services/navigation_service.py
natural_pdf/services/ocr_service.py
natural_pdf/services/qa_service.py
natural_pdf/services/registry.py
natural_pdf/services/rendering_service.py
natural_pdf/services/selector_service.py
natural_pdf/services/shape_detection_service.py
natural_pdf/services/table_service.py
natural_pdf/services/text_service.py
natural_pdf/tables/__init__.py
natural_pdf/tables/result.py
natural_pdf/tables/structure_provider.py
natural_pdf/tables/table_provider.py
natural_pdf/tables/engines/pdfplumber.py
natural_pdf/tables/engines/tatr.py
natural_pdf/tables/engines/text.py
natural_pdf/tables/structure_engines/__init__.py
natural_pdf/tables/structure_engines/tatr.py
natural_pdf/tables/utils/__init__.py
natural_pdf/tables/utils/cells.py
natural_pdf/tables/utils/common.py
natural_pdf/tables/utils/guides.py
natural_pdf/tables/utils/plumber.py
natural_pdf/templates/__init__.py
natural_pdf/text/font_style.py
natural_pdf/text/operations.py
natural_pdf/utils/__init__.py
natural_pdf/utils/bidi_mirror.py
natural_pdf/utils/color_utils.py
natural_pdf/utils/identifiers.py
natural_pdf/utils/layout.py
natural_pdf/utils/locks.py
natural_pdf/utils/option_validation.py
natural_pdf/utils/optional_imports.py
natural_pdf/utils/packaging.py
natural_pdf/utils/page_context.py
natural_pdf/utils/pdfminer_patches.py
natural_pdf/utils/sections.py
natural_pdf/utils/spatial.py
natural_pdf/utils/visualization.py
natural_pdf/widgets/__init__.py
natural_pdf/widgets/checkbox_annotator.py
natural_pdf/widgets/viewer.py
scripts/01-execute_notebooks.py
scripts/create_cookbook_pdfs.py
scripts/pdf-extraction-task-catalog.md
scripts/publish.sh
scripts/doc_coverage/README.md
scripts/doc_coverage/__init__.py
scripts/doc_coverage/__main__.py
scripts/doc_coverage/cli.py
scripts/doc_coverage/analyzers/__init__.py
scripts/doc_coverage/analyzers/api_catalog.py
scripts/doc_coverage/analyzers/ast_walker.py
scripts/doc_coverage/analyzers/matcher.py
scripts/doc_coverage/extractors/__init__.py
scripts/doc_coverage/extractors/base.py
scripts/doc_coverage/extractors/markdown.py
scripts/doc_coverage/extractors/notebook.py
scripts/doc_coverage/reporters/__init__.py
scripts/doc_coverage/reporters/html_report.py
scripts/doc_coverage/reporters/json_report.py
scripts/doc_coverage/reporters/terminal.py
temp/test_vlm_ocr_openai.py
tests/__init__.py
tests/conftest.py
tests/demo_multipage.py
tests/test_aggregate_selectors.py
tests/test_alt_text.py
tests/test_annotate.py
tests/test_arabic_performance.py
tests/test_arabic_real_world.py
tests/test_ask_extract_wrapper.py
tests/test_attr_method.py
tests/test_auto_multipage_option.py
tests/test_below_from_anchor.py
tests/test_checkbox_reimpl.py
tests/test_citations.py
tests/test_classification_provider.py
tests/test_classification_results.py
tests/test_closest_substring_sorting.py
tests/test_closest_until.py
tests/test_closest_until_comparison.py
tests/test_closest_until_debug.py
tests/test_closest_until_fix.py
tests/test_closest_until_ordering.py
tests/test_color_conversion.py
tests/test_color_hex_display.py
tests/test_confidence.py
tests/test_conversion_service.py
tests/test_crop_enhancements.py
tests/test_crop_region_highlights.py
tests/test_describe_colors.py
tests/test_deskew_provider.py
tests/test_directional_boundary_precision.py
tests/test_directional_defaults.py
tests/test_dissolve.py
tests/test_dissolve_cross_page_bug.py
tests/test_dissolve_debug_issue.py
tests/test_dissolve_real_world_issue.py
tests/test_dissolve_single_elements.py
tests/test_dissolve_vertical_offset_issue.py
tests/test_doc_coverage.py
tests/test_document_qa.py
tests/test_element_addition.py
tests/test_element_collection_guides.py
tests/test_element_collection_show_cols.py
tests/test_element_collection_slicing.py
tests/test_element_exclusions.py
tests/test_element_show_crop_highlights.py
tests/test_empty_pseudo_class.py
tests/test_endpoint_property.py
tests/test_engine_provider.py
tests/test_engine_registry.py
tests/test_exclude_multi_page.py
tests/test_exclude_real_pdf.py
tests/test_exclusion_recursion_fix.py
tests/test_exclusions.py
tests/test_expand.py
tests/test_expand_enhanced.py
tests/test_export_training_data.py
tests/test_extensions.py
tests/test_extract_text_words.py
tests/test_extraction_error.py
tests/test_extraction_text_and_vision.py
tests/test_extraction_working.py
tests/test_first_last_selectors.py
tests/test_fix_get_sections_zero_height.py
tests/test_flow_analysis.py
tests/test_flow_cleanup.py
tests/test_flow_region_directional.py
tests/test_flowregion_ocr_exclusions.py
tests/test_flowregion_tables.py
tests/test_from_images.py
tests/test_from_parameter.py
tests/test_from_parameter_example.py
tests/test_from_self_exclusion.py
tests/test_get_sections_fix_comprehensive.py
tests/test_get_sections_zero_height.py
tests/test_groupby.py
tests/test_guides.py
tests/test_guides_apply_exclusions.py
tests/test_guides_apply_exclusions_simple.py
tests/test_guides_boundaries.py
tests/test_guides_extract_table.py
tests/test_guides_extract_table_collections.py
tests/test_guides_extract_table_exclusions.py
tests/test_guides_extract_table_real.py
tests/test_guides_flow_adapter.py
tests/test_guides_from_headers.py
tests/test_guides_from_headers_strings.py
tests/test_guides_from_stripes.py
tests/test_guides_grid_helpers.py
tests/test_guides_integration.py
tests/test_guides_marker_sorting.py
tests/test_guides_minima_detection.py
tests/test_guides_partial.py
tests/test_guides_provider.py
tests/test_guides_service.py
tests/test_guides_snap_to_whitespace.py
tests/test_guides_text_detect.py
tests/test_highlight_color_falsy.py
tests/test_highlight_detection.py
tests/test_highlight_detection_comprehensive.py
tests/test_highlight_offset.py
tests/test_highlight_protocol.py
tests/test_highlight_protocol_simple.py
tests/test_highlight_regions.py
tests/test_horizontal_guides_alignment.py
tests/test_include_boundaries_comprehensive.py
tests/test_include_boundaries_final.py
tests/test_include_boundaries_final_verification.py
tests/test_include_boundaries_fix.py
tests/test_include_boundaries_mock.py
tests/test_include_boundaries_simple.py
tests/test_include_boundaries_types_pdf.py
tests/test_include_boundaries_verification.py
tests/test_include_boundaries_with_real_text.py
tests/test_json_parser.py
tests/test_language_hints.py
tests/test_layout_bugs.py
tests/test_layout_service.py
tests/test_loading_original.py
tests/test_local_provider.py
tests/test_map_method.py
tests/test_markdown_docs.py
tests/test_merge_connected.py
tests/test_merge_connected_real_world.py
tests/test_merge_method.py
tests/test_merged_flowregion_specs.py
tests/test_mixed_collection_rendering.py
tests/test_multi_page_table_discovery.py
tests/test_multipage_directional.py
tests/test_negative_bounds_pdf.py
tests/test_ocr_provider_integration.py
tests/test_option_validation.py
tests/test_optional_deps.py
tests/test_optional_imports_config.py
tests/test_paddle_language_normalization.py
tests/test_paddleocr_vl.py
tests/test_page_collection_qa.py
tests/test_page_exclusion_lists.py
tests/test_page_extract_table_outer.py
tests/test_pdf_add_exclusion_elementcollection.py
tests/test_pdf_collection_selectors.py
tests/test_pdf_exclusions_in_find_methods.py
tests/test_pdf_qa_service.py
tests/test_pdfminer_bug_status.py
tests/test_pdfminer_color_bug.py
tests/test_pdfminer_color_stack_bug.py
tests/test_rapidocr_engine.py
tests/test_region_pdf_export.py
tests/test_region_show_crop_highlights.py
tests/test_region_text_kwargs.py
tests/test_region_viewer.py
tests/test_rotate_views.py
tests/test_search.py
tests/test_search_refactor.py
tests/test_sections_collection_mixin.py
tests/test_sections_end_only.py
tests/test_sections_with_start_and_end.py
tests/test_selector_aggregates_unit.py
tests/test_selector_cache.py
tests/test_selector_expressions.py
tests/test_selector_provider.py
tests/test_show_column_layout.py
tests/test_show_edge_cases.py
tests/test_show_exclusions.py
tests/test_show_exclusions_feature.py
tests/test_show_limit.py
tests/test_skip_repeating_headers_multipage.py
tests/test_slice_cache_reuse.py
tests/test_slice_exclusion_fix.py
tests/test_slice_exclusion_issue.py
tests/test_slice_exclusion_mock.py
tests/test_sliced_collection_exclusions.py
tests/test_smart_exclusion.py
tests/test_space_injection.py
tests/test_spatial_offset.py
tests/test_sprint1_fixes.py
tests/test_sprint2_fixes.py
tests/test_sprint3_fixes.py
tests/test_strikethrough_detection.py
tests/test_structure_provider.py
tests/test_structured_ops_tiers.py
tests/test_table_result_header_mismatch.py
tests/test_table_result_keep_blank.py
tests/test_table_result_to_df_extras.py
tests/test_table_structure_usage.py
tests/test_tables_integration.py
tests/test_tables_provider.py
tests/test_text_closest_selector.py
tests/test_text_tolerance.py
tests/test_text_tolerance_regression.py
tests/test_tiny_text_tables.py
tests/test_tiny_text_tables_table.py
tests/test_trim_sparse_content.py
tests/test_tutorials.py
tests/test_underline_detection.py
tests/test_unique_method.py
tests/test_update_text.py
tests/test_vlm_adapter.py
tests/test_vlm_client.py
tests/test_vlm_ocr.py
tests/test_within_constraint.py
tests/test_words_vs_find_all_text.py
tests/test_words_vs_find_all_text_summary.md
tests/realpdf/__init__.py
tests/realpdf/snapshot_utils.py
tests/realpdf/test_element_manager_compat.py
tests/realpdf/test_element_manager_snapshots.py
tests/realpdf/snapshots/element_manager_snapshots.json
tests/test_core/test_containment_geometry.py
tests/test_core/test_elements.py
tests/test_core/test_loading.py
tests/test_core/test_spatial.py
tests/test_core/test_text_extraction.py
tests/test_core/test_text_layer.py
typings/ipywidgets/__init__.pyi
typings/scipy/__init__.pyi
typings/scipy/ndimage/__init__.pyi
typings/scipy/signal/__init__.pyi
typings/sklearn/__init__.pyi
typings/sklearn/cluster/__init__.pyi