Metadata-Version: 2.1
Name: unstructured-ingest
Version: 0.0.3
Summary: A library that prepares raw documents for downstream ML tasks.
Home-page: https://github.com/Unstructured-IO/unstructured-ingest
Author: Unstructured Technologies
Author-email: devops@unstructuredai.io
License: Apache-2.0
Keywords: NLP PDF HTML CV XML parsing preprocessing
Classifier: Development Status :: 4 - Beta
Classifier: Intended Audience :: Developers
Classifier: Intended Audience :: Education
Classifier: Intended Audience :: Science/Research
Classifier: License :: OSI Approved :: Apache Software License
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
Requires-Python: >=3.9.0,<3.13
Description-Content-Type: text/markdown
Requires-Dist: unstructured
Requires-Dist: python-dateutil
Requires-Dist: pandas
Provides-Extra: airtable
Requires-Dist: pyairtable ; extra == 'airtable'
Provides-Extra: astra
Requires-Dist: astrapy ; extra == 'astra'
Provides-Extra: azure
Requires-Dist: fsspec ; extra == 'azure'
Requires-Dist: adlfs ; extra == 'azure'
Provides-Extra: azure-cognitive-search
Requires-Dist: azure-search-documents ; extra == 'azure-cognitive-search'
Provides-Extra: bedrock
Requires-Dist: boto3 ; extra == 'bedrock'
Requires-Dist: langchain-community ; extra == 'bedrock'
Provides-Extra: biomed
Requires-Dist: bs4 ; extra == 'biomed'
Provides-Extra: box
Requires-Dist: fsspec ; extra == 'box'
Requires-Dist: boxfs ; extra == 'box'
Provides-Extra: chroma
Requires-Dist: typer <=0.9.0 ; extra == 'chroma'
Requires-Dist: importlib-metadata >=7.1.0 ; extra == 'chroma'
Requires-Dist: chromadb ; extra == 'chroma'
Provides-Extra: clarifai
Requires-Dist: clarifai ; extra == 'clarifai'
Provides-Extra: confluence
Requires-Dist: atlassian-python-api ; extra == 'confluence'
Provides-Extra: csv
Requires-Dist: unstructured[tsv] ; extra == 'csv'
Provides-Extra: databricks-volumes
Requires-Dist: databricks-sdk ; extra == 'databricks-volumes'
Provides-Extra: delta-table
Requires-Dist: fsspec ; extra == 'delta-table'
Requires-Dist: deltalake ; extra == 'delta-table'
Provides-Extra: discord
Requires-Dist: discord-py ; extra == 'discord'
Provides-Extra: doc
Requires-Dist: unstructured[docx] ; extra == 'doc'
Provides-Extra: docx
Requires-Dist: unstructured[docx] ; extra == 'docx'
Provides-Extra: dropbox
Requires-Dist: dropboxdrivefs ; extra == 'dropbox'
Requires-Dist: fsspec ; extra == 'dropbox'
Provides-Extra: elasticsearch
Requires-Dist: elasticsearch[async] ; extra == 'elasticsearch'
Provides-Extra: embed-huggingface
Requires-Dist: sentence-transformers ; extra == 'embed-huggingface'
Requires-Dist: langchain-community ; extra == 'embed-huggingface'
Requires-Dist: huggingface ; extra == 'embed-huggingface'
Provides-Extra: embed-octoai
Requires-Dist: tiktoken ; extra == 'embed-octoai'
Requires-Dist: openai ; extra == 'embed-octoai'
Provides-Extra: embed-vertexai
Requires-Dist: langchain ; extra == 'embed-vertexai'
Requires-Dist: langchain-community ; extra == 'embed-vertexai'
Requires-Dist: langchain-google-vertexai ; extra == 'embed-vertexai'
Provides-Extra: embed-voyageai
Requires-Dist: langchain ; extra == 'embed-voyageai'
Requires-Dist: langchain-voyageai ; extra == 'embed-voyageai'
Provides-Extra: epub
Requires-Dist: unstructured[epub] ; extra == 'epub'
Provides-Extra: gcs
Requires-Dist: fsspec ; extra == 'gcs'
Requires-Dist: bs4 ; extra == 'gcs'
Requires-Dist: gcsfs ; extra == 'gcs'
Provides-Extra: github
Requires-Dist: pygithub >1.58.0 ; extra == 'github'
Provides-Extra: gitlab
Requires-Dist: python-gitlab ; extra == 'gitlab'
Provides-Extra: google-drive
Requires-Dist: google-api-python-client ; extra == 'google-drive'
Provides-Extra: hubspot
Requires-Dist: urllib3 ; extra == 'hubspot'
Requires-Dist: hubspot-api-client ; extra == 'hubspot'
Provides-Extra: jira
Requires-Dist: atlassian-python-api ; extra == 'jira'
Provides-Extra: kafka
Requires-Dist: confluent-kafka ; extra == 'kafka'
Provides-Extra: md
Requires-Dist: unstructured[md] ; extra == 'md'
Provides-Extra: milvus
Requires-Dist: pymilvus ; extra == 'milvus'
Provides-Extra: mongodb
Requires-Dist: pymongo ; extra == 'mongodb'
Provides-Extra: msg
Requires-Dist: unstructured[msg] ; extra == 'msg'
Provides-Extra: notion
Requires-Dist: notion-client ; extra == 'notion'
Requires-Dist: htmlBuilder ; extra == 'notion'
Provides-Extra: odt
Requires-Dist: unstructured[odt] ; extra == 'odt'
Provides-Extra: onedrive
Requires-Dist: bs4 ; extra == 'onedrive'
Requires-Dist: msal ; extra == 'onedrive'
Requires-Dist: Office365-REST-Python-Client ; extra == 'onedrive'
Provides-Extra: openai
Requires-Dist: tiktoken ; extra == 'openai'
Requires-Dist: langchain-community ; extra == 'openai'
Requires-Dist: openai ; extra == 'openai'
Provides-Extra: opensearch
Requires-Dist: opensearch-py ; extra == 'opensearch'
Provides-Extra: org
Requires-Dist: unstructured[org] ; extra == 'org'
Provides-Extra: outlook
Requires-Dist: msal ; extra == 'outlook'
Requires-Dist: Office365-REST-Python-Client ; extra == 'outlook'
Provides-Extra: pdf
Requires-Dist: unstructured[pdf] ; extra == 'pdf'
Provides-Extra: pinecone
Requires-Dist: pinecone-client >=3.7.1 ; extra == 'pinecone'
Provides-Extra: postgres
Requires-Dist: psycopg2-binary ; extra == 'postgres'
Provides-Extra: ppt
Requires-Dist: unstructured[pptx] ; extra == 'ppt'
Provides-Extra: pptx
Requires-Dist: unstructured[pptx] ; extra == 'pptx'
Provides-Extra: qdrant
Requires-Dist: qdrant-client ; extra == 'qdrant'
Provides-Extra: reddit
Requires-Dist: praw ; extra == 'reddit'
Provides-Extra: rst
Requires-Dist: unstructured[rst] ; extra == 'rst'
Provides-Extra: rtf
Requires-Dist: unstructured[rtf] ; extra == 'rtf'
Provides-Extra: s3
Requires-Dist: fsspec ; extra == 's3'
Requires-Dist: s3fs ; extra == 's3'
Provides-Extra: salesforce
Requires-Dist: simple-salesforce ; extra == 'salesforce'
Provides-Extra: sftp
Requires-Dist: fsspec ; extra == 'sftp'
Requires-Dist: paramiko ; extra == 'sftp'
Provides-Extra: sharepoint
Requires-Dist: msal ; extra == 'sharepoint'
Requires-Dist: Office365-REST-Python-Client ; extra == 'sharepoint'
Provides-Extra: singlestore
Requires-Dist: singlestoredb ; extra == 'singlestore'
Provides-Extra: slack
Requires-Dist: slack-sdk ; extra == 'slack'
Provides-Extra: tsv
Requires-Dist: unstructured[tsv] ; extra == 'tsv'
Provides-Extra: weaviate
Requires-Dist: weaviate-client ; extra == 'weaviate'
Provides-Extra: wikipedia
Requires-Dist: wikipedia ; extra == 'wikipedia'
Provides-Extra: xlsx
Requires-Dist: unstructured[xlsx] ; extra == 'xlsx'

# Unstructured Ingest

For details, see the [Unstructured Ingest overview](https://docs.unstructured.io/ingestion/overview) in the Unstructured documentation.
