dask[complete]>=2021.7.1
distributed>=2021.7.1
dask-mpi>=2021.11.0
charset_normalizer>=3.1.0
awscli>=1.22.55
fasttext==0.9.2
pycld2
justext==3.0.1
resiliparse
ftfy==6.1.1
warcio==1.7.4
zstandard==0.18.0
in-place==0.5.0
unidic-lite==1.0.8
jieba==0.42.1
comment_parser
beautifulsoup4
mwparserfromhell==0.6.5
spacy<4.0.0,>=3.6.0
presidio-analyzer==2.2.351
presidio-anonymizer==2.2.351
usaddress==0.5.10
nemo_toolkit[nlp]>=1.23.0
Cython
crossfit==0.0.4
numpy<2
openai

[cuda12x]
cudf-cu12==24.6.*
dask-cudf-cu12==24.6.*
cuml-cu12==24.6.*
cugraph-cu12==24.6.*
dask-cuda==24.6.*
spacy[cuda12x]<4.0.0,>=3.6.0
