awscli>=1.22.55
beautifulsoup4
charset_normalizer>=3.1.0
comment_parser
crossfit>=0.0.7
dask-mpi>=2021.11.0
dask[complete]>=2021.7.1
datasets
distributed>=2021.7.1
fasttext==0.9.2
ftfy==6.1.1
in-place==0.5.0
jieba==0.42.1
justext==3.0.1
lxml_html_clean
mecab-python3
mwparserfromhell==0.6.5
numpy<2
openai
peft
platformdirs
presidio-analyzer==2.2.351
presidio-anonymizer==2.2.351
pycld2
resiliparse
sentencepiece
spacy<3.8.0,>=3.6.0
unidic-lite==1.0.8
usaddress==0.5.10
warcio==1.7.4
zstandard==0.18.0

[all]
nemo_curator[image]
nemo_curator[bitext]

[all_nightly]
nemo_curator[image_nightly]

[bitext]
huggingface-hub
tqdm
transformers
nemo_curator[cuda12x]

[cuda12x]
cudf-cu12>=24.10
cugraph-cu12>=24.10
cuml-cu12>=24.10
dask-cuda>=24.10
dask-cudf-cu12>=24.10
spacy[cuda12x]<3.8.0,>=3.6.0

[cuda12x_nightly]
cudf-cu12<=24.12,>=24.12.0a0
cugraph-cu12<=24.12,>=24.12.0a0
cuml-cu12<=24.12,>=24.12.0a0
dask-cuda<=24.12,>=24.12.0a0
dask-cudf-cu12<=24.12,>=24.12.0a0
spacy[cuda12x]<3.8.0,>=3.6.0

[image]
nvidia-dali-cuda120
nvidia-nvjpeg2k-cu12
timm>=1.0.8
nemo_curator[cuda12x]

[image_nightly]
nvidia-dali-cuda120
nvidia-nvjpeg2k-cu12
timm>=1.0.8
nemo_curator[cuda12x_nightly]
