# Python package dependencies, one requirement per line, written with
# PEP 440 version specifiers (pip requirements-file syntax).
# Entries are kept in alphabetical order.
#
# Constraint styles used below:
#   name==X      exact pin
#   name>=X      minimum version only, no upper bound
#   name<Y,>=X   bounded range
#   name         unconstrained; the installer picks whatever version resolves
einops
gdown
huggingface-hub>=0.26.2
joblib
librosa==0.10.2.post1
# Capped below the 2.0 major release.
# NOTE(review): presumably for compatibility with the pinned packages in this list - confirm before lifting the cap.
numpy<2.0,>=1.24.3
opencv-python==4.10.0.84
packaging
pydub
python_speech_features==0.6
rotary_embedding_torch==0.8.3
scenedetect==0.6.6
scikit_learn
scipy>=1.10.1
soundfile==0.12.1
# torch and torchaudio only declare lower bounds here.
torch>=2.0.1
torchaudio>=2.0.2
torchinfo
# NOTE(review): torchvision carries no version constraint, unlike torch and torchaudio above - confirm the resolver selects a build matching the installed torch.
torchvision
tqdm
yamlargparse
