LICENSE
README.md
pyproject.toml
src/olm/__init__.py
src/olm/core/__init__.py
src/olm/core/checkpoint.py
src/olm/core/config.py
src/olm/core/dist.py
src/olm/core/dtype.py
src/olm/core/registry.py
src/olm/core/utils.py
src/olm/data/__init__.py
src/olm/data/collate.py
src/olm/data/datasets/__init__.py
src/olm/data/datasets/base_dataset.py
src/olm/data/datasets/data_loader.py
src/olm/data/datasets/hf_dataset.py
src/olm/data/datasets/local_dataset.py
src/olm/data/tokenization/__init__.py
src/olm/data/tokenization/base.py
src/olm/data/tokenization/hf_tokenizer.py
src/olm/data/tokenization/hf_train_custom.py
src/olm/export/__init__.py
src/olm/export/hf_convert.py
src/olm/export/onnx_torchscript.py
src/olm/logging/__init__.py
src/olm/logging/logger.py
src/olm/logging/progress.py
src/olm/logging/wandb_logger.py
src/olm/models/alibaba/__init__.py
src/olm/models/alibaba/qwen2.py
src/olm/models/allenai/__init__.py
src/olm/models/allenai/olmo.py
src/olm/models/facebook/__init__.py
src/olm/models/facebook/opt.py
src/olm/models/google/__init__.py
src/olm/models/google/gemma2.py
src/olm/models/meta/__init__.py
src/olm/models/meta/llama2.py
src/olm/models/meta/llama3.py
src/olm/models/microsoft/__init__.py
src/olm/models/microsoft/phi3.py
src/olm/models/microsoft/phi4.py
src/olm/models/openai/__init__.py
src/olm/models/openai/gpt2.py
src/olm/nn/__init__.py
src/olm/nn/torch_nn_wrappers.py
src/olm/nn/activations/__init__.py
src/olm/nn/activations/base.py
src/olm/nn/activations/clu.py
src/olm/nn/activations/elu.py
src/olm/nn/activations/geglu.py
src/olm/nn/activations/gelu.py
src/olm/nn/activations/glu.py
src/olm/nn/activations/identity.py
src/olm/nn/activations/leaky_relu.py
src/olm/nn/activations/liglu.py
src/olm/nn/activations/mish.py
src/olm/nn/activations/prelu.py
src/olm/nn/activations/reglu.py
src/olm/nn/activations/relu.py
src/olm/nn/activations/selu.py
src/olm/nn/activations/sigmoid.py
src/olm/nn/activations/silu.py
src/olm/nn/activations/softmax.py
src/olm/nn/activations/softplus.py
src/olm/nn/activations/swiglu.py
src/olm/nn/activations/tanh.py
src/olm/nn/attention/__init__.py
src/olm/nn/attention/alibi.py
src/olm/nn/attention/base.py
src/olm/nn/attention/flash.py
src/olm/nn/attention/gqa.py
src/olm/nn/attention/linear_attn.py
src/olm/nn/attention/mha.py
src/olm/nn/blocks/LM.py
src/olm/nn/blocks/__init__.py
src/olm/nn/blocks/linear_projections.py
src/olm/nn/blocks/output_head.py
src/olm/nn/blocks/transformer_block.py
src/olm/nn/embeddings/__init__.py
src/olm/nn/embeddings/token_embed.py
src/olm/nn/embeddings/positional/__init__.py
src/olm/nn/embeddings/positional/absolute.py
src/olm/nn/embeddings/positional/alibi.py
src/olm/nn/embeddings/positional/base.py
src/olm/nn/embeddings/positional/rope.py
src/olm/nn/embeddings/positional/sinusoidal.py
src/olm/nn/feedforward/__init__.py
src/olm/nn/feedforward/base.py
src/olm/nn/feedforward/classic_ffn.py
src/olm/nn/feedforward/classic_moe.py
src/olm/nn/feedforward/geglu_ffn.py
src/olm/nn/feedforward/geglu_moe.py
src/olm/nn/feedforward/moe_base.py
src/olm/nn/feedforward/swiglu_ffn.py
src/olm/nn/feedforward/swiglu_moe.py
src/olm/nn/norms/__init__.py
src/olm/nn/norms/base.py
src/olm/nn/norms/layer_norm.py
src/olm/nn/norms/rms_norm.py
src/olm/nn/structure/__init__.py
src/olm/nn/structure/block.py
src/olm/nn/structure/combinators/__init__.py
src/olm/nn/structure/combinators/base.py
src/olm/nn/structure/combinators/parallel.py
src/olm/nn/structure/combinators/repeat.py
src/olm/nn/structure/combinators/residual.py
src/olm/plugins/__init__.py
src/olm/train/__init__.py
src/olm/train/callbacks/__init__.py
src/olm/train/callbacks/checkpoint_cb.py
src/olm/train/callbacks/early_stopping_cb.py
src/olm/train/callbacks/lr_monitor_cb.py
src/olm/train/callbacks/metrics_logger_cb.py
src/olm/train/callbacks/throughput_cb.py
src/olm/train/callbacks/validation_cb.py
src/olm/train/losses/base.py
src/olm/train/losses/cross_entropy.py
src/olm/train/losses/kllloss.py
src/olm/train/losses/mce.py
src/olm/train/losses/zloss.py
src/olm/train/optim/__init__.py
src/olm/train/optim/adamw.py
src/olm/train/optim/base.py
src/olm/train/optim/lion.py
src/olm/train/optim/zero.py
src/olm/train/regularization/__init__.py
src/olm/train/regularization/grad_clip.py
src/olm/train/regularization/weight_decay.py
src/olm/train/schedulers/__init__.py
src/olm/train/schedulers/base.py
src/olm/train/schedulers/cosine.py
src/olm/train/schedulers/linear.py
src/olm/train/schedulers/warmup.py
src/olm/train/trainer/__init__.py
src/olm/train/trainer/ddp_trainer.py
src/olm/train/trainer/fsdp_trainer.py
src/olm/train/trainer/trainer.py
src/openlanguagemodel.egg-info/PKG-INFO
src/openlanguagemodel.egg-info/SOURCES.txt
src/openlanguagemodel.egg-info/dependency_links.txt
src/openlanguagemodel.egg-info/requires.txt
src/openlanguagemodel.egg-info/top_level.txt
tests/test_moe.py
tests/test_save_load.py