LICENSE
README.md
pyproject.toml
src/FJFormer.egg-info/PKG-INFO
src/FJFormer.egg-info/SOURCES.txt
src/FJFormer.egg-info/dependency_links.txt
src/FJFormer.egg-info/requires.txt
src/FJFormer.egg-info/top_level.txt
src/fjformer/__init__.py
src/fjformer/utils.py
src/fjformer/bits/__init__.py
src/fjformer/bits/bits.py
src/fjformer/bits/calibration.py
src/fjformer/bits/config.py
src/fjformer/bits/int_numerics.py
src/fjformer/bits/no_numerics.py
src/fjformer/bits/numerics.py
src/fjformer/bits/q_dot_general.py
src/fjformer/bits/q_flax.py
src/fjformer/bits/qk.py
src/fjformer/bits/stochastic_rounding.py
src/fjformer/checkpoint/__init__.py
src/fjformer/checkpoint/_load.py
src/fjformer/checkpoint/streamer.py
src/fjformer/func/__init__.py
src/fjformer/func/_func.py
src/fjformer/func/loss_func.py
src/fjformer/linen/__init__.py
src/fjformer/linen/linear.py
src/fjformer/monitor/__init__.py
src/fjformer/monitor/tracker.py
src/fjformer/optimizers/__init__.py
src/fjformer/optimizers/adafactor.py
src/fjformer/optimizers/adamw.py
src/fjformer/optimizers/lion.py
src/fjformer/optimizers/optimizer_utils.py
src/fjformer/optimizers/rmsprop.py
src/fjformer/pallas_operations/__init__.py
src/fjformer/pallas_operations/efficient_attention/__init__.py
src/fjformer/pallas_operations/efficient_attention/efficient_attention.py
src/fjformer/pallas_operations/flash_attention/__init__.py
src/fjformer/pallas_operations/flash_attention/gpu/__init__.py
src/fjformer/pallas_operations/flash_attention/gpu/jax_flash_attn_gpu.py
src/fjformer/pallas_operations/flash_attention/tpu/__init__.py
src/fjformer/pallas_operations/flash_attention/tpu/jax_flash_attn_tpu.py
src/fjformer/pallas_operations/layer_norm/__init__.py
src/fjformer/pallas_operations/layer_norm/gpu/__init__.py
src/fjformer/pallas_operations/layer_norm/gpu/layer_norm.py
src/fjformer/pallas_operations/ring_attention/__init__.py
src/fjformer/pallas_operations/ring_attention/ring_attention.py
src/fjformer/pallas_operations/rms_norm/__init__.py
src/fjformer/pallas_operations/rms_norm/gpu/__init__.py
src/fjformer/pallas_operations/rms_norm/gpu/rms_norm.py
src/fjformer/pallas_operations/softmax/__init__.py
src/fjformer/pallas_operations/softmax/gpu/__init__.py
src/fjformer/pallas_operations/softmax/gpu/softmax.py
src/fjformer/pallas_operations/splash_attention/__init__.py
src/fjformer/pallas_operations/splash_attention/tpu/__init__.py
src/fjformer/pallas_operations/splash_attention/tpu/splash_attention_kernel.py
src/fjformer/pallas_operations/splash_attention/tpu/splash_attention_mask.py
src/fjformer/pallas_operations/splash_attention/tpu/splash_attention_mask_info.py
src/fjformer/partition_utils/__init__.py
src/fjformer/partition_utils/mesh_utils.py
src/fjformer/partition_utils/t5x_partitioning.py
src/fjformer/xrapture/__init__.py
src/fjformer/xrapture/implicit_array.py
src/fjformer/xrapture/tracer.py
src/fjformer/xrapture/xrapture.py
test/test_cross_ent_loss_and_acc.py