.dockerignore
.gitignore
.pre-commit-config.yaml
LICENSE
Makefile
README.md
backoff_retry.sh
pylintrc
pyproject.toml
xpk-large-scale-guide.sh
xpk-notebooks.md
xpk.py
.github/CODEOWNERS
.github/PULL_REQUEST_TEMPLATE.md
.github/release.yaml
.github/actions/install-crane/action.yml
.github/actions/install-kueue/action.yml
.github/actions/setup-test-env/action.yml
.github/workflows/README.md
.github/workflows/build_tests.yaml
.github/workflows/build_wheels.yaml
.github/workflows/cleanup.yaml
.github/workflows/gemini-dispatch.yml
.github/workflows/gemini-invoke.yml
.github/workflows/gemini-review.yml
.github/workflows/gemini-scheduled-triage.yml
.github/workflows/gemini-triage.yml
.github/workflows/integration_basic_cluster_create.yaml
.github/workflows/integration_gpu_cluster_create.yaml
.github/workflows/integration_pathways_cluster_create.yaml
.github/workflows/integration_ray_cluster_create.yaml
.github/workflows/integration_storage_tests.yaml
.github/workflows/label-validation.yaml
.github/workflows/nightly_tests.yaml
.github/workflows/periodic_release.yaml
.github/workflows/release_branch_versioning.yaml
.github/workflows/reusable_build_scripts.yaml
.github/workflows/reusable_build_wheel.yaml
.github/workflows/reusable_goldens.yaml
.github/workflows/reusable_lint_and_format.yml
.github/workflows/reusable_storage_create.yaml
.github/workflows/reusable_storage_delete.yaml
.github/workflows/reusable_unit_tests.yaml
.github/workflows/stale.yaml
data/Dockerfile
docs/code-of-conduct.md
docs/contributing.md
docs/installation.md
docs/permissions.md
docs/testing.md
docs/troubleshooting.md
docs/usage/advanced.md
docs/usage/autoprovisioning.md
docs/usage/clusters.md
docs/usage/cpu.md
docs/usage/docker.md
docs/usage/gpu.md
docs/usage/inspector.md
docs/usage/storage.md
docs/usage/workloads.md
docs/usage/tpu7x/recipes/flex_filestore_recipe.md
docs/usage/tpu7x/recipes/flex_lustre_recipe.md
docs/usage/tpu7x/recipes/reservation_gcs_bucket_recipe.md
examples/fake_training.py
examples/llama-3.1-finetuning/check_cuda.sh
examples/llama-3.1-finetuning/requirements.txt
examples/llama-3.1-finetuning/train.py
examples/llama-3.1-finetuning/train.slurm
examples/llama-3.1-finetuning/training_data.jsonl
examples/nccl/nccl-a3mega.sh
examples/nccl/nccl-a3ultra.sh
examples/nccl/nccl.md
examples/storage/filestore-manifest-attach.yaml
examples/storage/gcsfuse-manifest.yaml
examples/storage/lustre-manifest-attach.yaml
examples/storage/parallelstore-manifest-attach.yaml
examples/storage/pd-manifest-attach.yaml
recipes/Basic_cluster_adapt.md
recipes/Basic_cluster_create.md
recipes/Cluster_create_RayCluster.md
recipes/Cluster_create_for_multi-host_nodepool.md
recipes/Cluster_create_for_single-host_nodepool.md
recipes/Cluster_create_private.md
recipes/Cluster_create_sub-slicing.md
recipes/Cluster_create_super-slicing.md
recipes/Cluster_create_with_CPU_and_memory_limits_above_capacity.md
recipes/Cluster_create_with_CPU_and_memory_limits_below_capacity.md
recipes/Cluster_create_with_Managed_Lustre_driver.md
recipes/Cluster_create_with_Managed_Lustre_driver_and_legacy_port.md
recipes/Cluster_create_with_gb200-4.md
recipes/Cluster_create_with_shared_reservation.md
recipes/Cluster_delete.md
recipes/Cluster_delete_force.md
recipes/NAP_cluster-create.md
recipes/NAP_cluster-create_with_pathways.md
recipes/Storage_list.md
recipes/Workload_create.md
recipes/Workload_create_Crane.md
recipes/Workload_create_pathways.md
recipes/Workload_create_sub-slicing.md
recipes/Workload_create_super-slicing.md
recipes/Workload_create_with_output-manifest-file.md
recipes/Workload_delete.md
recipes/Workload_list.md
recipes/comprehensive-demo.md
src/xpk/__init__.py
src/xpk/main.py
src/xpk/telemetry_uploader.py
src/xpk.egg-info/PKG-INFO
src/xpk.egg-info/SOURCES.txt
src/xpk.egg-info/dependency_links.txt
src/xpk.egg-info/entry_points.txt
src/xpk.egg-info/requires.txt
src/xpk.egg-info/top_level.txt
src/xpk/api/__init__.py
src/xpk/api/storage_crd.yaml
src/xpk/blueprints/a3mega/config-map.yaml.tftpl
src/xpk/blueprints/a3mega/storage_crd.yaml
src/xpk/blueprints/a3ultra/config-map.yaml.tftpl
src/xpk/blueprints/a3ultra/mlgru-disable.yaml
src/xpk/blueprints/a3ultra/nccl-installer.yaml
src/xpk/blueprints/a3ultra/storage_crd.yaml
src/xpk/blueprints/a4/config-map.yaml.tftpl
src/xpk/blueprints/a4/nccl-rdma-installer-a4.yaml
src/xpk/blueprints/a4/storage_crd.yaml
src/xpk/commands/__init__.py
src/xpk/commands/cluster.py
src/xpk/commands/cluster_gcluster.py
src/xpk/commands/cluster_gcluster_test.py
src/xpk/commands/cluster_test.py
src/xpk/commands/common.py
src/xpk/commands/common_test.py
src/xpk/commands/config.py
src/xpk/commands/info.py
src/xpk/commands/inspector.py
src/xpk/commands/inspector_test.py
src/xpk/commands/managed_ml_diagnostics.py
src/xpk/commands/managed_ml_diagnostics_test.py
src/xpk/commands/storage.py
src/xpk/commands/version.py
src/xpk/commands/workload.py
src/xpk/commands/workload_test.py
src/xpk/core/__init__.py
src/xpk/core/capacity.py
src/xpk/core/capacity_test.py
src/xpk/core/cluster.py
src/xpk/core/cluster_private.py
src/xpk/core/cluster_test.py
src/xpk/core/commands.py
src/xpk/core/config.py
src/xpk/core/config_test.py
src/xpk/core/docker_container.py
src/xpk/core/docker_image.py
src/xpk/core/docker_image_test.py
src/xpk/core/docker_manager.py
src/xpk/core/docker_resources.py
src/xpk/core/filestore.py
src/xpk/core/gcloud_context.py
src/xpk/core/gcloud_context_test.py
src/xpk/core/gcluster_manager.py
src/xpk/core/gcsfuse.py
src/xpk/core/jobset.py
src/xpk/core/kubectl_common.py
src/xpk/core/kubectl_common_test.py
src/xpk/core/kueue_manager.py
src/xpk/core/kueue_manager_test.py
src/xpk/core/monitoring.py
src/xpk/core/mtc.py
src/xpk/core/nap.py
src/xpk/core/network.py
src/xpk/core/nodepool.py
src/xpk/core/nodepool_test.py
src/xpk/core/pathways.py
src/xpk/core/pathways_test.py
src/xpk/core/ray.py
src/xpk/core/reservation.py
src/xpk/core/reservation_test.py
src/xpk/core/resources.py
src/xpk/core/scheduling.py
src/xpk/core/scheduling_test.py
src/xpk/core/storage.py
src/xpk/core/system_characteristics.py
src/xpk/core/system_characteristics_test.py
src/xpk/core/telemetry.py
src/xpk/core/telemetry_test.py
src/xpk/core/updates.py
src/xpk/core/updates_test.py
src/xpk/core/vertex.py
src/xpk/core/workload.py
src/xpk/core/workload_test.py
src/xpk/core/blueprint/__init__.py
src/xpk/core/blueprint/blueprint_definitions.py
src/xpk/core/blueprint/blueprint_generator.py
src/xpk/core/blueprint/blueprint_test.py
src/xpk/core/blueprint/testing/__init__.py
src/xpk/core/blueprint/testing/data/a3_mega.yaml
src/xpk/core/blueprint/testing/data/a3_mega_spot.yaml
src/xpk/core/blueprint/testing/data/a3_ultra.yaml
src/xpk/core/blueprint/testing/data/a4.yaml
src/xpk/core/remote_state/__init__.py
src/xpk/core/remote_state/fuse_remote_state.py
src/xpk/core/remote_state/remote_state_client.py
src/xpk/core/testing/__init__.py
src/xpk/core/testing/commands_tester.py
src/xpk/core/testing/commands_tester_test.py
src/xpk/core/testing/mock_reservation.py
src/xpk/core/workload_decorators/__init__.py
src/xpk/core/workload_decorators/rdma_decorator.py
src/xpk/core/workload_decorators/storage_decorator.py
src/xpk/core/workload_decorators/tcpx_decorator.py
src/xpk/core/workload_decorators/tcpx_decorator_test.py
src/xpk/core/workload_decorators/tcpxo_decorator.py
src/xpk/parser/__init__.py
src/xpk/parser/cluster.py
src/xpk/parser/cluster_test.py
src/xpk/parser/common.py
src/xpk/parser/common_test.py
src/xpk/parser/config.py
src/xpk/parser/core.py
src/xpk/parser/info.py
src/xpk/parser/inspector.py
src/xpk/parser/storage.py
src/xpk/parser/storage_test.py
src/xpk/parser/validators.py
src/xpk/parser/version.py
src/xpk/parser/workload.py
src/xpk/parser/workload_test.py
src/xpk/templates/__init__.py
src/xpk/templates/arm_gpu_workload_crate.yaml.j2
src/xpk/templates/cluster_preheat.yaml.j2
src/xpk/templates/filestore-pv.yaml
src/xpk/templates/filestore-pvc.yaml
src/xpk/templates/filestore-sc.yaml
src/xpk/templates/fuse-pv.yaml
src/xpk/templates/fuse-pvc.yaml
src/xpk/templates/kueue_config.yaml.j2
src/xpk/templates/kueue_gke_default_topology.yaml.j2
src/xpk/templates/kueue_sub_slicing_topology.yaml.j2
src/xpk/templates/kueue_super_slicing_topology.yaml.j2
src/xpk/templates/mtc-cpc.yaml
src/xpk/templates/storage.yaml
src/xpk/utils/__init__.py
src/xpk/utils/console.py
src/xpk/utils/console_test.py
src/xpk/utils/environment.py
src/xpk/utils/execution_context.py
src/xpk/utils/feature_flags.py
src/xpk/utils/file.py
src/xpk/utils/gcs_utils.py
src/xpk/utils/kubectl.py
src/xpk/utils/kueue.py
src/xpk/utils/network.py
src/xpk/utils/objects.py
src/xpk/utils/templates.py
src/xpk/utils/topology.py
src/xpk/utils/topology_test.py
src/xpk/utils/user_agent.py
src/xpk/utils/user_agent_test.py
src/xpk/utils/validation.py
src/xpk/utils/validation_test.py
src/xpk/utils/versions.py
src/xpk/utils/yaml.py
tools/install-gke-auth-plugin.sh
tools/install-xpk.sh
tools/recipes.py