# Read .env as makefile text, so its NAME=value lines become make variables.
# The bare `export` below then passes every make variable to the environment
# of each recipe command.
# NOTE(review): a plain `include` makes a missing .env a fatal error for every
# target (including `down` and `logs`) -- confirm that is intended.
include .env
export

# Every target in this file is a command name rather than a file to build;
# declaring them phony keeps a stray file named e.g. `build` from masking them.
.PHONY: build up down logs restart status bench

# build: run `docker compose build` for the compose project in this directory.
# As the first rule in the file it is also what a bare `make` runs.
build:
	docker compose build

# up: run the `build` target first, then start the services detached (-d).
# After a fixed 2-second pause, print the tail of the logs (--tail 20) so
# early startup output is visible without attaching.
up: build
	docker compose up -d
	@sleep 2
	docker compose logs --tail 20

# down: run `docker compose down` for the compose project.
down:
	docker compose down

# logs: follow the compose logs (-f); runs until interrupted.
logs:
	docker compose logs -f

# restart: run `docker compose restart` (no rebuild; this target has no
# prerequisites), pause 2 seconds, then print the tail of the logs (--tail 20).
restart:
	docker compose restart
	@sleep 2
	docker compose logs --tail 20

# status: list the compose containers, print a separator, then probe the HTTP
# endpoint at http://localhost:8371/mcp and show at most the first 5 lines of
# the response.
#
# The fallback message sits inside the brace group, on the left-hand side of
# the pipe. A pipeline's exit status is that of its last command (`head`,
# which succeeds even on empty input), so a trailing `|| echo ...` after
# `head -5` can never fire when curl fails to connect. Grouping makes the
# `||` test curl's own exit status, while output is still streamed through
# `head` so a long-lived response is cut off after 5 lines.
status:
	@docker compose ps
	@echo "---"
	@{ curl -s http://localhost:8371/mcp 2>/dev/null || echo "Server not responding"; } | head -5

# bench: send one fixed completion request and print the reported throughput.
#
# curl runs inside the `orpheus-llama-server` container via `docker exec`
# and sends a JSON body (fixed prompt, max_tokens 500, stream false) to
# http://127.0.0.1:8081/v1/completions. The pipe is outside the `docker exec`
# command, so the response is parsed by python3 on the host. The script reads
# `usage.completion_tokens` (required; a response without `usage` raises
# KeyError) and `timings.predicted_per_second` (optional; printed as 0.0 when
# `timings` or that key is absent).
#
# Quoting: the JSON body is single-quoted for the shell, and the Python source
# is double-quoted with \" around the f-string. Keep both intact when editing.
bench:
	@echo "Benchmarking llama-server throughput..."
	@docker exec orpheus-llama-server curl -s "http://127.0.0.1:8081/v1/completions" \
	  -H "Content-Type: application/json" \
	  -d '{"prompt":"<|audio|>tara: Hello, how are you doing today?<|eot_id|>","max_tokens":500,"stream":false}' | \
	  python3 -c "import sys,json; d=json.load(sys.stdin); u=d['usage']; t=d.get('timings',{}); print(f\"{u['completion_tokens']} tokens, {t.get('predicted_per_second',0):.1f} tok/s\")"
