Coverage for llm_dataset_engine/utils/metrics_exporter.py: 0%
32 statements
« prev ^ index » next coverage.py v7.10.7, created at 2025-10-15 18:04 +0200
« prev ^ index » next coverage.py v7.10.7, created at 2025-10-15 18:04 +0200
1"""
2Prometheus metrics export for monitoring.
4Provides instrumentation for external monitoring systems.
5"""
7from prometheus_client import Counter, Gauge, Histogram, start_http_server
9from llm_dataset_engine.utils import get_logger
11logger = get_logger(__name__)
class PrometheusMetrics:
    """
    Exporter that publishes pipeline statistics as Prometheus metrics.

    The collectors are created on construction and each ``record_*``
    method updates exactly one concern. Kept deliberately narrow
    (Single Responsibility): this class only handles metrics export.
    """

    def __init__(self, port: int = 9090):
        """
        Create the metric collectors; the HTTP server is not started here.

        Args:
            port: Port the metrics HTTP server binds to when
                ``start_server`` is called
        """
        self.port = port
        self._server_started = False

        # Request volume, broken down by provider, model and stage.
        self.requests_total = Counter(
            "llm_requests_total",
            "Total LLM requests",
            labelnames=["provider", "model", "stage"],
        )

        # Request latency distribution.
        self.request_duration = Histogram(
            "llm_request_duration_seconds",
            "LLM request duration in seconds",
            labelnames=["provider", "stage"],
        )

        # Spend per provider; written with set(), so callers pass the
        # value to publish rather than an increment.
        self.cost_total = Gauge(
            "llm_cost_total_usd",
            "Total cost in USD",
            labelnames=["provider"],
        )

        # Error count per stage and error type.
        self.errors_total = Counter(
            "llm_errors_total",
            "Total errors",
            labelnames=["stage", "error_type"],
        )

        # Row count per stage; also written with set().
        self.rows_processed = Gauge(
            "llm_rows_processed_total",
            "Total rows processed",
            labelnames=["stage"],
        )

        # Unlabelled throughput gauge.
        self.rows_per_second = Gauge(
            "llm_rows_per_second",
            "Processing throughput",
        )

    def start_server(self) -> None:
        """
        Start the HTTP server for the metrics endpoint.

        Does nothing when a previous call already succeeded. A failure to
        start is logged and swallowed, so the caller never sees an exception.
        """
        if self._server_started:
            return

        try:
            start_http_server(self.port)
            self._server_started = True
            logger.info(f"Prometheus metrics server started on port {self.port}")
        except Exception as exc:
            # Best effort: the flag stays False, so a later call retries.
            logger.error(f"Failed to start metrics server: {exc}")

    def record_request(
        self, provider: str, model: str, stage: str, duration: float
    ) -> None:
        """
        Count one LLM request and observe how long it took.

        Args:
            provider: Provider name
            model: Model name
            stage: Stage name
            duration: Request duration in seconds
        """
        request_counter = self.requests_total.labels(
            provider=provider, model=model, stage=stage
        )
        request_counter.inc()

        latency_histogram = self.request_duration.labels(
            provider=provider, stage=stage
        )
        latency_histogram.observe(duration)

    def record_cost(self, provider: str, cost: float) -> None:
        """
        Publish the cost value for a provider.

        Args:
            provider: Provider name
            cost: Cost in USD; replaces the previously published value
        """
        provider_cost = self.cost_total.labels(provider=provider)
        provider_cost.set(cost)

    def record_error(self, stage: str, error_type: str) -> None:
        """
        Count one error.

        Args:
            stage: Stage name
            error_type: Error type
        """
        error_counter = self.errors_total.labels(
            stage=stage, error_type=error_type
        )
        error_counter.inc()

    def record_rows_processed(self, stage: str, count: int) -> None:
        """
        Publish the number of rows processed by a stage.

        Args:
            stage: Stage name
            count: Number of rows; replaces the previously published value
        """
        stage_rows = self.rows_processed.labels(stage=stage)
        stage_rows.set(count)

    def record_throughput(self, rows_per_second: float) -> None:
        """
        Publish the current processing throughput.

        Args:
            rows_per_second: Throughput metric
        """
        throughput_gauge = self.rows_per_second
        throughput_gauge.set(rows_per_second)