Coverage for src/dataknobs_llm/prompts/versioning/types.py: 97%
137 statements
« prev ^ index » next coverage.py v7.11.0, created at 2025-11-08 13:51 -0700
« prev ^ index » next coverage.py v7.11.0, created at 2025-11-08 13:51 -0700
"""Core type definitions for prompt versioning and A/B testing.

This module defines:
- Version data structures
- Experiment configurations
- Metrics tracking types
- Custom exceptions
"""
10from dataclasses import dataclass, field
11from datetime import datetime
12from typing import Any, Dict, List
13from enum import Enum
class VersioningError(Exception):
    """Root exception type for versioning-related failures.

    Adds no behavior on top of ``Exception``; it only provides a common
    base class for versioning errors.
    """
class VersionStatus(Enum):
    """Lifecycle state of a prompt version.

    Every member's value is a lowercase string, so a member can be looked
    up from its string form with ``VersionStatus("draft")``.

    Attributes:
        DRAFT: The version is still being developed.
        ACTIVE: The version is active and may be used.
        PRODUCTION: The version is deployed in production.
        DEPRECATED: The version is deprecated but remains available.
        ARCHIVED: The version is archived and should not be used.
    """

    DRAFT = "draft"
    ACTIVE = "active"
    PRODUCTION = "production"
    DEPRECATED = "deprecated"
    ARCHIVED = "archived"
@dataclass
class PromptVersion:
    """One stored revision of a prompt template.

    Attributes:
        version_id: Unique identifier for this version (no default; must
            be supplied).
        name: Name of the prompt.
        prompt_type: Type of prompt, e.g. "system", "user", "message"
            (not validated here).
        version: Semantic version string (e.g., "1.2.3").
        template: The prompt template content.
        defaults: Default parameter values.
        validation: Validation configuration, or None.
        metadata: Additional metadata (author, description, etc.).
        created_at: Timestamp when the version was created.
        created_by: Username/ID of the creator, or None.
        parent_version: Previous version ID (for history tracking), or None.
        tags: List of tags (e.g., ["production", "experiment-A"]).
        status: Current status of this version; defaults to ACTIVE.
    """

    version_id: str
    name: str
    prompt_type: str
    version: str
    template: str
    defaults: Dict[str, Any] = field(default_factory=dict)
    validation: Dict[str, Any] | None = None
    metadata: Dict[str, Any] = field(default_factory=dict)
    # NOTE(review): datetime.utcnow yields a naive datetime and is deprecated
    # since Python 3.12; an aware UTC timestamp would change the serialized
    # ISO string, so the original factory is kept.
    created_at: datetime = field(default_factory=datetime.utcnow)
    created_by: str | None = None
    parent_version: str | None = None
    tags: List[str] = field(default_factory=list)
    status: VersionStatus = VersionStatus.ACTIVE

    def to_dict(self) -> Dict[str, Any]:
        """Serialize this version into a plain dictionary for storage.

        ``created_at`` is emitted as an ISO-8601 string and ``status`` as
        the enum's string value. Mutable fields (``defaults``,
        ``validation``, ``metadata``, ``tags``) are included by reference,
        not copied.

        Returns:
            A dictionary keyed by field name.
        """
        return dict(
            version_id=self.version_id,
            name=self.name,
            prompt_type=self.prompt_type,
            version=self.version,
            template=self.template,
            defaults=self.defaults,
            validation=self.validation,
            metadata=self.metadata,
            created_at=self.created_at.isoformat(),
            created_by=self.created_by,
            parent_version=self.parent_version,
            tags=self.tags,
            status=self.status.value,
        )

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "PromptVersion":
        """Build a version from a dictionary such as ``to_dict`` produces.

        Args:
            data: Mapping of field names to values. A string
                ``created_at`` is parsed as ISO-8601 and a string
                ``status`` is converted to ``VersionStatus``; values that
                are already typed pass through untouched.

        Returns:
            A new ``PromptVersion``. The input mapping is not modified.
        """
        # Shallow copy so the replacements below do not leak to the caller.
        payload = data.copy()

        created = payload.get("created_at")
        if isinstance(created, str):
            payload["created_at"] = datetime.fromisoformat(created)

        raw_status = payload.get("status")
        if isinstance(raw_status, str):
            payload["status"] = VersionStatus(raw_status)

        return cls(**payload)
@dataclass
class PromptVariant:
    """A variant in an A/B test experiment.

    Attributes:
        version: Version string of this variant.
        weight: Relative traffic allocation weight; must be strictly
            positive (NaN is rejected as well).
        description: Human-readable description.
        metadata: Additional variant metadata.
    """

    version: str
    weight: float
    description: str = ""
    metadata: Dict[str, Any] = field(default_factory=dict)

    def __post_init__(self):
        """Validate that the weight is strictly positive.

        Raises:
            ValueError: If ``weight`` is zero, negative, or NaN.
        """
        # Phrased as ``not (w > 0)`` instead of ``w <= 0``: every ordering
        # comparison with NaN is False, so ``w <= 0`` would let NaN through.
        if not (self.weight > 0.0):
            raise ValueError(f"Variant weight must be positive, got {self.weight}")

    def to_dict(self) -> Dict[str, Any]:
        """Convert to a dictionary for storage.

        Returns:
            A dictionary with the keys ``version``, ``weight``,
            ``description`` and ``metadata`` (``metadata`` by reference).
        """
        return {
            "version": self.version,
            "weight": self.weight,
            "description": self.description,
            "metadata": self.metadata,
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "PromptVariant":
        """Create a variant from a dictionary of field values.

        Args:
            data: Mapping of field names to values, passed to the
                constructor as keyword arguments.

        Returns:
            A new ``PromptVariant``.

        Raises:
            ValueError: If the weight fails validation.
        """
        return cls(**data)
@dataclass
class PromptExperiment:
    """Configuration for an A/B test experiment.

    Attributes:
        experiment_id: Unique identifier for this experiment.
        name: Name of the prompt being tested.
        prompt_type: Type of prompt, e.g. "system", "user", "message"
            (not validated here).
        variants: List of variants in this experiment.
        traffic_split: Mapping of version to traffic fraction; values must
            be non-negative and sum to 1.0 (within +/- 0.01).
        start_date: When the experiment started.
        end_date: When the experiment ended (None if still running).
        status: Current status string, e.g. "running", "paused",
            "completed" (not validated here).
        metrics: Aggregated metrics for the experiment.
        metadata: Additional experiment metadata.
    """

    experiment_id: str
    name: str
    prompt_type: str
    variants: List[PromptVariant]
    traffic_split: Dict[str, float]
    # NOTE(review): datetime.utcnow yields a naive datetime and is deprecated
    # since Python 3.12; kept because an aware timestamp would change the
    # serialized ISO string.
    start_date: datetime = field(default_factory=datetime.utcnow)
    end_date: datetime | None = None
    status: str = "running"
    metrics: Dict[str, Any] = field(default_factory=dict)
    metadata: Dict[str, Any] = field(default_factory=dict)

    def __post_init__(self):
        """Validate the traffic split.

        Raises:
            ValueError: If the split values do not sum to 1.0 (within
                +/- 0.01), or if any individual value is negative.
        """
        shares = list(self.traffic_split.values())
        total = sum(shares)
        # The sum is checked first so inputs that were already rejected
        # keep producing the same error message.
        if not (0.99 <= total <= 1.01):  # Allow small floating point error
            raise ValueError(
                f"Traffic split must sum to 1.0, got {total}. "
                f"Split: {self.traffic_split}"
            )
        # A split such as {"a": 1.5, "b": -0.5} sums to 1.0 but is not a
        # valid allocation, so each share is checked individually as well.
        if any(share < 0.0 for share in shares):
            raise ValueError(
                f"Traffic split values must be non-negative. "
                f"Split: {self.traffic_split}"
            )

    def to_dict(self) -> Dict[str, Any]:
        """Convert to a dictionary for storage.

        Variants are serialized through their own ``to_dict``; dates are
        emitted as ISO-8601 strings (``end_date`` is None when unset).

        Returns:
            A dictionary keyed by field name.
        """
        return {
            "experiment_id": self.experiment_id,
            "name": self.name,
            "prompt_type": self.prompt_type,
            "variants": [v.to_dict() for v in self.variants],
            "traffic_split": self.traffic_split,
            "start_date": self.start_date.isoformat(),
            "end_date": self.end_date.isoformat() if self.end_date else None,
            "status": self.status,
            "metrics": self.metrics,
            "metadata": self.metadata,
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "PromptExperiment":
        """Create an experiment from a dictionary such as ``to_dict`` produces.

        Args:
            data: Mapping of field names to values. String dates are
                parsed as ISO-8601 and dict entries in ``variants`` are
                converted to ``PromptVariant``; already-typed values pass
                through untouched.

        Returns:
            A new ``PromptExperiment``. The input mapping is not modified.

        Raises:
            ValueError: If the traffic split fails validation.
        """
        # Shallow copy so the replacements below do not leak to the caller.
        data = data.copy()
        # Parse datetimes
        for date_key in ("start_date", "end_date"):
            raw = data.get(date_key)
            if isinstance(raw, str):
                data[date_key] = datetime.fromisoformat(raw)
        # Parse variants
        if data.get("variants"):
            data["variants"] = [
                PromptVariant.from_dict(v) if isinstance(v, dict) else v
                for v in data["variants"]
            ]
        return cls(**data)
206@dataclass
207class PromptMetrics:
208 """Performance metrics for a prompt version.
210 Attributes:
211 version_id: Version ID these metrics belong to
212 total_uses: Total number of times this version was used
213 success_count: Number of successful uses
214 error_count: Number of errors/failures
215 total_response_time: Total response time across all uses (seconds)
216 total_tokens: Total tokens used across all uses
217 user_ratings: List of user ratings (1-5 scale)
218 last_used: Timestamp of last use
219 metadata: Additional custom metrics
220 """
221 version_id: str
222 total_uses: int = 0
223 success_count: int = 0
224 error_count: int = 0
225 total_response_time: float = 0.0
226 total_tokens: int = 0
227 user_ratings: List[float] = field(default_factory=list)
228 last_used: datetime | None = None
229 metadata: Dict[str, Any] = field(default_factory=dict)
231 @property
232 def success_rate(self) -> float:
233 """Calculate success rate."""
234 if self.total_uses == 0:
235 return 0.0
236 return self.success_count / self.total_uses
238 @property
239 def avg_response_time(self) -> float:
240 """Calculate average response time."""
241 if self.total_uses == 0:
242 return 0.0
243 return self.total_response_time / self.total_uses
245 @property
246 def avg_tokens(self) -> float:
247 """Calculate average tokens per use."""
248 if self.total_uses == 0:
249 return 0.0
250 return self.total_tokens / self.total_uses
252 @property
253 def avg_rating(self) -> float:
254 """Calculate average user rating."""
255 if not self.user_ratings:
256 return 0.0
257 return sum(self.user_ratings) / len(self.user_ratings)
259 def to_dict(self) -> Dict[str, Any]:
260 """Convert to dictionary for storage."""
261 return {
262 "version_id": self.version_id,
263 "total_uses": self.total_uses,
264 "success_count": self.success_count,
265 "error_count": self.error_count,
266 "total_response_time": self.total_response_time,
267 "total_tokens": self.total_tokens,
268 "user_ratings": self.user_ratings,
269 "last_used": self.last_used.isoformat() if self.last_used else None,
270 "metadata": self.metadata,
271 # Include computed properties
272 "success_rate": self.success_rate,
273 "avg_response_time": self.avg_response_time,
274 "avg_tokens": self.avg_tokens,
275 "avg_rating": self.avg_rating,
276 }
278 @classmethod
279 def from_dict(cls, data: Dict[str, Any]) -> "PromptMetrics":
280 """Create from dictionary."""
281 data = data.copy()
282 # Parse datetime
283 if isinstance(data.get("last_used"), str):
284 data["last_used"] = datetime.fromisoformat(data["last_used"])
285 # Remove computed properties (they're recalculated)
286 for key in ["success_rate", "avg_response_time", "avg_tokens", "avg_rating"]:
287 data.pop(key, None)
288 return cls(**data)
291@dataclass
292class MetricEvent:
293 """Single event for metrics tracking.
295 Attributes:
296 version_id: Version ID this event belongs to
297 timestamp: When the event occurred
298 success: Whether the use was successful
299 response_time: Response time in seconds (None if not applicable)
300 tokens: Number of tokens used (None if not applicable)
301 user_rating: User rating 1-5 (None if not provided)
302 metadata: Additional event metadata
303 """
304 version_id: str
305 timestamp: datetime = field(default_factory=datetime.utcnow)
306 success: bool = True
307 response_time: float | None = None
308 tokens: int | None = None
309 user_rating: float | None = None
310 metadata: Dict[str, Any] = field(default_factory=dict)
312 def to_dict(self) -> Dict[str, Any]:
313 """Convert to dictionary for storage."""
314 return {
315 "version_id": self.version_id,
316 "timestamp": self.timestamp.isoformat(),
317 "success": self.success,
318 "response_time": self.response_time,
319 "tokens": self.tokens,
320 "user_rating": self.user_rating,
321 "metadata": self.metadata,
322 }
324 @classmethod
325 def from_dict(cls, data: Dict[str, Any]) -> "MetricEvent":
326 """Create from dictionary."""
327 data = data.copy()
328 if isinstance(data.get("timestamp"), str):
329 data["timestamp"] = datetime.fromisoformat(data["timestamp"])
330 return cls(**data)