Coverage for src/dataknobs_llm/prompts/versioning/types.py: 97%
137 statements
« prev ^ index » next coverage.py v7.11.0, created at 2025-10-31 16:07 -0600
« prev ^ index » next coverage.py v7.11.0, created at 2025-10-31 16:07 -0600
1"""Core type definitions for prompt versioning and A/B testing.
3This module defines:
4- Version data structures
5- Experiment configurations
6- Metrics tracking types
7- Custom exceptions
8"""
from dataclasses import dataclass, field
from datetime import datetime, timezone
from enum import Enum
from typing import Any, Dict, List, Optional
class VersioningError(Exception):
    """Root exception type for failures raised by the versioning subsystem."""
class VersionStatus(Enum):
    """Lifecycle state of a prompt version.

    Each member's value is its lowercase string form.

    Members:
        DRAFT: Still being developed.
        ACTIVE: Usable, but not necessarily deployed.
        PRODUCTION: Deployed to production.
        DEPRECATED: Superseded, yet still available.
        ARCHIVED: Retired; should no longer be used.
    """

    DRAFT = "draft"
    ACTIVE = "active"
    PRODUCTION = "production"
    DEPRECATED = "deprecated"
    ARCHIVED = "archived"
@dataclass
class PromptVersion:
    """Represents a versioned prompt.

    Attributes:
        version_id: Unique identifier for this version (auto-generated)
        name: Name of the prompt
        prompt_type: Type of prompt ("system", "user", "message")
        version: Semantic version string (e.g., "1.2.3")
        template: The prompt template content
        defaults: Default parameter values
        validation: Validation configuration
        metadata: Additional metadata (author, description, etc.)
        created_at: Timestamp when version was created (naive UTC by default)
        created_by: Username/ID of creator
        parent_version: Previous version ID (for history tracking)
        tags: List of tags (e.g., ["production", "experiment-A"])
        status: Current status of this version
    """

    version_id: str
    name: str
    prompt_type: str
    version: str
    template: str
    defaults: Dict[str, Any] = field(default_factory=dict)
    validation: Optional[Dict[str, Any]] = None
    metadata: Dict[str, Any] = field(default_factory=dict)
    # datetime.utcnow() is deprecated since Python 3.12. Dropping tzinfo keeps
    # the value naive-UTC, exactly as before, so stored ISO strings and
    # comparisons against existing naive timestamps are unaffected.
    created_at: datetime = field(
        default_factory=lambda: datetime.now(timezone.utc).replace(tzinfo=None)
    )
    created_by: Optional[str] = None
    parent_version: Optional[str] = None
    tags: List[str] = field(default_factory=list)
    status: VersionStatus = VersionStatus.ACTIVE

    def to_dict(self) -> Dict[str, Any]:
        """Convert to dictionary for storage.

        Returns:
            A dict keyed by field name. ``created_at`` is rendered as an ISO
            8601 string and ``status`` as its enum value; every other field is
            passed through as-is (containers are not copied).
        """
        return {
            "version_id": self.version_id,
            "name": self.name,
            "prompt_type": self.prompt_type,
            "version": self.version,
            "template": self.template,
            "defaults": self.defaults,
            "validation": self.validation,
            "metadata": self.metadata,
            "created_at": self.created_at.isoformat(),
            "created_by": self.created_by,
            "parent_version": self.parent_version,
            "tags": self.tags,
            "status": self.status.value,
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "PromptVersion":
        """Create from dictionary.

        Args:
            data: Mapping of field names to values, typically produced by
                ``to_dict``. It is shallow-copied, so the caller's dict is
                not modified.

        Returns:
            A new ``PromptVersion``.

        Raises:
            ValueError: If ``created_at`` is a string that is not valid ISO
                format, or ``status`` is a string that is not a valid
                ``VersionStatus`` value.
            TypeError: If ``data`` has keys that are not fields of this class
                or lacks a required field.
        """
        data = data.copy()
        # String timestamps are parsed; datetime objects pass through.
        if isinstance(data.get("created_at"), str):
            data["created_at"] = datetime.fromisoformat(data["created_at"])
        # String statuses are looked up by value; enum members pass through.
        if isinstance(data.get("status"), str):
            data["status"] = VersionStatus(data["status"])
        return cls(**data)
@dataclass
class PromptVariant:
    """A variant in an A/B test experiment.

    Attributes:
        version: Version string of this variant
        weight: Traffic allocation weight (relative weight, must be > 0.0)
            Weights are normalized to sum to 1.0 when creating experiment
        description: Human-readable description
        metadata: Additional variant metadata
    """

    version: str
    weight: float
    description: str = ""
    metadata: Dict[str, Any] = field(default_factory=dict)

    def __post_init__(self):
        """Validate weight is positive.

        Raises:
            ValueError: If ``weight`` is zero, negative, or NaN.
        """
        # Written as "not (> 0)" rather than "<= 0": every comparison with
        # NaN is False, so the "<=" form would silently accept a NaN weight.
        if not self.weight > 0.0:
            raise ValueError(f"Variant weight must be positive, got {self.weight}")

    def to_dict(self) -> Dict[str, Any]:
        """Convert to dictionary for storage.

        Returns:
            A dict with the keys ``version``, ``weight``, ``description`` and
            ``metadata`` (the metadata dict is not copied).
        """
        return {
            "version": self.version,
            "weight": self.weight,
            "description": self.description,
            "metadata": self.metadata,
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "PromptVariant":
        """Create from dictionary.

        Args:
            data: Mapping of field names to values, typically produced by
                ``to_dict``.

        Returns:
            A new ``PromptVariant``.

        Raises:
            ValueError: If the weight fails validation.
            TypeError: If ``data`` has keys that are not fields of this class
                or lacks a required field.
        """
        return cls(**data)
@dataclass
class PromptExperiment:
    """Configuration for an A/B test experiment.

    Attributes:
        experiment_id: Unique identifier for this experiment
        name: Name of the prompt being tested
        prompt_type: Type of prompt ("system", "user", "message")
        variants: List of variants in this experiment
        traffic_split: Mapping of version to traffic percentage
        start_date: When experiment started (naive UTC by default)
        end_date: When experiment ended (None if still running)
        status: Current status ("running", "paused", "completed")
        metrics: Aggregated metrics for the experiment
        metadata: Additional experiment metadata
    """

    experiment_id: str
    name: str
    prompt_type: str
    variants: List[PromptVariant]
    traffic_split: Dict[str, float]
    # datetime.utcnow() is deprecated since Python 3.12. Dropping tzinfo keeps
    # the value naive-UTC, exactly as before.
    start_date: datetime = field(
        default_factory=lambda: datetime.now(timezone.utc).replace(tzinfo=None)
    )
    end_date: Optional[datetime] = None
    status: str = "running"
    metrics: Dict[str, Any] = field(default_factory=dict)
    metadata: Dict[str, Any] = field(default_factory=dict)

    def __post_init__(self):
        """Validate the traffic split.

        Raises:
            ValueError: If the shares do not sum to 1.0 (within +/- 0.01), or
                if any individual share is negative.
        """
        total = sum(self.traffic_split.values())
        if not (0.99 <= total <= 1.01):  # Allow small floating point error
            raise ValueError(
                f"Traffic split must sum to 1.0, got {total}. "
                f"Split: {self.traffic_split}"
            )
        # A valid sum is not enough on its own: negative shares can cancel
        # out (e.g. 1.5 and -0.5) and still total 1.0.
        if any(share < 0 for share in self.traffic_split.values()):
            raise ValueError(
                f"Traffic split values must be non-negative. "
                f"Split: {self.traffic_split}"
            )

    def to_dict(self) -> Dict[str, Any]:
        """Convert to dictionary for storage.

        Returns:
            A dict keyed by field name. Variants are serialized through their
            own ``to_dict``; dates become ISO 8601 strings, with ``end_date``
            left as ``None`` when unset.
        """
        return {
            "experiment_id": self.experiment_id,
            "name": self.name,
            "prompt_type": self.prompt_type,
            "variants": [v.to_dict() for v in self.variants],
            "traffic_split": self.traffic_split,
            "start_date": self.start_date.isoformat(),
            "end_date": self.end_date.isoformat() if self.end_date else None,
            "status": self.status,
            "metrics": self.metrics,
            "metadata": self.metadata,
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "PromptExperiment":
        """Create from dictionary.

        Args:
            data: Mapping of field names to values, typically produced by
                ``to_dict``. It is shallow-copied, so the caller's dict is
                not modified.

        Returns:
            A new ``PromptExperiment``.

        Raises:
            ValueError: If a date string is not valid ISO format, or the
                traffic split or a variant weight fails validation.
            TypeError: If ``data`` has keys that are not fields of this class
                or lacks a required field.
        """
        data = data.copy()
        # String dates are parsed; datetime objects and None pass through.
        if isinstance(data.get("start_date"), str):
            data["start_date"] = datetime.fromisoformat(data["start_date"])
        if isinstance(data.get("end_date"), str):
            data["end_date"] = datetime.fromisoformat(data["end_date"])
        # Dict entries are rebuilt as PromptVariant; ready-made instances are
        # kept untouched.
        if data.get("variants"):
            data["variants"] = [
                PromptVariant.from_dict(v) if isinstance(v, dict) else v
                for v in data["variants"]
            ]
        return cls(**data)
@dataclass
class PromptMetrics:
    """Accumulated usage statistics for a single prompt version.

    Attributes:
        version_id: Version ID the statistics belong to
        total_uses: How many times the version has been used
        success_count: How many uses succeeded
        error_count: How many uses failed
        total_response_time: Summed response time over all uses (seconds)
        total_tokens: Summed token count over all uses
        user_ratings: Individual user ratings (1-5 scale)
        last_used: When the version was last used, if ever
        metadata: Free-form extra metrics
    """

    version_id: str
    total_uses: int = 0
    success_count: int = 0
    error_count: int = 0
    total_response_time: float = 0.0
    total_tokens: int = 0
    user_ratings: List[float] = field(default_factory=list)
    last_used: Optional[datetime] = None
    metadata: Dict[str, Any] = field(default_factory=dict)

    def _per_use(self, amount: float) -> float:
        """Divide ``amount`` by ``total_uses``, giving 0.0 when unused."""
        return 0.0 if self.total_uses == 0 else amount / self.total_uses

    @property
    def success_rate(self) -> float:
        """Fraction of uses that succeeded (0.0 when never used)."""
        return self._per_use(self.success_count)

    @property
    def avg_response_time(self) -> float:
        """Mean response time per use in seconds (0.0 when never used)."""
        return self._per_use(self.total_response_time)

    @property
    def avg_tokens(self) -> float:
        """Mean token count per use (0.0 when never used)."""
        return self._per_use(self.total_tokens)

    @property
    def avg_rating(self) -> float:
        """Mean of the recorded user ratings (0.0 when there are none)."""
        ratings = self.user_ratings
        return sum(ratings) / len(ratings) if ratings else 0.0

    def to_dict(self) -> Dict[str, Any]:
        """Serialize the stored fields plus the derived averages."""
        stamp = None
        if self.last_used:
            stamp = self.last_used.isoformat()
        payload: Dict[str, Any] = {
            "version_id": self.version_id,
            "total_uses": self.total_uses,
            "success_count": self.success_count,
            "error_count": self.error_count,
            "total_response_time": self.total_response_time,
            "total_tokens": self.total_tokens,
            "user_ratings": self.user_ratings,
            "last_used": stamp,
            "metadata": self.metadata,
        }
        # Derived values are appended after the stored fields.
        payload["success_rate"] = self.success_rate
        payload["avg_response_time"] = self.avg_response_time
        payload["avg_tokens"] = self.avg_tokens
        payload["avg_rating"] = self.avg_rating
        return payload

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "PromptMetrics":
        """Rebuild an instance from a dict such as ``to_dict`` produces."""
        # The derived values are recomputed from the stored fields, so they
        # are dropped rather than passed to the constructor.
        derived = ("success_rate", "avg_response_time", "avg_tokens", "avg_rating")
        kwargs = {key: value for key, value in data.items() if key not in derived}
        stamp = kwargs.get("last_used")
        if isinstance(stamp, str):
            kwargs["last_used"] = datetime.fromisoformat(stamp)
        return cls(**kwargs)
@dataclass
class MetricEvent:
    """Single event for metrics tracking.

    Attributes:
        version_id: Version ID this event belongs to
        timestamp: When the event occurred (naive UTC by default)
        success: Whether the use was successful
        response_time: Response time in seconds (None if not applicable)
        tokens: Number of tokens used (None if not applicable)
        user_rating: User rating 1-5 (None if not provided)
        metadata: Additional event metadata
    """

    version_id: str
    # datetime.utcnow() is deprecated since Python 3.12. Dropping tzinfo keeps
    # the value naive-UTC, exactly as before.
    timestamp: datetime = field(
        default_factory=lambda: datetime.now(timezone.utc).replace(tzinfo=None)
    )
    success: bool = True
    response_time: Optional[float] = None
    tokens: Optional[int] = None
    user_rating: Optional[float] = None
    metadata: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Convert to dictionary for storage.

        Returns:
            A dict keyed by field name, with ``timestamp`` rendered as an ISO
            8601 string (the metadata dict is not copied).
        """
        return {
            "version_id": self.version_id,
            "timestamp": self.timestamp.isoformat(),
            "success": self.success,
            "response_time": self.response_time,
            "tokens": self.tokens,
            "user_rating": self.user_rating,
            "metadata": self.metadata,
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "MetricEvent":
        """Create from dictionary.

        Args:
            data: Mapping of field names to values, typically produced by
                ``to_dict``. It is shallow-copied, so the caller's dict is
                not modified.

        Returns:
            A new ``MetricEvent``.

        Raises:
            ValueError: If ``timestamp`` is a string that is not valid ISO
                format.
            TypeError: If ``data`` has keys that are not fields of this class
                or lacks a required field.
        """
        data = data.copy()
        # String timestamps are parsed; datetime objects pass through.
        if isinstance(data.get("timestamp"), str):
            data["timestamp"] = datetime.fromisoformat(data["timestamp"])
        return cls(**data)