Coverage for llm_dataset_engine/utils/logging_utils.py: 31%
26 statements
« prev ^ index » next coverage.py v7.10.7, created at 2025-10-15 18:04 +0200
« prev ^ index » next coverage.py v7.10.7, created at 2025-10-15 18:04 +0200
1"""
2Structured logging utilities.
4Provides consistent logging configuration across the SDK using structlog.
5"""
7import logging
8import sys
9from typing import Any, Dict, Optional
11import structlog
def configure_logging(
    level: str = "INFO",
    json_format: bool = False,
    include_timestamp: bool = True,
) -> None:
    """
    Configure structured logging for the SDK.

    Calls ``logging.basicConfig`` so stdlib loggers emit bare messages to
    stdout, then configures structlog with a processor chain and a filtering
    bound logger that uses the same threshold.

    Args:
        level: Logging level name (DEBUG, INFO, WARNING, ERROR, CRITICAL).
            The name is upper-cased before lookup, so "info" works too.
        json_format: Render each event as JSON instead of console output.
        include_timestamp: Add an ISO-format timestamp to each event.

    Raises:
        AttributeError: If ``level.upper()`` is not an attribute of the
            ``logging`` module.
    """
    # Resolve the level once so the stdlib and structlog thresholds cannot
    # drift apart.
    numeric_level = getattr(logging, level.upper())

    # Set stdlib logging level
    logging.basicConfig(
        format="%(message)s",
        stream=sys.stdout,
        level=numeric_level,
    )

    # Configure structlog processors
    processors = [
        structlog.contextvars.merge_contextvars,
        structlog.processors.add_log_level,
        structlog.processors.StackInfoRenderer(),
    ]

    if include_timestamp:
        processors.append(structlog.processors.TimeStamper(fmt="iso"))

    if json_format:
        # JSONRenderer only serializes the event dict; without this processor
        # an ``exc_info=True`` entry (as set by ``logger.exception``) would be
        # emitted as a bare boolean and the traceback would be lost.
        processors.append(structlog.processors.format_exc_info)
        processors.append(structlog.processors.JSONRenderer())
    else:
        # ConsoleRenderer formats exceptions itself, so no extra processor.
        processors.append(structlog.dev.ConsoleRenderer())

    structlog.configure(
        processors=processors,
        wrapper_class=structlog.make_filtering_bound_logger(numeric_level),
        context_class=dict,
        logger_factory=structlog.PrintLoggerFactory(),
        cache_logger_on_first_use=True,
    )
def get_logger(name: str) -> structlog.BoundLogger:
    """
    Return a structlog logger for the given name.

    Args:
        name: Logger name (typically ``__name__``); forwarded as the
            positional argument to ``structlog.get_logger``.

    Returns:
        The logger object produced by ``structlog.get_logger``.
    """
    logger = structlog.get_logger(name)
    return logger
# Substrings that mark a key as sensitive (matched case-insensitively).
_SENSITIVE_KEY_FRAGMENTS = (
    "api_key",
    "password",
    "secret",
    "token",
    "authorization",
    "credential",
)

# Placeholder written in place of any sensitive value.
_REDACTED = "***REDACTED***"


def _sanitize_value(value: Any) -> Any:
    """Sanitize dicts found in ``value``, descending through lists and tuples."""
    if isinstance(value, dict):
        return sanitize_for_logging(value)
    if isinstance(value, list):
        return [_sanitize_value(item) for item in value]
    if isinstance(value, tuple):
        return tuple(_sanitize_value(item) for item in value)
    return value


def sanitize_for_logging(data: Dict[str, Any]) -> Dict[str, Any]:
    """
    Sanitize sensitive data for logging.

    A value is replaced with ``"***REDACTED***"`` when its key contains any of
    the sensitive fragments (api_key, password, secret, token, authorization,
    credential), compared case-insensitively. Nested dictionaries are
    sanitized recursively, including dictionaries held inside lists or
    tuples. The input is not modified; a new dictionary is returned.

    Args:
        data: Dictionary potentially containing sensitive data

    Returns:
        Sanitized dictionary
    """
    sanitized: Dict[str, Any] = {}
    for key, value in data.items():
        # str() keeps non-string keys (ints, enums, ...) from raising on
        # .lower(); string keys are unaffected.
        key_lower = str(key).lower()
        if any(fragment in key_lower for fragment in _SENSITIVE_KEY_FRAGMENTS):
            sanitized[key] = _REDACTED
        else:
            sanitized[key] = _sanitize_value(value)
    return sanitized