Coverage for llm_dataset_engine/utils/logging_utils.py: 31%

26 statements  

« prev     ^ index     » next       coverage.py v7.10.7, created at 2025-10-15 18:04 +0200

1""" 

2Structured logging utilities. 

3 

4Provides consistent logging configuration across the SDK using structlog. 

5""" 

6 

7import logging 

8import sys 

9from typing import Any, Dict, Optional 

10 

11import structlog 

12 

13 

def configure_logging(
    level: str = "INFO",
    json_format: bool = False,
    include_timestamp: bool = True,
) -> None:
    """
    Set up stdlib logging and structlog for the SDK.

    Args:
        level: Name of the logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL).
            It is upper-cased and looked up as an attribute of the ``logging``
            module, so an unknown name raises ``AttributeError``.
        json_format: Render events with ``JSONRenderer`` when True, otherwise
            with the development ``ConsoleRenderer``
        include_timestamp: Add an ISO-format timestamp processor to the chain
    """
    # Resolve the level name once; stdlib logging and structlog both need it.
    numeric_level = getattr(logging, level.upper())

    logging.basicConfig(format="%(message)s", stream=sys.stdout, level=numeric_level)

    # Processors run in list order; the renderer has to come last.
    pipeline = [
        structlog.contextvars.merge_contextvars,
        structlog.processors.add_log_level,
        structlog.processors.StackInfoRenderer(),
    ]
    if include_timestamp:
        pipeline += [structlog.processors.TimeStamper(fmt="iso")]
    pipeline.append(
        structlog.processors.JSONRenderer()
        if json_format
        else structlog.dev.ConsoleRenderer()
    )

    bound_logger_cls = structlog.make_filtering_bound_logger(numeric_level)
    structlog.configure(
        processors=pipeline,
        wrapper_class=bound_logger_cls,
        context_class=dict,
        logger_factory=structlog.PrintLoggerFactory(),
        cache_logger_on_first_use=True,
    )

58 

59 

def get_logger(name: str) -> structlog.BoundLogger:
    """
    Return a structlog logger for the given name.

    Args:
        name: Name passed through to ``structlog.get_logger`` (typically __name__)

    Returns:
        The logger object produced by ``structlog.get_logger``
    """
    logger = structlog.get_logger(name)
    return logger

71 

72 

# Substrings that mark a key as sensitive; matched case-insensitively.
_SENSITIVE_KEY_FRAGMENTS = (
    "api_key",
    "password",
    "secret",
    "token",
    "authorization",
    "credential",
)


def _sanitize_value(value: Any) -> Any:
    """Sanitize dicts found inside *value*, descending into lists and plain tuples."""
    if isinstance(value, dict):
        return sanitize_for_logging(value)
    if isinstance(value, list):
        return [_sanitize_value(item) for item in value]
    # Exact-type check on purpose: namedtuples take positional constructor
    # arguments and cannot be rebuilt generically, so they pass through as-is.
    if type(value) is tuple:
        return tuple(_sanitize_value(item) for item in value)
    return value


def sanitize_for_logging(data: Dict[str, Any]) -> Dict[str, Any]:
    """
    Sanitize sensitive data for logging.

    Any key whose lower-cased text contains one of the sensitive fragments
    (api_key, password, secret, token, authorization, credential) has its
    value replaced by ``"***REDACTED***"``. Other values are copied through,
    with nested dicts sanitized recursively, including dicts held inside
    lists and plain tuples. The input is not modified.

    Args:
        data: Dictionary potentially containing sensitive data

    Returns:
        New sanitized dictionary with the same keys as ``data``
    """
    sanitized: Dict[str, Any] = {}
    for key, value in data.items():
        # str() keeps non-string keys (e.g. ints) from raising on .lower().
        key_lower = str(key).lower()
        if any(fragment in key_lower for fragment in _SENSITIVE_KEY_FRAGMENTS):
            sanitized[key] = "***REDACTED***"
        else:
            sanitized[key] = _sanitize_value(value)

    return sanitized

103