Coverage for src / content_generator / geo_optimizer.py: 98%

41 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-02-13 20:29 +0800

1"""GEO 优化器. 

2 

3提供生成式引擎优化功能。 

4""" 

5 

6from __future__ import annotations 

7 

8import re 

9from dataclasses import dataclass 

10 

11 

@dataclass
class GEOResult:
    """Result record produced by a GEO optimization analysis.

    A plain data container; the fields are positional in the order
    declared below.
    """

    # Overall score of the analyzed content.
    score: float
    # Entity density measured for the content.
    entity_density: float
    # Semantic completeness score of the content.
    semantic_completeness: float
    # Question coverage value.
    question_coverage: float
    # Short summary text for the content.
    ai_summary: str
    # Improvement suggestions (human-readable strings).
    suggestions: list[str]

22 

23 

class GEOOptimizer:
    """Optimizer for GEO (generative engine optimization).

    Measures entity density and structural completeness of a text and
    packages the measurements into a ``GEOResult``.
    """

    # Maximum number of characters of the content kept in ``ai_summary``.
    SUMMARY_MAX_CHARS = 200

    def __init__(self) -> None:
        # Regexes for Chinese question phrasings ("what is", "how to", "why").
        # They are stored on the instance; no method of this class reads them.
        self.question_patterns = [
            r"什么是\s+(\w+)",
            r"如何\s+(\w+)",
            r"为什么\s+(\w+)",
        ]

    def extract_entities(self, content: str) -> set[str]:
        """Return the distinct entity candidates found in *content*.

        A candidate is a run of one or more whitespace-separated words that
        each start with an ASCII capital letter and otherwise contain only
        ASCII letters. Capitalized sentence-initial words match as well.
        """
        proper_nouns = re.findall(r"\b[A-Z][a-zA-Z]*(?:\s+[A-Z][a-zA-Z]*)*\b", content)
        return set(proper_nouns)

    def calculate_entity_density(self, content: str) -> float:
        """Return distinct entities per 100 whitespace-separated tokens.

        Returns ``0.0`` when *content* has no tokens. Text without any
        whitespace counts as a single token.
        """
        entities = self.extract_entities(content)
        word_count = len(content.split())
        if word_count == 0:
            return 0.0
        return (len(entities) / word_count) * 100

    def calculate_semantic_completeness(self, content: str) -> float:
        """Return a structure score for *content*, starting from 100.

        Penalties: 15 when no ``#``/``##`` followed by whitespace is found,
        15 when no "digits, dot, whitespace" sequence is found, and 20 when
        fewer than three blank-line-separated paragraphs exceed 50
        characters.
        """
        score = 100.0
        # "#" or "##" followed by whitespace, anywhere in the text.
        if not re.search(r"##?\s+", content):
            score -= 15
        # Digits, a dot, then whitespace, anywhere in the text.
        if not re.search(r"\d+\.\s+", content):
            score -= 15
        paragraphs = [p for p in content.split("\n\n") if len(p) > 50]
        if len(paragraphs) < 3:
            score -= 20
        # Float lower bound keeps the return type consistent with the hint.
        return max(0.0, score)

    @staticmethod
    def _summarize(content: str, limit: int = SUMMARY_MAX_CHARS) -> str:
        """Return the first *limit* characters, with "..." only if truncated."""
        if len(content) <= limit:
            return content
        return content[:limit] + "..."

    def optimize(self, content: str, target_questions: list[str] | None = None) -> GEOResult:
        """Run the GEO analysis on *content* and return a ``GEOResult``.

        Args:
            content: Text to analyze.
            target_questions: Accepted for interface compatibility; it is
                not used, and ``question_coverage`` is always ``100.0``.

        Returns:
            A ``GEOResult`` whose ``score`` is
            ``entity_density * 3 + semantic_completeness * 0.4`` capped at
            100, and whose ``ai_summary`` is the leading part of *content*
            (ellipsis appended only when the content was actually cut).
        """
        suggestions: list[str] = []
        entity_density = self.calculate_entity_density(content)
        semantic_score = self.calculate_semantic_completeness(content)

        if entity_density < 5.0:
            # Runtime message, kept verbatim: "entity density is low".
            suggestions.append("实体密度偏低")

        score = entity_density * 3 + semantic_score * 0.4

        return GEOResult(
            # Float cap so ``score`` never becomes the int 100.
            score=min(100.0, score),
            entity_density=entity_density,
            semantic_completeness=semantic_score,
            question_coverage=100.0,
            ai_summary=self._summarize(content),
            suggestions=suggestions,
        )

77 )