Coverage for src / content_generator / geo_optimizer.py: 98%
41 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-02-13 20:29 +0800
« prev ^ index » next coverage.py v7.13.4, created at 2026-02-13 20:29 +0800
1"""GEO 优化器.
3提供生成式引擎优化功能。
4"""
6from __future__ import annotations
8import re
9from dataclasses import dataclass
@dataclass
class GEOResult:
    """Outcome of a GEO (Generative Engine Optimization) analysis.

    Attributes:
        score: Overall GEO score.
        entity_density: Entity density measured for the content.
        semantic_completeness: Semantic-completeness score of the content.
        question_coverage: Question-coverage score.
        ai_summary: Short summary text derived from the content.
        suggestions: Human-readable improvement suggestions.
    """

    score: float
    entity_density: float
    semantic_completeness: float
    question_coverage: float
    ai_summary: str
    suggestions: list[str]
class GEOOptimizer:
    """GEO (Generative Engine Optimization) analyzer.

    Scores text with lightweight regex heuristics: the density of
    capitalized "entities", and the presence of Markdown structure
    (headings, numbered lists, substantial paragraphs).
    """

    # Maximum number of content characters kept in ``ai_summary``.
    SUMMARY_MAX_CHARS = 200

    # One or more capitalized ASCII words separated by whitespace.
    _ENTITY_RE = re.compile(r"\b[A-Z][a-zA-Z]*(?:\s+[A-Z][a-zA-Z]*)*\b")
    # Markdown ATX heading at the start of a line (leading indent allowed).
    # Anchored so that prose such as "C# is ..." is not taken for a heading.
    _HEADING_RE = re.compile(r"^[ \t]*#{1,6}[ \t]+", re.MULTILINE)
    # Numbered list item at the start of a line (leading indent allowed).
    # Anchored so a sentence ending in a number ("in 2023. Then") is ignored.
    _NUMBERED_ITEM_RE = re.compile(r"^[ \t]*\d+\.[ \t]+", re.MULTILINE)

    def __init__(self) -> None:
        """Set up the Chinese question patterns ("what is", "how", "why").

        NOTE(review): no method of this class reads ``question_patterns``;
        it is kept as a public attribute for any external users.
        """
        self.question_patterns = [
            r"什么是\s+(\w+)",
            r"如何\s+(\w+)",
            r"为什么\s+(\w+)",
        ]

    def extract_entities(self, content: str) -> set[str]:
        """Return the distinct capitalized word runs found in *content*.

        An entity is one or more consecutive ASCII words that each start
        with an uppercase letter, e.g. ``"Bob Smith"``.
        """
        return set(self._ENTITY_RE.findall(content))

    def calculate_entity_density(self, content: str) -> float:
        """Return distinct entities per 100 whitespace-separated words.

        Returns ``0.0`` when *content* contains no words.
        """
        word_count = len(content.split())
        if word_count == 0:
            return 0.0
        return (len(self.extract_entities(content)) / word_count) * 100

    def calculate_semantic_completeness(self, content: str) -> float:
        """Return a 0-100 structure score for *content*.

        Starts at 100 and subtracts 15 when there is no Markdown heading,
        15 when there is no numbered list item, and 20 when fewer than
        three paragraphs (split on blank lines) exceed 50 characters.
        """
        score = 100.0
        if not self._HEADING_RE.search(content):
            score -= 15
        if not self._NUMBERED_ITEM_RE.search(content):
            score -= 15
        long_paragraphs = [p for p in content.split("\n\n") if len(p) > 50]
        if len(long_paragraphs) < 3:
            score -= 20
        return max(0.0, score)

    @classmethod
    def _build_summary(cls, content: str) -> str:
        """Return *content* cut to ``SUMMARY_MAX_CHARS``, with "..." only if cut."""
        if len(content) <= cls.SUMMARY_MAX_CHARS:
            return content
        return content[: cls.SUMMARY_MAX_CHARS] + "..."

    def optimize(self, content: str, target_questions: list[str] | None = None) -> GEOResult:
        """Run the GEO analysis on *content* and return a ``GEOResult``.

        The overall score is ``entity_density * 3 + semantic_score * 0.4``,
        capped at 100. A suggestion is added when entity density is below 5.

        Args:
            content: Text to analyze.
            target_questions: Currently unused; ``question_coverage`` is
                always reported as ``100.0``.
        """
        suggestions: list[str] = []
        entity_density = self.calculate_entity_density(content)
        semantic_score = self.calculate_semantic_completeness(content)

        if entity_density < 5.0:
            # Message text: "entity density is low".
            suggestions.append("实体密度偏低")

        score = entity_density * 3 + semantic_score * 0.4

        return GEOResult(
            score=min(100.0, score),
            entity_density=entity_density,
            semantic_completeness=semantic_score,
            question_coverage=100.0,
            ai_summary=self._build_summary(content),
            suggestions=suggestions,
        )