Coverage for src / content_generator / seo_optimizer.py: 99%
85 statements
Generated by coverage.py v7.13.4, created at 2026-02-13 20:29 +0800
"""SEO optimizer.

Provides SEO utilities such as keyword-density calculation and readability scoring.
"""
6from __future__ import annotations
8import re
9from dataclasses import dataclass
10from typing import Any
@dataclass
class SEOResult:
    """Outcome of one SEO analysis pass over a piece of content.

    Field order is part of the constructor signature and must not change.
    """

    # Overall score, clamped to the 0-100 range and rounded to one decimal.
    score: float
    # Keyword occurrences per whitespace-separated token, as a percentage.
    keyword_density: float
    # Readability score, clamped to the 0-100 range.
    readability_score: float
    # Title analysis mapping with the keys "original", "score" and "length".
    title_optimization: dict[str, Any]
    # Generated meta description text.
    meta_description: str
    # Human-readable improvement hints; empty when nothing was flagged.
    suggestions: list[str]
class SEOOptimizer:
    """Heuristic SEO scorer built from keyword density, readability and title checks.

    Attributes:
        ideal_keyword_density: Target keyword density, in percent, used when
            scoring densities that fall outside the accepted range.
        title_max_length: Titles longer than this many characters are penalised.
        meta_max_length: Hard cap, in characters, on generated meta descriptions.
    """

    # Characters removed from the end of a token before syllable counting.
    _TRAILING_PUNCTUATION = ".,;:!?\"')]}"

    def __init__(self) -> None:
        """Initialise the default thresholds."""
        self.ideal_keyword_density = 1.5
        self.title_max_length = 60
        self.meta_max_length = 160

    def calculate_keyword_density(self, content: str, keyword: str) -> float:
        """Return the keyword density of *content* as a percentage.

        Density is the number of case-insensitive, non-overlapping substring
        occurrences of *keyword* divided by the number of whitespace-separated
        tokens in *content*, multiplied by 100.

        Returns:
            The density in percent, or ``0.0`` when *content* has no tokens or
            *keyword* is empty or whitespace-only.
        """
        needle = keyword.lower()
        # str.count("") returns len(text) + 1, which would report an absurd
        # density for a missing keyword, so treat a blank keyword as "absent".
        if not needle.strip():
            return 0.0
        haystack = content.lower()
        word_count = len(haystack.split())
        if word_count == 0:
            return 0.0
        return (haystack.count(needle) / word_count) * 100

    @classmethod
    def _count_syllables(cls, word: str) -> int:
        """Estimate the syllable count of *word* by counting vowel groups."""
        # Strip trailing punctuation so the silent-"e" rule below also applies
        # to sentence-final tokens such as "cake.".
        letters = word.lower().rstrip(cls._TRAILING_PUNCTUATION)
        syllables = 0
        prev_was_vowel = False
        for char in letters:
            is_vowel = char in "aeiouy"
            if is_vowel and not prev_was_vowel:
                syllables += 1
            prev_was_vowel = is_vowel
        # A final "e" is treated as silent.
        if letters.endswith("e"):
            syllables -= 1
        # Every token counts as at least one syllable.
        return max(1, syllables)

    def calculate_readability(self, content: str) -> float:
        """Return the Flesch Reading Ease score of *content*, clamped to 0-100.

        Sentences are split on runs of ``.``, ``!`` and ``?``; words are
        whitespace-separated tokens; syllables are estimated heuristically.

        Returns:
            A float between 0.0 and 100.0 (higher means easier to read), or
            ``0.0`` when *content* contains no sentences or no words.
        """
        sentences = [s for s in re.split(r"[.!?]+", content) if s.strip()]
        words = content.split()
        if not sentences or not words:
            return 0.0

        total_syllables = sum(self._count_syllables(w) for w in words)
        avg_sentence_length = len(words) / len(sentences)
        avg_syllables_per_word = total_syllables / len(words)

        score = 206.835 - (1.015 * avg_sentence_length) - (84.6 * avg_syllables_per_word)
        return max(0.0, min(100.0, score))

    def optimize_title(self, title: str, keyword: str) -> dict[str, Any]:
        """Score *title* for length and keyword presence.

        Starts at 100, subtracts 20 when the title exceeds
        ``title_max_length`` characters and 30 when *keyword* does not occur in
        it (case-insensitive).

        Returns:
            A dict with the keys ``"original"`` (the title), ``"score"`` and
            ``"length"`` (the title length in characters).
        """
        score = 100.0
        if len(title) > self.title_max_length:
            score -= 20
        if keyword.lower() not in title.lower():
            score -= 30
        return {"original": title, "score": max(0.0, score), "length": len(title)}

    def generate_meta_description(self, content: str, keyword: str) -> str:
        """Build a meta description from the start of *content*.

        Takes the first 150 characters of *content*, prefixes them with
        ``"<keyword>: "`` when the keyword is not already present
        (case-insensitive), and truncates the result to ``meta_max_length``.
        """
        desc = content[:150]
        if keyword.lower() not in desc.lower():
            desc = f"{keyword}: {desc}"
        return desc[: self.meta_max_length]

    def _density_score(self, density: float) -> float:
        """Convert a keyword density (percent) into a 0-100 component score."""
        # Any density within 0.5%-5% gets a flat 90.
        if 0.5 <= density <= 5.0:
            return 90.0
        # Outside that range, lose 15 points per percentage point of distance
        # from the ideal density, never dropping below zero.
        return max(0.0, 100 - abs(density - self.ideal_keyword_density) * 15)

    @staticmethod
    def _length_penalty(word_count: int) -> int:
        """Return the points deducted for short content, based on word count."""
        if word_count < 10:
            return 50
        if word_count < 50:
            return 20
        return 0

    def optimize(self, content: str, keyword: str, title: str) -> SEOResult:
        """Run the full SEO analysis and return the combined result.

        The overall score weights the density score at 20%, readability at 30%
        and the title score at 50%, subtracts the short-content penalty, and is
        clamped to 0-100 and rounded to one decimal place.

        Args:
            content: Body text to analyse.
            keyword: Target keyword.
            title: Title to check against the keyword and length limit.

        Returns:
            An ``SEOResult`` with the score, the raw metrics, the title
            analysis, a generated meta description and any suggestions.
        """
        density = self.calculate_keyword_density(content, keyword)
        readability = self.calculate_readability(content)
        title_opt = self.optimize_title(title, keyword)

        suggestions: list[str] = []
        if density < 1.0:
            suggestions.append(f"关键词密度偏低 ({density:.2f}%)")
        elif density > 3.0:
            suggestions.append(f"关键词密度过高 ({density:.2f}%)")
        if readability < 60:
            suggestions.append("可读性较低,建议使用更简单的词汇")

        weighted = (
            self._density_score(density) * 0.2
            + readability * 0.3
            + title_opt["score"] * 0.5
        )
        # The penalty is driven by whitespace-separated word count, not characters.
        score = weighted - self._length_penalty(len(content.split()))
        score = max(0.0, min(100.0, score))

        return SEOResult(
            score=round(score, 1),
            keyword_density=density,
            readability_score=readability,
            title_optimization=title_opt,
            meta_description=self.generate_meta_description(content, keyword),
            suggestions=suggestions,
        )