Coverage for src / content_generator / seo_optimizer.py: 99%

85 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-02-13 20:29 +0800

1"""SEO 优化器. 

2 

3提供关键词密度计算、可读性评分等 SEO 功能。 

4""" 

5 

6from __future__ import annotations 

7 

8import re 

9from dataclasses import dataclass 

10from typing import Any 

11 

12 

@dataclass
class SEOResult:
    """Container for the outcome of one SEO analysis run.

    Attributes:
        score: Overall SEO score.
        keyword_density: Keyword density of the analysed content, in percent.
        readability_score: Readability score of the analysed content.
        title_optimization: Details of the title evaluation.
        meta_description: Meta description produced for the content.
        suggestions: Human-readable improvement hints.
    """

    score: float
    keyword_density: float
    readability_score: float
    title_optimization: dict[str, Any]
    meta_description: str
    suggestions: list[str]

23 

24 

class SEOOptimizer:
    """Compute simple on-page SEO metrics for a piece of text.

    Provides keyword density, a Flesch Reading Ease readability score, a
    title check, a meta description, and a combined score with suggestions.

    Word counts throughout are based on whitespace-separated tokens.
    """

    def __init__(self) -> None:
        # Target keyword density, in percent, used when scoring density that
        # falls outside the accepted range.
        self.ideal_keyword_density = 1.5
        # Maximum title length (characters) before a penalty is applied.
        self.title_max_length = 60
        # Hard cap (characters) for generated meta descriptions.
        self.meta_max_length = 160

    def calculate_keyword_density(self, content: str, keyword: str) -> float:
        """Return the keyword density of *content*, in percent.

        Density is the number of case-insensitive substring occurrences of
        *keyword* divided by the number of whitespace-separated words in
        *content*, times 100.

        Args:
            content: Text to analyse.
            keyword: Keyword or phrase to look for.

        Returns:
            The density as a percentage. ``0.0`` when *content* has no words
            or *keyword* is empty or whitespace-only.
        """
        # ``str.count("")`` returns ``len(s) + 1``, which would report a
        # meaningless, huge density for a blank keyword.
        if not keyword.strip():
            return 0.0

        content_lower = content.lower()
        word_count = len(content_lower.split())
        if word_count == 0:
            return 0.0

        keyword_count = content_lower.count(keyword.lower())
        return (keyword_count / word_count) * 100

    @staticmethod
    def _count_syllables(word: str) -> int:
        """Estimate the number of syllables in an English *word* (at least 1)."""
        # Drop surrounding punctuation so that sentence-final tokens such as
        # "time." are treated like "time" by the silent-"e" rule below.
        word = word.lower().strip(".,;:!?\"'()[]{}")
        vowels = "aeiouy"
        syllables = 0
        prev_was_vowel = False
        for char in word:
            is_vowel = char in vowels
            # Each run of consecutive vowels counts as one syllable.
            if is_vowel and not prev_was_vowel:
                syllables += 1
            prev_was_vowel = is_vowel
        # Treat a trailing "e" as silent.
        if word.endswith("e"):
            syllables -= 1
        return max(1, syllables)

    def calculate_readability(self, content: str) -> float:
        """Return the Flesch Reading Ease score of *content*.

        Sentences are split on runs of ``.``, ``!`` and ``?``; words are
        whitespace-separated tokens; syllables are estimated heuristically.

        Args:
            content: Text to analyse.

        Returns:
            The score clamped to the range 0.0-100.0 (higher is easier to
            read). ``0.0`` when *content* contains no sentences or no words.
        """
        sentences = [s for s in re.split(r"[.!?]+", content) if s.strip()]
        words = content.split()

        if not sentences or not words:
            return 0.0

        total_syllables = sum(self._count_syllables(w) for w in words)
        avg_sentence_length = len(words) / len(sentences)
        avg_syllables_per_word = total_syllables / len(words)

        # Flesch Reading Ease formula.
        score = 206.835 - (1.015 * avg_sentence_length) - (84.6 * avg_syllables_per_word)
        # Float bounds keep the return type a float even when clamped.
        return max(0.0, min(100.0, score))

    def optimize_title(self, title: str, keyword: str) -> dict[str, Any]:
        """Score *title* for length and keyword presence.

        Starts at 100 and subtracts 20 when the title is longer than
        ``self.title_max_length`` characters and 30 when *keyword* does not
        occur in the title (case-insensitive).

        Args:
            title: Title to evaluate.
            keyword: Keyword expected to appear in the title.

        Returns:
            A dict with keys ``"original"`` (the title), ``"score"`` and
            ``"length"`` (title length in characters).
        """
        score = 100.0
        if len(title) > self.title_max_length:
            score -= 20
        if keyword.lower() not in title.lower():
            score -= 30
        return {"original": title, "score": max(0.0, score), "length": len(title)}

    def generate_meta_description(self, content: str, keyword: str) -> str:
        """Build a meta description from the start of *content*.

        Takes the first 150 characters of *content*; if *keyword* does not
        occur in that excerpt (case-insensitive) it is prepended as
        ``"<keyword>: "``.

        Args:
            content: Source text.
            keyword: Keyword that should appear in the description.

        Returns:
            The description, truncated to ``self.meta_max_length`` characters.
        """
        desc = content[:150]
        if keyword.lower() not in desc.lower():
            desc = f"{keyword}: {desc}"
        return desc[: self.meta_max_length]

    def optimize(self, content: str, keyword: str, title: str) -> SEOResult:
        """Run the full SEO analysis and return the combined result.

        Args:
            content: Body text to analyse.
            keyword: Target keyword.
            title: Page title.

        Returns:
            An ``SEOResult`` holding the overall score (rounded to one
            decimal, clamped to 0-100), the individual metrics, the
            generated meta description, and any suggestions.
        """
        suggestions: list[str] = []
        density = self.calculate_keyword_density(content, keyword)
        readability = self.calculate_readability(content)
        title_opt = self.optimize_title(title, keyword)

        if density < 1.0:
            suggestions.append(f"关键词密度偏低 ({density:.2f}%)")
        elif density > 3.0:
            suggestions.append(f"关键词密度过高 ({density:.2f}%)")

        if readability < 60:
            suggestions.append("可读性较低,建议使用更简单的词汇")

        # Density component: a flat 90 anywhere inside the accepted
        # 0.5%-5% band; outside it, 15 points are lost per percentage point
        # of distance from the ideal density.
        if 0.5 <= density <= 5.0:
            density_score = 90.0
        else:
            density_score = max(0.0, 100 - abs(density - self.ideal_keyword_density) * 15)

        # Readability component: the readability score is used as-is.
        readability_score = readability

        # Title component.
        title_score = title_opt["score"]

        # Length penalty, based on the whitespace-separated word count:
        # fewer than 10 words costs 50 points, fewer than 50 costs 20.
        word_count = len(content.split())
        if word_count < 10:
            length_penalty = 50
        elif word_count < 50:
            length_penalty = 20
        else:
            length_penalty = 0

        # Weighted total: density 20%, readability 30%, title 50%.
        score = (density_score * 0.2 + readability_score * 0.3 + title_score * 0.5) - length_penalty
        score = max(0.0, min(100.0, score))

        return SEOResult(
            score=round(score, 1),
            keyword_density=density,
            readability_score=readability,
            title_optimization=title_opt,
            meta_description=self.generate_meta_description(content, keyword),
            suggestions=suggestions,
        )

135 suggestions=suggestions, 

136 )