Coverage for sphinxlint/checkers.py: 89%

252 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-11-24 18:46 +0100

1import os 

2 

3import regex as re 

4 

5from sphinxlint import rst 

6from sphinxlint.utils import ( 

7 clean_paragraph, 

8 escape2null, 

9 hide_non_rst_blocks, 

10 looks_like_glued, 

11 match_size, 

12 paragraphs, 

13) 

14 

15 

# Registry of every decorated checker, keyed by its dashed public name.
all_checkers = {}


def checker(*suffixes, **kwds):
    """Build a decorator that registers a ``check_*`` function as a checker.

    ``suffixes`` are stored on the function as ``func.suffixes``. The
    keyword arguments ``enabled`` and ``rst_only`` (both defaulting to
    ``True``) are stored as attributes of the same name; any other keyword
    is ignored.

    The decorated function is returned unchanged apart from the added
    attributes and is recorded in ``all_checkers`` under ``func.name``: the
    function name without its ``check_`` prefix and with underscores turned
    into dashes.

    Raises ``ValueError`` if the function name does not start with
    ``check_``.
    """
    prefix = "check_"
    defaults = {"enabled": True, "rst_only": True}

    def register(func):
        function_name = func.__name__
        if not function_name.startswith(prefix):
            raise ValueError("Checker names should start with 'check_'.")
        for prop in defaults:
            value = kwds[prop] if prop in kwds else defaults[prop]
            setattr(func, prop, value)
        func.suffixes = suffixes
        func.name = function_name[len(prefix) :].replace("_", "-")
        all_checkers[func.name] = func
        return func

    return register

34 

35 

@checker(".py", rst_only=False)
def check_python_syntax(file, lines, options=None):
    """Report Python source that does not compile.

    The lines are joined and handed to ``compile()``. A carriage return
    anywhere in the source is reported at line 0 (except when ``os.name``
    is ``"nt"``), and carriage returns are stripped before compiling.

    Yields ``(lineno, message)`` tuples.
    """
    source = "".join(lines)
    if "\r" in source and os.name != "nt":
        yield 0, "\\r in code file"
    try:
        # Removing "\r" is a no-op when there is none in the source.
        compile(source.replace("\r", ""), file, "exec")
    except SyntaxError as exc:
        yield exc.lineno, f"not compilable: {exc}"

48 

49 

@checker(".rst", ".po")
def check_missing_backtick_after_role(file, lines, options=None):
    """Search for roles missing their closing backticks.

    Bad:  :fct:`foo
    Good: :fct:`foo`

    At most one error is reported per paragraph, and the whole check stops
    at the first paragraph that looks like a table.
    """
    for first_lno, text in paragraphs(lines):
        if rst.paragraph_looks_like_a_table(text):
            return  # we don't handle tables yet.
        found = rst.ROLE_MISSING_CLOSING_BACKTICK_RE.search(text)
        if not found:
            continue
        # Newlines before the match give its offset inside the paragraph.
        lines_before = text.count("\n", 0, found.start())
        yield (
            first_lno + lines_before,
            f"role missing closing backtick: {found.group(0)!r}",
        )

64 

65 

# An inline literal plus the single character that follows it.
_RST_ROLE_RE = re.compile("``.+?``(?!`).", flags=re.DOTALL)
_END_STRING_SUFFIX_RE = re.compile(rst.END_STRING_SUFFIX)


@checker(".rst", ".po")
def check_missing_space_after_literal(file, lines, options=None):
    r"""Search for inline literals immediately followed by a character.

    Bad:  ``items``s
    Good: ``items``\ s

    The check stops at the first paragraph that looks like a table.
    """
    for first_lno, text in paragraphs(lines):
        if rst.paragraph_looks_like_a_table(text):
            return  # we don't handle tables yet.
        text = clean_paragraph(text)
        for literal in _RST_ROLE_RE.finditer(text):
            matched = literal.group(0)
            # The last matched character is the one right after the literal.
            if _END_STRING_SUFFIX_RE.match(matched[-1]):
                continue
            lines_before = text.count("\n", 0, literal.start())
            yield (
                first_lno + lines_before,
                f"inline literal missing (escaped) space after literal: {matched!r}",
            )

89 

90 

# Exactly two backticks, neither preceded nor followed by another backtick.
_LONE_DOUBLE_BACKTICK_RE = re.compile("(?<!`)``(?!`)")


@checker(".rst", ".po")
def check_unbalanced_inline_literals_delimiters(file, lines, options=None):
    r"""Search for unbalanced inline literals delimiters.

    Bad:  ``hello`` world``
    Good: ``hello`` world

    Each paragraph is passed through ``clean_paragraph`` first; every double
    backtick still present afterwards is reported. The check stops at the
    first paragraph that looks like a table.
    """
    for first_lno, text in paragraphs(lines):
        if rst.paragraph_looks_like_a_table(text):
            return  # we don't handle tables yet.
        text = clean_paragraph(text)
        for stray in _LONE_DOUBLE_BACKTICK_RE.finditer(text):
            lines_before = text.count("\n", 0, stray.start())
            yield (
                first_lno + lines_before,
                "found an unbalanced inline literal markup.",
            )

111 

112 

# Search helpers: is there a role tag right at the end / start of a string?
_ends_with_role_tag = re.compile(rst.ROLE_TAG + "$").search
_starts_with_role_tag = re.compile("^" + rst.ROLE_TAG).search


@checker(".rst", ".po", enabled=False)
def check_default_role(file, lines, options=None):
    """Search for default roles (but they are allowed in many projects).

    Bad:  `print`
    Good: ``print``

    Only the first interpreted text of each line is examined. The check
    stops at the first such line that looks like a table.
    """
    for lno, raw_line in enumerate(lines, start=1):
        line = escape2null(clean_paragraph(raw_line))
        found = rst.INTERPRETED_TEXT_RE.search(line)
        if not found:
            continue
        if rst.line_looks_like_a_table(line):
            return  # we don't handle tables yet.
        if _ends_with_role_tag(line[: found.start()]):
            # It's not a default role: a role tag precedes it.
            continue
        if _starts_with_role_tag(line[found.end() :]):
            # It's not a default role: a role tag follows it.
            continue
        markup = found.group(0)
        if markup.startswith("``") and markup.endswith("``"):
            # It's not a default role: it's an inline literal.
            continue
        yield lno, "default role used (hint: for inline literals, use double backticks)"

143 

144 

@checker(".rst", ".po")
def check_directive_with_three_dots(file, lines, options=None):
    """Search for directives with three dots instead of two.

    Bad:  ... versionchanged:: 3.6
    Good: .. versionchanged:: 3.6

    Yields one ``(lineno, message)`` tuple per offending line.
    """
    message = "directive should start with two dots, not three."
    for lno, line in enumerate(lines, start=1):
        if not rst.THREE_DOT_DIRECTIVE_RE.search(line):
            continue
        yield lno, message

155 

156 

@checker(".rst", ".po")
def check_directive_missing_colons(file, lines, options=None):
    """Search for directives wrongly typed as comments.

    Bad:  .. versionchanged 3.6.
    Good: .. versionchanged:: 3.6

    Yields one ``(lineno, message)`` tuple per offending line.
    """
    message = "comment seems to be intended as a directive"
    for lno, line in enumerate(lines, start=1):
        if not rst.SEEMS_DIRECTIVE_RE.search(line):
            continue
        yield lno, message

167 

168 

# A role body may itself contain a nested role, so this is valid:
#    The :literal:`:exc:`Exceptions``
# while this is not, because a character is glued after the role:
#    The :literal:`:exc:`Exceptions``s
_ROLE_BODY = rf"([^`]|\s`+|\\`|:{rst.SIMPLENAME}:`([^`]|\s`+|\\`)+`)+"
_ALLOWED_AFTER_ROLE = (
    rst.ASCII_ALLOWED_AFTER_INLINE_MARKUP + rst.UNICODE_ALLOWED_AFTER_INLINE_MARKUP + r"|\s"
)
_SUSPICIOUS_ROLE = re.compile(f":{rst.SIMPLENAME}:`{_ROLE_BODY}`[^{_ALLOWED_AFTER_ROLE}]")


@checker(".rst", ".po")
def check_missing_space_after_role(file, lines, options=None):
    r"""Search for roles immediately followed by a character.

    Bad:  :exc:`Exception`s.
    Good: :exc:`Exception`\ s

    At most one error is reported per line.
    """
    for lno, raw_line in enumerate(lines, start=1):
        suspect = _SUSPICIOUS_ROLE.search(clean_paragraph(raw_line))
        if suspect:
            yield lno, f"role missing (escaped) space after role: {suspect.group(0)!r}"

196 

197 

@checker(".rst", ".po")
def check_role_without_backticks(file, lines, options=None):
    """Search roles without backticks.

    Bad:  :func:pdb.main
    Good: :func:`pdb.main`

    At most one error is reported per line.
    """
    for lno, line in enumerate(lines, start=1):
        bare_role = rst.ROLE_WITH_NO_BACKTICKS_RE.search(line)
        if not bare_role:
            continue
        yield lno, f"role with no backticks: {bare_role.group(0)!r}"

209 

210 

@checker(".rst", ".po")
def check_backtick_before_role(file, lines, options=None):
    """Search for roles preceded by a backtick.

    Bad:  `:fct:`sum`
    Good: :fct:`sum`

    Lines without any backtick are skipped before the regex is tried.
    """
    for lno, line in enumerate(lines, start=1):
        if "`" in line and rst.BACKTICK_IN_FRONT_OF_ROLE_RE.search(line):
            yield lno, "superfluous backtick in front of role"

223 

224 

@checker(".rst", ".po")
def check_missing_space_in_hyperlink(file, lines, options=None):
    """Search for hyperlinks missing a space.

    Bad:  `Link text<https://example.com>`_
    Good: `Link text <https://example.com>`_

    An error is yielded for every hyperlink-like match on a line whose
    first capture group is empty.
    """
    for lno, line in enumerate(lines, start=1):
        if "`" not in line:
            continue
        for link in rst.SEEMS_HYPERLINK_RE.finditer(line):
            if link.group(1):
                continue
            yield lno, "missing space before < in hyperlink"

238 

239 

@checker(".rst", ".po")
def check_missing_underscore_after_hyperlink(file, lines, options=None):
    """Search for hyperlinks missing underscore after their closing backtick.

    Bad:  `Link text <https://example.com>`
    Good: `Link text <https://example.com>`_

    An error is yielded for every hyperlink-like match on a line whose
    second capture group is empty.
    """
    for lno, line in enumerate(lines, start=1):
        if "`" not in line:
            continue
        for link in rst.SEEMS_HYPERLINK_RE.finditer(line):
            if link.group(2):
                continue
            yield lno, "missing underscore after closing backtick in hyperlink"

253 

254 

@checker(".rst", ".po")
def check_role_with_double_backticks(file, lines, options=None):
    """Search for roles with double backticks.

    Bad:  :fct:``sum``
    Good: :fct:`sum`

    The difficulty is that :fct:``sum`` is legitimate reStructuredText:

        :fct: is just plain text.
        ``sum`` is an inline literal.

    So the check looks for actual inline literals that are directly
    preceded by a role tag. The shortest inline literal is taken out of the
    paragraph, tested, and removed, until none is left.
    """
    for first_lno, text in paragraphs(lines):
        if "`" not in text:
            continue
        if rst.paragraph_looks_like_a_table(text):
            return  # we don't handle tables yet.
        text = escape2null(text)
        while True:
            shortest = min(
                rst.INLINE_LITERAL_RE.finditer(text, overlapped=True),
                key=match_size,
                default=None,
            )
            if shortest is None:
                break
            start, end = shortest.span()
            prefix = text[:start]
            if _ends_with_role_tag(prefix):
                yield (
                    first_lno + prefix.count("\n"),
                    "role use a single backtick, double backtick found.",
                )
            # Cut the literal out so the next pass finds the next shortest.
            text = prefix + text[end:]

295 

296 

@checker(".rst", ".po")
def check_missing_space_before_role(file, lines, options=None):
    """Search for missing spaces before roles.

    Bad:  the:fct:`sum`, issue:`123`, c:func:`foo`
    Good: the :fct:`sum`, :issue:`123`, :c:func:`foo`

    At most one error is reported per paragraph; ``looks_like_glued``
    chooses between the two possible messages. The check stops at the first
    paragraph that looks like a table.
    """
    for first_lno, text in paragraphs(lines):
        if rst.paragraph_looks_like_a_table(text):
            return  # we don't handle tables yet.
        text = clean_paragraph(text)
        glued = rst.ROLE_GLUED_WITH_WORD_RE.search(text)
        if not glued:
            continue
        lno = first_lno + text.count("\n", 0, glued.start())
        if looks_like_glued(glued):
            message = f"missing space before role ({glued.group(0)})."
        else:
            message = f"role missing opening tag colon ({glued.group(0)})."
        yield lno, message

321 

322 

@checker(".rst", ".po")
def check_missing_space_before_default_role(file, lines, options=None):
    """Search for missing spaces before default role.

    Bad:  the`sum`
    Good: the `sum`

    Each paragraph is cleaned and has its interpreted text removed; what the
    default-role pattern still matches is reported, together with up to
    three characters of leading context. The check stops at the first
    paragraph that looks like a table.

    Yields ``(lineno, message)`` tuples.
    """
    # Loop-invariant: build the pattern once instead of once per paragraph.
    default_role_re = rst.inline_markup_gen("`", "`", extra_allowed_before="[^_]")
    for paragraph_lno, paragraph in paragraphs(lines):
        if rst.paragraph_looks_like_a_table(paragraph):
            return  # we don't handle tables yet.
        paragraph = clean_paragraph(paragraph)
        paragraph = rst.INTERPRETED_TEXT_RE.sub("", paragraph)
        for role in default_role_re.finditer(paragraph):
            error_offset = paragraph[: role.start()].count("\n")
            # Clamp at 0: a negative start would slice from the end of the
            # paragraph and give a wrong (usually empty) context.
            context_start = max(0, role.start() - 3)
            context = paragraph[context_start : role.end()]
            yield (
                paragraph_lno + error_offset,
                f"missing space before default role: {context!r}.",
            )

344 

345 

# A word, a space, an http(s) URL in angle brackets, then the closing "`_".
_HYPERLINK_REFERENCE_RE = re.compile(r"\S* <https?://[^ ]+>`_")


@checker(".rst", ".po")
def check_hyperlink_reference_missing_backtick(file, lines, options=None):
    """Search for missing backticks in front of hyperlink references.

    Bad:  Misc/NEWS <https://github.com/python/cpython/blob/v3.2.6/Misc/NEWS>`_
    Good: `Misc/NEWS <https://github.com/python/cpython/blob/v3.2.6/Misc/NEWS>`_

    Each paragraph is cleaned and has its interpreted text removed before
    the pattern is searched. The check stops at the first paragraph that
    looks like a table.
    """
    for first_lno, text in paragraphs(lines):
        if rst.paragraph_looks_like_a_table(text):
            return  # we don't handle tables yet.
        text = rst.INTERPRETED_TEXT_RE.sub("", clean_paragraph(text))
        for reference in _HYPERLINK_REFERENCE_RE.finditer(text):
            lines_before = text.count("\n", 0, reference.start())
            context = reference.group(0)
            yield (
                first_lno + lines_before,
                f"missing backtick before hyperlink reference: {context!r}.",
            )

367 ) 

368 

369 

@checker(".rst", ".po")
def check_missing_colon_in_role(file, lines, options=None):
    """Search for missing colons in roles.

    Bad:  :issue`123`
    Good: :issue:`123`

    At most one error is reported per line.
    """
    for lno, line in enumerate(lines, start=1):
        found = rst.ROLE_MISSING_RIGHT_COLON_RE.search(line)
        if not found:
            continue
        yield lno, f"role missing colon before first backtick ({found.group(0)})."

381 

382 

@checker(".py", ".rst", ".po", rst_only=False)
def check_carriage_return(file, lines, options=None):
    r"""Check for carriage returns (\r) in lines.

    Yields one ``(lineno, message)`` tuple per line containing one.
    """
    for lno, line in enumerate(lines, start=1):
        if "\r" not in line:
            continue
        yield lno, "\\r in line"

389 

390 

@checker(".py", ".rst", ".po", rst_only=False)
def check_horizontal_tab(file, lines, options=None):
    r"""Check for horizontal tabs (\t) in lines.

    Yields one ``(lineno, message)`` tuple per line containing one.
    """
    for lno, line in enumerate(lines, start=1):
        if "\t" not in line:
            continue
        yield lno, "OMG TABS!!!1"

397 

398 

@checker(".py", ".rst", ".po", rst_only=False)
def check_trailing_whitespace(file, lines, options=None):
    """Check for trailing whitespaces at end of lines.

    A line is reported when, once its trailing newline is removed, it ends
    with a space or a tab.
    """
    for lno, line in enumerate(lines, start=1):
        if line.rstrip("\n").endswith((" ", "\t")):
            yield lno, "trailing whitespace"

406 

407 

@checker(".py", ".rst", ".po", rst_only=False)
def check_missing_final_newline(file, lines, options=None):
    """Check that the last line of the file ends with a newline.

    Nothing is reported for an empty file; otherwise the error is attached
    to the last line number.
    """
    if not lines:
        return
    if lines[-1].endswith("\n"):
        return
    yield len(lines), "No newline at end of file."

413 

414 

# Kinds of lines that are allowed to exceed the maximum length.
_is_long_interpreted_text = re.compile(r"^\s*\W*(:(\w+:)+)?`.*`\W*$").match
_starts_with_directive_or_hyperlink = re.compile(r"^\s*\.\. ").match
_starts_with_anonymous_hyperlink = re.compile(r"^\s*__ ").match
_is_very_long_string_literal = re.compile(r"^\s*``[^`]+``$").match


@checker(".rst", ".po", enabled=False, rst_only=True)
def check_line_too_long(file, lines, options=None):
    """Check for line length; this checker is not run by default.

    ``options.max_line_length`` is the longest accepted length, not
    counting the trailing newline. Wide table rows, long interpreted text,
    directives, hyperlink targets and long inline literals are ignored.

    Yields ``(lineno, message)`` tuples.
    """
    for lno, line in enumerate(lines, start=1):
        # Only discount the newline when there is one: the last line of a
        # file may lack it and must not be measured one character short.
        length = len(line) - 1 if line.endswith("\n") else len(line)
        if length <= options.max_line_length:
            continue
        # startswith() is safe on a whitespace-only line, where indexing
        # the stripped (empty) string would raise IndexError.
        if line.lstrip().startswith(("+", "|")):
            continue  # ignore wide tables
        if _is_long_interpreted_text(line):
            continue  # ignore long interpreted text
        if _starts_with_directive_or_hyperlink(line):
            continue  # ignore directives and hyperlink targets
        if _starts_with_anonymous_hyperlink(line):
            continue  # ignore anonymous hyperlink targets
        if _is_very_long_string_literal(line):
            continue  # ignore a very long literal string
        yield lno, f"Line too long ({length}/{options.max_line_length})"

438 

439 

@checker(".html", enabled=False, rst_only=False)
def check_leaked_markup(file, lines, options=None):
    """Check HTML files for leaked reST markup.

    This only works if the HTML files have been built. The offending line
    is included verbatim in the message.
    """
    for lno, line in enumerate(lines, start=1):
        if not rst.LEAKED_MARKUP_RE.search(line):
            continue
        yield lno, f"possibly leaked markup: {line}"

449 

450 

@checker(".rst", ".po", enabled=False)
def check_triple_backticks(file, lines, options=None):
    """Check for triple backticks, like ```Point``` (but it's a valid syntax).

    Bad:  ```Point```
    Good: ``Point``

    Triple backticks are in fact valid: ```foo``` gets rendered as `foo`.
    Sphinx itself uses that to document rst syntax, but it is really
    uncommon, hence this opt-in check.
    """
    for lno, line in enumerate(lines, start=1):
        if rst.TRIPLE_BACKTICKS_RE.search(line):
            yield lno, "There's no rst syntax using triple backticks"

466 

467 

# One leading space, a non-space character, and a line ending in "::".
_has_bad_dedent = re.compile(" [^ ].*::$").match


@checker(".rst", ".po", rst_only=False)
def check_bad_dedent(file, lines, options=None):
    """Check for mis-alignment in indentation in code blocks.

    |A 5 lines block::
    |
    |    Hello!
    |
    | Looks like another block::
    |
    |    But in fact it's not due to the leading space.

    The hidden (non-rst) blocks reported by ``hide_non_rst_blocks`` are
    scanned line by line; the errors are yielded once all of them have been
    collected.
    """
    found = []

    def collect_bad_dedents(block_lineno, block):
        found.extend(
            (block_lineno + offset, "Bad dedent in block")
            for offset, text in enumerate(block.splitlines())
            if _has_bad_dedent(text)
        )

    # Exhaust the generator: only the callback's side effects matter here.
    for _ in hide_non_rst_blocks(lines, hidden_block_cb=collect_bad_dedents):
        pass
    yield from found

493 

494 

# A lowercase ASCII letter followed by a hyphen at the very end of the line.
_has_dangling_hyphen = re.compile(r".*[a-z]-$").match


@checker(".rst", rst_only=True)
def check_dangling_hyphen(file, lines, options=None):
    """Check for lines ending in a hyphen.

    ``options`` is unused; it defaults to ``None`` so the signature matches
    the other checkers.

    Yields one ``(lineno, message)`` tuple per offending line.
    """
    for lno, line in enumerate(lines, start=1):
        stripped_line = line.rstrip("\n")
        if _has_dangling_hyphen(stripped_line):
            yield lno, "Line ends with dangling hyphen"