Coverage for sphinxlint/checkers.py: 89%
252 statements
« prev ^ index » next coverage.py v7.3.2, created at 2023-11-24 18:46 +0100
« prev ^ index » next coverage.py v7.3.2, created at 2023-11-24 18:46 +0100
1import os
3import regex as re
5from sphinxlint import rst
6from sphinxlint.utils import (
7 clean_paragraph,
8 escape2null,
9 hide_non_rst_blocks,
10 looks_like_glued,
11 match_size,
12 paragraphs,
13)
# Registry of every checker, keyed by its dashed public name.
all_checkers = {}


def checker(*suffixes, **kwds):
    """Build a decorator that registers a ``check_*`` function as a checker.

    *suffixes* are stored on the function as ``func.suffixes``.  The
    ``enabled`` and ``rst_only`` keyword arguments are stored as attributes
    of the same name and both default to ``True``.

    The decorated function is returned unchanged (apart from the added
    attributes) and is recorded in ``all_checkers`` under ``func.name``:
    its own name without the ``check_`` prefix and with underscores
    replaced by dashes.

    Raises ValueError when the function name does not start with
    ``check_``.
    """
    prefix = "check_"
    defaults = {"enabled": True, "rst_only": True}

    def register(func):
        function_name = func.__name__
        if not function_name.startswith(prefix):
            raise ValueError("Checker names should start with 'check_'.")
        for prop in defaults:
            setattr(func, prop, kwds.get(prop, defaults[prop]))
        func.suffixes = suffixes
        func.name = function_name[len(prefix) :].replace("_", "-")
        all_checkers[func.name] = func
        return func

    return register
@checker(".py", rst_only=False)
def check_python_syntax(file, lines, options=None):
    """Report Python source that does not compile.

    The lines are joined into one source string.  If it contains carriage
    returns, a ``(0, ...)`` finding is yielded on every platform except
    Windows (``os.name == "nt"``), and the carriage returns are removed
    before compiling.  A ``SyntaxError`` from ``compile`` is reported at
    the line number carried by the exception.
    """
    source = "".join(lines)
    if "\r" in source and os.name != "nt":
        yield 0, "\\r in code file"
    # A no-op when the source holds no carriage return.
    source = source.replace("\r", "")
    try:
        compile(source, file, "exec")
    except SyntaxError as exc:
        yield exc.lineno, f"not compilable: {exc}"
@checker(".rst", ".po")
def check_missing_backtick_after_role(file, lines, options=None):
    """Search for roles missing their closing backticks.

    Bad:  :fct:`foo
    Good: :fct:`foo`

    Works paragraph by paragraph and reports at most one finding per
    paragraph (the first match).  Checking stops entirely at the first
    paragraph that looks like a table.
    """
    find_unclosed_role = rst.ROLE_MISSING_CLOSING_BACKTICK_RE.search
    for first_lno, text in paragraphs(lines):
        if rst.paragraph_looks_like_a_table(text):
            return  # we don't handle tables yet.
        hit = find_unclosed_role(text)
        if not hit:
            continue
        # Newlines before the match give the offset inside the paragraph.
        lines_before = text.count("\n", 0, hit.start())
        yield first_lno + lines_before, f"role missing closing backtick: {hit.group(0)!r}"
# An inline literal (``...``) plus the single character that follows it.
_RST_ROLE_RE = re.compile("``.+?``(?!`).", flags=re.DOTALL)
_END_STRING_SUFFIX_RE = re.compile(rst.END_STRING_SUFFIX)


@checker(".rst", ".po")
def check_missing_space_after_literal(file, lines, options=None):
    r"""Search for inline literals immediately followed by a character.

    Bad:  ``items``s
    Good: ``items``\ s

    Each paragraph is cleaned with ``clean_paragraph`` first.  A literal is
    reported when the character right after it does not match
    ``rst.END_STRING_SUFFIX``.  Checking stops entirely at the first
    paragraph that looks like a table.
    """
    for first_lno, text in paragraphs(lines):
        if rst.paragraph_looks_like_a_table(text):
            return  # we don't handle tables yet.
        text = clean_paragraph(text)
        for literal in _RST_ROLE_RE.finditer(text):
            matched = literal.group(0)
            # The last matched character is the one following the literal.
            if _END_STRING_SUFFIX_RE.match(matched[-1]):
                continue
            lines_before = text.count("\n", 0, literal.start())
            yield (
                first_lno + lines_before,
                f"inline literal missing (escaped) space after literal: {matched!r}",
            )
# Exactly two backticks, neither preceded nor followed by another backtick.
_LONE_DOUBLE_BACKTICK_RE = re.compile("(?<!`)``(?!`)")


@checker(".rst", ".po")
def check_unbalanced_inline_literals_delimiters(file, lines, options=None):
    """Search for unbalanced inline literals delimiters.

    Bad:  ``hello`` world``
    Good: ``hello`` world

    Reports every double backtick still found in a paragraph after it has
    gone through ``clean_paragraph``.  Checking stops entirely at the first
    paragraph that looks like a table.
    """
    for first_lno, text in paragraphs(lines):
        if rst.paragraph_looks_like_a_table(text):
            return  # we don't handle tables yet.
        text = clean_paragraph(text)
        for leftover in _LONE_DOUBLE_BACKTICK_RE.finditer(text):
            lines_before = text.count("\n", 0, leftover.start())
            yield (
                first_lno + lines_before,
                "found an unbalanced inline literal markup.",
            )
# Search helpers: does a text end / start with a role tag?
_ends_with_role_tag = re.compile(rst.ROLE_TAG + "$").search
_starts_with_role_tag = re.compile("^" + rst.ROLE_TAG).search


@checker(".rst", ".po", enabled=False)
def check_default_role(file, lines, options=None):
    """Search for default roles (but they are allowed in many projects).

    Bad:  `print`
    Good: ``print``

    Works line by line and only looks at the first interpreted text found
    on each line.  The match is skipped when a role tag sits right before
    or right after it, or when it is an inline literal.  Checking stops
    entirely at the first matching line that looks like a table.
    """
    for lno, raw in enumerate(lines, start=1):
        text = escape2null(clean_paragraph(raw))
        found = rst.INTERPRETED_TEXT_RE.search(text)
        if not found:
            continue
        if rst.line_looks_like_a_table(text):
            return  # we don't handle tables yet.
        if _ends_with_role_tag(text[: found.start()]):
            # It's not a default role: a tag precedes it.
            continue
        if _starts_with_role_tag(text[found.end() :]):
            # It's not a default role: a tag follows it.
            continue
        markup = found.group(0)
        if markup.startswith("``") and markup.endswith("``"):
            # It's not a default role: it's an inline literal.
            continue
        yield lno, "default role used (hint: for inline literals, use double backticks)"
@checker(".rst", ".po")
def check_directive_with_three_dots(file, lines, options=None):
    """Search for directives with three dots instead of two.

    Bad:  ... versionchanged:: 3.6
    Good: .. versionchanged:: 3.6

    Yields one finding per line matching ``rst.THREE_DOT_DIRECTIVE_RE``.
    """
    has_three_dots = rst.THREE_DOT_DIRECTIVE_RE.search
    for lno, text in enumerate(lines, start=1):
        if not has_three_dots(text):
            continue
        yield lno, "directive should start with two dots, not three."
@checker(".rst", ".po")
def check_directive_missing_colons(file, lines, options=None):
    """Search for directives wrongly typed as comments.

    Bad:  .. versionchanged 3.6.
    Good: .. versionchanged:: 3.6

    Yields one finding per line matching ``rst.SEEMS_DIRECTIVE_RE``.
    """
    seems_directive = rst.SEEMS_DIRECTIVE_RE.search
    for lno, text in enumerate(lines, start=1):
        if not seems_directive(text):
            continue
        yield lno, "comment seems to be intended as a directive"
# Roles may be nested, which makes the pattern tricky.  This is valid:
#     The :literal:`:exc:`Exceptions``
# while this is not (a character is glued to the closing backticks):
#     The :literal:`:exc:`Exceptions``s
_ROLE_BODY = rf"([^`]|\s`+|\\`|:{rst.SIMPLENAME}:`([^`]|\s`+|\\`)+`)+"
_ALLOWED_AFTER_ROLE = (
    rst.ASCII_ALLOWED_AFTER_INLINE_MARKUP
    + rst.UNICODE_ALLOWED_AFTER_INLINE_MARKUP
    + r"|\s"
)
_SUSPICIOUS_ROLE = re.compile(
    f":{rst.SIMPLENAME}:`{_ROLE_BODY}`[^{_ALLOWED_AFTER_ROLE}]"
)


@checker(".rst", ".po")
def check_missing_space_after_role(file, lines, options=None):
    r"""Search for roles immediately followed by a character.

    Bad:  :exc:`Exception`s.
    Good: :exc:`Exceptions`\ s

    Works line by line, after ``clean_paragraph``, and reports at most one
    finding per line (the first match).
    """
    for lno, raw in enumerate(lines, start=1):
        suspicious = _SUSPICIOUS_ROLE.search(clean_paragraph(raw))
        if not suspicious:
            continue
        yield lno, f"role missing (escaped) space after role: {suspicious.group(0)!r}"
@checker(".rst", ".po")
def check_role_without_backticks(file, lines, options=None):
    """Search roles without backticks.

    Bad:  :func:pdb.main
    Good: :func:`pdb.main`

    Reports at most one finding per line (the first match).
    """
    find_bare_role = rst.ROLE_WITH_NO_BACKTICKS_RE.search
    for lno, text in enumerate(lines, start=1):
        bare_role = find_bare_role(text)
        if not bare_role:
            continue
        yield lno, f"role with no backticks: {bare_role.group(0)!r}"
@checker(".rst", ".po")
def check_backtick_before_role(file, lines, options=None):
    """Search for roles preceded by a backtick.

    Bad:  `:fct:`sum`
    Good: :fct:`sum`

    Lines without any backtick are skipped before the regex is tried.
    """
    for lno, text in enumerate(lines, start=1):
        if "`" in text and rst.BACKTICK_IN_FRONT_OF_ROLE_RE.search(text):
            yield lno, "superfluous backtick in front of role"
@checker(".rst", ".po")
def check_missing_space_in_hyperlink(file, lines, options=None):
    """Search for hyperlinks missing a space.

    Bad:  `Link text<https://example.com>`_
    Good: `Link text <https://example.com>`_

    Yields one finding for every ``rst.SEEMS_HYPERLINK_RE`` match on a
    line whose group 1 is empty.
    """
    for lno, text in enumerate(lines, start=1):
        if "`" not in text:
            continue
        yield from (
            (lno, "missing space before < in hyperlink")
            for link in rst.SEEMS_HYPERLINK_RE.finditer(text)
            if not link.group(1)
        )
@checker(".rst", ".po")
def check_missing_underscore_after_hyperlink(file, lines, options=None):
    """Search for hyperlinks missing underscore after their closing backtick.

    Bad:  `Link text <https://example.com>`
    Good: `Link text <https://example.com>`_

    Yields one finding for every ``rst.SEEMS_HYPERLINK_RE`` match on a
    line whose group 2 is empty.
    """
    for lno, text in enumerate(lines, start=1):
        if "`" not in text:
            continue
        yield from (
            (lno, "missing underscore after closing backtick in hyperlink")
            for link in rst.SEEMS_HYPERLINK_RE.finditer(text)
            if not link.group(2)
        )
@checker(".rst", ".po")
def check_role_with_double_backticks(file, lines, options=None):
    """Search for roles with double backticks.

    Bad:  :fct:``sum``
    Good: :fct:`sum`

    The difficulty is that :fct:``sum`` is legitimate reStructuredText:

    - :fct: is just plain text,
    - ``sum`` is an inline literal.

    So the search looks for actual inline literals directly preceded by a
    role tag.  Within a paragraph, the smallest inline literal (according
    to ``match_size``) is examined and then cut out of the text, and this
    repeats until no inline literal is left.  Checking stops entirely at
    the first paragraph that contains a backtick and looks like a table.
    """
    for first_lno, text in paragraphs(lines):
        if "`" not in text:
            continue
        if rst.paragraph_looks_like_a_table(text):
            return  # we don't handle tables yet.
        text = escape2null(text)
        while True:
            candidates = rst.INLINE_LITERAL_RE.finditer(text, overlapped=True)
            smallest = min(candidates, key=match_size, default=None)
            if smallest is None:
                break
            start, end = smallest.span()
            head = text[:start]
            if _ends_with_role_tag(head):
                yield (
                    first_lno + head.count("\n"),
                    "role use a single backtick, double backtick found.",
                )
            # Drop the literal just examined so the next pass sees the rest.
            text = head + text[end:]
@checker(".rst", ".po")
def check_missing_space_before_role(file, lines, options=None):
    """Search for missing spaces before roles.

    Bad:  the:fct:`sum`, issue:`123`, c:func:`foo`
    Good: the :fct:`sum`, :issue:`123`, :c:func:`foo`

    Reports at most one finding per paragraph (the first match).  The
    message depends on ``looks_like_glued``: either a space is missing
    before the role, or the role lacks its opening colon.  Checking stops
    entirely at the first paragraph that looks like a table.
    """
    for first_lno, text in paragraphs(lines):
        if rst.paragraph_looks_like_a_table(text):
            return  # we don't handle tables yet.
        text = clean_paragraph(text)
        found = rst.ROLE_GLUED_WITH_WORD_RE.search(text)
        if not found:
            continue
        lno = first_lno + text.count("\n", 0, found.start())
        if looks_like_glued(found):
            message = f"missing space before role ({found.group(0)})."
        else:
            message = f"role missing opening tag colon ({found.group(0)})."
        yield lno, message
@checker(".rst", ".po")
def check_missing_space_before_default_role(file, lines, options=None):
    """Search for missing spaces before default role.

    Bad:  the`sum`
    Good: the `sum`

    Each paragraph is cleaned with ``clean_paragraph`` and every match of
    ``rst.INTERPRETED_TEXT_RE`` is removed from it.  What the
    ``inline_markup_gen`` pattern still finds is reported.  The message
    carries the match plus up to three characters of leading context.
    Checking stops entirely at the first paragraph that looks like a
    table.

    Yields ``(line_number, message)`` pairs.
    """
    # The pattern is the same for every paragraph: build it once.
    glued_default_role = rst.inline_markup_gen(
        "`", "`", extra_allowed_before="[^_]"
    )
    for paragraph_lno, paragraph in paragraphs(lines):
        if rst.paragraph_looks_like_a_table(paragraph):
            return  # we don't handle tables yet.
        paragraph = clean_paragraph(paragraph)
        paragraph = rst.INTERPRETED_TEXT_RE.sub("", paragraph)
        for role in glued_default_role.finditer(paragraph):
            error_offset = paragraph.count("\n", 0, role.start())
            # Clamp at 0: a negative start would be counted from the end of
            # the paragraph and yield an empty or unrelated context.
            context_start = max(role.start() - 3, 0)
            context = paragraph[context_start : role.end()]
            yield (
                paragraph_lno + error_offset,
                f"missing space before default role: {context!r}.",
            )
# A word, a space, a bracketed http(s) URL and the closing "`_".
_HYPERLINK_REFERENCE_RE = re.compile(r"\S* <https?://[^ ]+>`_")


@checker(".rst", ".po")
def check_hyperlink_reference_missing_backtick(file, lines, options=None):
    """Search for missing backticks in front of hyperlink references.

    Bad:  Misc/NEWS <https://github.com/python/cpython/blob/v3.2.6/Misc/NEWS>`_
    Good: `Misc/NEWS <https://github.com/python/cpython/blob/v3.2.6/Misc/NEWS>`_

    Each paragraph is cleaned with ``clean_paragraph`` and every match of
    ``rst.INTERPRETED_TEXT_RE`` is removed from it.  Hyperlink references
    still found afterwards are reported.  Checking stops entirely at the
    first paragraph that looks like a table.
    """
    for first_lno, text in paragraphs(lines):
        if rst.paragraph_looks_like_a_table(text):
            return  # we don't handle tables yet.
        text = rst.INTERPRETED_TEXT_RE.sub("", clean_paragraph(text))
        for reference in _HYPERLINK_REFERENCE_RE.finditer(text):
            lines_before = text.count("\n", 0, reference.start())
            shown = reference.group(0)
            yield (
                first_lno + lines_before,
                f"missing backtick before hyperlink reference: {shown!r}.",
            )
@checker(".rst", ".po")
def check_missing_colon_in_role(file, lines, options=None):
    """Search for missing colons in roles.

    Bad:  :issue`123`
    Good: :issue:`123`

    Reports at most one finding per line (the first match).
    """
    find_missing_colon = rst.ROLE_MISSING_RIGHT_COLON_RE.search
    for lno, text in enumerate(lines, start=1):
        found = find_missing_colon(text)
        if not found:
            continue
        yield lno, f"role missing colon before first backtick ({found.group(0)})."
@checker(".py", ".rst", ".po", rst_only=False)
def check_carriage_return(file, lines, options=None):
    r"""Report every line containing a carriage return (\r)."""
    for lno, text in enumerate(lines, start=1):
        if "\r" not in text:
            continue
        yield lno, "\\r in line"
@checker(".py", ".rst", ".po", rst_only=False)
def check_horizontal_tab(file, lines, options=None):
    r"""Report every line containing a horizontal tab (\t)."""
    for lno, text in enumerate(lines, start=1):
        if "\t" not in text:
            continue
        yield lno, "OMG TABS!!!1"
@checker(".py", ".rst", ".po", rst_only=False)
def check_trailing_whitespace(file, lines, options=None):
    """Report lines ending with spaces or tabs, trailing newlines aside."""
    for lno, text in enumerate(lines, start=1):
        without_newline = text.rstrip("\n")
        if without_newline.endswith((" ", "\t")):
            yield lno, "trailing whitespace"
@checker(".py", ".rst", ".po", rst_only=False)
def check_missing_final_newline(file, lines, options=None):
    """Report a last line that does not end with a newline.

    Nothing is reported for an empty file.  The finding is attached to
    the last line.
    """
    if not lines:
        return
    if lines[-1].endswith("\n"):
        return
    yield len(lines), "No newline at end of file."
# Kinds of long lines that cannot reasonably be wrapped.
_is_long_interpreted_text = re.compile(r"^\s*\W*(:(\w+:)+)?`.*`\W*$").match
_starts_with_directive_or_hyperlink = re.compile(r"^\s*\.\. ").match
_starts_with_anonymous_hyperlink = re.compile(r"^\s*__ ").match
_is_very_long_string_literal = re.compile(r"^\s*``[^`]+``$").match


@checker(".rst", ".po", enabled=False, rst_only=True)
def check_line_too_long(file, lines, options=None):
    """Check for line length; this checker is not run by default.

    *options* must expose a ``max_line_length`` attribute.  When it is
    ``None`` or has no such attribute (or the attribute is ``None``) there
    is no limit to enforce and nothing is reported.

    The length is measured without the trailing newline.  Over-long lines
    are ignored when they are table rows (first non-blank character is
    ``+`` or ``|``), a single long interpreted text, a directive or
    hyperlink target, an anonymous hyperlink target, or a single long
    inline literal.

    Yields ``(line_number, message)`` pairs.
    """
    max_length = getattr(options, "max_line_length", None)
    if max_length is None:
        return
    for lno, line in enumerate(lines, start=1):
        # Strip the newline rather than subtracting one, so a last line
        # without a trailing newline is measured correctly.
        length = len(line.rstrip("\n"))
        if length <= max_length:
            continue
        # startswith() copes with whitespace-only lines, where indexing the
        # first character of the stripped line would raise IndexError.
        if line.lstrip().startswith(("+", "|")):
            continue  # ignore wide tables
        if _is_long_interpreted_text(line):
            continue  # ignore long interpreted text
        if _starts_with_directive_or_hyperlink(line):
            continue  # ignore directives and hyperlink targets
        if _starts_with_anonymous_hyperlink(line):
            continue  # ignore anonymous hyperlink targets
        if _is_very_long_string_literal(line):
            continue  # ignore a very long literal string
        yield lno, f"Line too long ({length}/{max_length})"
@checker(".html", enabled=False, rst_only=False)
def check_leaked_markup(file, lines, options=None):
    """Check HTML files for leaked reST markup.

    This only works if the HTML files have been built.  Every line
    matching ``rst.LEAKED_MARKUP_RE`` is reported, with the line itself
    included in the message.
    """
    looks_leaked = rst.LEAKED_MARKUP_RE.search
    for lno, text in enumerate(lines, start=1):
        if not looks_leaked(text):
            continue
        yield lno, f"possibly leaked markup: {text}"
@checker(".rst", ".po", enabled=False)
def check_triple_backticks(file, lines, options=None):
    """Check for triple backticks, like ```Point``` (but it's a valid syntax).

    Bad:  ```Point```
    Good: ``Point``

    Triple backticks are in fact valid: ```foo``` is rendered as `foo`.
    Sphinx at least uses this to document rst syntax, but it is really
    uncommon, which is why this checker is disabled by default.
    """
    find_triple_backticks = rst.TRIPLE_BACKTICKS_RE.search
    for lno, text in enumerate(lines, start=1):
        if not find_triple_backticks(text):
            continue
        yield lno, "There's no rst syntax using triple backticks"
# Exactly one leading space before a line ending with "::".
_has_bad_dedent = re.compile(" [^ ].*::$").match


@checker(".rst", ".po", rst_only=False)
def check_bad_dedent(file, lines, options=None):
    """Check for mis-alignment in indentation in code blocks.

    |A 5 lines block::
    |
    |    Hello!
    |
    | Looks like another block::
    |
    |    But in fact it's not due to the leading space.

    ``hide_non_rst_blocks`` is run over the whole input with a callback
    that inspects each hidden block; findings are collected first and
    yielded once the input has been consumed.
    """
    found = []

    def inspect_block(first_lineno, block):
        found.extend(
            (first_lineno + offset, "Bad dedent in block")
            for offset, text in enumerate(block.splitlines())
            if _has_bad_dedent(text)
        )

    # Only the callback's side effect matters: exhaust the generator.
    for _ in hide_non_rst_blocks(lines, hidden_block_cb=inspect_block):
        pass
    yield from found
# A lowercase ASCII letter followed by a hyphen at the very end of the line.
_has_dangling_hyphen = re.compile(r".*[a-z]-$").match


@checker(".rst", rst_only=True)
def check_dangling_hyphen(file, lines, options=None):
    """Check for lines ending in a hyphen.

    The trailing newline is ignored.  *options* is unused; it defaults to
    ``None`` like in every other checker so the function can be called
    with just ``(file, lines)``.

    Yields ``(line_number, message)`` pairs.
    """
    for lno, line in enumerate(lines, start=1):
        if _has_dangling_hyphen(line.rstrip("\n")):
            yield lno, "Line ends with dangling hyphen"