Coverage for sphinxlint/rst.py: 100%
53 statements
« prev ^ index » next coverage.py v7.3.2, created at 2023-11-24 18:46 +0100
« prev ^ index » next coverage.py v7.3.2, created at 2023-11-24 18:46 +0100
1"""Constants, regexes, and function generating regexes to "parse" reStructuredText.
3In this file:
4- All constants are ALL_CAPS
5- All compiled regexes are suffixed by _RE
6"""
7from functools import lru_cache
9import regex as re
# Body of a regex character class listing delimiter punctuation.
# It is not a plain list of characters: it starts with an escaped hyphen
# ("\\-") and contains "a-b" ranges, some of which straddle two of the
# adjacent string literals below.  START_STRING_PREFIX and END_STRING_SUFFIX
# interpolate it between "[" and "]".
DELIMITERS = (
    "\\-/:\u058a\xa1\xb7\xbf\u037e\u0387\u055a-\u055f\u0589"
    "\u05be\u05c0\u05c3\u05c6\u05f3\u05f4\u0609\u060a\u060c"
    "\u060d\u061b\u061e\u061f\u066a-\u066d\u06d4\u0700-\u070d"
    "\u07f7-\u07f9\u0830-\u083e\u0964\u0965\u0970\u0df4\u0e4f"
    "\u0e5a\u0e5b\u0f04-\u0f12\u0f85\u0fd0-\u0fd4\u104a-\u104f"
    "\u10fb\u1361-\u1368\u1400\u166d\u166e\u16eb-\u16ed\u1735"
    "\u1736\u17d4-\u17d6\u17d8-\u17da\u1800-\u180a\u1944\u1945"
    "\u19de\u19df\u1a1e\u1a1f\u1aa0-\u1aa6\u1aa8-\u1aad\u1b5a-"
    "\u1b60\u1c3b-\u1c3f\u1c7e\u1c7f\u1cd3\u2010-\u2017\u2020-"
    "\u2027\u2030-\u2038\u203b-\u203e\u2041-\u2043\u2047-"
    "\u2051\u2053\u2055-\u205e\u2cf9-\u2cfc\u2cfe\u2cff\u2e00"
    "\u2e01\u2e06-\u2e08\u2e0b\u2e0e-\u2e1b\u2e1e\u2e1f\u2e2a-"
    "\u2e2e\u2e30\u2e31\u3001-\u3003\u301c\u3030\u303d\u30a0"
    "\u30fb\ua4fe\ua4ff\ua60d-\ua60f\ua673\ua67e\ua6f2-\ua6f7"
    "\ua874-\ua877\ua8ce\ua8cf\ua8f8-\ua8fa\ua92e\ua92f\ua95f"
    "\ua9c1-\ua9cd\ua9de\ua9df\uaa5c-\uaa5f\uaade\uaadf\uabeb"
    "\ufe10-\ufe16\ufe19\ufe30-\ufe32\ufe45\ufe46\ufe49-\ufe4c"
    "\ufe50-\ufe52\ufe54-\ufe58\ufe5f-\ufe61\ufe63\ufe68\ufe6a"
    "\ufe6b\uff01-\uff03\uff05-\uff07\uff0a\uff0c-\uff0f\uff1a"
    "\uff1b\uff1f\uff20\uff3c\uff61\uff64\uff65"
)
# Closing quotes and brackets, position-for-position with OPENERS:
# QUOTE_PAIRS_NEGATIVE_LOOKBEHIND zips the two strings character by
# character, so the order and the length must stay in sync with OPENERS.
# The "\\" in front of "]" is a literal backslash: it keeps the bracket
# escaped when this string is interpolated into a regex character class
# (see END_STRING_SUFFIX).
CLOSERS = (
    "\"')>\\]}\u0f3b\u0f3d\u169c\u2046\u207e\u208e\u232a\u2769"
    "\u276b\u276d\u276f\u2771\u2773\u2775\u27c6\u27e7\u27e9\u27eb"
    "\u27ed\u27ef\u2984\u2986\u2988\u298a\u298c\u298e\u2990\u2992"
    "\u2994\u2996\u2998\u29d9\u29db\u29fd\u2e23\u2e25\u2e27\u2e29"
    "\u3009\u300b\u300d\u300f\u3011\u3015\u3017\u3019\u301b\u301e"
    "\u301f\ufd3f\ufe18\ufe36\ufe38\ufe3a\ufe3c\ufe3e\ufe40\ufe42"
    "\ufe44\ufe48\ufe5a\ufe5c\ufe5e\uff09\uff3d\uff5d\uff60\uff63"
    "\xbb\u2019\u201d\u203a\u2e03\u2e05\u2e0a\u2e0d\u2e1d\u2e21"
    "\u201b\u201f\xab\u2018\u201c\u2039\u2e02\u2e04\u2e09\u2e0c"
    "\u2e1c\u2e20\u201a\u201e"
)
# Opening quotes and brackets, position-for-position with CLOSERS:
# QUOTE_PAIRS_NEGATIVE_LOOKBEHIND zips the two strings character by
# character, so the order and the length must stay in sync with CLOSERS.
# "\u301d" appears twice in a row on purpose of alignment: it sits opposite
# both "\u301e" and "\u301f" in CLOSERS.
# The "\\" in front of "[" is a literal backslash: it keeps the bracket
# escaped when this string is interpolated into a regex character class
# (see START_STRING_PREFIX).
OPENERS = (
    "\"'(<\\[{\u0f3a\u0f3c\u169b\u2045\u207d\u208d\u2329\u2768"
    "\u276a\u276c\u276e\u2770\u2772\u2774\u27c5\u27e6\u27e8\u27ea"
    "\u27ec\u27ee\u2983\u2985\u2987\u2989\u298b\u298d\u298f\u2991"
    "\u2993\u2995\u2997\u29d8\u29da\u29fc\u2e22\u2e24\u2e26\u2e28"
    "\u3008\u300a\u300c\u300e\u3010\u3014\u3016\u3018\u301a\u301d"
    "\u301d\ufd3e\ufe17\ufe35\ufe37\ufe39\ufe3b\ufe3d\ufe3f\ufe41"
    "\ufe43\ufe47\ufe59\ufe5b\ufe5d\uff08\uff3b\uff5b\uff5f\uff62"
    "\xab\u2018\u201c\u2039\u2e02\u2e04\u2e09\u2e0c\u2e1c\u2e20"
    "\u201a\u201e\xbb\u2019\u201d\u203a\u2e03\u2e05\u2e0a\u2e0d"
    "\u2e1d\u2e21\u201b\u201f"
)
60# fmt: off
# Names of directives listed as containing reStructuredText.
# Entries are joined with "|" *without escaping* to build regexes
# (DIRECTIVES_CONTAINING_RST_RE, ALL_DIRECTIVES), so each entry is a regex
# fragment rather than a plain name: 'deprecated(?!-removed)' relies on that.
# Every name appears once; 'class' is listed only in the docutils group
# because a second copy would just be a redundant regex alternative.
DIRECTIVES_CONTAINING_RST = [
    # standard docutils ones
    'admonition', 'attention', 'caution', 'class', 'compound', 'container',
    'danger', 'epigraph', 'error', 'figure', 'footer', 'header', 'highlights',
    'hint', 'image', 'important', 'include', 'line-block', 'list-table', 'meta',
    'note', 'parsed-literal', 'pull-quote', 'replace', 'sidebar', 'tip', 'topic',
    'warning',
    # Sphinx and Python docs custom ones
    'acks', 'attribute', 'autoattribute', 'autoclass', 'autodata',
    'autoexception', 'autofunction', 'automethod', 'automodule',
    'availability', 'centered', 'cfunction', 'classmethod', 'cmacro',
    'cmdoption', 'cmember', 'confval', 'cssclass', 'ctype',
    'currentmodule', 'cvar', 'data', 'decorator', 'decoratormethod',
    'deprecated-removed', 'deprecated(?!-removed)', 'describe', 'directive',
    'envvar', 'event', 'exception', 'function', 'glossary',
    'highlight', 'highlightlang', 'impl-detail', 'index', 'literalinclude',
    'method', 'miscnews', 'module', 'moduleauthor', 'opcode', 'pdbcommand',
    'program', 'role', 'sectionauthor', 'seealso',
    'sourcecode', 'staticmethod', 'tabularcolumns', 'testcode', 'testoutput',
    'testsetup', 'toctree', 'todo', 'todolist', 'versionadded',
    'versionchanged', 'c:function', 'coroutinefunction',
]
# Names of directives listed as containing arbitrary (not necessarily reST)
# content.  Like DIRECTIVES_CONTAINING_RST, the entries are joined with "|"
# without escaping to build DIRECTIVES_CONTAINING_ARBITRARY_CONTENT_RE and
# ALL_DIRECTIVES.
DIRECTIVES_CONTAINING_ARBITRARY_CONTENT = [
    # standard docutils ones
    'contents', 'csv-table', 'date', 'default-role', 'include', 'raw',
    'restructuredtext-test-directive',
    'role', 'rubric', 'sectnum', 'table', 'target-notes', 'title', 'unicode',
    # Sphinx and Python docs custom ones
    'code-block', 'doctest', 'productionlist',
]
93# fmt: on
# Opening line (optionally indented) of a directive from
# DIRECTIVES_CONTAINING_ARBITRARY_CONTENT, e.g. ".. code-block::".
DIRECTIVES_CONTAINING_ARBITRARY_CONTENT_RE = re.compile(
    rf"^\s*\.\. ({'|'.join(DIRECTIVES_CONTAINING_ARBITRARY_CONTENT)})::"
)

# Opening line (optionally indented) of a directive from
# DIRECTIVES_CONTAINING_RST, e.g. ".. note::".
DIRECTIVES_CONTAINING_RST_RE = re.compile(
    rf"^\s*\.\. ({'|'.join(DIRECTIVES_CONTAINING_RST)})::"
)

# Uncompiled capturing group matching any known directive name; meant to be
# interpolated into larger patterns.
ALL_DIRECTIVES = "({})".format(
    "|".join(DIRECTIVES_CONTAINING_RST + DIRECTIVES_CONTAINING_ARBITRARY_CONTENT)
)
# Two-character strings: an opening quote followed by the closing quote it
# pairs with.  QUOTE_PAIRS_NEGATIVE_LOOKBEHIND reads character 0 and
# character 1 of every entry, so each entry must be exactly two characters.
QUOTE_PAIRS = [
    # Typographic quotes whose pairing depends on the language.
    "»»",  # Swedish
    "‘‚",  # Albanian/Greek/Turkish
    "’’",  # Swedish
    "‚‘",  # German
    "‚’",  # Polish
    "“„",  # Albanian/Greek/Turkish
    "„“",  # German
    "„”",  # Polish
    "””",  # Swedish
    "››",  # Swedish
    # ASCII quotes and brackets.
    "''",
    '""',
    "<>",
    "()",
    "[]",
    "{}",
]
# Negative look-behind rejecting a backtick that sits between a matching
# pair of quotes or brackets: one "<opener>`<closer>" alternative per entry
# of QUOTE_PAIRS, followed by one per (OPENERS[i], CLOSERS[i]) couple.
# Every character is passed through re.escape() before being inserted.
QUOTE_PAIRS_NEGATIVE_LOOKBEHIND = "(?<!{})".format(
    "|".join(
        [f"{re.escape(pair[0])}`{re.escape(pair[1])}" for pair in QUOTE_PAIRS]
        + [
            f"{re.escape(opener)}`{re.escape(closer)}"
            for opener, closer in zip(OPENERS, CLOSERS)
        ]
    )
)
# A name made of runs of word characters other than "_", the runs being
# separated by a single "-", ".", "_", "+" or ":".
SIMPLENAME = r"(?:(?!_)\w)+(?:[-._+:](?:(?!_)\w)+)*"

# The following chars groups are from docutils:
CLOSING_DELIMITERS = r"\\.,;!?"

# What may come right before a role: start of text, or one of the characters
# of the look-behind class (whitespace, "(", "/", "'", "{", "[", "*", "-").
BEFORE_ROLE = r"(^|(?<=[\s(/'{\[*-]))"
ROLE_TAG = rf":{SIMPLENAME}:"
ROLE_HEAD = rf"({BEFORE_ROLE}{ROLE_TAG})"  # A role, with a clean start

# Character-class bodies used by inline_markup_gen() to describe what may
# surround inline markup.  The UNICODE_* ones use \p{...} property classes.
ASCII_ALLOWED_BEFORE_INLINE_MARKUP = r"""-:/'"<(\[{"""
UNICODE_ALLOWED_BEFORE_INLINE_MARKUP = r"\p{Ps}\p{Pi}\p{Pf}\p{Pd}\p{Po}"
ASCII_ALLOWED_AFTER_INLINE_MARKUP = r"""-.,:;!?/'")\]}>"""
UNICODE_ALLOWED_AFTER_INLINE_MARKUP = r"\p{Pe}\p{Pi}\p{Pf}\p{Pd}\p{Po}"
@lru_cache(maxsize=None)
def inline_markup_gen(start_string, end_string, extra_allowed_before=""):
    r"""Build a compiled regex matching one kind of inline markup.

    ``start_string`` and ``end_string`` are inserted verbatim into the
    pattern, so they have to be valid regex fragments already: for example
    ``inline_markup_gen(r"\*\*", r"\*\*")`` generates a regex matching
    strong emphasis inline markup.

    ``extra_allowed_before``, when non-empty, is appended as one more
    alternative to the look-behind listing what may precede the
    start-string.

    The markup itself (start-string and end-string included) is captured by
    the ``inline_markup`` named group.  Results are memoized per argument
    tuple, and the pattern is compiled with ``VERBOSE | DOTALL``.
    """
    # Only emit the "|" separator when there is something to put after it.
    extra_alternative = f"|{extra_allowed_before}" if extra_allowed_before else ""
    pattern = rf"""
    (?<!\x00) # Both inline markup start-string and end-string must not be preceded by
              # an unescaped backslash

    (?<=               # Inline markup start-strings must:
        ^|             # start a text block
        \s|            # or be immediately preceded by whitespace,
        [{ASCII_ALLOWED_BEFORE_INLINE_MARKUP}]|  # one of the ASCII characters
        [{UNICODE_ALLOWED_BEFORE_INLINE_MARKUP}] # or a similar non-ASCII
                                                 # punctuation character.
        {extra_alternative}
    )

    (?P<inline_markup>
        {start_string} # Inline markup start
        \S             # Inline markup start-strings must be immediately followed by
                       # non-whitespace.
                       # The inline markup end-string must be separated by at least one
                       # character from the start-string.
        {QUOTE_PAIRS_NEGATIVE_LOOKBEHIND}
        .*?
        (?<=\x00\ |\S)# Inline markup end-strings must be immediately preceded
                       # by non-whitespace.
        {end_string}   # Inline markup end
    )

    (?=       # Inline markup end-strings must
        $|    # end a text block or
        \s|   # be immediately followed by whitespace,
        \x00|
        [{ASCII_ALLOWED_AFTER_INLINE_MARKUP}]|  # one of the ASCII characters
        [{UNICODE_ALLOWED_AFTER_INLINE_MARKUP}] # or a similar non-ASCII
                                                # punctuation character.
    )
    """
    return re.compile(pattern, flags=re.VERBOSE | re.DOTALL)
# Compiled regexes for the backtick-based inline markups, all built by
# inline_markup_gen().  Recognition rules:
# https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#inline-markup-recognition-rules
INTERPRETED_TEXT_RE = inline_markup_gen("`", "`")  # `text`
INLINE_INTERNAL_TARGET_RE = inline_markup_gen("_`", "`")  # _`text`
HYPERLINK_REFERENCES_RE = inline_markup_gen("`", "`_")  # `text`_
ANONYMOUS_HYPERLINK_REFERENCES_RE = inline_markup_gen("`", "`__")  # `text`__
INLINE_LITERAL_RE = inline_markup_gen("``", "``")  # ``text``
# A well-formed role: ":name:" preceded by an allowed character and followed
# by interpreted text.  The tail of the pattern is the source of
# INTERPRETED_TEXT_RE, which is itself written in verbose syntax, hence the
# VERBOSE flag here.
NORMAL_ROLE_RE = re.compile(
    rf"""
    (?<!\x00) # Both inline markup start-string and end-string must not be preceded by
              # an unescaped backslash

    (?<=               # Inline markup start-strings must:
        ^|             # start a text block
        \s|            # or be immediately preceded by whitespace,
        [{ASCII_ALLOWED_BEFORE_INLINE_MARKUP}]|  # one of the ASCII characters
        [{UNICODE_ALLOWED_BEFORE_INLINE_MARKUP}] # or a similar non-ASCII
                                                 # punctuation character.
    )

    :{SIMPLENAME}:{INTERPRETED_TEXT_RE.pattern}""",
    flags=re.VERBOSE | re.DOTALL,
)
# Find a role immediately preceded by a backtick, like:
#     `:issue:`123`
# The backtick must start the text or follow whitespace.  VERBOSE is needed
# because the embedded INTERPRETED_TEXT_RE pattern is written in verbose
# syntax.
BACKTICK_IN_FRONT_OF_ROLE_RE = re.compile(
    rf"(^|\s)`:{SIMPLENAME}:{INTERPRETED_TEXT_RE.pattern}",
    flags=re.VERBOSE | re.DOTALL,
)
# Find comments that look like a directive, like:
#     .. versionchanged 3.6
# or
#     .. versionchanged: 3.6
# as it should be:
#     .. versionchanged:: 3.6
# i.e. a known directive name followed by anything but a lowercase letter or
# a double colon.
SEEMS_DIRECTIVE_RE = re.compile(
    rf"^\s*(?<!\.)\.\. {ALL_DIRECTIVES}([^a-z:]|:(?!:))"
)

# Find directive prefixed with three dots instead of two, like:
#     ... versionchanged:: 3.6
# instead of:
#     .. versionchanged:: 3.6
THREE_DOT_DIRECTIVE_RE = re.compile(
    rf"\.\.\. {ALL_DIRECTIVES}::"
)

# Find role used with double backticks instead of simple backticks like:
#     :const:``None``
# instead of:
#     :const:`None`
DOUBLE_BACKTICK_ROLE_RE = re.compile(
    rf"(?<!``){ROLE_HEAD}``"
)
# Uncompiled regex fragments for the context required around a markup
# start-string / end-string.
# Prefix: start of text, or preceded by whitespace, an opener, a delimiter
# or "|".
START_STRING_PREFIX = "(^|(?<=\\s|[" + OPENERS + DELIMITERS + "|]))"
# Suffix: end of text, or followed by whitespace, a NUL character, a closing
# delimiter, a delimiter, a closer or "|".
END_STRING_SUFFIX = (
    "($|(?=\\s|[\x00" + CLOSING_DELIMITERS + DELIMITERS + CLOSERS + "|]))"
)
# Find role glued with another word like:
#     the:c:func:`PyThreadState_LeaveTracing` function.
# instead of:
#     the :c:func:`PyThreadState_LeaveTracing` function.
#
# Also finds roles missing their leading colon like:
#     issue:`123`
# instead of:
#     :issue:`123`
ROLE_GLUED_WITH_WORD_RE = re.compile(
    rf"(^|\s)(?<!:){SIMPLENAME}:`(?!`)"
)

# Find role followed by bare text instead of backquoted text, like:
#     :func:print
# instead of:
#     :func:`print`
ROLE_WITH_NO_BACKTICKS_RE = re.compile(
    rf"(^|\s):{SIMPLENAME}:(?![`:])[^\s`]+(\s|$)"
)

# Find role missing the colon between its name and the backtick, like:
#     The :issue`123` is ...
ROLE_MISSING_RIGHT_COLON_RE = re.compile(
    rf"(^|\s):{SIMPLENAME}`(?!`)"
)
# Find backquoted text embedding an http(s) URL in angle brackets, like:
#     `Python <https://www.python.org>`_
# Group 1 is the optional whitespace in front of "<", group 2 the optional
# trailing underscore; both are empty strings when absent.
SEEMS_HYPERLINK_RE = re.compile(
    r"`[^`]+?(\s?)<https?://[^`]+>`(_?)"
)

# Find any of: a lowercase letter followed by "::" and whitespace, a
# backtick, or ".." followed by a word and a colon.
LEAKED_MARKUP_RE = re.compile(
    r"[a-z]::\s|`|\.\.\s*\w+:"
)
# Find text wrapped in three backticks on each side, like:
#     ```text```
# where the opening run satisfies START_STRING_PREFIX and the closing run
# satisfies END_STRING_SUFFIX.
TRIPLE_BACKTICKS_RE = re.compile(
    rf"(?:{START_STRING_PREFIX})```[^`]+?(?<!{START_STRING_PREFIX})```(?:{END_STRING_SUFFIX})"
)

# Find a role whose opening backtick is never closed: after ":name:`" there
# is no other backtick up to the end of the searched text.  Group 1 is the
# role head, the backtick and the first following character(s).
ROLE_MISSING_CLOSING_BACKTICK_RE = re.compile(
    rf"({ROLE_HEAD}`[^`]+?)[^`]*$"
)
# A grid-table border such as "+-----+=====+": a "+", then one or more of
# "+", "=", "-", then a "+".
TABLE_HEAD_RE = re.compile(r"^\+[+=-]+\+")


def line_looks_like_a_table(line):
    """Return true if the given line looks like part of an rst table.

    After stripping surrounding whitespace, a line qualifies when it starts
    with a table border (see TABLE_HEAD_RE) or when it both starts and ends
    with "|".
    """
    stripped = line.strip()
    return bool(TABLE_HEAD_RE.match(stripped)) or (
        stripped.startswith("|") and stripped.endswith("|")
    )
def paragraph_looks_like_a_table(paragraph):
    """Return true if the given paragraph looks like an rst table.

    That is, if every line of the paragraph passes
    line_looks_like_a_table().  A paragraph without any line yields True.
    """
    for row in paragraph.splitlines():
        if not line_looks_like_a_table(row):
            return False
    return True