You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

268 lines
7.4 KiB
Python

"""because list is complex, split list parser in a new file"""
import re
from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Tuple, Match
from .util import expand_leading_tab, expand_tab, strip_end
if TYPE_CHECKING:
from .block_parser import BlockParser
from .core import BlockState
LIST_PATTERN = (
r"^(?P<list_1> {0,3})"
r"(?P<list_2>[\*\+-]|\d{1,9}[.)])"
r"(?P<list_3>[ \t]*|[ \t].+)$"
)
_LINE_HAS_TEXT = re.compile(r"(\s*)\S")
def parse_list(block: "BlockParser", m: Match[str], state: "BlockState") -> int:
"""Parse tokens for ordered and unordered list."""
text = m.group("list_3")
if not text.strip():
# Example 285
# an empty list item cannot interrupt a paragraph
end_pos = state.append_paragraph()
if end_pos:
return end_pos
marker = m.group("list_2")
ordered = len(marker) > 1
depth = state.depth()
token: Dict[str, Any] = {
"type": "list",
"children": [],
"tight": True,
"bullet": marker[-1],
"attrs": {
"depth": depth,
"ordered": ordered,
},
}
if ordered:
start = int(marker[:-1])
if start != 1:
# Example 304
# we allow only lists starting with 1 to interrupt paragraphs
end_pos = state.append_paragraph()
if end_pos:
return end_pos
token["attrs"]["start"] = start
state.cursor = m.end() + 1
groups: Optional[Tuple[str, str, str]] = (m.group("list_1"), marker, text)
if depth >= block.max_nested_level - 1:
rules = list(block.list_rules)
rules.remove("list")
else:
rules = block.list_rules
bullet = _get_list_bullet(marker[-1])
while groups:
groups = _parse_list_item(block, bullet, groups, token, state, rules)
end_pos = token.pop("_end_pos", None)
_transform_tight_list(token)
if end_pos:
index = token.pop("_tok_index")
state.tokens.insert(index, token)
return end_pos
state.append_token(token)
return state.cursor
def _transform_tight_list(token: Dict[str, Any]) -> None:
if token["tight"]:
# reset tight list item
for list_item in token["children"]:
for tok in list_item["children"]:
if tok["type"] == "paragraph":
tok["type"] = "block_text"
elif tok["type"] == "list":
_transform_tight_list(tok)
def _parse_list_item(
block: "BlockParser",
bullet: str,
groups: Tuple[str, str, str],
token: Dict[str, Any],
state: "BlockState",
rules: List[str],
) -> Optional[Tuple[str, str, str]]:
spaces, marker, text = groups
leading_width = len(spaces) + len(marker)
text, continue_width = _compile_continue_width(text, leading_width)
item_pattern = _compile_list_item_pattern(bullet, leading_width)
list_item_breaks = [
"thematic_break",
"fenced_code",
"atx_heading",
"block_quote",
"block_html",
"list",
]
if "fenced_directive" in block.specification:
list_item_breaks.insert(1, "fenced_directive")
pairs = [(name, block.specification[name]) for name in list_item_breaks]
if leading_width < 3:
_repl_w = str(leading_width)
pairs = [(n, p.replace("3", _repl_w, 1)) for n, p in pairs]
pairs.insert(1, ("list_item", item_pattern))
regex = "|".join(r"(?P<%s>(?<=\n)%s)" % pair for pair in pairs)
sc = re.compile(regex, re.M)
src = ""
next_group = None
prev_blank_line = False
pos = state.cursor
continue_space = " " * continue_width
while pos < state.cursor_max:
pos = state.find_line_end()
line = state.get_text(pos)
if block.BLANK_LINE.match(line):
src += "\n"
prev_blank_line = True
state.cursor = pos
continue
line = expand_leading_tab(line)
if line.startswith(continue_space):
if prev_blank_line and not text and not src.strip():
# Example 280
# A list item can begin with at most one blank line
break
src += line
prev_blank_line = False
state.cursor = pos
continue
m = sc.match(state.src, state.cursor)
if m:
tok_type = m.lastgroup
if tok_type == "list_item":
if prev_blank_line:
token["tight"] = False
next_group = (m.group("listitem_1"), m.group("listitem_2"), m.group("listitem_3"))
state.cursor = m.end() + 1
break
if tok_type == "list":
break
tok_index = len(state.tokens)
end_pos = block.parse_method(m, state)
if end_pos:
token["_tok_index"] = tok_index
token["_end_pos"] = end_pos
break
if prev_blank_line and not line.startswith(continue_space):
# not a continue line, and previous line is blank
break
src += line
state.cursor = pos
text += _clean_list_item_text(src, continue_width)
child = state.child_state(strip_end(text))
block.parse(child, rules)
if token["tight"] and _is_loose_list(child.tokens):
token["tight"] = False
token["children"].append(
{
"type": "list_item",
"children": child.tokens,
}
)
if next_group:
return next_group
return None
def _get_list_bullet(c: str) -> str:
if c == ".":
bullet = r"\d{0,9}\."
elif c == ")":
bullet = r"\d{0,9}\)"
elif c == "*":
bullet = r"\*"
elif c == "+":
bullet = r"\+"
else:
bullet = "-"
return bullet
def _compile_list_item_pattern(bullet: str, leading_width: int) -> str:
if leading_width > 3:
leading_width = 3
return (
r"^(?P<listitem_1> {0," + str(leading_width) + "})"
r"(?P<listitem_2>" + bullet + ")"
r"(?P<listitem_3>[ \t]*|[ \t][^\n]+)$"
)
def _compile_continue_width(text: str, leading_width: int) -> Tuple[str, int]:
text = expand_leading_tab(text, 3)
text = expand_tab(text)
m2 = _LINE_HAS_TEXT.match(text)
if m2:
# indent code, startswith 5 spaces
if text.startswith(" "):
space_width = 1
else:
space_width = len(m2.group(1))
text = text[space_width:] + "\n"
else:
space_width = 1
text = ""
continue_width = leading_width + space_width
return text, continue_width
def _clean_list_item_text(src: str, continue_width: int) -> str:
# according to Example 7, tab should be treated as 3 spaces
rv = []
trim_space = " " * continue_width
lines = src.split("\n")
for line in lines:
if line.startswith(trim_space):
line = line.replace(trim_space, "", 1)
# according to CommonMark Example 5
# tab should be treated as 4 spaces
line = expand_tab(line)
rv.append(line)
else:
rv.append(line)
return "\n".join(rv)
def _is_loose_list(tokens: Iterable[Dict[str, Any]]) -> bool:
paragraph_count = 0
for tok in tokens:
if tok["type"] == "blank_line":
return True
if tok["type"] == "paragraph":
paragraph_count += 1
if paragraph_count > 1:
return True
return False