You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
200 lines
6.4 KiB
Python
200 lines
6.4 KiB
Python
"""Token-related utilities"""
|
|
|
|
# Copyright (c) IPython Development Team.
|
|
# Distributed under the terms of the Modified BSD License.
|
|
from __future__ import annotations
|
|
|
|
import itertools
|
|
import tokenize
|
|
from io import StringIO
|
|
from keyword import iskeyword
|
|
from tokenize import TokenInfo
|
|
from typing import Generator, NamedTuple
|
|
|
|
|
|
class Token(NamedTuple):
    """A single lexical token with the same field layout as ``tokenize.TokenInfo``.

    Instances are built by unpacking the tuples yielded by the tokenizer
    (``Token(*tup)``), so the field order must stay unchanged.
    """

    # Token type constant; compared against ``tokenize.NAME`` / ``tokenize.OP``.
    token: int
    # Source text of the token.
    text: str
    # (row, column) where the token begins; rows are 1-based.
    start: tuple[int, int]
    # (row, column) where the token ends.
    end: tuple[int, int]
    # The source line(s) the token was found on.
    line: str
|
|
|
|
|
|
def generate_tokens(readline) -> Generator[TokenInfo, None, None]:
    """Yield tokens from ``tokenize.generate_tokens``, ending quietly on ``TokenError``.

    Parameters
    ----------
    readline : callable
        Passed unchanged to ``tokenize.generate_tokens``.

    Yields
    ------
    TokenInfo
        Every token produced before the tokenizer either finishes or raises
        ``tokenize.TokenError`` (e.g. input that ends in the middle of a
        statement).
    """
    token_stream = tokenize.generate_tokens(readline)
    try:
        yield from token_stream
    except tokenize.TokenError:
        # Incomplete input (EOF error): finish the stream instead of raising.
        pass
|
|
|
|
|
|
def generate_tokens_catch_errors(
|
|
readline, extra_errors_to_catch: list[str] | None = None
|
|
):
|
|
default_errors_to_catch = [
|
|
"unterminated string literal",
|
|
"invalid non-printable character",
|
|
"after line continuation character",
|
|
]
|
|
assert extra_errors_to_catch is None or isinstance(extra_errors_to_catch, list)
|
|
errors_to_catch = default_errors_to_catch + (extra_errors_to_catch or [])
|
|
|
|
tokens: list[TokenInfo] = []
|
|
try:
|
|
for token in tokenize.generate_tokens(readline):
|
|
tokens.append(token)
|
|
yield token
|
|
except tokenize.TokenError as exc:
|
|
if any(error in exc.args[0] for error in errors_to_catch):
|
|
if tokens:
|
|
start = tokens[-1].start[0], tokens[-1].end[0]
|
|
end = start
|
|
line = tokens[-1].line
|
|
else:
|
|
start = end = (1, 0)
|
|
line = ""
|
|
yield TokenInfo(tokenize.ERRORTOKEN, "", start, end, line)
|
|
else:
|
|
# Catch EOF
|
|
raise
|
|
|
|
|
|
def line_at_cursor(cell: str, cursor_pos: int = 0) -> tuple[str, int]:
    """Return the line in a cell at a given cursor position

    Used for calling line-based APIs that don't support multi-line input, yet.

    Parameters
    ----------
    cell : str
        multiline block of text
    cursor_pos : integer
        the cursor position

    Returns
    -------
    (line, offset): (string, integer)
        The line with the current cursor, and the character offset of the start of the line.
        If the cursor lies beyond the last line, the line is ``""``.
    """
    offset = 0
    for line in cell.splitlines(True):
        next_offset = offset + len(line)
        # ``splitlines`` breaks on more than "\n" ("\r", "\f", "\u2028", ...),
        # so detect a missing terminator by checking whether stripping the
        # line ending changes the line, rather than testing for "\n" only.
        if line.splitlines()[0] == line:
            # Only an unterminated (hence final) line gets here: treat it as
            # if it had a terminator so that a cursor at the very end of the
            # text still counts as being on that line.
            next_offset += 1
        if next_offset > cursor_pos:
            return line, offset
        offset = next_offset
    # The cursor is past every line (or the cell is empty).
    return "", offset
|
|
|
|
|
|
def token_at_cursor(cell: str, cursor_pos: int = 0) -> str:
    """Get the token at a given cursor

    Used for introspection.

    Function calls are prioritized, so the token for the callable will be returned
    if the cursor is anywhere inside the call.

    Parameters
    ----------
    cell : str
        A block of Python code
    cursor_pos : int
        The location of the cursor in the block where the token should be found

    Returns
    -------
    str
        The (possibly dotted) name chosen for the cursor position, or ``""``
        when no name is found. Candidates are tried in this order: a name
        token directly under the cursor; the callable closed by a ``)`` when
        the scan stops on that ``)``; the innermost call still open; the most
        recent name seen outside any call; the most recent name seen at all.
    """
    # Names seen so far; attribute access is folded into one dotted entry.
    names: list[str] = []
    # Stack of callables whose "(" has been seen but not yet closed.
    call_names: list[str] = []
    # Set only while the token being processed is the ")" closing a call.
    closing_call_name: str | None = None
    # Last name recorded while no call was open.
    most_recent_outer_name: str | None = None

    # Maps a line number to the character offset of that line's start in `cell`.
    offsets = {1: 0}  # lines start at 1
    intersects_with_cursor = False
    cur_token_is_name = False
    # The comprehension only produces Token instances; None appears solely as
    # padding in the prev/next pairing below.
    tokens: list[Token | None] = [
        Token(*tup) for tup in generate_tokens(StringIO(cell).readline)
    ]
    if not tokens:
        return ""
    # Walk the tokens with one token of look-behind and one of look-ahead;
    # prev_tok is None for the first token and next_tok is None for the last.
    for prev_tok, (tok, next_tok) in zip(
        [None] + tokens, itertools.pairwise(tokens + [None])
    ):
        # start/end are (line, column) pairs
        start_line, start_col = tok.start
        end_line, end_col = tok.end
        if end_line + 1 not in offsets:
            # keep track of offsets for each line
            lines = tok.line.splitlines(True)
            for lineno, line in enumerate(lines, start_line + 1):
                if lineno not in offsets:
                    offsets[lineno] = offsets[lineno - 1] + len(line)

        # Reset on every token so it is only non-None right after a ")".
        closing_call_name = None

        offset = offsets[start_line]
        if offset + start_col > cursor_pos:
            # current token starts after the cursor,
            # don't consume it
            break

        # A NAME token that is not a Python keyword counts as a name.
        if cur_token_is_name := tok.token == tokenize.NAME and not iskeyword(tok.text):
            if (
                names
                and prev_tok
                and prev_tok.token == tokenize.OP
                and prev_tok.text == "."
            ):
                # attribute access: extend the previous name to "prev.attr"
                names[-1] = "%s.%s" % (names[-1], tok.text)
            else:
                names.append(tok.text)
            if (
                next_tok is not None
                and next_tok.token == tokenize.OP
                and next_tok.text == "="
            ):
                # don't inspect the lhs of an assignment
                names.pop(-1)
                cur_token_is_name = False
            if not call_names:
                # not inside any call: remember this as the outer name
                most_recent_outer_name = names[-1] if names else None
        elif tok.token == tokenize.OP:
            if tok.text == "(" and names:
                # if we are inside a function call, inspect the function
                call_names.append(names[-1])
            elif tok.text == ")" and call_names:
                # keep track of the most recently popped call_name from the stack
                closing_call_name = call_names.pop(-1)

        if offsets[end_line] + end_col > cursor_pos:
            # we found the cursor, stop reading
            # if the current token intersects directly, use it instead of the call token
            intersects_with_cursor = offsets[start_line] + start_col <= cursor_pos
            break

    if cur_token_is_name and intersects_with_cursor:
        return names[-1]
    # if the cursor isn't directly over a name token, use the most recent
    # call name if we can find one
    elif closing_call_name:
        # if we're on a ")", use the most recently popped call name
        return closing_call_name
    elif call_names:
        # otherwise, look for the most recent call name in the stack
        return call_names[-1]
    elif most_recent_outer_name:
        # if we've popped all the call names, use the most recently-seen
        # outer name
        return most_recent_outer_name
    elif names:
        # failing that, use the most recently seen name
        return names[-1]
    else:
        # give up
        return ""
|