"""Token-related utilities"""
 | 
						|
 | 
						|
# Copyright (c) IPython Development Team.
 | 
						|
# Distributed under the terms of the Modified BSD License.
 | 
						|
from __future__ import annotations
 | 
						|
 | 
						|
import itertools
 | 
						|
import tokenize
 | 
						|
from io import StringIO
 | 
						|
from keyword import iskeyword
 | 
						|
from tokenize import TokenInfo
 | 
						|
from typing import Generator, NamedTuple
 | 
						|
 | 
						|
 | 
						|
class Token(NamedTuple):
 | 
						|
    token: int
 | 
						|
    text: str
 | 
						|
    start: int
 | 
						|
    end: int
 | 
						|
    line: str
 | 
						|
 | 
						|
 | 
						|
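
# Token mirrors the fields of tokenize.TokenInfo, so plain tuples from the
# tokenizer can be rewrapped for attribute access, e.g. (illustrative only):
#     tok = Token(*next(tokenize.generate_tokens(StringIO("x = 1").readline)))
#     tok.text  # -> 'x'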


def generate_tokens(readline) -> Generator[TokenInfo, None, None]:
    """wrap generate_tokens to catch EOF errors"""
    try:
        yield from tokenize.generate_tokens(readline)
    except tokenize.TokenError:
        # catch EOF error
        return
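
# Illustrative usage (not from the original source): an incomplete multi-line
# statement would normally raise TokenError at EOF; here the stream just ends.
#     toks = list(generate_tokens(StringIO("foo(1 +").readline))
#     # -> the NAME/OP/NUMBER tokens seen before EOF, and no exception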


def generate_tokens_catch_errors(
    readline, extra_errors_to_catch: list[str] | None = None
):
    """wrap generate_tokens, turning recognized errors into an ERRORTOKEN

    Tokenization errors whose message matches one of ``errors_to_catch`` are
    swallowed and a single ERRORTOKEN is yielded in their place; any other
    TokenError is re-raised.
    """
    default_errors_to_catch = [
        "unterminated string literal",
        "invalid non-printable character",
        "after line continuation character",
    ]
    assert extra_errors_to_catch is None or isinstance(extra_errors_to_catch, list)
    errors_to_catch = default_errors_to_catch + (extra_errors_to_catch or [])

    tokens: list[TokenInfo] = []
    try:
        for token in tokenize.generate_tokens(readline):
            tokens.append(token)
            yield token
    except tokenize.TokenError as exc:
        if any(error in exc.args[0] for error in errors_to_catch):
            if tokens:
                # place the error token at the rows of the last good token
                start = tokens[-1].start[0], tokens[-1].end[0]
                end = start
                line = tokens[-1].line
            else:
                start = end = (1, 0)
                line = ""
            yield TokenInfo(tokenize.ERRORTOKEN, "", start, end, line)
        else:
            # unrecognized errors (e.g. EOF inside a multi-line statement)
            # are re-raised
            raise
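
# Illustrative usage (assumes CPython 3.12's tokenizer messages; the exact
# TokenError text varies across Python versions): an unterminated string
# matches "unterminated string literal", so a trailing ERRORTOKEN is yielded.
#     toks = list(generate_tokens_catch_errors(StringIO("x = 'oops").readline))
#     toks[-1].type == tokenize.ERRORTOKEN  # -> True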


def line_at_cursor(cell: str, cursor_pos: int = 0) -> tuple[str, int]:
    """Return the line in a cell at a given cursor position

    Used for calling line-based APIs that don't support multi-line input yet.

    Parameters
    ----------
    cell : str
        multiline block of text
    cursor_pos : int
        the cursor position

    Returns
    -------
    (line, offset): (str, int)
        The line with the current cursor, and the character offset of the start of the line.
    """
    offset = 0
    lines = cell.splitlines(True)
    for line in lines:
        next_offset = offset + len(line)
        if not line.endswith("\n"):
            # If the last line doesn't have a trailing newline, treat it as if
            # it does so that the cursor at the end of the line still counts
            # as being on that line.
            next_offset += 1
        if next_offset > cursor_pos:
            break
        offset = next_offset
    else:
        line = ""
    return line, offset
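
# Illustrative usage (not from the original source):
#     line_at_cursor("a\nbc\n", cursor_pos=3)  # -> ("bc\n", 2)
# cursor position 3 falls on the "c" in the second line, which starts at
# character offset 2.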


def token_at_cursor(cell: str, cursor_pos: int = 0) -> str:
    """Get the token at a given cursor

    Used for introspection.

    Function calls are prioritized, so the token for the callable will be returned
    if the cursor is anywhere inside the call.

    Parameters
    ----------
    cell : str
        A block of Python code
    cursor_pos : int
        The location of the cursor in the block where the token should be found
    """
    names: list[str] = []
    call_names: list[str] = []
    closing_call_name: str | None = None
    most_recent_outer_name: str | None = None

    offsets = {1: 0}  # lines start at 1
    intersects_with_cursor = False
    cur_token_is_name = False
    tokens: list[Token | None] = [
        Token(*tup) for tup in generate_tokens(StringIO(cell).readline)
    ]
    if not tokens:
        return ""
    for prev_tok, (tok, next_tok) in zip(
        [None] + tokens, itertools.pairwise(tokens + [None])
    ):
        # Token fields: token, text, start, end, line
        start_line, start_col = tok.start
        end_line, end_col = tok.end
        if end_line + 1 not in offsets:
            # keep track of offsets for each line
            lines = tok.line.splitlines(True)
            for lineno, line in enumerate(lines, start_line + 1):
                if lineno not in offsets:
                    offsets[lineno] = offsets[lineno - 1] + len(line)

        closing_call_name = None

        offset = offsets[start_line]
        if offset + start_col > cursor_pos:
            # current token starts after the cursor,
            # don't consume it
            break

        if cur_token_is_name := tok.token == tokenize.NAME and not iskeyword(tok.text):
            if (
                names
                and prev_tok
                and prev_tok.token == tokenize.OP
                and prev_tok.text == "."
            ):
                names[-1] = "%s.%s" % (names[-1], tok.text)
            else:
                names.append(tok.text)
            if (
                next_tok is not None
                and next_tok.token == tokenize.OP
                and next_tok.text == "="
            ):
                # don't inspect the lhs of an assignment
                names.pop(-1)
                cur_token_is_name = False
            if not call_names:
                most_recent_outer_name = names[-1] if names else None
        elif tok.token == tokenize.OP:
            if tok.text == "(" and names:
                # if we are inside a function call, inspect the function
                call_names.append(names[-1])
            elif tok.text == ")" and call_names:
                # keep track of the most recently popped call_name from the stack
                closing_call_name = call_names.pop(-1)

        if offsets[end_line] + end_col > cursor_pos:
            # we found the cursor, stop reading
            # if the current token intersects directly, use it instead of the call token
            intersects_with_cursor = offsets[start_line] + start_col <= cursor_pos
            break

    if cur_token_is_name and intersects_with_cursor:
        return names[-1]
    # if the cursor isn't directly over a name token, use the most recent
    # call name if we can find one
    elif closing_call_name:
        # if we're on a ")", use the most recently popped call name
        return closing_call_name
    elif call_names:
        # otherwise, look for the most recent call name in the stack
        return call_names[-1]
    elif most_recent_outer_name:
        # if we've popped all the call names, use the most recently-seen
        # outer name
        return most_recent_outer_name
    elif names:
        # failing that, use the most recently seen name
        return names[-1]
    else:
        # give up
        return ""
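

if __name__ == "__main__":
    # Minimal smoke test, added for illustration (not part of the original
    # module): a cursor inside a call resolves to the callable's name, while
    # a cursor directly on a name resolves to that name.
    source = "print(sum(x))"
    assert token_at_cursor(source, cursor_pos=8) == "sum"
    assert token_at_cursor(source, cursor_pos=2) == "print"
    assert line_at_cursor(source, cursor_pos=8) == (source, 0)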