You cannot select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
	
	
		
			207 lines
		
	
	
		
			6.5 KiB
		
	
	
	
		
			Python
		
	
			
		
		
	
	
			207 lines
		
	
	
		
			6.5 KiB
		
	
	
	
		
			Python
		
	
"""
 | 
						|
Module for formatting output data in console (to string).
 | 
						|
"""
 | 
						|
from __future__ import annotations
 | 
						|
 | 
						|
from shutil import get_terminal_size
 | 
						|
from typing import TYPE_CHECKING
 | 
						|
 | 
						|
import numpy as np
 | 
						|
 | 
						|
from pandas.io.formats.printing import pprint_thing
 | 
						|
 | 
						|
if TYPE_CHECKING:
 | 
						|
    from collections.abc import Iterable
 | 
						|
 | 
						|
    from pandas.io.formats.format import DataFrameFormatter
 | 
						|
 | 
						|
 | 
						|
class StringFormatter:
 | 
						|
    """Formatter for string representation of a dataframe."""
 | 
						|
 | 
						|
    def __init__(self, fmt: DataFrameFormatter, line_width: int | None = None) -> None:
 | 
						|
        self.fmt = fmt
 | 
						|
        self.adj = fmt.adj
 | 
						|
        self.frame = fmt.frame
 | 
						|
        self.line_width = line_width
 | 
						|
 | 
						|
    def to_string(self) -> str:
 | 
						|
        text = self._get_string_representation()
 | 
						|
        if self.fmt.should_show_dimensions:
 | 
						|
            text = f"{text}{self.fmt.dimensions_info}"
 | 
						|
        return text
 | 
						|
 | 
						|
    def _get_strcols(self) -> list[list[str]]:
 | 
						|
        strcols = self.fmt.get_strcols()
 | 
						|
        if self.fmt.is_truncated:
 | 
						|
            strcols = self._insert_dot_separators(strcols)
 | 
						|
        return strcols
 | 
						|
 | 
						|
    def _get_string_representation(self) -> str:
 | 
						|
        if self.fmt.frame.empty:
 | 
						|
            return self._empty_info_line
 | 
						|
 | 
						|
        strcols = self._get_strcols()
 | 
						|
 | 
						|
        if self.line_width is None:
 | 
						|
            # no need to wrap around just print the whole frame
 | 
						|
            return self.adj.adjoin(1, *strcols)
 | 
						|
 | 
						|
        if self._need_to_wrap_around:
 | 
						|
            return self._join_multiline(strcols)
 | 
						|
 | 
						|
        return self._fit_strcols_to_terminal_width(strcols)
 | 
						|
 | 
						|
    @property
 | 
						|
    def _empty_info_line(self) -> str:
 | 
						|
        return (
 | 
						|
            f"Empty {type(self.frame).__name__}\n"
 | 
						|
            f"Columns: {pprint_thing(self.frame.columns)}\n"
 | 
						|
            f"Index: {pprint_thing(self.frame.index)}"
 | 
						|
        )
 | 
						|
 | 
						|
    @property
 | 
						|
    def _need_to_wrap_around(self) -> bool:
 | 
						|
        return bool(self.fmt.max_cols is None or self.fmt.max_cols > 0)
 | 
						|
 | 
						|
    def _insert_dot_separators(self, strcols: list[list[str]]) -> list[list[str]]:
 | 
						|
        str_index = self.fmt._get_formatted_index(self.fmt.tr_frame)
 | 
						|
        index_length = len(str_index)
 | 
						|
 | 
						|
        if self.fmt.is_truncated_horizontally:
 | 
						|
            strcols = self._insert_dot_separator_horizontal(strcols, index_length)
 | 
						|
 | 
						|
        if self.fmt.is_truncated_vertically:
 | 
						|
            strcols = self._insert_dot_separator_vertical(strcols, index_length)
 | 
						|
 | 
						|
        return strcols
 | 
						|
 | 
						|
    @property
 | 
						|
    def _adjusted_tr_col_num(self) -> int:
 | 
						|
        return self.fmt.tr_col_num + 1 if self.fmt.index else self.fmt.tr_col_num
 | 
						|
 | 
						|
    def _insert_dot_separator_horizontal(
 | 
						|
        self, strcols: list[list[str]], index_length: int
 | 
						|
    ) -> list[list[str]]:
 | 
						|
        strcols.insert(self._adjusted_tr_col_num, [" ..."] * index_length)
 | 
						|
        return strcols
 | 
						|
 | 
						|
    def _insert_dot_separator_vertical(
 | 
						|
        self, strcols: list[list[str]], index_length: int
 | 
						|
    ) -> list[list[str]]:
 | 
						|
        n_header_rows = index_length - len(self.fmt.tr_frame)
 | 
						|
        row_num = self.fmt.tr_row_num
 | 
						|
        for ix, col in enumerate(strcols):
 | 
						|
            cwidth = self.adj.len(col[row_num])
 | 
						|
 | 
						|
            if self.fmt.is_truncated_horizontally:
 | 
						|
                is_dot_col = ix == self._adjusted_tr_col_num
 | 
						|
            else:
 | 
						|
                is_dot_col = False
 | 
						|
 | 
						|
            if cwidth > 3 or is_dot_col:
 | 
						|
                dots = "..."
 | 
						|
            else:
 | 
						|
                dots = ".."
 | 
						|
 | 
						|
            if ix == 0 and self.fmt.index:
 | 
						|
                dot_mode = "left"
 | 
						|
            elif is_dot_col:
 | 
						|
                cwidth = 4
 | 
						|
                dot_mode = "right"
 | 
						|
            else:
 | 
						|
                dot_mode = "right"
 | 
						|
 | 
						|
            dot_str = self.adj.justify([dots], cwidth, mode=dot_mode)[0]
 | 
						|
            col.insert(row_num + n_header_rows, dot_str)
 | 
						|
        return strcols
 | 
						|
 | 
						|
    def _join_multiline(self, strcols_input: Iterable[list[str]]) -> str:
 | 
						|
        lwidth = self.line_width
 | 
						|
        adjoin_width = 1
 | 
						|
        strcols = list(strcols_input)
 | 
						|
 | 
						|
        if self.fmt.index:
 | 
						|
            idx = strcols.pop(0)
 | 
						|
            lwidth -= np.array([self.adj.len(x) for x in idx]).max() + adjoin_width
 | 
						|
 | 
						|
        col_widths = [
 | 
						|
            np.array([self.adj.len(x) for x in col]).max() if len(col) > 0 else 0
 | 
						|
            for col in strcols
 | 
						|
        ]
 | 
						|
 | 
						|
        assert lwidth is not None
 | 
						|
        col_bins = _binify(col_widths, lwidth)
 | 
						|
        nbins = len(col_bins)
 | 
						|
 | 
						|
        str_lst = []
 | 
						|
        start = 0
 | 
						|
        for i, end in enumerate(col_bins):
 | 
						|
            row = strcols[start:end]
 | 
						|
            if self.fmt.index:
 | 
						|
                row.insert(0, idx)
 | 
						|
            if nbins > 1:
 | 
						|
                nrows = len(row[-1])
 | 
						|
                if end <= len(strcols) and i < nbins - 1:
 | 
						|
                    row.append([" \\"] + ["  "] * (nrows - 1))
 | 
						|
                else:
 | 
						|
                    row.append([" "] * nrows)
 | 
						|
            str_lst.append(self.adj.adjoin(adjoin_width, *row))
 | 
						|
            start = end
 | 
						|
        return "\n\n".join(str_lst)
 | 
						|
 | 
						|
    def _fit_strcols_to_terminal_width(self, strcols: list[list[str]]) -> str:
 | 
						|
        from pandas import Series
 | 
						|
 | 
						|
        lines = self.adj.adjoin(1, *strcols).split("\n")
 | 
						|
        max_len = Series(lines).str.len().max()
 | 
						|
        # plus truncate dot col
 | 
						|
        width, _ = get_terminal_size()
 | 
						|
        dif = max_len - width
 | 
						|
        # '+ 1' to avoid too wide repr (GH PR #17023)
 | 
						|
        adj_dif = dif + 1
 | 
						|
        col_lens = Series([Series(ele).str.len().max() for ele in strcols])
 | 
						|
        n_cols = len(col_lens)
 | 
						|
        counter = 0
 | 
						|
        while adj_dif > 0 and n_cols > 1:
 | 
						|
            counter += 1
 | 
						|
            mid = round(n_cols / 2)
 | 
						|
            mid_ix = col_lens.index[mid]
 | 
						|
            col_len = col_lens[mid_ix]
 | 
						|
            # adjoin adds one
 | 
						|
            adj_dif -= col_len + 1
 | 
						|
            col_lens = col_lens.drop(mid_ix)
 | 
						|
            n_cols = len(col_lens)
 | 
						|
 | 
						|
        # subtract index column
 | 
						|
        max_cols_fitted = n_cols - self.fmt.index
 | 
						|
        # GH-21180. Ensure that we print at least two.
 | 
						|
        max_cols_fitted = max(max_cols_fitted, 2)
 | 
						|
        self.fmt.max_cols_fitted = max_cols_fitted
 | 
						|
 | 
						|
        # Call again _truncate to cut frame appropriately
 | 
						|
        # and then generate string representation
 | 
						|
        self.fmt.truncate()
 | 
						|
        strcols = self._get_strcols()
 | 
						|
        return self.adj.adjoin(1, *strcols)
 | 
						|
 | 
						|
 | 
						|
def _binify(cols: list[int], line_width: int) -> list[int]:
 | 
						|
    adjoin_width = 1
 | 
						|
    bins = []
 | 
						|
    curr_width = 0
 | 
						|
    i_last_column = len(cols) - 1
 | 
						|
    for i, w in enumerate(cols):
 | 
						|
        w_adjoined = w + adjoin_width
 | 
						|
        curr_width += w_adjoined
 | 
						|
        if i_last_column == i:
 | 
						|
            wrap = curr_width + 1 > line_width and i > 0
 | 
						|
        else:
 | 
						|
            wrap = curr_width + 2 > line_width and i > 0
 | 
						|
        if wrap:
 | 
						|
            bins.append(i)
 | 
						|
            curr_width = w_adjoined
 | 
						|
 | 
						|
    bins.append(len(cols))
 | 
						|
    return bins
 |