You cannot select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
	
	
		
			122 lines
		
	
	
		
			3.4 KiB
		
	
	
	
		
			Python
		
	
			
		
		
	
	
			122 lines
		
	
	
		
			3.4 KiB
		
	
	
	
		
			Python
		
	
from __future__ import annotations
 | 
						|
 | 
						|
from datetime import (
 | 
						|
    date,
 | 
						|
    datetime,
 | 
						|
    time,
 | 
						|
    timedelta,
 | 
						|
)
 | 
						|
from typing import (
 | 
						|
    TYPE_CHECKING,
 | 
						|
    Any,
 | 
						|
    Union,
 | 
						|
)
 | 
						|
 | 
						|
from pandas.compat._optional import import_optional_dependency
 | 
						|
from pandas.util._decorators import doc
 | 
						|
 | 
						|
import pandas as pd
 | 
						|
from pandas.core.shared_docs import _shared_docs
 | 
						|
 | 
						|
from pandas.io.excel._base import BaseExcelReader
 | 
						|
 | 
						|
if TYPE_CHECKING:
 | 
						|
    from python_calamine import (
 | 
						|
        CalamineSheet,
 | 
						|
        CalamineWorkbook,
 | 
						|
    )
 | 
						|
 | 
						|
    from pandas._typing import (
 | 
						|
        FilePath,
 | 
						|
        NaTType,
 | 
						|
        ReadBuffer,
 | 
						|
        Scalar,
 | 
						|
        StorageOptions,
 | 
						|
    )
 | 
						|
 | 
						|
_CellValue = Union[int, float, str, bool, time, date, datetime, timedelta]
 | 
						|
 | 
						|
 | 
						|
class CalamineReader(BaseExcelReader["CalamineWorkbook"]):
 | 
						|
    @doc(storage_options=_shared_docs["storage_options"])
 | 
						|
    def __init__(
 | 
						|
        self,
 | 
						|
        filepath_or_buffer: FilePath | ReadBuffer[bytes],
 | 
						|
        storage_options: StorageOptions | None = None,
 | 
						|
        engine_kwargs: dict | None = None,
 | 
						|
    ) -> None:
 | 
						|
        """
 | 
						|
        Reader using calamine engine (xlsx/xls/xlsb/ods).
 | 
						|
 | 
						|
        Parameters
 | 
						|
        ----------
 | 
						|
        filepath_or_buffer : str, path to be parsed or
 | 
						|
            an open readable stream.
 | 
						|
        {storage_options}
 | 
						|
        engine_kwargs : dict, optional
 | 
						|
            Arbitrary keyword arguments passed to excel engine.
 | 
						|
        """
 | 
						|
        import_optional_dependency("python_calamine")
 | 
						|
        super().__init__(
 | 
						|
            filepath_or_buffer,
 | 
						|
            storage_options=storage_options,
 | 
						|
            engine_kwargs=engine_kwargs,
 | 
						|
        )
 | 
						|
 | 
						|
    @property
 | 
						|
    def _workbook_class(self) -> type[CalamineWorkbook]:
 | 
						|
        from python_calamine import CalamineWorkbook
 | 
						|
 | 
						|
        return CalamineWorkbook
 | 
						|
 | 
						|
    def load_workbook(
 | 
						|
        self, filepath_or_buffer: FilePath | ReadBuffer[bytes], engine_kwargs: Any
 | 
						|
    ) -> CalamineWorkbook:
 | 
						|
        from python_calamine import load_workbook
 | 
						|
 | 
						|
        return load_workbook(filepath_or_buffer, **engine_kwargs)
 | 
						|
 | 
						|
    @property
 | 
						|
    def sheet_names(self) -> list[str]:
 | 
						|
        from python_calamine import SheetTypeEnum
 | 
						|
 | 
						|
        return [
 | 
						|
            sheet.name
 | 
						|
            for sheet in self.book.sheets_metadata
 | 
						|
            if sheet.typ == SheetTypeEnum.WorkSheet
 | 
						|
        ]
 | 
						|
 | 
						|
    def get_sheet_by_name(self, name: str) -> CalamineSheet:
 | 
						|
        self.raise_if_bad_sheet_by_name(name)
 | 
						|
        return self.book.get_sheet_by_name(name)
 | 
						|
 | 
						|
    def get_sheet_by_index(self, index: int) -> CalamineSheet:
 | 
						|
        self.raise_if_bad_sheet_by_index(index)
 | 
						|
        return self.book.get_sheet_by_index(index)
 | 
						|
 | 
						|
    def get_sheet_data(
 | 
						|
        self, sheet: CalamineSheet, file_rows_needed: int | None = None
 | 
						|
    ) -> list[list[Scalar | NaTType | time]]:
 | 
						|
        def _convert_cell(value: _CellValue) -> Scalar | NaTType | time:
 | 
						|
            if isinstance(value, float):
 | 
						|
                val = int(value)
 | 
						|
                if val == value:
 | 
						|
                    return val
 | 
						|
                else:
 | 
						|
                    return value
 | 
						|
            elif isinstance(value, date):
 | 
						|
                return pd.Timestamp(value)
 | 
						|
            elif isinstance(value, timedelta):
 | 
						|
                return pd.Timedelta(value)
 | 
						|
            elif isinstance(value, time):
 | 
						|
                return value
 | 
						|
 | 
						|
            return value
 | 
						|
 | 
						|
        rows: list[list[_CellValue]] = sheet.to_python(
 | 
						|
            skip_empty_area=False, nrows=file_rows_needed
 | 
						|
        )
 | 
						|
        data = [[_convert_cell(cell) for cell in row] for row in rows]
 | 
						|
 | 
						|
        return data
 |