You cannot select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
	
	
		
			179 lines
		
	
	
		
			4.8 KiB
		
	
	
	
		
			Python
		
	
			
		
		
	
	
			179 lines
		
	
	
		
			4.8 KiB
		
	
	
	
		
			Python
		
	
"""
 | 
						|
Read SAS sas7bdat or xport files.
 | 
						|
"""
 | 
						|
from __future__ import annotations
 | 
						|
 | 
						|
from abc import (
 | 
						|
    ABC,
 | 
						|
    abstractmethod,
 | 
						|
)
 | 
						|
from typing import (
 | 
						|
    TYPE_CHECKING,
 | 
						|
    overload,
 | 
						|
)
 | 
						|
 | 
						|
from pandas.util._decorators import doc
 | 
						|
 | 
						|
from pandas.core.shared_docs import _shared_docs
 | 
						|
 | 
						|
from pandas.io.common import stringify_path
 | 
						|
 | 
						|
if TYPE_CHECKING:
 | 
						|
    from collections.abc import Hashable
 | 
						|
    from types import TracebackType
 | 
						|
 | 
						|
    from pandas._typing import (
 | 
						|
        CompressionOptions,
 | 
						|
        FilePath,
 | 
						|
        ReadBuffer,
 | 
						|
        Self,
 | 
						|
    )
 | 
						|
 | 
						|
    from pandas import DataFrame
 | 
						|
 | 
						|
 | 
						|
class ReaderBase(ABC):
    """
    Abstract base shared by the XportReader and SAS7BDATReader classes.

    Subclasses must provide ``read`` and ``close``. Any subclass can then be
    used in a ``with`` statement: entering yields the reader itself and
    leaving calls ``close``.
    """

    @abstractmethod
    def read(self, nrows: int | None = None) -> DataFrame:
        """Read from the file and return a DataFrame (subclass hook)."""

    @abstractmethod
    def close(self) -> None:
        """Close the reader (subclass hook)."""

    def __enter__(self) -> Self:
        """Enter the ``with`` block and hand back this reader."""
        return self

    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc_value: BaseException | None,
        traceback: TracebackType | None,
    ) -> None:
        """
        Close the reader when the ``with`` block is left.

        Nothing is returned, so an exception raised inside the block still
        propagates after ``close`` has run.
        """
        self.close()
 | 
						|
 | 
						|
 | 
						|
# Typing-only signature: with an integer ``chunksize`` the call is typed as
# returning a reader object rather than a DataFrame.
@overload
def read_sas(
    filepath_or_buffer: FilePath | ReadBuffer[bytes],
    *,
    format: str | None = ...,
    index: Hashable | None = ...,
    encoding: str | None = ...,
    chunksize: int = ...,
    iterator: bool = ...,
    compression: CompressionOptions = ...,
) -> ReaderBase: ...
 | 
						|
 | 
						|
 | 
						|
# Typing-only signature: with ``chunksize=None`` the result type depends on
# ``iterator``, so the call is typed as returning a DataFrame or a reader.
@overload
def read_sas(
    filepath_or_buffer: FilePath | ReadBuffer[bytes],
    *,
    format: str | None = ...,
    index: Hashable | None = ...,
    encoding: str | None = ...,
    chunksize: None = ...,
    iterator: bool = ...,
    compression: CompressionOptions = ...,
) -> DataFrame | ReaderBase: ...
 | 
						|
 | 
						|
 | 
						|
@doc(decompression_options=_shared_docs["decompression_options"] % "filepath_or_buffer")
def read_sas(
    filepath_or_buffer: FilePath | ReadBuffer[bytes],
    *,
    format: str | None = None,
    index: Hashable | None = None,
    encoding: str | None = None,
    chunksize: int | None = None,
    iterator: bool = False,
    compression: CompressionOptions = "infer",
) -> DataFrame | ReaderBase:
    """
    Read a SAS file stored in either XPORT or SAS7BDAT format.

    Parameters
    ----------
    filepath_or_buffer : str, path object, or file-like object
        String, path object (implementing ``os.PathLike[str]``), or file-like
        object implementing a binary ``read()`` function. The string could be a URL.
        Valid URL schemes include http, ftp, s3, and file. For file URLs, a host is
        expected. A local file could be:
        ``file://localhost/path/to/table.sas7bdat``.
    format : str {{'xport', 'sas7bdat'}} or None
        Format of the file, matched case-insensitively. If None, the format is
        inferred from the file name: a name containing ``.xpt`` is read as
        'xport' and a name containing ``.sas7bdat`` is read as 'sas7bdat'.
    index : identifier of index column, defaults to None
        Identifier of column that should be used as index of the DataFrame.
    encoding : str, default is None
        Encoding for text data.  If None, text data are stored as raw bytes.
    chunksize : int
        Read file `chunksize` lines at a time, returns iterator.
    iterator : bool, defaults to False
        If True, returns an iterator for reading the file incrementally.
    {decompression_options}

    Returns
    -------
    DataFrame if iterator=False and chunksize=None, else SAS7BDATReader
    or XportReader

    Raises
    ------
    ValueError
        If ``format`` is None and ``filepath_or_buffer`` is not a path or
        string, if the format cannot be inferred from the file name, or if
        ``format`` names an unknown SAS format.

    Examples
    --------
    >>> df = pd.read_sas("sas_data.sas7bdat")  # doctest: +SKIP
    """
    if format is None:
        # With no explicit format the file name is the only available hint,
        # so anything that does not reduce to a string path is rejected.
        filepath_or_buffer = stringify_path(filepath_or_buffer)
        if not isinstance(filepath_or_buffer, str):
            raise ValueError(
                "If this is a buffer object rather "
                "than a string name, you must specify a format string"
            )

        lowered_name = filepath_or_buffer.lower()
        # Substring (not suffix) checks, so names with extra trailing
        # extensions still match.
        if ".xpt" in lowered_name:
            format = "xport"
        elif ".sas7bdat" in lowered_name:
            format = "sas7bdat"
        else:
            raise ValueError(
                f"unable to infer format of SAS file from filename: {lowered_name!r}"
            )

    requested_format = format.lower()

    reader: ReaderBase
    # The reader modules are imported lazily, only for the format in use.
    if requested_format == "xport":
        from pandas.io.sas.sas_xport import XportReader

        reader = XportReader(
            filepath_or_buffer,
            index=index,
            encoding=encoding,
            chunksize=chunksize,
            compression=compression,
        )
    elif requested_format == "sas7bdat":
        from pandas.io.sas.sas7bdat import SAS7BDATReader

        reader = SAS7BDATReader(
            filepath_or_buffer,
            index=index,
            encoding=encoding,
            chunksize=chunksize,
            compression=compression,
        )
    else:
        raise ValueError("unknown SAS format")

    if not iterator and not chunksize:
        # One-shot read: the context manager closes the reader once the
        # full DataFrame has been produced.
        with reader:
            return reader.read()

    # Incremental read: the caller receives the open reader.
    return reader
 |