"""
 | 
						|
manage legacy pickle tests
 | 
						|
 | 
						|
How to add pickle tests:
 | 
						|
 | 
						|
1. Install pandas version intended to output the pickle.
 | 
						|
 | 
						|
2. Execute "generate_legacy_storage_files.py" to create the pickle.
 | 
						|
$ python generate_legacy_storage_files.py <output_dir> pickle
 | 
						|
 | 
						|
3. Move the created pickle to "data/legacy_pickle/<version>" directory.
 | 
						|
"""
 | 
						|
from __future__ import annotations

from array import array
import bz2
import datetime
import functools
from functools import partial
import gzip
import io
import os
from pathlib import Path
import pickle
import shutil
import tarfile
from typing import Any
import uuid
import zipfile

import numpy as np
import pytest

from pandas.compat import (
    get_lzma_file,
    is_platform_little_endian,
)
from pandas.compat._optional import import_optional_dependency
from pandas.compat.compressors import flatten_buffer
import pandas.util._test_decorators as td

import pandas as pd
from pandas import (
    DataFrame,
    Index,
    Series,
    period_range,
)
import pandas._testing as tm
from pandas.tests.io.generate_legacy_storage_files import create_pickle_data

import pandas.io.common as icom
from pandas.tseries.offsets import (
    Day,
    MonthEnd,
)
# ---------------------
# comparison functions
# ---------------------
def compare_element(result, expected, typ):
    """Assert that a round-tripped ``result`` matches ``expected``.

    ``typ`` is the key from the legacy-pickle data dict and selects
    the appropriate pandas assertion helper.
    """
    # Index objects have a dedicated comparator; no dispatch needed.
    if isinstance(expected, Index):
        tm.assert_index_equal(expected, result)
        return

    if typ.startswith("sp_"):
        # sparse types use the generic equality assertion
        tm.assert_equal(result, expected)
    elif typ == "timestamp":
        # NaT must be compared by identity, not equality
        if expected is pd.NaT:
            assert result is pd.NaT
        else:
            assert result == expected
    else:
        # fall back to a type-specific comparator when one exists
        comparator = getattr(tm, f"assert_{typ}_equal", tm.assert_almost_equal)
        comparator(result, expected)
# ---------------------
# tests
# ---------------------


@pytest.mark.parametrize(
    "data",
    [
        b"123",
        b"123456",
        bytearray(b"123"),
        memoryview(b"123"),
        pickle.PickleBuffer(b"123"),
        array("I", [1, 2, 3]),
        memoryview(b"123456").cast("B", (3, 2)),
        memoryview(b"123456").cast("B", (3, 2))[::2],
        np.arange(12).reshape((3, 4), order="C"),
        np.arange(12).reshape((3, 4), order="F"),
        np.arange(12).reshape((3, 4), order="C")[:, ::2],
    ],
)
def test_flatten_buffer(data):
    """flatten_buffer must yield the same bytes as an 'A'-order copy."""
    result = flatten_buffer(data)
    expected = memoryview(data).tobytes("A")
    assert result == expected

    if isinstance(data, (bytes, bytearray)):
        # already-flat byte objects are passed through unchanged
        assert result is data
    elif isinstance(result, memoryview):
        # any returned view must be a 1-D, contiguous run of unsigned bytes
        assert result.ndim == 1
        assert result.format == "B"
        assert result.contiguous
        assert result.shape == (result.nbytes,)
						|
 | 
						|
def test_pickles(datapath):
 | 
						|
    if not is_platform_little_endian():
 | 
						|
        pytest.skip("known failure on non-little endian")
 | 
						|
 | 
						|
    # For loop for compat with --strict-data-files
 | 
						|
    for legacy_pickle in Path(__file__).parent.glob("data/legacy_pickle/*/*.p*kl*"):
 | 
						|
        legacy_pickle = datapath(legacy_pickle)
 | 
						|
 | 
						|
        data = pd.read_pickle(legacy_pickle)
 | 
						|
 | 
						|
        for typ, dv in data.items():
 | 
						|
            for dt, result in dv.items():
 | 
						|
                expected = data[typ][dt]
 | 
						|
 | 
						|
                if typ == "series" and dt == "ts":
 | 
						|
                    # GH 7748
 | 
						|
                    tm.assert_series_equal(result, expected)
 | 
						|
                    assert result.index.freq == expected.index.freq
 | 
						|
                    assert not result.index.freq.normalize
 | 
						|
                    tm.assert_series_equal(result > 0, expected > 0)
 | 
						|
 | 
						|
                    # GH 9291
 | 
						|
                    freq = result.index.freq
 | 
						|
                    assert freq + Day(1) == Day(2)
 | 
						|
 | 
						|
                    res = freq + pd.Timedelta(hours=1)
 | 
						|
                    assert isinstance(res, pd.Timedelta)
 | 
						|
                    assert res == pd.Timedelta(days=1, hours=1)
 | 
						|
 | 
						|
                    res = freq + pd.Timedelta(nanoseconds=1)
 | 
						|
                    assert isinstance(res, pd.Timedelta)
 | 
						|
                    assert res == pd.Timedelta(days=1, nanoseconds=1)
 | 
						|
                elif typ == "index" and dt == "period":
 | 
						|
                    tm.assert_index_equal(result, expected)
 | 
						|
                    assert isinstance(result.freq, MonthEnd)
 | 
						|
                    assert result.freq == MonthEnd()
 | 
						|
                    assert result.freqstr == "M"
 | 
						|
                    tm.assert_index_equal(result.shift(2), expected.shift(2))
 | 
						|
                elif typ == "series" and dt in ("dt_tz", "cat"):
 | 
						|
                    tm.assert_series_equal(result, expected)
 | 
						|
                elif typ == "frame" and dt in (
 | 
						|
                    "dt_mixed_tzs",
 | 
						|
                    "cat_onecol",
 | 
						|
                    "cat_and_float",
 | 
						|
                ):
 | 
						|
                    tm.assert_frame_equal(result, expected)
 | 
						|
                else:
 | 
						|
                    compare_element(result, expected, typ)
 | 
						|
 | 
						|
 | 
						|
def python_pickler(obj, path):
 | 
						|
    with open(path, "wb") as fh:
 | 
						|
        pickle.dump(obj, fh, protocol=-1)
 | 
						|
 | 
						|
 | 
						|
def python_unpickler(path):
 | 
						|
    with open(path, "rb") as fh:
 | 
						|
        fh.seek(0)
 | 
						|
        return pickle.load(fh)
 | 
						|
 | 
						|
 | 
						|
def flatten(data: dict) -> list[tuple[str, Any]]:
 | 
						|
    """Flatten create_pickle_data"""
 | 
						|
    return [
 | 
						|
        (typ, example)
 | 
						|
        for typ, examples in data.items()
 | 
						|
        for example in examples.values()
 | 
						|
    ]
 | 
						|
 | 
						|
 | 
						|
@pytest.mark.parametrize(
 | 
						|
    "pickle_writer",
 | 
						|
    [
 | 
						|
        pytest.param(python_pickler, id="python"),
 | 
						|
        pytest.param(pd.to_pickle, id="pandas_proto_default"),
 | 
						|
        pytest.param(
 | 
						|
            functools.partial(pd.to_pickle, protocol=pickle.HIGHEST_PROTOCOL),
 | 
						|
            id="pandas_proto_highest",
 | 
						|
        ),
 | 
						|
        pytest.param(functools.partial(pd.to_pickle, protocol=4), id="pandas_proto_4"),
 | 
						|
        pytest.param(
 | 
						|
            functools.partial(pd.to_pickle, protocol=5),
 | 
						|
            id="pandas_proto_5",
 | 
						|
        ),
 | 
						|
    ],
 | 
						|
)
 | 
						|
@pytest.mark.parametrize("writer", [pd.to_pickle, python_pickler])
 | 
						|
@pytest.mark.parametrize("typ, expected", flatten(create_pickle_data()))
 | 
						|
def test_round_trip_current(typ, expected, pickle_writer, writer):
 | 
						|
    with tm.ensure_clean() as path:
 | 
						|
        # test writing with each pickler
 | 
						|
        pickle_writer(expected, path)
 | 
						|
 | 
						|
        # test reading with each unpickler
 | 
						|
        result = pd.read_pickle(path)
 | 
						|
        compare_element(result, expected, typ)
 | 
						|
 | 
						|
        result = python_unpickler(path)
 | 
						|
        compare_element(result, expected, typ)
 | 
						|
 | 
						|
        # and the same for file objects (GH 35679)
 | 
						|
        with open(path, mode="wb") as handle:
 | 
						|
            writer(expected, path)
 | 
						|
            handle.seek(0)  # shouldn't close file handle
 | 
						|
        with open(path, mode="rb") as handle:
 | 
						|
            result = pd.read_pickle(handle)
 | 
						|
            handle.seek(0)  # shouldn't close file handle
 | 
						|
        compare_element(result, expected, typ)
 | 
						|
 | 
						|
 | 
						|
def test_pickle_path_pathlib():
 | 
						|
    df = DataFrame(
 | 
						|
        1.1 * np.arange(120).reshape((30, 4)),
 | 
						|
        columns=Index(list("ABCD"), dtype=object),
 | 
						|
        index=Index([f"i-{i}" for i in range(30)], dtype=object),
 | 
						|
    )
 | 
						|
    result = tm.round_trip_pathlib(df.to_pickle, pd.read_pickle)
 | 
						|
    tm.assert_frame_equal(df, result)
 | 
						|
 | 
						|
 | 
						|
def test_pickle_path_localpath():
 | 
						|
    df = DataFrame(
 | 
						|
        1.1 * np.arange(120).reshape((30, 4)),
 | 
						|
        columns=Index(list("ABCD"), dtype=object),
 | 
						|
        index=Index([f"i-{i}" for i in range(30)], dtype=object),
 | 
						|
    )
 | 
						|
    result = tm.round_trip_localpath(df.to_pickle, pd.read_pickle)
 | 
						|
    tm.assert_frame_equal(df, result)
 | 
						|
 | 
						|
 | 
						|
# ---------------------
 | 
						|
# test pickle compression
 | 
						|
# ---------------------
 | 
						|
 | 
						|
 | 
						|
@pytest.fixture
 | 
						|
def get_random_path():
 | 
						|
    return f"__{uuid.uuid4()}__.pickle"
 | 
						|
 | 
						|
 | 
						|
class TestCompression:
 | 
						|
    _extension_to_compression = icom.extension_to_compression
 | 
						|
 | 
						|
    def compress_file(self, src_path, dest_path, compression):
 | 
						|
        if compression is None:
 | 
						|
            shutil.copyfile(src_path, dest_path)
 | 
						|
            return
 | 
						|
 | 
						|
        if compression == "gzip":
 | 
						|
            f = gzip.open(dest_path, "w")
 | 
						|
        elif compression == "bz2":
 | 
						|
            f = bz2.BZ2File(dest_path, "w")
 | 
						|
        elif compression == "zip":
 | 
						|
            with zipfile.ZipFile(dest_path, "w", compression=zipfile.ZIP_DEFLATED) as f:
 | 
						|
                f.write(src_path, os.path.basename(src_path))
 | 
						|
        elif compression == "tar":
 | 
						|
            with open(src_path, "rb") as fh:
 | 
						|
                with tarfile.open(dest_path, mode="w") as tar:
 | 
						|
                    tarinfo = tar.gettarinfo(src_path, os.path.basename(src_path))
 | 
						|
                    tar.addfile(tarinfo, fh)
 | 
						|
        elif compression == "xz":
 | 
						|
            f = get_lzma_file()(dest_path, "w")
 | 
						|
        elif compression == "zstd":
 | 
						|
            f = import_optional_dependency("zstandard").open(dest_path, "wb")
 | 
						|
        else:
 | 
						|
            msg = f"Unrecognized compression type: {compression}"
 | 
						|
            raise ValueError(msg)
 | 
						|
 | 
						|
        if compression not in ["zip", "tar"]:
 | 
						|
            with open(src_path, "rb") as fh:
 | 
						|
                with f:
 | 
						|
                    f.write(fh.read())
 | 
						|
 | 
						|
    def test_write_explicit(self, compression, get_random_path):
 | 
						|
        base = get_random_path
 | 
						|
        path1 = base + ".compressed"
 | 
						|
        path2 = base + ".raw"
 | 
						|
 | 
						|
        with tm.ensure_clean(path1) as p1, tm.ensure_clean(path2) as p2:
 | 
						|
            df = DataFrame(
 | 
						|
                1.1 * np.arange(120).reshape((30, 4)),
 | 
						|
                columns=Index(list("ABCD"), dtype=object),
 | 
						|
                index=Index([f"i-{i}" for i in range(30)], dtype=object),
 | 
						|
            )
 | 
						|
 | 
						|
            # write to compressed file
 | 
						|
            df.to_pickle(p1, compression=compression)
 | 
						|
 | 
						|
            # decompress
 | 
						|
            with tm.decompress_file(p1, compression=compression) as f:
 | 
						|
                with open(p2, "wb") as fh:
 | 
						|
                    fh.write(f.read())
 | 
						|
 | 
						|
            # read decompressed file
 | 
						|
            df2 = pd.read_pickle(p2, compression=None)
 | 
						|
 | 
						|
            tm.assert_frame_equal(df, df2)
 | 
						|
 | 
						|
    @pytest.mark.parametrize("compression", ["", "None", "bad", "7z"])
 | 
						|
    def test_write_explicit_bad(self, compression, get_random_path):
 | 
						|
        with pytest.raises(ValueError, match="Unrecognized compression type"):
 | 
						|
            with tm.ensure_clean(get_random_path) as path:
 | 
						|
                df = DataFrame(
 | 
						|
                    1.1 * np.arange(120).reshape((30, 4)),
 | 
						|
                    columns=Index(list("ABCD"), dtype=object),
 | 
						|
                    index=Index([f"i-{i}" for i in range(30)], dtype=object),
 | 
						|
                )
 | 
						|
                df.to_pickle(path, compression=compression)
 | 
						|
 | 
						|
    def test_write_infer(self, compression_ext, get_random_path):
 | 
						|
        base = get_random_path
 | 
						|
        path1 = base + compression_ext
 | 
						|
        path2 = base + ".raw"
 | 
						|
        compression = self._extension_to_compression.get(compression_ext.lower())
 | 
						|
 | 
						|
        with tm.ensure_clean(path1) as p1, tm.ensure_clean(path2) as p2:
 | 
						|
            df = DataFrame(
 | 
						|
                1.1 * np.arange(120).reshape((30, 4)),
 | 
						|
                columns=Index(list("ABCD"), dtype=object),
 | 
						|
                index=Index([f"i-{i}" for i in range(30)], dtype=object),
 | 
						|
            )
 | 
						|
 | 
						|
            # write to compressed file by inferred compression method
 | 
						|
            df.to_pickle(p1)
 | 
						|
 | 
						|
            # decompress
 | 
						|
            with tm.decompress_file(p1, compression=compression) as f:
 | 
						|
                with open(p2, "wb") as fh:
 | 
						|
                    fh.write(f.read())
 | 
						|
 | 
						|
            # read decompressed file
 | 
						|
            df2 = pd.read_pickle(p2, compression=None)
 | 
						|
 | 
						|
            tm.assert_frame_equal(df, df2)
 | 
						|
 | 
						|
    def test_read_explicit(self, compression, get_random_path):
 | 
						|
        base = get_random_path
 | 
						|
        path1 = base + ".raw"
 | 
						|
        path2 = base + ".compressed"
 | 
						|
 | 
						|
        with tm.ensure_clean(path1) as p1, tm.ensure_clean(path2) as p2:
 | 
						|
            df = DataFrame(
 | 
						|
                1.1 * np.arange(120).reshape((30, 4)),
 | 
						|
                columns=Index(list("ABCD"), dtype=object),
 | 
						|
                index=Index([f"i-{i}" for i in range(30)], dtype=object),
 | 
						|
            )
 | 
						|
 | 
						|
            # write to uncompressed file
 | 
						|
            df.to_pickle(p1, compression=None)
 | 
						|
 | 
						|
            # compress
 | 
						|
            self.compress_file(p1, p2, compression=compression)
 | 
						|
 | 
						|
            # read compressed file
 | 
						|
            df2 = pd.read_pickle(p2, compression=compression)
 | 
						|
            tm.assert_frame_equal(df, df2)
 | 
						|
 | 
						|
    def test_read_infer(self, compression_ext, get_random_path):
 | 
						|
        base = get_random_path
 | 
						|
        path1 = base + ".raw"
 | 
						|
        path2 = base + compression_ext
 | 
						|
        compression = self._extension_to_compression.get(compression_ext.lower())
 | 
						|
 | 
						|
        with tm.ensure_clean(path1) as p1, tm.ensure_clean(path2) as p2:
 | 
						|
            df = DataFrame(
 | 
						|
                1.1 * np.arange(120).reshape((30, 4)),
 | 
						|
                columns=Index(list("ABCD"), dtype=object),
 | 
						|
                index=Index([f"i-{i}" for i in range(30)], dtype=object),
 | 
						|
            )
 | 
						|
 | 
						|
            # write to uncompressed file
 | 
						|
            df.to_pickle(p1, compression=None)
 | 
						|
 | 
						|
            # compress
 | 
						|
            self.compress_file(p1, p2, compression=compression)
 | 
						|
 | 
						|
            # read compressed file by inferred compression method
 | 
						|
            df2 = pd.read_pickle(p2)
 | 
						|
            tm.assert_frame_equal(df, df2)
 | 
						|
 | 
						|
 | 
						|
# ---------------------
 | 
						|
# test pickle compression
 | 
						|
# ---------------------
 | 
						|
 | 
						|
 | 
						|
class TestProtocol:
 | 
						|
    @pytest.mark.parametrize("protocol", [-1, 0, 1, 2])
 | 
						|
    def test_read(self, protocol, get_random_path):
 | 
						|
        with tm.ensure_clean(get_random_path) as path:
 | 
						|
            df = DataFrame(
 | 
						|
                1.1 * np.arange(120).reshape((30, 4)),
 | 
						|
                columns=Index(list("ABCD"), dtype=object),
 | 
						|
                index=Index([f"i-{i}" for i in range(30)], dtype=object),
 | 
						|
            )
 | 
						|
            df.to_pickle(path, protocol=protocol)
 | 
						|
            df2 = pd.read_pickle(path)
 | 
						|
            tm.assert_frame_equal(df, df2)
 | 
						|
 | 
						|
 | 
						|
@pytest.mark.parametrize(
 | 
						|
    ["pickle_file", "excols"],
 | 
						|
    [
 | 
						|
        ("test_py27.pkl", Index(["a", "b", "c"], dtype=object)),
 | 
						|
        (
 | 
						|
            "test_mi_py27.pkl",
 | 
						|
            pd.MultiIndex(
 | 
						|
                [
 | 
						|
                    Index(["a", "b", "c"], dtype=object),
 | 
						|
                    Index(["A", "B", "C"], dtype=object),
 | 
						|
                ],
 | 
						|
                [np.array([0, 1, 2]), np.array([0, 1, 2])],
 | 
						|
            ),
 | 
						|
        ),
 | 
						|
    ],
 | 
						|
)
 | 
						|
def test_unicode_decode_error(datapath, pickle_file, excols):
 | 
						|
    # pickle file written with py27, should be readable without raising
 | 
						|
    #  UnicodeDecodeError, see GH#28645 and GH#31988
 | 
						|
    path = datapath("io", "data", "pickle", pickle_file)
 | 
						|
    df = pd.read_pickle(path)
 | 
						|
 | 
						|
    # just test the columns are correct since the values are random
 | 
						|
    tm.assert_index_equal(df.columns, excols)
 | 
						|
 | 
						|
 | 
						|
# ---------------------
 | 
						|
# tests for buffer I/O
 | 
						|
# ---------------------
 | 
						|
 | 
						|
 | 
						|
def test_pickle_buffer_roundtrip():
 | 
						|
    with tm.ensure_clean() as path:
 | 
						|
        df = DataFrame(
 | 
						|
            1.1 * np.arange(120).reshape((30, 4)),
 | 
						|
            columns=Index(list("ABCD"), dtype=object),
 | 
						|
            index=Index([f"i-{i}" for i in range(30)], dtype=object),
 | 
						|
        )
 | 
						|
        with open(path, "wb") as fh:
 | 
						|
            df.to_pickle(fh)
 | 
						|
        with open(path, "rb") as fh:
 | 
						|
            result = pd.read_pickle(fh)
 | 
						|
        tm.assert_frame_equal(df, result)
 | 
						|
 | 
						|
 | 
						|
# ---------------------
 | 
						|
# tests for URL I/O
 | 
						|
# ---------------------
 | 
						|
 | 
						|
 | 
						|
@pytest.mark.parametrize(
 | 
						|
    "mockurl", ["http://url.com", "ftp://test.com", "http://gzip.com"]
 | 
						|
)
 | 
						|
def test_pickle_generalurl_read(monkeypatch, mockurl):
 | 
						|
    def python_pickler(obj, path):
 | 
						|
        with open(path, "wb") as fh:
 | 
						|
            pickle.dump(obj, fh, protocol=-1)
 | 
						|
 | 
						|
    class MockReadResponse:
 | 
						|
        def __init__(self, path) -> None:
 | 
						|
            self.file = open(path, "rb")
 | 
						|
            if "gzip" in path:
 | 
						|
                self.headers = {"Content-Encoding": "gzip"}
 | 
						|
            else:
 | 
						|
                self.headers = {"Content-Encoding": ""}
 | 
						|
 | 
						|
        def __enter__(self):
 | 
						|
            return self
 | 
						|
 | 
						|
        def __exit__(self, *args):
 | 
						|
            self.close()
 | 
						|
 | 
						|
        def read(self):
 | 
						|
            return self.file.read()
 | 
						|
 | 
						|
        def close(self):
 | 
						|
            return self.file.close()
 | 
						|
 | 
						|
    with tm.ensure_clean() as path:
 | 
						|
 | 
						|
        def mock_urlopen_read(*args, **kwargs):
 | 
						|
            return MockReadResponse(path)
 | 
						|
 | 
						|
        df = DataFrame(
 | 
						|
            1.1 * np.arange(120).reshape((30, 4)),
 | 
						|
            columns=Index(list("ABCD"), dtype=object),
 | 
						|
            index=Index([f"i-{i}" for i in range(30)], dtype=object),
 | 
						|
        )
 | 
						|
        python_pickler(df, path)
 | 
						|
        monkeypatch.setattr("urllib.request.urlopen", mock_urlopen_read)
 | 
						|
        result = pd.read_pickle(mockurl)
 | 
						|
        tm.assert_frame_equal(df, result)
 | 
						|
 | 
						|
 | 
						|
def test_pickle_fsspec_roundtrip():
 | 
						|
    pytest.importorskip("fsspec")
 | 
						|
    with tm.ensure_clean():
 | 
						|
        mockurl = "memory://mockfile"
 | 
						|
        df = DataFrame(
 | 
						|
            1.1 * np.arange(120).reshape((30, 4)),
 | 
						|
            columns=Index(list("ABCD"), dtype=object),
 | 
						|
            index=Index([f"i-{i}" for i in range(30)], dtype=object),
 | 
						|
        )
 | 
						|
        df.to_pickle(mockurl)
 | 
						|
        result = pd.read_pickle(mockurl)
 | 
						|
        tm.assert_frame_equal(df, result)
 | 
						|
 | 
						|
 | 
						|
class MyTz(datetime.tzinfo):
 | 
						|
    def __init__(self) -> None:
 | 
						|
        pass
 | 
						|
 | 
						|
 | 
						|
def test_read_pickle_with_subclass():
 | 
						|
    # GH 12163
 | 
						|
    expected = Series(dtype=object), MyTz()
 | 
						|
    result = tm.round_trip_pickle(expected)
 | 
						|
 | 
						|
    tm.assert_series_equal(result[0], expected[0])
 | 
						|
    assert isinstance(result[1], MyTz)
 | 
						|
 | 
						|
 | 
						|
def test_pickle_binary_object_compression(compression):
 | 
						|
    """
 | 
						|
    Read/write from binary file-objects w/wo compression.
 | 
						|
 | 
						|
    GH 26237, GH 29054, and GH 29570
 | 
						|
    """
 | 
						|
    df = DataFrame(
 | 
						|
        1.1 * np.arange(120).reshape((30, 4)),
 | 
						|
        columns=Index(list("ABCD"), dtype=object),
 | 
						|
        index=Index([f"i-{i}" for i in range(30)], dtype=object),
 | 
						|
    )
 | 
						|
 | 
						|
    # reference for compression
 | 
						|
    with tm.ensure_clean() as path:
 | 
						|
        df.to_pickle(path, compression=compression)
 | 
						|
        reference = Path(path).read_bytes()
 | 
						|
 | 
						|
    # write
 | 
						|
    buffer = io.BytesIO()
 | 
						|
    df.to_pickle(buffer, compression=compression)
 | 
						|
    buffer.seek(0)
 | 
						|
 | 
						|
    # gzip  and zip safe the filename: cannot compare the compressed content
 | 
						|
    assert buffer.getvalue() == reference or compression in ("gzip", "zip", "tar")
 | 
						|
 | 
						|
    # read
 | 
						|
    read_df = pd.read_pickle(buffer, compression=compression)
 | 
						|
    buffer.seek(0)
 | 
						|
    tm.assert_frame_equal(df, read_df)
 | 
						|
 | 
						|
 | 
						|
def test_pickle_dataframe_with_multilevel_index(
 | 
						|
    multiindex_year_month_day_dataframe_random_data,
 | 
						|
    multiindex_dataframe_random_data,
 | 
						|
):
 | 
						|
    ymd = multiindex_year_month_day_dataframe_random_data
 | 
						|
    frame = multiindex_dataframe_random_data
 | 
						|
 | 
						|
    def _test_roundtrip(frame):
 | 
						|
        unpickled = tm.round_trip_pickle(frame)
 | 
						|
        tm.assert_frame_equal(frame, unpickled)
 | 
						|
 | 
						|
    _test_roundtrip(frame)
 | 
						|
    _test_roundtrip(frame.T)
 | 
						|
    _test_roundtrip(ymd)
 | 
						|
    _test_roundtrip(ymd.T)
 | 
						|
 | 
						|
 | 
						|
def test_pickle_timeseries_periodindex():
 | 
						|
    # GH#2891
 | 
						|
    prng = period_range("1/1/2011", "1/1/2012", freq="M")
 | 
						|
    ts = Series(np.random.default_rng(2).standard_normal(len(prng)), prng)
 | 
						|
    new_ts = tm.round_trip_pickle(ts)
 | 
						|
    assert new_ts.index.freqstr == "M"
 | 
						|
 | 
						|
 | 
						|
@pytest.mark.parametrize(
 | 
						|
    "name", [777, 777.0, "name", datetime.datetime(2001, 11, 11), (1, 2)]
 | 
						|
)
 | 
						|
def test_pickle_preserve_name(name):
 | 
						|
    unpickled = tm.round_trip_pickle(Series(np.arange(10, dtype=np.float64), name=name))
 | 
						|
    assert unpickled.name == name
 | 
						|
 | 
						|
 | 
						|
def test_pickle_datetimes(datetime_series):
 | 
						|
    unp_ts = tm.round_trip_pickle(datetime_series)
 | 
						|
    tm.assert_series_equal(unp_ts, datetime_series)
 | 
						|
 | 
						|
 | 
						|
def test_pickle_strings(string_series):
 | 
						|
    unp_series = tm.round_trip_pickle(string_series)
 | 
						|
    tm.assert_series_equal(unp_series, string_series)
 | 
						|
 | 
						|
 | 
						|
@td.skip_array_manager_invalid_test
 | 
						|
def test_pickle_preserves_block_ndim():
 | 
						|
    # GH#37631
 | 
						|
    ser = Series(list("abc")).astype("category").iloc[[0]]
 | 
						|
    res = tm.round_trip_pickle(ser)
 | 
						|
 | 
						|
    assert res._mgr.blocks[0].ndim == 1
 | 
						|
    assert res._mgr.blocks[0].shape == (1,)
 | 
						|
 | 
						|
    # GH#37631 OP issue was about indexing, underlying problem was pickle
 | 
						|
    tm.assert_series_equal(res[[True]], ser)
 | 
						|
 | 
						|
 | 
						|
@pytest.mark.parametrize("protocol", [pickle.DEFAULT_PROTOCOL, pickle.HIGHEST_PROTOCOL])
 | 
						|
def test_pickle_big_dataframe_compression(protocol, compression):
 | 
						|
    # GH#39002
 | 
						|
    df = DataFrame(range(100000))
 | 
						|
    result = tm.round_trip_pathlib(
 | 
						|
        partial(df.to_pickle, protocol=protocol, compression=compression),
 | 
						|
        partial(pd.read_pickle, compression=compression),
 | 
						|
    )
 | 
						|
    tm.assert_frame_equal(df, result)
 | 
						|
 | 
						|
 | 
						|
def test_pickle_frame_v124_unpickle_130(datapath):
 | 
						|
    # GH#42345 DataFrame created in 1.2.x, unpickle in 1.3.x
 | 
						|
    path = datapath(
 | 
						|
        Path(__file__).parent,
 | 
						|
        "data",
 | 
						|
        "legacy_pickle",
 | 
						|
        "1.2.4",
 | 
						|
        "empty_frame_v1_2_4-GH#42345.pkl",
 | 
						|
    )
 | 
						|
    with open(path, "rb") as fd:
 | 
						|
        df = pickle.load(fd)
 | 
						|
 | 
						|
    expected = DataFrame(index=[], columns=[])
 | 
						|
    tm.assert_frame_equal(df, expected)
 | 
						|
 | 
						|
 | 
						|
def test_pickle_pos_args_deprecation():
 | 
						|
    # GH-54229
 | 
						|
    df = DataFrame({"a": [1, 2, 3]})
 | 
						|
    msg = (
 | 
						|
        r"Starting with pandas version 3.0 all arguments of to_pickle except for the "
 | 
						|
        r"argument 'path' will be keyword-only."
 | 
						|
    )
 | 
						|
    with tm.assert_produces_warning(FutureWarning, match=msg):
 | 
						|
        buffer = io.BytesIO()
 | 
						|
        df.to_pickle(buffer, "infer")
 |