You cannot select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
	
	
		
			728 lines
		
	
	
		
			22 KiB
		
	
	
	
		
			Python
		
	
			
		
		
	
	
			728 lines
		
	
	
		
			22 KiB
		
	
	
	
		
			Python
		
	
import numpy as np
 | 
						|
import pytest
 | 
						|
 | 
						|
from pandas import (
 | 
						|
    DataFrame,
 | 
						|
    DatetimeIndex,
 | 
						|
    Series,
 | 
						|
    date_range,
 | 
						|
)
 | 
						|
import pandas._testing as tm
 | 
						|
 | 
						|
 | 
						|
def test_doc_string():
    """Smoke test: the ``ewm(com=0.5).mean()`` example must run without raising."""
    df = DataFrame({"B": [0, 1, 2, np.nan, 4]})
    # The bare ``df`` echo of the interactive example is a no-op inside a
    # test function, so it is not repeated here.
    df.ewm(com=0.5).mean()
 | 
						|
 | 
						|
 | 
						|
def test_constructor(frame_or_series):
    """Check which decay-parameter combinations ``ewm`` accepts and rejects.

    ``frame_or_series`` is a fixture defined outside this file; it is used
    here as a constructor applied to ``range(5)``.
    """
    c = frame_or_series(range(5)).ewm

    # valid
    c(com=0.5)
    c(span=1.5)
    c(alpha=0.5)
    c(halflife=0.75)
    c(com=0.5, span=None)
    c(alpha=0.5, com=None)
    c(halflife=0.75, alpha=None)

    # not valid: mutually exclusive
    msg = "comass, span, halflife, and alpha are mutually exclusive"
    with pytest.raises(ValueError, match=msg):
        c(com=0.5, alpha=0.5)
    with pytest.raises(ValueError, match=msg):
        c(span=1.5, halflife=0.75)
    with pytest.raises(ValueError, match=msg):
        c(alpha=0.5, span=1.5)

    # not valid: com < 0
    msg = "comass must satisfy: comass >= 0"
    with pytest.raises(ValueError, match=msg):
        c(com=-0.5)

    # not valid: span < 1
    msg = "span must satisfy: span >= 1"
    with pytest.raises(ValueError, match=msg):
        c(span=0.5)

    # not valid: halflife <= 0
    msg = "halflife must satisfy: halflife > 0"
    with pytest.raises(ValueError, match=msg):
        c(halflife=0)

    # not valid: alpha <= 0 or alpha > 1
    msg = "alpha must satisfy: 0 < alpha <= 1"
    for alpha in (-0.5, 1.5):
        with pytest.raises(ValueError, match=msg):
            c(alpha=alpha)
 | 
						|
 | 
						|
 | 
						|
def test_ewma_times_not_datetime_type():
    """Passing an integer array as ``times`` must raise ``ValueError``."""
    msg = r"times must be datetime64 dtype."
    with pytest.raises(ValueError, match=msg):
        Series(range(5)).ewm(times=np.arange(5))
 | 
						|
 | 
						|
 | 
						|
def test_ewma_times_not_same_length():
    """A 4-element ``times`` for a 5-element Series must raise ``ValueError``."""
    msg = "times must be the same length as the object."
    with pytest.raises(ValueError, match=msg):
        Series(range(5)).ewm(times=np.arange(4).astype("datetime64[ns]"))
 | 
						|
 | 
						|
 | 
						|
def test_ewma_halflife_not_correct_type():
    """An integer ``halflife`` combined with ``times`` must raise ``ValueError``."""
    msg = "halflife must be a timedelta convertible object"
    with pytest.raises(ValueError, match=msg):
        Series(range(5)).ewm(halflife=1, times=np.arange(5).astype("datetime64[ns]"))
 | 
						|
 | 
						|
 | 
						|
def test_ewma_halflife_without_times(halflife_with_times):
    """The fixture-provided halflife must be rejected when ``times`` is omitted.

    ``halflife_with_times`` is a fixture defined outside this file
    (presumably a timedelta-convertible value -- confirm in conftest).
    """
    msg = "halflife can only be a timedelta convertible argument if times is not None."
    with pytest.raises(ValueError, match=msg):
        Series(range(5)).ewm(halflife=halflife_with_times)
 | 
						|
 | 
						|
 | 
						|
@pytest.mark.parametrize(
    "times",
    [
        np.arange(10).astype("datetime64[D]").astype("datetime64[ns]"),
        date_range("2000", freq="D", periods=10),
        date_range("2000", freq="D", periods=10).tz_localize("UTC"),
    ],
)
@pytest.mark.parametrize("min_periods", [0, 2])
def test_ewma_with_times_equal_spacing(halflife_with_times, times, min_periods):
    """Daily-spaced ``times`` must give the same mean as ``halflife=1.0``.

    The three ``times`` variants are a raw datetime64 array, a naive
    ``DatetimeIndex`` and a UTC-localized ``DatetimeIndex``.
    NOTE(review): equality with ``halflife=1.0`` assumes the
    ``halflife_with_times`` fixture represents one day -- confirm in conftest.
    """
    halflife = halflife_with_times
    data = np.arange(10.0)
    # Blank out every other observation so NaN handling is exercised too.
    data[::2] = np.nan
    df = DataFrame({"A": data})
    result = df.ewm(halflife=halflife, min_periods=min_periods, times=times).mean()
    expected = df.ewm(halflife=1.0, min_periods=min_periods).mean()
    tm.assert_frame_equal(result, expected)
 | 
						|
 | 
						|
 | 
						|
def test_ewma_with_times_variable_spacing(tz_aware_fixture, unit):
    """Compare a time-weighted mean over unevenly spaced timestamps to fixed values.

    ``tz_aware_fixture`` and ``unit`` are fixtures defined outside this file;
    they are applied to the timestamps via ``tz_localize`` and ``as_unit``.
    """
    tz = tz_aware_fixture
    halflife = "23 days"
    times = (
        DatetimeIndex(["2020-01-01", "2020-01-10T00:04:05", "2020-02-23T05:00:23"])
        .tz_localize(tz)
        .as_unit(unit)
    )
    data = np.arange(3)
    df = DataFrame(data)
    result = df.ewm(halflife=halflife, times=times).mean()
    expected = DataFrame([0.0, 0.5674161888241773, 1.545239952073459])
    tm.assert_frame_equal(result, expected)
 | 
						|
 | 
						|
 | 
						|
def test_ewm_with_nat_raises(halflife_with_times):
    """A ``times`` index containing ``NaT`` must raise ``ValueError`` (GH#38535)."""
    # GH#38535
    ser = Series(range(1))
    times = DatetimeIndex(["NaT"])
    with pytest.raises(ValueError, match="Cannot convert NaT values to integer"):
        ser.ewm(com=0.1, halflife=halflife_with_times, times=times)
 | 
						|
 | 
						|
 | 
						|
def test_ewm_with_times_getitem(halflife_with_times):
    """Selecting a column from a ``times``-based ewm must still use ``times``.

    NOTE(review): equality with ``halflife=1.0`` assumes the
    ``halflife_with_times`` fixture represents one day -- confirm in conftest.
    """
    # GH 40164
    halflife = halflife_with_times
    data = np.arange(10.0)
    data[::2] = np.nan
    times = date_range("2000", freq="D", periods=10)
    df = DataFrame({"A": data, "B": data})
    result = df.ewm(halflife=halflife, times=times)["A"].mean()
    expected = df.ewm(halflife=1.0)["A"].mean()
    tm.assert_series_equal(result, expected)
 | 
						|
 | 
						|
 | 
						|
@pytest.mark.parametrize("arg", ["com", "halflife", "span", "alpha"])
def test_ewm_getitem_attributes_retained(arg, adjust, ignore_na):
    """Column selection on an ewm object must keep every configured attribute.

    ``adjust`` and ``ignore_na`` are fixtures defined outside this file.
    """
    # GH 40164
    kwargs = {arg: 1, "adjust": adjust, "ignore_na": ignore_na}
    ewm = DataFrame({"A": range(1), "B": range(1)}).ewm(**kwargs)
    expected = {attr: getattr(ewm, attr) for attr in ewm._attributes}
    ewm_slice = ewm["A"]
    # Read the attributes from the *slice*; reading them from ``ewm`` again
    # would compare the parent object with itself and always pass.
    result = {attr: getattr(ewm_slice, attr) for attr in ewm_slice._attributes}
    assert result == expected
 | 
						|
 | 
						|
 | 
						|
def test_ewma_times_adjust_false_raises():
    """Combining ``times`` with ``adjust=False`` must raise ``NotImplementedError``."""
    # GH 40098
    with pytest.raises(
        NotImplementedError, match="times is not supported with adjust=False."
    ):
        Series(range(1)).ewm(
            0.1, adjust=False, times=date_range("2000", freq="D", periods=1)
        )
 | 
						|
 | 
						|
 | 
						|
@pytest.mark.parametrize(
    "func, expected",
    [
        [
            "mean",
            DataFrame(
                {
                    0: range(5),
                    1: range(4, 9),
                    2: [7.428571, 9, 10.571429, 12.142857, 13.714286],
                },
                dtype=float,
            ),
        ],
        [
            "std",
            DataFrame(
                {
                    0: [np.nan] * 5,
                    1: [4.242641] * 5,
                    2: [4.6291, 5.196152, 5.781745, 6.380775, 6.989788],
                }
            ),
        ],
        [
            "var",
            DataFrame(
                {
                    0: [np.nan] * 5,
                    1: [18.0] * 5,
                    2: [21.428571, 27, 33.428571, 40.714286, 48.857143],
                }
            ),
        ],
    ],
)
def test_float_dtype_ewma(func, expected, float_numpy_dtype):
    """Row-wise (``axis=1``) ewm mean/std/var must match the tabulated frames.

    ``float_numpy_dtype`` is a fixture defined outside this file; it is used
    as the dtype of the input frame. Creating the ``axis=1`` ewm object is
    expected to emit a ``FutureWarning``.
    """
    # GH#42452

    df = DataFrame(
        {0: range(5), 1: range(6, 11), 2: range(10, 20, 2)}, dtype=float_numpy_dtype
    )
    msg = "Support for axis=1 in DataFrame.ewm is deprecated"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        e = df.ewm(alpha=0.5, axis=1)
    result = getattr(e, func)()

    tm.assert_frame_equal(result, expected)
 | 
						|
 | 
						|
 | 
						|
def test_times_string_col_raises():
    """Passing a column *name* as ``times`` must raise ``ValueError``."""
    # GH 43265
    df = DataFrame(
        {"A": np.arange(10.0), "time_col": date_range("2000", freq="D", periods=10)}
    )
    with pytest.raises(ValueError, match="times must be datetime64"):
        df.ewm(halflife="1 day", min_periods=0, times="time_col")
 | 
						|
 | 
						|
 | 
						|
def test_ewm_sum_adjust_false_notimplemented():
    """``sum`` on an ``adjust=False`` ewm object must raise ``NotImplementedError``."""
    data = Series(range(1)).ewm(com=1, adjust=False)
    with pytest.raises(NotImplementedError, match="sum is not"):
        data.sum()
 | 
						|
 | 
						|
 | 
						|
@pytest.mark.parametrize(
    "expected_data, ignore",
    [[[10.0, 5.0, 2.5, 11.25], False], [[10.0, 5.0, 5.0, 12.5], True]],
)
def test_ewm_sum(expected_data, ignore):
    """Check ``ewm(alpha=0.5).sum()`` with and without ``ignore_na``."""
    # xref from Numbagg tests
    # https://github.com/numbagg/numbagg/blob/v0.2.1/numbagg/test/test_moving.py#L50
    data = Series([10, 0, np.nan, 10])
    result = data.ewm(alpha=0.5, ignore_na=ignore).sum()
    expected = Series(expected_data)
    tm.assert_series_equal(result, expected)
 | 
						|
 | 
						|
 | 
						|
def test_ewma_adjust():
    """A unit impulse smoothed with ``adjust=False`` must sum to roughly 1."""
    vals = Series(np.zeros(1000))
    vals[5] = 1
    result = vals.ewm(span=100, adjust=False).mean().sum()
    assert np.abs(result - 1) < 1e-2
 | 
						|
 | 
						|
 | 
						|
def test_ewma_cases(adjust, ignore_na):
    """Check ``ewm(com=2.0).mean()`` across the adjust/ignore_na matrix.

    ``adjust`` and ``ignore_na`` are fixtures defined outside this file.
    The input has no missing values, so only ``adjust`` selects the
    expected result.
    """
    # try adjust/ignore_na args matrix

    s = Series([1.0, 2.0, 4.0, 8.0])

    if adjust:
        expected = Series([1.0, 1.6, 2.736842, 4.923077])
    else:
        expected = Series([1.0, 1.333333, 2.222222, 4.148148])

    result = s.ewm(com=2.0, adjust=adjust, ignore_na=ignore_na).mean()
    tm.assert_series_equal(result, expected)
 | 
						|
 | 
						|
 | 
						|
def test_ewma_nan_handling():
    """NaNs after the first valid value carry the mean forward; leading NaNs stay NaN."""
    s = Series([1.0] + [np.nan] * 5 + [1.0])
    result = s.ewm(com=5).mean()
    tm.assert_series_equal(result, Series([1.0] * len(s)))

    s = Series([np.nan] * 2 + [1.0] + [np.nan] * 2 + [1.0])
    result = s.ewm(com=5).mean()
    tm.assert_series_equal(result, Series([np.nan] * 2 + [1.0] * 4))
 | 
						|
 | 
						|
 | 
						|
# Weights used by the expected values below. The test body calls
# ``ewm(com=2.0)``; per the pandas ``ewm`` documentation the smoothing factor
# is ``alpha = 1 / (1 + com)``. The expressions are kept in the same
# floating-point form as the literals they replace.
_NAN_CASE_ALPHA = 1.0 / (1.0 + 2.0)
_NAN_CASE_DECAY = 1.0 - _NAN_CASE_ALPHA


@pytest.mark.parametrize(
    "s, adjust, ignore_na, w",
    [
        (
            Series([np.nan, 1.0, 101.0]),
            True,
            False,
            [np.nan, _NAN_CASE_DECAY, 1.0],
        ),
        (
            Series([np.nan, 1.0, 101.0]),
            True,
            True,
            [np.nan, _NAN_CASE_DECAY, 1.0],
        ),
        (
            Series([np.nan, 1.0, 101.0]),
            False,
            False,
            [np.nan, _NAN_CASE_DECAY, _NAN_CASE_ALPHA],
        ),
        (
            Series([np.nan, 1.0, 101.0]),
            False,
            True,
            [np.nan, _NAN_CASE_DECAY, _NAN_CASE_ALPHA],
        ),
        (
            Series([1.0, np.nan, 101.0]),
            True,
            False,
            [_NAN_CASE_DECAY**2, np.nan, 1.0],
        ),
        (
            Series([1.0, np.nan, 101.0]),
            True,
            True,
            [_NAN_CASE_DECAY, np.nan, 1.0],
        ),
        (
            Series([1.0, np.nan, 101.0]),
            False,
            False,
            [_NAN_CASE_DECAY**2, np.nan, _NAN_CASE_ALPHA],
        ),
        (
            Series([1.0, np.nan, 101.0]),
            False,
            True,
            [_NAN_CASE_DECAY, np.nan, _NAN_CASE_ALPHA],
        ),
        (
            Series([np.nan, 1.0, np.nan, np.nan, 101.0, np.nan]),
            True,
            False,
            [np.nan, _NAN_CASE_DECAY**3, np.nan, np.nan, 1.0, np.nan],
        ),
        (
            Series([np.nan, 1.0, np.nan, np.nan, 101.0, np.nan]),
            True,
            True,
            [np.nan, _NAN_CASE_DECAY, np.nan, np.nan, 1.0, np.nan],
        ),
        (
            Series([np.nan, 1.0, np.nan, np.nan, 101.0, np.nan]),
            False,
            False,
            [
                np.nan,
                _NAN_CASE_DECAY**3,
                np.nan,
                np.nan,
                _NAN_CASE_ALPHA,
                np.nan,
            ],
        ),
        (
            Series([np.nan, 1.0, np.nan, np.nan, 101.0, np.nan]),
            False,
            True,
            [
                np.nan,
                _NAN_CASE_DECAY,
                np.nan,
                np.nan,
                _NAN_CASE_ALPHA,
                np.nan,
            ],
        ),
        (
            Series([1.0, np.nan, 101.0, 50.0]),
            True,
            False,
            [
                _NAN_CASE_DECAY**3,
                np.nan,
                _NAN_CASE_DECAY,
                1.0,
            ],
        ),
        (
            Series([1.0, np.nan, 101.0, 50.0]),
            True,
            True,
            [
                _NAN_CASE_DECAY**2,
                np.nan,
                _NAN_CASE_DECAY,
                1.0,
            ],
        ),
        (
            Series([1.0, np.nan, 101.0, 50.0]),
            False,
            False,
            [
                _NAN_CASE_DECAY**3,
                np.nan,
                _NAN_CASE_DECAY * _NAN_CASE_ALPHA,
                _NAN_CASE_ALPHA * (_NAN_CASE_DECAY**2 + _NAN_CASE_ALPHA),
            ],
        ),
        (
            Series([1.0, np.nan, 101.0, 50.0]),
            False,
            True,
            [
                _NAN_CASE_DECAY**2,
                np.nan,
                _NAN_CASE_DECAY * _NAN_CASE_ALPHA,
                _NAN_CASE_ALPHA,
            ],
        ),
    ],
)
def test_ewma_nan_handling_cases(s, adjust, ignore_na, w):
    """Compare ``ewm(com=2.0).mean()`` with a hand-built weighted average.

    ``w`` holds the explicit per-observation weights for the given
    ``adjust``/``ignore_na`` combination; the expected series is the running
    weighted mean of ``s`` under those weights, forward-filled over NaNs.
    """
    # GH 7603
    expected = (s.multiply(w).cumsum() / Series(w).cumsum()).ffill()
    result = s.ewm(com=2.0, adjust=adjust, ignore_na=ignore_na).mean()

    tm.assert_series_equal(result, expected)
    if ignore_na is False:
        # check that ignore_na defaults to False
        result = s.ewm(com=2.0, adjust=adjust).mean()
        tm.assert_series_equal(result, expected)
 | 
						|
 | 
						|
 | 
						|
def test_ewm_alpha():
    """Matching ``alpha``, ``com``, ``span`` and ``halflife`` values must agree."""
    # GH 10789
    arr = np.random.default_rng(2).standard_normal(100)
    locs = np.arange(20, 40)
    arr[locs] = np.nan

    s = Series(arr)
    a = s.ewm(alpha=0.61722699889169674).mean()
    b = s.ewm(com=0.62014947789973052).mean()
    c = s.ewm(span=2.240298955799461).mean()
    d = s.ewm(halflife=0.721792864318).mean()
    tm.assert_series_equal(a, b)
    tm.assert_series_equal(a, c)
    tm.assert_series_equal(a, d)
 | 
						|
 | 
						|
 | 
						|
def test_ewm_domain_checks():
    """Probe the accepted range of each decay parameter at and around its bounds."""
    # GH 12492
    arr = np.random.default_rng(2).standard_normal(100)
    locs = np.arange(20, 40)
    arr[locs] = np.nan

    s = Series(arr)
    msg = "comass must satisfy: comass >= 0"
    with pytest.raises(ValueError, match=msg):
        s.ewm(com=-0.1)
    s.ewm(com=0.0)
    s.ewm(com=0.1)

    msg = "span must satisfy: span >= 1"
    with pytest.raises(ValueError, match=msg):
        s.ewm(span=-0.1)
    with pytest.raises(ValueError, match=msg):
        s.ewm(span=0.0)
    with pytest.raises(ValueError, match=msg):
        s.ewm(span=0.9)
    s.ewm(span=1.0)
    s.ewm(span=1.1)

    msg = "halflife must satisfy: halflife > 0"
    with pytest.raises(ValueError, match=msg):
        s.ewm(halflife=-0.1)
    with pytest.raises(ValueError, match=msg):
        s.ewm(halflife=0.0)
    s.ewm(halflife=0.1)

    msg = "alpha must satisfy: 0 < alpha <= 1"
    with pytest.raises(ValueError, match=msg):
        s.ewm(alpha=-0.1)
    with pytest.raises(ValueError, match=msg):
        s.ewm(alpha=0.0)
    s.ewm(alpha=0.1)
    s.ewm(alpha=1.0)
    with pytest.raises(ValueError, match=msg):
        s.ewm(alpha=1.1)
 | 
						|
 | 
						|
 | 
						|
@pytest.mark.parametrize("method", ["mean", "std", "var"])
def test_ew_empty_series(method):
    """An ewm aggregation of an empty float Series must return an empty Series."""
    vals = Series([], dtype=np.float64)

    ewm = vals.ewm(3)
    result = getattr(ewm, method)()
    tm.assert_almost_equal(result, vals)
 | 
						|
 | 
						|
 | 
						|
@pytest.mark.parametrize("min_periods", [0, 1])
@pytest.mark.parametrize("name", ["mean", "var", "std"])
def test_ew_min_periods(min_periods, name):
    """Check where ewm mean/var/std first become non-NaN for given ``min_periods``.

    The input has ten leading and ten trailing NaNs, so the first valid
    observation sits at position 10.
    """
    # excluding NaNs correctly
    arr = np.random.default_rng(2).standard_normal(50)
    arr[:10] = np.nan
    arr[-10:] = np.nan
    s = Series(arr)

    # check min_periods
    # GH 7898
    result = getattr(s.ewm(com=50, min_periods=2), name)()
    assert result[:11].isna().all()
    assert not result[11:].isna().any()

    result = getattr(s.ewm(com=50, min_periods=min_periods), name)()
    if name == "mean":
        assert result[:10].isna().all()
        assert not result[10:].isna().any()
    else:
        # ewm.std, ewm.var (with bias=False) require at least
        # two values
        assert result[:11].isna().all()
        assert not result[11:].isna().any()

    # check series of length 0
    result = getattr(Series(dtype=object).ewm(com=50, min_periods=min_periods), name)()
    tm.assert_series_equal(result, Series(dtype="float64"))

    # check series of length 1
    result = getattr(Series([1.0]).ewm(50, min_periods=min_periods), name)()
    if name == "mean":
        tm.assert_series_equal(result, Series([1.0]))
    else:
        # ewm.std, ewm.var with bias=False require at least
        # two values
        tm.assert_series_equal(result, Series([np.nan]))

    # pass in ints
    result2 = getattr(Series(np.arange(50)).ewm(span=10), name)()
    assert result2.dtype == np.float64
 | 
						|
 | 
						|
 | 
						|
@pytest.mark.parametrize("name", ["cov", "corr"])
def test_ewm_corr_cov(name):
    """With ``min_periods=5`` the pairwise result must be NaN for the first 14 rows only."""
    A = Series(np.random.default_rng(2).standard_normal(50), index=range(50))
    B = A[2:] + np.random.default_rng(2).standard_normal(48)

    A[:10] = np.nan
    B.iloc[-10:] = np.nan

    result = getattr(A.ewm(com=20, min_periods=5), name)(B)
    assert np.isnan(result.values[:14]).all()
    assert not np.isnan(result.values[14:]).any()
 | 
						|
 | 
						|
 | 
						|
@pytest.mark.parametrize("min_periods", [0, 1, 2])
@pytest.mark.parametrize("name", ["cov", "corr"])
def test_ewm_corr_cov_min_periods(name, min_periods):
    """Check NaN placement of ewm cov/corr for small ``min_periods`` values.

    Also covers the empty and single-element inputs.
    """
    # GH 7898
    A = Series(np.random.default_rng(2).standard_normal(50), index=range(50))
    B = A[2:] + np.random.default_rng(2).standard_normal(48)

    A[:10] = np.nan
    B.iloc[-10:] = np.nan

    result = getattr(A.ewm(com=20, min_periods=min_periods), name)(B)
    # binary functions (ewmcov, ewmcorr) with bias=False require at
    # least two values
    assert np.isnan(result.values[:11]).all()
    assert not np.isnan(result.values[11:]).any()

    # check series of length 0
    empty = Series([], dtype=np.float64)
    result = getattr(empty.ewm(com=50, min_periods=min_periods), name)(empty)
    tm.assert_series_equal(result, empty)

    # check series of length 1
    result = getattr(Series([1.0]).ewm(com=50, min_periods=min_periods), name)(
        Series([1.0])
    )
    tm.assert_series_equal(result, Series([np.nan]))
 | 
						|
 | 
						|
 | 
						|
@pytest.mark.parametrize("name", ["cov", "corr"])
def test_different_input_array_raise_exception(name):
    """Passing a raw NumPy array as ``other`` to ewm cov/corr must raise ``ValueError``."""
    A = Series(np.random.default_rng(2).standard_normal(50), index=range(50))
    A[:10] = np.nan

    msg = "other must be a DataFrame or Series"
    # the exception asserted here is ValueError
    with pytest.raises(ValueError, match=msg):
        getattr(A.ewm(com=20, min_periods=5), name)(
            np.random.default_rng(2).standard_normal(50)
        )
 | 
						|
 | 
						|
 | 
						|
@pytest.mark.parametrize("name", ["var", "std", "mean"])
def test_ewma_series(series, name):
    """An ewm aggregation of the ``series`` fixture must return a ``Series``."""
    series_result = getattr(series.ewm(com=10), name)()
    assert isinstance(series_result, Series)
 | 
						|
 | 
						|
 | 
						|
@pytest.mark.parametrize("name", ["var", "std", "mean"])
def test_ewma_frame(frame, name):
    """An ewm aggregation of the ``frame`` fixture must return a ``DataFrame``."""
    frame_result = getattr(frame.ewm(com=10), name)()
    assert isinstance(frame_result, DataFrame)
 | 
						|
 | 
						|
 | 
						|
def test_ewma_span_com_args(series):
    """``com=9.5`` and ``span=20`` must agree; passing both or neither must raise.

    ``series`` is a fixture defined outside this file.
    """
    A = series.ewm(com=9.5).mean()
    B = series.ewm(span=20).mean()
    tm.assert_almost_equal(A, B)
    msg = "comass, span, halflife, and alpha are mutually exclusive"
    with pytest.raises(ValueError, match=msg):
        series.ewm(com=9.5, span=20)

    msg = "Must pass one of comass, span, halflife, or alpha"
    with pytest.raises(ValueError, match=msg):
        series.ewm().mean()
 | 
						|
 | 
						|
 | 
						|
def test_ewma_halflife_arg(series):
    """``halflife=10.0`` must match its ``com`` equivalent; conflicting args must raise.

    ``series`` is a fixture defined outside this file.
    """
    A = series.ewm(com=13.932726172912965).mean()
    B = series.ewm(halflife=10.0).mean()
    tm.assert_almost_equal(A, B)
    msg = "comass, span, halflife, and alpha are mutually exclusive"
    with pytest.raises(ValueError, match=msg):
        series.ewm(span=20, halflife=50)
    with pytest.raises(ValueError, match=msg):
        series.ewm(com=9.5, halflife=50)
    with pytest.raises(ValueError, match=msg):
        series.ewm(com=9.5, span=20, halflife=50)
    msg = "Must pass one of comass, span, halflife, or alpha"
    with pytest.raises(ValueError, match=msg):
        series.ewm()
 | 
						|
 | 
						|
 | 
						|
def test_ewm_alpha_arg(series):
    """``alpha`` must be rejected alongside any other decay argument.

    Calling ``ewm()`` with no decay argument at all must also raise.
    ``series`` is a fixture defined outside this file.
    """
    # GH 10789
    s = series
    msg = "Must pass one of comass, span, halflife, or alpha"
    with pytest.raises(ValueError, match=msg):
        s.ewm()

    msg = "comass, span, halflife, and alpha are mutually exclusive"
    with pytest.raises(ValueError, match=msg):
        s.ewm(com=10.0, alpha=0.5)
    with pytest.raises(ValueError, match=msg):
        s.ewm(span=10.0, alpha=0.5)
    with pytest.raises(ValueError, match=msg):
        s.ewm(halflife=10.0, alpha=0.5)
 | 
						|
 | 
						|
 | 
						|
@pytest.mark.parametrize("func", ["cov", "corr"])
def test_ewm_pairwise_cov_corr(func, frame):
    """The pairwise frame result for columns 1 and 5 must match the direct Series call.

    ``frame`` is a fixture defined outside this file; it is indexed here with
    the column labels ``1`` and ``5``.
    """
    result = getattr(frame.ewm(span=10, min_periods=5), func)()
    # Pick the (row-level-1 == 1, column 5) cross-section, then drop the
    # second index level so the index lines up with the Series-based result.
    result = result.loc[(slice(None), 1), 5]
    result.index = result.index.droplevel(1)
    expected = getattr(frame[1].ewm(span=10, min_periods=5), func)(frame[5])
    tm.assert_series_equal(result, expected, check_names=False)
 | 
						|
 | 
						|
 | 
						|
def test_numeric_only_frame(arithmetic_win_operators, numeric_only):
    """Check ``numeric_only`` handling of ewm kernels on a frame with an object column.

    ``arithmetic_win_operators`` (the kernel name) and ``numeric_only`` are
    fixtures defined outside this file. Kernels that the ewm object does not
    provide are skipped explicitly rather than passing silently.
    """
    # GH#46560
    kernel = arithmetic_win_operators
    df = DataFrame({"a": [1], "b": 2, "c": 3})
    df["c"] = df["c"].astype(object)
    ewm = df.ewm(span=2, min_periods=1)
    op = getattr(ewm, kernel, None)
    if op is None:
        # Nothing to test
        pytest.skip("No op to test")

    result = op(numeric_only=numeric_only)

    columns = ["a", "b"] if numeric_only else ["a", "b", "c"]
    expected = df[columns].agg([kernel]).reset_index(drop=True).astype(float)
    assert list(expected.columns) == columns

    tm.assert_frame_equal(result, expected)
 | 
						|
 | 
						|
 | 
						|
@pytest.mark.parametrize("kernel", ["corr", "cov"])
@pytest.mark.parametrize("use_arg", [True, False])
def test_numeric_only_corr_cov_frame(kernel, numeric_only, use_arg):
    """Check ``numeric_only`` for ewm corr/cov on a frame with an object column.

    ``use_arg`` selects whether the frame is passed explicitly as ``other``.
    ``numeric_only`` is a fixture defined outside this file.
    """
    # GH#46560
    df = DataFrame({"a": [1, 2, 3], "b": 2, "c": 3})
    df["c"] = df["c"].astype(object)
    arg = (df,) if use_arg else ()
    ewm = df.ewm(span=2, min_periods=1)
    op = getattr(ewm, kernel)
    result = op(*arg, numeric_only=numeric_only)

    # Compare result to op using float dtypes, dropping c when numeric_only is True
    columns = ["a", "b"] if numeric_only else ["a", "b", "c"]
    df2 = df[columns].astype(float)
    arg2 = (df2,) if use_arg else ()
    ewm2 = df2.ewm(span=2, min_periods=1)
    op2 = getattr(ewm2, kernel)
    expected = op2(*arg2, numeric_only=numeric_only)

    tm.assert_frame_equal(result, expected)
 | 
						|
 | 
						|
 | 
						|
@pytest.mark.parametrize("dtype", [int, object])
def test_numeric_only_series(arithmetic_win_operators, numeric_only, dtype):
    """Check ``numeric_only`` handling of ewm kernels on int and object Series.

    ``numeric_only=True`` on an object Series is expected to raise
    ``NotImplementedError``; otherwise the result is compared with
    ``Series.agg``. ``arithmetic_win_operators`` and ``numeric_only`` are
    fixtures defined outside this file.
    """
    # GH#46560
    kernel = arithmetic_win_operators
    ser = Series([1], dtype=dtype)
    ewm = ser.ewm(span=2, min_periods=1)
    op = getattr(ewm, kernel, None)
    if op is None:
        # Nothing to test
        pytest.skip("No op to test")
    if numeric_only and dtype is object:
        msg = f"ExponentialMovingWindow.{kernel} does not implement numeric_only"
        with pytest.raises(NotImplementedError, match=msg):
            op(numeric_only=numeric_only)
    else:
        result = op(numeric_only=numeric_only)
        expected = ser.agg([kernel]).reset_index(drop=True).astype(float)
        tm.assert_series_equal(result, expected)
 | 
						|
 | 
						|
 | 
						|
@pytest.mark.parametrize("kernel", ["corr", "cov"])
@pytest.mark.parametrize("use_arg", [True, False])
@pytest.mark.parametrize("dtype", [int, object])
def test_numeric_only_corr_cov_series(kernel, use_arg, numeric_only, dtype):
    """Check ``numeric_only`` for ewm corr/cov on int and object Series.

    ``numeric_only=True`` on an object Series is expected to raise
    ``NotImplementedError``; otherwise the result must match the same call
    on a float copy. ``numeric_only`` is a fixture defined outside this file.
    """
    # GH#46560
    ser = Series([1, 2, 3], dtype=dtype)
    arg = (ser,) if use_arg else ()
    ewm = ser.ewm(span=2, min_periods=1)
    op = getattr(ewm, kernel)
    if numeric_only and dtype is object:
        msg = f"ExponentialMovingWindow.{kernel} does not implement numeric_only"
        with pytest.raises(NotImplementedError, match=msg):
            op(*arg, numeric_only=numeric_only)
    else:
        result = op(*arg, numeric_only=numeric_only)

        ser2 = ser.astype(float)
        arg2 = (ser2,) if use_arg else ()
        ewm2 = ser2.ewm(span=2, min_periods=1)
        op2 = getattr(ewm2, kernel)
        expected = op2(*arg2, numeric_only=numeric_only)
        tm.assert_series_equal(result, expected)
 |