import numpy as np
import pytest

from pandas import (
    DataFrame,
    NaT,
    Series,
    Timedelta,
    Timestamp,
    date_range,
)
import pandas._testing as tm


def test_group_shift_with_null_key():
    # This test is designed to replicate the segfault in issue #13813.
    n_rows = 1200

    # Generate a moderately large dataframe with occasional missing
    # values in column `B`, and then group by [`A`, `B`]. This should
    # force `-1` in `labels` array of `g.grouper.group_info` exactly
    # at those places, where the group-by key is partially missing.
    df = DataFrame(
        [(i % 12, i % 3 if i % 3 else np.nan, i) for i in range(n_rows)],
        dtype=float,
        columns=["A", "B", "Z"],
        index=None,
    )
    g = df.groupby(["A", "B"])

    expected = DataFrame(
        [(i + 12 if i % 3 and i < n_rows - 12 else np.nan) for i in range(n_rows)],
        dtype=float,
        columns=["Z"],
        index=None,
    )
    result = g.shift(-1)

    tm.assert_frame_equal(result, expected)


def test_group_shift_with_fill_value():
    # GH #24128
    n_rows = 24
    df = DataFrame(
        [(i % 12, i % 3, i) for i in range(n_rows)],
        dtype=float,
        columns=["A", "B", "Z"],
        index=None,
    )
    g = df.groupby(["A", "B"])

    expected = DataFrame(
        [(i + 12 if i < n_rows - 12 else 0) for i in range(n_rows)],
        dtype=float,
        columns=["Z"],
        index=None,
    )
    result = g.shift(-1, fill_value=0)

    tm.assert_frame_equal(result, expected)


def test_group_shift_lose_timezone():
    # GH 30134
    now_dt = Timestamp.utcnow().as_unit("ns")
    df = DataFrame({"a": [1, 1], "date": now_dt})
    result = df.groupby("a").shift(0).iloc[0]
    expected = Series({"date": now_dt}, name=result.name)
    tm.assert_series_equal(result, expected)


def test_group_diff_real_series(any_real_numpy_dtype):
    df = DataFrame(
        {"a": [1, 2, 3, 3, 2], "b": [1, 2, 3, 4, 5]},
        dtype=any_real_numpy_dtype,
    )
    result = df.groupby("a")["b"].diff()
    exp_dtype = "float"
    if any_real_numpy_dtype in ["int8", "int16", "float32"]:
        exp_dtype = "float32"
    expected = Series([np.nan, np.nan, np.nan, 1.0, 3.0], dtype=exp_dtype, name="b")
    tm.assert_series_equal(result, expected)


def test_group_diff_real_frame(any_real_numpy_dtype):
    df = DataFrame(
        {
            "a": [1, 2, 3, 3, 2],
            "b": [1, 2, 3, 4, 5],
            "c": [1, 2, 3, 4, 6],
        },
        dtype=any_real_numpy_dtype,
    )
    result = df.groupby("a").diff()
    exp_dtype = "float"
    if any_real_numpy_dtype in ["int8", "int16", "float32"]:
        exp_dtype = "float32"
    expected = DataFrame(
        {
            "b": [np.nan, np.nan, np.nan, 1.0, 3.0],
            "c": [np.nan, np.nan, np.nan, 1.0, 4.0],
        },
        dtype=exp_dtype,
    )
    tm.assert_frame_equal(result, expected)


@pytest.mark.parametrize(
    "data",
    [
        [
            Timestamp("2013-01-01"),
            Timestamp("2013-01-02"),
            Timestamp("2013-01-03"),
        ],
        [Timedelta("5 days"), Timedelta("6 days"), Timedelta("7 days")],
    ],
)
def test_group_diff_datetimelike(data):
    df = DataFrame({"a": [1, 2, 2], "b": data})
    result = df.groupby("a")["b"].diff()
    expected = Series([NaT, NaT, Timedelta("1 days")], name="b")
    tm.assert_series_equal(result, expected)


def test_group_diff_bool():
    df = DataFrame({"a": [1, 2, 3, 3, 2], "b": [True, True, False, False, True]})
    result = df.groupby("a")["b"].diff()
    expected = Series([np.nan, np.nan, np.nan, False, False], name="b")
    tm.assert_series_equal(result, expected)


def test_group_diff_object_raises(object_dtype):
    df = DataFrame(
        {"a": ["foo", "bar", "bar"], "b": ["baz", "foo", "foo"]}, dtype=object_dtype
    )
    with pytest.raises(TypeError, match=r"unsupported operand type\(s\) for -"):
        df.groupby("a")["b"].diff()


def test_empty_shift_with_fill():
    # GH 41264, single-index check
    df = DataFrame(columns=["a", "b", "c"])
    shifted = df.groupby(["a"]).shift(1)
    shifted_with_fill = df.groupby(["a"]).shift(1, fill_value=0)
    tm.assert_frame_equal(shifted, shifted_with_fill)
    tm.assert_index_equal(shifted.index, shifted_with_fill.index)


def test_multindex_empty_shift_with_fill():
    # GH 41264, multi-index check
    df = DataFrame(columns=["a", "b", "c"])
    shifted = df.groupby(["a", "b"]).shift(1)
    shifted_with_fill = df.groupby(["a", "b"]).shift(1, fill_value=0)
    tm.assert_frame_equal(shifted, shifted_with_fill)
    tm.assert_index_equal(shifted.index, shifted_with_fill.index)


def test_shift_periods_freq():
    # GH 54093
    data = {"a": [1, 2, 3, 4, 5, 6], "b": [0, 0, 0, 1, 1, 1]}
    df = DataFrame(data, index=date_range(start="20100101", periods=6))
    result = df.groupby(df.index).shift(periods=-2, freq="D")
    expected = DataFrame(data, index=date_range(start="2009-12-30", periods=6))
    tm.assert_frame_equal(result, expected)


def test_shift_deprecate_freq_and_fill_value():
    # GH 53832
    data = {"a": [1, 2, 3, 4, 5, 6], "b": [0, 0, 0, 1, 1, 1]}
    df = DataFrame(data, index=date_range(start="20100101", periods=6))
    msg = (
        "Passing a 'freq' together with a 'fill_value' silently ignores the fill_value"
    )
    with tm.assert_produces_warning(FutureWarning, match=msg):
        df.groupby(df.index).shift(periods=-2, freq="D", fill_value="1")


def test_shift_disallow_suffix_if_periods_is_int():
    # GH#44424
    data = {"a": [1, 2, 3, 4, 5, 6], "b": [0, 0, 0, 1, 1, 1]}
    df = DataFrame(data)
    msg = "Cannot specify `suffix` if `periods` is an int."
    with pytest.raises(ValueError, match=msg):
        df.groupby("b").shift(1, suffix="fails")


def test_group_shift_with_multiple_periods():
    # GH#44424
    df = DataFrame({"a": [1, 2, 3, 3, 2], "b": [True, True, False, False, True]})

    shifted_df = df.groupby("b")[["a"]].shift([0, 1])
    expected_df = DataFrame(
        {"a_0": [1, 2, 3, 3, 2], "a_1": [np.nan, 1.0, np.nan, 3.0, 2.0]}
    )
    tm.assert_frame_equal(shifted_df, expected_df)

    # series
    shifted_series = df.groupby("b")["a"].shift([0, 1])
    tm.assert_frame_equal(shifted_series, expected_df)


def test_group_shift_with_multiple_periods_and_freq():
    # GH#44424
    df = DataFrame(
        {"a": [1, 2, 3, 4, 5], "b": [True, True, False, False, True]},
        index=date_range("1/1/2000", periods=5, freq="H"),
    )
    shifted_df = df.groupby("b")[["a"]].shift(
        [0, 1],
        freq="H",
    )
    expected_df = DataFrame(
        {
            "a_0": [1.0, 2.0, 3.0, 4.0, 5.0, np.nan],
            "a_1": [
                np.nan,
                1.0,
                2.0,
                3.0,
                4.0,
                5.0,
            ],
        },
        index=date_range("1/1/2000", periods=6, freq="H"),
    )
    tm.assert_frame_equal(shifted_df, expected_df)


def test_group_shift_with_multiple_periods_and_fill_value():
    # GH#44424
    df = DataFrame(
        {"a": [1, 2, 3, 4, 5], "b": [True, True, False, False, True]},
    )
    shifted_df = df.groupby("b")[["a"]].shift([0, 1], fill_value=-1)
    expected_df = DataFrame(
        {"a_0": [1, 2, 3, 4, 5], "a_1": [-1, 1, -1, 3, 2]},
    )
    tm.assert_frame_equal(shifted_df, expected_df)


def test_group_shift_with_multiple_periods_and_both_fill_and_freq_deprecated():
    # GH#44424
    df = DataFrame(
        {"a": [1, 2, 3, 4, 5], "b": [True, True, False, False, True]},
        index=date_range("1/1/2000", periods=5, freq="H"),
    )
    msg = (
        "Passing a 'freq' together with a 'fill_value' silently ignores the "
        "fill_value"
    )
    with tm.assert_produces_warning(FutureWarning, match=msg):
        df.groupby("b")[["a"]].shift([1, 2], fill_value=1, freq="H")
