SongZihuan
/
SpringFocus
peilaus alkaen https://github.com/SongZihuan/SpringFocus.git


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985
							import datetime

import dateutil
import numpy as np
import pytest

import pandas.util._test_decorators as td

import pandas as pd
from pandas import Categorical, DataFrame, Series, Timestamp, date_range
import pandas._testing as tm
from pandas.tests.frame.common import _check_mixed_float


class TestDataFrameMissingData:
    def test_dropEmptyRows(self, float_frame):
        N = len(float_frame.index)
        mat = np.random.randn(N)
        mat[:5] = np.nan

        frame = DataFrame({"foo": mat}, index=float_frame.index)
        original = Series(mat, index=float_frame.index, name="foo")
        expected = original.dropna()
        inplace_frame1, inplace_frame2 = frame.copy(), frame.copy()

        smaller_frame = frame.dropna(how="all")
        # check that original was preserved
        tm.assert_series_equal(frame["foo"], original)
        inplace_frame1.dropna(how="all", inplace=True)
        tm.assert_series_equal(smaller_frame["foo"], expected)
        tm.assert_series_equal(inplace_frame1["foo"], expected)

        smaller_frame = frame.dropna(how="all", subset=["foo"])
        inplace_frame2.dropna(how="all", subset=["foo"], inplace=True)
        tm.assert_series_equal(smaller_frame["foo"], expected)
        tm.assert_series_equal(inplace_frame2["foo"], expected)

    def test_dropIncompleteRows(self, float_frame):
        N = len(float_frame.index)
        mat = np.random.randn(N)
        mat[:5] = np.nan

        frame = DataFrame({"foo": mat}, index=float_frame.index)
        frame["bar"] = 5
        original = Series(mat, index=float_frame.index, name="foo")
        inp_frame1, inp_frame2 = frame.copy(), frame.copy()

        smaller_frame = frame.dropna()
        tm.assert_series_equal(frame["foo"], original)
        inp_frame1.dropna(inplace=True)

        exp = Series(mat[5:], index=float_frame.index[5:], name="foo")
        tm.assert_series_equal(smaller_frame["foo"], exp)
        tm.assert_series_equal(inp_frame1["foo"], exp)

        samesize_frame = frame.dropna(subset=["bar"])
        tm.assert_series_equal(frame["foo"], original)
        assert (frame["bar"] == 5).all()
        inp_frame2.dropna(subset=["bar"], inplace=True)
        tm.assert_index_equal(samesize_frame.index, float_frame.index)
        tm.assert_index_equal(inp_frame2.index, float_frame.index)

    def test_dropna(self):
        df = DataFrame(np.random.randn(6, 4))
        df[2][:2] = np.nan

        dropped = df.dropna(axis=1)
        expected = df.loc[:, [0, 1, 3]]
        inp = df.copy()
        inp.dropna(axis=1, inplace=True)
        tm.assert_frame_equal(dropped, expected)
        tm.assert_frame_equal(inp, expected)

        dropped = df.dropna(axis=0)
        expected = df.loc[list(range(2, 6))]
        inp = df.copy()
        inp.dropna(axis=0, inplace=True)
        tm.assert_frame_equal(dropped, expected)
        tm.assert_frame_equal(inp, expected)

        # threshold
        dropped = df.dropna(axis=1, thresh=5)
        expected = df.loc[:, [0, 1, 3]]
        inp = df.copy()
        inp.dropna(axis=1, thresh=5, inplace=True)
        tm.assert_frame_equal(dropped, expected)
        tm.assert_frame_equal(inp, expected)

        dropped = df.dropna(axis=0, thresh=4)
        expected = df.loc[range(2, 6)]
        inp = df.copy()
        inp.dropna(axis=0, thresh=4, inplace=True)
        tm.assert_frame_equal(dropped, expected)
        tm.assert_frame_equal(inp, expected)

        dropped = df.dropna(axis=1, thresh=4)
        tm.assert_frame_equal(dropped, df)

        dropped = df.dropna(axis=1, thresh=3)
        tm.assert_frame_equal(dropped, df)

        # subset
        dropped = df.dropna(axis=0, subset=[0, 1, 3])
        inp = df.copy()
        inp.dropna(axis=0, subset=[0, 1, 3], inplace=True)
        tm.assert_frame_equal(dropped, df)
        tm.assert_frame_equal(inp, df)

        # all
        dropped = df.dropna(axis=1, how="all")
        tm.assert_frame_equal(dropped, df)

        df[2] = np.nan
        dropped = df.dropna(axis=1, how="all")
        expected = df.loc[:, [0, 1, 3]]
        tm.assert_frame_equal(dropped, expected)

        # bad input
        msg = "No axis named 3 for object type <class 'pandas.core.frame.DataFrame'>"
        with pytest.raises(ValueError, match=msg):
            df.dropna(axis=3)

    def test_drop_and_dropna_caching(self):
        # tst that cacher updates
        original = Series([1, 2, np.nan], name="A")
        expected = Series([1, 2], dtype=original.dtype, name="A")
        df = pd.DataFrame({"A": original.values.copy()})
        df2 = df.copy()
        df["A"].dropna()
        tm.assert_series_equal(df["A"], original)
        df["A"].dropna(inplace=True)
        tm.assert_series_equal(df["A"], expected)
        df2["A"].drop([1])
        tm.assert_series_equal(df2["A"], original)
        df2["A"].drop([1], inplace=True)
        tm.assert_series_equal(df2["A"], original.drop([1]))

    def test_dropna_corner(self, float_frame):
        # bad input
        msg = "invalid how option: foo"
        with pytest.raises(ValueError, match=msg):
            float_frame.dropna(how="foo")
        msg = "must specify how or thresh"
        with pytest.raises(TypeError, match=msg):
            float_frame.dropna(how=None)
        # non-existent column - 8303
        with pytest.raises(KeyError, match=r"^\['X'\]$"):
            float_frame.dropna(subset=["A", "X"])

    def test_dropna_multiple_axes(self):
        df = DataFrame(
            [
                [1, np.nan, 2, 3],
                [4, np.nan, 5, 6],
                [np.nan, np.nan, np.nan, np.nan],
                [7, np.nan, 8, 9],
            ]
        )

        # GH20987
        with pytest.raises(TypeError, match="supplying multiple axes"):
            df.dropna(how="all", axis=[0, 1])
        with pytest.raises(TypeError, match="supplying multiple axes"):
            df.dropna(how="all", axis=(0, 1))

        inp = df.copy()
        with pytest.raises(TypeError, match="supplying multiple axes"):
            inp.dropna(how="all", axis=(0, 1), inplace=True)

    def test_dropna_tz_aware_datetime(self):
        # GH13407
        df = DataFrame()
        dt1 = datetime.datetime(2015, 1, 1, tzinfo=dateutil.tz.tzutc())
        dt2 = datetime.datetime(2015, 2, 2, tzinfo=dateutil.tz.tzutc())
        df["Time"] = [dt1]
        result = df.dropna(axis=0)
        expected = DataFrame({"Time": [dt1]})
        tm.assert_frame_equal(result, expected)

        # Ex2
        df = DataFrame({"Time": [dt1, None, np.nan, dt2]})
        result = df.dropna(axis=0)
        expected = DataFrame([dt1, dt2], columns=["Time"], index=[0, 3])
        tm.assert_frame_equal(result, expected)

    def test_dropna_categorical_interval_index(self):
        # GH 25087
        ii = pd.IntervalIndex.from_breaks([0, 2.78, 3.14, 6.28])
        ci = pd.CategoricalIndex(ii)
        df = pd.DataFrame({"A": list("abc")}, index=ci)

        expected = df
        result = df.dropna()
        tm.assert_frame_equal(result, expected)

    def test_fillna_datetime(self, datetime_frame):
        tf = datetime_frame
        tf.loc[tf.index[:5], "A"] = np.nan
        tf.loc[tf.index[-5:], "A"] = np.nan

        zero_filled = datetime_frame.fillna(0)
        assert (zero_filled.loc[zero_filled.index[:5], "A"] == 0).all()

        padded = datetime_frame.fillna(method="pad")
        assert np.isnan(padded.loc[padded.index[:5], "A"]).all()
        assert (
            padded.loc[padded.index[-5:], "A"] == padded.loc[padded.index[-5], "A"]
        ).all()

        msg = "Must specify a fill 'value' or 'method'"
        with pytest.raises(ValueError, match=msg):
            datetime_frame.fillna()
        msg = "Cannot specify both 'value' and 'method'"
        with pytest.raises(ValueError, match=msg):
            datetime_frame.fillna(5, method="ffill")

    def test_fillna_mixed_type(self, float_string_frame):

        mf = float_string_frame
        mf.loc[mf.index[5:20], "foo"] = np.nan
        mf.loc[mf.index[-10:], "A"] = np.nan
        # TODO: make stronger assertion here, GH 25640
        mf.fillna(value=0)
        mf.fillna(method="pad")

    def test_fillna_mixed_float(self, mixed_float_frame):

        # mixed numeric (but no float16)
        mf = mixed_float_frame.reindex(columns=["A", "B", "D"])
        mf.loc[mf.index[-10:], "A"] = np.nan
        result = mf.fillna(value=0)
        _check_mixed_float(result, dtype=dict(C=None))

        result = mf.fillna(method="pad")
        _check_mixed_float(result, dtype=dict(C=None))

    def test_fillna_empty(self):
        # empty frame (GH #2778)
        df = DataFrame(columns=["x"])
        for m in ["pad", "backfill"]:
            df.x.fillna(method=m, inplace=True)
            df.x.fillna(method=m)

    def test_fillna_different_dtype(self):
        # with different dtype (GH#3386)
        df = DataFrame(
            [["a", "a", np.nan, "a"], ["b", "b", np.nan, "b"], ["c", "c", np.nan, "c"]]
        )

        result = df.fillna({2: "foo"})
        expected = DataFrame(
            [["a", "a", "foo", "a"], ["b", "b", "foo", "b"], ["c", "c", "foo", "c"]]
        )
        tm.assert_frame_equal(result, expected)

        df.fillna({2: "foo"}, inplace=True)
        tm.assert_frame_equal(df, expected)

    def test_fillna_limit_and_value(self):
        # limit and value
        df = DataFrame(np.random.randn(10, 3))
        df.iloc[2:7, 0] = np.nan
        df.iloc[3:5, 2] = np.nan

        expected = df.copy()
        expected.iloc[2, 0] = 999
        expected.iloc[3, 2] = 999
        result = df.fillna(999, limit=1)
        tm.assert_frame_equal(result, expected)

    def test_fillna_datelike(self):
        # with datelike
        # GH#6344
        df = DataFrame(
            {
                "Date": [pd.NaT, Timestamp("2014-1-1")],
                "Date2": [Timestamp("2013-1-1"), pd.NaT],
            }
        )

        expected = df.copy()
        expected["Date"] = expected["Date"].fillna(df.loc[df.index[0], "Date2"])
        result = df.fillna(value={"Date": df["Date2"]})
        tm.assert_frame_equal(result, expected)

    def test_fillna_tzaware(self):
        # with timezone
        # GH#15855
        df = pd.DataFrame({"A": [pd.Timestamp("2012-11-11 00:00:00+01:00"), pd.NaT]})
        exp = pd.DataFrame(
            {
                "A": [
                    pd.Timestamp("2012-11-11 00:00:00+01:00"),
                    pd.Timestamp("2012-11-11 00:00:00+01:00"),
                ]
            }
        )
        tm.assert_frame_equal(df.fillna(method="pad"), exp)

        df = pd.DataFrame({"A": [pd.NaT, pd.Timestamp("2012-11-11 00:00:00+01:00")]})
        exp = pd.DataFrame(
            {
                "A": [
                    pd.Timestamp("2012-11-11 00:00:00+01:00"),
                    pd.Timestamp("2012-11-11 00:00:00+01:00"),
                ]
            }
        )
        tm.assert_frame_equal(df.fillna(method="bfill"), exp)

    def test_fillna_tzaware_different_column(self):
        # with timezone in another column
        # GH#15522
        df = pd.DataFrame(
            {
                "A": pd.date_range("20130101", periods=4, tz="US/Eastern"),
                "B": [1, 2, np.nan, np.nan],
            }
        )
        result = df.fillna(method="pad")
        expected = pd.DataFrame(
            {
                "A": pd.date_range("20130101", periods=4, tz="US/Eastern"),
                "B": [1.0, 2.0, 2.0, 2.0],
            }
        )
        tm.assert_frame_equal(result, expected)

    def test_na_actions_categorical(self):

        cat = Categorical([1, 2, 3, np.nan], categories=[1, 2, 3])
        vals = ["a", "b", np.nan, "d"]
        df = DataFrame({"cats": cat, "vals": vals})
        cat2 = Categorical([1, 2, 3, 3], categories=[1, 2, 3])
        vals2 = ["a", "b", "b", "d"]
        df_exp_fill = DataFrame({"cats": cat2, "vals": vals2})
        cat3 = Categorical([1, 2, 3], categories=[1, 2, 3])
        vals3 = ["a", "b", np.nan]
        df_exp_drop_cats = DataFrame({"cats": cat3, "vals": vals3})
        cat4 = Categorical([1, 2], categories=[1, 2, 3])
        vals4 = ["a", "b"]
        df_exp_drop_all = DataFrame({"cats": cat4, "vals": vals4})

        # fillna
        res = df.fillna(value={"cats": 3, "vals": "b"})
        tm.assert_frame_equal(res, df_exp_fill)

        with pytest.raises(ValueError, match=("fill value must be in categories")):
            df.fillna(value={"cats": 4, "vals": "c"})

        res = df.fillna(method="pad")
        tm.assert_frame_equal(res, df_exp_fill)

        # dropna
        res = df.dropna(subset=["cats"])
        tm.assert_frame_equal(res, df_exp_drop_cats)

        res = df.dropna()
        tm.assert_frame_equal(res, df_exp_drop_all)

        # make sure that fillna takes missing values into account
        c = Categorical([np.nan, "b", np.nan], categories=["a", "b"])
        df = pd.DataFrame({"cats": c, "vals": [1, 2, 3]})

        cat_exp = Categorical(["a", "b", "a"], categories=["a", "b"])
        df_exp = DataFrame({"cats": cat_exp, "vals": [1, 2, 3]})

        res = df.fillna("a")
        tm.assert_frame_equal(res, df_exp)

    def test_fillna_categorical_nan(self):
        # GH 14021
        # np.nan should always be a valid filler
        cat = Categorical([np.nan, 2, np.nan])
        val = Categorical([np.nan, np.nan, np.nan])
        df = DataFrame({"cats": cat, "vals": val})
        with tm.assert_produces_warning(RuntimeWarning):
            res = df.fillna(df.median())
        v_exp = [np.nan, np.nan, np.nan]
        df_exp = DataFrame({"cats": [2, 2, 2], "vals": v_exp}, dtype="category")
        tm.assert_frame_equal(res, df_exp)

        result = df.cats.fillna(np.nan)
        tm.assert_series_equal(result, df.cats)

        result = df.vals.fillna(np.nan)
        tm.assert_series_equal(result, df.vals)

        idx = pd.DatetimeIndex(
            ["2011-01-01 09:00", "2016-01-01 23:45", "2011-01-01 09:00", pd.NaT, pd.NaT]
        )
        df = DataFrame({"a": Categorical(idx)})
        tm.assert_frame_equal(df.fillna(value=pd.NaT), df)

        idx = pd.PeriodIndex(
            ["2011-01", "2011-01", "2011-01", pd.NaT, pd.NaT], freq="M"
        )
        df = DataFrame({"a": Categorical(idx)})
        tm.assert_frame_equal(df.fillna(value=pd.NaT), df)

        idx = pd.TimedeltaIndex(["1 days", "2 days", "1 days", pd.NaT, pd.NaT])
        df = DataFrame({"a": Categorical(idx)})
        tm.assert_frame_equal(df.fillna(value=pd.NaT), df)

    def test_fillna_downcast(self):
        # GH 15277
        # infer int64 from float64
        df = pd.DataFrame({"a": [1.0, np.nan]})
        result = df.fillna(0, downcast="infer")
        expected = pd.DataFrame({"a": [1, 0]})
        tm.assert_frame_equal(result, expected)

        # infer int64 from float64 when fillna value is a dict
        df = pd.DataFrame({"a": [1.0, np.nan]})
        result = df.fillna({"a": 0}, downcast="infer")
        expected = pd.DataFrame({"a": [1, 0]})
        tm.assert_frame_equal(result, expected)

    def test_fillna_dtype_conversion(self):
        # make sure that fillna on an empty frame works
        df = DataFrame(index=["A", "B", "C"], columns=[1, 2, 3, 4, 5])
        result = df.dtypes
        expected = Series([np.dtype("object")] * 5, index=[1, 2, 3, 4, 5])
        tm.assert_series_equal(result, expected)

        result = df.fillna(1)
        expected = DataFrame(1, index=["A", "B", "C"], columns=[1, 2, 3, 4, 5])
        tm.assert_frame_equal(result, expected)

        # empty block
        df = DataFrame(index=range(3), columns=["A", "B"], dtype="float64")
        result = df.fillna("nan")
        expected = DataFrame("nan", index=range(3), columns=["A", "B"])
        tm.assert_frame_equal(result, expected)

        # equiv of replace
        df = DataFrame(dict(A=[1, np.nan], B=[1.0, 2.0]))
        for v in ["", 1, np.nan, 1.0]:
            expected = df.replace(np.nan, v)
            result = df.fillna(v)
            tm.assert_frame_equal(result, expected)

    def test_fillna_datetime_columns(self):
        # GH 7095
        df = pd.DataFrame(
            {
                "A": [-1, -2, np.nan],
                "B": date_range("20130101", periods=3),
                "C": ["foo", "bar", None],
                "D": ["foo2", "bar2", None],
            },
            index=date_range("20130110", periods=3),
        )
        result = df.fillna("?")
        expected = pd.DataFrame(
            {
                "A": [-1, -2, "?"],
                "B": date_range("20130101", periods=3),
                "C": ["foo", "bar", "?"],
                "D": ["foo2", "bar2", "?"],
            },
            index=date_range("20130110", periods=3),
        )
        tm.assert_frame_equal(result, expected)

        df = pd.DataFrame(
            {
                "A": [-1, -2, np.nan],
                "B": [pd.Timestamp("2013-01-01"), pd.Timestamp("2013-01-02"), pd.NaT],
                "C": ["foo", "bar", None],
                "D": ["foo2", "bar2", None],
            },
            index=date_range("20130110", periods=3),
        )
        result = df.fillna("?")
        expected = pd.DataFrame(
            {
                "A": [-1, -2, "?"],
                "B": [pd.Timestamp("2013-01-01"), pd.Timestamp("2013-01-02"), "?"],
                "C": ["foo", "bar", "?"],
                "D": ["foo2", "bar2", "?"],
            },
            index=pd.date_range("20130110", periods=3),
        )
        tm.assert_frame_equal(result, expected)

    def test_ffill(self, datetime_frame):
        datetime_frame["A"][:5] = np.nan
        datetime_frame["A"][-5:] = np.nan

        tm.assert_frame_equal(
            datetime_frame.ffill(), datetime_frame.fillna(method="ffill")
        )

    def test_bfill(self, datetime_frame):
        datetime_frame["A"][:5] = np.nan
        datetime_frame["A"][-5:] = np.nan

        tm.assert_frame_equal(
            datetime_frame.bfill(), datetime_frame.fillna(method="bfill")
        )

    def test_frame_pad_backfill_limit(self):
        index = np.arange(10)
        df = DataFrame(np.random.randn(10, 4), index=index)

        result = df[:2].reindex(index, method="pad", limit=5)

        expected = df[:2].reindex(index).fillna(method="pad")
        expected.values[-3:] = np.nan
        tm.assert_frame_equal(result, expected)

        result = df[-2:].reindex(index, method="backfill", limit=5)

        expected = df[-2:].reindex(index).fillna(method="backfill")
        expected.values[:3] = np.nan
        tm.assert_frame_equal(result, expected)

    def test_frame_fillna_limit(self):
        index = np.arange(10)
        df = DataFrame(np.random.randn(10, 4), index=index)

        result = df[:2].reindex(index)
        result = result.fillna(method="pad", limit=5)

        expected = df[:2].reindex(index).fillna(method="pad")
        expected.values[-3:] = np.nan
        tm.assert_frame_equal(result, expected)

        result = df[-2:].reindex(index)
        result = result.fillna(method="backfill", limit=5)

        expected = df[-2:].reindex(index).fillna(method="backfill")
        expected.values[:3] = np.nan
        tm.assert_frame_equal(result, expected)

    def test_fillna_skip_certain_blocks(self):
        # don't try to fill boolean, int blocks

        df = DataFrame(np.random.randn(10, 4).astype(int))

        # it works!
        df.fillna(np.nan)

    @pytest.mark.parametrize("type", [int, float])
    def test_fillna_positive_limit(self, type):
        df = DataFrame(np.random.randn(10, 4)).astype(type)

        msg = "Limit must be greater than 0"
        with pytest.raises(ValueError, match=msg):
            df.fillna(0, limit=-5)

    @pytest.mark.parametrize("type", [int, float])
    def test_fillna_integer_limit(self, type):
        df = DataFrame(np.random.randn(10, 4)).astype(type)

        msg = "Limit must be an integer"
        with pytest.raises(ValueError, match=msg):
            df.fillna(0, limit=0.5)

    def test_fillna_inplace(self):
        df = DataFrame(np.random.randn(10, 4))
        df[1][:4] = np.nan
        df[3][-4:] = np.nan

        expected = df.fillna(value=0)
        assert expected is not df

        df.fillna(value=0, inplace=True)
        tm.assert_frame_equal(df, expected)

        expected = df.fillna(value={0: 0}, inplace=True)
        assert expected is None

        df[1][:4] = np.nan
        df[3][-4:] = np.nan
        expected = df.fillna(method="ffill")
        assert expected is not df

        df.fillna(method="ffill", inplace=True)
        tm.assert_frame_equal(df, expected)

    def test_fillna_dict_series(self):
        df = DataFrame(
            {
                "a": [np.nan, 1, 2, np.nan, np.nan],
                "b": [1, 2, 3, np.nan, np.nan],
                "c": [np.nan, 1, 2, 3, 4],
            }
        )

        result = df.fillna({"a": 0, "b": 5})

        expected = df.copy()
        expected["a"] = expected["a"].fillna(0)
        expected["b"] = expected["b"].fillna(5)
        tm.assert_frame_equal(result, expected)

        # it works
        result = df.fillna({"a": 0, "b": 5, "d": 7})

        # Series treated same as dict
        result = df.fillna(df.max())
        expected = df.fillna(df.max().to_dict())
        tm.assert_frame_equal(result, expected)

        # disable this for now
        with pytest.raises(NotImplementedError, match="column by column"):
            df.fillna(df.max(1), axis=1)

    def test_fillna_dataframe(self):
        # GH 8377
        df = DataFrame(
            {
                "a": [np.nan, 1, 2, np.nan, np.nan],
                "b": [1, 2, 3, np.nan, np.nan],
                "c": [np.nan, 1, 2, 3, 4],
            },
            index=list("VWXYZ"),
        )

        # df2 may have different index and columns
        df2 = DataFrame(
            {
                "a": [np.nan, 10, 20, 30, 40],
                "b": [50, 60, 70, 80, 90],
                "foo": ["bar"] * 5,
            },
            index=list("VWXuZ"),
        )

        result = df.fillna(df2)

        # only those columns and indices which are shared get filled
        expected = DataFrame(
            {
                "a": [np.nan, 1, 2, np.nan, 40],
                "b": [1, 2, 3, np.nan, 90],
                "c": [np.nan, 1, 2, 3, 4],
            },
            index=list("VWXYZ"),
        )

        tm.assert_frame_equal(result, expected)

    def test_fillna_columns(self):
        df = DataFrame(np.random.randn(10, 10))
        df.values[:, ::2] = np.nan

        result = df.fillna(method="ffill", axis=1)
        expected = df.T.fillna(method="pad").T
        tm.assert_frame_equal(result, expected)

        df.insert(6, "foo", 5)
        result = df.fillna(method="ffill", axis=1)
        expected = df.astype(float).fillna(method="ffill", axis=1)
        tm.assert_frame_equal(result, expected)

    def test_fillna_invalid_method(self, float_frame):
        with pytest.raises(ValueError, match="ffil"):
            float_frame.fillna(method="ffil")

    def test_fillna_invalid_value(self, float_frame):
        # list
        msg = '"value" parameter must be a scalar or dict, but you passed a "{}"'
        with pytest.raises(TypeError, match=msg.format("list")):
            float_frame.fillna([1, 2])
        # tuple
        with pytest.raises(TypeError, match=msg.format("tuple")):
            float_frame.fillna((1, 2))
        # frame with series
        msg = (
            '"value" parameter must be a scalar, dict or Series, but you'
            ' passed a "DataFrame"'
        )
        with pytest.raises(TypeError, match=msg):
            float_frame.iloc[:, 0].fillna(float_frame)

    def test_fillna_col_reordering(self):
        cols = ["COL." + str(i) for i in range(5, 0, -1)]
        data = np.random.rand(20, 5)
        df = DataFrame(index=range(20), columns=cols, data=data)
        filled = df.fillna(method="ffill")
        assert df.columns.tolist() == filled.columns.tolist()

    def test_fill_corner(self, float_frame, float_string_frame):
        mf = float_string_frame
        mf.loc[mf.index[5:20], "foo"] = np.nan
        mf.loc[mf.index[-10:], "A"] = np.nan

        filled = float_string_frame.fillna(value=0)
        assert (filled.loc[filled.index[5:20], "foo"] == 0).all()
        del float_string_frame["foo"]

        empty_float = float_frame.reindex(columns=[])

        # TODO(wesm): unused?
        result = empty_float.fillna(value=0)  # noqa

    def test_fill_value_when_combine_const(self):
        # GH12723
        dat = np.array([0, 1, np.nan, 3, 4, 5], dtype="float")
        df = DataFrame({"foo": dat}, index=range(6))

        exp = df.fillna(0).add(2)
        res = df.add(2, fill_value=0)
        tm.assert_frame_equal(res, exp)


class TestDataFrameInterpolate:
    def test_interp_basic(self):
        df = DataFrame(
            {
                "A": [1, 2, np.nan, 4],
                "B": [1, 4, 9, np.nan],
                "C": [1, 2, 3, 5],
                "D": list("abcd"),
            }
        )
        expected = DataFrame(
            {
                "A": [1.0, 2.0, 3.0, 4.0],
                "B": [1.0, 4.0, 9.0, 9.0],
                "C": [1, 2, 3, 5],
                "D": list("abcd"),
            }
        )
        result = df.interpolate()
        tm.assert_frame_equal(result, expected)

        result = df.set_index("C").interpolate()
        expected = df.set_index("C")
        expected.loc[3, "A"] = 3
        expected.loc[5, "B"] = 9
        tm.assert_frame_equal(result, expected)

    def test_interp_bad_method(self):
        df = DataFrame(
            {
                "A": [1, 2, np.nan, 4],
                "B": [1, 4, 9, np.nan],
                "C": [1, 2, 3, 5],
                "D": list("abcd"),
            }
        )
        with pytest.raises(ValueError):
            df.interpolate(method="not_a_method")

    def test_interp_combo(self):
        df = DataFrame(
            {
                "A": [1.0, 2.0, np.nan, 4.0],
                "B": [1, 4, 9, np.nan],
                "C": [1, 2, 3, 5],
                "D": list("abcd"),
            }
        )

        result = df["A"].interpolate()
        expected = Series([1.0, 2.0, 3.0, 4.0], name="A")
        tm.assert_series_equal(result, expected)

        result = df["A"].interpolate(downcast="infer")
        expected = Series([1, 2, 3, 4], name="A")
        tm.assert_series_equal(result, expected)

    def test_interp_nan_idx(self):
        df = DataFrame({"A": [1, 2, np.nan, 4], "B": [np.nan, 2, 3, 4]})
        df = df.set_index("A")
        with pytest.raises(NotImplementedError):
            df.interpolate(method="values")

    @td.skip_if_no_scipy
    def test_interp_various(self):
        df = DataFrame(
            {"A": [1, 2, np.nan, 4, 5, np.nan, 7], "C": [1, 2, 3, 5, 8, 13, 21]}
        )
        df = df.set_index("C")
        expected = df.copy()
        result = df.interpolate(method="polynomial", order=1)

        expected.A.loc[3] = 2.66666667
        expected.A.loc[13] = 5.76923076
        tm.assert_frame_equal(result, expected)

        result = df.interpolate(method="cubic")
        # GH #15662.
        expected.A.loc[3] = 2.81547781
        expected.A.loc[13] = 5.52964175
        tm.assert_frame_equal(result, expected)

        result = df.interpolate(method="nearest")
        expected.A.loc[3] = 2
        expected.A.loc[13] = 5
        tm.assert_frame_equal(result, expected, check_dtype=False)

        result = df.interpolate(method="quadratic")
        expected.A.loc[3] = 2.82150771
        expected.A.loc[13] = 6.12648668
        tm.assert_frame_equal(result, expected)

        result = df.interpolate(method="slinear")
        expected.A.loc[3] = 2.66666667
        expected.A.loc[13] = 5.76923077
        tm.assert_frame_equal(result, expected)

        result = df.interpolate(method="zero")
        expected.A.loc[3] = 2.0
        expected.A.loc[13] = 5
        tm.assert_frame_equal(result, expected, check_dtype=False)

    @td.skip_if_no_scipy
    def test_interp_alt_scipy(self):
        df = DataFrame(
            {"A": [1, 2, np.nan, 4, 5, np.nan, 7], "C": [1, 2, 3, 5, 8, 13, 21]}
        )
        result = df.interpolate(method="barycentric")
        expected = df.copy()
        expected.loc[2, "A"] = 3
        expected.loc[5, "A"] = 6
        tm.assert_frame_equal(result, expected)

        result = df.interpolate(method="barycentric", downcast="infer")
        tm.assert_frame_equal(result, expected.astype(np.int64))

        result = df.interpolate(method="krogh")
        expectedk = df.copy()
        expectedk["A"] = expected["A"]
        tm.assert_frame_equal(result, expectedk)

        result = df.interpolate(method="pchip")
        expected.loc[2, "A"] = 3
        expected.loc[5, "A"] = 6.0

        tm.assert_frame_equal(result, expected)

    def test_interp_rowwise(self):
        df = DataFrame(
            {
                0: [1, 2, np.nan, 4],
                1: [2, 3, 4, np.nan],
                2: [np.nan, 4, 5, 6],
                3: [4, np.nan, 6, 7],
                4: [1, 2, 3, 4],
            }
        )
        result = df.interpolate(axis=1)
        expected = df.copy()
        expected.loc[3, 1] = 5
        expected.loc[0, 2] = 3
        expected.loc[1, 3] = 3
        expected[4] = expected[4].astype(np.float64)
        tm.assert_frame_equal(result, expected)

        result = df.interpolate(axis=1, method="values")
        tm.assert_frame_equal(result, expected)

        result = df.interpolate(axis=0)
        expected = df.interpolate()
        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize(
        "axis_name, axis_number",
        [
            pytest.param("rows", 0, id="rows_0"),
            pytest.param("index", 0, id="index_0"),
            pytest.param("columns", 1, id="columns_1"),
        ],
    )
    def test_interp_axis_names(self, axis_name, axis_number):
        # GH 29132: test axis names
        data = {0: [0, np.nan, 6], 1: [1, np.nan, 7], 2: [2, 5, 8]}

        df = DataFrame(data, dtype=np.float64)
        result = df.interpolate(axis=axis_name, method="linear")
        expected = df.interpolate(axis=axis_number, method="linear")
        tm.assert_frame_equal(result, expected)

    def test_rowwise_alt(self):
        df = DataFrame(
            {
                0: [0, 0.5, 1.0, np.nan, 4, 8, np.nan, np.nan, 64],
                1: [1, 2, 3, 4, 3, 2, 1, 0, -1],
            }
        )
        df.interpolate(axis=0)

    @pytest.mark.parametrize(
        "check_scipy", [False, pytest.param(True, marks=td.skip_if_no_scipy)]
    )
    def test_interp_leading_nans(self, check_scipy):
        df = DataFrame(
            {"A": [np.nan, np.nan, 0.5, 0.25, 0], "B": [np.nan, -3, -3.5, np.nan, -4]}
        )
        result = df.interpolate()
        expected = df.copy()
        expected["B"].loc[3] = -3.75
        tm.assert_frame_equal(result, expected)

        if check_scipy:
            result = df.interpolate(method="polynomial", order=1)
            tm.assert_frame_equal(result, expected)

    def test_interp_raise_on_only_mixed(self):
        df = DataFrame(
            {
                "A": [1, 2, np.nan, 4],
                "B": ["a", "b", "c", "d"],
                "C": [np.nan, 2, 5, 7],
                "D": [np.nan, np.nan, 9, 9],
                "E": [1, 2, 3, 4],
            }
        )
        with pytest.raises(TypeError):
            df.interpolate(axis=1)

    def test_interp_raise_on_all_object_dtype(self):
        # GH 22985
        df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}, dtype="object")
        msg = (
            "Cannot interpolate with all object-dtype columns "
            "in the DataFrame. Try setting at least one "
            "column to a numeric dtype."
        )
        with pytest.raises(TypeError, match=msg):
            df.interpolate()

    def test_interp_inplace(self):
        df = DataFrame({"a": [1.0, 2.0, np.nan, 4.0]})
        expected = DataFrame({"a": [1.0, 2.0, 3.0, 4.0]})
        result = df.copy()
        result["a"].interpolate(inplace=True)
        tm.assert_frame_equal(result, expected)

        result = df.copy()
        result["a"].interpolate(inplace=True, downcast="infer")
        tm.assert_frame_equal(result, expected.astype("int64"))

    def test_interp_inplace_row(self):
        # GH 10395
        result = DataFrame(
            {"a": [1.0, 2.0, 3.0, 4.0], "b": [np.nan, 2.0, 3.0, 4.0], "c": [3, 2, 2, 2]}
        )
        expected = result.interpolate(method="linear", axis=1, inplace=False)
        result.interpolate(method="linear", axis=1, inplace=True)
        tm.assert_frame_equal(result, expected)

    def test_interp_ignore_all_good(self):
        # GH
        df = DataFrame(
            {
                "A": [1, 2, np.nan, 4],
                "B": [1, 2, 3, 4],
                "C": [1.0, 2.0, np.nan, 4.0],
                "D": [1.0, 2.0, 3.0, 4.0],
            }
        )
        expected = DataFrame(
            {
                "A": np.array([1, 2, 3, 4], dtype="float64"),
                "B": np.array([1, 2, 3, 4], dtype="int64"),
                "C": np.array([1.0, 2.0, 3, 4.0], dtype="float64"),
                "D": np.array([1.0, 2.0, 3.0, 4.0], dtype="float64"),
            }
        )

        result = df.interpolate(downcast=None)
        tm.assert_frame_equal(result, expected)

        # all good
        result = df[["B", "D"]].interpolate(downcast=None)
        tm.assert_frame_equal(result, df[["B", "D"]])

    @pytest.mark.parametrize("axis", [0, 1])
    def test_interp_time_inplace_axis(self, axis):
        # GH 9687
        periods = 5
        idx = pd.date_range(start="2014-01-01", periods=periods)
        data = np.random.rand(periods, periods)
        data[data < 0.5] = np.nan
        expected = pd.DataFrame(index=idx, columns=idx, data=data)

        result = expected.interpolate(axis=0, method="time")
        expected.interpolate(axis=0, method="time", inplace=True)
        tm.assert_frame_equal(result, expected)