from datetime import datetime import numpy as np import pytest import pandas as pd from pandas import DataFrame, Index, Series, Timestamp, date_range import pandas._testing as tm class TestDataFrameConcatCommon: def test_concat_multiple_frames_dtypes(self): # GH 2759 A = DataFrame(data=np.ones((10, 2)), columns=["foo", "bar"], dtype=np.float64) B = DataFrame(data=np.ones((10, 2)), dtype=np.float32) results = pd.concat((A, B), axis=1).dtypes expected = Series( [np.dtype("float64")] * 2 + [np.dtype("float32")] * 2, index=["foo", "bar", 0, 1], ) tm.assert_series_equal(results, expected) @pytest.mark.parametrize( "data", [ pd.date_range("2000", periods=4), pd.date_range("2000", periods=4, tz="US/Central"), pd.period_range("2000", periods=4), pd.timedelta_range(0, periods=4), ], ) def test_combine_datetlike_udf(self, data): # https://github.com/pandas-dev/pandas/issues/23079 df = pd.DataFrame({"A": data}) other = df.copy() df.iloc[1, 0] = None def combiner(a, b): return b result = df.combine(other, combiner) tm.assert_frame_equal(result, other) def test_concat_multiple_tzs(self): # GH 12467 # combining datetime tz-aware and naive DataFrames ts1 = Timestamp("2015-01-01", tz=None) ts2 = Timestamp("2015-01-01", tz="UTC") ts3 = Timestamp("2015-01-01", tz="EST") df1 = DataFrame(dict(time=[ts1])) df2 = DataFrame(dict(time=[ts2])) df3 = DataFrame(dict(time=[ts3])) results = pd.concat([df1, df2]).reset_index(drop=True) expected = DataFrame(dict(time=[ts1, ts2]), dtype=object) tm.assert_frame_equal(results, expected) results = pd.concat([df1, df3]).reset_index(drop=True) expected = DataFrame(dict(time=[ts1, ts3]), dtype=object) tm.assert_frame_equal(results, expected) results = pd.concat([df2, df3]).reset_index(drop=True) expected = DataFrame(dict(time=[ts2, ts3])) tm.assert_frame_equal(results, expected) @pytest.mark.parametrize( "t1", [ "2015-01-01", pytest.param( pd.NaT, marks=pytest.mark.xfail( reason="GH23037 incorrect dtype when concatenating" ), ), ], ) def test_concat_tz_NaT(self, t1): # GH 22796 # Concating tz-aware multicolumn DataFrames ts1 = Timestamp(t1, tz="UTC") ts2 = Timestamp("2015-01-01", tz="UTC") ts3 = Timestamp("2015-01-01", tz="UTC") df1 = DataFrame([[ts1, ts2]]) df2 = DataFrame([[ts3]]) result = pd.concat([df1, df2]) expected = DataFrame([[ts1, ts2], [ts3, pd.NaT]], index=[0, 0]) tm.assert_frame_equal(result, expected) def test_concat_tz_not_aligned(self): # GH 22796 ts = pd.to_datetime([1, 2]).tz_localize("UTC") a = pd.DataFrame({"A": ts}) b = pd.DataFrame({"A": ts, "B": ts}) result = pd.concat([a, b], sort=True, ignore_index=True) expected = pd.DataFrame( {"A": list(ts) + list(ts), "B": [pd.NaT, pd.NaT] + list(ts)} ) tm.assert_frame_equal(result, expected) def test_concat_tuple_keys(self): # GH 14438 df1 = pd.DataFrame(np.ones((2, 2)), columns=list("AB")) df2 = pd.DataFrame(np.ones((3, 2)) * 2, columns=list("AB")) results = pd.concat((df1, df2), keys=[("bee", "bah"), ("bee", "boo")]) expected = pd.DataFrame( { "A": { ("bee", "bah", 0): 1.0, ("bee", "bah", 1): 1.0, ("bee", "boo", 0): 2.0, ("bee", "boo", 1): 2.0, ("bee", "boo", 2): 2.0, }, "B": { ("bee", "bah", 0): 1.0, ("bee", "bah", 1): 1.0, ("bee", "boo", 0): 2.0, ("bee", "boo", 1): 2.0, ("bee", "boo", 2): 2.0, }, } ) tm.assert_frame_equal(results, expected) def test_update(self): df = DataFrame( [[1.5, np.nan, 3.0], [1.5, np.nan, 3.0], [1.5, np.nan, 3], [1.5, np.nan, 3]] ) other = DataFrame([[3.6, 2.0, np.nan], [np.nan, np.nan, 7]], index=[1, 3]) df.update(other) expected = DataFrame( [[1.5, np.nan, 3], [3.6, 2, 3], [1.5, np.nan, 3], [1.5, np.nan, 7.0]] ) tm.assert_frame_equal(df, expected) def test_update_dtypes(self): # gh 3016 df = DataFrame( [[1.0, 2.0, False, True], [4.0, 5.0, True, False]], columns=["A", "B", "bool1", "bool2"], ) other = DataFrame([[45, 45]], index=[0], columns=["A", "B"]) df.update(other) expected = DataFrame( [[45.0, 45.0, False, True], [4.0, 5.0, True, False]], columns=["A", "B", "bool1", "bool2"], ) tm.assert_frame_equal(df, expected) def test_update_nooverwrite(self): df = DataFrame( [[1.5, np.nan, 3.0], [1.5, np.nan, 3.0], [1.5, np.nan, 3], [1.5, np.nan, 3]] ) other = DataFrame([[3.6, 2.0, np.nan], [np.nan, np.nan, 7]], index=[1, 3]) df.update(other, overwrite=False) expected = DataFrame( [[1.5, np.nan, 3], [1.5, 2, 3], [1.5, np.nan, 3], [1.5, np.nan, 3.0]] ) tm.assert_frame_equal(df, expected) def test_update_filtered(self): df = DataFrame( [[1.5, np.nan, 3.0], [1.5, np.nan, 3.0], [1.5, np.nan, 3], [1.5, np.nan, 3]] ) other = DataFrame([[3.6, 2.0, np.nan], [np.nan, np.nan, 7]], index=[1, 3]) df.update(other, filter_func=lambda x: x > 2) expected = DataFrame( [[1.5, np.nan, 3], [1.5, np.nan, 3], [1.5, np.nan, 3], [1.5, np.nan, 7.0]] ) tm.assert_frame_equal(df, expected) @pytest.mark.parametrize( "bad_kwarg, exception, msg", [ # errors must be 'ignore' or 'raise' ({"errors": "something"}, ValueError, "The parameter errors must.*"), ({"join": "inner"}, NotImplementedError, "Only left join is supported"), ], ) def test_update_raise_bad_parameter(self, bad_kwarg, exception, msg): df = DataFrame([[1.5, 1, 3.0]]) with pytest.raises(exception, match=msg): df.update(df, **bad_kwarg) def test_update_raise_on_overlap(self): df = DataFrame( [[1.5, 1, 3.0], [1.5, np.nan, 3.0], [1.5, np.nan, 3], [1.5, np.nan, 3]] ) other = DataFrame([[2.0, np.nan], [np.nan, 7]], index=[1, 3], columns=[1, 2]) with pytest.raises(ValueError, match="Data overlaps"): df.update(other, errors="raise") def test_update_from_non_df(self): d = {"a": Series([1, 2, 3, 4]), "b": Series([5, 6, 7, 8])} df = DataFrame(d) d["a"] = Series([5, 6, 7, 8]) df.update(d) expected = DataFrame(d) tm.assert_frame_equal(df, expected) d = {"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]} df = DataFrame(d) d["a"] = [5, 6, 7, 8] df.update(d) expected = DataFrame(d) tm.assert_frame_equal(df, expected) def test_update_datetime_tz(self): # GH 25807 result = DataFrame([pd.Timestamp("2019", tz="UTC")]) result.update(result) expected = DataFrame([pd.Timestamp("2019", tz="UTC")]) tm.assert_frame_equal(result, expected) def test_join_str_datetime(self): str_dates = ["20120209", "20120222"] dt_dates = [datetime(2012, 2, 9), datetime(2012, 2, 22)] A = DataFrame(str_dates, index=range(2), columns=["aa"]) C = DataFrame([[1, 2], [3, 4]], index=str_dates, columns=dt_dates) tst = A.join(C, on="aa") assert len(tst.columns) == 3 def test_join_multiindex_leftright(self): # GH 10741 df1 = pd.DataFrame( [ ["a", "x", 0.471780], ["a", "y", 0.774908], ["a", "z", 0.563634], ["b", "x", -0.353756], ["b", "y", 0.368062], ["b", "z", -1.721840], ["c", "x", 1], ["c", "y", 2], ["c", "z", 3], ], columns=["first", "second", "value1"], ).set_index(["first", "second"]) df2 = pd.DataFrame( [["a", 10], ["b", 20]], columns=["first", "value2"] ).set_index(["first"]) exp = pd.DataFrame( [ [0.471780, 10], [0.774908, 10], [0.563634, 10], [-0.353756, 20], [0.368062, 20], [-1.721840, 20], [1.000000, np.nan], [2.000000, np.nan], [3.000000, np.nan], ], index=df1.index, columns=["value1", "value2"], ) # these must be the same results (but columns are flipped) tm.assert_frame_equal(df1.join(df2, how="left"), exp) tm.assert_frame_equal(df2.join(df1, how="right"), exp[["value2", "value1"]]) exp_idx = pd.MultiIndex.from_product( [["a", "b"], ["x", "y", "z"]], names=["first", "second"] ) exp = pd.DataFrame( [ [0.471780, 10], [0.774908, 10], [0.563634, 10], [-0.353756, 20], [0.368062, 20], [-1.721840, 20], ], index=exp_idx, columns=["value1", "value2"], ) tm.assert_frame_equal(df1.join(df2, how="right"), exp) tm.assert_frame_equal(df2.join(df1, how="left"), exp[["value2", "value1"]]) def test_concat_named_keys(self): # GH 14252 df = pd.DataFrame({"foo": [1, 2], "bar": [0.1, 0.2]}) index = Index(["a", "b"], name="baz") concatted_named_from_keys = pd.concat([df, df], keys=index) expected_named = pd.DataFrame( {"foo": [1, 2, 1, 2], "bar": [0.1, 0.2, 0.1, 0.2]}, index=pd.MultiIndex.from_product((["a", "b"], [0, 1]), names=["baz", None]), ) tm.assert_frame_equal(concatted_named_from_keys, expected_named) index_no_name = Index(["a", "b"], name=None) concatted_named_from_names = pd.concat( [df, df], keys=index_no_name, names=["baz"] ) tm.assert_frame_equal(concatted_named_from_names, expected_named) concatted_unnamed = pd.concat([df, df], keys=index_no_name) expected_unnamed = pd.DataFrame( {"foo": [1, 2, 1, 2], "bar": [0.1, 0.2, 0.1, 0.2]}, index=pd.MultiIndex.from_product((["a", "b"], [0, 1]), names=[None, None]), ) tm.assert_frame_equal(concatted_unnamed, expected_unnamed) def test_concat_axis_parameter(self): # GH 14369 df1 = pd.DataFrame({"A": [0.1, 0.2]}, index=range(2)) df2 = pd.DataFrame({"A": [0.3, 0.4]}, index=range(2)) # Index/row/0 DataFrame expected_index = pd.DataFrame({"A": [0.1, 0.2, 0.3, 0.4]}, index=[0, 1, 0, 1]) concatted_index = pd.concat([df1, df2], axis="index") tm.assert_frame_equal(concatted_index, expected_index) concatted_row = pd.concat([df1, df2], axis="rows") tm.assert_frame_equal(concatted_row, expected_index) concatted_0 = pd.concat([df1, df2], axis=0) tm.assert_frame_equal(concatted_0, expected_index) # Columns/1 DataFrame expected_columns = pd.DataFrame( [[0.1, 0.3], [0.2, 0.4]], index=[0, 1], columns=["A", "A"] ) concatted_columns = pd.concat([df1, df2], axis="columns") tm.assert_frame_equal(concatted_columns, expected_columns) concatted_1 = pd.concat([df1, df2], axis=1) tm.assert_frame_equal(concatted_1, expected_columns) series1 = pd.Series([0.1, 0.2]) series2 = pd.Series([0.3, 0.4]) # Index/row/0 Series expected_index_series = pd.Series([0.1, 0.2, 0.3, 0.4], index=[0, 1, 0, 1]) concatted_index_series = pd.concat([series1, series2], axis="index") tm.assert_series_equal(concatted_index_series, expected_index_series) concatted_row_series = pd.concat([series1, series2], axis="rows") tm.assert_series_equal(concatted_row_series, expected_index_series) concatted_0_series = pd.concat([series1, series2], axis=0) tm.assert_series_equal(concatted_0_series, expected_index_series) # Columns/1 Series expected_columns_series = pd.DataFrame( [[0.1, 0.3], [0.2, 0.4]], index=[0, 1], columns=[0, 1] ) concatted_columns_series = pd.concat([series1, series2], axis="columns") tm.assert_frame_equal(concatted_columns_series, expected_columns_series) concatted_1_series = pd.concat([series1, series2], axis=1) tm.assert_frame_equal(concatted_1_series, expected_columns_series) # Testing ValueError with pytest.raises(ValueError, match="No axis named"): pd.concat([series1, series2], axis="something") def test_concat_numerical_names(self): # #15262 # #12223 df = pd.DataFrame( {"col": range(9)}, dtype="int32", index=( pd.MultiIndex.from_product( [["A0", "A1", "A2"], ["B0", "B1", "B2"]], names=[1, 2] ) ), ) result = pd.concat((df.iloc[:2, :], df.iloc[-2:, :])) expected = pd.DataFrame( {"col": [0, 1, 7, 8]}, dtype="int32", index=pd.MultiIndex.from_tuples( [("A0", "B0"), ("A0", "B1"), ("A2", "B1"), ("A2", "B2")], names=[1, 2] ), ) tm.assert_frame_equal(result, expected) def test_concat_astype_dup_col(self): # gh 23049 df = pd.DataFrame([{"a": "b"}]) df = pd.concat([df, df], axis=1) result = df.astype("category") expected = pd.DataFrame( np.array(["b", "b"]).reshape(1, 2), columns=["a", "a"] ).astype("category") tm.assert_frame_equal(result, expected) class TestDataFrameCombineFirst: def test_combine_first_mixed(self): a = Series(["a", "b"], index=range(2)) b = Series(range(2), index=range(2)) f = DataFrame({"A": a, "B": b}) a = Series(["a", "b"], index=range(5, 7)) b = Series(range(2), index=range(5, 7)) g = DataFrame({"A": a, "B": b}) exp = pd.DataFrame( {"A": list("abab"), "B": [0.0, 1.0, 0.0, 1.0]}, index=[0, 1, 5, 6] ) combined = f.combine_first(g) tm.assert_frame_equal(combined, exp) def test_combine_first(self, float_frame): # disjoint head, tail = float_frame[:5], float_frame[5:] combined = head.combine_first(tail) reordered_frame = float_frame.reindex(combined.index) tm.assert_frame_equal(combined, reordered_frame) assert tm.equalContents(combined.columns, float_frame.columns) tm.assert_series_equal(combined["A"], reordered_frame["A"]) # same index fcopy = float_frame.copy() fcopy["A"] = 1 del fcopy["C"] fcopy2 = float_frame.copy() fcopy2["B"] = 0 del fcopy2["D"] combined = fcopy.combine_first(fcopy2) assert (combined["A"] == 1).all() tm.assert_series_equal(combined["B"], fcopy["B"]) tm.assert_series_equal(combined["C"], fcopy2["C"]) tm.assert_series_equal(combined["D"], fcopy["D"]) # overlap head, tail = reordered_frame[:10].copy(), reordered_frame head["A"] = 1 combined = head.combine_first(tail) assert (combined["A"][:10] == 1).all() # reverse overlap tail["A"][:10] = 0 combined = tail.combine_first(head) assert (combined["A"][:10] == 0).all() # no overlap f = float_frame[:10] g = float_frame[10:] combined = f.combine_first(g) tm.assert_series_equal(combined["A"].reindex(f.index), f["A"]) tm.assert_series_equal(combined["A"].reindex(g.index), g["A"]) # corner cases comb = float_frame.combine_first(DataFrame()) tm.assert_frame_equal(comb, float_frame) comb = DataFrame().combine_first(float_frame) tm.assert_frame_equal(comb, float_frame) comb = float_frame.combine_first(DataFrame(index=["faz", "boo"])) assert "faz" in comb.index # #2525 df = DataFrame({"a": [1]}, index=[datetime(2012, 1, 1)]) df2 = DataFrame(columns=["b"]) result = df.combine_first(df2) assert "b" in result def test_combine_first_mixed_bug(self): idx = Index(["a", "b", "c", "e"]) ser1 = Series([5.0, -9.0, 4.0, 100.0], index=idx) ser2 = Series(["a", "b", "c", "e"], index=idx) ser3 = Series([12, 4, 5, 97], index=idx) frame1 = DataFrame({"col0": ser1, "col2": ser2, "col3": ser3}) idx = Index(["a", "b", "c", "f"]) ser1 = Series([5.0, -9.0, 4.0, 100.0], index=idx) ser2 = Series(["a", "b", "c", "f"], index=idx) ser3 = Series([12, 4, 5, 97], index=idx) frame2 = DataFrame({"col1": ser1, "col2": ser2, "col5": ser3}) combined = frame1.combine_first(frame2) assert len(combined.columns) == 5 # gh 3016 (same as in update) df = DataFrame( [[1.0, 2.0, False, True], [4.0, 5.0, True, False]], columns=["A", "B", "bool1", "bool2"], ) other = DataFrame([[45, 45]], index=[0], columns=["A", "B"]) result = df.combine_first(other) tm.assert_frame_equal(result, df) df.loc[0, "A"] = np.nan result = df.combine_first(other) df.loc[0, "A"] = 45 tm.assert_frame_equal(result, df) # doc example df1 = DataFrame( {"A": [1.0, np.nan, 3.0, 5.0, np.nan], "B": [np.nan, 2.0, 3.0, np.nan, 6.0]} ) df2 = DataFrame( { "A": [5.0, 2.0, 4.0, np.nan, 3.0, 7.0], "B": [np.nan, np.nan, 3.0, 4.0, 6.0, 8.0], } ) result = df1.combine_first(df2) expected = DataFrame({"A": [1, 2, 3, 5, 3, 7.0], "B": [np.nan, 2, 3, 4, 6, 8]}) tm.assert_frame_equal(result, expected) # GH3552, return object dtype with bools df1 = DataFrame( [[np.nan, 3.0, True], [-4.6, np.nan, True], [np.nan, 7.0, False]] ) df2 = DataFrame([[-42.6, np.nan, True], [-5.0, 1.6, False]], index=[1, 2]) result = df1.combine_first(df2)[2] expected = Series([True, True, False], name=2) tm.assert_series_equal(result, expected) # GH 3593, converting datetime64[ns] incorrectly df0 = DataFrame( {"a": [datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)]} ) df1 = DataFrame({"a": [None, None, None]}) df2 = df1.combine_first(df0) tm.assert_frame_equal(df2, df0) df2 = df0.combine_first(df1) tm.assert_frame_equal(df2, df0) df0 = DataFrame( {"a": [datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)]} ) df1 = DataFrame({"a": [datetime(2000, 1, 2), None, None]}) df2 = df1.combine_first(df0) result = df0.copy() result.iloc[0, :] = df1.iloc[0, :] tm.assert_frame_equal(df2, result) df2 = df0.combine_first(df1) tm.assert_frame_equal(df2, df0) def test_combine_first_align_nan(self): # GH 7509 (not fixed) dfa = pd.DataFrame([[pd.Timestamp("2011-01-01"), 2]], columns=["a", "b"]) dfb = pd.DataFrame([[4], [5]], columns=["b"]) assert dfa["a"].dtype == "datetime64[ns]" assert dfa["b"].dtype == "int64" res = dfa.combine_first(dfb) exp = pd.DataFrame( {"a": [pd.Timestamp("2011-01-01"), pd.NaT], "b": [2.0, 5.0]}, columns=["a", "b"], ) tm.assert_frame_equal(res, exp) assert res["a"].dtype == "datetime64[ns]" # ToDo: this must be int64 assert res["b"].dtype == "float64" res = dfa.iloc[:0].combine_first(dfb) exp = pd.DataFrame({"a": [np.nan, np.nan], "b": [4, 5]}, columns=["a", "b"]) tm.assert_frame_equal(res, exp) # ToDo: this must be datetime64 assert res["a"].dtype == "float64" # ToDo: this must be int64 assert res["b"].dtype == "int64" def test_combine_first_timezone(self): # see gh-7630 data1 = pd.to_datetime("20100101 01:01").tz_localize("UTC") df1 = pd.DataFrame( columns=["UTCdatetime", "abc"], data=data1, index=pd.date_range("20140627", periods=1), ) data2 = pd.to_datetime("20121212 12:12").tz_localize("UTC") df2 = pd.DataFrame( columns=["UTCdatetime", "xyz"], data=data2, index=pd.date_range("20140628", periods=1), ) res = df2[["UTCdatetime"]].combine_first(df1) exp = pd.DataFrame( { "UTCdatetime": [ pd.Timestamp("2010-01-01 01:01", tz="UTC"), pd.Timestamp("2012-12-12 12:12", tz="UTC"), ], "abc": [pd.Timestamp("2010-01-01 01:01:00", tz="UTC"), pd.NaT], }, columns=["UTCdatetime", "abc"], index=pd.date_range("20140627", periods=2, freq="D"), ) tm.assert_frame_equal(res, exp) assert res["UTCdatetime"].dtype == "datetime64[ns, UTC]" assert res["abc"].dtype == "datetime64[ns, UTC]" # see gh-10567 dts1 = pd.date_range("2015-01-01", "2015-01-05", tz="UTC") df1 = pd.DataFrame({"DATE": dts1}) dts2 = pd.date_range("2015-01-03", "2015-01-05", tz="UTC") df2 = pd.DataFrame({"DATE": dts2}) res = df1.combine_first(df2) tm.assert_frame_equal(res, df1) assert res["DATE"].dtype == "datetime64[ns, UTC]" dts1 = pd.DatetimeIndex( ["2011-01-01", "NaT", "2011-01-03", "2011-01-04"], tz="US/Eastern" ) df1 = pd.DataFrame({"DATE": dts1}, index=[1, 3, 5, 7]) dts2 = pd.DatetimeIndex( ["2012-01-01", "2012-01-02", "2012-01-03"], tz="US/Eastern" ) df2 = pd.DataFrame({"DATE": dts2}, index=[2, 4, 5]) res = df1.combine_first(df2) exp_dts = pd.DatetimeIndex( [ "2011-01-01", "2012-01-01", "NaT", "2012-01-02", "2011-01-03", "2011-01-04", ], tz="US/Eastern", ) exp = pd.DataFrame({"DATE": exp_dts}, index=[1, 2, 3, 4, 5, 7]) tm.assert_frame_equal(res, exp) # different tz dts1 = pd.date_range("2015-01-01", "2015-01-05", tz="US/Eastern") df1 = pd.DataFrame({"DATE": dts1}) dts2 = pd.date_range("2015-01-03", "2015-01-05") df2 = pd.DataFrame({"DATE": dts2}) # if df1 doesn't have NaN, keep its dtype res = df1.combine_first(df2) tm.assert_frame_equal(res, df1) assert res["DATE"].dtype == "datetime64[ns, US/Eastern]" dts1 = pd.date_range("2015-01-01", "2015-01-02", tz="US/Eastern") df1 = pd.DataFrame({"DATE": dts1}) dts2 = pd.date_range("2015-01-01", "2015-01-03") df2 = pd.DataFrame({"DATE": dts2}) res = df1.combine_first(df2) exp_dts = [ pd.Timestamp("2015-01-01", tz="US/Eastern"), pd.Timestamp("2015-01-02", tz="US/Eastern"), pd.Timestamp("2015-01-03"), ] exp = pd.DataFrame({"DATE": exp_dts}) tm.assert_frame_equal(res, exp) assert res["DATE"].dtype == "object" def test_combine_first_timedelta(self): data1 = pd.TimedeltaIndex(["1 day", "NaT", "3 day", "4day"]) df1 = pd.DataFrame({"TD": data1}, index=[1, 3, 5, 7]) data2 = pd.TimedeltaIndex(["10 day", "11 day", "12 day"]) df2 = pd.DataFrame({"TD": data2}, index=[2, 4, 5]) res = df1.combine_first(df2) exp_dts = pd.TimedeltaIndex( ["1 day", "10 day", "NaT", "11 day", "3 day", "4 day"] ) exp = pd.DataFrame({"TD": exp_dts}, index=[1, 2, 3, 4, 5, 7]) tm.assert_frame_equal(res, exp) assert res["TD"].dtype == "timedelta64[ns]" def test_combine_first_period(self): data1 = pd.PeriodIndex(["2011-01", "NaT", "2011-03", "2011-04"], freq="M") df1 = pd.DataFrame({"P": data1}, index=[1, 3, 5, 7]) data2 = pd.PeriodIndex(["2012-01-01", "2012-02", "2012-03"], freq="M") df2 = pd.DataFrame({"P": data2}, index=[2, 4, 5]) res = df1.combine_first(df2) exp_dts = pd.PeriodIndex( ["2011-01", "2012-01", "NaT", "2012-02", "2011-03", "2011-04"], freq="M" ) exp = pd.DataFrame({"P": exp_dts}, index=[1, 2, 3, 4, 5, 7]) tm.assert_frame_equal(res, exp) assert res["P"].dtype == data1.dtype # different freq dts2 = pd.PeriodIndex(["2012-01-01", "2012-01-02", "2012-01-03"], freq="D") df2 = pd.DataFrame({"P": dts2}, index=[2, 4, 5]) res = df1.combine_first(df2) exp_dts = [ pd.Period("2011-01", freq="M"), pd.Period("2012-01-01", freq="D"), pd.NaT, pd.Period("2012-01-02", freq="D"), pd.Period("2011-03", freq="M"), pd.Period("2011-04", freq="M"), ] exp = pd.DataFrame({"P": exp_dts}, index=[1, 2, 3, 4, 5, 7]) tm.assert_frame_equal(res, exp) assert res["P"].dtype == "object" def test_combine_first_int(self): # GH14687 - integer series that do no align exactly df1 = pd.DataFrame({"a": [0, 1, 3, 5]}, dtype="int64") df2 = pd.DataFrame({"a": [1, 4]}, dtype="int64") res = df1.combine_first(df2) tm.assert_frame_equal(res, df1) assert res["a"].dtype == "int64" @pytest.mark.parametrize("val", [1, 1.0]) def test_combine_first_with_asymmetric_other(self, val): # see gh-20699 df1 = pd.DataFrame({"isNum": [val]}) df2 = pd.DataFrame({"isBool": [True]}) res = df1.combine_first(df2) exp = pd.DataFrame({"isBool": [True], "isNum": [val]}) tm.assert_frame_equal(res, exp) def test_concat_datetime_datetime64_frame(self): # #2624 rows = [] rows.append([datetime(2010, 1, 1), 1]) rows.append([datetime(2010, 1, 2), "hi"]) df2_obj = DataFrame.from_records(rows, columns=["date", "test"]) ind = date_range(start="2000/1/1", freq="D", periods=10) df1 = DataFrame({"date": ind, "test": range(10)}) # it works! pd.concat([df1, df2_obj]) class TestDataFrameUpdate: def test_update_nan(self): # #15593 #15617 # test 1 df1 = DataFrame({"A": [1.0, 2, 3], "B": date_range("2000", periods=3)}) df2 = DataFrame({"A": [None, 2, 3]}) expected = df1.copy() df1.update(df2, overwrite=False) tm.assert_frame_equal(df1, expected) # test 2 df1 = DataFrame({"A": [1.0, None, 3], "B": date_range("2000", periods=3)}) df2 = DataFrame({"A": [None, 2, 3]}) expected = DataFrame({"A": [1.0, 2, 3], "B": date_range("2000", periods=3)}) df1.update(df2, overwrite=False) tm.assert_frame_equal(df1, expected)