12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776 |
- from itertools import product
- import warnings
- import numpy as np
- import pandas as pd
- import matplotlib as mpl
- import matplotlib.pyplot as plt
- import pytest
- from .. import relational as rel
- from ..palettes import color_palette
- from ..utils import categorical_order
- class TestRelationalPlotter(object):
def scatter_rgbs(self, collections):
    """Return the RGB face color of every collection as a list of tuples.

    Each collection's face color array is squeezed and cut down to its
    first three entries, so any trailing alpha value is dropped.
    """
    return [
        tuple(artist.get_facecolor().squeeze()[:3])
        for artist in collections
    ]
def colors_equal(self, *args):
    """Return whether the given color sequences match pairwise.

    The positional arguments are walked in lockstep with ``zip`` and
    unpacked two at a time, so exactly two color sequences are expected;
    trailing entries of a longer sequence are ignored. Every color is
    squeezed and converted to an RGB tuple before being compared.
    """
    to_rgb = mpl.colors.colorConverter.to_rgb
    equal = True
    for c1, c2 in zip(*args):
        rgb1 = to_rgb(np.squeeze(c1))
        # Convert the second color itself. The earlier code converted c1
        # twice, which made every comparison succeed trivially.
        rgb2 = to_rgb(np.squeeze(c2))
        equal &= rgb1 == rgb2
    return equal
def paths_equal(self, *args):
    """Return whether the given path sequences match pairwise.

    The positional arguments are zipped together and unpacked two at a
    time; two paths match when both their ``vertices`` and their ``codes``
    compare equal under ``np.array_equal``.
    """
    same = True
    for left, right in zip(*args):
        vertices_match = np.array_equal(left.vertices, right.vertices)
        codes_match = np.array_equal(left.codes, right.codes)
        same = same & vertices_match & codes_match
    return same
@pytest.fixture
def wide_df(self):
    """Wide-form frame of random normals with columns "a", "b" and "c".

    The index holds the even integers from 10 up to 48 (20 rows) and is
    named "wide_index".
    """
    columns = list("abc")
    # pd.Int64Index was removed in pandas 2.0. A plain Index with an
    # explicit int64 dtype is the spelling that works before and after.
    index = pd.Index(np.arange(10, 50, 2), dtype="int64", name="wide_index")
    values = np.random.randn(len(index), len(columns))
    return pd.DataFrame(values, index=index, columns=columns)
@pytest.fixture
def wide_array(self):
    """Wide-form input as a bare 20 x 3 array of random normal draws."""
    n_rows, n_cols = 20, 3
    return np.random.randn(n_rows, n_cols)
@pytest.fixture
def flat_array(self):
    """One-dimensional array holding 20 random normal draws."""
    length = 20
    return np.random.randn(length)
@pytest.fixture
def flat_series(self):
    """Series "s" of 20 random normals on an integer index named "t".

    The index runs over the integers 10 through 29.
    """
    # pd.Int64Index was removed in pandas 2.0. A plain Index with an
    # explicit int64 dtype is the spelling that works before and after.
    index = pd.Index(np.arange(10, 30), dtype="int64", name="t")
    return pd.Series(np.random.randn(20), index, name="s")
@pytest.fixture
def wide_list(self):
    """Two random normal vectors of unequal length (20, then 10)."""
    lengths = (20, 10)
    return [np.random.randn(size) for size in lengths]
@pytest.fixture
def wide_list_of_series(self):
    """Two named random Series with different lengths and indexes.

    Series "a" has 20 values indexed 0..19; Series "b" has 10 values
    indexed 5..14.
    """
    first = pd.Series(np.random.randn(20), np.arange(20), name="a")
    second = pd.Series(np.random.randn(10), np.arange(5, 15), name="b")
    return [first, second]
@pytest.fixture
def long_df(self):
    """Long-form frame of 100 random rows covering several column types.

    Columns: integer "x", float "y", string "a" and "b", binary integer
    "c", constant datetime "d", integer "s", float "f", and "s_cat", a
    categorical copy of "s".
    """
    n_rows = 100
    rng = np.random.RandomState()

    def pick(choices):
        # One entry of ``choices`` per row, chosen by random position
        return np.take(choices, rng.randint(0, len(choices), n_rows))

    # The entries are evaluated top to bottom, so the random draws happen
    # in column order.
    frame = pd.DataFrame({
        "x": rng.randint(0, 20, n_rows),
        "y": rng.randn(n_rows),
        "a": pick(list("abc")),
        "b": pick(list("mnop")),
        "c": pick([0, 1]),
        "d": np.repeat(np.datetime64('2005-02-25'), n_rows),
        "s": pick([2, 4, 8]),
        "f": pick([0.2, 0.3]),
    })
    frame["s_cat"] = frame["s"].astype("category")
    return frame
@pytest.fixture
def repeated_df(self):
    """Frame of 100 rows in which the x values 0..49 occur twice.

    Column "u" is 0 for the first pass over x and 1 for the second.
    """
    n_rows = 100
    half = n_rows // 2
    rng = np.random.RandomState()
    columns = {
        "x": np.tile(np.arange(half), 2),
        "y": rng.randn(n_rows),
        "a": np.take(list("abc"), rng.randint(0, 3, n_rows)),
        "u": np.repeat(np.arange(2), half),
    }
    return pd.DataFrame(columns)
@pytest.fixture
def missing_df(self):
    """Frame of 100 random rows with ten NaNs written into each column."""
    n_rows = 100
    n_missing = 10
    rng = np.random.RandomState()

    def pick(choices):
        # One entry of ``choices`` per row, chosen by random position
        return np.take(choices, rng.randint(0, len(choices), n_rows))

    frame = pd.DataFrame({
        "x": rng.randint(0, 20, n_rows),
        "y": rng.randn(n_rows),
        "a": pick(list("abc")),
        "b": pick(list("mnop")),
        "s": pick([2, 4, 8]),
    })

    # Blank out a fresh random selection of rows for every column
    for name in frame:
        blanked = rng.permutation(frame.index)[:n_missing]
        frame.loc[blanked, name] = np.nan
    return frame
@pytest.fixture
def null_column(self):
    """Float Series with 20 index entries and no values supplied."""
    positions = np.arange(20)
    return pd.Series(dtype='float64', index=positions)
def test_wide_df_variables(self, wide_df):
    """Check how a wide DataFrame is unpacked into long-form plot data."""
    p = rel._RelationalPlotter()
    p.establish_variables(data=wide_df)
    assert p.input_format == "wide"
    assert p.semantics == ["x", "y", "hue", "style"]
    # np.product was removed in NumPy 2.0; np.prod is the supported name.
    assert len(p.plot_data) == np.prod(wide_df.shape)

    # x is the frame's index, repeated once per column
    x = p.plot_data["x"]
    expected_x = np.tile(wide_df.index, wide_df.shape[1])
    assert np.array_equal(x, expected_x)

    # y is the frame's values, read column by column
    y = p.plot_data["y"]
    expected_y = wide_df.values.ravel(order="f")
    assert np.array_equal(y, expected_y)

    # hue is each column name, repeated once per row
    hue = p.plot_data["hue"]
    expected_hue = np.repeat(wide_df.columns.values, wide_df.shape[0])
    assert np.array_equal(hue, expected_hue)

    # style is expected to carry the same values as hue
    style = p.plot_data["style"]
    expected_style = expected_hue
    assert np.array_equal(style, expected_style)

    assert p.plot_data["size"].isnull().all()

    assert p.x_label == wide_df.index.name
    assert p.y_label is None
    assert p.hue_label == wide_df.columns.name
    assert p.size_label is None
    assert p.style_label == wide_df.columns.name
def test_wide_df_variables_check(self, wide_df):
    """Check that a wide frame with a string column raises ValueError."""
    mixed_df = wide_df.copy()
    mixed_df.loc[:, "not_numeric"] = "a"

    plotter = rel._RelationalPlotter()
    with pytest.raises(ValueError):
        plotter.establish_variables(data=mixed_df)
def test_wide_array_variables(self, wide_array):
    """Check how a 2D array is unpacked into long-form plot data."""
    p = rel._RelationalPlotter()
    p.establish_variables(data=wide_array)
    assert p.input_format == "wide"
    assert p.semantics == ["x", "y", "hue", "style"]
    # np.product was removed in NumPy 2.0; np.prod is the supported name.
    assert len(p.plot_data) == np.prod(wide_array.shape)

    nrow, ncol = wide_array.shape

    # x is the row position, repeated once per column
    x = p.plot_data["x"]
    expected_x = np.tile(np.arange(nrow), ncol)
    assert np.array_equal(x, expected_x)

    # y is the array's values, read column by column
    y = p.plot_data["y"]
    expected_y = wide_array.ravel(order="f")
    assert np.array_equal(y, expected_y)

    # hue is the column position, repeated once per row
    hue = p.plot_data["hue"]
    expected_hue = np.repeat(np.arange(ncol), nrow)
    assert np.array_equal(hue, expected_hue)

    # style is expected to carry the same values as hue
    style = p.plot_data["style"]
    expected_style = expected_hue
    assert np.array_equal(style, expected_style)

    assert p.plot_data["size"].isnull().all()

    assert p.x_label is None
    assert p.y_label is None
    assert p.hue_label is None
    assert p.size_label is None
    assert p.style_label is None
def test_flat_array_variables(self, flat_array):
    """Check how a 1D array is unpacked into plot data."""
    p = rel._RelationalPlotter()
    p.establish_variables(data=flat_array)
    assert p.input_format == "wide"
    assert p.semantics == ["x", "y"]
    # np.product was removed in NumPy 2.0; np.prod is the supported name.
    assert len(p.plot_data) == np.prod(flat_array.shape)

    # x is the position of each value, y is the value itself
    x = p.plot_data["x"]
    expected_x = np.arange(flat_array.shape[0])
    assert np.array_equal(x, expected_x)

    y = p.plot_data["y"]
    expected_y = flat_array
    assert np.array_equal(y, expected_y)

    assert p.plot_data["hue"].isnull().all()
    assert p.plot_data["style"].isnull().all()
    assert p.plot_data["size"].isnull().all()

    assert p.x_label is None
    assert p.y_label is None
    assert p.hue_label is None
    assert p.size_label is None
    assert p.style_label is None
def test_flat_series_variables(self, flat_series):
    """Check that a single Series maps its index to x and values to y."""
    plotter = rel._RelationalPlotter()
    plotter.establish_variables(data=flat_series)

    assert plotter.input_format == "wide"
    assert plotter.semantics == ["x", "y"]
    assert len(plotter.plot_data) == len(flat_series)

    assert np.array_equal(plotter.plot_data["x"], flat_series.index)
    assert np.array_equal(plotter.plot_data["y"], flat_series)

    # No label is expected for any variable with this input
    label_attrs = [
        "x_label", "y_label", "hue_label", "size_label", "style_label",
    ]
    for attr in label_attrs:
        assert getattr(plotter, attr) is None
def test_wide_list_variables(self, wide_list):
    """Check how a list of unequal-length vectors becomes plot data."""
    plotter = rel._RelationalPlotter()
    plotter.establish_variables(data=wide_list)

    assert plotter.input_format == "wide"
    assert plotter.semantics == ["x", "y", "hue", "style"]
    assert len(plotter.plot_data) == sum(len(vec) for vec in wide_list)

    # x restarts from zero for every vector in the list
    want_x = np.concatenate([np.arange(len(vec)) for vec in wide_list])
    assert np.array_equal(plotter.plot_data["x"], want_x)

    want_y = np.concatenate(wide_list)
    assert np.array_equal(plotter.plot_data["y"], want_y)

    # hue and style both hold the list position of the source vector
    want_position = np.concatenate([
        np.ones_like(vec) * pos for pos, vec in enumerate(wide_list)
    ])
    assert np.array_equal(plotter.plot_data["hue"], want_position)
    assert np.array_equal(plotter.plot_data["style"], want_position)

    assert plotter.plot_data["size"].isnull().all()

    label_attrs = [
        "x_label", "y_label", "hue_label", "size_label", "style_label",
    ]
    for attr in label_attrs:
        assert getattr(plotter, attr) is None
def test_wide_list_of_series_variables(self, wide_list_of_series):
    """Check how a list of named Series becomes plot data."""
    series_list = wide_list_of_series
    plotter = rel._RelationalPlotter()
    plotter.establish_variables(data=series_list)

    assert plotter.input_format == "wide"
    assert plotter.semantics == ["x", "y", "hue", "style"]
    assert len(plotter.plot_data) == sum(len(ser) for ser in series_list)

    # x concatenates each Series' own index
    want_x = np.concatenate([ser.index for ser in series_list])
    assert np.array_equal(plotter.plot_data["x"], want_x)

    want_y = np.concatenate(series_list)
    assert np.array_equal(plotter.plot_data["y"], want_y)

    # hue and style both hold the Series name, once per observation
    want_name = np.concatenate([
        np.full(len(ser), ser.name, object) for ser in series_list
    ])
    assert np.array_equal(plotter.plot_data["hue"], want_name)
    assert np.array_equal(plotter.plot_data["style"], want_name)

    assert plotter.plot_data["size"].isnull().all()

    label_attrs = [
        "x_label", "y_label", "hue_label", "size_label", "style_label",
    ]
    for attr in label_attrs:
        assert getattr(plotter, attr) is None
def test_long_df(self, long_df):
    """Check long-form variable assignment for several semantic mixes.

    One plotter is reused throughout, so every call to
    ``establish_variables`` replaces the state left by the previous one.
    """
    plotter = rel._RelationalPlotter()

    # x and y given as column names
    plotter.establish_variables(x="x", y="y", data=long_df)
    assert plotter.input_format == "long"
    assert plotter.semantics == ["x", "y"]
    assert np.array_equal(plotter.plot_data["x"], long_df["x"])
    assert np.array_equal(plotter.plot_data["y"], long_df["y"])
    for unused in ["hue", "style", "size"]:
        assert plotter.plot_data[unused].isnull().all()
    assert (plotter.x_label, plotter.y_label) == ("x", "y")
    assert plotter.hue_label is None
    assert plotter.size_label is None
    assert plotter.style_label is None

    # x given as a vector, y as a column name
    plotter.establish_variables(x=long_df.x, y="y", data=long_df)
    assert plotter.semantics == ["x", "y"]
    assert np.array_equal(plotter.plot_data["x"], long_df["x"])
    assert np.array_equal(plotter.plot_data["y"], long_df["y"])
    assert (plotter.x_label, plotter.y_label) == ("x", "y")

    # x given as a column name, y as a vector
    plotter.establish_variables(x="x", y=long_df.y, data=long_df)
    assert plotter.semantics == ["x", "y"]
    assert np.array_equal(plotter.plot_data["x"], long_df["x"])
    assert np.array_equal(plotter.plot_data["y"], long_df["y"])
    assert (plotter.x_label, plotter.y_label) == ("x", "y")

    # hue only
    plotter.establish_variables(x="x", y="y", hue="a", data=long_df)
    assert plotter.semantics == ["x", "y", "hue"]
    assert np.array_equal(plotter.plot_data["hue"], long_df["a"])
    for unused in ["style", "size"]:
        assert plotter.plot_data[unused].isnull().all()
    assert plotter.hue_label == "a"
    assert plotter.size_label is None
    assert plotter.style_label is None

    # hue and style mapped to the same column
    plotter.establish_variables(
        x="x", y="y", hue="a", style="a", data=long_df
    )
    assert plotter.semantics == ["x", "y", "hue", "style"]
    assert np.array_equal(plotter.plot_data["hue"], long_df["a"])
    assert np.array_equal(plotter.plot_data["style"], long_df["a"])
    assert plotter.plot_data["size"].isnull().all()
    assert plotter.hue_label == plotter.style_label
    assert plotter.style_label == "a"
    assert plotter.size_label is None

    # hue and style mapped to different columns
    plotter.establish_variables(
        x="x", y="y", hue="a", style="b", data=long_df
    )
    assert plotter.semantics == ["x", "y", "hue", "style"]
    assert np.array_equal(plotter.plot_data["hue"], long_df["a"])
    assert np.array_equal(plotter.plot_data["style"], long_df["b"])
    assert plotter.plot_data["size"].isnull().all()

    # size only
    plotter.establish_variables(x="x", y="y", size="y", data=long_df)
    assert plotter.semantics == ["x", "y", "size"]
    assert np.array_equal(plotter.plot_data["size"], long_df["y"])
    assert plotter.size_label == "y"
    assert plotter.hue_label is None
    assert plotter.style_label is None
def test_bad_input(self, long_df):
    """Check that unusable variable specifications raise ValueError.

    The earlier version repeated the ``y="not_in_df"`` case twice
    verbatim; each case is now listed once.
    """
    p = rel._RelationalPlotter()

    bad_specs = [
        # A vector for only one of x and y, with no data
        dict(x=long_df.x),
        dict(y=long_df.y),
        # Names that are not columns of the data
        dict(x="not_in_df", data=long_df),
        dict(x="x", y="not_in_df", data=long_df),
    ]
    for spec in bad_specs:
        with pytest.raises(ValueError):
            p.establish_variables(**spec)
def test_empty_input(self):
    """Check that empty inputs are accepted without raising."""
    plotter = rel._RelationalPlotter()

    # Empty wide-form containers
    for empty in ([], np.array([]), pd.DataFrame()):
        plotter.establish_variables(data=empty)

    # Empty x and y vectors
    plotter.establish_variables(x=[], y=[])
def test_units(self, repeated_df):
    """Check that the units column is copied into the plot data."""
    plotter = rel._RelationalPlotter()
    plotter.establish_variables(x="x", y="y", units="u", data=repeated_df)

    stored_units = plotter.plot_data["units"]
    assert np.array_equal(stored_units, repeated_df["u"])
def test_parse_hue_null(self, wide_df, null_column):
    """Check the attributes left by parsing an all-null hue vector."""
    plotter = rel._LinePlotter(data=wide_df)
    plotter.parse_hue(null_column, "Blues", None, None)

    levels, palette = plotter.hue_levels, plotter.palette
    assert levels == [None]
    assert palette == {}
    assert plotter.hue_type is None
    assert plotter.cmap is None
def test_parse_hue_categorical(self, wide_df, long_df):
    """Check hue parsing when the hue variable is categorical.

    Covers the palette given as a name, a list and a dict, the error
    cases for mismatched palettes, an explicit hue order, and the column
    types in ``long_df`` that are expected to be treated as categorical.
    """
    p = rel._LinePlotter(data=wide_df)
    assert p.hue_levels == wide_df.columns.tolist()
    assert p.hue_type == "categorical"
    assert p.cmap is None

    # Test named palette
    palette = "Blues"
    expected_colors = color_palette(palette, wide_df.shape[1])
    expected_palette = dict(zip(wide_df.columns, expected_colors))
    p.parse_hue(p.plot_data.hue, palette, None, None)
    assert p.palette == expected_palette

    # Test list palette
    palette = color_palette("Reds", wide_df.shape[1])
    p.parse_hue(p.plot_data.hue, palette, None, None)
    expected_palette = dict(zip(wide_df.columns, palette))
    assert p.palette == expected_palette

    # Test dict palette
    colors = color_palette("Set1", 8)
    palette = dict(zip(wide_df.columns, colors))
    p.parse_hue(p.plot_data.hue, palette, None, None)
    assert p.palette == palette

    # Test dict with missing keys
    palette = dict(zip(wide_df.columns[:-1], colors))
    with pytest.raises(ValueError):
        p.parse_hue(p.plot_data.hue, palette, None, None)

    # Test list with wrong number of colors
    palette = colors[:-1]
    with pytest.raises(ValueError):
        p.parse_hue(p.plot_data.hue, palette, None, None)

    # Test hue order
    hue_order = ["a", "c", "d"]
    p.parse_hue(p.plot_data.hue, None, hue_order, None)
    assert p.hue_levels == hue_order

    # Test long data
    p = rel._LinePlotter(x="x", y="y", hue="a", data=long_df)
    assert p.hue_levels == categorical_order(long_df.a)
    assert p.hue_type == "categorical"
    assert p.cmap is None

    # Test default palette
    p.parse_hue(p.plot_data.hue, None, None, None)
    hue_levels = categorical_order(long_df.a)
    expected_colors = color_palette(n_colors=len(hue_levels))
    expected_palette = dict(zip(hue_levels, expected_colors))
    assert p.palette == expected_palette

    # Test default palette with many levels
    levels = pd.Series(list("abcdefghijklmnopqrstuvwxyz"))
    p.parse_hue(levels, None, None, None)
    expected_colors = color_palette("husl", n_colors=len(levels))
    expected_palette = dict(zip(levels, expected_colors))
    assert p.palette == expected_palette

    # Test binary data
    p = rel._LinePlotter(x="x", y="y", hue="c", data=long_df)
    assert p.hue_levels == [0, 1]
    assert p.hue_type == "categorical"

    df = long_df[long_df["c"] == 0]
    p = rel._LinePlotter(x="x", y="y", hue="c", data=df)
    assert p.hue_levels == [0]
    assert p.hue_type == "categorical"

    df = long_df[long_df["c"] == 1]
    p = rel._LinePlotter(x="x", y="y", hue="c", data=df)
    assert p.hue_levels == [1]
    assert p.hue_type == "categorical"

    # Test Timestamp data
    p = rel._LinePlotter(x="x", y="y", hue="d", data=long_df)
    assert p.hue_levels == [pd.Timestamp('2005-02-25')]
    assert p.hue_type == "categorical"

    # Test numeric data with category type
    p = rel._LinePlotter(x="x", y="y", hue="s_cat", data=long_df)
    assert p.hue_levels == categorical_order(long_df.s_cat)
    assert p.hue_type == "categorical"
    assert p.cmap is None

    # Test categorical palette specified for numeric data
    palette = "deep"
    p = rel._LinePlotter(x="x", y="y", hue="s",
                         palette=palette, data=long_df)
    # Size the expected palette from the levels of "s" itself. The
    # earlier code reused ``levels``, the 26-letter Series left over from
    # the "many levels" section above.
    hue_levels = categorical_order(long_df["s"])
    expected_colors = color_palette(palette, n_colors=len(hue_levels))
    expected_palette = dict(zip(hue_levels, expected_colors))
    assert p.palette == expected_palette
    assert p.hue_type == "categorical"
def test_parse_hue_numeric(self, long_df):
    """Check hue parsing when the hue variable is the numeric column "s".

    A single plotter is re-parsed repeatedly, so the sections below run
    in a fixed order and each one starts from the state left by the
    ``parse_hue`` call before it.
    """
    p = rel._LinePlotter(x="x", y="y", hue="s", data=long_df)
    hue_levels = list(np.sort(long_df.s.unique()))
    assert p.hue_levels == hue_levels
    assert p.hue_type == "numeric"
    assert p.cmap.name == "seaborn_cubehelix"

    # Test named colormap
    palette = "Purples"
    p.parse_hue(p.plot_data.hue, palette, None, None)
    assert p.cmap is mpl.cm.get_cmap(palette)

    # Test colormap object
    palette = mpl.cm.get_cmap("Greens")
    p.parse_hue(p.plot_data.hue, palette, None, None)
    assert p.cmap is palette

    # Test cubehelix shorthand
    palette = "ch:2,0,light=.2"
    p.parse_hue(p.plot_data.hue, palette, None, None)
    assert isinstance(p.cmap, mpl.colors.ListedColormap)

    # Test default hue limits
    p.parse_hue(p.plot_data.hue, None, None, None)
    assert p.hue_limits == (p.plot_data.hue.min(), p.plot_data.hue.max())

    # Test specified hue limits
    hue_norm = 1, 4
    p.parse_hue(p.plot_data.hue, None, None, hue_norm)
    assert p.hue_limits == hue_norm
    assert isinstance(p.hue_norm, mpl.colors.Normalize)
    assert p.hue_norm.vmin == hue_norm[0]
    assert p.hue_norm.vmax == hue_norm[1]

    # Test Normalize object
    hue_norm = mpl.colors.PowerNorm(2, vmin=1, vmax=10)
    p.parse_hue(p.plot_data.hue, None, None, hue_norm)
    assert p.hue_limits == (hue_norm.vmin, hue_norm.vmax)
    assert p.hue_norm is hue_norm

    # Test default colormap values
    hmin, hmax = p.plot_data.hue.min(), p.plot_data.hue.max()
    p.parse_hue(p.plot_data.hue, None, None, None)
    assert p.palette[hmin] == pytest.approx(p.cmap(0.0))
    assert p.palette[hmax] == pytest.approx(p.cmap(1.0))

    # Test specified colormap values
    # Both limits are shifted down by one, so hmin falls inside the
    # range and hmax lies above its upper end.
    hue_norm = hmin - 1, hmax - 1
    p.parse_hue(p.plot_data.hue, None, None, hue_norm)
    norm_min = (hmin - hue_norm[0]) / (hue_norm[1] - hue_norm[0])
    assert p.palette[hmin] == pytest.approx(p.cmap(norm_min))
    assert p.palette[hmax] == pytest.approx(p.cmap(1.0))

    # Test list of colors
    hue_levels = list(np.sort(long_df.s.unique()))
    palette = color_palette("Blues", len(hue_levels))
    p.parse_hue(p.plot_data.hue, palette, None, None)
    assert p.palette == dict(zip(hue_levels, palette))

    # One color more than there are levels
    palette = color_palette("Blues", len(hue_levels) + 1)
    with pytest.raises(ValueError):
        p.parse_hue(p.plot_data.hue, palette, None, None)

    # Test dictionary of colors
    palette = dict(zip(hue_levels, color_palette("Reds")))
    p.parse_hue(p.plot_data.hue, palette, None, None)
    assert p.palette == palette

    # Dropping one level's entry leaves the dict incomplete
    palette.pop(hue_levels[0])
    with pytest.raises(ValueError):
        p.parse_hue(p.plot_data.hue, palette, None, None)

    # Test invalid palette
    palette = "not_a_valid_palette"
    with pytest.raises(ValueError):
        p.parse_hue(p.plot_data.hue, palette, None, None)

    # Test bad norm argument
    hue_norm = "not a norm"
    with pytest.raises(ValueError):
        p.parse_hue(p.plot_data.hue, None, None, hue_norm)
def test_parse_size(self, long_df):
    """Check size parsing for numeric and categorical size variables.

    The first half re-parses one plotter built on the numeric column
    "s"; the second half builds new plotters on the string column "a".
    """
    p = rel._LinePlotter(x="x", y="y", size="s", data=long_df)

    # Test default size limits and range
    default_linewidth = mpl.rcParams["lines.linewidth"]
    default_limits = p.plot_data["size"].min(), p.plot_data["size"].max()
    default_range = .5 * default_linewidth, 2 * default_linewidth
    p.parse_size(p.plot_data["size"], None, None, None)
    assert p.size_limits == default_limits
    size_range = min(p.sizes.values()), max(p.sizes.values())
    assert size_range == default_range

    # Test specified size limits
    size_limits = (1, 5)
    p.parse_size(p.plot_data["size"], None, None, size_limits)
    assert p.size_limits == size_limits

    # Test specified size range
    sizes = (.1, .5)
    p.parse_size(p.plot_data["size"], sizes, None, None)
    assert p.size_limits == default_limits

    # Test size values with normalization range
    sizes = (1, 5)
    size_norm = (1, 10)
    p.parse_size(p.plot_data["size"], sizes, None, size_norm)
    normalize = mpl.colors.Normalize(*size_norm, clip=True)
    for level, width in p.sizes.items():
        # Each width is the size range interpolated at the normed level
        assert width == sizes[0] + (sizes[1] - sizes[0]) * normalize(level)

    # Test size values with normalization object
    sizes = (1, 5)
    size_norm = mpl.colors.LogNorm(1, 10, clip=False)
    p.parse_size(p.plot_data["size"], sizes, None, size_norm)
    # The norm was created with clip=False but is expected to end up
    # with clipping turned on.
    assert p.size_norm.clip
    for level, width in p.sizes.items():
        assert width == sizes[0] + (sizes[1] - sizes[0]) * size_norm(level)

    # Test specified size order
    var = "a"
    levels = long_df[var].unique()
    sizes = [1, 4, 6]
    size_order = [levels[1], levels[2], levels[0]]
    p = rel._LinePlotter(x="x", y="y", size=var, data=long_df)
    p.parse_size(p.plot_data["size"], sizes, size_order, None)
    assert p.sizes == dict(zip(size_order, sizes))

    # Test list of sizes
    var = "a"
    levels = categorical_order(long_df[var])
    sizes = list(np.random.rand(len(levels)))
    p = rel._LinePlotter(x="x", y="y", size=var, data=long_df)
    p.parse_size(p.plot_data["size"], sizes, None, None)
    assert p.sizes == dict(zip(levels, sizes))

    # Test dict of sizes
    var = "a"
    levels = categorical_order(long_df[var])
    sizes = dict(zip(levels, np.random.rand(len(levels))))
    p = rel._LinePlotter(x="x", y="y", size=var, data=long_df)
    p.parse_size(p.plot_data["size"], sizes, None, None)
    assert p.sizes == sizes

    # Test sizes list with wrong length
    sizes = list(np.random.rand(len(levels) + 1))
    with pytest.raises(ValueError):
        p.parse_size(p.plot_data["size"], sizes, None, None)

    # Test sizes dict with missing levels
    # zip stops at the shorter input, so the last level gets no entry
    sizes = dict(zip(levels, np.random.rand(len(levels) - 1)))
    with pytest.raises(ValueError):
        p.parse_size(p.plot_data["size"], sizes, None, None)

    # Test bad sizes argument
    sizes = "bad_size"
    with pytest.raises(ValueError):
        p.parse_size(p.plot_data["size"], sizes, None, None)

    # Test bad norm argument
    size_norm = "not a norm"
    p = rel._LinePlotter(x="x", y="y", size="s", data=long_df)
    with pytest.raises(ValueError):
        p.parse_size(p.plot_data["size"], None, None, size_norm)
def test_parse_style(self, long_df):
    """Check style parsing for markers and dashes on column "a".

    One plotter is re-parsed throughout; the list and dict sections
    reuse the ``markers`` and ``dashes`` values set just before them.
    """
    p = rel._LinePlotter(x="x", y="y", style="a", data=long_df)

    # Test defaults
    markers, dashes = True, True
    p.parse_style(p.plot_data["style"], markers, dashes, None)
    assert p.markers == dict(zip(p.style_levels, p.default_markers))
    assert p.dashes == dict(zip(p.style_levels, p.default_dashes))

    # Test lists
    markers, dashes = ["o", "s", "d"], [(1, 0), (1, 1), (2, 1, 3, 1)]
    p.parse_style(p.plot_data["style"], markers, dashes, None)
    assert p.markers == dict(zip(p.style_levels, markers))
    assert p.dashes == dict(zip(p.style_levels, dashes))

    # Test dicts
    # Built from the lists of the previous section, keyed by level
    markers = dict(zip(p.style_levels, markers))
    dashes = dict(zip(p.style_levels, dashes))
    p.parse_style(p.plot_data["style"], markers, dashes, None)
    assert p.markers == markers
    assert p.dashes == dashes

    # Test style order with defaults
    style_order = np.take(p.style_levels, [1, 2, 0])
    markers = dashes = True
    p.parse_style(p.plot_data["style"], markers, dashes, style_order)
    assert p.markers == dict(zip(style_order, p.default_markers))
    assert p.dashes == dict(zip(style_order, p.default_dashes))

    # Test too many levels with style lists
    markers, dashes = ["o", "s"], False
    with pytest.raises(ValueError):
        p.parse_style(p.plot_data["style"], markers, dashes, None)

    markers, dashes = False, [(2, 1)]
    with pytest.raises(ValueError):
        p.parse_style(p.plot_data["style"], markers, dashes, None)

    # Test too many levels with style dicts
    markers, dashes = {"a": "o", "b": "s"}, False
    with pytest.raises(ValueError):
        p.parse_style(p.plot_data["style"], markers, dashes, None)

    markers, dashes = False, {"a": (1, 0), "b": (2, 1)}
    with pytest.raises(ValueError):
        p.parse_style(p.plot_data["style"], markers, dashes, None)

    # Test mixture of filled and unfilled markers
    markers, dashes = ["o", "x", "s"], None
    with pytest.raises(ValueError):
        p.parse_style(p.plot_data["style"], markers, dashes, None)
def test_subset_data_quantities(self, long_df):
    """Check how many subsets subset_data yields per semantic mix."""

    def n_yielded(**semantics):
        # Number of subsets produced for the given semantic mapping
        plotter = rel._LinePlotter(x="x", y="y", data=long_df, **semantics)
        return len(list(plotter.subset_data()))

    def n_combinations(names):
        # Number of distinct value combinations across the named columns
        return len(set(map(tuple, long_df[names].values)))

    assert n_yielded() == 1

    # --

    name = "a"
    expected = len(long_df[name].unique())
    assert n_yielded(hue=name) == expected
    assert n_yielded(style=name) == expected
    assert n_yielded(size=name) == expected

    # --

    assert n_yielded(hue=name, style=name) == expected

    # --

    first, second = "a", "s"
    expected = n_combinations([first, second])
    assert n_yielded(hue=first, style=second) == expected
    assert n_yielded(hue=first, size=second, style=first) == expected

    # --

    first, second, third = "a", "s", "b"
    expected = n_combinations([first, second, third])
    assert n_yielded(hue=first, size=second, style=third) == expected
def test_subset_data_keys(self, long_df):
    """Check the (hue, size, style) keys that subset_data yields.

    A semantic that is not mapped is expected to be None in every key; a
    mapped one is expected to hold a value from its source column.
    """

    def keys_for(**semantics):
        # All keys yielded for the given semantic mapping
        plotter = rel._LinePlotter(x="x", y="y", data=long_df, **semantics)
        return [key for key, _ in plotter.subset_data()]

    for hue, size, style in keys_for():
        assert hue is None
        assert size is None
        assert style is None

    # --

    name = "a"
    known = long_df[name].values

    for hue, size, style in keys_for(hue=name):
        assert hue in known
        assert size is None
        assert style is None

    for hue, size, style in keys_for(style=name):
        assert hue is None
        assert size is None
        assert style in known

    for hue, size, style in keys_for(hue=name, style=name):
        assert hue in known
        assert size is None
        assert style in known

    for hue, size, style in keys_for(size=name):
        assert hue is None
        assert size in known
        assert style is None

    # --

    first, second = "a", "s"
    for hue, size, style in keys_for(hue=first, size=second):
        assert hue in long_df[first].values
        assert size in long_df[second].values
        assert style is None
def test_subset_data_values(self, long_df):
    """Check the x/y rows of each subset, with and without sorting."""
    xy = ["x", "y"]

    # No semantics, default sorting: every row, ordered by x then y
    plotter = rel._LinePlotter(x="x", y="y", data=long_df)
    _, subset = next(plotter.subset_data())
    wanted = plotter.plot_data.loc[:, xy].sort_values(xy)
    assert np.array_equal(subset.values, wanted)

    # No semantics, sort=False: every row in its original order
    plotter = rel._LinePlotter(x="x", y="y", data=long_df, sort=False)
    _, subset = next(plotter.subset_data())
    wanted = plotter.plot_data.loc[:, xy]
    assert np.array_equal(subset.values, wanted)

    # hue only, default sorting
    plotter = rel._LinePlotter(x="x", y="y", hue="a", data=long_df)
    for (level, _, _), subset in plotter.subset_data():
        mask = plotter.plot_data["hue"] == level
        wanted = plotter.plot_data.loc[mask, xy].sort_values(xy)
        assert np.array_equal(subset.values, wanted.values)

    # hue only, sort=False
    plotter = rel._LinePlotter(
        x="x", y="y", hue="a", data=long_df, sort=False
    )
    for (level, _, _), subset in plotter.subset_data():
        mask = plotter.plot_data["hue"] == level
        wanted = plotter.plot_data.loc[mask, xy]
        assert np.array_equal(subset.values, wanted.values)

    # hue and style on the same column
    plotter = rel._LinePlotter(
        x="x", y="y", hue="a", style="a", data=long_df
    )
    for (level, _, _), subset in plotter.subset_data():
        mask = plotter.plot_data["hue"] == level
        wanted = plotter.plot_data.loc[mask, xy].sort_values(xy)
        assert np.array_equal(subset.values, wanted.values)

    # hue and size on different columns
    plotter = rel._LinePlotter(
        x="x", y="y", hue="a", size="s", data=long_df
    )
    for (level, width, _), subset in plotter.subset_data():
        hue_mask = plotter.plot_data["hue"] == level
        size_mask = plotter.plot_data["size"] == width
        mask = hue_mask & size_mask
        wanted = plotter.plot_data.loc[mask, xy].sort_values(xy)
        assert np.array_equal(subset.values, wanted.values)
class TestLinePlotter(TestRelationalPlotter):
    """Tests for ``rel._LinePlotter`` and the ``rel.lineplot`` function."""

    def test_aggregate(self, long_df):
        """Check the index, estimate and intervals returned by ``aggregate``."""
        plotter = rel._LinePlotter(x="x", y="y", data=long_df)
        plotter.n_boot = 10000
        plotter.sort = False

        # Two interleaved groups (1 and 2) of 100 random draws each.
        groups = pd.Series(np.tile([1, 2], 100))
        values = pd.Series(np.random.randn(200))
        group_means = values.groupby(groups).mean()

        def standard_error(sample):
            return np.std(sample) / np.sqrt(len(sample))

        group_sems = values.groupby(groups).apply(standard_error)
        expected_cis = pd.DataFrame(
            {"low": group_means - group_sems,
             "high": group_means + group_sems},
            columns=["low", "high"])

        # The estimator may be given by name or as a callable.
        plotter.ci = 68
        for estimator in ("mean", np.mean):
            plotter.estimator = estimator
            index, est, cis = plotter.aggregate(values, groups)
            assert np.array_equal(index.values, groups.unique())
            assert est.index.equals(index)
            assert est.values == pytest.approx(group_means.values)
            # NOTE(review): the second positional argument of
            # pytest.approx is the relative tolerance, so this accepts a
            # 400% deviation -- confirm that is what was intended.
            assert cis.values == pytest.approx(expected_cis.values, rel=4)
            assert list(cis.columns) == ["low", "high"]

        # With a fixed seed, repeated calls give identical intervals.
        plotter.seed = 0
        _, _, first_cis = plotter.aggregate(values, groups)
        _, _, second_cis = plotter.aggregate(values, groups)
        assert np.array_equal(first_cis, second_cis)

        # ci="sd": the interval is the estimate +/- one standard deviation.
        group_stds = values.groupby(groups).std()
        expected_cis = pd.DataFrame(
            {"low": group_means - group_stds,
             "high": group_means + group_stds},
            columns=["low", "high"])

        plotter.ci = "sd"
        index, est, cis = plotter.aggregate(values, groups)
        assert np.array_equal(index.values, groups.unique())
        assert est.index.equals(index)
        assert est.values == pytest.approx(group_means.values)
        assert cis.values == pytest.approx(expected_cis.values)
        assert list(cis.columns) == ["low", "high"]

        plotter.ci = None
        index, est, cis = plotter.aggregate(values, groups)
        assert cis is None

        # One observation per x value: data pass through, no intervals.
        plotter.ci = 68
        groups, values = pd.Series([1, 2, 3]), pd.Series([4, 3, 2])
        index, est, cis = plotter.aggregate(values, groups)
        assert np.array_equal(index.values, groups)
        assert np.array_equal(est.values, values)
        assert cis is None

        # The x value with a single observation gets a null interval.
        groups, values = pd.Series([1, 1, 2]), pd.Series([2, 3, 4])
        index, est, cis = plotter.aggregate(values, groups)
        assert cis.loc[2].isnull().all()

        # An unused category ("c") must give a null interval, and no
        # RuntimeWarning may be raised while aggregating.
        plotter = rel._LinePlotter(x="x", y="y", data=long_df)
        plotter.estimator = "mean"
        plotter.n_boot = 100
        plotter.ci = 95
        groups = pd.Categorical(["a", "b", "a", "b"], ["a", "b", "c"])
        values = pd.Series([1, 1, 2, 2])
        with warnings.catch_warnings():
            warnings.simplefilter("error", RuntimeWarning)
            index, est, cis = plotter.aggregate(values, groups)
            assert cis.loc[["c"]].isnull().all().all()

    def test_legend_data(self, long_df):
        """Check the handles and labels produced by ``add_legend_data``."""
        f, ax = plt.subplots()

        def legend_entries(plotter):
            # Add legend artists for ``plotter`` and read them back.
            plotter.add_legend_data(ax)
            return ax.get_legend_handles_labels()

        # No semantics -> no legend entries.
        plotter = rel._LinePlotter(x="x", y="y", data=long_df,
                                   legend="full")
        handles, labels = legend_entries(plotter)
        assert handles == []

        # Hue only: a title entry followed by one entry per level.
        ax.clear()
        plotter = rel._LinePlotter(x="x", y="y", hue="a", data=long_df,
                                   legend="full")
        handles, labels = legend_entries(plotter)
        colors = [h.get_color() for h in handles]
        hue_colors = [plotter.palette[lvl] for lvl in plotter.hue_levels]
        assert labels == ["a"] + plotter.hue_levels
        assert colors == ["w"] + hue_colors

        # Hue and style on the same variable share one set of entries.
        ax.clear()
        plotter = rel._LinePlotter(x="x", y="y", hue="a", style="a",
                                   markers=True, legend="full",
                                   data=long_df)
        handles, labels = legend_entries(plotter)
        colors = [h.get_color() for h in handles]
        markers = [h.get_marker() for h in handles]
        hue_colors = [plotter.palette[lvl] for lvl in plotter.hue_levels]
        style_marks = [plotter.markers[lvl] for lvl in plotter.style_levels]
        expected_labels = ["a"] + plotter.hue_levels
        assert labels == expected_labels
        assert expected_labels == ["a"] + plotter.style_levels
        assert colors == ["w"] + hue_colors
        assert markers == [""] + style_marks

        # Hue and style on different variables get separate sections.
        ax.clear()
        plotter = rel._LinePlotter(x="x", y="y", hue="a", style="b",
                                   markers=True, legend="full",
                                   data=long_df)
        handles, labels = legend_entries(plotter)
        colors = [h.get_color() for h in handles]
        markers = [h.get_marker() for h in handles]
        hue_colors = [plotter.palette[lvl] for lvl in plotter.hue_levels]
        style_marks = [plotter.markers[lvl] for lvl in plotter.style_levels]
        expected_colors = (["w"] + hue_colors
                           + ["w"] + [".2"] * len(plotter.style_levels))
        expected_markers = ([""] + ["None"] * len(plotter.hue_levels)
                            + [""] + style_marks)
        assert labels == (["a"] + plotter.hue_levels
                          + ["b"] + plotter.style_levels)
        assert colors == expected_colors
        assert markers == expected_markers

        # Hue and size on the same variable.
        ax.clear()
        plotter = rel._LinePlotter(x="x", y="y", hue="a", size="a",
                                   data=long_df, legend="full")
        handles, labels = legend_entries(plotter)
        colors = [h.get_color() for h in handles]
        widths = [h.get_linewidth() for h in handles]
        hue_colors = [plotter.palette[lvl] for lvl in plotter.hue_levels]
        size_widths = [plotter.sizes[lvl] for lvl in plotter.size_levels]
        expected_labels = ["a"] + plotter.hue_levels
        assert labels == expected_labels
        assert expected_labels == ["a"] + plotter.size_levels
        assert colors == ["w"] + hue_colors
        assert widths == [0] + size_widths

        # Numeric semantic with 20 levels: "full" lists every level,
        # "brief" yields four labels.
        x, y = np.random.randn(2, 40)
        z = np.tile(np.arange(20), 2)

        for semantic in ("hue", "size"):
            plotter = rel._LinePlotter(x=x, y=y, **{semantic: z})
            levels = getattr(plotter, semantic + "_levels")

            ax.clear()
            plotter.legend = "full"
            handles, labels = legend_entries(plotter)
            assert labels == [str(lvl) for lvl in levels]

            ax.clear()
            plotter.legend = "brief"
            handles, labels = legend_entries(plotter)
            assert len(labels) == 4

        # An unknown legend mode is rejected.
        ax.clear()
        plotter.legend = "bad_value"
        with pytest.raises(ValueError):
            plotter.add_legend_data(ax)

        # With a log norm, consecutive brief labels differ by a factor 10.
        for semantic in ("hue", "size"):
            ax.clear()
            norm_kws = {semantic: z,
                        semantic + "_norm": mpl.colors.LogNorm()}
            plotter = rel._LinePlotter(x=x, y=y, legend="brief",
                                       **norm_kws)
            handles, labels = legend_entries(plotter)
            assert float(labels[2]) / float(labels[1]) == 10

        # Brief legend labels for the float-valued column "f".
        for semantic in ("hue", "size"):
            ax.clear()
            plotter = rel._LinePlotter(x="x", y="y", legend="brief",
                                       data=long_df, **{semantic: "f"})
            plotter.add_legend_data(ax)
            expected_levels = ['0.20', '0.24', '0.28', '0.32']
            handles, labels = ax.get_legend_handles_labels()
            assert labels == ["f"] + expected_levels

    def test_plot(self, long_df, repeated_df):
        """Check the artists drawn by ``_LinePlotter.plot``."""
        f, ax = plt.subplots()

        def redraw(plotter, **kws):
            # Start from empty axes, then draw with the given keywords.
            ax.clear()
            plotter.plot(ax, kws)

        # Unsorted, unaggregated data are drawn as given.
        plotter = rel._LinePlotter(x="x", y="y", data=long_df,
                                   sort=False, estimator=None)
        plotter.plot(ax, {})
        (line,) = ax.lines
        assert np.array_equal(line.get_xdata(), long_df.x.values)
        assert np.array_equal(line.get_ydata(), long_df.y.values)

        # Plot keywords reach the line artist.
        redraw(plotter, color="k", label="test")
        (line,) = ax.lines
        assert line.get_color() == "k"
        assert line.get_label() == "test"

        # sort=True orders the points by x, then y.
        plotter = rel._LinePlotter(x="x", y="y", data=long_df,
                                   sort=True, estimator=None)
        redraw(plotter)
        (line,) = ax.lines
        ordered = long_df.sort_values(["x", "y"])
        assert np.array_equal(line.get_xdata(), ordered.x.values)
        assert np.array_equal(line.get_ydata(), ordered.y.values)

        # One line per hue level, colored from the palette.
        plotter = rel._LinePlotter(x="x", y="y", hue="a", data=long_df)
        redraw(plotter)
        assert len(ax.lines) == len(plotter.hue_levels)
        for line, level in zip(ax.lines, plotter.hue_levels):
            assert line.get_color() == plotter.palette[level]

        # One line per size level, with the mapped width.
        plotter = rel._LinePlotter(x="x", y="y", size="a", data=long_df)
        redraw(plotter)
        assert len(ax.lines) == len(plotter.size_levels)
        for line, level in zip(ax.lines, plotter.size_levels):
            assert line.get_linewidth() == plotter.sizes[level]

        # Hue and style on the same variable.
        plotter = rel._LinePlotter(x="x", y="y", hue="a", style="a",
                                   markers=True, data=long_df)
        redraw(plotter)
        n_hues = len(plotter.hue_levels)
        assert len(ax.lines) == n_hues
        assert n_hues == len(plotter.style_levels)
        for line, level in zip(ax.lines, plotter.hue_levels):
            assert line.get_color() == plotter.palette[level]
            assert line.get_marker() == plotter.markers[level]

        # Hue and style on different variables: one line per combination.
        plotter = rel._LinePlotter(x="x", y="y", hue="a", style="b",
                                   markers=True, data=long_df)
        redraw(plotter)
        combos = product(plotter.hue_levels, plotter.style_levels)
        n_combos = len(plotter.hue_levels) * len(plotter.style_levels)
        assert len(ax.lines) == n_combos
        for line, (hue_level, style_level) in zip(ax.lines, combos):
            assert line.get_color() == plotter.palette[hue_level]
            assert line.get_marker() == plotter.markers[style_level]

        # Aggregation with an error band: mean line plus one collection.
        plotter = rel._LinePlotter(x="x", y="y", data=long_df,
                                   estimator="mean", err_style="band",
                                   ci="sd", sort=True)
        redraw(plotter)
        (line,) = ax.lines
        x_means = long_df.groupby("x").y.mean()
        assert np.array_equal(line.get_xdata(), x_means.index.values)
        assert np.allclose(line.get_ydata(), x_means.values)
        assert len(ax.collections) == 1

        plotter = rel._LinePlotter(x="x", y="y", hue="a", data=long_df,
                                   estimator="mean", err_style="band",
                                   ci="sd")
        redraw(plotter)
        n_hues = len(plotter.hue_levels)
        assert len(ax.lines) == len(ax.collections)
        assert len(ax.collections) == n_hues
        for artist in ax.collections:
            assert isinstance(artist, mpl.collections.PolyCollection)

        plotter = rel._LinePlotter(x="x", y="y", hue="a", data=long_df,
                                   estimator="mean", err_style="bars",
                                   ci="sd")
        redraw(plotter)
        # assert len(ax.lines) / 2 == len(ax.collections) == len(p.hue_levels)
        # The lines are different on mpl 1.4 but I can't install to debug
        assert len(ax.collections) == len(plotter.hue_levels)
        for artist in ax.collections:
            assert isinstance(artist, mpl.collections.LineCollection)

        # With units and no estimator, every unit gets its own line.
        plotter = rel._LinePlotter(x="x", y="y", data=repeated_df,
                                   units="u", estimator=None)
        redraw(plotter)
        n_units = len(repeated_df["u"].unique())
        assert len(ax.lines) == n_units

        plotter = rel._LinePlotter(x="x", y="y", hue="a",
                                   data=repeated_df,
                                   units="u", estimator=None)
        redraw(plotter)
        n_units *= len(repeated_df["a"].unique())
        assert len(ax.lines) == n_units

        # Combining units with an estimator is an error.
        plotter.estimator = "mean"
        with pytest.raises(ValueError):
            plotter.plot(ax, {})

        # err_kws are forwarded to the error artists.
        plotter = rel._LinePlotter(x="x", y="y", hue="a", data=long_df,
                                   err_style="band",
                                   err_kws={"alpha": .5})
        redraw(plotter)
        for band in ax.collections:
            assert band.get_alpha() == .5

        plotter = rel._LinePlotter(x="x", y="y", hue="a", data=long_df,
                                   err_style="bars",
                                   err_kws={"elinewidth": 2})
        redraw(plotter)
        for bars in ax.collections:
            # NOTE(review): this compares the collection's line *styles*
            # with 2 although the keyword above sets ``elinewidth`` --
            # confirm whether a line-width check was intended.
            assert bars.get_linestyles() == 2

        plotter.err_style = "invalid"
        with pytest.raises(ValueError):
            plotter.plot(ax, {})

        # Smoke test: numeric values stored as strings for hue and size.
        x_str = long_df["x"].astype(str)
        plotter = rel._LinePlotter(x="x", y="y", hue=x_str, data=long_df)
        redraw(plotter)

        plotter = rel._LinePlotter(x="x", y="y", size=x_str, data=long_df)
        redraw(plotter)

    def test_axis_labels(self, long_df):
        """Check axis labels on two axes that share their y axis."""
        f, (left_ax, right_ax) = plt.subplots(1, 2, sharey=True)

        plotter = rel._LinePlotter(x="x", y="y", data=long_df)

        plotter.plot(left_ax, {})
        assert left_ax.get_xlabel() == "x"
        assert left_ax.get_ylabel() == "y"

        # The second axes keeps its label text, but the y label is hidden.
        plotter.plot(right_ax, {})
        assert right_ax.get_xlabel() == "x"
        assert right_ax.get_ylabel() == "y"
        assert not right_ax.yaxis.label.get_visible()

    def test_lineplot_axes(self, wide_df):
        """Check which axes ``lineplot`` draws on and returns."""
        f1, first_ax = plt.subplots()
        f2, second_ax = plt.subplots()

        # Without ``ax``, the most recently created axes is returned.
        returned = rel.lineplot(data=wide_df)
        assert returned is second_ax

        returned = rel.lineplot(data=wide_df, ax=first_ax)
        assert returned is first_ax

    def test_lineplot_smoke(self, flat_array, flat_series,
                            wide_array, wide_list, wide_list_of_series,
                            wide_df, long_df, missing_df):
        """Call ``lineplot`` with many input forms; only errors would fail."""
        f, ax = plt.subplots()

        # (positional args, keyword args) for each call, in order.
        calls = [
            (([], []), {}),
            ((), {"data": flat_array}),
            ((), {"data": flat_series}),
            ((), {"data": wide_array}),
            ((), {"data": wide_list}),
            ((), {"data": wide_list_of_series}),
            ((), {"data": wide_df}),
            ((), {"x": "x", "y": "y", "data": long_df}),
            ((), {"x": long_df.x, "y": long_df.y}),
            ((), {"x": long_df.x, "y": "y", "data": long_df}),
            ((), {"x": "x", "y": long_df.y.values, "data": long_df}),
            ((), {"x": "x", "y": "y", "hue": "a", "data": long_df}),
            ((), {"x": "x", "y": "y", "hue": "a", "style": "a",
                  "data": long_df}),
            ((), {"x": "x", "y": "y", "hue": "a", "style": "b",
                  "data": long_df}),
            ((), {"x": "x", "y": "y", "hue": "a", "style": "a",
                  "data": missing_df}),
            ((), {"x": "x", "y": "y", "hue": "a", "style": "b",
                  "data": missing_df}),
            ((), {"x": "x", "y": "y", "hue": "a", "size": "a",
                  "data": long_df}),
            ((), {"x": "x", "y": "y", "hue": "a", "size": "s",
                  "data": long_df}),
            ((), {"x": "x", "y": "y", "hue": "a", "size": "a",
                  "data": missing_df}),
            ((), {"x": "x", "y": "y", "hue": "a", "size": "s",
                  "data": missing_df}),
        ]

        for args, kwargs in calls:
            rel.lineplot(*args, **kwargs)
            ax.clear()
class TestScatterPlotter(TestRelationalPlotter):
    """Tests for ``rel._ScatterPlotter`` and the ``rel.scatterplot`` function."""

    def test_legend_data(self, long_df):
        """Check the handles and labels produced by ``add_legend_data``."""

        def marker_path(symbol):
            # Transformed path of a matplotlib marker, for path comparisons.
            style = mpl.markers.MarkerStyle(symbol)
            return style.get_path().transformed(style.get_transform())

        default_mark = marker_path("o")
        null_mark = marker_path("")

        f, ax = plt.subplots()

        # No semantics -> no legend entries.
        p = rel._ScatterPlotter(x="x", y="y", data=long_df, legend="full")
        p.add_legend_data(ax)
        handles, labels = ax.get_legend_handles_labels()
        assert handles == []

        # Hue only: a title entry followed by one entry per level.
        ax.clear()
        p = rel._ScatterPlotter(x="x", y="y", hue="a", data=long_df,
                                legend="full")
        p.add_legend_data(ax)
        handles, labels = ax.get_legend_handles_labels()
        colors = [h.get_facecolors()[0] for h in handles]
        expected_colors = ["w"] + [p.palette[l] for l in p.hue_levels]
        assert labels == ["a"] + p.hue_levels
        assert self.colors_equal(colors, expected_colors)

        # Hue and style on the same variable share one set of entries.
        ax.clear()
        p = rel._ScatterPlotter(x="x", y="y", hue="a", style="a",
                                markers=True, legend="full", data=long_df)
        p.add_legend_data(ax)
        handles, labels = ax.get_legend_handles_labels()
        colors = [h.get_facecolors()[0] for h in handles]
        expected_colors = ["w"] + [p.palette[l] for l in p.hue_levels]
        paths = [h.get_paths()[0] for h in handles]
        expected_paths = [null_mark] + [p.paths[l] for l in p.style_levels]
        assert labels == ["a"] + p.hue_levels == ["a"] + p.style_levels
        assert self.colors_equal(colors, expected_colors)
        assert self.paths_equal(paths, expected_paths)

        # Hue and style on different variables get separate sections.
        ax.clear()
        p = rel._ScatterPlotter(x="x", y="y", hue="a", style="b",
                                markers=True, legend="full", data=long_df)
        p.add_legend_data(ax)
        handles, labels = ax.get_legend_handles_labels()
        colors = [h.get_facecolors()[0] for h in handles]
        paths = [h.get_paths()[0] for h in handles]
        expected_colors = (["w"] + [p.palette[l] for l in p.hue_levels]
                           + ["w"] + [".2" for _ in p.style_levels])
        expected_paths = ([null_mark] + [default_mark for _ in p.hue_levels]
                          + [null_mark] + [p.paths[l] for l in p.style_levels])
        assert labels == ["a"] + p.hue_levels + ["b"] + p.style_levels
        assert self.colors_equal(colors, expected_colors)
        assert self.paths_equal(paths, expected_paths)

        # Hue and size on the same variable.
        ax.clear()
        p = rel._ScatterPlotter(x="x", y="y", hue="a", size="a",
                                data=long_df, legend="full")
        p.add_legend_data(ax)
        handles, labels = ax.get_legend_handles_labels()
        colors = [h.get_facecolors()[0] for h in handles]
        expected_colors = ["w"] + [p.palette[l] for l in p.hue_levels]
        sizes = [h.get_sizes()[0] for h in handles]
        expected_sizes = [0] + [p.sizes[l] for l in p.size_levels]
        assert labels == ["a"] + p.hue_levels == ["a"] + p.size_levels
        assert self.colors_equal(colors, expected_colors)
        assert sizes == expected_sizes

        # Explicit sizes, given first as a list and then as a dict.
        for size_spec in ([10, 100, 200], {2: 10, 4: 100, 8: 200}):
            ax.clear()
            p = rel._ScatterPlotter(x="x", y="y", size="s", sizes=size_spec,
                                    data=long_df, legend="full")
            p.add_legend_data(ax)
            handles, labels = ax.get_legend_handles_labels()
            sizes = [h.get_sizes()[0] for h in handles]
            expected_sizes = [0] + [p.sizes[l] for l in p.size_levels]
            assert labels == ["s"] + [str(l) for l in p.size_levels]
            assert sizes == expected_sizes

        # Numeric semantic with 20 levels: "full" lists every level,
        # "brief" yields four labels.
        x, y = np.random.randn(2, 40)
        z = np.tile(np.arange(20), 2)

        for semantic in ("hue", "size"):
            p = rel._ScatterPlotter(x=x, y=y, **{semantic: z})
            levels = getattr(p, semantic + "_levels")

            ax.clear()
            p.legend = "full"
            p.add_legend_data(ax)
            handles, labels = ax.get_legend_handles_labels()
            assert labels == [str(l) for l in levels]

            ax.clear()
            p.legend = "brief"
            p.add_legend_data(ax)
            handles, labels = ax.get_legend_handles_labels()
            assert len(labels) == 4

        # An unknown legend mode is rejected.
        ax.clear()
        p.legend = "bad_value"
        with pytest.raises(ValueError):
            p.add_legend_data(ax)

    def test_plot(self, long_df, repeated_df):
        """Check the point collection drawn by ``_ScatterPlotter.plot``.

        After every redraw the collection is looked up again from
        ``ax.collections``: the axes are cleared before each plot, so a
        handle obtained earlier belongs to a previous drawing.
        """
        f, ax = plt.subplots()

        p = rel._ScatterPlotter(x="x", y="y", data=long_df)

        p.plot(ax, {})
        points = ax.collections[0]
        assert np.array_equal(points.get_offsets(), long_df[["x", "y"]].values)

        # Plot keywords reach the collection.
        ax.clear()
        p.plot(ax, {"color": "k", "label": "test"})
        points = ax.collections[0]
        assert self.colors_equal(points.get_facecolor(), "k")
        assert points.get_label() == "test"

        # Hue: one face color per observation, taken from the palette.
        p = rel._ScatterPlotter(x="x", y="y", hue="a", data=long_df)

        ax.clear()
        p.plot(ax, {})
        points = ax.collections[0]
        expected_colors = [p.palette[k] for k in p.plot_data["hue"]]
        assert self.colors_equal(points.get_facecolors(), expected_colors)

        # Style with explicit markers: the requested color is the edge color.
        p = rel._ScatterPlotter(x="x", y="y", style="c",
                                markers=["+", "x"], data=long_df)

        ax.clear()
        color = (1, .3, .8)
        p.plot(ax, {"color": color})
        points = ax.collections[0]
        assert self.colors_equal(points.get_edgecolors(), [color])

        # Size: one marker size per observation.
        p = rel._ScatterPlotter(x="x", y="y", size="a", data=long_df)

        ax.clear()
        p.plot(ax, {})
        points = ax.collections[0]
        expected_sizes = [p.size_lookup(k) for k in p.plot_data["size"]]
        assert np.array_equal(points.get_sizes(), expected_sizes)

        # Hue and style on the same variable.
        p = rel._ScatterPlotter(x="x", y="y", hue="a", style="a",
                                markers=True, data=long_df)

        ax.clear()
        p.plot(ax, {})
        # Fix: re-read the collection; previously the assertions below
        # inspected the handle left over from the size-only plot.
        points = ax.collections[0]
        expected_colors = [p.palette[k] for k in p.plot_data["hue"]]
        expected_paths = [p.paths[k] for k in p.plot_data["style"]]
        assert self.colors_equal(points.get_facecolors(), expected_colors)
        assert self.paths_equal(points.get_paths(), expected_paths)

        # Hue and style on different variables.
        p = rel._ScatterPlotter(x="x", y="y", hue="a", style="b",
                                markers=True, data=long_df)

        ax.clear()
        p.plot(ax, {})
        # Fix: same stale-handle problem as in the previous section.
        points = ax.collections[0]
        expected_colors = [p.palette[k] for k in p.plot_data["hue"]]
        expected_paths = [p.paths[k] for k in p.plot_data["style"]]
        assert self.colors_equal(points.get_facecolors(), expected_colors)
        assert self.paths_equal(points.get_paths(), expected_paths)

        # Smoke test: numeric values stored as strings for hue and size.
        x_str = long_df["x"].astype(str)
        p = rel._ScatterPlotter(x="x", y="y", hue=x_str, data=long_df)
        ax.clear()
        p.plot(ax, {})

        p = rel._ScatterPlotter(x="x", y="y", size=x_str, data=long_df)
        ax.clear()
        p.plot(ax, {})

    def test_axis_labels(self, long_df):
        """Check axis labels on two axes that share their y axis."""
        f, (ax1, ax2) = plt.subplots(1, 2, sharey=True)

        p = rel._ScatterPlotter(x="x", y="y", data=long_df)

        p.plot(ax1, {})
        assert ax1.get_xlabel() == "x"
        assert ax1.get_ylabel() == "y"

        # The second axes keeps its label text, but the y label is hidden.
        p.plot(ax2, {})
        assert ax2.get_xlabel() == "x"
        assert ax2.get_ylabel() == "y"
        assert not ax2.yaxis.label.get_visible()

    def test_scatterplot_axes(self, wide_df):
        """Check which axes ``scatterplot`` draws on and returns."""
        f1, ax1 = plt.subplots()
        f2, ax2 = plt.subplots()

        # Without ``ax``, the most recently created axes is returned.
        ax = rel.scatterplot(data=wide_df)
        assert ax is ax2

        ax = rel.scatterplot(data=wide_df, ax=ax1)
        assert ax is ax1

    def test_scatterplot_smoke(self, flat_array, flat_series,
                               wide_array, wide_list, wide_list_of_series,
                               wide_df, long_df, missing_df):
        """Call ``scatterplot`` with many input forms; only errors would fail."""
        f, ax = plt.subplots()

        # (positional args, keyword args) for each call, in order.
        calls = [
            (([], []), {}),
            ((), {"data": flat_array}),
            ((), {"data": flat_series}),
            ((), {"data": wide_array}),
            ((), {"data": wide_list}),
            ((), {"data": wide_list_of_series}),
            ((), {"data": wide_df}),
            ((), {"x": "x", "y": "y", "data": long_df}),
            ((), {"x": long_df.x, "y": long_df.y}),
            ((), {"x": long_df.x, "y": "y", "data": long_df}),
            ((), {"x": "x", "y": long_df.y.values, "data": long_df}),
            ((), {"x": "x", "y": "y", "hue": "a", "data": long_df}),
            ((), {"x": "x", "y": "y", "hue": "a", "style": "a",
                  "data": long_df}),
            ((), {"x": "x", "y": "y", "hue": "a", "style": "b",
                  "data": long_df}),
            ((), {"x": "x", "y": "y", "hue": "a", "style": "a",
                  "data": missing_df}),
            ((), {"x": "x", "y": "y", "hue": "a", "style": "b",
                  "data": missing_df}),
            ((), {"x": "x", "y": "y", "hue": "a", "size": "a",
                  "data": long_df}),
            ((), {"x": "x", "y": "y", "hue": "a", "size": "s",
                  "data": long_df}),
            ((), {"x": "x", "y": "y", "hue": "a", "size": "a",
                  "data": missing_df}),
            ((), {"x": "x", "y": "y", "hue": "a", "size": "s",
                  "data": missing_df}),
        ]

        for args, kwargs in calls:
            rel.scatterplot(*args, **kwargs)
            ax.clear()
class TestRelPlotter(TestRelationalPlotter):
    """Tests for the figure-level ``rel.relplot`` function."""

    def test_relplot_simple(self, long_df):
        """Check the drawn data for each ``kind`` and rejection of a bad one."""
        grid = rel.relplot(x="x", y="y", kind="scatter", data=long_df)
        xs, ys = grid.ax.collections[0].get_offsets().T
        assert np.array_equal(xs, long_df["x"])
        assert np.array_equal(ys, long_df["y"])

        # The line kind draws the mean of y at each x.
        grid = rel.relplot(x="x", y="y", kind="line", data=long_df)
        xs, ys = grid.ax.lines[0].get_xydata().T
        x_means = long_df.groupby("x").y.mean()
        assert np.array_equal(xs, x_means.index)
        assert ys == pytest.approx(x_means.values)

        with pytest.raises(ValueError):
            grid = rel.relplot(x="x", y="y", kind="not_a_kind",
                               data=long_df)

    def test_relplot_complex(self, long_df):
        """Check point positions with semantics and facets combined."""
        semantics = ["hue", "size", "style"]

        def assert_facets_match(grid, groups):
            # Pair each group with a facet (in axes order) and compare
            # the plotted offsets with the group's x and y columns.
            for (_, group_df), ax in zip(groups, grid.axes.flat):
                xs, ys = ax.collections[0].get_offsets().T
                assert np.array_equal(xs, group_df["x"])
                assert np.array_equal(ys, group_df["y"])

        # A single semantic on a single facet.
        for sem in semantics:
            grid = rel.relplot(x="x", y="y", data=long_df, **{sem: "a"})
            xs, ys = grid.ax.collections[0].get_offsets().T
            assert np.array_equal(xs, long_df["x"])
            assert np.array_equal(ys, long_df["y"])

        # A single semantic with column facets.
        for sem in semantics:
            grid = rel.relplot(x="x", y="y", col="c", data=long_df,
                               **{sem: "a"})
            assert_facets_match(grid, long_df.groupby("c"))

        # Hue plus a second semantic, with column facets.
        for sem in ["size", "style"]:
            grid = rel.relplot(x="x", y="y", hue="b", col="c",
                               data=long_df, **{sem: "a"})
            assert_facets_match(grid, long_df.groupby("c"))

        # Row and column facets together.
        for sem in semantics:
            grid = rel.relplot(x="x", y="y", col="b", row="c",
                               data=long_df.sort_values(["c", "b"]),
                               **{sem: "a"})
            assert_facets_match(grid, long_df.groupby(["c", "b"]))

    def test_relplot_hues(self, long_df):
        """Check that a list palette is applied within every facet."""
        color_names = ["r", "b", "g"]
        grid = rel.relplot(x="x", y="y", hue="a", style="b", col="c",
                           palette=color_names, data=long_df)

        # Colors are paired with the levels of "a" in order of appearance.
        color_of = dict(zip(long_df["a"].unique(), color_names))
        facets = zip(long_df.groupby("c"), grid.axes.flat)
        for (_, group_df), ax in facets:
            points = ax.collections[0]
            wanted = [color_of[level] for level in group_df["a"]]
            assert self.colors_equal(points.get_facecolors(), wanted)

    def test_relplot_sizes(self, long_df):
        """Check that a list of sizes is applied within every facet."""
        size_values = [5, 12, 7]
        grid = rel.relplot(x="x", y="y", size="a", hue="b", col="c",
                           sizes=size_values, data=long_df)

        # Sizes are paired with the levels of "a" in order of appearance.
        size_of = dict(zip(long_df["a"].unique(), size_values))
        facets = zip(long_df.groupby("c"), grid.axes.flat)
        for (_, group_df), ax in facets:
            points = ax.collections[0]
            wanted = [size_of[level] for level in group_df["a"]]
            assert np.array_equal(points.get_sizes(), wanted)

    def test_relplot_styles(self, long_df):
        """Check that a list of markers is applied within every facet."""
        marker_names = ["o", "d", "s"]
        grid = rel.relplot(x="x", y="y", style="a", hue="b", col="c",
                           markers=marker_names, data=long_df)

        marker_paths = []
        for name in marker_names:
            style = mpl.markers.MarkerStyle(name)
            path = style.get_path().transformed(style.get_transform())
            marker_paths.append(path)

        # Markers are paired with the levels of "a" in order of appearance.
        path_of = dict(zip(long_df["a"].unique(), marker_paths))
        facets = zip(long_df.groupby("c"), grid.axes.flat)
        for (_, group_df), ax in facets:
            points = ax.collections[0]
            wanted = [path_of[level] for level in group_df["a"]]
            assert self.paths_equal(points.get_paths(), wanted)

    def test_relplot_stringy_numerics(self, long_df):
        """Check that numbers stored as strings work for hue and size."""
        long_df["x_str"] = long_df["x"].astype(str)

        for semantic in ("hue", "size"):
            grid = rel.relplot(x="x", y="y", data=long_df,
                               **{semantic: "x_str"})
            offsets = grid.ax.collections[0].get_offsets()
            # No point may be masked out, and every point is in place.
            assert not np.ma.getmask(offsets).any()
            assert np.array_equal(offsets, long_df[["x", "y"]])

    def test_relplot_legend(self, long_df):
        """Check when a figure legend is created and what it contains."""
        # No semantics -> no legend.
        grid = rel.relplot(x="x", y="y", data=long_df)
        assert grid._legend is None

        # Hue: the variable name followed by its levels.
        grid = rel.relplot(x="x", y="y", hue="a", data=long_df)
        shown = [t.get_text() for t in grid._legend.texts]
        wanted = np.append(["a"], long_df["a"].unique())
        assert np.array_equal(shown, wanted)

        # Numeric hue and size: the entries after the title are sorted.
        grid = rel.relplot(x="x", y="y", hue="s", size="s", data=long_df)
        shown = [t.get_text() for t in grid._legend.texts]
        assert np.array_equal(shown[1:], np.sort(shown[1:]))

        grid = rel.relplot(x="x", y="y", hue="a", legend=False,
                           data=long_df)
        assert grid._legend is None

        # Line kind: legend line colors follow the palette order.
        palette = color_palette("deep", len(long_df["b"].unique()))
        relabel = dict(zip(long_df["a"].unique(), long_df["b"].unique()))
        long_df["a_like_b"] = long_df["a"].map(relabel)
        grid = rel.relplot(x="x", y="y", hue="b", style="a_like_b",
                           palette=palette, kind="line", estimator=None,
                           data=long_df)
        legend_lines = grid._legend.get_lines()[1:]  # Chop off title dummy
        for legend_line, color in zip(legend_lines, palette):
            assert legend_line.get_color() == color

    def test_ax_kwarg_removal(self, long_df):
        """Check that passing ``ax`` warns and the axes is left unused."""
        f, ax = plt.subplots()
        with pytest.warns(UserWarning):
            grid = rel.relplot("x", "y", data=long_df, ax=ax)
        assert len(ax.collections) == 0
        assert len(grid.ax.collections) > 0
|