from itertools import product import warnings import numpy as np import pandas as pd import matplotlib as mpl import matplotlib.pyplot as plt import pytest from .. import relational as rel from ..palettes import color_palette from ..utils import categorical_order class TestRelationalPlotter(object): def scatter_rgbs(self, collections): rgbs = [] for col in collections: rgb = tuple(col.get_facecolor().squeeze()[:3]) rgbs.append(rgb) return rgbs def colors_equal(self, *args): equal = True for c1, c2 in zip(*args): c1 = mpl.colors.colorConverter.to_rgb(np.squeeze(c1)) c2 = mpl.colors.colorConverter.to_rgb(np.squeeze(c1)) equal &= c1 == c2 return equal def paths_equal(self, *args): equal = True for p1, p2 in zip(*args): equal &= np.array_equal(p1.vertices, p2.vertices) equal &= np.array_equal(p1.codes, p2.codes) return equal @pytest.fixture def wide_df(self): columns = list("abc") index = pd.Int64Index(np.arange(10, 50, 2), name="wide_index") values = np.random.randn(len(index), len(columns)) return pd.DataFrame(values, index=index, columns=columns) @pytest.fixture def wide_array(self): return np.random.randn(20, 3) @pytest.fixture def flat_array(self): return np.random.randn(20) @pytest.fixture def flat_series(self): index = pd.Int64Index(np.arange(10, 30), name="t") return pd.Series(np.random.randn(20), index, name="s") @pytest.fixture def wide_list(self): return [np.random.randn(20), np.random.randn(10)] @pytest.fixture def wide_list_of_series(self): return [pd.Series(np.random.randn(20), np.arange(20), name="a"), pd.Series(np.random.randn(10), np.arange(5, 15), name="b")] @pytest.fixture def long_df(self): n = 100 rs = np.random.RandomState() df = pd.DataFrame(dict( x=rs.randint(0, 20, n), y=rs.randn(n), a=np.take(list("abc"), rs.randint(0, 3, n)), b=np.take(list("mnop"), rs.randint(0, 4, n)), c=np.take(list([0, 1]), rs.randint(0, 2, n)), d=np.repeat(np.datetime64('2005-02-25'), n), s=np.take([2, 4, 8], rs.randint(0, 3, n)), f=np.take(list([0.2, 0.3]), rs.randint(0, 2, n)), )) df["s_cat"] = df["s"].astype("category") return df @pytest.fixture def repeated_df(self): n = 100 rs = np.random.RandomState() return pd.DataFrame(dict( x=np.tile(np.arange(n // 2), 2), y=rs.randn(n), a=np.take(list("abc"), rs.randint(0, 3, n)), u=np.repeat(np.arange(2), n // 2), )) @pytest.fixture def missing_df(self): n = 100 rs = np.random.RandomState() df = pd.DataFrame(dict( x=rs.randint(0, 20, n), y=rs.randn(n), a=np.take(list("abc"), rs.randint(0, 3, n)), b=np.take(list("mnop"), rs.randint(0, 4, n)), s=np.take([2, 4, 8], rs.randint(0, 3, n)), )) for col in df: idx = rs.permutation(df.index)[:10] df.loc[idx, col] = np.nan return df @pytest.fixture def null_column(self): return pd.Series(index=np.arange(20), dtype='float64') def test_wide_df_variables(self, wide_df): p = rel._RelationalPlotter() p.establish_variables(data=wide_df) assert p.input_format == "wide" assert p.semantics == ["x", "y", "hue", "style"] assert len(p.plot_data) == np.product(wide_df.shape) x = p.plot_data["x"] expected_x = np.tile(wide_df.index, wide_df.shape[1]) assert np.array_equal(x, expected_x) y = p.plot_data["y"] expected_y = wide_df.values.ravel(order="f") assert np.array_equal(y, expected_y) hue = p.plot_data["hue"] expected_hue = np.repeat(wide_df.columns.values, wide_df.shape[0]) assert np.array_equal(hue, expected_hue) style = p.plot_data["style"] expected_style = expected_hue assert np.array_equal(style, expected_style) assert p.plot_data["size"].isnull().all() assert p.x_label == wide_df.index.name assert p.y_label is None assert p.hue_label == wide_df.columns.name assert p.size_label is None assert p.style_label == wide_df.columns.name def test_wide_df_variables_check(self, wide_df): p = rel._RelationalPlotter() wide_df = wide_df.copy() wide_df.loc[:, "not_numeric"] = "a" with pytest.raises(ValueError): p.establish_variables(data=wide_df) def test_wide_array_variables(self, wide_array): p = rel._RelationalPlotter() p.establish_variables(data=wide_array) assert p.input_format == "wide" assert p.semantics == ["x", "y", "hue", "style"] assert len(p.plot_data) == np.product(wide_array.shape) nrow, ncol = wide_array.shape x = p.plot_data["x"] expected_x = np.tile(np.arange(nrow), ncol) assert np.array_equal(x, expected_x) y = p.plot_data["y"] expected_y = wide_array.ravel(order="f") assert np.array_equal(y, expected_y) hue = p.plot_data["hue"] expected_hue = np.repeat(np.arange(ncol), nrow) assert np.array_equal(hue, expected_hue) style = p.plot_data["style"] expected_style = expected_hue assert np.array_equal(style, expected_style) assert p.plot_data["size"].isnull().all() assert p.x_label is None assert p.y_label is None assert p.hue_label is None assert p.size_label is None assert p.style_label is None def test_flat_array_variables(self, flat_array): p = rel._RelationalPlotter() p.establish_variables(data=flat_array) assert p.input_format == "wide" assert p.semantics == ["x", "y"] assert len(p.plot_data) == np.product(flat_array.shape) x = p.plot_data["x"] expected_x = np.arange(flat_array.shape[0]) assert np.array_equal(x, expected_x) y = p.plot_data["y"] expected_y = flat_array assert np.array_equal(y, expected_y) assert p.plot_data["hue"].isnull().all() assert p.plot_data["style"].isnull().all() assert p.plot_data["size"].isnull().all() assert p.x_label is None assert p.y_label is None assert p.hue_label is None assert p.size_label is None assert p.style_label is None def test_flat_series_variables(self, flat_series): p = rel._RelationalPlotter() p.establish_variables(data=flat_series) assert p.input_format == "wide" assert p.semantics == ["x", "y"] assert len(p.plot_data) == len(flat_series) x = p.plot_data["x"] expected_x = flat_series.index assert np.array_equal(x, expected_x) y = p.plot_data["y"] expected_y = flat_series assert np.array_equal(y, expected_y) assert p.x_label is None assert p.y_label is None assert p.hue_label is None assert p.size_label is None assert p.style_label is None def test_wide_list_variables(self, wide_list): p = rel._RelationalPlotter() p.establish_variables(data=wide_list) assert p.input_format == "wide" assert p.semantics == ["x", "y", "hue", "style"] assert len(p.plot_data) == sum(len(l) for l in wide_list) x = p.plot_data["x"] expected_x = np.concatenate([np.arange(len(l)) for l in wide_list]) assert np.array_equal(x, expected_x) y = p.plot_data["y"] expected_y = np.concatenate(wide_list) assert np.array_equal(y, expected_y) hue = p.plot_data["hue"] expected_hue = np.concatenate([ np.ones_like(l) * i for i, l in enumerate(wide_list) ]) assert np.array_equal(hue, expected_hue) style = p.plot_data["style"] expected_style = expected_hue assert np.array_equal(style, expected_style) assert p.plot_data["size"].isnull().all() assert p.x_label is None assert p.y_label is None assert p.hue_label is None assert p.size_label is None assert p.style_label is None def test_wide_list_of_series_variables(self, wide_list_of_series): p = rel._RelationalPlotter() p.establish_variables(data=wide_list_of_series) assert p.input_format == "wide" assert p.semantics == ["x", "y", "hue", "style"] assert len(p.plot_data) == sum(len(l) for l in wide_list_of_series) x = p.plot_data["x"] expected_x = np.concatenate([s.index for s in wide_list_of_series]) assert np.array_equal(x, expected_x) y = p.plot_data["y"] expected_y = np.concatenate(wide_list_of_series) assert np.array_equal(y, expected_y) hue = p.plot_data["hue"] expected_hue = np.concatenate([ np.full(len(s), s.name, object) for s in wide_list_of_series ]) assert np.array_equal(hue, expected_hue) style = p.plot_data["style"] expected_style = expected_hue assert np.array_equal(style, expected_style) assert p.plot_data["size"].isnull().all() assert p.x_label is None assert p.y_label is None assert p.hue_label is None assert p.size_label is None assert p.style_label is None def test_long_df(self, long_df): p = rel._RelationalPlotter() p.establish_variables(x="x", y="y", data=long_df) assert p.input_format == "long" assert p.semantics == ["x", "y"] assert np.array_equal(p.plot_data["x"], long_df["x"]) assert np.array_equal(p.plot_data["y"], long_df["y"]) for col in ["hue", "style", "size"]: assert p.plot_data[col].isnull().all() assert (p.x_label, p.y_label) == ("x", "y") assert p.hue_label is None assert p.size_label is None assert p.style_label is None p.establish_variables(x=long_df.x, y="y", data=long_df) assert p.semantics == ["x", "y"] assert np.array_equal(p.plot_data["x"], long_df["x"]) assert np.array_equal(p.plot_data["y"], long_df["y"]) assert (p.x_label, p.y_label) == ("x", "y") p.establish_variables(x="x", y=long_df.y, data=long_df) assert p.semantics == ["x", "y"] assert np.array_equal(p.plot_data["x"], long_df["x"]) assert np.array_equal(p.plot_data["y"], long_df["y"]) assert (p.x_label, p.y_label) == ("x", "y") p.establish_variables(x="x", y="y", hue="a", data=long_df) assert p.semantics == ["x", "y", "hue"] assert np.array_equal(p.plot_data["hue"], long_df["a"]) for col in ["style", "size"]: assert p.plot_data[col].isnull().all() assert p.hue_label == "a" assert p.size_label is None and p.style_label is None p.establish_variables(x="x", y="y", hue="a", style="a", data=long_df) assert p.semantics == ["x", "y", "hue", "style"] assert np.array_equal(p.plot_data["hue"], long_df["a"]) assert np.array_equal(p.plot_data["style"], long_df["a"]) assert p.plot_data["size"].isnull().all() assert p.hue_label == p.style_label == "a" assert p.size_label is None p.establish_variables(x="x", y="y", hue="a", style="b", data=long_df) assert p.semantics == ["x", "y", "hue", "style"] assert np.array_equal(p.plot_data["hue"], long_df["a"]) assert np.array_equal(p.plot_data["style"], long_df["b"]) assert p.plot_data["size"].isnull().all() p.establish_variables(x="x", y="y", size="y", data=long_df) assert p.semantics == ["x", "y", "size"] assert np.array_equal(p.plot_data["size"], long_df["y"]) assert p.size_label == "y" assert p.hue_label is None and p.style_label is None def test_bad_input(self, long_df): p = rel._RelationalPlotter() with pytest.raises(ValueError): p.establish_variables(x=long_df.x) with pytest.raises(ValueError): p.establish_variables(y=long_df.y) with pytest.raises(ValueError): p.establish_variables(x="not_in_df", data=long_df) with pytest.raises(ValueError): p.establish_variables(x="x", y="not_in_df", data=long_df) with pytest.raises(ValueError): p.establish_variables(x="x", y="not_in_df", data=long_df) def test_empty_input(self): p = rel._RelationalPlotter() p.establish_variables(data=[]) p.establish_variables(data=np.array([])) p.establish_variables(data=pd.DataFrame()) p.establish_variables(x=[], y=[]) def test_units(self, repeated_df): p = rel._RelationalPlotter() p.establish_variables(x="x", y="y", units="u", data=repeated_df) assert np.array_equal(p.plot_data["units"], repeated_df["u"]) def test_parse_hue_null(self, wide_df, null_column): p = rel._LinePlotter(data=wide_df) p.parse_hue(null_column, "Blues", None, None) assert p.hue_levels == [None] assert p.palette == {} assert p.hue_type is None assert p.cmap is None def test_parse_hue_categorical(self, wide_df, long_df): p = rel._LinePlotter(data=wide_df) assert p.hue_levels == wide_df.columns.tolist() assert p.hue_type == "categorical" assert p.cmap is None # Test named palette palette = "Blues" expected_colors = color_palette(palette, wide_df.shape[1]) expected_palette = dict(zip(wide_df.columns, expected_colors)) p.parse_hue(p.plot_data.hue, palette, None, None) assert p.palette == expected_palette # Test list palette palette = color_palette("Reds", wide_df.shape[1]) p.parse_hue(p.plot_data.hue, palette, None, None) expected_palette = dict(zip(wide_df.columns, palette)) assert p.palette == expected_palette # Test dict palette colors = color_palette("Set1", 8) palette = dict(zip(wide_df.columns, colors)) p.parse_hue(p.plot_data.hue, palette, None, None) assert p.palette == palette # Test dict with missing keys palette = dict(zip(wide_df.columns[:-1], colors)) with pytest.raises(ValueError): p.parse_hue(p.plot_data.hue, palette, None, None) # Test list with wrong number of colors palette = colors[:-1] with pytest.raises(ValueError): p.parse_hue(p.plot_data.hue, palette, None, None) # Test hue order hue_order = ["a", "c", "d"] p.parse_hue(p.plot_data.hue, None, hue_order, None) assert p.hue_levels == hue_order # Test long data p = rel._LinePlotter(x="x", y="y", hue="a", data=long_df) assert p.hue_levels == categorical_order(long_df.a) assert p.hue_type == "categorical" assert p.cmap is None # Test default palette p.parse_hue(p.plot_data.hue, None, None, None) hue_levels = categorical_order(long_df.a) expected_colors = color_palette(n_colors=len(hue_levels)) expected_palette = dict(zip(hue_levels, expected_colors)) assert p.palette == expected_palette # Test default palette with many levels levels = pd.Series(list("abcdefghijklmnopqrstuvwxyz")) p.parse_hue(levels, None, None, None) expected_colors = color_palette("husl", n_colors=len(levels)) expected_palette = dict(zip(levels, expected_colors)) assert p.palette == expected_palette # Test binary data p = rel._LinePlotter(x="x", y="y", hue="c", data=long_df) assert p.hue_levels == [0, 1] assert p.hue_type == "categorical" df = long_df[long_df["c"] == 0] p = rel._LinePlotter(x="x", y="y", hue="c", data=df) assert p.hue_levels == [0] assert p.hue_type == "categorical" df = long_df[long_df["c"] == 1] p = rel._LinePlotter(x="x", y="y", hue="c", data=df) assert p.hue_levels == [1] assert p.hue_type == "categorical" # Test Timestamp data p = rel._LinePlotter(x="x", y="y", hue="d", data=long_df) assert p.hue_levels == [pd.Timestamp('2005-02-25')] assert p.hue_type == "categorical" # Test numeric data with category type p = rel._LinePlotter(x="x", y="y", hue="s_cat", data=long_df) assert p.hue_levels == categorical_order(long_df.s_cat) assert p.hue_type == "categorical" assert p.cmap is None # Test categorical palette specified for numeric data palette = "deep" p = rel._LinePlotter(x="x", y="y", hue="s", palette=palette, data=long_df) expected_colors = color_palette(palette, n_colors=len(levels)) hue_levels = categorical_order(long_df["s"]) expected_palette = dict(zip(hue_levels, expected_colors)) assert p.palette == expected_palette assert p.hue_type == "categorical" def test_parse_hue_numeric(self, long_df): p = rel._LinePlotter(x="x", y="y", hue="s", data=long_df) hue_levels = list(np.sort(long_df.s.unique())) assert p.hue_levels == hue_levels assert p.hue_type == "numeric" assert p.cmap.name == "seaborn_cubehelix" # Test named colormap palette = "Purples" p.parse_hue(p.plot_data.hue, palette, None, None) assert p.cmap is mpl.cm.get_cmap(palette) # Test colormap object palette = mpl.cm.get_cmap("Greens") p.parse_hue(p.plot_data.hue, palette, None, None) assert p.cmap is palette # Test cubehelix shorthand palette = "ch:2,0,light=.2" p.parse_hue(p.plot_data.hue, palette, None, None) assert isinstance(p.cmap, mpl.colors.ListedColormap) # Test default hue limits p.parse_hue(p.plot_data.hue, None, None, None) assert p.hue_limits == (p.plot_data.hue.min(), p.plot_data.hue.max()) # Test specified hue limits hue_norm = 1, 4 p.parse_hue(p.plot_data.hue, None, None, hue_norm) assert p.hue_limits == hue_norm assert isinstance(p.hue_norm, mpl.colors.Normalize) assert p.hue_norm.vmin == hue_norm[0] assert p.hue_norm.vmax == hue_norm[1] # Test Normalize object hue_norm = mpl.colors.PowerNorm(2, vmin=1, vmax=10) p.parse_hue(p.plot_data.hue, None, None, hue_norm) assert p.hue_limits == (hue_norm.vmin, hue_norm.vmax) assert p.hue_norm is hue_norm # Test default colormap values hmin, hmax = p.plot_data.hue.min(), p.plot_data.hue.max() p.parse_hue(p.plot_data.hue, None, None, None) assert p.palette[hmin] == pytest.approx(p.cmap(0.0)) assert p.palette[hmax] == pytest.approx(p.cmap(1.0)) # Test specified colormap values hue_norm = hmin - 1, hmax - 1 p.parse_hue(p.plot_data.hue, None, None, hue_norm) norm_min = (hmin - hue_norm[0]) / (hue_norm[1] - hue_norm[0]) assert p.palette[hmin] == pytest.approx(p.cmap(norm_min)) assert p.palette[hmax] == pytest.approx(p.cmap(1.0)) # Test list of colors hue_levels = list(np.sort(long_df.s.unique())) palette = color_palette("Blues", len(hue_levels)) p.parse_hue(p.plot_data.hue, palette, None, None) assert p.palette == dict(zip(hue_levels, palette)) palette = color_palette("Blues", len(hue_levels) + 1) with pytest.raises(ValueError): p.parse_hue(p.plot_data.hue, palette, None, None) # Test dictionary of colors palette = dict(zip(hue_levels, color_palette("Reds"))) p.parse_hue(p.plot_data.hue, palette, None, None) assert p.palette == palette palette.pop(hue_levels[0]) with pytest.raises(ValueError): p.parse_hue(p.plot_data.hue, palette, None, None) # Test invalid palette palette = "not_a_valid_palette" with pytest.raises(ValueError): p.parse_hue(p.plot_data.hue, palette, None, None) # Test bad norm argument hue_norm = "not a norm" with pytest.raises(ValueError): p.parse_hue(p.plot_data.hue, None, None, hue_norm) def test_parse_size(self, long_df): p = rel._LinePlotter(x="x", y="y", size="s", data=long_df) # Test default size limits and range default_linewidth = mpl.rcParams["lines.linewidth"] default_limits = p.plot_data["size"].min(), p.plot_data["size"].max() default_range = .5 * default_linewidth, 2 * default_linewidth p.parse_size(p.plot_data["size"], None, None, None) assert p.size_limits == default_limits size_range = min(p.sizes.values()), max(p.sizes.values()) assert size_range == default_range # Test specified size limits size_limits = (1, 5) p.parse_size(p.plot_data["size"], None, None, size_limits) assert p.size_limits == size_limits # Test specified size range sizes = (.1, .5) p.parse_size(p.plot_data["size"], sizes, None, None) assert p.size_limits == default_limits # Test size values with normalization range sizes = (1, 5) size_norm = (1, 10) p.parse_size(p.plot_data["size"], sizes, None, size_norm) normalize = mpl.colors.Normalize(*size_norm, clip=True) for level, width in p.sizes.items(): assert width == sizes[0] + (sizes[1] - sizes[0]) * normalize(level) # Test size values with normalization object sizes = (1, 5) size_norm = mpl.colors.LogNorm(1, 10, clip=False) p.parse_size(p.plot_data["size"], sizes, None, size_norm) assert p.size_norm.clip for level, width in p.sizes.items(): assert width == sizes[0] + (sizes[1] - sizes[0]) * size_norm(level) # Test specified size order var = "a" levels = long_df[var].unique() sizes = [1, 4, 6] size_order = [levels[1], levels[2], levels[0]] p = rel._LinePlotter(x="x", y="y", size=var, data=long_df) p.parse_size(p.plot_data["size"], sizes, size_order, None) assert p.sizes == dict(zip(size_order, sizes)) # Test list of sizes var = "a" levels = categorical_order(long_df[var]) sizes = list(np.random.rand(len(levels))) p = rel._LinePlotter(x="x", y="y", size=var, data=long_df) p.parse_size(p.plot_data["size"], sizes, None, None) assert p.sizes == dict(zip(levels, sizes)) # Test dict of sizes var = "a" levels = categorical_order(long_df[var]) sizes = dict(zip(levels, np.random.rand(len(levels)))) p = rel._LinePlotter(x="x", y="y", size=var, data=long_df) p.parse_size(p.plot_data["size"], sizes, None, None) assert p.sizes == sizes # Test sizes list with wrong length sizes = list(np.random.rand(len(levels) + 1)) with pytest.raises(ValueError): p.parse_size(p.plot_data["size"], sizes, None, None) # Test sizes dict with missing levels sizes = dict(zip(levels, np.random.rand(len(levels) - 1))) with pytest.raises(ValueError): p.parse_size(p.plot_data["size"], sizes, None, None) # Test bad sizes argument sizes = "bad_size" with pytest.raises(ValueError): p.parse_size(p.plot_data["size"], sizes, None, None) # Test bad norm argument size_norm = "not a norm" p = rel._LinePlotter(x="x", y="y", size="s", data=long_df) with pytest.raises(ValueError): p.parse_size(p.plot_data["size"], None, None, size_norm) def test_parse_style(self, long_df): p = rel._LinePlotter(x="x", y="y", style="a", data=long_df) # Test defaults markers, dashes = True, True p.parse_style(p.plot_data["style"], markers, dashes, None) assert p.markers == dict(zip(p.style_levels, p.default_markers)) assert p.dashes == dict(zip(p.style_levels, p.default_dashes)) # Test lists markers, dashes = ["o", "s", "d"], [(1, 0), (1, 1), (2, 1, 3, 1)] p.parse_style(p.plot_data["style"], markers, dashes, None) assert p.markers == dict(zip(p.style_levels, markers)) assert p.dashes == dict(zip(p.style_levels, dashes)) # Test dicts markers = dict(zip(p.style_levels, markers)) dashes = dict(zip(p.style_levels, dashes)) p.parse_style(p.plot_data["style"], markers, dashes, None) assert p.markers == markers assert p.dashes == dashes # Test style order with defaults style_order = np.take(p.style_levels, [1, 2, 0]) markers = dashes = True p.parse_style(p.plot_data["style"], markers, dashes, style_order) assert p.markers == dict(zip(style_order, p.default_markers)) assert p.dashes == dict(zip(style_order, p.default_dashes)) # Test too many levels with style lists markers, dashes = ["o", "s"], False with pytest.raises(ValueError): p.parse_style(p.plot_data["style"], markers, dashes, None) markers, dashes = False, [(2, 1)] with pytest.raises(ValueError): p.parse_style(p.plot_data["style"], markers, dashes, None) # Test too many levels with style dicts markers, dashes = {"a": "o", "b": "s"}, False with pytest.raises(ValueError): p.parse_style(p.plot_data["style"], markers, dashes, None) markers, dashes = False, {"a": (1, 0), "b": (2, 1)} with pytest.raises(ValueError): p.parse_style(p.plot_data["style"], markers, dashes, None) # Test mixture of filled and unfilled markers markers, dashes = ["o", "x", "s"], None with pytest.raises(ValueError): p.parse_style(p.plot_data["style"], markers, dashes, None) def test_subset_data_quantities(self, long_df): p = rel._LinePlotter(x="x", y="y", data=long_df) assert len(list(p.subset_data())) == 1 # -- var = "a" n_subsets = len(long_df[var].unique()) p = rel._LinePlotter(x="x", y="y", hue=var, data=long_df) assert len(list(p.subset_data())) == n_subsets p = rel._LinePlotter(x="x", y="y", style=var, data=long_df) assert len(list(p.subset_data())) == n_subsets n_subsets = len(long_df[var].unique()) p = rel._LinePlotter(x="x", y="y", size=var, data=long_df) assert len(list(p.subset_data())) == n_subsets # -- var = "a" n_subsets = len(long_df[var].unique()) p = rel._LinePlotter(x="x", y="y", hue=var, style=var, data=long_df) assert len(list(p.subset_data())) == n_subsets # -- var1, var2 = "a", "s" n_subsets = len(set(list(map(tuple, long_df[[var1, var2]].values)))) p = rel._LinePlotter(x="x", y="y", hue=var1, style=var2, data=long_df) assert len(list(p.subset_data())) == n_subsets p = rel._LinePlotter(x="x", y="y", hue=var1, size=var2, style=var1, data=long_df) assert len(list(p.subset_data())) == n_subsets # -- var1, var2, var3 = "a", "s", "b" cols = [var1, var2, var3] n_subsets = len(set(list(map(tuple, long_df[cols].values)))) p = rel._LinePlotter(x="x", y="y", hue=var1, size=var2, style=var3, data=long_df) assert len(list(p.subset_data())) == n_subsets def test_subset_data_keys(self, long_df): p = rel._LinePlotter(x="x", y="y", data=long_df) for (hue, size, style), _ in p.subset_data(): assert hue is None assert size is None assert style is None # -- var = "a" p = rel._LinePlotter(x="x", y="y", hue=var, data=long_df) for (hue, size, style), _ in p.subset_data(): assert hue in long_df[var].values assert size is None assert style is None p = rel._LinePlotter(x="x", y="y", style=var, data=long_df) for (hue, size, style), _ in p.subset_data(): assert hue is None assert size is None assert style in long_df[var].values p = rel._LinePlotter(x="x", y="y", hue=var, style=var, data=long_df) for (hue, size, style), _ in p.subset_data(): assert hue in long_df[var].values assert size is None assert style in long_df[var].values p = rel._LinePlotter(x="x", y="y", size=var, data=long_df) for (hue, size, style), _ in p.subset_data(): assert hue is None assert size in long_df[var].values assert style is None # -- var1, var2 = "a", "s" p = rel._LinePlotter(x="x", y="y", hue=var1, size=var2, data=long_df) for (hue, size, style), _ in p.subset_data(): assert hue in long_df[var1].values assert size in long_df[var2].values assert style is None def test_subset_data_values(self, long_df): p = rel._LinePlotter(x="x", y="y", data=long_df) _, data = next(p.subset_data()) expected = p.plot_data.loc[:, ["x", "y"]].sort_values(["x", "y"]) assert np.array_equal(data.values, expected) p = rel._LinePlotter(x="x", y="y", data=long_df, sort=False) _, data = next(p.subset_data()) expected = p.plot_data.loc[:, ["x", "y"]] assert np.array_equal(data.values, expected) p = rel._LinePlotter(x="x", y="y", hue="a", data=long_df) for (hue, _, _), data in p.subset_data(): rows = p.plot_data["hue"] == hue cols = ["x", "y"] expected = p.plot_data.loc[rows, cols].sort_values(cols) assert np.array_equal(data.values, expected.values) p = rel._LinePlotter(x="x", y="y", hue="a", data=long_df, sort=False) for (hue, _, _), data in p.subset_data(): rows = p.plot_data["hue"] == hue cols = ["x", "y"] expected = p.plot_data.loc[rows, cols] assert np.array_equal(data.values, expected.values) p = rel._LinePlotter(x="x", y="y", hue="a", style="a", data=long_df) for (hue, _, _), data in p.subset_data(): rows = p.plot_data["hue"] == hue cols = ["x", "y"] expected = p.plot_data.loc[rows, cols].sort_values(cols) assert np.array_equal(data.values, expected.values) p = rel._LinePlotter(x="x", y="y", hue="a", size="s", data=long_df) for (hue, size, _), data in p.subset_data(): rows = (p.plot_data["hue"] == hue) & (p.plot_data["size"] == size) cols = ["x", "y"] expected = p.plot_data.loc[rows, cols].sort_values(cols) assert np.array_equal(data.values, expected.values) class TestLinePlotter(TestRelationalPlotter): def test_aggregate(self, long_df): p = rel._LinePlotter(x="x", y="y", data=long_df) p.n_boot = 10000 p.sort = False x = pd.Series(np.tile([1, 2], 100)) y = pd.Series(np.random.randn(200)) y_mean = y.groupby(x).mean() def sem(x): return np.std(x) / np.sqrt(len(x)) y_sem = y.groupby(x).apply(sem) y_cis = pd.DataFrame(dict(low=y_mean - y_sem, high=y_mean + y_sem), columns=["low", "high"]) p.ci = 68 p.estimator = "mean" index, est, cis = p.aggregate(y, x) assert np.array_equal(index.values, x.unique()) assert est.index.equals(index) assert est.values == pytest.approx(y_mean.values) assert cis.values == pytest.approx(y_cis.values, 4) assert list(cis.columns) == ["low", "high"] p.estimator = np.mean index, est, cis = p.aggregate(y, x) assert np.array_equal(index.values, x.unique()) assert est.index.equals(index) assert est.values == pytest.approx(y_mean.values) assert cis.values == pytest.approx(y_cis.values, 4) assert list(cis.columns) == ["low", "high"] p.seed = 0 _, _, ci1 = p.aggregate(y, x) _, _, ci2 = p.aggregate(y, x) assert np.array_equal(ci1, ci2) y_std = y.groupby(x).std() y_cis = pd.DataFrame(dict(low=y_mean - y_std, high=y_mean + y_std), columns=["low", "high"]) p.ci = "sd" index, est, cis = p.aggregate(y, x) assert np.array_equal(index.values, x.unique()) assert est.index.equals(index) assert est.values == pytest.approx(y_mean.values) assert cis.values == pytest.approx(y_cis.values) assert list(cis.columns) == ["low", "high"] p.ci = None index, est, cis = p.aggregate(y, x) assert cis is None p.ci = 68 x, y = pd.Series([1, 2, 3]), pd.Series([4, 3, 2]) index, est, cis = p.aggregate(y, x) assert np.array_equal(index.values, x) assert np.array_equal(est.values, y) assert cis is None x, y = pd.Series([1, 1, 2]), pd.Series([2, 3, 4]) index, est, cis = p.aggregate(y, x) assert cis.loc[2].isnull().all() p = rel._LinePlotter(x="x", y="y", data=long_df) p.estimator = "mean" p.n_boot = 100 p.ci = 95 x = pd.Categorical(["a", "b", "a", "b"], ["a", "b", "c"]) y = pd.Series([1, 1, 2, 2]) with warnings.catch_warnings(): warnings.simplefilter("error", RuntimeWarning) index, est, cis = p.aggregate(y, x) assert cis.loc[["c"]].isnull().all().all() def test_legend_data(self, long_df): f, ax = plt.subplots() p = rel._LinePlotter(x="x", y="y", data=long_df, legend="full") p.add_legend_data(ax) handles, labels = ax.get_legend_handles_labels() assert handles == [] # -- ax.clear() p = rel._LinePlotter(x="x", y="y", hue="a", data=long_df, legend="full") p.add_legend_data(ax) handles, labels = ax.get_legend_handles_labels() colors = [h.get_color() for h in handles] assert labels == ["a"] + p.hue_levels assert colors == ["w"] + [p.palette[l] for l in p.hue_levels] # -- ax.clear() p = rel._LinePlotter(x="x", y="y", hue="a", style="a", markers=True, legend="full", data=long_df) p.add_legend_data(ax) handles, labels = ax.get_legend_handles_labels() colors = [h.get_color() for h in handles] markers = [h.get_marker() for h in handles] assert labels == ["a"] + p.hue_levels == ["a"] + p.style_levels assert colors == ["w"] + [p.palette[l] for l in p.hue_levels] assert markers == [""] + [p.markers[l] for l in p.style_levels] # -- ax.clear() p = rel._LinePlotter(x="x", y="y", hue="a", style="b", markers=True, legend="full", data=long_df) p.add_legend_data(ax) handles, labels = ax.get_legend_handles_labels() colors = [h.get_color() for h in handles] markers = [h.get_marker() for h in handles] expected_colors = (["w"] + [p.palette[l] for l in p.hue_levels] + ["w"] + [".2" for _ in p.style_levels]) expected_markers = ([""] + ["None" for _ in p.hue_levels] + [""] + [p.markers[l] for l in p.style_levels]) assert labels == ["a"] + p.hue_levels + ["b"] + p.style_levels assert colors == expected_colors assert markers == expected_markers # -- ax.clear() p = rel._LinePlotter(x="x", y="y", hue="a", size="a", data=long_df, legend="full") p.add_legend_data(ax) handles, labels = ax.get_legend_handles_labels() colors = [h.get_color() for h in handles] widths = [h.get_linewidth() for h in handles] assert labels == ["a"] + p.hue_levels == ["a"] + p.size_levels assert colors == ["w"] + [p.palette[l] for l in p.hue_levels] assert widths == [0] + [p.sizes[l] for l in p.size_levels] # -- x, y = np.random.randn(2, 40) z = np.tile(np.arange(20), 2) p = rel._LinePlotter(x=x, y=y, hue=z) ax.clear() p.legend = "full" p.add_legend_data(ax) handles, labels = ax.get_legend_handles_labels() assert labels == [str(l) for l in p.hue_levels] ax.clear() p.legend = "brief" p.add_legend_data(ax) handles, labels = ax.get_legend_handles_labels() assert len(labels) == 4 p = rel._LinePlotter(x=x, y=y, size=z) ax.clear() p.legend = "full" p.add_legend_data(ax) handles, labels = ax.get_legend_handles_labels() assert labels == [str(l) for l in p.size_levels] ax.clear() p.legend = "brief" p.add_legend_data(ax) handles, labels = ax.get_legend_handles_labels() assert len(labels) == 4 ax.clear() p.legend = "bad_value" with pytest.raises(ValueError): p.add_legend_data(ax) ax.clear() p = rel._LinePlotter(x=x, y=y, hue=z, hue_norm=mpl.colors.LogNorm(), legend="brief") p.add_legend_data(ax) handles, labels = ax.get_legend_handles_labels() assert float(labels[2]) / float(labels[1]) == 10 ax.clear() p = rel._LinePlotter(x=x, y=y, size=z, size_norm=mpl.colors.LogNorm(), legend="brief") p.add_legend_data(ax) handles, labels = ax.get_legend_handles_labels() assert float(labels[2]) / float(labels[1]) == 10 ax.clear() p = rel._LinePlotter( x="x", y="y", hue="f", legend="brief", data=long_df) p.add_legend_data(ax) expected_levels = ['0.20', '0.24', '0.28', '0.32'] handles, labels = ax.get_legend_handles_labels() assert labels == ["f"] + expected_levels ax.clear() p = rel._LinePlotter( x="x", y="y", size="f", legend="brief", data=long_df) p.add_legend_data(ax) expected_levels = ['0.20', '0.24', '0.28', '0.32'] handles, labels = ax.get_legend_handles_labels() assert labels == ["f"] + expected_levels def test_plot(self, long_df, repeated_df): f, ax = plt.subplots() p = rel._LinePlotter(x="x", y="y", data=long_df, sort=False, estimator=None) p.plot(ax, {}) line, = ax.lines assert np.array_equal(line.get_xdata(), long_df.x.values) assert np.array_equal(line.get_ydata(), long_df.y.values) ax.clear() p.plot(ax, {"color": "k", "label": "test"}) line, = ax.lines assert line.get_color() == "k" assert line.get_label() == "test" p = rel._LinePlotter(x="x", y="y", data=long_df, sort=True, estimator=None) ax.clear() p.plot(ax, {}) line, = ax.lines sorted_data = long_df.sort_values(["x", "y"]) assert np.array_equal(line.get_xdata(), sorted_data.x.values) assert np.array_equal(line.get_ydata(), sorted_data.y.values) p = rel._LinePlotter(x="x", y="y", hue="a", data=long_df) ax.clear() p.plot(ax, {}) assert len(ax.lines) == len(p.hue_levels) for line, level in zip(ax.lines, p.hue_levels): assert line.get_color() == p.palette[level] p = rel._LinePlotter(x="x", y="y", size="a", data=long_df) ax.clear() p.plot(ax, {}) assert len(ax.lines) == len(p.size_levels) for line, level in zip(ax.lines, p.size_levels): assert line.get_linewidth() == p.sizes[level] p = rel._LinePlotter(x="x", y="y", hue="a", style="a", markers=True, data=long_df) ax.clear() p.plot(ax, {}) assert len(ax.lines) == len(p.hue_levels) == len(p.style_levels) for line, level in zip(ax.lines, p.hue_levels): assert line.get_color() == p.palette[level] assert line.get_marker() == p.markers[level] p = rel._LinePlotter(x="x", y="y", hue="a", style="b", markers=True, data=long_df) ax.clear() p.plot(ax, {}) levels = product(p.hue_levels, p.style_levels) assert len(ax.lines) == (len(p.hue_levels) * len(p.style_levels)) for line, (hue, style) in zip(ax.lines, levels): assert line.get_color() == p.palette[hue] assert line.get_marker() == p.markers[style] p = rel._LinePlotter(x="x", y="y", data=long_df, estimator="mean", err_style="band", ci="sd", sort=True) ax.clear() p.plot(ax, {}) line, = ax.lines expected_data = long_df.groupby("x").y.mean() assert np.array_equal(line.get_xdata(), expected_data.index.values) assert np.allclose(line.get_ydata(), expected_data.values) assert len(ax.collections) == 1 p = rel._LinePlotter(x="x", y="y", hue="a", data=long_df, estimator="mean", err_style="band", ci="sd") ax.clear() p.plot(ax, {}) assert len(ax.lines) == len(ax.collections) == len(p.hue_levels) for c in ax.collections: assert isinstance(c, mpl.collections.PolyCollection) p = rel._LinePlotter(x="x", y="y", hue="a", data=long_df, estimator="mean", err_style="bars", ci="sd") ax.clear() p.plot(ax, {}) # assert len(ax.lines) / 2 == len(ax.collections) == len(p.hue_levels) # The lines are different on mpl 1.4 but I can't install to debug assert len(ax.collections) == len(p.hue_levels) for c in ax.collections: assert isinstance(c, mpl.collections.LineCollection) p = rel._LinePlotter(x="x", y="y", data=repeated_df, units="u", estimator=None) ax.clear() p.plot(ax, {}) n_units = len(repeated_df["u"].unique()) assert len(ax.lines) == n_units p = rel._LinePlotter(x="x", y="y", hue="a", data=repeated_df, units="u", estimator=None) ax.clear() p.plot(ax, {}) n_units *= len(repeated_df["a"].unique()) assert len(ax.lines) == n_units p.estimator = "mean" with pytest.raises(ValueError): p.plot(ax, {}) p = rel._LinePlotter(x="x", y="y", hue="a", data=long_df, err_style="band", err_kws={"alpha": .5}) ax.clear() p.plot(ax, {}) for band in ax.collections: assert band.get_alpha() == .5 p = rel._LinePlotter(x="x", y="y", hue="a", data=long_df, err_style="bars", err_kws={"elinewidth": 2}) ax.clear() p.plot(ax, {}) for lines in ax.collections: assert lines.get_linestyles() == 2 p.err_style = "invalid" with pytest.raises(ValueError): p.plot(ax, {}) x_str = long_df["x"].astype(str) p = rel._LinePlotter(x="x", y="y", hue=x_str, data=long_df) ax.clear() p.plot(ax, {}) p = rel._LinePlotter(x="x", y="y", size=x_str, data=long_df) ax.clear() p.plot(ax, {}) def test_axis_labels(self, long_df): f, (ax1, ax2) = plt.subplots(1, 2, sharey=True) p = rel._LinePlotter(x="x", y="y", data=long_df) p.plot(ax1, {}) assert ax1.get_xlabel() == "x" assert ax1.get_ylabel() == "y" p.plot(ax2, {}) assert ax2.get_xlabel() == "x" assert ax2.get_ylabel() == "y" assert not ax2.yaxis.label.get_visible() def test_lineplot_axes(self, wide_df): f1, ax1 = plt.subplots() f2, ax2 = plt.subplots() ax = rel.lineplot(data=wide_df) assert ax is ax2 ax = rel.lineplot(data=wide_df, ax=ax1) assert ax is ax1 def test_lineplot_smoke(self, flat_array, flat_series, wide_array, wide_list, wide_list_of_series, wide_df, long_df, missing_df): f, ax = plt.subplots() rel.lineplot([], []) ax.clear() rel.lineplot(data=flat_array) ax.clear() rel.lineplot(data=flat_series) ax.clear() rel.lineplot(data=wide_array) ax.clear() rel.lineplot(data=wide_list) ax.clear() rel.lineplot(data=wide_list_of_series) ax.clear() rel.lineplot(data=wide_df) ax.clear() rel.lineplot(x="x", y="y", data=long_df) ax.clear() rel.lineplot(x=long_df.x, y=long_df.y) ax.clear() rel.lineplot(x=long_df.x, y="y", data=long_df) ax.clear() rel.lineplot(x="x", y=long_df.y.values, data=long_df) ax.clear() rel.lineplot(x="x", y="y", hue="a", data=long_df) ax.clear() rel.lineplot(x="x", y="y", hue="a", style="a", data=long_df) ax.clear() rel.lineplot(x="x", y="y", hue="a", style="b", data=long_df) ax.clear() rel.lineplot(x="x", y="y", hue="a", style="a", data=missing_df) ax.clear() rel.lineplot(x="x", y="y", hue="a", style="b", data=missing_df) ax.clear() rel.lineplot(x="x", y="y", hue="a", size="a", data=long_df) ax.clear() rel.lineplot(x="x", y="y", hue="a", size="s", data=long_df) ax.clear() rel.lineplot(x="x", y="y", hue="a", size="a", data=missing_df) ax.clear() rel.lineplot(x="x", y="y", hue="a", size="s", data=missing_df) ax.clear() class TestScatterPlotter(TestRelationalPlotter): def test_legend_data(self, long_df): m = mpl.markers.MarkerStyle("o") default_mark = m.get_path().transformed(m.get_transform()) m = mpl.markers.MarkerStyle("") null_mark = m.get_path().transformed(m.get_transform()) f, ax = plt.subplots() p = rel._ScatterPlotter(x="x", y="y", data=long_df, legend="full") p.add_legend_data(ax) handles, labels = ax.get_legend_handles_labels() assert handles == [] # -- ax.clear() p = rel._ScatterPlotter(x="x", y="y", hue="a", data=long_df, legend="full") p.add_legend_data(ax) handles, labels = ax.get_legend_handles_labels() colors = [h.get_facecolors()[0] for h in handles] expected_colors = ["w"] + [p.palette[l] for l in p.hue_levels] assert labels == ["a"] + p.hue_levels assert self.colors_equal(colors, expected_colors) # -- ax.clear() p = rel._ScatterPlotter(x="x", y="y", hue="a", style="a", markers=True, legend="full", data=long_df) p.add_legend_data(ax) handles, labels = ax.get_legend_handles_labels() colors = [h.get_facecolors()[0] for h in handles] expected_colors = ["w"] + [p.palette[l] for l in p.hue_levels] paths = [h.get_paths()[0] for h in handles] expected_paths = [null_mark] + [p.paths[l] for l in p.style_levels] assert labels == ["a"] + p.hue_levels == ["a"] + p.style_levels assert self.colors_equal(colors, expected_colors) assert self.paths_equal(paths, expected_paths) # -- ax.clear() p = rel._ScatterPlotter(x="x", y="y", hue="a", style="b", markers=True, legend="full", data=long_df) p.add_legend_data(ax) handles, labels = ax.get_legend_handles_labels() colors = [h.get_facecolors()[0] for h in handles] paths = [h.get_paths()[0] for h in handles] expected_colors = (["w"] + [p.palette[l] for l in p.hue_levels] + ["w"] + [".2" for _ in p.style_levels]) expected_paths = ([null_mark] + [default_mark for _ in p.hue_levels] + [null_mark] + [p.paths[l] for l in p.style_levels]) assert labels == ["a"] + p.hue_levels + ["b"] + p.style_levels assert self.colors_equal(colors, expected_colors) assert self.paths_equal(paths, expected_paths) # -- ax.clear() p = rel._ScatterPlotter(x="x", y="y", hue="a", size="a", data=long_df, legend="full") p.add_legend_data(ax) handles, labels = ax.get_legend_handles_labels() colors = [h.get_facecolors()[0] for h in handles] expected_colors = ["w"] + [p.palette[l] for l in p.hue_levels] sizes = [h.get_sizes()[0] for h in handles] expected_sizes = [0] + [p.sizes[l] for l in p.size_levels] assert labels == ["a"] + p.hue_levels == ["a"] + p.size_levels assert self.colors_equal(colors, expected_colors) assert sizes == expected_sizes # -- ax.clear() sizes_list = [10, 100, 200] p = rel._ScatterPlotter(x="x", y="y", size="s", sizes=sizes_list, data=long_df, legend="full") p.add_legend_data(ax) handles, labels = ax.get_legend_handles_labels() sizes = [h.get_sizes()[0] for h in handles] expected_sizes = [0] + [p.sizes[l] for l in p.size_levels] assert labels == ["s"] + [str(l) for l in p.size_levels] assert sizes == expected_sizes # -- ax.clear() sizes_dict = {2: 10, 4: 100, 8: 200} p = rel._ScatterPlotter(x="x", y="y", size="s", sizes=sizes_dict, data=long_df, legend="full") p.add_legend_data(ax) handles, labels = ax.get_legend_handles_labels() sizes = [h.get_sizes()[0] for h in handles] expected_sizes = [0] + [p.sizes[l] for l in p.size_levels] assert labels == ["s"] + [str(l) for l in p.size_levels] assert sizes == expected_sizes # -- x, y = np.random.randn(2, 40) z = np.tile(np.arange(20), 2) p = rel._ScatterPlotter(x=x, y=y, hue=z) ax.clear() p.legend = "full" p.add_legend_data(ax) handles, labels = ax.get_legend_handles_labels() assert labels == [str(l) for l in p.hue_levels] ax.clear() p.legend = "brief" p.add_legend_data(ax) handles, labels = ax.get_legend_handles_labels() assert len(labels) == 4 p = rel._ScatterPlotter(x=x, y=y, size=z) ax.clear() p.legend = "full" p.add_legend_data(ax) handles, labels = ax.get_legend_handles_labels() assert labels == [str(l) for l in p.size_levels] ax.clear() p.legend = "brief" p.add_legend_data(ax) handles, labels = ax.get_legend_handles_labels() assert len(labels) == 4 ax.clear() p.legend = "bad_value" with pytest.raises(ValueError): p.add_legend_data(ax) def test_plot(self, long_df, repeated_df): f, ax = plt.subplots() p = rel._ScatterPlotter(x="x", y="y", data=long_df) p.plot(ax, {}) points = ax.collections[0] assert np.array_equal(points.get_offsets(), long_df[["x", "y"]].values) ax.clear() p.plot(ax, {"color": "k", "label": "test"}) points = ax.collections[0] assert self.colors_equal(points.get_facecolor(), "k") assert points.get_label() == "test" p = rel._ScatterPlotter(x="x", y="y", hue="a", data=long_df) ax.clear() p.plot(ax, {}) points = ax.collections[0] expected_colors = [p.palette[k] for k in p.plot_data["hue"]] assert self.colors_equal(points.get_facecolors(), expected_colors) p = rel._ScatterPlotter(x="x", y="y", style="c", markers=["+", "x"], data=long_df) ax.clear() color = (1, .3, .8) p.plot(ax, {"color": color}) points = ax.collections[0] assert self.colors_equal(points.get_edgecolors(), [color]) p = rel._ScatterPlotter(x="x", y="y", size="a", data=long_df) ax.clear() p.plot(ax, {}) points = ax.collections[0] expected_sizes = [p.size_lookup(k) for k in p.plot_data["size"]] assert np.array_equal(points.get_sizes(), expected_sizes) p = rel._ScatterPlotter(x="x", y="y", hue="a", style="a", markers=True, data=long_df) ax.clear() p.plot(ax, {}) expected_colors = [p.palette[k] for k in p.plot_data["hue"]] expected_paths = [p.paths[k] for k in p.plot_data["style"]] assert self.colors_equal(points.get_facecolors(), expected_colors) assert self.paths_equal(points.get_paths(), expected_paths) p = rel._ScatterPlotter(x="x", y="y", hue="a", style="b", markers=True, data=long_df) ax.clear() p.plot(ax, {}) expected_colors = [p.palette[k] for k in p.plot_data["hue"]] expected_paths = [p.paths[k] for k in p.plot_data["style"]] assert self.colors_equal(points.get_facecolors(), expected_colors) assert self.paths_equal(points.get_paths(), expected_paths) x_str = long_df["x"].astype(str) p = rel._ScatterPlotter(x="x", y="y", hue=x_str, data=long_df) ax.clear() p.plot(ax, {}) p = rel._ScatterPlotter(x="x", y="y", size=x_str, data=long_df) ax.clear() p.plot(ax, {}) def test_axis_labels(self, long_df): f, (ax1, ax2) = plt.subplots(1, 2, sharey=True) p = rel._ScatterPlotter(x="x", y="y", data=long_df) p.plot(ax1, {}) assert ax1.get_xlabel() == "x" assert ax1.get_ylabel() == "y" p.plot(ax2, {}) assert ax2.get_xlabel() == "x" assert ax2.get_ylabel() == "y" assert not ax2.yaxis.label.get_visible() def test_scatterplot_axes(self, wide_df): f1, ax1 = plt.subplots() f2, ax2 = plt.subplots() ax = rel.scatterplot(data=wide_df) assert ax is ax2 ax = rel.scatterplot(data=wide_df, ax=ax1) assert ax is ax1 def test_scatterplot_smoke(self, flat_array, flat_series, wide_array, wide_list, wide_list_of_series, wide_df, long_df, missing_df): f, ax = plt.subplots() rel.scatterplot([], []) ax.clear() rel.scatterplot(data=flat_array) ax.clear() rel.scatterplot(data=flat_series) ax.clear() rel.scatterplot(data=wide_array) ax.clear() rel.scatterplot(data=wide_list) ax.clear() rel.scatterplot(data=wide_list_of_series) ax.clear() rel.scatterplot(data=wide_df) ax.clear() rel.scatterplot(x="x", y="y", data=long_df) ax.clear() rel.scatterplot(x=long_df.x, y=long_df.y) ax.clear() rel.scatterplot(x=long_df.x, y="y", data=long_df) ax.clear() rel.scatterplot(x="x", y=long_df.y.values, data=long_df) ax.clear() rel.scatterplot(x="x", y="y", hue="a", data=long_df) ax.clear() rel.scatterplot(x="x", y="y", hue="a", style="a", data=long_df) ax.clear() rel.scatterplot(x="x", y="y", hue="a", style="b", data=long_df) ax.clear() rel.scatterplot(x="x", y="y", hue="a", style="a", data=missing_df) ax.clear() rel.scatterplot(x="x", y="y", hue="a", style="b", data=missing_df) ax.clear() rel.scatterplot(x="x", y="y", hue="a", size="a", data=long_df) ax.clear() rel.scatterplot(x="x", y="y", hue="a", size="s", data=long_df) ax.clear() rel.scatterplot(x="x", y="y", hue="a", size="a", data=missing_df) ax.clear() rel.scatterplot(x="x", y="y", hue="a", size="s", data=missing_df) ax.clear() class TestRelPlotter(TestRelationalPlotter): def test_relplot_simple(self, long_df): g = rel.relplot(x="x", y="y", kind="scatter", data=long_df) x, y = g.ax.collections[0].get_offsets().T assert np.array_equal(x, long_df["x"]) assert np.array_equal(y, long_df["y"]) g = rel.relplot(x="x", y="y", kind="line", data=long_df) x, y = g.ax.lines[0].get_xydata().T expected = long_df.groupby("x").y.mean() assert np.array_equal(x, expected.index) assert y == pytest.approx(expected.values) with pytest.raises(ValueError): g = rel.relplot(x="x", y="y", kind="not_a_kind", data=long_df) def test_relplot_complex(self, long_df): for sem in ["hue", "size", "style"]: g = rel.relplot(x="x", y="y", data=long_df, **{sem: "a"}) x, y = g.ax.collections[0].get_offsets().T assert np.array_equal(x, long_df["x"]) assert np.array_equal(y, long_df["y"]) for sem in ["hue", "size", "style"]: g = rel.relplot(x="x", y="y", col="c", data=long_df, **{sem: "a"}) grouped = long_df.groupby("c") for (_, grp_df), ax in zip(grouped, g.axes.flat): x, y = ax.collections[0].get_offsets().T assert np.array_equal(x, grp_df["x"]) assert np.array_equal(y, grp_df["y"]) for sem in ["size", "style"]: g = rel.relplot(x="x", y="y", hue="b", col="c", data=long_df, **{sem: "a"}) grouped = long_df.groupby("c") for (_, grp_df), ax in zip(grouped, g.axes.flat): x, y = ax.collections[0].get_offsets().T assert np.array_equal(x, grp_df["x"]) assert np.array_equal(y, grp_df["y"]) for sem in ["hue", "size", "style"]: g = rel.relplot(x="x", y="y", col="b", row="c", data=long_df.sort_values(["c", "b"]), **{sem: "a"}) grouped = long_df.groupby(["c", "b"]) for (_, grp_df), ax in zip(grouped, g.axes.flat): x, y = ax.collections[0].get_offsets().T assert np.array_equal(x, grp_df["x"]) assert np.array_equal(y, grp_df["y"]) def test_relplot_hues(self, long_df): palette = ["r", "b", "g"] g = rel.relplot(x="x", y="y", hue="a", style="b", col="c", palette=palette, data=long_df) palette = dict(zip(long_df["a"].unique(), palette)) grouped = long_df.groupby("c") for (_, grp_df), ax in zip(grouped, g.axes.flat): points = ax.collections[0] expected_hues = [palette[val] for val in grp_df["a"]] assert self.colors_equal(points.get_facecolors(), expected_hues) def test_relplot_sizes(self, long_df): sizes = [5, 12, 7] g = rel.relplot(x="x", y="y", size="a", hue="b", col="c", sizes=sizes, data=long_df) sizes = dict(zip(long_df["a"].unique(), sizes)) grouped = long_df.groupby("c") for (_, grp_df), ax in zip(grouped, g.axes.flat): points = ax.collections[0] expected_sizes = [sizes[val] for val in grp_df["a"]] assert np.array_equal(points.get_sizes(), expected_sizes) def test_relplot_styles(self, long_df): markers = ["o", "d", "s"] g = rel.relplot(x="x", y="y", style="a", hue="b", col="c", markers=markers, data=long_df) paths = [] for m in markers: m = mpl.markers.MarkerStyle(m) paths.append(m.get_path().transformed(m.get_transform())) paths = dict(zip(long_df["a"].unique(), paths)) grouped = long_df.groupby("c") for (_, grp_df), ax in zip(grouped, g.axes.flat): points = ax.collections[0] expected_paths = [paths[val] for val in grp_df["a"]] assert self.paths_equal(points.get_paths(), expected_paths) def test_relplot_stringy_numerics(self, long_df): long_df["x_str"] = long_df["x"].astype(str) g = rel.relplot(x="x", y="y", hue="x_str", data=long_df) points = g.ax.collections[0] xys = points.get_offsets() mask = np.ma.getmask(xys) assert not mask.any() assert np.array_equal(xys, long_df[["x", "y"]]) g = rel.relplot(x="x", y="y", size="x_str", data=long_df) points = g.ax.collections[0] xys = points.get_offsets() mask = np.ma.getmask(xys) assert not mask.any() assert np.array_equal(xys, long_df[["x", "y"]]) def test_relplot_legend(self, long_df): g = rel.relplot(x="x", y="y", data=long_df) assert g._legend is None g = rel.relplot(x="x", y="y", hue="a", data=long_df) texts = [t.get_text() for t in g._legend.texts] expected_texts = np.append(["a"], long_df["a"].unique()) assert np.array_equal(texts, expected_texts) g = rel.relplot(x="x", y="y", hue="s", size="s", data=long_df) texts = [t.get_text() for t in g._legend.texts] assert np.array_equal(texts[1:], np.sort(texts[1:])) g = rel.relplot(x="x", y="y", hue="a", legend=False, data=long_df) assert g._legend is None palette = color_palette("deep", len(long_df["b"].unique())) a_like_b = dict(zip(long_df["a"].unique(), long_df["b"].unique())) long_df["a_like_b"] = long_df["a"].map(a_like_b) g = rel.relplot(x="x", y="y", hue="b", style="a_like_b", palette=palette, kind="line", estimator=None, data=long_df) lines = g._legend.get_lines()[1:] # Chop off title dummy for line, color in zip(lines, palette): assert line.get_color() == color def test_ax_kwarg_removal(self, long_df): f, ax = plt.subplots() with pytest.warns(UserWarning): g = rel.relplot("x", "y", data=long_df, ax=ax) assert len(ax.collections) == 0 assert len(g.ax.collections) > 0