12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
7277827792780278127822783278427852786278727882789279027912792279327942795279627972798279928002801280228032804280528062807280828092810281128122813281428152816281728182819282028212822282328242825282628272828282928302831283228332834283528362837283828392840284128422843284428452846284728482849285028512852285328542855285628572858285928602861286228632864286528662867286828692870287128722873287428752876287728782879288028812882288328842885288628872888288928902891289228932894289528962897289828992900290129022903290429052906 |
- import numpy as np
- import pandas as pd
- from scipy import stats, spatial
- import matplotlib as mpl
- import matplotlib.pyplot as plt
- from matplotlib.colors import rgb2hex
- from distutils.version import LooseVersion
- import pytest
- import nose.tools as nt
- import numpy.testing as npt
- from .. import categorical as cat
- from .. import palettes
class CategoricalFixture(object):
    """Shared, deterministically seeded data for the categorical tests.

    All attributes are class-level so every test class that inherits from
    this fixture sees the same wide-form and long-form inputs.
    """

    n_total = 60
    rs = np.random.RandomState(30)

    # NOTE: the draws from ``rs`` below must happen in this order
    # (x, then y, then y_perm) so the generated values stay reproducible.

    # Wide-form numeric data: one column per group
    x = rs.randn(n_total // 3, 3)

    # Long-form numeric data and a shuffled (re-indexed) copy of it
    y = pd.Series(rs.randn(n_total), name="y_data")
    y_perm = y.reindex(rs.choice(y.index, y.size, replace=False))

    # Long-form grouping, hue, and units variables
    g = pd.Series(np.repeat(list("abc"), n_total // 3), name="small")
    h = pd.Series(np.tile(list("mn"), n_total // 2), name="medium")
    u = pd.Series(np.tile(list("jkh"), n_total // 3))

    # Wide-form frame with named columns plus one non-numeric column
    x_df = pd.DataFrame(x, columns=pd.Series(list("XYZ"), name="big"))
    x_df["W"] = g

    # Long-form frame combining the vectors above
    df = pd.DataFrame({"y": y, "g": g, "h": h, "u": u})
class TestCategoricalPlotter(CategoricalFixture):
    """Tests for variable and color setup on ``cat._CategoricalPlotter``."""

    def test_wide_df_data(self):
        """Check plot data and semantic attributes for a wide DataFrame."""
        p = cat._CategoricalPlotter()

        # Test basic wide DataFrame
        p.establish_variables(data=self.x_df)

        # Check data attribute
        for x, y in zip(p.plot_data, self.x_df[["X", "Y", "Z"]].values.T):
            npt.assert_array_equal(x, y)

        # Check semantic attributes
        nt.assert_equal(p.orient, "v")
        nt.assert_is(p.plot_hues, None)
        # Compare by value; identity against a str literal depends on
        # interning and is not a reliable check.
        nt.assert_equal(p.group_label, "big")
        nt.assert_is(p.value_label, None)

        # Test wide dataframe with forced horizontal orientation
        p.establish_variables(data=self.x_df, orient="horiz")
        nt.assert_equal(p.orient, "h")

        # Test exception by trying to hue-group with a wide dataframe
        with nt.assert_raises(ValueError):
            p.establish_variables(hue="d", data=self.x_df)

    def test_1d_input_data(self):
        """Check handling of flat vectors and a ragged object array."""
        p = cat._CategoricalPlotter()

        # Test basic vector data
        x_1d_array = self.x.ravel()
        p.establish_variables(data=x_1d_array)
        nt.assert_equal(len(p.plot_data), 1)
        nt.assert_equal(len(p.plot_data[0]), self.n_total)
        nt.assert_is(p.group_label, None)
        nt.assert_is(p.value_label, None)

        # Test basic vector data in list form
        x_1d_list = x_1d_array.tolist()
        p.establish_variables(data=x_1d_list)
        nt.assert_equal(len(p.plot_data), 1)
        nt.assert_equal(len(p.plot_data[0]), self.n_total)
        nt.assert_is(p.group_label, None)
        nt.assert_is(p.value_label, None)

        # Test an object array that looks 1D but isn't.
        # dtype=object is required: building an array from ragged sequences
        # without it is an error on recent NumPy releases.
        x_notreally_1d = np.array([self.x.ravel(),
                                   self.x.ravel()[:self.n_total // 2]],
                                  dtype=object)
        p.establish_variables(data=x_notreally_1d)
        nt.assert_equal(len(p.plot_data), 2)
        nt.assert_equal(len(p.plot_data[0]), self.n_total)
        nt.assert_equal(len(p.plot_data[1]), self.n_total // 2)
        nt.assert_is(p.group_label, None)
        nt.assert_is(p.value_label, None)

    def test_2d_input_data(self):
        """Check that (n, 1) and (1, n) arrays are treated as one vector."""
        p = cat._CategoricalPlotter()

        x = self.x[:, 0]

        # Test vector data that looks 2D but doesn't really have columns
        p.establish_variables(data=x[:, np.newaxis])
        nt.assert_equal(len(p.plot_data), 1)
        nt.assert_equal(len(p.plot_data[0]), self.x.shape[0])
        nt.assert_is(p.group_label, None)
        nt.assert_is(p.value_label, None)

        # Test vector data that looks 2D but doesn't really have rows
        p.establish_variables(data=x[np.newaxis, :])
        nt.assert_equal(len(p.plot_data), 1)
        nt.assert_equal(len(p.plot_data[0]), self.x.shape[0])
        nt.assert_is(p.group_label, None)
        nt.assert_is(p.value_label, None)

    def test_3d_input_data(self):
        """Check that 3D array input raises ``ValueError``."""
        p = cat._CategoricalPlotter()

        # Test that passing actually 3D data raises
        x = np.zeros((5, 5, 5))
        with nt.assert_raises(ValueError):
            p.establish_variables(data=x)

    def test_list_of_array_input_data(self):
        """Check 2D data passed as a list of lists."""
        p = cat._CategoricalPlotter()

        # Test 2D input in list form
        x_list = self.x.T.tolist()
        p.establish_variables(data=x_list)
        nt.assert_equal(len(p.plot_data), 3)

        lengths = [len(v_i) for v_i in p.plot_data]
        nt.assert_equal(lengths, [self.n_total // 3] * 3)

        nt.assert_is(p.group_label, None)
        nt.assert_is(p.value_label, None)

    def test_wide_array_input_data(self):
        """Check 2D data passed as an array."""
        p = cat._CategoricalPlotter()

        # Test 2D input in array form
        p.establish_variables(data=self.x)
        nt.assert_equal(np.shape(p.plot_data), (3, self.n_total // 3))
        npt.assert_array_equal(p.plot_data, self.x.T)

        nt.assert_is(p.group_label, None)
        nt.assert_is(p.value_label, None)

    def test_single_long_direct_inputs(self):
        """Check a single vector passed directly as ``x`` or ``y``."""
        p = cat._CategoricalPlotter()

        # Test passing a series to the x variable
        p.establish_variables(x=self.y)
        npt.assert_equal(p.plot_data, [self.y])
        nt.assert_equal(p.orient, "h")
        nt.assert_equal(p.value_label, "y_data")
        nt.assert_is(p.group_label, None)

        # Test passing a series to the y variable
        p.establish_variables(y=self.y)
        npt.assert_equal(p.plot_data, [self.y])
        nt.assert_equal(p.orient, "v")
        nt.assert_equal(p.value_label, "y_data")
        nt.assert_is(p.group_label, None)

        # Test passing an array to the y variable
        p.establish_variables(y=self.y.values)
        npt.assert_equal(p.plot_data, [self.y])
        nt.assert_equal(p.orient, "v")
        nt.assert_is(p.value_label, None)
        nt.assert_is(p.group_label, None)

        # Test array and series with non-default index
        x = pd.Series([1, 1, 1, 1], index=[0, 2, 4, 6])
        y = np.array([1, 2, 3, 4])
        p.establish_variables(x, y)
        assert len(p.plot_data[0]) == 4

    def test_single_long_indirect_inputs(self):
        """Check a single vector referenced by name in a DataFrame."""
        p = cat._CategoricalPlotter()

        # Test referencing a DataFrame series in the x variable
        p.establish_variables(x="y", data=self.df)
        npt.assert_equal(p.plot_data, [self.y])
        nt.assert_equal(p.orient, "h")
        nt.assert_equal(p.value_label, "y")
        nt.assert_is(p.group_label, None)

        # Test referencing a DataFrame series in the y variable
        p.establish_variables(y="y", data=self.df)
        npt.assert_equal(p.plot_data, [self.y])
        nt.assert_equal(p.orient, "v")
        nt.assert_equal(p.value_label, "y")
        nt.assert_is(p.group_label, None)

    def test_longform_groupby(self):
        """Check grouping (and hue nesting) of long-form inputs."""
        p = cat._CategoricalPlotter()

        # Test a vertically oriented grouped and nested plot
        p.establish_variables("g", "y", "h", data=self.df)
        nt.assert_equal(len(p.plot_data), 3)
        nt.assert_equal(len(p.plot_hues), 3)
        nt.assert_equal(p.orient, "v")
        nt.assert_equal(p.value_label, "y")
        nt.assert_equal(p.group_label, "g")
        nt.assert_equal(p.hue_title, "h")

        for group, vals in zip(["a", "b", "c"], p.plot_data):
            npt.assert_array_equal(vals, self.y[self.g == group])

        for group, hues in zip(["a", "b", "c"], p.plot_hues):
            npt.assert_array_equal(hues, self.h[self.g == group])

        # Test a grouped and nested plot with direct array value data
        p.establish_variables("g", self.y.values, "h", self.df)
        nt.assert_is(p.value_label, None)
        nt.assert_equal(p.group_label, "g")

        for group, vals in zip(["a", "b", "c"], p.plot_data):
            npt.assert_array_equal(vals, self.y[self.g == group])

        # Test a grouped and nested plot with direct array hue data
        p.establish_variables("g", "y", self.h.values, self.df)

        for group, hues in zip(["a", "b", "c"], p.plot_hues):
            npt.assert_array_equal(hues, self.h[self.g == group])

        # Test categorical grouping data
        df = self.df.copy()
        df.g = df.g.astype("category")

        # Test that horizontal orientation is automatically detected
        p.establish_variables("y", "g", "h", data=df)
        nt.assert_equal(len(p.plot_data), 3)
        nt.assert_equal(len(p.plot_hues), 3)
        nt.assert_equal(p.orient, "h")
        nt.assert_equal(p.value_label, "y")
        nt.assert_equal(p.group_label, "g")
        nt.assert_equal(p.hue_title, "h")

        for group, vals in zip(["a", "b", "c"], p.plot_data):
            npt.assert_array_equal(vals, self.y[self.g == group])

        for group, hues in zip(["a", "b", "c"], p.plot_hues):
            npt.assert_array_equal(hues, self.h[self.g == group])

        # Test grouped data that matches on index
        p1 = cat._CategoricalPlotter()
        p1.establish_variables(self.g, self.y, self.h)
        p2 = cat._CategoricalPlotter()
        p2.establish_variables(self.g, self.y[::-1], self.h)
        for d1, d2 in zip(p1.plot_data, p2.plot_data):
            assert np.array_equal(d1.sort_index(), d2.sort_index())

    def test_input_validation(self):
        """Check that an unknown name for any variable raises."""
        p = cat._CategoricalPlotter()

        kws = dict(x="g", y="y", hue="h", units="u", data=self.df)
        for var in ["x", "y", "hue", "units"]:
            input_kws = kws.copy()
            input_kws[var] = "bad_input"
            with nt.assert_raises(ValueError):
                p.establish_variables(**input_kws)

    def test_order(self):
        """Check inferred and explicit ordering of the group levels."""
        p = cat._CategoricalPlotter()

        # Test inferred order from a wide dataframe input
        p.establish_variables(data=self.x_df)
        nt.assert_equal(p.group_names, ["X", "Y", "Z"])

        # Test specified order with a wide dataframe input
        p.establish_variables(data=self.x_df, order=["Y", "Z", "X"])
        nt.assert_equal(p.group_names, ["Y", "Z", "X"])

        for group, vals in zip(["Y", "Z", "X"], p.plot_data):
            npt.assert_array_equal(vals, self.x_df[group])

        # Test that specifying an order for plain array input raises
        with nt.assert_raises(ValueError):
            p.establish_variables(data=self.x, order=[1, 2, 0])

        # Test inferred order from a grouped longform input
        p.establish_variables("g", "y", data=self.df)
        nt.assert_equal(p.group_names, ["a", "b", "c"])

        # Test specified order from a grouped longform input
        p.establish_variables("g", "y", data=self.df, order=["b", "a", "c"])
        nt.assert_equal(p.group_names, ["b", "a", "c"])

        for group, vals in zip(["b", "a", "c"], p.plot_data):
            npt.assert_array_equal(vals, self.y[self.g == group])

        # Test inferred order from a grouped input with categorical groups
        df = self.df.copy()
        df.g = df.g.astype("category")
        df.g = df.g.cat.reorder_categories(["c", "b", "a"])
        p.establish_variables("g", "y", data=df)
        nt.assert_equal(p.group_names, ["c", "b", "a"])

        for group, vals in zip(["c", "b", "a"], p.plot_data):
            npt.assert_array_equal(vals, self.y[self.g == group])

        # Test that a category with no observations keeps its position
        df.g = (df.g.cat.add_categories("d")
                    .cat.reorder_categories(["c", "b", "d", "a"]))
        p.establish_variables("g", "y", data=df)
        nt.assert_equal(p.group_names, ["c", "b", "d", "a"])

    def test_hue_order(self):
        """Check inferred and explicit ordering of the hue levels."""
        p = cat._CategoricalPlotter()

        # Test inferred hue order
        p.establish_variables("g", "y", "h", data=self.df)
        nt.assert_equal(p.hue_names, ["m", "n"])

        # Test specified hue order
        p.establish_variables("g", "y", "h", data=self.df,
                              hue_order=["n", "m"])
        nt.assert_equal(p.hue_names, ["n", "m"])

        # Test inferred hue order from a categorical hue input
        df = self.df.copy()
        df.h = df.h.astype("category")
        df.h = df.h.cat.reorder_categories(["n", "m"])
        p.establish_variables("g", "y", "h", data=df)
        nt.assert_equal(p.hue_names, ["n", "m"])

        # Test that a category with no observations keeps its position
        df.h = (df.h.cat.add_categories("o")
                    .cat.reorder_categories(["o", "m", "n"]))
        p.establish_variables("g", "y", "h", data=df)
        nt.assert_equal(p.hue_names, ["o", "m", "n"])

    def test_plot_units(self):
        """Check that ``plot_units`` is None unless ``units`` is given."""
        p = cat._CategoricalPlotter()
        p.establish_variables("g", "y", "h", data=self.df)
        nt.assert_is(p.plot_units, None)

        p.establish_variables("g", "y", "h", data=self.df, units="u")
        for group, units in zip(["a", "b", "c"], p.plot_units):
            npt.assert_array_equal(units, self.u[self.g == group])

    def test_infer_orient(self):
        """Check orientation inference from the input variable types."""
        p = cat._CategoricalPlotter()

        cats = pd.Series(["a", "b", "c"] * 10)
        nums = pd.Series(self.rs.randn(30))

        nt.assert_equal(p.infer_orient(cats, nums), "v")
        nt.assert_equal(p.infer_orient(nums, cats), "h")
        nt.assert_equal(p.infer_orient(nums, None), "h")
        nt.assert_equal(p.infer_orient(None, nums), "v")
        nt.assert_equal(p.infer_orient(nums, nums, "vert"), "v")
        nt.assert_equal(p.infer_orient(nums, nums, "hori"), "h")

        with nt.assert_raises(ValueError):
            p.infer_orient(cats, cats)

        # Repeat with integer values stored as a categorical dtype
        cats = pd.Series([0, 1, 2] * 10, dtype="category")
        nt.assert_equal(p.infer_orient(cats, nums), "v")
        nt.assert_equal(p.infer_orient(nums, cats), "h")

        with nt.assert_raises(ValueError):
            p.infer_orient(cats, cats)

    def test_default_palettes(self):
        """Check the default colors with and without a hue variable."""
        p = cat._CategoricalPlotter()

        # Test palette mapping the x position
        p.establish_variables("g", "y", data=self.df)
        p.establish_colors(None, None, 1)
        nt.assert_equal(p.colors, palettes.color_palette(n_colors=3))

        # Test palette mapping the hue position
        p.establish_variables("g", "y", "h", data=self.df)
        p.establish_colors(None, None, 1)
        nt.assert_equal(p.colors, palettes.color_palette(n_colors=2))

    def test_default_palette_with_many_levels(self):
        """Check the colors used when levels outnumber the active palette."""
        with palettes.color_palette(["blue", "red"], 2):
            p = cat._CategoricalPlotter()
            p.establish_variables("g", "y", data=self.df)
            p.establish_colors(None, None, 1)
            npt.assert_array_equal(p.colors,
                                   palettes.husl_palette(3, l=.7))  # noqa

    def test_specific_color(self):
        """Check colors derived from a single ``color`` argument."""
        p = cat._CategoricalPlotter()

        # Test the same color for each x position
        p.establish_variables("g", "y", data=self.df)
        p.establish_colors("blue", None, 1)
        blue_rgb = mpl.colors.colorConverter.to_rgb("blue")
        nt.assert_equal(p.colors, [blue_rgb] * 3)

        # Test a color-based blend for the hue mapping
        p.establish_variables("g", "y", "h", data=self.df)
        p.establish_colors("#ff0022", None, 1)
        rgba_array = np.array(palettes.light_palette("#ff0022", 2))
        npt.assert_array_almost_equal(p.colors,
                                      rgba_array[:, :3])

    def test_specific_palette(self):
        """Check colors derived from a named ``palette`` argument."""
        p = cat._CategoricalPlotter()

        # Test palette mapping the x position
        p.establish_variables("g", "y", data=self.df)
        p.establish_colors(None, "dark", 1)
        nt.assert_equal(p.colors, palettes.color_palette("dark", 3))

        # Test palette mapping the hue position
        p.establish_variables("g", "y", "h", data=self.df)
        p.establish_colors(None, "muted", 1)
        nt.assert_equal(p.colors, palettes.color_palette("muted", 2))

        # Test that specified palette overrides specified color
        p = cat._CategoricalPlotter()
        p.establish_variables("g", "y", data=self.df)
        p.establish_colors("blue", "deep", 1)
        nt.assert_equal(p.colors, palettes.color_palette("deep", 3))

    def test_dict_as_palette(self):
        """Check a palette given as a mapping from hue level to color."""
        p = cat._CategoricalPlotter()
        p.establish_variables("g", "y", "h", data=self.df)
        pal = {"m": (0, 0, 1), "n": (1, 0, 0)}
        p.establish_colors(None, pal, 1)
        nt.assert_equal(p.colors, [(0, 0, 1), (1, 0, 0)])

    def test_palette_desaturation(self):
        """Check the colors produced with a saturation of .5."""
        p = cat._CategoricalPlotter()
        p.establish_variables("g", "y", data=self.df)
        p.establish_colors((0, 0, 1), None, .5)
        nt.assert_equal(p.colors, [(.25, .25, .75)] * 3)

        p.establish_colors(None, [(0, 0, 1), (1, 0, 0), "w"], .5)
        nt.assert_equal(p.colors, [(.25, .25, .75),
                                   (.75, .25, .25),
                                   (1, 1, 1)])
class TestCategoricalStatPlotter(CategoricalFixture):
    """Tests for estimates and error bars on ``cat._CategoricalStatPlotter``."""

    def test_no_bootstrappig(self):
        """Check that ``confint`` is empty when ``None`` is passed as the CI."""
        p = cat._CategoricalStatPlotter()
        p.establish_variables("g", "y", data=self.df)
        p.estimate_statistic(np.mean, None, 100, None)
        npt.assert_array_equal(p.confint, np.array([]))

        p.establish_variables("g", "y", "h", data=self.df)
        p.estimate_statistic(np.mean, None, 100, None)
        npt.assert_array_equal(p.confint, np.array([[], [], []]))

    def test_single_layer_stats(self):
        """Compare un-nested means and CIs to normal-theory values."""
        p = cat._CategoricalStatPlotter()

        g = pd.Series(np.repeat(list("abc"), 100))
        y = pd.Series(np.random.RandomState(0).randn(300))

        p.establish_variables(g, y)
        p.estimate_statistic(np.mean, 95, 10000, None)

        nt.assert_equal(p.statistic.shape, (3,))
        nt.assert_equal(p.confint.shape, (3, 2))

        npt.assert_array_almost_equal(p.statistic,
                                      y.groupby(g).mean())

        for ci, (_, grp_y) in zip(p.confint, y.groupby(g)):
            sem = stats.sem(grp_y)
            mean = grp_y.mean()
            half_ci = stats.norm.ppf(.975) * sem
            ci_want = mean - half_ci, mean + half_ci
            # Only 2 decimals: the computed interval is an approximation
            npt.assert_array_almost_equal(ci_want, ci, 2)

    def test_single_layer_stats_with_units(self):
        """Check that passing ``units`` keeps the means but widens the CIs."""
        p = cat._CategoricalStatPlotter()

        g = pd.Series(np.repeat(list("abc"), 90))
        y = pd.Series(np.random.RandomState(0).randn(270))
        u = pd.Series(np.repeat(np.tile(list("xyz"), 30), 3))
        # Shift two of the units so that values differ strongly across units
        y[u == "x"] -= 3
        y[u == "y"] += 3

        p.establish_variables(g, y)
        p.estimate_statistic(np.mean, 95, 10000, None)
        stat1, ci1 = p.statistic, p.confint

        p.establish_variables(g, y, units=u)
        p.estimate_statistic(np.mean, 95, 10000, None)
        stat2, ci2 = p.statistic, p.confint

        npt.assert_array_equal(stat1, stat2)
        ci1_size = ci1[:, 1] - ci1[:, 0]
        ci2_size = ci2[:, 1] - ci2[:, 0]
        npt.assert_array_less(ci1_size, ci2_size)

    def test_single_layer_stats_with_missing_data(self):
        """Check that an ordered level with no data yields NaN estimates."""
        p = cat._CategoricalStatPlotter()

        g = pd.Series(np.repeat(list("abc"), 100))
        y = pd.Series(np.random.RandomState(0).randn(300))

        # "d" appears in the order but not in the data
        p.establish_variables(g, y, order=list("abdc"))
        p.estimate_statistic(np.mean, 95, 10000, None)

        nt.assert_equal(p.statistic.shape, (4,))
        nt.assert_equal(p.confint.shape, (4, 2))

        mean = y[g == "b"].mean()
        sem = stats.sem(y[g == "b"])
        half_ci = stats.norm.ppf(.975) * sem
        ci = mean - half_ci, mean + half_ci
        npt.assert_almost_equal(p.statistic[1], mean)
        npt.assert_array_almost_equal(p.confint[1], ci, 2)

        npt.assert_equal(p.statistic[2], np.nan)
        npt.assert_array_equal(p.confint[2], (np.nan, np.nan))

    def test_nested_stats(self):
        """Compare hue-nested means and CIs to normal-theory values."""
        p = cat._CategoricalStatPlotter()

        g = pd.Series(np.repeat(list("abc"), 100))
        h = pd.Series(np.tile(list("xy"), 150))
        y = pd.Series(np.random.RandomState(0).randn(300))

        p.establish_variables(g, y, h)
        p.estimate_statistic(np.mean, 95, 50000, None)

        nt.assert_equal(p.statistic.shape, (3, 2))
        nt.assert_equal(p.confint.shape, (3, 2, 2))

        npt.assert_array_almost_equal(p.statistic,
                                      y.groupby([g, h]).mean().unstack())

        for ci_g, (_, grp_y) in zip(p.confint, y.groupby(g)):
            # h alternates "x", "y", so strided slices select each hue level
            for ci, hue_y in zip(ci_g, [grp_y[::2], grp_y[1::2]]):
                sem = stats.sem(hue_y)
                mean = hue_y.mean()
                half_ci = stats.norm.ppf(.975) * sem
                ci_want = mean - half_ci, mean + half_ci
                npt.assert_array_almost_equal(ci_want, ci, 2)

    def test_bootstrap_seed(self):
        """Check that repeating the call with the same final argument (0)
        reproduces ``confint`` exactly."""
        p = cat._CategoricalStatPlotter()

        g = pd.Series(np.repeat(list("abc"), 100))
        h = pd.Series(np.tile(list("xy"), 150))
        y = pd.Series(np.random.RandomState(0).randn(300))

        p.establish_variables(g, y, h)
        p.estimate_statistic(np.mean, 95, 1000, 0)
        confint_1 = p.confint
        p.estimate_statistic(np.mean, 95, 1000, 0)
        confint_2 = p.confint

        npt.assert_array_equal(confint_1, confint_2)

    def test_nested_stats_with_units(self):
        """Check that ``units`` keeps nested means but widens the CIs."""
        p = cat._CategoricalStatPlotter()

        g = pd.Series(np.repeat(list("abc"), 90))
        h = pd.Series(np.tile(list("xy"), 135))
        u = pd.Series(np.repeat(list("ijkijk"), 45))
        y = pd.Series(np.random.RandomState(0).randn(270))
        # Shift two of the units so that values differ strongly across units
        y[u == "i"] -= 3
        y[u == "k"] += 3

        p.establish_variables(g, y, h)
        p.estimate_statistic(np.mean, 95, 10000, None)
        stat1, ci1 = p.statistic, p.confint

        p.establish_variables(g, y, h, units=u)
        p.estimate_statistic(np.mean, 95, 10000, None)
        stat2, ci2 = p.statistic, p.confint

        npt.assert_array_equal(stat1, stat2)
        ci1_size = ci1[:, 0, 1] - ci1[:, 0, 0]
        ci2_size = ci2[:, 0, 1] - ci2[:, 0, 0]
        npt.assert_array_less(ci1_size, ci2_size)

    def test_nested_stats_with_missing_data(self):
        """Check NaN estimates for group and hue levels that have no data."""
        p = cat._CategoricalStatPlotter()

        g = pd.Series(np.repeat(list("abc"), 100))
        y = pd.Series(np.random.RandomState(0).randn(300))
        h = pd.Series(np.tile(list("xy"), 150))

        # Group "d" and hue "z" appear in the orders but not in the data
        p.establish_variables(g, y, h,
                              order=list("abdc"),
                              hue_order=list("zyx"))
        p.estimate_statistic(np.mean, 95, 50000, None)

        nt.assert_equal(p.statistic.shape, (4, 3))
        nt.assert_equal(p.confint.shape, (4, 3, 2))

        mean = y[(g == "b") & (h == "x")].mean()
        sem = stats.sem(y[(g == "b") & (h == "x")])
        half_ci = stats.norm.ppf(.975) * sem
        ci = mean - half_ci, mean + half_ci
        npt.assert_almost_equal(p.statistic[1, 2], mean)
        npt.assert_array_almost_equal(p.confint[1, 2], ci, 2)

        npt.assert_array_equal(p.statistic[:, 0], [np.nan] * 4)
        npt.assert_array_equal(p.statistic[2], [np.nan] * 3)
        npt.assert_array_equal(p.confint[:, 0],
                               np.zeros((4, 2)) * np.nan)
        npt.assert_array_equal(p.confint[2],
                               np.zeros((3, 2)) * np.nan)

    def test_sd_error_bars(self):
        """Check that the "sd" option gives mean +/- one standard deviation."""
        p = cat._CategoricalStatPlotter()

        g = pd.Series(np.repeat(list("abc"), 100))
        y = pd.Series(np.random.RandomState(0).randn(300))

        p.establish_variables(g, y)
        p.estimate_statistic(np.mean, "sd", None, None)

        nt.assert_equal(p.statistic.shape, (3,))
        nt.assert_equal(p.confint.shape, (3, 2))

        npt.assert_array_almost_equal(p.statistic,
                                      y.groupby(g).mean())

        for ci, (_, grp_y) in zip(p.confint, y.groupby(g)):
            mean = grp_y.mean()
            half_ci = np.std(grp_y)
            ci_want = mean - half_ci, mean + half_ci
            npt.assert_array_almost_equal(ci_want, ci, 2)

    def test_nested_sd_error_bars(self):
        """Check the "sd" option for hue-nested data."""
        p = cat._CategoricalStatPlotter()

        g = pd.Series(np.repeat(list("abc"), 100))
        h = pd.Series(np.tile(list("xy"), 150))
        y = pd.Series(np.random.RandomState(0).randn(300))

        p.establish_variables(g, y, h)
        p.estimate_statistic(np.mean, "sd", None, None)

        nt.assert_equal(p.statistic.shape, (3, 2))
        nt.assert_equal(p.confint.shape, (3, 2, 2))

        npt.assert_array_almost_equal(p.statistic,
                                      y.groupby([g, h]).mean().unstack())

        for ci_g, (_, grp_y) in zip(p.confint, y.groupby(g)):
            # h alternates "x", "y", so strided slices select each hue level
            for ci, hue_y in zip(ci_g, [grp_y[::2], grp_y[1::2]]):
                mean = hue_y.mean()
                half_ci = np.std(hue_y)
                ci_want = mean - half_ci, mean + half_ci
                npt.assert_array_almost_equal(ci_want, ci, 2)

    def test_draw_cis(self):
        """Check the lines that ``draw_confints`` adds to an Axes."""
        p = cat._CategoricalStatPlotter()

        # Test vertical CIs
        p.orient = "v"

        f, ax = plt.subplots()
        at_group = [0, 1]
        confints = [(.5, 1.5), (.25, .8)]
        colors = [".2", ".3"]
        p.draw_confints(ax, at_group, confints, colors)

        lines = ax.lines
        for line, at, ci, c in zip(lines, at_group, confints, colors):
            x, y = line.get_xydata().T
            npt.assert_array_equal(x, [at, at])
            npt.assert_array_equal(y, ci)
            nt.assert_equal(line.get_color(), c)

        plt.close("all")

        # Test horizontal CIs
        p.orient = "h"

        f, ax = plt.subplots()
        p.draw_confints(ax, at_group, confints, colors)

        lines = ax.lines
        for line, at, ci, c in zip(lines, at_group, confints, colors):
            x, y = line.get_xydata().T
            npt.assert_array_equal(x, ci)
            npt.assert_array_equal(y, [at, at])
            nt.assert_equal(line.get_color(), c)

        plt.close("all")

        # Test vertical CIs with endcaps
        p.orient = "v"

        f, ax = plt.subplots()
        p.draw_confints(ax, at_group, confints, colors, capsize=0.3)
        capline = ax.lines[-1]
        caplinestart = capline.get_xdata()[0]
        caplineend = capline.get_xdata()[1]
        caplinelength = abs(caplineend - caplinestart)
        nt.assert_almost_equal(caplinelength, 0.3)
        nt.assert_equal(len(ax.lines), 6)

        plt.close("all")

        # Test horizontal CIs with endcaps
        p.orient = "h"

        f, ax = plt.subplots()
        p.draw_confints(ax, at_group, confints, colors, capsize=0.3)
        capline = ax.lines[-1]
        caplinestart = capline.get_ydata()[0]
        caplineend = capline.get_ydata()[1]
        caplinelength = abs(caplineend - caplinestart)
        nt.assert_almost_equal(caplinelength, 0.3)
        nt.assert_equal(len(ax.lines), 6)

        # Close this figure too so it does not outlive its section
        plt.close("all")

        # Test extra keyword arguments
        f, ax = plt.subplots()
        p.draw_confints(ax, at_group, confints, colors, lw=4)
        line = ax.lines[0]
        nt.assert_equal(line.get_linewidth(), 4)

        plt.close("all")

        # Test errwidth is set appropriately
        f, ax = plt.subplots()
        p.draw_confints(ax, at_group, confints, colors, errwidth=2)
        capline = ax.lines[-1]
        # Use the public accessor rather than the private attribute
        nt.assert_equal(capline.get_linewidth(), 2)
        nt.assert_equal(len(ax.lines), 2)

        plt.close("all")
class TestBoxPlotter(CategoricalFixture):
    """Tests for ``cat._BoxPlotter`` and the ``cat.boxplot`` function."""

    default_kws = dict(x=None, y=None, hue=None, data=None,
                       order=None, hue_order=None,
                       orient=None, color=None, palette=None,
                       saturation=.75, width=.8, dodge=True,
                       fliersize=5, linewidth=None)

    def test_nested_width(self):
        """Check ``nested_width`` for default, narrower and undodged boxes."""
        cases = [
            ({}, .4 * .98),
            ({"width": .6}, .3 * .98),
            ({"dodge": False}, .8),
        ]
        for overrides, expected in cases:
            kws = self.default_kws.copy()
            kws.update(overrides)
            plotter = cat._BoxPlotter(**kws)
            plotter.establish_variables("g", "y", "h", data=self.df)
            nt.assert_equal(plotter.nested_width, expected)

    def test_hue_offsets(self):
        """Check ``hue_offsets`` for two and three hue levels."""
        plotter = cat._BoxPlotter(**self.default_kws)
        plotter.establish_variables("g", "y", "h", data=self.df)
        npt.assert_array_equal(plotter.hue_offsets, [-.2, .2])

        narrow_kws = self.default_kws.copy()
        narrow_kws["width"] = .6

        plotter = cat._BoxPlotter(**narrow_kws)
        plotter.establish_variables("g", "y", "h", data=self.df)
        npt.assert_array_equal(plotter.hue_offsets, [-.15, .15])

        # Swap the grouping and hue roles, still with width=.6
        plotter = cat._BoxPlotter(**narrow_kws)
        plotter.establish_variables("h", "y", "g", data=self.df)
        npt.assert_array_almost_equal(plotter.hue_offsets, [-.2, 0, .2])

    def test_axes_data(self):
        """Check the number of box artists with and without hue nesting."""
        for args, n_boxes in [(("g", "y"), 3), (("g", "y", "h"), 6)]:
            ax = cat.boxplot(*args, data=self.df)
            nt.assert_equal(len(ax.artists), n_boxes)
            plt.close("all")

    def test_box_colors(self):
        """Check box face colors against the current color palette."""
        ax = cat.boxplot("g", "y", data=self.df, saturation=1)
        expected_colors = palettes.color_palette(n_colors=3)
        for box, color in zip(ax.artists, expected_colors):
            nt.assert_equal(box.get_facecolor()[:3], color)
        plt.close("all")

        ax = cat.boxplot("g", "y", "h", data=self.df, saturation=1)
        expected_colors = palettes.color_palette(n_colors=2) * 2
        for box, color in zip(ax.artists, expected_colors):
            nt.assert_equal(box.get_facecolor()[:3], color)
        plt.close("all")

    def test_draw_missing_boxes(self):
        """Check that an ``order`` level absent from the data adds no box."""
        levels = ["a", "b", "c", "d"]
        ax = cat.boxplot("g", "y", data=self.df, order=levels)
        nt.assert_equal(len(ax.artists), 3)

    def test_missing_data(self):
        """Check box counts when some observations are NaN."""
        x = ["a", "a", "b", "b", "c", "c", "d", "d"]
        h = ["x", "y", "x", "y", "x", "y", "x", "y"]
        y = self.rs.randn(8)

        # Both "d" observations missing
        y[-2:] = np.nan
        ax = cat.boxplot(x, y)
        nt.assert_equal(len(ax.artists), 3)
        plt.close("all")

        # Only the ("d", "x") observation missing
        y[-1] = 0
        ax = cat.boxplot(x, y, h)
        nt.assert_equal(len(ax.artists), 7)
        plt.close("all")

    def test_unaligned_index(self):
        """Check that a permuted-index ``y`` produces identical lines."""
        f, (ax1, ax2) = plt.subplots(2)
        cat.boxplot(self.g, self.y, ax=ax1)
        cat.boxplot(self.g, self.y_perm, ax=ax2)
        for line1, line2 in zip(ax1.lines, ax2.lines):
            assert np.array_equal(line1.get_xydata(), line2.get_xydata())

        f, (ax1, ax2) = plt.subplots(2)
        hue_order = self.h.unique()
        cat.boxplot(self.g, self.y, self.h, hue_order=hue_order, ax=ax1)
        cat.boxplot(self.g, self.y_perm, self.h,
                    hue_order=hue_order, ax=ax2)
        for line1, line2 in zip(ax1.lines, ax2.lines):
            assert np.array_equal(line1.get_xydata(), line2.get_xydata())

    def test_boxplots(self):
        """Smoke test the high level boxplot options."""
        calls = [
            (("y",), {}),
            ((), {"y": "y"}),
            (("g", "y"), {}),
            (("y", "g"), {"orient": "h"}),
            (("g", "y", "h"), {}),
            (("g", "y", "h"), {"order": list("nabc")}),
            (("g", "y", "h"), {"hue_order": list("omn")}),
            (("y", "g", "h"), {"orient": "h"}),
        ]
        for args, kwargs in calls:
            cat.boxplot(*args, data=self.df, **kwargs)
            plt.close("all")

    def test_axes_annotation(self):
        """Check axis labels, limits, ticks and legend text."""
        levels = ["a", "b", "c"]

        ax = cat.boxplot("g", "y", data=self.df)
        nt.assert_equal(ax.get_xlabel(), "g")
        nt.assert_equal(ax.get_ylabel(), "y")
        nt.assert_equal(ax.get_xlim(), (-.5, 2.5))
        npt.assert_array_equal(ax.get_xticks(), [0, 1, 2])
        tick_text = [label.get_text() for label in ax.get_xticklabels()]
        npt.assert_array_equal(tick_text, levels)
        plt.close("all")

        ax = cat.boxplot("g", "y", "h", data=self.df)
        nt.assert_equal(ax.get_xlabel(), "g")
        nt.assert_equal(ax.get_ylabel(), "y")
        npt.assert_array_equal(ax.get_xticks(), [0, 1, 2])
        tick_text = [label.get_text() for label in ax.get_xticklabels()]
        npt.assert_array_equal(tick_text, levels)
        legend_text = [text.get_text() for text in ax.legend_.get_texts()]
        npt.assert_array_equal(legend_text, ["m", "n"])
        plt.close("all")

        ax = cat.boxplot("y", "g", data=self.df, orient="h")
        nt.assert_equal(ax.get_xlabel(), "y")
        nt.assert_equal(ax.get_ylabel(), "g")
        nt.assert_equal(ax.get_ylim(), (2.5, -.5))
        npt.assert_array_equal(ax.get_yticks(), [0, 1, 2])
        tick_text = [label.get_text() for label in ax.get_yticklabels()]
        npt.assert_array_equal(tick_text, levels)
        plt.close("all")
class TestViolinPlotter(CategoricalFixture):
    """Tests for ``cat._ViolinPlotter`` and the ``cat.violinplot`` function."""

    default_kws = dict(x=None, y=None, hue=None, data=None,
                       order=None, hue_order=None,
                       bw="scott", cut=2, scale="area", scale_hue=True,
                       gridsize=100, width=.8, inner="box", split=False,
                       dodge=True, orient=None, linewidth=None,
                       color=None, palette=None, saturation=.75)

    def test_split_error(self):
        """Check that split=True with the three-level "g" hue raises."""
        kws = self.default_kws.copy()
        kws.update(x="h", y="y", hue="g", data=self.df, split=True)
        with nt.assert_raises(ValueError):
            cat._ViolinPlotter(**kws)

    def test_no_observations(self):
        """Check support/density for a group with no valid observations."""
        plotter = cat._ViolinPlotter(**self.default_kws)

        # Single layer of grouping: the only "b" value is NaN
        x = ["a", "a", "b"]
        y = self.rs.randn(3)
        y[-1] = np.nan
        plotter.establish_variables(x, y)

        plotter.estimate_densities("scott", 2, "area", True, 20)
        nt.assert_equal(len(plotter.support[0]), 20)
        nt.assert_equal(len(plotter.support[1]), 0)
        nt.assert_equal(len(plotter.density[0]), 20)
        nt.assert_equal(len(plotter.density[1]), 1)
        nt.assert_equal(plotter.density[1].item(), 1)

        plotter.estimate_densities("scott", 2, "count", True, 20)
        nt.assert_equal(plotter.density[1].item(), 0)

        # Nested grouping: there are no ("b", "n") observations
        x = ["a"] * 4 + ["b"] * 2
        y = self.rs.randn(6)
        h = ["m", "n"] * 2 + ["m"] * 2
        plotter.establish_variables(x, y, h)

        plotter.estimate_densities("scott", 2, "area", True, 20)
        nt.assert_equal(len(plotter.support[1][0]), 20)
        nt.assert_equal(len(plotter.support[1][1]), 0)
        nt.assert_equal(len(plotter.density[1][0]), 20)
        nt.assert_equal(len(plotter.density[1][1]), 1)
        nt.assert_equal(plotter.density[1][1].item(), 1)

        plotter.estimate_densities("scott", 2, "count", False, 20)
        nt.assert_equal(plotter.density[1][1].item(), 0)

    def test_single_observation(self):
        """Check support/density for a group with exactly one observation."""
        plotter = cat._ViolinPlotter(**self.default_kws)

        # Single layer of grouping: "b" has one value
        x = ["a", "a", "b"]
        y = self.rs.randn(3)
        plotter.establish_variables(x, y)

        plotter.estimate_densities("scott", 2, "area", True, 20)
        nt.assert_equal(len(plotter.support[0]), 20)
        nt.assert_equal(len(plotter.support[1]), 1)
        nt.assert_equal(len(plotter.density[0]), 20)
        nt.assert_equal(len(plotter.density[1]), 1)
        nt.assert_equal(plotter.density[1].item(), 1)

        plotter.estimate_densities("scott", 2, "count", True, 20)
        nt.assert_equal(plotter.density[1].item(), .5)

        # Nested grouping: ("a", "n") has one value
        x = ["b"] * 4 + ["a"] * 3
        y = self.rs.randn(7)
        h = (["m", "n"] * 4)[:-1]
        plotter.establish_variables(x, y, h)

        plotter.estimate_densities("scott", 2, "area", True, 20)
        nt.assert_equal(len(plotter.support[1][0]), 20)
        nt.assert_equal(len(plotter.support[1][1]), 1)
        nt.assert_equal(len(plotter.density[1][0]), 20)
        nt.assert_equal(len(plotter.density[1][1]), 1)
        nt.assert_equal(plotter.density[1][1].item(), 1)

        plotter.estimate_densities("scott", 2, "count", False, 20)
        nt.assert_equal(plotter.density[1][1].item(), .5)

    def test_dwidth(self):
        """Check ``dwidth`` as width, hue and split settings accumulate."""
        kws = self.default_kws.copy()
        steps = [
            (dict(x="g", y="y", data=self.df), .4),
            (dict(width=.4), .2),
            (dict(hue="h", width=.8), .2),
            (dict(split=True), .4),
        ]
        # Each step layers its changes on top of the previous ones
        for changes, expected in steps:
            kws.update(changes)
            plotter = cat._ViolinPlotter(**kws)
            nt.assert_equal(plotter.dwidth, expected)

    def test_scale_area(self):
        """Check ``scale_area`` for flat and nested density lists."""
        kws = self.default_kws.copy()
        kws["scale"] = "area"
        plotter = cat._ViolinPlotter(**kws)

        # Test single layer of grouping
        plotter.hue_names = None
        density = [self.rs.uniform(0, .8, 50), self.rs.uniform(0, .2, 50)]
        peaks_before = np.array([d.max() for d in density])
        plotter.scale_area(density, peaks_before, False)
        peaks_after = np.array([d.max() for d in density])
        nt.assert_equal(peaks_after[0], 1)
        nt.assert_equal(peaks_before[1] / peaks_before[0],
                        peaks_after[1] / peaks_after[0])

        # Test nested grouping scaling across all densities
        plotter.hue_names = ["foo", "bar"]
        density = [[self.rs.uniform(0, .8, 50), self.rs.uniform(0, .2, 50)],
                   [self.rs.uniform(0, .1, 50), self.rs.uniform(0, .02, 50)]]
        peaks_before = np.array([[r.max() for r in row] for row in density])
        plotter.scale_area(density, peaks_before, False)
        peaks_after = np.array([[r.max() for r in row] for row in density])
        nt.assert_equal(peaks_after[0, 0], 1)
        nt.assert_equal(peaks_before[1, 1] / peaks_before[0, 0],
                        peaks_after[1, 1] / peaks_after[0, 0])

        # Test nested grouping scaling within hue
        plotter.hue_names = ["foo", "bar"]
        density = [[self.rs.uniform(0, .8, 50), self.rs.uniform(0, .2, 50)],
                   [self.rs.uniform(0, .1, 50), self.rs.uniform(0, .02, 50)]]
        peaks_before = np.array([[r.max() for r in row] for row in density])
        plotter.scale_area(density, peaks_before, True)
        peaks_after = np.array([[r.max() for r in row] for row in density])
        nt.assert_equal(peaks_after[0, 0], 1)
        nt.assert_equal(peaks_after[1, 0], 1)
        nt.assert_equal(peaks_before[1, 1] / peaks_before[1, 0],
                        peaks_after[1, 1] / peaks_after[1, 0])

    def test_scale_width(self):
        """Check that ``scale_width`` gives every density a maximum of 1."""
        kws = self.default_kws.copy()
        kws["scale"] = "width"
        plotter = cat._ViolinPlotter(**kws)

        # Test single layer of grouping
        plotter.hue_names = None
        density = [self.rs.uniform(0, .8, 50), self.rs.uniform(0, .2, 50)]
        plotter.scale_width(density)
        peaks = np.array([d.max() for d in density])
        npt.assert_array_equal(peaks, [1, 1])

        # Test nested grouping
        plotter.hue_names = ["foo", "bar"]
        density = [[self.rs.uniform(0, .8, 50), self.rs.uniform(0, .2, 50)],
                   [self.rs.uniform(0, .1, 50), self.rs.uniform(0, .02, 50)]]
        plotter.scale_width(density)
        peaks = np.array([[r.max() for r in row] for row in density])
        npt.assert_array_equal(peaks, [[1, 1], [1, 1]])

    def test_scale_count(self):
        """Check ``scale_count`` for flat and nested density lists."""
        kws = self.default_kws.copy()
        kws["scale"] = "count"
        plotter = cat._ViolinPlotter(**kws)

        # Test single layer of grouping
        plotter.hue_names = None
        density = [self.rs.uniform(0, .8, 20), self.rs.uniform(0, .2, 40)]
        counts = np.array([20, 40])
        plotter.scale_count(density, counts, False)
        peaks = np.array([d.max() for d in density])
        npt.assert_array_equal(peaks, [.5, 1])

        # Test nested grouping scaling across all densities
        plotter.hue_names = ["foo", "bar"]
        density = [[self.rs.uniform(0, .8, 5), self.rs.uniform(0, .2, 40)],
                   [self.rs.uniform(0, .1, 100), self.rs.uniform(0, .02, 50)]]
        counts = np.array([[5, 40], [100, 50]])
        plotter.scale_count(density, counts, False)
        peaks = np.array([[r.max() for r in row] for row in density])
        npt.assert_array_equal(peaks, [[.05, .4], [1, .5]])

        # Test nested grouping scaling within hue
        plotter.hue_names = ["foo", "bar"]
        density = [[self.rs.uniform(0, .8, 5), self.rs.uniform(0, .2, 40)],
                   [self.rs.uniform(0, .1, 100), self.rs.uniform(0, .02, 50)]]
        counts = np.array([[5, 40], [100, 50]])
        plotter.scale_count(density, counts, True)
        peaks = np.array([[r.max() for r in row] for row in density])
        npt.assert_array_equal(peaks, [[.125, 1], [1, .5]])

    def test_bad_scale(self):
        """Check that an unknown ``scale`` value raises ValueError."""
        kws = self.default_kws.copy()
        kws["scale"] = "not_a_scale_type"
        with nt.assert_raises(ValueError):
            cat._ViolinPlotter(**kws)

    def test_kde_fit(self):
        """Check ``fit_kde`` with a reference rule and a numeric factor."""
        plotter = cat._ViolinPlotter(**self.default_kws)
        sample = self.y
        sample_std = sample.std(ddof=1)

        # Test reference rule bandwidth
        kde, bw = plotter.fit_kde(sample, "scott")
        nt.assert_is_instance(kde, stats.gaussian_kde)
        nt.assert_equal(kde.factor, kde.scotts_factor())
        nt.assert_equal(bw, kde.scotts_factor() * sample_std)

        # Test numeric scale factor
        kde, bw = plotter.fit_kde(self.y, .2)
        nt.assert_is_instance(kde, stats.gaussian_kde)
        nt.assert_equal(kde.factor, .2)
        nt.assert_equal(bw, .2 * sample_std)

    def test_draw_to_density(self):
        """Check the line ``draw_to_density`` draws for each split mode."""
        plotter = cat._ViolinPlotter(**self.default_kws)
        # p.dwidth will be 1 for easier testing
        plotter.width = 2

        def drawn_line(value, support, density, split):
            # Draw on a fresh axes and return the single line's coordinates
            _, ax = plt.subplots()
            plotter.draw_to_density(ax, 0, value, support, density, split)
            xs, ys = ax.lines[0].get_xydata().T
            plt.close("all")
            return xs, ys

        # Test vertical plots: full, left half, right half
        support = np.array([.2, .6])
        density = np.array([.1, .4])
        vertical_cases = [
            (False, [.99 * -.4, .99 * .4]),
            ("left", [.99 * -.4, 0]),
            ("right", [0, .99 * .4]),
        ]
        for split, expected_x in vertical_cases:
            x, y = drawn_line(.5, support, density, split)
            npt.assert_array_equal(x, expected_x)
            npt.assert_array_equal(y, [.5, .5])

        # Switch orientation to test horizontal plots
        plotter.orient = "h"
        support = np.array([.2, .5])
        density = np.array([.3, .7])
        horizontal_cases = [
            (False, [.99 * -.7, .99 * .7]),
            ("left", [.99 * -.7, 0]),
            ("right", [0, .99 * .7]),
        ]
        for split, expected_y in horizontal_cases:
            x, y = drawn_line(.6, support, density, split)
            npt.assert_array_equal(x, [.6, .6])
            npt.assert_array_equal(y, expected_y)

    def test_draw_single_observations(self):
        """Check the line ``draw_single_observation`` draws."""
        plotter = cat._ViolinPlotter(**self.default_kws)
        plotter.width = 2

        # Test vertical plot
        _, ax = plt.subplots()
        plotter.draw_single_observation(ax, 1, 1.5, 1)
        x, y = ax.lines[0].get_xydata().T
        npt.assert_array_equal(x, [0, 2])
        npt.assert_array_equal(y, [1.5, 1.5])
        plt.close("all")

        # Test horizontal plot
        plotter.orient = "h"
        _, ax = plt.subplots()
        plotter.draw_single_observation(ax, 2, 2.2, .5)
        x, y = ax.lines[0].get_xydata().T
        npt.assert_array_equal(x, [2.2, 2.2])
        npt.assert_array_equal(y, [1.5, 2.5])
        plt.close("all")

    def test_draw_box_lines(self):
        """Check the quartile line and median point from ``draw_box_lines``."""
        # Test vertical plot
        kws = self.default_kws.copy()
        kws.update(y="y", data=self.df, inner=None)
        plotter = cat._ViolinPlotter(**kws)

        _, ax = plt.subplots()
        plotter.draw_box_lines(ax, self.y, plotter.support[0],
                               plotter.density[0], 0)
        nt.assert_equal(len(ax.lines), 2)

        q25, q50, q75 = np.percentile(self.y, [25, 50, 75])
        _, line_y = ax.lines[1].get_xydata().T
        npt.assert_array_equal(line_y, [q25, q75])

        _, point_y = ax.collections[0].get_offsets().T
        nt.assert_equal(point_y, q50)

        plt.close("all")

        # Test horizontal plot
        kws = self.default_kws.copy()
        kws.update(x="y", data=self.df, inner=None)
        plotter = cat._ViolinPlotter(**kws)

        _, ax = plt.subplots()
        plotter.draw_box_lines(ax, self.y, plotter.support[0],
                               plotter.density[0], 0)
        nt.assert_equal(len(ax.lines), 2)

        q25, q50, q75 = np.percentile(self.y, [25, 50, 75])
        line_x, _ = ax.lines[1].get_xydata().T
        npt.assert_array_equal(line_x, [q25, q75])

        point_x, _ = ax.collections[0].get_offsets().T
        nt.assert_equal(point_x, q50)

        plt.close("all")

    def test_draw_quartiles(self):
        """Check that ``draw_quartiles`` draws lines at the three quartiles."""
        kws = self.default_kws.copy()
        kws.update(y="y", data=self.df, inner=None)
        plotter = cat._ViolinPlotter(**kws)

        _, ax = plt.subplots()
        plotter.draw_quartiles(ax, self.y, plotter.support[0],
                               plotter.density[0], 0)
        quartiles = np.percentile(self.y, [25, 50, 75])
        for quartile, line in zip(quartiles, ax.lines):
            _, y = line.get_xydata().T
            npt.assert_array_equal(y, [quartile, quartile])

    def test_draw_points(self):
        """Check the offsets of the collection ``draw_points`` adds."""
        plotter = cat._ViolinPlotter(**self.default_kws)
        zeros = np.zeros_like(self.y)

        # Test vertical plot
        _, ax = plt.subplots()
        plotter.draw_points(ax, self.y, 0)
        x, y = ax.collections[0].get_offsets().T
        npt.assert_array_equal(x, zeros)
        npt.assert_array_equal(y, self.y)
        plt.close("all")

        # Test horizontal plot
        plotter.orient = "h"
        _, ax = plt.subplots()
        plotter.draw_points(ax, self.y, 0)
        x, y = ax.collections[0].get_offsets().T
        npt.assert_array_equal(x, self.y)
        npt.assert_array_equal(y, zeros)
        plt.close("all")

    def test_draw_sticks(self):
        """Check that ``draw_stick_lines`` draws one line per observation."""
        kws = self.default_kws.copy()
        kws.update(y="y", data=self.df, inner=None)
        plotter = cat._ViolinPlotter(**kws)

        # Test vertical plot
        _, ax = plt.subplots()
        plotter.draw_stick_lines(ax, self.y, plotter.support[0],
                                 plotter.density[0], 0)
        for observed, line in zip(self.y, ax.lines):
            _, y = line.get_xydata().T
            npt.assert_array_equal(y, [observed, observed])
        plt.close("all")

        # Test horizontal plot
        plotter.orient = "h"
        _, ax = plt.subplots()
        plotter.draw_stick_lines(ax, self.y, plotter.support[0],
                                 plotter.density[0], 0)
        for observed, line in zip(self.y, ax.lines):
            x, _ = line.get_xydata().T
            npt.assert_array_equal(x, [observed, observed])
        plt.close("all")

    def test_validate_inner(self):
        """Check that an unknown ``inner`` value raises ValueError."""
        kws = self.default_kws.copy()
        kws.update(inner="bad_inner")
        with nt.assert_raises(ValueError):
            cat._ViolinPlotter(**kws)

    def test_draw_violinplots(self):
        """Check collection counts and face colors from ``draw_violins``."""
        kws = self.default_kws.copy()

        # Test single vertical violin
        red = (1, 0, 0, 1)
        kws.update(y="y", data=self.df, inner=None,
                   saturation=1, color=red)
        plotter = cat._ViolinPlotter(**kws)
        _, ax = plt.subplots()
        plotter.draw_violins(ax)
        nt.assert_equal(len(ax.collections), 1)
        npt.assert_array_equal(ax.collections[0].get_facecolors(),
                               [(1, 0, 0, 1)])
        plt.close("all")

        # Test single horizontal violin
        green = (0, 1, 0, 1)
        kws.update(x="y", y=None, color=green)
        plotter = cat._ViolinPlotter(**kws)
        _, ax = plt.subplots()
        plotter.draw_violins(ax)
        nt.assert_equal(len(ax.collections), 1)
        npt.assert_array_equal(ax.collections[0].get_facecolors(),
                               [(0, 1, 0, 1)])
        plt.close("all")

        # Test multiple vertical violins
        kws.update(x="g", y="y", color=None)
        plotter = cat._ViolinPlotter(**kws)
        _, ax = plt.subplots()
        plotter.draw_violins(ax)
        nt.assert_equal(len(ax.collections), 3)
        for violin, color in zip(ax.collections, palettes.color_palette()):
            npt.assert_array_equal(violin.get_facecolors()[0, :-1], color)
        plt.close("all")

        # Test multiple violins with hue nesting
        kws.update(hue="h")
        plotter = cat._ViolinPlotter(**kws)
        _, ax = plt.subplots()
        plotter.draw_violins(ax)
        nt.assert_equal(len(ax.collections), 6)
        expected_colors = palettes.color_palette(n_colors=2) * 3
        for violin, color in zip(ax.collections, expected_colors):
            npt.assert_array_equal(violin.get_facecolors()[0, :-1], color)
        plt.close("all")

        # Test multiple split violins
        kws.update(split=True, palette="muted")
        plotter = cat._ViolinPlotter(**kws)
        _, ax = plt.subplots()
        plotter.draw_violins(ax)
        nt.assert_equal(len(ax.collections), 6)
        expected_colors = palettes.color_palette("muted", n_colors=2) * 3
        for violin, color in zip(ax.collections, expected_colors):
            npt.assert_array_equal(violin.get_facecolors()[0, :-1], color)
        plt.close("all")

    def test_draw_violinplots_no_observations(self):
        """Check that empty groups add neither a violin nor a line."""
        kws = self.default_kws.copy()
        kws["inner"] = None

        # Test single layer of grouping
        x = ["a", "a", "b"]
        y = self.rs.randn(3)
        y[-1] = np.nan
        kws.update(x=x, y=y)
        plotter = cat._ViolinPlotter(**kws)

        _, ax = plt.subplots()
        plotter.draw_violins(ax)
        nt.assert_equal(len(ax.collections), 1)
        nt.assert_equal(len(ax.lines), 0)
        plt.close("all")

        # Test nested hue grouping
        x = ["a"] * 4 + ["b"] * 2
        y = self.rs.randn(6)
        h = ["m", "n"] * 2 + ["m"] * 2
        kws.update(x=x, y=y, hue=h)
        plotter = cat._ViolinPlotter(**kws)

        _, ax = plt.subplots()
        plotter.draw_violins(ax)
        nt.assert_equal(len(ax.collections), 3)
        nt.assert_equal(len(ax.lines), 0)
        plt.close("all")

    def test_draw_violinplots_single_observations(self):
        """Check that a one-observation group is drawn as a single line."""
        kws = self.default_kws.copy()
        kws["inner"] = None

        # Test single layer of grouping
        x = ["a", "a", "b"]
        y = self.rs.randn(3)
        kws.update(x=x, y=y)
        plotter = cat._ViolinPlotter(**kws)

        _, ax = plt.subplots()
        plotter.draw_violins(ax)
        nt.assert_equal(len(ax.collections), 1)
        nt.assert_equal(len(ax.lines), 1)
        plt.close("all")

        # Test nested hue grouping
        x = ["b"] * 4 + ["a"] * 3
        y = self.rs.randn(7)
        h = (["m", "n"] * 4)[:-1]
        kws.update(x=x, y=y, hue=h)
        plotter = cat._ViolinPlotter(**kws)

        _, ax = plt.subplots()
        plotter.draw_violins(ax)
        nt.assert_equal(len(ax.collections), 3)
        nt.assert_equal(len(ax.lines), 1)
        plt.close("all")

        # Test nested hue grouping with split
        kws["split"] = True
        plotter = cat._ViolinPlotter(**kws)

        _, ax = plt.subplots()
        plotter.draw_violins(ax)
        nt.assert_equal(len(ax.collections), 3)
        nt.assert_equal(len(ax.lines), 1)
        plt.close("all")

    def test_violinplots(self):
        """Smoke test the high level violinplot options."""
        calls = [
            (("y",), {}),
            ((), {"y": "y"}),
            (("g", "y"), {}),
            (("y", "g"), {"orient": "h"}),
            (("g", "y", "h"), {}),
            (("g", "y", "h"), {"order": list("nabc")}),
            (("g", "y", "h"), {"hue_order": list("omn")}),
            (("y", "g", "h"), {"orient": "h"}),
        ]
        for args, kwargs in calls:
            cat.violinplot(*args, data=self.df, **kwargs)
            plt.close("all")

        for inner in ["box", "quart", "point", "stick", None]:
            cat.violinplot("g", "y", data=self.df, inner=inner)
            plt.close("all")

            cat.violinplot("g", "y", "h", data=self.df, inner=inner)
            plt.close("all")

            cat.violinplot("g", "y", "h", data=self.df,
                           inner=inner, split=True)
            plt.close("all")
class TestCategoricalScatterPlotter(CategoricalFixture):
    """Tests for the color and legend logic of the scatter plotter base."""

    def test_group_point_colors(self):
        """Check that each point's color value equals its group index."""
        plotter = cat._CategoricalScatterPlotter()
        plotter.establish_variables(x="g", y="y", data=self.df)
        plotter.establish_colors(None, "deep", 1)

        colors_by_group = plotter.point_colors
        n_groups = self.g.unique().size
        assert len(colors_by_group) == n_groups

        for group_index, group_colors in enumerate(colors_by_group):
            for color in group_colors:
                assert color == group_index

    def test_hue_point_colors(self):
        """Check that each point's color value equals its hue level index."""
        plotter = cat._CategoricalScatterPlotter()
        hue_order = self.h.unique().tolist()
        plotter.establish_variables(x="g", y="y", hue="h",
                                    hue_order=hue_order, data=self.df)
        plotter.establish_colors(None, "deep", 1)

        colors_by_group = plotter.point_colors
        assert len(colors_by_group) == self.g.unique().size

        for group_index, group_colors in enumerate(colors_by_group):
            hues_in_group = np.asarray(plotter.plot_hues[group_index])
            for point_hue, point_color in zip(hues_in_group, group_colors):
                assert point_color == plotter.hue_names.index(point_hue)

    def test_scatterplot_legend(self):
        """Check legend labels and handle colors from ``add_legend_data``."""
        plotter = cat._CategoricalScatterPlotter()
        hue_order = ["m", "n"]
        plotter.establish_variables(x="g", y="y", hue="h",
                                    hue_order=hue_order, data=self.df)
        plotter.establish_colors(None, "deep", 1)
        deep_colors = palettes.color_palette("deep", self.h.unique().size)

        f, ax = plt.subplots()
        plotter.add_legend_data(ax)
        legend = ax.legend()

        for index, text in enumerate(legend.get_texts()):
            nt.assert_equal(text.get_text(), hue_order[index])

        for index, handle in enumerate(legend.legendHandles):
            rgb = handle.get_facecolor()[0, :3]
            nt.assert_equal(tuple(rgb), tuple(deep_colors[index]))
class TestStripPlotter(CategoricalFixture):
    """Tests for the ``cat.stripplot`` function."""

    def test_stripplot_vertical(self):
        """Check unjittered vertical strip positions and colors."""
        palette = palettes.color_palette()

        ax = cat.stripplot("g", "y", jitter=False, data=self.df)
        for i, (_, group_values) in enumerate(self.y.groupby(self.g)):
            points = ax.collections[i]
            x, y = points.get_offsets().T

            npt.assert_array_equal(x, np.ones(len(x)) * i)
            npt.assert_array_equal(y, group_values)

            npt.assert_equal(points.get_facecolors()[0, :3], palette[i])

    def test_stripplot_horiztonal(self):
        """Check unjittered horizontal strips with a categorical "g"."""
        df = self.df.copy()
        df.g = df.g.astype("category")

        ax = cat.stripplot("y", "g", jitter=False, data=df)
        for i, (_, group_values) in enumerate(self.y.groupby(self.g)):
            x, y = ax.collections[i].get_offsets().T

            npt.assert_array_equal(x, group_values)
            npt.assert_array_equal(y, np.ones(len(x)) * i)

    def test_stripplot_jitter(self):
        """Check that jittered points stay within .1 of the group position."""
        palette = palettes.color_palette()

        ax = cat.stripplot("g", "y", data=self.df, jitter=True)
        for i, (_, group_values) in enumerate(self.y.groupby(self.g)):
            points = ax.collections[i]
            x, y = points.get_offsets().T

            center = np.ones(len(x)) * i
            npt.assert_array_less(center - .1, x)
            npt.assert_array_less(x, center + .1)
            npt.assert_array_equal(y, group_values)

            npt.assert_equal(points.get_facecolors()[0, :3], palette[i])

    def test_dodge_nested_stripplot_vertical(self):
        """Check dodged vertical strips: one collection per group and hue."""
        palette = palettes.color_palette()
        offsets = [-.2, .2]

        ax = cat.stripplot("g", "y", "h", data=self.df,
                           jitter=False, dodge=True)
        for i, (_, group_values) in enumerate(self.y.groupby(self.g)):
            for j, (_, hue_values) in enumerate(
                    group_values.groupby(self.h)):
                points = ax.collections[i * 2 + j]
                x, y = points.get_offsets().T

                npt.assert_array_equal(x, np.ones(len(x)) * i + offsets[j])
                npt.assert_array_equal(y, hue_values)

                face = points.get_facecolors()[0, :3]
                assert tuple(face) == palette[j]

    def test_dodge_nested_stripplot_horizontal(self):
        """Check dodged horizontal strips with a categorical "g"."""
        df = self.df.copy()
        df.g = df.g.astype("category")
        offsets = [-.2, .2]

        ax = cat.stripplot("y", "g", "h", data=df,
                           jitter=False, dodge=True)
        for i, (_, group_values) in enumerate(self.y.groupby(self.g)):
            for j, (_, hue_values) in enumerate(
                    group_values.groupby(self.h)):
                x, y = ax.collections[i * 2 + j].get_offsets().T

                npt.assert_array_equal(x, hue_values)
                npt.assert_array_equal(y, np.ones(len(x)) * i + offsets[j])

    def test_nested_stripplot_vertical(self):
        """Check undodged vertical strips: one collection per group."""
        # Test a simple vertical strip plot
        ax = cat.stripplot("g", "y", "h", data=self.df,
                           jitter=False, dodge=False)
        for i, (_, group_values) in enumerate(self.y.groupby(self.g)):
            x, y = ax.collections[i].get_offsets().T

            npt.assert_array_equal(x, np.ones(len(x)) * i)
            npt.assert_array_equal(y, group_values)

    def test_nested_stripplot_horizontal(self):
        """Check undodged horizontal strips with a categorical "g"."""
        df = self.df.copy()
        df.g = df.g.astype("category")

        ax = cat.stripplot("y", "g", "h", data=df,
                           jitter=False, dodge=False)
        for i, (_, group_values) in enumerate(self.y.groupby(self.g)):
            x, y = ax.collections[i].get_offsets().T

            npt.assert_array_equal(x, group_values)
            npt.assert_array_equal(y, np.ones(len(x)) * i)

    def test_three_strip_points(self):
        """Check that three points get three identical RGBA face colors."""
        ax = cat.stripplot(x=np.arange(3))
        face_colors = ax.collections[0].get_facecolor()
        nt.assert_equal(face_colors.shape, (3, 4))
        npt.assert_array_equal(face_colors[0], face_colors[1])

    def test_unaligned_index(self):
        """Check that a permuted-index ``y`` gives the same points/colors."""
        hue_order = self.h.unique()

        # Plain, hue-nested, and hue-nested with dodge, in that order
        variants = [
            ((), {}),
            ((self.h,), {"hue_order": hue_order}),
            ((self.h,), {"dodge": True, "hue_order": hue_order}),
        ]

        for extra_args, extra_kws in variants:
            f, (ax1, ax2) = plt.subplots(2)
            cat.stripplot(self.g, self.y, *extra_args,
                          ax=ax1, **extra_kws)
            cat.stripplot(self.g, self.y_perm, *extra_args,
                          ax=ax2, **extra_kws)

            for p1, p2 in zip(ax1.collections, ax2.collections):
                y1, y2 = p1.get_offsets()[:, 1], p2.get_offsets()[:, 1]
                # Compare after sorting since point order may differ
                assert np.array_equal(np.sort(y1), np.sort(y2))
                assert np.array_equal(p1.get_facecolors()[np.argsort(y1)],
                                      p2.get_facecolors()[np.argsort(y2)])
- class TestSwarmPlotter(CategoricalFixture):
- default_kws = dict(x=None, y=None, hue=None, data=None,
- order=None, hue_order=None, dodge=False,
- orient=None, color=None, palette=None)
def test_could_overlap(self):
    """Check which earlier points ``could_overlap`` keeps as neighbors."""
    plotter = cat._SwarmPlotter(**self.default_kws)
    earlier_points = [(0, 0), (1, .5), (.5, .5)]
    found = plotter.could_overlap((1, 1), earlier_points, 1)
    npt.assert_array_equal(found, [(1, .5), (.5, .5)])
def test_position_candidates(self):
    """Check the candidate positions generated around two neighbors."""
    plotter = cat._SwarmPlotter(**self.default_kws)
    point = (0, 1)
    nearby = [(0, 1), (0, 1.5)]

    got = plotter.position_candidates(point, nearby, 1)

    same_height_dx = 1.05
    half_up_dx = np.sqrt(1 - .5 ** 2) * 1.05
    expected = [(0, 1),
                (-same_height_dx, 1), (same_height_dx, 1),
                (half_up_dx, 1), (-half_up_dx, 1)]
    npt.assert_array_equal(got, expected)
def test_find_first_non_overlapping_candidate(self):
    """Check that the first candidate clear of the neighbor is chosen."""
    plotter = cat._SwarmPlotter(**self.default_kws)
    options = [(.5, 1), (1, 1), (1.5, 1)]
    placed = np.array([(0, 1)])
    chosen = plotter.first_non_overlapping_candidate(options, placed, 1)
    npt.assert_array_equal(chosen, (1, 1))
def test_beeswarm(self):
    """Check that ``beeswarm`` separates points and keeps their y values."""
    plotter = cat._SwarmPlotter(**self.default_kws)
    diameter = self.y.diff().mean() * 1.5

    x_start = np.zeros(self.y.size)
    y_sorted = np.sort(self.y)
    start_xy = np.c_[x_start, y_sorted]

    swarm = plotter.beeswarm(start_xy, diameter)

    # Every pairwise distance (upper triangle) must exceed the diameter
    pairwise = spatial.distance.cdist(swarm, swarm)
    upper = pairwise[np.triu_indices_from(pairwise, 1)]
    npt.assert_array_less(diameter, upper)

    npt.assert_array_equal(y_sorted, swarm[:, 1])
def test_add_gutters(self):
    """Check that ``add_gutters`` clips positions to center +/- width / 2."""
    plotter = cat._SwarmPlotter(**self.default_kws)
    positions = np.array([0, -1, .4, .8])
    clipped = plotter.add_gutters(positions, 0, 1)
    npt.assert_array_equal(clipped, np.array([0, -.5, .4, .5]))
def test_swarmplot_vertical(self):
    """Check y offsets and face colors of a vertical swarm plot."""
    palette = palettes.color_palette()

    ax = cat.swarmplot("g", "y", data=self.df)
    for i, (_, group_values) in enumerate(self.y.groupby(self.g)):
        points = ax.collections[i]

        x, y = points.get_offsets().T
        npt.assert_array_almost_equal(y, np.sort(group_values))

        face = points.get_facecolors()[0, :3]
        npt.assert_equal(face, palette[i])
def test_swarmplot_horizontal(self):
    """Check x offsets and face colors of a horizontal swarm plot."""
    palette = palettes.color_palette()

    ax = cat.swarmplot("y", "g", data=self.df, orient="h")
    for i, (_, group_values) in enumerate(self.y.groupby(self.g)):
        points = ax.collections[i]

        x, y = points.get_offsets().T
        npt.assert_array_almost_equal(x, np.sort(group_values))

        face = points.get_facecolors()[0, :3]
        npt.assert_equal(face, palette[i])
def test_dodge_nested_swarmplot_vertical(self):
    """Check a dodged vertical swarm: one collection per group and hue."""
    palette = palettes.color_palette()

    ax = cat.swarmplot("g", "y", "h", data=self.df, dodge=True)
    for i, (_, group_values) in enumerate(self.y.groupby(self.g)):
        for j, (_, hue_values) in enumerate(group_values.groupby(self.h)):
            points = ax.collections[i * 2 + j]

            x, y = points.get_offsets().T
            npt.assert_array_almost_equal(y, np.sort(hue_values))

            face = points.get_facecolors()[0, :3]
            assert tuple(face) == palette[j]
def test_dodge_nested_swarmplot_horizontal(self):
    """With ``dodge=True`` and ``orient="h"`` every (group, hue) pair gets
    its own collection with sorted x values and the hue's palette color."""
    pal = palettes.color_palette()
    ax = cat.swarmplot("y", "g", "h", data=self.df, orient="h", dodge=True)

    for g_idx, (_, by_group) in enumerate(self.y.groupby(self.g)):
        for h_idx, (_, subset) in enumerate(by_group.groupby(self.h)):
            # Collections are indexed group-major, two per group.
            coll = ax.collections[g_idx * 2 + h_idx]

            x_pos, _ = coll.get_offsets().T
            npt.assert_array_almost_equal(x_pos, np.sort(subset))

            rgb = coll.get_facecolors()[0, :3]
            assert tuple(rgb) == pal[h_idx]
def test_nested_swarmplot_vertical(self):
    """Without dodging, each group is one collection whose points are
    sorted by value and individually colored by their hue level."""
    ax = cat.swarmplot("g", "y", "h", data=self.df)
    pal = palettes.color_palette()

    hue_levels = self.h.unique().tolist()
    hues_by_group = list(self.h.groupby(self.g))

    for idx, (_, group_vals) in enumerate(self.y.groupby(self.g)):
        coll = ax.collections[idx]
        _, y_pos = coll.get_offsets().T

        # Order that sorts this group's values; reused to align hues below.
        order = np.argsort(group_vals)
        npt.assert_array_almost_equal(y_pos, group_vals.iloc[order])

        group_hues = hues_by_group[idx][1]
        sorted_hues = group_hues.values[order.values]
        for hue, face in zip(sorted_hues, coll.get_facecolors()):
            assert tuple(face[:3]) == pal[hue_levels.index(hue)]
def test_nested_swarmplot_horizontal(self):
    """Horizontal counterpart: each group is one collection whose x values
    are sorted and whose points are colored by their hue level."""
    ax = cat.swarmplot("y", "g", "h", data=self.df, orient="h")
    pal = palettes.color_palette()

    hue_levels = self.h.unique().tolist()
    hues_by_group = list(self.h.groupby(self.g))

    for idx, (_, group_vals) in enumerate(self.y.groupby(self.g)):
        coll = ax.collections[idx]
        x_pos, _ = coll.get_offsets().T

        # Order that sorts this group's values; reused to align hues below.
        order = np.argsort(group_vals)
        npt.assert_array_almost_equal(x_pos, group_vals.iloc[order])

        group_hues = hues_by_group[idx][1]
        sorted_hues = group_hues.values[order.values]
        for hue, face in zip(sorted_hues, coll.get_facecolors()):
            assert tuple(face[:3]) == pal[hue_levels.index(hue)]
def test_unaligned_index(self):
    """Swarmplots drawn from ``self.y`` and from ``self.y_perm`` must have
    matching value coordinates and face colors, with and without hue."""

    def check_same_swarm(*hue_args, **hue_kws):
        # Draw both variants on separate axes and compare them pairwise.
        f, (ax1, ax2) = plt.subplots(2)
        cat.swarmplot(self.g, self.y, *hue_args, ax=ax1, **hue_kws)
        cat.swarmplot(self.g, self.y_perm, *hue_args, ax=ax2, **hue_kws)
        for first, second in zip(ax1.collections, ax2.collections):
            assert np.allclose(first.get_offsets()[:, 1],
                               second.get_offsets()[:, 1])
            assert np.array_equal(first.get_facecolors(),
                                  second.get_facecolors())

    # No hue variable.
    check_same_swarm()

    # Hue variable, points sharing a swarm.
    check_same_swarm(self.h, hue_order=self.h.unique())

    # Hue variable with dodging.
    check_same_swarm(self.h, dodge=True, hue_order=self.h.unique())
class TestBarPlotter(CategoricalFixture):
    """Tests for ``cat._BarPlotter`` and the public ``cat.barplot`` function.

    Assertions are written as plain ``assert`` statements (plus
    ``numpy.testing`` for approximate comparisons) instead of going through
    ``nose.tools``, so the class only needs pytest and numpy to run.
    """

    # Keyword arguments accepted by ``cat._BarPlotter``; tests copy and
    # update this dict rather than mutating it.
    default_kws = dict(
        x=None, y=None, hue=None, data=None,
        estimator=np.mean, ci=95, n_boot=100, units=None, seed=None,
        order=None, hue_order=None,
        orient=None, color=None, palette=None,
        saturation=.75, errcolor=".26", errwidth=None,
        capsize=None, dodge=True
    )

    @staticmethod
    def _assert_same_bars(ax1, ax2):
        """Assert that two axes hold equivalent error-bar lines and bars."""
        for l1, l2 in zip(ax1.lines, ax2.lines):
            assert pytest.approx(l1.get_xydata()) == l2.get_xydata()
        for p1, p2 in zip(ax1.patches, ax2.patches):
            assert pytest.approx(p1.get_xy()) == p2.get_xy()
            assert pytest.approx(p1.get_height()) == p2.get_height()
            assert pytest.approx(p1.get_width()) == p2.get_width()

    def test_nested_width(self):
        """``nested_width`` is .8 split across hue levels unless dodge is off."""
        kws = self.default_kws.copy()

        p = cat._BarPlotter(**kws)
        p.establish_variables("g", "y", "h", data=self.df)
        assert p.nested_width == .8 / 2

        p = cat._BarPlotter(**kws)
        p.establish_variables("h", "y", "g", data=self.df)
        assert p.nested_width == .8 / 3

        # Without dodging the bars keep the full width.
        kws["dodge"] = False
        p = cat._BarPlotter(**kws)
        p.establish_variables("h", "y", "g", data=self.df)
        assert p.nested_width == .8

    def test_draw_vertical_bars(self):
        """Vertical bars: count, color, position, and height per group."""
        kws = self.default_kws.copy()
        kws.update(x="g", y="y", data=self.df)
        p = cat._BarPlotter(**kws)

        f, ax = plt.subplots()
        p.draw_bars(ax, {})

        assert len(ax.patches) == len(p.plot_data)
        assert len(ax.lines) == len(p.plot_data)

        # Drop the alpha channel before comparing with the plotter colors.
        for bar, color in zip(ax.patches, p.colors):
            assert bar.get_facecolor()[:-1] == color

        positions = np.arange(len(p.plot_data)) - p.width / 2
        for bar, pos, stat in zip(ax.patches, positions, p.statistic):
            assert bar.get_x() == pos
            assert bar.get_width() == p.width
            assert bar.get_y() == 0
            assert bar.get_height() == stat

    def test_draw_horizontal_bars(self):
        """Horizontal bars: same checks with the x/y roles swapped."""
        kws = self.default_kws.copy()
        kws.update(x="y", y="g", orient="h", data=self.df)
        p = cat._BarPlotter(**kws)

        f, ax = plt.subplots()
        p.draw_bars(ax, {})

        assert len(ax.patches) == len(p.plot_data)
        assert len(ax.lines) == len(p.plot_data)

        for bar, color in zip(ax.patches, p.colors):
            assert bar.get_facecolor()[:-1] == color

        positions = np.arange(len(p.plot_data)) - p.width / 2
        for bar, pos, stat in zip(ax.patches, positions, p.statistic):
            assert bar.get_y() == pos
            assert bar.get_height() == p.width
            assert bar.get_x() == 0
            assert bar.get_width() == stat

    def test_draw_nested_vertical_bars(self):
        """Nested vertical bars are drawn hue-major with per-hue colors."""
        kws = self.default_kws.copy()
        kws.update(x="g", y="y", hue="h", data=self.df)
        p = cat._BarPlotter(**kws)

        f, ax = plt.subplots()
        p.draw_bars(ax, {})

        n_groups, n_hues = len(p.plot_data), len(p.hue_names)
        assert len(ax.patches) == n_groups * n_hues
        assert len(ax.lines) == n_groups * n_hues

        # The first ``n_groups`` patches belong to the first hue level.
        for bar in ax.patches[:n_groups]:
            assert bar.get_facecolor()[:-1] == p.colors[0]
        for bar in ax.patches[n_groups:]:
            assert bar.get_facecolor()[:-1] == p.colors[1]

        positions = np.arange(len(p.plot_data))
        for bar, pos in zip(ax.patches[:n_groups], positions):
            npt.assert_almost_equal(bar.get_x(), pos - p.width / 2)
            npt.assert_almost_equal(bar.get_width(), p.nested_width)

        for bar, stat in zip(ax.patches, p.statistic.T.flat):
            npt.assert_almost_equal(bar.get_y(), 0)
            npt.assert_almost_equal(bar.get_height(), stat)

    def test_draw_nested_horizontal_bars(self):
        """Nested horizontal bars: same checks with the x/y roles swapped."""
        kws = self.default_kws.copy()
        kws.update(x="y", y="g", hue="h", orient="h", data=self.df)
        p = cat._BarPlotter(**kws)

        f, ax = plt.subplots()
        p.draw_bars(ax, {})

        n_groups, n_hues = len(p.plot_data), len(p.hue_names)
        assert len(ax.patches) == n_groups * n_hues
        assert len(ax.lines) == n_groups * n_hues

        for bar in ax.patches[:n_groups]:
            assert bar.get_facecolor()[:-1] == p.colors[0]
        for bar in ax.patches[n_groups:]:
            assert bar.get_facecolor()[:-1] == p.colors[1]

        positions = np.arange(len(p.plot_data))
        for bar, pos in zip(ax.patches[:n_groups], positions):
            npt.assert_almost_equal(bar.get_y(), pos - p.width / 2)
            npt.assert_almost_equal(bar.get_height(), p.nested_width)

        for bar, stat in zip(ax.patches, p.statistic.T.flat):
            npt.assert_almost_equal(bar.get_x(), 0)
            npt.assert_almost_equal(bar.get_width(), stat)

    def test_draw_missing_bars(self):
        """Levels listed in ``order``/``hue_order`` still get a patch and line."""
        kws = self.default_kws.copy()

        order = list("abcd")
        kws.update(x="g", y="y", order=order, data=self.df)
        p = cat._BarPlotter(**kws)

        f, ax = plt.subplots()
        p.draw_bars(ax, {})

        assert len(ax.patches) == len(order)
        assert len(ax.lines) == len(order)

        plt.close("all")

        hue_order = list("mno")
        kws.update(x="g", y="y", hue="h", hue_order=hue_order, data=self.df)
        p = cat._BarPlotter(**kws)

        f, ax = plt.subplots()
        p.draw_bars(ax, {})

        assert len(ax.patches) == len(p.plot_data) * len(hue_order)
        assert len(ax.lines) == len(p.plot_data) * len(hue_order)

        plt.close("all")

    def test_unaligned_index(self):
        """Barplots from ``self.y`` and ``self.y_perm`` must be equivalent."""
        f, (ax1, ax2) = plt.subplots(2)
        cat.barplot(self.g, self.y, ci="sd", ax=ax1)
        cat.barplot(self.g, self.y_perm, ci="sd", ax=ax2)
        self._assert_same_bars(ax1, ax2)

        f, (ax1, ax2) = plt.subplots(2)
        hue_order = self.h.unique()
        cat.barplot(self.g, self.y, self.h, hue_order=hue_order, ci="sd",
                    ax=ax1)
        cat.barplot(self.g, self.y_perm, self.h,
                    hue_order=hue_order, ci="sd", ax=ax2)
        self._assert_same_bars(ax1, ax2)

    def test_barplot_colors(self):
        """Bar face colors follow ``palette`` / ``color`` when saturation=1."""
        # Test unnested palette colors
        kws = self.default_kws.copy()
        kws.update(x="g", y="y", data=self.df,
                   saturation=1, palette="muted")
        p = cat._BarPlotter(**kws)

        f, ax = plt.subplots()
        p.draw_bars(ax, {})

        palette = palettes.color_palette("muted", len(self.g.unique()))
        for patch, pal_color in zip(ax.patches, palette):
            assert patch.get_facecolor()[:-1] == pal_color

        plt.close("all")

        # Test single color
        color = (.2, .2, .3, 1)
        kws = self.default_kws.copy()
        kws.update(x="g", y="y", data=self.df,
                   saturation=1, color=color)
        p = cat._BarPlotter(**kws)

        f, ax = plt.subplots()
        p.draw_bars(ax, {})

        for patch in ax.patches:
            assert patch.get_facecolor() == color

        plt.close("all")

        # Test nested palette colors
        kws = self.default_kws.copy()
        kws.update(x="g", y="y", hue="h", data=self.df,
                   saturation=1, palette="Set2")
        p = cat._BarPlotter(**kws)

        f, ax = plt.subplots()
        p.draw_bars(ax, {})

        palette = palettes.color_palette("Set2", len(self.h.unique()))
        n_groups = len(self.g.unique())
        for patch in ax.patches[:n_groups]:
            assert patch.get_facecolor()[:-1] == palette[0]
        for patch in ax.patches[n_groups:]:
            assert patch.get_facecolor()[:-1] == palette[1]

        plt.close("all")

    def test_simple_barplots(self):
        """``cat.barplot`` draws the expected patch count and axis labels."""
        n_groups = len(self.g.unique())
        n_hues = len(self.h.unique())

        ax = cat.barplot("g", "y", data=self.df)
        assert len(ax.patches) == n_groups
        assert ax.get_xlabel() == "g"
        assert ax.get_ylabel() == "y"
        plt.close("all")

        ax = cat.barplot("y", "g", orient="h", data=self.df)
        assert len(ax.patches) == n_groups
        assert ax.get_xlabel() == "y"
        assert ax.get_ylabel() == "g"
        plt.close("all")

        ax = cat.barplot("g", "y", "h", data=self.df)
        assert len(ax.patches) == n_groups * n_hues
        assert ax.get_xlabel() == "g"
        assert ax.get_ylabel() == "y"
        plt.close("all")

        ax = cat.barplot("y", "g", "h", orient="h", data=self.df)
        assert len(ax.patches) == n_groups * n_hues
        assert ax.get_xlabel() == "y"
        assert ax.get_ylabel() == "g"
        plt.close("all")
class TestPointPlotter(CategoricalFixture):
    """Tests for ``cat._PointPlotter`` and the public ``cat.pointplot``.

    Assertions are written as plain ``assert`` statements and
    ``numpy.testing`` calls instead of going through ``nose.tools``.
    """

    # Keyword arguments accepted by ``cat._PointPlotter``; tests copy and
    # update this dict rather than mutating it.
    default_kws = dict(
        x=None, y=None, hue=None, data=None,
        estimator=np.mean, ci=95, n_boot=100, units=None, seed=None,
        order=None, hue_order=None,
        markers="o", linestyles="-", dodge=0,
        join=True, scale=1,
        orient=None, color=None, palette=None,
    )

    @staticmethod
    def _assert_same_points(ax1, ax2):
        """Assert that two axes hold equivalent lines and point offsets."""
        for l1, l2 in zip(ax1.lines, ax2.lines):
            assert pytest.approx(l1.get_xydata()) == l2.get_xydata()
        for p1, p2 in zip(ax1.collections, ax2.collections):
            assert pytest.approx(p1.get_offsets()) == p2.get_offsets()

    def test_different_defualt_colors(self):
        """Without hue or palette, every level uses the first palette color."""
        kws = self.default_kws.copy()
        kws.update(dict(x="g", y="y", data=self.df))
        p = cat._PointPlotter(**kws)
        color = palettes.color_palette()[0]
        npt.assert_array_equal(p.colors, [color, color, color])

    def test_hue_offsets(self):
        """``hue_offsets`` spreads hue levels symmetrically over ``dodge``."""
        kws = self.default_kws.copy()
        kws.update(dict(x="g", y="y", hue="h", data=self.df))

        p = cat._PointPlotter(**kws)
        npt.assert_array_equal(p.hue_offsets, [0, 0])

        kws.update(dict(dodge=.5))
        p = cat._PointPlotter(**kws)
        npt.assert_array_equal(p.hue_offsets, [-.25, .25])

        # Swap roles so that the hue variable has three levels.
        kws.update(dict(x="h", hue="g", dodge=0))
        p = cat._PointPlotter(**kws)
        npt.assert_array_equal(p.hue_offsets, [0, 0, 0])

        kws.update(dict(dodge=.3))
        p = cat._PointPlotter(**kws)
        npt.assert_array_equal(p.hue_offsets, [-.15, 0, .15])

    def test_draw_vertical_points(self):
        """Vertical points: one collection, one line per level plus one more."""
        kws = self.default_kws.copy()
        kws.update(x="g", y="y", data=self.df)
        p = cat._PointPlotter(**kws)

        f, ax = plt.subplots()
        p.draw_points(ax)

        assert len(ax.collections) == 1
        assert len(ax.lines) == len(p.plot_data) + 1
        points = ax.collections[0]
        assert len(points.get_offsets()) == len(p.plot_data)

        x, y = points.get_offsets().T
        npt.assert_array_equal(x, np.arange(len(p.plot_data)))
        npt.assert_array_equal(y, p.statistic)

        # Drop the alpha channel before comparing with the plotter colors.
        for got_color, want_color in zip(points.get_facecolors(),
                                         p.colors):
            npt.assert_array_equal(got_color[:-1], want_color)

    def test_draw_horizontal_points(self):
        """Horizontal points: same checks with the x/y roles swapped."""
        kws = self.default_kws.copy()
        kws.update(x="y", y="g", orient="h", data=self.df)
        p = cat._PointPlotter(**kws)

        f, ax = plt.subplots()
        p.draw_points(ax)

        assert len(ax.collections) == 1
        assert len(ax.lines) == len(p.plot_data) + 1
        points = ax.collections[0]
        assert len(points.get_offsets()) == len(p.plot_data)

        x, y = points.get_offsets().T
        npt.assert_array_equal(x, p.statistic)
        npt.assert_array_equal(y, np.arange(len(p.plot_data)))

        for got_color, want_color in zip(points.get_facecolors(),
                                         p.colors):
            npt.assert_array_equal(got_color[:-1], want_color)

    def test_draw_vertical_nested_points(self):
        """Nested vertical points: one collection per hue level."""
        kws = self.default_kws.copy()
        kws.update(x="g", y="y", hue="h", data=self.df)
        p = cat._PointPlotter(**kws)

        f, ax = plt.subplots()
        p.draw_points(ax)

        assert len(ax.collections) == 2
        assert len(ax.lines) == (
            len(p.plot_data) * len(p.hue_names) + len(p.hue_names)
        )

        # ``statistic.T`` iterates over hue levels, one row per collection.
        for points, numbers, color in zip(ax.collections,
                                          p.statistic.T,
                                          p.colors):

            assert len(points.get_offsets()) == len(p.plot_data)

            x, y = points.get_offsets().T
            npt.assert_array_equal(x, np.arange(len(p.plot_data)))
            npt.assert_array_equal(y, numbers)

            for got_color in points.get_facecolors():
                npt.assert_array_equal(got_color[:-1], color)

    def test_draw_horizontal_nested_points(self):
        """Nested horizontal points: same checks with x/y roles swapped."""
        kws = self.default_kws.copy()
        kws.update(x="y", y="g", hue="h", orient="h", data=self.df)
        p = cat._PointPlotter(**kws)

        f, ax = plt.subplots()
        p.draw_points(ax)

        assert len(ax.collections) == 2
        assert len(ax.lines) == (
            len(p.plot_data) * len(p.hue_names) + len(p.hue_names)
        )

        for points, numbers, color in zip(ax.collections,
                                          p.statistic.T,
                                          p.colors):

            assert len(points.get_offsets()) == len(p.plot_data)

            x, y = points.get_offsets().T
            npt.assert_array_equal(x, numbers)
            npt.assert_array_equal(y, np.arange(len(p.plot_data)))

            for got_color in points.get_facecolors():
                npt.assert_array_equal(got_color[:-1], color)

    def test_draw_missing_points(self):
        """Smoke test: drawing must not fail for absent hue levels or NaNs."""
        kws = self.default_kws.copy()
        df = self.df.copy()

        # Hue levels that do not occur in the data.
        kws.update(x="g", y="y", hue="h", hue_order=["x", "y"], data=df)
        p = cat._PointPlotter(**kws)
        f, ax = plt.subplots()
        p.draw_points(ax)

        # One hue level whose values are all missing.
        df.loc[df["h"] == "m", "y"] = np.nan
        kws.update(x="g", y="y", hue="h", data=df)
        p = cat._PointPlotter(**kws)
        f, ax = plt.subplots()
        p.draw_points(ax)

    def test_unaligned_index(self):
        """Pointplots from ``self.y`` and ``self.y_perm`` must be equivalent."""
        f, (ax1, ax2) = plt.subplots(2)
        cat.pointplot(self.g, self.y, ci="sd", ax=ax1)
        cat.pointplot(self.g, self.y_perm, ci="sd", ax=ax2)
        self._assert_same_points(ax1, ax2)

        f, (ax1, ax2) = plt.subplots(2)
        hue_order = self.h.unique()
        cat.pointplot(self.g, self.y, self.h,
                      hue_order=hue_order, ci="sd", ax=ax1)
        cat.pointplot(self.g, self.y_perm, self.h,
                      hue_order=hue_order, ci="sd", ax=ax2)
        self._assert_same_points(ax1, ax2)

    def test_pointplot_colors(self):
        """Lines and points follow ``color`` / ``palette`` settings."""
        # Test a single-color unnested plot
        color = (.2, .2, .3, 1)
        kws = self.default_kws.copy()
        kws.update(x="g", y="y", data=self.df, color=color)
        p = cat._PointPlotter(**kws)

        f, ax = plt.subplots()
        p.draw_points(ax)

        for line in ax.lines:
            assert line.get_color() == color[:-1]

        for got_color in ax.collections[0].get_facecolors():
            npt.assert_array_equal(rgb2hex(got_color), rgb2hex(color))

        plt.close("all")

        # Test a multi-color unnested plot
        palette = palettes.color_palette("Set1", 3)
        kws.update(x="g", y="y", data=self.df, palette="Set1")
        p = cat._PointPlotter(**kws)

        assert not p.join

        f, ax = plt.subplots()
        p.draw_points(ax)

        for line, pal_color in zip(ax.lines, palette):
            npt.assert_array_equal(line.get_color(), pal_color)

        for point_color, pal_color in zip(ax.collections[0].get_facecolors(),
                                          palette):
            npt.assert_array_equal(rgb2hex(point_color), rgb2hex(pal_color))

        plt.close("all")

        # Test a multi-colored nested plot
        palette = palettes.color_palette("dark", 2)
        kws.update(x="g", y="y", hue="h", data=self.df, palette="dark")
        p = cat._PointPlotter(**kws)

        f, ax = plt.subplots()
        p.draw_points(ax)

        # The first ``len(plot_data) + 1`` lines belong to the first hue.
        n_first = len(p.plot_data) + 1
        for line in ax.lines[:n_first]:
            assert line.get_color() == palette[0]
        for line in ax.lines[n_first:]:
            assert line.get_color() == palette[1]

        for i, pal_color in enumerate(palette):
            for point_color in ax.collections[i].get_facecolors():
                npt.assert_array_equal(point_color[:-1], pal_color)

        plt.close("all")

    def test_simple_pointplots(self):
        """``cat.pointplot`` draws the expected artists and axis labels."""
        n_groups = len(self.g.unique())
        n_hues = len(self.h.unique())

        ax = cat.pointplot("g", "y", data=self.df)
        assert len(ax.collections) == 1
        assert len(ax.lines) == n_groups + 1
        assert ax.get_xlabel() == "g"
        assert ax.get_ylabel() == "y"
        plt.close("all")

        ax = cat.pointplot("y", "g", orient="h", data=self.df)
        assert len(ax.collections) == 1
        assert len(ax.lines) == n_groups + 1
        assert ax.get_xlabel() == "y"
        assert ax.get_ylabel() == "g"
        plt.close("all")

        ax = cat.pointplot("g", "y", "h", data=self.df)
        assert len(ax.collections) == n_hues
        assert len(ax.lines) == n_groups * n_hues + n_hues
        assert ax.get_xlabel() == "g"
        assert ax.get_ylabel() == "y"
        plt.close("all")

        ax = cat.pointplot("y", "g", "h", orient="h", data=self.df)
        assert len(ax.collections) == n_hues
        assert len(ax.lines) == n_groups * n_hues + n_hues
        assert ax.get_xlabel() == "y"
        assert ax.get_ylabel() == "g"
        plt.close("all")
class TestCountPlot(CategoricalFixture):
    """Tests for the public ``cat.countplot`` function.

    Uses plain ``assert`` and ``pytest.raises`` rather than ``nose.tools``.
    """

    def test_plot_elements(self):
        """Count bars start at zero and have the per-level count as length."""
        n_groups = self.g.unique().size
        n_hues = self.h.unique().size
        # The expected length is the total size divided evenly over levels.
        per_level = self.g.size / n_groups

        ax = cat.countplot("g", data=self.df)
        assert len(ax.patches) == n_groups
        for p in ax.patches:
            assert p.get_y() == 0
            assert p.get_height() == per_level
        plt.close("all")

        ax = cat.countplot(y="g", data=self.df)
        assert len(ax.patches) == n_groups
        for p in ax.patches:
            assert p.get_x() == 0
            assert p.get_width() == per_level
        plt.close("all")

        ax = cat.countplot("g", hue="h", data=self.df)
        assert len(ax.patches) == n_groups * n_hues
        plt.close("all")

        ax = cat.countplot(y="g", hue="h", data=self.df)
        assert len(ax.patches) == n_groups * n_hues
        plt.close("all")

    def test_input_error(self):
        """Passing neither or both of ``x`` and ``y`` raises ``TypeError``."""
        with pytest.raises(TypeError):
            cat.countplot()

        with pytest.raises(TypeError):
            cat.countplot(x="g", y="h", data=self.df)
class TestCatPlot(CategoricalFixture):
    """Tests for the figure-level ``cat.catplot`` / ``cat.factorplot``.

    Uses plain ``assert`` and ``pytest.raises`` rather than ``nose.tools``.
    """

    def test_facet_organization(self):
        """``row``/``col`` variables determine the shape of the axes grid."""
        g = cat.catplot("g", "y", data=self.df)
        assert g.axes.shape == (1, 1)

        g = cat.catplot("g", "y", col="h", data=self.df)
        assert g.axes.shape == (1, 2)

        g = cat.catplot("g", "y", row="h", data=self.df)
        assert g.axes.shape == (2, 1)

        g = cat.catplot("g", "y", col="u", row="h", data=self.df)
        assert g.axes.shape == (2, 3)

    def test_plot_elements(self):
        """Each ``kind`` draws the expected number of artists."""
        n_groups = self.g.unique().size
        n_hues = self.h.unique().size

        # kind="point"
        g = cat.catplot("g", "y", data=self.df, kind="point")
        assert len(g.ax.collections) == 1
        assert len(g.ax.lines) == n_groups + 1

        g = cat.catplot("g", "y", "h", data=self.df, kind="point")
        assert len(g.ax.collections) == n_hues
        assert len(g.ax.lines) == (n_groups + 1) * n_hues

        # kind="bar"
        g = cat.catplot("g", "y", data=self.df, kind="bar")
        assert len(g.ax.patches) == n_groups
        assert len(g.ax.lines) == n_groups

        g = cat.catplot("g", "y", "h", data=self.df, kind="bar")
        assert len(g.ax.patches) == n_groups * n_hues
        assert len(g.ax.lines) == n_groups * n_hues

        # kind="count"
        g = cat.catplot("g", data=self.df, kind="count")
        assert len(g.ax.patches) == n_groups
        assert len(g.ax.lines) == 0

        g = cat.catplot("g", hue="h", data=self.df, kind="count")
        assert len(g.ax.patches) == n_groups * n_hues
        assert len(g.ax.lines) == 0

        # kind="box"
        g = cat.catplot("g", "y", data=self.df, kind="box")
        assert len(g.ax.artists) == n_groups

        g = cat.catplot("g", "y", "h", data=self.df, kind="box")
        assert len(g.ax.artists) == n_groups * n_hues

        # kind="violin"
        g = cat.catplot("g", "y", data=self.df,
                        kind="violin", inner=None)
        assert len(g.ax.collections) == n_groups

        g = cat.catplot("g", "y", "h", data=self.df,
                        kind="violin", inner=None)
        assert len(g.ax.collections) == n_groups * n_hues

        # kind="strip"
        g = cat.catplot("g", "y", data=self.df, kind="strip")
        assert len(g.ax.collections) == n_groups

        # With hue the expected count is a sum, not a product.
        g = cat.catplot("g", "y", "h", data=self.df, kind="strip")
        assert len(g.ax.collections) == n_groups + n_hues

    def test_bad_plot_kind_error(self):
        """An unknown ``kind`` raises ``ValueError``."""
        with pytest.raises(ValueError):
            cat.catplot("g", "y", data=self.df, kind="not_a_kind")

    def test_count_x_and_y(self):
        """``kind="count"`` with both ``x`` and ``y`` raises ``ValueError``."""
        with pytest.raises(ValueError):
            cat.catplot("g", "y", data=self.df, kind="count")

    def test_plot_colors(self):
        """``catplot`` colors match the corresponding axes-level function."""
        # Bar plots: compare patch face colors.
        ax = cat.barplot("g", "y", data=self.df)
        g = cat.catplot("g", "y", data=self.df, kind="bar")
        for p1, p2 in zip(ax.patches, g.ax.patches):
            assert p1.get_facecolor() == p2.get_facecolor()
        plt.close("all")

        ax = cat.barplot("g", "y", data=self.df, color="purple")
        g = cat.catplot("g", "y", data=self.df,
                        kind="bar", color="purple")
        for p1, p2 in zip(ax.patches, g.ax.patches):
            assert p1.get_facecolor() == p2.get_facecolor()
        plt.close("all")

        ax = cat.barplot("g", "y", data=self.df, palette="Set2")
        g = cat.catplot("g", "y", data=self.df,
                        kind="bar", palette="Set2")
        for p1, p2 in zip(ax.patches, g.ax.patches):
            assert p1.get_facecolor() == p2.get_facecolor()
        plt.close("all")

        # ``catplot`` without ``kind`` is compared against ``pointplot``.
        ax = cat.pointplot("g", "y", data=self.df)
        g = cat.catplot("g", "y", data=self.df)
        for l1, l2 in zip(ax.lines, g.ax.lines):
            assert l1.get_color() == l2.get_color()
        plt.close("all")

        ax = cat.pointplot("g", "y", data=self.df, color="purple")
        g = cat.catplot("g", "y", data=self.df, color="purple")
        for l1, l2 in zip(ax.lines, g.ax.lines):
            assert l1.get_color() == l2.get_color()
        plt.close("all")

        ax = cat.pointplot("g", "y", data=self.df, palette="Set2")
        g = cat.catplot("g", "y", data=self.df, palette="Set2")
        for l1, l2 in zip(ax.lines, g.ax.lines):
            assert l1.get_color() == l2.get_color()
        plt.close("all")

    def test_ax_kwarg_removal(self):
        """Passing ``ax`` warns and the supplied axes is left empty."""
        f, ax = plt.subplots()
        with pytest.warns(UserWarning):
            g = cat.catplot("g", "y", data=self.df, ax=ax)
        assert len(ax.collections) == 0
        assert len(g.ax.collections) > 0

    def test_factorplot(self):
        """``factorplot`` warns and draws one collection plus group lines."""
        with pytest.warns(UserWarning):
            g = cat.factorplot("g", "y", data=self.df)

        assert len(g.ax.collections) == 1
        want_lines = self.g.unique().size + 1
        assert len(g.ax.lines) == want_lines
class TestBoxenPlotter(CategoricalFixture):
    """Tests for ``cat._LVPlotter`` and the public ``cat.boxenplot``.

    Uses plain ``assert`` and ``numpy.testing`` rather than ``nose.tools``.
    """

    # Keyword arguments accepted by ``cat._LVPlotter``; tests copy and
    # update this dict rather than mutating it.
    default_kws = dict(x=None, y=None, hue=None, data=None,
                       order=None, hue_order=None,
                       orient=None, color=None, palette=None,
                       saturation=.75, width=.8, dodge=True,
                       k_depth='proportion', linewidth=None,
                       scale='exponential', outlier_prop=None,
                       showfliers=True)

    def ispatch(self, c):
        """Return True if ``c`` is a matplotlib ``PatchCollection``."""
        return isinstance(c, mpl.collections.PatchCollection)

    def ispath(self, c):
        """Return True if ``c`` is a matplotlib ``PathCollection``."""
        return isinstance(c, mpl.collections.PathCollection)

    def edge_calc(self, n, data):
        """Return the percentiles of ``data`` at ``0.5**n`` and ``1 - 0.5**n``.

        The two quantiles are de-duplicated (and thereby sorted) before
        being passed to ``np.percentile``, so ``n == 1`` yields one value.
        """
        q = np.asanyarray([0.5 ** n, 1 - 0.5 ** n]) * 100
        q = list(np.unique(q))
        return np.percentile(data, q)

    def test_box_ends_finite(self):
        """Box ends are finite and inside the data range; depths are > 0."""
        p = cat._LVPlotter(**self.default_kws)
        p.establish_variables("g", "y", data=self.df)

        # Collect into plain lists: the (box ends, k) pairs are ragged, and
        # packing them into one ndarray is rejected by newer NumPy.
        box_ends = []
        k_vals = []
        for group_data in p.plot_data:
            ends, k = p._lv_box_ends(group_data)
            box_ends.append(ends)
            k_vals.append(k)

        # Check that all the box ends are finite and are within
        # the bounds of the data
        assert all(np.all(np.isfinite(ends)) for ends in box_ends)

        def within(t):
            a, d = t
            return ((np.ravel(a) <= d.max())
                    & (np.ravel(a) >= d.min())).all()

        assert all(within(pair) for pair in zip(box_ends, p.plot_data))

        assert all((k > 0.) & np.isfinite(k) for k in k_vals)

    def test_box_ends_correct(self):
        """``_lv_box_ends`` on 0..99 matches percentile-derived edges."""
        n = 100
        linear_data = np.arange(n)
        expected_k = int(np.log2(n)) - int(np.log2(n * 0.007)) + 1
        expected_edges = [self.edge_calc(i, linear_data)
                          for i in range(expected_k + 2, 1, -1)]

        p = cat._LVPlotter(**self.default_kws)
        calc_edges, calc_k = p._lv_box_ends(linear_data)

        assert np.array_equal(expected_edges, calc_edges)
        assert expected_k == calc_k

    def test_outliers(self):
        """Edges and outliers are consistent for data with one extreme value."""
        n = 100
        # 0..98 plus a single far-away point at 2 * n.
        outlier_data = np.append(np.arange(n - 1), 2 * n)
        expected_k = int(np.log2(n)) - int(np.log2(n * 0.007)) + 1
        expected_edges = [self.edge_calc(i, outlier_data)
                          for i in range(expected_k + 2, 1, -1)]

        p = cat._LVPlotter(**self.default_kws)
        calc_edges, calc_k = p._lv_box_ends(outlier_data)

        npt.assert_equal(list(expected_edges), calc_edges)
        npt.assert_equal(expected_k, calc_k)

        out_calc = p._lv_outliers(outlier_data, calc_k)
        out_exp = p._lv_outliers(outlier_data, expected_k)

        npt.assert_equal(out_exp, out_calc)

    def test_showfliers(self):
        """Path collections hold 2 points by default, 0 without fliers."""
        ax = cat.boxenplot("g", "y", data=self.df)
        for c in filter(self.ispath, ax.collections):
            assert len(c.get_offsets()) == 2
        plt.close("all")

        ax = cat.boxenplot("g", "y", data=self.df, showfliers=False)
        for c in filter(self.ispath, ax.collections):
            assert len(c.get_offsets()) == 0
        plt.close("all")

    def test_hue_offsets(self):
        """``hue_offsets`` depends on ``width`` and the number of hue levels."""
        p = cat._LVPlotter(**self.default_kws)
        p.establish_variables("g", "y", "h", data=self.df)
        npt.assert_array_equal(p.hue_offsets, [-.2, .2])

        kws = self.default_kws.copy()
        kws["width"] = .6
        p = cat._LVPlotter(**kws)
        p.establish_variables("g", "y", "h", data=self.df)
        npt.assert_array_equal(p.hue_offsets, [-.15, .15])

        p = cat._LVPlotter(**kws)
        p.establish_variables("h", "y", "g", data=self.df)
        npt.assert_array_almost_equal(p.hue_offsets, [-.2, 0, .2])

    def test_axes_data(self):
        """One patch collection is drawn per group (or group/hue pair)."""
        ax = cat.boxenplot("g", "y", data=self.df)
        patches = filter(self.ispatch, ax.collections)
        assert len(list(patches)) == 3
        plt.close("all")

        ax = cat.boxenplot("g", "y", "h", data=self.df)
        patches = filter(self.ispatch, ax.collections)
        assert len(list(patches)) == 6
        plt.close("all")

    def test_box_colors(self):
        """Artists on the axes use the default palette when saturation=1."""
        # NOTE(review): this loops over ``ax.artists``; if boxenplot adds
        # only collections, the loop body never runs - confirm.
        ax = cat.boxenplot("g", "y", data=self.df, saturation=1)
        pal = palettes.color_palette(n_colors=3)
        for patch, color in zip(ax.artists, pal):
            assert patch.get_facecolor()[:3] == color
        plt.close("all")

        ax = cat.boxenplot("g", "y", "h", data=self.df, saturation=1)
        pal = palettes.color_palette(n_colors=2)
        for patch, color in zip(ax.artists, pal * 2):
            assert patch.get_facecolor()[:3] == color
        plt.close("all")

    def test_draw_missing_boxes(self):
        """A level in ``order`` that has no data adds no patch collection."""
        ax = cat.boxenplot("g", "y", data=self.df,
                           order=["a", "b", "c", "d"])

        patches = filter(self.ispatch, ax.collections)
        assert len(list(patches)) == 3
        plt.close("all")

    def test_unaligned_index(self):
        """Boxenplots from ``self.y`` and ``self.y_perm`` draw equal lines."""
        f, (ax1, ax2) = plt.subplots(2)
        cat.boxenplot(self.g, self.y, ax=ax1)
        cat.boxenplot(self.g, self.y_perm, ax=ax2)
        for l1, l2 in zip(ax1.lines, ax2.lines):
            assert np.array_equal(l1.get_xydata(), l2.get_xydata())

        f, (ax1, ax2) = plt.subplots(2)
        hue_order = self.h.unique()
        cat.boxenplot(self.g, self.y, self.h, hue_order=hue_order, ax=ax1)
        cat.boxenplot(self.g, self.y_perm, self.h,
                      hue_order=hue_order, ax=ax2)
        for l1, l2 in zip(ax1.lines, ax2.lines):
            assert np.array_equal(l1.get_xydata(), l2.get_xydata())

    def test_missing_data(self):
        """Groups whose values are all NaN do not contribute a line."""
        x = ["a", "a", "b", "b", "c", "c", "d", "d"]
        h = ["x", "y", "x", "y", "x", "y", "x", "y"]
        y = self.rs.randn(8)
        # Blank out every value of the last group.
        y[-2:] = np.nan

        ax = cat.boxenplot(x, y)
        assert len(ax.lines) == 3

        plt.close("all")

        # Restore one value so only a single (group, hue) cell is empty.
        y[-1] = 0
        ax = cat.boxenplot(x, y, h)
        assert len(ax.lines) == 7

        plt.close("all")

    def test_boxenplots(self):
        """Smoke test the high level boxenplot options."""
        cat.boxenplot("y", data=self.df)
        plt.close("all")

        cat.boxenplot(y="y", data=self.df)
        plt.close("all")

        cat.boxenplot("g", "y", data=self.df)
        plt.close("all")

        cat.boxenplot("y", "g", data=self.df, orient="h")
        plt.close("all")

        cat.boxenplot("g", "y", "h", data=self.df)
        plt.close("all")

        cat.boxenplot("g", "y", "h", order=list("nabc"), data=self.df)
        plt.close("all")

        cat.boxenplot("g", "y", "h", hue_order=list("omn"), data=self.df)
        plt.close("all")

        cat.boxenplot("y", "g", "h", data=self.df, orient="h")
        plt.close("all")

        cat.boxenplot("y", "g", "h", data=self.df, orient="h", palette="Set2")
        plt.close("all")

        cat.boxenplot("y", "g", "h", data=self.df, orient="h", color="b")
        plt.close("all")

    def test_axes_annotation(self):
        """Axis labels, limits, ticks, tick labels and legend texts are set."""
        ax = cat.boxenplot("g", "y", data=self.df)
        assert ax.get_xlabel() == "g"
        assert ax.get_ylabel() == "y"
        assert ax.get_xlim() == (-.5, 2.5)
        npt.assert_array_equal(ax.get_xticks(), [0, 1, 2])
        npt.assert_array_equal(
            [label.get_text() for label in ax.get_xticklabels()],
            ["a", "b", "c"])

        plt.close("all")

        ax = cat.boxenplot("g", "y", "h", data=self.df)
        assert ax.get_xlabel() == "g"
        assert ax.get_ylabel() == "y"
        npt.assert_array_equal(ax.get_xticks(), [0, 1, 2])
        npt.assert_array_equal(
            [label.get_text() for label in ax.get_xticklabels()],
            ["a", "b", "c"])
        npt.assert_array_equal(
            [text.get_text() for text in ax.legend_.get_texts()],
            ["m", "n"])

        plt.close("all")

        ax = cat.boxenplot("y", "g", data=self.df, orient="h")
        assert ax.get_xlabel() == "y"
        assert ax.get_ylabel() == "g"
        # The categorical axis is inverted in the horizontal orientation.
        assert ax.get_ylim() == (2.5, -.5)
        npt.assert_array_equal(ax.get_yticks(), [0, 1, 2])
        npt.assert_array_equal(
            [label.get_text() for label in ax.get_yticklabels()],
            ["a", "b", "c"])

        plt.close("all")

    @pytest.mark.parametrize("size", ["large", "medium", "small", 22, 12])
    def test_legend_titlesize(self, size):
        """The legend title font size follows the relevant rc parameter."""
        if LooseVersion(mpl.__version__) >= LooseVersion("3.0"):
            rc_ctx = {"legend.title_fontsize": size}
        else:  # Old matplotlib doesn't have legend.title_fontsize rcparam
            rc_ctx = {"axes.labelsize": size}
            # In this branch numeric sizes are expected scaled by .85.
            if isinstance(size, int):
                size = size * .85
        exp = mpl.font_manager.FontProperties(size=size).get_size()

        with plt.rc_context(rc=rc_ctx):
            ax = cat.boxenplot("g", "y", "h", data=self.df)
            obs = ax.get_legend().get_title().get_fontproperties().get_size()
            assert obs == exp

        plt.close("all")

    def test_lvplot(self):
        """``lvplot`` warns and still draws one patch collection per group."""
        with pytest.warns(UserWarning):
            ax = cat.lvplot("g", "y", data=self.df)

        patches = filter(self.ispatch, ax.collections)
        assert len(list(patches)) == 3
        plt.close("all")
|