import collections
from datetime import timedelta
import functools
import gc
import json
import operator
import pickle
import re
from textwrap import dedent
from typing import (
    Any,
    Callable,
    Dict,
    FrozenSet,
    Hashable,
    List,
    Mapping,
    Optional,
    Sequence,
    Set,
    Tuple,
    Type,
    Union,
)
import warnings
import weakref

import numpy as np

from pandas._config import config

from pandas._libs import Timestamp, iNaT, lib, properties
from pandas._typing import (
    Axis,
    Dtype,
    FilePathOrBuffer,
    FrameOrSeries,
    JSONSerializable,
    Level,
    Renamer,
)
from pandas.compat import set_function_name
from pandas.compat._optional import import_optional_dependency
from pandas.compat.numpy import function as nv
from pandas.errors import AbstractMethodError
from pandas.util._decorators import Appender, Substitution, rewrite_axis_style_signature
from pandas.util._validators import (
    validate_bool_kwarg,
    validate_fillna_kwargs,
    validate_percentile,
)

from pandas.core.dtypes.common import (
    ensure_int64,
    ensure_object,
    ensure_str,
    is_bool,
    is_bool_dtype,
    is_datetime64_any_dtype,
    is_datetime64tz_dtype,
    is_dict_like,
    is_extension_array_dtype,
    is_float,
    is_integer,
    is_list_like,
    is_number,
    is_numeric_dtype,
    is_object_dtype,
    is_period_arraylike,
    is_re_compilable,
    is_scalar,
    is_timedelta64_dtype,
    pandas_dtype,
)
from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries
from pandas.core.dtypes.inference import is_hashable
from pandas.core.dtypes.missing import isna, notna

import pandas as pd
from pandas.core import missing, nanops
import pandas.core.algorithms as algos
from pandas.core.base import PandasObject, SelectionMixin
import pandas.core.common as com
from pandas.core.construction import create_series_with_explicit_dtype
from pandas.core.indexes.api import (
    Index,
    InvalidIndexError,
    MultiIndex,
    RangeIndex,
    ensure_index,
)
from pandas.core.indexes.datetimes import DatetimeIndex
from pandas.core.indexes.period import Period, PeriodIndex
import pandas.core.indexing as indexing
from pandas.core.internals import BlockManager
from pandas.core.missing import find_valid_index
from pandas.core.ops import _align_method_FRAME
from pandas.io.formats import format as fmt
from pandas.io.formats.format import DataFrameFormatter, format_percentiles
from pandas.io.formats.printing import pprint_thing
from pandas.tseries.frequencies import to_offset

# goal is to be able to define the docs close to function, while still being
# able to share
_shared_docs: Dict[str, str] = dict()
_shared_doc_kwargs = dict(
    axes="keywords for axes",
    klass="Series/DataFrame",
    axes_single_arg="int or labels for object",
    args_transpose="axes to permute (int or label for object)",
    optional_by="""
        by : str or list of str
            Name or list of names to sort by""",
)


def _single_replace(self, to_replace, method, inplace, limit):
    """
    Replaces values in a Series using the fill method specified when no
    replacement value is given in the replace method.
    """
    if self.ndim != 1:
        raise TypeError(
            f"cannot replace {to_replace} with method {method} on a "
            f"{type(self).__name__}"
        )

    orig_dtype = self.dtype
    result = self if inplace else self.copy()
    fill_f = missing.get_fill_func(method)

    mask = missing.mask_missing(result.values, to_replace)
    values = fill_f(result.values, limit=limit, mask=mask)

    if values.dtype == orig_dtype and inplace:
        return

    result = pd.Series(values, index=self.index, dtype=self.dtype).__finalize__(self)

    if inplace:
        self._update_inplace(result._data)
        return

    return result
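
# Illustrative usage (a sketch with hypothetical values, not executed at
# import time): _single_replace is the path taken by Series.replace when a
# fill method is passed instead of a replacement value, e.g.
#
#     >>> s = pd.Series([0, 1, 2, 3, 4])
#     >>> s.replace(1, method="ffill")
#     0    0
#     1    0
#     2    2
#     3    3
#     4    4
#     dtype: int64
#
# The matched value 1 is masked out and forward-filled from the previous
# entry (0).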
bool_t = bool  # Need alias because NDFrame has def bool:


class NDFrame(PandasObject, SelectionMixin, indexing.IndexingMixin):
    """
    N-dimensional analogue of DataFrame. Store multi-dimensional data in a
    size-mutable, labeled data structure.

    Parameters
    ----------
    data : BlockManager
    axes : list
    copy : bool, default False
    """

    _internal_names: List[str] = [
        "_data",
        "_cacher",
        "_item_cache",
        "_cache",
        "_is_copy",
        "_subtyp",
        "_name",
        "_index",
        "_default_kind",
        "_default_fill_value",
        "_metadata",
        "__array_struct__",
        "__array_interface__",
    ]
    _internal_names_set: Set[str] = set(_internal_names)
    _accessors: Set[str] = set()
    _deprecations: FrozenSet[str] = frozenset(["get_values", "ix"])
    _metadata: List[str] = []
    _is_copy = None
    _data: BlockManager
    _attrs: Dict[Optional[Hashable], Any]
    _typ: str

    # ----------------------------------------------------------------------
    # Constructors

    def __init__(
        self,
        data: BlockManager,
        axes: Optional[List[Index]] = None,
        copy: bool = False,
        dtype: Optional[Dtype] = None,
        attrs: Optional[Mapping[Optional[Hashable], Any]] = None,
        fastpath: bool = False,
    ):
        if not fastpath:
            if dtype is not None:
                data = data.astype(dtype)
            elif copy:
                data = data.copy()

            if axes is not None:
                for i, ax in enumerate(axes):
                    data = data.reindex_axis(ax, axis=i)

        object.__setattr__(self, "_is_copy", None)
        object.__setattr__(self, "_data", data)
        object.__setattr__(self, "_item_cache", {})
        if attrs is None:
            attrs = {}
        else:
            attrs = dict(attrs)
        object.__setattr__(self, "_attrs", attrs)

    def _init_mgr(self, mgr, axes=None, dtype=None, copy=False):
        """ passed a manager and an axes dict """
        for a, axe in axes.items():
            if axe is not None:
                mgr = mgr.reindex_axis(
                    axe, axis=self._get_block_manager_axis(a), copy=False
                )

        # make a copy if explicitly requested
        if copy:
            mgr = mgr.copy()
        if dtype is not None:
            # avoid further copies if we can
            if len(mgr.blocks) > 1 or mgr.blocks[0].values.dtype != dtype:
                mgr = mgr.astype(dtype=dtype)
        return mgr

    # ----------------------------------------------------------------------

    @property
    def attrs(self) -> Dict[Optional[Hashable], Any]:
        """
        Dictionary of global attributes on this object.

        .. warning::

           attrs is experimental and may change without warning.
        """
        if self._attrs is None:
            self._attrs = {}
        return self._attrs

    @attrs.setter
    def attrs(self, value: Mapping[Optional[Hashable], Any]) -> None:
        self._attrs = dict(value)
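
    # Illustrative usage (hypothetical values, not executed; attrs is
    # experimental per the warning above):
    #
    #     >>> df = pd.DataFrame({"a": [1, 2]})
    #     >>> df.attrs["source"] = "sensor-3"
    #     >>> df.attrs
    #     {'source': 'sensor-3'}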
    def _validate_dtype(self, dtype):
        """ validate the passed dtype """
        if dtype is not None:
            dtype = pandas_dtype(dtype)

            # a compound dtype
            if dtype.kind == "V":
                raise NotImplementedError(
                    "compound dtypes are not implemented"
                    f" in the {type(self).__name__} constructor"
                )

        return dtype
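
    # Illustrative: a compound (structured/record) NumPy dtype has kind "V",
    # which is what this check rejects (a sketch with hypothetical values,
    # not executed):
    #
    #     >>> np.dtype([("x", "i4"), ("y", "f4")]).kind
    #     'V'
    #     >>> pd.Series([], dtype=[("x", "i4"), ("y", "f4")])  # raises
    #     NotImplementedError: compound dtypes are not implemented ...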
    # ----------------------------------------------------------------------
    # Construction

    @property
    def _constructor(self: FrameOrSeries) -> Type[FrameOrSeries]:
        """
        Used when a manipulation result has the same dimensions as the
        original.
        """
        raise AbstractMethodError(self)

    @property
    def _constructor_sliced(self):
        """
        Used when a manipulation result has one lower dimension than the
        original, such as slicing a single column out of a DataFrame.
        """
        raise AbstractMethodError(self)

    @property
    def _constructor_expanddim(self):
        """
        Used when a manipulation result has one higher dimension than the
        original, such as Series.to_frame().
        """
        raise NotImplementedError

    # ----------------------------------------------------------------------
    # Axis

    _AXIS_ALIASES = {"rows": 0}
    _AXIS_IALIASES = {0: "rows"}
    _stat_axis_number = 0
    _stat_axis_name = "index"
    _ix = None
    _AXIS_ORDERS: List[str]
    _AXIS_NUMBERS: Dict[str, int]
    _AXIS_NAMES: Dict[int, str]
    _AXIS_REVERSED: bool
    _info_axis_number: int
    _info_axis_name: str
    _AXIS_LEN: int

    @classmethod
    def _setup_axes(cls, axes: List[str], docs: Dict[str, str]) -> None:
        """
        Provide axes setup for the major PandasObjects.

        Parameters
        ----------
        axes : the names of the axes in order (lowest to highest)
        docs : docstrings for the axis properties
        """
        info_axis = len(axes) - 1
        axes_are_reversed = len(axes) > 1

        cls._AXIS_ORDERS = axes
        cls._AXIS_NUMBERS = {a: i for i, a in enumerate(axes)}
        cls._AXIS_LEN = len(axes)
        cls._AXIS_NAMES = dict(enumerate(axes))
        cls._AXIS_REVERSED = axes_are_reversed

        cls._info_axis_number = info_axis
        cls._info_axis_name = axes[info_axis]

        # setup the actual axis
        def set_axis(a, i):
            setattr(cls, a, properties.AxisProperty(i, docs.get(a, a)))
            cls._internal_names_set.add(a)

        if axes_are_reversed:
            for i, a in cls._AXIS_NAMES.items():
                set_axis(a, 1 - i)
        else:
            for i, a in cls._AXIS_NAMES.items():
                set_axis(a, i)
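
    # Illustrative: DataFrame registers its axes with something like the call
    # below, so df.index and df.columns become AxisProperty descriptors. With
    # two axes, axes_are_reversed is True and the descriptor positions are
    # flipped (1 - i) to match the BlockManager's internal layout (a sketch,
    # not the actual call site):
    #
    #     >>> DataFrame._setup_axes(
    #     ...     ["index", "columns"],
    #     ...     docs={"index": "The index (row labels).",
    #     ...           "columns": "The column labels."},
    #     ... )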

    def _construct_axes_dict(self, axes=None, **kwargs):
        """Return an axes dictionary for myself."""
        d = {a: self._get_axis(a) for a in (axes or self._AXIS_ORDERS)}
        d.update(kwargs)
        return d

    @staticmethod
    def _construct_axes_dict_from(self, axes, **kwargs):
        """Return an axes dictionary for the passed axes."""
        d = {a: ax for a, ax in zip(self._AXIS_ORDERS, axes)}
        d.update(kwargs)
        return d

    def _construct_axes_from_arguments(
        self, args, kwargs, require_all: bool = False, sentinel=None
    ):
        """Construct and return axes if supplied in args/kwargs.

        If require_all, raise if any axis argument is not supplied.
        Return a tuple of (axes, kwargs).

        sentinel specifies the default parameter when an axis is not
        supplied; useful to distinguish when a user explicitly passes None
        in scenarios where None has special meaning.
        """
        # construct the args
        args = list(args)
        for a in self._AXIS_ORDERS:

            # look for an argument by position
            if a not in kwargs:
                try:
                    kwargs[a] = args.pop(0)
                except IndexError:
                    if require_all:
                        raise TypeError("not enough/duplicate arguments specified!")

        axes = {a: kwargs.pop(a, sentinel) for a in self._AXIS_ORDERS}
        return axes, kwargs
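
    # Editor's note: a hedged illustration, not in the original source. For a
    # DataFrame, positional and keyword axis arguments are folded into a dict
    # keyed by axis name (``new_index`` is a placeholder):
    #
    #   >>> # df._construct_axes_from_arguments((new_index,), {})
    #   >>> # -> ({"index": new_index, "columns": None}, {})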

    @classmethod
    def _from_axes(cls: Type[FrameOrSeries], data, axes, **kwargs) -> FrameOrSeries:
        # for construction from BlockManager
        if isinstance(data, BlockManager):
            return cls(data, **kwargs)
        else:
            if cls._AXIS_REVERSED:
                axes = axes[::-1]
            d = cls._construct_axes_dict_from(cls, axes, copy=False)
            d.update(kwargs)
            return cls(data, **d)

    @classmethod
    def _get_axis_number(cls, axis):
        axis = cls._AXIS_ALIASES.get(axis, axis)
        if is_integer(axis):
            if axis in cls._AXIS_NAMES:
                return axis
        else:
            try:
                return cls._AXIS_NUMBERS[axis]
            except KeyError:
                pass
        raise ValueError(f"No axis named {axis} for object type {cls}")
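
    # Editor's note: a hedged illustration of axis resolution, not in the
    # original source. Names, numbers and the "rows" alias all normalize to
    # an axis number:
    #
    #   >>> pd.DataFrame()._get_axis_number("index")
    #   0
    #   >>> pd.DataFrame()._get_axis_number("columns")
    #   1
    #   >>> pd.DataFrame()._get_axis_number("rows")  # via _AXIS_ALIASES
    #   0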

    @classmethod
    def _get_axis_name(cls, axis):
        axis = cls._AXIS_ALIASES.get(axis, axis)
        if isinstance(axis, str):
            if axis in cls._AXIS_NUMBERS:
                return axis
        else:
            try:
                return cls._AXIS_NAMES[axis]
            except KeyError:
                pass
        raise ValueError(f"No axis named {axis} for object type {cls}")

    def _get_axis(self, axis):
        name = self._get_axis_name(axis)
        return getattr(self, name)

    @classmethod
    def _get_block_manager_axis(cls, axis):
        """Map the axis to the block_manager axis."""
        axis = cls._get_axis_number(axis)
        if cls._AXIS_REVERSED:
            m = cls._AXIS_LEN - 1
            return m - axis
        return axis
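
    # Editor's note: a hedged illustration, not in the original source. A
    # DataFrame stores its axes reversed inside the BlockManager, so the
    # user-facing axis 0 (index) maps to manager axis 1 and vice versa:
    #
    #   >>> pd.DataFrame()._get_block_manager_axis(0)
    #   1
    #   >>> pd.DataFrame()._get_block_manager_axis(1)
    #   0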

    def _get_axis_resolvers(self, axis: str) -> Dict[str, ABCSeries]:
        # index or columns
        axis_index = getattr(self, axis)
        d = dict()
        prefix = axis[0]

        for i, name in enumerate(axis_index.names):
            if name is not None:
                key = level = name
            else:
                # prefix with 'i' or 'c' depending on the input axis
                # e.g., you must do ilevel_0 for the 0th level of an unnamed
                # multiindex
                key = f"{prefix}level_{i}"
                level = i

            level_values = axis_index.get_level_values(level)
            s = level_values.to_series()
            s.index = axis_index
            d[key] = s

        # put the index/columns itself in the dict
        if isinstance(axis_index, MultiIndex):
            dindex = axis_index
        else:
            dindex = axis_index.to_series()

        d[axis] = dindex
        return d

    def _get_index_resolvers(self) -> Dict[str, ABCSeries]:
        from pandas.core.computation.parsing import clean_column_name

        d: Dict[str, ABCSeries] = {}
        for axis_name in self._AXIS_ORDERS:
            d.update(self._get_axis_resolvers(axis_name))

        return {clean_column_name(k): v for k, v in d.items() if not isinstance(k, int)}

    def _get_cleaned_column_resolvers(self) -> Dict[str, ABCSeries]:
        """
        Return the special character free column resolvers of a dataframe.

        Column names with special characters are 'cleaned up' so that they can
        be referred to by backtick quoting.
        Used in :meth:`DataFrame.eval`.
        """
        from pandas.core.computation.parsing import clean_column_name

        if isinstance(self, ABCSeries):
            return {clean_column_name(self.name): self}

        return {
            clean_column_name(k): v for k, v in self.items() if not isinstance(k, int)
        }
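
    # Editor's note: a hedged usage sketch, not in the original source. The
    # cleaned resolvers are what let ``DataFrame.eval``/``query`` refer to
    # otherwise-unparseable column names via backticks:
    #
    #   >>> df = pd.DataFrame({"A A": [1, 2], "B": [3, 4]})
    #   >>> df.query("`A A` > 1")
    #      A A  B
    #   1    2  4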

    @property
    def _info_axis(self):
        return getattr(self, self._info_axis_name)

    @property
    def _stat_axis(self):
        return getattr(self, self._stat_axis_name)

    @property
    def shape(self) -> Tuple[int, ...]:
        """
        Return a tuple of axis dimensions
        """
        return tuple(len(self._get_axis(a)) for a in self._AXIS_ORDERS)

    @property
    def axes(self) -> List[Index]:
        """
        Return index label(s) of the internal NDFrame
        """
        # we do it this way because if we have reversed axes, then
        # the block manager shows them reversed
        return [self._get_axis(a) for a in self._AXIS_ORDERS]

    @property
    def ndim(self) -> int:
        """
        Return an int representing the number of axes / array dimensions.

        Return 1 if Series. Otherwise return 2 if DataFrame.

        See Also
        --------
        ndarray.ndim : Number of array dimensions.

        Examples
        --------
        >>> s = pd.Series({'a': 1, 'b': 2, 'c': 3})
        >>> s.ndim
        1

        >>> df = pd.DataFrame({'col1': [1, 2], 'col2': [3, 4]})
        >>> df.ndim
        2
        """
        return self._data.ndim

    @property
    def size(self):
        """
        Return an int representing the number of elements in this object.

        Return the number of rows if Series. Otherwise return the number of
        rows times number of columns if DataFrame.

        See Also
        --------
        ndarray.size : Number of elements in the array.

        Examples
        --------
        >>> s = pd.Series({'a': 1, 'b': 2, 'c': 3})
        >>> s.size
        3

        >>> df = pd.DataFrame({'col1': [1, 2], 'col2': [3, 4]})
        >>> df.size
        4
        """
        return np.prod(self.shape)

    @property
    def _selected_obj(self: FrameOrSeries) -> FrameOrSeries:
        """ internal compat with SelectionMixin """
        return self

    @property
    def _obj_with_exclusions(self: FrameOrSeries) -> FrameOrSeries:
        """ internal compat with SelectionMixin """
        return self

    def set_axis(self, labels, axis=0, inplace=False):
        """
        Assign desired index to given axis.

        Indexes for column or row labels can be changed by assigning
        a list-like or Index.

        .. versionchanged:: 0.21.0

           The signature is now `labels` and `axis`, consistent with
           the rest of pandas API. Previously, the `axis` and `labels`
           arguments were respectively the first and second positional
           arguments.

        Parameters
        ----------
        labels : list-like, Index
            The values for the new index.

        axis : {0 or 'index', 1 or 'columns'}, default 0
            The axis to update. The value 0 identifies the rows, and 1
            identifies the columns.

        inplace : bool, default False
            Whether to return a new %(klass)s instance.

        Returns
        -------
        renamed : %(klass)s or None
            An object of same type as caller if inplace=False, None otherwise.

        See Also
        --------
        DataFrame.rename_axis : Alter the name of the index or columns.

        Examples
        --------
        **Series**

        >>> s = pd.Series([1, 2, 3])
        >>> s
        0    1
        1    2
        2    3
        dtype: int64

        >>> s.set_axis(['a', 'b', 'c'], axis=0)
        a    1
        b    2
        c    3
        dtype: int64

        **DataFrame**

        >>> df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})

        Change the row labels.

        >>> df.set_axis(['a', 'b', 'c'], axis='index')
           A  B
        a  1  4
        b  2  5
        c  3  6

        Change the column labels.

        >>> df.set_axis(['I', 'II'], axis='columns')
           I  II
        0  1   4
        1  2   5
        2  3   6

        Now, update the labels inplace.

        >>> df.set_axis(['i', 'ii'], axis='columns', inplace=True)
        >>> df
           i  ii
        0  1   4
        1  2   5
        2  3   6
        """
        if inplace:
            setattr(self, self._get_axis_name(axis), labels)
        else:
            obj = self.copy()
            obj.set_axis(labels, axis=axis, inplace=True)
            return obj

    def _set_axis(self, axis, labels) -> None:
        self._data.set_axis(axis, labels)
        self._clear_item_cache()

    def swapaxes(self: FrameOrSeries, axis1, axis2, copy=True) -> FrameOrSeries:
        """
        Interchange axes and swap values axes appropriately.

        Returns
        -------
        y : same as input
        """
        i = self._get_axis_number(axis1)
        j = self._get_axis_number(axis2)

        if i == j:
            if copy:
                return self.copy()
            return self

        mapping = {i: j, j: i}

        new_axes = (self._get_axis(mapping.get(k, k)) for k in range(self._AXIS_LEN))
        new_values = self.values.swapaxes(i, j)
        if copy:
            new_values = new_values.copy()

        return self._constructor(new_values, *new_axes).__finalize__(self)
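
    # Editor's note: a hedged usage sketch, not in the original source. On a
    # two-dimensional object, swapping the axes amounts to a transpose:
    #
    #   >>> df = pd.DataFrame({"a": [1, 2], "b": [3, 4]})
    #   >>> df.swapaxes("index", "columns")
    #      0  1
    #   a  1  2
    #   b  3  4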

    def droplevel(self: FrameOrSeries, level, axis=0) -> FrameOrSeries:
        """
        Return DataFrame with requested index / column level(s) removed.

        .. versionadded:: 0.24.0

        Parameters
        ----------
        level : int, str, or list-like
            If a string is given, must be the name of a level
            If list-like, elements must be names or positional indexes
            of levels.

        axis : {0 or 'index', 1 or 'columns'}, default 0

        Returns
        -------
        DataFrame
            DataFrame with requested index / column level(s) removed.

        Examples
        --------
        >>> df = pd.DataFrame([
        ...     [1, 2, 3, 4],
        ...     [5, 6, 7, 8],
        ...     [9, 10, 11, 12]
        ... ]).set_index([0, 1]).rename_axis(['a', 'b'])

        >>> df.columns = pd.MultiIndex.from_tuples([
        ...     ('c', 'e'), ('d', 'f')
        ... ], names=['level_1', 'level_2'])

        >>> df
        level_1   c   d
        level_2   e   f
        a b
        1 2      3   4
        5 6      7   8
        9 10    11  12

        >>> df.droplevel('a')
        level_1   c   d
        level_2   e   f
        b
        2        3   4
        6        7   8
        10      11  12

        >>> df.droplevel('level_2', axis=1)
        level_1   c   d
        a b
        1 2      3   4
        5 6      7   8
        9 10    11  12
        """
        labels = self._get_axis(axis)
        new_labels = labels.droplevel(level)
        result = self.set_axis(new_labels, axis=axis, inplace=False)
        return result

    def pop(self: FrameOrSeries, item) -> FrameOrSeries:
        """
        Return item and drop from frame. Raise KeyError if not found.

        Parameters
        ----------
        item : str
            Label of column to be popped.

        Returns
        -------
        Series

        Examples
        --------
        >>> df = pd.DataFrame([('falcon', 'bird', 389.0),
        ...                    ('parrot', 'bird', 24.0),
        ...                    ('lion', 'mammal', 80.5),
        ...                    ('monkey', 'mammal', np.nan)],
        ...                   columns=('name', 'class', 'max_speed'))
        >>> df
             name   class  max_speed
        0  falcon    bird      389.0
        1  parrot    bird       24.0
        2    lion  mammal       80.5
        3  monkey  mammal        NaN

        >>> df.pop('class')
        0      bird
        1      bird
        2    mammal
        3    mammal
        Name: class, dtype: object

        >>> df
             name  max_speed
        0  falcon      389.0
        1  parrot       24.0
        2    lion       80.5
        3  monkey        NaN
        """
        result = self[item]
        del self[item]
        try:
            result._reset_cacher()
        except AttributeError:
            pass

        return result

    def squeeze(self, axis=None):
        """
        Squeeze 1 dimensional axis objects into scalars.

        Series or DataFrames with a single element are squeezed to a scalar.
        DataFrames with a single column or a single row are squeezed to a
        Series. Otherwise the object is unchanged.

        This method is most useful when you don't know if your
        object is a Series or DataFrame, but you do know it has just a single
        column. In that case you can safely call `squeeze` to ensure you have a
        Series.

        Parameters
        ----------
        axis : {0 or 'index', 1 or 'columns', None}, default None
            A specific axis to squeeze. By default, all length-1 axes are
            squeezed.

        Returns
        -------
        DataFrame, Series, or scalar
            The projection after squeezing `axis` or all the axes.

        See Also
        --------
        Series.iloc : Integer-location based indexing for selecting scalars.
        DataFrame.iloc : Integer-location based indexing for selecting Series.
        Series.to_frame : Inverse of DataFrame.squeeze for a
            single-column DataFrame.

        Examples
        --------
        >>> primes = pd.Series([2, 3, 5, 7])

        Slicing might produce a Series with a single value:

        >>> even_primes = primes[primes % 2 == 0]
        >>> even_primes
        0    2
        dtype: int64

        >>> even_primes.squeeze()
        2

        Squeezing objects with more than one value in every axis does nothing:

        >>> odd_primes = primes[primes % 2 == 1]
        >>> odd_primes
        1    3
        2    5
        3    7
        dtype: int64

        >>> odd_primes.squeeze()
        1    3
        2    5
        3    7
        dtype: int64

        Squeezing is even more effective when used with DataFrames.

        >>> df = pd.DataFrame([[1, 2], [3, 4]], columns=['a', 'b'])
        >>> df
           a  b
        0  1  2
        1  3  4

        Slicing a single column will produce a DataFrame with the columns
        having only one value:

        >>> df_a = df[['a']]
        >>> df_a
           a
        0  1
        1  3

        So the columns can be squeezed down, resulting in a Series:

        >>> df_a.squeeze('columns')
        0    1
        1    3
        Name: a, dtype: int64

        Slicing a single row from a single column will produce a single
        scalar DataFrame:

        >>> df_0a = df.loc[df.index < 1, ['a']]
        >>> df_0a
           a
        0  1

        Squeezing the rows produces a single scalar Series:

        >>> df_0a.squeeze('rows')
        a    1
        Name: 0, dtype: int64

        Squeezing all axes will project directly into a scalar:

        >>> df_0a.squeeze()
        1
        """
        axis = self._AXIS_NAMES if axis is None else (self._get_axis_number(axis),)
        return self.iloc[
            tuple(
                0 if i in axis and len(a) == 1 else slice(None)
                for i, a in enumerate(self.axes)
            )
        ]

    def swaplevel(self: FrameOrSeries, i=-2, j=-1, axis=0) -> FrameOrSeries:
        """
        Swap levels i and j in a MultiIndex on a particular axis

        Parameters
        ----------
        i, j : int, str (can be mixed)
            Level of index to be swapped. Can pass level name as string.

        Returns
        -------
        swapped : same type as caller (new object)
        """
        axis = self._get_axis_number(axis)
        result = self.copy()
        labels = result._data.axes[axis]
        result._data.set_axis(axis, labels.swaplevel(i, j))
        return result
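
    # Editor's note: a hedged usage sketch, not in the original source.
    # Swapping the two levels of a MultiIndex only reorders the levels:
    #
    #   >>> mi = pd.MultiIndex.from_tuples(
    #   ...     [("a", 1), ("b", 2)], names=["outer", "inner"])
    #   >>> s = pd.Series([10, 20], index=mi)
    #   >>> s.swaplevel("outer", "inner").index.names
    #   FrozenList(['inner', 'outer'])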

    # ----------------------------------------------------------------------
    # Rename

    def rename(
        self: FrameOrSeries,
        mapper: Optional[Renamer] = None,
        *,
        index: Optional[Renamer] = None,
        columns: Optional[Renamer] = None,
        axis: Optional[Axis] = None,
        copy: bool = True,
        inplace: bool = False,
        level: Optional[Level] = None,
        errors: str = "ignore",
    ) -> Optional[FrameOrSeries]:
        """
        Alter axes input function or functions. Function / dict values must be
        unique (1-to-1). Labels not contained in a dict / Series will be left
        as-is. Extra labels listed don't throw an error. Alternatively, change
        ``Series.name`` with a scalar value (Series only).

        Parameters
        ----------
        %(axes)s : scalar, list-like, dict-like or function, optional
            Scalar or list-like will alter the ``Series.name`` attribute,
            and raise on DataFrame.
            dict-like or functions are transformations to apply to
            that axis' values
        copy : bool, default True
            Also copy underlying data.
        inplace : bool, default False
            Whether to return a new %(klass)s. If True then value of copy is
            ignored.
        level : int or level name, default None
            In case of a MultiIndex, only rename labels in the specified
            level.
        errors : {'ignore', 'raise'}, default 'ignore'
            If 'raise', raise a `KeyError` when a dict-like `mapper`, `index`,
            or `columns` contains labels that are not present in the Index
            being transformed.
            If 'ignore', existing keys will be renamed and extra keys will be
            ignored.

        Returns
        -------
        renamed : %(klass)s (new object)

        Raises
        ------
        KeyError
            If any of the labels is not found in the selected axis and
            "errors='raise'".

        See Also
        --------
        NDFrame.rename_axis

        Examples
        --------
        >>> s = pd.Series([1, 2, 3])
        >>> s
        0    1
        1    2
        2    3
        dtype: int64
        >>> s.rename("my_name")  # scalar, changes Series.name
        0    1
        1    2
        2    3
        Name: my_name, dtype: int64
        >>> s.rename(lambda x: x ** 2)  # function, changes labels
        0    1
        1    2
        4    3
        dtype: int64
        >>> s.rename({1: 3, 2: 5})  # mapping, changes labels
        0    1
        3    2
        5    3
        dtype: int64

        Since ``DataFrame`` doesn't have a ``.name`` attribute,
        only mapping-type arguments are allowed.

        >>> df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
        >>> df.rename(2)
        Traceback (most recent call last):
        ...
        TypeError: 'int' object is not callable

        ``DataFrame.rename`` supports two calling conventions

        * ``(index=index_mapper, columns=columns_mapper, ...)``
        * ``(mapper, axis={'index', 'columns'}, ...)``

        We *highly* recommend using keyword arguments to clarify your
        intent.

        >>> df.rename(index=str, columns={"A": "a", "B": "c"})
           a  c
        0  1  4
        1  2  5
        2  3  6

        >>> df.rename(index=str, columns={"A": "a", "C": "c"})
           a  B
        0  1  4
        1  2  5
        2  3  6

        Using axis-style parameters

        >>> df.rename(str.lower, axis='columns')
           a  b
        0  1  4
        1  2  5
        2  3  6

        >>> df.rename({1: 2, 2: 4}, axis='index')
           A  B
        0  1  4
        2  2  5
        4  3  6

        See the :ref:`user guide <basics.rename>` for more.
        """
        if mapper is None and index is None and columns is None:
            raise TypeError("must pass an index to rename")

        if index is not None or columns is not None:
            if axis is not None:
                raise TypeError(
                    "Cannot specify both 'axis' and any of 'index' or 'columns'"
                )
            elif mapper is not None:
                raise TypeError(
                    "Cannot specify both 'mapper' and any of 'index' or 'columns'"
                )
        else:
            # use the mapper argument
            if axis and self._get_axis_number(axis) == 1:
                columns = mapper
            else:
                index = mapper

        result = self if inplace else self.copy(deep=copy)

        for axis_no, replacements in enumerate((index, columns)):
            if replacements is None:
                continue

            ax = self._get_axis(axis_no)
            baxis = self._get_block_manager_axis(axis_no)
            f = com.get_rename_function(replacements)

            if level is not None:
                level = ax._get_level_number(level)

            # GH 13473
            if not callable(replacements):
                indexer = ax.get_indexer_for(replacements)
                if errors == "raise" and len(indexer[indexer == -1]):
                    missing_labels = [
                        label
                        for index, label in enumerate(replacements)
                        if indexer[index] == -1
                    ]
                    raise KeyError(f"{missing_labels} not found in axis")

            result._data = result._data.rename_axis(
                f, axis=baxis, copy=copy, level=level
            )
            result._clear_item_cache()

        if inplace:
            self._update_inplace(result._data)
            return None
        else:
            return result.__finalize__(self)

    @rewrite_axis_style_signature("mapper", [("copy", True), ("inplace", False)])
    def rename_axis(self, mapper=lib.no_default, **kwargs):
        """
        Set the name of the axis for the index or columns.

        Parameters
        ----------
        mapper : scalar, list-like, optional
            Value to set the axis name attribute.
        index, columns : scalar, list-like, dict-like or function, optional
            A scalar, list-like, dict-like or function transformation to
            apply to that axis' values.

            Use either ``mapper`` and ``axis`` to
            specify the axis to target with ``mapper``, or ``index``
            and/or ``columns``.

            .. versionchanged:: 0.24.0

        axis : {0 or 'index', 1 or 'columns'}, default 0
            The axis to rename.
        copy : bool, default True
            Also copy underlying data.
        inplace : bool, default False
            Modifies the object directly, instead of creating a new Series
            or DataFrame.

        Returns
        -------
        Series, DataFrame, or None
            The same type as the caller or None if `inplace` is True.

        See Also
        --------
        Series.rename : Alter Series index labels or name.
        DataFrame.rename : Alter DataFrame index labels or name.
        Index.rename : Set new names on index.

        Notes
        -----
        ``DataFrame.rename_axis`` supports two calling conventions

        * ``(index=index_mapper, columns=columns_mapper, ...)``
        * ``(mapper, axis={'index', 'columns'}, ...)``

        The first calling convention will only modify the names of
        the index and/or the names of the Index object that is the columns.
        In this case, the parameter ``copy`` is ignored.

        The second calling convention will modify the names of the
        corresponding index if mapper is a list or a scalar.
        However, if mapper is dict-like or a function, it will use the
        deprecated behavior of modifying the axis *labels*.

        We *highly* recommend using keyword arguments to clarify your
        intent.

        Examples
        --------
        **Series**

        >>> s = pd.Series(["dog", "cat", "monkey"])
        >>> s
        0       dog
        1       cat
        2    monkey
        dtype: object
        >>> s.rename_axis("animal")
        animal
        0       dog
        1       cat
        2    monkey
        dtype: object

        **DataFrame**

        >>> df = pd.DataFrame({"num_legs": [4, 4, 2],
        ...                    "num_arms": [0, 0, 2]},
        ...                   ["dog", "cat", "monkey"])
        >>> df
                num_legs  num_arms
        dog            4         0
        cat            4         0
        monkey         2         2
        >>> df = df.rename_axis("animal")
        >>> df
                num_legs  num_arms
        animal
        dog            4         0
        cat            4         0
        monkey         2         2
        >>> df = df.rename_axis("limbs", axis="columns")
        >>> df
        limbs   num_legs  num_arms
        animal
        dog            4         0
        cat            4         0
        monkey         2         2

        **MultiIndex**

        >>> df.index = pd.MultiIndex.from_product([['mammal'],
        ...                                        ['dog', 'cat', 'monkey']],
        ...                                       names=['type', 'name'])
        >>> df
        limbs          num_legs  num_arms
        type   name
        mammal dog            4         0
               cat            4         0
               monkey         2         2

        >>> df.rename_axis(index={'type': 'class'})
        limbs          num_legs  num_arms
        class  name
        mammal dog            4         0
               cat            4         0
               monkey         2         2

        >>> df.rename_axis(columns=str.upper)
        LIMBS          num_legs  num_arms
        type   name
        mammal dog            4         0
               cat            4         0
               monkey         2         2
        """
        axes, kwargs = self._construct_axes_from_arguments(
            (), kwargs, sentinel=lib.no_default
        )
        copy = kwargs.pop("copy", True)
        inplace = kwargs.pop("inplace", False)
        axis = kwargs.pop("axis", 0)
        if axis is not None:
            axis = self._get_axis_number(axis)

        if kwargs:
            raise TypeError(
                "rename_axis() got an unexpected keyword "
                f'argument "{list(kwargs.keys())[0]}"'
            )

        inplace = validate_bool_kwarg(inplace, "inplace")

        if mapper is not lib.no_default:
            # Use v0.23 behavior if a scalar or list
            non_mapper = is_scalar(mapper) or (
                is_list_like(mapper) and not is_dict_like(mapper)
            )
            if non_mapper:
                return self._set_axis_name(mapper, axis=axis, inplace=inplace)
            else:
                raise ValueError("Use `.rename` to alter labels with a mapper.")
        else:
            # Use new behavior.  Means that index and/or columns
            # is specified
            result = self if inplace else self.copy(deep=copy)

            for axis in range(self._AXIS_LEN):
                v = axes.get(self._AXIS_NAMES[axis])
                if v is lib.no_default:
                    continue
                non_mapper = is_scalar(v) or (is_list_like(v) and not is_dict_like(v))
                if non_mapper:
                    newnames = v
                else:
                    f = com.get_rename_function(v)
                    curnames = self._get_axis(axis).names
                    newnames = [f(name) for name in curnames]
                result._set_axis_name(newnames, axis=axis, inplace=True)
            if not inplace:
                return result

    def _set_axis_name(self, name, axis=0, inplace=False):
        """
        Set the name(s) of the axis.

        Parameters
        ----------
        name : str or list of str
            Name(s) to set.
        axis : {0 or 'index', 1 or 'columns'}, default 0
            The axis to set the label. The value 0 or 'index' specifies index,
            and the value 1 or 'columns' specifies columns.
        inplace : bool, default False
            If `True`, do operation inplace and return None.

            .. versionadded:: 0.21.0

        Returns
        -------
        Series, DataFrame, or None
            The same type as the caller or `None` if `inplace` is `True`.

        See Also
        --------
        DataFrame.rename : Alter the axis labels of :class:`DataFrame`.
        Series.rename : Alter the index labels or set the index name
            of :class:`Series`.
        Index.rename : Set the name of :class:`Index` or :class:`MultiIndex`.

        Examples
        --------
        >>> df = pd.DataFrame({"num_legs": [4, 4, 2]},
        ...                   ["dog", "cat", "monkey"])
        >>> df
                num_legs
        dog            4
        cat            4
        monkey         2
        >>> df._set_axis_name("animal")
                num_legs
        animal
        dog            4
        cat            4
        monkey         2
        >>> df.index = pd.MultiIndex.from_product(
        ...                [["mammal"], ['dog', 'cat', 'monkey']])
        >>> df._set_axis_name(["type", "name"])
                       num_legs
        type   name
        mammal dog            4
               cat            4
               monkey         2
        """
        axis = self._get_axis_number(axis)
        idx = self._get_axis(axis).set_names(name)

        inplace = validate_bool_kwarg(inplace, "inplace")
        renamed = self if inplace else self.copy()
        renamed.set_axis(idx, axis=axis, inplace=True)
        if not inplace:
            return renamed

    # ----------------------------------------------------------------------
    # Comparison Methods

    def _indexed_same(self, other) -> bool:
        return all(
            self._get_axis(a).equals(other._get_axis(a)) for a in self._AXIS_ORDERS
        )

    def equals(self, other):
        """
        Test whether two objects contain the same elements.

        This function allows two Series or DataFrames to be compared against
        each other to see if they have the same shape and elements. NaNs in
        the same location are considered equal. The column headers do not
        need to have the same type, but the elements within the columns must
        be the same dtype.

        Parameters
        ----------
        other : Series or DataFrame
            The other Series or DataFrame to be compared with the first.

        Returns
        -------
        bool
            True if all elements are the same in both objects, False
            otherwise.

        See Also
        --------
        Series.eq : Compare two Series objects of the same length
            and return a Series where each element is True if the element
            in each Series is equal, False otherwise.
        DataFrame.eq : Compare two DataFrame objects of the same shape and
            return a DataFrame where each element is True if the respective
            element in each DataFrame is equal, False otherwise.
        testing.assert_series_equal : Raises an AssertionError if left and
            right are not equal. Provides an easy interface to ignore
            inequality in dtypes, indexes and precision among others.
        testing.assert_frame_equal : Like assert_series_equal, but targets
            DataFrames.
        numpy.array_equal : Return True if two arrays have the same shape
            and elements, False otherwise.

        Notes
        -----
        This function requires that the elements have the same dtype as their
        respective elements in the other Series or DataFrame. However, the
        column labels do not need to have the same type, as long as they are
        still considered equal.

        Examples
        --------
        >>> df = pd.DataFrame({1: [10], 2: [20]})
        >>> df
            1   2
        0  10  20

        DataFrames df and exactly_equal have the same types and values for
        their elements and column labels, which will return True.

        >>> exactly_equal = pd.DataFrame({1: [10], 2: [20]})
        >>> exactly_equal
            1   2
        0  10  20
        >>> df.equals(exactly_equal)
        True

        DataFrames df and different_column_type have the same element
        types and values, but have different types for the column labels,
        which will still return True.

        >>> different_column_type = pd.DataFrame({1.0: [10], 2.0: [20]})
        >>> different_column_type
           1.0  2.0
        0   10   20
        >>> df.equals(different_column_type)
        True

        DataFrames df and different_data_type have different types for the
        same values for their elements, and will return False even though
        their column labels are the same values and types.

        >>> different_data_type = pd.DataFrame({1: [10.0], 2: [20.0]})
        >>> different_data_type
              1     2
        0  10.0  20.0
        >>> df.equals(different_data_type)
        False
        """
        if not isinstance(other, self._constructor):
            return False
        return self._data.equals(other._data)

    # -------------------------------------------------------------------------
    # Unary Methods

    def __neg__(self):
        values = com.values_from_object(self)
        if is_bool_dtype(values):
            arr = operator.inv(values)
        elif (
            is_numeric_dtype(values)
            or is_timedelta64_dtype(values)
            or is_object_dtype(values)
        ):
            arr = operator.neg(values)
        else:
            raise TypeError(f"Unary negative expects numeric dtype, not {values.dtype}")
        return self.__array_wrap__(arr)

    def __pos__(self):
        values = com.values_from_object(self)
        if is_bool_dtype(values) or is_period_arraylike(values):
            arr = values
        elif (
            is_numeric_dtype(values)
            or is_timedelta64_dtype(values)
            or is_object_dtype(values)
        ):
            arr = operator.pos(values)
        else:
            raise TypeError(f"Unary plus expects numeric dtype, not {values.dtype}")
        return self.__array_wrap__(arr)
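
    # Editor's note: a hedged illustration, not in the original source.
    # Unary minus negates numeric values, while on a boolean Series it
    # inverts (per the dtype branch above):
    #
    #   >>> -pd.Series([1, -2])
    #   0   -1
    #   1    2
    #   dtype: int64
    #   >>> -pd.Series([True, False])
    #   0    False
    #   1     True
    #   dtype: bool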

    def __invert__(self):
        if not self.size:
            # inv fails with 0 len
            return self

        new_data = self._data.apply(operator.invert)
        result = self._constructor(new_data).__finalize__(self)
        return result

    def __nonzero__(self):
        raise ValueError(
            f"The truth value of a {type(self).__name__} is ambiguous. "
            "Use a.empty, a.bool(), a.item(), a.any() or a.all()."
        )

    __bool__ = __nonzero__

    def bool(self):
        """
        Return the bool of a single element PandasObject.

        This must be a boolean scalar value, either True or False. Raise a
        ValueError if the PandasObject does not have exactly 1 element, or if
        that element is not boolean.

        Returns
        -------
        bool
            Same single boolean value converted to bool type.
        """
        v = self.squeeze()
        if isinstance(v, (bool, np.bool_)):
            return bool(v)
        elif is_scalar(v):
            raise ValueError(
                "bool cannot act on a non-boolean single element "
                f"{type(self).__name__}"
            )

        self.__nonzero__()
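
    # Editor's note: a hedged illustration, not in the original source.
    # ``bool()`` only succeeds on a single boolean element:
    #
    #   >>> pd.Series([True]).bool()
    #   True
    #   >>> pd.DataFrame({"col": [False]}).bool()
    #   False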

    def __abs__(self: FrameOrSeries) -> FrameOrSeries:
        return self.abs()

    def __round__(self: FrameOrSeries, decimals: int = 0) -> FrameOrSeries:
        return self.round(decimals)

    # -------------------------------------------------------------------------
    # Label or Level Combination Helpers
    #
    # A collection of helper methods for DataFrame/Series operations that
    # accept a combination of column/index labels and levels.  All such
    # operations should utilize/extend these methods when possible so that we
    # have consistent precedence and validation logic throughout the library.

    def _is_level_reference(self, key, axis=0):
        """
        Test whether a key is a level reference for a given axis.

        To be considered a level reference, `key` must be a string that:
          - (axis=0): Matches the name of an index level and does NOT match
            a column label.
          - (axis=1): Matches the name of a column level and does NOT match
            an index label.

        Parameters
        ----------
        key : str
            Potential level name for the given axis
        axis : int, default 0
            Axis that levels are associated with (0 for index, 1 for columns)

        Returns
        -------
        is_level : bool
        """
        axis = self._get_axis_number(axis)

        return (
            key is not None
            and is_hashable(key)
            and key in self.axes[axis].names
            and not self._is_label_reference(key, axis=axis)
        )

    def _is_label_reference(self, key, axis=0) -> bool_t:
        """
        Test whether a key is a label reference for a given axis.

        To be considered a label reference, `key` must be a string that:
          - (axis=0): Matches a column label
          - (axis=1): Matches an index label

        Parameters
        ----------
        key: str
            Potential label name
        axis: int, default 0
            Axis perpendicular to the axis that labels are associated with
            (0 means search for column labels, 1 means search for index labels)

        Returns
        -------
        is_label: bool
        """
        axis = self._get_axis_number(axis)
        other_axes = (ax for ax in range(self._AXIS_LEN) if ax != axis)

        return (
            key is not None
            and is_hashable(key)
            and any(key in self.axes[ax] for ax in other_axes)
        )

    def _is_label_or_level_reference(self, key: str, axis: int = 0) -> bool_t:
        """
        Test whether a key is a label or level reference for a given axis.

        To be considered either a label or a level reference, `key` must be a
        string that:
          - (axis=0): Matches a column label or an index level
          - (axis=1): Matches an index label or a column level

        Parameters
        ----------
        key: str
            Potential label or level name
        axis: int, default 0
            Axis that levels are associated with (0 for index, 1 for columns)

        Returns
        -------
        is_label_or_level: bool
        """
        return self._is_level_reference(key, axis=axis) or self._is_label_reference(
            key, axis=axis
        )

    def _check_label_or_level_ambiguity(self, key, axis: int = 0) -> None:
        """
        Check whether `key` is ambiguous.

        By ambiguous, we mean that it matches both a level of the input
        `axis` and a label of the other axis.

        Parameters
        ----------
        key: str or object
            Label or level name.
        axis: int, default 0
            Axis that levels are associated with (0 for index, 1 for columns).

        Raises
        ------
        ValueError: `key` is ambiguous
        """
        axis = self._get_axis_number(axis)
        other_axes = (ax for ax in range(self._AXIS_LEN) if ax != axis)

        if (
            key is not None
            and is_hashable(key)
            and key in self.axes[axis].names
            and any(key in self.axes[ax] for ax in other_axes)
        ):

            # Build an informative and grammatical warning
            level_article, level_type = (
                ("an", "index") if axis == 0 else ("a", "column")
            )

            label_article, label_type = (
                ("a", "column") if axis == 0 else ("an", "index")
            )

            msg = (
                f"'{key}' is both {level_article} {level_type} level and "
                f"{label_article} {label_type} label, which is ambiguous."
            )
            raise ValueError(msg)

    def _get_label_or_level_values(self, key: str, axis: int = 0) -> np.ndarray:
        """
        Return a 1-D array of values associated with `key`, a label or level
        from the given `axis`.

        Retrieval logic:
          - (axis=0): Return column values if `key` matches a column label.
            Otherwise return index level values if `key` matches an index
            level.
          - (axis=1): Return row values if `key` matches an index label.
            Otherwise return column level values if 'key' matches a column
            level

        Parameters
        ----------
        key: str
            Label or level name.
        axis: int, default 0
            Axis that levels are associated with (0 for index, 1 for columns)

        Returns
        -------
        values: np.ndarray

        Raises
        ------
        KeyError
            if `key` matches neither a label nor a level
        ValueError
            if `key` matches multiple labels
        FutureWarning
            if `key` is ambiguous. This will become an ambiguity error in a
            future version
        """
        axis = self._get_axis_number(axis)
        other_axes = [ax for ax in range(self._AXIS_LEN) if ax != axis]

        if self._is_label_reference(key, axis=axis):
            self._check_label_or_level_ambiguity(key, axis=axis)
            values = self.xs(key, axis=other_axes[0])._values
        elif self._is_level_reference(key, axis=axis):
            values = self.axes[axis].get_level_values(key)._values
        else:
            raise KeyError(key)

        # Check for duplicates
        if values.ndim > 1:

            if other_axes and isinstance(self._get_axis(other_axes[0]), MultiIndex):
                multi_message = (
                    "\n"
                    "For a multi-index, the label must be a "
                    "tuple with elements corresponding to "
                    "each level."
                )
            else:
                multi_message = ""

            label_axis_name = "column" if axis == 0 else "index"
            raise ValueError(
                (
                    f"The {label_axis_name} label '{key}' "
                    f"is not unique.{multi_message}"
                )
            )

        return values

    def _drop_labels_or_levels(self, keys, axis: int = 0):
        """
        Drop labels and/or levels for the given `axis`.

        For each key in `keys`:
          - (axis=0): If key matches a column label then drop the column.
            Otherwise if key matches an index level then drop the level.
          - (axis=1): If key matches an index label then drop the row.
            Otherwise if key matches a column level then drop the level.

        Parameters
        ----------
        keys: str or list of str
            labels or levels to drop
        axis: int, default 0
            Axis that levels are associated with (0 for index, 1 for columns)

        Returns
        -------
        dropped: DataFrame

        Raises
        ------
        ValueError
            if any `keys` match neither a label nor a level
        """
        axis = self._get_axis_number(axis)

        # Validate keys
        keys = com.maybe_make_list(keys)
        invalid_keys = [
            k for k in keys if not self._is_label_or_level_reference(k, axis=axis)
        ]

        if invalid_keys:
            raise ValueError(
                (
                    "The following keys are not valid labels or "
                    f"levels for axis {axis}: {invalid_keys}"
                )
            )

        # Compute levels and labels to drop
        levels_to_drop = [k for k in keys if self._is_level_reference(k, axis=axis)]
        labels_to_drop = [k for k in keys if not self._is_level_reference(k, axis=axis)]

        # Perform copy upfront and then use inplace operations below.
        # This ensures that we always perform exactly one copy.
        # ``copy`` and/or ``inplace`` options could be added in the future.
        dropped = self.copy()

        if axis == 0:
            # Handle dropping index levels
            if levels_to_drop:
                dropped.reset_index(levels_to_drop, drop=True, inplace=True)

            # Handle dropping columns labels
            if labels_to_drop:
                dropped.drop(labels_to_drop, axis=1, inplace=True)
        else:
            # Handle dropping column levels
            if levels_to_drop:
                if isinstance(dropped.columns, MultiIndex):
                    # Drop the specified levels from the MultiIndex
                    dropped.columns = dropped.columns.droplevel(levels_to_drop)
                else:
                    # Drop the last level of Index by replacing with
                    # a RangeIndex
                    dropped.columns = RangeIndex(dropped.columns.size)

            # Handle dropping index labels
            if labels_to_drop:
                dropped.drop(labels_to_drop, axis=0, inplace=True)

        return dropped
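
    # Editor's note: a hedged usage sketch, not in the original source.
    # Dropping an index level and a column label in one call:
    #
    #   >>> df = pd.DataFrame({"x": [1, 2], "y": [3, 4]}).set_index("x")
    #   >>> dropped = df._drop_labels_or_levels(["x", "y"])
    #   >>> list(dropped.columns)
    #   []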

    # ----------------------------------------------------------------------
    # Iteration

    def __hash__(self):
        raise TypeError(
            f"{repr(type(self).__name__)} objects are mutable, "
            f"thus they cannot be hashed"
        )

    def __iter__(self):
        """
        Iterate over info axis.

        Returns
        -------
        iterator
            Info axis as iterator.
        """
        return iter(self._info_axis)

    # can we get a better explanation of this?
    def keys(self):
        """
        Get the 'info axis' (see Indexing for more).

        This is index for Series, columns for DataFrame.

        Returns
        -------
        Index
            Info axis.
        """
        return self._info_axis

    def items(self):
        """Iterate over (label, values) on info axis

        This is index for Series and columns for DataFrame.

        Returns
        -------
        Generator
        """
        for h in self._info_axis:
            yield h, self[h]

    @Appender(items.__doc__)
    def iteritems(self):
        return self.items()
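
    # Editor's note: a hedged illustration, not in the original source.
    # For a DataFrame, ``items`` yields (column label, column) pairs:
    #
    #   >>> df = pd.DataFrame({"a": [1], "b": [2]})
    #   >>> [name for name, col in df.items()]
    #   ['a', 'b']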

    def __len__(self) -> int:
        """Returns length of info axis"""
        return len(self._info_axis)

    def __contains__(self, key) -> bool_t:
        """True if the key is in the info axis"""
        return key in self._info_axis

    @property
    def empty(self) -> bool_t:
        """
        Indicator whether DataFrame is empty.

        True if DataFrame is entirely empty (no items), meaning any of the
        axes are of length 0.

        Returns
        -------
        bool
            If DataFrame is empty, return True, if not return False.

        See Also
        --------
        Series.dropna
        DataFrame.dropna

        Notes
        -----
        If DataFrame contains only NaNs, it is still not considered empty. See
        the example below.

        Examples
        --------
        An example of an actual empty DataFrame. Notice the index is empty:

        >>> df_empty = pd.DataFrame({'A' : []})
        >>> df_empty
        Empty DataFrame
        Columns: [A]
        Index: []
        >>> df_empty.empty
        True

        If we only have NaNs in our DataFrame, it is not considered empty! We
        will need to drop the NaNs to make the DataFrame empty:

        >>> df = pd.DataFrame({'A' : [np.nan]})
        >>> df
            A
        0 NaN
        >>> df.empty
        False
        >>> df.dropna().empty
        True
        """
        return any(len(self._get_axis(a)) == 0 for a in self._AXIS_ORDERS)

    # ----------------------------------------------------------------------
    # Array Interface

    # This is also set in IndexOpsMixin
    # GH#23114 Ensure ndarray.__op__(DataFrame) returns NotImplemented
    __array_priority__ = 1000

    def __array__(self, dtype=None) -> np.ndarray:
        return com.values_from_object(self)

    def __array_wrap__(self, result, context=None):
        result = lib.item_from_zerodim(result)
        if is_scalar(result):
            # e.g. we get here with np.ptp(series)
            # ptp also requires the item_from_zerodim
            return result
        d = self._construct_axes_dict(self._AXIS_ORDERS, copy=False)
        return self._constructor(result, **d).__finalize__(self)
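
    # Editor's note: a hedged illustration, not in the original source.
    # ``__array_wrap__`` is what lets a NumPy ufunc hand back a labeled
    # pandas object instead of a bare ndarray:
    #
    #   >>> s = pd.Series([1.0, 4.0], index=["a", "b"])
    #   >>> np.sqrt(s)
    #   a    1.0
    #   b    2.0
    #   dtype: float64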

    # ideally we would define this to avoid the getattr checks, but
    # is slower
    # @property
    # def __array_interface__(self):
    #    """ provide numpy array interface method """
    #    values = self.values
    #    return dict(typestr=values.dtype.str,shape=values.shape,data=values)

    # ----------------------------------------------------------------------
    # Picklability

    def __getstate__(self) -> Dict[str, Any]:
        meta = {k: getattr(self, k, None) for k in self._metadata}
        return dict(
            _data=self._data,
            _typ=self._typ,
            _metadata=self._metadata,
            attrs=self.attrs,
            **meta,
        )

    def __setstate__(self, state):

        if isinstance(state, BlockManager):
            self._data = state
        elif isinstance(state, dict):
            typ = state.get("_typ")
            if typ is not None:
                attrs = state.get("_attrs", {})
                object.__setattr__(self, "_attrs", attrs)

                # set in the order of internal names
                # to avoid definitional recursion
                # e.g. say fill_value needing _data to be
                # defined
                meta = set(self._internal_names + self._metadata)
                for k in list(meta):
                    if k in state:
                        v = state[k]
                        object.__setattr__(self, k, v)

                for k, v in state.items():
                    if k not in meta:
                        object.__setattr__(self, k, v)

            else:
                self._unpickle_series_compat(state)
        elif len(state) == 2:
            self._unpickle_series_compat(state)

        self._item_cache = {}
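
    # Editor's note: a hedged illustration, not in the original source.
    # ``__getstate__``/``__setstate__`` are what make these objects
    # picklable:
    #
    #   >>> import pickle
    #   >>> s = pd.Series([1, 2], name="x")
    #   >>> pickle.loads(pickle.dumps(s)).equals(s)
    #   True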

    # ----------------------------------------------------------------------
    # Rendering Methods

    def __repr__(self) -> str:
        # string representation based upon iterating over self
        # (since, by definition, `PandasContainers` are iterable)
        prepr = f"[{','.join(map(pprint_thing, self))}]"
        return f"{type(self).__name__}({prepr})"

    def _repr_latex_(self):
        """
        Returns a LaTeX representation for a particular object.
        Mainly for use with nbconvert (jupyter notebook conversion to pdf).
        """
        if config.get_option("display.latex.repr"):
            return self.to_latex()
        else:
            return None

    def _repr_data_resource_(self):
        """
        Not a real Jupyter special repr method, but we use the same
        naming convention.
        """
        if config.get_option("display.html.table_schema"):
            data = self.head(config.get_option("display.max_rows"))
            payload = json.loads(
                data.to_json(orient="table"), object_pairs_hook=collections.OrderedDict
            )
            return payload

    # ----------------------------------------------------------------------
    # I/O Methods

    _shared_docs[
        "to_markdown"
    ] = """
    Print %(klass)s in Markdown-friendly format.

    .. versionadded:: 1.0.0

    Parameters
    ----------
    buf : writable buffer, defaults to sys.stdout
        Where to send the output. By default, the output is printed to
        sys.stdout. Pass a writable buffer if you need to further process
        the output.
    mode : str, optional
        Mode in which file is opened.
    **kwargs
        These parameters will be passed to `tabulate`.

    Returns
    -------
    str
        %(klass)s in Markdown-friendly format.
    """

    _shared_docs[
        "to_excel"
    ] = """
    Write %(klass)s to an Excel sheet.

    To write a single %(klass)s to an Excel .xlsx file it is only necessary to
    specify a target file name. To write to multiple sheets it is necessary to
    create an `ExcelWriter` object with a target file name, and specify a sheet
    in the file to write to.

    Multiple sheets may be written to by specifying unique `sheet_name`.
    With all data written to the file it is necessary to save the changes.
    Note that creating an `ExcelWriter` object with a file name that already
    exists will result in the contents of the existing file being erased.

    Parameters
    ----------
    excel_writer : str or ExcelWriter object
        File path or existing ExcelWriter.
    sheet_name : str, default 'Sheet1'
        Name of sheet which will contain DataFrame.
    na_rep : str, default ''
        Missing data representation.
    float_format : str, optional
        Format string for floating point numbers. For example
        ``float_format="%%.2f"`` will format 0.1234 to 0.12.
    columns : sequence or list of str, optional
        Columns to write.
    header : bool or list of str, default True
        Write out the column names. If a list of string is given it is
        assumed to be aliases for the column names.
    index : bool, default True
        Write row names (index).
    index_label : str or sequence, optional
        Column label for index column(s) if desired. If not specified, and
        `header` and `index` are True, then the index names are used. A
        sequence should be given if the DataFrame uses MultiIndex.
    startrow : int, default 0
        Upper left cell row to dump data frame.
    startcol : int, default 0
        Upper left cell column to dump data frame.
    engine : str, optional
        Write engine to use, 'openpyxl' or 'xlsxwriter'. You can also set this
        via the options ``io.excel.xlsx.writer``, ``io.excel.xls.writer``, and
        ``io.excel.xlsm.writer``.
    merge_cells : bool, default True
        Write MultiIndex and Hierarchical Rows as merged cells.
    encoding : str, optional
        Encoding of the resulting excel file. Only necessary for xlwt,
        other writers support unicode natively.
    inf_rep : str, default 'inf'
        Representation for infinity (there is no native representation for
        infinity in Excel).
    verbose : bool, default True
        Display more information in the error logs.
    freeze_panes : tuple of int (length 2), optional
        Specifies the one-based bottommost row and rightmost column that
        is to be frozen.

    See Also
    --------
    to_csv : Write DataFrame to a comma-separated values (csv) file.
    ExcelWriter : Class for writing DataFrame objects into excel sheets.
    read_excel : Read an Excel file into a pandas DataFrame.
    read_csv : Read a comma-separated values (csv) file into DataFrame.

    Notes
    -----
    For compatibility with :meth:`~DataFrame.to_csv`,
    to_excel serializes lists and dicts to strings before writing.

    Once a workbook has been saved it is not possible to write further
    data without rewriting the whole workbook.

    Examples
    --------

    Create, write to and save a workbook:

    >>> df1 = pd.DataFrame([['a', 'b'], ['c', 'd']],
    ...                    index=['row 1', 'row 2'],
    ...                    columns=['col 1', 'col 2'])
    >>> df1.to_excel("output.xlsx")  # doctest: +SKIP

    To specify the sheet name:

    >>> df1.to_excel("output.xlsx",
    ...              sheet_name='Sheet_name_1')  # doctest: +SKIP

    If you wish to write to more than one sheet in the workbook, it is
    necessary to specify an ExcelWriter object:

    >>> df2 = df1.copy()
    >>> with pd.ExcelWriter('output.xlsx') as writer:  # doctest: +SKIP
    ...     df1.to_excel(writer, sheet_name='Sheet_name_1')
    ...     df2.to_excel(writer, sheet_name='Sheet_name_2')

    ExcelWriter can also be used to append to an existing Excel file:

    >>> with pd.ExcelWriter('output.xlsx',
    ...                     mode='a') as writer:  # doctest: +SKIP
    ...     df.to_excel(writer, sheet_name='Sheet_name_3')

    To set the library that is used to write the Excel file,
    you can pass the `engine` keyword (the default engine is
    automatically chosen depending on the file extension):

    >>> df1.to_excel('output1.xlsx', engine='xlsxwriter')  # doctest: +SKIP
    """

    @Appender(_shared_docs["to_excel"] % dict(klass="object"))
    def to_excel(
        self,
        excel_writer,
        sheet_name="Sheet1",
        na_rep="",
        float_format=None,
        columns=None,
        header=True,
        index=True,
        index_label=None,
        startrow=0,
        startcol=0,
        engine=None,
        merge_cells=True,
        encoding=None,
        inf_rep="inf",
        verbose=True,
        freeze_panes=None,
    ) -> None:
        df = self if isinstance(self, ABCDataFrame) else self.to_frame()

        from pandas.io.formats.excel import ExcelFormatter

        formatter = ExcelFormatter(
            df,
            na_rep=na_rep,
            cols=columns,
            header=header,
            float_format=float_format,
            index=index,
            index_label=index_label,
            merge_cells=merge_cells,
            inf_rep=inf_rep,
        )
        formatter.write(
            excel_writer,
            sheet_name=sheet_name,
            startrow=startrow,
            startcol=startcol,
            freeze_panes=freeze_panes,
            engine=engine,
        )
  1824. def to_json(
  1825. self,
  1826. path_or_buf: Optional[FilePathOrBuffer] = None,
  1827. orient: Optional[str] = None,
  1828. date_format: Optional[str] = None,
  1829. double_precision: int = 10,
  1830. force_ascii: bool_t = True,
  1831. date_unit: str = "ms",
  1832. default_handler: Optional[Callable[[Any], JSONSerializable]] = None,
  1833. lines: bool_t = False,
  1834. compression: Optional[str] = "infer",
  1835. index: bool_t = True,
  1836. indent: Optional[int] = None,
  1837. ) -> Optional[str]:
  1838. """
  1839. Convert the object to a JSON string.
  1840. Note NaN's and None will be converted to null and datetime objects
  1841. will be converted to UNIX timestamps.
  1842. Parameters
  1843. ----------
  1844. path_or_buf : str or file handle, optional
  1845. File path or object. If not specified, the result is returned as
  1846. a string.
  1847. orient : str
  1848. Indication of expected JSON string format.
  1849. * Series:
  1850. - default is 'index'
  1851. - allowed values are: {'split','records','index','table'}.
  1852. * DataFrame:
  1853. - default is 'columns'
  1854. - allowed values are: {'split', 'records', 'index', 'columns',
  1855. 'values', 'table'}.
  1856. * The format of the JSON string:
  1857. - 'split' : dict like {'index' -> [index], 'columns' -> [columns],
  1858. 'data' -> [values]}
  1859. - 'records' : list like [{column -> value}, ... , {column -> value}]
  1860. - 'index' : dict like {index -> {column -> value}}
  1861. - 'columns' : dict like {column -> {index -> value}}
  1862. - 'values' : just the values array
  1863. - 'table' : dict like {'schema': {schema}, 'data': {data}}
  1864. Describing the data, where data component is like ``orient='records'``.
  1865. .. versionchanged:: 0.20.0
  1866. date_format : {None, 'epoch', 'iso'}
  1867. Type of date conversion. 'epoch' = epoch milliseconds,
  1868. 'iso' = ISO8601. The default depends on the `orient`. For
  1869. ``orient='table'``, the default is 'iso'. For all other orients,
  1870. the default is 'epoch'.
  1871. double_precision : int, default 10
  1872. The number of decimal places to use when encoding
  1873. floating point values.
  1874. force_ascii : bool, default True
  1875. Force encoded string to be ASCII.
  1876. date_unit : str, default 'ms' (milliseconds)
  1877. The time unit to encode to, governs timestamp and ISO8601
  1878. precision. One of 's', 'ms', 'us', 'ns' for second, millisecond,
  1879. microsecond, and nanosecond respectively.
  1880. default_handler : callable, default None
  1881. Handler to call if object cannot otherwise be converted to a
  1882. suitable format for JSON. Should receive a single argument which is
  1883. the object to convert and return a serialisable object.
  1884. lines : bool, default False
  1885. If 'orient' is 'records' write out line delimited json format. Will
  1886. throw ValueError if incorrect 'orient' since others are not list
  1887. like.
  1888. compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None}
  1889. A string representing the compression to use in the output file,
  1890. only used when the first argument is a filename. By default, the
  1891. compression is inferred from the filename.
  1892. .. versionadded:: 0.21.0
  1893. .. versionchanged:: 0.24.0
  1894. 'infer' option added and set to default
  1895. index : bool, default True
  1896. Whether to include the index values in the JSON string. Not
  1897. including the index (``index=False``) is only supported when
  1898. orient is 'split' or 'table'.
  1899. .. versionadded:: 0.23.0
  1900. indent : int, optional
  1901. Length of whitespace used to indent each record.
  1902. .. versionadded:: 1.0.0
  1903. Returns
  1904. -------
  1905. None or str
  1906. If path_or_buf is None, returns the resulting json format as a
  1907. string. Otherwise returns None.
  1908. See Also
  1909. --------
  1910. read_json
  1911. Notes
  1912. -----
  1913. The behavior of ``indent=0`` varies from the stdlib, which does not
  1914. indent the output but does insert newlines. Currently, ``indent=0``
  1915. and the default ``indent=None`` are equivalent in pandas, though this
  1916. may change in a future release.
  1917. Examples
  1918. --------
  1919. >>> df = pd.DataFrame([['a', 'b'], ['c', 'd']],
  1920. ... index=['row 1', 'row 2'],
  1921. ... columns=['col 1', 'col 2'])
  1922. >>> df.to_json(orient='split')
  1923. '{"columns":["col 1","col 2"],
  1924. "index":["row 1","row 2"],
  1925. "data":[["a","b"],["c","d"]]}'
1926. Encoding/decoding a DataFrame using ``'records'`` formatted JSON.
  1927. Note that index labels are not preserved with this encoding.
  1928. >>> df.to_json(orient='records')
  1929. '[{"col 1":"a","col 2":"b"},{"col 1":"c","col 2":"d"}]'
1930. Encoding/decoding a DataFrame using ``'index'`` formatted JSON:
  1931. >>> df.to_json(orient='index')
  1932. '{"row 1":{"col 1":"a","col 2":"b"},"row 2":{"col 1":"c","col 2":"d"}}'
1933. Encoding/decoding a DataFrame using ``'columns'`` formatted JSON:
  1934. >>> df.to_json(orient='columns')
  1935. '{"col 1":{"row 1":"a","row 2":"c"},"col 2":{"row 1":"b","row 2":"d"}}'
1936. Encoding/decoding a DataFrame using ``'values'`` formatted JSON:
  1937. >>> df.to_json(orient='values')
  1938. '[["a","b"],["c","d"]]'
  1939. Encoding with Table Schema
  1940. >>> df.to_json(orient='table')
  1941. '{"schema": {"fields": [{"name": "index", "type": "string"},
  1942. {"name": "col 1", "type": "string"},
  1943. {"name": "col 2", "type": "string"}],
  1944. "primaryKey": "index",
  1945. "pandas_version": "0.20.0"},
  1946. "data": [{"index": "row 1", "col 1": "a", "col 2": "b"},
  1947. {"index": "row 2", "col 1": "c", "col 2": "d"}]}'
  1948. """
  1949. from pandas.io import json
  1950. if date_format is None and orient == "table":
  1951. date_format = "iso"
  1952. elif date_format is None:
  1953. date_format = "epoch"
  1954. config.is_nonnegative_int(indent)
  1955. indent = indent or 0
  1956. return json.to_json(
  1957. path_or_buf=path_or_buf,
  1958. obj=self,
  1959. orient=orient,
  1960. date_format=date_format,
  1961. double_precision=double_precision,
  1962. force_ascii=force_ascii,
  1963. date_unit=date_unit,
  1964. default_handler=default_handler,
  1965. lines=lines,
  1966. compression=compression,
  1967. index=index,
  1968. indent=indent,
  1969. )
  1970. def to_hdf(
  1971. self,
  1972. path_or_buf,
  1973. key: str,
  1974. mode: str = "a",
  1975. complevel: Optional[int] = None,
  1976. complib: Optional[str] = None,
  1977. append: bool_t = False,
  1978. format: Optional[str] = None,
  1979. index: bool_t = True,
  1980. min_itemsize: Optional[Union[int, Dict[str, int]]] = None,
  1981. nan_rep=None,
  1982. dropna: Optional[bool_t] = None,
  1983. data_columns: Optional[List[str]] = None,
  1984. errors: str = "strict",
  1985. encoding: str = "UTF-8",
  1986. ) -> None:
  1987. """
  1988. Write the contained data to an HDF5 file using HDFStore.
  1989. Hierarchical Data Format (HDF) is self-describing, allowing an
  1990. application to interpret the structure and contents of a file with
  1991. no outside information. One HDF file can hold a mix of related objects
  1992. which can be accessed as a group or as individual objects.
1993. In order to add another DataFrame or Series to an existing HDF file
1994. please use append mode and a different key.
  1995. For more information see the :ref:`user guide <io.hdf5>`.
  1996. Parameters
  1997. ----------
  1998. path_or_buf : str or pandas.HDFStore
  1999. File path or HDFStore object.
  2000. key : str
  2001. Identifier for the group in the store.
  2002. mode : {'a', 'w', 'r+'}, default 'a'
  2003. Mode to open file:
  2004. - 'w': write, a new file is created (an existing file with
  2005. the same name would be deleted).
  2006. - 'a': append, an existing file is opened for reading and
  2007. writing, and if the file does not exist it is created.
  2008. - 'r+': similar to 'a', but the file must already exist.
  2009. complevel : {0-9}, optional
  2010. Specifies a compression level for data.
  2011. A value of 0 disables compression.
  2012. complib : {'zlib', 'lzo', 'bzip2', 'blosc'}, default 'zlib'
  2013. Specifies the compression library to be used.
  2014. As of v0.20.2 these additional compressors for Blosc are supported
  2015. (default if no compressor specified: 'blosc:blosclz'):
  2016. {'blosc:blosclz', 'blosc:lz4', 'blosc:lz4hc', 'blosc:snappy',
  2017. 'blosc:zlib', 'blosc:zstd'}.
2018. Specifying a compression library which is not available raises
2019. a ValueError.
2020. append : bool, default False
2021. For Table formats, append the input data to the existing table.
  2022. format : {'fixed', 'table', None}, default 'fixed'
  2023. Possible values:
  2024. - 'fixed': Fixed format. Fast writing/reading. Not-appendable,
  2025. nor searchable.
  2026. - 'table': Table format. Write as a PyTables Table structure
  2027. which may perform worse but allow more flexible operations
  2028. like searching / selecting subsets of the data.
  2029. - If None, pd.get_option('io.hdf.default_format') is checked,
2030. followed by a fallback to 'fixed'.
  2031. errors : str, default 'strict'
  2032. Specifies how encoding and decoding errors are to be handled.
  2033. See the errors argument for :func:`open` for a full list
  2034. of options.
2035. encoding : str, default "UTF-8"
Character encoding to use for string data written to the file.
  2036. min_itemsize : dict or int, optional
  2037. Map column names to minimum string sizes for columns.
  2038. nan_rep : Any, optional
  2039. How to represent null values as str.
  2040. Not allowed with append=True.
  2041. data_columns : list of columns or True, optional
  2042. List of columns to create as indexed data columns for on-disk
  2043. queries, or True to use all columns. By default only the axes
  2044. of the object are indexed. See :ref:`io.hdf5-query-data-columns`.
  2045. Applicable only to format='table'.
  2046. See Also
  2047. --------
2048. read_hdf : Read from an HDF file.
  2049. DataFrame.to_parquet : Write a DataFrame to the binary parquet format.
  2050. DataFrame.to_sql : Write to a sql table.
  2051. DataFrame.to_feather : Write out feather-format for DataFrames.
  2052. DataFrame.to_csv : Write out to a csv file.
  2053. Examples
  2054. --------
  2055. >>> df = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]},
  2056. ... index=['a', 'b', 'c'])
  2057. >>> df.to_hdf('data.h5', key='df', mode='w')
  2058. We can add another object to the same file:
  2059. >>> s = pd.Series([1, 2, 3, 4])
  2060. >>> s.to_hdf('data.h5', key='s')
  2061. Reading from HDF file:
  2062. >>> pd.read_hdf('data.h5', 'df')
  2063. A B
  2064. a 1 4
  2065. b 2 5
  2066. c 3 6
  2067. >>> pd.read_hdf('data.h5', 's')
  2068. 0 1
  2069. 1 2
  2070. 2 3
  2071. 3 4
  2072. dtype: int64
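The appendable table format lets us add rows later (a sketch; requires
PyTables and ``format='table'`` on the first write):
>>> df.to_hdf('data.h5', key='t', format='table')  # doctest: +SKIP
>>> df.to_hdf('data.h5', key='t', format='table', append=True)  # doctest: +SKIP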
  2073. Deleting file with data:
  2074. >>> import os
  2075. >>> os.remove('data.h5')
  2076. """
  2077. from pandas.io import pytables
  2078. pytables.to_hdf(
  2079. path_or_buf,
  2080. key,
  2081. self,
  2082. mode=mode,
  2083. complevel=complevel,
  2084. complib=complib,
  2085. append=append,
  2086. format=format,
  2087. index=index,
  2088. min_itemsize=min_itemsize,
  2089. nan_rep=nan_rep,
  2090. dropna=dropna,
  2091. data_columns=data_columns,
  2092. errors=errors,
  2093. encoding=encoding,
  2094. )
  2095. def to_sql(
  2096. self,
  2097. name: str,
  2098. con,
  2099. schema=None,
  2100. if_exists: str = "fail",
  2101. index: bool_t = True,
  2102. index_label=None,
  2103. chunksize=None,
  2104. dtype=None,
  2105. method=None,
  2106. ) -> None:
  2107. """
  2108. Write records stored in a DataFrame to a SQL database.
  2109. Databases supported by SQLAlchemy [1]_ are supported. Tables can be
  2110. newly created, appended to, or overwritten.
  2111. Parameters
  2112. ----------
  2113. name : str
  2114. Name of SQL table.
  2115. con : sqlalchemy.engine.Engine or sqlite3.Connection
  2116. Using SQLAlchemy makes it possible to use any DB supported by that
  2117. library. Legacy support is provided for sqlite3.Connection objects. The user
  2118. is responsible for engine disposal and connection closure for the SQLAlchemy
2119. connectable. See `here \
  2120. <https://docs.sqlalchemy.org/en/13/core/connections.html>`_
  2121. schema : str, optional
  2122. Specify the schema (if database flavor supports this). If None, use
  2123. default schema.
  2124. if_exists : {'fail', 'replace', 'append'}, default 'fail'
  2125. How to behave if the table already exists.
  2126. * fail: Raise a ValueError.
  2127. * replace: Drop the table before inserting new values.
  2128. * append: Insert new values to the existing table.
  2129. index : bool, default True
  2130. Write DataFrame index as a column. Uses `index_label` as the column
  2131. name in the table.
  2132. index_label : str or sequence, default None
  2133. Column label for index column(s). If None is given (default) and
  2134. `index` is True, then the index names are used.
  2135. A sequence should be given if the DataFrame uses MultiIndex.
  2136. chunksize : int, optional
  2137. Specify the number of rows in each batch to be written at a time.
  2138. By default, all rows will be written at once.
  2139. dtype : dict or scalar, optional
  2140. Specifying the datatype for columns. If a dictionary is used, the
  2141. keys should be the column names and the values should be the
  2142. SQLAlchemy types or strings for the sqlite3 legacy mode. If a
  2143. scalar is provided, it will be applied to all columns.
  2144. method : {None, 'multi', callable}, optional
  2145. Controls the SQL insertion clause used:
  2146. * None : Uses standard SQL ``INSERT`` clause (one per row).
  2147. * 'multi': Pass multiple values in a single ``INSERT`` clause.
  2148. * callable with signature ``(pd_table, conn, keys, data_iter)``.
  2149. Details and a sample callable implementation can be found in the
  2150. section :ref:`insert method <io.sql.method>`.
  2151. .. versionadded:: 0.24.0
  2152. Raises
  2153. ------
  2154. ValueError
  2155. When the table already exists and `if_exists` is 'fail' (the
  2156. default).
  2157. See Also
  2158. --------
  2159. read_sql : Read a DataFrame from a table.
  2160. Notes
  2161. -----
  2162. Timezone aware datetime columns will be written as
  2163. ``Timestamp with timezone`` type with SQLAlchemy if supported by the
  2164. database. Otherwise, the datetimes will be stored as timezone unaware
  2165. timestamps local to the original timezone.
  2166. .. versionadded:: 0.24.0
  2167. References
  2168. ----------
  2169. .. [1] http://docs.sqlalchemy.org
  2170. .. [2] https://www.python.org/dev/peps/pep-0249/
  2171. Examples
  2172. --------
  2173. Create an in-memory SQLite database.
  2174. >>> from sqlalchemy import create_engine
  2175. >>> engine = create_engine('sqlite://', echo=False)
  2176. Create a table from scratch with 3 rows.
  2177. >>> df = pd.DataFrame({'name' : ['User 1', 'User 2', 'User 3']})
  2178. >>> df
  2179. name
  2180. 0 User 1
  2181. 1 User 2
  2182. 2 User 3
  2183. >>> df.to_sql('users', con=engine)
  2184. >>> engine.execute("SELECT * FROM users").fetchall()
  2185. [(0, 'User 1'), (1, 'User 2'), (2, 'User 3')]
  2186. >>> df1 = pd.DataFrame({'name' : ['User 4', 'User 5']})
  2187. >>> df1.to_sql('users', con=engine, if_exists='append')
  2188. >>> engine.execute("SELECT * FROM users").fetchall()
  2189. [(0, 'User 1'), (1, 'User 2'), (2, 'User 3'),
  2190. (0, 'User 4'), (1, 'User 5')]
  2191. Overwrite the table with just ``df1``.
  2192. >>> df1.to_sql('users', con=engine, if_exists='replace',
  2193. ... index_label='id')
  2194. >>> engine.execute("SELECT * FROM users").fetchall()
  2195. [(0, 'User 4'), (1, 'User 5')]
  2196. Specify the dtype (especially useful for integers with missing values).
  2197. Notice that while pandas is forced to store the data as floating point,
  2198. the database supports nullable integers. When fetching the data with
  2199. Python, we get back integer scalars.
  2200. >>> df = pd.DataFrame({"A": [1, None, 2]})
  2201. >>> df
  2202. A
  2203. 0 1.0
  2204. 1 NaN
  2205. 2 2.0
  2206. >>> from sqlalchemy.types import Integer
  2207. >>> df.to_sql('integers', con=engine, index=False,
  2208. ... dtype={"A": Integer()})
  2209. >>> engine.execute("SELECT * FROM integers").fetchall()
  2210. [(1,), (None,), (2,)]
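For large frames, batch the write with `chunksize` and pass multiple
rows per ``INSERT`` statement (a sketch):
>>> df.to_sql('integers', con=engine, if_exists='replace', index=False,
...           chunksize=1000, method='multi')  # doctest: +SKIP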
  2211. """
  2212. from pandas.io import sql
  2213. sql.to_sql(
  2214. self,
  2215. name,
  2216. con,
  2217. schema=schema,
  2218. if_exists=if_exists,
  2219. index=index,
  2220. index_label=index_label,
  2221. chunksize=chunksize,
  2222. dtype=dtype,
  2223. method=method,
  2224. )
  2225. def to_pickle(
  2226. self,
  2227. path,
  2228. compression: Optional[str] = "infer",
  2229. protocol: int = pickle.HIGHEST_PROTOCOL,
  2230. ) -> None:
  2231. """
  2232. Pickle (serialize) object to file.
  2233. Parameters
  2234. ----------
  2235. path : str
  2236. File path where the pickled object will be stored.
  2237. compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None}, \
  2238. default 'infer'
  2239. A string representing the compression to use in the output file. By
  2240. default, infers from the file extension in specified path.
  2241. protocol : int
  2242. Int which indicates which protocol should be used by the pickler,
  2243. default HIGHEST_PROTOCOL (see [1]_ paragraph 12.1.2). The possible
  2244. values are 0, 1, 2, 3, 4. A negative value for the protocol
  2245. parameter is equivalent to setting its value to HIGHEST_PROTOCOL.
2246. .. [1] https://docs.python.org/3/library/pickle.html
2247. .. versionadded:: 0.21.0
  2248. See Also
  2249. --------
  2250. read_pickle : Load pickled pandas object (or any object) from file.
  2251. DataFrame.to_hdf : Write DataFrame to an HDF5 file.
  2252. DataFrame.to_sql : Write DataFrame to a SQL database.
  2253. DataFrame.to_parquet : Write a DataFrame to the binary parquet format.
  2254. Examples
  2255. --------
  2256. >>> original_df = pd.DataFrame({"foo": range(5), "bar": range(5, 10)})
  2257. >>> original_df
  2258. foo bar
  2259. 0 0 5
  2260. 1 1 6
  2261. 2 2 7
  2262. 3 3 8
  2263. 4 4 9
  2264. >>> original_df.to_pickle("./dummy.pkl")
  2265. >>> unpickled_df = pd.read_pickle("./dummy.pkl")
  2266. >>> unpickled_df
  2267. foo bar
  2268. 0 0 5
  2269. 1 1 6
  2270. 2 2 7
  2271. 3 3 8
  2272. 4 4 9
  2273. >>> import os
  2274. >>> os.remove("./dummy.pkl")
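Compression is inferred from the file extension (a sketch):
>>> original_df.to_pickle("./dummy.pkl.gz")  # doctest: +SKIP
>>> pd.read_pickle("./dummy.pkl.gz")  # doctest: +SKIP
>>> os.remove("./dummy.pkl.gz")  # doctest: +SKIP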
  2275. """
  2276. from pandas.io.pickle import to_pickle
  2277. to_pickle(self, path, compression=compression, protocol=protocol)
  2278. def to_clipboard(
  2279. self, excel: bool_t = True, sep: Optional[str] = None, **kwargs
  2280. ) -> None:
  2281. r"""
  2282. Copy object to the system clipboard.
  2283. Write a text representation of object to the system clipboard.
  2284. This can be pasted into Excel, for example.
  2285. Parameters
  2286. ----------
  2287. excel : bool, default True
  2288. Produce output in a csv format for easy pasting into excel.
  2289. - True, use the provided separator for csv pasting.
  2290. - False, write a string representation of the object to the clipboard.
  2291. sep : str, default ``'\t'``
  2292. Field delimiter.
  2293. **kwargs
  2294. These parameters will be passed to DataFrame.to_csv.
  2295. See Also
  2296. --------
  2297. DataFrame.to_csv : Write a DataFrame to a comma-separated values
  2298. (csv) file.
  2299. read_clipboard : Read text from clipboard and pass to read_table.
  2300. Notes
  2301. -----
2302. Requirements for your platform:
  2303. - Linux : `xclip`, or `xsel` (with `PyQt4` modules)
  2304. - Windows : none
  2305. - OS X : none
  2306. Examples
  2307. --------
  2308. Copy the contents of a DataFrame to the clipboard.
  2309. >>> df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=['A', 'B', 'C'])
  2310. >>> df.to_clipboard(sep=',')
  2311. ... # Wrote the following to the system clipboard:
  2312. ... # ,A,B,C
  2313. ... # 0,1,2,3
  2314. ... # 1,4,5,6
2315. We can omit the index by passing the keyword `index` and setting
2316. it to False.
  2317. >>> df.to_clipboard(sep=',', index=False)
  2318. ... # Wrote the following to the system clipboard:
  2319. ... # A,B,C
  2320. ... # 1,2,3
  2321. ... # 4,5,6
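With ``excel=False`` the plain string representation is copied instead
(a sketch):
>>> df.to_clipboard(excel=False)  # doctest: +SKIP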
  2322. """
  2323. from pandas.io import clipboards
  2324. clipboards.to_clipboard(self, excel=excel, sep=sep, **kwargs)
  2325. def to_xarray(self):
  2326. """
  2327. Return an xarray object from the pandas object.
  2328. Returns
  2329. -------
  2330. xarray.DataArray or xarray.Dataset
  2331. Data in the pandas structure converted to Dataset if the object is
  2332. a DataFrame, or a DataArray if the object is a Series.
  2333. See Also
  2334. --------
  2335. DataFrame.to_hdf : Write DataFrame to an HDF5 file.
  2336. DataFrame.to_parquet : Write a DataFrame to the binary parquet format.
  2337. Notes
  2338. -----
  2339. See the `xarray docs <http://xarray.pydata.org/en/stable/>`__
  2340. Examples
  2341. --------
  2342. >>> df = pd.DataFrame([('falcon', 'bird', 389.0, 2),
  2343. ... ('parrot', 'bird', 24.0, 2),
  2344. ... ('lion', 'mammal', 80.5, 4),
  2345. ... ('monkey', 'mammal', np.nan, 4)],
  2346. ... columns=['name', 'class', 'max_speed',
  2347. ... 'num_legs'])
  2348. >>> df
  2349. name class max_speed num_legs
  2350. 0 falcon bird 389.0 2
  2351. 1 parrot bird 24.0 2
  2352. 2 lion mammal 80.5 4
  2353. 3 monkey mammal NaN 4
  2354. >>> df.to_xarray()
  2355. <xarray.Dataset>
  2356. Dimensions: (index: 4)
  2357. Coordinates:
  2358. * index (index) int64 0 1 2 3
  2359. Data variables:
  2360. name (index) object 'falcon' 'parrot' 'lion' 'monkey'
  2361. class (index) object 'bird' 'bird' 'mammal' 'mammal'
  2362. max_speed (index) float64 389.0 24.0 80.5 nan
  2363. num_legs (index) int64 2 2 4 4
  2364. >>> df['max_speed'].to_xarray()
  2365. <xarray.DataArray 'max_speed' (index: 4)>
  2366. array([389. , 24. , 80.5, nan])
  2367. Coordinates:
  2368. * index (index) int64 0 1 2 3
  2369. >>> dates = pd.to_datetime(['2018-01-01', '2018-01-01',
  2370. ... '2018-01-02', '2018-01-02'])
  2371. >>> df_multiindex = pd.DataFrame({'date': dates,
  2372. ... 'animal': ['falcon', 'parrot',
  2373. ... 'falcon', 'parrot'],
  2374. ... 'speed': [350, 18, 361, 15]})
  2375. >>> df_multiindex = df_multiindex.set_index(['date', 'animal'])
  2376. >>> df_multiindex
  2377. speed
  2378. date animal
  2379. 2018-01-01 falcon 350
  2380. parrot 18
  2381. 2018-01-02 falcon 361
  2382. parrot 15
  2383. >>> df_multiindex.to_xarray()
  2384. <xarray.Dataset>
  2385. Dimensions: (animal: 2, date: 2)
  2386. Coordinates:
  2387. * date (date) datetime64[ns] 2018-01-01 2018-01-02
  2388. * animal (animal) object 'falcon' 'parrot'
  2389. Data variables:
  2390. speed (date, animal) int64 350 18 361 15
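Converting back to pandas goes through xarray (a sketch; requires the
optional xarray dependency):
>>> df_multiindex.to_xarray().to_dataframe()  # doctest: +SKIP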
  2391. """
  2392. xarray = import_optional_dependency("xarray")
  2393. if self.ndim == 1:
  2394. return xarray.DataArray.from_series(self)
  2395. else:
  2396. return xarray.Dataset.from_dataframe(self)
  2397. @Substitution(returns=fmt.return_docstring)
  2398. def to_latex(
  2399. self,
  2400. buf=None,
  2401. columns=None,
  2402. col_space=None,
  2403. header=True,
  2404. index=True,
  2405. na_rep="NaN",
  2406. formatters=None,
  2407. float_format=None,
  2408. sparsify=None,
  2409. index_names=True,
  2410. bold_rows=False,
  2411. column_format=None,
  2412. longtable=None,
  2413. escape=None,
  2414. encoding=None,
  2415. decimal=".",
  2416. multicolumn=None,
  2417. multicolumn_format=None,
  2418. multirow=None,
  2419. caption=None,
  2420. label=None,
  2421. ):
  2422. r"""
  2423. Render object to a LaTeX tabular, longtable, or nested table/tabular.
  2424. Requires ``\usepackage{booktabs}``. The output can be copy/pasted
  2425. into a main LaTeX document or read from an external file
  2426. with ``\input{table.tex}``.
  2427. .. versionchanged:: 0.20.2
  2428. Added to Series.
  2429. .. versionchanged:: 1.0.0
  2430. Added caption and label arguments.
  2431. Parameters
  2432. ----------
  2433. buf : str, Path or StringIO-like, optional, default None
  2434. Buffer to write to. If None, the output is returned as a string.
  2435. columns : list of label, optional
  2436. The subset of columns to write. Writes all columns by default.
  2437. col_space : int, optional
  2438. The minimum width of each column.
  2439. header : bool or list of str, default True
  2440. Write out the column names. If a list of strings is given,
  2441. it is assumed to be aliases for the column names.
  2442. index : bool, default True
  2443. Write row names (index).
  2444. na_rep : str, default 'NaN'
  2445. Missing data representation.
  2446. formatters : list of functions or dict of {str: function}, optional
  2447. Formatter functions to apply to columns' elements by position or
  2448. name. The result of each function must be a unicode string.
  2449. List must be of length equal to the number of columns.
  2450. float_format : one-parameter function or str, optional, default None
  2451. Formatter for floating point numbers. For example
  2452. ``float_format="%%.2f"`` and ``float_format="{:0.2f}".format`` will
  2453. both result in 0.1234 being formatted as 0.12.
  2454. sparsify : bool, optional
  2455. Set to False for a DataFrame with a hierarchical index to print
  2456. every multiindex key at each row. By default, the value will be
  2457. read from the config module.
  2458. index_names : bool, default True
  2459. Prints the names of the indexes.
  2460. bold_rows : bool, default False
  2461. Make the row labels bold in the output.
  2462. column_format : str, optional
  2463. The columns format as specified in `LaTeX table format
  2464. <https://en.wikibooks.org/wiki/LaTeX/Tables>`__ e.g. 'rcl' for 3
  2465. columns. By default, 'l' will be used for all columns except
  2466. columns of numbers, which default to 'r'.
  2467. longtable : bool, optional
2468. Use a longtable environment instead of tabular. Requires
2469. adding a \usepackage{longtable} to your LaTeX preamble.
2470. By default, the value will be read from the pandas config module.
  2471. escape : bool, optional
2472. When set to False, special LaTeX characters in column names are
2473. not escaped. By default, the value will be read from the pandas
2474. config module.
  2475. encoding : str, optional
  2476. A string representing the encoding to use in the output file,
  2477. defaults to 'utf-8'.
  2478. decimal : str, default '.'
  2479. Character recognized as decimal separator, e.g. ',' in Europe.
  2480. multicolumn : bool, default True
  2481. Use \multicolumn to enhance MultiIndex columns.
  2482. The default will be read from the config module.
  2483. multicolumn_format : str, default 'l'
2484. The alignment for multicolumns, similar to `column_format`.
  2485. The default will be read from the config module.
  2486. multirow : bool, default False
  2487. Use \multirow to enhance MultiIndex rows. Requires adding a
  2488. \usepackage{multirow} to your LaTeX preamble. Will print
  2489. centered labels (instead of top-aligned) across the contained
  2490. rows, separating groups via clines. The default will be read
  2491. from the pandas config module.
  2492. caption : str, optional
  2493. The LaTeX caption to be placed inside ``\caption{}`` in the output.
  2494. .. versionadded:: 1.0.0
  2495. label : str, optional
  2496. The LaTeX label to be placed inside ``\label{}`` in the output.
  2497. This is used with ``\ref{}`` in the main ``.tex`` file.
  2498. .. versionadded:: 1.0.0
  2499. %(returns)s
  2500. See Also
  2501. --------
  2502. DataFrame.to_string : Render a DataFrame to a console-friendly
  2503. tabular output.
  2504. DataFrame.to_html : Render a DataFrame as an HTML table.
  2505. Examples
  2506. --------
  2507. >>> df = pd.DataFrame({'name': ['Raphael', 'Donatello'],
  2508. ... 'mask': ['red', 'purple'],
  2509. ... 'weapon': ['sai', 'bo staff']})
  2510. >>> print(df.to_latex(index=False)) # doctest: +NORMALIZE_WHITESPACE
  2511. \begin{tabular}{lll}
  2512. \toprule
  2513. name & mask & weapon \\
  2514. \midrule
  2515. Raphael & red & sai \\
  2516. Donatello & purple & bo staff \\
  2517. \bottomrule
  2518. \end{tabular}
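Adding a caption and a label to reference with ``\ref{}`` (a sketch;
the caption text is illustrative):
>>> print(df.to_latex(index=False, caption='Turtle gear',
...                   label='tab:gear'))  # doctest: +SKIP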
  2519. """
  2520. # Get defaults from the pandas config
  2521. if self.ndim == 1:
  2522. self = self.to_frame()
  2523. if longtable is None:
  2524. longtable = config.get_option("display.latex.longtable")
  2525. if escape is None:
  2526. escape = config.get_option("display.latex.escape")
  2527. if multicolumn is None:
  2528. multicolumn = config.get_option("display.latex.multicolumn")
  2529. if multicolumn_format is None:
  2530. multicolumn_format = config.get_option("display.latex.multicolumn_format")
  2531. if multirow is None:
  2532. multirow = config.get_option("display.latex.multirow")
  2533. formatter = DataFrameFormatter(
  2534. self,
  2535. columns=columns,
  2536. col_space=col_space,
  2537. na_rep=na_rep,
  2538. header=header,
  2539. index=index,
  2540. formatters=formatters,
  2541. float_format=float_format,
  2542. bold_rows=bold_rows,
  2543. sparsify=sparsify,
  2544. index_names=index_names,
  2545. escape=escape,
  2546. decimal=decimal,
  2547. )
  2548. return formatter.to_latex(
  2549. buf=buf,
  2550. column_format=column_format,
  2551. longtable=longtable,
  2552. encoding=encoding,
  2553. multicolumn=multicolumn,
  2554. multicolumn_format=multicolumn_format,
  2555. multirow=multirow,
  2556. caption=caption,
  2557. label=label,
  2558. )
  2559. def to_csv(
  2560. self,
  2561. path_or_buf: Optional[FilePathOrBuffer] = None,
  2562. sep: str = ",",
  2563. na_rep: str = "",
  2564. float_format: Optional[str] = None,
  2565. columns: Optional[Sequence[Optional[Hashable]]] = None,
  2566. header: Union[bool_t, List[str]] = True,
  2567. index: bool_t = True,
  2568. index_label: Optional[Union[bool_t, str, Sequence[Optional[Hashable]]]] = None,
  2569. mode: str = "w",
  2570. encoding: Optional[str] = None,
  2571. compression: Optional[Union[str, Mapping[str, str]]] = "infer",
  2572. quoting: Optional[int] = None,
  2573. quotechar: str = '"',
  2574. line_terminator: Optional[str] = None,
  2575. chunksize: Optional[int] = None,
  2576. date_format: Optional[str] = None,
  2577. doublequote: bool_t = True,
  2578. escapechar: Optional[str] = None,
  2579. decimal: Optional[str] = ".",
  2580. ) -> Optional[str]:
  2581. r"""
  2582. Write object to a comma-separated values (csv) file.
  2583. .. versionchanged:: 0.24.0
  2584. The order of arguments for Series was changed.
  2585. Parameters
  2586. ----------
  2587. path_or_buf : str or file handle, default None
  2588. File path or object, if None is provided the result is returned as
  2589. a string. If a file object is passed it should be opened with
  2590. `newline=''`, disabling universal newlines.
  2591. .. versionchanged:: 0.24.0
  2592. Was previously named "path" for Series.
  2593. sep : str, default ','
  2594. String of length 1. Field delimiter for the output file.
  2595. na_rep : str, default ''
  2596. Missing data representation.
  2597. float_format : str, default None
  2598. Format string for floating point numbers.
  2599. columns : sequence, optional
  2600. Columns to write.
  2601. header : bool or list of str, default True
  2602. Write out the column names. If a list of strings is given it is
  2603. assumed to be aliases for the column names.
  2604. .. versionchanged:: 0.24.0
  2605. Previously defaulted to False for Series.
  2606. index : bool, default True
  2607. Write row names (index).
  2608. index_label : str or sequence, or False, default None
  2609. Column label for index column(s) if desired. If None is given, and
  2610. `header` and `index` are True, then the index names are used. A
  2611. sequence should be given if the object uses MultiIndex. If
  2612. False do not print fields for index names. Use index_label=False
  2613. for easier importing in R.
  2614. mode : str
  2615. Python write mode, default 'w'.
  2616. encoding : str, optional
  2617. A string representing the encoding to use in the output file,
  2618. defaults to 'utf-8'.
  2619. compression : str or dict, default 'infer'
  2620. If str, represents compression mode. If dict, value at 'method' is
  2621. the compression mode. Compression mode may be any of the following
  2622. possible values: {'infer', 'gzip', 'bz2', 'zip', 'xz', None}. If
  2623. compression mode is 'infer' and `path_or_buf` is path-like, then
  2624. detect compression mode from the following extensions: '.gz',
  2625. '.bz2', '.zip' or '.xz'. (otherwise no compression). If dict given
  2626. and mode is 'zip' or inferred as 'zip', other entries passed as
  2627. additional compression options.
  2628. .. versionchanged:: 1.0.0
  2629. May now be a dict with key 'method' as compression mode
  2630. and other entries as additional compression options if
  2631. compression mode is 'zip'.
  2632. quoting : optional constant from csv module
  2633. Defaults to csv.QUOTE_MINIMAL. If you have set a `float_format`
  2634. then floats are converted to strings and thus csv.QUOTE_NONNUMERIC
  2635. will treat them as non-numeric.
  2636. quotechar : str, default '\"'
  2637. String of length 1. Character used to quote fields.
  2638. line_terminator : str, optional
  2639. The newline character or character sequence to use in the output
  2640. file. Defaults to `os.linesep`, which depends on the OS in which
2641. this method is called (e.g. '\n' for Linux, '\r\n' for Windows).
  2642. .. versionchanged:: 0.24.0
  2643. chunksize : int or None
  2644. Rows to write at a time.
  2645. date_format : str, default None
  2646. Format string for datetime objects.
  2647. doublequote : bool, default True
  2648. Control quoting of `quotechar` inside a field.
  2649. escapechar : str, default None
  2650. String of length 1. Character used to escape `sep` and `quotechar`
  2651. when appropriate.
  2652. decimal : str, default '.'
  2653. Character recognized as decimal separator. E.g. use ',' for
  2654. European data.
  2655. Returns
  2656. -------
  2657. None or str
  2658. If path_or_buf is None, returns the resulting csv format as a
  2659. string. Otherwise returns None.
  2660. See Also
  2661. --------
  2662. read_csv : Load a CSV file into a DataFrame.
  2663. to_excel : Write DataFrame to an Excel file.
  2664. Examples
  2665. --------
  2666. >>> df = pd.DataFrame({'name': ['Raphael', 'Donatello'],
  2667. ... 'mask': ['red', 'purple'],
  2668. ... 'weapon': ['sai', 'bo staff']})
  2669. >>> df.to_csv(index=False)
  2670. 'name,mask,weapon\nRaphael,red,sai\nDonatello,purple,bo staff\n'
2671. Create 'out.zip' containing 'out.csv':
  2672. >>> compression_opts = dict(method='zip',
  2673. ... archive_name='out.csv') # doctest: +SKIP
  2674. >>> df.to_csv('out.zip', index=False,
  2675. ... compression=compression_opts) # doctest: +SKIP
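Write to an in-memory text buffer instead of a file (a sketch):
>>> import io
>>> buf = io.StringIO()
>>> df.to_csv(buf, index=False)  # doctest: +SKIP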
  2676. """
  2677. df = self if isinstance(self, ABCDataFrame) else self.to_frame()
  2678. from pandas.io.formats.csvs import CSVFormatter
  2679. formatter = CSVFormatter(
  2680. df,
  2681. path_or_buf,
  2682. line_terminator=line_terminator,
  2683. sep=sep,
  2684. encoding=encoding,
  2685. compression=compression,
  2686. quoting=quoting,
  2687. na_rep=na_rep,
  2688. float_format=float_format,
  2689. cols=columns,
  2690. header=header,
  2691. index=index,
  2692. index_label=index_label,
  2693. mode=mode,
  2694. chunksize=chunksize,
  2695. quotechar=quotechar,
  2696. date_format=date_format,
  2697. doublequote=doublequote,
  2698. escapechar=escapechar,
  2699. decimal=decimal,
  2700. )
  2701. formatter.save()
  2702. if path_or_buf is None:
  2703. return formatter.path_or_buf.getvalue()
  2704. return None
  2705. # ----------------------------------------------------------------------
  2706. # Fancy Indexing
  2707. @classmethod
  2708. def _create_indexer(cls, name: str, indexer) -> None:
  2709. """Create an indexer like _name in the class.
2710. Kept for compatibility with geopandas. To be removed in the future. See GH27258.
  2711. """
  2712. if getattr(cls, name, None) is None:
  2713. _indexer = functools.partial(indexer, name)
  2714. setattr(cls, name, property(_indexer, doc=indexer.__doc__))
  2715. # ----------------------------------------------------------------------
  2716. # Lookup Caching
  2717. def _set_as_cached(self, item, cacher) -> None:
  2718. """Set the _cacher attribute on the calling object with a weakref to
  2719. cacher.
  2720. """
  2721. self._cacher = (item, weakref.ref(cacher))
  2722. def _reset_cacher(self) -> None:
  2723. """Reset the cacher."""
  2724. if hasattr(self, "_cacher"):
  2725. del self._cacher
  2726. def _maybe_cache_changed(self, item, value) -> None:
  2727. """The object has called back to us saying maybe it has changed.
  2728. """
  2729. self._data.set(item, value)
  2730. @property
  2731. def _is_cached(self) -> bool_t:
  2732. """Return boolean indicating if self is cached or not."""
  2733. return getattr(self, "_cacher", None) is not None
  2734. def _get_cacher(self):
  2735. """return my cacher or None"""
  2736. cacher = getattr(self, "_cacher", None)
  2737. if cacher is not None:
  2738. cacher = cacher[1]()
  2739. return cacher
  2740. def _maybe_update_cacher(
  2741. self, clear: bool_t = False, verify_is_copy: bool_t = True
  2742. ) -> None:
  2743. """
  2744. See if we need to update our parent cacher if clear, then clear our
  2745. cache.
  2746. Parameters
  2747. ----------
  2748. clear : bool, default False
  2749. Clear the item cache.
  2750. verify_is_copy : bool, default True
  2751. Provide is_copy checks.
  2752. """
  2753. cacher = getattr(self, "_cacher", None)
  2754. if cacher is not None:
  2755. ref = cacher[1]()
2756. # we are trying to reference a dead referent, hence
  2757. # a copy
  2758. if ref is None:
  2759. del self._cacher
  2760. else:
  2761. # Note: we need to call ref._maybe_cache_changed even in the
2762. # case where it will raise. (The reason for this is not clear.)
  2763. try:
  2764. ref._maybe_cache_changed(cacher[0], self)
  2765. except AssertionError:
  2766. # ref._data.setitem can raise
  2767. # AssertionError because of shape mismatch
  2768. pass
  2769. if verify_is_copy:
  2770. self._check_setitem_copy(stacklevel=5, t="referant")
  2771. if clear:
  2772. self._clear_item_cache()
  2773. def _clear_item_cache(self) -> None:
  2774. self._item_cache.clear()
  2775. # ----------------------------------------------------------------------
  2776. # Indexing Methods
  2777. def take(
  2778. self: FrameOrSeries, indices, axis=0, is_copy: Optional[bool_t] = None, **kwargs
  2779. ) -> FrameOrSeries:
  2780. """
  2781. Return the elements in the given *positional* indices along an axis.
  2782. This means that we are not indexing according to actual values in
  2783. the index attribute of the object. We are indexing according to the
  2784. actual position of the element in the object.
  2785. Parameters
  2786. ----------
  2787. indices : array-like
  2788. An array of ints indicating which positions to take.
  2789. axis : {0 or 'index', 1 or 'columns', None}, default 0
  2790. The axis on which to select elements. ``0`` means that we are
  2791. selecting rows, ``1`` means that we are selecting columns.
  2792. is_copy : bool
  2793. Before pandas 1.0, ``is_copy=False`` can be specified to ensure
  2794. that the return value is an actual copy. Starting with pandas 1.0,
  2795. ``take`` always returns a copy, and the keyword is therefore
  2796. deprecated.
  2797. .. deprecated:: 1.0.0
  2798. **kwargs
  2799. For compatibility with :meth:`numpy.take`. Has no effect on the
  2800. output.
  2801. Returns
  2802. -------
  2803. taken : same type as caller
  2804. An array-like containing the elements taken from the object.
  2805. See Also
  2806. --------
  2807. DataFrame.loc : Select a subset of a DataFrame by labels.
  2808. DataFrame.iloc : Select a subset of a DataFrame by positions.
  2809. numpy.take : Take elements from an array along an axis.
  2810. Examples
  2811. --------
  2812. >>> df = pd.DataFrame([('falcon', 'bird', 389.0),
  2813. ... ('parrot', 'bird', 24.0),
  2814. ... ('lion', 'mammal', 80.5),
  2815. ... ('monkey', 'mammal', np.nan)],
  2816. ... columns=['name', 'class', 'max_speed'],
  2817. ... index=[0, 2, 3, 1])
  2818. >>> df
  2819. name class max_speed
  2820. 0 falcon bird 389.0
  2821. 2 parrot bird 24.0
  2822. 3 lion mammal 80.5
  2823. 1 monkey mammal NaN
  2824. Take elements at positions 0 and 3 along the axis 0 (default).
  2825. Note how the actual indices selected (0 and 1) do not correspond to
  2826. our selected indices 0 and 3. That's because we are selecting the 0th
  2827. and 3rd rows, not rows whose indices equal 0 and 3.
  2828. >>> df.take([0, 3])
  2829. name class max_speed
  2830. 0 falcon bird 389.0
  2831. 1 monkey mammal NaN
  2832. Take elements at indices 1 and 2 along the axis 1 (column selection).
  2833. >>> df.take([1, 2], axis=1)
  2834. class max_speed
  2835. 0 bird 389.0
  2836. 2 bird 24.0
  2837. 3 mammal 80.5
  2838. 1 mammal NaN
2839. We may take elements using negative integers, which count positions
2840. from the end of the object, just like with Python lists.
  2841. >>> df.take([-1, -2])
  2842. name class max_speed
  2843. 1 monkey mammal NaN
  2844. 3 lion mammal 80.5
  2845. """
  2846. if is_copy is not None:
  2847. warnings.warn(
  2848. "is_copy is deprecated and will be removed in a future version. "
  2849. "'take' always returns a copy, so there is no need to specify this.",
  2850. FutureWarning,
  2851. stacklevel=2,
  2852. )
  2853. nv.validate_take(tuple(), kwargs)
  2854. self._consolidate_inplace()
  2855. new_data = self._data.take(
  2856. indices, axis=self._get_block_manager_axis(axis), verify=True
  2857. )
  2858. return self._constructor(new_data).__finalize__(self)
  2859. def _take_with_is_copy(
  2860. self: FrameOrSeries, indices, axis=0, **kwargs
  2861. ) -> FrameOrSeries:
  2862. """
  2863. Internal version of the `take` method that sets the `_is_copy`
2864. attribute to keep track of the parent dataframe (used in indexing
  2865. for the SettingWithCopyWarning).
  2866. See the docstring of `take` for full explanation of the parameters.
  2867. """
  2868. result = self.take(indices=indices, axis=axis, **kwargs)
2869. # Only mark as a copy if taking actually changed the axis labels.
  2870. if not result._get_axis(axis).equals(self._get_axis(axis)):
  2871. result._set_is_copy(self)
  2872. return result
  2873. def xs(self, key, axis=0, level=None, drop_level: bool_t = True):
  2874. """
  2875. Return cross-section from the Series/DataFrame.
  2876. This method takes a `key` argument to select data at a particular
  2877. level of a MultiIndex.
  2878. Parameters
  2879. ----------
  2880. key : label or tuple of label
  2881. Label contained in the index, or partially in a MultiIndex.
  2882. axis : {0 or 'index', 1 or 'columns'}, default 0
  2883. Axis to retrieve cross-section on.
  2884. level : object, defaults to first n levels (n=1 or len(key))
  2885. In case of a key partially contained in a MultiIndex, indicate
2886. which levels are used. Levels can be referred to by label or position.
  2887. drop_level : bool, default True
  2888. If False, returns object with same levels as self.
  2889. Returns
  2890. -------
  2891. Series or DataFrame
  2892. Cross-section from the original Series or DataFrame
  2893. corresponding to the selected index levels.
  2894. See Also
  2895. --------
  2896. DataFrame.loc : Access a group of rows and columns
  2897. by label(s) or a boolean array.
  2898. DataFrame.iloc : Purely integer-location based indexing
  2899. for selection by position.
  2900. Notes
  2901. -----
2902. `xs` cannot be used to set values.
  2903. MultiIndex Slicers is a generic way to get/set values on
  2904. any level or levels.
  2905. It is a superset of `xs` functionality, see
  2906. :ref:`MultiIndex Slicers <advanced.mi_slicers>`.
  2907. Examples
  2908. --------
  2909. >>> d = {'num_legs': [4, 4, 2, 2],
  2910. ... 'num_wings': [0, 0, 2, 2],
  2911. ... 'class': ['mammal', 'mammal', 'mammal', 'bird'],
  2912. ... 'animal': ['cat', 'dog', 'bat', 'penguin'],
  2913. ... 'locomotion': ['walks', 'walks', 'flies', 'walks']}
  2914. >>> df = pd.DataFrame(data=d)
  2915. >>> df = df.set_index(['class', 'animal', 'locomotion'])
  2916. >>> df
  2917. num_legs num_wings
  2918. class animal locomotion
  2919. mammal cat walks 4 0
  2920. dog walks 4 0
  2921. bat flies 2 2
  2922. bird penguin walks 2 2
  2923. Get values at specified index
  2924. >>> df.xs('mammal')
  2925. num_legs num_wings
  2926. animal locomotion
  2927. cat walks 4 0
  2928. dog walks 4 0
  2929. bat flies 2 2
  2930. Get values at several indexes
  2931. >>> df.xs(('mammal', 'dog'))
  2932. num_legs num_wings
  2933. locomotion
  2934. walks 4 0
  2935. Get values at specified index and level
  2936. >>> df.xs('cat', level=1)
  2937. num_legs num_wings
  2938. class locomotion
  2939. mammal walks 4 0
  2940. Get values at several indexes and levels
  2941. >>> df.xs(('bird', 'walks'),
  2942. ... level=[0, 'locomotion'])
  2943. num_legs num_wings
  2944. animal
  2945. penguin 2 2
  2946. Get values at specified column and axis
  2947. >>> df.xs('num_wings', axis=1)
  2948. class animal locomotion
  2949. mammal cat walks 0
  2950. dog walks 0
  2951. bat flies 2
  2952. bird penguin walks 2
  2953. Name: num_wings, dtype: int64
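Keep the selected level in the result with ``drop_level=False``
(a sketch):
>>> df.xs('cat', level=1, drop_level=False)  # doctest: +SKIP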
  2954. """
  2955. axis = self._get_axis_number(axis)
  2956. labels = self._get_axis(axis)
  2957. if level is not None:
  2958. loc, new_ax = labels.get_loc_level(key, level=level, drop_level=drop_level)
  2959. # create the tuple of the indexer
  2960. _indexer = [slice(None)] * self.ndim
  2961. _indexer[axis] = loc
  2962. indexer = tuple(_indexer)
  2963. result = self.iloc[indexer]
  2964. setattr(result, result._get_axis_name(axis), new_ax)
  2965. return result
  2966. if axis == 1:
  2967. return self[key]
  2968. self._consolidate_inplace()
  2969. index = self.index
  2970. if isinstance(index, MultiIndex):
  2971. loc, new_index = self.index.get_loc_level(key, drop_level=drop_level)
  2972. else:
  2973. loc = self.index.get_loc(key)
  2974. if isinstance(loc, np.ndarray):
  2975. if loc.dtype == np.bool_:
  2976. (inds,) = loc.nonzero()
  2977. return self._take_with_is_copy(inds, axis=axis)
  2978. else:
  2979. return self._take_with_is_copy(loc, axis=axis)
  2980. if not is_scalar(loc):
  2981. new_index = self.index[loc]
  2982. if is_scalar(loc):
  2983. new_values = self._data.fast_xs(loc)
  2984. # may need to box a datelike-scalar
  2985. #
  2986. # if we encounter an array-like and we only have 1 dim
2987. # that means that there are lists/ndarrays inside the Series!
  2988. # so just return them (GH 6394)
  2989. if not is_list_like(new_values) or self.ndim == 1:
  2990. return com.maybe_box_datetimelike(new_values)
  2991. result = self._constructor_sliced(
  2992. new_values,
  2993. index=self.columns,
  2994. name=self.index[loc],
  2995. dtype=new_values.dtype,
  2996. )
  2997. else:
  2998. result = self.iloc[loc]
  2999. result.index = new_index
  3000. # this could be a view
  3001. # but only in a single-dtyped view sliceable case
  3002. result._set_is_copy(self, copy=not result._is_view)
  3003. return result
  3004. _xs: Callable = xs
  3005. def __getitem__(self, item):
  3006. raise AbstractMethodError(self)
  3007. def _get_item_cache(self, item):
  3008. """Return the cached item, item represents a label indexer."""
  3009. cache = self._item_cache
  3010. res = cache.get(item)
  3011. if res is None:
  3012. values = self._data.get(item)
  3013. res = self._box_item_values(item, values)
  3014. cache[item] = res
  3015. res._set_as_cached(item, self)
  3016. # for a chain
  3017. res._is_copy = self._is_copy
  3018. return res
  3019. def _iget_item_cache(self, item):
  3020. """Return the cached item, item represents a positional indexer."""
  3021. ax = self._info_axis
  3022. if ax.is_unique:
  3023. lower = self._get_item_cache(ax[item])
  3024. else:
  3025. lower = self._take_with_is_copy(item, axis=self._info_axis_number)
  3026. return lower
  3027. def _box_item_values(self, key, values):
  3028. raise AbstractMethodError(self)
  3029. def _slice(self: FrameOrSeries, slobj: slice, axis=0, kind=None) -> FrameOrSeries:
  3030. """
  3031. Construct a slice of this container.
  3032. kind parameter is maintained for compatibility with Series slicing.
  3033. """
  3034. axis = self._get_block_manager_axis(axis)
  3035. result = self._constructor(self._data.get_slice(slobj, axis=axis))
  3036. result = result.__finalize__(self)
  3037. # this could be a view
  3038. # but only in a single-dtyped view sliceable case
  3039. is_copy = axis != 0 or result._is_view
  3040. result._set_is_copy(self, copy=is_copy)
  3041. return result
  3042. def _set_item(self, key, value) -> None:
  3043. self._data.set(key, value)
  3044. self._clear_item_cache()
  3045. def _set_is_copy(self, ref=None, copy: bool_t = True) -> None:
  3046. if not copy:
  3047. self._is_copy = None
  3048. else:
  3049. if ref is not None:
  3050. self._is_copy = weakref.ref(ref)
  3051. else:
  3052. self._is_copy = None
  3053. def _check_is_chained_assignment_possible(self) -> bool_t:
  3054. """
  3055. Check if we are a view, have a cacher, and are of mixed type.
  3056. If so, then force a setitem_copy check.
3057. Should be called just prior to setting a value.
3058. Returns True if we are a view and are cached, meaning that the
3059. cacher should be updated following the set; returns False
3060. otherwise.
  3061. """
  3062. if self._is_view and self._is_cached:
  3063. ref = self._get_cacher()
  3064. if ref is not None and ref._is_mixed_type:
  3065. self._check_setitem_copy(stacklevel=4, t="referant", force=True)
  3066. return True
  3067. elif self._is_copy:
  3068. self._check_setitem_copy(stacklevel=4, t="referant")
  3069. return False
  3070. def _check_setitem_copy(self, stacklevel=4, t="setting", force=False):
  3071. """
  3072. Parameters
  3073. ----------
  3074. stacklevel : int, default 4
  3075. the level to show of the stack when the error is output
  3076. t : str, the type of setting error
  3077. force : bool, default False
  3078. If True, then force showing an error.
  3079. validate if we are doing a setitem on a chained copy.
  3080. If you call this function, be sure to set the stacklevel such that the
  3081. user will see the error *at the level of setting*
  3082. It is technically possible to figure out that we are setting on
  3083. a copy even WITH a multi-dtyped pandas object. In other words, some
  3084. blocks may be views while other are not. Currently _is_view will ALWAYS
  3085. return False for multi-blocks to avoid having to handle this case.
  3086. df = DataFrame(np.arange(0,9), columns=['count'])
  3087. df['group'] = 'b'
  3088. # This technically need not raise SettingWithCopy if both are view
3089. # (which is not generally guaranteed, but is usually True). However,
  3090. # this is in general not a good practice and we recommend using .loc.
  3091. df.iloc[0:5]['group'] = 'a'
  3092. """
  3093. # return early if the check is not needed
  3094. if not (force or self._is_copy):
  3095. return
  3096. value = config.get_option("mode.chained_assignment")
  3097. if value is None:
  3098. return
  3099. # see if the copy is not actually referred; if so, then dissolve
  3100. # the copy weakref
  3101. if self._is_copy is not None and not isinstance(self._is_copy, str):
  3102. r = self._is_copy()
  3103. if not gc.get_referents(r) or r.shape == self.shape:
  3104. self._is_copy = None
  3105. return
  3106. # a custom message
  3107. if isinstance(self._is_copy, str):
  3108. t = self._is_copy
  3109. elif t == "referant":
  3110. t = (
  3111. "\n"
  3112. "A value is trying to be set on a copy of a slice from a "
  3113. "DataFrame\n\n"
  3114. "See the caveats in the documentation: "
  3115. "https://pandas.pydata.org/pandas-docs/stable/user_guide/"
  3116. "indexing.html#returning-a-view-versus-a-copy"
  3117. )
  3118. else:
  3119. t = (
  3120. "\n"
  3121. "A value is trying to be set on a copy of a slice from a "
  3122. "DataFrame.\n"
  3123. "Try using .loc[row_indexer,col_indexer] = value "
  3124. "instead\n\nSee the caveats in the documentation: "
  3125. "https://pandas.pydata.org/pandas-docs/stable/user_guide/"
  3126. "indexing.html#returning-a-view-versus-a-copy"
  3127. )
  3128. if value == "raise":
  3129. raise com.SettingWithCopyError(t)
  3130. elif value == "warn":
  3131. warnings.warn(t, com.SettingWithCopyWarning, stacklevel=stacklevel)
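# Usage sketch (illustrative, not from the original source): the
# recommended way to avoid the chained-assignment warning this check
# emits is a single .loc call, e.g. for the docstring's ``df``:
#
#     df.loc[df.index[0:5], 'group'] = 'a'   # instead of df.iloc[0:5]['group'] = 'a'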
  3132. def __delitem__(self, key) -> None:
  3133. """
  3134. Delete item
  3135. """
  3136. deleted = False
  3137. maybe_shortcut = False
  3138. if self.ndim == 2 and isinstance(self.columns, MultiIndex):
  3139. try:
  3140. maybe_shortcut = key not in self.columns._engine
  3141. except TypeError:
  3142. pass
  3143. if maybe_shortcut:
  3144. # Allow shorthand to delete all columns whose first len(key)
  3145. # elements match key:
  3146. if not isinstance(key, tuple):
  3147. key = (key,)
  3148. for col in self.columns:
  3149. if isinstance(col, tuple) and col[: len(key)] == key:
  3150. del self[col]
  3151. deleted = True
  3152. if not deleted:
  3153. # If the above loop ran and didn't delete anything because
  3154. # there was no match, this call should raise the appropriate
  3155. # exception:
  3156. self._data.delete(key)
  3157. # delete from the caches
  3158. try:
  3159. del self._item_cache[key]
  3160. except KeyError:
  3161. pass
  3162. # ----------------------------------------------------------------------
  3163. # Unsorted
  3164. def get(self, key, default=None):
  3165. """
  3166. Get item from object for given key (ex: DataFrame column).
  3167. Returns default value if not found.
  3168. Parameters
  3169. ----------
  3170. key : object
  3171. Returns
  3172. -------
  3173. value : same type as items contained in object
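Examples
--------
A minimal sketch with a hypothetical frame:
>>> df = pd.DataFrame({'A': [1, 2]})
>>> df.get('A')  # doctest: +SKIP
>>> df.get('missing', default='n/a')  # doctest: +SKIP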
  3174. """
  3175. try:
  3176. return self[key]
  3177. except (KeyError, ValueError, IndexError):
  3178. return default
  3179. @property
  3180. def _is_view(self):
  3181. """Return boolean indicating if self is view of another array """
  3182. return self._data.is_view
  3183. def reindex_like(
  3184. self: FrameOrSeries,
  3185. other,
  3186. method: Optional[str] = None,
  3187. copy: bool_t = True,
  3188. limit=None,
  3189. tolerance=None,
  3190. ) -> FrameOrSeries:
  3191. """
  3192. Return an object with matching indices as other object.
  3193. Conform the object to the same index on all axes. Optional
  3194. filling logic, placing NaN in locations having no value
  3195. in the previous index. A new object is produced unless the
  3196. new index is equivalent to the current one and copy=False.
  3197. Parameters
  3198. ----------
  3199. other : Object of the same data type
  3200. Its row and column indices are used to define the new indices
  3201. of this object.
  3202. method : {None, 'backfill'/'bfill', 'pad'/'ffill', 'nearest'}
  3203. Method to use for filling holes in reindexed DataFrame.
  3204. Please note: this is only applicable to DataFrames/Series with a
  3205. monotonically increasing/decreasing index.
  3206. * None (default): don't fill gaps
  3207. * pad / ffill: propagate last valid observation forward to next
  3208. valid
  3209. * backfill / bfill: use next valid observation to fill gap
  3210. * nearest: use nearest valid observations to fill gap.
  3211. copy : bool, default True
  3212. Return a new object, even if the passed indexes are the same.
  3213. limit : int, default None
  3214. Maximum number of consecutive labels to fill for inexact matches.
  3215. tolerance : optional
  3216. Maximum distance between original and new labels for inexact
3217. matches. The values of the index at the matching locations must
  3218. satisfy the equation ``abs(index[indexer] - target) <= tolerance``.
  3219. Tolerance may be a scalar value, which applies the same tolerance
  3220. to all values, or list-like, which applies variable tolerance per
  3221. element. List-like includes list, tuple, array, Series, and must be
  3222. the same size as the index and its dtype must exactly match the
  3223. index's type.
  3224. .. versionadded:: 0.21.0 (list-like tolerance)
  3225. Returns
  3226. -------
  3227. Series or DataFrame
  3228. Same type as caller, but with changed indices on each axis.
  3229. See Also
  3230. --------
  3231. DataFrame.set_index : Set row labels.
  3232. DataFrame.reset_index : Remove row labels or move them to new columns.
  3233. DataFrame.reindex : Change to new indices or expand indices.
  3234. Notes
  3235. -----
  3236. Same as calling
  3237. ``.reindex(index=other.index, columns=other.columns,...)``.
  3238. Examples
  3239. --------
  3240. >>> df1 = pd.DataFrame([[24.3, 75.7, 'high'],
  3241. ... [31, 87.8, 'high'],
  3242. ... [22, 71.6, 'medium'],
  3243. ... [35, 95, 'medium']],
  3244. ... columns=['temp_celsius', 'temp_fahrenheit',
  3245. ... 'windspeed'],
  3246. ... index=pd.date_range(start='2014-02-12',
  3247. ... end='2014-02-15', freq='D'))
  3248. >>> df1
  3249. temp_celsius temp_fahrenheit windspeed
  3250. 2014-02-12 24.3 75.7 high
  3251. 2014-02-13 31.0 87.8 high
  3252. 2014-02-14 22.0 71.6 medium
  3253. 2014-02-15 35.0 95.0 medium
  3254. >>> df2 = pd.DataFrame([[28, 'low'],
  3255. ... [30, 'low'],
  3256. ... [35.1, 'medium']],
  3257. ... columns=['temp_celsius', 'windspeed'],
  3258. ... index=pd.DatetimeIndex(['2014-02-12', '2014-02-13',
  3259. ... '2014-02-15']))
  3260. >>> df2
  3261. temp_celsius windspeed
  3262. 2014-02-12 28.0 low
  3263. 2014-02-13 30.0 low
  3264. 2014-02-15 35.1 medium
  3265. >>> df2.reindex_like(df1)
  3266. temp_celsius temp_fahrenheit windspeed
  3267. 2014-02-12 28.0 NaN low
  3268. 2014-02-13 30.0 NaN low
  3269. 2014-02-14 NaN NaN NaN
  3270. 2014-02-15 35.1 NaN medium
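Gaps can instead be forward filled (a sketch; requires a monotonic
index):
>>> df2.reindex_like(df1, method='ffill')  # doctest: +SKIP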
  3271. """
  3272. d = other._construct_axes_dict(
  3273. axes=self._AXIS_ORDERS,
  3274. method=method,
  3275. copy=copy,
  3276. limit=limit,
  3277. tolerance=tolerance,
  3278. )
  3279. return self.reindex(**d)
  3280. def drop(
  3281. self,
  3282. labels=None,
  3283. axis=0,
  3284. index=None,
  3285. columns=None,
  3286. level=None,
  3287. inplace: bool_t = False,
  3288. errors: str = "raise",
  3289. ):
  3290. inplace = validate_bool_kwarg(inplace, "inplace")
  3291. if labels is not None:
  3292. if index is not None or columns is not None:
  3293. raise ValueError("Cannot specify both 'labels' and 'index'/'columns'")
  3294. axis_name = self._get_axis_name(axis)
  3295. axes = {axis_name: labels}
  3296. elif index is not None or columns is not None:
  3297. axes, _ = self._construct_axes_from_arguments((index, columns), {})
  3298. else:
  3299. raise ValueError(
  3300. "Need to specify at least one of 'labels', 'index' or 'columns'"
  3301. )
  3302. obj = self
  3303. for axis, labels in axes.items():
  3304. if labels is not None:
  3305. obj = obj._drop_axis(labels, axis, level=level, errors=errors)
  3306. if inplace:
  3307. self._update_inplace(obj)
  3308. else:
  3309. return obj
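# Usage sketch (illustrative, not from the original source): ``drop``
# accepts either positional ``labels`` plus an ``axis``, or the
# ``index``/``columns`` keywords:
#
#     df.drop(['a', 'b'])          # drop rows by label
#     df.drop(columns=['temp'])    # drop a column by name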
  3310. def _drop_axis(
  3311. self: FrameOrSeries, labels, axis, level=None, errors: str = "raise"
  3312. ) -> FrameOrSeries:
  3313. """
  3314. Drop labels from specified axis. Used in the ``drop`` method
  3315. internally.
  3316. Parameters
  3317. ----------
  3318. labels : single label or list-like
  3319. axis : int or axis name
  3320. level : int or level name, default None
  3321. For MultiIndex
  3322. errors : {'ignore', 'raise'}, default 'raise'
If 'ignore', suppress the error and only existing labels are dropped.
  3324. """
  3325. axis = self._get_axis_number(axis)
  3326. axis_name = self._get_axis_name(axis)
  3327. axis = self._get_axis(axis)
  3328. if axis.is_unique:
  3329. if level is not None:
  3330. if not isinstance(axis, MultiIndex):
  3331. raise AssertionError("axis must be a MultiIndex")
  3332. new_axis = axis.drop(labels, level=level, errors=errors)
  3333. else:
  3334. new_axis = axis.drop(labels, errors=errors)
  3335. result = self.reindex(**{axis_name: new_axis})
  3336. # Case for non-unique axis
  3337. else:
  3338. labels = ensure_object(com.index_labels_to_array(labels))
  3339. if level is not None:
  3340. if not isinstance(axis, MultiIndex):
  3341. raise AssertionError("axis must be a MultiIndex")
  3342. indexer = ~axis.get_level_values(level).isin(labels)
  3343. # GH 18561 MultiIndex.drop should raise if label is absent
  3344. if errors == "raise" and indexer.all():
  3345. raise KeyError(f"{labels} not found in axis")
  3346. else:
  3347. indexer = ~axis.isin(labels)
  3348. # Check if label doesn't exist along axis
  3349. labels_missing = (axis.get_indexer_for(labels) == -1).any()
  3350. if errors == "raise" and labels_missing:
  3351. raise KeyError(f"{labels} not found in axis")
  3352. slicer = [slice(None)] * self.ndim
  3353. slicer[self._get_axis_number(axis_name)] = indexer
  3354. result = self.loc[tuple(slicer)]
  3355. return result
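# Illustration of the non-unique branch above: with a duplicated index,
# boolean masking drops *all* occurrences of the label:
#
#   >>> s = pd.Series([1, 2, 3], index=['a', 'a', 'b'])
#   >>> s.drop('a')
#   b    3
#   dtype: int64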
  3356. def _update_inplace(self, result, verify_is_copy: bool_t = True) -> None:
  3357. """
  3358. Replace self internals with result.
  3359. Parameters
  3360. ----------
  3361. verify_is_copy : bool, default True
  3362. Provide is_copy checks.
  3363. """
  3364. # NOTE: This does *not* call __finalize__ and that's an explicit
  3365. # decision that we may revisit in the future.
  3366. self._reset_cache()
  3367. self._clear_item_cache()
  3368. self._data = getattr(result, "_data", result)
  3369. self._maybe_update_cacher(verify_is_copy=verify_is_copy)
  3370. def add_prefix(self: FrameOrSeries, prefix: str) -> FrameOrSeries:
  3371. """
  3372. Prefix labels with string `prefix`.
  3373. For Series, the row labels are prefixed.
  3374. For DataFrame, the column labels are prefixed.
  3375. Parameters
  3376. ----------
  3377. prefix : str
  3378. The string to add before each label.
  3379. Returns
  3380. -------
  3381. Series or DataFrame
  3382. New Series or DataFrame with updated labels.
  3383. See Also
  3384. --------
  3385. Series.add_suffix: Suffix row labels with string `suffix`.
  3386. DataFrame.add_suffix: Suffix column labels with string `suffix`.
  3387. Examples
  3388. --------
  3389. >>> s = pd.Series([1, 2, 3, 4])
  3390. >>> s
  3391. 0 1
  3392. 1 2
  3393. 2 3
  3394. 3 4
  3395. dtype: int64
  3396. >>> s.add_prefix('item_')
  3397. item_0 1
  3398. item_1 2
  3399. item_2 3
  3400. item_3 4
  3401. dtype: int64
  3402. >>> df = pd.DataFrame({'A': [1, 2, 3, 4], 'B': [3, 4, 5, 6]})
  3403. >>> df
  3404. A B
  3405. 0 1 3
  3406. 1 2 4
  3407. 2 3 5
  3408. 3 4 6
  3409. >>> df.add_prefix('col_')
  3410. col_A col_B
  3411. 0 1 3
  3412. 1 2 4
  3413. 2 3 5
  3414. 3 4 6
  3415. """
  3416. f = functools.partial("{prefix}{}".format, prefix=prefix)
  3417. mapper = {self._info_axis_name: f}
  3418. return self.rename(**mapper) # type: ignore
  3419. def add_suffix(self: FrameOrSeries, suffix: str) -> FrameOrSeries:
  3420. """
  3421. Suffix labels with string `suffix`.
  3422. For Series, the row labels are suffixed.
  3423. For DataFrame, the column labels are suffixed.
  3424. Parameters
  3425. ----------
  3426. suffix : str
  3427. The string to add after each label.
  3428. Returns
  3429. -------
  3430. Series or DataFrame
  3431. New Series or DataFrame with updated labels.
  3432. See Also
  3433. --------
  3434. Series.add_prefix: Prefix row labels with string `prefix`.
  3435. DataFrame.add_prefix: Prefix column labels with string `prefix`.
  3436. Examples
  3437. --------
  3438. >>> s = pd.Series([1, 2, 3, 4])
  3439. >>> s
  3440. 0 1
  3441. 1 2
  3442. 2 3
  3443. 3 4
  3444. dtype: int64
  3445. >>> s.add_suffix('_item')
  3446. 0_item 1
  3447. 1_item 2
  3448. 2_item 3
  3449. 3_item 4
  3450. dtype: int64
  3451. >>> df = pd.DataFrame({'A': [1, 2, 3, 4], 'B': [3, 4, 5, 6]})
  3452. >>> df
  3453. A B
  3454. 0 1 3
  3455. 1 2 4
  3456. 2 3 5
  3457. 3 4 6
  3458. >>> df.add_suffix('_col')
  3459. A_col B_col
  3460. 0 1 3
  3461. 1 2 4
  3462. 2 3 5
  3463. 3 4 6
  3464. """
  3465. f = functools.partial("{}{suffix}".format, suffix=suffix)
  3466. mapper = {self._info_axis_name: f}
  3467. return self.rename(**mapper) # type: ignore
  3468. def sort_values(
  3469. self,
  3470. by=None,
  3471. axis=0,
  3472. ascending=True,
  3473. inplace: bool_t = False,
  3474. kind: str = "quicksort",
  3475. na_position: str = "last",
  3476. ignore_index: bool_t = False,
  3477. ):
  3478. """
  3479. Sort by the values along either axis.
  3480. Parameters
  3481. ----------%(optional_by)s
  3482. axis : %(axes_single_arg)s, default 0
  3483. Axis to be sorted.
  3484. ascending : bool or list of bool, default True
Sort ascending vs. descending. Specify list for multiple sort
orders. If this is a list of bools, it must match the length of
``by``.
  3488. inplace : bool, default False
  3489. If True, perform operation in-place.
  3490. kind : {'quicksort', 'mergesort', 'heapsort'}, default 'quicksort'
Choice of sorting algorithm. See also :func:`numpy.sort` for more
information. `mergesort` is the only stable algorithm. For
  3493. DataFrames, this option is only applied when sorting on a single
  3494. column or label.
  3495. na_position : {'first', 'last'}, default 'last'
  3496. Puts NaNs at the beginning if `first`; `last` puts NaNs at the
  3497. end.
  3498. ignore_index : bool, default False
  3499. If True, the resulting axis will be labeled 0, 1, …, n - 1.
  3500. .. versionadded:: 1.0.0
  3501. Returns
  3502. -------
  3503. sorted_obj : DataFrame or None
  3504. DataFrame with sorted values if inplace=False, None otherwise.
  3505. Examples
  3506. --------
  3507. >>> df = pd.DataFrame({
  3508. ... 'col1': ['A', 'A', 'B', np.nan, 'D', 'C'],
  3509. ... 'col2': [2, 1, 9, 8, 7, 4],
  3510. ... 'col3': [0, 1, 9, 4, 2, 3],
  3511. ... })
  3512. >>> df
  3513. col1 col2 col3
  3514. 0 A 2 0
  3515. 1 A 1 1
  3516. 2 B 9 9
  3517. 3 NaN 8 4
  3518. 4 D 7 2
  3519. 5 C 4 3
  3520. Sort by col1
  3521. >>> df.sort_values(by=['col1'])
  3522. col1 col2 col3
  3523. 0 A 2 0
  3524. 1 A 1 1
  3525. 2 B 9 9
  3526. 5 C 4 3
  3527. 4 D 7 2
  3528. 3 NaN 8 4
  3529. Sort by multiple columns
  3530. >>> df.sort_values(by=['col1', 'col2'])
  3531. col1 col2 col3
  3532. 1 A 1 1
  3533. 0 A 2 0
  3534. 2 B 9 9
  3535. 5 C 4 3
  3536. 4 D 7 2
  3537. 3 NaN 8 4
  3538. Sort Descending
  3539. >>> df.sort_values(by='col1', ascending=False)
  3540. col1 col2 col3
  3541. 4 D 7 2
  3542. 5 C 4 3
  3543. 2 B 9 9
  3544. 0 A 2 0
  3545. 1 A 1 1
  3546. 3 NaN 8 4
  3547. Putting NAs first
  3548. >>> df.sort_values(by='col1', ascending=False, na_position='first')
  3549. col1 col2 col3
  3550. 3 NaN 8 4
  3551. 4 D 7 2
  3552. 5 C 4 3
  3553. 2 B 9 9
  3554. 0 A 2 0
  3555. 1 A 1 1
  3556. """
  3557. raise AbstractMethodError(self)
  3558. def sort_index(
  3559. self,
  3560. axis=0,
  3561. level=None,
  3562. ascending: bool_t = True,
  3563. inplace: bool_t = False,
  3564. kind: str = "quicksort",
  3565. na_position: str = "last",
  3566. sort_remaining: bool_t = True,
  3567. ignore_index: bool_t = False,
  3568. ):
  3569. """
  3570. Sort object by labels (along an axis).
  3571. Parameters
  3572. ----------
  3573. axis : {0 or 'index', 1 or 'columns'}, default 0
  3574. The axis along which to sort. The value 0 identifies the rows,
  3575. and 1 identifies the columns.
  3576. level : int or level name or list of ints or list of level names
  3577. If not None, sort on values in specified index level(s).
  3578. ascending : bool, default True
  3579. Sort ascending vs. descending.
  3580. inplace : bool, default False
  3581. If True, perform operation in-place.
  3582. kind : {'quicksort', 'mergesort', 'heapsort'}, default 'quicksort'
Choice of sorting algorithm. See also :func:`numpy.sort` for more
information. `mergesort` is the only stable algorithm. For
  3585. DataFrames, this option is only applied when sorting on a single
  3586. column or label.
  3587. na_position : {'first', 'last'}, default 'last'
  3588. Puts NaNs at the beginning if `first`; `last` puts NaNs at the end.
  3589. Not implemented for MultiIndex.
  3590. sort_remaining : bool, default True
  3591. If True and sorting by level and index is multilevel, sort by other
  3592. levels too (in order) after sorting by specified level.
  3593. ignore_index : bool, default False
  3594. If True, the resulting axis will be labeled 0, 1, …, n - 1.
  3595. .. versionadded:: 1.0.0
  3596. Returns
  3597. -------
  3598. sorted_obj : DataFrame or None
  3599. DataFrame with sorted index if inplace=False, None otherwise.
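Examples
--------
A small illustration (output as rendered by pandas' default repr):
>>> df = pd.DataFrame([1, 2, 3, 4, 5], index=[100, 29, 234, 1, 150],
...                   columns=['A'])
>>> df.sort_index()
       A
1      4
29     2
100    1
150    5
234    3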
  3600. """
  3601. inplace = validate_bool_kwarg(inplace, "inplace")
  3602. axis = self._get_axis_number(axis)
  3603. axis_name = self._get_axis_name(axis)
  3604. labels = self._get_axis(axis)
  3605. if level is not None:
  3606. raise NotImplementedError("level is not implemented")
  3607. if inplace:
  3608. raise NotImplementedError("inplace is not implemented")
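# argsort returns the positions that would sort the labels; reversing
# those positions yields descending order without a second sort.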
  3609. sort_index = labels.argsort()
  3610. if not ascending:
  3611. sort_index = sort_index[::-1]
  3612. new_axis = labels.take(sort_index)
  3613. return self.reindex(**{axis_name: new_axis})
  3614. def reindex(self: FrameOrSeries, *args, **kwargs) -> FrameOrSeries:
  3615. """
  3616. Conform %(klass)s to new index with optional filling logic.
  3617. Places NA/NaN in locations having no value in the previous index. A new object
  3618. is produced unless the new index is equivalent to the current one and
  3619. ``copy=False``.
  3620. Parameters
  3621. ----------
  3622. %(optional_labels)s
  3623. %(axes)s : array-like, optional
  3624. New labels / index to conform to, should be specified using
  3625. keywords. Preferably an Index object to avoid duplicating data.
  3626. %(optional_axis)s
  3627. method : {None, 'backfill'/'bfill', 'pad'/'ffill', 'nearest'}
  3628. Method to use for filling holes in reindexed DataFrame.
  3629. Please note: this is only applicable to DataFrames/Series with a
  3630. monotonically increasing/decreasing index.
  3631. * None (default): don't fill gaps
  3632. * pad / ffill: Propagate last valid observation forward to next
  3633. valid.
  3634. * backfill / bfill: Use next valid observation to fill gap.
  3635. * nearest: Use nearest valid observations to fill gap.
  3636. copy : bool, default True
  3637. Return a new object, even if the passed indexes are the same.
  3638. level : int or name
  3639. Broadcast across a level, matching Index values on the
  3640. passed MultiIndex level.
  3641. fill_value : scalar, default np.NaN
  3642. Value to use for missing values. Defaults to NaN, but can be any
  3643. "compatible" value.
  3644. limit : int, default None
  3645. Maximum number of consecutive elements to forward or backward fill.
  3646. tolerance : optional
  3647. Maximum distance between original and new labels for inexact
matches. The values of the index at the matching locations must
  3649. satisfy the equation ``abs(index[indexer] - target) <= tolerance``.
  3650. Tolerance may be a scalar value, which applies the same tolerance
  3651. to all values, or list-like, which applies variable tolerance per
  3652. element. List-like includes list, tuple, array, Series, and must be
  3653. the same size as the index and its dtype must exactly match the
  3654. index's type.
  3655. .. versionadded:: 0.21.0 (list-like tolerance)
  3656. Returns
  3657. -------
  3658. %(klass)s with changed index.
  3659. See Also
  3660. --------
  3661. DataFrame.set_index : Set row labels.
  3662. DataFrame.reset_index : Remove row labels or move them to new columns.
  3663. DataFrame.reindex_like : Change to same indices as other DataFrame.
  3664. Examples
  3665. --------
  3666. ``DataFrame.reindex`` supports two calling conventions
  3667. * ``(index=index_labels, columns=column_labels, ...)``
  3668. * ``(labels, axis={'index', 'columns'}, ...)``
  3669. We *highly* recommend using keyword arguments to clarify your
  3670. intent.
  3671. Create a dataframe with some fictional data.
  3672. >>> index = ['Firefox', 'Chrome', 'Safari', 'IE10', 'Konqueror']
  3673. >>> df = pd.DataFrame({'http_status': [200, 200, 404, 404, 301],
  3674. ... 'response_time': [0.04, 0.02, 0.07, 0.08, 1.0]},
  3675. ... index=index)
  3676. >>> df
  3677. http_status response_time
  3678. Firefox 200 0.04
  3679. Chrome 200 0.02
  3680. Safari 404 0.07
  3681. IE10 404 0.08
  3682. Konqueror 301 1.00
  3683. Create a new index and reindex the dataframe. By default
  3684. values in the new index that do not have corresponding
  3685. records in the dataframe are assigned ``NaN``.
  3686. >>> new_index = ['Safari', 'Iceweasel', 'Comodo Dragon', 'IE10',
  3687. ... 'Chrome']
  3688. >>> df.reindex(new_index)
  3689. http_status response_time
  3690. Safari 404.0 0.07
  3691. Iceweasel NaN NaN
  3692. Comodo Dragon NaN NaN
  3693. IE10 404.0 0.08
  3694. Chrome 200.0 0.02
  3695. We can fill in the missing values by passing a value to
  3696. the keyword ``fill_value``. Because the index is not monotonically
  3697. increasing or decreasing, we cannot use arguments to the keyword
  3698. ``method`` to fill the ``NaN`` values.
  3699. >>> df.reindex(new_index, fill_value=0)
  3700. http_status response_time
  3701. Safari 404 0.07
  3702. Iceweasel 0 0.00
  3703. Comodo Dragon 0 0.00
  3704. IE10 404 0.08
  3705. Chrome 200 0.02
  3706. >>> df.reindex(new_index, fill_value='missing')
  3707. http_status response_time
  3708. Safari 404 0.07
  3709. Iceweasel missing missing
  3710. Comodo Dragon missing missing
  3711. IE10 404 0.08
  3712. Chrome 200 0.02
  3713. We can also reindex the columns.
  3714. >>> df.reindex(columns=['http_status', 'user_agent'])
  3715. http_status user_agent
  3716. Firefox 200 NaN
  3717. Chrome 200 NaN
  3718. Safari 404 NaN
  3719. IE10 404 NaN
  3720. Konqueror 301 NaN
  3721. Or we can use "axis-style" keyword arguments
  3722. >>> df.reindex(['http_status', 'user_agent'], axis="columns")
  3723. http_status user_agent
  3724. Firefox 200 NaN
  3725. Chrome 200 NaN
  3726. Safari 404 NaN
  3727. IE10 404 NaN
  3728. Konqueror 301 NaN
  3729. To further illustrate the filling functionality in
  3730. ``reindex``, we will create a dataframe with a
  3731. monotonically increasing index (for example, a sequence
  3732. of dates).
  3733. >>> date_index = pd.date_range('1/1/2010', periods=6, freq='D')
  3734. >>> df2 = pd.DataFrame({"prices": [100, 101, np.nan, 100, 89, 88]},
  3735. ... index=date_index)
  3736. >>> df2
  3737. prices
  3738. 2010-01-01 100.0
  3739. 2010-01-02 101.0
  3740. 2010-01-03 NaN
  3741. 2010-01-04 100.0
  3742. 2010-01-05 89.0
  3743. 2010-01-06 88.0
  3744. Suppose we decide to expand the dataframe to cover a wider
  3745. date range.
  3746. >>> date_index2 = pd.date_range('12/29/2009', periods=10, freq='D')
  3747. >>> df2.reindex(date_index2)
  3748. prices
  3749. 2009-12-29 NaN
  3750. 2009-12-30 NaN
  3751. 2009-12-31 NaN
  3752. 2010-01-01 100.0
  3753. 2010-01-02 101.0
  3754. 2010-01-03 NaN
  3755. 2010-01-04 100.0
  3756. 2010-01-05 89.0
  3757. 2010-01-06 88.0
  3758. 2010-01-07 NaN
  3759. The index entries that did not have a value in the original data frame
  3760. (for example, '2009-12-29') are by default filled with ``NaN``.
  3761. If desired, we can fill in the missing values using one of several
  3762. options.
For example, to propagate the next valid observation backward to fill the ``NaN``
  3764. values, pass ``bfill`` as an argument to the ``method`` keyword.
  3765. >>> df2.reindex(date_index2, method='bfill')
  3766. prices
  3767. 2009-12-29 100.0
  3768. 2009-12-30 100.0
  3769. 2009-12-31 100.0
  3770. 2010-01-01 100.0
  3771. 2010-01-02 101.0
  3772. 2010-01-03 NaN
  3773. 2010-01-04 100.0
  3774. 2010-01-05 89.0
  3775. 2010-01-06 88.0
  3776. 2010-01-07 NaN
  3777. Please note that the ``NaN`` value present in the original dataframe
  3778. (at index value 2010-01-03) will not be filled by any of the
  3779. value propagation schemes. This is because filling while reindexing
  3780. does not look at dataframe values, but only compares the original and
  3781. desired indexes. If you do want to fill in the ``NaN`` values present
  3782. in the original dataframe, use the ``fillna()`` method.
  3783. See the :ref:`user guide <basics.reindexing>` for more.
  3784. """
  3785. # TODO: Decide if we care about having different examples for different
  3786. # kinds
  3787. # construct the args
  3788. axes, kwargs = self._construct_axes_from_arguments(args, kwargs)
  3789. method = missing.clean_reindex_fill_method(kwargs.pop("method", None))
  3790. level = kwargs.pop("level", None)
  3791. copy = kwargs.pop("copy", True)
  3792. limit = kwargs.pop("limit", None)
  3793. tolerance = kwargs.pop("tolerance", None)
  3794. fill_value = kwargs.pop("fill_value", None)
  3795. # Series.reindex doesn't use / need the axis kwarg
  3796. # We pop and ignore it here, to make writing Series/Frame generic code
  3797. # easier
  3798. kwargs.pop("axis", None)
  3799. if kwargs:
  3800. raise TypeError(
  3801. "reindex() got an unexpected keyword "
  3802. f'argument "{list(kwargs.keys())[0]}"'
  3803. )
  3804. self._consolidate_inplace()
  3805. # if all axes that are requested to reindex are equal, then only copy
  3806. # if indicated must have index names equal here as well as values
  3807. if all(
  3808. self._get_axis(axis).identical(ax)
  3809. for axis, ax in axes.items()
  3810. if ax is not None
  3811. ):
  3812. if copy:
  3813. return self.copy()
  3814. return self
  3815. # check if we are a multi reindex
  3816. if self._needs_reindex_multi(axes, method, level):
  3817. return self._reindex_multi(axes, copy, fill_value)
  3818. # perform the reindex on the axes
  3819. return self._reindex_axes(
  3820. axes, level, limit, tolerance, method, fill_value, copy
  3821. ).__finalize__(self)
  3822. def _reindex_axes(
  3823. self: FrameOrSeries, axes, level, limit, tolerance, method, fill_value, copy
  3824. ) -> FrameOrSeries:
  3825. """Perform the reindex for all the axes."""
  3826. obj = self
  3827. for a in self._AXIS_ORDERS:
  3828. labels = axes[a]
  3829. if labels is None:
  3830. continue
  3831. ax = self._get_axis(a)
  3832. new_index, indexer = ax.reindex(
  3833. labels, level=level, limit=limit, tolerance=tolerance, method=method
  3834. )
  3835. axis = self._get_axis_number(a)
  3836. obj = obj._reindex_with_indexers(
  3837. {axis: [new_index, indexer]},
  3838. fill_value=fill_value,
  3839. copy=copy,
  3840. allow_dups=False,
  3841. )
  3842. return obj
  3843. def _needs_reindex_multi(self, axes, method, level) -> bool_t:
  3844. """Check if we do need a multi reindex."""
  3845. return (
  3846. (com.count_not_none(*axes.values()) == self._AXIS_LEN)
  3847. and method is None
  3848. and level is None
  3849. and not self._is_mixed_type
  3850. )
  3851. def _reindex_multi(self, axes, copy, fill_value):
  3852. raise AbstractMethodError(self)
  3853. def _reindex_with_indexers(
  3854. self: FrameOrSeries,
  3855. reindexers,
  3856. fill_value=None,
  3857. copy: bool_t = False,
  3858. allow_dups: bool_t = False,
  3859. ) -> FrameOrSeries:
  3860. """allow_dups indicates an internal call here """
  3861. # reindex doing multiple operations on different axes if indicated
  3862. new_data = self._data
  3863. for axis in sorted(reindexers.keys()):
  3864. index, indexer = reindexers[axis]
  3865. baxis = self._get_block_manager_axis(axis)
  3866. if index is None:
  3867. continue
  3868. index = ensure_index(index)
  3869. if indexer is not None:
  3870. indexer = ensure_int64(indexer)
  3871. # TODO: speed up on homogeneous DataFrame objects
  3872. new_data = new_data.reindex_indexer(
  3873. index,
  3874. indexer,
  3875. axis=baxis,
  3876. fill_value=fill_value,
  3877. allow_dups=allow_dups,
  3878. copy=copy,
  3879. )
  3880. if copy and new_data is self._data:
  3881. new_data = new_data.copy()
  3882. return self._constructor(new_data).__finalize__(self)
  3883. def filter(
  3884. self: FrameOrSeries,
  3885. items=None,
  3886. like: Optional[str] = None,
  3887. regex: Optional[str] = None,
  3888. axis=None,
  3889. ) -> FrameOrSeries:
  3890. """
  3891. Subset the dataframe rows or columns according to the specified index labels.
  3892. Note that this routine does not filter a dataframe on its
  3893. contents. The filter is applied to the labels of the index.
  3894. Parameters
  3895. ----------
  3896. items : list-like
  3897. Keep labels from axis which are in items.
  3898. like : str
  3899. Keep labels from axis for which "like in label == True".
  3900. regex : str (regular expression)
  3901. Keep labels from axis for which re.search(regex, label) == True.
axis : {0 or 'index', 1 or 'columns', None}, default None
  3903. The axis to filter on, expressed either as an index (int)
  3904. or axis name (str). By default this is the info axis,
  3905. 'index' for Series, 'columns' for DataFrame.
  3906. Returns
  3907. -------
  3908. same type as input object
  3909. See Also
  3910. --------
  3911. DataFrame.loc
  3912. Notes
  3913. -----
  3914. The ``items``, ``like``, and ``regex`` parameters are
  3915. enforced to be mutually exclusive.
  3916. ``axis`` defaults to the info axis that is used when indexing
  3917. with ``[]``.
  3918. Examples
  3919. --------
  3920. >>> df = pd.DataFrame(np.array(([1, 2, 3], [4, 5, 6])),
  3921. ... index=['mouse', 'rabbit'],
  3922. ... columns=['one', 'two', 'three'])
  3923. >>> # select columns by name
  3924. >>> df.filter(items=['one', 'three'])
  3925. one three
  3926. mouse 1 3
  3927. rabbit 4 6
  3928. >>> # select columns by regular expression
  3929. >>> df.filter(regex='e$', axis=1)
  3930. one three
  3931. mouse 1 3
  3932. rabbit 4 6
  3933. >>> # select rows containing 'bbi'
  3934. >>> df.filter(like='bbi', axis=0)
  3935. one two three
  3936. rabbit 4 5 6
  3937. """
  3938. nkw = com.count_not_none(items, like, regex)
  3939. if nkw > 1:
  3940. raise TypeError(
  3941. "Keyword arguments `items`, `like`, or `regex` "
  3942. "are mutually exclusive"
  3943. )
  3944. if axis is None:
  3945. axis = self._info_axis_name
  3946. labels = self._get_axis(axis)
  3947. if items is not None:
  3948. name = self._get_axis_name(axis)
  3949. return self.reindex(**{name: [r for r in items if r in labels]})
  3950. elif like:
  3951. def f(x):
  3952. return like in ensure_str(x)
  3953. values = labels.map(f)
  3954. return self.loc(axis=axis)[values]
  3955. elif regex:
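# Note: ``matcher`` is assigned after ``f`` is defined; that is fine
# because ``f`` is only invoked once ``labels.map(f)`` runs below.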
  3956. def f(x):
  3957. return matcher.search(ensure_str(x)) is not None
  3958. matcher = re.compile(regex)
  3959. values = labels.map(f)
  3960. return self.loc(axis=axis)[values]
  3961. else:
  3962. raise TypeError("Must pass either `items`, `like`, or `regex`")
  3963. def head(self: FrameOrSeries, n: int = 5) -> FrameOrSeries:
  3964. """
  3965. Return the first `n` rows.
  3966. This function returns the first `n` rows for the object based
  3967. on position. It is useful for quickly testing if your object
  3968. has the right type of data in it.
  3969. For negative values of `n`, this function returns all rows except
  3970. the last `n` rows, equivalent to ``df[:-n]``.
  3971. Parameters
  3972. ----------
  3973. n : int, default 5
  3974. Number of rows to select.
  3975. Returns
  3976. -------
  3977. same type as caller
  3978. The first `n` rows of the caller object.
  3979. See Also
  3980. --------
  3981. DataFrame.tail: Returns the last `n` rows.
  3982. Examples
  3983. --------
  3984. >>> df = pd.DataFrame({'animal': ['alligator', 'bee', 'falcon', 'lion',
  3985. ... 'monkey', 'parrot', 'shark', 'whale', 'zebra']})
  3986. >>> df
  3987. animal
  3988. 0 alligator
  3989. 1 bee
  3990. 2 falcon
  3991. 3 lion
  3992. 4 monkey
  3993. 5 parrot
  3994. 6 shark
  3995. 7 whale
  3996. 8 zebra
  3997. Viewing the first 5 lines
  3998. >>> df.head()
  3999. animal
  4000. 0 alligator
  4001. 1 bee
  4002. 2 falcon
  4003. 3 lion
  4004. 4 monkey
  4005. Viewing the first `n` lines (three in this case)
  4006. >>> df.head(3)
  4007. animal
  4008. 0 alligator
  4009. 1 bee
  4010. 2 falcon
  4011. For negative values of `n`
  4012. >>> df.head(-3)
  4013. animal
  4014. 0 alligator
  4015. 1 bee
  4016. 2 falcon
  4017. 3 lion
  4018. 4 monkey
  4019. 5 parrot
  4020. """
  4021. return self.iloc[:n]
  4022. def tail(self: FrameOrSeries, n: int = 5) -> FrameOrSeries:
  4023. """
  4024. Return the last `n` rows.
This function returns the last `n` rows from the object based on
  4026. position. It is useful for quickly verifying data, for example,
  4027. after sorting or appending rows.
  4028. For negative values of `n`, this function returns all rows except
  4029. the first `n` rows, equivalent to ``df[n:]``.
  4030. Parameters
  4031. ----------
  4032. n : int, default 5
  4033. Number of rows to select.
  4034. Returns
  4035. -------
  4036. type of caller
  4037. The last `n` rows of the caller object.
  4038. See Also
  4039. --------
  4040. DataFrame.head : The first `n` rows of the caller object.
  4041. Examples
  4042. --------
  4043. >>> df = pd.DataFrame({'animal': ['alligator', 'bee', 'falcon', 'lion',
  4044. ... 'monkey', 'parrot', 'shark', 'whale', 'zebra']})
  4045. >>> df
  4046. animal
  4047. 0 alligator
  4048. 1 bee
  4049. 2 falcon
  4050. 3 lion
  4051. 4 monkey
  4052. 5 parrot
  4053. 6 shark
  4054. 7 whale
  4055. 8 zebra
  4056. Viewing the last 5 lines
  4057. >>> df.tail()
  4058. animal
  4059. 4 monkey
  4060. 5 parrot
  4061. 6 shark
  4062. 7 whale
  4063. 8 zebra
  4064. Viewing the last `n` lines (three in this case)
  4065. >>> df.tail(3)
  4066. animal
  4067. 6 shark
  4068. 7 whale
  4069. 8 zebra
  4070. For negative values of `n`
  4071. >>> df.tail(-3)
  4072. animal
  4073. 3 lion
  4074. 4 monkey
  4075. 5 parrot
  4076. 6 shark
  4077. 7 whale
  4078. 8 zebra
  4079. """
  4080. if n == 0:
  4081. return self.iloc[0:0]
  4082. return self.iloc[-n:]
  4083. def sample(
  4084. self: FrameOrSeries,
  4085. n=None,
  4086. frac=None,
  4087. replace=False,
  4088. weights=None,
  4089. random_state=None,
  4090. axis=None,
  4091. ) -> FrameOrSeries:
  4092. """
  4093. Return a random sample of items from an axis of object.
  4094. You can use `random_state` for reproducibility.
  4095. Parameters
  4096. ----------
  4097. n : int, optional
  4098. Number of items from axis to return. Cannot be used with `frac`.
Default = 1 if `frac` is None.
  4100. frac : float, optional
  4101. Fraction of axis items to return. Cannot be used with `n`.
  4102. replace : bool, default False
  4103. Allow or disallow sampling of the same row more than once.
  4104. weights : str or ndarray-like, optional
  4105. Default 'None' results in equal probability weighting.
  4106. If passed a Series, will align with target object on index. Index
  4107. values in weights not found in sampled object will be ignored and
  4108. index values in sampled object not in weights will be assigned
  4109. weights of zero.
  4110. If called on a DataFrame, will accept the name of a column
  4111. when axis = 0.
  4112. Unless weights are a Series, weights must be same length as axis
  4113. being sampled.
  4114. If weights do not sum to 1, they will be normalized to sum to 1.
  4115. Missing values in the weights column will be treated as zero.
  4116. Infinite values not allowed.
  4117. random_state : int or numpy.random.RandomState, optional
  4118. Seed for the random number generator (if int), or numpy RandomState
  4119. object.
axis : {0 or 'index', 1 or 'columns', None}, default None
  4121. Axis to sample. Accepts axis number or name. Default is stat axis
  4122. for given data type (0 for Series and DataFrames).
  4123. Returns
  4124. -------
  4125. Series or DataFrame
  4126. A new object of same type as caller containing `n` items randomly
  4127. sampled from the caller object.
  4128. See Also
  4129. --------
  4130. numpy.random.choice: Generates a random sample from a given 1-D numpy
  4131. array.
  4132. Notes
  4133. -----
If `frac` > 1, `replace` should be set to `True`.
  4135. Examples
  4136. --------
  4137. >>> df = pd.DataFrame({'num_legs': [2, 4, 8, 0],
  4138. ... 'num_wings': [2, 0, 0, 0],
  4139. ... 'num_specimen_seen': [10, 2, 1, 8]},
  4140. ... index=['falcon', 'dog', 'spider', 'fish'])
  4141. >>> df
  4142. num_legs num_wings num_specimen_seen
  4143. falcon 2 2 10
  4144. dog 4 0 2
  4145. spider 8 0 1
  4146. fish 0 0 8
  4147. Extract 3 random elements from the ``Series`` ``df['num_legs']``:
  4148. Note that we use `random_state` to ensure the reproducibility of
  4149. the examples.
  4150. >>> df['num_legs'].sample(n=3, random_state=1)
  4151. fish 0
  4152. spider 8
  4153. falcon 2
  4154. Name: num_legs, dtype: int64
  4155. A random 50% sample of the ``DataFrame`` with replacement:
  4156. >>> df.sample(frac=0.5, replace=True, random_state=1)
  4157. num_legs num_wings num_specimen_seen
  4158. dog 4 0 2
  4159. fish 0 0 8
An upsampled sample of the ``DataFrame`` with replacement:
Note that the `replace` parameter has to be `True` when `frac` > 1.
  4162. >>> df.sample(frac=2, replace=True, random_state=1)
  4163. num_legs num_wings num_specimen_seen
  4164. dog 4 0 2
  4165. fish 0 0 8
  4166. falcon 2 2 10
  4167. falcon 2 2 10
  4168. fish 0 0 8
  4169. dog 4 0 2
  4170. fish 0 0 8
  4171. dog 4 0 2
  4172. Using a DataFrame column as weights. Rows with larger value in the
  4173. `num_specimen_seen` column are more likely to be sampled.
  4174. >>> df.sample(n=2, weights='num_specimen_seen', random_state=1)
  4175. num_legs num_wings num_specimen_seen
  4176. falcon 2 2 10
  4177. fish 0 0 8
  4178. """
  4179. if axis is None:
  4180. axis = self._stat_axis_number
  4181. axis = self._get_axis_number(axis)
  4182. axis_length = self.shape[axis]
  4183. # Process random_state argument
  4184. rs = com.random_state(random_state)
  4185. # Check weights for compliance
  4186. if weights is not None:
  4187. # If a series, align with frame
  4188. if isinstance(weights, ABCSeries):
  4189. weights = weights.reindex(self.axes[axis])
  4190. # Strings acceptable if a dataframe and axis = 0
  4191. if isinstance(weights, str):
  4192. if isinstance(self, ABCDataFrame):
  4193. if axis == 0:
  4194. try:
  4195. weights = self[weights]
  4196. except KeyError:
  4197. raise KeyError(
  4198. "String passed to weights not a valid column"
  4199. )
  4200. else:
  4201. raise ValueError(
  4202. "Strings can only be passed to "
  4203. "weights when sampling from rows on "
  4204. "a DataFrame"
  4205. )
  4206. else:
  4207. raise ValueError(
  4208. "Strings cannot be passed as weights "
  4209. "when sampling from a Series."
  4210. )
  4211. weights = pd.Series(weights, dtype="float64")
  4212. if len(weights) != axis_length:
  4213. raise ValueError(
  4214. "Weights and axis to be sampled must be of same length"
  4215. )
  4216. if (weights == np.inf).any() or (weights == -np.inf).any():
  4217. raise ValueError("weight vector may not include `inf` values")
  4218. if (weights < 0).any():
raise ValueError("weight vector may not include negative values")
  4220. # If has nan, set to zero.
  4221. weights = weights.fillna(0)
  4222. # Renormalize if don't sum to 1
  4223. if weights.sum() != 1:
  4224. if weights.sum() != 0:
  4225. weights = weights / weights.sum()
  4226. else:
  4227. raise ValueError("Invalid weights: weights sum to zero")
  4228. weights = weights.values
  4229. # If no frac or n, default to n=1.
  4230. if n is None and frac is None:
  4231. n = 1
  4232. elif frac is not None and frac > 1 and not replace:
  4233. raise ValueError(
  4234. "Replace has to be set to `True` when "
  4235. "upsampling the population `frac` > 1."
  4236. )
  4237. elif n is not None and frac is None and n % 1 != 0:
  4238. raise ValueError("Only integers accepted as `n` values")
  4239. elif n is None and frac is not None:
  4240. n = int(round(frac * axis_length))
  4241. elif n is not None and frac is not None:
  4242. raise ValueError("Please enter a value for `frac` OR `n`, not both")
  4243. # Check for negative sizes
  4244. if n < 0:
  4245. raise ValueError(
  4246. "A negative number of rows requested. Please provide positive value."
  4247. )
  4248. locs = rs.choice(axis_length, size=n, replace=replace, p=weights)
  4249. return self.take(locs, axis=axis)
  4250. _shared_docs[
  4251. "pipe"
  4252. ] = r"""
  4253. Apply func(self, \*args, \*\*kwargs).
  4254. Parameters
  4255. ----------
  4256. func : function
  4257. Function to apply to the %(klass)s.
``args`` and ``kwargs`` are passed into ``func``.
  4259. Alternatively a ``(callable, data_keyword)`` tuple where
  4260. ``data_keyword`` is a string indicating the keyword of
  4261. ``callable`` that expects the %(klass)s.
  4262. args : iterable, optional
  4263. Positional arguments passed into ``func``.
  4264. kwargs : mapping, optional
  4265. A dictionary of keyword arguments passed into ``func``.
  4266. Returns
  4267. -------
  4268. object : the return type of ``func``.
  4269. See Also
  4270. --------
  4271. DataFrame.apply
  4272. DataFrame.applymap
  4273. Series.map
  4274. Notes
  4275. -----
  4276. Use ``.pipe`` when chaining together functions that expect
  4277. Series, DataFrames or GroupBy objects. Instead of writing
  4278. >>> f(g(h(df), arg1=a), arg2=b, arg3=c)
  4279. You can write
  4280. >>> (df.pipe(h)
  4281. ... .pipe(g, arg1=a)
  4282. ... .pipe(f, arg2=b, arg3=c)
  4283. ... )
  4284. If you have a function that takes the data as (say) the second
  4285. argument, pass a tuple indicating which keyword expects the
  4286. data. For example, suppose ``f`` takes its data as ``arg2``:
  4287. >>> (df.pipe(h)
  4288. ... .pipe(g, arg1=a)
  4289. ... .pipe((f, 'arg2'), arg1=a, arg3=c)
  4290. ... )
  4291. """
  4292. @Appender(_shared_docs["pipe"] % _shared_doc_kwargs)
  4293. def pipe(self, func, *args, **kwargs):
  4294. return com.pipe(self, func, *args, **kwargs)
  4295. _shared_docs["aggregate"] = dedent(
  4296. """
  4297. Aggregate using one or more operations over the specified axis.
  4298. %(versionadded)s
  4299. Parameters
  4300. ----------
  4301. func : function, str, list or dict
  4302. Function to use for aggregating the data. If a function, must either
  4303. work when passed a %(klass)s or when passed to %(klass)s.apply.
  4304. Accepted combinations are:
  4305. - function
  4306. - string function name
  4307. - list of functions and/or function names, e.g. ``[np.sum, 'mean']``
  4308. - dict of axis labels -> functions, function names or list of such.
  4309. %(axis)s
  4310. *args
  4311. Positional arguments to pass to `func`.
  4312. **kwargs
  4313. Keyword arguments to pass to `func`.
  4314. Returns
  4315. -------
  4316. scalar, Series or DataFrame
  4317. The return can be:
  4318. * scalar : when Series.agg is called with single function
  4319. * Series : when DataFrame.agg is called with a single function
  4320. * DataFrame : when DataFrame.agg is called with several functions
  4322. %(see_also)s
  4323. Notes
  4324. -----
  4325. `agg` is an alias for `aggregate`. Use the alias.
  4326. A passed user-defined-function will be passed a Series for evaluation.
  4327. %(examples)s"""
  4328. )
  4329. _shared_docs[
  4330. "transform"
  4331. ] = """
  4332. Call ``func`` on self producing a %(klass)s with transformed values.
  4333. Produced %(klass)s will have same axis length as self.
  4334. Parameters
  4335. ----------
  4336. func : function, str, list or dict
  4337. Function to use for transforming the data. If a function, must either
  4338. work when passed a %(klass)s or when passed to %(klass)s.apply.
  4339. Accepted combinations are:
  4340. - function
  4341. - string function name
- list of functions and/or function names, e.g. ``[np.exp, 'sqrt']``
  4343. - dict of axis labels -> functions, function names or list of such.
  4344. %(axis)s
  4345. *args
  4346. Positional arguments to pass to `func`.
  4347. **kwargs
  4348. Keyword arguments to pass to `func`.
  4349. Returns
  4350. -------
  4351. %(klass)s
  4352. A %(klass)s that must have the same length as self.
  4353. Raises
  4354. ------
  4355. ValueError : If the returned %(klass)s has a different length than self.
  4356. See Also
  4357. --------
  4358. %(klass)s.agg : Only perform aggregating type operations.
  4359. %(klass)s.apply : Invoke function on a %(klass)s.
  4360. Examples
  4361. --------
  4362. >>> df = pd.DataFrame({'A': range(3), 'B': range(1, 4)})
  4363. >>> df
  4364. A B
  4365. 0 0 1
  4366. 1 1 2
  4367. 2 2 3
  4368. >>> df.transform(lambda x: x + 1)
  4369. A B
  4370. 0 1 2
  4371. 1 2 3
  4372. 2 3 4
  4373. Even though the resulting %(klass)s must have the same length as the
  4374. input %(klass)s, it is possible to provide several input functions:
  4375. >>> s = pd.Series(range(3))
  4376. >>> s
  4377. 0 0
  4378. 1 1
  4379. 2 2
  4380. dtype: int64
  4381. >>> s.transform([np.sqrt, np.exp])
  4382. sqrt exp
  4383. 0 0.000000 1.000000
  4384. 1 1.000000 2.718282
  4385. 2 1.414214 7.389056
  4386. """
  4387. # ----------------------------------------------------------------------
  4388. # Attribute access
  4389. def __finalize__(
  4390. self: FrameOrSeries, other, method=None, **kwargs
  4391. ) -> FrameOrSeries:
  4392. """
  4393. Propagate metadata from other to self.
  4394. Parameters
  4395. ----------
  4396. other : the object from which to get the attributes that we are going
  4397. to propagate
method : optional
A passed method name; possibly used to take different types of
propagation actions based on this.
  4400. """
  4401. if isinstance(other, NDFrame):
  4402. for name in other.attrs:
  4403. self.attrs[name] = other.attrs[name]
  4404. # For subclasses using _metadata.
  4405. for name in self._metadata:
  4406. object.__setattr__(self, name, getattr(other, name, None))
  4407. return self
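# Sketch of the ``attrs`` propagation above (illustrative):
#
#   >>> df = pd.DataFrame({'A': [1]})
#   >>> df.attrs['source'] = 'sensor-1'
#   >>> df.copy().attrs          # copy() ends with __finalize__(self)
#   {'source': 'sensor-1'}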
  4408. def __getattr__(self, name: str):
  4409. """After regular attribute access, try looking up the name
  4410. This allows simpler access to columns for interactive use.
  4411. """
  4412. # Note: obj.x will always call obj.__getattribute__('x') prior to
  4413. # calling obj.__getattr__('x').
  4414. if (
  4415. name in self._internal_names_set
  4416. or name in self._metadata
  4417. or name in self._accessors
  4418. ):
  4419. return object.__getattribute__(self, name)
  4420. else:
  4421. if self._info_axis._can_hold_identifiers_and_holds_name(name):
  4422. return self[name]
  4423. return object.__getattribute__(self, name)
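# Illustrative example of the column fallback (assumes a column named
# 'price' that does not shadow an existing attribute):
#
#   >>> df = pd.DataFrame({'price': [10, 11]})
#   >>> df.price.equals(df['price'])
#   True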
  4424. def __setattr__(self, name: str, value) -> None:
  4425. """After regular attribute access, try setting the name
  4426. This allows simpler access to columns for interactive use.
  4427. """
  4428. # first try regular attribute access via __getattribute__, so that
  4429. # e.g. ``obj.x`` and ``obj.x = 4`` will always reference/modify
  4430. # the same attribute.
  4431. try:
  4432. object.__getattribute__(self, name)
  4433. return object.__setattr__(self, name, value)
  4434. except AttributeError:
  4435. pass
  4436. # if this fails, go on to more involved attribute setting
  4437. # (note that this matches __getattr__, above).
  4438. if name in self._internal_names_set:
  4439. object.__setattr__(self, name, value)
  4440. elif name in self._metadata:
  4441. object.__setattr__(self, name, value)
  4442. else:
  4443. try:
  4444. existing = getattr(self, name)
  4445. if isinstance(existing, Index):
  4446. object.__setattr__(self, name, value)
  4447. elif name in self._info_axis:
  4448. self[name] = value
  4449. else:
  4450. object.__setattr__(self, name, value)
  4451. except (AttributeError, TypeError):
  4452. if isinstance(self, ABCDataFrame) and (is_list_like(value)):
  4453. warnings.warn(
  4454. "Pandas doesn't allow columns to be "
  4455. "created via a new attribute name - see "
  4456. "https://pandas.pydata.org/pandas-docs/"
  4457. "stable/indexing.html#attribute-access",
  4458. stacklevel=2,
  4459. )
  4460. object.__setattr__(self, name, value)
  4461. def _dir_additions(self):
  4462. """ add the string-like attributes from the info_axis.
  4463. If info_axis is a MultiIndex, it's first level values are used.
  4464. """
  4465. additions = {
  4466. c
  4467. for c in self._info_axis.unique(level=0)[:100]
  4468. if isinstance(c, str) and c.isidentifier()
  4469. }
  4470. return super()._dir_additions().union(additions)
  4471. # ----------------------------------------------------------------------
  4472. # Consolidation of internals
  4473. def _protect_consolidate(self, f):
  4474. """Consolidate _data -- if the blocks have changed, then clear the
  4475. cache
  4476. """
  4477. blocks_before = len(self._data.blocks)
  4478. result = f()
  4479. if len(self._data.blocks) != blocks_before:
  4480. self._clear_item_cache()
  4481. return result
  4482. def _consolidate_inplace(self) -> None:
  4483. """Consolidate data in place and return None"""
  4484. def f():
  4485. self._data = self._data.consolidate()
  4486. self._protect_consolidate(f)
  4487. def _consolidate(self, inplace: bool_t = False):
  4488. """
  4489. Compute NDFrame with "consolidated" internals (data of each dtype
  4490. grouped together in a single ndarray).
  4491. Parameters
  4492. ----------
  4493. inplace : bool, default False
  4494. If False return new object, otherwise modify existing object.
  4495. Returns
  4496. -------
  4497. consolidated : same type as caller
  4498. """
  4499. inplace = validate_bool_kwarg(inplace, "inplace")
  4500. if inplace:
  4501. self._consolidate_inplace()
  4502. else:
  4503. f = lambda: self._data.consolidate()
  4504. cons_data = self._protect_consolidate(f)
  4505. return self._constructor(cons_data).__finalize__(self)
  4506. @property
  4507. def _is_mixed_type(self):
  4508. f = lambda: self._data.is_mixed_type
  4509. return self._protect_consolidate(f)
  4510. @property
  4511. def _is_numeric_mixed_type(self):
  4512. f = lambda: self._data.is_numeric_mixed_type
  4513. return self._protect_consolidate(f)
  4514. @property
  4515. def _is_datelike_mixed_type(self):
  4516. f = lambda: self._data.is_datelike_mixed_type
  4517. return self._protect_consolidate(f)
  4518. def _check_inplace_setting(self, value) -> bool_t:
  4519. """ check whether we allow in-place setting with this type of value """
  4520. if self._is_mixed_type:
  4521. if not self._is_numeric_mixed_type:
  4522. # allow an actual np.nan thru
  4523. if is_float(value) and np.isnan(value):
  4524. return True
  4525. raise TypeError(
  4526. "Cannot do inplace boolean setting on "
  4527. "mixed-types with a non np.nan value"
  4528. )
  4529. return True
  4530. def _get_numeric_data(self):
  4531. return self._constructor(self._data.get_numeric_data()).__finalize__(self)
  4532. def _get_bool_data(self):
  4533. return self._constructor(self._data.get_bool_data()).__finalize__(self)
  4534. # ----------------------------------------------------------------------
  4535. # Internal Interface Methods
  4536. @property
  4537. def values(self) -> np.ndarray:
  4538. """
  4539. Return a Numpy representation of the DataFrame.
  4540. .. warning::
  4541. We recommend using :meth:`DataFrame.to_numpy` instead.
  4542. Only the values in the DataFrame will be returned, the axes labels
  4543. will be removed.
  4544. Returns
  4545. -------
  4546. numpy.ndarray
  4547. The values of the DataFrame.
  4548. See Also
  4549. --------
  4550. DataFrame.to_numpy : Recommended alternative to this method.
  4551. DataFrame.index : Retrieve the index labels.
  4552. DataFrame.columns : Retrieving the column names.
  4553. Notes
  4554. -----
  4555. The dtype will be a lower-common-denominator dtype (implicit
  4556. upcasting); that is to say if the dtypes (even of numeric types)
  4557. are mixed, the one that accommodates all will be chosen. Use this
  4558. with care if you are not dealing with the blocks.
  4559. e.g. If the dtypes are float16 and float32, dtype will be upcast to
  4560. float32. If dtypes are int32 and uint8, dtype will be upcast to
  4561. int32. By :func:`numpy.find_common_type` convention, mixing int64
  4562. and uint64 will result in a float64 dtype.
  4563. Examples
  4564. --------
  4565. A DataFrame where all columns are the same type (e.g., int64) results
  4566. in an array of the same type.
  4567. >>> df = pd.DataFrame({'age': [ 3, 29],
  4568. ... 'height': [94, 170],
  4569. ... 'weight': [31, 115]})
  4570. >>> df
  4571. age height weight
  4572. 0 3 94 31
  4573. 1 29 170 115
  4574. >>> df.dtypes
  4575. age int64
  4576. height int64
  4577. weight int64
  4578. dtype: object
  4579. >>> df.values
  4580. array([[ 3, 94, 31],
  4581. [ 29, 170, 115]], dtype=int64)
A DataFrame with mixed type columns (e.g., str/object, int64, float32)
  4583. results in an ndarray of the broadest type that accommodates these
  4584. mixed types (e.g., object).
  4585. >>> df2 = pd.DataFrame([('parrot', 24.0, 'second'),
  4586. ... ('lion', 80.5, 1),
  4587. ... ('monkey', np.nan, None)],
  4588. ... columns=('name', 'max_speed', 'rank'))
  4589. >>> df2.dtypes
  4590. name object
  4591. max_speed float64
  4592. rank object
  4593. dtype: object
  4594. >>> df2.values
  4595. array([['parrot', 24.0, 'second'],
  4596. ['lion', 80.5, 1],
  4597. ['monkey', nan, None]], dtype=object)
  4598. """
  4599. self._consolidate_inplace()
  4600. return self._data.as_array(transpose=self._AXIS_REVERSED)
  4601. @property
  4602. def _values(self) -> np.ndarray:
  4603. """internal implementation"""
  4604. return self.values
  4605. @property
  4606. def _get_values(self) -> np.ndarray:
  4607. # compat
  4608. return self.values
  4609. def _internal_get_values(self) -> np.ndarray:
  4610. """
  4611. Return an ndarray after converting sparse values to dense.
  4612. This is the same as ``.values`` for non-sparse data. For sparse
  4613. data contained in a `SparseArray`, the data are first
  4614. converted to a dense representation.
  4615. Returns
  4616. -------
  4617. numpy.ndarray
  4618. Numpy representation of DataFrame.
  4619. See Also
  4620. --------
  4621. values : Numpy representation of DataFrame.
  4622. SparseArray : Container for sparse data.
  4623. """
  4624. return self.values
  4625. @property
  4626. def dtypes(self):
  4627. """
  4628. Return the dtypes in the DataFrame.
  4629. This returns a Series with the data type of each column.
  4630. The result's index is the original DataFrame's columns. Columns
  4631. with mixed types are stored with the ``object`` dtype. See
  4632. :ref:`the User Guide <basics.dtypes>` for more.
  4633. Returns
  4634. -------
  4635. pandas.Series
  4636. The data type of each column.
  4637. Examples
  4638. --------
  4639. >>> df = pd.DataFrame({'float': [1.0],
  4640. ... 'int': [1],
  4641. ... 'datetime': [pd.Timestamp('20180310')],
  4642. ... 'string': ['foo']})
  4643. >>> df.dtypes
  4644. float float64
  4645. int int64
  4646. datetime datetime64[ns]
  4647. string object
  4648. dtype: object
  4649. """
  4650. from pandas import Series
  4651. return Series(self._data.get_dtypes(), index=self._info_axis, dtype=np.object_)
  4652. def _to_dict_of_blocks(self, copy: bool_t = True):
  4653. """
  4654. Return a dict of dtype -> Constructor Types that
  4655. each is a homogeneous dtype.
  4656. Internal ONLY
  4657. """
  4658. return {
  4659. k: self._constructor(v).__finalize__(self)
  4660. for k, v, in self._data.to_dict(copy=copy).items()
  4661. }
  4662. def astype(
  4663. self: FrameOrSeries, dtype, copy: bool_t = True, errors: str = "raise"
  4664. ) -> FrameOrSeries:
  4665. """
  4666. Cast a pandas object to a specified dtype ``dtype``.
  4667. Parameters
  4668. ----------
  4669. dtype : data type, or dict of column name -> data type
  4670. Use a numpy.dtype or Python type to cast entire pandas object to
  4671. the same type. Alternatively, use {col: dtype, ...}, where col is a
  4672. column label and dtype is a numpy.dtype or Python type to cast one
  4673. or more of the DataFrame's columns to column-specific types.
  4674. copy : bool, default True
  4675. Return a copy when ``copy=True`` (be very careful setting
  4676. ``copy=False`` as changes to values then may propagate to other
  4677. pandas objects).
  4678. errors : {'raise', 'ignore'}, default 'raise'
  4679. Control raising of exceptions on invalid data for provided dtype.
  4680. - ``raise`` : allow exceptions to be raised
  4681. - ``ignore`` : suppress exceptions. On error return original object.
  4682. Returns
  4683. -------
  4684. casted : same type as caller
  4685. See Also
  4686. --------
  4687. to_datetime : Convert argument to datetime.
  4688. to_timedelta : Convert argument to timedelta.
  4689. to_numeric : Convert argument to a numeric type.
  4690. numpy.ndarray.astype : Cast a numpy array to a specified type.
  4691. Examples
  4692. --------
  4693. Create a DataFrame:
  4694. >>> d = {'col1': [1, 2], 'col2': [3, 4]}
  4695. >>> df = pd.DataFrame(data=d)
  4696. >>> df.dtypes
  4697. col1 int64
  4698. col2 int64
  4699. dtype: object
  4700. Cast all columns to int32:
  4701. >>> df.astype('int32').dtypes
  4702. col1 int32
  4703. col2 int32
  4704. dtype: object
  4705. Cast col1 to int32 using a dictionary:
  4706. >>> df.astype({'col1': 'int32'}).dtypes
  4707. col1 int32
  4708. col2 int64
  4709. dtype: object
  4710. Create a series:
  4711. >>> ser = pd.Series([1, 2], dtype='int32')
  4712. >>> ser
  4713. 0 1
  4714. 1 2
  4715. dtype: int32
  4716. >>> ser.astype('int64')
  4717. 0 1
  4718. 1 2
  4719. dtype: int64
  4720. Convert to categorical type:
  4721. >>> ser.astype('category')
  4722. 0 1
  4723. 1 2
  4724. dtype: category
  4725. Categories (2, int64): [1, 2]
  4726. Convert to ordered categorical type with custom ordering:
  4727. >>> cat_dtype = pd.api.types.CategoricalDtype(
  4728. ... categories=[2, 1], ordered=True)
  4729. >>> ser.astype(cat_dtype)
  4730. 0 1
  4731. 1 2
  4732. dtype: category
  4733. Categories (2, int64): [2 < 1]
  4734. Note that using ``copy=False`` and changing data on a new
  4735. pandas object may propagate changes:
  4736. >>> s1 = pd.Series([1, 2])
  4737. >>> s2 = s1.astype('int64', copy=False)
  4738. >>> s2[0] = 10
  4739. >>> s1 # note that s1[0] has changed too
  4740. 0 10
  4741. 1 2
  4742. dtype: int64
  4743. """
  4744. if is_dict_like(dtype):
  4745. if self.ndim == 1: # i.e. Series
  4746. if len(dtype) > 1 or self.name not in dtype:
  4747. raise KeyError(
  4748. "Only the Series name can be used for "
  4749. "the key in Series dtype mappings."
  4750. )
  4751. new_type = dtype[self.name]
  4752. return self.astype(new_type, copy, errors)
  4753. for col_name in dtype.keys():
  4754. if col_name not in self:
  4755. raise KeyError(
  4756. "Only a column name can be used for the "
  4757. "key in a dtype mappings argument."
  4758. )
  4759. results = []
  4760. for col_name, col in self.items():
  4761. if col_name in dtype:
  4762. results.append(
  4763. col.astype(dtype=dtype[col_name], copy=copy, errors=errors)
  4764. )
  4765. else:
  4766. results.append(col.copy() if copy else col)
  4767. elif is_extension_array_dtype(dtype) and self.ndim > 1:
  4768. # GH 18099/22869: columnwise conversion to extension dtype
  4769. # GH 24704: use iloc to handle duplicate column names
  4770. results = [
  4771. self.iloc[:, i].astype(dtype, copy=copy)
  4772. for i in range(len(self.columns))
  4773. ]
  4774. else:
  4775. # else, only a single dtype is given
  4776. new_data = self._data.astype(dtype=dtype, copy=copy, errors=errors)
  4777. return self._constructor(new_data).__finalize__(self)
  4778. # GH 19920: retain column metadata after concat
  4779. result = pd.concat(results, axis=1, copy=False)
  4780. result.columns = self.columns
  4781. return result
  4782. def copy(self: FrameOrSeries, deep: bool_t = True) -> FrameOrSeries:
  4783. """
  4784. Make a copy of this object's indices and data.
  4785. When ``deep=True`` (default), a new object will be created with a
  4786. copy of the calling object's data and indices. Modifications to
  4787. the data or indices of the copy will not be reflected in the
  4788. original object (see notes below).
  4789. When ``deep=False``, a new object will be created without copying
  4790. the calling object's data or index (only references to the data
  4791. and index are copied). Any changes to the data of the original
  4792. will be reflected in the shallow copy (and vice versa).
  4793. Parameters
  4794. ----------
  4795. deep : bool, default True
  4796. Make a deep copy, including a copy of the data and the indices.
  4797. With ``deep=False`` neither the indices nor the data are copied.
  4798. Returns
  4799. -------
  4800. copy : Series or DataFrame
  4801. Object type matches caller.
  4802. Notes
  4803. -----
  4804. When ``deep=True``, data is copied but actual Python objects
  4805. will not be copied recursively, only the reference to the object.
  4806. This is in contrast to `copy.deepcopy` in the Standard Library,
  4807. which recursively copies object data (see examples below).
  4808. While ``Index`` objects are copied when ``deep=True``, the underlying
  4809. numpy array is not copied for performance reasons. Since ``Index`` is
  4810. immutable, the underlying data can be safely shared and a copy
  4811. is not needed.
  4812. Examples
  4813. --------
  4814. >>> s = pd.Series([1, 2], index=["a", "b"])
  4815. >>> s
  4816. a 1
  4817. b 2
  4818. dtype: int64
  4819. >>> s_copy = s.copy()
  4820. >>> s_copy
  4821. a 1
  4822. b 2
  4823. dtype: int64
  4824. **Shallow copy versus default (deep) copy:**
  4825. >>> s = pd.Series([1, 2], index=["a", "b"])
  4826. >>> deep = s.copy()
  4827. >>> shallow = s.copy(deep=False)
  4828. Shallow copy shares data and index with original.
  4829. >>> s is shallow
  4830. False
  4831. >>> s.values is shallow.values and s.index is shallow.index
  4832. True
  4833. Deep copy has own copy of data and index.
  4834. >>> s is deep
  4835. False
  4836. >>> s.values is deep.values or s.index is deep.index
  4837. False
Updates to the data shared by shallow copy and original are reflected
  4839. in both; deep copy remains unchanged.
  4840. >>> s[0] = 3
  4841. >>> shallow[1] = 4
  4842. >>> s
  4843. a 3
  4844. b 4
  4845. dtype: int64
  4846. >>> shallow
  4847. a 3
  4848. b 4
  4849. dtype: int64
  4850. >>> deep
  4851. a 1
  4852. b 2
  4853. dtype: int64
  4854. Note that when copying an object containing Python objects, a deep copy
  4855. will copy the data, but will not do so recursively. Updating a nested
  4856. data object will be reflected in the deep copy.
  4857. >>> s = pd.Series([[1, 2], [3, 4]])
  4858. >>> deep = s.copy()
  4859. >>> s[0][0] = 10
  4860. >>> s
  4861. 0 [10, 2]
  4862. 1 [3, 4]
  4863. dtype: object
  4864. >>> deep
  4865. 0 [10, 2]
  4866. 1 [3, 4]
  4867. dtype: object
  4868. """
  4869. data = self._data.copy(deep=deep)
  4870. return self._constructor(data).__finalize__(self)
  4871. def __copy__(self: FrameOrSeries, deep: bool_t = True) -> FrameOrSeries:
  4872. return self.copy(deep=deep)
  4873. def __deepcopy__(self: FrameOrSeries, memo=None) -> FrameOrSeries:
  4874. """
  4875. Parameters
  4876. ----------
  4877. memo, default None
  4878. Standard signature. Unused
  4879. """
  4880. return self.copy(deep=True)
  4881. def _convert(
  4882. self: FrameOrSeries,
  4883. datetime: bool_t = False,
  4884. numeric: bool_t = False,
  4885. timedelta: bool_t = False,
  4886. coerce: bool_t = False,
  4887. copy: bool_t = True,
  4888. ) -> FrameOrSeries:
  4889. """
  4890. Attempt to infer better dtype for object columns
  4891. Parameters
  4892. ----------
  4893. datetime : bool, default False
  4894. If True, convert to date where possible.
  4895. numeric : bool, default False
  4896. If True, attempt to convert to numbers (including strings), with
  4897. unconvertible values becoming NaN.
  4898. timedelta : bool, default False
  4899. If True, convert to timedelta where possible.
  4900. coerce : bool, default False
  4901. If True, force conversion with unconvertible values converted to
  4902. nulls (NaN or NaT).
  4903. copy : bool, default True
  4904. If True, return a copy even if no copy is necessary (e.g. no
  4905. conversion was done). Note: This is meant for internal use, and
  4906. should not be confused with inplace.
  4907. Returns
  4908. -------
  4909. converted : same as input object
  4910. """
  4911. validate_bool_kwarg(datetime, "datetime")
  4912. validate_bool_kwarg(numeric, "numeric")
  4913. validate_bool_kwarg(timedelta, "timedelta")
  4914. validate_bool_kwarg(coerce, "coerce")
  4915. validate_bool_kwarg(copy, "copy")
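# delegate to the block manager: each block attempts the requested
# soft conversions and leaves unconvertible blocks unchanged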
  4916. return self._constructor(
  4917. self._data.convert(
  4918. datetime=datetime,
  4919. numeric=numeric,
  4920. timedelta=timedelta,
  4921. coerce=coerce,
  4922. copy=copy,
  4923. )
  4924. ).__finalize__(self)
  4925. def infer_objects(self: FrameOrSeries) -> FrameOrSeries:
  4926. """
  4927. Attempt to infer better dtypes for object columns.
  4928. Attempts soft conversion of object-dtyped
  4929. columns, leaving non-object and unconvertible
  4930. columns unchanged. The inference rules are the
  4931. same as during normal Series/DataFrame construction.
  4932. .. versionadded:: 0.21.0
  4933. Returns
  4934. -------
  4935. converted : same type as input object
  4936. See Also
  4937. --------
  4938. to_datetime : Convert argument to datetime.
  4939. to_timedelta : Convert argument to timedelta.
  4940. to_numeric : Convert argument to numeric type.
  4941. convert_dtypes : Convert argument to best possible dtype.
  4942. Examples
  4943. --------
  4944. >>> df = pd.DataFrame({"A": ["a", 1, 2, 3]})
  4945. >>> df = df.iloc[1:]
  4946. >>> df
  4947. A
  4948. 1 1
  4949. 2 2
  4950. 3 3
  4951. >>> df.dtypes
  4952. A object
  4953. dtype: object
  4954. >>> df.infer_objects().dtypes
  4955. A int64
  4956. dtype: object
  4957. """
  4958. # numeric=False necessary to only soft convert;
  4959. # python objects will still be converted to
  4960. # native numpy numeric types
  4961. return self._constructor(
  4962. self._data.convert(
  4963. datetime=True, numeric=False, timedelta=True, coerce=False, copy=True
  4964. )
  4965. ).__finalize__(self)
  4966. def convert_dtypes(
  4967. self: FrameOrSeries,
  4968. infer_objects: bool_t = True,
  4969. convert_string: bool_t = True,
  4970. convert_integer: bool_t = True,
  4971. convert_boolean: bool_t = True,
  4972. ) -> FrameOrSeries:
  4973. """
  4974. Convert columns to best possible dtypes using dtypes supporting ``pd.NA``.
  4975. .. versionadded:: 1.0.0
  4976. Parameters
  4977. ----------
  4978. infer_objects : bool, default True
  4979. Whether object dtypes should be converted to the best possible types.
  4980. convert_string : bool, default True
  4981. Whether object dtypes should be converted to ``StringDtype()``.
  4982. convert_integer : bool, default True
  4983. Whether, if possible, conversion can be done to integer extension types.
convert_boolean : bool, default True
Whether object dtypes should be converted to ``BooleanDtype()``.
  4986. Returns
  4987. -------
  4988. Series or DataFrame
  4989. Copy of input object with new dtype.
  4990. See Also
  4991. --------
  4992. infer_objects : Infer dtypes of objects.
  4993. to_datetime : Convert argument to datetime.
  4994. to_timedelta : Convert argument to timedelta.
  4995. to_numeric : Convert argument to a numeric type.
  4996. Notes
  4997. -----
  4998. By default, ``convert_dtypes`` will attempt to convert a Series (or each
  4999. Series in a DataFrame) to dtypes that support ``pd.NA``. By using the options
  5000. ``convert_string``, ``convert_integer``, and ``convert_boolean``, it is
  5001. possible to turn off individual conversions to ``StringDtype``, the integer
  5002. extension types or ``BooleanDtype``, respectively.
  5003. For object-dtyped columns, if ``infer_objects`` is ``True``, use the inference
  5004. rules as during normal Series/DataFrame construction. Then, if possible,
  5005. convert to ``StringDtype``, ``BooleanDtype`` or an appropriate integer extension
  5006. type, otherwise leave as ``object``.
  5007. If the dtype is integer, convert to an appropriate integer extension type.
  5008. If the dtype is numeric, and consists of all integers, convert to an
  5009. appropriate integer extension type.
  5010. In the future, as new dtypes are added that support ``pd.NA``, the results
  5011. of this method will change to support those new dtypes.
  5012. Examples
  5013. --------
  5014. >>> df = pd.DataFrame(
  5015. ... {
  5016. ... "a": pd.Series([1, 2, 3], dtype=np.dtype("int32")),
  5017. ... "b": pd.Series(["x", "y", "z"], dtype=np.dtype("O")),
  5018. ... "c": pd.Series([True, False, np.nan], dtype=np.dtype("O")),
  5019. ... "d": pd.Series(["h", "i", np.nan], dtype=np.dtype("O")),
  5020. ... "e": pd.Series([10, np.nan, 20], dtype=np.dtype("float")),
  5021. ... "f": pd.Series([np.nan, 100.5, 200], dtype=np.dtype("float")),
  5022. ... }
  5023. ... )
  5024. Start with a DataFrame with default dtypes.
  5025. >>> df
  5026. a b c d e f
  5027. 0 1 x True h 10.0 NaN
  5028. 1 2 y False i NaN 100.5
  5029. 2 3 z NaN NaN 20.0 200.0
  5030. >>> df.dtypes
  5031. a int32
  5032. b object
  5033. c object
  5034. d object
  5035. e float64
  5036. f float64
  5037. dtype: object
  5038. Convert the DataFrame to use best possible dtypes.
  5039. >>> dfn = df.convert_dtypes()
  5040. >>> dfn
  5041. a b c d e f
  5042. 0 1 x True h 10 NaN
  5043. 1 2 y False i <NA> 100.5
  5044. 2 3 z <NA> <NA> 20 200.0
  5045. >>> dfn.dtypes
  5046. a Int32
  5047. b string
  5048. c boolean
  5049. d string
  5050. e Int64
  5051. f float64
  5052. dtype: object
  5053. Start with a Series of strings and missing data represented by ``np.nan``.
  5054. >>> s = pd.Series(["a", "b", np.nan])
  5055. >>> s
  5056. 0 a
  5057. 1 b
  5058. 2 NaN
  5059. dtype: object
  5060. Obtain a Series with dtype ``StringDtype``.
  5061. >>> s.convert_dtypes()
  5062. 0 a
  5063. 1 b
  5064. 2 <NA>
  5065. dtype: string
  5066. """
  5067. if self.ndim == 1:
  5068. return self._convert_dtypes(
  5069. infer_objects, convert_string, convert_integer, convert_boolean
  5070. )
  5071. else:
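# DataFrame case: convert each column's dtype independently,
# then reassemble the frame column-wise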
  5072. results = [
  5073. col._convert_dtypes(
  5074. infer_objects, convert_string, convert_integer, convert_boolean
  5075. )
  5076. for col_name, col in self.items()
  5077. ]
  5078. result = pd.concat(results, axis=1, copy=False)
  5079. return result
  5080. # ----------------------------------------------------------------------
  5081. # Filling NA's
  5082. def fillna(
  5083. self: FrameOrSeries,
  5084. value=None,
  5085. method=None,
  5086. axis=None,
  5087. inplace: bool_t = False,
  5088. limit=None,
  5089. downcast=None,
  5090. ) -> Optional[FrameOrSeries]:
  5091. """
  5092. Fill NA/NaN values using the specified method.
  5093. Parameters
  5094. ----------
  5095. value : scalar, dict, Series, or DataFrame
  5096. Value to use to fill holes (e.g. 0), alternately a
  5097. dict/Series/DataFrame of values specifying which value to use for
  5098. each index (for a Series) or column (for a DataFrame). Values not
  5099. in the dict/Series/DataFrame will not be filled. This value cannot
  5100. be a list.
  5101. method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None
Method to use for filling holes in reindexed Series:
pad / ffill: propagate last valid observation forward to next valid;
backfill / bfill: use next valid observation to fill gap.
  5105. axis : %(axes_single_arg)s
  5106. Axis along which to fill missing values.
  5107. inplace : bool, default False
  5108. If True, fill in-place. Note: this will modify any
  5109. other views on this object (e.g., a no-copy slice for a column in a
  5110. DataFrame).
  5111. limit : int, default None
  5112. If method is specified, this is the maximum number of consecutive
  5113. NaN values to forward/backward fill. In other words, if there is
  5114. a gap with more than this number of consecutive NaNs, it will only
  5115. be partially filled. If method is not specified, this is the
  5116. maximum number of entries along the entire axis where NaNs will be
  5117. filled. Must be greater than 0 if not None.
  5118. downcast : dict, default is None
  5119. A dict of item->dtype of what to downcast if possible,
  5120. or the string 'infer' which will try to downcast to an appropriate
  5121. equal type (e.g. float64 to int64 if possible).
  5122. Returns
  5123. -------
  5124. %(klass)s or None
  5125. Object with missing values filled or None if ``inplace=True``.
  5126. See Also
  5127. --------
  5128. interpolate : Fill NaN values using interpolation.
  5129. reindex : Conform object to new index.
  5130. asfreq : Convert TimeSeries to specified frequency.
  5131. Examples
  5132. --------
  5133. >>> df = pd.DataFrame([[np.nan, 2, np.nan, 0],
  5134. ... [3, 4, np.nan, 1],
  5135. ... [np.nan, np.nan, np.nan, 5],
  5136. ... [np.nan, 3, np.nan, 4]],
  5137. ... columns=list('ABCD'))
  5138. >>> df
  5139. A B C D
  5140. 0 NaN 2.0 NaN 0
  5141. 1 3.0 4.0 NaN 1
  5142. 2 NaN NaN NaN 5
  5143. 3 NaN 3.0 NaN 4
  5144. Replace all NaN elements with 0s.
  5145. >>> df.fillna(0)
  5146. A B C D
  5147. 0 0.0 2.0 0.0 0
  5148. 1 3.0 4.0 0.0 1
  5149. 2 0.0 0.0 0.0 5
  5150. 3 0.0 3.0 0.0 4
  5151. We can also propagate non-null values forward or backward.
  5152. >>> df.fillna(method='ffill')
  5153. A B C D
  5154. 0 NaN 2.0 NaN 0
  5155. 1 3.0 4.0 NaN 1
  5156. 2 3.0 4.0 NaN 5
  5157. 3 3.0 3.0 NaN 4
  5158. Replace all NaN elements in column 'A', 'B', 'C', and 'D', with 0, 1,
  5159. 2, and 3 respectively.
  5160. >>> values = {'A': 0, 'B': 1, 'C': 2, 'D': 3}
  5161. >>> df.fillna(value=values)
  5162. A B C D
  5163. 0 0.0 2.0 2.0 0
  5164. 1 3.0 4.0 2.0 1
  5165. 2 0.0 1.0 2.0 5
  5166. 3 0.0 3.0 2.0 4
  5167. Only replace the first NaN element.
  5168. >>> df.fillna(value=values, limit=1)
  5169. A B C D
  5170. 0 0.0 2.0 2.0 0
  5171. 1 3.0 4.0 NaN 1
  5172. 2 NaN 1.0 NaN 5
  5173. 3 NaN 3.0 NaN 4
  5174. """
  5175. inplace = validate_bool_kwarg(inplace, "inplace")
  5176. value, method = validate_fillna_kwargs(value, method)
  5177. self._consolidate_inplace()
# set the default here, so functions examining the signature
  5179. # can detect if something was set (e.g. in groupby) (GH9221)
  5180. if axis is None:
  5181. axis = 0
  5182. axis = self._get_axis_number(axis)
  5183. if value is None:
  5184. if self._is_mixed_type and axis == 1:
  5185. if inplace:
  5186. raise NotImplementedError()
  5187. result = self.T.fillna(method=method, limit=limit).T
  5188. # need to downcast here because of all of the transposes
  5189. result._data = result._data.downcast()
  5190. return result
  5191. new_data = self._data.interpolate(
  5192. method=method,
  5193. axis=axis,
  5194. limit=limit,
  5195. inplace=inplace,
  5196. coerce=True,
  5197. downcast=downcast,
  5198. )
  5199. else:
  5200. if len(self._get_axis(axis)) == 0:
  5201. return self
  5202. if self.ndim == 1:
  5203. if isinstance(value, (dict, ABCSeries)):
  5204. value = create_series_with_explicit_dtype(
  5205. value, dtype_if_empty=object
  5206. )
  5207. elif not is_list_like(value):
  5208. pass
  5209. else:
  5210. raise TypeError(
  5211. '"value" parameter must be a scalar, dict '
  5212. "or Series, but you passed a "
  5213. f'"{type(value).__name__}"'
  5214. )
  5215. new_data = self._data.fillna(
  5216. value=value, limit=limit, inplace=inplace, downcast=downcast
  5217. )
  5218. elif isinstance(value, (dict, ABCSeries)):
  5219. if axis == 1:
  5220. raise NotImplementedError(
  5221. "Currently only can fill "
  5222. "with dict/Series column "
  5223. "by column"
  5224. )
  5225. result = self if inplace else self.copy()
  5226. for k, v in value.items():
  5227. if k not in result:
  5228. continue
  5229. obj = result[k]
  5230. obj.fillna(v, limit=limit, inplace=True, downcast=downcast)
  5231. return result if not inplace else None
  5232. elif not is_list_like(value):
  5233. new_data = self._data.fillna(
  5234. value=value, limit=limit, inplace=inplace, downcast=downcast
  5235. )
  5236. elif isinstance(value, ABCDataFrame) and self.ndim == 2:
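# fill from an aligned DataFrame: keep entries that are already
# non-NA, take the aligned entry from `value` everywhere else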
  5237. new_data = self.where(self.notna(), value)
  5238. else:
  5239. raise ValueError(f"invalid fill value with a {type(value)}")
  5240. if inplace:
  5241. self._update_inplace(new_data)
  5242. return None
  5243. else:
  5244. return self._constructor(new_data).__finalize__(self)
  5245. def ffill(
  5246. self: FrameOrSeries,
  5247. axis=None,
  5248. inplace: bool_t = False,
  5249. limit=None,
  5250. downcast=None,
  5251. ) -> Optional[FrameOrSeries]:
  5252. """
  5253. Synonym for :meth:`DataFrame.fillna` with ``method='ffill'``.
  5254. Returns
  5255. -------
  5256. %(klass)s or None
  5257. Object with missing values filled or None if ``inplace=True``.
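Examples
--------
A minimal doctest sketch (hypothetical Series) showing the last valid
observation being propagated forward:
>>> s = pd.Series([1.0, np.nan, 3.0, np.nan])
>>> s.ffill()
0    1.0
1    1.0
2    3.0
3    3.0
dtype: float64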
  5258. """
  5259. return self.fillna(
  5260. method="ffill", axis=axis, inplace=inplace, limit=limit, downcast=downcast
  5261. )
  5262. def bfill(
  5263. self: FrameOrSeries,
  5264. axis=None,
  5265. inplace: bool_t = False,
  5266. limit=None,
  5267. downcast=None,
  5268. ) -> Optional[FrameOrSeries]:
  5269. """
  5270. Synonym for :meth:`DataFrame.fillna` with ``method='bfill'``.
  5271. Returns
  5272. -------
  5273. %(klass)s or None
  5274. Object with missing values filled or None if ``inplace=True``.
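Examples
--------
A minimal doctest sketch (hypothetical Series) showing the next valid
observation being used to fill backward:
>>> s = pd.Series([np.nan, 2.0, np.nan, 4.0])
>>> s.bfill()
0    2.0
1    2.0
2    4.0
3    4.0
dtype: float64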
  5275. """
  5276. return self.fillna(
  5277. method="bfill", axis=axis, inplace=inplace, limit=limit, downcast=downcast
  5278. )
  5279. _shared_docs[
  5280. "replace"
  5281. ] = """
  5282. Replace values given in `to_replace` with `value`.
  5283. Values of the %(klass)s are replaced with other values dynamically.
  5284. This differs from updating with ``.loc`` or ``.iloc``, which require
  5285. you to specify a location to update with some value.
  5286. Parameters
  5287. ----------
  5288. to_replace : str, regex, list, dict, Series, int, float, or None
  5289. How to find the values that will be replaced.
  5290. * numeric, str or regex:
  5291. - numeric: numeric values equal to `to_replace` will be
  5292. replaced with `value`
  5293. - str: string exactly matching `to_replace` will be replaced
  5294. with `value`
  5295. - regex: regexs matching `to_replace` will be replaced with
  5296. `value`
  5297. * list of str, regex, or numeric:
  5298. - First, if `to_replace` and `value` are both lists, they
  5299. **must** be the same length.
  5300. - Second, if ``regex=True`` then all of the strings in **both**
  5301. lists will be interpreted as regexs otherwise they will match
  5302. directly. This doesn't matter much for `value` since there
  5303. are only a few possible substitution regexes you can use.
  5304. - str, regex and numeric rules apply as above.
  5305. * dict:
  5306. - Dicts can be used to specify different replacement values
  5307. for different existing values. For example,
  5308. ``{'a': 'b', 'y': 'z'}`` replaces the value 'a' with 'b' and
  5309. 'y' with 'z'. To use a dict in this way the `value`
  5310. parameter should be `None`.
  5311. - For a DataFrame a dict can specify that different values
  5312. should be replaced in different columns. For example,
  5313. ``{'a': 1, 'b': 'z'}`` looks for the value 1 in column 'a'
  5314. and the value 'z' in column 'b' and replaces these values
  5315. with whatever is specified in `value`. The `value` parameter
  5316. should not be ``None`` in this case. You can treat this as a
  5317. special case of passing two lists except that you are
  5318. specifying the column to search in.
  5319. - For a DataFrame nested dictionaries, e.g.,
  5320. ``{'a': {'b': np.nan}}``, are read as follows: look in column
  5321. 'a' for the value 'b' and replace it with NaN. The `value`
  5322. parameter should be ``None`` to use a nested dict in this
  5323. way. You can nest regular expressions as well. Note that
  5324. column names (the top-level dictionary keys in a nested
  5325. dictionary) **cannot** be regular expressions.
  5326. * None:
  5327. - This means that the `regex` argument must be a string,
  5328. compiled regular expression, or list, dict, ndarray or
  5329. Series of such elements. If `value` is also ``None`` then
  5330. this **must** be a nested dictionary or Series.
  5331. See the examples section for examples of each of these.
  5332. value : scalar, dict, list, str, regex, default None
  5333. Value to replace any values matching `to_replace` with.
  5334. For a DataFrame a dict of values can be used to specify which
  5335. value to use for each column (columns not in the dict will not be
  5336. filled). Regular expressions, strings and lists or dicts of such
  5337. objects are also allowed.
inplace : bool, default False
If True, performs the replacement in place. Note: this will modify any
other views on this object (e.g. a column from a DataFrame).
Returns the caller if this is True.
  5342. limit : int, default None
  5343. Maximum size gap to forward or backward fill.
  5344. regex : bool or same types as `to_replace`, default False
  5345. Whether to interpret `to_replace` and/or `value` as regular
  5346. expressions. If this is ``True`` then `to_replace` *must* be a
  5347. string. Alternatively, this could be a regular expression or a
  5348. list, dict, or array of regular expressions in which case
  5349. `to_replace` must be ``None``.
  5350. method : {'pad', 'ffill', 'bfill', `None`}
The method to use for replacement when `to_replace` is a
  5352. scalar, list or tuple and `value` is ``None``.
  5353. .. versionchanged:: 0.23.0
  5354. Added to DataFrame.
  5355. Returns
  5356. -------
  5357. %(klass)s
  5358. Object after replacement.
  5359. Raises
  5360. ------
  5361. AssertionError
  5362. * If `regex` is not a ``bool`` and `to_replace` is not
  5363. ``None``.
  5364. TypeError
  5365. * If `to_replace` is a ``dict`` and `value` is not a ``list``,
  5366. ``dict``, ``ndarray``, or ``Series``
  5367. * If `to_replace` is ``None`` and `regex` is not compilable
  5368. into a regular expression or is a list, dict, ndarray, or
  5369. Series.
  5370. * When replacing multiple ``bool`` or ``datetime64`` objects and
  5371. the arguments to `to_replace` does not match the type of the
  5372. value being replaced
  5373. ValueError
  5374. * If a ``list`` or an ``ndarray`` is passed to `to_replace` and
  5375. `value` but they are not the same length.
  5376. See Also
  5377. --------
  5378. %(klass)s.fillna : Fill NA values.
  5379. %(klass)s.where : Replace values based on boolean condition.
  5380. Series.str.replace : Simple string replacement.
  5381. Notes
  5382. -----
  5383. * Regex substitution is performed under the hood with ``re.sub``. The
  5384. rules for substitution for ``re.sub`` are the same.
  5385. * Regular expressions will only substitute on strings, meaning you
  5386. cannot provide, for example, a regular expression matching floating
  5387. point numbers and expect the columns in your frame that have a
  5388. numeric dtype to be matched. However, if those floating point
  5389. numbers *are* strings, then you can do this.
  5390. * This method has *a lot* of options. You are encouraged to experiment
  5391. and play with this method to gain intuition about how it works.
* When a dict is used as the `to_replace` value, the dict's keys
play the role of the `to_replace` part and the dict's values play
the role of the `value` parameter.
  5395. Examples
  5396. --------
  5397. **Scalar `to_replace` and `value`**
  5398. >>> s = pd.Series([0, 1, 2, 3, 4])
  5399. >>> s.replace(0, 5)
  5400. 0 5
  5401. 1 1
  5402. 2 2
  5403. 3 3
  5404. 4 4
  5405. dtype: int64
  5406. >>> df = pd.DataFrame({'A': [0, 1, 2, 3, 4],
  5407. ... 'B': [5, 6, 7, 8, 9],
  5408. ... 'C': ['a', 'b', 'c', 'd', 'e']})
  5409. >>> df.replace(0, 5)
  5410. A B C
  5411. 0 5 5 a
  5412. 1 1 6 b
  5413. 2 2 7 c
  5414. 3 3 8 d
  5415. 4 4 9 e
  5416. **List-like `to_replace`**
  5417. >>> df.replace([0, 1, 2, 3], 4)
  5418. A B C
  5419. 0 4 5 a
  5420. 1 4 6 b
  5421. 2 4 7 c
  5422. 3 4 8 d
  5423. 4 4 9 e
  5424. >>> df.replace([0, 1, 2, 3], [4, 3, 2, 1])
  5425. A B C
  5426. 0 4 5 a
  5427. 1 3 6 b
  5428. 2 2 7 c
  5429. 3 1 8 d
  5430. 4 4 9 e
  5431. >>> s.replace([1, 2], method='bfill')
  5432. 0 0
  5433. 1 3
  5434. 2 3
  5435. 3 3
  5436. 4 4
  5437. dtype: int64
  5438. **dict-like `to_replace`**
  5439. >>> df.replace({0: 10, 1: 100})
  5440. A B C
  5441. 0 10 5 a
  5442. 1 100 6 b
  5443. 2 2 7 c
  5444. 3 3 8 d
  5445. 4 4 9 e
  5446. >>> df.replace({'A': 0, 'B': 5}, 100)
  5447. A B C
  5448. 0 100 100 a
  5449. 1 1 6 b
  5450. 2 2 7 c
  5451. 3 3 8 d
  5452. 4 4 9 e
  5453. >>> df.replace({'A': {0: 100, 4: 400}})
  5454. A B C
  5455. 0 100 5 a
  5456. 1 1 6 b
  5457. 2 2 7 c
  5458. 3 3 8 d
  5459. 4 400 9 e
  5460. **Regular expression `to_replace`**
  5461. >>> df = pd.DataFrame({'A': ['bat', 'foo', 'bait'],
  5462. ... 'B': ['abc', 'bar', 'xyz']})
  5463. >>> df.replace(to_replace=r'^ba.$', value='new', regex=True)
  5464. A B
  5465. 0 new abc
  5466. 1 foo new
  5467. 2 bait xyz
  5468. >>> df.replace({'A': r'^ba.$'}, {'A': 'new'}, regex=True)
  5469. A B
  5470. 0 new abc
  5471. 1 foo bar
  5472. 2 bait xyz
  5473. >>> df.replace(regex=r'^ba.$', value='new')
  5474. A B
  5475. 0 new abc
  5476. 1 foo new
  5477. 2 bait xyz
  5478. >>> df.replace(regex={r'^ba.$': 'new', 'foo': 'xyz'})
  5479. A B
  5480. 0 new abc
  5481. 1 xyz new
  5482. 2 bait xyz
  5483. >>> df.replace(regex=[r'^ba.$', 'foo'], value='new')
  5484. A B
  5485. 0 new abc
  5486. 1 new new
  5487. 2 bait xyz
  5488. Note that when replacing multiple ``bool`` or ``datetime64`` objects,
  5489. the data types in the `to_replace` parameter must match the data
  5490. type of the value being replaced:
  5491. >>> df = pd.DataFrame({'A': [True, False, True],
  5492. ... 'B': [False, True, False]})
  5493. >>> df.replace({'a string': 'new value', True: False}) # raises
  5494. Traceback (most recent call last):
  5495. ...
  5496. TypeError: Cannot compare types 'ndarray(dtype=bool)' and 'str'
  5497. This raises a ``TypeError`` because one of the ``dict`` keys is not of
  5498. the correct type for replacement.
  5499. Compare the behavior of ``s.replace({'a': None})`` and
  5500. ``s.replace('a', None)`` to understand the peculiarities
  5501. of the `to_replace` parameter:
  5502. >>> s = pd.Series([10, 'a', 'a', 'b', 'a'])
  5503. When one uses a dict as the `to_replace` value, it is like the
  5504. value(s) in the dict are equal to the `value` parameter.
  5505. ``s.replace({'a': None})`` is equivalent to
  5506. ``s.replace(to_replace={'a': None}, value=None, method=None)``:
  5507. >>> s.replace({'a': None})
  5508. 0 10
  5509. 1 None
  5510. 2 None
  5511. 3 b
  5512. 4 None
  5513. dtype: object
When ``value=None`` and `to_replace` is a scalar, list or
tuple, `replace` uses the method parameter (default 'pad') to do the
replacement. This is why, in the following example, the 'a' values are
replaced by 10 in rows 1 and 2, and by 'b' in row 4.
  5518. The command ``s.replace('a', None)`` is actually equivalent to
  5519. ``s.replace(to_replace='a', value=None, method='pad')``:
  5520. >>> s.replace('a', None)
  5521. 0 10
  5522. 1 10
  5523. 2 10
  5524. 3 b
  5525. 4 b
  5526. dtype: object
  5527. """
  5528. @Appender(_shared_docs["replace"] % _shared_doc_kwargs)
  5529. def replace(
  5530. self,
  5531. to_replace=None,
  5532. value=None,
  5533. inplace=False,
  5534. limit=None,
  5535. regex=False,
  5536. method="pad",
  5537. ):
  5538. inplace = validate_bool_kwarg(inplace, "inplace")
  5539. if not is_bool(regex) and to_replace is not None:
  5540. raise AssertionError("'to_replace' must be 'None' if 'regex' is not a bool")
  5541. self._consolidate_inplace()
  5542. if value is None:
  5543. # passing a single value that is scalar like
  5544. # when value is None (GH5319), for compat
  5545. if not is_dict_like(to_replace) and not is_dict_like(regex):
  5546. to_replace = [to_replace]
  5547. if isinstance(to_replace, (tuple, list)):
  5548. if isinstance(self, ABCDataFrame):
  5549. return self.apply(
  5550. _single_replace, args=(to_replace, method, inplace, limit)
  5551. )
  5552. return _single_replace(self, to_replace, method, inplace, limit)
  5553. if not is_dict_like(to_replace):
  5554. if not is_dict_like(regex):
  5555. raise TypeError(
  5556. 'If "to_replace" and "value" are both None '
  5557. 'and "to_replace" is not a list, then '
  5558. "regex must be a mapping"
  5559. )
  5560. to_replace = regex
  5561. regex = True
  5562. items = list(to_replace.items())
  5563. keys, values = zip(*items) if items else ([], [])
  5564. are_mappings = [is_dict_like(v) for v in values]
  5565. if any(are_mappings):
  5566. if not all(are_mappings):
  5567. raise TypeError(
  5568. "If a nested mapping is passed, all values "
  5569. "of the top level mapping must be mappings"
  5570. )
  5571. # passed a nested dict/Series
  5572. to_rep_dict = {}
  5573. value_dict = {}
  5574. for k, v in items:
  5575. keys, values = list(zip(*v.items())) or ([], [])
  5576. to_rep_dict[k] = list(keys)
  5577. value_dict[k] = list(values)
  5578. to_replace, value = to_rep_dict, value_dict
  5579. else:
  5580. to_replace, value = keys, values
  5581. return self.replace(
  5582. to_replace, value, inplace=inplace, limit=limit, regex=regex
  5583. )
  5584. else:
  5585. # need a non-zero len on all axes
  5586. if not self.size:
  5587. return self
  5588. new_data = self._data
  5589. if is_dict_like(to_replace):
  5590. if is_dict_like(value): # {'A' : NA} -> {'A' : 0}
  5591. res = self if inplace else self.copy()
  5592. for c, src in to_replace.items():
  5593. if c in value and c in self:
  5594. # object conversion is handled in
  5595. # series.replace which is called recursively
  5596. res[c] = res[c].replace(
  5597. to_replace=src,
  5598. value=value[c],
  5599. inplace=False,
  5600. regex=regex,
  5601. )
  5602. return None if inplace else res
  5603. # {'A': NA} -> 0
  5604. elif not is_list_like(value):
  5605. keys = [(k, src) for k, src in to_replace.items() if k in self]
  5606. keys_len = len(keys) - 1
  5607. for i, (k, src) in enumerate(keys):
  5608. convert = i == keys_len
  5609. new_data = new_data.replace(
  5610. to_replace=src,
  5611. value=value,
  5612. filter=[k],
  5613. inplace=inplace,
  5614. regex=regex,
  5615. convert=convert,
  5616. )
  5617. else:
  5618. raise TypeError("value argument must be scalar, dict, or Series")
  5619. elif is_list_like(to_replace): # [NA, ''] -> [0, 'missing']
  5620. if is_list_like(value):
  5621. if len(to_replace) != len(value):
  5622. raise ValueError(
  5623. f"Replacement lists must match in length. "
  5624. f"Expecting {len(to_replace)} got {len(value)} "
  5625. )
  5626. new_data = self._data.replace_list(
  5627. src_list=to_replace,
  5628. dest_list=value,
  5629. inplace=inplace,
  5630. regex=regex,
  5631. )
  5632. else: # [NA, ''] -> 0
  5633. new_data = self._data.replace(
  5634. to_replace=to_replace, value=value, inplace=inplace, regex=regex
  5635. )
  5636. elif to_replace is None:
  5637. if not (
  5638. is_re_compilable(regex)
  5639. or is_list_like(regex)
  5640. or is_dict_like(regex)
  5641. ):
  5642. raise TypeError(
  5643. f"'regex' must be a string or a compiled regular expression "
  5644. f"or a list or dict of strings or regular expressions, "
  5645. f"you passed a {repr(type(regex).__name__)}"
  5646. )
  5647. return self.replace(
  5648. regex, value, inplace=inplace, limit=limit, regex=True
  5649. )
  5650. else:
  5651. # dest iterable dict-like
  5652. if is_dict_like(value): # NA -> {'A' : 0, 'B' : -1}
  5653. new_data = self._data
  5654. for k, v in value.items():
  5655. if k in self:
  5656. new_data = new_data.replace(
  5657. to_replace=to_replace,
  5658. value=v,
  5659. filter=[k],
  5660. inplace=inplace,
  5661. regex=regex,
  5662. )
  5663. elif not is_list_like(value): # NA -> 0
  5664. new_data = self._data.replace(
  5665. to_replace=to_replace, value=value, inplace=inplace, regex=regex
  5666. )
  5667. else:
  5668. raise TypeError(
  5669. f'Invalid "to_replace" type: {repr(type(to_replace).__name__)}'
  5670. )
  5671. if inplace:
  5672. self._update_inplace(new_data)
  5673. else:
  5674. return self._constructor(new_data).__finalize__(self)
  5675. _shared_docs[
  5676. "interpolate"
  5677. ] = """
  5678. Please note that only ``method='linear'`` is supported for
  5679. DataFrame/Series with a MultiIndex.
  5680. Parameters
  5681. ----------
  5682. method : str, default 'linear'
  5683. Interpolation technique to use. One of:
  5684. * 'linear': Ignore the index and treat the values as equally
  5685. spaced. This is the only method supported on MultiIndexes.
  5686. * 'time': Works on daily and higher resolution data to interpolate
  5687. given length of interval.
  5688. * 'index', 'values': use the actual numerical values of the index.
  5689. * 'pad': Fill in NaNs using existing values.
  5690. * 'nearest', 'zero', 'slinear', 'quadratic', 'cubic', 'spline',
  5691. 'barycentric', 'polynomial': Passed to
  5692. `scipy.interpolate.interp1d`. These methods use the numerical
  5693. values of the index. Both 'polynomial' and 'spline' require that
  5694. you also specify an `order` (int), e.g.
  5695. ``df.interpolate(method='polynomial', order=5)``.
  5696. * 'krogh', 'piecewise_polynomial', 'spline', 'pchip', 'akima':
  5697. Wrappers around the SciPy interpolation methods of similar
  5698. names. See `Notes`.
  5699. * 'from_derivatives': Refers to
  5700. `scipy.interpolate.BPoly.from_derivatives` which
  5701. replaces 'piecewise_polynomial' interpolation method in
  5702. scipy 0.18.
  5703. axis : {0 or 'index', 1 or 'columns', None}, default None
  5704. Axis to interpolate along.
  5705. limit : int, optional
  5706. Maximum number of consecutive NaNs to fill. Must be greater than
  5707. 0.
  5708. inplace : bool, default False
  5709. Update the data in place if possible.
  5710. limit_direction : {'forward', 'backward', 'both'}, default 'forward'
  5711. If limit is specified, consecutive NaNs will be filled in this
  5712. direction.
  5713. limit_area : {`None`, 'inside', 'outside'}, default None
  5714. If limit is specified, consecutive NaNs will be filled with this
  5715. restriction.
  5716. * ``None``: No fill restriction.
  5717. * 'inside': Only fill NaNs surrounded by valid values
  5718. (interpolate).
  5719. * 'outside': Only fill NaNs outside valid values (extrapolate).
  5720. .. versionadded:: 0.23.0
downcast : optional, 'infer' or None, default None
  5722. Downcast dtypes if possible.
  5723. **kwargs
  5724. Keyword arguments to pass on to the interpolating function.
  5725. Returns
  5726. -------
  5727. Series or DataFrame
  5728. Returns the same object type as the caller, interpolated at
  5729. some or all ``NaN`` values.
  5730. See Also
  5731. --------
  5732. fillna : Fill missing values using different methods.
  5733. scipy.interpolate.Akima1DInterpolator : Piecewise cubic polynomials
  5734. (Akima interpolator).
  5735. scipy.interpolate.BPoly.from_derivatives : Piecewise polynomial in the
  5736. Bernstein basis.
  5737. scipy.interpolate.interp1d : Interpolate a 1-D function.
  5738. scipy.interpolate.KroghInterpolator : Interpolate polynomial (Krogh
  5739. interpolator).
  5740. scipy.interpolate.PchipInterpolator : PCHIP 1-d monotonic cubic
  5741. interpolation.
  5742. scipy.interpolate.CubicSpline : Cubic spline data interpolator.
  5743. Notes
  5744. -----
  5745. The 'krogh', 'piecewise_polynomial', 'spline', 'pchip' and 'akima'
  5746. methods are wrappers around the respective SciPy implementations of
  5747. similar names. These use the actual numerical values of the index.
  5748. For more information on their behavior, see the
  5749. `SciPy documentation
  5750. <http://docs.scipy.org/doc/scipy/reference/interpolate.html#univariate-interpolation>`__
  5751. and `SciPy tutorial
  5752. <http://docs.scipy.org/doc/scipy/reference/tutorial/interpolate.html>`__.
  5753. Examples
  5754. --------
  5755. Filling in ``NaN`` in a :class:`~pandas.Series` via linear
  5756. interpolation.
  5757. >>> s = pd.Series([0, 1, np.nan, 3])
  5758. >>> s
  5759. 0 0.0
  5760. 1 1.0
  5761. 2 NaN
  5762. 3 3.0
  5763. dtype: float64
  5764. >>> s.interpolate()
  5765. 0 0.0
  5766. 1 1.0
  5767. 2 2.0
  5768. 3 3.0
  5769. dtype: float64
  5770. Filling in ``NaN`` in a Series by padding, but filling at most two
  5771. consecutive ``NaN`` at a time.
  5772. >>> s = pd.Series([np.nan, "single_one", np.nan,
  5773. ... "fill_two_more", np.nan, np.nan, np.nan,
  5774. ... 4.71, np.nan])
  5775. >>> s
  5776. 0 NaN
  5777. 1 single_one
  5778. 2 NaN
  5779. 3 fill_two_more
  5780. 4 NaN
  5781. 5 NaN
  5782. 6 NaN
  5783. 7 4.71
  5784. 8 NaN
  5785. dtype: object
  5786. >>> s.interpolate(method='pad', limit=2)
  5787. 0 NaN
  5788. 1 single_one
  5789. 2 single_one
  5790. 3 fill_two_more
  5791. 4 fill_two_more
  5792. 5 fill_two_more
  5793. 6 NaN
  5794. 7 4.71
  5795. 8 4.71
  5796. dtype: object
  5797. Filling in ``NaN`` in a Series via polynomial interpolation or splines:
  5798. Both 'polynomial' and 'spline' methods require that you also specify
  5799. an ``order`` (int).
  5800. >>> s = pd.Series([0, 2, np.nan, 8])
  5801. >>> s.interpolate(method='polynomial', order=2)
  5802. 0 0.000000
  5803. 1 2.000000
  5804. 2 4.666667
  5805. 3 8.000000
  5806. dtype: float64
  5807. Fill the DataFrame forward (that is, going down) along each column
  5808. using linear interpolation.
  5809. Note how the last entry in column 'a' is interpolated differently,
  5810. because there is no entry after it to use for interpolation.
  5811. Note how the first entry in column 'b' remains ``NaN``, because there
  5812. is no entry before it to use for interpolation.
  5813. >>> df = pd.DataFrame([(0.0, np.nan, -1.0, 1.0),
  5814. ... (np.nan, 2.0, np.nan, np.nan),
  5815. ... (2.0, 3.0, np.nan, 9.0),
  5816. ... (np.nan, 4.0, -4.0, 16.0)],
  5817. ... columns=list('abcd'))
  5818. >>> df
  5819. a b c d
  5820. 0 0.0 NaN -1.0 1.0
  5821. 1 NaN 2.0 NaN NaN
  5822. 2 2.0 3.0 NaN 9.0
  5823. 3 NaN 4.0 -4.0 16.0
  5824. >>> df.interpolate(method='linear', limit_direction='forward', axis=0)
  5825. a b c d
  5826. 0 0.0 NaN -1.0 1.0
  5827. 1 1.0 2.0 -2.0 5.0
  5828. 2 2.0 3.0 -3.0 9.0
  5829. 3 2.0 4.0 -4.0 16.0
  5830. Using polynomial interpolation.
  5831. >>> df['d'].interpolate(method='polynomial', order=2)
  5832. 0 1.0
  5833. 1 4.0
  5834. 2 9.0
  5835. 3 16.0
  5836. Name: d, dtype: float64
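``limit_area`` restricts which ``NaN`` values are eligible for
filling; a minimal sketch using ``'inside'``, so only NaNs surrounded
by valid values are interpolated:
>>> s = pd.Series([np.nan, 1.0, np.nan, 3.0, np.nan])
>>> s.interpolate(limit_area='inside')
0    NaN
1    1.0
2    2.0
3    3.0
4    NaN
dtype: float64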
  5837. """
  5838. @Appender(_shared_docs["interpolate"] % _shared_doc_kwargs)
  5839. def interpolate(
  5840. self,
  5841. method="linear",
  5842. axis=0,
  5843. limit=None,
  5844. inplace=False,
  5845. limit_direction="forward",
  5846. limit_area=None,
  5847. downcast=None,
  5848. **kwargs,
  5849. ):
  5850. """
  5851. Interpolate values according to different methods.
  5852. """
  5853. inplace = validate_bool_kwarg(inplace, "inplace")
  5854. axis = self._get_axis_number(axis)
  5855. if axis == 0:
  5856. ax = self._info_axis_name
  5857. _maybe_transposed_self = self
  5858. elif axis == 1:
  5859. _maybe_transposed_self = self.T
  5860. ax = 1
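# for axis=1, interpolate the transpose along its rows and
# transpose back at the end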
  5861. ax = _maybe_transposed_self._get_axis_number(ax)
  5862. if _maybe_transposed_self.ndim == 2:
  5863. alt_ax = 1 - ax
  5864. else:
  5865. alt_ax = ax
  5866. if isinstance(_maybe_transposed_self.index, MultiIndex) and method != "linear":
  5867. raise ValueError(
  5868. "Only `method=linear` interpolation is supported on MultiIndexes."
  5869. )
  5870. if _maybe_transposed_self._data.get_dtype_counts().get("object") == len(
  5871. _maybe_transposed_self.T
  5872. ):
  5873. raise TypeError(
  5874. "Cannot interpolate with all object-dtype columns "
  5875. "in the DataFrame. Try setting at least one "
  5876. "column to a numeric dtype."
  5877. )
  5878. # create/use the index
  5879. if method == "linear":
  5880. # prior default
  5881. index = np.arange(len(_maybe_transposed_self._get_axis(alt_ax)))
  5882. else:
  5883. index = _maybe_transposed_self._get_axis(alt_ax)
  5884. methods = {"index", "values", "nearest", "time"}
  5885. is_numeric_or_datetime = (
  5886. is_numeric_dtype(index)
  5887. or is_datetime64_any_dtype(index)
  5888. or is_timedelta64_dtype(index)
  5889. )
  5890. if method not in methods and not is_numeric_or_datetime:
  5891. raise ValueError(
  5892. "Index column must be numeric or datetime type when "
  5893. f"using {method} method other than linear. "
  5894. "Try setting a numeric or datetime index column before "
  5895. "interpolating."
  5896. )
  5897. if isna(index).any():
  5898. raise NotImplementedError(
  5899. "Interpolation with NaNs in the index "
  5900. "has not been implemented. Try filling "
  5901. "those NaNs before interpolating."
  5902. )
  5903. data = _maybe_transposed_self._data
  5904. new_data = data.interpolate(
  5905. method=method,
  5906. axis=ax,
  5907. index=index,
  5908. values=_maybe_transposed_self,
  5909. limit=limit,
  5910. limit_direction=limit_direction,
  5911. limit_area=limit_area,
  5912. inplace=inplace,
  5913. downcast=downcast,
  5914. **kwargs,
  5915. )
  5916. if inplace:
  5917. if axis == 1:
  5918. new_data = self._constructor(new_data).T._data
  5919. self._update_inplace(new_data)
  5920. else:
  5921. res = self._constructor(new_data).__finalize__(self)
  5922. if axis == 1:
  5923. res = res.T
  5924. return res
  5925. # ----------------------------------------------------------------------
# Timeseries methods
  5927. def asof(self, where, subset=None):
  5928. """
  5929. Return the last row(s) without any NaNs before `where`.
  5930. The last row (for each element in `where`, if list) without any
  5931. NaN is taken.
In case of a :class:`~pandas.DataFrame`, the last row without NaN,
considering only the subset of columns (if not `None`), is taken.
If there is no good value, NaN is returned for a Series, or
a Series of NaN values for a DataFrame.
  5936. Parameters
  5937. ----------
  5938. where : date or array-like of dates
  5939. Date(s) before which the last row(s) are returned.
  5940. subset : str or array-like of str, default `None`
  5941. For DataFrame, if not `None`, only use these columns to
  5942. check for NaNs.
  5943. Returns
  5944. -------
  5945. scalar, Series, or DataFrame
  5946. The return can be:
  5947. * scalar : when `self` is a Series and `where` is a scalar
  5948. * Series: when `self` is a Series and `where` is an array-like,
  5949. or when `self` is a DataFrame and `where` is a scalar
  5950. * DataFrame : when `self` is a DataFrame and `where` is an
  5951. array-like
  5952. Return scalar, Series, or DataFrame.
  5953. See Also
  5954. --------
  5955. merge_asof : Perform an asof merge. Similar to left join.
  5956. Notes
  5957. -----
  5958. Dates are assumed to be sorted. Raises if this is not the case.
  5959. Examples
  5960. --------
  5961. A Series and a scalar `where`.
  5962. >>> s = pd.Series([1, 2, np.nan, 4], index=[10, 20, 30, 40])
  5963. >>> s
  5964. 10 1.0
  5965. 20 2.0
  5966. 30 NaN
  5967. 40 4.0
  5968. dtype: float64
  5969. >>> s.asof(20)
  5970. 2.0
  5971. For a sequence `where`, a Series is returned. The first value is
  5972. NaN, because the first element of `where` is before the first
  5973. index value.
  5974. >>> s.asof([5, 20])
  5975. 5 NaN
  5976. 20 2.0
  5977. dtype: float64
  5978. Missing values are not considered. The following is ``2.0``, not
  5979. NaN, even though NaN is at the index location for ``30``.
  5980. >>> s.asof(30)
  5981. 2.0
  5982. Take all columns into consideration
  5983. >>> df = pd.DataFrame({'a': [10, 20, 30, 40, 50],
  5984. ... 'b': [None, None, None, None, 500]},
  5985. ... index=pd.DatetimeIndex(['2018-02-27 09:01:00',
  5986. ... '2018-02-27 09:02:00',
  5987. ... '2018-02-27 09:03:00',
  5988. ... '2018-02-27 09:04:00',
  5989. ... '2018-02-27 09:05:00']))
  5990. >>> df.asof(pd.DatetimeIndex(['2018-02-27 09:03:30',
  5991. ... '2018-02-27 09:04:30']))
  5992. a b
  5993. 2018-02-27 09:03:30 NaN NaN
  5994. 2018-02-27 09:04:30 NaN NaN
  5995. Take a single column into consideration
  5996. >>> df.asof(pd.DatetimeIndex(['2018-02-27 09:03:30',
  5997. ... '2018-02-27 09:04:30']),
  5998. ... subset=['a'])
  5999. a b
  6000. 2018-02-27 09:03:30 30.0 NaN
  6001. 2018-02-27 09:04:30 40.0 NaN
  6002. """
  6003. if isinstance(where, str):
  6004. where = Timestamp(where)
  6005. if not self.index.is_monotonic:
  6006. raise ValueError("asof requires a sorted index")
  6007. is_series = isinstance(self, ABCSeries)
  6008. if is_series:
  6009. if subset is not None:
  6010. raise ValueError("subset is not valid for Series")
  6011. else:
  6012. if subset is None:
  6013. subset = self.columns
  6014. if not is_list_like(subset):
  6015. subset = [subset]
  6016. is_list = is_list_like(where)
  6017. if not is_list:
  6018. start = self.index[0]
  6019. if isinstance(self.index, PeriodIndex):
  6020. where = Period(where, freq=self.index.freq)
  6021. if where < start:
  6022. if not is_series:
  6023. from pandas import Series
  6024. return Series(index=self.columns, name=where, dtype=np.float64)
  6025. return np.nan
  6026. # It's always much faster to use a *while* loop here for
  6027. # Series than pre-computing all the NAs. However a
  6028. # *while* loop is extremely expensive for DataFrame
  6029. # so we later pre-compute all the NAs and use the same
  6030. # code path whether *where* is a scalar or list.
  6031. # See PR: https://github.com/pandas-dev/pandas/pull/14476
  6032. if is_series:
  6033. loc = self.index.searchsorted(where, side="right")
  6034. if loc > 0:
  6035. loc -= 1
  6036. values = self._values
  6037. while loc > 0 and isna(values[loc]):
  6038. loc -= 1
  6039. return values[loc]
  6040. if not isinstance(where, Index):
  6041. where = Index(where) if is_list else Index([where])
  6042. nulls = self.isna() if is_series else self[subset].isna().any(1)
  6043. if nulls.all():
  6044. if is_series:
  6045. return self._constructor(np.nan, index=where, name=self.name)
  6046. elif is_list:
  6047. from pandas import DataFrame
  6048. return DataFrame(np.nan, index=where, columns=self.columns)
  6049. else:
  6050. from pandas import Series
  6051. return Series(np.nan, index=self.columns, name=where[0])
  6052. locs = self.index.asof_locs(where, ~(nulls.values))
  6053. # mask the missing
  6054. missing = locs == -1
  6055. data = self.take(locs)
  6056. data.index = where
  6057. data.loc[missing] = np.nan
  6058. return data if is_list else data.iloc[-1]
  6059. # ----------------------------------------------------------------------
  6060. # Action Methods
  6061. _shared_docs[
  6062. "isna"
  6063. ] = """
  6064. Detect missing values.
  6065. Return a boolean same-sized object indicating if the values are NA.
NA values, such as None or :attr:`numpy.NaN`, get mapped to True
  6067. values.
  6068. Everything else gets mapped to False values. Characters such as empty
  6069. strings ``''`` or :attr:`numpy.inf` are not considered NA values
  6070. (unless you set ``pandas.options.mode.use_inf_as_na = True``).
  6071. Returns
  6072. -------
  6073. %(klass)s
Mask of bool values for each element in %(klass)s that
indicates whether an element is an NA value.
  6076. See Also
  6077. --------
  6078. %(klass)s.isnull : Alias of isna.
  6079. %(klass)s.notna : Boolean inverse of isna.
  6080. %(klass)s.dropna : Omit axes labels with missing values.
  6081. isna : Top-level isna.
  6082. Examples
  6083. --------
  6084. Show which entries in a DataFrame are NA.
  6085. >>> df = pd.DataFrame({'age': [5, 6, np.NaN],
  6086. ... 'born': [pd.NaT, pd.Timestamp('1939-05-27'),
  6087. ... pd.Timestamp('1940-04-25')],
  6088. ... 'name': ['Alfred', 'Batman', ''],
  6089. ... 'toy': [None, 'Batmobile', 'Joker']})
  6090. >>> df
  6091. age born name toy
  6092. 0 5.0 NaT Alfred None
  6093. 1 6.0 1939-05-27 Batman Batmobile
  6094. 2 NaN 1940-04-25 Joker
  6095. >>> df.isna()
  6096. age born name toy
  6097. 0 False True False True
  6098. 1 False False False False
  6099. 2 True False False False
  6100. Show which entries in a Series are NA.
  6101. >>> ser = pd.Series([5, 6, np.NaN])
  6102. >>> ser
  6103. 0 5.0
  6104. 1 6.0
  6105. 2 NaN
  6106. dtype: float64
  6107. >>> ser.isna()
  6108. 0 False
  6109. 1 False
  6110. 2 True
  6111. dtype: bool
  6112. """
  6113. @Appender(_shared_docs["isna"] % _shared_doc_kwargs)
  6114. def isna(self: FrameOrSeries) -> FrameOrSeries:
  6115. return isna(self).__finalize__(self)
  6116. @Appender(_shared_docs["isna"] % _shared_doc_kwargs)
  6117. def isnull(self: FrameOrSeries) -> FrameOrSeries:
  6118. return isna(self).__finalize__(self)
  6119. _shared_docs[
  6120. "notna"
  6121. ] = """
  6122. Detect existing (non-missing) values.
  6123. Return a boolean same-sized object indicating if the values are not NA.
  6124. Non-missing values get mapped to True. Characters such as empty
  6125. strings ``''`` or :attr:`numpy.inf` are not considered NA values
  6126. (unless you set ``pandas.options.mode.use_inf_as_na = True``).
  6127. NA values, such as None or :attr:`numpy.NaN`, get mapped to False
  6128. values.
  6129. Returns
  6130. -------
  6131. %(klass)s
  6132. Mask of bool values for each element in %(klass)s that
  6133. indicates whether an element is not an NA value.
  6134. See Also
  6135. --------
  6136. %(klass)s.notnull : Alias of notna.
  6137. %(klass)s.isna : Boolean inverse of notna.
  6138. %(klass)s.dropna : Omit axes labels with missing values.
  6139. notna : Top-level notna.
  6140. Examples
  6141. --------
  6142. Show which entries in a DataFrame are not NA.
  6143. >>> df = pd.DataFrame({'age': [5, 6, np.NaN],
  6144. ... 'born': [pd.NaT, pd.Timestamp('1939-05-27'),
  6145. ... pd.Timestamp('1940-04-25')],
  6146. ... 'name': ['Alfred', 'Batman', ''],
  6147. ... 'toy': [None, 'Batmobile', 'Joker']})
  6148. >>> df
  6149. age born name toy
  6150. 0 5.0 NaT Alfred None
  6151. 1 6.0 1939-05-27 Batman Batmobile
  6152. 2 NaN 1940-04-25 Joker
  6153. >>> df.notna()
  6154. age born name toy
  6155. 0 True False True False
  6156. 1 True True True True
  6157. 2 False True True True
  6158. Show which entries in a Series are not NA.
  6159. >>> ser = pd.Series([5, 6, np.NaN])
  6160. >>> ser
  6161. 0 5.0
  6162. 1 6.0
  6163. 2 NaN
  6164. dtype: float64
  6165. >>> ser.notna()
  6166. 0 True
  6167. 1 True
  6168. 2 False
  6169. dtype: bool
  6170. """
  6171. @Appender(_shared_docs["notna"] % _shared_doc_kwargs)
  6172. def notna(self: FrameOrSeries) -> FrameOrSeries:
  6173. return notna(self).__finalize__(self)
  6174. @Appender(_shared_docs["notna"] % _shared_doc_kwargs)
  6175. def notnull(self: FrameOrSeries) -> FrameOrSeries:
  6176. return notna(self).__finalize__(self)
  6177. def _clip_with_scalar(self, lower, upper, inplace: bool_t = False):
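# Fast path for scalar bounds: apply each bound via `where`, then
# restore the original NaN positions, which the numeric comparisons
# would otherwise overwrite with the bound value.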
  6178. if (lower is not None and np.any(isna(lower))) or (
  6179. upper is not None and np.any(isna(upper))
  6180. ):
  6181. raise ValueError("Cannot use an NA value as a clip threshold")
  6182. result = self
  6183. mask = isna(self.values)
  6184. with np.errstate(all="ignore"):
  6185. if upper is not None:
  6186. subset = self.to_numpy() <= upper
  6187. result = result.where(subset, upper, axis=None, inplace=False)
  6188. if lower is not None:
  6189. subset = self.to_numpy() >= lower
  6190. result = result.where(subset, lower, axis=None, inplace=False)
  6191. if np.any(mask):
  6192. result[mask] = np.nan
  6193. if inplace:
  6194. self._update_inplace(result)
  6195. else:
  6196. return result
  6197. def _clip_with_one_bound(self, threshold, method, axis, inplace):
  6198. if axis is not None:
  6199. axis = self._get_axis_number(axis)
  6200. # method is self.le for upper bound and self.ge for lower bound
  6201. if is_scalar(threshold) and is_number(threshold):
  6202. if method.__name__ == "le":
  6203. return self._clip_with_scalar(None, threshold, inplace=inplace)
  6204. return self._clip_with_scalar(threshold, None, inplace=inplace)
  6205. subset = method(threshold, axis=axis) | isna(self)
  6206. # GH #15390
  6207. # In order for where method to work, the threshold must
  6208. # be transformed to NDFrame from other array like structure.
  6209. if (not isinstance(threshold, ABCSeries)) and is_list_like(threshold):
  6210. if isinstance(self, ABCSeries):
  6211. threshold = self._constructor(threshold, index=self.index)
  6212. else:
  6213. threshold = _align_method_FRAME(self, threshold, axis)
  6214. return self.where(subset, threshold, axis=axis, inplace=inplace)
  6215. def clip(
  6216. self: FrameOrSeries,
  6217. lower=None,
  6218. upper=None,
  6219. axis=None,
  6220. inplace: bool_t = False,
  6221. *args,
  6222. **kwargs,
  6223. ) -> FrameOrSeries:
  6224. """
  6225. Trim values at input threshold(s).
  6226. Assigns values outside boundary to boundary values. Thresholds
  6227. can be singular values or array like, and in the latter case
  6228. the clipping is performed element-wise in the specified axis.
  6229. Parameters
  6230. ----------
  6231. lower : float or array_like, default None
  6232. Minimum threshold value. All values below this
  6233. threshold will be set to it.
  6234. upper : float or array_like, default None
  6235. Maximum threshold value. All values above this
  6236. threshold will be set to it.
  6237. axis : int or str axis name, optional
  6238. Align object with lower and upper along the given axis.
  6239. inplace : bool, default False
  6240. Whether to perform the operation in place on the data.
  6241. .. versionadded:: 0.21.0
  6242. *args, **kwargs
  6243. Additional keywords have no effect but might be accepted
  6244. for compatibility with numpy.
  6245. Returns
  6246. -------
  6247. Series or DataFrame
  6248. Same type as calling object with the values outside the
  6249. clip boundaries replaced.
  6250. Examples
  6251. --------
  6252. >>> data = {'col_0': [9, -3, 0, -1, 5], 'col_1': [-2, -7, 6, 8, -5]}
  6253. >>> df = pd.DataFrame(data)
  6254. >>> df
  6255. col_0 col_1
  6256. 0 9 -2
  6257. 1 -3 -7
  6258. 2 0 6
  6259. 3 -1 8
  6260. 4 5 -5
  6261. Clips per column using lower and upper thresholds:
  6262. >>> df.clip(-4, 6)
  6263. col_0 col_1
  6264. 0 6 -2
  6265. 1 -3 -4
  6266. 2 0 6
  6267. 3 -1 6
  6268. 4 5 -4
  6269. Clips using specific lower and upper thresholds per column element:
  6270. >>> t = pd.Series([2, -4, -1, 6, 3])
  6271. >>> t
  6272. 0 2
  6273. 1 -4
  6274. 2 -1
  6275. 3 6
  6276. 4 3
  6277. dtype: int64
  6278. >>> df.clip(t, t + 4, axis=0)
  6279. col_0 col_1
  6280. 0 6 2
  6281. 1 -3 -4
  6282. 2 0 3
  6283. 3 6 8
  6284. 4 5 3
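Clipping with a single bound also works; a small sketch building on
the ``df`` above, using only an upper threshold:
>>> df.clip(upper=4)
   col_0  col_1
0      4     -2
1     -3     -7
2      0      4
3     -1      4
4      4     -5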
  6285. """
  6286. inplace = validate_bool_kwarg(inplace, "inplace")
  6287. axis = nv.validate_clip_with_axis(axis, args, kwargs)
  6288. if axis is not None:
  6289. axis = self._get_axis_number(axis)
  6290. # GH 17276
  6291. # numpy doesn't like NaN as a clip value
  6292. # so ignore
  6293. # GH 19992
  6294. # numpy doesn't drop a list-like bound containing NaN
  6295. if not is_list_like(lower) and np.any(isna(lower)):
  6296. lower = None
  6297. if not is_list_like(upper) and np.any(isna(upper)):
  6298. upper = None
  6299. # GH 2747 (arguments were reversed)
  6300. if lower is not None and upper is not None:
  6301. if is_scalar(lower) and is_scalar(upper):
  6302. lower, upper = min(lower, upper), max(lower, upper)
  6303. # fast-path for scalars
  6304. if (lower is None or (is_scalar(lower) and is_number(lower))) and (
  6305. upper is None or (is_scalar(upper) and is_number(upper))
  6306. ):
  6307. return self._clip_with_scalar(lower, upper, inplace=inplace)
  6308. result = self
  6309. if lower is not None:
  6310. result = result._clip_with_one_bound(
  6311. lower, method=self.ge, axis=axis, inplace=inplace
  6312. )
  6313. if upper is not None:
  6314. if inplace:
  6315. result = self
  6316. result = result._clip_with_one_bound(
  6317. upper, method=self.le, axis=axis, inplace=inplace
  6318. )
  6319. return result
  6320. _shared_docs[
  6321. "groupby"
  6322. ] = """
  6323. Group %(klass)s using a mapper or by a Series of columns.
  6324. A groupby operation involves some combination of splitting the
  6325. object, applying a function, and combining the results. This can be
  6326. used to group large amounts of data and compute operations on these
  6327. groups.
  6328. Parameters
  6329. ----------
  6330. by : mapping, function, label, or list of labels
  6331. Used to determine the groups for the groupby.
  6332. If ``by`` is a function, it's called on each value of the object's
  6333. index. If a dict or Series is passed, the Series or dict VALUES
  6334. will be used to determine the groups (the Series' values are first
  6335. aligned; see ``.align()`` method). If an ndarray is passed, the
values are used as-is to determine the groups. A label or list of
  6337. labels may be passed to group by the columns in ``self``. Notice
  6338. that a tuple is interpreted as a (single) key.
  6339. axis : {0 or 'index', 1 or 'columns'}, default 0
  6340. Split along rows (0) or columns (1).
  6341. level : int, level name, or sequence of such, default None
  6342. If the axis is a MultiIndex (hierarchical), group by a particular
  6343. level or levels.
  6344. as_index : bool, default True
  6345. For aggregated output, return object with group labels as the
  6346. index. Only relevant for DataFrame input. as_index=False is
  6347. effectively "SQL-style" grouped output.
  6348. sort : bool, default True
  6349. Sort group keys. Get better performance by turning this off.
  6350. Note this does not influence the order of observations within each
  6351. group. Groupby preserves the order of rows within each group.
  6352. group_keys : bool, default True
  6353. When calling apply, add group keys to index to identify pieces.
  6354. squeeze : bool, default False
  6355. Reduce the dimensionality of the return type if possible,
  6356. otherwise return a consistent type.
  6357. observed : bool, default False
  6358. This only applies if any of the groupers are Categoricals.
  6359. If True: only show observed values for categorical groupers.
  6360. If False: show all values for categorical groupers.
  6361. .. versionadded:: 0.23.0
  6362. Returns
  6363. -------
  6364. %(klass)sGroupBy
  6365. Returns a groupby object that contains information about the groups.
  6366. See Also
  6367. --------
  6368. resample : Convenience method for frequency conversion and resampling
  6369. of time series.
  6370. Notes
  6371. -----
  6372. See the `user guide
  6373. <https://pandas.pydata.org/pandas-docs/stable/groupby.html>`_ for more.
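Examples
--------
A minimal illustration for the DataFrame case:
>>> df = pd.DataFrame({'Animal': ['Falcon', 'Falcon',
...                               'Parrot', 'Parrot'],
...                    'Max Speed': [380., 370., 24., 26.]})
>>> df.groupby(['Animal']).mean()
        Max Speed
Animal
Falcon      375.0
Parrot       25.0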
  6374. """
  6375. def asfreq(
  6376. self: FrameOrSeries,
  6377. freq,
  6378. method=None,
  6379. how: Optional[str] = None,
  6380. normalize: bool_t = False,
  6381. fill_value=None,
  6382. ) -> FrameOrSeries:
  6383. """
  6384. Convert TimeSeries to specified frequency.
  6385. Optionally provide filling method to pad/backfill missing values.
  6386. Returns the original data conformed to a new index with the specified
  6387. frequency. ``resample`` is more appropriate if an operation, such as
  6388. summarization, is necessary to represent the data at the new frequency.
  6389. Parameters
  6390. ----------
  6391. freq : DateOffset or str
  6392. method : {'backfill'/'bfill', 'pad'/'ffill'}, default None
  6393. Method to use for filling holes in reindexed Series (note this
  6394. does not fill NaNs that already were present):
  6395. * 'pad' / 'ffill': propagate last valid observation forward to next
  6396. valid
  6397. * 'backfill' / 'bfill': use NEXT valid observation to fill.
  6398. how : {'start', 'end'}, default end
  6399. For PeriodIndex only (see PeriodIndex.asfreq).
  6400. normalize : bool, default False
  6401. Whether to reset output index to midnight.
  6402. fill_value : scalar, optional
  6403. Value to use for missing values, applied during upsampling (note
  6404. this does not fill NaNs that already were present).
  6405. Returns
  6406. -------
  6407. converted : same type as caller
  6408. See Also
  6409. --------
  6410. reindex
  6411. Notes
  6412. -----
  6413. To learn more about the frequency strings, please see `this link
  6414. <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`__.
  6415. Examples
  6416. --------
  6417. Start by creating a series with 4 one minute timestamps.
  6418. >>> index = pd.date_range('1/1/2000', periods=4, freq='T')
  6419. >>> series = pd.Series([0.0, None, 2.0, 3.0], index=index)
  6420. >>> df = pd.DataFrame({'s':series})
  6421. >>> df
  6422. s
  6423. 2000-01-01 00:00:00 0.0
  6424. 2000-01-01 00:01:00 NaN
  6425. 2000-01-01 00:02:00 2.0
  6426. 2000-01-01 00:03:00 3.0
  6427. Upsample the series into 30 second bins.
  6428. >>> df.asfreq(freq='30S')
  6429. s
  6430. 2000-01-01 00:00:00 0.0
  6431. 2000-01-01 00:00:30 NaN
  6432. 2000-01-01 00:01:00 NaN
  6433. 2000-01-01 00:01:30 NaN
  6434. 2000-01-01 00:02:00 2.0
  6435. 2000-01-01 00:02:30 NaN
  6436. 2000-01-01 00:03:00 3.0
  6437. Upsample again, providing a ``fill value``.
  6438. >>> df.asfreq(freq='30S', fill_value=9.0)
  6439. s
  6440. 2000-01-01 00:00:00 0.0
  6441. 2000-01-01 00:00:30 9.0
  6442. 2000-01-01 00:01:00 NaN
  6443. 2000-01-01 00:01:30 9.0
  6444. 2000-01-01 00:02:00 2.0
  6445. 2000-01-01 00:02:30 9.0
  6446. 2000-01-01 00:03:00 3.0
  6447. Upsample again, providing a ``method``.
  6448. >>> df.asfreq(freq='30S', method='bfill')
  6449. s
  6450. 2000-01-01 00:00:00 0.0
  6451. 2000-01-01 00:00:30 NaN
  6452. 2000-01-01 00:01:00 NaN
  6453. 2000-01-01 00:01:30 2.0
  6454. 2000-01-01 00:02:00 2.0
  6455. 2000-01-01 00:02:30 3.0
  6456. 2000-01-01 00:03:00 3.0
  6457. """
  6458. from pandas.core.resample import asfreq
  6459. return asfreq(
  6460. self,
  6461. freq,
  6462. method=method,
  6463. how=how,
  6464. normalize=normalize,
  6465. fill_value=fill_value,
  6466. )
    def at_time(
        self: FrameOrSeries, time, asof: bool_t = False, axis=None
    ) -> FrameOrSeries:
        """
        Select values at particular time of day (e.g. 9:30AM).

        Parameters
        ----------
        time : datetime.time or str
        axis : {0 or 'index', 1 or 'columns'}, default 0

            .. versionadded:: 0.24.0

        Returns
        -------
        Series or DataFrame

        Raises
        ------
        TypeError
            If the index is not a :class:`DatetimeIndex`.

        See Also
        --------
        between_time : Select values between particular times of the day.
        first : Select initial periods of time series based on a date offset.
        last : Select final periods of time series based on a date offset.
        DatetimeIndex.indexer_at_time : Get just the index locations for
            values at particular time of the day.

        Examples
        --------
        >>> i = pd.date_range('2018-04-09', periods=4, freq='12H')
        >>> ts = pd.DataFrame({'A': [1, 2, 3, 4]}, index=i)
        >>> ts
                             A
        2018-04-09 00:00:00  1
        2018-04-09 12:00:00  2
        2018-04-10 00:00:00  3
        2018-04-10 12:00:00  4

        >>> ts.at_time('12:00')
                             A
        2018-04-09 12:00:00  2
        2018-04-10 12:00:00  4
        """
        if axis is None:
            axis = self._stat_axis_number
        axis = self._get_axis_number(axis)

        index = self._get_axis(axis)
        try:
            indexer = index.indexer_at_time(time, asof=asof)
        except AttributeError:
            raise TypeError("Index must be DatetimeIndex")

        return self._take_with_is_copy(indexer, axis=axis)
    def between_time(
        self: FrameOrSeries,
        start_time,
        end_time,
        include_start: bool_t = True,
        include_end: bool_t = True,
        axis=None,
    ) -> FrameOrSeries:
        """
        Select values between particular times of the day (e.g., 9:00-9:30 AM).

        By setting ``start_time`` to be later than ``end_time``,
        you can get the times that are *not* between the two times.

        Parameters
        ----------
        start_time : datetime.time or str
        end_time : datetime.time or str
        include_start : bool, default True
        include_end : bool, default True
        axis : {0 or 'index', 1 or 'columns'}, default 0

            .. versionadded:: 0.24.0

        Returns
        -------
        Series or DataFrame

        Raises
        ------
        TypeError
            If the index is not a :class:`DatetimeIndex`.

        See Also
        --------
        at_time : Select values at a particular time of the day.
        first : Select initial periods of time series based on a date offset.
        last : Select final periods of time series based on a date offset.
        DatetimeIndex.indexer_between_time : Get just the index locations for
            values between particular times of the day.

        Examples
        --------
        >>> i = pd.date_range('2018-04-09', periods=4, freq='1D20min')
        >>> ts = pd.DataFrame({'A': [1, 2, 3, 4]}, index=i)
        >>> ts
                             A
        2018-04-09 00:00:00  1
        2018-04-10 00:20:00  2
        2018-04-11 00:40:00  3
        2018-04-12 01:00:00  4

        >>> ts.between_time('0:15', '0:45')
                             A
        2018-04-10 00:20:00  2
        2018-04-11 00:40:00  3

        You get the times that are *not* between two times by setting
        ``start_time`` later than ``end_time``:

        >>> ts.between_time('0:45', '0:15')
                             A
        2018-04-09 00:00:00  1
        2018-04-12 01:00:00  4
        """
        if axis is None:
            axis = self._stat_axis_number
        axis = self._get_axis_number(axis)

        index = self._get_axis(axis)
        try:
            indexer = index.indexer_between_time(
                start_time,
                end_time,
                include_start=include_start,
                include_end=include_end,
            )
        except AttributeError:
            raise TypeError("Index must be DatetimeIndex")

        return self._take_with_is_copy(indexer, axis=axis)
    def resample(
        self,
        rule,
        axis=0,
        closed: Optional[str] = None,
        label: Optional[str] = None,
        convention: str = "start",
        kind: Optional[str] = None,
        loffset=None,
        base: int = 0,
        on=None,
        level=None,
    ):
        """
        Resample time-series data.

        Convenience method for frequency conversion and resampling of time
        series. Object must have a datetime-like index (`DatetimeIndex`,
        `PeriodIndex`, or `TimedeltaIndex`), or pass datetime-like values
        to the `on` or `level` keyword.

        Parameters
        ----------
        rule : DateOffset, Timedelta or str
            The offset string or object representing target conversion.
        axis : {0 or 'index', 1 or 'columns'}, default 0
            Which axis to use for up- or down-sampling. For `Series` this
            will default to 0, i.e. along the rows. Must be
            `DatetimeIndex`, `TimedeltaIndex` or `PeriodIndex`.
        closed : {'right', 'left'}, default None
            Which side of bin interval is closed. The default is 'left'
            for all frequency offsets except for 'M', 'A', 'Q', 'BM',
            'BA', 'BQ', and 'W' which all have a default of 'right'.
        label : {'right', 'left'}, default None
            Which bin edge label to label bucket with. The default is 'left'
            for all frequency offsets except for 'M', 'A', 'Q', 'BM',
            'BA', 'BQ', and 'W' which all have a default of 'right'.
        convention : {'start', 'end', 's', 'e'}, default 'start'
            For `PeriodIndex` only, controls whether to use the start or
            end of `rule`.
        kind : {'timestamp', 'period'}, optional, default None
            Pass 'timestamp' to convert the resulting index to a
            `DatetimeIndex` or 'period' to convert it to a `PeriodIndex`.
            By default the input representation is retained.
        loffset : timedelta, default None
            Adjust the resampled time labels.
        base : int, default 0
            For frequencies that evenly subdivide 1 day, the "origin" of the
            aggregated intervals. For example, for '5min' frequency, base could
            range from 0 through 4. Defaults to 0.
        on : str, optional
            For a DataFrame, column to use instead of index for resampling.
            Column must be datetime-like.
        level : str or int, optional
            For a MultiIndex, level (name or number) to use for
            resampling. `level` must be datetime-like.

        Returns
        -------
        Resampler object

        See Also
        --------
        groupby : Group by mapping, function, label, or list of labels.
        Series.resample : Resample a Series.
        DataFrame.resample : Resample a DataFrame.

        Notes
        -----
        See the `user guide
        <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#resampling>`_
        for more.

        To learn more about the offset strings, please see `this link
        <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#dateoffset-objects>`__.

        Examples
        --------
        Start by creating a series with 9 one-minute timestamps.

        >>> index = pd.date_range('1/1/2000', periods=9, freq='T')
        >>> series = pd.Series(range(9), index=index)
        >>> series
        2000-01-01 00:00:00    0
        2000-01-01 00:01:00    1
        2000-01-01 00:02:00    2
        2000-01-01 00:03:00    3
        2000-01-01 00:04:00    4
        2000-01-01 00:05:00    5
        2000-01-01 00:06:00    6
        2000-01-01 00:07:00    7
        2000-01-01 00:08:00    8
        Freq: T, dtype: int64

        Downsample the series into 3-minute bins and sum the values
        of the timestamps falling into a bin.

        >>> series.resample('3T').sum()
        2000-01-01 00:00:00     3
        2000-01-01 00:03:00    12
        2000-01-01 00:06:00    21
        Freq: 3T, dtype: int64

        Downsample the series into 3-minute bins as above, but label each
        bin using the right edge instead of the left. Please note that the
        value in the bucket used as the label is not included in the bucket
        it labels. For example, in the original series the bucket
        ``2000-01-01 00:03:00`` contains the value 3, but the summed value in
        the resampled bucket with the label ``2000-01-01 00:03:00`` does not
        include 3 (if it did, the summed value would be 6, not 3). To include
        this value, close the right side of the bin interval, as illustrated
        in the example below this one.

        >>> series.resample('3T', label='right').sum()
        2000-01-01 00:03:00     3
        2000-01-01 00:06:00    12
        2000-01-01 00:09:00    21
        Freq: 3T, dtype: int64

        Downsample the series into 3-minute bins as above, but close the right
        side of the bin interval.

        >>> series.resample('3T', label='right', closed='right').sum()
        2000-01-01 00:00:00     0
        2000-01-01 00:03:00     6
        2000-01-01 00:06:00    15
        2000-01-01 00:09:00    15
        Freq: 3T, dtype: int64

        Upsample the series into 30-second bins.

        >>> series.resample('30S').asfreq()[0:5]   # Select first 5 rows
        2000-01-01 00:00:00    0.0
        2000-01-01 00:00:30    NaN
        2000-01-01 00:01:00    1.0
        2000-01-01 00:01:30    NaN
        2000-01-01 00:02:00    2.0
        Freq: 30S, dtype: float64

        Upsample the series into 30-second bins and fill the ``NaN``
        values using the ``pad`` method.

        >>> series.resample('30S').pad()[0:5]
        2000-01-01 00:00:00    0
        2000-01-01 00:00:30    0
        2000-01-01 00:01:00    1
        2000-01-01 00:01:30    1
        2000-01-01 00:02:00    2
        Freq: 30S, dtype: int64

        Upsample the series into 30-second bins and fill the
        ``NaN`` values using the ``bfill`` method.

        >>> series.resample('30S').bfill()[0:5]
        2000-01-01 00:00:00    0
        2000-01-01 00:00:30    1
        2000-01-01 00:01:00    1
        2000-01-01 00:01:30    2
        2000-01-01 00:02:00    2
        Freq: 30S, dtype: int64

        Pass a custom function via ``apply``.

        >>> def custom_resampler(array_like):
        ...     return np.sum(array_like) + 5
        ...
        >>> series.resample('3T').apply(custom_resampler)
        2000-01-01 00:00:00     8
        2000-01-01 00:03:00    17
        2000-01-01 00:06:00    26
        Freq: 3T, dtype: int64

        For a Series with a PeriodIndex, the keyword `convention` can be
        used to control whether to use the start or end of `rule`.

        Resample a year by quarter using 'start' `convention`. Values are
        assigned to the first quarter of the period.

        >>> s = pd.Series([1, 2], index=pd.period_range('2012-01-01',
        ...                                             freq='A',
        ...                                             periods=2))
        >>> s
        2012    1
        2013    2
        Freq: A-DEC, dtype: int64
        >>> s.resample('Q', convention='start').asfreq()
        2012Q1    1.0
        2012Q2    NaN
        2012Q3    NaN
        2012Q4    NaN
        2013Q1    2.0
        2013Q2    NaN
        2013Q3    NaN
        2013Q4    NaN
        Freq: Q-DEC, dtype: float64

        Resample quarters by month using 'end' `convention`. Values are
        assigned to the last month of the period.

        >>> q = pd.Series([1, 2, 3, 4], index=pd.period_range('2018-01-01',
        ...                                                   freq='Q',
        ...                                                   periods=4))
        >>> q
        2018Q1    1
        2018Q2    2
        2018Q3    3
        2018Q4    4
        Freq: Q-DEC, dtype: int64
        >>> q.resample('M', convention='end').asfreq()
        2018-03    1.0
        2018-04    NaN
        2018-05    NaN
        2018-06    2.0
        2018-07    NaN
        2018-08    NaN
        2018-09    3.0
        2018-10    NaN
        2018-11    NaN
        2018-12    4.0
        Freq: M, dtype: float64

        For DataFrame objects, the keyword `on` can be used to specify the
        column instead of the index for resampling.

        >>> d = {'price': [10, 11, 9, 13, 14, 18, 17, 19],
        ...      'volume': [50, 60, 40, 100, 50, 100, 40, 50]}
        >>> df = pd.DataFrame(d)
        >>> df['week_starting'] = pd.date_range('01/01/2018',
        ...                                     periods=8,
        ...                                     freq='W')
        >>> df
           price  volume week_starting
        0     10      50    2018-01-07
        1     11      60    2018-01-14
        2      9      40    2018-01-21
        3     13     100    2018-01-28
        4     14      50    2018-02-04
        5     18     100    2018-02-11
        6     17      40    2018-02-18
        7     19      50    2018-02-25
        >>> df.resample('M', on='week_starting').mean()
                       price  volume
        week_starting
        2018-01-31     10.75    62.5
        2018-02-28     17.00    60.0

        For a DataFrame with MultiIndex, the keyword `level` can be used to
        specify on which level the resampling needs to take place.

        >>> days = pd.date_range('1/1/2000', periods=4, freq='D')
        >>> d2 = {'price': [10, 11, 9, 13, 14, 18, 17, 19],
        ...       'volume': [50, 60, 40, 100, 50, 100, 40, 50]}
        >>> df2 = pd.DataFrame(d2,
        ...                    index=pd.MultiIndex.from_product([days,
        ...                                                     ['morning',
        ...                                                      'afternoon']]
        ...                                                     ))
        >>> df2
                              price  volume
        2000-01-01 morning       10      50
                   afternoon     11      60
        2000-01-02 morning        9      40
                   afternoon     13     100
        2000-01-03 morning       14      50
                   afternoon     18     100
        2000-01-04 morning       17      40
                   afternoon     19      50
        >>> df2.resample('D', level=0).sum()
                    price  volume
        2000-01-01     21     110
        2000-01-02     22     140
        2000-01-03     32     150
        2000-01-04     36      90
        """
        from pandas.core.resample import resample

        axis = self._get_axis_number(axis)
        return resample(
            self,
            freq=rule,
            label=label,
            closed=closed,
            axis=axis,
            kind=kind,
            loffset=loffset,
            convention=convention,
            base=base,
            key=on,
            level=level,
        )
    def first(self: FrameOrSeries, offset) -> FrameOrSeries:
        """
        Select initial periods of time series data based on a date offset.

        Parameters
        ----------
        offset : str, DateOffset, dateutil.relativedelta

        Returns
        -------
        subset : same type as caller

        Raises
        ------
        TypeError
            If the index is not a :class:`DatetimeIndex`.

        See Also
        --------
        last : Select final periods of time series based on a date offset.
        at_time : Select values at a particular time of the day.
        between_time : Select values between particular times of the day.

        Examples
        --------
        >>> i = pd.date_range('2018-04-09', periods=4, freq='2D')
        >>> ts = pd.DataFrame({'A': [1, 2, 3, 4]}, index=i)
        >>> ts
                    A
        2018-04-09  1
        2018-04-11  2
        2018-04-13  3
        2018-04-15  4

        Get the rows for the first 3 days:

        >>> ts.first('3D')
                    A
        2018-04-09  1
        2018-04-11  2

        Notice the data for the first 3 calendar days was returned, not the
        first 3 days observed in the dataset, and therefore data for
        2018-04-13 was not returned.
        """
        if not isinstance(self.index, DatetimeIndex):
            raise TypeError("'first' only supports a DatetimeIndex index")

        if len(self.index) == 0:
            return self

        offset = to_offset(offset)
        end_date = end = self.index[0] + offset

        # Tick-like, e.g. 3 weeks
        if not offset.is_anchored() and hasattr(offset, "_inc"):
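            # Added explanatory note: for tick-like offsets the selected
            # window is half-open, so an end point that falls exactly on an
            # index label is excluded positionally below, rather than being
            # included by label-based slicing.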
            if end_date in self.index:
                end = self.index.searchsorted(end_date, side="left")
                return self.iloc[:end]

        return self.loc[:end]
    def last(self: FrameOrSeries, offset) -> FrameOrSeries:
        """
        Select final periods of time series data based on a date offset.

        Parameters
        ----------
        offset : str, DateOffset, dateutil.relativedelta

        Returns
        -------
        subset : same type as caller

        Raises
        ------
        TypeError
            If the index is not a :class:`DatetimeIndex`.

        See Also
        --------
        first : Select initial periods of time series based on a date offset.
        at_time : Select values at a particular time of the day.
        between_time : Select values between particular times of the day.

        Examples
        --------
        >>> i = pd.date_range('2018-04-09', periods=4, freq='2D')
        >>> ts = pd.DataFrame({'A': [1, 2, 3, 4]}, index=i)
        >>> ts
                    A
        2018-04-09  1
        2018-04-11  2
        2018-04-13  3
        2018-04-15  4

        Get the rows for the last 3 days:

        >>> ts.last('3D')
                    A
        2018-04-13  3
        2018-04-15  4

        Notice the data for the last 3 calendar days was returned, not the
        last 3 observed days in the dataset, and therefore data for
        2018-04-11 was not returned.
        """
        if not isinstance(self.index, DatetimeIndex):
            raise TypeError("'last' only supports a DatetimeIndex index")

        if len(self.index) == 0:
            return self

        offset = to_offset(offset)

        start_date = self.index[-1] - offset
        start = self.index.searchsorted(start_date, side="right")
        return self.iloc[start:]
    def rank(
        self: FrameOrSeries,
        axis=0,
        method: str = "average",
        numeric_only: Optional[bool_t] = None,
        na_option: str = "keep",
        ascending: bool_t = True,
        pct: bool_t = False,
    ) -> FrameOrSeries:
        """
        Compute numerical data ranks (1 through n) along axis.

        By default, equal values are assigned a rank that is the average of the
        ranks of those values.

        Parameters
        ----------
        axis : {0 or 'index', 1 or 'columns'}, default 0
            Index to direct ranking.
        method : {'average', 'min', 'max', 'first', 'dense'}, default 'average'
            How to rank the group of records that have the same value (i.e. ties):

            * average: average rank of the group
            * min: lowest rank in the group
            * max: highest rank in the group
            * first: ranks assigned in order they appear in the array
            * dense: like 'min', but rank always increases by 1 between groups.
        numeric_only : bool, optional
            For DataFrame objects, rank only numeric columns if set to True.
        na_option : {'keep', 'top', 'bottom'}, default 'keep'
            How to rank NaN values:

            * keep: assign NaN rank to NaN values
            * top: assign smallest rank to NaN values if ascending
            * bottom: assign highest rank to NaN values if ascending.
        ascending : bool, default True
            Whether or not the elements should be ranked in ascending order.
        pct : bool, default False
            Whether or not to display the returned rankings in percentile
            form.

        Returns
        -------
        same type as caller
            Return a Series or DataFrame with data ranks as values.

        See Also
        --------
        core.groupby.GroupBy.rank : Rank of values within each group.

        Examples
        --------
        >>> df = pd.DataFrame(data={'Animal': ['cat', 'penguin', 'dog',
        ...                                    'spider', 'snake'],
        ...                         'Number_legs': [4, 2, 4, 8, np.nan]})
        >>> df
            Animal  Number_legs
        0      cat          4.0
        1  penguin          2.0
        2      dog          4.0
        3   spider          8.0
        4    snake          NaN

        The following example shows how the method behaves with the above
        parameters:

        * default_rank: this is the default behaviour obtained without using
          any parameter.
        * max_rank: setting ``method = 'max'`` the records that have the
          same values are ranked using the highest rank (e.g.: since 'cat'
          and 'dog' are both in the 2nd and 3rd position, rank 3 is assigned.)
        * NA_bottom: choosing ``na_option = 'bottom'``, if there are records
          with NaN values they are placed at the bottom of the ranking.
        * pct_rank: when setting ``pct = True``, the ranking is expressed as
          percentile rank.

        >>> df['default_rank'] = df['Number_legs'].rank()
        >>> df['max_rank'] = df['Number_legs'].rank(method='max')
        >>> df['NA_bottom'] = df['Number_legs'].rank(na_option='bottom')
        >>> df['pct_rank'] = df['Number_legs'].rank(pct=True)
        >>> df
            Animal  Number_legs  default_rank  max_rank  NA_bottom  pct_rank
        0      cat          4.0           2.5       3.0        2.5     0.625
        1  penguin          2.0           1.0       1.0        1.0     0.250
        2      dog          4.0           2.5       3.0        2.5     0.625
        3   spider          8.0           4.0       4.0        4.0     1.000
        4    snake          NaN           NaN       NaN        5.0       NaN
        """
        axis = self._get_axis_number(axis)

        if na_option not in {"keep", "top", "bottom"}:
            msg = "na_option must be one of 'keep', 'top', or 'bottom'"
            raise ValueError(msg)

        def ranker(data):
            ranks = algos.rank(
                data.values,
                axis=axis,
                method=method,
                ascending=ascending,
                na_option=na_option,
                pct=pct,
            )
            ranks = self._constructor(ranks, **data._construct_axes_dict())
            return ranks.__finalize__(self)

        # if numeric_only is None, and we can't get anything, we try with
        # numeric_only=True
        if numeric_only is None:
            try:
                return ranker(self)
            except TypeError:
                numeric_only = True

        if numeric_only:
            data = self._get_numeric_data()
        else:
            data = self

        return ranker(data)
    _shared_docs[
        "align"
    ] = """
        Align two objects on their axes with the specified join method.

        Join method is specified for each axis Index.

        Parameters
        ----------
        other : DataFrame or Series
        join : {'outer', 'inner', 'left', 'right'}, default 'outer'
        axis : allowed axis of the other object, default None
            Align on index (0), columns (1), or both (None).
        level : int or level name, default None
            Broadcast across a level, matching Index values on the
            passed MultiIndex level.
        copy : bool, default True
            Always returns new objects. If copy=False and no reindexing is
            required then original objects are returned.
        fill_value : scalar, default np.NaN
            Value to use for missing values. Defaults to NaN, but can be any
            "compatible" value.
        method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None
            Method to use for filling holes in reindexed Series:

            - pad / ffill: propagate last valid observation forward to next valid.
            - backfill / bfill: use NEXT valid observation to fill gap.
        limit : int, default None
            If method is specified, this is the maximum number of consecutive
            NaN values to forward/backward fill. In other words, if there is
            a gap with more than this number of consecutive NaNs, it will only
            be partially filled. If method is not specified, this is the
            maximum number of entries along the entire axis where NaNs will be
            filled. Must be greater than 0 if not None.
        fill_axis : %(axes_single_arg)s, default 0
            Filling axis, method and limit.
        broadcast_axis : %(axes_single_arg)s, default None
            Broadcast values along this axis, if aligning two objects of
            different dimensions.

        Returns
        -------
        (left, right) : (%(klass)s, type of other)
            Aligned objects.
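
        Examples
        --------
        A small illustrative sketch of an outer alignment on the index (an
        added example, assuming standard pandas behavior and the usual
        ``import pandas as pd``):

        >>> left = pd.Series([1, 2, 3], index=['a', 'b', 'c'])
        >>> right = pd.Series([10, 20], index=['b', 'd'])
        >>> l, r = left.align(right, join='outer')
        >>> l
        a    1.0
        b    2.0
        c    3.0
        d    NaN
        dtype: float64
        >>> r
        a     NaN
        b    10.0
        c     NaN
        d    20.0
        dtype: float64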
  7081. """
  7082. @Appender(_shared_docs["align"] % _shared_doc_kwargs)
  7083. def align(
  7084. self,
  7085. other,
  7086. join="outer",
  7087. axis=None,
  7088. level=None,
  7089. copy=True,
  7090. fill_value=None,
  7091. method=None,
  7092. limit=None,
  7093. fill_axis=0,
  7094. broadcast_axis=None,
  7095. ):
  7096. method = missing.clean_fill_method(method)
  7097. if broadcast_axis == 1 and self.ndim != other.ndim:
  7098. if isinstance(self, ABCSeries):
  7099. # this means other is a DataFrame, and we need to broadcast
  7100. # self
  7101. cons = self._constructor_expanddim
  7102. df = cons(
  7103. {c: self for c in other.columns}, **other._construct_axes_dict()
  7104. )
  7105. return df._align_frame(
  7106. other,
  7107. join=join,
  7108. axis=axis,
  7109. level=level,
  7110. copy=copy,
  7111. fill_value=fill_value,
  7112. method=method,
  7113. limit=limit,
  7114. fill_axis=fill_axis,
  7115. )
  7116. elif isinstance(other, ABCSeries):
  7117. # this means self is a DataFrame, and we need to broadcast
  7118. # other
  7119. cons = other._constructor_expanddim
  7120. df = cons(
  7121. {c: other for c in self.columns}, **self._construct_axes_dict()
  7122. )
  7123. return self._align_frame(
  7124. df,
  7125. join=join,
  7126. axis=axis,
  7127. level=level,
  7128. copy=copy,
  7129. fill_value=fill_value,
  7130. method=method,
  7131. limit=limit,
  7132. fill_axis=fill_axis,
  7133. )
  7134. if axis is not None:
  7135. axis = self._get_axis_number(axis)
  7136. if isinstance(other, ABCDataFrame):
  7137. return self._align_frame(
  7138. other,
  7139. join=join,
  7140. axis=axis,
  7141. level=level,
  7142. copy=copy,
  7143. fill_value=fill_value,
  7144. method=method,
  7145. limit=limit,
  7146. fill_axis=fill_axis,
  7147. )
  7148. elif isinstance(other, ABCSeries):
  7149. return self._align_series(
  7150. other,
  7151. join=join,
  7152. axis=axis,
  7153. level=level,
  7154. copy=copy,
  7155. fill_value=fill_value,
  7156. method=method,
  7157. limit=limit,
  7158. fill_axis=fill_axis,
  7159. )
  7160. else: # pragma: no cover
  7161. raise TypeError(f"unsupported type: {type(other)}")
  7162. def _align_frame(
  7163. self,
  7164. other,
  7165. join="outer",
  7166. axis=None,
  7167. level=None,
  7168. copy: bool_t = True,
  7169. fill_value=None,
  7170. method=None,
  7171. limit=None,
  7172. fill_axis=0,
  7173. ):
  7174. # defaults
  7175. join_index, join_columns = None, None
  7176. ilidx, iridx = None, None
  7177. clidx, cridx = None, None
  7178. is_series = isinstance(self, ABCSeries)
  7179. if axis is None or axis == 0:
  7180. if not self.index.equals(other.index):
  7181. join_index, ilidx, iridx = self.index.join(
  7182. other.index, how=join, level=level, return_indexers=True
  7183. )
  7184. if axis is None or axis == 1:
  7185. if not is_series and not self.columns.equals(other.columns):
  7186. join_columns, clidx, cridx = self.columns.join(
  7187. other.columns, how=join, level=level, return_indexers=True
  7188. )
  7189. if is_series:
  7190. reindexers = {0: [join_index, ilidx]}
  7191. else:
  7192. reindexers = {0: [join_index, ilidx], 1: [join_columns, clidx]}
  7193. left = self._reindex_with_indexers(
  7194. reindexers, copy=copy, fill_value=fill_value, allow_dups=True
  7195. )
  7196. # other must be always DataFrame
  7197. right = other._reindex_with_indexers(
  7198. {0: [join_index, iridx], 1: [join_columns, cridx]},
  7199. copy=copy,
  7200. fill_value=fill_value,
  7201. allow_dups=True,
  7202. )
  7203. if method is not None:
  7204. _left = left.fillna(method=method, axis=fill_axis, limit=limit)
  7205. assert _left is not None # needed for mypy
  7206. left = _left
  7207. right = right.fillna(method=method, axis=fill_axis, limit=limit)
  7208. # if DatetimeIndex have different tz, convert to UTC
  7209. if is_datetime64tz_dtype(left.index):
  7210. if left.index.tz != right.index.tz:
  7211. if join_index is not None:
  7212. left.index = join_index
  7213. right.index = join_index
  7214. return left.__finalize__(self), right.__finalize__(other)
  7215. def _align_series(
  7216. self,
  7217. other,
  7218. join="outer",
  7219. axis=None,
  7220. level=None,
  7221. copy: bool_t = True,
  7222. fill_value=None,
  7223. method=None,
  7224. limit=None,
  7225. fill_axis=0,
  7226. ):
  7227. is_series = isinstance(self, ABCSeries)
  7228. # series/series compat, other must always be a Series
  7229. if is_series:
  7230. if axis:
  7231. raise ValueError("cannot align series to a series other than axis 0")
  7232. # equal
  7233. if self.index.equals(other.index):
  7234. join_index, lidx, ridx = None, None, None
  7235. else:
  7236. join_index, lidx, ridx = self.index.join(
  7237. other.index, how=join, level=level, return_indexers=True
  7238. )
  7239. left = self._reindex_indexer(join_index, lidx, copy)
  7240. right = other._reindex_indexer(join_index, ridx, copy)
  7241. else:
  7242. # one has > 1 ndim
  7243. fdata = self._data
  7244. if axis == 0:
  7245. join_index = self.index
  7246. lidx, ridx = None, None
  7247. if not self.index.equals(other.index):
  7248. join_index, lidx, ridx = self.index.join(
  7249. other.index, how=join, level=level, return_indexers=True
  7250. )
  7251. if lidx is not None:
  7252. fdata = fdata.reindex_indexer(join_index, lidx, axis=1)
  7253. elif axis == 1:
  7254. join_index = self.columns
  7255. lidx, ridx = None, None
  7256. if not self.columns.equals(other.index):
  7257. join_index, lidx, ridx = self.columns.join(
  7258. other.index, how=join, level=level, return_indexers=True
  7259. )
  7260. if lidx is not None:
  7261. fdata = fdata.reindex_indexer(join_index, lidx, axis=0)
  7262. else:
  7263. raise ValueError("Must specify axis=0 or 1")
  7264. if copy and fdata is self._data:
  7265. fdata = fdata.copy()
  7266. left = self._constructor(fdata)
  7267. if ridx is None:
  7268. right = other
  7269. else:
  7270. right = other.reindex(join_index, level=level)
  7271. # fill
  7272. fill_na = notna(fill_value) or (method is not None)
  7273. if fill_na:
  7274. left = left.fillna(fill_value, method=method, limit=limit, axis=fill_axis)
  7275. right = right.fillna(fill_value, method=method, limit=limit)
  7276. # if DatetimeIndex have different tz, convert to UTC
  7277. if is_series or (not is_series and axis == 0):
  7278. if is_datetime64tz_dtype(left.index):
  7279. if left.index.tz != right.index.tz:
  7280. if join_index is not None:
  7281. left.index = join_index
  7282. right.index = join_index
  7283. return left.__finalize__(self), right.__finalize__(other)
  7284. def _where(
  7285. self,
  7286. cond,
  7287. other=np.nan,
  7288. inplace=False,
  7289. axis=None,
  7290. level=None,
  7291. errors="raise",
  7292. try_cast=False,
  7293. ):
  7294. """
  7295. Equivalent to public method `where`, except that `other` is not
  7296. applied as a function even if callable. Used in __setitem__.
  7297. """
  7298. inplace = validate_bool_kwarg(inplace, "inplace")
  7299. # align the cond to same shape as myself
  7300. cond = com.apply_if_callable(cond, self)
  7301. if isinstance(cond, NDFrame):
  7302. cond, _ = cond.align(self, join="right", broadcast_axis=1)
  7303. else:
  7304. if not hasattr(cond, "shape"):
  7305. cond = np.asanyarray(cond)
  7306. if cond.shape != self.shape:
  7307. raise ValueError("Array conditional must be same shape as self")
  7308. cond = self._constructor(cond, **self._construct_axes_dict())
  7309. # make sure we are boolean
  7310. fill_value = bool(inplace)
  7311. cond = cond.fillna(fill_value)
  7312. msg = "Boolean array expected for the condition, not {dtype}"
  7313. if not isinstance(cond, ABCDataFrame):
  7314. # This is a single-dimensional object.
  7315. if not is_bool_dtype(cond):
  7316. raise ValueError(msg.format(dtype=cond.dtype))
  7317. elif not cond.empty:
  7318. for dt in cond.dtypes:
  7319. if not is_bool_dtype(dt):
  7320. raise ValueError(msg.format(dtype=dt))
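        # Added explanatory note: the in-place path below uses ``putmask``,
        # which writes ``other`` where the mask is True -- the inverse of
        # ``where``'s keep-where-True condition -- so the condition is
        # inverted for the in-place case.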
        cond = ~cond if inplace else cond

        # try to align with other
        try_quick = True
        if hasattr(other, "align"):

            # align with me
            if other.ndim <= self.ndim:

                _, other = self.align(
                    other, join="left", axis=axis, level=level, fill_value=np.nan
                )

                # if we are NOT aligned, raise as we cannot where index
                if axis is None and not all(
                    other._get_axis(i).equals(ax) for i, ax in enumerate(self.axes)
                ):
                    raise InvalidIndexError

            # slice me out of the other
            else:
                raise NotImplementedError(
                    "cannot align with a higher dimensional NDFrame"
                )

        if isinstance(other, np.ndarray):

            if other.shape != self.shape:

                if self.ndim == 1:

                    icond = cond.values

                    # GH 2745 / GH 4192
                    # treat like a scalar
                    if len(other) == 1:
                        other = np.array(other[0])

                    # GH 3235
                    # match True cond to other
                    elif len(cond[icond]) == len(other):

                        # try to not change dtype at first (if try_quick)
                        if try_quick:
                            new_other = com.values_from_object(self)
                            new_other = new_other.copy()
                            new_other[icond] = other
                            other = new_other

                    else:
                        raise ValueError(
                            "Length of replacements must equal series length"
                        )

                else:
                    raise ValueError(
                        "other must be the same shape as self when an ndarray"
                    )

            # we are the same shape, so create an actual object for alignment
            else:
                other = self._constructor(other, **self._construct_axes_dict())

        if axis is None:
            axis = 0

        if self.ndim == getattr(other, "ndim", 0):
            align = True
        else:
            align = self._get_axis_number(axis) == 1

        block_axis = self._get_block_manager_axis(axis)

        if inplace:
            # we may have different type blocks come out of putmask, so
            # reconstruct the block manager
            self._check_inplace_setting(other)
            new_data = self._data.putmask(
                mask=cond,
                new=other,
                align=align,
                inplace=True,
                axis=block_axis,
                transpose=self._AXIS_REVERSED,
            )
            self._update_inplace(new_data)

        else:
            new_data = self._data.where(
                other=other,
                cond=cond,
                align=align,
                errors=errors,
                try_cast=try_cast,
                axis=block_axis,
            )

            return self._constructor(new_data).__finalize__(self)

    _shared_docs[
        "where"
    ] = """
        Replace values where the condition is %(cond_rev)s.

        Parameters
        ----------
        cond : bool %(klass)s, array-like, or callable
            Where `cond` is %(cond)s, keep the original value. Where
            %(cond_rev)s, replace with corresponding value from `other`.
            If `cond` is callable, it is computed on the %(klass)s and
            should return boolean %(klass)s or array. The callable must
            not change input %(klass)s (though pandas doesn't check it).
        other : scalar, %(klass)s, or callable
            Entries where `cond` is %(cond_rev)s are replaced with
            corresponding value from `other`.
            If other is callable, it is computed on the %(klass)s and
            should return scalar or %(klass)s. The callable must not
            change input %(klass)s (though pandas doesn't check it).
        inplace : bool, default False
            Whether to perform the operation in place on the data.
        axis : int, default None
            Alignment axis if needed.
        level : int, default None
            Alignment level if needed.
        errors : str, {'raise', 'ignore'}, default 'raise'
            Note that currently this parameter won't affect
            the results and will always coerce to a suitable dtype.

            - 'raise' : allow exceptions to be raised.
            - 'ignore' : suppress exceptions. On error return original object.
        try_cast : bool, default False
            Try to cast the result back to the input type (if possible).

        Returns
        -------
        Same type as caller

        See Also
        --------
        :func:`DataFrame.%(name_other)s` : Return an object of same shape as
            self.

        Notes
        -----
        The %(name)s method is an application of the if-then idiom. For each
        element in the calling DataFrame, if ``cond`` is ``%(cond)s`` the
        element is used; otherwise the corresponding element from the DataFrame
        ``other`` is used.

        The signature for :func:`DataFrame.where` differs from
        :func:`numpy.where`. Roughly ``df1.where(m, df2)`` is equivalent to
        ``np.where(m, df1, df2)``.

        For further details and examples see the ``%(name)s`` documentation in
        :ref:`indexing <indexing.where_mask>`.

        Examples
        --------
        >>> s = pd.Series(range(5))
        >>> s.where(s > 0)
        0    NaN
        1    1.0
        2    2.0
        3    3.0
        4    4.0
        dtype: float64

        >>> s.mask(s > 0)
        0    0.0
        1    NaN
        2    NaN
        3    NaN
        4    NaN
        dtype: float64

        >>> s.where(s > 1, 10)
        0    10
        1    10
        2     2
        3     3
        4     4
        dtype: int64

        >>> df = pd.DataFrame(np.arange(10).reshape(-1, 2), columns=['A', 'B'])
        >>> df
           A  B
        0  0  1
        1  2  3
        2  4  5
        3  6  7
        4  8  9
        >>> m = df %% 3 == 0
        >>> df.where(m, -df)
           A  B
        0  0 -1
        1 -2  3
        2 -4 -5
        3  6 -7
        4 -8  9
        >>> df.where(m, -df) == np.where(m, df, -df)
              A     B
        0  True  True
        1  True  True
        2  True  True
        3  True  True
        4  True  True
        >>> df.where(m, -df) == df.mask(~m, -df)
              A     B
        0  True  True
        1  True  True
        2  True  True
        3  True  True
        4  True  True
        """

    @Appender(
        _shared_docs["where"]
        % dict(
            _shared_doc_kwargs,
            cond="True",
            cond_rev="False",
            name="where",
            name_other="mask",
        )
    )
    def where(
        self,
        cond,
        other=np.nan,
        inplace=False,
        axis=None,
        level=None,
        errors="raise",
        try_cast=False,
    ):
        other = com.apply_if_callable(other, self)
        return self._where(
            cond, other, inplace, axis, level, errors=errors, try_cast=try_cast
        )

    @Appender(
        _shared_docs["where"]
        % dict(
            _shared_doc_kwargs,
            cond="False",
            cond_rev="True",
            name="mask",
            name_other="where",
        )
    )
    def mask(
        self,
        cond,
        other=np.nan,
        inplace=False,
        axis=None,
        level=None,
        errors="raise",
        try_cast=False,
    ):
        inplace = validate_bool_kwarg(inplace, "inplace")
        cond = com.apply_if_callable(cond, self)

        # see gh-21891
        if not hasattr(cond, "__invert__"):
            cond = np.array(cond)

        return self.where(
            ~cond,
            other=other,
            inplace=inplace,
            axis=axis,
            level=level,
            try_cast=try_cast,
            errors=errors,
        )

    _shared_docs[
        "shift"
    ] = """
        Shift index by desired number of periods with an optional time `freq`.

        When `freq` is not passed, shift the index without realigning the data.
        If `freq` is passed (in this case, the index must be date or datetime,
        or it will raise a `NotImplementedError`), the index will be
        increased using the periods and the `freq`.

        Parameters
        ----------
        periods : int
            Number of periods to shift. Can be positive or negative.
        freq : DateOffset, tseries.offsets, timedelta, or str, optional
            Offset to use from the tseries module or time rule (e.g. 'EOM').
            If `freq` is specified then the index values are shifted but the
            data is not realigned. That is, use `freq` if you would like to
            extend the index when shifting and preserve the original data.
        axis : {0 or 'index', 1 or 'columns', None}, default None
            Shift direction.
        fill_value : object, optional
            The scalar value to use for newly introduced missing values.
            The default depends on the dtype of `self`.
            For numeric data, ``np.nan`` is used.
            For datetime, timedelta, or period data, etc. :attr:`NaT` is used.
            For extension dtypes, ``self.dtype.na_value`` is used.

            .. versionchanged:: 0.24.0

        Returns
        -------
        %(klass)s
            Copy of input object, shifted.

        See Also
        --------
        Index.shift : Shift values of Index.
        DatetimeIndex.shift : Shift values of DatetimeIndex.
        PeriodIndex.shift : Shift values of PeriodIndex.
        tshift : Shift the time index, using the index's frequency if
            available.

        Examples
        --------
        >>> df = pd.DataFrame({'Col1': [10, 20, 15, 30, 45],
        ...                    'Col2': [13, 23, 18, 33, 48],
        ...                    'Col3': [17, 27, 22, 37, 52]})

        >>> df.shift(periods=3)
           Col1  Col2  Col3
        0   NaN   NaN   NaN
        1   NaN   NaN   NaN
        2   NaN   NaN   NaN
        3  10.0  13.0  17.0
        4  20.0  23.0  27.0

        >>> df.shift(periods=1, axis='columns')
           Col1  Col2  Col3
        0   NaN  10.0  13.0
        1   NaN  20.0  23.0
        2   NaN  15.0  18.0
        3   NaN  30.0  33.0
        4   NaN  45.0  48.0

        >>> df.shift(periods=3, fill_value=0)
           Col1  Col2  Col3
        0     0     0     0
        1     0     0     0
        2     0     0     0
        3    10    13    17
        4    20    23    27
        """

    @Appender(_shared_docs["shift"] % _shared_doc_kwargs)
    def shift(
        self: FrameOrSeries, periods=1, freq=None, axis=0, fill_value=None
    ) -> FrameOrSeries:
        if periods == 0:
            return self.copy()

        block_axis = self._get_block_manager_axis(axis)
        if freq is None:
            new_data = self._data.shift(
                periods=periods, axis=block_axis, fill_value=fill_value
            )
        else:
            return self.tshift(periods, freq)

        return self._constructor(new_data).__finalize__(self)

    def slice_shift(self: FrameOrSeries, periods: int = 1, axis=0) -> FrameOrSeries:
        """
        Equivalent to `shift` without copying data.

        The shifted data will not include the dropped periods and the
        shifted axis will be smaller than the original.

        Parameters
        ----------
        periods : int
            Number of periods to move, can be positive or negative.

        Returns
        -------
        shifted : same type as caller

        Notes
        -----
        While `slice_shift` is faster than `shift`, you may pay for it
        later during alignment.
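
        Examples
        --------
        An illustrative sketch (added example; assumes the usual
        ``import pandas as pd``):

        >>> s = pd.Series([1, 2, 3, 4])
        >>> s.slice_shift(1)
        1    1
        2    2
        3    3
        dtype: int64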
  7654. """
  7655. if periods == 0:
  7656. return self
  7657. if periods > 0:
  7658. vslicer = slice(None, -periods)
  7659. islicer = slice(periods, None)
  7660. else:
  7661. vslicer = slice(-periods, None)
  7662. islicer = slice(None, periods)
  7663. new_obj = self._slice(vslicer, axis=axis)
  7664. shifted_axis = self._get_axis(axis)[islicer]
  7665. new_obj.set_axis(shifted_axis, axis=axis, inplace=True)
  7666. return new_obj.__finalize__(self)
  7667. def tshift(
  7668. self: FrameOrSeries, periods: int = 1, freq=None, axis=0
  7669. ) -> FrameOrSeries:
  7670. """
  7671. Shift the time index, using the index's frequency if available.
  7672. Parameters
  7673. ----------
  7674. periods : int
  7675. Number of periods to move, can be positive or negative.
  7676. freq : DateOffset, timedelta, or str, default None
  7677. Increment to use from the tseries module
  7678. or time rule expressed as a string (e.g. 'EOM').
  7679. axis : {0 or ‘index’, 1 or ‘columns’, None}, default 0
  7680. Corresponds to the axis that contains the Index.
  7681. Returns
  7682. -------
  7683. shifted : Series/DataFrame
  7684. Notes
  7685. -----
  7686. If freq is not specified then tries to use the freq or inferred_freq
  7687. attributes of the index. If neither of those attributes exist, a
  7688. ValueError is thrown
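
        Examples
        --------
        A minimal sketch (added example; assumes the usual
        ``import pandas as pd``):

        >>> idx = pd.date_range('2020-01-01', periods=3, freq='D')
        >>> s = pd.Series([1, 2, 3], index=idx)
        >>> s.tshift(1)
        2020-01-02    1
        2020-01-03    2
        2020-01-04    3
        Freq: D, dtype: int64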
  7689. """
  7690. index = self._get_axis(axis)
  7691. if freq is None:
  7692. freq = getattr(index, "freq", None)
  7693. if freq is None:
  7694. freq = getattr(index, "inferred_freq", None)
  7695. if freq is None:
  7696. msg = "Freq was not given and was not set in the index"
  7697. raise ValueError(msg)
  7698. if periods == 0:
  7699. return self
  7700. if isinstance(freq, str):
  7701. freq = to_offset(freq)
  7702. block_axis = self._get_block_manager_axis(axis)
  7703. if isinstance(index, PeriodIndex):
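            # Added explanatory note: a freq equal to the PeriodIndex's own
            # freq amounts to a plain positional shift of the periods; any
            # other non-None freq is rejected below.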
            orig_freq = to_offset(index.freq)
            if freq == orig_freq:
                new_data = self._data.copy()
                new_data.axes[block_axis] = index.shift(periods)
            elif orig_freq is not None:
                msg = (
                    f"Given freq {freq.rule_code} does not match"
                    f" PeriodIndex freq {orig_freq.rule_code}"
                )
                raise ValueError(msg)
        else:
            new_data = self._data.copy()
            new_data.axes[block_axis] = index.shift(periods, freq)

        return self._constructor(new_data).__finalize__(self)

    def truncate(
        self: FrameOrSeries, before=None, after=None, axis=None, copy: bool_t = True
    ) -> FrameOrSeries:
        """
        Truncate a Series or DataFrame before and after some index value.

        This is a useful shorthand for boolean indexing based on index
        values above or below certain thresholds.

        Parameters
        ----------
        before : date, str, int
            Truncate all rows before this index value.
        after : date, str, int
            Truncate all rows after this index value.
        axis : {0 or 'index', 1 or 'columns'}, optional
            Axis to truncate. Truncates the index (rows) by default.
        copy : bool, default True
            Return a copy of the truncated section.

        Returns
        -------
        type of caller
            The truncated Series or DataFrame.

        See Also
        --------
        DataFrame.loc : Select a subset of a DataFrame by label.
        DataFrame.iloc : Select a subset of a DataFrame by position.

        Notes
        -----
        If the index being truncated contains only datetime values,
        `before` and `after` may be specified as strings instead of
        Timestamps.

        Examples
        --------
        >>> df = pd.DataFrame({'A': ['a', 'b', 'c', 'd', 'e'],
        ...                    'B': ['f', 'g', 'h', 'i', 'j'],
        ...                    'C': ['k', 'l', 'm', 'n', 'o']},
        ...                   index=[1, 2, 3, 4, 5])
        >>> df
           A  B  C
        1  a  f  k
        2  b  g  l
        3  c  h  m
        4  d  i  n
        5  e  j  o

        >>> df.truncate(before=2, after=4)
           A  B  C
        2  b  g  l
        3  c  h  m
        4  d  i  n

        The columns of a DataFrame can be truncated.

        >>> df.truncate(before="A", after="B", axis="columns")
           A  B
        1  a  f
        2  b  g
        3  c  h
        4  d  i
        5  e  j

        For Series, only rows can be truncated.

        >>> df['A'].truncate(before=2, after=4)
        2    b
        3    c
        4    d
        Name: A, dtype: object

        The index values in ``truncate`` can be datetimes or string
        dates.

        >>> dates = pd.date_range('2016-01-01', '2016-02-01', freq='s')
        >>> df = pd.DataFrame(index=dates, data={'A': 1})
        >>> df.tail()
                             A
        2016-01-31 23:59:56  1
        2016-01-31 23:59:57  1
        2016-01-31 23:59:58  1
        2016-01-31 23:59:59  1
        2016-02-01 00:00:00  1

        >>> df.truncate(before=pd.Timestamp('2016-01-05'),
        ...             after=pd.Timestamp('2016-01-10')).tail()
                             A
        2016-01-09 23:59:56  1
        2016-01-09 23:59:57  1
        2016-01-09 23:59:58  1
        2016-01-09 23:59:59  1
        2016-01-10 00:00:00  1

        Because the index is a DatetimeIndex containing only dates, we can
        specify `before` and `after` as strings. They will be coerced to
        Timestamps before truncation.

        >>> df.truncate('2016-01-05', '2016-01-10').tail()
                             A
        2016-01-09 23:59:56  1
        2016-01-09 23:59:57  1
        2016-01-09 23:59:58  1
        2016-01-09 23:59:59  1
        2016-01-10 00:00:00  1

        Note that ``truncate`` assumes a 0 value for any unspecified time
        component (midnight). This differs from partial string slicing, which
        returns any partially matching dates.

        >>> df.loc['2016-01-05':'2016-01-10', :].tail()
                             A
        2016-01-10 23:59:55  1
        2016-01-10 23:59:56  1
        2016-01-10 23:59:57  1
        2016-01-10 23:59:58  1
        2016-01-10 23:59:59  1
        """
        if axis is None:
            axis = self._stat_axis_number
        axis = self._get_axis_number(axis)
        ax = self._get_axis(axis)

        # GH 17935
        # Check that index is sorted
        if not ax.is_monotonic_increasing and not ax.is_monotonic_decreasing:
            raise ValueError("truncate requires a sorted index")

        # if we have a date index, convert to dates, otherwise
        # treat like a slice
        if ax.is_all_dates:
            from pandas.core.tools.datetimes import to_datetime

            before = to_datetime(before)
            after = to_datetime(after)

        if before is not None and after is not None:
            if before > after:
                raise ValueError(f"Truncate: {after} must be after {before}")

        slicer = [slice(None, None)] * self._AXIS_LEN
        slicer[axis] = slice(before, after)
        result = self.loc[tuple(slicer)]

        if isinstance(ax, MultiIndex):
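            # Added explanatory note: for a MultiIndex, recompute the
            # truncated axis directly from the original index so the result's
            # axis reflects the requested bounds.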
            setattr(result, self._get_axis_name(axis), ax.truncate(before, after))

        if copy:
            result = result.copy()

        return result

    def tz_convert(
        self: FrameOrSeries, tz, axis=0, level=None, copy: bool_t = True
    ) -> FrameOrSeries:
        """
        Convert tz-aware axis to target time zone.

        Parameters
        ----------
        tz : str or tzinfo object
        axis : the axis to convert
        level : int, str, default None
            If axis is a MultiIndex, convert a specific level. Otherwise
            must be None.
        copy : bool, default True
            Also make a copy of the underlying data.

        Returns
        -------
        %(klass)s
            Object with time zone converted axis.

        Raises
        ------
        TypeError
            If the axis is tz-naive.
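
        Examples
        --------
        An illustrative sketch (added example; assumes the usual
        ``import pandas as pd``):

        >>> idx = pd.date_range('2020-01-01', periods=2, freq='H', tz='UTC')
        >>> s = pd.Series([1, 2], index=idx)
        >>> s.tz_convert('US/Eastern')
        2019-12-31 19:00:00-05:00    1
        2019-12-31 20:00:00-05:00    2
        Freq: H, dtype: int64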
  7867. """
  7868. axis = self._get_axis_number(axis)
  7869. ax = self._get_axis(axis)
  7870. def _tz_convert(ax, tz):
  7871. if not hasattr(ax, "tz_convert"):
  7872. if len(ax) > 0:
  7873. ax_name = self._get_axis_name(axis)
  7874. raise TypeError(
  7875. f"{ax_name} is not a valid DatetimeIndex or PeriodIndex"
  7876. )
  7877. else:
  7878. ax = DatetimeIndex([], tz=tz)
  7879. else:
  7880. ax = ax.tz_convert(tz)
  7881. return ax
  7882. # if a level is given it must be a MultiIndex level or
  7883. # equivalent to the axis name
  7884. if isinstance(ax, MultiIndex):
  7885. level = ax._get_level_number(level)
  7886. new_level = _tz_convert(ax.levels[level], tz)
  7887. ax = ax.set_levels(new_level, level=level)
  7888. else:
  7889. if level not in (None, 0, ax.name):
  7890. raise ValueError(f"The level {level} is not valid")
  7891. ax = _tz_convert(ax, tz)
  7892. result = self._constructor(self._data, copy=copy)
  7893. result = result.set_axis(ax, axis=axis, inplace=False)
  7894. return result.__finalize__(self)
  7895. def tz_localize(
  7896. self: FrameOrSeries,
  7897. tz,
  7898. axis=0,
  7899. level=None,
  7900. copy: bool_t = True,
  7901. ambiguous="raise",
  7902. nonexistent: str = "raise",
  7903. ) -> FrameOrSeries:
  7904. """
  7905. Localize tz-naive index of a Series or DataFrame to target time zone.
  7906. This operation localizes the Index. To localize the values in a
  7907. timezone-naive Series, use :meth:`Series.dt.tz_localize`.
  7908. Parameters
  7909. ----------
  7910. tz : str or tzinfo
  7911. axis : the axis to localize
  7912. level : int, str, default None
  7913. If axis ia a MultiIndex, localize a specific level. Otherwise
  7914. must be None.
  7915. copy : bool, default True
  7916. Also make a copy of the underlying data.
  7917. ambiguous : 'infer', bool-ndarray, 'NaT', default 'raise'
  7918. When clocks moved backward due to DST, ambiguous times may arise.
  7919. For example in Central European Time (UTC+01), when going from
  7920. 03:00 DST to 02:00 non-DST, 02:30:00 local time occurs both at
  7921. 00:30:00 UTC and at 01:30:00 UTC. In such a situation, the
  7922. `ambiguous` parameter dictates how ambiguous times should be
  7923. handled.
  7924. - 'infer' will attempt to infer fall dst-transition hours based on
  7925. order
  7926. - bool-ndarray where True signifies a DST time, False designates
  7927. a non-DST time (note that this flag is only applicable for
  7928. ambiguous times)
  7929. - 'NaT' will return NaT where there are ambiguous times
  7930. - 'raise' will raise an AmbiguousTimeError if there are ambiguous
  7931. times.
  7932. nonexistent : str, default 'raise'
  7933. A nonexistent time does not exist in a particular timezone
  7934. where clocks moved forward due to DST. Valid values are:
  7935. - 'shift_forward' will shift the nonexistent time forward to the
  7936. closest existing time
  7937. - 'shift_backward' will shift the nonexistent time backward to the
  7938. closest existing time
  7939. - 'NaT' will return NaT where there are nonexistent times
  7940. - timedelta objects will shift nonexistent times by the timedelta
  7941. - 'raise' will raise an NonExistentTimeError if there are
  7942. nonexistent times.
  7943. .. versionadded:: 0.24.0
  7944. Returns
  7945. -------
  7946. Series or DataFrame
  7947. Same type as the input.
  7948. Raises
  7949. ------
  7950. TypeError
  7951. If the TimeSeries is tz-aware and tz is not None.
  7952. Examples
  7953. --------
  7954. Localize local times:
  7955. >>> s = pd.Series([1],
  7956. ... index=pd.DatetimeIndex(['2018-09-15 01:30:00']))
  7957. >>> s.tz_localize('CET')
  7958. 2018-09-15 01:30:00+02:00 1
  7959. dtype: int64
  7960. Be careful with DST changes. When there is sequential data, pandas
  7961. can infer the DST time:
  7962. >>> s = pd.Series(range(7),
  7963. ... index=pd.DatetimeIndex(['2018-10-28 01:30:00',
  7964. ... '2018-10-28 02:00:00',
  7965. ... '2018-10-28 02:30:00',
  7966. ... '2018-10-28 02:00:00',
  7967. ... '2018-10-28 02:30:00',
  7968. ... '2018-10-28 03:00:00',
  7969. ... '2018-10-28 03:30:00']))
  7970. >>> s.tz_localize('CET', ambiguous='infer')
  7971. 2018-10-28 01:30:00+02:00 0
  7972. 2018-10-28 02:00:00+02:00 1
  7973. 2018-10-28 02:30:00+02:00 2
  7974. 2018-10-28 02:00:00+01:00 3
  7975. 2018-10-28 02:30:00+01:00 4
  7976. 2018-10-28 03:00:00+01:00 5
  7977. 2018-10-28 03:30:00+01:00 6
  7978. dtype: int64
  7979. In some cases, inferring the DST is impossible. In such cases, you can
  7980. pass an ndarray to the ambiguous parameter to set the DST explicitly
  7981. >>> s = pd.Series(range(3),
  7982. ... index=pd.DatetimeIndex(['2018-10-28 01:20:00',
  7983. ... '2018-10-28 02:36:00',
  7984. ... '2018-10-28 03:46:00']))
  7985. >>> s.tz_localize('CET', ambiguous=np.array([True, True, False]))
  7986. 2018-10-28 01:20:00+02:00 0
  7987. 2018-10-28 02:36:00+02:00 1
  7988. 2018-10-28 03:46:00+01:00 2
  7989. dtype: int64
  7990. If the DST transition causes nonexistent times, you can shift these
  7991. dates forward or backwards with a timedelta object or `'shift_forward'`
  7992. or `'shift_backwards'`.
  7993. >>> s = pd.Series(range(2),
  7994. ... index=pd.DatetimeIndex(['2015-03-29 02:30:00',
  7995. ... '2015-03-29 03:30:00']))
  7996. >>> s.tz_localize('Europe/Warsaw', nonexistent='shift_forward')
  7997. 2015-03-29 03:00:00+02:00 0
  7998. 2015-03-29 03:30:00+02:00 1
  7999. dtype: int64
  8000. >>> s.tz_localize('Europe/Warsaw', nonexistent='shift_backward')
  8001. 2015-03-29 01:59:59.999999999+01:00 0
  8002. 2015-03-29 03:30:00+02:00 1
  8003. dtype: int64
  8004. >>> s.tz_localize('Europe/Warsaw', nonexistent=pd.Timedelta('1H'))
  8005. 2015-03-29 03:30:00+02:00 0
  8006. 2015-03-29 03:30:00+02:00 1
  8007. dtype: int64
  8008. """
        nonexistent_options = ("raise", "NaT", "shift_forward", "shift_backward")
        if nonexistent not in nonexistent_options and not isinstance(
            nonexistent, timedelta
        ):
            raise ValueError(
                "The nonexistent argument must be one of 'raise', "
                "'NaT', 'shift_forward', 'shift_backward' or "
                "a timedelta object"
            )

        axis = self._get_axis_number(axis)
        ax = self._get_axis(axis)

        def _tz_localize(ax, tz, ambiguous, nonexistent):
            if not hasattr(ax, "tz_localize"):
                if len(ax) > 0:
                    ax_name = self._get_axis_name(axis)
                    raise TypeError(
                        f"{ax_name} is not a valid DatetimeIndex or PeriodIndex"
                    )
                else:
                    ax = DatetimeIndex([], tz=tz)
            else:
                ax = ax.tz_localize(tz, ambiguous=ambiguous, nonexistent=nonexistent)
            return ax

        # if a level is given it must be a MultiIndex level or
        # equivalent to the axis name
        if isinstance(ax, MultiIndex):
            level = ax._get_level_number(level)
            new_level = _tz_localize(ax.levels[level], tz, ambiguous, nonexistent)
            ax = ax.set_levels(new_level, level=level)
        else:
            if level not in (None, 0, ax.name):
                raise ValueError(f"The level {level} is not valid")
            ax = _tz_localize(ax, tz, ambiguous, nonexistent)

        result = self._constructor(self._data, copy=copy)
        result = result.set_axis(ax, axis=axis, inplace=False)
        return result.__finalize__(self)
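
    # Usage sketch (illustrative, not a doctest): ``level`` localizes a
    # single level of a MultiIndex while leaving the other levels untouched.
    # The index names below are arbitrary examples.
    #
    # >>> idx = pd.MultiIndex.from_product(
    # ...     [pd.date_range("2020-01-01", periods=2), ["a", "b"]],
    # ...     names=["time", "key"],
    # ... )
    # >>> df = pd.DataFrame({"x": range(4)}, index=idx)
    # >>> df.tz_localize("UTC", level="time").index.levels[0].tz is not None
    # True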

    # ----------------------------------------------------------------------
    # Numeric Methods

    def abs(self: FrameOrSeries) -> FrameOrSeries:
        """
        Return a Series/DataFrame with absolute numeric value of each element.

        This function only applies to elements that are all numeric.

        Returns
        -------
        abs
            Series/DataFrame containing the absolute value of each element.

        See Also
        --------
        numpy.absolute : Calculate the absolute value element-wise.

        Notes
        -----
        For ``complex`` inputs, ``1.2 + 1j``, the absolute value is
        :math:`\\sqrt{ a^2 + b^2 }`.

        Examples
        --------
        Absolute numeric values in a Series.

        >>> s = pd.Series([-1.10, 2, -3.33, 4])
        >>> s.abs()
        0    1.10
        1    2.00
        2    3.33
        3    4.00
        dtype: float64

        Absolute numeric values in a Series with complex numbers.

        >>> s = pd.Series([1.2 + 1j])
        >>> s.abs()
        0    1.56205
        dtype: float64

        Absolute numeric values in a Series with a Timedelta element.

        >>> s = pd.Series([pd.Timedelta('1 days')])
        >>> s.abs()
        0   1 days
        dtype: timedelta64[ns]

        Select rows with data closest to certain value using argsort (from
        `StackOverflow <https://stackoverflow.com/a/17758115>`__).

        >>> df = pd.DataFrame({
        ...     'a': [4, 5, 6, 7],
        ...     'b': [10, 20, 30, 40],
        ...     'c': [100, 50, -30, -50]
        ... })
        >>> df
             a    b    c
        0    4   10  100
        1    5   20   50
        2    6   30  -30
        3    7   40  -50
        >>> df.loc[(df.c - 43).abs().argsort()]
             a    b    c
        1    5   20   50
        0    4   10  100
        2    6   30  -30
        3    7   40  -50
        """
        return np.abs(self)

    def describe(
        self: FrameOrSeries, percentiles=None, include=None, exclude=None
    ) -> FrameOrSeries:
        """
        Generate descriptive statistics.

        Descriptive statistics include those that summarize the central
        tendency, dispersion and shape of a
        dataset's distribution, excluding ``NaN`` values.

        Analyzes both numeric and object series, as well
        as ``DataFrame`` column sets of mixed data types. The output
        will vary depending on what is provided. Refer to the notes
        below for more detail.

        Parameters
        ----------
        percentiles : list-like of numbers, optional
            The percentiles to include in the output. All should
            fall between 0 and 1. The default is
            ``[.25, .5, .75]``, which returns the 25th, 50th, and
            75th percentiles.
        include : 'all', list-like of dtypes or None (default), optional
            A white list of data types to include in the result. Ignored
            for ``Series``. Here are the options:

            - 'all' : All columns of the input will be included in the output.
            - A list-like of dtypes : Limits the results to the
              provided data types.
              To limit the result to numeric types submit
              ``numpy.number``. To limit it instead to object columns submit
              the ``numpy.object`` data type. Strings
              can also be used in the style of
              ``select_dtypes`` (e.g. ``df.describe(include=['O'])``). To
              select pandas categorical columns, use ``'category'``
            - None (default) : The result will include all numeric columns.
        exclude : list-like of dtypes or None (default), optional
            A black list of data types to omit from the result. Ignored
            for ``Series``. Here are the options:

            - A list-like of dtypes : Excludes the provided data types
              from the result. To exclude numeric types submit
              ``numpy.number``. To exclude object columns submit the data
              type ``numpy.object``. Strings can also be used in the style of
              ``select_dtypes`` (e.g. ``df.describe(exclude=['O'])``). To
              exclude pandas categorical columns, use ``'category'``
            - None (default) : The result will exclude nothing.

        Returns
        -------
        Series or DataFrame
            Summary statistics of the Series or Dataframe provided.

        See Also
        --------
        DataFrame.count: Count number of non-NA/null observations.
        DataFrame.max: Maximum of the values in the object.
        DataFrame.min: Minimum of the values in the object.
        DataFrame.mean: Mean of the values.
        DataFrame.std: Standard deviation of the observations.
        DataFrame.select_dtypes: Subset of a DataFrame including/excluding
            columns based on their dtype.

        Notes
        -----
        For numeric data, the result's index will include ``count``,
        ``mean``, ``std``, ``min``, ``max`` as well as lower, ``50`` and
        upper percentiles. By default the lower percentile is ``25`` and the
        upper percentile is ``75``. The ``50`` percentile is the
        same as the median.

        For object data (e.g. strings or timestamps), the result's index
        will include ``count``, ``unique``, ``top``, and ``freq``. The ``top``
        is the most common value. The ``freq`` is the most common value's
        frequency. Timestamps also include the ``first`` and ``last`` items.

        If multiple object values have the highest count, then the ``top``
        result will be arbitrarily chosen from among those values.

        For mixed data types provided via a ``DataFrame``, the default is to
        return only an analysis of numeric columns. If the dataframe consists
        only of object and categorical data without any numeric columns, the
        default is to return an analysis of both the object and categorical
        columns. If ``include='all'`` is provided as an option, the result
        will include a union of attributes of each type.

        The `include` and `exclude` parameters can be used to limit
        which columns in a ``DataFrame`` are analyzed for the output.
        The parameters are ignored when analyzing a ``Series``.

        Examples
        --------
        Describing a numeric ``Series``.

        >>> s = pd.Series([1, 2, 3])
        >>> s.describe()
        count    3.0
        mean     2.0
        std      1.0
        min      1.0
        25%      1.5
        50%      2.0
        75%      2.5
        max      3.0
        dtype: float64

        Describing a categorical ``Series``.

        >>> s = pd.Series(['a', 'a', 'b', 'c'])
        >>> s.describe()
        count     4
        unique    3
        top       a
        freq      2
        dtype: object

        Describing a timestamp ``Series``.

        >>> s = pd.Series([
        ...     np.datetime64("2000-01-01"),
        ...     np.datetime64("2010-01-01"),
        ...     np.datetime64("2010-01-01")
        ... ])
        >>> s.describe()
        count                       3
        unique                      2
        top       2010-01-01 00:00:00
        freq                        2
        first     2000-01-01 00:00:00
        last      2010-01-01 00:00:00
        dtype: object

        Describing a ``DataFrame``. By default only numeric fields
        are returned.

        >>> df = pd.DataFrame({'categorical': pd.Categorical(['d','e','f']),
        ...                    'numeric': [1, 2, 3],
        ...                    'object': ['a', 'b', 'c']
        ...                   })
        >>> df.describe()
               numeric
        count      3.0
        mean       2.0
        std        1.0
        min        1.0
        25%        1.5
        50%        2.0
        75%        2.5
        max        3.0

        Describing all columns of a ``DataFrame`` regardless of data type.

        >>> df.describe(include='all')
               categorical  numeric object
        count            3      3.0      3
        unique           3      NaN      3
        top              f      NaN      c
        freq             1      NaN      1
        mean           NaN      2.0    NaN
        std            NaN      1.0    NaN
        min            NaN      1.0    NaN
        25%            NaN      1.5    NaN
        50%            NaN      2.0    NaN
        75%            NaN      2.5    NaN
        max            NaN      3.0    NaN

        Describing a column from a ``DataFrame`` by accessing it as
        an attribute.

        >>> df.numeric.describe()
        count    3.0
        mean     2.0
        std      1.0
        min      1.0
        25%      1.5
        50%      2.0
        75%      2.5
        max      3.0
        Name: numeric, dtype: float64

        Including only numeric columns in a ``DataFrame`` description.

        >>> df.describe(include=[np.number])
               numeric
        count      3.0
        mean       2.0
        std        1.0
        min        1.0
        25%        1.5
        50%        2.0
        75%        2.5
        max        3.0

        Including only string columns in a ``DataFrame`` description.

        >>> df.describe(include=[np.object])
               object
        count       3
        unique      3
        top         c
        freq        1

        Including only categorical columns from a ``DataFrame`` description.

        >>> df.describe(include=['category'])
               categorical
        count            3
        unique           3
        top              f
        freq             1

        Excluding numeric columns from a ``DataFrame`` description.

        >>> df.describe(exclude=[np.number])
               categorical object
        count            3      3
        unique           3      3
        top              f      c
        freq             1      1

        Excluding object columns from a ``DataFrame`` description.

        >>> df.describe(exclude=[np.object])
               categorical  numeric
        count            3      3.0
        unique           3      NaN
        top              f      NaN
        freq             1      NaN
        mean           NaN      2.0
        std            NaN      1.0
        min            NaN      1.0
        25%            NaN      1.5
        50%            NaN      2.0
        75%            NaN      2.5
        max            NaN      3.0
        """
        if self.ndim == 2 and self.columns.size == 0:
            raise ValueError("Cannot describe a DataFrame without columns")

        if percentiles is not None:
            # explicit conversion of `percentiles` to list
            percentiles = list(percentiles)

            # get them all to be in [0, 1]
            validate_percentile(percentiles)

            # median should always be included
            if 0.5 not in percentiles:
                percentiles.append(0.5)
            percentiles = np.asarray(percentiles)
        else:
            percentiles = np.array([0.25, 0.5, 0.75])

        # sort and check for duplicates
        unique_pcts = np.unique(percentiles)
        if len(unique_pcts) < len(percentiles):
            raise ValueError("percentiles cannot contain duplicates")
        percentiles = unique_pcts

        formatted_percentiles = format_percentiles(percentiles)

        def describe_numeric_1d(series):
            stat_index = (
                ["count", "mean", "std", "min"] + formatted_percentiles + ["max"]
            )
            d = (
                [series.count(), series.mean(), series.std(), series.min()]
                + series.quantile(percentiles).tolist()
                + [series.max()]
            )
            return pd.Series(d, index=stat_index, name=series.name)

        def describe_categorical_1d(data):
            names = ["count", "unique"]
            objcounts = data.value_counts()
            count_unique = len(objcounts[objcounts != 0])
            result = [data.count(), count_unique]
            dtype = None
            if result[1] > 0:
                top, freq = objcounts.index[0], objcounts.iloc[0]
                if is_datetime64_any_dtype(data):
                    tz = data.dt.tz
                    asint = data.dropna().values.view("i8")
                    top = Timestamp(top)
                    if top.tzinfo is not None and tz is not None:
                        # Don't tz_localize(None) if key is already tz-aware
                        top = top.tz_convert(tz)
                    else:
                        top = top.tz_localize(tz)
                    names += ["top", "freq", "first", "last"]
                    result += [
                        top,
                        freq,
                        Timestamp(asint.min(), tz=tz),
                        Timestamp(asint.max(), tz=tz),
                    ]
                else:
                    names += ["top", "freq"]
                    result += [top, freq]

            # If the DataFrame is empty, set 'top' and 'freq' to None
            # to maintain output shape consistency
            else:
                names += ["top", "freq"]
                result += [np.nan, np.nan]
                dtype = "object"

            return pd.Series(result, index=names, name=data.name, dtype=dtype)

        def describe_1d(data):
            if is_bool_dtype(data):
                return describe_categorical_1d(data)
            elif is_numeric_dtype(data):
                return describe_numeric_1d(data)
            elif is_timedelta64_dtype(data):
                return describe_numeric_1d(data)
            else:
                return describe_categorical_1d(data)

        if self.ndim == 1:
            return describe_1d(self)
        elif (include is None) and (exclude is None):
            # when some numerics are found, keep only numerics
            data = self.select_dtypes(include=[np.number])
            if len(data.columns) == 0:
                data = self
        elif include == "all":
            if exclude is not None:
                msg = "exclude must be None when include is 'all'"
                raise ValueError(msg)
            data = self
        else:
            data = self.select_dtypes(include=include, exclude=exclude)

        ldesc = [describe_1d(s) for _, s in data.items()]
        # set a convenient order for rows
        names: List[Optional[Hashable]] = []
        ldesc_indexes = sorted((x.index for x in ldesc), key=len)
        for idxnames in ldesc_indexes:
            for name in idxnames:
                if name not in names:
                    names.append(name)

        d = pd.concat([x.reindex(names, copy=False) for x in ldesc], axis=1, sort=False)
        d.columns = data.columns.copy()
        return d
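
    # Usage sketch (illustrative, not a doctest): custom ``percentiles`` are
    # sorted and the median is added when missing, so the row order below is
    # expected to be count/mean/std/min/5%/50%/95%/max.
    #
    # >>> s = pd.Series(range(1, 101))
    # >>> s.describe(percentiles=[0.05, 0.95]).index.tolist()
    # ['count', 'mean', 'std', 'min', '5%', '50%', '95%', 'max']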

    _shared_docs[
        "pct_change"
    ] = """
        Percentage change between the current and a prior element.

        Computes the percentage change from the immediately previous row by
        default. This is useful in comparing the percentage of change in a time
        series of elements.

        Parameters
        ----------
        periods : int, default 1
            Periods to shift for forming percent change.
        fill_method : str, default 'pad'
            How to handle NAs before computing percent changes.
        limit : int, default None
            The number of consecutive NAs to fill before stopping.
        freq : DateOffset, timedelta, or str, optional
            Increment to use from time series API (e.g. 'M' or BDay()).
        **kwargs
            Additional keyword arguments are passed into
            `DataFrame.shift` or `Series.shift`.

        Returns
        -------
        chg : Series or DataFrame
            The same type as the calling object.

        See Also
        --------
        Series.diff : Compute the difference of two elements in a Series.
        DataFrame.diff : Compute the difference of two elements in a DataFrame.
        Series.shift : Shift the index by some number of periods.
        DataFrame.shift : Shift the index by some number of periods.

        Examples
        --------
        **Series**

        >>> s = pd.Series([90, 91, 85])
        >>> s
        0    90
        1    91
        2    85
        dtype: int64

        >>> s.pct_change()
        0         NaN
        1    0.011111
        2   -0.065934
        dtype: float64

        >>> s.pct_change(periods=2)
        0         NaN
        1         NaN
        2   -0.055556
        dtype: float64

        See the percentage change in a Series where NAs are forward-filled
        with the last valid observation before the change is computed.

        >>> s = pd.Series([90, 91, None, 85])
        >>> s
        0    90.0
        1    91.0
        2     NaN
        3    85.0
        dtype: float64

        >>> s.pct_change(fill_method='ffill')
        0         NaN
        1    0.011111
        2    0.000000
        3   -0.065934
        dtype: float64

        **DataFrame**

        Percentage change in French franc, Deutsche Mark, and Italian lira from
        1980-01-01 to 1980-03-01.

        >>> df = pd.DataFrame({
        ...     'FR': [4.0405, 4.0963, 4.3149],
        ...     'GR': [1.7246, 1.7482, 1.8519],
        ...     'IT': [804.74, 810.01, 860.13]},
        ...     index=['1980-01-01', '1980-02-01', '1980-03-01'])
        >>> df
                        FR      GR      IT
        1980-01-01  4.0405  1.7246  804.74
        1980-02-01  4.0963  1.7482  810.01
        1980-03-01  4.3149  1.8519  860.13

        >>> df.pct_change()
                          FR        GR        IT
        1980-01-01       NaN       NaN       NaN
        1980-02-01  0.013810  0.013684  0.006549
        1980-03-01  0.053365  0.059318  0.061876

        Percentage change in GOOG and APPL stock volume. Shows computing
        the percentage change between columns.

        >>> df = pd.DataFrame({
        ...     '2016': [1769950, 30586265],
        ...     '2015': [1500923, 40912316],
        ...     '2014': [1371819, 41403351]},
        ...     index=['GOOG', 'APPL'])
        >>> df
                  2016      2015      2014
        GOOG   1769950   1500923   1371819
        APPL  30586265  40912316  41403351

        >>> df.pct_change(axis='columns')
              2016      2015      2014
        GOOG   NaN -0.151997 -0.086016
        APPL   NaN  0.337604  0.012002
        """

    @Appender(_shared_docs["pct_change"] % _shared_doc_kwargs)
    def pct_change(
        self: FrameOrSeries,
        periods=1,
        fill_method="pad",
        limit=None,
        freq=None,
        **kwargs,
    ) -> FrameOrSeries:
        # TODO: Not sure if above is correct - need someone to confirm.
        axis = self._get_axis_number(kwargs.pop("axis", self._stat_axis_name))
        if fill_method is None:
            data = self
        else:
            _data = self.fillna(method=fill_method, axis=axis, limit=limit)
            assert _data is not None  # needed for mypy
            data = _data

        rs = data.div(data.shift(periods=periods, freq=freq, axis=axis, **kwargs)) - 1
        if freq is not None:
            # Shift method is implemented differently when freq is not None
            # We want to restore the original index
            rs = rs.loc[~rs.index.duplicated()]
            rs = rs.reindex_like(data)
        return rs
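
    # Usage sketch (illustrative, not a doctest): with ``fill_method=None``
    # the data is not forward-filled first, so NAs propagate into the result.
    #
    # >>> s = pd.Series([90, 91, None, 85])
    # >>> int(s.pct_change(fill_method=None).isna().sum())
    # 3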

    def _agg_by_level(self, name, axis=0, level=0, skipna=True, **kwargs):
        if axis is None:
            raise ValueError("Must specify 'axis' when aggregating by level.")
        grouped = self.groupby(level=level, axis=axis, sort=False)
        if hasattr(grouped, name) and skipna:
            return getattr(grouped, name)(**kwargs)
        axis = self._get_axis_number(axis)
        method = getattr(type(self), name)
        applyf = lambda x: method(x, axis=axis, skipna=skipna, **kwargs)
        return grouped.aggregate(applyf)

    @classmethod
    def _add_numeric_operations(cls):
        """
        Add the operations to the cls; evaluate the doc strings again
        """
        axis_descr, name, name2 = _doc_parms(cls)

        cls.any = _make_logical_function(
            cls,
            "any",
            name,
            name2,
            axis_descr,
            _any_desc,
            nanops.nanany,
            _any_see_also,
            _any_examples,
            empty_value=False,
        )
        cls.all = _make_logical_function(
            cls,
            "all",
            name,
            name2,
            axis_descr,
            _all_desc,
            nanops.nanall,
            _all_see_also,
            _all_examples,
            empty_value=True,
        )

        @Substitution(
            desc="Return the mean absolute deviation of the values "
            "for the requested axis.",
            name1=name,
            name2=name2,
            axis_descr=axis_descr,
            min_count="",
            see_also="",
            examples="",
        )
        @Appender(_num_doc)
        def mad(self, axis=None, skipna=None, level=None):
            if skipna is None:
                skipna = True
            if axis is None:
                axis = self._stat_axis_number
            if level is not None:
                return self._agg_by_level("mad", axis=axis, level=level, skipna=skipna)

            data = self._get_numeric_data()
            if axis == 0:
                demeaned = data - data.mean(axis=0)
            else:
                demeaned = data.sub(data.mean(axis=1), axis=0)
            return np.abs(demeaned).mean(axis=axis, skipna=skipna)

        cls.mad = mad
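
        # Usage sketch (illustrative, not a doctest): mad is the mean absolute
        # deviation from the mean. For [1, 2, 3, 4] the mean is 2.5 and the
        # absolute deviations are [1.5, 0.5, 0.5, 1.5], so:
        #
        # >>> pd.Series([1, 2, 3, 4]).mad()
        # 1.0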

        cls.sem = _make_stat_function_ddof(
            cls,
            "sem",
            name,
            name2,
            axis_descr,
            "Return unbiased standard error of the mean over requested "
            "axis.\n\nNormalized by N-1 by default. This can be changed "
            "using the ddof argument",
            nanops.nansem,
        )
        cls.var = _make_stat_function_ddof(
            cls,
            "var",
            name,
            name2,
            axis_descr,
            "Return unbiased variance over requested axis.\n\nNormalized by "
            "N-1 by default. This can be changed using the ddof argument",
            nanops.nanvar,
        )
        cls.std = _make_stat_function_ddof(
            cls,
            "std",
            name,
            name2,
            axis_descr,
            "Return sample standard deviation over requested axis."
            "\n\nNormalized by N-1 by default. This can be changed using the "
            "ddof argument",
            nanops.nanstd,
        )

        cls.cummin = _make_cum_function(
            cls,
            "cummin",
            name,
            name2,
            axis_descr,
            "minimum",
            np.minimum.accumulate,
            "min",
            np.inf,
            np.nan,
            _cummin_examples,
        )
        cls.cumsum = _make_cum_function(
            cls,
            "cumsum",
            name,
            name2,
            axis_descr,
            "sum",
            np.cumsum,
            "sum",
            0.0,
            np.nan,
            _cumsum_examples,
        )
        cls.cumprod = _make_cum_function(
            cls,
            "cumprod",
            name,
            name2,
            axis_descr,
            "product",
            np.cumprod,
            "prod",
            1.0,
            np.nan,
            _cumprod_examples,
        )
        cls.cummax = _make_cum_function(
            cls,
            "cummax",
            name,
            name2,
            axis_descr,
            "maximum",
            np.maximum.accumulate,
            "max",
            -np.inf,
            np.nan,
            _cummax_examples,
        )

        cls.sum = _make_min_count_stat_function(
            cls,
            "sum",
            name,
            name2,
            axis_descr,
            """Return the sum of the values for the requested axis.\n
            This is equivalent to the method ``numpy.sum``.""",
            nanops.nansum,
            _stat_func_see_also,
            _sum_examples,
        )
        cls.mean = _make_stat_function(
            cls,
            "mean",
            name,
            name2,
            axis_descr,
            "Return the mean of the values for the requested axis.",
            nanops.nanmean,
        )
        cls.skew = _make_stat_function(
            cls,
            "skew",
            name,
            name2,
            axis_descr,
            "Return unbiased skew over requested axis.\n\nNormalized by N-1.",
            nanops.nanskew,
        )
        cls.kurt = _make_stat_function(
            cls,
            "kurt",
            name,
            name2,
            axis_descr,
            "Return unbiased kurtosis over requested axis.\n\n"
            "Kurtosis obtained using Fisher's definition of\n"
            "kurtosis (kurtosis of normal == 0.0). Normalized "
            "by N-1.",
            nanops.nankurt,
        )
        cls.kurtosis = cls.kurt
        cls.prod = _make_min_count_stat_function(
            cls,
            "prod",
            name,
            name2,
            axis_descr,
            "Return the product of the values for the requested axis.",
            nanops.nanprod,
            examples=_prod_examples,
        )
        cls.product = cls.prod
        cls.median = _make_stat_function(
            cls,
            "median",
            name,
            name2,
            axis_descr,
            "Return the median of the values for the requested axis.",
            nanops.nanmedian,
        )
        cls.max = _make_stat_function(
            cls,
            "max",
            name,
            name2,
            axis_descr,
            """Return the maximum of the values for the requested axis.\n
            If you want the *index* of the maximum, use ``idxmax``. This is
            the equivalent of the ``numpy.ndarray`` method ``argmax``.""",
            nanops.nanmax,
            _stat_func_see_also,
            _max_examples,
        )
        cls.min = _make_stat_function(
            cls,
            "min",
            name,
            name2,
            axis_descr,
            """Return the minimum of the values for the requested axis.\n
            If you want the *index* of the minimum, use ``idxmin``. This is
            the equivalent of the ``numpy.ndarray`` method ``argmin``.""",
            nanops.nanmin,
            _stat_func_see_also,
            _min_examples,
        )

    @classmethod
    def _add_series_or_dataframe_operations(cls):
        """
        Add the series or dataframe only operations to the cls; evaluate
        the doc strings again.
        """
        from pandas.core.window import EWM, Expanding, Rolling, Window

        @Appender(Rolling.__doc__)
        def rolling(
            self,
            window,
            min_periods=None,
            center=False,
            win_type=None,
            on=None,
            axis=0,
            closed=None,
        ):
            axis = self._get_axis_number(axis)

            if win_type is not None:
                return Window(
                    self,
                    window=window,
                    min_periods=min_periods,
                    center=center,
                    win_type=win_type,
                    on=on,
                    axis=axis,
                    closed=closed,
                )

            return Rolling(
                self,
                window=window,
                min_periods=min_periods,
                center=center,
                win_type=win_type,
                on=on,
                axis=axis,
                closed=closed,
            )

        cls.rolling = rolling

        @Appender(Expanding.__doc__)
        def expanding(self, min_periods=1, center=False, axis=0):
            axis = self._get_axis_number(axis)
            return Expanding(self, min_periods=min_periods, center=center, axis=axis)

        cls.expanding = expanding

        @Appender(EWM.__doc__)
        def ewm(
            self,
            com=None,
            span=None,
            halflife=None,
            alpha=None,
            min_periods=0,
            adjust=True,
            ignore_na=False,
            axis=0,
        ):
            axis = self._get_axis_number(axis)
            return EWM(
                self,
                com=com,
                span=span,
                halflife=halflife,
                alpha=alpha,
                min_periods=min_periods,
                adjust=adjust,
                ignore_na=ignore_na,
                axis=axis,
            )

        cls.ewm = ewm

    @Appender(_shared_docs["transform"] % dict(axis="", **_shared_doc_kwargs))
    def transform(self, func, *args, **kwargs):
        result = self.agg(func, *args, **kwargs)
        if is_scalar(result) or len(result) != len(self):
            raise ValueError("transforms cannot produce aggregated results")

        return result
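
    # Usage sketch (illustrative, not a doctest): ``transform`` must produce
    # output of the same length as the input, so element-wise callables pass
    # while reducers raise the ValueError above.
    #
    # >>> pd.Series([1, 4, 9]).transform(np.sqrt)
    # 0    1.0
    # 1    2.0
    # 2    3.0
    # dtype: float64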

    # ----------------------------------------------------------------------
    # Misc methods

    _shared_docs[
        "valid_index"
    ] = """
        Return index for %(position)s non-NA/null value.

        Returns
        -------
        scalar : type of index

        Notes
        -----
        If all elements are non-NA/null, returns None.
        Also returns None for empty %(klass)s.
        """

    def _find_valid_index(self, how: str):
        """
        Retrieves the index of the first or last valid value.

        Parameters
        ----------
        how : {'first', 'last'}
            Use this parameter to change between the first or last valid index.

        Returns
        -------
        idx_first_valid : type of index
        """
        idxpos = find_valid_index(self._values, how)
        if idxpos is None:
            return None
        return self.index[idxpos]

    @Appender(
        _shared_docs["valid_index"] % {"position": "first", "klass": "Series/DataFrame"}
    )
    def first_valid_index(self):
        return self._find_valid_index("first")

    @Appender(
        _shared_docs["valid_index"] % {"position": "last", "klass": "Series/DataFrame"}
    )
    def last_valid_index(self):
        return self._find_valid_index("last")
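
    # Usage sketch (illustrative, not a doctest): positions 0, 2 and 4 below
    # are NA, so the first/last valid labels are 1 and 3; an all-NA object
    # would return None instead.
    #
    # >>> s = pd.Series([np.nan, 1.0, np.nan, 2.0, np.nan])
    # >>> s.first_valid_index(), s.last_valid_index()
    # (1, 3)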


def _doc_parms(cls):
    """Return a tuple of the doc parms."""
    axis_descr = (
        f"{{{', '.join(f'{a} ({i})' for i, a in enumerate(cls._AXIS_ORDERS))}}}"
    )
    name = cls._constructor_sliced.__name__ if cls._AXIS_LEN > 1 else "scalar"
    name2 = cls.__name__
    return axis_descr, name, name2
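
# Usage sketch (illustrative, not a doctest): for DataFrame this helper is
# expected to produce
#
# >>> _doc_parms(pd.DataFrame)
# ('{index (0), columns (1)}', 'Series', 'DataFrame')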

_num_doc = """
%(desc)s

Parameters
----------
axis : %(axis_descr)s
    Axis for the function to be applied on.
skipna : bool, default True
    Exclude NA/null values when computing the result.
level : int or level name, default None
    If the axis is a MultiIndex (hierarchical), count along a
    particular level, collapsing into a %(name1)s.
numeric_only : bool, default None
    Include only float, int, boolean columns. If None, will attempt to use
    everything, then use only numeric data. Not implemented for Series.
%(min_count)s\
**kwargs
    Additional keyword arguments to be passed to the function.

Returns
-------
%(name1)s or %(name2)s (if level specified)\
%(see_also)s\
%(examples)s
"""

_num_ddof_doc = """
%(desc)s

Parameters
----------
axis : %(axis_descr)s
skipna : bool, default True
    Exclude NA/null values. If an entire row/column is NA, the result
    will be NA.
level : int or level name, default None
    If the axis is a MultiIndex (hierarchical), count along a
    particular level, collapsing into a %(name1)s.
ddof : int, default 1
    Delta Degrees of Freedom. The divisor used in calculations is N - ddof,
    where N represents the number of elements.
numeric_only : bool, default None
    Include only float, int, boolean columns. If None, will attempt to use
    everything, then use only numeric data. Not implemented for Series.

Returns
-------
%(name1)s or %(name2)s (if level specified)\n"""

_bool_doc = """
%(desc)s

Parameters
----------
axis : {0 or 'index', 1 or 'columns', None}, default 0
    Indicate which axis or axes should be reduced.

    * 0 / 'index' : reduce the index, return a Series whose index is the
      original column labels.
    * 1 / 'columns' : reduce the columns, return a Series whose index is the
      original index.
    * None : reduce all axes, return a scalar.

bool_only : bool, default None
    Include only boolean columns. If None, will attempt to use everything,
    then use only boolean data. Not implemented for Series.
skipna : bool, default True
    Exclude NA/null values. If the entire row/column is NA and skipna is
    True, then the result will be %(empty_value)s, as for an empty row/column.
    If skipna is False, then NA are treated as True, because these are not
    equal to zero.
level : int or level name, default None
    If the axis is a MultiIndex (hierarchical), count along a
    particular level, collapsing into a %(name1)s.
**kwargs : any, default None
    Additional keywords have no effect but might be accepted for
    compatibility with NumPy.

Returns
-------
%(name1)s or %(name2)s
    If level is specified, then, %(name2)s is returned; otherwise, %(name1)s
    is returned.
%(see_also)s
%(examples)s"""

_all_desc = """\
Return whether all elements are True, potentially over an axis.

Returns True unless there is at least one element within a series or
along a Dataframe axis that is False or equivalent (e.g. zero or
empty)."""

_all_examples = """\
Examples
--------
**Series**

>>> pd.Series([True, True]).all()
True
>>> pd.Series([True, False]).all()
False
>>> pd.Series([]).all()
True
>>> pd.Series([np.nan]).all()
True
>>> pd.Series([np.nan]).all(skipna=False)
True

**DataFrames**

Create a dataframe from a dictionary.

>>> df = pd.DataFrame({'col1': [True, True], 'col2': [True, False]})
>>> df
   col1   col2
0  True   True
1  True  False

Default behaviour checks if column-wise values all return True.

>>> df.all()
col1     True
col2    False
dtype: bool

Specify ``axis='columns'`` to check if row-wise values all return True.

>>> df.all(axis='columns')
0     True
1    False
dtype: bool

Or ``axis=None`` for whether every value is True.

>>> df.all(axis=None)
False
"""

_all_see_also = """\
See Also
--------
Series.all : Return True if all elements are True.
DataFrame.any : Return True if one (or more) elements are True.
"""

_cnum_doc = """
Return cumulative %(desc)s over a DataFrame or Series axis.

Returns a DataFrame or Series of the same size containing the cumulative
%(desc)s.

Parameters
----------
axis : {0 or 'index', 1 or 'columns'}, default 0
    The index or the name of the axis. 0 is equivalent to None or 'index'.
skipna : bool, default True
    Exclude NA/null values. If an entire row/column is NA, the result
    will be NA.
*args, **kwargs :
    Additional keywords have no effect but might be accepted for
    compatibility with NumPy.

Returns
-------
%(name1)s or %(name2)s

See Also
--------
core.window.Expanding.%(accum_func_name)s : Similar functionality
    but ignores ``NaN`` values.
%(name2)s.%(accum_func_name)s : Return the %(desc)s over
    %(name2)s axis.
%(name2)s.cummax : Return cumulative maximum over %(name2)s axis.
%(name2)s.cummin : Return cumulative minimum over %(name2)s axis.
%(name2)s.cumsum : Return cumulative sum over %(name2)s axis.
%(name2)s.cumprod : Return cumulative product over %(name2)s axis.
%(examples)s"""

_cummin_examples = """\
Examples
--------
**Series**

>>> s = pd.Series([2, np.nan, 5, -1, 0])
>>> s
0    2.0
1    NaN
2    5.0
3   -1.0
4    0.0
dtype: float64

By default, NA values are ignored.

>>> s.cummin()
0    2.0
1    NaN
2    2.0
3   -1.0
4   -1.0
dtype: float64

To include NA values in the operation, use ``skipna=False``

>>> s.cummin(skipna=False)
0    2.0
1    NaN
2    NaN
3    NaN
4    NaN
dtype: float64

**DataFrame**

>>> df = pd.DataFrame([[2.0, 1.0],
...                    [3.0, np.nan],
...                    [1.0, 0.0]],
...                   columns=list('AB'))
>>> df
     A    B
0  2.0  1.0
1  3.0  NaN
2  1.0  0.0

By default, iterates over rows and finds the minimum
in each column. This is equivalent to ``axis=None`` or ``axis='index'``.

>>> df.cummin()
     A    B
0  2.0  1.0
1  2.0  NaN
2  1.0  0.0

To iterate over columns and find the minimum in each row,
use ``axis=1``

>>> df.cummin(axis=1)
     A    B
0  2.0  1.0
1  3.0  NaN
2  1.0  0.0
"""

_cumsum_examples = """\
Examples
--------
**Series**

>>> s = pd.Series([2, np.nan, 5, -1, 0])
>>> s
0    2.0
1    NaN
2    5.0
3   -1.0
4    0.0
dtype: float64

By default, NA values are ignored.

>>> s.cumsum()
0    2.0
1    NaN
2    7.0
3    6.0
4    6.0
dtype: float64

To include NA values in the operation, use ``skipna=False``

>>> s.cumsum(skipna=False)
0    2.0
1    NaN
2    NaN
3    NaN
4    NaN
dtype: float64

**DataFrame**

>>> df = pd.DataFrame([[2.0, 1.0],
...                    [3.0, np.nan],
...                    [1.0, 0.0]],
...                   columns=list('AB'))
>>> df
     A    B
0  2.0  1.0
1  3.0  NaN
2  1.0  0.0

By default, iterates over rows and finds the sum
in each column. This is equivalent to ``axis=None`` or ``axis='index'``.

>>> df.cumsum()
     A    B
0  2.0  1.0
1  5.0  NaN
2  6.0  1.0

To iterate over columns and find the sum in each row,
use ``axis=1``

>>> df.cumsum(axis=1)
     A    B
0  2.0  3.0
1  3.0  NaN
2  1.0  1.0
"""

_cumprod_examples = """\
Examples
--------
**Series**

>>> s = pd.Series([2, np.nan, 5, -1, 0])
>>> s
0    2.0
1    NaN
2    5.0
3   -1.0
4    0.0
dtype: float64

By default, NA values are ignored.

>>> s.cumprod()
0     2.0
1     NaN
2    10.0
3   -10.0
4    -0.0
dtype: float64

To include NA values in the operation, use ``skipna=False``

>>> s.cumprod(skipna=False)
0    2.0
1    NaN
2    NaN
3    NaN
4    NaN
dtype: float64

**DataFrame**

>>> df = pd.DataFrame([[2.0, 1.0],
...                    [3.0, np.nan],
...                    [1.0, 0.0]],
...                   columns=list('AB'))
>>> df
     A    B
0  2.0  1.0
1  3.0  NaN
2  1.0  0.0

By default, iterates over rows and finds the product
in each column. This is equivalent to ``axis=None`` or ``axis='index'``.

>>> df.cumprod()
     A    B
0  2.0  1.0
1  6.0  NaN
2  6.0  0.0

To iterate over columns and find the product in each row,
use ``axis=1``

>>> df.cumprod(axis=1)
     A    B
0  2.0  2.0
1  3.0  NaN
2  1.0  0.0
"""

_cummax_examples = """\
Examples
--------
**Series**

>>> s = pd.Series([2, np.nan, 5, -1, 0])
>>> s
0    2.0
1    NaN
2    5.0
3   -1.0
4    0.0
dtype: float64

By default, NA values are ignored.

>>> s.cummax()
0    2.0
1    NaN
2    5.0
3    5.0
4    5.0
dtype: float64

To include NA values in the operation, use ``skipna=False``

>>> s.cummax(skipna=False)
0    2.0
1    NaN
2    NaN
3    NaN
4    NaN
dtype: float64

**DataFrame**

>>> df = pd.DataFrame([[2.0, 1.0],
...                    [3.0, np.nan],
...                    [1.0, 0.0]],
...                   columns=list('AB'))
>>> df
     A    B
0  2.0  1.0
1  3.0  NaN
2  1.0  0.0

By default, iterates over rows and finds the maximum
in each column. This is equivalent to ``axis=None`` or ``axis='index'``.

>>> df.cummax()
     A    B
0  2.0  1.0
1  3.0  NaN
2  3.0  1.0

To iterate over columns and find the maximum in each row,
use ``axis=1``

>>> df.cummax(axis=1)
     A    B
0  2.0  2.0
1  3.0  NaN
2  1.0  1.0
"""

_any_see_also = """\
See Also
--------
numpy.any : Numpy version of this method.
Series.any : Return whether any element is True.
Series.all : Return whether all elements are True.
DataFrame.any : Return whether any element is True over requested axis.
DataFrame.all : Return whether all elements are True over requested axis.
"""

_any_desc = """\
Return whether any element is True, potentially over an axis.

Returns False unless there is at least one element within a series or
along a Dataframe axis that is True or equivalent (e.g. non-zero or
non-empty)."""

_any_examples = """\
Examples
--------
**Series**

For Series input, the output is a scalar indicating whether any element
is True.

>>> pd.Series([False, False]).any()
False
>>> pd.Series([True, False]).any()
True
>>> pd.Series([]).any()
False
>>> pd.Series([np.nan]).any()
False
>>> pd.Series([np.nan]).any(skipna=False)
True

**DataFrame**

Whether each column contains at least one True element (the default).

>>> df = pd.DataFrame({"A": [1, 2], "B": [0, 2], "C": [0, 0]})
>>> df
   A  B  C
0  1  0  0
1  2  2  0

>>> df.any()
A     True
B     True
C    False
dtype: bool

Aggregating over the columns.

>>> df = pd.DataFrame({"A": [True, False], "B": [1, 2]})
>>> df
       A  B
0   True  1
1  False  2

>>> df.any(axis='columns')
0    True
1    True
dtype: bool

>>> df = pd.DataFrame({"A": [True, False], "B": [1, 0]})
>>> df
       A  B
0   True  1
1  False  0

>>> df.any(axis='columns')
0    True
1    False
dtype: bool

Aggregating over the entire DataFrame with ``axis=None``.

>>> df.any(axis=None)
True

`any` for an empty DataFrame is an empty Series.

>>> pd.DataFrame([]).any()
Series([], dtype: bool)
"""

_shared_docs[
    "stat_func_example"
] = """

Examples
--------
>>> idx = pd.MultiIndex.from_arrays([
...     ['warm', 'warm', 'cold', 'cold'],
...     ['dog', 'falcon', 'fish', 'spider']],
...     names=['blooded', 'animal'])
>>> s = pd.Series([4, 2, 0, 8], name='legs', index=idx)
>>> s
blooded  animal
warm     dog       4
         falcon    2
cold     fish      0
         spider    8
Name: legs, dtype: int64

>>> s.{stat_func}()
{default_output}

{verb} using level names, as well as indices.

>>> s.{stat_func}(level='blooded')
blooded
warm    {level_output_0}
cold    {level_output_1}
Name: legs, dtype: int64

>>> s.{stat_func}(level=0)
blooded
warm    {level_output_0}
cold    {level_output_1}
Name: legs, dtype: int64"""

_sum_examples = _shared_docs["stat_func_example"].format(
    stat_func="sum", verb="Sum", default_output=14, level_output_0=6, level_output_1=8
)

_sum_examples += """

By default, the sum of an empty or all-NA Series is ``0``.

>>> pd.Series([]).sum()  # min_count=0 is the default
0.0

This can be controlled with the ``min_count`` parameter. For example, if
you'd like the sum of an empty series to be NaN, pass ``min_count=1``.

>>> pd.Series([]).sum(min_count=1)
nan

Thanks to the ``skipna`` parameter, ``min_count`` handles all-NA and
empty series identically.

>>> pd.Series([np.nan]).sum()
0.0

>>> pd.Series([np.nan]).sum(min_count=1)
nan"""

_max_examples = _shared_docs["stat_func_example"].format(
    stat_func="max", verb="Max", default_output=8, level_output_0=4, level_output_1=8
)

_min_examples = _shared_docs["stat_func_example"].format(
    stat_func="min", verb="Min", default_output=0, level_output_0=2, level_output_1=0
)

_stat_func_see_also = """

See Also
--------
Series.sum : Return the sum.
Series.min : Return the minimum.
Series.max : Return the maximum.
Series.idxmin : Return the index of the minimum.
Series.idxmax : Return the index of the maximum.
DataFrame.sum : Return the sum over the requested axis.
DataFrame.min : Return the minimum over the requested axis.
DataFrame.max : Return the maximum over the requested axis.
DataFrame.idxmin : Return the index of the minimum over the requested axis.
DataFrame.idxmax : Return the index of the maximum over the requested axis."""

_prod_examples = """

Examples
--------
By default, the product of an empty or all-NA Series is ``1``

>>> pd.Series([]).prod()
1.0

This can be controlled with the ``min_count`` parameter

>>> pd.Series([]).prod(min_count=1)
nan

Thanks to the ``skipna`` parameter, ``min_count`` handles all-NA and
empty series identically.

>>> pd.Series([np.nan]).prod()
1.0

>>> pd.Series([np.nan]).prod(min_count=1)
nan"""

_min_count_stub = """\
min_count : int, default 0
    The required number of valid values to perform the operation. If fewer than
    ``min_count`` non-NA values are present the result will be NA.

    .. versionadded:: 0.22.0

       Added with the default being 0. This means the sum of an all-NA
       or empty Series is 0, and the product of an all-NA or empty
       Series is 1.
"""


def _make_min_count_stat_function(
    cls, name, name1, name2, axis_descr, desc, f, see_also: str = "", examples: str = ""
):
    @Substitution(
        desc=desc,
        name1=name1,
        name2=name2,
        axis_descr=axis_descr,
        min_count=_min_count_stub,
        see_also=see_also,
        examples=examples,
    )
    @Appender(_num_doc)
    def stat_func(
        self,
        axis=None,
        skipna=None,
        level=None,
        numeric_only=None,
        min_count=0,
        **kwargs,
    ):
        if name == "sum":
            nv.validate_sum(tuple(), kwargs)
        elif name == "prod":
            nv.validate_prod(tuple(), kwargs)
        else:
            nv.validate_stat_func(tuple(), kwargs, fname=name)
        if skipna is None:
            skipna = True
        if axis is None:
            axis = self._stat_axis_number
        if level is not None:
            return self._agg_by_level(
                name, axis=axis, level=level, skipna=skipna, min_count=min_count
            )
        return self._reduce(
            f,
            name,
            axis=axis,
            skipna=skipna,
            numeric_only=numeric_only,
            min_count=min_count,
        )

    return set_function_name(stat_func, name, cls)


def _make_stat_function(
    cls, name, name1, name2, axis_descr, desc, f, see_also: str = "", examples: str = ""
):
    @Substitution(
        desc=desc,
        name1=name1,
        name2=name2,
        axis_descr=axis_descr,
        min_count="",
        see_also=see_also,
        examples=examples,
    )
    @Appender(_num_doc)
    def stat_func(
        self, axis=None, skipna=None, level=None, numeric_only=None, **kwargs
    ):
        if name == "median":
            nv.validate_median(tuple(), kwargs)
        else:
            nv.validate_stat_func(tuple(), kwargs, fname=name)
        if skipna is None:
            skipna = True
        if axis is None:
            axis = self._stat_axis_number
        if level is not None:
            return self._agg_by_level(name, axis=axis, level=level, skipna=skipna)
        return self._reduce(
            f, name, axis=axis, skipna=skipna, numeric_only=numeric_only
        )

    return set_function_name(stat_func, name, cls)


def _make_stat_function_ddof(cls, name, name1, name2, axis_descr, desc, f):
    @Substitution(desc=desc, name1=name1, name2=name2, axis_descr=axis_descr)
    @Appender(_num_ddof_doc)
    def stat_func(
        self, axis=None, skipna=None, level=None, ddof=1, numeric_only=None, **kwargs
    ):
        nv.validate_stat_ddof_func(tuple(), kwargs, fname=name)
        if skipna is None:
            skipna = True
        if axis is None:
            axis = self._stat_axis_number
        if level is not None:
            return self._agg_by_level(
                name, axis=axis, level=level, skipna=skipna, ddof=ddof
            )
        return self._reduce(
            f, name, axis=axis, numeric_only=numeric_only, skipna=skipna, ddof=ddof
        )

    return set_function_name(stat_func, name, cls)
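
# Usage sketch (illustrative, not a doctest): the generated std/var/sem all
# accept ``ddof``; the divisor is N - ddof, so population std uses ddof=0.
#
# >>> pd.Series([1, 2, 3, 4]).std(ddof=0)  # sqrt(5 / 4)
# 1.118033988749895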


def _make_cum_function(
    cls,
    name,
    name1,
    name2,
    axis_descr,
    desc,
    accum_func,
    accum_func_name,
    mask_a,
    mask_b,
    examples,
):
    @Substitution(
        desc=desc,
        name1=name1,
        name2=name2,
        axis_descr=axis_descr,
        accum_func_name=accum_func_name,
        examples=examples,
    )
    @Appender(_cnum_doc)
    def cum_func(self, axis=None, skipna=True, *args, **kwargs):
        skipna = nv.validate_cum_func_with_skipna(skipna, args, kwargs, name)
        if axis is None:
            axis = self._stat_axis_number
        else:
            axis = self._get_axis_number(axis)

        if axis == 1:
            return cum_func(self.T, axis=0, skipna=skipna, *args, **kwargs).T

        def na_accum_func(blk_values):
            # We will be applying this function to block values
            if blk_values.dtype.kind in ["m", "M"]:
                # GH#30460, GH#29058
                # numpy 1.18 started sorting NaTs at the end instead of beginning,
                # so we need to work around to maintain backwards-consistency.
                orig_dtype = blk_values.dtype

                # We need to define mask before masking NaTs
                mask = isna(blk_values)

                if accum_func == np.minimum.accumulate:
                    # Note: the accum_func comparison fails as an "is" comparison
                    y = blk_values.view("i8")
                    y[mask] = np.iinfo(np.int64).max
                    changed = True
                else:
                    y = blk_values
                    changed = False

                result = accum_func(y.view("i8"), axis)
                if skipna:
                    np.putmask(result, mask, iNaT)
                elif accum_func == np.minimum.accumulate:
                    # Restore NaTs that we masked previously
                    nz = (~np.asarray(mask)).nonzero()[0]
                    if len(nz):
                        # everything up to the first non-na entry stays NaT
                        result[: nz[0]] = iNaT

                if changed:
                    # restore NaT elements
                    y[mask] = iNaT  # TODO: could try/finally for this?

                if isinstance(blk_values, np.ndarray):
                    result = result.view(orig_dtype)
                else:
                    # DatetimeArray
                    result = type(blk_values)._from_sequence(result, dtype=orig_dtype)

            elif skipna and not issubclass(
                blk_values.dtype.type, (np.integer, np.bool_)
            ):
                vals = blk_values.copy().T
                mask = isna(vals)
                np.putmask(vals, mask, mask_a)
                result = accum_func(vals, axis)
                np.putmask(result, mask, mask_b)
            else:
                result = accum_func(blk_values.T, axis)

            # transpose back for ndarray, not for EA
            return result.T if hasattr(result, "T") else result

        result = self._data.apply(na_accum_func)

        d = self._construct_axes_dict()
        d["copy"] = False
        return self._constructor(result, **d).__finalize__(self)

    return set_function_name(cum_func, name, cls)
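
# Usage sketch (illustrative, not a doctest): the datetimelike branch above
# keeps a leading NaT as NaT in a skipna cummin instead of treating it as
# the integer view of iNaT.
#
# >>> s = pd.Series(pd.to_datetime(["NaT", "2020-01-02", "2020-01-01"]))
# >>> s.cummin().isna().tolist()
# [True, False, False]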


def _make_logical_function(
    cls, name, name1, name2, axis_descr, desc, f, see_also, examples, empty_value
):
    @Substitution(
        desc=desc,
        name1=name1,
        name2=name2,
        axis_descr=axis_descr,
        see_also=see_also,
        examples=examples,
        empty_value=empty_value,
    )
    @Appender(_bool_doc)
    def logical_func(self, axis=0, bool_only=None, skipna=True, level=None, **kwargs):
        nv.validate_logical_func(tuple(), kwargs, fname=name)
        if level is not None:
            if bool_only is not None:
                raise NotImplementedError(
                    "Option bool_only is not implemented with option level."
                )
            return self._agg_by_level(name, axis=axis, level=level, skipna=skipna)
        return self._reduce(
            f,
            name,
            axis=axis,
            skipna=skipna,
            numeric_only=bool_only,
            filter_type="bool",
        )

    return set_function_name(logical_func, name, cls)
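
# Usage sketch (illustrative, not a doctest): the generated any/all accept
# ``bool_only`` to restrict the reduction to boolean columns.
#
# >>> df = pd.DataFrame({"a": [True, False], "b": [1, 2]})
# >>> df.any(bool_only=True)
# a    True
# dtype: bool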