- """
- Data structure for 1-dimensional cross-sectional and time series data
- """
- from io import StringIO
- from shutil import get_terminal_size
- from textwrap import dedent
- from typing import IO, Any, Callable, Hashable, List, Optional
- import warnings
- import numpy as np
- from pandas._config import get_option
- from pandas._libs import index as libindex, lib, reshape, tslibs
- from pandas.compat.numpy import function as nv
- from pandas.util._decorators import Appender, Substitution
- from pandas.util._validators import validate_bool_kwarg, validate_percentile
- from pandas.core.dtypes.cast import convert_dtypes
- from pandas.core.dtypes.common import (
- _is_unorderable_exception,
- ensure_platform_int,
- is_bool,
- is_categorical_dtype,
- is_datetime64_dtype,
- is_dict_like,
- is_extension_array_dtype,
- is_integer,
- is_iterator,
- is_list_like,
- is_object_dtype,
- is_scalar,
- is_timedelta64_dtype,
- )
- from pandas.core.dtypes.generic import (
- ABCDataFrame,
- ABCDatetimeIndex,
- ABCSeries,
- ABCSparseArray,
- )
- from pandas.core.dtypes.inference import is_hashable
- from pandas.core.dtypes.missing import (
- isna,
- na_value_for_dtype,
- notna,
- remove_na_arraylike,
- )
- import pandas as pd
- from pandas.core import algorithms, base, generic, nanops, ops
- from pandas.core.accessor import CachedAccessor
- from pandas.core.arrays import ExtensionArray, try_cast_to_ea
- from pandas.core.arrays.categorical import Categorical, CategoricalAccessor
- from pandas.core.arrays.sparse import SparseAccessor
- import pandas.core.common as com
- from pandas.core.construction import (
- create_series_with_explicit_dtype,
- extract_array,
- is_empty_data,
- sanitize_array,
- )
- from pandas.core.groupby import generic as groupby_generic
- from pandas.core.indexers import maybe_convert_indices
- from pandas.core.indexes.accessors import CombinedDatetimelikeProperties
- from pandas.core.indexes.api import (
- Float64Index,
- Index,
- InvalidIndexError,
- MultiIndex,
- ensure_index,
- )
- import pandas.core.indexes.base as ibase
- from pandas.core.indexes.datetimes import DatetimeIndex
- from pandas.core.indexes.period import PeriodIndex
- from pandas.core.indexes.timedeltas import TimedeltaIndex
- from pandas.core.indexing import check_bool_indexer
- from pandas.core.internals import SingleBlockManager
- from pandas.core.strings import StringMethods
- from pandas.core.tools.datetimes import to_datetime
- import pandas.io.formats.format as fmt
- import pandas.plotting
- __all__ = ["Series"]
- _shared_doc_kwargs = dict(
- axes="index",
- klass="Series",
- axes_single_arg="{0 or 'index'}",
- axis="""axis : {0 or 'index'}
- Parameter needed for compatibility with DataFrame.""",
- inplace="""inplace : boolean, default False
- If True, performs operation inplace and returns None.""",
- unique="np.ndarray",
- duplicated="Series",
- optional_by="",
- optional_mapper="",
- optional_labels="",
- optional_axis="",
- versionadded_to_excel="\n .. versionadded:: 0.20.0\n",
- )
- def _coerce_method(converter):
- """
- Install the scalar coercion methods.
- """
- def wrapper(self):
- if len(self) == 1:
- return converter(self.iloc[0])
- raise TypeError(f"cannot convert the series to {converter}")
- wrapper.__name__ = f"__{converter.__name__}__"
- return wrapper
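- # A minimal doctest-style sketch of the coercion behavior above (hedged;
- # assumes only the module-level ``pd`` import): a one-element Series
- # coerces to a scalar, while longer ones raise.
- # >>> float(pd.Series([3.5]))
- # 3.5
- # >>> int(pd.Series([1, 2]))
- # Traceback (most recent call last):
- # ...
- # TypeError: cannot convert the series to <class 'int'>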
- # ----------------------------------------------------------------------
- # Series class
- class Series(base.IndexOpsMixin, generic.NDFrame):
- """
- One-dimensional ndarray with axis labels (including time series).
- Labels need not be unique but must be a hashable type. The object
- supports both integer- and label-based indexing and provides a host of
- methods for performing operations involving the index. Statistical
- methods from ndarray have been overridden to automatically exclude
- missing data (currently represented as NaN).
- Operations between Series (+, -, /, *, **) align values based on their
- associated index values; they need not be the same length. The result
- index will be the sorted union of the two indexes.
- Parameters
- ----------
- data : array-like, Iterable, dict, or scalar value
- Contains data stored in Series.
- .. versionchanged:: 0.23.0
- If data is a dict, argument order is maintained for Python 3.6
- and later.
- index : array-like or Index (1d)
- Values must be hashable and have the same length as `data`.
- Non-unique index values are allowed. Will default to
- RangeIndex (0, 1, 2, ..., n) if not provided. If both a dict and index
- sequence are used, the index will override the keys found in the
- dict.
- dtype : str, numpy.dtype, or ExtensionDtype, optional
- Data type for the output Series. If not specified, this will be
- inferred from `data`.
- See the :ref:`user guide <basics.dtypes>` for more usages.
- name : str, optional
- The name to give to the Series.
- copy : bool, default False
- Copy input data.
- """
- _typ = "series"
- _name: Optional[Hashable]
- _metadata: List[str] = ["name"]
- _accessors = {"dt", "cat", "str", "sparse"}
- _deprecations = (
- base.IndexOpsMixin._deprecations
- | generic.NDFrame._deprecations
- | frozenset(["compress", "ptp"])
- )
- # Override cache_readonly because Series is mutable
- hasnans = property(
- base.IndexOpsMixin.hasnans.func, doc=base.IndexOpsMixin.hasnans.__doc__
- )
- _data: SingleBlockManager
- div: Callable[["Series", Any], "Series"]
- rdiv: Callable[["Series", Any], "Series"]
- # ----------------------------------------------------------------------
- # Constructors
- def __init__(
- self, data=None, index=None, dtype=None, name=None, copy=False, fastpath=False
- ):
- # we are called internally, so short-circuit
- if fastpath:
- # data is an ndarray, index is defined
- if not isinstance(data, SingleBlockManager):
- data = SingleBlockManager(data, index, fastpath=True)
- if copy:
- data = data.copy()
- if index is None:
- index = data.index
- else:
- name = ibase.maybe_extract_name(name, data, type(self))
- if is_empty_data(data) and dtype is None:
- # gh-17261
- warnings.warn(
- "The default dtype for empty Series will be 'object' instead "
- "of 'float64' in a future version. Specify a dtype explicitly "
- "to silence this warning.",
- DeprecationWarning,
- stacklevel=2,
- )
- # uncomment the line below when removing the DeprecationWarning
- # dtype = np.dtype(object)
- if index is not None:
- index = ensure_index(index)
- if data is None:
- data = {}
- if dtype is not None:
- dtype = self._validate_dtype(dtype)
- if isinstance(data, MultiIndex):
- raise NotImplementedError(
- "initializing a Series from a MultiIndex is not supported"
- )
- elif isinstance(data, Index):
- if dtype is not None:
- # astype copies
- data = data.astype(dtype)
- else:
- # need to copy to avoid aliasing issues
- data = data._values.copy()
- if isinstance(data, ABCDatetimeIndex) and data.tz is not None:
- # GH#24096 need copy to be deep for datetime64tz case
- # TODO: See if we can avoid these copies
- data = data._values.copy(deep=True)
- copy = False
- elif isinstance(data, np.ndarray):
- if len(data.dtype):
- # GH#13296 we are dealing with a compound dtype, which
- # should be treated as 2D
- raise ValueError(
- "Cannot construct a Series from an ndarray with "
- "compound dtype. Use DataFrame instead."
- )
- pass
- elif isinstance(data, ABCSeries):
- if index is None:
- index = data.index
- else:
- data = data.reindex(index, copy=copy)
- data = data._data
- elif is_dict_like(data):
- data, index = self._init_dict(data, index, dtype)
- dtype = None
- copy = False
- elif isinstance(data, SingleBlockManager):
- if index is None:
- index = data.index
- elif not data.index.equals(index) or copy:
- # GH#19275 SingleBlockManager input should only be called
- # internally
- raise AssertionError(
- "Cannot pass both SingleBlockManager "
- "`data` argument and a different "
- "`index` argument. `copy` must be False."
- )
- elif is_extension_array_dtype(data):
- pass
- elif isinstance(data, (set, frozenset)):
- raise TypeError(f"'{type(data).__name__}' type is unordered")
- elif isinstance(data, ABCSparseArray):
- # handle sparse passed here (and force conversion)
- data = data.to_dense()
- else:
- data = com.maybe_iterable_to_list(data)
- if index is None:
- if not is_list_like(data):
- data = [data]
- index = ibase.default_index(len(data))
- elif is_list_like(data):
- # a scalar numpy array is list-like but doesn't
- # have a proper length
- try:
- if len(index) != len(data):
- raise ValueError(
- f"Length of passed values is {len(data)}, "
- f"index implies {len(index)}."
- )
- except TypeError:
- pass
- # create/copy the manager
- if isinstance(data, SingleBlockManager):
- if dtype is not None:
- data = data.astype(dtype=dtype, errors="ignore", copy=copy)
- elif copy:
- data = data.copy()
- else:
- data = sanitize_array(data, index, dtype, copy, raise_cast_failure=True)
- data = SingleBlockManager(data, index, fastpath=True)
- generic.NDFrame.__init__(self, data, fastpath=True)
- self.name = name
- self._set_axis(0, index, fastpath=True)
- def _init_dict(self, data, index=None, dtype=None):
- """
- Derive the "_data" and "index" attributes of a new Series from a
- dictionary input.
- Parameters
- ----------
- data : dict or dict-like
- Data used to populate the new Series.
- index : Index or index-like, default None
- Index for the new Series: if None, use dict keys.
- dtype : dtype, default None
- The dtype for the new Series: if None, infer from data.
- Returns
- -------
- _data : BlockManager for the new Series
- index : index for the new Series
- """
- # Looking for NaN in dict doesn't work ({np.nan : 1}[float('nan')]
- # raises KeyError), so we iterate the entire dict, and align
- if data:
- keys, values = zip(*data.items())
- values = list(values)
- elif index is not None:
- # fastpath for Series(data=None). Just broadcast a scalar
- # instead of reindexing.
- values = na_value_for_dtype(dtype)
- keys = index
- else:
- keys, values = [], []
- # Input is now list-like, so rely on "standard" construction:
- # TODO: passing np.float64 to not break anything yet. See GH-17261
- s = create_series_with_explicit_dtype(
- values, index=keys, dtype=dtype, dtype_if_empty=np.float64
- )
- # Now we just make sure the order is respected, if any
- if data and index is not None:
- s = s.reindex(index, copy=False)
- return s._data, s.index
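- # A short sketch of why the dict is iterated rather than looked up (see
- # the NaN comment above; plain Python, no pandas assumptions): a fresh
- # NaN object never compares equal to a stored NaN key.
- # >>> d = {float('nan'): 1}
- # >>> d[float('nan')]
- # Traceback (most recent call last):
- # ...
- # KeyError: nan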
- # ----------------------------------------------------------------------
- @property
- def _constructor(self):
- return Series
- @property
- def _constructor_expanddim(self):
- from pandas.core.frame import DataFrame
- return DataFrame
- # types
- @property
- def _can_hold_na(self):
- return self._data._can_hold_na
- _index = None
- def _set_axis(self, axis, labels, fastpath=False):
- """
- Override generic; we want to set the _typ here.
- """
- if not fastpath:
- labels = ensure_index(labels)
- is_all_dates = labels.is_all_dates
- if is_all_dates:
- if not isinstance(labels, (DatetimeIndex, PeriodIndex, TimedeltaIndex)):
- try:
- labels = DatetimeIndex(labels)
- # need to set here because we changed the index
- if fastpath:
- self._data.set_axis(axis, labels)
- except (tslibs.OutOfBoundsDatetime, ValueError):
- # labels may exceed datetime bounds,
- # or not be a DatetimeIndex
- pass
- self._set_subtyp(is_all_dates)
- object.__setattr__(self, "_index", labels)
- if not fastpath:
- self._data.set_axis(axis, labels)
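- # Hedged sketch of the datetime coercion above (doctest-style; assumes
- # the module-level ``pd`` import): an all-dates index ends up as a
- # DatetimeIndex once the axis is set.
- # >>> s = pd.Series([1], index=[pd.Timestamp("2020-01-01")])
- # >>> type(s.index).__name__
- # 'DatetimeIndex'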
- def _set_subtyp(self, is_all_dates):
- if is_all_dates:
- object.__setattr__(self, "_subtyp", "time_series")
- else:
- object.__setattr__(self, "_subtyp", "series")
- def _update_inplace(self, result, **kwargs):
- # we want to call the generic version and not the IndexOpsMixin
- return generic.NDFrame._update_inplace(self, result, **kwargs)
- # ndarray compatibility
- @property
- def dtype(self):
- """
- Return the dtype object of the underlying data.
- """
- return self._data.dtype
- @property
- def dtypes(self):
- """
- Return the dtype object of the underlying data.
- """
- return self._data.dtype
- @property
- def name(self) -> Optional[Hashable]:
- return self._name
- @name.setter
- def name(self, value: Optional[Hashable]) -> None:
- if not is_hashable(value):
- raise TypeError("Series.name must be a hashable type")
- object.__setattr__(self, "_name", value)
- @property
- def values(self):
- """
- Return Series as ndarray or ndarray-like depending on the dtype.
- .. warning::
- We recommend using :attr:`Series.array` or
- :meth:`Series.to_numpy`, depending on whether you need
- a reference to the underlying data or a NumPy array.
- Returns
- -------
- numpy.ndarray or ndarray-like
- See Also
- --------
- Series.array : Reference to the underlying data.
- Series.to_numpy : A NumPy array representing the underlying data.
- Examples
- --------
- >>> pd.Series([1, 2, 3]).values
- array([1, 2, 3])
- >>> pd.Series(list('aabc')).values
- array(['a', 'a', 'b', 'c'], dtype=object)
- >>> pd.Series(list('aabc')).astype('category').values
- [a, a, b, c]
- Categories (3, object): [a, b, c]
- Timezone aware datetime data is converted to UTC:
- >>> pd.Series(pd.date_range('20130101', periods=3,
- ... tz='US/Eastern')).values
- array(['2013-01-01T05:00:00.000000000',
- '2013-01-02T05:00:00.000000000',
- '2013-01-03T05:00:00.000000000'], dtype='datetime64[ns]')
- """
- return self._data.external_values()
- @property
- def _values(self):
- """
- Return the internal repr of this data (defined by Block.internal_values).
- These are the values as stored in the Block (ndarray or ExtensionArray
- depending on the Block class).
- Differs from the public ``.values`` for certain data types, because of
- historical backwards compatibility of the public attribute (e.g. period
- returns object ndarray and datetimetz a datetime64[ns] ndarray for
- ``.values`` while it returns an ExtensionArray for ``._values`` in those
- cases).
- Differs from ``.array`` in that this still returns the numpy array if
- the Block is backed by a numpy array, while ``.array`` always returns
- an ExtensionArray.
- Differs from ``._ndarray_values``, as that always returns a numpy
- array (it will call ``_ndarray_values`` on the ExtensionArray, if
- the Series was backed by an ExtensionArray).
- Overview:
- dtype | values | _values | array | _ndarray_values |
- ----------- | ------------- | ------------- | ------------- | --------------- |
- Numeric | ndarray | ndarray | PandasArray | ndarray |
- Category | Categorical | Categorical | Categorical | ndarray[int] |
- dt64[ns] | ndarray[M8ns] | ndarray[M8ns] | DatetimeArray | ndarray[M8ns] |
- dt64[ns tz] | ndarray[M8ns] | DatetimeArray | DatetimeArray | ndarray[M8ns] |
- Period | ndarray[obj] | PeriodArray | PeriodArray | ndarray[int] |
- Nullable | EA | EA | EA | ndarray |
- """
- return self._data.internal_values()
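- # A small sketch of the values/_values split from the overview table
- # above (assumes this version's behavior): for tz-aware data, ``.values``
- # is a UTC datetime64[ns] ndarray while ``._values`` keeps the
- # DatetimeArray.
- # >>> s = pd.Series(pd.date_range("2000", periods=2, tz="UTC"))
- # >>> type(s.values).__name__, type(s._values).__name__
- # ('ndarray', 'DatetimeArray')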
- @Appender(base.IndexOpsMixin.array.__doc__) # type: ignore
- @property
- def array(self) -> ExtensionArray:
- return self._data._block.array_values()
- def _internal_get_values(self):
- """
- Same as values (but handles sparseness conversions); is a view.
- Returns
- -------
- numpy.ndarray
- Data of the Series.
- """
- return self._data.get_values()
- # ops
- def ravel(self, order="C"):
- """
- Return the flattened underlying data as an ndarray.
- Returns
- -------
- numpy.ndarray or ndarray-like
- Flattened data of the Series.
- See Also
- --------
- numpy.ndarray.ravel
- """
- return self._values.ravel(order=order)
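- # Minimal usage sketch (doctest-style; assumes the module-level ``pd``
- # import): ``ravel`` hands back the underlying ndarray, flattened.
- # >>> pd.Series([1, 2, 3]).ravel()
- # array([1, 2, 3])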
- def __len__(self) -> int:
- """
- Return the length of the Series.
- """
- return len(self._data)
- def view(self, dtype=None):
- """
- Create a new view of the Series.
- This function will return a new Series with a view of the same
- underlying values in memory, optionally reinterpreted with a new data
- type. The new data type must preserve the same size in bytes so as
- not to cause index misalignment.
- Parameters
- ----------
- dtype : data type
- Data type object or one of their string representations.
- Returns
- -------
- Series
- A new Series object as a view of the same data in memory.
- See Also
- --------
- numpy.ndarray.view : Equivalent numpy function to create a new view of
- the same data in memory.
- Notes
- -----
- Series are instantiated with ``dtype=float64`` by default. While
- ``numpy.ndarray.view()`` will return a view with the same data type as
- the original array, ``Series.view()`` (without specified dtype)
- will try using ``float64`` and may fail if the original data type size
- in bytes is not the same.
- Examples
- --------
- >>> s = pd.Series([-2, -1, 0, 1, 2], dtype='int8')
- >>> s
- 0 -2
- 1 -1
- 2 0
- 3 1
- 4 2
- dtype: int8
- The 8-bit signed integer representation of `-1` is `0b11111111`, but
- the same bytes represent 255 if read as an 8-bit unsigned integer:
- >>> us = s.view('uint8')
- >>> us
- 0 254
- 1 255
- 2 0
- 3 1
- 4 2
- dtype: uint8
- The views share the same underlying values:
- >>> us[0] = 128
- >>> s
- 0 -128
- 1 -1
- 2 0
- 3 1
- 4 2
- dtype: int8
- """
- return self._constructor(
- self._values.view(dtype), index=self.index
- ).__finalize__(self)
- # ----------------------------------------------------------------------
- # NDArray Compat
- _HANDLED_TYPES = (Index, ExtensionArray, np.ndarray)
- def __array_ufunc__(
- self, ufunc: Callable, method: str, *inputs: Any, **kwargs: Any
- ):
- # TODO: handle DataFrame
- cls = type(self)
- # for binary ops, use our custom dunder methods
- result = ops.maybe_dispatch_ufunc_to_dunder_op(
- self, ufunc, method, *inputs, **kwargs
- )
- if result is not NotImplemented:
- return result
- # Determine if we should defer.
- no_defer = (np.ndarray.__array_ufunc__, cls.__array_ufunc__)
- for item in inputs:
- higher_priority = (
- hasattr(item, "__array_priority__")
- and item.__array_priority__ > self.__array_priority__
- )
- has_array_ufunc = (
- hasattr(item, "__array_ufunc__")
- and type(item).__array_ufunc__ not in no_defer
- and not isinstance(item, self._HANDLED_TYPES)
- )
- if higher_priority or has_array_ufunc:
- return NotImplemented
- # align all the inputs.
- names = [getattr(x, "name") for x in inputs if hasattr(x, "name")]
- types = tuple(type(x) for x in inputs)
- # TODO: dataframe
- alignable = [x for x, t in zip(inputs, types) if issubclass(t, Series)]
- if len(alignable) > 1:
- # This triggers alignment.
- # At the moment, there aren't any ufuncs with more than two inputs
- # so this ends up just being x1.index | x2.index, but we write
- # it to handle *args.
- index = alignable[0].index
- for s in alignable[1:]:
- index |= s.index
- inputs = tuple(
- x.reindex(index) if issubclass(t, Series) else x
- for x, t in zip(inputs, types)
- )
- else:
- index = self.index
- inputs = tuple(extract_array(x, extract_numpy=True) for x in inputs)
- result = getattr(ufunc, method)(*inputs, **kwargs)
- name: Optional[Hashable]
- if len(set(names)) == 1:
- name = names[0]
- else:
- name = None
- def construct_return(result):
- if lib.is_scalar(result):
- return result
- elif result.ndim > 1:
- # e.g. np.subtract.outer
- if method == "outer":
- # GH#27198
- raise NotImplementedError
- return result
- return self._constructor(result, index=index, name=name, copy=False)
- if type(result) is tuple:
- # multiple return values
- return tuple(construct_return(x) for x in result)
- elif method == "at":
- # no return value
- return None
- else:
- return construct_return(result)
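- # Hedged sketch of the alignment step above (doctest-style; assumes the
- # module-level ``pd``/``np`` imports): ufunc inputs are reindexed to the
- # union of their indexes before the ufunc is applied.
- # >>> s1 = pd.Series([1.0, 2.0], index=["a", "b"])
- # >>> s2 = pd.Series([10.0, 20.0], index=["b", "c"])
- # >>> np.add(s1, s2)
- # a     NaN
- # b    12.0
- # c     NaN
- # dtype: float64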
- def __array__(self, dtype=None) -> np.ndarray:
- """
- Return the values as a NumPy array.
- Users should not call this directly. Rather, it is invoked by
- :func:`numpy.array` and :func:`numpy.asarray`.
- Parameters
- ----------
- dtype : str or numpy.dtype, optional
- The dtype to use for the resulting NumPy array. By default,
- the dtype is inferred from the data.
- Returns
- -------
- numpy.ndarray
- The values in the series converted to a :class:`numpy.ndarray`
- with the specified `dtype`.
- See Also
- --------
- array : Create a new array from data.
- Series.array : Zero-copy view to the array backing the Series.
- Series.to_numpy : Series method for similar behavior.
- Examples
- --------
- >>> ser = pd.Series([1, 2, 3])
- >>> np.asarray(ser)
- array([1, 2, 3])
- For timezone-aware data, the timezones may be retained with
- ``dtype='object'``
- >>> tzser = pd.Series(pd.date_range('2000', periods=2, tz="CET"))
- >>> np.asarray(tzser, dtype="object")
- array([Timestamp('2000-01-01 00:00:00+0100', tz='CET', freq='D'),
- Timestamp('2000-01-02 00:00:00+0100', tz='CET', freq='D')],
- dtype=object)
- Or the values may be localized to UTC and the tzinfo discarded with
- ``dtype='datetime64[ns]'``
- >>> np.asarray(tzser, dtype="datetime64[ns]") # doctest: +ELLIPSIS
- array(['1999-12-31T23:00:00.000000000', ...],
- dtype='datetime64[ns]')
- """
- return np.asarray(self.array, dtype)
- # ----------------------------------------------------------------------
- # Unary Methods
- # coercion
- __float__ = _coerce_method(float)
- __long__ = _coerce_method(int)
- __int__ = _coerce_method(int)
- # ----------------------------------------------------------------------
- def _unpickle_series_compat(self, state):
- if isinstance(state, dict):
- self._data = state["_data"]
- self.name = state["name"]
- self.index = self._data.index
- elif isinstance(state, tuple):
- # < 0.12 series pickle
- nd_state, own_state = state
- # recreate the ndarray
- data = np.empty(nd_state[1], dtype=nd_state[2])
- np.ndarray.__setstate__(data, nd_state)
- # backwards compat
- index, name = own_state[0], None
- if len(own_state) > 1:
- name = own_state[1]
- # recreate
- self._data = SingleBlockManager(data, index, fastpath=True)
- self._index = index
- self.name = name
- else:
- raise Exception(f"cannot unpickle legacy formats -> [{state}]")
- # indexers
- @property
- def axes(self):
- """
- Return a list of the row axis labels.
- """
- return [self.index]
- # ----------------------------------------------------------------------
- # Indexing Methods
- @Appender(generic.NDFrame.take.__doc__)
- def take(self, indices, axis=0, is_copy=None, **kwargs) -> "Series":
- if is_copy is not None:
- warnings.warn(
- "is_copy is deprecated and will be removed in a future version. "
- "'take' always returns a copy, so there is no need to specify this.",
- FutureWarning,
- stacklevel=2,
- )
- nv.validate_take(tuple(), kwargs)
- indices = ensure_platform_int(indices)
- new_index = self.index.take(indices)
- if is_categorical_dtype(self):
- # https://github.com/pandas-dev/pandas/issues/20664
- # TODO: remove when the default Categorical.take behavior changes
- indices = maybe_convert_indices(indices, len(self._get_axis(axis)))
- kwargs = {"allow_fill": False}
- else:
- kwargs = {}
- new_values = self._values.take(indices, **kwargs)
- return self._constructor(
- new_values, index=new_index, fastpath=True
- ).__finalize__(self)
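- # Minimal usage sketch for ``take`` (doctest-style; assumes the
- # module-level ``pd`` import): positions are taken from both the values
- # and the index, so the original labels travel with the values.
- # >>> s = pd.Series([10, 20, 30], index=["a", "b", "c"])
- # >>> s.take([2, 0])
- # c    30
- # a    10
- # dtype: int64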
- def _take_with_is_copy(self, indices, axis=0, **kwargs):
- """
- Internal version of the `take` method that sets the `_is_copy`
- attribute to keep track of the parent dataframe (used in indexing
- for the SettingWithCopyWarning). For Series this does the same
- as the public take (it never sets `_is_copy`).
- See the docstring of `take` for full explanation of the parameters.
- """
- return self.take(indices=indices, axis=axis, **kwargs)
- def _ixs(self, i: int, axis: int = 0):
- """
- Return the i-th value or values in the Series by location.
- Parameters
- ----------
- i : int
- Returns
- -------
- scalar (int) or Series (slice, sequence)
- """
- # dispatch to the values if we need
- values = self._values
- if isinstance(values, np.ndarray):
- return libindex.get_value_at(values, i)
- else:
- return values[i]
- def _slice(self, slobj: slice, axis: int = 0, kind=None):
- slobj = self.index._convert_slice_indexer(slobj, kind=kind or "getitem")
- return self._get_values(slobj)
- def __getitem__(self, key):
- key = com.apply_if_callable(key, self)
- try:
- result = self.index.get_value(self, key)
- if not is_scalar(result):
- if is_list_like(result) and not isinstance(result, Series):
- # we need to box if loc of the key isn't scalar here
- # otherwise have inline ndarray/lists
- try:
- if not is_scalar(self.index.get_loc(key)):
- result = self._constructor(
- result, index=[key] * len(result), dtype=self.dtype
- ).__finalize__(self)
- except KeyError:
- pass
- return result
- except InvalidIndexError:
- pass
- except (KeyError, ValueError):
- if isinstance(key, tuple) and isinstance(self.index, MultiIndex):
- # kludge
- pass
- elif key is Ellipsis:
- return self
- elif com.is_bool_indexer(key):
- pass
- else:
- # we can try to coerce the indexer (or this will raise)
- new_key = self.index._convert_scalar_indexer(key, kind="getitem")
- if type(new_key) != type(key):
- return self.__getitem__(new_key)
- raise
- if is_iterator(key):
- key = list(key)
- if com.is_bool_indexer(key):
- key = check_bool_indexer(self.index, key)
- return self._get_with(key)
- def _get_with(self, key):
- # other: fancy integer or otherwise
- if isinstance(key, slice):
- return self._slice(key)
- elif isinstance(key, ABCDataFrame):
- raise TypeError(
- "Indexing a Series with DataFrame is not "
- "supported, use the appropriate DataFrame column"
- )
- elif isinstance(key, tuple):
- try:
- return self._get_values_tuple(key)
- except ValueError:
- # if we don't have a MultiIndex, we may still be able to handle
- # a 1-tuple. see test_1tuple_without_multiindex
- if len(key) == 1:
- key = key[0]
- if isinstance(key, slice):
- return self._get_values(key)
- raise
- if not isinstance(key, (list, np.ndarray, Series, Index)):
- key = list(key)
- if isinstance(key, Index):
- key_type = key.inferred_type
- else:
- key_type = lib.infer_dtype(key, skipna=False)
- if key_type == "integer":
- if self.index.is_integer() or self.index.is_floating():
- return self.loc[key]
- else:
- return self._get_values(key)
- elif key_type == "boolean":
- return self._get_values(key)
- if isinstance(key, (list, tuple)):
- # TODO: de-dup with tuple case handled above?
- # handle the dup indexing case GH#4246
- if len(key) == 1 and isinstance(key[0], slice):
- # [slice(0, 5, None)] will break if you convert to ndarray,
- # e.g. as requested by np.median
- # FIXME: hack
- return self._get_values(key)
- return self.loc[key]
- return self.reindex(key)
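- # Hedged sketch of the key-type dispatch above (this version's fallback
- # behavior; assumes the module-level ``pd`` import): a list of integers
- # is treated positionally when the index itself is not integer-typed.
- # >>> s = pd.Series([10, 20, 30], index=["a", "b", "c"])
- # >>> s[[0, 2]]
- # a    10
- # c    30
- # dtype: int64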
- def _get_values_tuple(self, key):
- # mpl hackaround
- if com.any_none(*key):
- # suppress warning from slicing the index with a 2d indexer.
- # eventually we'll want Series itself to warn.
- with warnings.catch_warnings():
- warnings.filterwarnings(
- "ignore", "Support for multi-dim", DeprecationWarning
- )
- return self._get_values(key)
- if not isinstance(self.index, MultiIndex):
- raise ValueError("Can only tuple-index with a MultiIndex")
- # If key is contained, would have returned by now
- indexer, new_index = self.index.get_loc_level(key)
- return self._constructor(self._values[indexer], index=new_index).__finalize__(
- self
- )
- def _get_values(self, indexer):
- try:
- return self._constructor(
- self._data.get_slice(indexer), fastpath=True
- ).__finalize__(self)
- except ValueError:
- # mpl compat if we look up e.g. ser[:, np.newaxis];
- # see tests.series.timeseries.test_mpl_compat_hack
- return self._values[indexer]
- def _get_value(self, label, takeable: bool = False):
- """
- Quickly retrieve single value at passed index label.
- Parameters
- ----------
- label : object
- takeable : bool, default False
- Interpret the index as positional indexers.
- Returns
- -------
- scalar value
- """
- if takeable:
- return com.maybe_box_datetimelike(self._values[label])
- return self.index.get_value(self._values, label)
- def __setitem__(self, key, value):
- key = com.apply_if_callable(key, self)
- cacher_needs_updating = self._check_is_chained_assignment_possible()
- try:
- self._set_with_engine(key, value)
- except com.SettingWithCopyError:
- raise
- except (KeyError, ValueError):
- values = self._values
- if is_integer(key) and not self.index.inferred_type == "integer":
- values[key] = value
- elif key is Ellipsis:
- self[:] = value
- else:
- self.loc[key] = value
- except TypeError as e:
- if isinstance(key, tuple) and not isinstance(self.index, MultiIndex):
- raise ValueError("Can only tuple-index with a MultiIndex")
- # python 3 type errors should be raised
- if _is_unorderable_exception(e):
- raise IndexError(key)
- if com.is_bool_indexer(key):
- key = check_bool_indexer(self.index, key)
- try:
- self._where(~key, value, inplace=True)
- return
- except InvalidIndexError:
- pass
- self._set_with(key, value)
- if cacher_needs_updating:
- self._maybe_update_cacher()
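- # A short sketch of the boolean-indexer branch above (doctest-style;
- # assumes the module-level ``pd`` import): a boolean mask assignment is
- # routed through ``_where`` with the mask inverted.
- # >>> s = pd.Series([1, 2, 3])
- # >>> s[s > 1] = 0
- # >>> s
- # 0    1
- # 1    0
- # 2    0
- # dtype: int64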
- def _set_with_engine(self, key, value):
- values = self._values
- if is_extension_array_dtype(values.dtype):
- # The cython indexing engine does not support ExtensionArrays.
- values[self.index.get_loc(key)] = value
- return
- try:
- self.index._engine.set_value(values, key, value)
- return
- except KeyError:
- values[self.index.get_loc(key)] = value
- return
- def _set_with(self, key, value):
- # other: fancy integer or otherwise
- if isinstance(key, slice):
- indexer = self.index._convert_slice_indexer(key, kind="getitem")
- return self._set_values(indexer, value)
- elif is_scalar(key) and not is_integer(key) and key not in self.index:
- # GH#12862 adding a new key to the Series
- # Note: have to exclude integers because that is ambiguously
- # position-based
- self.loc[key] = value
- return
- else:
- if isinstance(key, tuple):
- try:
- # TODO: no test cases that get here
- self._set_values(key, value)
- except Exception:
- pass
- if is_scalar(key):
- key = [key]
- if isinstance(key, Index):
- key_type = key.inferred_type
- key = key._values
- else:
- key_type = lib.infer_dtype(key, skipna=False)
- if key_type == "integer":
- if self.index.inferred_type == "integer":
- self._set_labels(key, value)
- else:
- return self._set_values(key, value)
- elif key_type == "boolean":
- self._set_values(key.astype(np.bool_), value)
- else:
- self._set_labels(key, value)
- def _set_labels(self, key, value):
- key = com.asarray_tuplesafe(key)
- indexer = self.index.get_indexer(key)
- mask = indexer == -1
- if mask.any():
- raise ValueError(f"{key[mask]} not contained in the index")
- self._set_values(indexer, value)
- def _set_values(self, key, value):
- if isinstance(key, Series):
- key = key._values
- self._data = self._data.setitem(indexer=key, value=value)
- self._maybe_update_cacher()
- def _set_value(self, label, value, takeable: bool = False):
- """
- Quickly set single value at passed label.
- If label is not contained, a new object is created with the label
- placed at the end of the result index.
- Parameters
- ----------
- label : object
- Partial indexing with MultiIndex not allowed.
- value : object
- Scalar value.
- takeable : bool, default False
- Interpret the index as positional indexers.
- Returns
- -------
- Series
- If label is contained, will be reference to calling Series,
- otherwise a new object.
- """
- try:
- if takeable:
- self._values[label] = value
- else:
- self.index._engine.set_value(self._values, label, value)
- except (KeyError, TypeError):
- # set using a non-recursive method
- self.loc[label] = value
- return self
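- # Hedged sketch of the fallback above (doctest-style; assumes the
- # module-level ``pd`` import): an unknown label falls back to ``.loc``,
- # which appends it to the index.
- # >>> s = pd.Series([1, 2], index=["a", "b"])
- # >>> _ = s._set_value("c", 3)
- # >>> list(s.index)
- # ['a', 'b', 'c']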
- # ----------------------------------------------------------------------
- # Unsorted
- @property
- def _is_mixed_type(self):
- return False
- def repeat(self, repeats, axis=None):
- """
- Repeat elements of a Series.
- Returns a new Series where each element of the current Series
- is repeated consecutively a given number of times.
- Parameters
- ----------
- repeats : int or array of ints
- The number of repetitions for each element. This should be a
- non-negative integer. Repeating 0 times will return an empty
- Series.
- axis : None
- Must be ``None``. Has no effect but is accepted for compatibility
- with numpy.
- Returns
- -------
- Series
- Newly created Series with repeated elements.
- See Also
- --------
- Index.repeat : Equivalent function for Index.
- numpy.repeat : Similar method for :class:`numpy.ndarray`.
- Examples
- --------
- >>> s = pd.Series(['a', 'b', 'c'])
- >>> s
- 0 a
- 1 b
- 2 c
- dtype: object
- >>> s.repeat(2)
- 0 a
- 0 a
- 1 b
- 1 b
- 2 c
- 2 c
- dtype: object
- >>> s.repeat([1, 2, 3])
- 0 a
- 1 b
- 1 b
- 2 c
- 2 c
- 2 c
- dtype: object
- """
- nv.validate_repeat(tuple(), dict(axis=axis))
- new_index = self.index.repeat(repeats)
- new_values = self._values.repeat(repeats)
- return self._constructor(new_values, index=new_index).__finalize__(self)
- def reset_index(self, level=None, drop=False, name=None, inplace=False):
- """
- Generate a new DataFrame or Series with the index reset.
- This is useful when the index needs to be treated as a column, or
- when the index is meaningless and needs to be reset to the default
- before another operation.
- Parameters
- ----------
- level : int, str, tuple, or list, optional
- For a Series with a MultiIndex, only remove the specified levels
- from the index. Removes all levels by default.
- drop : bool, default False
- Just reset the index, without inserting it as a column in
- the new DataFrame.
- name : object, optional
- The name to use for the column containing the original Series
- values. Uses ``self.name`` by default. This argument is ignored
- when `drop` is True.
- inplace : bool, default False
- Modify the Series in place (do not create a new object).
- Returns
- -------
- Series or DataFrame
- When `drop` is False (the default), a DataFrame is returned.
- The newly created columns will come first in the DataFrame,
- followed by the original Series values.
- When `drop` is True, a `Series` is returned.
- In either case, if ``inplace=True``, no value is returned.
- See Also
- --------
- DataFrame.reset_index: Analogous function for DataFrame.
- Examples
- --------
- >>> s = pd.Series([1, 2, 3, 4], name='foo',
- ... index=pd.Index(['a', 'b', 'c', 'd'], name='idx'))
- Generate a DataFrame with default index.
- >>> s.reset_index()
- idx foo
- 0 a 1
- 1 b 2
- 2 c 3
- 3 d 4
- To specify the name of the new column, use `name`.
- >>> s.reset_index(name='values')
- idx values
- 0 a 1
- 1 b 2
- 2 c 3
- 3 d 4
- To generate a new Series with the default index, set `drop` to True.
- >>> s.reset_index(drop=True)
- 0 1
- 1 2
- 2 3
- 3 4
- Name: foo, dtype: int64
- To update the Series in place, without generating a new one,
- set `inplace` to True. Note that it also requires ``drop=True``.
- >>> s.reset_index(inplace=True, drop=True)
- >>> s
- 0 1
- 1 2
- 2 3
- 3 4
- Name: foo, dtype: int64
- The `level` parameter is interesting for Series with a multi-level
- index.
- >>> arrays = [np.array(['bar', 'bar', 'baz', 'baz']),
- ... np.array(['one', 'two', 'one', 'two'])]
- >>> s2 = pd.Series(
- ... range(4), name='foo',
- ... index=pd.MultiIndex.from_arrays(arrays,
- ... names=['a', 'b']))
- To remove a specific level from the Index, use `level`.
- >>> s2.reset_index(level='a')
- a foo
- b
- one bar 0
- two bar 1
- one baz 2
- two baz 3
- If `level` is not set, all levels are removed from the Index.
- >>> s2.reset_index()
- a b foo
- 0 bar one 0
- 1 bar two 1
- 2 baz one 2
- 3 baz two 3
- """
- inplace = validate_bool_kwarg(inplace, "inplace")
- if drop:
- new_index = ibase.default_index(len(self))
- if level is not None:
- if not isinstance(level, (tuple, list)):
- level = [level]
- level = [self.index._get_level_number(lev) for lev in level]
- if len(level) < self.index.nlevels:
- new_index = self.index.droplevel(level)
- if inplace:
- self.index = new_index
- # set name if it was passed, otherwise, keep the previous name
- self.name = name or self.name
- else:
- return self._constructor(
- self._values.copy(), index=new_index
- ).__finalize__(self)
- elif inplace:
- raise TypeError(
- "Cannot reset_index inplace on a Series to create a DataFrame"
- )
- else:
- df = self.to_frame(name)
- return df.reset_index(level=level, drop=drop)
- # ----------------------------------------------------------------------
- # Rendering Methods
- def __repr__(self) -> str:
- """
- Return a string representation for a particular Series.
- """
- buf = StringIO("")
- width, height = get_terminal_size()
- max_rows = (
- height
- if get_option("display.max_rows") == 0
- else get_option("display.max_rows")
- )
- min_rows = (
- height
- if get_option("display.max_rows") == 0
- else get_option("display.min_rows")
- )
- show_dimensions = get_option("display.show_dimensions")
- self.to_string(
- buf=buf,
- name=self.name,
- dtype=self.dtype,
- min_rows=min_rows,
- max_rows=max_rows,
- length=show_dimensions,
- )
- result = buf.getvalue()
- return result
- def to_string(
- self,
- buf=None,
- na_rep="NaN",
- float_format=None,
- header=True,
- index=True,
- length=False,
- dtype=False,
- name=False,
- max_rows=None,
- min_rows=None,
- ):
- """
- Render a string representation of the Series.
- Parameters
- ----------
- buf : StringIO-like, optional
- Buffer to write to.
- na_rep : str, optional
- String representation of NaN to use, default 'NaN'.
- float_format : one-parameter function, optional
- Formatter function to apply to columns' elements if they are
- floats, default None.
- header : bool, default True
- Add the Series header (index name).
- index : bool, optional
- Add index (row) labels, default True.
- length : bool, default False
- Add the Series length.
- dtype : bool, default False
- Add the Series dtype.
- name : bool, default False
- Add the Series name if not None.
- max_rows : int, optional
- Maximum number of rows to show before truncating. If None, show
- all.
- min_rows : int, optional
- The number of rows to display in a truncated repr (when number
- of rows is above `max_rows`).
- Returns
- -------
- str or None
- String representation of Series if ``buf=None``, otherwise None.
- """
- formatter = fmt.SeriesFormatter(
- self,
- name=name,
- length=length,
- header=header,
- index=index,
- dtype=dtype,
- na_rep=na_rep,
- float_format=float_format,
- min_rows=min_rows,
- max_rows=max_rows,
- )
- result = formatter.to_string()
- # catch contract violations
- if not isinstance(result, str):
- raise AssertionError(
- "result must be of type str, type"
- f" of result is {repr(type(result).__name__)}"
- )
- if buf is None:
- return result
- else:
- try:
- buf.write(result)
- except AttributeError:
- with open(buf, "w") as f:
- f.write(result)
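- # Minimal usage sketch for ``to_string`` (assumes the module-level
- # ``pd`` import): with ``buf=None`` the rendered text is returned;
- # passing a file-like object or a path writes it out instead.
- # >>> text = pd.Series([1, 2, 3]).to_string(length=True, dtype=True)
- # >>> isinstance(text, str)
- # True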
- @Appender(
- """
- Examples
- --------
- >>> s = pd.Series(["elk", "pig", "dog", "quetzal"], name="animal")
- >>> print(s.to_markdown())
- | | animal |
- |---:|:---------|
- | 0 | elk |
- | 1 | pig |
- | 2 | dog |
- | 3 | quetzal |
- """
- )
- @Substitution(klass="Series")
- @Appender(generic._shared_docs["to_markdown"])
- def to_markdown(
- self, buf: Optional[IO[str]] = None, mode: Optional[str] = None, **kwargs
- ) -> Optional[str]:
- return self.to_frame().to_markdown(buf, mode, **kwargs)
- # ----------------------------------------------------------------------
- def items(self):
- """
- Lazily iterate over (index, value) tuples.
- This method returns an iterable of tuples (index, value). This is
- convenient if you want to create a lazy iterator.
- Returns
- -------
- iterable
- Iterable of tuples containing the (index, value) pairs from a
- Series.
- See Also
- --------
- DataFrame.items : Iterate over (column name, Series) pairs.
- DataFrame.iterrows : Iterate over DataFrame rows as (index, Series) pairs.
- Examples
- --------
- >>> s = pd.Series(['A', 'B', 'C'])
- >>> for index, value in s.items():
- ... print(f"Index : {index}, Value : {value}")
- Index : 0, Value : A
- Index : 1, Value : B
- Index : 2, Value : C
- """
- return zip(iter(self.index), iter(self))
- @Appender(items.__doc__)
- def iteritems(self):
- return self.items()
- # ----------------------------------------------------------------------
- # Misc public methods
- def keys(self):
- """
- Return alias for index.
- Returns
- -------
- Index
- Index of the Series.
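- Examples
- --------
- >>> s = pd.Series([1, 2, 3], index=['a', 'b', 'c'])
- >>> s.keys()
- Index(['a', 'b', 'c'], dtype='object')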
- """
- return self.index
- def to_dict(self, into=dict):
- """
- Convert Series to {label -> value} dict or dict-like object.
- Parameters
- ----------
- into : class, default dict
- The collections.abc.Mapping subclass to use as the return
- object. Can be the actual class or an empty
- instance of the mapping type you want. If you want a
- collections.defaultdict, you must pass it initialized.
- .. versionadded:: 0.21.0
- Returns
- -------
- collections.abc.Mapping
- Key-value representation of Series.
- Examples
- --------
- >>> s = pd.Series([1, 2, 3, 4])
- >>> s.to_dict()
- {0: 1, 1: 2, 2: 3, 3: 4}
- >>> from collections import OrderedDict, defaultdict
- >>> s.to_dict(OrderedDict)
- OrderedDict([(0, 1), (1, 2), (2, 3), (3, 4)])
- >>> dd = defaultdict(list)
- >>> s.to_dict(dd)
- defaultdict(<class 'list'>, {0: 1, 1: 2, 2: 3, 3: 4})
- """
- # GH16122
- into_c = com.standardize_mapping(into)
- return into_c(self.items())
- def to_frame(self, name=None):
- """
- Convert Series to DataFrame.
- Parameters
- ----------
- name : object, default None
- The passed name should substitute for the series name (if it has
- one).
- Returns
- -------
- DataFrame
- DataFrame representation of Series.
- Examples
- --------
- >>> s = pd.Series(["a", "b", "c"],
- ... name="vals")
- >>> s.to_frame()
- vals
- 0 a
- 1 b
- 2 c
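- To set a different name for the resulting column, pass a value to
- ``name``:
- >>> s.to_frame(name="letters")
- letters
- 0 a
- 1 b
- 2 c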
- """
- if name is None:
- df = self._constructor_expanddim(self)
- else:
- df = self._constructor_expanddim({name: self})
- return df
- def _set_name(self, name, inplace=False):
- """
- Set the Series name.
- Parameters
- ----------
- name : str
- inplace : bool
- Whether to modify `self` directly or return a copy.
- """
- inplace = validate_bool_kwarg(inplace, "inplace")
- ser = self if inplace else self.copy()
- ser.name = name
- return ser
- @Appender(
- """
- Examples
- --------
- >>> ser = pd.Series([390., 350., 30., 20.],
- ... index=['Falcon', 'Falcon', 'Parrot', 'Parrot'], name="Max Speed")
- >>> ser
- Falcon 390.0
- Falcon 350.0
- Parrot 30.0
- Parrot 20.0
- Name: Max Speed, dtype: float64
- >>> ser.groupby(["a", "b", "a", "b"]).mean()
- a 210.0
- b 185.0
- Name: Max Speed, dtype: float64
- >>> ser.groupby(level=0).mean()
- Falcon 370.0
- Parrot 25.0
- Name: Max Speed, dtype: float64
- >>> ser.groupby(ser > 100).mean()
- Max Speed
- False 25.0
- True 370.0
- Name: Max Speed, dtype: float64
- **Grouping by Indexes**
- We can group by different levels of a hierarchical index
- using the `level` parameter:
- >>> arrays = [['Falcon', 'Falcon', 'Parrot', 'Parrot'],
- ... ['Captive', 'Wild', 'Captive', 'Wild']]
- >>> index = pd.MultiIndex.from_arrays(arrays, names=('Animal', 'Type'))
- >>> ser = pd.Series([390., 350., 30., 20.], index=index, name="Max Speed")
- >>> ser
- Animal Type
- Falcon Captive 390.0
- Wild 350.0
- Parrot Captive 30.0
- Wild 20.0
- Name: Max Speed, dtype: float64
- >>> ser.groupby(level=0).mean()
- Animal
- Falcon 370.0
- Parrot 25.0
- Name: Max Speed, dtype: float64
- >>> ser.groupby(level="Type").mean()
- Type
- Captive 210.0
- Wild 185.0
- Name: Max Speed, dtype: float64
- """
- )
- @Appender(generic._shared_docs["groupby"] % _shared_doc_kwargs)
- def groupby(
- self,
- by=None,
- axis=0,
- level=None,
- as_index: bool = True,
- sort: bool = True,
- group_keys: bool = True,
- squeeze: bool = False,
- observed: bool = False,
- ) -> "groupby_generic.SeriesGroupBy":
- if level is None and by is None:
- raise TypeError("You have to supply one of 'by' and 'level'")
- axis = self._get_axis_number(axis)
- return groupby_generic.SeriesGroupBy(
- obj=self,
- keys=by,
- axis=axis,
- level=level,
- as_index=as_index,
- sort=sort,
- group_keys=group_keys,
- squeeze=squeeze,
- observed=observed,
- )
- # ----------------------------------------------------------------------
- # Statistics, overridden ndarray methods
- # TODO: integrate bottleneck
- def count(self, level=None):
- """
- Return number of non-NA/null observations in the Series.
- Parameters
- ----------
- level : int or level name, default None
- If the axis is a MultiIndex (hierarchical), count along a
- particular level, collapsing into a smaller Series.
- Returns
- -------
- int or Series (if level specified)
- Number of non-null values in the Series.
- Examples
- --------
- >>> s = pd.Series([0.0, 1.0, np.nan])
- >>> s.count()
- 2
- """
- if level is None:
- return notna(self.array).sum()
- if isinstance(level, str):
- level = self.index._get_level_number(level)
- lev = self.index.levels[level]
- level_codes = np.array(self.index.codes[level], subok=False, copy=True)
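- # a code of -1 marks a value that is missing in this level; remap
- # those to a new category appended at the end so bincount counts them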
- mask = level_codes == -1
- if mask.any():
- level_codes[mask] = cnt = len(lev)
- lev = lev.insert(cnt, lev._na_value)
- obs = level_codes[notna(self.values)]
- out = np.bincount(obs, minlength=len(lev) or None)
- return self._constructor(out, index=lev, dtype="int64").__finalize__(self)
- def mode(self, dropna=True):
- """
- Return the mode(s) of the dataset.
- Always returns Series even if only one value is returned.
- Parameters
- ----------
- dropna : bool, default True
- Don't consider counts of NaN/NaT.
- .. versionadded:: 0.24.0
- Returns
- -------
- Series
- Modes of the Series in sorted order.
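- Examples
- --------
- >>> s = pd.Series([2, 4, 2, 2])
- >>> s.mode()
- 0 2
- dtype: int64
- With ``dropna=False``, NaN/NaT values are counted as well:
- >>> pd.Series([1, 1, np.nan, np.nan, np.nan]).mode(dropna=False)
- 0 NaN
- dtype: float64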
- """
- # TODO: Add option for bins like value_counts()
- return algorithms.mode(self, dropna=dropna)
- def unique(self):
- """
- Return unique values of Series object.
- Uniques are returned in order of appearance. Hash table-based unique,
- therefore does NOT sort.
- Returns
- -------
- ndarray or ExtensionArray
- The unique values returned as a NumPy array. See Notes.
- See Also
- --------
- unique : Top-level unique method for any 1-d array-like object.
- Index.unique : Return Index with unique values from an Index object.
- Notes
- -----
- Returns the unique values as a NumPy array. In case of an
- extension-array backed Series, a new
- :class:`~api.extensions.ExtensionArray` of that type with just
- the unique values is returned. This includes
- * Categorical
- * Period
- * Datetime with Timezone
- * Interval
- * Sparse
- * IntegerNA
- See Examples section.
- Examples
- --------
- >>> pd.Series([2, 1, 3, 3], name='A').unique()
- array([2, 1, 3])
- >>> pd.Series([pd.Timestamp('2016-01-01') for _ in range(3)]).unique()
- array(['2016-01-01T00:00:00.000000000'], dtype='datetime64[ns]')
- >>> pd.Series([pd.Timestamp('2016-01-01', tz='US/Eastern')
- ... for _ in range(3)]).unique()
- <DatetimeArray>
- ['2016-01-01 00:00:00-05:00']
- Length: 1, dtype: datetime64[ns, US/Eastern]
- An unordered Categorical will return categories in the order of
- appearance.
- >>> pd.Series(pd.Categorical(list('baabc'))).unique()
- [b, a, c]
- Categories (3, object): [b, a, c]
- An ordered Categorical preserves the category ordering.
- >>> pd.Series(pd.Categorical(list('baabc'), categories=list('abc'),
- ... ordered=True)).unique()
- [b, a, c]
- Categories (3, object): [a < b < c]
- """
- result = super().unique()
- return result
- def drop_duplicates(self, keep="first", inplace=False):
- """
- Return Series with duplicate values removed.
- Parameters
- ----------
- keep : {'first', 'last', ``False``}, default 'first'
- Method to handle dropping duplicates:
- - 'first' : Drop duplicates except for the first occurrence.
- - 'last' : Drop duplicates except for the last occurrence.
- - ``False`` : Drop all duplicates.
- inplace : bool, default ``False``
- If ``True``, performs operation inplace and returns None.
- Returns
- -------
- Series
- Series with duplicates dropped.
- See Also
- --------
- Index.drop_duplicates : Equivalent method on Index.
- DataFrame.drop_duplicates : Equivalent method on DataFrame.
- Series.duplicated : Related method on Series, indicating duplicate
- Series values.
- Examples
- --------
- Generate a Series with duplicated entries.
- >>> s = pd.Series(['lama', 'cow', 'lama', 'beetle', 'lama', 'hippo'],
- ... name='animal')
- >>> s
- 0 lama
- 1 cow
- 2 lama
- 3 beetle
- 4 lama
- 5 hippo
- Name: animal, dtype: object
- With the 'keep' parameter, the selection behaviour of duplicated values
- can be changed. The value 'first' keeps the first occurrence for each
- set of duplicated entries. The default value of keep is 'first'.
- >>> s.drop_duplicates()
- 0 lama
- 1 cow
- 3 beetle
- 5 hippo
- Name: animal, dtype: object
- The value 'last' for parameter 'keep' keeps the last occurrence for
- each set of duplicated entries.
- >>> s.drop_duplicates(keep='last')
- 1 cow
- 3 beetle
- 4 lama
- 5 hippo
- Name: animal, dtype: object
- The value ``False`` for parameter 'keep' discards all sets of
- duplicated entries. Setting the value of 'inplace' to ``True`` performs
- the operation inplace and returns ``None``.
- >>> s.drop_duplicates(keep=False, inplace=True)
- >>> s
- 1 cow
- 3 beetle
- 5 hippo
- Name: animal, dtype: object
- """
- return super().drop_duplicates(keep=keep, inplace=inplace)
- def duplicated(self, keep="first"):
- """
- Indicate duplicate Series values.
- Duplicated values are indicated as ``True`` values in the resulting
- Series. Either all duplicates, all except the first or all except the
- last occurrence of duplicates can be indicated.
- Parameters
- ----------
- keep : {'first', 'last', False}, default 'first'
- Method to handle dropping duplicates:
- - 'first' : Mark duplicates as ``True`` except for the first
- occurrence.
- - 'last' : Mark duplicates as ``True`` except for the last
- occurrence.
- - ``False`` : Mark all duplicates as ``True``.
- Returns
- -------
- Series
- Series indicating whether each value has occurred in the
- preceding values.
- See Also
- --------
- Index.duplicated : Equivalent method on pandas.Index.
- DataFrame.duplicated : Equivalent method on pandas.DataFrame.
- Series.drop_duplicates : Remove duplicate values from Series.
- Examples
- --------
- By default, for each set of duplicated values, the first occurrence is
- set to False and all others to True:
- >>> animals = pd.Series(['lama', 'cow', 'lama', 'beetle', 'lama'])
- >>> animals.duplicated()
- 0 False
- 1 False
- 2 True
- 3 False
- 4 True
- dtype: bool
- which is equivalent to
- >>> animals.duplicated(keep='first')
- 0 False
- 1 False
- 2 True
- 3 False
- 4 True
- dtype: bool
- By using 'last', the last occurrence of each set of duplicated values
- is set to False and all others to True:
- >>> animals.duplicated(keep='last')
- 0 True
- 1 False
- 2 True
- 3 False
- 4 False
- dtype: bool
- By setting keep to ``False``, all duplicates are True:
- >>> animals.duplicated(keep=False)
- 0 True
- 1 False
- 2 True
- 3 False
- 4 True
- dtype: bool
- """
- return super().duplicated(keep=keep)
- def idxmin(self, axis=0, skipna=True, *args, **kwargs):
- """
- Return the row label of the minimum value.
- If multiple values equal the minimum, the first row label with that
- value is returned.
- Parameters
- ----------
- axis : int, default 0
- For compatibility with DataFrame.idxmin. Redundant for application
- on Series.
- skipna : bool, default True
- Exclude NA/null values. If the entire Series is NA, the result
- will be NA.
- *args, **kwargs
- Additional arguments and keywords have no effect but might be
- accepted for compatibility with NumPy.
- Returns
- -------
- Index
- Label of the minimum value.
- Raises
- ------
- ValueError
- If the Series is empty.
- See Also
- --------
- numpy.argmin : Return indices of the minimum values
- along the given axis.
- DataFrame.idxmin : Return index of first occurrence of minimum
- over requested axis.
- Series.idxmax : Return index *label* of the first occurrence
- of maximum of values.
- Notes
- -----
- This method is the Series version of ``ndarray.argmin``. This method
- returns the label of the minimum, while ``ndarray.argmin`` returns
- the position. To get the position, use ``series.values.argmin()``.
- Examples
- --------
- >>> s = pd.Series(data=[1, None, 4, 1],
- ... index=['A', 'B', 'C', 'D'])
- >>> s
- A 1.0
- B NaN
- C 4.0
- D 1.0
- dtype: float64
- >>> s.idxmin()
- 'A'
- If `skipna` is False and there is an NA value in the data,
- the function returns ``nan``.
- >>> s.idxmin(skipna=False)
- nan
- """
- skipna = nv.validate_argmin_with_skipna(skipna, args, kwargs)
- i = nanops.nanargmin(com.values_from_object(self), skipna=skipna)
- if i == -1:
- return np.nan
- return self.index[i]
- def idxmax(self, axis=0, skipna=True, *args, **kwargs):
- """
- Return the row label of the maximum value.
- If multiple values equal the maximum, the first row label with that
- value is returned.
- Parameters
- ----------
- axis : int, default 0
- For compatibility with DataFrame.idxmax. Redundant for application
- on Series.
- skipna : bool, default True
- Exclude NA/null values. If the entire Series is NA, the result
- will be NA.
- *args, **kwargs
- Additional arguments and keywords have no effect but might be
- accepted for compatibility with NumPy.
- Returns
- -------
- Index
- Label of the maximum value.
- Raises
- ------
- ValueError
- If the Series is empty.
- See Also
- --------
- numpy.argmax : Return indices of the maximum values
- along the given axis.
- DataFrame.idxmax : Return index of first occurrence of maximum
- over requested axis.
- Series.idxmin : Return index *label* of the first occurrence
- of minimum of values.
- Notes
- -----
- This method is the Series version of ``ndarray.argmax``. This method
- returns the label of the maximum, while ``ndarray.argmax`` returns
- the position. To get the position, use ``series.values.argmax()``.
- Examples
- --------
- >>> s = pd.Series(data=[1, None, 4, 3, 4],
- ... index=['A', 'B', 'C', 'D', 'E'])
- >>> s
- A 1.0
- B NaN
- C 4.0
- D 3.0
- E 4.0
- dtype: float64
- >>> s.idxmax()
- 'C'
- If `skipna` is False and there is an NA value in the data,
- the function returns ``nan``.
- >>> s.idxmax(skipna=False)
- nan
- """
- skipna = nv.validate_argmax_with_skipna(skipna, args, kwargs)
- i = nanops.nanargmax(com.values_from_object(self), skipna=skipna)
- if i == -1:
- return np.nan
- return self.index[i]
- def round(self, decimals=0, *args, **kwargs):
- """
- Round each value in a Series to the given number of decimals.
- Parameters
- ----------
- decimals : int, default 0
- Number of decimal places to round to. If decimals is negative,
- it specifies the number of positions to the left of the decimal point.
- Returns
- -------
- Series
- Rounded values of the Series.
- See Also
- --------
- numpy.around : Round values of an np.array.
- DataFrame.round : Round values of a DataFrame.
- Examples
- --------
- >>> s = pd.Series([0.1, 1.3, 2.7])
- >>> s.round()
- 0 0.0
- 1 1.0
- 2 3.0
- dtype: float64
- """
- nv.validate_round(args, kwargs)
- result = com.values_from_object(self).round(decimals)
- result = self._constructor(result, index=self.index).__finalize__(self)
- return result
- def quantile(self, q=0.5, interpolation="linear"):
- """
- Return value at the given quantile.
- Parameters
- ----------
- q : float or array-like, default 0.5 (50% quantile)
- The quantile(s) to compute, which can lie in range: 0 <= q <= 1.
- interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'}
- This optional parameter specifies the interpolation method to use,
- when the desired quantile lies between two data points `i` and `j`:
- * linear: `i + (j - i) * fraction`, where `fraction` is the
- fractional part of the index surrounded by `i` and `j`.
- * lower: `i`.
- * higher: `j`.
- * nearest: `i` or `j` whichever is nearest.
- * midpoint: (`i` + `j`) / 2.
- Returns
- -------
- float or Series
- If ``q`` is an array, a Series will be returned where the
- index is ``q`` and the values are the quantiles, otherwise
- a float will be returned.
- See Also
- --------
- core.window.Rolling.quantile
- numpy.percentile
- Examples
- --------
- >>> s = pd.Series([1, 2, 3, 4])
- >>> s.quantile(.5)
- 2.5
- >>> s.quantile([.25, .5, .75])
- 0.25 1.75
- 0.50 2.50
- 0.75 3.25
- dtype: float64
- """
- validate_percentile(q)
- # We dispatch to DataFrame so that core.internals only has to worry
- # about 2D cases.
- df = self.to_frame()
- result = df.quantile(q=q, interpolation=interpolation, numeric_only=False)
- if result.ndim == 2:
- result = result.iloc[:, 0]
- if is_list_like(q):
- result.name = self.name
- return self._constructor(result, index=Float64Index(q), name=self.name)
- else:
- # scalar
- return result.iloc[0]
- def corr(self, other, method="pearson", min_periods=None):
- """
- Compute correlation with `other` Series, excluding missing values.
- Parameters
- ----------
- other : Series
- Series with which to compute the correlation.
- method : {'pearson', 'kendall', 'spearman'} or callable
- Method used to compute correlation:
- - pearson : Standard correlation coefficient
- - kendall : Kendall Tau correlation coefficient
- - spearman : Spearman rank correlation
- - callable: Callable with input two 1d ndarrays and returning a float.
- .. versionadded:: 0.24.0
- Note that the returned matrix from corr will have 1 along the
- diagonals and will be symmetric regardless of the callable's
- behavior.
- min_periods : int, optional
- Minimum number of observations needed to have a valid result.
- Returns
- -------
- float
- Correlation with other.
- Examples
- --------
- >>> def histogram_intersection(a, b):
- ... v = np.minimum(a, b).sum().round(decimals=1)
- ... return v
- >>> s1 = pd.Series([.2, .0, .6, .2])
- >>> s2 = pd.Series([.3, .6, .0, .1])
- >>> s1.corr(s2, method=histogram_intersection)
- 0.3
- """
- this, other = self.align(other, join="inner", copy=False)
- if len(this) == 0:
- return np.nan
- if method in ["pearson", "spearman", "kendall"] or callable(method):
- return nanops.nancorr(
- this.values, other.values, method=method, min_periods=min_periods
- )
- raise ValueError(
- "method must be either 'pearson', "
- "'spearman', 'kendall', or a callable, "
- f"'{method}' was supplied"
- )
- def cov(self, other, min_periods=None):
- """
- Compute covariance with Series, excluding missing values.
- Parameters
- ----------
- other : Series
- Series with which to compute the covariance.
- min_periods : int, optional
- Minimum number of observations needed to have a valid result.
- Returns
- -------
- float
- Covariance between Series and other normalized by N-1
- (unbiased estimator).
- Examples
- --------
- >>> s1 = pd.Series([0.90010907, 0.13484424, 0.62036035])
- >>> s2 = pd.Series([0.12528585, 0.26962463, 0.51111198])
- >>> s1.cov(s2)
- -0.01685762652715874
- """
- this, other = self.align(other, join="inner", copy=False)
- if len(this) == 0:
- return np.nan
- return nanops.nancov(this.values, other.values, min_periods=min_periods)
- def diff(self, periods=1):
- """
- First discrete difference of element.
- Calculates the difference of a Series element compared with another
- element in the Series (default is element in previous row).
- Parameters
- ----------
- periods : int, default 1
- Periods to shift for calculating difference, accepts negative
- values.
- Returns
- -------
- Series
- First differences of the Series.
- See Also
- --------
- Series.pct_change: Percent change over given number of periods.
- Series.shift: Shift index by desired number of periods with an
- optional time freq.
- DataFrame.diff: First discrete difference of object.
- Notes
- -----
- For boolean dtypes, this uses :meth:`operator.xor` rather than
- :meth:`operator.sub`.
- Examples
- --------
- Difference with previous row
- >>> s = pd.Series([1, 1, 2, 3, 5, 8])
- >>> s.diff()
- 0 NaN
- 1 0.0
- 2 1.0
- 3 1.0
- 4 2.0
- 5 3.0
- dtype: float64
- Difference with 3rd previous row
- >>> s.diff(periods=3)
- 0 NaN
- 1 NaN
- 2 NaN
- 3 2.0
- 4 4.0
- 5 6.0
- dtype: float64
- Difference with following row
- >>> s.diff(periods=-1)
- 0 0.0
- 1 -1.0
- 2 -1.0
- 3 -2.0
- 4 -3.0
- 5 NaN
- dtype: float64
- """
- result = algorithms.diff(self.array, periods)
- return self._constructor(result, index=self.index).__finalize__(self)
- def autocorr(self, lag=1):
- """
- Compute the lag-N autocorrelation.
- This method computes the Pearson correlation between
- the Series and its shifted self.
- Parameters
- ----------
- lag : int, default 1
- Number of lags to apply before performing autocorrelation.
- Returns
- -------
- float
- The Pearson correlation between self and self.shift(lag).
- See Also
- --------
- Series.corr : Compute the correlation between two Series.
- Series.shift : Shift index by desired number of periods.
- DataFrame.corr : Compute pairwise correlation of columns.
- DataFrame.corrwith : Compute pairwise correlation between rows or
- columns of two DataFrame objects.
- Notes
- -----
- If the Pearson correlation is not well defined, 'NaN' is returned.
- Examples
- --------
- >>> s = pd.Series([0.25, 0.5, 0.2, -0.05])
- >>> s.autocorr() # doctest: +ELLIPSIS
- 0.10355...
- >>> s.autocorr(lag=2) # doctest: +ELLIPSIS
- -0.99999...
- If the Pearson correlation is not well defined, then 'NaN' is returned.
- >>> s = pd.Series([1, 0, 0, 0])
- >>> s.autocorr()
- nan
- """
- return self.corr(self.shift(lag))
- def dot(self, other):
- """
- Compute the dot product between the Series and the columns of other.
- This method computes the dot product between the Series and another
- one, or the Series and each column of a DataFrame, or the Series and
- each column of an array.
- It can also be called using `self @ other` in Python >= 3.5.
- Parameters
- ----------
- other : Series, DataFrame or array-like
- The other object to compute the dot product with its columns.
- Returns
- -------
- scalar, Series or numpy.ndarray
- The dot product of the Series and other if other is a Series;
- a Series with the dot product of the Series and each column of
- other if other is a DataFrame; or a numpy.ndarray with the dot
- products of the Series and each column if other is a numpy array.
- See Also
- --------
- DataFrame.dot: Compute the matrix product with the DataFrame.
- Series.mul: Multiplication of series and other, element-wise.
- Notes
- -----
- The Series and other have to share the same index if other is a Series
- or a DataFrame.
- Examples
- --------
- >>> s = pd.Series([0, 1, 2, 3])
- >>> other = pd.Series([-1, 2, -3, 4])
- >>> s.dot(other)
- 8
- >>> s @ other
- 8
- >>> df = pd.DataFrame([[0, 1], [-2, 3], [4, -5], [6, 7]])
- >>> s.dot(df)
- 0 24
- 1 14
- dtype: int64
- >>> arr = np.array([[0, 1], [-2, 3], [4, -5], [6, 7]])
- >>> s.dot(arr)
- array([24, 14])
- """
- if isinstance(other, (Series, ABCDataFrame)):
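- # align on the union of the two indexes; a union strictly larger
- # than either index means some labels are missing on one side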
- common = self.index.union(other.index)
- if len(common) > len(self.index) or len(common) > len(other.index):
- raise ValueError("matrices are not aligned")
- left = self.reindex(index=common, copy=False)
- right = other.reindex(index=common, copy=False)
- lvals = left.values
- rvals = right.values
- else:
- lvals = self.values
- rvals = np.asarray(other)
- if lvals.shape[0] != rvals.shape[0]:
- raise Exception(
- f"Dot product shape mismatch, {lvals.shape} vs {rvals.shape}"
- )
- if isinstance(other, ABCDataFrame):
- return self._constructor(
- np.dot(lvals, rvals), index=other.columns
- ).__finalize__(self)
- elif isinstance(other, Series):
- return np.dot(lvals, rvals)
- elif isinstance(rvals, np.ndarray):
- return np.dot(lvals, rvals)
- else: # pragma: no cover
- raise TypeError(f"unsupported type: {type(other)}")
- def __matmul__(self, other):
- """
- Matrix multiplication using the binary `@` operator in Python >= 3.5.
- """
- return self.dot(other)
- def __rmatmul__(self, other):
- """
- Matrix multiplication using the binary `@` operator in Python >= 3.5.
- """
- return self.dot(np.transpose(other))
- @Substitution(klass="Series")
- @Appender(base._shared_docs["searchsorted"])
- def searchsorted(self, value, side="left", sorter=None):
- return algorithms.searchsorted(self._values, value, side=side, sorter=sorter)
- # -------------------------------------------------------------------
- # Combination
- def append(self, to_append, ignore_index=False, verify_integrity=False):
- """
- Concatenate two or more Series.
- Parameters
- ----------
- to_append : Series or list/tuple of Series
- Series to append with self.
- ignore_index : bool, default False
- If True, do not use the index labels.
- verify_integrity : bool, default False
- If True, raise Exception on creating index with duplicates.
- Returns
- -------
- Series
- Concatenated Series.
- See Also
- --------
- concat : General function to concatenate DataFrame or Series objects.
- Notes
- -----
- Iteratively appending to a Series can be more computationally intensive
- than a single concatenate. A better solution is to append values to a
- list and then concatenate the list with the original Series all at
- once.
- Examples
- --------
- >>> s1 = pd.Series([1, 2, 3])
- >>> s2 = pd.Series([4, 5, 6])
- >>> s3 = pd.Series([4, 5, 6], index=[3, 4, 5])
- >>> s1.append(s2)
- 0 1
- 1 2
- 2 3
- 0 4
- 1 5
- 2 6
- dtype: int64
- >>> s1.append(s3)
- 0 1
- 1 2
- 2 3
- 3 4
- 4 5
- 5 6
- dtype: int64
- With `ignore_index` set to True:
- >>> s1.append(s2, ignore_index=True)
- 0 1
- 1 2
- 2 3
- 3 4
- 4 5
- 5 6
- dtype: int64
- With `verify_integrity` set to True:
- >>> s1.append(s2, verify_integrity=True)
- Traceback (most recent call last):
- ...
- ValueError: Indexes have overlapping values: [0, 1, 2]
- """
- from pandas.core.reshape.concat import concat
- if isinstance(to_append, (list, tuple)):
- to_concat = [self]
- to_concat.extend(to_append)
- else:
- to_concat = [self, to_append]
- return concat(
- to_concat, ignore_index=ignore_index, verify_integrity=verify_integrity
- )
- def _binop(self, other, func, level=None, fill_value=None):
- """
- Perform generic binary operation with optional fill value.
- Parameters
- ----------
- other : Series
- func : binary operator
- fill_value : float or object
- Value to substitute for NA/null values. If both Series are NA in a
- location, the result will be NA regardless of the passed fill value.
- level : int or level name, default None
- Broadcast across a level, matching Index values on the
- passed MultiIndex level.
- Returns
- -------
- Series
- """
- if not isinstance(other, Series):
- raise AssertionError("Other operand must be Series")
- new_index = self.index
- this = self
- if not self.index.equals(other.index):
- this, other = self.align(other, level=level, join="outer", copy=False)
- new_index = this.index
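- # fill_binop substitutes fill_value where exactly one side is NA;
- # positions where both sides are NA remain NA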
- this_vals, other_vals = ops.fill_binop(this.values, other.values, fill_value)
- with np.errstate(all="ignore"):
- result = func(this_vals, other_vals)
- name = ops.get_op_result_name(self, other)
- ret = ops._construct_result(self, result, new_index, name)
- return ret
- def combine(self, other, func, fill_value=None):
- """
- Combine the Series with a Series or scalar according to `func`.
- Combine the Series and `other` using `func` to perform elementwise
- selection for combined Series.
- `fill_value` is used when a value is missing at some index
- in one of the two objects being combined.
- Parameters
- ----------
- other : Series or scalar
- The value(s) to be combined with the `Series`.
- func : function
- Function that takes two scalars as inputs and returns an element.
- fill_value : scalar, optional
- The value to assume when an index is missing from
- one Series or the other. The default specifies to use the
- appropriate NaN value for the underlying dtype of the Series.
- Returns
- -------
- Series
- The result of combining the Series with the other object.
- See Also
- --------
- Series.combine_first : Combine Series values, choosing the calling
- Series' values first.
- Examples
- --------
- Consider 2 Datasets ``s1`` and ``s2`` containing
- highest clocked speeds of different birds.
- >>> s1 = pd.Series({'falcon': 330.0, 'eagle': 160.0})
- >>> s1
- falcon 330.0
- eagle 160.0
- dtype: float64
- >>> s2 = pd.Series({'falcon': 345.0, 'eagle': 200.0, 'duck': 30.0})
- >>> s2
- falcon 345.0
- eagle 200.0
- duck 30.0
- dtype: float64
- Now, to combine the two datasets and view the highest speeds
- of the birds across the two datasets
- >>> s1.combine(s2, max)
- duck NaN
- eagle 200.0
- falcon 345.0
- dtype: float64
- In the previous example, the resulting value for duck is missing,
- because the maximum of a NaN and a float is a NaN.
- To avoid this, we can set ``fill_value=0``, so that a missing value
- is treated as 0 and the maximum comes from whichever dataset has a value.
- >>> s1.combine(s2, max, fill_value=0)
- duck 30.0
- eagle 200.0
- falcon 345.0
- dtype: float64
- """
- if fill_value is None:
- fill_value = na_value_for_dtype(self.dtype, compat=False)
- if isinstance(other, Series):
- # If other is a Series, result is based on union of Series,
- # so do this element by element
- new_index = self.index.union(other.index)
- new_name = ops.get_op_result_name(self, other)
- new_values = []
- for idx in new_index:
- lv = self.get(idx, fill_value)
- rv = other.get(idx, fill_value)
- with np.errstate(all="ignore"):
- new_values.append(func(lv, rv))
- else:
- # Assume that other is a scalar, so apply the function for
- # each element in the Series
- new_index = self.index
- with np.errstate(all="ignore"):
- new_values = [func(lv, other) for lv in self._values]
- new_name = self.name
- if is_categorical_dtype(self.values):
- pass
- elif is_extension_array_dtype(self.values):
- # The function can return something of any type, so check
- # if the type is compatible with the calling EA.
- new_values = try_cast_to_ea(self._values, new_values)
- return self._constructor(new_values, index=new_index, name=new_name)
- def combine_first(self, other):
- """
- Combine Series values, choosing the calling Series' values first.
- Parameters
- ----------
- other : Series
- The value(s) to be combined with the `Series`.
- Returns
- -------
- Series
- The result of combining the Series with the other object.
- See Also
- --------
- Series.combine : Perform elementwise operation on two Series
- using a given function.
- Notes
- -----
- Result index will be the union of the two indexes.
- Examples
- --------
- >>> s1 = pd.Series([1, np.nan])
- >>> s2 = pd.Series([3, 4])
- >>> s1.combine_first(s2)
- 0 1.0
- 1 4.0
- dtype: float64
- """
- new_index = self.index.union(other.index)
- this = self.reindex(new_index, copy=False)
- other = other.reindex(new_index, copy=False)
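- # if we hold datetime64 values, coerce the other side to datetime too
- # so the filled result keeps a single datetime dtype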
- if this.dtype.kind == "M" and other.dtype.kind != "M":
- other = to_datetime(other)
- return this.where(notna(this), other)
- def update(self, other):
- """
- Modify Series in place using non-NA values from passed
- Series. Aligns on index.
- Parameters
- ----------
- other : Series
- Examples
- --------
- >>> s = pd.Series([1, 2, 3])
- >>> s.update(pd.Series([4, 5, 6]))
- >>> s
- 0 4
- 1 5
- 2 6
- dtype: int64
- >>> s = pd.Series(['a', 'b', 'c'])
- >>> s.update(pd.Series(['d', 'e'], index=[0, 2]))
- >>> s
- 0 d
- 1 b
- 2 e
- dtype: object
- >>> s = pd.Series([1, 2, 3])
- >>> s.update(pd.Series([4, 5, 6, 7, 8]))
- >>> s
- 0 4
- 1 5
- 2 6
- dtype: int64
- If ``other`` contains NaNs the corresponding values are not updated
- in the original Series.
- >>> s = pd.Series([1, 2, 3])
- >>> s.update(pd.Series([4, np.nan, 6]))
- >>> s
- 0 4
- 1 2
- 2 6
- dtype: int64
- """
- other = other.reindex_like(self)
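- # only overwrite positions where the aligned `other` has non-NA values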
- mask = notna(other)
- self._data = self._data.putmask(mask=mask, new=other, inplace=True)
- self._maybe_update_cacher()
- # ----------------------------------------------------------------------
- # Reindexing, sorting
- def sort_values(
- self,
- axis=0,
- ascending=True,
- inplace=False,
- kind="quicksort",
- na_position="last",
- ignore_index=False,
- ):
- """
- Sort by the values.
- Sort a Series in ascending or descending order by some
- criterion.
- Parameters
- ----------
- axis : {0 or 'index'}, default 0
- Axis to direct sorting. The value 'index' is accepted for
- compatibility with DataFrame.sort_values.
- ascending : bool, default True
- If True, sort values in ascending order, otherwise descending.
- inplace : bool, default False
- If True, perform operation in-place.
- kind : {'quicksort', 'mergesort', 'heapsort'}, default 'quicksort'
- Choice of sorting algorithm. See also :func:`numpy.sort` for more
- information. 'mergesort' is the only stable algorithm.
- na_position : {'first', 'last'}, default 'last'
- Argument 'first' puts NaNs at the beginning, 'last' puts NaNs at
- the end.
- ignore_index : bool, default False
- If True, the resulting axis will be labeled 0, 1, …, n - 1.
- .. versionadded:: 1.0.0
- Returns
- -------
- Series
- Series ordered by values.
- See Also
- --------
- Series.sort_index : Sort by the Series indices.
- DataFrame.sort_values : Sort DataFrame by the values along either axis.
- DataFrame.sort_index : Sort DataFrame by indices.
- Examples
- --------
- >>> s = pd.Series([np.nan, 1, 3, 10, 5])
- >>> s
- 0 NaN
- 1 1.0
- 2 3.0
- 3 10.0
- 4 5.0
- dtype: float64
- Sort values in ascending order (default behaviour)
- >>> s.sort_values(ascending=True)
- 1 1.0
- 2 3.0
- 4 5.0
- 3 10.0
- 0 NaN
- dtype: float64
- Sort values in descending order
- >>> s.sort_values(ascending=False)
- 3 10.0
- 4 5.0
- 2 3.0
- 1 1.0
- 0 NaN
- dtype: float64
- Sort values inplace
- >>> s.sort_values(ascending=False, inplace=True)
- >>> s
- 3 10.0
- 4 5.0
- 2 3.0
- 1 1.0
- 0 NaN
- dtype: float64
- Sort values putting NAs first
- >>> s.sort_values(na_position='first')
- 0 NaN
- 1 1.0
- 2 3.0
- 4 5.0
- 3 10.0
- dtype: float64
- Sort a series of strings
- >>> s = pd.Series(['z', 'b', 'd', 'a', 'c'])
- >>> s
- 0 z
- 1 b
- 2 d
- 3 a
- 4 c
- dtype: object
- >>> s.sort_values()
- 3 a
- 1 b
- 4 c
- 2 d
- 0 z
- dtype: object
- """
- inplace = validate_bool_kwarg(inplace, "inplace")
- # Validate the axis parameter
- self._get_axis_number(axis)
- # GH 5856/5853
- if inplace and self._is_cached:
- raise ValueError(
- "This Series is a view of some other array, to "
- "sort in-place you must create a copy"
- )
- def _try_kind_sort(arr):
- # easier to ask forgiveness than permission
- try:
- # if kind==mergesort, it can fail for object dtype
- return arr.argsort(kind=kind)
- except TypeError:
- # stable sort not available for object dtype
- # uses the argsort default quicksort
- return arr.argsort(kind="quicksort")
- arr = self._values
- sorted_index = np.empty(len(self), dtype=np.int32)
- bad = isna(arr)
- good = ~bad
- idx = ibase.default_index(len(self))
- argsorted = _try_kind_sort(arr[good])
- if is_list_like(ascending):
- if len(ascending) != 1:
- raise ValueError(
- f"Length of ascending ({len(ascending)}) must be 1 for Series"
- )
- ascending = ascending[0]
- if not is_bool(ascending):
- raise ValueError("ascending must be boolean")
- if not ascending:
- argsorted = argsorted[::-1]
- if na_position == "last":
- n = good.sum()
- sorted_index[:n] = idx[good][argsorted]
- sorted_index[n:] = idx[bad]
- elif na_position == "first":
- n = bad.sum()
- sorted_index[n:] = idx[good][argsorted]
- sorted_index[:n] = idx[bad]
- else:
- raise ValueError(f"invalid na_position: {na_position}")
- result = self._constructor(arr[sorted_index], index=self.index[sorted_index])
- if ignore_index:
- result.index = ibase.default_index(len(sorted_index))
- if inplace:
- self._update_inplace(result)
- else:
- return result.__finalize__(self)
- def sort_index(
- self,
- axis=0,
- level=None,
- ascending=True,
- inplace=False,
- kind="quicksort",
- na_position="last",
- sort_remaining=True,
- ignore_index: bool = False,
- ):
- """
- Sort Series by index labels.
- Returns a new Series sorted by label if `inplace` argument is
- ``False``, otherwise updates the original series and returns None.
- Parameters
- ----------
- axis : int, default 0
- Axis to direct sorting. This can only be 0 for Series.
- level : int, optional
- If not None, sort on values in specified index level(s).
- ascending : bool, default True
- Sort ascending vs. descending.
- inplace : bool, default False
- If True, perform operation in-place.
- kind : {'quicksort', 'mergesort', 'heapsort'}, default 'quicksort'
- Choice of sorting algorithm. See also :func:`numpy.sort` for more
- information. 'mergesort' is the only stable algorithm. For
- DataFrames, this option is only applied when sorting on a single
- column or label.
- na_position : {'first', 'last'}, default 'last'
- If 'first' puts NaNs at the beginning, 'last' puts NaNs at the end.
- Not implemented for MultiIndex.
- sort_remaining : bool, default True
- If True and sorting by level and index is multilevel, sort by other
- levels too (in order) after sorting by specified level.
- ignore_index : bool, default False
- If True, the resulting axis will be labeled 0, 1, …, n - 1.
- .. versionadded:: 1.0.0
- Returns
- -------
- Series
- The original Series sorted by the labels.
- See Also
- --------
- DataFrame.sort_index: Sort DataFrame by the index.
- DataFrame.sort_values: Sort DataFrame by the value.
- Series.sort_values : Sort Series by the value.
- Examples
- --------
- >>> s = pd.Series(['a', 'b', 'c', 'd'], index=[3, 2, 1, 4])
- >>> s.sort_index()
- 1 c
- 2 b
- 3 a
- 4 d
- dtype: object
- Sort Descending
- >>> s.sort_index(ascending=False)
- 4 d
- 3 a
- 2 b
- 1 c
- dtype: object
- Sort Inplace
- >>> s.sort_index(inplace=True)
- >>> s
- 1 c
- 2 b
- 3 a
- 4 d
- dtype: object
- By default NaNs are put at the end, but use `na_position` to place
- them at the beginning
- >>> s = pd.Series(['a', 'b', 'c', 'd'], index=[3, 2, 1, np.nan])
- >>> s.sort_index(na_position='first')
- NaN d
- 1.0 c
- 2.0 b
- 3.0 a
- dtype: object
- Specify index level to sort
- >>> arrays = [np.array(['qux', 'qux', 'foo', 'foo',
- ... 'baz', 'baz', 'bar', 'bar']),
- ... np.array(['two', 'one', 'two', 'one',
- ... 'two', 'one', 'two', 'one'])]
- >>> s = pd.Series([1, 2, 3, 4, 5, 6, 7, 8], index=arrays)
- >>> s.sort_index(level=1)
- bar one 8
- baz one 6
- foo one 4
- qux one 2
- bar two 7
- baz two 5
- foo two 3
- qux two 1
- dtype: int64
- Does not sort by remaining levels when sorting by levels
- >>> s.sort_index(level=1, sort_remaining=False)
- qux one 2
- foo one 4
- baz one 6
- bar one 8
- qux two 1
- foo two 3
- baz two 5
- bar two 7
- dtype: int64
- """
- # TODO: this can be combined with DataFrame.sort_index impl as
- # almost identical
- inplace = validate_bool_kwarg(inplace, "inplace")
- # Validate the axis parameter
- self._get_axis_number(axis)
- index = self.index
- if level is not None:
- new_index, indexer = index.sortlevel(
- level, ascending=ascending, sort_remaining=sort_remaining
- )
- elif isinstance(index, MultiIndex):
- from pandas.core.sorting import lexsort_indexer
- labels = index._sort_levels_monotonic()
- indexer = lexsort_indexer(
- labels._get_codes_for_sorting(),
- orders=ascending,
- na_position=na_position,
- )
- else:
- from pandas.core.sorting import nargsort
- # Check monotonic-ness before sort an index
- # GH11080
- if (ascending and index.is_monotonic_increasing) or (
- not ascending and index.is_monotonic_decreasing
- ):
- if inplace:
- return
- else:
- return self.copy()
- indexer = nargsort(
- index, kind=kind, ascending=ascending, na_position=na_position
- )
- indexer = ensure_platform_int(indexer)
- new_index = index.take(indexer)
- new_index = new_index._sort_levels_monotonic()
- new_values = self._values.take(indexer)
- result = self._constructor(new_values, index=new_index)
- if ignore_index:
- result.index = ibase.default_index(len(result))
- if inplace:
- self._update_inplace(result)
- else:
- return result.__finalize__(self)
- def argsort(self, axis=0, kind="quicksort", order=None):
- """
- Override ndarray.argsort. Argsorts the value, omitting NA/null values,
- and places the result in the same locations as the non-NA values.
- Parameters
- ----------
- axis : {0 or "index"}
- Has no effect but is accepted for compatibility with numpy.
- kind : {'mergesort', 'quicksort', 'heapsort'}, default 'quicksort'
- Choice of sorting algorithm. See np.sort for more
- information. 'mergesort' is the only stable algorithm.
- order : None
- Has no effect but is accepted for compatibility with numpy.
- Returns
- -------
- Series
- Positions of values within the sort order with -1 indicating
- nan values.
- See Also
- --------
- numpy.ndarray.argsort
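- Examples
- --------
- NA values keep their position and are assigned -1 in the result:
- >>> s = pd.Series([3, np.nan, 1])
- >>> s.argsort()
- 0 1
- 1 -1
- 2 0
- dtype: int64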
- """
- values = self._values
- mask = isna(values)
- if mask.any():
- result = Series(-1, index=self.index, name=self.name, dtype="int64")
- notmask = ~mask
- result[notmask] = np.argsort(values[notmask], kind=kind)
- return self._constructor(result, index=self.index).__finalize__(self)
- else:
- return self._constructor(
- np.argsort(values, kind=kind), index=self.index, dtype="int64"
- ).__finalize__(self)
- def nlargest(self, n=5, keep="first"):
- """
- Return the largest `n` elements.
- Parameters
- ----------
- n : int, default 5
- Return this many descending sorted values.
- keep : {'first', 'last', 'all'}, default 'first'
- When there are duplicate values that cannot all fit in a
- Series of `n` elements:
- - ``first`` : return the first `n` occurrences in order
- of appearance.
- - ``last`` : return the last `n` occurrences in reverse
- order of appearance.
- - ``all`` : keep all occurrences. This can result in a Series of
- size larger than `n`.
- Returns
- -------
- Series
- The `n` largest values in the Series, sorted in decreasing order.
- See Also
- --------
- Series.nsmallest: Get the `n` smallest elements.
- Series.sort_values: Sort Series by values.
- Series.head: Return the first `n` rows.
- Notes
- -----
- Faster than ``.sort_values(ascending=False).head(n)`` for small `n`
- relative to the size of the ``Series`` object.
- Examples
- --------
- >>> countries_population = {"Italy": 59000000, "France": 65000000,
- ... "Malta": 434000, "Maldives": 434000,
- ... "Brunei": 434000, "Iceland": 337000,
- ... "Nauru": 11300, "Tuvalu": 11300,
- ... "Anguilla": 11300, "Monserat": 5200}
- >>> s = pd.Series(countries_population)
- >>> s
- Italy 59000000
- France 65000000
- Malta 434000
- Maldives 434000
- Brunei 434000
- Iceland 337000
- Nauru 11300
- Tuvalu 11300
- Anguilla 11300
- Montserrat 5200
- dtype: int64
- The `n` largest elements where ``n=5`` by default.
- >>> s.nlargest()
- France 65000000
- Italy 59000000
- Malta 434000
- Maldives 434000
- Brunei 434000
- dtype: int64
- The `n` largest elements where ``n=3``. Default `keep` value is 'first'
- so Malta will be kept.
- >>> s.nlargest(3)
- France 65000000
- Italy 59000000
- Malta 434000
- dtype: int64
- The `n` largest elements where ``n=3`` and keeping the last duplicates.
- Brunei will be kept since it is the last with value 434000 based on
- the index order.
- >>> s.nlargest(3, keep='last')
- France 65000000
- Italy 59000000
- Brunei 434000
- dtype: int64
- The `n` largest elements where ``n=3`` with all duplicates kept. Note
- that the returned Series has five elements due to the three duplicates.
- >>> s.nlargest(3, keep='all')
- France 65000000
- Italy 59000000
- Malta 434000
- Maldives 434000
- Brunei 434000
- dtype: int64
- """
- return algorithms.SelectNSeries(self, n=n, keep=keep).nlargest()
- def nsmallest(self, n=5, keep="first"):
- """
- Return the smallest `n` elements.
- Parameters
- ----------
- n : int, default 5
- Return this many ascending sorted values.
- keep : {'first', 'last', 'all'}, default 'first'
- When there are duplicate values that cannot all fit in a
- Series of `n` elements:
- - ``first`` : return the first `n` occurrences in order
- of appearance.
- - ``last`` : return the last `n` occurrences in reverse
- order of appearance.
- - ``all`` : keep all occurrences. This can result in a Series of
- size larger than `n`.
- Returns
- -------
- Series
- The `n` smallest values in the Series, sorted in increasing order.
- See Also
- --------
- Series.nlargest: Get the `n` largest elements.
- Series.sort_values: Sort Series by values.
- Series.head: Return the first `n` rows.
- Notes
- -----
- Faster than ``.sort_values().head(n)`` for small `n` relative to
- the size of the ``Series`` object.
- Examples
- --------
- >>> countries_population = {"Italy": 59000000, "France": 65000000,
- ... "Brunei": 434000, "Malta": 434000,
- ... "Maldives": 434000, "Iceland": 337000,
- ... "Nauru": 11300, "Tuvalu": 11300,
- ... "Anguilla": 11300, "Monserat": 5200}
- >>> s = pd.Series(countries_population)
- >>> s
- Italy 59000000
- France 65000000
- Brunei 434000
- Malta 434000
- Maldives 434000
- Iceland 337000
- Nauru 11300
- Tuvalu 11300
- Anguilla 11300
- Montserrat 5200
- dtype: int64
- The `n` smallest elements where ``n=5`` by default.
- >>> s.nsmallest()
- Montserrat 5200
- Nauru 11300
- Tuvalu 11300
- Anguilla 11300
- Iceland 337000
- dtype: int64
- The `n` smallest elements where ``n=3``. Default `keep` value is
- 'first' so Nauru and Tuvalu will be kept.
- >>> s.nsmallest(3)
- Montserrat 5200
- Nauru 11300
- Tuvalu 11300
- dtype: int64
- The `n` smallest elements where ``n=3`` and keeping the last
- duplicates. Anguilla and Tuvalu will be kept since they are the last
- with value 11300 based on the index order.
- >>> s.nsmallest(3, keep='last')
- Montserrat 5200
- Anguilla 11300
- Tuvalu 11300
- dtype: int64
- The `n` smallest elements where ``n=3`` with all duplicates kept. Note
- that the returned Series has four elements due to the three duplicates.
- >>> s.nsmallest(3, keep='all')
- Montserrat 5200
- Nauru 11300
- Tuvalu 11300
- Anguilla 11300
- dtype: int64
- """
- return algorithms.SelectNSeries(self, n=n, keep=keep).nsmallest()
- def swaplevel(self, i=-2, j=-1, copy=True):
- """
- Swap levels i and j in a :class:`MultiIndex`.
- Default is to swap the two innermost levels of the index.
- Parameters
- ----------
- i, j : int, str
- Level of the indices to be swapped. Can pass level name as string.
- copy : bool, default True
- Whether to copy underlying data.
- Returns
- -------
- Series
- Series with levels swapped in MultiIndex.
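- Examples
- --------
- >>> index = pd.MultiIndex.from_tuples([('a', 1), ('b', 2)],
- ... names=['x', 'y'])
- >>> s = pd.Series([10, 20], index=index)
- >>> s.swaplevel()
- y x
- 1 a 10
- 2 b 20
- dtype: int64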
- """
- new_index = self.index.swaplevel(i, j)
- return self._constructor(self._values, index=new_index, copy=copy).__finalize__(
- self
- )
- def reorder_levels(self, order):
- """
- Rearrange index levels using input order.
- May not drop or duplicate levels.
- Parameters
- ----------
- order : list of int representing new level order
- Reference level by number or key.
- Returns
- -------
- type of caller (new object)
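- Examples
- --------
- >>> index = pd.MultiIndex.from_arrays([['a', 'b'], [1, 2]],
- ... names=['x', 'y'])
- >>> s = pd.Series([10, 20], index=index)
- >>> s.reorder_levels(['y', 'x'])
- y x
- 1 a 10
- 2 b 20
- dtype: int64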
- """
- if not isinstance(self.index, MultiIndex): # pragma: no cover
- raise Exception("Can only reorder levels on a hierarchical axis.")
- result = self.copy()
- result.index = result.index.reorder_levels(order)
- return result
- def explode(self) -> "Series":
- """
- Transform each element of a list-like to a row, replicating the
- index values.
- .. versionadded:: 0.25.0
- Returns
- -------
- Series
- Exploded lists to rows; index will be duplicated for these rows.
- See Also
- --------
- Series.str.split : Split string values on specified separator.
- Series.unstack : Unstack, a.k.a. pivot, Series with MultiIndex
- to produce DataFrame.
- DataFrame.melt : Unpivot a DataFrame from wide format to long format.
- DataFrame.explode : Explode a DataFrame from list-like
- columns to long format.
- Notes
- -----
- This routine will explode list-likes including lists, tuples,
- Series, and np.ndarray. The result dtype of the subset rows will
- be object. Scalars will be returned unchanged. Empty list-likes will
- result in a np.nan for that row.
- Examples
- --------
- >>> s = pd.Series([[1, 2, 3], 'foo', [], [3, 4]])
- >>> s
- 0 [1, 2, 3]
- 1 foo
- 2 []
- 3 [3, 4]
- dtype: object
- >>> s.explode()
- 0 1
- 0 2
- 0 3
- 1 foo
- 2 NaN
- 3 3
- 3 4
- dtype: object
- """
- if not len(self) or not is_object_dtype(self):
- return self.copy()
- values, counts = reshape.explode(np.asarray(self.array))
- result = Series(values, index=self.index.repeat(counts), name=self.name)
- return result
- def unstack(self, level=-1, fill_value=None):
- """
- Unstack, a.k.a. pivot, Series with MultiIndex to produce DataFrame.
- The level involved will automatically get sorted.
- Parameters
- ----------
- level : int, str, or list of these, default last level
- Level(s) to unstack, can pass level name.
- fill_value : scalar value, default None
- Value to use when replacing NaN values.
- Returns
- -------
- DataFrame
- Unstacked Series.
- Examples
- --------
- >>> s = pd.Series([1, 2, 3, 4],
- ... index=pd.MultiIndex.from_product([['one', 'two'],
- ... ['a', 'b']]))
- >>> s
- one a 1
- b 2
- two a 3
- b 4
- dtype: int64
- >>> s.unstack(level=-1)
- a b
- one 1 2
- two 3 4
- >>> s.unstack(level=0)
- one two
- a 1 3
- b 2 4
- """
- from pandas.core.reshape.reshape import unstack
- return unstack(self, level, fill_value)
- # ----------------------------------------------------------------------
- # function application
- def map(self, arg, na_action=None):
- """
- Map values of Series according to input correspondence.
- Used for substituting each value in a Series with another value,
- which may be derived from a function, a ``dict`` or
- a :class:`Series`.
- Parameters
- ----------
- arg : function, collections.abc.Mapping subclass or Series
- Mapping correspondence.
- na_action : {None, 'ignore'}, default None
- If 'ignore', propagate NaN values, without passing them to the
- mapping correspondence.
- Returns
- -------
- Series
- Same index as caller.
- See Also
- --------
- Series.apply : For applying more complex functions on a Series.
- DataFrame.apply : Apply a function row-/column-wise.
- DataFrame.applymap : Apply a function elementwise on a whole DataFrame.
- Notes
- -----
- When ``arg`` is a dictionary, values in Series that are not in the
- dictionary (as keys) are converted to ``NaN``. However, if the
- dictionary is a ``dict`` subclass that defines ``__missing__`` (i.e.
- provides a method for default values), then this default is used
- rather than ``NaN``.
- Examples
- --------
- >>> s = pd.Series(['cat', 'dog', np.nan, 'rabbit'])
- >>> s
- 0 cat
- 1 dog
- 2 NaN
- 3 rabbit
- dtype: object
- ``map`` accepts a ``dict`` or a ``Series``. Values that are not found
- in the ``dict`` are converted to ``NaN``, unless the dict has a default
- value (e.g. ``defaultdict``):
- >>> s.map({'cat': 'kitten', 'dog': 'puppy'})
- 0 kitten
- 1 puppy
- 2 NaN
- 3 NaN
- dtype: object
- It also accepts a function:
- >>> s.map('I am a {}'.format)
- 0 I am a cat
- 1 I am a dog
- 2 I am a nan
- 3 I am a rabbit
- dtype: object
- To avoid applying the function to missing values (and keep them as
- ``NaN``) ``na_action='ignore'`` can be used:
- >>> s.map('I am a {}'.format, na_action='ignore')
- 0 I am a cat
- 1 I am a dog
- 2 NaN
- 3 I am a rabbit
- dtype: object
- """
- new_values = super()._map_values(arg, na_action=na_action)
- return self._constructor(new_values, index=self.index).__finalize__(self)
- def _gotitem(self, key, ndim, subset=None):
- """
- Sub-classes to define. Return a sliced object.
- Parameters
- ----------
- key : string / list of selections
- ndim : 1,2
- Requested ndim of result.
- subset : object, default None
- Subset to act on.
- """
- return self
- _agg_see_also_doc = dedent(
- """
- See Also
- --------
- Series.apply : Invoke function on a Series.
- Series.transform : Transform function producing a Series with like indexes.
- """
- )
- _agg_examples_doc = dedent(
- """
- Examples
- --------
- >>> s = pd.Series([1, 2, 3, 4])
- >>> s
- 0 1
- 1 2
- 2 3
- 3 4
- dtype: int64
- >>> s.agg('min')
- 1
- >>> s.agg(['min', 'max'])
- min 1
- max 4
- dtype: int64
- """
- )
- @Substitution(
- see_also=_agg_see_also_doc,
- examples=_agg_examples_doc,
- versionadded="\n.. versionadded:: 0.20.0\n",
- **_shared_doc_kwargs,
- )
- @Appender(generic._shared_docs["aggregate"])
- def aggregate(self, func, axis=0, *args, **kwargs):
- # Validate the axis parameter
- self._get_axis_number(axis)
- result, how = self._aggregate(func, *args, **kwargs)
- if result is None:
- # we can be called from an inner function which
- # passes this meta-data
- kwargs.pop("_axis", None)
- kwargs.pop("_level", None)
- # try a regular apply, this evaluates lambdas
- # row-by-row; however, if the lambda expects a Series
- # expression, e.g.: lambda x: x-x.quantile(0.25)
- # this will fail, so we can try a vectorized evaluation
- # we cannot FIRST try the vectorized evaluation, because
- # then .agg and .apply would have different semantics if the
- # operation is actually defined on the Series, e.g. str
- try:
- result = self.apply(func, *args, **kwargs)
- except (ValueError, AttributeError, TypeError):
- result = func(self, *args, **kwargs)
- return result
- agg = aggregate
- @Appender(generic._shared_docs["transform"] % _shared_doc_kwargs)
- def transform(self, func, axis=0, *args, **kwargs):
- # Validate the axis parameter
- self._get_axis_number(axis)
- return super().transform(func, *args, **kwargs)
- def apply(self, func, convert_dtype=True, args=(), **kwds):
- """
- Invoke function on values of Series.
- Can be ufunc (a NumPy function that applies to the entire Series)
- or a Python function that only works on single values.
- Parameters
- ----------
- func : function
- Python function or NumPy ufunc to apply.
- convert_dtype : bool, default True
- Try to find better dtype for elementwise function results. If
- False, leave as dtype=object.
- args : tuple
- Positional arguments passed to func after the series value.
- **kwds
- Additional keyword arguments passed to func.
- Returns
- -------
- Series or DataFrame
- If func returns a Series object the result will be a DataFrame.
- See Also
- --------
- Series.map: For element-wise operations.
- Series.agg: Only perform aggregating type operations.
- Series.transform: Only perform transforming type operations.
- Examples
- --------
- Create a series with typical summer temperatures for each city.
- >>> s = pd.Series([20, 21, 12],
- ... index=['London', 'New York', 'Helsinki'])
- >>> s
- London 20
- New York 21
- Helsinki 12
- dtype: int64
- Square the values by defining a function and passing it as an
- argument to ``apply()``.
- >>> def square(x):
- ... return x ** 2
- >>> s.apply(square)
- London 400
- New York 441
- Helsinki 144
- dtype: int64
- Square the values by passing an anonymous function as an
- argument to ``apply()``.
- >>> s.apply(lambda x: x ** 2)
- London 400
- New York 441
- Helsinki 144
- dtype: int64
- Define a custom function that needs additional positional
- arguments and pass these additional arguments using the
- ``args`` keyword.
- >>> def subtract_custom_value(x, custom_value):
- ... return x - custom_value
- >>> s.apply(subtract_custom_value, args=(5,))
- London 15
- New York 16
- Helsinki 7
- dtype: int64
- Define a custom function that takes keyword arguments
- and pass these arguments to ``apply``.
- >>> def add_custom_values(x, **kwargs):
- ... for month in kwargs:
- ... x += kwargs[month]
- ... return x
- >>> s.apply(add_custom_values, june=30, july=20, august=25)
- London 95
- New York 96
- Helsinki 87
- dtype: int64
- Use a function from the Numpy library.
- >>> s.apply(np.log)
- London 2.995732
- New York 3.044522
- Helsinki 2.484907
- dtype: float64
- """
- if len(self) == 0:
- return self._constructor(dtype=self.dtype, index=self.index).__finalize__(
- self
- )
- # dispatch to agg
- if isinstance(func, (list, dict)):
- return self.aggregate(func, *args, **kwds)
- # if we are a string, try to dispatch
- if isinstance(func, str):
- return self._try_aggregate_string_function(func, *args, **kwds)
- # handle ufuncs and lambdas
- if kwds or (args and not isinstance(func, np.ufunc)):
- def f(x):
- return func(x, *args, **kwds)
- else:
- f = func
- with np.errstate(all="ignore"):
- if isinstance(f, np.ufunc):
- return f(self)
- # row-wise access
- if is_extension_array_dtype(self.dtype) and hasattr(self._values, "map"):
- # GH#23179 some EAs do not have `map`
- mapped = self._values.map(f)
- else:
- values = self.astype(object).values
- mapped = lib.map_infer(values, f, convert=convert_dtype)
- if len(mapped) and isinstance(mapped[0], Series):
- # GH 25959 use pd.array instead of tolist
- # so extension arrays can be used
- return self._constructor_expanddim(pd.array(mapped), index=self.index)
- else:
- return self._constructor(mapped, index=self.index).__finalize__(self)
-     def _reduce(
-         self, op, name, axis=0, skipna=True, numeric_only=None, filter_type=None, **kwds
-     ):
-         """
-         Perform a reduction operation.
-
-         If we have an ndarray as a value, then simply perform the operation,
-         otherwise delegate to the object.
-         """
-         delegate = self._values
-
-         if axis is not None:
-             self._get_axis_number(axis)
-
-         if isinstance(delegate, Categorical):
-             return delegate._reduce(name, skipna=skipna, **kwds)
-         elif isinstance(delegate, ExtensionArray):
-             # dispatch to ExtensionArray interface
-             return delegate._reduce(name, skipna=skipna, **kwds)
-         elif is_datetime64_dtype(delegate):
-             # use DatetimeIndex implementation to handle skipna correctly
-             delegate = DatetimeIndex(delegate)
-         elif is_timedelta64_dtype(delegate) and hasattr(TimedeltaIndex, name):
-             # use TimedeltaIndex to handle skipna correctly
-             # TODO: remove hasattr check after TimedeltaIndex has `std` method
-             delegate = TimedeltaIndex(delegate)
-
-         # dispatch to numpy arrays
-         elif isinstance(delegate, np.ndarray):
-             if numeric_only:
-                 raise NotImplementedError(
-                     f"Series.{name} does not implement numeric_only."
-                 )
-             with np.errstate(all="ignore"):
-                 return op(delegate, skipna=skipna, **kwds)
-
-         # TODO(EA) dispatch to Index
-         # remove once all internals extension types are
-         # moved to ExtensionArrays
-         return delegate._reduce(
-             op=op,
-             name=name,
-             axis=axis,
-             skipna=skipna,
-             numeric_only=numeric_only,
-             filter_type=filter_type,
-             **kwds,
-         )
-
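-     # Editor's sketch (illustrative only): concrete reductions such as
-     # ``Series.sum`` or ``Series.min`` funnel through ``_reduce``, so the
-     # branches above decide whether the array type handles the reduction
-     # itself or the plain ndarray path is taken:
-     #
-     # >>> pd.Series(pd.Categorical([2, 1, 3], ordered=True)).min()
-     # 1
-     # >>> pd.Series([1.0, np.nan, 3.0]).sum()
-     # 4.0
-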
-     def _reindex_indexer(self, new_index, indexer, copy):
-         if indexer is None:
-             if copy:
-                 return self.copy()
-             return self
-
-         new_values = algorithms.take_1d(
-             self._values, indexer, allow_fill=True, fill_value=None
-         )
-         return self._constructor(new_values, index=new_index)
-
-     def _needs_reindex_multi(self, axes, method, level):
-         """
-         Check if we do need a multi reindex; this is for compat with
-         higher dims.
-         """
-         return False
-
- @Appender(generic._shared_docs["align"] % _shared_doc_kwargs)
- def align(
- self,
- other,
- join="outer",
- axis=None,
- level=None,
- copy=True,
- fill_value=None,
- method=None,
- limit=None,
- fill_axis=0,
- broadcast_axis=None,
- ):
- return super().align(
- other,
- join=join,
- axis=axis,
- level=level,
- copy=copy,
- fill_value=fill_value,
- method=method,
- limit=limit,
- fill_axis=fill_axis,
- broadcast_axis=broadcast_axis,
- )
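-     # Editor's sketch (illustrative only): ``align`` returns both operands
-     # reindexed to a common index; with ``join="outer"`` missing labels are
-     # filled with NaN unless ``fill_value`` is given.
-     #
-     # >>> a = pd.Series([1, 2], index=['a', 'b'])
-     # >>> b = pd.Series([3, 4], index=['b', 'c'])
-     # >>> left, right = a.align(b, join='outer', fill_value=0)
-     # >>> left.tolist(), right.tolist()
-     # ([1, 2, 0], [0, 3, 4])
-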
-     def rename(
-         self,
-         index=None,
-         *,
-         axis=None,
-         copy=True,
-         inplace=False,
-         level=None,
-         errors="ignore",
-     ):
-         """
-         Alter Series index labels or name.
-
-         Function / dict values must be unique (1-to-1). Labels not contained in
-         a dict / Series will be left as-is. Extra labels listed don't throw an
-         error.
-
-         Alternatively, change ``Series.name`` with a scalar value.
-
-         See the :ref:`user guide <basics.rename>` for more.
-
-         Parameters
-         ----------
-         index : scalar, hashable sequence, dict-like or function, optional
-             Functions or dict-like are transformations to apply to
-             the index.
-             Scalar or hashable sequence-like will alter the ``Series.name``
-             attribute.
-         axis : {0 or "index"}
-             Unused. Accepted for compatibility with DataFrame method only.
-         copy : bool, default True
-             Whether to copy underlying data.
-         inplace : bool, default False
-             Whether to modify the Series in place. If True, return None.
-         level : int or level name, default None
-             In case of a MultiIndex, only rename labels in the specified level.
-         errors : {'ignore', 'raise'}, default 'ignore'
-             If 'raise', raise a ``KeyError`` when a dict-like ``index`` contains
-             labels that are not present in the index being renamed.
-
-         Returns
-         -------
-         Series
-             Series with index labels or name altered.
-
-         See Also
-         --------
-         DataFrame.rename : Corresponding DataFrame method.
-         Series.rename_axis : Set the name of the axis.
-
-         Examples
-         --------
-         >>> s = pd.Series([1, 2, 3])
-         >>> s
-         0    1
-         1    2
-         2    3
-         dtype: int64
-         >>> s.rename("my_name")  # scalar, changes Series.name
-         0    1
-         1    2
-         2    3
-         Name: my_name, dtype: int64
-         >>> s.rename(lambda x: x ** 2)  # function, changes labels
-         0    1
-         1    2
-         4    3
-         dtype: int64
-         >>> s.rename({1: 3, 2: 5})  # mapping, changes labels
-         0    1
-         3    2
-         5    3
-         dtype: int64
-         """
-         if callable(index) or is_dict_like(index):
-             return super().rename(
-                 index, copy=copy, inplace=inplace, level=level, errors=errors
-             )
-         else:
-             return self._set_name(index, inplace=inplace)
-
-     @Substitution(**_shared_doc_kwargs)
-     @Appender(generic.NDFrame.reindex.__doc__)
-     def reindex(self, index=None, **kwargs):
-         return super().reindex(index=index, **kwargs)
-
-     def drop(
-         self,
-         labels=None,
-         axis=0,
-         index=None,
-         columns=None,
-         level=None,
-         inplace=False,
-         errors="raise",
-     ):
-         """
-         Return Series with specified index labels removed.
-
-         Remove elements of a Series based on specifying the index labels.
-         When using a multi-index, labels on different levels can be removed
-         by specifying the level.
-
-         Parameters
-         ----------
-         labels : single label or list-like
-             Index labels to drop.
-         axis : {0 or 'index'}, default 0
-             Unused; there is only one axis to drop values from. Accepted for
-             compatibility with the DataFrame method.
-         index : single label or list-like
-             Redundant for application on Series, but 'index' can be used instead
-             of 'labels'.
-
-             .. versionadded:: 0.21.0
-         columns : single label or list-like
-             No change is made to the Series; use 'index' or 'labels' instead.
-
-             .. versionadded:: 0.21.0
-         level : int or level name, optional
-             For MultiIndex, level for which the labels will be removed.
-         inplace : bool, default False
-             If True, do operation inplace and return None.
-         errors : {'ignore', 'raise'}, default 'raise'
-             If 'ignore', suppress error and only existing labels are dropped.
-
-         Returns
-         -------
-         Series
-             Series with specified index labels removed.
-
-         Raises
-         ------
-         KeyError
-             If none of the labels are found in the index.
-
-         See Also
-         --------
-         Series.reindex : Return only specified index labels of Series.
-         Series.dropna : Return series without null values.
-         Series.drop_duplicates : Return Series with duplicate values removed.
-         DataFrame.drop : Drop specified labels from rows or columns.
-
-         Examples
-         --------
-         >>> s = pd.Series(data=np.arange(3), index=['A', 'B', 'C'])
-         >>> s
-         A  0
-         B  1
-         C  2
-         dtype: int64
-
-         Drop labels B and C:
-
-         >>> s.drop(labels=['B', 'C'])
-         A  0
-         dtype: int64
-
-         Drop the second-level label in a MultiIndex Series:
-
-         >>> midx = pd.MultiIndex(levels=[['lama', 'cow', 'falcon'],
-         ...                              ['speed', 'weight', 'length']],
-         ...                      codes=[[0, 0, 0, 1, 1, 1, 2, 2, 2],
-         ...                             [0, 1, 2, 0, 1, 2, 0, 1, 2]])
-         >>> s = pd.Series([45, 200, 1.2, 30, 250, 1.5, 320, 1, 0.3],
-         ...               index=midx)
-         >>> s
-         lama    speed      45.0
-                 weight    200.0
-                 length      1.2
-         cow     speed      30.0
-                 weight    250.0
-                 length      1.5
-         falcon  speed     320.0
-                 weight      1.0
-                 length      0.3
-         dtype: float64
-
-         >>> s.drop(labels='weight', level=1)
-         lama    speed      45.0
-                 length      1.2
-         cow     speed      30.0
-                 length      1.5
-         falcon  speed     320.0
-                 length      0.3
-         dtype: float64
-         """
-         return super().drop(
-             labels=labels,
-             axis=axis,
-             index=index,
-             columns=columns,
-             level=level,
-             inplace=inplace,
-             errors=errors,
-         )
-
-     @Substitution(**_shared_doc_kwargs)
-     @Appender(generic.NDFrame.fillna.__doc__)
-     def fillna(
-         self,
-         value=None,
-         method=None,
-         axis=None,
-         inplace=False,
-         limit=None,
-         downcast=None,
-     ) -> Optional["Series"]:
-         return super().fillna(
-             value=value,
-             method=method,
-             axis=axis,
-             inplace=inplace,
-             limit=limit,
-             downcast=downcast,
-         )
-
- @Appender(generic._shared_docs["replace"] % _shared_doc_kwargs)
- def replace(
- self,
- to_replace=None,
- value=None,
- inplace=False,
- limit=None,
- regex=False,
- method="pad",
- ):
- return super().replace(
- to_replace=to_replace,
- value=value,
- inplace=inplace,
- limit=limit,
- regex=regex,
- method=method,
- )
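-     # Editor's sketch (illustrative only): when ``value`` is omitted, the
-     # ``method`` keyword controls how replaced positions are filled, here by
-     # propagating the previous value forward.
-     #
-     # >>> s = pd.Series([0, 1, 2, 3, 4])
-     # >>> s.replace([1, 2], method='pad').tolist()
-     # [0, 0, 0, 3, 4]
-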
- @Appender(generic._shared_docs["shift"] % _shared_doc_kwargs)
- def shift(self, periods=1, freq=None, axis=0, fill_value=None):
- return super().shift(
- periods=periods, freq=freq, axis=axis, fill_value=fill_value
- )
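-     # Editor's sketch (illustrative only): passing ``fill_value`` avoids the
-     # upcast to float that NaN-filling the vacated positions would cause.
-     #
-     # >>> pd.Series([1, 2, 3]).shift(1, fill_value=0).tolist()
-     # [0, 1, 2]
-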
-     def memory_usage(self, index=True, deep=False):
-         """
-         Return the memory usage of the Series.
-
-         The memory usage can optionally include the contribution of
-         the index and of elements of `object` dtype.
-
-         Parameters
-         ----------
-         index : bool, default True
-             Specifies whether to include the memory usage of the Series index.
-         deep : bool, default False
-             If True, introspect the data deeply by interrogating
-             `object` dtypes for system-level memory consumption, and include
-             it in the returned value.
-
-         Returns
-         -------
-         int
-             Bytes of memory consumed.
-
-         See Also
-         --------
-         numpy.ndarray.nbytes : Total bytes consumed by the elements of the
-             array.
-         DataFrame.memory_usage : Bytes consumed by a DataFrame.
-
-         Examples
-         --------
-         >>> s = pd.Series(range(3))
-         >>> s.memory_usage()
-         152
-
-         Not including the index gives the size of the rest of the data, which
-         is necessarily smaller:
-
-         >>> s.memory_usage(index=False)
-         24
-
-         The memory footprint of `object` values is ignored by default:
-
-         >>> s = pd.Series(["a", "b"])
-         >>> s.values
-         array(['a', 'b'], dtype=object)
-         >>> s.memory_usage()
-         144
-         >>> s.memory_usage(deep=True)
-         260
-         """
-         v = super().memory_usage(deep=deep)
-         if index:
-             v += self.index.memory_usage(deep=deep)
-         return v
-
-     def isin(self, values):
-         """
-         Check whether `values` are contained in Series.
-
-         Return a boolean Series showing whether each element in the Series
-         matches an element in the passed sequence of `values` exactly.
-
-         Parameters
-         ----------
-         values : set or list-like
-             The sequence of values to test. Passing in a single string will
-             raise a ``TypeError``. Instead, turn a single string into a
-             list of one element.
-
-         Returns
-         -------
-         Series
-             Series of booleans indicating if each element is in values.
-
-         Raises
-         ------
-         TypeError
-           * If `values` is a string
-
-         See Also
-         --------
-         DataFrame.isin : Equivalent method on DataFrame.
-
-         Examples
-         --------
-         >>> s = pd.Series(['lama', 'cow', 'lama', 'beetle', 'lama',
-         ...                'hippo'], name='animal')
-         >>> s.isin(['cow', 'lama'])
-         0     True
-         1     True
-         2     True
-         3    False
-         4     True
-         5    False
-         Name: animal, dtype: bool
-
-         Passing a single string as ``s.isin('lama')`` will raise an error. Use
-         a list of one element instead:
-
-         >>> s.isin(['lama'])
-         0     True
-         1    False
-         2     True
-         3    False
-         4     True
-         5    False
-         Name: animal, dtype: bool
-         """
-         result = algorithms.isin(self, values)
-         return self._constructor(result, index=self.index).__finalize__(self)
-
-     def between(self, left, right, inclusive=True):
-         """
-         Return boolean Series equivalent to left <= series <= right.
-
-         This function returns a boolean vector containing `True` wherever the
-         corresponding Series element is between the boundary values `left` and
-         `right`. NA values are treated as `False`.
-
-         Parameters
-         ----------
-         left : scalar or list-like
-             Left boundary.
-         right : scalar or list-like
-             Right boundary.
-         inclusive : bool, default True
-             Include boundaries.
-
-         Returns
-         -------
-         Series
-             Series representing whether each element is between left and
-             right (inclusive).
-
-         See Also
-         --------
-         Series.gt : Greater than of series and other.
-         Series.lt : Less than of series and other.
-
-         Notes
-         -----
-         This function is equivalent to ``(left <= ser) & (ser <= right)``.
-
-         Examples
-         --------
-         >>> s = pd.Series([2, 0, 4, 8, np.nan])
-
-         Boundary values are included by default:
-
-         >>> s.between(1, 4)
-         0     True
-         1    False
-         2     True
-         3    False
-         4    False
-         dtype: bool
-
-         With `inclusive` set to ``False`` boundary values are excluded:
-
-         >>> s.between(1, 4, inclusive=False)
-         0     True
-         1    False
-         2    False
-         3    False
-         4    False
-         dtype: bool
-
-         `left` and `right` can be any scalar value:
-
-         >>> s = pd.Series(['Alice', 'Bob', 'Carol', 'Eve'])
-         >>> s.between('Anna', 'Daniel')
-         0    False
-         1     True
-         2     True
-         3    False
-         dtype: bool
-         """
-         if inclusive:
-             lmask = self >= left
-             rmask = self <= right
-         else:
-             lmask = self > left
-             rmask = self < right
-
-         return lmask & rmask
-
-     # ----------------------------------------------------------------------
-     # Convert to types that support pd.NA
-
-     def _convert_dtypes(
-         self: ABCSeries,
-         infer_objects: bool = True,
-         convert_string: bool = True,
-         convert_integer: bool = True,
-         convert_boolean: bool = True,
-     ) -> "Series":
-         input_series = self
-         if infer_objects:
-             input_series = input_series.infer_objects()
-             if is_object_dtype(input_series):
-                 input_series = input_series.copy()
-
-         if convert_string or convert_integer or convert_boolean:
-             inferred_dtype = convert_dtypes(
-                 input_series._values, convert_string, convert_integer, convert_boolean
-             )
-             try:
-                 result = input_series.astype(inferred_dtype)
-             except TypeError:
-                 result = input_series.copy()
-         else:
-             result = input_series.copy()
-         return result
-
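-     # Editor's sketch (illustrative only): ``_convert_dtypes`` backs the public
-     # ``Series.convert_dtypes``, which moves data to dtypes that support pd.NA.
-     #
-     # >>> pd.Series([1, 2, None], dtype=object).convert_dtypes()
-     # 0       1
-     # 1       2
-     # 2    <NA>
-     # dtype: Int64
-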
- @Appender(generic._shared_docs["isna"] % _shared_doc_kwargs)
- def isna(self):
- return super().isna()
- @Appender(generic._shared_docs["isna"] % _shared_doc_kwargs)
- def isnull(self):
- return super().isnull()
- @Appender(generic._shared_docs["notna"] % _shared_doc_kwargs)
- def notna(self):
- return super().notna()
- @Appender(generic._shared_docs["notna"] % _shared_doc_kwargs)
- def notnull(self):
- return super().notnull()
-     def dropna(self, axis=0, inplace=False, how=None):
-         """
-         Return a new Series with missing values removed.
-
-         See the :ref:`User Guide <missing_data>` for more on which values are
-         considered missing, and how to work with missing data.
-
-         Parameters
-         ----------
-         axis : {0 or 'index'}, default 0
-             There is only one axis to drop values from.
-         inplace : bool, default False
-             If True, do operation inplace and return None.
-         how : str, optional
-             Not in use. Kept for compatibility.
-
-         Returns
-         -------
-         Series
-             Series with NA entries dropped from it.
-
-         See Also
-         --------
-         Series.isna : Indicate missing values.
-         Series.notna : Indicate existing (non-missing) values.
-         Series.fillna : Replace missing values.
-         DataFrame.dropna : Drop rows or columns which contain NA values.
-         Index.dropna : Drop missing indices.
-
-         Examples
-         --------
-         >>> ser = pd.Series([1., 2., np.nan])
-         >>> ser
-         0    1.0
-         1    2.0
-         2    NaN
-         dtype: float64
-
-         Drop NA values from a Series.
-
-         >>> ser.dropna()
-         0    1.0
-         1    2.0
-         dtype: float64
-
-         Keep the Series with valid entries in the same variable.
-
-         >>> ser.dropna(inplace=True)
-         >>> ser
-         0    1.0
-         1    2.0
-         dtype: float64
-
-         Empty strings are not considered NA values. ``None`` is considered an
-         NA value.
-
-         >>> ser = pd.Series([np.NaN, 2, pd.NaT, '', None, 'I stay'])
-         >>> ser
-         0       NaN
-         1         2
-         2       NaT
-         3
-         4      None
-         5    I stay
-         dtype: object
-         >>> ser.dropna()
-         1         2
-         3
-         5    I stay
-         dtype: object
-         """
-         inplace = validate_bool_kwarg(inplace, "inplace")
-         # Validate the axis parameter
-         self._get_axis_number(axis or 0)
-
-         if self._can_hold_na:
-             result = remove_na_arraylike(self)
-             if inplace:
-                 self._update_inplace(result)
-             else:
-                 return result
-         else:
-             if inplace:
-                 # do nothing
-                 pass
-             else:
-                 return self.copy()
-
-     # ----------------------------------------------------------------------
-     # Time series-oriented methods
-
-     def to_timestamp(self, freq=None, how="start", copy=True):
-         """
-         Cast to DatetimeIndex of Timestamps, at *beginning* of period.
-
-         Parameters
-         ----------
-         freq : str, default frequency of PeriodIndex
-             Desired frequency.
-         how : {'s', 'e', 'start', 'end'}
-             Convention for converting period to timestamp; start of period
-             vs. end.
-         copy : bool, default True
-             Whether or not to return a copy.
-
-         Returns
-         -------
-         Series with DatetimeIndex
-         """
-         new_values = self._values
-         if copy:
-             new_values = new_values.copy()
-
-         new_index = self.index.to_timestamp(freq=freq, how=how)
-         return self._constructor(new_values, index=new_index).__finalize__(self)
-
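-     # Editor's sketch (illustrative only): converting a period-indexed Series
-     # to a timestamp index anchored at the start of each period.
-     #
-     # >>> s = pd.Series([1, 2, 3],
-     # ...               index=pd.period_range('2020-01', periods=3, freq='M'))
-     # >>> s.to_timestamp().index
-     # DatetimeIndex(['2020-01-01', '2020-02-01', '2020-03-01'],
-     #               dtype='datetime64[ns]', freq='MS')
-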
-     def to_period(self, freq=None, copy=True):
-         """
-         Convert Series from DatetimeIndex to PeriodIndex with desired
-         frequency (inferred from index if not passed).
-
-         Parameters
-         ----------
-         freq : str, default None
-             Frequency associated with the PeriodIndex.
-         copy : bool, default True
-             Whether or not to return a copy.
-
-         Returns
-         -------
-         Series
-             Series with index converted to PeriodIndex.
-         """
-         new_values = self._values
-         if copy:
-             new_values = new_values.copy()
-
-         new_index = self.index.to_period(freq=freq)
-         return self._constructor(new_values, index=new_index).__finalize__(self)
-
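-     # Editor's sketch (illustrative only): the inverse direction of
-     # ``to_timestamp``; the frequency is inferred from the DatetimeIndex
-     # when ``freq`` is not given.
-     #
-     # >>> s = pd.Series([1, 2, 3],
-     # ...               index=pd.date_range('2020-01-01', periods=3, freq='D'))
-     # >>> s.to_period().index
-     # PeriodIndex(['2020-01-01', '2020-01-02', '2020-01-03'],
-     #             dtype='period[D]', freq='D')
-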
-     # ----------------------------------------------------------------------
-     # Accessor Methods
-     # ----------------------------------------------------------------------
-     str = CachedAccessor("str", StringMethods)
-     dt = CachedAccessor("dt", CombinedDatetimelikeProperties)
-     cat = CachedAccessor("cat", CategoricalAccessor)
-     plot = CachedAccessor("plot", pandas.plotting.PlotAccessor)
-     sparse = CachedAccessor("sparse", SparseAccessor)
-
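-     # Editor's sketch (illustrative only): the cached accessors expose
-     # namespaced methods on Series instances, for example:
-     #
-     # >>> pd.Series(['a', 'b']).str.upper().tolist()
-     # ['A', 'B']
-     # >>> pd.Series(pd.to_datetime(['2020-01-01'])).dt.year.tolist()
-     # [2020]
-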
-     # ----------------------------------------------------------------------
-     # Add plotting methods to Series
-     hist = pandas.plotting.hist_series
-
-
- Series._setup_axes(["index"], docs={"index": "The index (axis labels) of the Series."})
- Series._add_numeric_operations()
- Series._add_series_or_dataframe_operations()
-
- # Add arithmetic!
- ops.add_flex_arithmetic_methods(Series)
- ops.add_special_arithmetic_methods(Series)
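-
- # Editor's sketch (illustrative only): ``add_flex_arithmetic_methods`` attaches
- # named methods such as ``Series.add``/``Series.sub`` that accept ``fill_value``
- # and ``level``, while ``add_special_arithmetic_methods`` wires up the
- # ``+``/``-`` operators themselves.
- #
- # >>> a = pd.Series([1, 2], index=['x', 'y'])
- # >>> b = pd.Series([10], index=['y'])
- # >>> a.add(b, fill_value=0).tolist()
- # [1.0, 12.0]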