1
0

series.py 139 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
742784279428042814282428342844285428642874288428942904291429242934294429542964297429842994300430143024303430443054306430743084309431043114312431343144315431643174318431943204321432243234324432543264327432843294330433143324333433443354336433743384339434043414342434343444345434643474348434943504351435243534354435543564357435843594360436143624363436443654366436743684369437043714372437343744375437643774378437943804381438243834384438543864387438843894390439143924393439443954396439743984399440044014402440344044405440644074408440944104411441244134414441544164417441844194420442144224423442444254426442744284429443044314432443344344435443644374438443944404441444244434444444544464447444844494450445144524453445444554456445744584459446044614462446344644465446644674468446944704471447244734474447544764477447844794480448144824483448444854486448744884489449044914492449344944495449644974498449945004501450245034504450545064507450845094510451145124513451445154516451745184519452045214522452345244525452645274528452945304531453245334534453545364537453845394540454145424543454445454546454745484549455045514552455345544555455645574558455945604561456245634564456545664567456845694570457145724573457445754576
  1. """
  2. Data structure for 1-dimensional cross-sectional and time series data
  3. """
  4. from io import StringIO
  5. from shutil import get_terminal_size
  6. from textwrap import dedent
  7. from typing import IO, Any, Callable, Hashable, List, Optional
  8. import warnings
  9. import numpy as np
  10. from pandas._config import get_option
  11. from pandas._libs import index as libindex, lib, reshape, tslibs
  12. from pandas.compat.numpy import function as nv
  13. from pandas.util._decorators import Appender, Substitution
  14. from pandas.util._validators import validate_bool_kwarg, validate_percentile
  15. from pandas.core.dtypes.cast import convert_dtypes
  16. from pandas.core.dtypes.common import (
  17. _is_unorderable_exception,
  18. ensure_platform_int,
  19. is_bool,
  20. is_categorical_dtype,
  21. is_datetime64_dtype,
  22. is_dict_like,
  23. is_extension_array_dtype,
  24. is_integer,
  25. is_iterator,
  26. is_list_like,
  27. is_object_dtype,
  28. is_scalar,
  29. is_timedelta64_dtype,
  30. )
  31. from pandas.core.dtypes.generic import (
  32. ABCDataFrame,
  33. ABCDatetimeIndex,
  34. ABCSeries,
  35. ABCSparseArray,
  36. )
  37. from pandas.core.dtypes.inference import is_hashable
  38. from pandas.core.dtypes.missing import (
  39. isna,
  40. na_value_for_dtype,
  41. notna,
  42. remove_na_arraylike,
  43. )
  44. import pandas as pd
  45. from pandas.core import algorithms, base, generic, nanops, ops
  46. from pandas.core.accessor import CachedAccessor
  47. from pandas.core.arrays import ExtensionArray, try_cast_to_ea
  48. from pandas.core.arrays.categorical import Categorical, CategoricalAccessor
  49. from pandas.core.arrays.sparse import SparseAccessor
  50. import pandas.core.common as com
  51. from pandas.core.construction import (
  52. create_series_with_explicit_dtype,
  53. extract_array,
  54. is_empty_data,
  55. sanitize_array,
  56. )
  57. from pandas.core.groupby import generic as groupby_generic
  58. from pandas.core.indexers import maybe_convert_indices
  59. from pandas.core.indexes.accessors import CombinedDatetimelikeProperties
  60. from pandas.core.indexes.api import (
  61. Float64Index,
  62. Index,
  63. InvalidIndexError,
  64. MultiIndex,
  65. ensure_index,
  66. )
  67. import pandas.core.indexes.base as ibase
  68. from pandas.core.indexes.datetimes import DatetimeIndex
  69. from pandas.core.indexes.period import PeriodIndex
  70. from pandas.core.indexes.timedeltas import TimedeltaIndex
  71. from pandas.core.indexing import check_bool_indexer
  72. from pandas.core.internals import SingleBlockManager
  73. from pandas.core.strings import StringMethods
  74. from pandas.core.tools.datetimes import to_datetime
  75. import pandas.io.formats.format as fmt
  76. import pandas.plotting
__all__ = ["Series"]

# Substitution values shared by the docstring templates in this module
# (consumed by the @Appender / @Substitution decorators imported above).
_shared_doc_kwargs = dict(
    axes="index",
    klass="Series",
    axes_single_arg="{0 or 'index'}",
    axis="""axis : {0 or 'index'}
        Parameter needed for compatibility with DataFrame.""",
    inplace="""inplace : boolean, default False
        If True, performs operation inplace and returns None.""",
    unique="np.ndarray",
    duplicated="Series",
    optional_by="",
    optional_mapper="",
    optional_labels="",
    optional_axis="",
    versionadded_to_excel="\n    .. versionadded:: 0.20.0\n",
)
  94. def _coerce_method(converter):
  95. """
  96. Install the scalar coercion methods.
  97. """
  98. def wrapper(self):
  99. if len(self) == 1:
  100. return converter(self.iloc[0])
  101. raise TypeError(f"cannot convert the series to {converter}")
  102. wrapper.__name__ = f"__{converter.__name__}__"
  103. return wrapper
# ----------------------------------------------------------------------
# Series class


class Series(base.IndexOpsMixin, generic.NDFrame):
    """
    One-dimensional ndarray with axis labels (including time series).

    Labels need not be unique but must be a hashable type. The object
    supports both integer- and label-based indexing and provides a host of
    methods for performing operations involving the index. Statistical
    methods from ndarray have been overridden to automatically exclude
    missing data (currently represented as NaN).

    Operations between Series (+, -, /, *, **) align values based on their
    associated index values-- they need not be the same length. The result
    index will be the sorted union of the two indexes.

    Parameters
    ----------
    data : array-like, Iterable, dict, or scalar value
        Contains data stored in Series.

        .. versionchanged:: 0.23.0
           If data is a dict, argument order is maintained for Python 3.6
           and later.

    index : array-like or Index (1d)
        Values must be hashable and have the same length as `data`.
        Non-unique index values are allowed. Will default to
        RangeIndex (0, 1, 2, ..., n) if not provided. If both a dict and index
        sequence are used, the index will override the keys found in the
        dict.
    dtype : str, numpy.dtype, or ExtensionDtype, optional
        Data type for the output Series. If not specified, this will be
        inferred from `data`.
        See the :ref:`user guide <basics.dtypes>` for more usages.
    name : str, optional
        The name to give to the Series.
    copy : bool, default False
        Copy input data.
    """

    # Marker string; presumably consumed by the ABC* isinstance shims in
    # pandas.core.dtypes.generic — confirm before relying on it.
    _typ = "series"

    _name: Optional[Hashable]
    # Attributes propagated to derived objects by NDFrame finalization.
    _metadata: List[str] = ["name"]
    # Namespaces exposed as accessors (Series.dt, .cat, .str, .sparse).
    _accessors = {"dt", "cat", "str", "sparse"}
    _deprecations = (
        base.IndexOpsMixin._deprecations
        | generic.NDFrame._deprecations
        | frozenset(["compress", "ptp"])
    )

    # Override cache_readonly bc Series is mutable
    hasnans = property(
        base.IndexOpsMixin.hasnans.func, doc=base.IndexOpsMixin.hasnans.__doc__
    )
    _data: SingleBlockManager
    # Arithmetic methods installed elsewhere; declared here for type checkers.
    div: Callable[["Series", Any], "Series"]
    rdiv: Callable[["Series", Any], "Series"]
    # ----------------------------------------------------------------------
    # Constructors

    def __init__(
        self, data=None, index=None, dtype=None, name=None, copy=False, fastpath=False
    ):
        # ``fastpath`` is an internal-only flag: data is (convertible to) a
        # SingleBlockManager and validation/inference can be skipped.

        # we are called internally, so short-circuit
        if fastpath:

            # data is an ndarray, index is defined
            if not isinstance(data, SingleBlockManager):
                data = SingleBlockManager(data, index, fastpath=True)
            if copy:
                data = data.copy()
            if index is None:
                index = data.index

        else:

            name = ibase.maybe_extract_name(name, data, type(self))

            if is_empty_data(data) and dtype is None:
                # gh-17261
                warnings.warn(
                    "The default dtype for empty Series will be 'object' instead "
                    "of 'float64' in a future version. Specify a dtype explicitly "
                    "to silence this warning.",
                    DeprecationWarning,
                    stacklevel=2,
                )
                # uncomment the line below when removing the DeprecationWarning
                # dtype = np.dtype(object)

            if index is not None:
                index = ensure_index(index)

            if data is None:
                data = {}
            if dtype is not None:
                dtype = self._validate_dtype(dtype)

            # Dispatch on the concrete type of ``data`` to normalize it into
            # something sanitize_array / SingleBlockManager can consume.
            if isinstance(data, MultiIndex):
                raise NotImplementedError(
                    "initializing a Series from a MultiIndex is not supported"
                )
            elif isinstance(data, Index):
                if dtype is not None:
                    # astype copies
                    data = data.astype(dtype)
                else:
                    # need to copy to avoid aliasing issues
                    data = data._values.copy()
                if isinstance(data, ABCDatetimeIndex) and data.tz is not None:
                    # GH#24096 need copy to be deep for datetime64tz case
                    # TODO: See if we can avoid these copies
                    data = data._values.copy(deep=True)
                copy = False

            elif isinstance(data, np.ndarray):
                if len(data.dtype):
                    # GH#13296 we are dealing with a compound dtype, which
                    # should be treated as 2D
                    raise ValueError(
                        "Cannot construct a Series from an ndarray with "
                        "compound dtype. Use DataFrame instead."
                    )
                pass
            elif isinstance(data, ABCSeries):
                if index is None:
                    index = data.index
                else:
                    # Align to the requested index (may introduce NaNs).
                    data = data.reindex(index, copy=copy)
                data = data._data
            elif is_dict_like(data):
                data, index = self._init_dict(data, index, dtype)
                dtype = None
                copy = False
            elif isinstance(data, SingleBlockManager):
                if index is None:
                    index = data.index
                elif not data.index.equals(index) or copy:
                    # GH#19275 SingleBlockManager input should only be called
                    # internally
                    raise AssertionError(
                        "Cannot pass both SingleBlockManager "
                        "`data` argument and a different "
                        "`index` argument. `copy` must be False."
                    )
            elif is_extension_array_dtype(data):
                pass
            elif isinstance(data, (set, frozenset)):
                raise TypeError(f"'{type(data).__name__}' type is unordered")
            elif isinstance(data, ABCSparseArray):
                # handle sparse passed here (and force conversion)
                data = data.to_dense()
            else:
                data = com.maybe_iterable_to_list(data)

            if index is None:
                if not is_list_like(data):
                    data = [data]
                index = ibase.default_index(len(data))
            elif is_list_like(data):

                # a scalar numpy array is list-like but doesn't
                # have a proper length
                try:
                    if len(index) != len(data):
                        raise ValueError(
                            f"Length of passed values is {len(data)}, "
                            f"index implies {len(index)}."
                        )
                except TypeError:
                    pass

        # create/copy the manager
        if isinstance(data, SingleBlockManager):
            if dtype is not None:
                data = data.astype(dtype=dtype, errors="ignore", copy=copy)
            elif copy:
                data = data.copy()
        else:
            data = sanitize_array(data, index, dtype, copy, raise_cast_failure=True)

            data = SingleBlockManager(data, index, fastpath=True)

        generic.NDFrame.__init__(self, data, fastpath=True)
        self.name = name
        self._set_axis(0, index, fastpath=True)
  270. def _init_dict(self, data, index=None, dtype=None):
  271. """
  272. Derive the "_data" and "index" attributes of a new Series from a
  273. dictionary input.
  274. Parameters
  275. ----------
  276. data : dict or dict-like
  277. Data used to populate the new Series.
  278. index : Index or index-like, default None
  279. Index for the new Series: if None, use dict keys.
  280. dtype : dtype, default None
  281. The dtype for the new Series: if None, infer from data.
  282. Returns
  283. -------
  284. _data : BlockManager for the new Series
  285. index : index for the new Series
  286. """
  287. # Looking for NaN in dict doesn't work ({np.nan : 1}[float('nan')]
  288. # raises KeyError), so we iterate the entire dict, and align
  289. if data:
  290. keys, values = zip(*data.items())
  291. values = list(values)
  292. elif index is not None:
  293. # fastpath for Series(data=None). Just use broadcasting a scalar
  294. # instead of reindexing.
  295. values = na_value_for_dtype(dtype)
  296. keys = index
  297. else:
  298. keys, values = [], []
  299. # Input is now list-like, so rely on "standard" construction:
  300. # TODO: passing np.float64 to not break anything yet. See GH-17261
  301. s = create_series_with_explicit_dtype(
  302. values, index=keys, dtype=dtype, dtype_if_empty=np.float64
  303. )
  304. # Now we just make sure the order is respected, if any
  305. if data and index is not None:
  306. s = s.reindex(index, copy=False)
  307. return s._data, s.index
    # ----------------------------------------------------------------------

    @property
    def _constructor(self):
        """Class used internally to build new same-dimensional results."""
        return Series
    @property
    def _constructor_expanddim(self):
        """Class used when adding a dimension (Series -> DataFrame)."""
        # Imported here, not at module top — presumably to avoid a circular
        # import between series.py and frame.py at load time.
        from pandas.core.frame import DataFrame

        return DataFrame
    # types
    @property
    def _can_hold_na(self):
        """Whether the underlying block can store NA values (delegated)."""
        return self._data._can_hold_na

    # Row-label Index; populated by _set_axis via object.__setattr__.
    _index = None
    def _set_axis(self, axis, labels, fastpath=False):
        """
        Override generic, we want to set the _typ here.

        Stores ``labels`` as this Series' index, upgrading an all-dates
        labels collection to a DatetimeIndex when possible. With
        ``fastpath=True`` the labels are assumed pre-validated and the
        manager is only updated if the labels were actually converted.
        """
        if not fastpath:
            labels = ensure_index(labels)

        is_all_dates = labels.is_all_dates
        if is_all_dates:
            if not isinstance(labels, (DatetimeIndex, PeriodIndex, TimedeltaIndex)):
                try:
                    labels = DatetimeIndex(labels)
                    # need to set here because we changed the index
                    # in the fastpath case (the non-fastpath case sets it
                    # unconditionally at the bottom)
                    if fastpath:
                        self._data.set_axis(axis, labels)
                except (tslibs.OutOfBoundsDatetime, ValueError):
                    # labels may exceeds datetime bounds,
                    # or not be a DatetimeIndex
                    pass

        self._set_subtyp(is_all_dates)

        # Bypass the overridden attribute protocol when storing the index.
        object.__setattr__(self, "_index", labels)
        if not fastpath:
            self._data.set_axis(axis, labels)
  343. def _set_subtyp(self, is_all_dates):
  344. if is_all_dates:
  345. object.__setattr__(self, "_subtyp", "time_series")
  346. else:
  347. object.__setattr__(self, "_subtyp", "series")
    def _update_inplace(self, result, **kwargs):
        """Replace internal data with ``result``'s, in place."""
        # we want to call the generic version and not the IndexOpsMixin
        return generic.NDFrame._update_inplace(self, result, **kwargs)
    # ndarray compatibility
    @property
    def dtype(self):
        """
        Return the dtype object of the underlying data.
        """
        return self._data.dtype
    @property
    def dtypes(self):
        """
        Return the dtype object of the underlying data.
        """
        # Identical to ``dtype``: a Series holds a single block of one dtype.
        return self._data.dtype
    @property
    def name(self) -> Optional[Hashable]:
        """Return the name of this Series (any hashable object, or None)."""
        return self._name

    @name.setter
    def name(self, value: Optional[Hashable]) -> None:
        # Names must be hashable so they can serve as labels/keys.
        if not is_hashable(value):
            raise TypeError("Series.name must be a hashable type")
        # Direct write bypasses the overridden attribute-setting machinery.
        object.__setattr__(self, "_name", value)
    @property
    def values(self):
        """
        Return Series as ndarray or ndarray-like depending on the dtype.

        .. warning::

           We recommend using :attr:`Series.array` or
           :meth:`Series.to_numpy`, depending on whether you need
           a reference to the underlying data or a NumPy array.

        Returns
        -------
        numpy.ndarray or ndarray-like

        See Also
        --------
        Series.array : Reference to the underlying data.
        Series.to_numpy : A NumPy array representing the underlying data.

        Examples
        --------
        >>> pd.Series([1, 2, 3]).values
        array([1, 2, 3])

        >>> pd.Series(list('aabc')).values
        array(['a', 'a', 'b', 'c'], dtype=object)

        >>> pd.Series(list('aabc')).astype('category').values
        [a, a, b, c]
        Categories (3, object): [a, b, c]

        Timezone aware datetime data is converted to UTC:

        >>> pd.Series(pd.date_range('20130101', periods=3,
        ...                         tz='US/Eastern')).values
        array(['2013-01-01T05:00:00.000000000',
               '2013-01-02T05:00:00.000000000',
               '2013-01-03T05:00:00.000000000'], dtype='datetime64[ns]')
        """
        return self._data.external_values()
@property
def _values(self):
    """
    Return the internal repr of this data (defined by Block.interval_values).

    This are the values as stored in the Block (ndarray or ExtensionArray
    depending on the Block class).

    Differs from the public ``.values`` for certain data types, because of
    historical backwards compatibility of the public attribute (e.g. period
    returns object ndarray and datetimetz a datetime64[ns] ndarray for
    ``.values`` while it returns an ExtensionArray for ``._values`` in those
    cases).

    Differs from ``.array`` in that this still returns the numpy array if
    the Block is backed by a numpy array, while ``.array`` ensures to always
    return an ExtensionArray.

    Differs from ``._ndarray_values``, as that ensures to always return a
    numpy array (it will call ``_ndarray_values`` on the ExtensionArray, if
    the Series was backed by an ExtensionArray).

    Overview:

    dtype       | values        | _values       | array         | _ndarray_values |
    ----------- | ------------- | ------------- | ------------- | --------------- |
    Numeric     | ndarray       | ndarray       | PandasArray   | ndarray         |
    Category    | Categorical   | Categorical   | Categorical   | ndarray[int]    |
    dt64[ns]    | ndarray[M8ns] | ndarray[M8ns] | DatetimeArray | ndarray[M8ns]   |
    dt64[ns tz] | ndarray[M8ns] | DatetimeArray | DatetimeArray | ndarray[M8ns]   |
    Period      | ndarray[obj]  | PeriodArray   | PeriodArray   | ndarray[int]    |
    Nullable    | EA            | EA            | EA            | ndarray         |
    """
    return self._data.internal_values()
@Appender(base.IndexOpsMixin.array.__doc__)  # type: ignore
@property
def array(self) -> ExtensionArray:
    # always returns an ExtensionArray wrapper of the single block's values
    return self._data._block.array_values()
def _internal_get_values(self):
    """
    Same as values (but handles sparseness conversions); is a view.

    Returns
    -------
    numpy.ndarray
        Data of the Series.
    """
    return self._data.get_values()
  445. # ops
  446. def ravel(self, order="C"):
  447. """
  448. Return the flattened underlying data as an ndarray.
  449. Returns
  450. -------
  451. numpy.ndarray or ndarray-like
  452. Flattened data of the Series.
  453. See Also
  454. --------
  455. numpy.ndarray.ravel
  456. """
  457. return self._values.ravel(order=order)
  458. def __len__(self) -> int:
  459. """
  460. Return the length of the Series.
  461. """
  462. return len(self._data)
  463. def view(self, dtype=None):
  464. """
  465. Create a new view of the Series.
  466. This function will return a new Series with a view of the same
  467. underlying values in memory, optionally reinterpreted with a new data
  468. type. The new data type must preserve the same size in bytes as to not
  469. cause index misalignment.
  470. Parameters
  471. ----------
  472. dtype : data type
  473. Data type object or one of their string representations.
  474. Returns
  475. -------
  476. Series
  477. A new Series object as a view of the same data in memory.
  478. See Also
  479. --------
  480. numpy.ndarray.view : Equivalent numpy function to create a new view of
  481. the same data in memory.
  482. Notes
  483. -----
  484. Series are instantiated with ``dtype=float64`` by default. While
  485. ``numpy.ndarray.view()`` will return a view with the same data type as
  486. the original array, ``Series.view()`` (without specified dtype)
  487. will try using ``float64`` and may fail if the original data type size
  488. in bytes is not the same.
  489. Examples
  490. --------
  491. >>> s = pd.Series([-2, -1, 0, 1, 2], dtype='int8')
  492. >>> s
  493. 0 -2
  494. 1 -1
  495. 2 0
  496. 3 1
  497. 4 2
  498. dtype: int8
  499. The 8 bit signed integer representation of `-1` is `0b11111111`, but
  500. the same bytes represent 255 if read as an 8 bit unsigned integer:
  501. >>> us = s.view('uint8')
  502. >>> us
  503. 0 254
  504. 1 255
  505. 2 0
  506. 3 1
  507. 4 2
  508. dtype: uint8
  509. The views share the same underlying values:
  510. >>> us[0] = 128
  511. >>> s
  512. 0 -128
  513. 1 -1
  514. 2 0
  515. 3 1
  516. 4 2
  517. dtype: int8
  518. """
  519. return self._constructor(
  520. self._values.view(dtype), index=self.index
  521. ).__finalize__(self)
# ----------------------------------------------------------------------
# NDArray Compat
_HANDLED_TYPES = (Index, ExtensionArray, np.ndarray)

def __array_ufunc__(
    self, ufunc: Callable, method: str, *inputs: Any, **kwargs: Any
):
    """
    Implement the numpy ufunc protocol for Series.

    Dispatches binary ops to the Series dunder methods when possible,
    defers (returns NotImplemented) to inputs that should handle the op
    themselves, aligns Series inputs on a shared index, applies the
    ufunc to the unwrapped arrays, and re-boxes the result.
    """
    # TODO: handle DataFrame
    cls = type(self)

    # for binary ops, use our custom dunder methods
    result = ops.maybe_dispatch_ufunc_to_dunder_op(
        self, ufunc, method, *inputs, **kwargs
    )
    if result is not NotImplemented:
        return result

    # Determine if we should defer.
    no_defer = (np.ndarray.__array_ufunc__, cls.__array_ufunc__)

    for item in inputs:
        # defer to an input with higher __array_priority__, or one with
        # its own __array_ufunc__ that we do not already know how to handle
        higher_priority = (
            hasattr(item, "__array_priority__")
            and item.__array_priority__ > self.__array_priority__
        )
        has_array_ufunc = (
            hasattr(item, "__array_ufunc__")
            and type(item).__array_ufunc__ not in no_defer
            and not isinstance(item, self._HANDLED_TYPES)
        )
        if higher_priority or has_array_ufunc:
            return NotImplemented

    # align all the inputs.
    names = [getattr(x, "name") for x in inputs if hasattr(x, "name")]
    types = tuple(type(x) for x in inputs)
    # TODO: dataframe
    alignable = [x for x, t in zip(inputs, types) if issubclass(t, Series)]

    if len(alignable) > 1:
        # This triggers alignment.
        # At the moment, there aren't any ufuncs with more than two inputs
        # so this ends up just being x1.index | x2.index, but we write
        # it to handle *args.
        index = alignable[0].index
        for s in alignable[1:]:
            index |= s.index
        inputs = tuple(
            x.reindex(index) if issubclass(t, Series) else x
            for x, t in zip(inputs, types)
        )
    else:
        index = self.index

    # unwrap to plain arrays before calling the ufunc
    inputs = tuple(extract_array(x, extract_numpy=True) for x in inputs)
    result = getattr(ufunc, method)(*inputs, **kwargs)

    # propagate a name only if all named inputs agree
    name: Optional[Hashable]
    if len(set(names)) == 1:
        name = names[0]
    else:
        name = None

    def construct_return(result):
        # re-box a single ufunc output: scalars pass through, >1-dim
        # results stay raw (except "outer", which is unsupported)
        if lib.is_scalar(result):
            return result
        elif result.ndim > 1:
            # e.g. np.subtract.outer
            if method == "outer":
                # GH#27198
                raise NotImplementedError
            return result
        return self._constructor(result, index=index, name=name, copy=False)

    if type(result) is tuple:
        # multiple return values
        return tuple(construct_return(x) for x in result)
    elif method == "at":
        # no return value
        return None
    else:
        return construct_return(result)
def __array__(self, dtype=None) -> np.ndarray:
    """
    Return the values as a NumPy array.

    Users should not call this directly. Rather, it is invoked by
    :func:`numpy.array` and :func:`numpy.asarray`.

    Parameters
    ----------
    dtype : str or numpy.dtype, optional
        The dtype to use for the resulting NumPy array. By default,
        the dtype is inferred from the data.

    Returns
    -------
    numpy.ndarray
        The values in the series converted to a :class:`numpy.ndarray`
        with the specified `dtype`.

    See Also
    --------
    array : Create a new array from data.
    Series.array : Zero-copy view to the array backing the Series.
    Series.to_numpy : Series method for similar behavior.

    Examples
    --------
    >>> ser = pd.Series([1, 2, 3])
    >>> np.asarray(ser)
    array([1, 2, 3])

    For timezone-aware data, the timezones may be retained with
    ``dtype='object'``

    >>> tzser = pd.Series(pd.date_range('2000', periods=2, tz="CET"))
    >>> np.asarray(tzser, dtype="object")
    array([Timestamp('2000-01-01 00:00:00+0100', tz='CET', freq='D'),
           Timestamp('2000-01-02 00:00:00+0100', tz='CET', freq='D')],
          dtype=object)

    Or the values may be localized to UTC and the tzinfo discarded with
    ``dtype='datetime64[ns]'``

    >>> np.asarray(tzser, dtype="datetime64[ns]")  # doctest: +ELLIPSIS
    array(['1999-12-31T23:00:00.000000000', ...],
          dtype='datetime64[ns]')
    """
    # go through .array so extension types control their own conversion
    return np.asarray(self.array, dtype)
# ----------------------------------------------------------------------
# Unary Methods

# coercion: allow float()/int() on single-element Series
__float__ = _coerce_method(float)
__long__ = _coerce_method(int)  # legacy Python 2 alias; same converter as __int__
__int__ = _coerce_method(int)
# ----------------------------------------------------------------------

def _unpickle_series_compat(self, state):
    """
    Restore state from pickles produced by older pandas versions.

    ``state`` is either a dict (block-manager layout with a name entry)
    or a tuple (pre-0.12 layout: raw ndarray state plus index/name).
    Anything else is rejected.
    """
    if isinstance(state, dict):
        self._data = state["_data"]
        self.name = state["name"]
        self.index = self._data.index

    elif isinstance(state, tuple):

        # < 0.12 series pickle
        nd_state, own_state = state

        # recreate the ndarray
        data = np.empty(nd_state[1], dtype=nd_state[2])
        np.ndarray.__setstate__(data, nd_state)

        # backwards compat
        index, name = own_state[0], None
        if len(own_state) > 1:
            name = own_state[1]

        # recreate
        self._data = SingleBlockManager(data, index, fastpath=True)
        self._index = index
        self.name = name

    else:
        raise Exception(f"cannot unpickle legacy formats -> [{state}]")
# indexers
@property
def axes(self):
    """
    Return a list of the row axis labels.

    A Series has a single axis, so this is always ``[self.index]``.
    """
    return [self.index]
# ----------------------------------------------------------------------
# Indexing Methods

@Appender(generic.NDFrame.take.__doc__)
def take(self, indices, axis=0, is_copy=None, **kwargs) -> "Series":
    if is_copy is not None:
        warnings.warn(
            "is_copy is deprecated and will be removed in a future version. "
            "'take' always returns a copy, so there is no need to specify this.",
            FutureWarning,
            stacklevel=2,
        )
    nv.validate_take(tuple(), kwargs)

    indices = ensure_platform_int(indices)
    new_index = self.index.take(indices)

    if is_categorical_dtype(self):
        # https://github.com/pandas-dev/pandas/issues/20664
        # TODO: remove when the default Categorical.take behavior changes
        # normalize negative indices up front and disable fill semantics
        indices = maybe_convert_indices(indices, len(self._get_axis(axis)))
        kwargs = {"allow_fill": False}
    else:
        kwargs = {}
    new_values = self._values.take(indices, **kwargs)

    return self._constructor(
        new_values, index=new_index, fastpath=True
    ).__finalize__(self)
def _take_with_is_copy(self, indices, axis=0, **kwargs):
    """
    Internal version of the `take` method that sets the `_is_copy`
    attribute to keep track of the parent dataframe (using in indexing
    for the SettingWithCopyWarning). For Series this does the same
    as the public take (it never sets `_is_copy`).

    See the docstring of `take` for full explanation of the parameters.
    """
    return self.take(indices=indices, axis=axis, **kwargs)
def _ixs(self, i: int, axis: int = 0):
    """
    Return the i-th value or values in the Series by location.

    Parameters
    ----------
    i : int
    axis : int, default 0
        Unused here; presumably kept for signature symmetry with the
        DataFrame counterpart.

    Returns
    -------
    scalar (int) or Series (slice, sequence)
    """
    # dispatch to the values if we need
    values = self._values
    if isinstance(values, np.ndarray):
        # fast cython path for plain ndarrays
        return libindex.get_value_at(values, i)
    else:
        return values[i]
def _slice(self, slobj: slice, axis: int = 0, kind=None):
    """
    Return a slice of the Series; the index decides whether ``slobj``
    is interpreted positionally or by label.
    """
    slobj = self.index._convert_slice_indexer(slobj, kind=kind or "getitem")
    return self._get_values(slobj)
def __getitem__(self, key):
    """
    Look up ``key``, first as a single label via the index, then falling
    back to slice/boolean/list-like handling in ``_get_with``.
    """
    key = com.apply_if_callable(key, self)
    try:
        # fast path: treat key as a single label
        result = self.index.get_value(self, key)

        if not is_scalar(result):
            if is_list_like(result) and not isinstance(result, Series):

                # we need to box if loc of the key isn't scalar here
                # otherwise have inline ndarray/lists
                try:
                    if not is_scalar(self.index.get_loc(key)):
                        result = self._constructor(
                            result, index=[key] * len(result), dtype=self.dtype
                        ).__finalize__(self)
                except KeyError:
                    pass
        return result
    except InvalidIndexError:
        # not a single-label lookup; fall through to _get_with
        pass
    except (KeyError, ValueError):
        if isinstance(key, tuple) and isinstance(self.index, MultiIndex):
            # kludge
            pass
        elif key is Ellipsis:
            return self
        elif com.is_bool_indexer(key):
            pass
        else:

            # we can try to coerce the indexer (or this will raise)
            new_key = self.index._convert_scalar_indexer(key, kind="getitem")
            if type(new_key) != type(key):
                return self.__getitem__(new_key)
            raise

    if is_iterator(key):
        key = list(key)

    if com.is_bool_indexer(key):
        key = check_bool_indexer(self.index, key)

    return self._get_with(key)
def _get_with(self, key):
    """
    Fallback lookup for non-scalar keys: slices, tuples, and list-likes
    (dispatched by the inferred type of the key).
    """
    # other: fancy integer or otherwise
    if isinstance(key, slice):
        return self._slice(key)
    elif isinstance(key, ABCDataFrame):
        raise TypeError(
            "Indexing a Series with DataFrame is not "
            "supported, use the appropriate DataFrame column"
        )
    elif isinstance(key, tuple):
        try:
            return self._get_values_tuple(key)
        except ValueError:
            # if we don't have a MultiIndex, we may still be able to handle
            # a 1-tuple.  see test_1tuple_without_multiindex
            if len(key) == 1:
                key = key[0]
                if isinstance(key, slice):
                    return self._get_values(key)
            raise

    if not isinstance(key, (list, np.ndarray, Series, Index)):
        key = list(key)

    if isinstance(key, Index):
        key_type = key.inferred_type
    else:
        key_type = lib.infer_dtype(key, skipna=False)

    if key_type == "integer":
        # integer keys are positional unless the index itself is numeric,
        # in which case they are labels
        if self.index.is_integer() or self.index.is_floating():
            return self.loc[key]
        else:
            return self._get_values(key)
    elif key_type == "boolean":
        return self._get_values(key)

    if isinstance(key, (list, tuple)):
        # TODO: de-dup with tuple case handled above?
        # handle the dup indexing case GH#4246
        if len(key) == 1 and isinstance(key[0], slice):
            # [slice(0, 5, None)] will break if you convert to ndarray,
            # e.g. as requested by np.median
            # FIXME: hack
            return self._get_values(key)

        return self.loc[key]

    return self.reindex(key)
def _get_values_tuple(self, key):
    """
    Look up a tuple key; requires a MultiIndex unless the tuple contains
    None (the matplotlib 2D-indexing workaround).
    """
    # mpl hackaround
    if com.any_none(*key):
        # suppress warning from slicing the index with a 2d indexer.
        # eventually we'll want Series itself to warn.
        with warnings.catch_warnings():
            warnings.filterwarnings(
                "ignore", "Support for multi-dim", DeprecationWarning
            )
            return self._get_values(key)

    if not isinstance(self.index, MultiIndex):
        raise ValueError("Can only tuple-index with a MultiIndex")

    # If key is contained, would have returned by now
    indexer, new_index = self.index.get_loc_level(key)
    return self._constructor(self._values[indexer], index=new_index).__finalize__(
        self
    )
def _get_values(self, indexer):
    """
    Return a positional selection, boxed as a Series when the block
    manager can slice it; otherwise return the raw sliced values.
    """
    try:
        return self._constructor(
            self._data.get_slice(indexer), fastpath=True
        ).__finalize__(self)
    except ValueError:
        # mpl compat if we look up e.g. ser[:, np.newaxis];
        #  see tests.series.timeseries.test_mpl_compat_hack
        return self._values[indexer]
def _get_value(self, label, takeable: bool = False):
    """
    Quickly retrieve single value at passed index label.

    Parameters
    ----------
    label : object
    takeable : interpret the index as indexers, default False

    Returns
    -------
    scalar value
    """
    if takeable:
        # positional: box datetimelike scalars on the way out
        return com.maybe_box_datetimelike(self._values[label])
    return self.index.get_value(self._values, label)
def __setitem__(self, key, value):
    """
    Set value(s) at ``key``: try the fast index-engine path first, then
    fall back to positional/label/boolean handling on failure.
    """
    key = com.apply_if_callable(key, self)
    cacher_needs_updating = self._check_is_chained_assignment_possible()

    try:
        self._set_with_engine(key, value)
    except com.SettingWithCopyError:
        # chained-assignment guard must always propagate
        raise
    except (KeyError, ValueError):
        values = self._values
        if is_integer(key) and not self.index.inferred_type == "integer":
            # positional setitem on a non-integer index
            values[key] = value
        elif key is Ellipsis:
            self[:] = value
        else:
            self.loc[key] = value

    except TypeError as e:
        if isinstance(key, tuple) and not isinstance(self.index, MultiIndex):
            raise ValueError("Can only tuple-index with a MultiIndex")

        # python 3 type errors should be raised
        if _is_unorderable_exception(e):
            raise IndexError(key)

        if com.is_bool_indexer(key):
            key = check_bool_indexer(self.index, key)
            try:
                # mask is inverted because _where keeps existing values
                # where the condition is True
                self._where(~key, value, inplace=True)
                return
            except InvalidIndexError:
                pass

        self._set_with(key, value)

    if cacher_needs_updating:
        self._maybe_update_cacher()
def _set_with_engine(self, key, value):
    """
    Set ``value`` for label ``key`` directly on the underlying values,
    using the index engine when possible.
    """
    values = self._values
    if is_extension_array_dtype(values.dtype):
        # The cython indexing engine does not support ExtensionArrays.
        values[self.index.get_loc(key)] = value
        return
    try:
        self.index._engine.set_value(values, key, value)
        return
    except KeyError:
        # engine miss: resolve the location through the index instead
        values[self.index.get_loc(key)] = value
        return
def _set_with(self, key, value):
    """
    Fallback setter for non-engine keys: slices, new scalar labels, and
    list-like keys (label- or position-based by inferred type).
    """
    # other: fancy integer or otherwise
    if isinstance(key, slice):
        indexer = self.index._convert_slice_indexer(key, kind="getitem")
        return self._set_values(indexer, value)
    elif is_scalar(key) and not is_integer(key) and key not in self.index:
        # GH#12862 adding an new key to the Series
        # Note: have to exclude integers because that is ambiguously
        #  position-based
        self.loc[key] = value
        return

    else:
        if isinstance(key, tuple):
            try:
                # TODO: no test cases that get here
                self._set_values(key, value)
            except Exception:
                pass
            # NOTE(review): even if _set_values succeeds above, execution
            # falls through to the generic handling below — confirm this
            # double-set is intended.

        if is_scalar(key):
            key = [key]

        if isinstance(key, Index):
            key_type = key.inferred_type
            key = key._values
        else:
            key_type = lib.infer_dtype(key, skipna=False)

        if key_type == "integer":
            # integer keys are labels only when the index is integer too
            if self.index.inferred_type == "integer":
                self._set_labels(key, value)
            else:
                return self._set_values(key, value)
        elif key_type == "boolean":
            self._set_values(key.astype(np.bool_), value)
        else:
            self._set_labels(key, value)
  918. def _set_labels(self, key, value):
  919. key = com.asarray_tuplesafe(key)
  920. indexer = self.index.get_indexer(key)
  921. mask = indexer == -1
  922. if mask.any():
  923. raise ValueError(f"{key[mask]} not contained in the index")
  924. self._set_values(indexer, value)
def _set_values(self, key, value):
    """
    Set values positionally via the block manager's ``setitem``; the
    returned manager replaces ``self._data``.
    """
    if isinstance(key, Series):
        # unwrap a Series key to its underlying values
        key = key._values
    self._data = self._data.setitem(indexer=key, value=value)
    self._maybe_update_cacher()
def _set_value(self, label, value, takeable: bool = False):
    """
    Quickly set single value at passed label.

    If label is not contained, a new object is created with the label
    placed at the end of the result index.

    Parameters
    ----------
    label : object
        Partial indexing with MultiIndex not allowed.
    value : object
        Scalar value.
    takeable : interpret the index as indexers, default False

    Returns
    -------
    Series
        If label is contained, will be reference to calling Series,
        otherwise a new object.
    """
    try:
        if takeable:
            self._values[label] = value
        else:
            self.index._engine.set_value(self._values, label, value)
    except (KeyError, TypeError):

        # set using a non-recursive method
        self.loc[label] = value

    return self
# ----------------------------------------------------------------------
# Unsorted

@property
def _is_mixed_type(self):
    # a Series holds exactly one dtype, so it is never mixed
    return False
  962. def repeat(self, repeats, axis=None):
  963. """
  964. Repeat elements of a Series.
  965. Returns a new Series where each element of the current Series
  966. is repeated consecutively a given number of times.
  967. Parameters
  968. ----------
  969. repeats : int or array of ints
  970. The number of repetitions for each element. This should be a
  971. non-negative integer. Repeating 0 times will return an empty
  972. Series.
  973. axis : None
  974. Must be ``None``. Has no effect but is accepted for compatibility
  975. with numpy.
  976. Returns
  977. -------
  978. Series
  979. Newly created Series with repeated elements.
  980. See Also
  981. --------
  982. Index.repeat : Equivalent function for Index.
  983. numpy.repeat : Similar method for :class:`numpy.ndarray`.
  984. Examples
  985. --------
  986. >>> s = pd.Series(['a', 'b', 'c'])
  987. >>> s
  988. 0 a
  989. 1 b
  990. 2 c
  991. dtype: object
  992. >>> s.repeat(2)
  993. 0 a
  994. 0 a
  995. 1 b
  996. 1 b
  997. 2 c
  998. 2 c
  999. dtype: object
  1000. >>> s.repeat([1, 2, 3])
  1001. 0 a
  1002. 1 b
  1003. 1 b
  1004. 2 c
  1005. 2 c
  1006. 2 c
  1007. dtype: object
  1008. """
  1009. nv.validate_repeat(tuple(), dict(axis=axis))
  1010. new_index = self.index.repeat(repeats)
  1011. new_values = self._values.repeat(repeats)
  1012. return self._constructor(new_values, index=new_index).__finalize__(self)
  1013. def reset_index(self, level=None, drop=False, name=None, inplace=False):
  1014. """
  1015. Generate a new DataFrame or Series with the index reset.
  1016. This is useful when the index needs to be treated as a column, or
  1017. when the index is meaningless and needs to be reset to the default
  1018. before another operation.
  1019. Parameters
  1020. ----------
  1021. level : int, str, tuple, or list, default optional
  1022. For a Series with a MultiIndex, only remove the specified levels
  1023. from the index. Removes all levels by default.
  1024. drop : bool, default False
  1025. Just reset the index, without inserting it as a column in
  1026. the new DataFrame.
  1027. name : object, optional
  1028. The name to use for the column containing the original Series
  1029. values. Uses ``self.name`` by default. This argument is ignored
  1030. when `drop` is True.
  1031. inplace : bool, default False
  1032. Modify the Series in place (do not create a new object).
  1033. Returns
  1034. -------
  1035. Series or DataFrame
  1036. When `drop` is False (the default), a DataFrame is returned.
  1037. The newly created columns will come first in the DataFrame,
  1038. followed by the original Series values.
  1039. When `drop` is True, a `Series` is returned.
  1040. In either case, if ``inplace=True``, no value is returned.
  1041. See Also
  1042. --------
  1043. DataFrame.reset_index: Analogous function for DataFrame.
  1044. Examples
  1045. --------
  1046. >>> s = pd.Series([1, 2, 3, 4], name='foo',
  1047. ... index=pd.Index(['a', 'b', 'c', 'd'], name='idx'))
  1048. Generate a DataFrame with default index.
  1049. >>> s.reset_index()
  1050. idx foo
  1051. 0 a 1
  1052. 1 b 2
  1053. 2 c 3
  1054. 3 d 4
  1055. To specify the name of the new column use `name`.
  1056. >>> s.reset_index(name='values')
  1057. idx values
  1058. 0 a 1
  1059. 1 b 2
  1060. 2 c 3
  1061. 3 d 4
  1062. To generate a new Series with the default set `drop` to True.
  1063. >>> s.reset_index(drop=True)
  1064. 0 1
  1065. 1 2
  1066. 2 3
  1067. 3 4
  1068. Name: foo, dtype: int64
  1069. To update the Series in place, without generating a new one
  1070. set `inplace` to True. Note that it also requires ``drop=True``.
  1071. >>> s.reset_index(inplace=True, drop=True)
  1072. >>> s
  1073. 0 1
  1074. 1 2
  1075. 2 3
  1076. 3 4
  1077. Name: foo, dtype: int64
  1078. The `level` parameter is interesting for Series with a multi-level
  1079. index.
  1080. >>> arrays = [np.array(['bar', 'bar', 'baz', 'baz']),
  1081. ... np.array(['one', 'two', 'one', 'two'])]
  1082. >>> s2 = pd.Series(
  1083. ... range(4), name='foo',
  1084. ... index=pd.MultiIndex.from_arrays(arrays,
  1085. ... names=['a', 'b']))
  1086. To remove a specific level from the Index, use `level`.
  1087. >>> s2.reset_index(level='a')
  1088. a foo
  1089. b
  1090. one bar 0
  1091. two bar 1
  1092. one baz 2
  1093. two baz 3
  1094. If `level` is not set, all levels are removed from the Index.
  1095. >>> s2.reset_index()
  1096. a b foo
  1097. 0 bar one 0
  1098. 1 bar two 1
  1099. 2 baz one 2
  1100. 3 baz two 3
  1101. """
  1102. inplace = validate_bool_kwarg(inplace, "inplace")
  1103. if drop:
  1104. new_index = ibase.default_index(len(self))
  1105. if level is not None:
  1106. if not isinstance(level, (tuple, list)):
  1107. level = [level]
  1108. level = [self.index._get_level_number(lev) for lev in level]
  1109. if len(level) < self.index.nlevels:
  1110. new_index = self.index.droplevel(level)
  1111. if inplace:
  1112. self.index = new_index
  1113. # set name if it was passed, otherwise, keep the previous name
  1114. self.name = name or self.name
  1115. else:
  1116. return self._constructor(
  1117. self._values.copy(), index=new_index
  1118. ).__finalize__(self)
  1119. elif inplace:
  1120. raise TypeError(
  1121. "Cannot reset_index inplace on a Series to create a DataFrame"
  1122. )
  1123. else:
  1124. df = self.to_frame(name)
  1125. return df.reset_index(level=level, drop=drop)
  1126. # ----------------------------------------------------------------------
  1127. # Rendering Methods
  1128. def __repr__(self) -> str:
  1129. """
  1130. Return a string representation for a particular Series.
  1131. """
  1132. buf = StringIO("")
  1133. width, height = get_terminal_size()
  1134. max_rows = (
  1135. height
  1136. if get_option("display.max_rows") == 0
  1137. else get_option("display.max_rows")
  1138. )
  1139. min_rows = (
  1140. height
  1141. if get_option("display.max_rows") == 0
  1142. else get_option("display.min_rows")
  1143. )
  1144. show_dimensions = get_option("display.show_dimensions")
  1145. self.to_string(
  1146. buf=buf,
  1147. name=self.name,
  1148. dtype=self.dtype,
  1149. min_rows=min_rows,
  1150. max_rows=max_rows,
  1151. length=show_dimensions,
  1152. )
  1153. result = buf.getvalue()
  1154. return result
def to_string(
    self,
    buf=None,
    na_rep="NaN",
    float_format=None,
    header=True,
    index=True,
    length=False,
    dtype=False,
    name=False,
    max_rows=None,
    min_rows=None,
):
    """
    Render a string representation of the Series.

    Parameters
    ----------
    buf : StringIO-like, optional
        Buffer to write to.
    na_rep : str, optional
        String representation of NaN to use, default 'NaN'.
    float_format : one-parameter function, optional
        Formatter function to apply to columns' elements if they are
        floats, default None.
    header : bool, default True
        Add the Series header (index name).
    index : bool, optional
        Add index (row) labels, default True.
    length : bool, default False
        Add the Series length.
    dtype : bool, default False
        Add the Series dtype.
    name : bool, default False
        Add the Series name if not None.
    max_rows : int, optional
        Maximum number of rows to show before truncating. If None, show
        all.
    min_rows : int, optional
        The number of rows to display in a truncated repr (when number
        of rows is above `max_rows`).

    Returns
    -------
    str or None
        String representation of Series if ``buf=None``, otherwise None.
    """
    formatter = fmt.SeriesFormatter(
        self,
        name=name,
        length=length,
        header=header,
        index=index,
        dtype=dtype,
        na_rep=na_rep,
        float_format=float_format,
        min_rows=min_rows,
        max_rows=max_rows,
    )
    result = formatter.to_string()

    # catch contract violations
    if not isinstance(result, str):
        raise AssertionError(
            "result must be of type str, type"
            f" of result is {repr(type(result).__name__)}"
        )

    if buf is None:
        return result
    else:
        try:
            buf.write(result)
        except AttributeError:
            # buf has no write(): treat it as a file path.
            # NOTE(review): opened without an explicit encoding, so the
            # platform default is used — confirm whether utf-8 should be
            # forced here.
            with open(buf, "w") as f:
                f.write(result)
@Appender(
    """
    Examples
    --------
    >>> s = pd.Series(["elk", "pig", "dog", "quetzal"], name="animal")
    >>> print(s.to_markdown())
    |    | animal   |
    |---:|:---------|
    |  0 | elk      |
    |  1 | pig      |
    |  2 | dog      |
    |  3 | quetzal  |
    """
)
@Substitution(klass="Series")
@Appender(generic._shared_docs["to_markdown"])
def to_markdown(
    self, buf: Optional[IO[str]] = None, mode: Optional[str] = None, **kwargs
) -> Optional[str]:
    # delegate to DataFrame.to_markdown on a one-column frame
    return self.to_frame().to_markdown(buf, mode, **kwargs)
  1247. # ----------------------------------------------------------------------
  1248. def items(self):
  1249. """
  1250. Lazily iterate over (index, value) tuples.
  1251. This method returns an iterable tuple (index, value). This is
  1252. convenient if you want to create a lazy iterator.
  1253. Returns
  1254. -------
  1255. iterable
  1256. Iterable of tuples containing the (index, value) pairs from a
  1257. Series.
  1258. See Also
  1259. --------
  1260. DataFrame.items : Iterate over (column name, Series) pairs.
  1261. DataFrame.iterrows : Iterate over DataFrame rows as (index, Series) pairs.
  1262. Examples
  1263. --------
  1264. >>> s = pd.Series(['A', 'B', 'C'])
  1265. >>> for index, value in s.items():
  1266. ... print(f"Index : {index}, Value : {value}")
  1267. Index : 0, Value : A
  1268. Index : 1, Value : B
  1269. Index : 2, Value : C
  1270. """
  1271. return zip(iter(self.index), iter(self))
@Appender(items.__doc__)
def iteritems(self):
    # deprecated-style alias of items()
    return self.items()
# ----------------------------------------------------------------------
# Misc public methods

def keys(self):
    """
    Return alias for index.

    Provides dict-like access to the row labels.

    Returns
    -------
    Index
        Index of the Series.
    """
    return self.index
  1286. def to_dict(self, into=dict):
  1287. """
  1288. Convert Series to {label -> value} dict or dict-like object.
  1289. Parameters
  1290. ----------
  1291. into : class, default dict
  1292. The collections.abc.Mapping subclass to use as the return
  1293. object. Can be the actual class or an empty
  1294. instance of the mapping type you want. If you want a
  1295. collections.defaultdict, you must pass it initialized.
  1296. .. versionadded:: 0.21.0
  1297. Returns
  1298. -------
  1299. collections.abc.Mapping
  1300. Key-value representation of Series.
  1301. Examples
  1302. --------
  1303. >>> s = pd.Series([1, 2, 3, 4])
  1304. >>> s.to_dict()
  1305. {0: 1, 1: 2, 2: 3, 3: 4}
  1306. >>> from collections import OrderedDict, defaultdict
  1307. >>> s.to_dict(OrderedDict)
  1308. OrderedDict([(0, 1), (1, 2), (2, 3), (3, 4)])
  1309. >>> dd = defaultdict(list)
  1310. >>> s.to_dict(dd)
  1311. defaultdict(<class 'list'>, {0: 1, 1: 2, 2: 3, 3: 4})
  1312. """
  1313. # GH16122
  1314. into_c = com.standardize_mapping(into)
  1315. return into_c(self.items())
  1316. def to_frame(self, name=None):
  1317. """
  1318. Convert Series to DataFrame.
  1319. Parameters
  1320. ----------
  1321. name : object, default None
  1322. The passed name should substitute for the series name (if it has
  1323. one).
  1324. Returns
  1325. -------
  1326. DataFrame
  1327. DataFrame representation of Series.
  1328. Examples
  1329. --------
  1330. >>> s = pd.Series(["a", "b", "c"],
  1331. ... name="vals")
  1332. >>> s.to_frame()
  1333. vals
  1334. 0 a
  1335. 1 b
  1336. 2 c
  1337. """
  1338. if name is None:
  1339. df = self._constructor_expanddim(self)
  1340. else:
  1341. df = self._constructor_expanddim({name: self})
  1342. return df
  1343. def _set_name(self, name, inplace=False):
  1344. """
  1345. Set the Series name.
  1346. Parameters
  1347. ----------
  1348. name : str
  1349. inplace : bool
  1350. Whether to modify `self` directly or return a copy.
  1351. """
  1352. inplace = validate_bool_kwarg(inplace, "inplace")
  1353. ser = self if inplace else self.copy()
  1354. ser.name = name
  1355. return ser
@Appender(
    """
Examples
--------
>>> ser = pd.Series([390., 350., 30., 20.],
... index=['Falcon', 'Falcon', 'Parrot', 'Parrot'], name="Max Speed")
>>> ser
Falcon 390.0
Falcon 350.0
Parrot 30.0
Parrot 20.0
Name: Max Speed, dtype: float64
>>> ser.groupby(["a", "b", "a", "b"]).mean()
a 210.0
b 185.0
Name: Max Speed, dtype: float64
>>> ser.groupby(level=0).mean()
Falcon 370.0
Parrot 25.0
Name: Max Speed, dtype: float64
>>> ser.groupby(ser > 100).mean()
Max Speed
False 25.0
True 370.0
Name: Max Speed, dtype: float64
**Grouping by Indexes**
We can groupby different levels of a hierarchical index
using the `level` parameter:
>>> arrays = [['Falcon', 'Falcon', 'Parrot', 'Parrot'],
... ['Captive', 'Wild', 'Captive', 'Wild']]
>>> index = pd.MultiIndex.from_arrays(arrays, names=('Animal', 'Type'))
>>> ser = pd.Series([390., 350., 30., 20.], index=index, name="Max Speed")
>>> ser
Animal Type
Falcon Captive 390.0
Wild 350.0
Parrot Captive 30.0
Wild 20.0
Name: Max Speed, dtype: float64
>>> ser.groupby(level=0).mean()
Animal
Falcon 370.0
Parrot 25.0
Name: Max Speed, dtype: float64
>>> ser.groupby(level="Type").mean()
Type
Captive 210.0
Wild 185.0
Name: Max Speed, dtype: float64
"""
)
@Appender(generic._shared_docs["groupby"] % _shared_doc_kwargs)
def groupby(
    self,
    by=None,
    axis=0,
    level=None,
    as_index: bool = True,
    sort: bool = True,
    group_keys: bool = True,
    squeeze: bool = False,
    observed: bool = False,
) -> "groupby_generic.SeriesGroupBy":
    # The full user-facing docstring comes from the shared template above;
    # the examples block is appended by the outer decorator.
    if level is None and by is None:
        raise TypeError("You have to supply one of 'by' and 'level'")
    # Normalize/validate the axis argument (a Series only has axis 0).
    axis = self._get_axis_number(axis)
    return groupby_generic.SeriesGroupBy(
        obj=self,
        keys=by,
        axis=axis,
        level=level,
        as_index=as_index,
        sort=sort,
        group_keys=group_keys,
        squeeze=squeeze,
        observed=observed,
    )
  1433. # ----------------------------------------------------------------------
  1434. # Statistics, overridden ndarray methods
  1435. # TODO: integrate bottleneck
  1436. def count(self, level=None):
  1437. """
  1438. Return number of non-NA/null observations in the Series.
  1439. Parameters
  1440. ----------
  1441. level : int or level name, default None
  1442. If the axis is a MultiIndex (hierarchical), count along a
  1443. particular level, collapsing into a smaller Series.
  1444. Returns
  1445. -------
  1446. int or Series (if level specified)
  1447. Number of non-null values in the Series.
  1448. Examples
  1449. --------
  1450. >>> s = pd.Series([0.0, 1.0, np.nan])
  1451. >>> s.count()
  1452. 2
  1453. """
  1454. if level is None:
  1455. return notna(self.array).sum()
  1456. if isinstance(level, str):
  1457. level = self.index._get_level_number(level)
  1458. lev = self.index.levels[level]
  1459. level_codes = np.array(self.index.codes[level], subok=False, copy=True)
  1460. mask = level_codes == -1
  1461. if mask.any():
  1462. level_codes[mask] = cnt = len(lev)
  1463. lev = lev.insert(cnt, lev._na_value)
  1464. obs = level_codes[notna(self.values)]
  1465. out = np.bincount(obs, minlength=len(lev) or None)
  1466. return self._constructor(out, index=lev, dtype="int64").__finalize__(self)
  1467. def mode(self, dropna=True):
  1468. """
  1469. Return the mode(s) of the dataset.
  1470. Always returns Series even if only one value is returned.
  1471. Parameters
  1472. ----------
  1473. dropna : bool, default True
  1474. Don't consider counts of NaN/NaT.
  1475. .. versionadded:: 0.24.0
  1476. Returns
  1477. -------
  1478. Series
  1479. Modes of the Series in sorted order.
  1480. """
  1481. # TODO: Add option for bins like value_counts()
  1482. return algorithms.mode(self, dropna=dropna)
  1483. def unique(self):
  1484. """
  1485. Return unique values of Series object.
  1486. Uniques are returned in order of appearance. Hash table-based unique,
  1487. therefore does NOT sort.
  1488. Returns
  1489. -------
  1490. ndarray or ExtensionArray
  1491. The unique values returned as a NumPy array. See Notes.
  1492. See Also
  1493. --------
  1494. unique : Top-level unique method for any 1-d array-like object.
  1495. Index.unique : Return Index with unique values from an Index object.
  1496. Notes
  1497. -----
  1498. Returns the unique values as a NumPy array. In case of an
  1499. extension-array backed Series, a new
  1500. :class:`~api.extensions.ExtensionArray` of that type with just
  1501. the unique values is returned. This includes
  1502. * Categorical
  1503. * Period
  1504. * Datetime with Timezone
  1505. * Interval
  1506. * Sparse
  1507. * IntegerNA
  1508. See Examples section.
  1509. Examples
  1510. --------
  1511. >>> pd.Series([2, 1, 3, 3], name='A').unique()
  1512. array([2, 1, 3])
  1513. >>> pd.Series([pd.Timestamp('2016-01-01') for _ in range(3)]).unique()
  1514. array(['2016-01-01T00:00:00.000000000'], dtype='datetime64[ns]')
  1515. >>> pd.Series([pd.Timestamp('2016-01-01', tz='US/Eastern')
  1516. ... for _ in range(3)]).unique()
  1517. <DatetimeArray>
  1518. ['2016-01-01 00:00:00-05:00']
  1519. Length: 1, dtype: datetime64[ns, US/Eastern]
  1520. An unordered Categorical will return categories in the order of
  1521. appearance.
  1522. >>> pd.Series(pd.Categorical(list('baabc'))).unique()
  1523. [b, a, c]
  1524. Categories (3, object): [b, a, c]
  1525. An ordered Categorical preserves the category ordering.
  1526. >>> pd.Series(pd.Categorical(list('baabc'), categories=list('abc'),
  1527. ... ordered=True)).unique()
  1528. [b, a, c]
  1529. Categories (3, object): [a < b < c]
  1530. """
  1531. result = super().unique()
  1532. return result
  1533. def drop_duplicates(self, keep="first", inplace=False):
  1534. """
  1535. Return Series with duplicate values removed.
  1536. Parameters
  1537. ----------
  1538. keep : {'first', 'last', ``False``}, default 'first'
  1539. Method to handle dropping duplicates:
  1540. - 'first' : Drop duplicates except for the first occurrence.
  1541. - 'last' : Drop duplicates except for the last occurrence.
  1542. - ``False`` : Drop all duplicates.
  1543. inplace : bool, default ``False``
  1544. If ``True``, performs operation inplace and returns None.
  1545. Returns
  1546. -------
  1547. Series
  1548. Series with duplicates dropped.
  1549. See Also
  1550. --------
  1551. Index.drop_duplicates : Equivalent method on Index.
  1552. DataFrame.drop_duplicates : Equivalent method on DataFrame.
  1553. Series.duplicated : Related method on Series, indicating duplicate
  1554. Series values.
  1555. Examples
  1556. --------
  1557. Generate a Series with duplicated entries.
  1558. >>> s = pd.Series(['lama', 'cow', 'lama', 'beetle', 'lama', 'hippo'],
  1559. ... name='animal')
  1560. >>> s
  1561. 0 lama
  1562. 1 cow
  1563. 2 lama
  1564. 3 beetle
  1565. 4 lama
  1566. 5 hippo
  1567. Name: animal, dtype: object
  1568. With the 'keep' parameter, the selection behaviour of duplicated values
  1569. can be changed. The value 'first' keeps the first occurrence for each
  1570. set of duplicated entries. The default value of keep is 'first'.
  1571. >>> s.drop_duplicates()
  1572. 0 lama
  1573. 1 cow
  1574. 3 beetle
  1575. 5 hippo
  1576. Name: animal, dtype: object
  1577. The value 'last' for parameter 'keep' keeps the last occurrence for
  1578. each set of duplicated entries.
  1579. >>> s.drop_duplicates(keep='last')
  1580. 1 cow
  1581. 3 beetle
  1582. 4 lama
  1583. 5 hippo
  1584. Name: animal, dtype: object
  1585. The value ``False`` for parameter 'keep' discards all sets of
  1586. duplicated entries. Setting the value of 'inplace' to ``True`` performs
  1587. the operation inplace and returns ``None``.
  1588. >>> s.drop_duplicates(keep=False, inplace=True)
  1589. >>> s
  1590. 1 cow
  1591. 3 beetle
  1592. 5 hippo
  1593. Name: animal, dtype: object
  1594. """
  1595. return super().drop_duplicates(keep=keep, inplace=inplace)
  1596. def duplicated(self, keep="first"):
  1597. """
  1598. Indicate duplicate Series values.
  1599. Duplicated values are indicated as ``True`` values in the resulting
  1600. Series. Either all duplicates, all except the first or all except the
  1601. last occurrence of duplicates can be indicated.
  1602. Parameters
  1603. ----------
  1604. keep : {'first', 'last', False}, default 'first'
  1605. Method to handle dropping duplicates:
  1606. - 'first' : Mark duplicates as ``True`` except for the first
  1607. occurrence.
  1608. - 'last' : Mark duplicates as ``True`` except for the last
  1609. occurrence.
  1610. - ``False`` : Mark all duplicates as ``True``.
  1611. Returns
  1612. -------
  1613. Series
  1614. Series indicating whether each value has occurred in the
  1615. preceding values.
  1616. See Also
  1617. --------
  1618. Index.duplicated : Equivalent method on pandas.Index.
  1619. DataFrame.duplicated : Equivalent method on pandas.DataFrame.
  1620. Series.drop_duplicates : Remove duplicate values from Series.
  1621. Examples
  1622. --------
  1623. By default, for each set of duplicated values, the first occurrence is
  1624. set on False and all others on True:
  1625. >>> animals = pd.Series(['lama', 'cow', 'lama', 'beetle', 'lama'])
  1626. >>> animals.duplicated()
  1627. 0 False
  1628. 1 False
  1629. 2 True
  1630. 3 False
  1631. 4 True
  1632. dtype: bool
  1633. which is equivalent to
  1634. >>> animals.duplicated(keep='first')
  1635. 0 False
  1636. 1 False
  1637. 2 True
  1638. 3 False
  1639. 4 True
  1640. dtype: bool
  1641. By using 'last', the last occurrence of each set of duplicated values
  1642. is set on False and all others on True:
  1643. >>> animals.duplicated(keep='last')
  1644. 0 True
  1645. 1 False
  1646. 2 True
  1647. 3 False
  1648. 4 False
  1649. dtype: bool
  1650. By setting keep on ``False``, all duplicates are True:
  1651. >>> animals.duplicated(keep=False)
  1652. 0 True
  1653. 1 False
  1654. 2 True
  1655. 3 False
  1656. 4 True
  1657. dtype: bool
  1658. """
  1659. return super().duplicated(keep=keep)
  1660. def idxmin(self, axis=0, skipna=True, *args, **kwargs):
  1661. """
  1662. Return the row label of the minimum value.
  1663. If multiple values equal the minimum, the first row label with that
  1664. value is returned.
  1665. Parameters
  1666. ----------
  1667. axis : int, default 0
  1668. For compatibility with DataFrame.idxmin. Redundant for application
  1669. on Series.
  1670. skipna : bool, default True
  1671. Exclude NA/null values. If the entire Series is NA, the result
  1672. will be NA.
  1673. *args, **kwargs
  1674. Additional arguments and keywords have no effect but might be
  1675. accepted for compatibility with NumPy.
  1676. Returns
  1677. -------
  1678. Index
  1679. Label of the minimum value.
  1680. Raises
  1681. ------
  1682. ValueError
  1683. If the Series is empty.
  1684. See Also
  1685. --------
  1686. numpy.argmin : Return indices of the minimum values
  1687. along the given axis.
  1688. DataFrame.idxmin : Return index of first occurrence of minimum
  1689. over requested axis.
  1690. Series.idxmax : Return index *label* of the first occurrence
  1691. of maximum of values.
  1692. Notes
  1693. -----
  1694. This method is the Series version of ``ndarray.argmin``. This method
  1695. returns the label of the minimum, while ``ndarray.argmin`` returns
  1696. the position. To get the position, use ``series.values.argmin()``.
  1697. Examples
  1698. --------
  1699. >>> s = pd.Series(data=[1, None, 4, 1],
  1700. ... index=['A', 'B', 'C', 'D'])
  1701. >>> s
  1702. A 1.0
  1703. B NaN
  1704. C 4.0
  1705. D 1.0
  1706. dtype: float64
  1707. >>> s.idxmin()
  1708. 'A'
  1709. If `skipna` is False and there is an NA value in the data,
  1710. the function returns ``nan``.
  1711. >>> s.idxmin(skipna=False)
  1712. nan
  1713. """
  1714. skipna = nv.validate_argmin_with_skipna(skipna, args, kwargs)
  1715. i = nanops.nanargmin(com.values_from_object(self), skipna=skipna)
  1716. if i == -1:
  1717. return np.nan
  1718. return self.index[i]
  1719. def idxmax(self, axis=0, skipna=True, *args, **kwargs):
  1720. """
  1721. Return the row label of the maximum value.
  1722. If multiple values equal the maximum, the first row label with that
  1723. value is returned.
  1724. Parameters
  1725. ----------
  1726. axis : int, default 0
  1727. For compatibility with DataFrame.idxmax. Redundant for application
  1728. on Series.
  1729. skipna : bool, default True
  1730. Exclude NA/null values. If the entire Series is NA, the result
  1731. will be NA.
  1732. *args, **kwargs
  1733. Additional arguments and keywords have no effect but might be
  1734. accepted for compatibility with NumPy.
  1735. Returns
  1736. -------
  1737. Index
  1738. Label of the maximum value.
  1739. Raises
  1740. ------
  1741. ValueError
  1742. If the Series is empty.
  1743. See Also
  1744. --------
  1745. numpy.argmax : Return indices of the maximum values
  1746. along the given axis.
  1747. DataFrame.idxmax : Return index of first occurrence of maximum
  1748. over requested axis.
  1749. Series.idxmin : Return index *label* of the first occurrence
  1750. of minimum of values.
  1751. Notes
  1752. -----
  1753. This method is the Series version of ``ndarray.argmax``. This method
  1754. returns the label of the maximum, while ``ndarray.argmax`` returns
  1755. the position. To get the position, use ``series.values.argmax()``.
  1756. Examples
  1757. --------
  1758. >>> s = pd.Series(data=[1, None, 4, 3, 4],
  1759. ... index=['A', 'B', 'C', 'D', 'E'])
  1760. >>> s
  1761. A 1.0
  1762. B NaN
  1763. C 4.0
  1764. D 3.0
  1765. E 4.0
  1766. dtype: float64
  1767. >>> s.idxmax()
  1768. 'C'
  1769. If `skipna` is False and there is an NA value in the data,
  1770. the function returns ``nan``.
  1771. >>> s.idxmax(skipna=False)
  1772. nan
  1773. """
  1774. skipna = nv.validate_argmax_with_skipna(skipna, args, kwargs)
  1775. i = nanops.nanargmax(com.values_from_object(self), skipna=skipna)
  1776. if i == -1:
  1777. return np.nan
  1778. return self.index[i]
  1779. def round(self, decimals=0, *args, **kwargs):
  1780. """
  1781. Round each value in a Series to the given number of decimals.
  1782. Parameters
  1783. ----------
  1784. decimals : int, default 0
  1785. Number of decimal places to round to. If decimals is negative,
  1786. it specifies the number of positions to the left of the decimal point.
  1787. Returns
  1788. -------
  1789. Series
  1790. Rounded values of the Series.
  1791. See Also
  1792. --------
  1793. numpy.around : Round values of an np.array.
  1794. DataFrame.round : Round values of a DataFrame.
  1795. Examples
  1796. --------
  1797. >>> s = pd.Series([0.1, 1.3, 2.7])
  1798. >>> s.round()
  1799. 0 0.0
  1800. 1 1.0
  1801. 2 3.0
  1802. dtype: float64
  1803. """
  1804. nv.validate_round(args, kwargs)
  1805. result = com.values_from_object(self).round(decimals)
  1806. result = self._constructor(result, index=self.index).__finalize__(self)
  1807. return result
  1808. def quantile(self, q=0.5, interpolation="linear"):
  1809. """
  1810. Return value at the given quantile.
  1811. Parameters
  1812. ----------
  1813. q : float or array-like, default 0.5 (50% quantile)
  1814. The quantile(s) to compute, which can lie in range: 0 <= q <= 1.
  1815. interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'}
  1816. This optional parameter specifies the interpolation method to use,
  1817. when the desired quantile lies between two data points `i` and `j`:
  1818. * linear: `i + (j - i) * fraction`, where `fraction` is the
  1819. fractional part of the index surrounded by `i` and `j`.
  1820. * lower: `i`.
  1821. * higher: `j`.
  1822. * nearest: `i` or `j` whichever is nearest.
  1823. * midpoint: (`i` + `j`) / 2.
  1824. Returns
  1825. -------
  1826. float or Series
  1827. If ``q`` is an array, a Series will be returned where the
  1828. index is ``q`` and the values are the quantiles, otherwise
  1829. a float will be returned.
  1830. See Also
  1831. --------
  1832. core.window.Rolling.quantile
  1833. numpy.percentile
  1834. Examples
  1835. --------
  1836. >>> s = pd.Series([1, 2, 3, 4])
  1837. >>> s.quantile(.5)
  1838. 2.5
  1839. >>> s.quantile([.25, .5, .75])
  1840. 0.25 1.75
  1841. 0.50 2.50
  1842. 0.75 3.25
  1843. dtype: float64
  1844. """
  1845. validate_percentile(q)
  1846. # We dispatch to DataFrame so that core.internals only has to worry
  1847. # about 2D cases.
  1848. df = self.to_frame()
  1849. result = df.quantile(q=q, interpolation=interpolation, numeric_only=False)
  1850. if result.ndim == 2:
  1851. result = result.iloc[:, 0]
  1852. if is_list_like(q):
  1853. result.name = self.name
  1854. return self._constructor(result, index=Float64Index(q), name=self.name)
  1855. else:
  1856. # scalar
  1857. return result.iloc[0]
  1858. def corr(self, other, method="pearson", min_periods=None):
  1859. """
  1860. Compute correlation with `other` Series, excluding missing values.
  1861. Parameters
  1862. ----------
  1863. other : Series
  1864. Series with which to compute the correlation.
  1865. method : {'pearson', 'kendall', 'spearman'} or callable
  1866. Method used to compute correlation:
  1867. - pearson : Standard correlation coefficient
  1868. - kendall : Kendall Tau correlation coefficient
  1869. - spearman : Spearman rank correlation
  1870. - callable: Callable with input two 1d ndarrays and returning a float.
  1871. .. versionadded:: 0.24.0
  1872. Note that the returned matrix from corr will have 1 along the
  1873. diagonals and will be symmetric regardless of the callable's
  1874. behavior.
  1875. min_periods : int, optional
  1876. Minimum number of observations needed to have a valid result.
  1877. Returns
  1878. -------
  1879. float
  1880. Correlation with other.
  1881. Examples
  1882. --------
  1883. >>> def histogram_intersection(a, b):
  1884. ... v = np.minimum(a, b).sum().round(decimals=1)
  1885. ... return v
  1886. >>> s1 = pd.Series([.2, .0, .6, .2])
  1887. >>> s2 = pd.Series([.3, .6, .0, .1])
  1888. >>> s1.corr(s2, method=histogram_intersection)
  1889. 0.3
  1890. """
  1891. this, other = self.align(other, join="inner", copy=False)
  1892. if len(this) == 0:
  1893. return np.nan
  1894. if method in ["pearson", "spearman", "kendall"] or callable(method):
  1895. return nanops.nancorr(
  1896. this.values, other.values, method=method, min_periods=min_periods
  1897. )
  1898. raise ValueError(
  1899. "method must be either 'pearson', "
  1900. "'spearman', 'kendall', or a callable, "
  1901. f"'{method}' was supplied"
  1902. )
  1903. def cov(self, other, min_periods=None):
  1904. """
  1905. Compute covariance with Series, excluding missing values.
  1906. Parameters
  1907. ----------
  1908. other : Series
  1909. Series with which to compute the covariance.
  1910. min_periods : int, optional
  1911. Minimum number of observations needed to have a valid result.
  1912. Returns
  1913. -------
  1914. float
  1915. Covariance between Series and other normalized by N-1
  1916. (unbiased estimator).
  1917. Examples
  1918. --------
  1919. >>> s1 = pd.Series([0.90010907, 0.13484424, 0.62036035])
  1920. >>> s2 = pd.Series([0.12528585, 0.26962463, 0.51111198])
  1921. >>> s1.cov(s2)
  1922. -0.01685762652715874
  1923. """
  1924. this, other = self.align(other, join="inner", copy=False)
  1925. if len(this) == 0:
  1926. return np.nan
  1927. return nanops.nancov(this.values, other.values, min_periods=min_periods)
  1928. def diff(self, periods=1):
  1929. """
  1930. First discrete difference of element.
  1931. Calculates the difference of a Series element compared with another
  1932. element in the Series (default is element in previous row).
  1933. Parameters
  1934. ----------
  1935. periods : int, default 1
  1936. Periods to shift for calculating difference, accepts negative
  1937. values.
  1938. Returns
  1939. -------
  1940. Series
  1941. First differences of the Series.
  1942. See Also
  1943. --------
  1944. Series.pct_change: Percent change over given number of periods.
  1945. Series.shift: Shift index by desired number of periods with an
  1946. optional time freq.
  1947. DataFrame.diff: First discrete difference of object.
  1948. Notes
  1949. -----
  1950. For boolean dtypes, this uses :meth:`operator.xor` rather than
  1951. :meth:`operator.sub`.
  1952. Examples
  1953. --------
  1954. Difference with previous row
  1955. >>> s = pd.Series([1, 1, 2, 3, 5, 8])
  1956. >>> s.diff()
  1957. 0 NaN
  1958. 1 0.0
  1959. 2 1.0
  1960. 3 1.0
  1961. 4 2.0
  1962. 5 3.0
  1963. dtype: float64
  1964. Difference with 3rd previous row
  1965. >>> s.diff(periods=3)
  1966. 0 NaN
  1967. 1 NaN
  1968. 2 NaN
  1969. 3 2.0
  1970. 4 4.0
  1971. 5 6.0
  1972. dtype: float64
  1973. Difference with following row
  1974. >>> s.diff(periods=-1)
  1975. 0 0.0
  1976. 1 -1.0
  1977. 2 -1.0
  1978. 3 -2.0
  1979. 4 -3.0
  1980. 5 NaN
  1981. dtype: float64
  1982. """
  1983. result = algorithms.diff(self.array, periods)
  1984. return self._constructor(result, index=self.index).__finalize__(self)
  1985. def autocorr(self, lag=1):
  1986. """
  1987. Compute the lag-N autocorrelation.
  1988. This method computes the Pearson correlation between
  1989. the Series and its shifted self.
  1990. Parameters
  1991. ----------
  1992. lag : int, default 1
  1993. Number of lags to apply before performing autocorrelation.
  1994. Returns
  1995. -------
  1996. float
  1997. The Pearson correlation between self and self.shift(lag).
  1998. See Also
  1999. --------
  2000. Series.corr : Compute the correlation between two Series.
  2001. Series.shift : Shift index by desired number of periods.
  2002. DataFrame.corr : Compute pairwise correlation of columns.
  2003. DataFrame.corrwith : Compute pairwise correlation between rows or
  2004. columns of two DataFrame objects.
  2005. Notes
  2006. -----
  2007. If the Pearson correlation is not well defined return 'NaN'.
  2008. Examples
  2009. --------
  2010. >>> s = pd.Series([0.25, 0.5, 0.2, -0.05])
  2011. >>> s.autocorr() # doctest: +ELLIPSIS
  2012. 0.10355...
  2013. >>> s.autocorr(lag=2) # doctest: +ELLIPSIS
  2014. -0.99999...
  2015. If the Pearson correlation is not well defined, then 'NaN' is returned.
  2016. >>> s = pd.Series([1, 0, 0, 0])
  2017. >>> s.autocorr()
  2018. nan
  2019. """
  2020. return self.corr(self.shift(lag))
  2021. def dot(self, other):
  2022. """
  2023. Compute the dot product between the Series and the columns of other.
  2024. This method computes the dot product between the Series and another
  2025. one, or the Series and each columns of a DataFrame, or the Series and
  2026. each columns of an array.
  2027. It can also be called using `self @ other` in Python >= 3.5.
  2028. Parameters
  2029. ----------
  2030. other : Series, DataFrame or array-like
  2031. The other object to compute the dot product with its columns.
  2032. Returns
  2033. -------
  2034. scalar, Series or numpy.ndarray
  2035. Return the dot product of the Series and other if other is a
  2036. Series, the Series of the dot product of Series and each rows of
  2037. other if other is a DataFrame or a numpy.ndarray between the Series
  2038. and each columns of the numpy array.
  2039. See Also
  2040. --------
  2041. DataFrame.dot: Compute the matrix product with the DataFrame.
  2042. Series.mul: Multiplication of series and other, element-wise.
  2043. Notes
  2044. -----
  2045. The Series and other has to share the same index if other is a Series
  2046. or a DataFrame.
  2047. Examples
  2048. --------
  2049. >>> s = pd.Series([0, 1, 2, 3])
  2050. >>> other = pd.Series([-1, 2, -3, 4])
  2051. >>> s.dot(other)
  2052. 8
  2053. >>> s @ other
  2054. 8
  2055. >>> df = pd.DataFrame([[0, 1], [-2, 3], [4, -5], [6, 7]])
  2056. >>> s.dot(df)
  2057. 0 24
  2058. 1 14
  2059. dtype: int64
  2060. >>> arr = np.array([[0, 1], [-2, 3], [4, -5], [6, 7]])
  2061. >>> s.dot(arr)
  2062. array([24, 14])
  2063. """
  2064. if isinstance(other, (Series, ABCDataFrame)):
  2065. common = self.index.union(other.index)
  2066. if len(common) > len(self.index) or len(common) > len(other.index):
  2067. raise ValueError("matrices are not aligned")
  2068. left = self.reindex(index=common, copy=False)
  2069. right = other.reindex(index=common, copy=False)
  2070. lvals = left.values
  2071. rvals = right.values
  2072. else:
  2073. lvals = self.values
  2074. rvals = np.asarray(other)
  2075. if lvals.shape[0] != rvals.shape[0]:
  2076. raise Exception(
  2077. f"Dot product shape mismatch, {lvals.shape} vs {rvals.shape}"
  2078. )
  2079. if isinstance(other, ABCDataFrame):
  2080. return self._constructor(
  2081. np.dot(lvals, rvals), index=other.columns
  2082. ).__finalize__(self)
  2083. elif isinstance(other, Series):
  2084. return np.dot(lvals, rvals)
  2085. elif isinstance(rvals, np.ndarray):
  2086. return np.dot(lvals, rvals)
  2087. else: # pragma: no cover
  2088. raise TypeError(f"unsupported type: {type(other)}")
def __matmul__(self, other):
    """
    Matrix multiplication using binary `@` operator in Python>=3.5.
    """
    # `self @ other` delegates directly to Series.dot.
    return self.dot(other)
def __rmatmul__(self, other):
    """
    Matrix multiplication using binary `@` operator in Python>=3.5.
    """
    # `other @ self`: transpose `other` so the shared dot implementation
    # computes the reflected product.
    return self.dot(np.transpose(other))
@Substitution(klass="Series")
@Appender(base._shared_docs["searchsorted"])
def searchsorted(self, value, side="left", sorter=None):
    # The shared docstring is attached by the decorators above; delegate to
    # the common algorithms implementation on the underlying values.
    return algorithms.searchsorted(self._values, value, side=side, sorter=sorter)
  2103. # -------------------------------------------------------------------
  2104. # Combination
  2105. def append(self, to_append, ignore_index=False, verify_integrity=False):
  2106. """
  2107. Concatenate two or more Series.
  2108. Parameters
  2109. ----------
  2110. to_append : Series or list/tuple of Series
  2111. Series to append with self.
  2112. ignore_index : bool, default False
  2113. If True, do not use the index labels.
  2114. verify_integrity : bool, default False
  2115. If True, raise Exception on creating index with duplicates.
  2116. Returns
  2117. -------
  2118. Series
  2119. Concatenated Series.
  2120. See Also
  2121. --------
  2122. concat : General function to concatenate DataFrame or Series objects.
  2123. Notes
  2124. -----
  2125. Iteratively appending to a Series can be more computationally intensive
  2126. than a single concatenate. A better solution is to append values to a
  2127. list and then concatenate the list with the original Series all at
  2128. once.
  2129. Examples
  2130. --------
  2131. >>> s1 = pd.Series([1, 2, 3])
  2132. >>> s2 = pd.Series([4, 5, 6])
  2133. >>> s3 = pd.Series([4, 5, 6], index=[3, 4, 5])
  2134. >>> s1.append(s2)
  2135. 0 1
  2136. 1 2
  2137. 2 3
  2138. 0 4
  2139. 1 5
  2140. 2 6
  2141. dtype: int64
  2142. >>> s1.append(s3)
  2143. 0 1
  2144. 1 2
  2145. 2 3
  2146. 3 4
  2147. 4 5
  2148. 5 6
  2149. dtype: int64
  2150. With `ignore_index` set to True:
  2151. >>> s1.append(s2, ignore_index=True)
  2152. 0 1
  2153. 1 2
  2154. 2 3
  2155. 3 4
  2156. 4 5
  2157. 5 6
  2158. dtype: int64
  2159. With `verify_integrity` set to True:
  2160. >>> s1.append(s2, verify_integrity=True)
  2161. Traceback (most recent call last):
  2162. ...
  2163. ValueError: Indexes have overlapping values: [0, 1, 2]
  2164. """
  2165. from pandas.core.reshape.concat import concat
  2166. if isinstance(to_append, (list, tuple)):
  2167. to_concat = [self]
  2168. to_concat.extend(to_append)
  2169. else:
  2170. to_concat = [self, to_append]
  2171. return concat(
  2172. to_concat, ignore_index=ignore_index, verify_integrity=verify_integrity
  2173. )
  2174. def _binop(self, other, func, level=None, fill_value=None):
  2175. """
  2176. Perform generic binary operation with optional fill value.
  2177. Parameters
  2178. ----------
  2179. other : Series
  2180. func : binary operator
  2181. fill_value : float or object
  2182. Value to substitute for NA/null values. If both Series are NA in a
  2183. location, the result will be NA regardless of the passed fill value.
  2184. level : int or level name, default None
  2185. Broadcast across a level, matching Index values on the
  2186. passed MultiIndex level.
  2187. Returns
  2188. -------
  2189. Series
  2190. """
  2191. if not isinstance(other, Series):
  2192. raise AssertionError("Other operand must be Series")
  2193. new_index = self.index
  2194. this = self
  2195. if not self.index.equals(other.index):
  2196. this, other = self.align(other, level=level, join="outer", copy=False)
  2197. new_index = this.index
  2198. this_vals, other_vals = ops.fill_binop(this.values, other.values, fill_value)
  2199. with np.errstate(all="ignore"):
  2200. result = func(this_vals, other_vals)
  2201. name = ops.get_op_result_name(self, other)
  2202. ret = ops._construct_result(self, result, new_index, name)
  2203. return ret
def combine(self, other, func, fill_value=None):
    """
    Combine the Series with a Series or scalar according to `func`.

    Combine the Series and `other` using `func` to perform elementwise
    selection for combined Series. `fill_value` is assumed when value is
    missing at some index from one of the two objects being combined.

    Parameters
    ----------
    other : Series or scalar
        The value(s) to be combined with the `Series`.
    func : function
        Function that takes two scalars as inputs and returns an element.
    fill_value : scalar, optional
        The value to assume when an index is missing from
        one Series or the other. The default specifies to use the
        appropriate NaN value for the underlying dtype of the Series.

    Returns
    -------
    Series
        The result of combining the Series with the other object.

    See Also
    --------
    Series.combine_first : Combine Series values, choosing the calling
        Series' values first.

    Examples
    --------
    >>> s1 = pd.Series({'falcon': 330.0, 'eagle': 160.0})
    >>> s2 = pd.Series({'falcon': 345.0, 'eagle': 200.0, 'duck': 30.0})
    >>> s1.combine(s2, max, fill_value=0)
    duck        30.0
    eagle      200.0
    falcon     345.0
    dtype: float64
    """
    if fill_value is None:
        # Default to the NA marker appropriate for this Series' dtype.
        fill_value = na_value_for_dtype(self.dtype, compat=False)

    if isinstance(other, Series):
        # If other is a Series, result is based on union of Series,
        # so do this element by element
        new_index = self.index.union(other.index)
        new_name = ops.get_op_result_name(self, other)
        new_values = []
        for idx in new_index:
            # Labels absent from either side fall back to fill_value.
            lv = self.get(idx, fill_value)
            rv = other.get(idx, fill_value)
            with np.errstate(all="ignore"):
                new_values.append(func(lv, rv))
    else:
        # Assume that other is a scalar, so apply the function for
        # each element in the Series
        new_index = self.index
        with np.errstate(all="ignore"):
            new_values = [func(lv, other) for lv in self._values]
        new_name = self.name

    if is_categorical_dtype(self.values):
        # Categorical results are passed through to the constructor
        # untouched; no attempt is made to re-cast them here.
        pass
    elif is_extension_array_dtype(self.values):
        # The function can return something of any type, so check
        # if the type is compatible with the calling EA.
        new_values = try_cast_to_ea(self._values, new_values)
    return self._constructor(new_values, index=new_index, name=new_name)
  2288. def combine_first(self, other):
  2289. """
  2290. Combine Series values, choosing the calling Series's values first.
  2291. Parameters
  2292. ----------
  2293. other : Series
  2294. The value(s) to be combined with the `Series`.
  2295. Returns
  2296. -------
  2297. Series
  2298. The result of combining the Series with the other object.
  2299. See Also
  2300. --------
  2301. Series.combine : Perform elementwise operation on two Series
  2302. using a given function.
  2303. Notes
  2304. -----
  2305. Result index will be the union of the two indexes.
  2306. Examples
  2307. --------
  2308. >>> s1 = pd.Series([1, np.nan])
  2309. >>> s2 = pd.Series([3, 4])
  2310. >>> s1.combine_first(s2)
  2311. 0 1.0
  2312. 1 4.0
  2313. dtype: float64
  2314. """
  2315. new_index = self.index.union(other.index)
  2316. this = self.reindex(new_index, copy=False)
  2317. other = other.reindex(new_index, copy=False)
  2318. if this.dtype.kind == "M" and other.dtype.kind != "M":
  2319. other = to_datetime(other)
  2320. return this.where(notna(this), other)
def update(self, other):
    """
    Modify Series in place using non-NA values from passed
    Series. Aligns on index.

    Parameters
    ----------
    other : Series
        Source of replacement values; only its non-NA entries (after
        aligning to this Series' index) overwrite existing values.

    Returns
    -------
    None
        The Series is modified in place.

    Examples
    --------
    >>> s = pd.Series([1, 2, 3])
    >>> s.update(pd.Series([4, np.nan, 6]))
    >>> s
    0    4
    1    2
    2    6
    dtype: int64
    """
    # Align `other` to this Series' labels; labels missing from `other`
    # become NaN and are therefore excluded by the mask below.
    other = other.reindex_like(self)
    mask = notna(other)

    # Write masked values through the block manager in place, then
    # rebind the (possibly new) manager object.
    self._data = self._data.putmask(mask=mask, new=other, inplace=True)

    # Propagate the mutation to any parent object caching this Series.
    self._maybe_update_cacher()
  2365. # ----------------------------------------------------------------------
  2366. # Reindexing, sorting
  2367. def sort_values(
  2368. self,
  2369. axis=0,
  2370. ascending=True,
  2371. inplace=False,
  2372. kind="quicksort",
  2373. na_position="last",
  2374. ignore_index=False,
  2375. ):
  2376. """
  2377. Sort by the values.
  2378. Sort a Series in ascending or descending order by some
  2379. criterion.
  2380. Parameters
  2381. ----------
  2382. axis : {0 or 'index'}, default 0
  2383. Axis to direct sorting. The value 'index' is accepted for
  2384. compatibility with DataFrame.sort_values.
  2385. ascending : bool, default True
  2386. If True, sort values in ascending order, otherwise descending.
  2387. inplace : bool, default False
  2388. If True, perform operation in-place.
  2389. kind : {'quicksort', 'mergesort' or 'heapsort'}, default 'quicksort'
  2390. Choice of sorting algorithm. See also :func:`numpy.sort` for more
  2391. information. 'mergesort' is the only stable algorithm.
  2392. na_position : {'first' or 'last'}, default 'last'
  2393. Argument 'first' puts NaNs at the beginning, 'last' puts NaNs at
  2394. the end.
  2395. ignore_index : bool, default False
  2396. If True, the resulting axis will be labeled 0, 1, …, n - 1.
  2397. .. versionadded:: 1.0.0
  2398. Returns
  2399. -------
  2400. Series
  2401. Series ordered by values.
  2402. See Also
  2403. --------
  2404. Series.sort_index : Sort by the Series indices.
  2405. DataFrame.sort_values : Sort DataFrame by the values along either axis.
  2406. DataFrame.sort_index : Sort DataFrame by indices.
  2407. Examples
  2408. --------
  2409. >>> s = pd.Series([np.nan, 1, 3, 10, 5])
  2410. >>> s
  2411. 0 NaN
  2412. 1 1.0
  2413. 2 3.0
  2414. 3 10.0
  2415. 4 5.0
  2416. dtype: float64
  2417. Sort values ascending order (default behaviour)
  2418. >>> s.sort_values(ascending=True)
  2419. 1 1.0
  2420. 2 3.0
  2421. 4 5.0
  2422. 3 10.0
  2423. 0 NaN
  2424. dtype: float64
  2425. Sort values descending order
  2426. >>> s.sort_values(ascending=False)
  2427. 3 10.0
  2428. 4 5.0
  2429. 2 3.0
  2430. 1 1.0
  2431. 0 NaN
  2432. dtype: float64
  2433. Sort values inplace
  2434. >>> s.sort_values(ascending=False, inplace=True)
  2435. >>> s
  2436. 3 10.0
  2437. 4 5.0
  2438. 2 3.0
  2439. 1 1.0
  2440. 0 NaN
  2441. dtype: float64
  2442. Sort values putting NAs first
  2443. >>> s.sort_values(na_position='first')
  2444. 0 NaN
  2445. 1 1.0
  2446. 2 3.0
  2447. 4 5.0
  2448. 3 10.0
  2449. dtype: float64
  2450. Sort a series of strings
  2451. >>> s = pd.Series(['z', 'b', 'd', 'a', 'c'])
  2452. >>> s
  2453. 0 z
  2454. 1 b
  2455. 2 d
  2456. 3 a
  2457. 4 c
  2458. dtype: object
  2459. >>> s.sort_values()
  2460. 3 a
  2461. 1 b
  2462. 4 c
  2463. 2 d
  2464. 0 z
  2465. dtype: object
  2466. """
  2467. inplace = validate_bool_kwarg(inplace, "inplace")
  2468. # Validate the axis parameter
  2469. self._get_axis_number(axis)
  2470. # GH 5856/5853
  2471. if inplace and self._is_cached:
  2472. raise ValueError(
  2473. "This Series is a view of some other array, to "
  2474. "sort in-place you must create a copy"
  2475. )
  2476. def _try_kind_sort(arr):
  2477. # easier to ask forgiveness than permission
  2478. try:
  2479. # if kind==mergesort, it can fail for object dtype
  2480. return arr.argsort(kind=kind)
  2481. except TypeError:
  2482. # stable sort not available for object dtype
  2483. # uses the argsort default quicksort
  2484. return arr.argsort(kind="quicksort")
  2485. arr = self._values
  2486. sorted_index = np.empty(len(self), dtype=np.int32)
  2487. bad = isna(arr)
  2488. good = ~bad
  2489. idx = ibase.default_index(len(self))
  2490. argsorted = _try_kind_sort(arr[good])
  2491. if is_list_like(ascending):
  2492. if len(ascending) != 1:
  2493. raise ValueError(
  2494. f"Length of ascending ({len(ascending)}) must be 1 for Series"
  2495. )
  2496. ascending = ascending[0]
  2497. if not is_bool(ascending):
  2498. raise ValueError("ascending must be boolean")
  2499. if not ascending:
  2500. argsorted = argsorted[::-1]
  2501. if na_position == "last":
  2502. n = good.sum()
  2503. sorted_index[:n] = idx[good][argsorted]
  2504. sorted_index[n:] = idx[bad]
  2505. elif na_position == "first":
  2506. n = bad.sum()
  2507. sorted_index[n:] = idx[good][argsorted]
  2508. sorted_index[:n] = idx[bad]
  2509. else:
  2510. raise ValueError(f"invalid na_position: {na_position}")
  2511. result = self._constructor(arr[sorted_index], index=self.index[sorted_index])
  2512. if ignore_index:
  2513. result.index = ibase.default_index(len(sorted_index))
  2514. if inplace:
  2515. self._update_inplace(result)
  2516. else:
  2517. return result.__finalize__(self)
def sort_index(
    self,
    axis=0,
    level=None,
    ascending=True,
    inplace=False,
    kind="quicksort",
    na_position="last",
    sort_remaining=True,
    ignore_index: bool = False,
):
    """
    Sort Series by index labels.

    Returns a new Series sorted by label if `inplace` argument is
    ``False``, otherwise updates the original series and returns None.

    Parameters
    ----------
    axis : int, default 0
        Axis to direct sorting. This can only be 0 for Series.
    level : int, optional
        If not None, sort on values in specified index level(s).
    ascending : bool, default true
        Sort ascending vs. descending.
    inplace : bool, default False
        If True, perform operation in-place.
    kind : {'quicksort', 'mergesort', 'heapsort'}, default 'quicksort'
        Choice of sorting algorithm. See also :func:`numpy.sort` for more
        information. 'mergesort' is the only stable algorithm.
    na_position : {'first', 'last'}, default 'last'
        If 'first' puts NaNs at the beginning, 'last' puts NaNs at the end.
        Not implemented for MultiIndex.
    sort_remaining : bool, default True
        If True and sorting by level and index is multilevel, sort by other
        levels too (in order) after sorting by specified level.
    ignore_index : bool, default False
        If True, the resulting axis will be labeled 0, 1, …, n - 1.

        .. versionadded:: 1.0.0

    Returns
    -------
    Series
        The original Series sorted by the labels (None when
        ``inplace=True``).

    See Also
    --------
    DataFrame.sort_index: Sort DataFrame by the index.
    Series.sort_values : Sort Series by the value.
    """
    # TODO: this can be combined with DataFrame.sort_index impl as
    # almost identical
    inplace = validate_bool_kwarg(inplace, "inplace")
    # Validate the axis parameter (only 0/'index' is valid for a Series).
    self._get_axis_number(axis)
    index = self.index

    if level is not None:
        # Sort on one (or more) specific MultiIndex level(s).
        new_index, indexer = index.sortlevel(
            level, ascending=ascending, sort_remaining=sort_remaining
        )
    elif isinstance(index, MultiIndex):
        from pandas.core.sorting import lexsort_indexer

        # Lexicographic sort across all levels of the MultiIndex.
        labels = index._sort_levels_monotonic()
        indexer = lexsort_indexer(
            labels._get_codes_for_sorting(),
            orders=ascending,
            na_position=na_position,
        )
    else:
        from pandas.core.sorting import nargsort

        # Check monotonic-ness before sort an index
        # GH11080
        if (ascending and index.is_monotonic_increasing) or (
            not ascending and index.is_monotonic_decreasing
        ):
            # Already in the requested order: nothing to do in place,
            # or return a copy to preserve "new object" semantics.
            if inplace:
                return
            else:
                return self.copy()

        # NA-aware argsort of a flat index.
        indexer = nargsort(
            index, kind=kind, ascending=ascending, na_position=na_position
        )

    indexer = ensure_platform_int(indexer)
    new_index = index.take(indexer)
    new_index = new_index._sort_levels_monotonic()
    new_values = self._values.take(indexer)
    result = self._constructor(new_values, index=new_index)

    if ignore_index:
        result.index = ibase.default_index(len(result))

    if inplace:
        self._update_inplace(result)
    else:
        return result.__finalize__(self)
  2670. def argsort(self, axis=0, kind="quicksort", order=None):
  2671. """
  2672. Override ndarray.argsort. Argsorts the value, omitting NA/null values,
  2673. and places the result in the same locations as the non-NA values.
  2674. Parameters
  2675. ----------
  2676. axis : {0 or "index"}
  2677. Has no effect but is accepted for compatibility with numpy.
  2678. kind : {'mergesort', 'quicksort', 'heapsort'}, default 'quicksort'
  2679. Choice of sorting algorithm. See np.sort for more
  2680. information. 'mergesort' is the only stable algorithm.
  2681. order : None
  2682. Has no effect but is accepted for compatibility with numpy.
  2683. Returns
  2684. -------
  2685. Series
  2686. Positions of values within the sort order with -1 indicating
  2687. nan values.
  2688. See Also
  2689. --------
  2690. numpy.ndarray.argsort
  2691. """
  2692. values = self._values
  2693. mask = isna(values)
  2694. if mask.any():
  2695. result = Series(-1, index=self.index, name=self.name, dtype="int64")
  2696. notmask = ~mask
  2697. result[notmask] = np.argsort(values[notmask], kind=kind)
  2698. return self._constructor(result, index=self.index).__finalize__(self)
  2699. else:
  2700. return self._constructor(
  2701. np.argsort(values, kind=kind), index=self.index, dtype="int64"
  2702. ).__finalize__(self)
  2703. def nlargest(self, n=5, keep="first"):
  2704. """
  2705. Return the largest `n` elements.
  2706. Parameters
  2707. ----------
  2708. n : int, default 5
  2709. Return this many descending sorted values.
  2710. keep : {'first', 'last', 'all'}, default 'first'
  2711. When there are duplicate values that cannot all fit in a
  2712. Series of `n` elements:
  2713. - ``first`` : return the first `n` occurrences in order
  2714. of appearance.
  2715. - ``last`` : return the last `n` occurrences in reverse
  2716. order of appearance.
  2717. - ``all`` : keep all occurrences. This can result in a Series of
  2718. size larger than `n`.
  2719. Returns
  2720. -------
  2721. Series
  2722. The `n` largest values in the Series, sorted in decreasing order.
  2723. See Also
  2724. --------
  2725. Series.nsmallest: Get the `n` smallest elements.
  2726. Series.sort_values: Sort Series by values.
  2727. Series.head: Return the first `n` rows.
  2728. Notes
  2729. -----
  2730. Faster than ``.sort_values(ascending=False).head(n)`` for small `n`
  2731. relative to the size of the ``Series`` object.
  2732. Examples
  2733. --------
  2734. >>> countries_population = {"Italy": 59000000, "France": 65000000,
  2735. ... "Malta": 434000, "Maldives": 434000,
  2736. ... "Brunei": 434000, "Iceland": 337000,
  2737. ... "Nauru": 11300, "Tuvalu": 11300,
  2738. ... "Anguilla": 11300, "Monserat": 5200}
  2739. >>> s = pd.Series(countries_population)
  2740. >>> s
  2741. Italy 59000000
  2742. France 65000000
  2743. Malta 434000
  2744. Maldives 434000
  2745. Brunei 434000
  2746. Iceland 337000
  2747. Nauru 11300
  2748. Tuvalu 11300
  2749. Anguilla 11300
  2750. Monserat 5200
  2751. dtype: int64
  2752. The `n` largest elements where ``n=5`` by default.
  2753. >>> s.nlargest()
  2754. France 65000000
  2755. Italy 59000000
  2756. Malta 434000
  2757. Maldives 434000
  2758. Brunei 434000
  2759. dtype: int64
  2760. The `n` largest elements where ``n=3``. Default `keep` value is 'first'
  2761. so Malta will be kept.
  2762. >>> s.nlargest(3)
  2763. France 65000000
  2764. Italy 59000000
  2765. Malta 434000
  2766. dtype: int64
  2767. The `n` largest elements where ``n=3`` and keeping the last duplicates.
  2768. Brunei will be kept since it is the last with value 434000 based on
  2769. the index order.
  2770. >>> s.nlargest(3, keep='last')
  2771. France 65000000
  2772. Italy 59000000
  2773. Brunei 434000
  2774. dtype: int64
  2775. The `n` largest elements where ``n=3`` with all duplicates kept. Note
  2776. that the returned Series has five elements due to the three duplicates.
  2777. >>> s.nlargest(3, keep='all')
  2778. France 65000000
  2779. Italy 59000000
  2780. Malta 434000
  2781. Maldives 434000
  2782. Brunei 434000
  2783. dtype: int64
  2784. """
  2785. return algorithms.SelectNSeries(self, n=n, keep=keep).nlargest()
  2786. def nsmallest(self, n=5, keep="first"):
  2787. """
  2788. Return the smallest `n` elements.
  2789. Parameters
  2790. ----------
  2791. n : int, default 5
  2792. Return this many ascending sorted values.
  2793. keep : {'first', 'last', 'all'}, default 'first'
  2794. When there are duplicate values that cannot all fit in a
  2795. Series of `n` elements:
  2796. - ``first`` : return the first `n` occurrences in order
  2797. of appearance.
  2798. - ``last`` : return the last `n` occurrences in reverse
  2799. order of appearance.
  2800. - ``all`` : keep all occurrences. This can result in a Series of
  2801. size larger than `n`.
  2802. Returns
  2803. -------
  2804. Series
  2805. The `n` smallest values in the Series, sorted in increasing order.
  2806. See Also
  2807. --------
  2808. Series.nlargest: Get the `n` largest elements.
  2809. Series.sort_values: Sort Series by values.
  2810. Series.head: Return the first `n` rows.
  2811. Notes
  2812. -----
  2813. Faster than ``.sort_values().head(n)`` for small `n` relative to
  2814. the size of the ``Series`` object.
  2815. Examples
  2816. --------
  2817. >>> countries_population = {"Italy": 59000000, "France": 65000000,
  2818. ... "Brunei": 434000, "Malta": 434000,
  2819. ... "Maldives": 434000, "Iceland": 337000,
  2820. ... "Nauru": 11300, "Tuvalu": 11300,
  2821. ... "Anguilla": 11300, "Monserat": 5200}
  2822. >>> s = pd.Series(countries_population)
  2823. >>> s
  2824. Italy 59000000
  2825. France 65000000
  2826. Brunei 434000
  2827. Malta 434000
  2828. Maldives 434000
  2829. Iceland 337000
  2830. Nauru 11300
  2831. Tuvalu 11300
  2832. Anguilla 11300
  2833. Monserat 5200
  2834. dtype: int64
  2835. The `n` smallest elements where ``n=5`` by default.
  2836. >>> s.nsmallest()
  2837. Monserat 5200
  2838. Nauru 11300
  2839. Tuvalu 11300
  2840. Anguilla 11300
  2841. Iceland 337000
  2842. dtype: int64
  2843. The `n` smallest elements where ``n=3``. Default `keep` value is
  2844. 'first' so Nauru and Tuvalu will be kept.
  2845. >>> s.nsmallest(3)
  2846. Monserat 5200
  2847. Nauru 11300
  2848. Tuvalu 11300
  2849. dtype: int64
  2850. The `n` smallest elements where ``n=3`` and keeping the last
  2851. duplicates. Anguilla and Tuvalu will be kept since they are the last
  2852. with value 11300 based on the index order.
  2853. >>> s.nsmallest(3, keep='last')
  2854. Monserat 5200
  2855. Anguilla 11300
  2856. Tuvalu 11300
  2857. dtype: int64
  2858. The `n` smallest elements where ``n=3`` with all duplicates kept. Note
  2859. that the returned Series has four elements due to the three duplicates.
  2860. >>> s.nsmallest(3, keep='all')
  2861. Monserat 5200
  2862. Nauru 11300
  2863. Tuvalu 11300
  2864. Anguilla 11300
  2865. dtype: int64
  2866. """
  2867. return algorithms.SelectNSeries(self, n=n, keep=keep).nsmallest()
  2868. def swaplevel(self, i=-2, j=-1, copy=True):
  2869. """
  2870. Swap levels i and j in a :class:`MultiIndex`.
  2871. Default is to swap the two innermost levels of the index.
  2872. Parameters
  2873. ----------
  2874. i, j : int, str
  2875. Level of the indices to be swapped. Can pass level name as string.
  2876. copy : bool, default True
  2877. Whether to copy underlying data.
  2878. Returns
  2879. -------
  2880. Series
  2881. Series with levels swapped in MultiIndex.
  2882. """
  2883. new_index = self.index.swaplevel(i, j)
  2884. return self._constructor(self._values, index=new_index, copy=copy).__finalize__(
  2885. self
  2886. )
  2887. def reorder_levels(self, order):
  2888. """
  2889. Rearrange index levels using input order.
  2890. May not drop or duplicate levels.
  2891. Parameters
  2892. ----------
  2893. order : list of int representing new level order
  2894. Reference level by number or key.
  2895. Returns
  2896. -------
  2897. type of caller (new object)
  2898. """
  2899. if not isinstance(self.index, MultiIndex): # pragma: no cover
  2900. raise Exception("Can only reorder levels on a hierarchical axis.")
  2901. result = self.copy()
  2902. result.index = result.index.reorder_levels(order)
  2903. return result
  2904. def explode(self) -> "Series":
  2905. """
  2906. Transform each element of a list-like to a row, replicating the
  2907. index values.
  2908. .. versionadded:: 0.25.0
  2909. Returns
  2910. -------
  2911. Series
  2912. Exploded lists to rows; index will be duplicated for these rows.
  2913. See Also
  2914. --------
  2915. Series.str.split : Split string values on specified separator.
  2916. Series.unstack : Unstack, a.k.a. pivot, Series with MultiIndex
  2917. to produce DataFrame.
  2918. DataFrame.melt : Unpivot a DataFrame from wide format to long format.
  2919. DataFrame.explode : Explode a DataFrame from list-like
  2920. columns to long format.
  2921. Notes
  2922. -----
  2923. This routine will explode list-likes including lists, tuples,
  2924. Series, and np.ndarray. The result dtype of the subset rows will
  2925. be object. Scalars will be returned unchanged. Empty list-likes will
  2926. result in a np.nan for that row.
  2927. Examples
  2928. --------
  2929. >>> s = pd.Series([[1, 2, 3], 'foo', [], [3, 4]])
  2930. >>> s
  2931. 0 [1, 2, 3]
  2932. 1 foo
  2933. 2 []
  2934. 3 [3, 4]
  2935. dtype: object
  2936. >>> s.explode()
  2937. 0 1
  2938. 0 2
  2939. 0 3
  2940. 1 foo
  2941. 2 NaN
  2942. 3 3
  2943. 3 4
  2944. dtype: object
  2945. """
  2946. if not len(self) or not is_object_dtype(self):
  2947. return self.copy()
  2948. values, counts = reshape.explode(np.asarray(self.array))
  2949. result = Series(values, index=self.index.repeat(counts), name=self.name)
  2950. return result
  2951. def unstack(self, level=-1, fill_value=None):
  2952. """
  2953. Unstack, a.k.a. pivot, Series with MultiIndex to produce DataFrame.
  2954. The level involved will automatically get sorted.
  2955. Parameters
  2956. ----------
  2957. level : int, str, or list of these, default last level
  2958. Level(s) to unstack, can pass level name.
  2959. fill_value : scalar value, default None
  2960. Value to use when replacing NaN values.
  2961. Returns
  2962. -------
  2963. DataFrame
  2964. Unstacked Series.
  2965. Examples
  2966. --------
  2967. >>> s = pd.Series([1, 2, 3, 4],
  2968. ... index=pd.MultiIndex.from_product([['one', 'two'],
  2969. ... ['a', 'b']]))
  2970. >>> s
  2971. one a 1
  2972. b 2
  2973. two a 3
  2974. b 4
  2975. dtype: int64
  2976. >>> s.unstack(level=-1)
  2977. a b
  2978. one 1 2
  2979. two 3 4
  2980. >>> s.unstack(level=0)
  2981. one two
  2982. a 1 3
  2983. b 2 4
  2984. """
  2985. from pandas.core.reshape.reshape import unstack
  2986. return unstack(self, level, fill_value)
  2987. # ----------------------------------------------------------------------
  2988. # function application
  2989. def map(self, arg, na_action=None):
  2990. """
  2991. Map values of Series according to input correspondence.
  2992. Used for substituting each value in a Series with another value,
  2993. that may be derived from a function, a ``dict`` or
  2994. a :class:`Series`.
  2995. Parameters
  2996. ----------
  2997. arg : function, collections.abc.Mapping subclass or Series
  2998. Mapping correspondence.
  2999. na_action : {None, 'ignore'}, default None
  3000. If 'ignore', propagate NaN values, without passing them to the
  3001. mapping correspondence.
  3002. Returns
  3003. -------
  3004. Series
  3005. Same index as caller.
  3006. See Also
  3007. --------
  3008. Series.apply : For applying more complex functions on a Series.
  3009. DataFrame.apply : Apply a function row-/column-wise.
  3010. DataFrame.applymap : Apply a function elementwise on a whole DataFrame.
  3011. Notes
  3012. -----
  3013. When ``arg`` is a dictionary, values in Series that are not in the
  3014. dictionary (as keys) are converted to ``NaN``. However, if the
  3015. dictionary is a ``dict`` subclass that defines ``__missing__`` (i.e.
  3016. provides a method for default values), then this default is used
  3017. rather than ``NaN``.
  3018. Examples
  3019. --------
  3020. >>> s = pd.Series(['cat', 'dog', np.nan, 'rabbit'])
  3021. >>> s
  3022. 0 cat
  3023. 1 dog
  3024. 2 NaN
  3025. 3 rabbit
  3026. dtype: object
  3027. ``map`` accepts a ``dict`` or a ``Series``. Values that are not found
  3028. in the ``dict`` are converted to ``NaN``, unless the dict has a default
  3029. value (e.g. ``defaultdict``):
  3030. >>> s.map({'cat': 'kitten', 'dog': 'puppy'})
  3031. 0 kitten
  3032. 1 puppy
  3033. 2 NaN
  3034. 3 NaN
  3035. dtype: object
  3036. It also accepts a function:
  3037. >>> s.map('I am a {}'.format)
  3038. 0 I am a cat
  3039. 1 I am a dog
  3040. 2 I am a nan
  3041. 3 I am a rabbit
  3042. dtype: object
  3043. To avoid applying the function to missing values (and keep them as
  3044. ``NaN``) ``na_action='ignore'`` can be used:
  3045. >>> s.map('I am a {}'.format, na_action='ignore')
  3046. 0 I am a cat
  3047. 1 I am a dog
  3048. 2 NaN
  3049. 3 I am a rabbit
  3050. dtype: object
  3051. """
  3052. new_values = super()._map_values(arg, na_action=na_action)
  3053. return self._constructor(new_values, index=self.index).__finalize__(self)
  3054. def _gotitem(self, key, ndim, subset=None):
  3055. """
  3056. Sub-classes to define. Return a sliced object.
  3057. Parameters
  3058. ----------
  3059. key : string / list of selections
  3060. ndim : 1,2
  3061. Requested ndim of result.
  3062. subset : object, default None
  3063. Subset to act on.
  3064. """
  3065. return self
# Shared docstring fragments injected into `aggregate`'s docstring via
# the @Substitution decorator below; the text is consumed at runtime,
# so it must not be edited casually.
_agg_see_also_doc = dedent(
    """
See Also
--------
Series.apply : Invoke function on a Series.
Series.transform : Transform function producing a Series with like indexes.
"""
)

# Worked examples for the `aggregate` docstring (same mechanism).
_agg_examples_doc = dedent(
    """
Examples
--------
>>> s = pd.Series([1, 2, 3, 4])
>>> s
0    1
1    2
2    3
3    4
dtype: int64

>>> s.agg('min')
1

>>> s.agg(['min', 'max'])
min   1
max   4
dtype: int64
"""
)
# The docstring is assembled by the decorators: @Appender pulls in the
# shared "aggregate" template and @Substitution fills in the See Also /
# Examples fragments defined above.
@Substitution(
    see_also=_agg_see_also_doc,
    examples=_agg_examples_doc,
    versionadded="\n.. versionadded:: 0.20.0\n",
    **_shared_doc_kwargs,
)
@Appender(generic._shared_docs["aggregate"])
def aggregate(self, func, axis=0, *args, **kwargs):
    # Validate the axis parameter (only 0/'index' is valid for a Series).
    self._get_axis_number(axis)
    result, how = self._aggregate(func, *args, **kwargs)
    if result is None:
        # we can be called from an inner function which
        # passes this meta-data
        kwargs.pop("_axis", None)
        kwargs.pop("_level", None)

        # try a regular apply, this evaluates lambdas
        # row-by-row; however if the lambda is expected a Series
        # expression, e.g.: lambda x: x-x.quantile(0.25)
        # this will fail, so we can try a vectorized evaluation

        # we cannot FIRST try the vectorized evaluation, because
        # then .agg and .apply would have different semantics if the
        # operation is actually defined on the Series, e.g. str
        try:
            result = self.apply(func, *args, **kwargs)
        except (ValueError, AttributeError, TypeError):
            result = func(self, *args, **kwargs)

    return result

# Alias: `Series.agg` and `Series.aggregate` are the same method.
agg = aggregate
# Docstring comes from the shared "transform" template; adding one here
# would interfere with the %-substitution in the decorator.
@Appender(generic._shared_docs["transform"] % _shared_doc_kwargs)
def transform(self, func, axis=0, *args, **kwargs):
    # Validate the axis parameter (only 0/'index' is valid for a Series).
    self._get_axis_number(axis)
    # Delegate to the shared NDFrame implementation; `axis` is dropped
    # because a Series transform is always along the index.
    return super().transform(func, *args, **kwargs)
  3127. def apply(self, func, convert_dtype=True, args=(), **kwds):
  3128. """
  3129. Invoke function on values of Series.
  3130. Can be ufunc (a NumPy function that applies to the entire Series)
  3131. or a Python function that only works on single values.
  3132. Parameters
  3133. ----------
  3134. func : function
  3135. Python function or NumPy ufunc to apply.
  3136. convert_dtype : bool, default True
  3137. Try to find better dtype for elementwise function results. If
  3138. False, leave as dtype=object.
  3139. args : tuple
  3140. Positional arguments passed to func after the series value.
  3141. **kwds
  3142. Additional keyword arguments passed to func.
  3143. Returns
  3144. -------
  3145. Series or DataFrame
  3146. If func returns a Series object the result will be a DataFrame.
  3147. See Also
  3148. --------
  3149. Series.map: For element-wise operations.
  3150. Series.agg: Only perform aggregating type operations.
  3151. Series.transform: Only perform transforming type operations.
  3152. Examples
  3153. --------
  3154. Create a series with typical summer temperatures for each city.
  3155. >>> s = pd.Series([20, 21, 12],
  3156. ... index=['London', 'New York', 'Helsinki'])
  3157. >>> s
  3158. London 20
  3159. New York 21
  3160. Helsinki 12
  3161. dtype: int64
  3162. Square the values by defining a function and passing it as an
  3163. argument to ``apply()``.
  3164. >>> def square(x):
  3165. ... return x ** 2
  3166. >>> s.apply(square)
  3167. London 400
  3168. New York 441
  3169. Helsinki 144
  3170. dtype: int64
  3171. Square the values by passing an anonymous function as an
  3172. argument to ``apply()``.
  3173. >>> s.apply(lambda x: x ** 2)
  3174. London 400
  3175. New York 441
  3176. Helsinki 144
  3177. dtype: int64
  3178. Define a custom function that needs additional positional
  3179. arguments and pass these additional arguments using the
  3180. ``args`` keyword.
  3181. >>> def subtract_custom_value(x, custom_value):
  3182. ... return x - custom_value
  3183. >>> s.apply(subtract_custom_value, args=(5,))
  3184. London 15
  3185. New York 16
  3186. Helsinki 7
  3187. dtype: int64
  3188. Define a custom function that takes keyword arguments
  3189. and pass these arguments to ``apply``.
  3190. >>> def add_custom_values(x, **kwargs):
  3191. ... for month in kwargs:
  3192. ... x += kwargs[month]
  3193. ... return x
  3194. >>> s.apply(add_custom_values, june=30, july=20, august=25)
  3195. London 95
  3196. New York 96
  3197. Helsinki 87
  3198. dtype: int64
  3199. Use a function from the Numpy library.
  3200. >>> s.apply(np.log)
  3201. London 2.995732
  3202. New York 3.044522
  3203. Helsinki 2.484907
  3204. dtype: float64
  3205. """
  3206. if len(self) == 0:
  3207. return self._constructor(dtype=self.dtype, index=self.index).__finalize__(
  3208. self
  3209. )
  3210. # dispatch to agg
  3211. if isinstance(func, (list, dict)):
  3212. return self.aggregate(func, *args, **kwds)
  3213. # if we are a string, try to dispatch
  3214. if isinstance(func, str):
  3215. return self._try_aggregate_string_function(func, *args, **kwds)
  3216. # handle ufuncs and lambdas
  3217. if kwds or args and not isinstance(func, np.ufunc):
  3218. def f(x):
  3219. return func(x, *args, **kwds)
  3220. else:
  3221. f = func
  3222. with np.errstate(all="ignore"):
  3223. if isinstance(f, np.ufunc):
  3224. return f(self)
  3225. # row-wise access
  3226. if is_extension_array_dtype(self.dtype) and hasattr(self._values, "map"):
  3227. # GH#23179 some EAs do not have `map`
  3228. mapped = self._values.map(f)
  3229. else:
  3230. values = self.astype(object).values
  3231. mapped = lib.map_infer(values, f, convert=convert_dtype)
  3232. if len(mapped) and isinstance(mapped[0], Series):
  3233. # GH 25959 use pd.array instead of tolist
  3234. # so extension arrays can be used
  3235. return self._constructor_expanddim(pd.array(mapped), index=self.index)
  3236. else:
  3237. return self._constructor(mapped, index=self.index).__finalize__(self)
def _reduce(
    self, op, name, axis=0, skipna=True, numeric_only=None, filter_type=None, **kwds
):
    """
    Perform a reduction operation.

    If we have an ndarray as a value, then simply perform the operation,
    otherwise delegate to the object.

    Parameters
    ----------
    op : callable
        Reduction function applied when the underlying values are an
        ndarray (e.g. a nanops function).
    name : str
        Name of the reduction (``"sum"``, ``"std"``, ...); used both for
        error messages and for attribute-based dispatch.
    axis : int, optional
        Validated but otherwise unused — a Series has only one axis.
    skipna : bool, default True
        Exclude NA/null values during the reduction.
    numeric_only : bool, optional
        Not supported for ndarray-backed Series; raises if truthy there.
    filter_type : optional
        Forwarded to the delegate's ``_reduce`` in the fallback path.
    **kwds
        Extra keyword arguments forwarded to the reduction.
    """
    delegate = self._values

    if axis is not None:
        # Raises for anything other than 0/'index'.
        self._get_axis_number(axis)

    # NOTE: the order of these isinstance/dtype checks matters —
    # Categorical is also an ExtensionArray, and the datetimelike dtypes
    # need index wrappers to get skipna semantics right.
    if isinstance(delegate, Categorical):
        return delegate._reduce(name, skipna=skipna, **kwds)
    elif isinstance(delegate, ExtensionArray):
        # dispatch to ExtensionArray interface
        return delegate._reduce(name, skipna=skipna, **kwds)
    elif is_datetime64_dtype(delegate):
        # use DatetimeIndex implementation to handle skipna correctly
        delegate = DatetimeIndex(delegate)
    elif is_timedelta64_dtype(delegate) and hasattr(TimedeltaIndex, name):
        # use TimedeltaIndex to handle skipna correctly
        # TODO: remove hasattr check after TimedeltaIndex has `std` method
        delegate = TimedeltaIndex(delegate)

    # dispatch to numpy arrays
    elif isinstance(delegate, np.ndarray):
        if numeric_only:
            raise NotImplementedError(
                f"Series.{name} does not implement numeric_only."
            )
        with np.errstate(all="ignore"):
            return op(delegate, skipna=skipna, **kwds)

    # TODO(EA) dispatch to Index
    # remove once all internals extension types are
    # moved to ExtensionArrays
    return delegate._reduce(
        op=op,
        name=name,
        axis=axis,
        skipna=skipna,
        numeric_only=numeric_only,
        filter_type=filter_type,
        **kwds,
    )
  3281. def _reindex_indexer(self, new_index, indexer, copy):
  3282. if indexer is None:
  3283. if copy:
  3284. return self.copy()
  3285. return self
  3286. new_values = algorithms.take_1d(
  3287. self._values, indexer, allow_fill=True, fill_value=None
  3288. )
  3289. return self._constructor(new_values, index=new_index)
  3290. def _needs_reindex_multi(self, axes, method, level):
  3291. """
  3292. Check if we do need a multi reindex; this is for compat with
  3293. higher dims.
  3294. """
  3295. return False
  3296. @Appender(generic._shared_docs["align"] % _shared_doc_kwargs)
  3297. def align(
  3298. self,
  3299. other,
  3300. join="outer",
  3301. axis=None,
  3302. level=None,
  3303. copy=True,
  3304. fill_value=None,
  3305. method=None,
  3306. limit=None,
  3307. fill_axis=0,
  3308. broadcast_axis=None,
  3309. ):
  3310. return super().align(
  3311. other,
  3312. join=join,
  3313. axis=axis,
  3314. level=level,
  3315. copy=copy,
  3316. fill_value=fill_value,
  3317. method=method,
  3318. limit=limit,
  3319. fill_axis=fill_axis,
  3320. broadcast_axis=broadcast_axis,
  3321. )
  3322. def rename(
  3323. self,
  3324. index=None,
  3325. *,
  3326. axis=None,
  3327. copy=True,
  3328. inplace=False,
  3329. level=None,
  3330. errors="ignore",
  3331. ):
  3332. """
  3333. Alter Series index labels or name.
  3334. Function / dict values must be unique (1-to-1). Labels not contained in
  3335. a dict / Series will be left as-is. Extra labels listed don't throw an
  3336. error.
  3337. Alternatively, change ``Series.name`` with a scalar value.
  3338. See the :ref:`user guide <basics.rename>` for more.
  3339. Parameters
  3340. ----------
  3341. axis : {0 or "index"}
  3342. Unused. Accepted for compatability with DataFrame method only.
  3343. index : scalar, hashable sequence, dict-like or function, optional
  3344. Functions or dict-like are transformations to apply to
  3345. the index.
  3346. Scalar or hashable sequence-like will alter the ``Series.name``
  3347. attribute.
  3348. **kwargs
  3349. Additional keyword arguments passed to the function. Only the
  3350. "inplace" keyword is used.
  3351. Returns
  3352. -------
  3353. Series
  3354. Series with index labels or name altered.
  3355. See Also
  3356. --------
  3357. DataFrame.rename : Corresponding DataFrame method.
  3358. Series.rename_axis : Set the name of the axis.
  3359. Examples
  3360. --------
  3361. >>> s = pd.Series([1, 2, 3])
  3362. >>> s
  3363. 0 1
  3364. 1 2
  3365. 2 3
  3366. dtype: int64
  3367. >>> s.rename("my_name") # scalar, changes Series.name
  3368. 0 1
  3369. 1 2
  3370. 2 3
  3371. Name: my_name, dtype: int64
  3372. >>> s.rename(lambda x: x ** 2) # function, changes labels
  3373. 0 1
  3374. 1 2
  3375. 4 3
  3376. dtype: int64
  3377. >>> s.rename({1: 3, 2: 5}) # mapping, changes labels
  3378. 0 1
  3379. 3 2
  3380. 5 3
  3381. dtype: int64
  3382. """
  3383. if callable(index) or is_dict_like(index):
  3384. return super().rename(
  3385. index, copy=copy, inplace=inplace, level=level, errors=errors
  3386. )
  3387. else:
  3388. return self._set_name(index, inplace=inplace)
  3389. @Substitution(**_shared_doc_kwargs)
  3390. @Appender(generic.NDFrame.reindex.__doc__)
  3391. def reindex(self, index=None, **kwargs):
  3392. return super().reindex(index=index, **kwargs)
  3393. def drop(
  3394. self,
  3395. labels=None,
  3396. axis=0,
  3397. index=None,
  3398. columns=None,
  3399. level=None,
  3400. inplace=False,
  3401. errors="raise",
  3402. ):
  3403. """
  3404. Return Series with specified index labels removed.
  3405. Remove elements of a Series based on specifying the index labels.
  3406. When using a multi-index, labels on different levels can be removed
  3407. by specifying the level.
  3408. Parameters
  3409. ----------
  3410. labels : single label or list-like
  3411. Index labels to drop.
  3412. axis : 0, default 0
  3413. Redundant for application on Series.
  3414. index : single label or list-like
  3415. Redundant for application on Series, but 'index' can be used instead
  3416. of 'labels'.
  3417. .. versionadded:: 0.21.0
  3418. columns : single label or list-like
  3419. No change is made to the Series; use 'index' or 'labels' instead.
  3420. .. versionadded:: 0.21.0
  3421. level : int or level name, optional
  3422. For MultiIndex, level for which the labels will be removed.
  3423. inplace : bool, default False
  3424. If True, do operation inplace and return None.
  3425. errors : {'ignore', 'raise'}, default 'raise'
  3426. If 'ignore', suppress error and only existing labels are dropped.
  3427. Returns
  3428. -------
  3429. Series
  3430. Series with specified index labels removed.
  3431. Raises
  3432. ------
  3433. KeyError
  3434. If none of the labels are found in the index.
  3435. See Also
  3436. --------
  3437. Series.reindex : Return only specified index labels of Series.
  3438. Series.dropna : Return series without null values.
  3439. Series.drop_duplicates : Return Series with duplicate values removed.
  3440. DataFrame.drop : Drop specified labels from rows or columns.
  3441. Examples
  3442. --------
  3443. >>> s = pd.Series(data=np.arange(3), index=['A', 'B', 'C'])
  3444. >>> s
  3445. A 0
  3446. B 1
  3447. C 2
  3448. dtype: int64
  3449. Drop labels B en C
  3450. >>> s.drop(labels=['B', 'C'])
  3451. A 0
  3452. dtype: int64
  3453. Drop 2nd level label in MultiIndex Series
  3454. >>> midx = pd.MultiIndex(levels=[['lama', 'cow', 'falcon'],
  3455. ... ['speed', 'weight', 'length']],
  3456. ... codes=[[0, 0, 0, 1, 1, 1, 2, 2, 2],
  3457. ... [0, 1, 2, 0, 1, 2, 0, 1, 2]])
  3458. >>> s = pd.Series([45, 200, 1.2, 30, 250, 1.5, 320, 1, 0.3],
  3459. ... index=midx)
  3460. >>> s
  3461. lama speed 45.0
  3462. weight 200.0
  3463. length 1.2
  3464. cow speed 30.0
  3465. weight 250.0
  3466. length 1.5
  3467. falcon speed 320.0
  3468. weight 1.0
  3469. length 0.3
  3470. dtype: float64
  3471. >>> s.drop(labels='weight', level=1)
  3472. lama speed 45.0
  3473. length 1.2
  3474. cow speed 30.0
  3475. length 1.5
  3476. falcon speed 320.0
  3477. length 0.3
  3478. dtype: float64
  3479. """
  3480. return super().drop(
  3481. labels=labels,
  3482. axis=axis,
  3483. index=index,
  3484. columns=columns,
  3485. level=level,
  3486. inplace=inplace,
  3487. errors=errors,
  3488. )
  3489. @Substitution(**_shared_doc_kwargs)
  3490. @Appender(generic.NDFrame.fillna.__doc__)
  3491. def fillna(
  3492. self,
  3493. value=None,
  3494. method=None,
  3495. axis=None,
  3496. inplace=False,
  3497. limit=None,
  3498. downcast=None,
  3499. ) -> Optional["Series"]:
  3500. return super().fillna(
  3501. value=value,
  3502. method=method,
  3503. axis=axis,
  3504. inplace=inplace,
  3505. limit=limit,
  3506. downcast=downcast,
  3507. )
  3508. @Appender(generic._shared_docs["replace"] % _shared_doc_kwargs)
  3509. def replace(
  3510. self,
  3511. to_replace=None,
  3512. value=None,
  3513. inplace=False,
  3514. limit=None,
  3515. regex=False,
  3516. method="pad",
  3517. ):
  3518. return super().replace(
  3519. to_replace=to_replace,
  3520. value=value,
  3521. inplace=inplace,
  3522. limit=limit,
  3523. regex=regex,
  3524. method=method,
  3525. )
  3526. @Appender(generic._shared_docs["shift"] % _shared_doc_kwargs)
  3527. def shift(self, periods=1, freq=None, axis=0, fill_value=None):
  3528. return super().shift(
  3529. periods=periods, freq=freq, axis=axis, fill_value=fill_value
  3530. )
  3531. def memory_usage(self, index=True, deep=False):
  3532. """
  3533. Return the memory usage of the Series.
  3534. The memory usage can optionally include the contribution of
  3535. the index and of elements of `object` dtype.
  3536. Parameters
  3537. ----------
  3538. index : bool, default True
  3539. Specifies whether to include the memory usage of the Series index.
  3540. deep : bool, default False
  3541. If True, introspect the data deeply by interrogating
  3542. `object` dtypes for system-level memory consumption, and include
  3543. it in the returned value.
  3544. Returns
  3545. -------
  3546. int
  3547. Bytes of memory consumed.
  3548. See Also
  3549. --------
  3550. numpy.ndarray.nbytes : Total bytes consumed by the elements of the
  3551. array.
  3552. DataFrame.memory_usage : Bytes consumed by a DataFrame.
  3553. Examples
  3554. --------
  3555. >>> s = pd.Series(range(3))
  3556. >>> s.memory_usage()
  3557. 152
  3558. Not including the index gives the size of the rest of the data, which
  3559. is necessarily smaller:
  3560. >>> s.memory_usage(index=False)
  3561. 24
  3562. The memory footprint of `object` values is ignored by default:
  3563. >>> s = pd.Series(["a", "b"])
  3564. >>> s.values
  3565. array(['a', 'b'], dtype=object)
  3566. >>> s.memory_usage()
  3567. 144
  3568. >>> s.memory_usage(deep=True)
  3569. 260
  3570. """
  3571. v = super().memory_usage(deep=deep)
  3572. if index:
  3573. v += self.index.memory_usage(deep=deep)
  3574. return v
  3575. def isin(self, values):
  3576. """
  3577. Check whether `values` are contained in Series.
  3578. Return a boolean Series showing whether each element in the Series
  3579. matches an element in the passed sequence of `values` exactly.
  3580. Parameters
  3581. ----------
  3582. values : set or list-like
  3583. The sequence of values to test. Passing in a single string will
  3584. raise a ``TypeError``. Instead, turn a single string into a
  3585. list of one element.
  3586. Returns
  3587. -------
  3588. Series
  3589. Series of booleans indicating if each element is in values.
  3590. Raises
  3591. ------
  3592. TypeError
  3593. * If `values` is a string
  3594. See Also
  3595. --------
  3596. DataFrame.isin : Equivalent method on DataFrame.
  3597. Examples
  3598. --------
  3599. >>> s = pd.Series(['lama', 'cow', 'lama', 'beetle', 'lama',
  3600. ... 'hippo'], name='animal')
  3601. >>> s.isin(['cow', 'lama'])
  3602. 0 True
  3603. 1 True
  3604. 2 True
  3605. 3 False
  3606. 4 True
  3607. 5 False
  3608. Name: animal, dtype: bool
  3609. Passing a single string as ``s.isin('lama')`` will raise an error. Use
  3610. a list of one element instead:
  3611. >>> s.isin(['lama'])
  3612. 0 True
  3613. 1 False
  3614. 2 True
  3615. 3 False
  3616. 4 True
  3617. 5 False
  3618. Name: animal, dtype: bool
  3619. """
  3620. result = algorithms.isin(self, values)
  3621. return self._constructor(result, index=self.index).__finalize__(self)
  3622. def between(self, left, right, inclusive=True):
  3623. """
  3624. Return boolean Series equivalent to left <= series <= right.
  3625. This function returns a boolean vector containing `True` wherever the
  3626. corresponding Series element is between the boundary values `left` and
  3627. `right`. NA values are treated as `False`.
  3628. Parameters
  3629. ----------
  3630. left : scalar or list-like
  3631. Left boundary.
  3632. right : scalar or list-like
  3633. Right boundary.
  3634. inclusive : bool, default True
  3635. Include boundaries.
  3636. Returns
  3637. -------
  3638. Series
  3639. Series representing whether each element is between left and
  3640. right (inclusive).
  3641. See Also
  3642. --------
  3643. Series.gt : Greater than of series and other.
  3644. Series.lt : Less than of series and other.
  3645. Notes
  3646. -----
  3647. This function is equivalent to ``(left <= ser) & (ser <= right)``
  3648. Examples
  3649. --------
  3650. >>> s = pd.Series([2, 0, 4, 8, np.nan])
  3651. Boundary values are included by default:
  3652. >>> s.between(1, 4)
  3653. 0 True
  3654. 1 False
  3655. 2 True
  3656. 3 False
  3657. 4 False
  3658. dtype: bool
  3659. With `inclusive` set to ``False`` boundary values are excluded:
  3660. >>> s.between(1, 4, inclusive=False)
  3661. 0 True
  3662. 1 False
  3663. 2 False
  3664. 3 False
  3665. 4 False
  3666. dtype: bool
  3667. `left` and `right` can be any scalar value:
  3668. >>> s = pd.Series(['Alice', 'Bob', 'Carol', 'Eve'])
  3669. >>> s.between('Anna', 'Daniel')
  3670. 0 False
  3671. 1 True
  3672. 2 True
  3673. 3 False
  3674. dtype: bool
  3675. """
  3676. if inclusive:
  3677. lmask = self >= left
  3678. rmask = self <= right
  3679. else:
  3680. lmask = self > left
  3681. rmask = self < right
  3682. return lmask & rmask
  3683. # ----------------------------------------------------------------------
  3684. # Convert to types that support pd.NA
  3685. def _convert_dtypes(
  3686. self: ABCSeries,
  3687. infer_objects: bool = True,
  3688. convert_string: bool = True,
  3689. convert_integer: bool = True,
  3690. convert_boolean: bool = True,
  3691. ) -> "Series":
  3692. input_series = self
  3693. if infer_objects:
  3694. input_series = input_series.infer_objects()
  3695. if is_object_dtype(input_series):
  3696. input_series = input_series.copy()
  3697. if convert_string or convert_integer or convert_boolean:
  3698. inferred_dtype = convert_dtypes(
  3699. input_series._values, convert_string, convert_integer, convert_boolean
  3700. )
  3701. try:
  3702. result = input_series.astype(inferred_dtype)
  3703. except TypeError:
  3704. result = input_series.copy()
  3705. else:
  3706. result = input_series.copy()
  3707. return result
  3708. @Appender(generic._shared_docs["isna"] % _shared_doc_kwargs)
  3709. def isna(self):
  3710. return super().isna()
  3711. @Appender(generic._shared_docs["isna"] % _shared_doc_kwargs)
  3712. def isnull(self):
  3713. return super().isnull()
  3714. @Appender(generic._shared_docs["notna"] % _shared_doc_kwargs)
  3715. def notna(self):
  3716. return super().notna()
  3717. @Appender(generic._shared_docs["notna"] % _shared_doc_kwargs)
  3718. def notnull(self):
  3719. return super().notnull()
  3720. def dropna(self, axis=0, inplace=False, how=None):
  3721. """
  3722. Return a new Series with missing values removed.
  3723. See the :ref:`User Guide <missing_data>` for more on which values are
  3724. considered missing, and how to work with missing data.
  3725. Parameters
  3726. ----------
  3727. axis : {0 or 'index'}, default 0
  3728. There is only one axis to drop values from.
  3729. inplace : bool, default False
  3730. If True, do operation inplace and return None.
  3731. how : str, optional
  3732. Not in use. Kept for compatibility.
  3733. Returns
  3734. -------
  3735. Series
  3736. Series with NA entries dropped from it.
  3737. See Also
  3738. --------
  3739. Series.isna: Indicate missing values.
  3740. Series.notna : Indicate existing (non-missing) values.
  3741. Series.fillna : Replace missing values.
  3742. DataFrame.dropna : Drop rows or columns which contain NA values.
  3743. Index.dropna : Drop missing indices.
  3744. Examples
  3745. --------
  3746. >>> ser = pd.Series([1., 2., np.nan])
  3747. >>> ser
  3748. 0 1.0
  3749. 1 2.0
  3750. 2 NaN
  3751. dtype: float64
  3752. Drop NA values from a Series.
  3753. >>> ser.dropna()
  3754. 0 1.0
  3755. 1 2.0
  3756. dtype: float64
  3757. Keep the Series with valid entries in the same variable.
  3758. >>> ser.dropna(inplace=True)
  3759. >>> ser
  3760. 0 1.0
  3761. 1 2.0
  3762. dtype: float64
  3763. Empty strings are not considered NA values. ``None`` is considered an
  3764. NA value.
  3765. >>> ser = pd.Series([np.NaN, 2, pd.NaT, '', None, 'I stay'])
  3766. >>> ser
  3767. 0 NaN
  3768. 1 2
  3769. 2 NaT
  3770. 3
  3771. 4 None
  3772. 5 I stay
  3773. dtype: object
  3774. >>> ser.dropna()
  3775. 1 2
  3776. 3
  3777. 5 I stay
  3778. dtype: object
  3779. """
  3780. inplace = validate_bool_kwarg(inplace, "inplace")
  3781. # Validate the axis parameter
  3782. self._get_axis_number(axis or 0)
  3783. if self._can_hold_na:
  3784. result = remove_na_arraylike(self)
  3785. if inplace:
  3786. self._update_inplace(result)
  3787. else:
  3788. return result
  3789. else:
  3790. if inplace:
  3791. # do nothing
  3792. pass
  3793. else:
  3794. return self.copy()
  3795. # ----------------------------------------------------------------------
  3796. # Time series-oriented methods
  3797. def to_timestamp(self, freq=None, how="start", copy=True):
  3798. """
  3799. Cast to DatetimeIndex of Timestamps, at *beginning* of period.
  3800. Parameters
  3801. ----------
  3802. freq : str, default frequency of PeriodIndex
  3803. Desired frequency.
  3804. how : {'s', 'e', 'start', 'end'}
  3805. Convention for converting period to timestamp; start of period
  3806. vs. end.
  3807. copy : bool, default True
  3808. Whether or not to return a copy.
  3809. Returns
  3810. -------
  3811. Series with DatetimeIndex
  3812. """
  3813. new_values = self._values
  3814. if copy:
  3815. new_values = new_values.copy()
  3816. new_index = self.index.to_timestamp(freq=freq, how=how)
  3817. return self._constructor(new_values, index=new_index).__finalize__(self)
  3818. def to_period(self, freq=None, copy=True):
  3819. """
  3820. Convert Series from DatetimeIndex to PeriodIndex with desired
  3821. frequency (inferred from index if not passed).
  3822. Parameters
  3823. ----------
  3824. freq : str, default None
  3825. Frequency associated with the PeriodIndex.
  3826. copy : bool, default True
  3827. Whether or not to return a copy.
  3828. Returns
  3829. -------
  3830. Series
  3831. Series with index converted to PeriodIndex.
  3832. """
  3833. new_values = self._values
  3834. if copy:
  3835. new_values = new_values.copy()
  3836. new_index = self.index.to_period(freq=freq)
  3837. return self._constructor(new_values, index=new_index).__finalize__(self)
  3838. # ----------------------------------------------------------------------
  3839. # Accessor Methods
  3840. # ----------------------------------------------------------------------
  3841. str = CachedAccessor("str", StringMethods)
  3842. dt = CachedAccessor("dt", CombinedDatetimelikeProperties)
  3843. cat = CachedAccessor("cat", CategoricalAccessor)
  3844. plot = CachedAccessor("plot", pandas.plotting.PlotAccessor)
  3845. sparse = CachedAccessor("sparse", SparseAccessor)
  3846. # ----------------------------------------------------------------------
  3847. # Add plotting methods to Series
  3848. hist = pandas.plotting.hist_series
# Register axis metadata for the single Series axis ("index").
Series._setup_axes(["index"], docs={"index": "The index (axis labels) of the Series."})
# Attach generated numeric/statistical methods and shared
# Series/DataFrame operations defined by the generic machinery.
Series._add_numeric_operations()
Series._add_series_or_dataframe_operations()

# Add arithmetic!
ops.add_flex_arithmetic_methods(Series)
ops.add_special_arithmetic_methods(Series)