defchararray.py 69 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679168016811682168316841685168616871688168916901691169216931694169516961697169816991700170117021703170417051706170717081709171017111712171317141715171617171718171917201721172217231724172517261727172817291730173117321733173417351736173717381739174017411742174317441745174617471748174917501751175217531754175517561757175817591760176117621763176417651766176717681769177017711772177317741775177617771778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227722782279228022812282228322842285228622872288228922902291229222932294229522962297229822992300230123022303230423052306230723082309231023112312231323142315231623172318231923202321232223232324232523262327232823292330233123322333233423352336233723382339234023412342234323442345234623472348234923502351235223532354235523562357235823592360236123622363236423652366236723682369237023712372237323742375237623772378237923802381238223832384238523862387238823892390239123922393239423952396239723982399240024012402240324042405240624072408240924102411241224132414241524162417241824192420242124222423242424252426242724282429243024312432243324342435243624372438243924402441244224432444244524462447244824492450245124522453245424552456245724582459246024612462246324642465246624672468246924702471247224732474247524762477247824792480248124822483248424852486248724882489249024912492249324942495249624972498249925002501250225032504250525062507250825092510251125122513251425152516251725182519252025212522252325242525252625272528252925302531253225332534253525362537253825392540254125422543254425452546254725482549255025512552255325542555255625572558255925602561256225632564256525662567256825692570257125722573257425752576257725782579258025812582258325842585258625872588258925902591259225932594259525962597259825992600260126022603260426052606260726082609261026112612261326142615261626172618261926202621262226232624262526262627262826292630263126322633263426352636263726382639264026412642264326442645264626472648264926502651265226532654265526562657265826592660266126622663266426652666266726682669267026712672267326742675267626772678267926802681268226832684268526862687268826892690269126922693269426952696269726982699270027012702270327042705270627072708270927102711271227132714271527162717271827192720272127222723272427252726272727282729273027312732273327342735273627372738273927402741274227432744274527462747274827492750275127522753275427552756275727582759276027612762276327642765276627672768276927702771277227732774277527762777277827792780278127822783278427852786278727882789279027912792279327942795279627972798279928002801280228032804280528062807280828092810281128122813281428152816281728182819
  1. """
  2. This module contains a set of functions for vectorized string
  3. operations and methods.
  4. .. note::
  5. The `chararray` class exists for backwards compatibility with
  6. Numarray, it is not recommended for new development. Starting from numpy
  7. 1.4, if one needs arrays of strings, it is recommended to use arrays of
  8. `dtype` `object_`, `string_` or `unicode_`, and use the free functions
  9. in the `numpy.char` module for fast vectorized string operations.
  10. Some methods will only be available if the corresponding string method is
  11. available in your version of Python.
  12. The preferred alias for `defchararray` is `numpy.char`.
  13. """
  14. from __future__ import division, absolute_import, print_function
  15. import functools
  16. import sys
  17. from .numerictypes import string_, unicode_, integer, object_, bool_, character
  18. from .numeric import ndarray, compare_chararrays
  19. from .numeric import array as narray
  20. from numpy.core.multiarray import _vec_string
  21. from numpy.core.overrides import set_module
  22. from numpy.core import overrides
  23. from numpy.compat import asbytes, long
  24. import numpy
  25. __all__ = [
  26. 'equal', 'not_equal', 'greater_equal', 'less_equal',
  27. 'greater', 'less', 'str_len', 'add', 'multiply', 'mod', 'capitalize',
  28. 'center', 'count', 'decode', 'encode', 'endswith', 'expandtabs',
  29. 'find', 'index', 'isalnum', 'isalpha', 'isdigit', 'islower', 'isspace',
  30. 'istitle', 'isupper', 'join', 'ljust', 'lower', 'lstrip', 'partition',
  31. 'replace', 'rfind', 'rindex', 'rjust', 'rpartition', 'rsplit',
  32. 'rstrip', 'split', 'splitlines', 'startswith', 'strip', 'swapcase',
  33. 'title', 'translate', 'upper', 'zfill', 'isnumeric', 'isdecimal',
  34. 'array', 'asarray'
  35. ]
  36. _globalvar = 0
  37. if sys.version_info[0] >= 3:
  38. _unicode = str
  39. _bytes = bytes
  40. else:
  41. _unicode = unicode
  42. _bytes = str
  43. _len = len
  44. array_function_dispatch = functools.partial(
  45. overrides.array_function_dispatch, module='numpy.char')
  46. def _use_unicode(*args):
  47. """
  48. Helper function for determining the output type of some string
  49. operations.
  50. For an operation on two ndarrays, if at least one is unicode, the
  51. result should be unicode.
  52. """
  53. for x in args:
  54. if (isinstance(x, _unicode) or
  55. issubclass(numpy.asarray(x).dtype.type, unicode_)):
  56. return unicode_
  57. return string_
  58. def _to_string_or_unicode_array(result):
  59. """
  60. Helper function to cast a result back into a string or unicode array
  61. if an object array must be used as an intermediary.
  62. """
  63. return numpy.asarray(result.tolist())
  64. def _clean_args(*args):
  65. """
  66. Helper function for delegating arguments to Python string
  67. functions.
  68. Many of the Python string operations that have optional arguments
  69. do not use 'None' to indicate a default value. In these cases,
  70. we need to remove all None arguments, and those following them.
  71. """
  72. newargs = []
  73. for chk in args:
  74. if chk is None:
  75. break
  76. newargs.append(chk)
  77. return newargs
  78. def _get_num_chars(a):
  79. """
  80. Helper function that returns the number of characters per field in
  81. a string or unicode array. This is to abstract out the fact that
  82. for a unicode array this is itemsize / 4.
  83. """
  84. if issubclass(a.dtype.type, unicode_):
  85. return a.itemsize // 4
  86. return a.itemsize
  87. def _binary_op_dispatcher(x1, x2):
  88. return (x1, x2)
  89. @array_function_dispatch(_binary_op_dispatcher)
  90. def equal(x1, x2):
  91. """
  92. Return (x1 == x2) element-wise.
  93. Unlike `numpy.equal`, this comparison is performed by first
  94. stripping whitespace characters from the end of the string. This
  95. behavior is provided for backward-compatibility with numarray.
  96. Parameters
  97. ----------
  98. x1, x2 : array_like of str or unicode
  99. Input arrays of the same shape.
  100. Returns
  101. -------
  102. out : ndarray or bool
  103. Output array of bools, or a single bool if x1 and x2 are scalars.
  104. See Also
  105. --------
  106. not_equal, greater_equal, less_equal, greater, less
  107. """
  108. return compare_chararrays(x1, x2, '==', True)
  109. @array_function_dispatch(_binary_op_dispatcher)
  110. def not_equal(x1, x2):
  111. """
  112. Return (x1 != x2) element-wise.
  113. Unlike `numpy.not_equal`, this comparison is performed by first
  114. stripping whitespace characters from the end of the string. This
  115. behavior is provided for backward-compatibility with numarray.
  116. Parameters
  117. ----------
  118. x1, x2 : array_like of str or unicode
  119. Input arrays of the same shape.
  120. Returns
  121. -------
  122. out : ndarray or bool
  123. Output array of bools, or a single bool if x1 and x2 are scalars.
  124. See Also
  125. --------
  126. equal, greater_equal, less_equal, greater, less
  127. """
  128. return compare_chararrays(x1, x2, '!=', True)
  129. @array_function_dispatch(_binary_op_dispatcher)
  130. def greater_equal(x1, x2):
  131. """
  132. Return (x1 >= x2) element-wise.
  133. Unlike `numpy.greater_equal`, this comparison is performed by
  134. first stripping whitespace characters from the end of the string.
  135. This behavior is provided for backward-compatibility with
  136. numarray.
  137. Parameters
  138. ----------
  139. x1, x2 : array_like of str or unicode
  140. Input arrays of the same shape.
  141. Returns
  142. -------
  143. out : ndarray or bool
  144. Output array of bools, or a single bool if x1 and x2 are scalars.
  145. See Also
  146. --------
  147. equal, not_equal, less_equal, greater, less
  148. """
  149. return compare_chararrays(x1, x2, '>=', True)
  150. @array_function_dispatch(_binary_op_dispatcher)
  151. def less_equal(x1, x2):
  152. """
  153. Return (x1 <= x2) element-wise.
  154. Unlike `numpy.less_equal`, this comparison is performed by first
  155. stripping whitespace characters from the end of the string. This
  156. behavior is provided for backward-compatibility with numarray.
  157. Parameters
  158. ----------
  159. x1, x2 : array_like of str or unicode
  160. Input arrays of the same shape.
  161. Returns
  162. -------
  163. out : ndarray or bool
  164. Output array of bools, or a single bool if x1 and x2 are scalars.
  165. See Also
  166. --------
  167. equal, not_equal, greater_equal, greater, less
  168. """
  169. return compare_chararrays(x1, x2, '<=', True)
  170. @array_function_dispatch(_binary_op_dispatcher)
  171. def greater(x1, x2):
  172. """
  173. Return (x1 > x2) element-wise.
  174. Unlike `numpy.greater`, this comparison is performed by first
  175. stripping whitespace characters from the end of the string. This
  176. behavior is provided for backward-compatibility with numarray.
  177. Parameters
  178. ----------
  179. x1, x2 : array_like of str or unicode
  180. Input arrays of the same shape.
  181. Returns
  182. -------
  183. out : ndarray or bool
  184. Output array of bools, or a single bool if x1 and x2 are scalars.
  185. See Also
  186. --------
  187. equal, not_equal, greater_equal, less_equal, less
  188. """
  189. return compare_chararrays(x1, x2, '>', True)
  190. @array_function_dispatch(_binary_op_dispatcher)
  191. def less(x1, x2):
  192. """
  193. Return (x1 < x2) element-wise.
  194. Unlike `numpy.greater`, this comparison is performed by first
  195. stripping whitespace characters from the end of the string. This
  196. behavior is provided for backward-compatibility with numarray.
  197. Parameters
  198. ----------
  199. x1, x2 : array_like of str or unicode
  200. Input arrays of the same shape.
  201. Returns
  202. -------
  203. out : ndarray or bool
  204. Output array of bools, or a single bool if x1 and x2 are scalars.
  205. See Also
  206. --------
  207. equal, not_equal, greater_equal, less_equal, greater
  208. """
  209. return compare_chararrays(x1, x2, '<', True)
  210. def _unary_op_dispatcher(a):
  211. return (a,)
  212. @array_function_dispatch(_unary_op_dispatcher)
  213. def str_len(a):
  214. """
  215. Return len(a) element-wise.
  216. Parameters
  217. ----------
  218. a : array_like of str or unicode
  219. Returns
  220. -------
  221. out : ndarray
  222. Output array of integers
  223. See also
  224. --------
  225. __builtin__.len
  226. """
  227. return _vec_string(a, integer, '__len__')
  228. @array_function_dispatch(_binary_op_dispatcher)
  229. def add(x1, x2):
  230. """
  231. Return element-wise string concatenation for two arrays of str or unicode.
  232. Arrays `x1` and `x2` must have the same shape.
  233. Parameters
  234. ----------
  235. x1 : array_like of str or unicode
  236. Input array.
  237. x2 : array_like of str or unicode
  238. Input array.
  239. Returns
  240. -------
  241. add : ndarray
  242. Output array of `string_` or `unicode_`, depending on input types
  243. of the same shape as `x1` and `x2`.
  244. """
  245. arr1 = numpy.asarray(x1)
  246. arr2 = numpy.asarray(x2)
  247. out_size = _get_num_chars(arr1) + _get_num_chars(arr2)
  248. dtype = _use_unicode(arr1, arr2)
  249. return _vec_string(arr1, (dtype, out_size), '__add__', (arr2,))
  250. def _multiply_dispatcher(a, i):
  251. return (a,)
  252. @array_function_dispatch(_multiply_dispatcher)
  253. def multiply(a, i):
  254. """
  255. Return (a * i), that is string multiple concatenation,
  256. element-wise.
  257. Values in `i` of less than 0 are treated as 0 (which yields an
  258. empty string).
  259. Parameters
  260. ----------
  261. a : array_like of str or unicode
  262. i : array_like of ints
  263. Returns
  264. -------
  265. out : ndarray
  266. Output array of str or unicode, depending on input types
  267. """
  268. a_arr = numpy.asarray(a)
  269. i_arr = numpy.asarray(i)
  270. if not issubclass(i_arr.dtype.type, integer):
  271. raise ValueError("Can only multiply by integers")
  272. out_size = _get_num_chars(a_arr) * max(long(i_arr.max()), 0)
  273. return _vec_string(
  274. a_arr, (a_arr.dtype.type, out_size), '__mul__', (i_arr,))
  275. def _mod_dispatcher(a, values):
  276. return (a, values)
  277. @array_function_dispatch(_mod_dispatcher)
  278. def mod(a, values):
  279. """
  280. Return (a % i), that is pre-Python 2.6 string formatting
  281. (iterpolation), element-wise for a pair of array_likes of str
  282. or unicode.
  283. Parameters
  284. ----------
  285. a : array_like of str or unicode
  286. values : array_like of values
  287. These values will be element-wise interpolated into the string.
  288. Returns
  289. -------
  290. out : ndarray
  291. Output array of str or unicode, depending on input types
  292. See also
  293. --------
  294. str.__mod__
  295. """
  296. return _to_string_or_unicode_array(
  297. _vec_string(a, object_, '__mod__', (values,)))
  298. @array_function_dispatch(_unary_op_dispatcher)
  299. def capitalize(a):
  300. """
  301. Return a copy of `a` with only the first character of each element
  302. capitalized.
  303. Calls `str.capitalize` element-wise.
  304. For 8-bit strings, this method is locale-dependent.
  305. Parameters
  306. ----------
  307. a : array_like of str or unicode
  308. Input array of strings to capitalize.
  309. Returns
  310. -------
  311. out : ndarray
  312. Output array of str or unicode, depending on input
  313. types
  314. See also
  315. --------
  316. str.capitalize
  317. Examples
  318. --------
  319. >>> c = np.array(['a1b2','1b2a','b2a1','2a1b'],'S4'); c
  320. array(['a1b2', '1b2a', 'b2a1', '2a1b'],
  321. dtype='|S4')
  322. >>> np.char.capitalize(c)
  323. array(['A1b2', '1b2a', 'B2a1', '2a1b'],
  324. dtype='|S4')
  325. """
  326. a_arr = numpy.asarray(a)
  327. return _vec_string(a_arr, a_arr.dtype, 'capitalize')
  328. def _center_dispatcher(a, width, fillchar=None):
  329. return (a,)
  330. @array_function_dispatch(_center_dispatcher)
  331. def center(a, width, fillchar=' '):
  332. """
  333. Return a copy of `a` with its elements centered in a string of
  334. length `width`.
  335. Calls `str.center` element-wise.
  336. Parameters
  337. ----------
  338. a : array_like of str or unicode
  339. width : int
  340. The length of the resulting strings
  341. fillchar : str or unicode, optional
  342. The padding character to use (default is space).
  343. Returns
  344. -------
  345. out : ndarray
  346. Output array of str or unicode, depending on input
  347. types
  348. See also
  349. --------
  350. str.center
  351. """
  352. a_arr = numpy.asarray(a)
  353. width_arr = numpy.asarray(width)
  354. size = long(numpy.max(width_arr.flat))
  355. if numpy.issubdtype(a_arr.dtype, numpy.string_):
  356. fillchar = asbytes(fillchar)
  357. return _vec_string(
  358. a_arr, (a_arr.dtype.type, size), 'center', (width_arr, fillchar))
  359. def _count_dispatcher(a, sub, start=None, end=None):
  360. return (a,)
  361. @array_function_dispatch(_count_dispatcher)
  362. def count(a, sub, start=0, end=None):
  363. """
  364. Returns an array with the number of non-overlapping occurrences of
  365. substring `sub` in the range [`start`, `end`].
  366. Calls `str.count` element-wise.
  367. Parameters
  368. ----------
  369. a : array_like of str or unicode
  370. sub : str or unicode
  371. The substring to search for.
  372. start, end : int, optional
  373. Optional arguments `start` and `end` are interpreted as slice
  374. notation to specify the range in which to count.
  375. Returns
  376. -------
  377. out : ndarray
  378. Output array of ints.
  379. See also
  380. --------
  381. str.count
  382. Examples
  383. --------
  384. >>> c = np.array(['aAaAaA', ' aA ', 'abBABba'])
  385. >>> c
  386. array(['aAaAaA', ' aA ', 'abBABba'], dtype='<U7')
  387. >>> np.char.count(c, 'A')
  388. array([3, 1, 1])
  389. >>> np.char.count(c, 'aA')
  390. array([3, 1, 0])
  391. >>> np.char.count(c, 'A', start=1, end=4)
  392. array([2, 1, 1])
  393. >>> np.char.count(c, 'A', start=1, end=3)
  394. array([1, 0, 0])
  395. """
  396. return _vec_string(a, integer, 'count', [sub, start] + _clean_args(end))
  397. def _code_dispatcher(a, encoding=None, errors=None):
  398. return (a,)
  399. @array_function_dispatch(_code_dispatcher)
  400. def decode(a, encoding=None, errors=None):
  401. """
  402. Calls `str.decode` element-wise.
  403. The set of available codecs comes from the Python standard library,
  404. and may be extended at runtime. For more information, see the
  405. :mod:`codecs` module.
  406. Parameters
  407. ----------
  408. a : array_like of str or unicode
  409. encoding : str, optional
  410. The name of an encoding
  411. errors : str, optional
  412. Specifies how to handle encoding errors
  413. Returns
  414. -------
  415. out : ndarray
  416. See also
  417. --------
  418. str.decode
  419. Notes
  420. -----
  421. The type of the result will depend on the encoding specified.
  422. Examples
  423. --------
  424. >>> c = np.array(['aAaAaA', ' aA ', 'abBABba'])
  425. >>> c
  426. array(['aAaAaA', ' aA ', 'abBABba'], dtype='<U7')
  427. >>> np.char.encode(c, encoding='cp037')
  428. array(['\\x81\\xc1\\x81\\xc1\\x81\\xc1', '@@\\x81\\xc1@@',
  429. '\\x81\\x82\\xc2\\xc1\\xc2\\x82\\x81'],
  430. dtype='|S7')
  431. """
  432. return _to_string_or_unicode_array(
  433. _vec_string(a, object_, 'decode', _clean_args(encoding, errors)))
  434. @array_function_dispatch(_code_dispatcher)
  435. def encode(a, encoding=None, errors=None):
  436. """
  437. Calls `str.encode` element-wise.
  438. The set of available codecs comes from the Python standard library,
  439. and may be extended at runtime. For more information, see the codecs
  440. module.
  441. Parameters
  442. ----------
  443. a : array_like of str or unicode
  444. encoding : str, optional
  445. The name of an encoding
  446. errors : str, optional
  447. Specifies how to handle encoding errors
  448. Returns
  449. -------
  450. out : ndarray
  451. See also
  452. --------
  453. str.encode
  454. Notes
  455. -----
  456. The type of the result will depend on the encoding specified.
  457. """
  458. return _to_string_or_unicode_array(
  459. _vec_string(a, object_, 'encode', _clean_args(encoding, errors)))
  460. def _endswith_dispatcher(a, suffix, start=None, end=None):
  461. return (a,)
  462. @array_function_dispatch(_endswith_dispatcher)
  463. def endswith(a, suffix, start=0, end=None):
  464. """
  465. Returns a boolean array which is `True` where the string element
  466. in `a` ends with `suffix`, otherwise `False`.
  467. Calls `str.endswith` element-wise.
  468. Parameters
  469. ----------
  470. a : array_like of str or unicode
  471. suffix : str
  472. start, end : int, optional
  473. With optional `start`, test beginning at that position. With
  474. optional `end`, stop comparing at that position.
  475. Returns
  476. -------
  477. out : ndarray
  478. Outputs an array of bools.
  479. See also
  480. --------
  481. str.endswith
  482. Examples
  483. --------
  484. >>> s = np.array(['foo', 'bar'])
  485. >>> s[0] = 'foo'
  486. >>> s[1] = 'bar'
  487. >>> s
  488. array(['foo', 'bar'], dtype='<U3')
  489. >>> np.char.endswith(s, 'ar')
  490. array([False, True])
  491. >>> np.char.endswith(s, 'a', start=1, end=2)
  492. array([False, True])
  493. """
  494. return _vec_string(
  495. a, bool_, 'endswith', [suffix, start] + _clean_args(end))
  496. def _expandtabs_dispatcher(a, tabsize=None):
  497. return (a,)
  498. @array_function_dispatch(_expandtabs_dispatcher)
  499. def expandtabs(a, tabsize=8):
  500. """
  501. Return a copy of each string element where all tab characters are
  502. replaced by one or more spaces.
  503. Calls `str.expandtabs` element-wise.
  504. Return a copy of each string element where all tab characters are
  505. replaced by one or more spaces, depending on the current column
  506. and the given `tabsize`. The column number is reset to zero after
  507. each newline occurring in the string. This doesn't understand other
  508. non-printing characters or escape sequences.
  509. Parameters
  510. ----------
  511. a : array_like of str or unicode
  512. Input array
  513. tabsize : int, optional
  514. Replace tabs with `tabsize` number of spaces. If not given defaults
  515. to 8 spaces.
  516. Returns
  517. -------
  518. out : ndarray
  519. Output array of str or unicode, depending on input type
  520. See also
  521. --------
  522. str.expandtabs
  523. """
  524. return _to_string_or_unicode_array(
  525. _vec_string(a, object_, 'expandtabs', (tabsize,)))
  526. @array_function_dispatch(_count_dispatcher)
  527. def find(a, sub, start=0, end=None):
  528. """
  529. For each element, return the lowest index in the string where
  530. substring `sub` is found.
  531. Calls `str.find` element-wise.
  532. For each element, return the lowest index in the string where
  533. substring `sub` is found, such that `sub` is contained in the
  534. range [`start`, `end`].
  535. Parameters
  536. ----------
  537. a : array_like of str or unicode
  538. sub : str or unicode
  539. start, end : int, optional
  540. Optional arguments `start` and `end` are interpreted as in
  541. slice notation.
  542. Returns
  543. -------
  544. out : ndarray or int
  545. Output array of ints. Returns -1 if `sub` is not found.
  546. See also
  547. --------
  548. str.find
  549. """
  550. return _vec_string(
  551. a, integer, 'find', [sub, start] + _clean_args(end))
  552. @array_function_dispatch(_count_dispatcher)
  553. def index(a, sub, start=0, end=None):
  554. """
  555. Like `find`, but raises `ValueError` when the substring is not found.
  556. Calls `str.index` element-wise.
  557. Parameters
  558. ----------
  559. a : array_like of str or unicode
  560. sub : str or unicode
  561. start, end : int, optional
  562. Returns
  563. -------
  564. out : ndarray
  565. Output array of ints. Returns -1 if `sub` is not found.
  566. See also
  567. --------
  568. find, str.find
  569. """
  570. return _vec_string(
  571. a, integer, 'index', [sub, start] + _clean_args(end))
  572. @array_function_dispatch(_unary_op_dispatcher)
  573. def isalnum(a):
  574. """
  575. Returns true for each element if all characters in the string are
  576. alphanumeric and there is at least one character, false otherwise.
  577. Calls `str.isalnum` element-wise.
  578. For 8-bit strings, this method is locale-dependent.
  579. Parameters
  580. ----------
  581. a : array_like of str or unicode
  582. Returns
  583. -------
  584. out : ndarray
  585. Output array of str or unicode, depending on input type
  586. See also
  587. --------
  588. str.isalnum
  589. """
  590. return _vec_string(a, bool_, 'isalnum')
  591. @array_function_dispatch(_unary_op_dispatcher)
  592. def isalpha(a):
  593. """
  594. Returns true for each element if all characters in the string are
  595. alphabetic and there is at least one character, false otherwise.
  596. Calls `str.isalpha` element-wise.
  597. For 8-bit strings, this method is locale-dependent.
  598. Parameters
  599. ----------
  600. a : array_like of str or unicode
  601. Returns
  602. -------
  603. out : ndarray
  604. Output array of bools
  605. See also
  606. --------
  607. str.isalpha
  608. """
  609. return _vec_string(a, bool_, 'isalpha')
  610. @array_function_dispatch(_unary_op_dispatcher)
  611. def isdigit(a):
  612. """
  613. Returns true for each element if all characters in the string are
  614. digits and there is at least one character, false otherwise.
  615. Calls `str.isdigit` element-wise.
  616. For 8-bit strings, this method is locale-dependent.
  617. Parameters
  618. ----------
  619. a : array_like of str or unicode
  620. Returns
  621. -------
  622. out : ndarray
  623. Output array of bools
  624. See also
  625. --------
  626. str.isdigit
  627. """
  628. return _vec_string(a, bool_, 'isdigit')
  629. @array_function_dispatch(_unary_op_dispatcher)
  630. def islower(a):
  631. """
  632. Returns true for each element if all cased characters in the
  633. string are lowercase and there is at least one cased character,
  634. false otherwise.
  635. Calls `str.islower` element-wise.
  636. For 8-bit strings, this method is locale-dependent.
  637. Parameters
  638. ----------
  639. a : array_like of str or unicode
  640. Returns
  641. -------
  642. out : ndarray
  643. Output array of bools
  644. See also
  645. --------
  646. str.islower
  647. """
  648. return _vec_string(a, bool_, 'islower')
  649. @array_function_dispatch(_unary_op_dispatcher)
  650. def isspace(a):
  651. """
  652. Returns true for each element if there are only whitespace
  653. characters in the string and there is at least one character,
  654. false otherwise.
  655. Calls `str.isspace` element-wise.
  656. For 8-bit strings, this method is locale-dependent.
  657. Parameters
  658. ----------
  659. a : array_like of str or unicode
  660. Returns
  661. -------
  662. out : ndarray
  663. Output array of bools
  664. See also
  665. --------
  666. str.isspace
  667. """
  668. return _vec_string(a, bool_, 'isspace')
  669. @array_function_dispatch(_unary_op_dispatcher)
  670. def istitle(a):
  671. """
  672. Returns true for each element if the element is a titlecased
  673. string and there is at least one character, false otherwise.
  674. Call `str.istitle` element-wise.
  675. For 8-bit strings, this method is locale-dependent.
  676. Parameters
  677. ----------
  678. a : array_like of str or unicode
  679. Returns
  680. -------
  681. out : ndarray
  682. Output array of bools
  683. See also
  684. --------
  685. str.istitle
  686. """
  687. return _vec_string(a, bool_, 'istitle')
  688. @array_function_dispatch(_unary_op_dispatcher)
  689. def isupper(a):
  690. """
  691. Returns true for each element if all cased characters in the
  692. string are uppercase and there is at least one character, false
  693. otherwise.
  694. Call `str.isupper` element-wise.
  695. For 8-bit strings, this method is locale-dependent.
  696. Parameters
  697. ----------
  698. a : array_like of str or unicode
  699. Returns
  700. -------
  701. out : ndarray
  702. Output array of bools
  703. See also
  704. --------
  705. str.isupper
  706. """
  707. return _vec_string(a, bool_, 'isupper')
  708. def _join_dispatcher(sep, seq):
  709. return (sep, seq)
  710. @array_function_dispatch(_join_dispatcher)
  711. def join(sep, seq):
  712. """
  713. Return a string which is the concatenation of the strings in the
  714. sequence `seq`.
  715. Calls `str.join` element-wise.
  716. Parameters
  717. ----------
  718. sep : array_like of str or unicode
  719. seq : array_like of str or unicode
  720. Returns
  721. -------
  722. out : ndarray
  723. Output array of str or unicode, depending on input types
  724. See also
  725. --------
  726. str.join
  727. """
  728. return _to_string_or_unicode_array(
  729. _vec_string(sep, object_, 'join', (seq,)))
  730. def _just_dispatcher(a, width, fillchar=None):
  731. return (a,)
  732. @array_function_dispatch(_just_dispatcher)
  733. def ljust(a, width, fillchar=' '):
  734. """
  735. Return an array with the elements of `a` left-justified in a
  736. string of length `width`.
  737. Calls `str.ljust` element-wise.
  738. Parameters
  739. ----------
  740. a : array_like of str or unicode
  741. width : int
  742. The length of the resulting strings
  743. fillchar : str or unicode, optional
  744. The character to use for padding
  745. Returns
  746. -------
  747. out : ndarray
  748. Output array of str or unicode, depending on input type
  749. See also
  750. --------
  751. str.ljust
  752. """
  753. a_arr = numpy.asarray(a)
  754. width_arr = numpy.asarray(width)
  755. size = long(numpy.max(width_arr.flat))
  756. if numpy.issubdtype(a_arr.dtype, numpy.string_):
  757. fillchar = asbytes(fillchar)
  758. return _vec_string(
  759. a_arr, (a_arr.dtype.type, size), 'ljust', (width_arr, fillchar))
  760. @array_function_dispatch(_unary_op_dispatcher)
  761. def lower(a):
  762. """
  763. Return an array with the elements converted to lowercase.
  764. Call `str.lower` element-wise.
  765. For 8-bit strings, this method is locale-dependent.
  766. Parameters
  767. ----------
  768. a : array_like, {str, unicode}
  769. Input array.
  770. Returns
  771. -------
  772. out : ndarray, {str, unicode}
  773. Output array of str or unicode, depending on input type
  774. See also
  775. --------
  776. str.lower
  777. Examples
  778. --------
  779. >>> c = np.array(['A1B C', '1BCA', 'BCA1']); c
  780. array(['A1B C', '1BCA', 'BCA1'], dtype='<U5')
  781. >>> np.char.lower(c)
  782. array(['a1b c', '1bca', 'bca1'], dtype='<U5')
  783. """
  784. a_arr = numpy.asarray(a)
  785. return _vec_string(a_arr, a_arr.dtype, 'lower')
  786. def _strip_dispatcher(a, chars=None):
  787. return (a,)
  788. @array_function_dispatch(_strip_dispatcher)
  789. def lstrip(a, chars=None):
  790. """
  791. For each element in `a`, return a copy with the leading characters
  792. removed.
  793. Calls `str.lstrip` element-wise.
  794. Parameters
  795. ----------
  796. a : array-like, {str, unicode}
  797. Input array.
  798. chars : {str, unicode}, optional
  799. The `chars` argument is a string specifying the set of
  800. characters to be removed. If omitted or None, the `chars`
  801. argument defaults to removing whitespace. The `chars` argument
  802. is not a prefix; rather, all combinations of its values are
  803. stripped.
  804. Returns
  805. -------
  806. out : ndarray, {str, unicode}
  807. Output array of str or unicode, depending on input type
  808. See also
  809. --------
  810. str.lstrip
  811. Examples
  812. --------
  813. >>> c = np.array(['aAaAaA', ' aA ', 'abBABba'])
  814. >>> c
  815. array(['aAaAaA', ' aA ', 'abBABba'], dtype='<U7')
  816. The 'a' variable is unstripped from c[1] because whitespace leading.
  817. >>> np.char.lstrip(c, 'a')
  818. array(['AaAaA', ' aA ', 'bBABba'], dtype='<U7')
  819. >>> np.char.lstrip(c, 'A') # leaves c unchanged
  820. array(['aAaAaA', ' aA ', 'abBABba'], dtype='<U7')
  821. >>> (np.char.lstrip(c, ' ') == np.char.lstrip(c, '')).all()
  822. ... # XXX: is this a regression? This used to return True
  823. ... # np.char.lstrip(c,'') does not modify c at all.
  824. False
  825. >>> (np.char.lstrip(c, ' ') == np.char.lstrip(c, None)).all()
  826. True
  827. """
  828. a_arr = numpy.asarray(a)
  829. return _vec_string(a_arr, a_arr.dtype, 'lstrip', (chars,))
  830. def _partition_dispatcher(a, sep):
  831. return (a,)
  832. @array_function_dispatch(_partition_dispatcher)
  833. def partition(a, sep):
  834. """
  835. Partition each element in `a` around `sep`.
  836. Calls `str.partition` element-wise.
  837. For each element in `a`, split the element as the first
  838. occurrence of `sep`, and return 3 strings containing the part
  839. before the separator, the separator itself, and the part after
  840. the separator. If the separator is not found, return 3 strings
  841. containing the string itself, followed by two empty strings.
  842. Parameters
  843. ----------
  844. a : array_like, {str, unicode}
  845. Input array
  846. sep : {str, unicode}
  847. Separator to split each string element in `a`.
  848. Returns
  849. -------
  850. out : ndarray, {str, unicode}
  851. Output array of str or unicode, depending on input type.
  852. The output array will have an extra dimension with 3
  853. elements per input element.
  854. See also
  855. --------
  856. str.partition
  857. """
  858. return _to_string_or_unicode_array(
  859. _vec_string(a, object_, 'partition', (sep,)))
  860. def _replace_dispatcher(a, old, new, count=None):
  861. return (a,)
  862. @array_function_dispatch(_replace_dispatcher)
  863. def replace(a, old, new, count=None):
  864. """
  865. For each element in `a`, return a copy of the string with all
  866. occurrences of substring `old` replaced by `new`.
  867. Calls `str.replace` element-wise.
  868. Parameters
  869. ----------
  870. a : array-like of str or unicode
  871. old, new : str or unicode
  872. count : int, optional
  873. If the optional argument `count` is given, only the first
  874. `count` occurrences are replaced.
  875. Returns
  876. -------
  877. out : ndarray
  878. Output array of str or unicode, depending on input type
  879. See also
  880. --------
  881. str.replace
  882. """
  883. return _to_string_or_unicode_array(
  884. _vec_string(
  885. a, object_, 'replace', [old, new] + _clean_args(count)))
  886. @array_function_dispatch(_count_dispatcher)
  887. def rfind(a, sub, start=0, end=None):
  888. """
  889. For each element in `a`, return the highest index in the string
  890. where substring `sub` is found, such that `sub` is contained
  891. within [`start`, `end`].
  892. Calls `str.rfind` element-wise.
  893. Parameters
  894. ----------
  895. a : array-like of str or unicode
  896. sub : str or unicode
  897. start, end : int, optional
  898. Optional arguments `start` and `end` are interpreted as in
  899. slice notation.
  900. Returns
  901. -------
  902. out : ndarray
  903. Output array of ints. Return -1 on failure.
  904. See also
  905. --------
  906. str.rfind
  907. """
  908. return _vec_string(
  909. a, integer, 'rfind', [sub, start] + _clean_args(end))
  910. @array_function_dispatch(_count_dispatcher)
  911. def rindex(a, sub, start=0, end=None):
  912. """
  913. Like `rfind`, but raises `ValueError` when the substring `sub` is
  914. not found.
  915. Calls `str.rindex` element-wise.
  916. Parameters
  917. ----------
  918. a : array-like of str or unicode
  919. sub : str or unicode
  920. start, end : int, optional
  921. Returns
  922. -------
  923. out : ndarray
  924. Output array of ints.
  925. See also
  926. --------
  927. rfind, str.rindex
  928. """
  929. return _vec_string(
  930. a, integer, 'rindex', [sub, start] + _clean_args(end))
  931. @array_function_dispatch(_just_dispatcher)
  932. def rjust(a, width, fillchar=' '):
  933. """
  934. Return an array with the elements of `a` right-justified in a
  935. string of length `width`.
  936. Calls `str.rjust` element-wise.
  937. Parameters
  938. ----------
  939. a : array_like of str or unicode
  940. width : int
  941. The length of the resulting strings
  942. fillchar : str or unicode, optional
  943. The character to use for padding
  944. Returns
  945. -------
  946. out : ndarray
  947. Output array of str or unicode, depending on input type
  948. See also
  949. --------
  950. str.rjust
  951. """
  952. a_arr = numpy.asarray(a)
  953. width_arr = numpy.asarray(width)
  954. size = long(numpy.max(width_arr.flat))
  955. if numpy.issubdtype(a_arr.dtype, numpy.string_):
  956. fillchar = asbytes(fillchar)
  957. return _vec_string(
  958. a_arr, (a_arr.dtype.type, size), 'rjust', (width_arr, fillchar))
  959. @array_function_dispatch(_partition_dispatcher)
  960. def rpartition(a, sep):
  961. """
  962. Partition (split) each element around the right-most separator.
  963. Calls `str.rpartition` element-wise.
  964. For each element in `a`, split the element as the last
  965. occurrence of `sep`, and return 3 strings containing the part
  966. before the separator, the separator itself, and the part after
  967. the separator. If the separator is not found, return 3 strings
  968. containing the string itself, followed by two empty strings.
  969. Parameters
  970. ----------
  971. a : array_like of str or unicode
  972. Input array
  973. sep : str or unicode
  974. Right-most separator to split each element in array.
  975. Returns
  976. -------
  977. out : ndarray
  978. Output array of string or unicode, depending on input
  979. type. The output array will have an extra dimension with
  980. 3 elements per input element.
  981. See also
  982. --------
  983. str.rpartition
  984. """
  985. return _to_string_or_unicode_array(
  986. _vec_string(a, object_, 'rpartition', (sep,)))
  987. def _split_dispatcher(a, sep=None, maxsplit=None):
  988. return (a,)
  989. @array_function_dispatch(_split_dispatcher)
  990. def rsplit(a, sep=None, maxsplit=None):
  991. """
  992. For each element in `a`, return a list of the words in the
  993. string, using `sep` as the delimiter string.
  994. Calls `str.rsplit` element-wise.
  995. Except for splitting from the right, `rsplit`
  996. behaves like `split`.
  997. Parameters
  998. ----------
  999. a : array_like of str or unicode
  1000. sep : str or unicode, optional
  1001. If `sep` is not specified or None, any whitespace string
  1002. is a separator.
  1003. maxsplit : int, optional
  1004. If `maxsplit` is given, at most `maxsplit` splits are done,
  1005. the rightmost ones.
  1006. Returns
  1007. -------
  1008. out : ndarray
  1009. Array of list objects
  1010. See also
  1011. --------
  1012. str.rsplit, split
  1013. """
  1014. # This will return an array of lists of different sizes, so we
  1015. # leave it as an object array
  1016. return _vec_string(
  1017. a, object_, 'rsplit', [sep] + _clean_args(maxsplit))
  1018. def _strip_dispatcher(a, chars=None):
  1019. return (a,)
  1020. @array_function_dispatch(_strip_dispatcher)
  1021. def rstrip(a, chars=None):
  1022. """
  1023. For each element in `a`, return a copy with the trailing
  1024. characters removed.
  1025. Calls `str.rstrip` element-wise.
  1026. Parameters
  1027. ----------
  1028. a : array-like of str or unicode
  1029. chars : str or unicode, optional
  1030. The `chars` argument is a string specifying the set of
  1031. characters to be removed. If omitted or None, the `chars`
  1032. argument defaults to removing whitespace. The `chars` argument
  1033. is not a suffix; rather, all combinations of its values are
  1034. stripped.
  1035. Returns
  1036. -------
  1037. out : ndarray
  1038. Output array of str or unicode, depending on input type
  1039. See also
  1040. --------
  1041. str.rstrip
  1042. Examples
  1043. --------
  1044. >>> c = np.array(['aAaAaA', 'abBABba'], dtype='S7'); c
  1045. array(['aAaAaA', 'abBABba'],
  1046. dtype='|S7')
  1047. >>> np.char.rstrip(c, b'a')
  1048. array(['aAaAaA', 'abBABb'],
  1049. dtype='|S7')
  1050. >>> np.char.rstrip(c, b'A')
  1051. array(['aAaAa', 'abBABba'],
  1052. dtype='|S7')
  1053. """
  1054. a_arr = numpy.asarray(a)
  1055. return _vec_string(a_arr, a_arr.dtype, 'rstrip', (chars,))
  1056. @array_function_dispatch(_split_dispatcher)
  1057. def split(a, sep=None, maxsplit=None):
  1058. """
  1059. For each element in `a`, return a list of the words in the
  1060. string, using `sep` as the delimiter string.
  1061. Calls `str.split` element-wise.
  1062. Parameters
  1063. ----------
  1064. a : array_like of str or unicode
  1065. sep : str or unicode, optional
  1066. If `sep` is not specified or None, any whitespace string is a
  1067. separator.
  1068. maxsplit : int, optional
  1069. If `maxsplit` is given, at most `maxsplit` splits are done.
  1070. Returns
  1071. -------
  1072. out : ndarray
  1073. Array of list objects
  1074. See also
  1075. --------
  1076. str.split, rsplit
  1077. """
  1078. # This will return an array of lists of different sizes, so we
  1079. # leave it as an object array
  1080. return _vec_string(
  1081. a, object_, 'split', [sep] + _clean_args(maxsplit))
  1082. def _splitlines_dispatcher(a, keepends=None):
  1083. return (a,)
  1084. @array_function_dispatch(_splitlines_dispatcher)
  1085. def splitlines(a, keepends=None):
  1086. """
  1087. For each element in `a`, return a list of the lines in the
  1088. element, breaking at line boundaries.
  1089. Calls `str.splitlines` element-wise.
  1090. Parameters
  1091. ----------
  1092. a : array_like of str or unicode
  1093. keepends : bool, optional
  1094. Line breaks are not included in the resulting list unless
  1095. keepends is given and true.
  1096. Returns
  1097. -------
  1098. out : ndarray
  1099. Array of list objects
  1100. See also
  1101. --------
  1102. str.splitlines
  1103. """
  1104. return _vec_string(
  1105. a, object_, 'splitlines', _clean_args(keepends))
  1106. def _startswith_dispatcher(a, prefix, start=None, end=None):
  1107. return (a,)
  1108. @array_function_dispatch(_startswith_dispatcher)
  1109. def startswith(a, prefix, start=0, end=None):
  1110. """
  1111. Returns a boolean array which is `True` where the string element
  1112. in `a` starts with `prefix`, otherwise `False`.
  1113. Calls `str.startswith` element-wise.
  1114. Parameters
  1115. ----------
  1116. a : array_like of str or unicode
  1117. prefix : str
  1118. start, end : int, optional
  1119. With optional `start`, test beginning at that position. With
  1120. optional `end`, stop comparing at that position.
  1121. Returns
  1122. -------
  1123. out : ndarray
  1124. Array of booleans
  1125. See also
  1126. --------
  1127. str.startswith
  1128. """
  1129. return _vec_string(
  1130. a, bool_, 'startswith', [prefix, start] + _clean_args(end))
  1131. @array_function_dispatch(_strip_dispatcher)
  1132. def strip(a, chars=None):
  1133. """
  1134. For each element in `a`, return a copy with the leading and
  1135. trailing characters removed.
  1136. Calls `str.strip` element-wise.
  1137. Parameters
  1138. ----------
  1139. a : array-like of str or unicode
  1140. chars : str or unicode, optional
  1141. The `chars` argument is a string specifying the set of
  1142. characters to be removed. If omitted or None, the `chars`
  1143. argument defaults to removing whitespace. The `chars` argument
  1144. is not a prefix or suffix; rather, all combinations of its
  1145. values are stripped.
  1146. Returns
  1147. -------
  1148. out : ndarray
  1149. Output array of str or unicode, depending on input type
  1150. See also
  1151. --------
  1152. str.strip
  1153. Examples
  1154. --------
  1155. >>> c = np.array(['aAaAaA', ' aA ', 'abBABba'])
  1156. >>> c
  1157. array(['aAaAaA', ' aA ', 'abBABba'], dtype='<U7')
  1158. >>> np.char.strip(c)
  1159. array(['aAaAaA', 'aA', 'abBABba'], dtype='<U7')
  1160. >>> np.char.strip(c, 'a') # 'a' unstripped from c[1] because whitespace leads
  1161. array(['AaAaA', ' aA ', 'bBABb'], dtype='<U7')
  1162. >>> np.char.strip(c, 'A') # 'A' unstripped from c[1] because (unprinted) ws trails
  1163. array(['aAaAa', ' aA ', 'abBABba'], dtype='<U7')
  1164. """
  1165. a_arr = numpy.asarray(a)
  1166. return _vec_string(a_arr, a_arr.dtype, 'strip', _clean_args(chars))
  1167. @array_function_dispatch(_unary_op_dispatcher)
  1168. def swapcase(a):
  1169. """
  1170. Return element-wise a copy of the string with
  1171. uppercase characters converted to lowercase and vice versa.
  1172. Calls `str.swapcase` element-wise.
  1173. For 8-bit strings, this method is locale-dependent.
  1174. Parameters
  1175. ----------
  1176. a : array_like, {str, unicode}
  1177. Input array.
  1178. Returns
  1179. -------
  1180. out : ndarray, {str, unicode}
  1181. Output array of str or unicode, depending on input type
  1182. See also
  1183. --------
  1184. str.swapcase
  1185. Examples
  1186. --------
  1187. >>> c=np.array(['a1B c','1b Ca','b Ca1','cA1b'],'S5'); c
  1188. array(['a1B c', '1b Ca', 'b Ca1', 'cA1b'],
  1189. dtype='|S5')
  1190. >>> np.char.swapcase(c)
  1191. array(['A1b C', '1B cA', 'B cA1', 'Ca1B'],
  1192. dtype='|S5')
  1193. """
  1194. a_arr = numpy.asarray(a)
  1195. return _vec_string(a_arr, a_arr.dtype, 'swapcase')
  1196. @array_function_dispatch(_unary_op_dispatcher)
  1197. def title(a):
  1198. """
  1199. Return element-wise title cased version of string or unicode.
  1200. Title case words start with uppercase characters, all remaining cased
  1201. characters are lowercase.
  1202. Calls `str.title` element-wise.
  1203. For 8-bit strings, this method is locale-dependent.
  1204. Parameters
  1205. ----------
  1206. a : array_like, {str, unicode}
  1207. Input array.
  1208. Returns
  1209. -------
  1210. out : ndarray
  1211. Output array of str or unicode, depending on input type
  1212. See also
  1213. --------
  1214. str.title
  1215. Examples
  1216. --------
  1217. >>> c=np.array(['a1b c','1b ca','b ca1','ca1b'],'S5'); c
  1218. array(['a1b c', '1b ca', 'b ca1', 'ca1b'],
  1219. dtype='|S5')
  1220. >>> np.char.title(c)
  1221. array(['A1B C', '1B Ca', 'B Ca1', 'Ca1B'],
  1222. dtype='|S5')
  1223. """
  1224. a_arr = numpy.asarray(a)
  1225. return _vec_string(a_arr, a_arr.dtype, 'title')
  1226. def _translate_dispatcher(a, table, deletechars=None):
  1227. return (a,)
  1228. @array_function_dispatch(_translate_dispatcher)
  1229. def translate(a, table, deletechars=None):
  1230. """
  1231. For each element in `a`, return a copy of the string where all
  1232. characters occurring in the optional argument `deletechars` are
  1233. removed, and the remaining characters have been mapped through the
  1234. given translation table.
  1235. Calls `str.translate` element-wise.
  1236. Parameters
  1237. ----------
  1238. a : array-like of str or unicode
  1239. table : str of length 256
  1240. deletechars : str
  1241. Returns
  1242. -------
  1243. out : ndarray
  1244. Output array of str or unicode, depending on input type
  1245. See also
  1246. --------
  1247. str.translate
  1248. """
  1249. a_arr = numpy.asarray(a)
  1250. if issubclass(a_arr.dtype.type, unicode_):
  1251. return _vec_string(
  1252. a_arr, a_arr.dtype, 'translate', (table,))
  1253. else:
  1254. return _vec_string(
  1255. a_arr, a_arr.dtype, 'translate', [table] + _clean_args(deletechars))
  1256. @array_function_dispatch(_unary_op_dispatcher)
  1257. def upper(a):
  1258. """
  1259. Return an array with the elements converted to uppercase.
  1260. Calls `str.upper` element-wise.
  1261. For 8-bit strings, this method is locale-dependent.
  1262. Parameters
  1263. ----------
  1264. a : array_like, {str, unicode}
  1265. Input array.
  1266. Returns
  1267. -------
  1268. out : ndarray, {str, unicode}
  1269. Output array of str or unicode, depending on input type
  1270. See also
  1271. --------
  1272. str.upper
  1273. Examples
  1274. --------
  1275. >>> c = np.array(['a1b c', '1bca', 'bca1']); c
  1276. array(['a1b c', '1bca', 'bca1'], dtype='<U5')
  1277. >>> np.char.upper(c)
  1278. array(['A1B C', '1BCA', 'BCA1'], dtype='<U5')
  1279. """
  1280. a_arr = numpy.asarray(a)
  1281. return _vec_string(a_arr, a_arr.dtype, 'upper')
  1282. def _zfill_dispatcher(a, width):
  1283. return (a,)
  1284. @array_function_dispatch(_zfill_dispatcher)
  1285. def zfill(a, width):
  1286. """
  1287. Return the numeric string left-filled with zeros
  1288. Calls `str.zfill` element-wise.
  1289. Parameters
  1290. ----------
  1291. a : array_like, {str, unicode}
  1292. Input array.
  1293. width : int
  1294. Width of string to left-fill elements in `a`.
  1295. Returns
  1296. -------
  1297. out : ndarray, {str, unicode}
  1298. Output array of str or unicode, depending on input type
  1299. See also
  1300. --------
  1301. str.zfill
  1302. """
  1303. a_arr = numpy.asarray(a)
  1304. width_arr = numpy.asarray(width)
  1305. size = long(numpy.max(width_arr.flat))
  1306. return _vec_string(
  1307. a_arr, (a_arr.dtype.type, size), 'zfill', (width_arr,))
  1308. @array_function_dispatch(_unary_op_dispatcher)
  1309. def isnumeric(a):
  1310. """
  1311. For each element, return True if there are only numeric
  1312. characters in the element.
  1313. Calls `unicode.isnumeric` element-wise.
  1314. Numeric characters include digit characters, and all characters
  1315. that have the Unicode numeric value property, e.g. ``U+2155,
  1316. VULGAR FRACTION ONE FIFTH``.
  1317. Parameters
  1318. ----------
  1319. a : array_like, unicode
  1320. Input array.
  1321. Returns
  1322. -------
  1323. out : ndarray, bool
  1324. Array of booleans of same shape as `a`.
  1325. See also
  1326. --------
  1327. unicode.isnumeric
  1328. """
  1329. if _use_unicode(a) != unicode_:
  1330. raise TypeError("isnumeric is only available for Unicode strings and arrays")
  1331. return _vec_string(a, bool_, 'isnumeric')
  1332. @array_function_dispatch(_unary_op_dispatcher)
  1333. def isdecimal(a):
  1334. """
  1335. For each element, return True if there are only decimal
  1336. characters in the element.
  1337. Calls `unicode.isdecimal` element-wise.
  1338. Decimal characters include digit characters, and all characters
  1339. that that can be used to form decimal-radix numbers,
  1340. e.g. ``U+0660, ARABIC-INDIC DIGIT ZERO``.
  1341. Parameters
  1342. ----------
  1343. a : array_like, unicode
  1344. Input array.
  1345. Returns
  1346. -------
  1347. out : ndarray, bool
  1348. Array of booleans identical in shape to `a`.
  1349. See also
  1350. --------
  1351. unicode.isdecimal
  1352. """
  1353. if _use_unicode(a) != unicode_:
  1354. raise TypeError("isnumeric is only available for Unicode strings and arrays")
  1355. return _vec_string(a, bool_, 'isdecimal')
  1356. @set_module('numpy')
  1357. class chararray(ndarray):
  1358. """
  1359. chararray(shape, itemsize=1, unicode=False, buffer=None, offset=0,
  1360. strides=None, order=None)
  1361. Provides a convenient view on arrays of string and unicode values.
  1362. .. note::
  1363. The `chararray` class exists for backwards compatibility with
  1364. Numarray, it is not recommended for new development. Starting from numpy
  1365. 1.4, if one needs arrays of strings, it is recommended to use arrays of
  1366. `dtype` `object_`, `string_` or `unicode_`, and use the free functions
  1367. in the `numpy.char` module for fast vectorized string operations.
  1368. Versus a regular NumPy array of type `str` or `unicode`, this
  1369. class adds the following functionality:
  1370. 1) values automatically have whitespace removed from the end
  1371. when indexed
  1372. 2) comparison operators automatically remove whitespace from the
  1373. end when comparing values
  1374. 3) vectorized string operations are provided as methods
  1375. (e.g. `.endswith`) and infix operators (e.g. ``"+", "*", "%"``)
  1376. chararrays should be created using `numpy.char.array` or
  1377. `numpy.char.asarray`, rather than this constructor directly.
  1378. This constructor creates the array, using `buffer` (with `offset`
  1379. and `strides`) if it is not ``None``. If `buffer` is ``None``, then
  1380. constructs a new array with `strides` in "C order", unless both
  1381. ``len(shape) >= 2`` and ``order='F'``, in which case `strides`
  1382. is in "Fortran order".
  1383. Methods
  1384. -------
  1385. astype
  1386. argsort
  1387. copy
  1388. count
  1389. decode
  1390. dump
  1391. dumps
  1392. encode
  1393. endswith
  1394. expandtabs
  1395. fill
  1396. find
  1397. flatten
  1398. getfield
  1399. index
  1400. isalnum
  1401. isalpha
  1402. isdecimal
  1403. isdigit
  1404. islower
  1405. isnumeric
  1406. isspace
  1407. istitle
  1408. isupper
  1409. item
  1410. join
  1411. ljust
  1412. lower
  1413. lstrip
  1414. nonzero
  1415. put
  1416. ravel
  1417. repeat
  1418. replace
  1419. reshape
  1420. resize
  1421. rfind
  1422. rindex
  1423. rjust
  1424. rsplit
  1425. rstrip
  1426. searchsorted
  1427. setfield
  1428. setflags
  1429. sort
  1430. split
  1431. splitlines
  1432. squeeze
  1433. startswith
  1434. strip
  1435. swapaxes
  1436. swapcase
  1437. take
  1438. title
  1439. tofile
  1440. tolist
  1441. tostring
  1442. translate
  1443. transpose
  1444. upper
  1445. view
  1446. zfill
  1447. Parameters
  1448. ----------
  1449. shape : tuple
  1450. Shape of the array.
  1451. itemsize : int, optional
  1452. Length of each array element, in number of characters. Default is 1.
  1453. unicode : bool, optional
  1454. Are the array elements of type unicode (True) or string (False).
  1455. Default is False.
  1456. buffer : int, optional
  1457. Memory address of the start of the array data. Default is None,
  1458. in which case a new array is created.
  1459. offset : int, optional
  1460. Fixed stride displacement from the beginning of an axis?
  1461. Default is 0. Needs to be >=0.
  1462. strides : array_like of ints, optional
  1463. Strides for the array (see `ndarray.strides` for full description).
  1464. Default is None.
  1465. order : {'C', 'F'}, optional
  1466. The order in which the array data is stored in memory: 'C' ->
  1467. "row major" order (the default), 'F' -> "column major"
  1468. (Fortran) order.
  1469. Examples
  1470. --------
  1471. >>> charar = np.chararray((3, 3))
  1472. >>> charar[:] = 'a'
  1473. >>> charar
  1474. chararray([[b'a', b'a', b'a'],
  1475. [b'a', b'a', b'a'],
  1476. [b'a', b'a', b'a']], dtype='|S1')
  1477. >>> charar = np.chararray(charar.shape, itemsize=5)
  1478. >>> charar[:] = 'abc'
  1479. >>> charar
  1480. chararray([[b'abc', b'abc', b'abc'],
  1481. [b'abc', b'abc', b'abc'],
  1482. [b'abc', b'abc', b'abc']], dtype='|S5')
  1483. """
  1484. def __new__(subtype, shape, itemsize=1, unicode=False, buffer=None,
  1485. offset=0, strides=None, order='C'):
  1486. global _globalvar
  1487. if unicode:
  1488. dtype = unicode_
  1489. else:
  1490. dtype = string_
  1491. # force itemsize to be a Python long, since using NumPy integer
  1492. # types results in itemsize.itemsize being used as the size of
  1493. # strings in the new array.
  1494. itemsize = long(itemsize)
  1495. if sys.version_info[0] >= 3 and isinstance(buffer, _unicode):
  1496. # On Py3, unicode objects do not have the buffer interface
  1497. filler = buffer
  1498. buffer = None
  1499. else:
  1500. filler = None
  1501. _globalvar = 1
  1502. if buffer is None:
  1503. self = ndarray.__new__(subtype, shape, (dtype, itemsize),
  1504. order=order)
  1505. else:
  1506. self = ndarray.__new__(subtype, shape, (dtype, itemsize),
  1507. buffer=buffer,
  1508. offset=offset, strides=strides,
  1509. order=order)
  1510. if filler is not None:
  1511. self[...] = filler
  1512. _globalvar = 0
  1513. return self
  1514. def __array_finalize__(self, obj):
  1515. # The b is a special case because it is used for reconstructing.
  1516. if not _globalvar and self.dtype.char not in 'SUbc':
  1517. raise ValueError("Can only create a chararray from string data.")
  1518. def __getitem__(self, obj):
  1519. val = ndarray.__getitem__(self, obj)
  1520. if isinstance(val, character):
  1521. temp = val.rstrip()
  1522. if _len(temp) == 0:
  1523. val = ''
  1524. else:
  1525. val = temp
  1526. return val
  1527. # IMPLEMENTATION NOTE: Most of the methods of this class are
  1528. # direct delegations to the free functions in this module.
  1529. # However, those that return an array of strings should instead
  1530. # return a chararray, so some extra wrapping is required.
  1531. def __eq__(self, other):
  1532. """
  1533. Return (self == other) element-wise.
  1534. See also
  1535. --------
  1536. equal
  1537. """
  1538. return equal(self, other)
  1539. def __ne__(self, other):
  1540. """
  1541. Return (self != other) element-wise.
  1542. See also
  1543. --------
  1544. not_equal
  1545. """
  1546. return not_equal(self, other)
  1547. def __ge__(self, other):
  1548. """
  1549. Return (self >= other) element-wise.
  1550. See also
  1551. --------
  1552. greater_equal
  1553. """
  1554. return greater_equal(self, other)
  1555. def __le__(self, other):
  1556. """
  1557. Return (self <= other) element-wise.
  1558. See also
  1559. --------
  1560. less_equal
  1561. """
  1562. return less_equal(self, other)
  1563. def __gt__(self, other):
  1564. """
  1565. Return (self > other) element-wise.
  1566. See also
  1567. --------
  1568. greater
  1569. """
  1570. return greater(self, other)
  1571. def __lt__(self, other):
  1572. """
  1573. Return (self < other) element-wise.
  1574. See also
  1575. --------
  1576. less
  1577. """
  1578. return less(self, other)
  1579. def __add__(self, other):
  1580. """
  1581. Return (self + other), that is string concatenation,
  1582. element-wise for a pair of array_likes of str or unicode.
  1583. See also
  1584. --------
  1585. add
  1586. """
  1587. return asarray(add(self, other))
  1588. def __radd__(self, other):
  1589. """
  1590. Return (other + self), that is string concatenation,
  1591. element-wise for a pair of array_likes of `string_` or `unicode_`.
  1592. See also
  1593. --------
  1594. add
  1595. """
  1596. return asarray(add(numpy.asarray(other), self))
  1597. def __mul__(self, i):
  1598. """
  1599. Return (self * i), that is string multiple concatenation,
  1600. element-wise.
  1601. See also
  1602. --------
  1603. multiply
  1604. """
  1605. return asarray(multiply(self, i))
  1606. def __rmul__(self, i):
  1607. """
  1608. Return (self * i), that is string multiple concatenation,
  1609. element-wise.
  1610. See also
  1611. --------
  1612. multiply
  1613. """
  1614. return asarray(multiply(self, i))
  1615. def __mod__(self, i):
  1616. """
  1617. Return (self % i), that is pre-Python 2.6 string formatting
  1618. (iterpolation), element-wise for a pair of array_likes of `string_`
  1619. or `unicode_`.
  1620. See also
  1621. --------
  1622. mod
  1623. """
  1624. return asarray(mod(self, i))
  1625. def __rmod__(self, other):
  1626. return NotImplemented
  1627. def argsort(self, axis=-1, kind=None, order=None):
  1628. """
  1629. Return the indices that sort the array lexicographically.
  1630. For full documentation see `numpy.argsort`, for which this method is
  1631. in fact merely a "thin wrapper."
  1632. Examples
  1633. --------
  1634. >>> c = np.array(['a1b c', '1b ca', 'b ca1', 'Ca1b'], 'S5')
  1635. >>> c = c.view(np.chararray); c
  1636. chararray(['a1b c', '1b ca', 'b ca1', 'Ca1b'],
  1637. dtype='|S5')
  1638. >>> c[c.argsort()]
  1639. chararray(['1b ca', 'Ca1b', 'a1b c', 'b ca1'],
  1640. dtype='|S5')
  1641. """
  1642. return self.__array__().argsort(axis, kind, order)
  1643. argsort.__doc__ = ndarray.argsort.__doc__
  1644. def capitalize(self):
  1645. """
  1646. Return a copy of `self` with only the first character of each element
  1647. capitalized.
  1648. See also
  1649. --------
  1650. char.capitalize
  1651. """
  1652. return asarray(capitalize(self))
  1653. def center(self, width, fillchar=' '):
  1654. """
  1655. Return a copy of `self` with its elements centered in a
  1656. string of length `width`.
  1657. See also
  1658. --------
  1659. center
  1660. """
  1661. return asarray(center(self, width, fillchar))
  1662. def count(self, sub, start=0, end=None):
  1663. """
  1664. Returns an array with the number of non-overlapping occurrences of
  1665. substring `sub` in the range [`start`, `end`].
  1666. See also
  1667. --------
  1668. char.count
  1669. """
  1670. return count(self, sub, start, end)
  1671. def decode(self, encoding=None, errors=None):
  1672. """
  1673. Calls `str.decode` element-wise.
  1674. See also
  1675. --------
  1676. char.decode
  1677. """
  1678. return decode(self, encoding, errors)
  1679. def encode(self, encoding=None, errors=None):
  1680. """
  1681. Calls `str.encode` element-wise.
  1682. See also
  1683. --------
  1684. char.encode
  1685. """
  1686. return encode(self, encoding, errors)
  1687. def endswith(self, suffix, start=0, end=None):
  1688. """
  1689. Returns a boolean array which is `True` where the string element
  1690. in `self` ends with `suffix`, otherwise `False`.
  1691. See also
  1692. --------
  1693. char.endswith
  1694. """
  1695. return endswith(self, suffix, start, end)
  1696. def expandtabs(self, tabsize=8):
  1697. """
  1698. Return a copy of each string element where all tab characters are
  1699. replaced by one or more spaces.
  1700. See also
  1701. --------
  1702. char.expandtabs
  1703. """
  1704. return asarray(expandtabs(self, tabsize))
  1705. def find(self, sub, start=0, end=None):
  1706. """
  1707. For each element, return the lowest index in the string where
  1708. substring `sub` is found.
  1709. See also
  1710. --------
  1711. char.find
  1712. """
  1713. return find(self, sub, start, end)
  1714. def index(self, sub, start=0, end=None):
  1715. """
  1716. Like `find`, but raises `ValueError` when the substring is not found.
  1717. See also
  1718. --------
  1719. char.index
  1720. """
  1721. return index(self, sub, start, end)
  1722. def isalnum(self):
  1723. """
  1724. Returns true for each element if all characters in the string
  1725. are alphanumeric and there is at least one character, false
  1726. otherwise.
  1727. See also
  1728. --------
  1729. char.isalnum
  1730. """
  1731. return isalnum(self)
  1732. def isalpha(self):
  1733. """
  1734. Returns true for each element if all characters in the string
  1735. are alphabetic and there is at least one character, false
  1736. otherwise.
  1737. See also
  1738. --------
  1739. char.isalpha
  1740. """
  1741. return isalpha(self)
  1742. def isdigit(self):
  1743. """
  1744. Returns true for each element if all characters in the string are
  1745. digits and there is at least one character, false otherwise.
  1746. See also
  1747. --------
  1748. char.isdigit
  1749. """
  1750. return isdigit(self)
  1751. def islower(self):
  1752. """
  1753. Returns true for each element if all cased characters in the
  1754. string are lowercase and there is at least one cased character,
  1755. false otherwise.
  1756. See also
  1757. --------
  1758. char.islower
  1759. """
  1760. return islower(self)
  1761. def isspace(self):
  1762. """
  1763. Returns true for each element if there are only whitespace
  1764. characters in the string and there is at least one character,
  1765. false otherwise.
  1766. See also
  1767. --------
  1768. char.isspace
  1769. """
  1770. return isspace(self)
  1771. def istitle(self):
  1772. """
  1773. Returns true for each element if the element is a titlecased
  1774. string and there is at least one character, false otherwise.
  1775. See also
  1776. --------
  1777. char.istitle
  1778. """
  1779. return istitle(self)
  1780. def isupper(self):
  1781. """
  1782. Returns true for each element if all cased characters in the
  1783. string are uppercase and there is at least one character, false
  1784. otherwise.
  1785. See also
  1786. --------
  1787. char.isupper
  1788. """
  1789. return isupper(self)
  1790. def join(self, seq):
  1791. """
  1792. Return a string which is the concatenation of the strings in the
  1793. sequence `seq`.
  1794. See also
  1795. --------
  1796. char.join
  1797. """
  1798. return join(self, seq)
  1799. def ljust(self, width, fillchar=' '):
  1800. """
  1801. Return an array with the elements of `self` left-justified in a
  1802. string of length `width`.
  1803. See also
  1804. --------
  1805. char.ljust
  1806. """
  1807. return asarray(ljust(self, width, fillchar))
  1808. def lower(self):
  1809. """
  1810. Return an array with the elements of `self` converted to
  1811. lowercase.
  1812. See also
  1813. --------
  1814. char.lower
  1815. """
  1816. return asarray(lower(self))
  1817. def lstrip(self, chars=None):
  1818. """
  1819. For each element in `self`, return a copy with the leading characters
  1820. removed.
  1821. See also
  1822. --------
  1823. char.lstrip
  1824. """
  1825. return asarray(lstrip(self, chars))
  1826. def partition(self, sep):
  1827. """
  1828. Partition each element in `self` around `sep`.
  1829. See also
  1830. --------
  1831. partition
  1832. """
  1833. return asarray(partition(self, sep))
  1834. def replace(self, old, new, count=None):
  1835. """
  1836. For each element in `self`, return a copy of the string with all
  1837. occurrences of substring `old` replaced by `new`.
  1838. See also
  1839. --------
  1840. char.replace
  1841. """
  1842. return asarray(replace(self, old, new, count))
  1843. def rfind(self, sub, start=0, end=None):
  1844. """
  1845. For each element in `self`, return the highest index in the string
  1846. where substring `sub` is found, such that `sub` is contained
  1847. within [`start`, `end`].
  1848. See also
  1849. --------
  1850. char.rfind
  1851. """
  1852. return rfind(self, sub, start, end)
  1853. def rindex(self, sub, start=0, end=None):
  1854. """
  1855. Like `rfind`, but raises `ValueError` when the substring `sub` is
  1856. not found.
  1857. See also
  1858. --------
  1859. char.rindex
  1860. """
  1861. return rindex(self, sub, start, end)
  1862. def rjust(self, width, fillchar=' '):
  1863. """
  1864. Return an array with the elements of `self`
  1865. right-justified in a string of length `width`.
  1866. See also
  1867. --------
  1868. char.rjust
  1869. """
  1870. return asarray(rjust(self, width, fillchar))
  1871. def rpartition(self, sep):
  1872. """
  1873. Partition each element in `self` around `sep`.
  1874. See also
  1875. --------
  1876. rpartition
  1877. """
  1878. return asarray(rpartition(self, sep))
  1879. def rsplit(self, sep=None, maxsplit=None):
  1880. """
  1881. For each element in `self`, return a list of the words in
  1882. the string, using `sep` as the delimiter string.
  1883. See also
  1884. --------
  1885. char.rsplit
  1886. """
  1887. return rsplit(self, sep, maxsplit)
  1888. def rstrip(self, chars=None):
  1889. """
  1890. For each element in `self`, return a copy with the trailing
  1891. characters removed.
  1892. See also
  1893. --------
  1894. char.rstrip
  1895. """
  1896. return asarray(rstrip(self, chars))
  1897. def split(self, sep=None, maxsplit=None):
  1898. """
  1899. For each element in `self`, return a list of the words in the
  1900. string, using `sep` as the delimiter string.
  1901. See also
  1902. --------
  1903. char.split
  1904. """
  1905. return split(self, sep, maxsplit)
  1906. def splitlines(self, keepends=None):
  1907. """
  1908. For each element in `self`, return a list of the lines in the
  1909. element, breaking at line boundaries.
  1910. See also
  1911. --------
  1912. char.splitlines
  1913. """
  1914. return splitlines(self, keepends)
  1915. def startswith(self, prefix, start=0, end=None):
  1916. """
  1917. Returns a boolean array which is `True` where the string element
  1918. in `self` starts with `prefix`, otherwise `False`.
  1919. See also
  1920. --------
  1921. char.startswith
  1922. """
  1923. return startswith(self, prefix, start, end)
  1924. def strip(self, chars=None):
  1925. """
  1926. For each element in `self`, return a copy with the leading and
  1927. trailing characters removed.
  1928. See also
  1929. --------
  1930. char.strip
  1931. """
  1932. return asarray(strip(self, chars))
  1933. def swapcase(self):
  1934. """
  1935. For each element in `self`, return a copy of the string with
  1936. uppercase characters converted to lowercase and vice versa.
  1937. See also
  1938. --------
  1939. char.swapcase
  1940. """
  1941. return asarray(swapcase(self))
  1942. def title(self):
  1943. """
  1944. For each element in `self`, return a titlecased version of the
  1945. string: words start with uppercase characters, all remaining cased
  1946. characters are lowercase.
  1947. See also
  1948. --------
  1949. char.title
  1950. """
  1951. return asarray(title(self))
  1952. def translate(self, table, deletechars=None):
  1953. """
  1954. For each element in `self`, return a copy of the string where
  1955. all characters occurring in the optional argument
  1956. `deletechars` are removed, and the remaining characters have
  1957. been mapped through the given translation table.
  1958. See also
  1959. --------
  1960. char.translate
  1961. """
  1962. return asarray(translate(self, table, deletechars))
  1963. def upper(self):
  1964. """
  1965. Return an array with the elements of `self` converted to
  1966. uppercase.
  1967. See also
  1968. --------
  1969. char.upper
  1970. """
  1971. return asarray(upper(self))
  1972. def zfill(self, width):
  1973. """
  1974. Return the numeric string left-filled with zeros in a string of
  1975. length `width`.
  1976. See also
  1977. --------
  1978. char.zfill
  1979. """
  1980. return asarray(zfill(self, width))
  1981. def isnumeric(self):
  1982. """
  1983. For each element in `self`, return True if there are only
  1984. numeric characters in the element.
  1985. See also
  1986. --------
  1987. char.isnumeric
  1988. """
  1989. return isnumeric(self)
  1990. def isdecimal(self):
  1991. """
  1992. For each element in `self`, return True if there are only
  1993. decimal characters in the element.
  1994. See also
  1995. --------
  1996. char.isdecimal
  1997. """
  1998. return isdecimal(self)
  1999. def array(obj, itemsize=None, copy=True, unicode=None, order=None):
  2000. """
  2001. Create a `chararray`.
  2002. .. note::
  2003. This class is provided for numarray backward-compatibility.
  2004. New code (not concerned with numarray compatibility) should use
  2005. arrays of type `string_` or `unicode_` and use the free functions
  2006. in :mod:`numpy.char <numpy.core.defchararray>` for fast
  2007. vectorized string operations instead.
  2008. Versus a regular NumPy array of type `str` or `unicode`, this
  2009. class adds the following functionality:
  2010. 1) values automatically have whitespace removed from the end
  2011. when indexed
  2012. 2) comparison operators automatically remove whitespace from the
  2013. end when comparing values
  2014. 3) vectorized string operations are provided as methods
  2015. (e.g. `str.endswith`) and infix operators (e.g. ``+, *, %``)
  2016. Parameters
  2017. ----------
  2018. obj : array of str or unicode-like
  2019. itemsize : int, optional
  2020. `itemsize` is the number of characters per scalar in the
  2021. resulting array. If `itemsize` is None, and `obj` is an
  2022. object array or a Python list, the `itemsize` will be
  2023. automatically determined. If `itemsize` is provided and `obj`
  2024. is of type str or unicode, then the `obj` string will be
  2025. chunked into `itemsize` pieces.
  2026. copy : bool, optional
  2027. If true (default), then the object is copied. Otherwise, a copy
  2028. will only be made if __array__ returns a copy, if obj is a
  2029. nested sequence, or if a copy is needed to satisfy any of the other
  2030. requirements (`itemsize`, unicode, `order`, etc.).
  2031. unicode : bool, optional
  2032. When true, the resulting `chararray` can contain Unicode
  2033. characters, when false only 8-bit characters. If unicode is
  2034. None and `obj` is one of the following:
  2035. - a `chararray`,
  2036. - an ndarray of type `str` or `unicode`
  2037. - a Python str or unicode object,
  2038. then the unicode setting of the output array will be
  2039. automatically determined.
  2040. order : {'C', 'F', 'A'}, optional
  2041. Specify the order of the array. If order is 'C' (default), then the
  2042. array will be in C-contiguous order (last-index varies the
  2043. fastest). If order is 'F', then the returned array
  2044. will be in Fortran-contiguous order (first-index varies the
  2045. fastest). If order is 'A', then the returned array may
  2046. be in any order (either C-, Fortran-contiguous, or even
  2047. discontiguous).
  2048. """
  2049. if isinstance(obj, (_bytes, _unicode)):
  2050. if unicode is None:
  2051. if isinstance(obj, _unicode):
  2052. unicode = True
  2053. else:
  2054. unicode = False
  2055. if itemsize is None:
  2056. itemsize = _len(obj)
  2057. shape = _len(obj) // itemsize
  2058. if unicode:
  2059. if sys.maxunicode == 0xffff:
  2060. # On a narrow Python build, the buffer for Unicode
  2061. # strings is UCS2, which doesn't match the buffer for
  2062. # NumPy Unicode types, which is ALWAYS UCS4.
  2063. # Therefore, we need to convert the buffer. On Python
  2064. # 2.6 and later, we can use the utf_32 codec. Earlier
  2065. # versions don't have that codec, so we convert to a
  2066. # numerical array that matches the input buffer, and
  2067. # then use NumPy to convert it to UCS4. All of this
  2068. # should happen in native endianness.
  2069. obj = obj.encode('utf_32')
  2070. else:
  2071. obj = _unicode(obj)
  2072. else:
  2073. # Let the default Unicode -> string encoding (if any) take
  2074. # precedence.
  2075. obj = _bytes(obj)
  2076. return chararray(shape, itemsize=itemsize, unicode=unicode,
  2077. buffer=obj, order=order)
  2078. if isinstance(obj, (list, tuple)):
  2079. obj = numpy.asarray(obj)
  2080. if isinstance(obj, ndarray) and issubclass(obj.dtype.type, character):
  2081. # If we just have a vanilla chararray, create a chararray
  2082. # view around it.
  2083. if not isinstance(obj, chararray):
  2084. obj = obj.view(chararray)
  2085. if itemsize is None:
  2086. itemsize = obj.itemsize
  2087. # itemsize is in 8-bit chars, so for Unicode, we need
  2088. # to divide by the size of a single Unicode character,
  2089. # which for NumPy is always 4
  2090. if issubclass(obj.dtype.type, unicode_):
  2091. itemsize //= 4
  2092. if unicode is None:
  2093. if issubclass(obj.dtype.type, unicode_):
  2094. unicode = True
  2095. else:
  2096. unicode = False
  2097. if unicode:
  2098. dtype = unicode_
  2099. else:
  2100. dtype = string_
  2101. if order is not None:
  2102. obj = numpy.asarray(obj, order=order)
  2103. if (copy or
  2104. (itemsize != obj.itemsize) or
  2105. (not unicode and isinstance(obj, unicode_)) or
  2106. (unicode and isinstance(obj, string_))):
  2107. obj = obj.astype((dtype, long(itemsize)))
  2108. return obj
  2109. if isinstance(obj, ndarray) and issubclass(obj.dtype.type, object):
  2110. if itemsize is None:
  2111. # Since no itemsize was specified, convert the input array to
  2112. # a list so the ndarray constructor will automatically
  2113. # determine the itemsize for us.
  2114. obj = obj.tolist()
  2115. # Fall through to the default case
  2116. if unicode:
  2117. dtype = unicode_
  2118. else:
  2119. dtype = string_
  2120. if itemsize is None:
  2121. val = narray(obj, dtype=dtype, order=order, subok=True)
  2122. else:
  2123. val = narray(obj, dtype=(dtype, itemsize), order=order, subok=True)
  2124. return val.view(chararray)
  2125. def asarray(obj, itemsize=None, unicode=None, order=None):
  2126. """
  2127. Convert the input to a `chararray`, copying the data only if
  2128. necessary.
  2129. Versus a regular NumPy array of type `str` or `unicode`, this
  2130. class adds the following functionality:
  2131. 1) values automatically have whitespace removed from the end
  2132. when indexed
  2133. 2) comparison operators automatically remove whitespace from the
  2134. end when comparing values
  2135. 3) vectorized string operations are provided as methods
  2136. (e.g. `str.endswith`) and infix operators (e.g. ``+``, ``*``,``%``)
  2137. Parameters
  2138. ----------
  2139. obj : array of str or unicode-like
  2140. itemsize : int, optional
  2141. `itemsize` is the number of characters per scalar in the
  2142. resulting array. If `itemsize` is None, and `obj` is an
  2143. object array or a Python list, the `itemsize` will be
  2144. automatically determined. If `itemsize` is provided and `obj`
  2145. is of type str or unicode, then the `obj` string will be
  2146. chunked into `itemsize` pieces.
  2147. unicode : bool, optional
  2148. When true, the resulting `chararray` can contain Unicode
  2149. characters, when false only 8-bit characters. If unicode is
  2150. None and `obj` is one of the following:
  2151. - a `chararray`,
  2152. - an ndarray of type `str` or 'unicode`
  2153. - a Python str or unicode object,
  2154. then the unicode setting of the output array will be
  2155. automatically determined.
  2156. order : {'C', 'F'}, optional
  2157. Specify the order of the array. If order is 'C' (default), then the
  2158. array will be in C-contiguous order (last-index varies the
  2159. fastest). If order is 'F', then the returned array
  2160. will be in Fortran-contiguous order (first-index varies the
  2161. fastest).
  2162. """
  2163. return array(obj, itemsize, copy=False,
  2164. unicode=unicode, order=order)