holiday.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534
from datetime import datetime, timedelta
from typing import List
import warnings

from dateutil.relativedelta import FR, MO, SA, SU, TH, TU, WE  # noqa
import numpy as np

from pandas.errors import PerformanceWarning

from pandas import DateOffset, DatetimeIndex, Series, Timestamp, concat, date_range
from pandas.tseries.offsets import Day, Easter
  9. def next_monday(dt):
  10. """
  11. If holiday falls on Saturday, use following Monday instead;
  12. if holiday falls on Sunday, use Monday instead
  13. """
  14. if dt.weekday() == 5:
  15. return dt + timedelta(2)
  16. elif dt.weekday() == 6:
  17. return dt + timedelta(1)
  18. return dt
  19. def next_monday_or_tuesday(dt):
  20. """
  21. For second holiday of two adjacent ones!
  22. If holiday falls on Saturday, use following Monday instead;
  23. if holiday falls on Sunday or Monday, use following Tuesday instead
  24. (because Monday is already taken by adjacent holiday on the day before)
  25. """
  26. dow = dt.weekday()
  27. if dow == 5 or dow == 6:
  28. return dt + timedelta(2)
  29. elif dow == 0:
  30. return dt + timedelta(1)
  31. return dt
  32. def previous_friday(dt):
  33. """
  34. If holiday falls on Saturday or Sunday, use previous Friday instead.
  35. """
  36. if dt.weekday() == 5:
  37. return dt - timedelta(1)
  38. elif dt.weekday() == 6:
  39. return dt - timedelta(2)
  40. return dt
  41. def sunday_to_monday(dt):
  42. """
  43. If holiday falls on Sunday, use day thereafter (Monday) instead.
  44. """
  45. if dt.weekday() == 6:
  46. return dt + timedelta(1)
  47. return dt
  48. def weekend_to_monday(dt):
  49. """
  50. If holiday falls on Sunday or Saturday,
  51. use day thereafter (Monday) instead.
  52. Needed for holidays such as Christmas observation in Europe
  53. """
  54. if dt.weekday() == 6:
  55. return dt + timedelta(1)
  56. elif dt.weekday() == 5:
  57. return dt + timedelta(2)
  58. return dt
  59. def nearest_workday(dt):
  60. """
  61. If holiday falls on Saturday, use day before (Friday) instead;
  62. if holiday falls on Sunday, use day thereafter (Monday) instead.
  63. """
  64. if dt.weekday() == 5:
  65. return dt - timedelta(1)
  66. elif dt.weekday() == 6:
  67. return dt + timedelta(1)
  68. return dt
  69. def next_workday(dt):
  70. """
  71. returns next weekday used for observances
  72. """
  73. dt += timedelta(days=1)
  74. while dt.weekday() > 4:
  75. # Mon-Fri are 0-4
  76. dt += timedelta(days=1)
  77. return dt
  78. def previous_workday(dt):
  79. """
  80. returns previous weekday used for observances
  81. """
  82. dt -= timedelta(days=1)
  83. while dt.weekday() > 4:
  84. # Mon-Fri are 0-4
  85. dt -= timedelta(days=1)
  86. return dt
  87. def before_nearest_workday(dt):
  88. """
  89. returns previous workday after nearest workday
  90. """
  91. return previous_workday(nearest_workday(dt))
  92. def after_nearest_workday(dt):
  93. """
  94. returns next workday after nearest workday
  95. needed for Boxing day or multiple holidays in a series
  96. """
  97. return next_workday(nearest_workday(dt))
  98. class Holiday:
  99. """
  100. Class that defines a holiday with start/end dates and rules
  101. for observance.
  102. """
  103. def __init__(
  104. self,
  105. name,
  106. year=None,
  107. month=None,
  108. day=None,
  109. offset=None,
  110. observance=None,
  111. start_date=None,
  112. end_date=None,
  113. days_of_week=None,
  114. ):
  115. """
  116. Parameters
  117. ----------
  118. name : str
  119. Name of the holiday , defaults to class name
  120. offset : array of pandas.tseries.offsets or
  121. class from pandas.tseries.offsets
  122. computes offset from date
  123. observance: function
  124. computes when holiday is given a pandas Timestamp
  125. days_of_week:
  126. provide a tuple of days e.g (0,1,2,3,) for Monday Through Thursday
  127. Monday=0,..,Sunday=6
  128. Examples
  129. --------
  130. >>> from pandas.tseries.holiday import Holiday, nearest_workday
  131. >>> from dateutil.relativedelta import MO
  132. >>> USMemorialDay = Holiday('Memorial Day', month=5, day=31,
  133. offset=pd.DateOffset(weekday=MO(-1)))
  134. >>> USLaborDay = Holiday('Labor Day', month=9, day=1,
  135. offset=pd.DateOffset(weekday=MO(1)))
  136. >>> July3rd = Holiday('July 3rd', month=7, day=3,)
  137. >>> NewYears = Holiday('New Years Day', month=1, day=1,
  138. observance=nearest_workday),
  139. >>> July3rd = Holiday('July 3rd', month=7, day=3,
  140. days_of_week=(0, 1, 2, 3))
  141. """
  142. if offset is not None and observance is not None:
  143. raise NotImplementedError("Cannot use both offset and observance.")
  144. self.name = name
  145. self.year = year
  146. self.month = month
  147. self.day = day
  148. self.offset = offset
  149. self.start_date = (
  150. Timestamp(start_date) if start_date is not None else start_date
  151. )
  152. self.end_date = Timestamp(end_date) if end_date is not None else end_date
  153. self.observance = observance
  154. assert days_of_week is None or type(days_of_week) == tuple
  155. self.days_of_week = days_of_week
  156. def __repr__(self) -> str:
  157. info = ""
  158. if self.year is not None:
  159. info += f"year={self.year}, "
  160. info += f"month={self.month}, day={self.day}, "
  161. if self.offset is not None:
  162. info += f"offset={self.offset}"
  163. if self.observance is not None:
  164. info += f"observance={self.observance}"
  165. repr = f"Holiday: {self.name} ({info})"
  166. return repr
  167. def dates(self, start_date, end_date, return_name=False):
  168. """
  169. Calculate holidays observed between start date and end date
  170. Parameters
  171. ----------
  172. start_date : starting date, datetime-like, optional
  173. end_date : ending date, datetime-like, optional
  174. return_name : bool, optional, default=False
  175. If True, return a series that has dates and holiday names.
  176. False will only return dates.
  177. """
  178. start_date = Timestamp(start_date)
  179. end_date = Timestamp(end_date)
  180. filter_start_date = start_date
  181. filter_end_date = end_date
  182. if self.year is not None:
  183. dt = Timestamp(datetime(self.year, self.month, self.day))
  184. if return_name:
  185. return Series(self.name, index=[dt])
  186. else:
  187. return [dt]
  188. dates = self._reference_dates(start_date, end_date)
  189. holiday_dates = self._apply_rule(dates)
  190. if self.days_of_week is not None:
  191. holiday_dates = holiday_dates[
  192. np.in1d(holiday_dates.dayofweek, self.days_of_week)
  193. ]
  194. if self.start_date is not None:
  195. filter_start_date = max(
  196. self.start_date.tz_localize(filter_start_date.tz), filter_start_date
  197. )
  198. if self.end_date is not None:
  199. filter_end_date = min(
  200. self.end_date.tz_localize(filter_end_date.tz), filter_end_date
  201. )
  202. holiday_dates = holiday_dates[
  203. (holiday_dates >= filter_start_date) & (holiday_dates <= filter_end_date)
  204. ]
  205. if return_name:
  206. return Series(self.name, index=holiday_dates)
  207. return holiday_dates
  208. def _reference_dates(self, start_date, end_date):
  209. """
  210. Get reference dates for the holiday.
  211. Return reference dates for the holiday also returning the year
  212. prior to the start_date and year following the end_date. This ensures
  213. that any offsets to be applied will yield the holidays within
  214. the passed in dates.
  215. """
  216. if self.start_date is not None:
  217. start_date = self.start_date.tz_localize(start_date.tz)
  218. if self.end_date is not None:
  219. end_date = self.end_date.tz_localize(start_date.tz)
  220. year_offset = DateOffset(years=1)
  221. reference_start_date = Timestamp(
  222. datetime(start_date.year - 1, self.month, self.day)
  223. )
  224. reference_end_date = Timestamp(
  225. datetime(end_date.year + 1, self.month, self.day)
  226. )
  227. # Don't process unnecessary holidays
  228. dates = date_range(
  229. start=reference_start_date,
  230. end=reference_end_date,
  231. freq=year_offset,
  232. tz=start_date.tz,
  233. )
  234. return dates
  235. def _apply_rule(self, dates):
  236. """
  237. Apply the given offset/observance to a DatetimeIndex of dates.
  238. Parameters
  239. ----------
  240. dates : DatetimeIndex
  241. Dates to apply the given offset/observance rule
  242. Returns
  243. -------
  244. Dates with rules applied
  245. """
  246. if self.observance is not None:
  247. return dates.map(lambda d: self.observance(d))
  248. if self.offset is not None:
  249. if not isinstance(self.offset, list):
  250. offsets = [self.offset]
  251. else:
  252. offsets = self.offset
  253. for offset in offsets:
  254. # if we are adding a non-vectorized value
  255. # ignore the PerformanceWarnings:
  256. with warnings.catch_warnings():
  257. warnings.simplefilter("ignore", PerformanceWarning)
  258. dates += offset
  259. return dates
  260. holiday_calendars = {}
  261. def register(cls):
  262. try:
  263. name = cls.name
  264. except AttributeError:
  265. name = cls.__name__
  266. holiday_calendars[name] = cls
  267. def get_calendar(name):
  268. """
  269. Return an instance of a calendar based on its name.
  270. Parameters
  271. ----------
  272. name : str
  273. Calendar name to return an instance of
  274. """
  275. return holiday_calendars[name]()
  276. class HolidayCalendarMetaClass(type):
  277. def __new__(cls, clsname, bases, attrs):
  278. calendar_class = super().__new__(cls, clsname, bases, attrs)
  279. register(calendar_class)
  280. return calendar_class
  281. class AbstractHolidayCalendar(metaclass=HolidayCalendarMetaClass):
  282. """
  283. Abstract interface to create holidays following certain rules.
  284. """
  285. rules: List[Holiday] = []
  286. start_date = Timestamp(datetime(1970, 1, 1))
  287. end_date = Timestamp(datetime(2200, 12, 31))
  288. _cache = None
  289. def __init__(self, name=None, rules=None):
  290. """
  291. Initializes holiday object with a given set a rules. Normally
  292. classes just have the rules defined within them.
  293. Parameters
  294. ----------
  295. name : str
  296. Name of the holiday calendar, defaults to class name
  297. rules : array of Holiday objects
  298. A set of rules used to create the holidays.
  299. """
  300. super().__init__()
  301. if name is None:
  302. name = type(self).__name__
  303. self.name = name
  304. if rules is not None:
  305. self.rules = rules
  306. def rule_from_name(self, name):
  307. for rule in self.rules:
  308. if rule.name == name:
  309. return rule
  310. return None
  311. def holidays(self, start=None, end=None, return_name=False):
  312. """
  313. Returns a curve with holidays between start_date and end_date
  314. Parameters
  315. ----------
  316. start : starting date, datetime-like, optional
  317. end : ending date, datetime-like, optional
  318. return_name : bool, optional
  319. If True, return a series that has dates and holiday names.
  320. False will only return a DatetimeIndex of dates.
  321. Returns
  322. -------
  323. DatetimeIndex of holidays
  324. """
  325. if self.rules is None:
  326. raise Exception(
  327. f"Holiday Calendar {self.name} does not have any rules specified"
  328. )
  329. if start is None:
  330. start = AbstractHolidayCalendar.start_date
  331. if end is None:
  332. end = AbstractHolidayCalendar.end_date
  333. start = Timestamp(start)
  334. end = Timestamp(end)
  335. holidays = None
  336. # If we don't have a cache or the dates are outside the prior cache, we
  337. # get them again
  338. if self._cache is None or start < self._cache[0] or end > self._cache[1]:
  339. for rule in self.rules:
  340. rule_holidays = rule.dates(start, end, return_name=True)
  341. if holidays is None:
  342. holidays = rule_holidays
  343. else:
  344. holidays = holidays.append(rule_holidays)
  345. self._cache = (start, end, holidays.sort_index())
  346. holidays = self._cache[2]
  347. holidays = holidays[start:end]
  348. if return_name:
  349. return holidays
  350. else:
  351. return holidays.index
  352. @staticmethod
  353. def merge_class(base, other):
  354. """
  355. Merge holiday calendars together. The base calendar
  356. will take precedence to other. The merge will be done
  357. based on each holiday's name.
  358. Parameters
  359. ----------
  360. base : AbstractHolidayCalendar
  361. instance/subclass or array of Holiday objects
  362. other : AbstractHolidayCalendar
  363. instance/subclass or array of Holiday objects
  364. """
  365. try:
  366. other = other.rules
  367. except AttributeError:
  368. pass
  369. if not isinstance(other, list):
  370. other = [other]
  371. other_holidays = {holiday.name: holiday for holiday in other}
  372. try:
  373. base = base.rules
  374. except AttributeError:
  375. pass
  376. if not isinstance(base, list):
  377. base = [base]
  378. base_holidays = {holiday.name: holiday for holiday in base}
  379. other_holidays.update(base_holidays)
  380. return list(other_holidays.values())
  381. def merge(self, other, inplace=False):
  382. """
  383. Merge holiday calendars together. The caller's class
  384. rules take precedence. The merge will be done
  385. based on each holiday's name.
  386. Parameters
  387. ----------
  388. other : holiday calendar
  389. inplace : bool (default=False)
  390. If True set rule_table to holidays, else return array of Holidays
  391. """
  392. holidays = self.merge_class(self, other)
  393. if inplace:
  394. self.rules = holidays
  395. else:
  396. return holidays
  397. USMemorialDay = Holiday(
  398. "Memorial Day", month=5, day=31, offset=DateOffset(weekday=MO(-1))
  399. )
  400. USLaborDay = Holiday("Labor Day", month=9, day=1, offset=DateOffset(weekday=MO(1)))
  401. USColumbusDay = Holiday(
  402. "Columbus Day", month=10, day=1, offset=DateOffset(weekday=MO(2))
  403. )
  404. USThanksgivingDay = Holiday(
  405. "Thanksgiving", month=11, day=1, offset=DateOffset(weekday=TH(4))
  406. )
  407. USMartinLutherKingJr = Holiday(
  408. "Martin Luther King Jr. Day",
  409. start_date=datetime(1986, 1, 1),
  410. month=1,
  411. day=1,
  412. offset=DateOffset(weekday=MO(3)),
  413. )
  414. USPresidentsDay = Holiday(
  415. "Presidents Day", month=2, day=1, offset=DateOffset(weekday=MO(3))
  416. )
  417. GoodFriday = Holiday("Good Friday", month=1, day=1, offset=[Easter(), Day(-2)])
  418. EasterMonday = Holiday("Easter Monday", month=1, day=1, offset=[Easter(), Day(1)])
  419. class USFederalHolidayCalendar(AbstractHolidayCalendar):
  420. """
  421. US Federal Government Holiday Calendar based on rules specified by:
  422. https://www.opm.gov/policy-data-oversight/
  423. snow-dismissal-procedures/federal-holidays/
  424. """
  425. rules = [
  426. Holiday("New Years Day", month=1, day=1, observance=nearest_workday),
  427. USMartinLutherKingJr,
  428. USPresidentsDay,
  429. USMemorialDay,
  430. Holiday("July 4th", month=7, day=4, observance=nearest_workday),
  431. USLaborDay,
  432. USColumbusDay,
  433. Holiday("Veterans Day", month=11, day=11, observance=nearest_workday),
  434. USThanksgivingDay,
  435. Holiday("Christmas", month=12, day=25, observance=nearest_workday),
  436. ]
  437. def HolidayCalendarFactory(name, base, other, base_class=AbstractHolidayCalendar):
  438. rules = AbstractHolidayCalendar.merge_class(base, other)
  439. calendar_class = type(name, (base_class,), {"rules": rules, "name": name})
  440. return calendar_class