template.py 83 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679168016811682168316841685168616871688168916901691169216931694169516961697169816991700170117021703170417051706170717081709171017111712171317141715171617171718171917201721172217231724172517261727172817291730173117321733173417351736173717381739174017411742174317441745174617471748174917501751175217531754175517561757175817591760176117621763176417651766176717681769177017711772177317741775177617771778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201
  1. from abc import ABCMeta, abstractmethod
  2. from random import randint
  3. import re
  4. from os import getcwd
  5. import os
  6. import logging
  7. import numpy as np
  8. from sklearn.feature_extraction import DictVectorizer
  9. from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
  10. from sklearn.linear_model import *
  11. from sklearn.model_selection import train_test_split
  12. from pyecharts.components import Table
  13. from pyecharts.globals import SymbolType
  14. from pyecharts.charts import *
  15. from pyecharts import options as opts
  16. import pandas as pd
  17. import pandas_profiling as pp
  18. from pyecharts.globals import CurrentConfig
  19. from pyecharts.globals import GeoType # 地图推荐使用GeoType而不是str
  20. from system import plugin_class_loading, get_path, basicConfig
  21. logging.basicConfig(**basicConfig)
  22. CurrentConfig.ONLINE_HOST = f"{getcwd()}{os.sep}assets{os.sep}"
  23. class FormBase(metaclass=ABCMeta):
  24. def __init__(self, *args, **kwargs):
  25. class Del:
  26. pass
  27. self.sheet_dict = {}
  28. self.clean_func = {}
  29. self.clean_func_code = {}
  30. self.DEL = Del()
  31. self.named_domain = {
  32. "pd": pd,
  33. "DEL": self.DEL,
  34. "re": re,
  35. "Sheet": self.sheet_dict,
  36. }
  37. self.all_render = {} # 存放所有的图
  38. @abstractmethod
  39. def add_sheet(self, data, name):
  40. pass
  41. @abstractmethod
  42. def get_column(self, name, only):
  43. pass
  44. @abstractmethod
  45. def get_index(self, name, only):
  46. pass
  47. @abstractmethod
  48. def get_sheet(self, name, all_row=None, all_colunm=None) -> pd.DataFrame:
  49. pass
  50. @plugin_class_loading(get_path(r"template/datascience"))
  51. class SheetIO(FormBase, metaclass=ABCMeta):
  52. def add_sheet(self, data, name=""):
  53. if name == "":
  54. name = f"Sheet[{len(self.sheet_dict)}]"
  55. else:
  56. name += f"_[{len(self.sheet_dict)}]"
  57. self.sheet_dict[name] = data
  58. return data
  59. def __add_sheet(self, data_dir, func, name="", index=True, **kwargs): # 新增表格的核心方式
  60. try:
  61. data = func(data_dir, **kwargs)
  62. except UnicodeDecodeError: # 找不到编码方式
  63. return False
  64. if not index:
  65. data.index = data.iloc[:, 0].tolist()
  66. data.drop(data.columns.values.tolist()[0], inplace=True, axis=1)
  67. return self.add_sheet(data, name)
  68. def add_csv(
  69. self, data_dir, name="", sep=",", encodeding="utf-8", str_=True, index=True
  70. ):
  71. if str_:
  72. k = {"dtype": "object"}
  73. else:
  74. k = {}
  75. return self.__add_sheet(
  76. data_dir, pd.read_csv, name, index, sep=sep, encoding=encodeding, **k
  77. )
  78. def add_python(self, python_file, sheet_name="") -> pd.DataFrame:
  79. name = {"Sheet": self.get_sheet}
  80. name.update(globals().copy())
  81. name.update(locals().copy())
  82. exec(python_file, name)
  83. exec("get = Creat()", name)
  84. if isinstance(name["get"], pd.DataFrame): # 已经是DataFram
  85. get = name["get"]
  86. elif isinstance(name["get"], np.array):
  87. if bool(name.get("downNdim", False)): # 执行降或升维操作
  88. a = name["get"]
  89. array = []
  90. for i in a:
  91. c = np.ravel(i, "C")
  92. array.append(c)
  93. get = pd.DataFrame(array)
  94. else:
  95. array = name["get"].tolist()
  96. get = pd.DataFrame(array)
  97. else:
  98. get = pd.DataFrame(name["get"])
  99. self.add_sheet(get, sheet_name)
  100. return get
  101. def add_html(self, data_dir, name="", encoding="utf-8", str_=True, index=True):
  102. if str_:
  103. k = {"dtype": "object"}
  104. else:
  105. k = {}
  106. return self.__add_sheet(
  107. data_dir, pd.read_html, name, index, encoding=encoding, **k
  108. )
  109. def get_sheet_list(self):
  110. return list(self.sheet_dict.keys()) # 返回列表
  111. def to_csv(self, name, save_dir, sep=","):
  112. if sep == "":
  113. sep = ","
  114. get = self.get_sheet(name)
  115. get.to_csv(save_dir, sep=sep, na_rep="")
  116. def get_sheet(self, name, all_row=None, all_colunm=None) -> pd.DataFrame:
  117. try:
  118. pd.set_option("display.max_rows", all_row)
  119. pd.set_option("display.max_columns", all_colunm)
  120. finally:
  121. return self.sheet_dict[name]
  122. def del_sheet(self, name):
  123. del self.sheet_dict[name]
  124. @plugin_class_loading(get_path(r"template/datascience"))
  125. class SheetRender(FormBase, metaclass=ABCMeta):
  126. def render_html_one(self, name, render_dir=""):
  127. if render_dir == "":
  128. render_dir = f"{name}.html"
  129. get = self.get_sheet(name)
  130. headers = [f"{name}"] + self.get_column(name, True).tolist()
  131. rows = []
  132. table = Table()
  133. for i in get.iterrows(): # 按行迭代
  134. q = i[1].tolist()
  135. rows.append([f"{i[0]}"] + q)
  136. table.add(headers, rows).set_global_opts(
  137. title_opts=opts.ComponentTitleOpts(
  138. title=f"表格:{name}", subtitle="CoTan~数据处理:查看表格"
  139. )
  140. )
  141. table.render(render_dir)
  142. return render_dir
  143. def render_html_all(self, name, tab_render_dir="", render_type=0):
  144. if tab_render_dir == "":
  145. tab_render_dir = f"{name}.html"
  146. # 把要画的sheet放到第一个
  147. sheet_dict = self.sheet_dict.copy()
  148. del sheet_dict[name]
  149. sheet_list = [name] + list(sheet_dict.keys())
  150. class TabNew:
  151. def __init__(self, original_tab):
  152. self.original_tab = original_tab # 一个Tab
  153. def render(self, render_dir):
  154. return self.original_tab.render(render_dir)
  155. # 生成一个显示页面
  156. if render_type == 0:
  157. class TabZero(TabNew):
  158. def add(self, render, k, *more):
  159. self.original_tab.add(render, k)
  160. tab = TabZero(Tab(page_title="CoTan:查看表格")) # 一个Tab
  161. elif render_type == 1:
  162. class TabOne(TabNew):
  163. def add(self, render, *more):
  164. self.original_tab.add(render)
  165. tab = TabOne(Page(page_title="CoTan:查看表格", layout=Page.DraggablePageLayout))
  166. else:
  167. class TabTwo(TabNew):
  168. def add(self, render, *more):
  169. self.original_tab.add(render)
  170. tab = TabTwo(Page(page_title="CoTan:查看表格", layout=Page.SimplePageLayout))
  171. # 迭代添加内容
  172. for name in sheet_list:
  173. try:
  174. get = self.get_sheet(name)
  175. headers = [f"{name}"] + self.get_column(name, True).tolist()
  176. rows = []
  177. table = Table()
  178. for i in get.iterrows(): # 按行迭代
  179. q = i[1].tolist()
  180. rows.append([f"{i[0]}"] + q)
  181. table.add(headers, rows).set_global_opts(
  182. title_opts=opts.ComponentTitleOpts(
  183. title=f"表格:{name}", subtitle="CoTan~数据处理:查看表格"
  184. )
  185. )
  186. tab.add(table, f"表格:{name}")
  187. finally:
  188. tab.render(tab_render_dir)
  189. return tab_render_dir
  190. @plugin_class_loading(get_path(r"template/datascience"))
  191. class SheetReport(FormBase, metaclass=ABCMeta):
  192. def describe(self, name, new=False): # 生成描述
  193. get = self.get_sheet(name)
  194. des = get.describe()
  195. if new:
  196. self.add_sheet(des, f"{name}_describe[{len(self.sheet_dict)}]")
  197. shape = get.shape
  198. dtype = get.dtypes
  199. n = get.ndim
  200. head = get.head()
  201. tail = get.tail(3)
  202. return (
  203. f"1)基本\n{des}\n\n2)形状:{shape}\n\n3)数据类型\n{dtype}\n\n4)数据维度:{n}\n\n5)头部数据\n{head}"
  204. f"\n\n6)尾部数据\n{tail}\n\n7)行名\n{get.index}\n\n8)列名\n{get.columns}"
  205. )
  206. @staticmethod
  207. def sheet_profile_report_core(sheet, save_dir):
  208. report = pp.ProfileReport(sheet)
  209. report.to_file(save_dir)
  210. def to_report(self, name, save_dir=""):
  211. if save_dir == "":
  212. save_dir = f"{name}.html"
  213. sheet = self.get_sheet(name)
  214. self.sheet_profile_report_core(sheet, save_dir)
  215. return save_dir
  216. @plugin_class_loading(get_path(r"template/datascience"))
  217. class Rename(FormBase, metaclass=ABCMeta):
  218. def number_naming(self, name, is_column, save):
  219. get = self.get_sheet(name).copy()
  220. if is_column: # 处理列名
  221. column = self.get_column(name, True)
  222. if save: # 保存原数据
  223. get.loc["column"] = column
  224. get.columns = [i for i in range(len(column))]
  225. else:
  226. row = self.get_index(name, True)
  227. if save:
  228. get.loc[:, "row"] = row
  229. get.index = [i for i in range(len(row))]
  230. self.add_sheet(get, f"{name}")
  231. return get
  232. def name_with_number(self, name, is_column, save):
  233. get = self.get_sheet(name).copy()
  234. if is_column: # 处理列名
  235. column = self.get_column(name, True)
  236. if save: # 保存原数据
  237. get.loc["column"] = column
  238. get.columns = [f"[{i}]{column[i]}" for i in range(len(column))]
  239. else:
  240. row = self.get_index(name, True)
  241. if save:
  242. get.loc[:, "row"] = row
  243. get.index = [f"[{i}]{row[i]}" for i in range(len(row))]
  244. self.add_sheet(get, f"{name}")
  245. return get
  246. def data_naming(self, name, is_column, save, **data_init):
  247. # Date_Init:start,end,freq 任意两样
  248. get = self.get_sheet(name)
  249. if is_column: # 处理列名
  250. column = self.get_column(name, True)
  251. if save: # 保存原数据
  252. get.loc["column"] = column
  253. data_init["periods"] = len(column)
  254. get.columns = pd.date_range(**data_init)
  255. else:
  256. row = self.get_index(name, True)
  257. if save:
  258. get.loc[:, "row"] = row
  259. data_init["periods"] = len(row)
  260. get.index = pd.date_range(**data_init)
  261. self.add_sheet(get, f"{name}")
  262. return get
  263. def time_naming(self, name, is_column, save, **time_init):
  264. # Date_Init:start,end,freq 任意两样
  265. get = self.get_sheet(name)
  266. if is_column: # 处理列名
  267. column = self.get_column(name, True)
  268. if save: # 保存原数据
  269. get.loc["column"] = column
  270. time_init["periods"] = len(column)
  271. get.columns = pd.timedelta_range(**time_init)
  272. else:
  273. row = self.get_index(name, True)
  274. if save:
  275. get.loc[:, "row"] = row
  276. time_init["periods"] = len(row)
  277. get.index = pd.timedelta_range(**time_init)
  278. self.add_sheet(get, f"{name}")
  279. return get
  280. @plugin_class_loading(get_path(r"template/datascience"))
  281. class Sorted(FormBase, metaclass=ABCMeta):
  282. def sorted_index(self, name, row: bool, new=False, a=True):
  283. get = self.get_sheet(name)
  284. if row: # row-行名排序
  285. sorted_sheet = get.sort_index(axis=0, ascending=a)
  286. else:
  287. sorted_sheet = get.sort_index(axis=1, ascending=a)
  288. if new:
  289. self.add_sheet(sorted_sheet, f"{name}:排序")
  290. return sorted_sheet
  291. def stored_value(self, name, collation, new=False):
  292. get = self.get_sheet(name)
  293. row = get.columns.values
  294. by = []
  295. ascending = []
  296. for i in collation:
  297. by.append(row[i[0]])
  298. ascending.append(i[1])
  299. if len(by) == 1:
  300. by = by[0]
  301. ascending = ascending[0]
  302. sorted_sheet = get.sort_values(by=by, ascending=ascending)
  303. if new:
  304. self.add_sheet(sorted_sheet, f"{name}:排序")
  305. return sorted_sheet
  306. @plugin_class_loading(get_path(r"template/datascience"))
  307. class RowColumn(Rename, Sorted, metaclass=ABCMeta):
  308. def get_column(self, name, only=False): # 列名
  309. get = self.get_sheet(name)
  310. if only:
  311. return_ = get.columns.values
  312. else:
  313. return_ = []
  314. loc_list = get.columns.values
  315. a = 0
  316. for i in loc_list:
  317. data = get[i].to_list()
  318. return_.append(f"[列号:{a}]{i} -> {data}")
  319. a += 1
  320. return return_
  321. def get_index(self, name, only=False):
  322. get = self.get_sheet(name)
  323. if only:
  324. values = get.index.values
  325. else:
  326. values = []
  327. loc_list = get.index.values
  328. a = 0
  329. for i in range(len(loc_list)):
  330. index_num = loc_list[i]
  331. data = get.iloc[i].to_list()
  332. values.append(f"[行号:{a}]{index_num} -> {data}")
  333. a += 1
  334. return values
  335. def replace_index(self, name, is_column, rename, save):
  336. get = self.get_sheet(name)
  337. if is_column:
  338. if save: # 保存原数据
  339. get.loc["column"] = self.get_column(name, True)
  340. new = get.rename(columns=rename)
  341. else:
  342. if save:
  343. get.loc[:, "row"] = self.get_index(name, True)
  344. new = get.rename(index=rename)
  345. self.add_sheet(new, f"{name}")
  346. return new
  347. def change_index(
  348. self,
  349. name: str,
  350. is_column: bool,
  351. iloc: int,
  352. save: bool = True,
  353. drop: bool = False,
  354. ):
  355. get = self.get_sheet(name).copy()
  356. if is_column: # 列名
  357. row = self.get_index(name, True) # 行数据
  358. t = row.tolist()[iloc]
  359. if save: # 保存原数据
  360. get.loc["column"] = self.get_column(name, True)
  361. # new_colums = get.loc[t].values
  362. get.columns = get.loc[t].values
  363. if drop:
  364. get.drop(t, axis=0, inplace=True) # 删除行
  365. else:
  366. column = self.get_column(name, True)
  367. t = column.tolist()[iloc]
  368. if save:
  369. get.loc[:, "row"] = self.get_index(name, True)
  370. get.index = get.loc[:, t].values # 调整
  371. if drop:
  372. get.drop(t, axis=1, inplace=True) # 删除行
  373. self.add_sheet(get, f"{name}")
  374. return get
  375. @plugin_class_loading(get_path(r"template/datascience"))
  376. class SheetSlice(FormBase, metaclass=ABCMeta):
  377. def get_slice(
  378. self, name, column, row, is_iloc=True, new=False
  379. ): # iloc(Row,Column) or loc
  380. get = self.get_sheet(name)
  381. if is_iloc:
  382. new_sheet = get.iloc[row, column]
  383. else:
  384. new_sheet = get.loc[row, column]
  385. if new:
  386. self.add_sheet(new_sheet, f"{name}:切片")
  387. return new_sheet
  388. def del_slice(self, name, column, row, new):
  389. new_sheet = self.get_sheet(name)
  390. column_list = new_sheet.columns.values
  391. for i in column:
  392. try:
  393. new_sheet = new_sheet.drop(column_list[int(i)], axis=1)
  394. except BaseException as e:
  395. logging.warning(str(e))
  396. row_list = new_sheet.index.values
  397. for i in row:
  398. try:
  399. new_sheet = new_sheet.drop(row_list[int(i)])
  400. except BaseException as e:
  401. logging.warning(str(e))
  402. if new:
  403. self.add_sheet(new_sheet, f"{name}:删减")
  404. return new_sheet
  405. @plugin_class_loading(get_path(r"template/datascience"))
  406. class DatacleaningFunc(FormBase, metaclass=ABCMeta):
  407. def add_clean_func(self, code):
  408. name = self.named_domain.copy()
  409. exec(code, name)
  410. func_dict = {
  411. "Done_Row": name.get("Done_Row", []),
  412. "Done_Column": name.get("Done_Column", []),
  413. "axis": name.get("axis", True),
  414. "check": name.get("check", lambda data, x, b, c, d, e: True),
  415. "done": name.get("done", lambda data, x, b, c, d, e: data),
  416. }
  417. title = (
  418. f"[{name.get('name', f'[{len(self.clean_func)}')}] Done_Row={func_dict['Done_Row']}_Done_Column="
  419. f"{func_dict['Done_Column']}_axis={func_dict['axis']}"
  420. )
  421. self.clean_func[title] = func_dict
  422. self.clean_func_code[title] = code
  423. def get_clean_func(self):
  424. return list(self.clean_func.keys())
  425. def del_clean_func(self, key):
  426. del self.clean_func[key]
  427. del self.clean_func_code[key]
  428. def del_all_clean_func(self):
  429. self.clean_func = {}
  430. self.clean_func_code = {}
  431. def get_clean_code(self, key):
  432. return self.clean_func_code[key]
  433. def data_clean(self, name):
  434. get = self.get_sheet(name).copy()
  435. for i in list(self.clean_func.values()):
  436. done_row = i["Done_Row"]
  437. done_column = i["Done_Column"]
  438. if not done_row:
  439. done_row = range(get.shape[0]) # shape=[行,列]#不需要回调
  440. if not done_column:
  441. done_column = range(get.shape[1]) # shape=[行,列]#不需要回调
  442. if i["axis"]:
  443. axis = 0
  444. else:
  445. axis = 1
  446. check = i["check"]
  447. done = i["done"]
  448. for row in done_row:
  449. for column in done_column:
  450. try:
  451. data = eval(
  452. f"get.iloc[{row},{column}]", {"get": get}
  453. ) # 第一个是行号,然后是列号
  454. column_data = eval(f"get.iloc[{row}]", {"get": get})
  455. row_data = eval(f"get.iloc[:,{column}]", {"get": get})
  456. if not check(
  457. data,
  458. row,
  459. column,
  460. get.copy(),
  461. column_data.copy(),
  462. row_data.copy(),
  463. ):
  464. d = done(
  465. data,
  466. row,
  467. column,
  468. get.copy(),
  469. column_data.copy(),
  470. row_data.copy(),
  471. )
  472. if d == self.DEL:
  473. if axis == 0: # 常规删除
  474. row_list = get.index.values
  475. get = get.drop(row_list[int(row)])
  476. else: # 常规删除
  477. columns_list = get.columns.values
  478. get = get.drop(columns_list[int(row)], axis=1)
  479. else:
  480. # 第一个是行名,然后是列名
  481. exec(f"get.iloc[{row},{column}] = {d}", {"get": get})
  482. except BaseException as e:
  483. logging.warning(str(e))
  484. self.add_sheet(get, f"{name}:清洗")
  485. return get
  486. @plugin_class_loading(get_path(r"template/datascience"))
  487. class SheetDtype(FormBase, metaclass=ABCMeta):
  488. def set_dtype(self, name, column, dtype, wrong):
  489. get = self.get_sheet(name).copy()
  490. for i in range(len(column)):
  491. try:
  492. column[i] = int(column[i])
  493. except BaseException as e:
  494. logging.warning(str(e))
  495. if dtype != "":
  496. func_dic = {
  497. "Num": pd.to_numeric,
  498. "Date": pd.to_datetime,
  499. "Time": pd.to_timedelta,
  500. }
  501. if column:
  502. get.iloc[:, column] = get.iloc[:, column].apply(
  503. func_dic.get(dtype, pd.to_numeric), errors=wrong
  504. )
  505. else:
  506. get = get.apply(func_dic.get(dtype, pd.to_numeric), errors=wrong)
  507. else:
  508. if column:
  509. get.iloc[:, column] = get.iloc[:, column].infer_objects()
  510. else:
  511. get = get.infer_objects()
  512. self.add_sheet(get, f"{name}")
  513. return get
  514. def as_dtype(self, name, column, dtype, wrong):
  515. get = self.get_sheet(name).copy()
  516. for i in range(len(column)):
  517. try:
  518. column[i] = int(column[i])
  519. except BaseException as e:
  520. logging.warning(str(e))
  521. func_dic = {
  522. "Int": int,
  523. "Float": float,
  524. "Str": str,
  525. "Date": pd.Timestamp,
  526. "TimeDelta": pd.Timedelta,
  527. }
  528. if column:
  529. get.iloc[:, column] = get.iloc[:, column].astype(
  530. func_dic.get(dtype, dtype), errors=wrong
  531. )
  532. else:
  533. get = get.astype(func_dic.get(dtype, dtype), errors=wrong)
  534. self.add_sheet(get, f"{name}")
  535. return get
  536. @plugin_class_loading(get_path(r"template/datascience"))
  537. class DataNan(FormBase, metaclass=ABCMeta):
  538. def is_nan(self, name):
  539. get = self.get_sheet(name)
  540. bool_nan = pd.isna(get)
  541. return bool_nan
  542. def del_nan(self, name, new):
  543. get = self.get_sheet(name)
  544. clean_sheet = get.dropna(axis=0)
  545. if new:
  546. self.add_sheet(clean_sheet, f"{name}:清洗")
  547. return clean_sheet
  548. @plugin_class_loading(get_path(r"template/datascience"))
  549. class BoolSheet(FormBase, metaclass=ABCMeta):
  550. def to_bool(self, name, exp, new=False):
  551. get = self.get_sheet(name)
  552. bool_sheet = eval(exp, {"S": get, "Sheet": get.iloc})
  553. if new:
  554. self.add_sheet(bool_sheet, f"{name}:布尔")
  555. return bool_sheet
  556. @plugin_class_loading(get_path(r"template/datascience"))
  557. class DataSample(FormBase, metaclass=ABCMeta):
  558. def sample(self, name, new):
  559. get = self.get_sheet(name)
  560. sample = get.sample(frac=1) # 返回比,默认按行打乱
  561. if new:
  562. self.add_sheet(sample, f"{name}:打乱")
  563. return sample
  564. @plugin_class_loading(get_path(r"template/datascience"))
  565. class DataTranspose(FormBase, metaclass=ABCMeta):
  566. def transpose(self, name, new=True):
  567. get = self.get_sheet(name)
  568. t = get.T.copy() # 复制一份,防止冲突
  569. if new:
  570. self.add_sheet(t, f"{name}.T")
  571. return t
  572. @plugin_class_loading(get_path(r"template/datascience"))
  573. class PlotBase(
  574. SheetRender,
  575. SheetReport,
  576. RowColumn,
  577. SheetSlice,
  578. DatacleaningFunc,
  579. SheetDtype,
  580. DataNan,
  581. BoolSheet,
  582. DataSample,
  583. DataTranspose,
  584. SheetIO,
  585. ):
  586. @staticmethod
  587. def parsing_parameters(text): # 解析文本参数
  588. args = {} # 解析到的参数
  589. exec(text, args)
  590. args_use = {
  591. "title": args.get("title", None),
  592. "vice_title": args.get("vice_title", "CoTan~数据处理:"),
  593. "show_Legend": bool(args.get("show_Legend", True)),
  594. "ori_Legend": args.get("ori_Legend", "horizontal"),
  595. "show_Visual_mapping": bool(args.get("show_Visual_mapping", True)),
  596. "is_color_Visual_mapping": bool(args.get("is_color_Visual_mapping", True)),
  597. "min_Visual_mapping": args.get("min_Visual_mapping", None),
  598. "max_Visual_mapping": args.get("max_Visual_mapping", None),
  599. "color_Visual_mapping": args.get("color_Visual_mapping", None),
  600. "size_Visual_mapping": args.get("size_Visual_mapping", None),
  601. "text_Visual_mapping": args.get("text_Visual_mapping", None),
  602. "is_Subsection": bool(args.get("is_Subsection", False)),
  603. "Subsection_list": args.get("Subsection_list", []),
  604. "ori_Visual": args.get("ori_Visual", "vertical"),
  605. "Tool_BOX": bool(args.get("Tool_BOX", True)),
  606. "Theme": args.get("Theme", "white"),
  607. "BG_Color": args.get("BG_Color", None),
  608. "width": args.get("width", "900px"),
  609. "heigh": (
  610. args.get("heigh", "500px")
  611. if not bool(args.get("Square", False))
  612. else args.get("width", "900px")
  613. ),
  614. "page_Title": args.get("page_Title", ""),
  615. "show_Animation": args.get("show_Animation", True),
  616. "show_Axis": bool(args.get("show_Axis", True)),
  617. "Axis_Zero": bool(args.get("Axis_Zero", False)),
  618. "show_Axis_Scale": bool(args.get("show_Axis_Scale", True)),
  619. "x_type": args.get("x_type", None),
  620. "y_type": args.get("y_type", None),
  621. "z_type": args.get("z_type", None),
  622. "make_Line": args.get("make_Line", []),
  623. "Datazoom": args.get("Datazoom", "N"),
  624. "show_Text": bool(args.get("show_Text", False)),
  625. "Size": args.get("Size", 10),
  626. "Symbol": args.get("Symbol", "circle"),
  627. "bar_Stacking": bool(args.get("bar_Stacking", False)),
  628. "EffectScatter": bool(args.get("EffectScatter", False)),
  629. "connect_None": bool(args.get("connect_None", False)),
  630. "Smooth_Line": bool(args.get("Smooth_Line", False)),
  631. "Area_chart": bool(args.get("Area_chart", False)),
  632. "paste_Y": bool(args.get("paste_Y", False)),
  633. "step_Line": bool(args.get("step_Line", False)),
  634. "size_PictorialBar": args.get("size_PictorialBar", None),
  635. "Polar_units": args.get("Polar_units", "100"),
  636. "More": bool(args.get("More", False)),
  637. "WordCould_Size": args.get("WordCould_Size", [20, 100]),
  638. "WordCould_Shape": args.get("WordCould_Shape", "circle"),
  639. "symbol_Graph": args.get("symbol_Graph", "circle"),
  640. "Repulsion": float(args.get("Repulsion", 8000)),
  641. "Area_radar": bool(args.get("Area_radar", True)),
  642. "HTML_Type": args.get("HTML_Type", 2),
  643. "Map": args.get("Map", "china"),
  644. "show_Map_Symbol": bool(args.get("show_Map_Symbol", False)),
  645. "Geo_Type": {
  646. "heatmap": GeoType.HEATMAP,
  647. "scatter": "scatter",
  648. "EFFECT": GeoType.EFFECT_SCATTER,
  649. }.get(args.get("Geo_Type", "heatmap"), GeoType.HEATMAP),
  650. "map_Type": args.get("map_Type", "2D"),
  651. "is_Dark": bool(args.get("is_Dark", False)),
  652. } # 真实的参数
  653. # 标题设置,global
  654. # 图例设置global
  655. # 视觉映射设置global
  656. # 工具箱设置global
  657. # Init设置global
  658. # 坐标轴设置,2D坐标图和3D坐标图
  659. # Mark设置 坐标图专属
  660. # Datazoom设置 坐标图专属
  661. # 显示文字设置
  662. # 统一化的设置
  663. # Bar设置
  664. # 散点图设置
  665. # 折线图设置
  666. return args_use
  667. @staticmethod
  668. def global_set(
  669. args_use, title, min_, max_, data_zoom=False, visual_mapping=True, axis=()
  670. ):
  671. k = {}
  672. # 标题设置
  673. if args_use["title"] is None:
  674. args_use["title"] = title
  675. k["title_opts"] = opts.TitleOpts(
  676. title=args_use["title"], subtitle=args_use["vice_title"]
  677. )
  678. # 图例设置
  679. if not args_use["show_Legend"]:
  680. k["legend_opts"] = opts.LegendOpts(is_show=False)
  681. else:
  682. k["legend_opts"] = opts.LegendOpts(
  683. type_="scroll", orient=args_use["ori_Legend"], pos_bottom="2%"
  684. ) # 移动到底部,避免和标题冲突
  685. # 视觉映射
  686. if not args_use["show_Visual_mapping"]:
  687. pass
  688. elif not visual_mapping:
  689. pass
  690. else:
  691. if args_use["min_Visual_mapping"] is not None:
  692. min_ = args_use["min_Visual_mapping"]
  693. if args_use["max_Visual_mapping"] is not None:
  694. max_ = args_use["max_Visual_mapping"]
  695. k["visualmap_opts"] = opts.VisualMapOpts(
  696. type_="color" if args_use["is_color_Visual_mapping"] else "size",
  697. max_=max_,
  698. min_=min_,
  699. range_color=args_use["color_Visual_mapping"],
  700. range_size=args_use["size_Visual_mapping"],
  701. range_text=args_use["text_Visual_mapping"],
  702. is_piecewise=args_use["is_Subsection"],
  703. pieces=args_use["Subsection_list"],
  704. orient=args_use["ori_Visual"],
  705. )
  706. k["toolbox_opts"] = opts.ToolboxOpts(is_show=args_use["Tool_BOX"])
  707. if data_zoom:
  708. if args_use["Datazoom"] == "all":
  709. k["datazoom_opts"] = [
  710. opts.DataZoomOpts(),
  711. opts.DataZoomOpts(orient="horizontal"),
  712. ]
  713. elif args_use["Datazoom"] == "horizontal":
  714. k["datazoom_opts"] = opts.DataZoomOpts(type_="inside")
  715. elif args_use["Datazoom"] == "vertical":
  716. opts.DataZoomOpts(orient="vertical")
  717. elif args_use["Datazoom"] == "inside_vertical":
  718. opts.DataZoomOpts(type_="inside", orient="vertical")
  719. elif args_use["Datazoom"] == "inside_vertical":
  720. opts.DataZoomOpts(type_="inside", orient="horizontal")
  721. # 坐标轴设定,输入设定的坐标轴即可
  722. def axis_seeting(args_use_, axis_="x"):
  723. axis_k = {}
  724. if args_use_[f"{axis_[0]}_type"] == "Display" or not args_use_["show_Axis"]:
  725. axis_k[f"{axis_[0]}axis_opts"] = opts.AxisOpts(is_show=False)
  726. else:
  727. axis_k[f"{axis_[0]}axis_opts"] = opts.AxisOpts(
  728. type_=args_use_[f"{axis_[0]}_type"],
  729. axisline_opts=opts.AxisLineOpts(is_on_zero=args_use_["Axis_Zero"]),
  730. axistick_opts=opts.AxisTickOpts(
  731. is_show=args_use_["show_Axis_Scale"]
  732. ),
  733. )
  734. return axis_k
  735. for i in axis:
  736. k.update(axis_seeting(args_use, i))
  737. return k
  738. @staticmethod
  739. def init_setting(args_use):
  740. k = {}
  741. # 设置标题
  742. if args_use["page_Title"] == "":
  743. title = "CoTan_数据处理"
  744. else:
  745. title = f"CoTan_数据处理:{args_use['page_Title']}"
  746. k["init_opts"] = opts.InitOpts(
  747. theme=args_use["Theme"],
  748. bg_color=args_use["BG_Color"],
  749. width=args_use["width"],
  750. height=args_use["heigh"],
  751. page_title=title,
  752. animation_opts=opts.AnimationOpts(animation=args_use["show_Animation"]),
  753. )
  754. return k
  755. @staticmethod
  756. def get_title(args_use):
  757. return f":{args_use['title']}"
  758. @staticmethod
  759. def mark(args_use):
  760. k = {}
  761. line = []
  762. for i in args_use["make_Line"]:
  763. if i[2] == "c" or i[0] in ("min", "max", "average"):
  764. line.append(opts.MarkLineItem(type_=i[0], name=i[1]))
  765. elif i[2] == "x":
  766. line.append(opts.MarkLineItem(x=i[0], name=i[1]))
  767. else:
  768. line.append(opts.MarkLineItem(y=i[0], name=i[1]))
  769. if not line:
  770. return k
  771. k["markline_opts"] = opts.MarkLineOpts(data=line)
  772. return k
  773. @staticmethod
  774. def yaxis_label(args_use, position="inside"):
  775. return {
  776. "label_opts": opts.LabelOpts(
  777. is_show=args_use["show_Text"], position=position
  778. )
  779. }
  780. @staticmethod
  781. def special_setting(args_use, type_): # 私人设定
  782. k = {}
  783. if type_ == "Bar": # 设置y的重叠
  784. if args_use["bar_Stacking"]:
  785. k = {"stack": "stack1"}
  786. elif type_ == "Scatter":
  787. k["Beautiful"] = args_use["EffectScatter"]
  788. k["symbol"] = args_use["Symbol"]
  789. k["symbol_size"] = args_use["Size"]
  790. elif type_ == "Line":
  791. k["is_connect_nones"] = args_use["connect_None"]
  792. # 平滑曲线或连接y轴
  793. k["is_smooth"] = (
  794. True if args_use["Smooth_Line"] or args_use["paste_Y"] else False
  795. )
  796. k["areastyle_opts"] = opts.AreaStyleOpts(
  797. opacity=0.5 if args_use["Area_chart"] else 0
  798. )
  799. if args_use["step_Line"]:
  800. del k["is_smooth"]
  801. k["is_step"] = True
  802. elif type_ == "PictorialBar":
  803. k["symbol_size"] = args_use["Size"]
  804. elif type_ == "Polar":
  805. return args_use["Polar_units"] # 回复的是单位制而不是设定
  806. elif type_ == "WordCloud":
  807. k["word_size_range"] = args_use["WordCould_Size"] # 放到x轴
  808. k["shape"] = args_use["Symbol"] # 放到x轴
  809. elif type_ == "Graph":
  810. k["symbol_Graph"] = args_use["Symbol"] # 放到x轴
  811. elif type_ == "Radar": # 雷达图
  812. k["areastyle_opts"] = opts.AreaStyleOpts(
  813. opacity=0.1 if args_use["Area_chart"] else 0
  814. )
  815. k["symbol"] = args_use["Symbol"] # 雷达图symbol
  816. return k
  817. @plugin_class_loading(get_path(r"template/datascience"))
  818. class Render(PlotBase):
  819. def render_all(self, text, render_dir) -> Page:
  820. args = self.parsing_parameters(text)
  821. if args["page_Title"] == "":
  822. title = "CoTan_数据处理"
  823. else:
  824. title = f"CoTan_数据处理:{args['page_Title']}"
  825. if args["HTML_Type"] == 1:
  826. page = Page(page_title=title, layout=Page.DraggablePageLayout)
  827. page.add(*self.all_render.values())
  828. elif args["HTML_Type"] == 2:
  829. page = Page(page_title=title, layout=Page.SimplePageLayout)
  830. page.add(*self.all_render.values())
  831. else:
  832. page = Tab(page_title=title)
  833. for i in self.all_render:
  834. page.add(self.all_render[i], i)
  835. page.render(render_dir)
  836. return render_dir
  837. def overlap(self, down, up):
  838. over_down = self.all_render[down]
  839. over_up = self.all_render[up]
  840. over_down.overlap(over_up)
  841. return over_down
  842. @staticmethod
  843. def get_random_color():
  844. # 随机颜色,雷达图默认非随机颜色
  845. rgb = [randint(0, 255), randint(0, 255), randint(0, 255)]
  846. color = "#"
  847. for a in rgb:
  848. # 转换为16进制,upper表示小写(规范化)
  849. color += str(hex(a))[-2:].replace("x", "0").upper()
  850. return color
  851. def get_all_render(self):
  852. return self.all_render.copy()
  853. def del_render(self, key):
  854. del self.all_render[key]
  855. def clean_render(self):
  856. self.all_render = {}
  857. def custom_graph(self, text):
  858. named_domain = {}
  859. named_domain.update(locals())
  860. named_domain.update(globals())
  861. exec(text, named_domain)
  862. exec("c = Page()", named_domain)
  863. self.all_render[f"自定义图[{len(self.all_render)}]"] = named_domain["c"]
  864. return named_domain["c"]
  865. @plugin_class_loading(get_path(r"template/datascience"))
  866. class AxisPlot(Render):
  867. def to_bar(self, name, text) -> Bar: # Bar:数据堆叠
  868. get = self.get_sheet(name)
  869. x = self.get_index(name, True).tolist()
  870. args = self.parsing_parameters(text)
  871. c = Bar(**self.init_setting(args)).add_xaxis(
  872. list(map(str, list(set(x))))
  873. ) # 转变为str类型
  874. y = []
  875. for i in get.iteritems(): # 按列迭代
  876. q = i[1].tolist() # 转换为列表
  877. try:
  878. c.add_yaxis(
  879. f"{name}_{i[0]}",
  880. q,
  881. **self.special_setting(args, "Bar"),
  882. **self.yaxis_label(args),
  883. color=self.get_random_color(),
  884. ) # i[0]是名字,i是tuple,其中i[1]是data
  885. # q不需要float,因为应多不同的type他会自动变更,但是y是用来比较大小
  886. y += list(map(int, q))
  887. except BaseException as e:
  888. logging.warning(str(e))
  889. if not y:
  890. args["show_Visual_mapping"] = False # 关闭视觉映射
  891. y = [0, 100]
  892. c.set_global_opts(
  893. **self.global_set(args, f"{name}柱状图", min(y), max(y), True, axis=["x", "y"])
  894. )
  895. c.set_series_opts(**self.mark(args))
  896. self.all_render[f"{name}柱状图[{len(self.all_render)}]{self.get_title(args)}"] = c
  897. return c
  898. def to_line(self, name, text) -> Line: # 折线图:连接空数据、显示数值、平滑曲线、面积图以及紧贴Y轴
  899. get = self.get_sheet(name)
  900. x = self.get_index(name, True).tolist()
  901. args = self.parsing_parameters(text)
  902. c = Line(**self.init_setting(args)).add_xaxis(
  903. list(map(str, list(set(x))))
  904. ) # 转变为str类型
  905. y = []
  906. for i in get.iteritems(): # 按列迭代
  907. q = i[1].tolist() # 转换为列表
  908. try:
  909. c.add_yaxis(
  910. f"{name}_{i[0]}",
  911. q,
  912. **self.special_setting(args, "Line"),
  913. **self.yaxis_label(args),
  914. color=self.get_random_color(),
  915. ) # i[0]是名字,i是tuple,其中i[1]是data
  916. # q不需要float,因为应多不同的type他会自动变更,但是y是用来比较大小
  917. y += list(map(int, q))
  918. except BaseException as e:
  919. logging.warning(str(e))
  920. if not y:
  921. args["show_Visual_mapping"] = False # 关闭视觉映射
  922. y = [0, 100]
  923. c.set_global_opts(
  924. **self.global_set(args, f"{name}折线图", min(y), max(y), True, axis=["x", "y"])
  925. )
  926. c.set_series_opts(**self.mark(args))
  927. self.all_render[f"{name}折线图[{len(self.all_render)}]{self.get_title(args)}"] = c
  928. return c
  929. def to_scatter(self, name, text) -> Scatter: # 散点图标记形状和大小、特效、标记线
  930. get = self.get_sheet(name)
  931. args = self.parsing_parameters(text)
  932. x = self.get_index(name, True).tolist()
  933. type_ = self.special_setting(args, "Scatter")
  934. if type_["Beautiful"]:
  935. func = EffectScatter
  936. else:
  937. func = Scatter
  938. del type_["Beautiful"]
  939. c = func(**self.init_setting(args)).add_xaxis(
  940. list(map(str, list(set(x))))
  941. ) # 转变为str类型
  942. y = []
  943. for i in get.iteritems(): # 按列迭代
  944. q = i[1].tolist() # 转换为列表
  945. try:
  946. c.add_yaxis(
  947. f"{name}_{i[0]}",
  948. q,
  949. **type_,
  950. **self.yaxis_label(args),
  951. color=self.get_random_color(),
  952. ) # i[0]是名字,i是tuple,其中i[1]是data
  953. # q不需要float,因为应多不同的type他会自动变更,但是y是用来比较大小
  954. y += list(map(int, q))
  955. except BaseException as e:
  956. logging.warning(str(e))
  957. if not y:
  958. args["show_Visual_mapping"] = False # 关闭视觉映射
  959. y = [0, 100]
  960. c.set_global_opts(
  961. **self.global_set(args, f"{name}散点图", min(y), max(y), True, axis=["x", "y"])
  962. )
  963. c.set_series_opts(**self.mark(args))
  964. self.all_render[f"{name}散点图[{len(self.all_render)}]{self.get_title(args)}"] = c
  965. return c
  966. def to_pictorialbar(self, name, text) -> PictorialBar: # 象形柱状图:图形、剪裁图像、元素重复和间隔
  967. get = self.get_sheet(name)
  968. x = self.get_index(name, True).tolist()
  969. args = self.parsing_parameters(text)
  970. c = (
  971. PictorialBar(**self.init_setting(args))
  972. .add_xaxis(list(map(str, list(set(x))))) # 转变为str类型
  973. .reversal_axis()
  974. )
  975. y = []
  976. k = self.special_setting(args, "PictorialBar")
  977. for i in get.iteritems(): # 按列迭代
  978. q = i[1].tolist() # 转换为列表
  979. try:
  980. c.add_yaxis(
  981. f"{name}_{i[0]}",
  982. q,
  983. label_opts=opts.LabelOpts(is_show=False),
  984. symbol_repeat=True,
  985. is_symbol_clip=True,
  986. symbol=SymbolType.ROUND_RECT,
  987. **k,
  988. color=self.get_random_color(),
  989. )
  990. # q不需要float,因为应多不同的type他会自动变更,但是y是用来比较大小
  991. y += list(map(int, q))
  992. except BaseException as e:
  993. logging.warning(str(e))
  994. if not y:
  995. args["show_Visual_mapping"] = False # 关闭视觉映射
  996. y = [0, 100]
  997. c.set_global_opts(
  998. **self.global_set(
  999. args, f"{name}象形柱状图", min(y), max(y), True, axis=["x", "y"]
  1000. )
  1001. )
  1002. c.set_series_opts(**self.mark(args))
  1003. self.all_render[f"{name}[{len(self.all_render)}]{self.get_title(args)}"] = c
  1004. return c
  1005. def to_boxpolt(self, name, text) -> Boxplot:
  1006. get = self.get_sheet(name)
  1007. args = self.parsing_parameters(text)
  1008. c = Boxplot(**self.init_setting(args)).add_xaxis([f"{name}"])
  1009. y = []
  1010. for i in get.iteritems(): # 按列迭代
  1011. q = i[1].tolist() # 转换为列表
  1012. try:
  1013. c.add_yaxis(f"{name}_{i[0]}", [q], **self.yaxis_label(args))
  1014. # q不需要float,因为应多不同的type他会自动变更,但是y是用来比较大小
  1015. y += list(map(float, q))
  1016. except BaseException as e:
  1017. logging.warning(str(e))
  1018. if not y:
  1019. args["show_Visual_mapping"] = False # 关闭视觉映射
  1020. y = [0, 100]
  1021. c.set_global_opts(
  1022. **self.global_set(args, f"{name}箱形图", min(y), max(y), True, axis=["x", "y"])
  1023. )
  1024. c.set_series_opts(**self.mark(args))
  1025. self.all_render[f"{name}箱形图[{len(self.all_render)}]{self.get_title(args)}"] = c
  1026. return c
  1027. def to_heatmap(self, name, text) -> HeatMap: # 显示数据
  1028. get = self.get_sheet(name)
  1029. x = self.get_column(name, True).tolist() # 图的x轴,下侧,列名
  1030. y = self.get_index(name, True).tolist() # 图的y轴,左侧,行名
  1031. value_list = []
  1032. q = []
  1033. for c in range(len(x)): # c-列,r-行
  1034. for r in range(len(y)):
  1035. try:
  1036. v = float(eval(f"get.iloc[{r},{c}]", {"get": get})) # 先行后列
  1037. except ValueError:
  1038. continue
  1039. q.append(v)
  1040. value_list.append([c, r, v])
  1041. args = self.parsing_parameters(text)
  1042. try:
  1043. max_, min_ = max(q), min(q)
  1044. except TypeError:
  1045. args["show_Visual_mapping"] = False # 关闭视觉映射
  1046. max_, min_ = 0, 100
  1047. c = (
  1048. HeatMap(**self.init_setting(args))
  1049. .add_xaxis(list(map(str, list(set(x))))) # 转变为str类型
  1050. .add_yaxis(
  1051. f"{name}", list(map(str, y)), value_list, **self.yaxis_label(args)
  1052. )
  1053. .set_global_opts(
  1054. **self.global_set(args, f"{name}热力图", min_, max_, True, axis=["x", "y"])
  1055. )
  1056. .set_series_opts(**self.mark(args))
  1057. )
  1058. self.all_render[f"{name}热力图[{len(self.all_render)}]{self.get_title(args)}"] = c
  1059. return c
  1060. @plugin_class_loading(get_path(r"template/datascience"))
  1061. class GeneralPlot(Render):
  1062. def to_format_graph(self, name, text) -> Graph:
  1063. get = self.get_sheet(name)
  1064. y_name = self.get_index(name, True).tolist() # 拿行名
  1065. nodes = []
  1066. link = []
  1067. for i in get.iterrows(): # 按行迭代
  1068. q = i[1].tolist() # 转换为列表
  1069. try:
  1070. nodes.append(
  1071. {"name": f"{i[0]}", "symbolSize": float(q[0]), "value": float(q[0])}
  1072. )
  1073. for a in q[1:]:
  1074. n = str(a).split(":")
  1075. try:
  1076. link.append(
  1077. {"source": f"{i[0]}", "target": n[0], "value": float(n[1])}
  1078. )
  1079. except BaseException as e:
  1080. logging.warning(str(e))
  1081. except BaseException as e:
  1082. logging.warning(str(e))
  1083. if not link:
  1084. for i in nodes:
  1085. for j in nodes:
  1086. link.append(
  1087. {
  1088. "source": i.get("name"),
  1089. "target": j.get("name"),
  1090. "value": abs(i.get("value") - j.get("value")),
  1091. }
  1092. )
  1093. args = self.parsing_parameters(text)
  1094. c = (
  1095. Graph(**self.init_setting(args))
  1096. .add(
  1097. f"{y_name[0]}",
  1098. nodes,
  1099. link,
  1100. repulsion=args["Repulsion"],
  1101. **self.yaxis_label(args),
  1102. )
  1103. .set_global_opts(
  1104. **self.global_set(args, f"{name}关系图", 0, 100, False, False)
  1105. )
  1106. )
  1107. self.all_render[f"{name}关系图[{len(self.all_render)}]{self.get_title(args)}"] = c
  1108. return c
  1109. def to_graph(self, name, text) -> Graph: # XY关系图,新的书写方式
  1110. get = self.get_sheet(name)
  1111. args = self.parsing_parameters(text)
  1112. size = args["Size"] * 3
  1113. # 生成节点信息
  1114. y_name = self.get_index(name, True).tolist() # 拿行名
  1115. x_name = self.get_column(name, True).tolist() # 拿列名
  1116. nodes_list = list(set(y_name + x_name)) # 处理重复,作为nodes列表
  1117. nodes = []
  1118. for i in nodes_list:
  1119. nodes.append({"name": f"{i}", "symbolSize": size})
  1120. # 生成link信息
  1121. link = [] # 记录连接的信息
  1122. have = []
  1123. for y in range(len(y_name)): # 按行迭代
  1124. for x in range(len(x_name)):
  1125. y_n = y_name[y] # 节点1
  1126. x_n = x_name[x] # 节点2
  1127. if y_n == x_n:
  1128. continue
  1129. if (y_n, x_n) in have or (x_n, y_n) in have:
  1130. continue
  1131. else:
  1132. have.append((y_n, x_n))
  1133. try:
  1134. v = float(eval(f"get.iloc[{y},{x}]", {"get": get})) # 取得value
  1135. link.append({"source": y_n, "target": x_n, "value": v})
  1136. except BaseException as e:
  1137. logging.warning(str(e))
  1138. c = (
  1139. Graph(**self.init_setting(args))
  1140. .add(
  1141. f"{y_name[0]}",
  1142. nodes,
  1143. link,
  1144. repulsion=args["Repulsion"],
  1145. **self.yaxis_label(args),
  1146. )
  1147. .set_global_opts(
  1148. **self.global_set(args, f"{name}关系图", 0, 100, False, False)
  1149. )
  1150. )
  1151. self.all_render[f"{name}关系图[{len(self.all_render)}]{self.get_title(args)}"] = c
  1152. return c
  1153. def to_sankey(self, name, text):
  1154. get = self.get_sheet(name)
  1155. args = self.parsing_parameters(text)
  1156. # 生成节点信息
  1157. y_name = self.get_index(name, True).tolist() # 拿行名
  1158. x_name = self.get_column(name, True).tolist() # 拿列名
  1159. nodes_list = list(set(y_name + x_name)) # 处理重复,作为nodes列表
  1160. nodes = []
  1161. source = {}
  1162. target = {}
  1163. for i in nodes_list:
  1164. nodes.append({"name": f"{i}"})
  1165. source[i] = set() # 记录该元素source边连接的节点
  1166. target[i] = set() # 记录改元素target边连接的节点
  1167. # 生成link信息
  1168. link = [] # 记录连接的信息
  1169. have = []
  1170. for y in range(len(y_name)): # 按行迭代
  1171. for x in range(len(x_name)):
  1172. y_n = y_name[y] # 节点1
  1173. x_n = x_name[x] # 节点2
  1174. if y_n == x_n:
  1175. continue # 是否相同
  1176. if (y_n, x_n) in have or (x_n, y_n) in have:
  1177. continue # 是否重复
  1178. else:
  1179. have.append((y_n, x_n))
  1180. # 固定的,y在s而x在t,桑基图不可以绕环形,所以要做检查
  1181. if source[y_n] & target[x_n] != set():
  1182. continue
  1183. try:
  1184. v = float(eval(f"get.iloc[{y},{x}]", {"get": get})) # 取得value
  1185. link.append({"source": y_n, "target": x_n, "value": v})
  1186. target[y_n].add(x_n)
  1187. source[x_n].add(y_n)
  1188. except BaseException as e:
  1189. logging.warning(str(e))
  1190. c = (
  1191. Sankey()
  1192. .add(
  1193. f"{name}",
  1194. nodes,
  1195. link,
  1196. linestyle_opt=opts.LineStyleOpts(
  1197. opacity=0.2, curve=0.5, color="source"
  1198. ),
  1199. label_opts=opts.LabelOpts(position="right"),
  1200. )
  1201. .set_global_opts(
  1202. **self.global_set(args, f"{name}桑基图", 0, 100, False, False)
  1203. )
  1204. )
  1205. self.all_render[f"{name}桑基图[{len(self.all_render)}]{self.get_title(args)}"] = c
  1206. return c
  1207. def to_parallel(self, name, text) -> Parallel:
  1208. get = self.get_sheet(name)
  1209. dim = []
  1210. dim_list = self.get_index(name, True).tolist()
  1211. for i in range(len(dim_list)):
  1212. dim.append({"dim": i, "name": f"{dim_list[i]}"})
  1213. args = self.parsing_parameters(text)
  1214. c = (
  1215. Parallel(**self.init_setting(args))
  1216. .add_schema(dim)
  1217. .set_global_opts(
  1218. **self.global_set(args, f"{name}多轴图", 0, 100, False, False)
  1219. )
  1220. )
  1221. for i in get.iteritems(): # 按列迭代
  1222. q = i[1].tolist() # 转换为列表
  1223. c.add(f"{i[0]}", [q], **self.yaxis_label(args))
  1224. self.all_render[f"{name}多轴图[{len(self.all_render)}]{self.get_title(args)}"] = c
  1225. return c
  1226. def to_pie(self, name, text) -> Pie:
  1227. get = self.get_sheet(name)
  1228. data = []
  1229. for i in get.iterrows(): # 按行迭代
  1230. try:
  1231. data.append([f"{i[0]}", float(i[1].tolist()[0])])
  1232. except BaseException as e:
  1233. logging.warning(str(e))
  1234. args = self.parsing_parameters(text)
  1235. c = (
  1236. Pie(**self.init_setting(args))
  1237. .add(f"{name}", data, **self.yaxis_label(args, "top"))
  1238. .set_global_opts(**self.global_set(args, f"{name}饼图", 0, 100, False, False))
  1239. .set_series_opts(label_opts=opts.LabelOpts(formatter="{b}: {c}"))
  1240. )
  1241. self.all_render[f"{name}饼图[{len(self.all_render)}]{self.get_title(args)}"] = c
  1242. return c
  1243. def to_polar(self, name, text) -> Polar:
  1244. get = self.get_sheet(name)
  1245. data = []
  1246. args = self.parsing_parameters(text)
  1247. setting = self.special_setting(args, "Polar")
  1248. if setting == "rad": # 弧度制
  1249. convert = 0.0628
  1250. elif setting == "360": # 角度制
  1251. convert = 0.36
  1252. else:
  1253. convert = 1
  1254. for i in get.iterrows(): # 按行迭代
  1255. try:
  1256. q = i[1].tolist()
  1257. data.append((float(q[0]), float(q[1]) / convert))
  1258. except BaseException as e:
  1259. logging.warning(str(e))
  1260. c = (
  1261. Polar(**self.init_setting(args))
  1262. .add(f"{name}", data, type_="scatter", **self.yaxis_label(args))
  1263. .set_global_opts(
  1264. **self.global_set(args, f"{name}极坐标图", 0, 100, False, False)
  1265. )
  1266. )
  1267. self.all_render[f"{name}极坐标图[{len(self.all_render)}]{self.get_title(args)}"] = c
  1268. return c
  1269. def to_radar(self, name, text) -> Radar:
  1270. get = self.get_sheet(name)
  1271. x = self.get_index(name, True).tolist()
  1272. max_list = [[] for _ in range(len(x))] # 保存每个x栏目的最大值
  1273. data = [] # y的组成数据,包括name和list
  1274. x_list = [] # 保存x的数据
  1275. for i in get.iteritems(): # 按列迭代计算每一项的abcd
  1276. q = i[1].tolist()
  1277. add = []
  1278. for a in range(len(q)):
  1279. try:
  1280. f = float(q[a])
  1281. max_list[a].append(f)
  1282. add.append(f)
  1283. except BaseException as e:
  1284. logging.warning(str(e))
  1285. data.append([f"{i[0]}", [add]]) # add是包含在一个list中的
  1286. for i in range(len(max_list)): # 计算x_list
  1287. x_list.append(opts.RadarIndicatorItem(name=x[i], max_=max(max_list[i])))
  1288. args = self.parsing_parameters(text)
  1289. c = (
  1290. Radar(**self.init_setting(args))
  1291. .add_schema(schema=x_list)
  1292. .set_global_opts(
  1293. **self.global_set(args, f"{name}雷达图", 0, 100, False, False)
  1294. )
  1295. )
  1296. k = self.special_setting(args, "Radar")
  1297. for i in data:
  1298. c.add(
  1299. *i, **self.yaxis_label(args), color=self.get_random_color(), **k
  1300. ) # 对i解包,取得name和data 随机颜色
  1301. self.all_render[f"{name}雷达图[{len(self.all_render)}]{self.get_title(args)}"] = c
  1302. return c
  1303. def to_funnel(self, name, text) -> Funnel:
  1304. get = self.get_sheet(name)
  1305. y_name = self.get_index(name, True).tolist() # 拿行名
  1306. value = []
  1307. y = []
  1308. for r in range(len(y_name)):
  1309. try:
  1310. v = float(eval(f"get.iloc[{r},0]", {"get": get}))
  1311. except ValueError:
  1312. continue
  1313. value.append([f"{y_name[r]}", v])
  1314. y.append(v)
  1315. args = self.parsing_parameters(text)
  1316. c = (
  1317. Funnel(**self.init_setting(args))
  1318. .add(f"{name}", value, **self.yaxis_label(args, "top"))
  1319. .set_global_opts(
  1320. **self.global_set(args, f"{name}漏斗图", min(y), max(y), True, False)
  1321. )
  1322. )
  1323. self.all_render[f"{name}漏斗图[{len(self.all_render)}]{self.get_title(args)}"] = c
  1324. return c
  1325. def to_calendar(self, name, text) -> Calendar:
  1326. get = self.get_sheet(name)
  1327. data = [[] for _ in self.get_column(name, True)]
  1328. x_name = self.get_column(name, True).tolist()
  1329. y = []
  1330. for i in get.iterrows():
  1331. date = str(i[0]) # 时间数据
  1332. q = i[1].tolist()
  1333. for a in range(len(q)):
  1334. try:
  1335. data[a].append([date, q[a]])
  1336. y.append(float(q[a]))
  1337. except BaseException as e:
  1338. logging.warning(str(e))
  1339. args = self.parsing_parameters(text)
  1340. if not y:
  1341. y = [0, 100]
  1342. args["show_Visual_mapping"] = False # 关闭视觉映射
  1343. c = Calendar(**self.init_setting(args)).set_global_opts(
  1344. **self.global_set(args, f"{name}日历图", min(y), max(y), True)
  1345. )
  1346. for i in range(len(x_name)):
  1347. start_date = data[i][0][0]
  1348. end_date = data[i][-1][0]
  1349. c.add(
  1350. str(x_name[i]),
  1351. data[i],
  1352. calendar_opts=opts.CalendarOpts(range_=[start_date, end_date]),
  1353. **self.yaxis_label(args),
  1354. )
  1355. self.all_render[f"{name}日历图[{len(self.all_render)}]{self.get_title(args)}"] = c
  1356. return c
  1357. def to_theme_river(self, name, text) -> ThemeRiver:
  1358. get = self.get_sheet(name)
  1359. data = []
  1360. x_name = self.get_column(name, True).tolist()
  1361. y = []
  1362. for i in get.iterrows():
  1363. date = str(i[0])
  1364. q = i[1].tolist()
  1365. for a in range(len(x_name)):
  1366. try:
  1367. data.append([date, q[a], x_name[a]])
  1368. y.append(float(q[a]))
  1369. except BaseException as e:
  1370. logging.warning(str(e))
  1371. args = self.parsing_parameters(text)
  1372. if not y:
  1373. y = [0, 100]
  1374. args["show_Visual_mapping"] = False # 关闭视觉映射
  1375. c = (
  1376. ThemeRiver(**self.init_setting(args))
  1377. # 抑制大小
  1378. .add(
  1379. x_name,
  1380. data,
  1381. singleaxis_opts=opts.SingleAxisOpts(
  1382. type_=args["x_type"], pos_bottom="10%"
  1383. ),
  1384. ).set_global_opts(
  1385. **self.global_set(args, f"{name}河流图", min(y), max(y), True, False)
  1386. )
  1387. )
  1388. self.all_render[f"{name}河流图[{len(self.all_render)}]{self.get_title(args)}"] = c
  1389. return c
  1390. @plugin_class_loading(get_path(r"template/datascience"))
  1391. class RelationshipPlot(Render):
  1392. def to_sunburst(self, name, text) -> Sunburst:
  1393. get = self.get_sheet(name)
  1394. def convert_data(iter_object, name_):
  1395. k = {"name": name_, "children": []}
  1396. v = 0
  1397. for i in iter_object:
  1398. content = iter_object[i]
  1399. if isinstance(content, dict):
  1400. new_c = convert_data(content, str(i))
  1401. v += new_c["value"]
  1402. k["children"].append(new_c)
  1403. else:
  1404. try:
  1405. q = float(content)
  1406. except ValueError:
  1407. q = len(str(content))
  1408. v += q
  1409. k["children"].append({"name": f"{i}={content}", "value": q})
  1410. k["value"] = v
  1411. return k
  1412. data = convert_data(get.to_dict(), name)["children"]
  1413. args = self.parsing_parameters(text)
  1414. c = (
  1415. Sunburst()
  1416. .add(
  1417. series_name=f"{name}",
  1418. data_pair=data,
  1419. radius=[abs(args["Size"] - 10), "90%"],
  1420. )
  1421. .set_global_opts(
  1422. **self.global_set(args, f"{name}旭日图", 0, 100, False, False)
  1423. )
  1424. .set_series_opts(label_opts=opts.LabelOpts(formatter="{b}"))
  1425. )
  1426. self.all_render[f"{name}旭日图[{len(self.all_render)}]{self.get_title(args)}"] = c
  1427. return c
  1428. def to_tree(self, name, text) -> Tree:
  1429. get = self.get_sheet(name)
  1430. def convert_data(iter_object, name_):
  1431. k = {"name": name_, "children": []}
  1432. for i in iter_object:
  1433. content = iter_object[i]
  1434. if isinstance(content, dict):
  1435. new_children = convert_data(content, str(i))
  1436. k["children"].append(new_children)
  1437. else:
  1438. k["children"].append(
  1439. {"name": f"{i}", "children": [{"name": f"{content}"}]}
  1440. )
  1441. return k
  1442. data = [convert_data(get.to_dict(), name)]
  1443. args = self.parsing_parameters(text)
  1444. c = (
  1445. Tree()
  1446. .add(f"{name}", data)
  1447. .set_global_opts(
  1448. **self.global_set(args, f"{name}树状图", 0, 100, False, False)
  1449. )
  1450. )
  1451. self.all_render[f"{name}树状图[{len(self.all_render)}]{self.get_title(args)}"] = c
  1452. return c
  1453. def to_tree_map(self, name, text) -> TreeMap:
  1454. get = self.get_sheet(name)
  1455. def convert_data(iter_object, name_):
  1456. k = {"name": name_, "children": []}
  1457. v = 0
  1458. for i in iter_object:
  1459. content = iter_object[i]
  1460. if isinstance(content, dict):
  1461. new_c = convert_data(content, str(i))
  1462. v += new_c["value"]
  1463. k["children"].append(new_c)
  1464. else:
  1465. try:
  1466. q = float(content)
  1467. except ValueError:
  1468. q = len(str(content))
  1469. v += q
  1470. k["children"].append({"name": f"{i}={content}", "value": q})
  1471. k["value"] = v
  1472. return k
  1473. data = convert_data(get.to_dict(), name)["children"]
  1474. args = self.parsing_parameters(text)
  1475. c = (
  1476. TreeMap()
  1477. .add(
  1478. f"{name}",
  1479. data,
  1480. label_opts=opts.LabelOpts(is_show=True, position="inside"),
  1481. )
  1482. .set_global_opts(
  1483. **self.global_set(args, f"{name}矩形树图", 0, 100, False, False)
  1484. )
  1485. )
  1486. self.all_render[f"{name}矩形树图[{len(self.all_render)}]{self.get_title(args)}"] = c
  1487. return c
  1488. def to_scattergeo(self, name, text) -> Geo:
  1489. get = self.get_sheet(name)
  1490. column = self.get_column(name, True).tolist()
  1491. data_type = ["scatter" for _ in column]
  1492. data = [[] for _ in column]
  1493. y = []
  1494. for i in get.iterrows(): # 按行迭代
  1495. map_ = str(i[0])
  1496. q = i[1].tolist()
  1497. for a in range(len(q)):
  1498. try:
  1499. v = float(q[a])
  1500. y.append(v)
  1501. except ValueError:
  1502. v = str(q[a])
  1503. try:
  1504. if v[:5] == "[##S]":
  1505. # 特效图
  1506. v = float(v[5:])
  1507. y.append(v)
  1508. column.append(column[a])
  1509. data_type.append(GeoType.EFFECT_SCATTER)
  1510. data.append([])
  1511. a = -1
  1512. elif v[:5] == "[##H]":
  1513. # 特效图
  1514. v = float(v[5:])
  1515. y.append(v)
  1516. column.append(column[a])
  1517. data_type.append(GeoType.HEATMAP)
  1518. data.append([])
  1519. a = -1
  1520. else:
  1521. assert False
  1522. except (AssertionError, ValueError):
  1523. data_type[a] = GeoType.LINES # 当前变为Line
  1524. data[a].append((map_, v))
  1525. args = self.parsing_parameters(text)
  1526. args["show_Visual_mapping"] = True # 必须视觉映射
  1527. if not y:
  1528. y = [0, 100]
  1529. if args["is_Dark"]:
  1530. g = {
  1531. "itemstyle_opts": opts.ItemStyleOpts(
  1532. color="#323c48", border_color="#111"
  1533. )
  1534. }
  1535. else:
  1536. g = {}
  1537. c = (
  1538. Geo().add_schema(maptype=str(args["Map"]), **g)
  1539. # 必须要有视觉映射(否则会显示奇怪的数据)
  1540. .set_global_opts(
  1541. **self.global_set(args, f"{name}Geo点地图", min(y), max(y), False)
  1542. )
  1543. )
  1544. for i in range(len(data)):
  1545. if data_type[i] != GeoType.LINES:
  1546. ka = dict(
  1547. symbol=args["Symbol"],
  1548. symbol_size=args["Size"],
  1549. color="#1E90FF" if args["is_Dark"] else "#0000FF",
  1550. )
  1551. else:
  1552. ka = dict(
  1553. symbol=SymbolType.ARROW,
  1554. symbol_size=6,
  1555. effect_opts=opts.EffectOpts(
  1556. symbol=SymbolType.ARROW, symbol_size=6, color="blue"
  1557. ),
  1558. linestyle_opts=opts.LineStyleOpts(
  1559. curve=0.2, color="#FFF8DC" if args["is_Dark"] else "#000000"
  1560. ),
  1561. )
  1562. c.add(f"{column[i]}", data[i], type_=data_type[i], **ka)
  1563. c.set_series_opts(label_opts=opts.LabelOpts(is_show=False)) # 不显示数据,必须放在add后面生效
  1564. self.all_render[
  1565. f"{name}Geo点地图[{len(self.all_render)}]{self.get_title(args)}"
  1566. ] = c
  1567. return c
  1568. @plugin_class_loading(get_path(r"template/datascience"))
  1569. class GeographyPlot(Render):
  1570. def to_map(self, name, text) -> Map:
  1571. get = self.get_sheet(name)
  1572. column = self.get_column(name, True).tolist()
  1573. data = [[] for _ in column]
  1574. y = []
  1575. for i in get.iterrows(): # 按行迭代
  1576. map_ = str(i[0])
  1577. q = i[1].tolist()
  1578. for a in range(len(q)):
  1579. try:
  1580. v = float(q[a])
  1581. y.append(v)
  1582. data[a].append((map_, v))
  1583. except BaseException as e:
  1584. logging.warning(str(e))
  1585. args = self.parsing_parameters(text)
  1586. args["show_Visual_mapping"] = True # 必须视觉映射
  1587. if not y:
  1588. y = [0, 100]
  1589. if args["map_Type"] == "GLOBE":
  1590. func = MapGlobe
  1591. else:
  1592. func = Map
  1593. c = func().set_global_opts(
  1594. **self.global_set(args, f"{name}Map地图", min(y), max(y), False)
  1595. ) # 必须要有视觉映射(否则会显示奇怪的数据)
  1596. for i in range(len(data)):
  1597. c.add(
  1598. f"{column[i]}",
  1599. data[i],
  1600. str(args["Map"]),
  1601. is_map_symbol_show=args["show_Map_Symbol"],
  1602. symbol=args["Symbol"],
  1603. **self.yaxis_label(args),
  1604. )
  1605. self.all_render[
  1606. f"{name}Map地图[{len(self.all_render)}]{self.get_title(args)}"
  1607. ] = c
  1608. return c
  1609. def to_geo(self, name, text) -> Geo:
  1610. get = self.get_sheet(name)
  1611. column = self.get_column(name, True).tolist()
  1612. index = self.get_index(name, True).tolist()
  1613. args = self.parsing_parameters(text)
  1614. args["show_Visual_mapping"] = True # 必须视觉映射
  1615. if args["is_Dark"]:
  1616. g = {
  1617. "itemstyle_opts": opts.ItemStyleOpts(
  1618. color="#323c48", border_color="#111"
  1619. )
  1620. }
  1621. else:
  1622. g = {}
  1623. c = Geo().add_schema(maptype=str(args["Map"]), **g)
  1624. m = []
  1625. for y in column: # 维度
  1626. for x in index: # 精度
  1627. value = get.loc[x, y]
  1628. type_ = "scatter"
  1629. try:
  1630. v = float(value) # 数值
  1631. type_ = args["Geo_Type"]
  1632. except ValueError:
  1633. try:
  1634. q = str(value)
  1635. v = float(value[5:])
  1636. if q[:5] == "[##S]": # 点图
  1637. type_ = GeoType.SCATTER
  1638. elif q[:5] == "[##E]": # 带点特效
  1639. type_ = GeoType.EFFECT_SCATTER
  1640. else: # 画线
  1641. v = q.split(";")
  1642. c.add_coordinate(
  1643. name=f"({v[0]},{v[1]})",
  1644. longitude=float(v[0]),
  1645. latitude=float(v[1]),
  1646. )
  1647. c.add_coordinate(
  1648. name=f"({x},{y})", longitude=float(x), latitude=float(y)
  1649. )
  1650. c.add(
  1651. f"{name}",
  1652. [[f"({x},{y})", f"({v[0]},{v[1]})"]],
  1653. type_=GeoType.LINES,
  1654. effect_opts=opts.EffectOpts(
  1655. symbol=SymbolType.ARROW, symbol_size=6, color="blue"
  1656. ),
  1657. linestyle_opts=opts.LineStyleOpts(
  1658. curve=0.2,
  1659. color="#FFF8DC" if args["is_Dark"] else "#000000",
  1660. ),
  1661. )
  1662. c.add(
  1663. f"{name}_XY",
  1664. [[f"({x},{y})", 5], [f"({v[0]},{v[1]})", 5]],
  1665. type_=GeoType.EFFECT_SCATTER,
  1666. color="#1E90FF" if args["is_Dark"] else "#0000FF",
  1667. )
  1668. assert False # continue
  1669. except (ValueError, TypeError, AssertionError):
  1670. continue
  1671. try:
  1672. c.add_coordinate(
  1673. name=f"({x},{y})", longitude=float(x), latitude=float(y)
  1674. )
  1675. c.add(
  1676. f"{name}",
  1677. [[f"({x},{y})", v]],
  1678. type_=type_,
  1679. symbol=args["Symbol"],
  1680. symbol_size=args["Size"],
  1681. )
  1682. if type_ == GeoType.HEATMAP:
  1683. c.add(
  1684. f"{name}_XY",
  1685. [[f"({x},{y})", v]],
  1686. type_="scatter",
  1687. color="#1E90FF" if args["is_Dark"] else "#0000FF",
  1688. )
  1689. m.append(v)
  1690. except BaseException as e:
  1691. logging.warning(str(e))
  1692. if not m:
  1693. m = [0, 100]
  1694. c.set_series_opts(label_opts=opts.LabelOpts(is_show=False)) # 不显示
  1695. c.set_global_opts(
  1696. **self.global_set(args, f"{name}Geo地图", min(m), max(m), False)
  1697. )
  1698. self.all_render[
  1699. f"{name}Geo地图[{len(self.all_render)}]{self.get_title(args)}"
  1700. ] = c
  1701. return c
  1702. @plugin_class_loading(get_path(r"template/datascience"))
  1703. class WordPlot(Render):
  1704. def to_word_cloud(self, name, text) -> WordCloud:
  1705. get = self.get_sheet(name)
  1706. data = []
  1707. for i in get.iterrows(): # 按行迭代
  1708. try:
  1709. data.append([str(i[0]), float(i[1].tolist()[0])])
  1710. except BaseException as e:
  1711. logging.warning(str(e))
  1712. args = self.parsing_parameters(text)
  1713. c = (
  1714. WordCloud(**self.init_setting(args))
  1715. .add(f"{name}", data, **self.special_setting(args, "WordCloud"))
  1716. .set_global_opts(**self.global_set(args, f"{name}词云", 0, 100, False, False))
  1717. )
  1718. self.all_render[f"{name}词云[{len(self.all_render)}]{self.get_title(args)}"] = c
  1719. return c
  1720. def to_liquid(self, name, text) -> Liquid:
  1721. get = self.get_sheet(name)
  1722. data = str(get.iloc[0, 0])
  1723. c = data.split(".")
  1724. try:
  1725. data = float(f"0.{c[1]}")
  1726. except ValueError:
  1727. data = float(f"0.{c[0]}")
  1728. args = self.parsing_parameters(text)
  1729. c = (
  1730. Liquid(**self.init_setting(args))
  1731. .add(f"{name}", [data, data])
  1732. .set_global_opts(
  1733. title_opts=opts.TitleOpts(title=f"{name}水球图", subtitle="CoTan~数据处理")
  1734. )
  1735. )
  1736. self.all_render[f"{name}水球图[{len(self.all_render)}]{self.get_title(args)}"] = c
  1737. return c
  1738. def to_gauge(self, name, text) -> Gauge:
  1739. get = self.get_sheet(name)
  1740. data = float(get.iloc[0, 0])
  1741. if data > 100:
  1742. data = str(data / 100)
  1743. c = data.split(".")
  1744. try:
  1745. data = float(f"0.{c[1]}") * 100
  1746. except ValueError:
  1747. data = float(f"0.{data}") * 100
  1748. args = self.parsing_parameters(text)
  1749. c = (
  1750. Gauge(**self.init_setting(args))
  1751. .add(f"{name}", [(f"{name}", data)])
  1752. .set_global_opts(
  1753. title_opts=opts.TitleOpts(title=f"{name}仪表图", subtitle="CoTan~数据处理")
  1754. )
  1755. )
  1756. self.all_render[f"{name}仪表图[{len(self.all_render)}]{self.get_title(args)}"] = c
  1757. return c
  1758. @plugin_class_loading(get_path(r"template/datascience"))
  1759. class SolidPlot(Render):
  1760. def to_bar3d(self, name, text) -> Bar3D:
  1761. get = self.get_sheet(name)
  1762. x = self.get_column(name, True).tolist() # 图的x轴,下侧,列名
  1763. y = self.get_index(name, True).tolist() # 图的y轴,左侧,行名
  1764. value_list = []
  1765. q = []
  1766. for c in range(len(x)): # c-列,r-行
  1767. for r in range(len(y)):
  1768. try:
  1769. v = eval(f"get.iloc[{r},{c}]", {"get": get}) # 先行后列
  1770. value_list.append([c, r, v])
  1771. q.append(float(v))
  1772. except BaseException as e:
  1773. logging.warning(str(e))
  1774. args = self.parsing_parameters(text)
  1775. if not q:
  1776. q = [0, 100]
  1777. args["show_Visual_mapping"] = False # 关闭视觉映射
  1778. c = (
  1779. Bar3D(**self.init_setting(args))
  1780. .add(
  1781. f"{name}",
  1782. value_list,
  1783. xaxis3d_opts=opts.Axis3DOpts(list(map(str, x)), type_=args["x_type"]),
  1784. yaxis3d_opts=opts.Axis3DOpts(list(map(str, y)), type_=args["y_type"]),
  1785. zaxis3d_opts=opts.Axis3DOpts(type_=args["z_type"]),
  1786. )
  1787. .set_global_opts(
  1788. **self.global_set(args, f"{name}3D柱状图", min(q), max(q), True),
  1789. )
  1790. )
  1791. if args["bar_Stacking"]:
  1792. c.set_series_opts(**{"stack": "stack"}) # 层叠
  1793. self.all_render[
  1794. f"{name}3D柱状图[{len(self.all_render)}]{self.get_title(args)}"
  1795. ] = c
  1796. return c
  1797. def to_scatter3d(self, name, text) -> Scatter3D:
  1798. get = self.get_sheet(name)
  1799. x = self.get_column(name, True).tolist() # 图的x轴,下侧,列名
  1800. y = self.get_index(name, True).tolist() # 图的y轴,左侧,行名
  1801. value_list = []
  1802. q = []
  1803. for c in range(len(x)): # c-列,r-行
  1804. for r in range(len(y)):
  1805. try:
  1806. v = eval(f"get.iloc[{r},{c}]", {"get": get}) # 先行后列
  1807. value_list.append([c, r, v])
  1808. q.append(float(v))
  1809. except BaseException as e:
  1810. logging.warning(str(e))
  1811. args = self.parsing_parameters(text)
  1812. if not q:
  1813. q = [0, 100]
  1814. args["show_Visual_mapping"] = False # 关闭视觉映射
  1815. c = (
  1816. Scatter3D(**self.init_setting(args))
  1817. .add(
  1818. f"{name}",
  1819. value_list,
  1820. xaxis3d_opts=opts.Axis3DOpts(list(map(str, x)), type_=args["x_type"]),
  1821. yaxis3d_opts=opts.Axis3DOpts(list(map(str, y)), type_=args["y_type"]),
  1822. zaxis3d_opts=opts.Axis3DOpts(type_=args["z_type"]),
  1823. )
  1824. .set_global_opts(
  1825. **self.global_set(args, f"{name}3D散点图", min(q), max(q), True)
  1826. )
  1827. )
  1828. self.all_render[
  1829. f"{name}3D散点图[{len(self.all_render)}]{self.get_title(args)}"
  1830. ] = c
  1831. return c
  1832. def to_line3d(self, name, text) -> Line3D:
  1833. get = self.get_sheet(name)
  1834. x = self.get_column(name, True).tolist() # 图的x轴,下侧,列名
  1835. y = self.get_index(name, True).tolist() # 图的y轴,左侧,行名
  1836. value_list = []
  1837. q = []
  1838. for c in range(len(x)): # c-列,r-行
  1839. for r in range(len(y)):
  1840. try:
  1841. v = eval(f"get.iloc[{r},{c}]", {"get": get}) # 先行后列
  1842. value_list.append([c, r, v])
  1843. q.append(float(v))
  1844. except BaseException as e:
  1845. logging.warning(str(e))
  1846. args = self.parsing_parameters(text)
  1847. if not q:
  1848. q = [0, 100]
  1849. args["show_Visual_mapping"] = False # 关闭视觉映射
  1850. c = (
  1851. Line3D(**self.init_setting(args))
  1852. .add(
  1853. f"{name}",
  1854. value_list,
  1855. xaxis3d_opts=opts.Axis3DOpts(list(map(str, x)), type_=args["x_type"]),
  1856. yaxis3d_opts=opts.Axis3DOpts(list(map(str, y)), type_=args["y_type"]),
  1857. zaxis3d_opts=opts.Axis3DOpts(type_=args["z_type"]),
  1858. grid3d_opts=opts.Grid3DOpts(width=100, height=100, depth=100),
  1859. )
  1860. .set_global_opts(
  1861. **self.global_set(args, f"{name}3D折线图", min(q), max(q), True)
  1862. )
  1863. )
  1864. self.all_render[
  1865. f"{name}3D折线图[{len(self.all_render)}]{self.get_title(args)}"
  1866. ] = c
  1867. return c
  1868. class MachineLearnerBase(
  1869. AxisPlot, GeneralPlot, RelationshipPlot, GeographyPlot, WordPlot, SolidPlot
  1870. ):
  1871. def __init__(self, *args, **kwargs):
  1872. super().__init__(*args, **kwargs)
  1873. self.learner = {} # 记录机器
  1874. self.learn_dict = {
  1875. "Line": (LinearRegression, ()),
  1876. "Ridge": (Ridge, ("alpha", "max_iter",)),
  1877. "Lasso": (Lasso, ("alpha", "max_iter",)),
  1878. "LogisticRegression": (LogisticRegression, ("C",)),
  1879. "Knn": (KNeighborsClassifier, ("n_neighbors",)),
  1880. "Knn_class": (KNeighborsRegressor, ("n_neighbors",)),
  1881. }
  1882. self.learner_type = {} # 记录机器的类型
  1883. @staticmethod
  1884. def parsing(parameters): # 解析参数
  1885. args = {}
  1886. args_use = {}
  1887. # 输入数据
  1888. exec(parameters, args)
  1889. # 处理数据
  1890. args_use["alpha"] = float(args.get("alpha", 1.0)) # L1和L2正则化用
  1891. args_use["C"] = float(args.get("C", 1.0)) # L1和L2正则化用
  1892. args_use["max_iter"] = int(args.get("max_iter", 1000)) # L1和L2正则化用
  1893. args_use["n_neighbors"] = int(args.get("K_knn", 5)) # knn邻居数 (命名不同)
  1894. args_use["nDim_2"] = bool(args.get("nDim_2", True)) # 数据是否降维
  1895. return args_use
  1896. def get_learner(self, name):
  1897. return self.learner[name]
  1898. def get_learner_type(self, name):
  1899. return self.learner_type[name]
  1900. @plugin_class_loading(get_path(r"template/datascience"))
  1901. class VisualLearner(MachineLearnerBase):
  1902. def visual_learner(self, learner, new=False): # 显示参数
  1903. learner = self.get_learner(learner)
  1904. learner_type = self.get_learner_type(learner)
  1905. if learner_type in ("Ridge", "Lasso"):
  1906. alpha = learner.alpha # 阿尔法
  1907. w = learner.coef_.tolist() # w系数
  1908. b = learner.intercept_ # 截距
  1909. max_iter = learner.max_iter
  1910. w_name = [f"权重:W[{i}]" for i in range(len(w))]
  1911. index = ["阿尔法:Alpha"] + w_name + ["截距:b", "最大迭代数"]
  1912. data = [alpha] + w + [b] + [max_iter]
  1913. # 文档
  1914. doc = (
  1915. f"阿尔法:alpha = {alpha}\n\n权重:\nw = \n{pd.DataFrame(w)}\n\n截距:b = {b}\n\n最大迭代数:{max_iter}"
  1916. f"\n\n\nEND"
  1917. )
  1918. data = pd.DataFrame(data, index=index)
  1919. elif learner_type in ("Line",):
  1920. w = learner.coef_.tolist() # w系数
  1921. b = learner.intercept_
  1922. index = [f"权重:W[{i}]" for i in range(len(w))] + ["截距:b"]
  1923. data = w + [b] # 截距
  1924. # 文档
  1925. doc = f"权重:w = \n{pd.DataFrame(w)}\n\n截距:b = {b}\n\n\nEND"
  1926. data = pd.DataFrame(data, index=index)
  1927. elif learner_type in ("Knn",): # Knn_class
  1928. classes = learner.classes_.tolist() # 分类
  1929. n = learner.n_neighbors # 个数
  1930. p = {1: "曼哈顿距离", 2: "欧几里得距离"}.get(learner.p)
  1931. index = [f"类目[{i}]" for i in range(len(classes))] + ["邻居个数", "距离公式"]
  1932. data = classes + [n, p]
  1933. doc = f"分类类目:\n{pd.DataFrame(classes)}\n\n邻居个数:{n}\n\n计算距离的方式:{p}\n\n\nEND"
  1934. data = pd.DataFrame(data, index=index)
  1935. elif learner_type in ("Knn_class",):
  1936. n = learner.n_neighbors # 个数
  1937. p = {1: "曼哈顿距离", 2: "欧几里得距离"}.get(learner.p)
  1938. index = ["邻居个数", "距离公式"]
  1939. data = [n, p]
  1940. doc = f"邻居个数:{n}\n\n计算距离的方式:{p}\n\n\nEND"
  1941. data = pd.DataFrame(data, index=index)
  1942. elif learner_type in ("LogisticRegression",):
  1943. classes = learner.classes_.tolist() # 分类
  1944. w = learner.coef_.tolist() # w系数
  1945. b = learner.intercept_
  1946. c = learner.C
  1947. index = (
  1948. [f"类目[{i}]" for i in range(len(classes))]
  1949. + [f"权重:W[{j}][{i}]" for i in range(len(w)) for j in range(len(w[i]))]
  1950. + [f"截距:b[{i}]" for i in range(len(b))]
  1951. + ["C"]
  1952. )
  1953. data = classes + [j for i in w for j in i] + [i for i in b] + [c]
  1954. doc = f"分类类目:\n{pd.DataFrame(classes)}\n\n权重:w = \n{pd.DataFrame(w)}\n\n截距:b = {b}\n\nC={c}\n\n\n"
  1955. data = pd.DataFrame(data, index=index)
  1956. else:
  1957. return "", []
  1958. if new:
  1959. self.add_sheet(data, f"{learner}:属性")
  1960. return doc, data
  1961. @plugin_class_loading(get_path(r"template/datascience"))
  1962. class Learner(MachineLearnerBase):
  1963. def decision_tree_classifier(self, name): # 特征提取
  1964. get = self.get_sheet(name)
  1965. dver = DictVectorizer()
  1966. get_dic = get.to_dict(orient="records")
  1967. new = dver.fit_transform(get_dic).toarray()
  1968. dec = pd.DataFrame(new, columns=dver.feature_names_)
  1969. self.add_sheet(dec, f"{name}:特征")
  1970. return dec
  1971. def training_machine_core(
  1972. self, name, learner, score_only=False, down_ndim=True, split=0.3, **kwargs
  1973. ):
  1974. get = self.get_sheet(name)
  1975. x = get.to_numpy()
  1976. y = self.get_index(name, True) # 获取y值(用index作为y)
  1977. if down_ndim or x.ndim == 1: # 执行降维处理(也包括升维,ravel让一切变成一维度,包括数字)
  1978. a = x
  1979. x = []
  1980. for i in a:
  1981. try:
  1982. c = i.np.ravel(a[i], "C")
  1983. x.append(c)
  1984. except ValueError:
  1985. x.append(i)
  1986. x = np.array(x)
  1987. model = self.get_learner(learner)
  1988. if not score_only: # 只计算得分,全部数据用于测试
  1989. train_x, test_x, train_y, test_y = train_test_split(x, y, test_size=split)
  1990. model.fit(train_x, train_y)
  1991. train_score = model.score(train_x, train_y)
  1992. test_score = model.score(test_x, test_y)
  1993. return train_score, test_score
  1994. test_score = model.score(x, y)
  1995. return 0, test_score
  1996. def training_machine(self, name, learnner, parameters="", **kwargs):
  1997. type_ = self.get_learner_type(learnner)
  1998. args_use = self.parsing(parameters)
  1999. if type_ in (
  2000. "Line",
  2001. "Ridge",
  2002. "Lasso",
  2003. "LogisticRegression",
  2004. "Knn",
  2005. "Knn_class",
  2006. ):
  2007. return self.training_machine_core(
  2008. name, learnner, down_ndim=args_use["nDim_2"], **kwargs
  2009. )
  2010. def predict_simp(self, name, learner, down_ndim=True, **kwargs):
  2011. get = self.get_sheet(name)
  2012. column = self.get_column(name, True)
  2013. x = get.to_numpy()
  2014. if down_ndim or x.ndim == 1: # 执行降维处理(也包括升维,ravel让一切变成一维度,包括数字)
  2015. a = x
  2016. x = []
  2017. for i in a:
  2018. try:
  2019. c = i.np.ravel(a[i], "C")
  2020. x.append(c)
  2021. except ValueError:
  2022. x.append(i)
  2023. x = np.array(x)
  2024. model = self.get_learner(learner)
  2025. answer = model.predict(x)
  2026. data = pd.DataFrame(x, index=answer, columns=column)
  2027. self.add_sheet(data, f"{name}:预测")
  2028. return data
  2029. def predict(self, name, learner, parameters="", **kwargs):
  2030. type_ = self.get_learner_type(learner)
  2031. args_use = self.parsing(parameters)
  2032. if type_ in (
  2033. "Line",
  2034. "Ridge",
  2035. "Lasso",
  2036. "LogisticRegression",
  2037. "Knn",
  2038. "Knn_class",
  2039. ):
  2040. return self.predict_simp(
  2041. name, learner, down_ndim=args_use["nDim_2"], **kwargs
  2042. )