template.py 82 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160216121622163216421652166216721682169217021712172217321742175217621772178217921802181218221832184218521862187218821892190219121922193
  1. from abc import ABCMeta, abstractmethod
  2. from random import randint
  3. import re
  4. from os import getcwd
  5. import numpy as np
  6. from sklearn.feature_extraction import DictVectorizer
  7. from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
  8. from sklearn.linear_model import *
  9. from sklearn.model_selection import train_test_split
  10. from pyecharts.components import Table
  11. from pyecharts.globals import SymbolType
  12. from pyecharts.charts import *
  13. from pyecharts import options as opts
  14. import pandas as pd
  15. import pandas_profiling as pp
  16. from pyecharts.globals import CurrentConfig
  17. from pyecharts.globals import GeoType # 地图推荐使用GeoType而不是str
# Point pyecharts' ONLINE_HOST at an "assets/" directory under the current
# working directory (evaluated once, at import time).
# NOTE(review): presumably so rendered pages load their JS from local files
# instead of a CDN — confirm against the pyecharts documentation.
CurrentConfig.ONLINE_HOST = f"{getcwd()}/assets/"
  19. class RowColumnBase(metaclass=ABCMeta):
  20. @abstractmethod
  21. def get_column(self, name, only):
  22. pass
  23. @abstractmethod
  24. def get_index(self, name, only):
  25. pass
  26. @abstractmethod
  27. def get_sheet(self, name, all_row=None, all_colunm=None) -> pd.DataFrame:
  28. pass
  29. class FormBase(RowColumnBase):
  30. def __init__(self, *args, **kwargs):
  31. class Del:
  32. pass
  33. self.sheet_dict = {}
  34. self.clean_func = {}
  35. self.clean_func_code = {}
  36. self.DEL = Del()
  37. self.Name = {"pd": pd, "DEL": self.DEL, "re": re, "Sheet": self.sheet_dict}
  38. self.all_render = {} # 存放所有的图
  39. class SheetIO(FormBase):
  40. def add_sheet(self, data, name=""):
  41. if name == "":
  42. name = f"Sheet[{len(self.sheet_dict)}]"
  43. else:
  44. name += f"_[{len(self.sheet_dict)}]"
  45. self.sheet_dict[name] = data
  46. return data
  47. def __add_sheet(self, data_dir, func, name="", index=True, **kwargs): # 新增表格的核心方式
  48. try:
  49. data = func(data_dir, **kwargs)
  50. except UnicodeDecodeError: # 找不到编码方式
  51. return False
  52. if not index:
  53. data.index = data.iloc[:, 0].tolist()
  54. data.drop(data.columns.values.tolist()[0], inplace=True, axis=1)
  55. return self.add_sheet(data, name)
  56. def add_csv(
  57. self, data_dir, name="", sep=",", encodeding="utf-8", str_=True, index=True
  58. ):
  59. if str_:
  60. k = {"dtype": "object"}
  61. else:
  62. k = {}
  63. return self.__add_sheet(
  64. data_dir, pd.read_csv, name, index, sep=sep, encoding=encodeding, **k
  65. )
  66. def add_python(self, python_file, sheet_name="") -> pd.DataFrame:
  67. name = {"Sheet": self.get_sheet}
  68. name.update(globals().copy())
  69. name.update(locals().copy())
  70. exec(python_file, name)
  71. exec("get = Creat()", name)
  72. if isinstance(name["get"], pd.DataFrame): # 已经是DataFram
  73. get = name["get"]
  74. elif isinstance(name["get"], np.array):
  75. if bool(name.get("downNdim", False)): # 执行降或升维操作
  76. a = name["get"]
  77. array = []
  78. for i in a:
  79. try:
  80. c = i.np.ravel(a[i], "C")
  81. array.append(c)
  82. except BaseException:
  83. array.append(i)
  84. get = pd.DataFrame(array)
  85. else:
  86. array = name["get"].tolist()
  87. get = pd.DataFrame(array)
  88. else:
  89. try:
  90. get = pd.DataFrame(name["get"])
  91. except BaseException:
  92. get = pd.DataFrame([name["get"]])
  93. self.add_sheet(get, sheet_name)
  94. return get
  95. def add_html(self, data_dir, name="", encoding="utf-8", str_=True, index=True):
  96. if str_:
  97. k = {"dtype": "object"}
  98. else:
  99. k = {}
  100. return self.__add_sheet(
  101. data_dir, pd.read_html, name, index, encoding=encoding, **k
  102. )
  103. def get_sheet_list(self):
  104. return list(self.sheet_dict.keys()) # 返回列表
  105. def to_csv(self, name, save_dir, sep=","):
  106. if sep == "":
  107. sep = ","
  108. get = self.get_sheet(name)
  109. get.to_csv(save_dir, sep=sep, na_rep="")
  110. class SheetRender(SheetIO):
  111. def render_html_one(self, name, render_dir=""):
  112. if render_dir == "":
  113. render_dir = f"{name}.html"
  114. get = self.get_sheet(name)
  115. headers = [f"{name}"] + self.get_column(name, True).tolist()
  116. rows = []
  117. table = Table()
  118. for i in get.iterrows(): # 按行迭代
  119. q = i[1].tolist()
  120. rows.append([f"{i[0]}"] + q)
  121. table.add(headers, rows).set_global_opts(
  122. title_opts=opts.ComponentTitleOpts(
  123. title=f"表格:{name}", subtitle="CoTan~数据处理:查看表格"
  124. )
  125. )
  126. table.render(render_dir)
  127. return render_dir
  128. def render_html_all(self, name, tab_render_dir="", render_type=0):
  129. if tab_render_dir == "":
  130. tab_render_dir = f"{name}.html"
  131. # 把要画的sheet放到第一个
  132. sheet_dict = self.sheet_dict.copy()
  133. del sheet_dict[name]
  134. sheet_list = [name] + list(sheet_dict.keys())
  135. class TabNew:
  136. def __init__(self, original_tab):
  137. self.original_tab = original_tab # 一个Tab
  138. def render(self, render_dir):
  139. return self.original_tab.render(render_dir)
  140. # 生成一个显示页面
  141. if render_type == 0:
  142. class TabZero(TabNew):
  143. def add(self, render, k, *more):
  144. self.original_tab.add(render, k)
  145. tab = TabZero(Tab(page_title="CoTan:查看表格")) # 一个Tab
  146. elif render_type == 1:
  147. class TabOne(TabNew):
  148. def add(self, render, *more):
  149. self.original_tab.add(render)
  150. tab = TabOne(Page(page_title="CoTan:查看表格", layout=Page.DraggablePageLayout))
  151. else:
  152. class TabTwo(TabNew):
  153. def add(self, render, *more):
  154. self.original_tab.add(render)
  155. tab = TabTwo(Page(page_title="CoTan:查看表格", layout=Page.SimplePageLayout))
  156. # 迭代添加内容
  157. for name in sheet_list:
  158. try:
  159. get = self.get_sheet(name)
  160. headers = [f"{name}"] + self.get_column(name, True).tolist()
  161. rows = []
  162. table = Table()
  163. for i in get.iterrows(): # 按行迭代
  164. q = i[1].tolist()
  165. rows.append([f"{i[0]}"] + q)
  166. table.add(headers, rows).set_global_opts(
  167. title_opts=opts.ComponentTitleOpts(
  168. title=f"表格:{name}", subtitle="CoTan~数据处理:查看表格"
  169. )
  170. )
  171. tab.add(table, f"表格:{name}")
  172. except BaseException:
  173. pass
  174. tab.render(tab_render_dir)
  175. return tab_render_dir
  176. class SheetReport(SheetIO):
  177. def describe(self, name, new=False): # 生成描述
  178. get = self.get_sheet(name)
  179. des = get.describe()
  180. if new:
  181. self.add_sheet(des, f"{name}_describe[{len(self.sheet_dict)}]")
  182. shape = get.shape
  183. dtype = get.dtypes
  184. n = get.ndim
  185. head = get.head()
  186. tail = get.tail(3)
  187. return (
  188. f"1)基本\n{des}\n\n2)形状:{shape}\n\n3)数据类型\n{dtype}\n\n4)数据维度:{n}\n\n5)头部数据\n{head}"
  189. f"\n\n6)尾部数据\n{tail}\n\n7)行名\n{get.index}\n\n8)列名\n{get.columns}"
  190. )
  191. def sheet_profile_report_core(self, sheet, save_dir):
  192. report = pp.ProfileReport(sheet)
  193. report.to_file(save_dir)
  194. def to_report(self, name, save_dir=""):
  195. if save_dir == "":
  196. save_dir = f"{name}.html"
  197. sheet = self.get_sheet(name)
  198. self.sheet_profile_report_core(sheet, save_dir)
  199. return save_dir
  200. class Rename(SheetIO):
  201. def number_naming(self, name, is_column, save):
  202. get = self.get_sheet(name).copy()
  203. if is_column: # 处理列名
  204. column = self.get_column(name, True)
  205. if save: # 保存原数据
  206. get.loc["column"] = column
  207. get.columns = [i for i in range(len(column))]
  208. else:
  209. row = self.get_index(name, True)
  210. if save:
  211. get.loc[:, "row"] = row
  212. get.index = [i for i in range(len(row))]
  213. self.add_sheet(get, f"{name}")
  214. return get
  215. def name_with_number(self, name, is_column, save):
  216. get = self.get_sheet(name).copy()
  217. if is_column: # 处理列名
  218. column = self.get_column(name, True)
  219. if save: # 保存原数据
  220. get.loc["column"] = column
  221. get.columns = [f"[{i}]{column[i]}" for i in range(len(column))]
  222. else:
  223. row = self.get_index(name, True)
  224. if save:
  225. get.loc[:, "row"] = row
  226. get.index = [f"[{i}]{row[i]}" for i in range(len(row))]
  227. self.add_sheet(get, f"{name}")
  228. return get
  229. def data_naming(self, name, is_column, save, **data_init):
  230. # Date_Init:start,end,freq 任意两样
  231. get = self.get_sheet(name)
  232. if is_column: # 处理列名
  233. column = self.get_column(name, True)
  234. if save: # 保存原数据
  235. get.loc["column"] = column
  236. data_init["periods"] = len(column)
  237. get.columns = pd.date_range(**data_init)
  238. else:
  239. row = self.get_index(name, True)
  240. if save:
  241. get.loc[:, "row"] = row
  242. data_init["periods"] = len(row)
  243. get.index = pd.date_range(**data_init)
  244. self.add_sheet(get, f"{name}")
  245. return get
  246. def time_naming(self, name, is_column, save, **time_init):
  247. # Date_Init:start,end,freq 任意两样
  248. get = self.get_sheet(name)
  249. if is_column: # 处理列名
  250. column = self.get_column(name, True)
  251. if save: # 保存原数据
  252. get.loc["column"] = column
  253. time_init["periods"] = len(column)
  254. get.columns = pd.timedelta_range(**time_init)
  255. else:
  256. row = self.get_index(name, True)
  257. if save:
  258. get.loc[:, "row"] = row
  259. time_init["periods"] = len(row)
  260. get.index = pd.timedelta_range(**time_init)
  261. self.add_sheet(get, f"{name}")
  262. return get
  263. class Sorted(SheetIO):
  264. def sorted_index(self, name, row: bool, new=False, a=True):
  265. get = self.get_sheet(name)
  266. if row: # row-行名排序
  267. sorted_sheet = get.sort_index(axis=0, ascending=a)
  268. else:
  269. sorted_sheet = get.sort_index(axis=1, ascending=a)
  270. if new:
  271. self.add_sheet(sorted_sheet, f"{name}:排序")
  272. return sorted_sheet
  273. def stored_value(self, name, collation, new=False):
  274. get = self.get_sheet(name)
  275. row = get.columns.values
  276. by = []
  277. ascending = []
  278. for i in collation:
  279. by.append(row[i[0]])
  280. ascending.append(i[1])
  281. if len(by) == 1:
  282. by = by[0]
  283. ascending = ascending[0]
  284. sorted_sheet = get.sort_values(by=by, ascending=ascending)
  285. if new:
  286. self.add_sheet(sorted_sheet, f"{name}:排序")
  287. return sorted_sheet
  288. class RowColumn(Rename, Sorted):
  289. def get_column(self, name, only=False): # 列名
  290. get = self.get_sheet(name)
  291. if only:
  292. re = get.columns.values
  293. else:
  294. re = []
  295. loc_list = get.columns.values
  296. a = 0
  297. for i in loc_list:
  298. data = get[i].to_list()
  299. re.append(f"[列号:{a}]{i} -> {data}")
  300. a += 1
  301. return re
  302. def get_index(self, name, only=False):
  303. get = self.get_sheet(name)
  304. if only:
  305. values = get.index.values
  306. else:
  307. values = []
  308. loc_list = get.index.values
  309. a = 0
  310. for i in range(len(loc_list)):
  311. index_num = loc_list[i]
  312. data = get.iloc[i].to_list()
  313. values.append(f"[行号:{a}]{index_num} -> {data}")
  314. a += 1
  315. return values
  316. def replace_index(self, name, is_column, rename, save):
  317. get = self.get_sheet(name)
  318. if is_column:
  319. if save: # 保存原数据
  320. get.loc["column"] = self.get_column(name, True)
  321. new = get.rename(columns=rename)
  322. else:
  323. if save:
  324. get.loc[:, "row"] = self.get_index(name, True)
  325. new = get.rename(index=rename)
  326. self.add_sheet(new, f"{name}")
  327. return new
  328. def change_index(
  329. self,
  330. name: str,
  331. is_column: bool,
  332. iloc: int,
  333. save: bool = True,
  334. drop: bool = False,
  335. ):
  336. get = self.get_sheet(name).copy()
  337. if is_column: # 列名
  338. row = self.get_index(name, True) # 行数据
  339. t = row.tolist()[iloc]
  340. if save: # 保存原数据
  341. get.loc["column"] = self.get_column(name, True)
  342. # new_colums = get.loc[t].values
  343. get.columns = get.loc[t].values
  344. if drop:
  345. get.drop(t, axis=0, inplace=True) # 删除行
  346. else:
  347. column = self.get_column(name, True)
  348. t = column.tolist()[iloc]
  349. if save:
  350. get.loc[:, "row"] = self.get_index(name, True)
  351. get.index = get.loc[:, t].values # 调整
  352. if drop:
  353. get.drop(t, axis=1, inplace=True) # 删除行
  354. self.add_sheet(get, f"{name}")
  355. return get
  356. class SheetSlice(SheetIO):
  357. def get_slice(
  358. self, name, column, row, is_iloc=True, new=False
  359. ): # iloc(Row,Column) or loc
  360. get = self.get_sheet(name)
  361. if is_iloc:
  362. new_sheet = get.iloc[row, column]
  363. else:
  364. new_sheet = get.loc[row, column]
  365. if new:
  366. self.add_sheet(new_sheet, f"{name}:切片")
  367. return new_sheet
  368. def del_slice(self, name, column, row, new):
  369. new_sheet = self.get_sheet(name)
  370. column_list = new_sheet.columns.values
  371. for i in column:
  372. try:
  373. new_sheet = new_sheet.drop(column_list[int(i)], axis=1)
  374. except BaseException:
  375. pass
  376. row_list = new_sheet.index.values
  377. for i in row:
  378. try:
  379. new_sheet = new_sheet.drop(row_list[int(i)])
  380. except BaseException:
  381. pass
  382. if new:
  383. self.add_sheet(new_sheet, f"{name}:删减")
  384. return new_sheet
  385. class DatacleaningFunc(SheetIO):
  386. def add_clean_func(self, code):
  387. name = self.Name.copy()
  388. try:
  389. exec(code, name)
  390. except BaseException:
  391. return False
  392. func_dict = {}
  393. func_dict["Done_Row"] = name.get("Done_Row", [])
  394. func_dict["Done_Column"] = name.get("Done_Column", [])
  395. func_dict["axis"] = name.get("axis", True)
  396. func_dict["check"] = name.get("check", lambda data, x, b, c, d, e: True)
  397. func_dict["done"] = name.get("done", lambda data, x, b, c, d, e: data)
  398. title = (
  399. f"[{name.get('name', f'[{len(self.clean_func)}')}] Done_Row={func_dict['Done_Row']}_Done_Column="
  400. f"{func_dict['Done_Column']}_axis={func_dict['axis']}"
  401. )
  402. self.clean_func[title] = func_dict
  403. self.clean_func_code[title] = code
  404. def get_clean_func(self):
  405. return list(self.clean_func.keys())
  406. def del_clean_func(self, key):
  407. try:
  408. del self.clean_func[key]
  409. del self.clean_func_code[key]
  410. except BaseException:
  411. pass
  412. def del_all_clean_func(self):
  413. self.clean_func = {}
  414. self.clean_func_code = {}
  415. def get_clean_code(self, key):
  416. return self.clean_func_code[key]
  417. def data_clean(self, name):
  418. get = self.get_sheet(name).copy()
  419. for i in list(self.clean_func.values()):
  420. done_row = i["Done_Row"]
  421. done_column = i["Done_Column"]
  422. if done_row == []:
  423. done_row = range(get.shape[0]) # shape=[行,列]#不需要回调
  424. if done_column == []:
  425. done_column = range(get.shape[1]) # shape=[行,列]#不需要回调
  426. if i["axis"]:
  427. axis = 0
  428. else:
  429. axis = 1
  430. check = i["check"]
  431. done = i["done"]
  432. for row in done_row:
  433. for column in done_column:
  434. try:
  435. data = eval(
  436. f"get.iloc[{row},{column}]", {"get": get}
  437. ) # 第一个是行号,然后是列号
  438. column_data = eval(f"get.iloc[{row}]", {"get": get})
  439. row_data = eval(f"get.iloc[:,{column}]", {"get": get})
  440. if not check(
  441. data,
  442. row,
  443. column,
  444. get.copy(),
  445. column_data.copy(),
  446. row_data.copy(),
  447. ):
  448. d = done(
  449. data,
  450. row,
  451. column,
  452. get.copy(),
  453. column_data.copy(),
  454. row_data.copy(),
  455. )
  456. if d == self.DEL:
  457. if axis == 0: # 常规删除
  458. row_list = get.index.values
  459. get = get.drop(row_list[int(row)])
  460. else: # 常规删除
  461. columns_list = get.columns.values
  462. get = get.drop(columns_list[int(row)], axis=1)
  463. else:
  464. # 第一个是行名,然后是列名
  465. exec(f"get.iloc[{row},{column}] = {d}", {"get": get})
  466. except BaseException:
  467. pass
  468. self.add_sheet(get, f"{name}:清洗")
  469. return get
  470. class SheetDtype(SheetIO):
  471. def set_dtype(self, name, column, dtype, wrong):
  472. get = self.get_sheet(name).copy()
  473. for i in range(len(column)):
  474. try:
  475. column[i] = int(column[i])
  476. except BaseException:
  477. pass
  478. if dtype != "":
  479. func_dic = {
  480. "Num": pd.to_numeric,
  481. "Date": pd.to_datetime,
  482. "Time": pd.to_timedelta,
  483. }
  484. if column != []:
  485. get.iloc[:, column] = get.iloc[:, column].apply(
  486. func_dic.get(dtype, pd.to_numeric), errors=wrong
  487. )
  488. else:
  489. get = get.apply(func_dic.get(dtype, pd.to_numeric), errors=wrong)
  490. else:
  491. if column != []:
  492. get.iloc[:, column] = get.iloc[:, column].infer_objects()
  493. else:
  494. get = get.infer_objects()
  495. self.add_sheet(get, f"{name}")
  496. return get
  497. def as_dtype(self, name, column, dtype, wrong):
  498. get = self.get_sheet(name).copy()
  499. for i in range(len(column)):
  500. try:
  501. column[i] = int(column[i])
  502. except BaseException:
  503. pass
  504. func_dic = {
  505. "Int": int,
  506. "Float": float,
  507. "Str": str,
  508. "Date": pd.Timestamp,
  509. "TimeDelta": pd.Timedelta,
  510. }
  511. if column != []:
  512. get.iloc[:, column] = get.iloc[:, column].astype(
  513. func_dic.get(dtype, dtype), errors=wrong
  514. )
  515. else:
  516. get = get.astype(func_dic.get(dtype, dtype), errors=wrong)
  517. self.add_sheet(get, f"{name}")
  518. return get
  519. class DataNan(SheetIO):
  520. def is_nan(self, name):
  521. get = self.get_sheet(name)
  522. bool_nan = pd.isna(get)
  523. return bool_nan
  524. def del_nan(self, name, new):
  525. get = self.get_sheet(name)
  526. clean_sheet = get.dropna(axis=0)
  527. if new:
  528. self.add_sheet(clean_sheet, f"{name}:清洗")
  529. return clean_sheet
  530. class BoolSheet(SheetIO):
  531. def to_bool(self, name, exp, new=False):
  532. get = self.get_sheet(name)
  533. try:
  534. bool_sheet = eval(exp, {"S": get, "Sheet": get.iloc})
  535. if new:
  536. self.add_sheet(bool_sheet, f"{name}:布尔")
  537. return bool_sheet
  538. except BaseException:
  539. return None
  540. class DataSample(SheetIO):
  541. def sample(self, name, new):
  542. get = self.get_sheet(name)
  543. sample = get.sample(frac=1) # 返回比,默认按行打乱
  544. if new:
  545. self.add_sheet(sample, f"{name}:打乱")
  546. return sample
  547. class DataTranspose(SheetIO):
  548. def transpose(self, name, new=True):
  549. get = self.get_sheet(name)
  550. t = get.T.copy() # 复制一份,防止冲突
  551. if new:
  552. self.add_sheet(t, f"{name}.T")
  553. return t
  554. class DataFormBase(SheetRender, SheetReport, RowColumn, SheetSlice, DatacleaningFunc, SheetDtype, DataNan, BoolSheet,
  555. DataSample, DataTranspose):
  556. def get_sheet(self, name, all_row=None, all_colunm=None) -> pd.DataFrame:
  557. try:
  558. pd.set_option("display.max_rows", all_row)
  559. pd.set_option("display.max_columns", all_colunm)
  560. except BaseException:
  561. pass
  562. return self.sheet_dict[name]
  563. def del_sheet(self, name):
  564. del self.sheet_dict[name]
  565. class PlotBase(DataFormBase):
  566. def parsing_parameters(self, text): # 解析文本参数
  567. args = {} # 解析到的参数
  568. exec(text, args)
  569. args_use = {} # 真实的参数
  570. # 标题设置,global
  571. args_use["title"] = args.get("title", None)
  572. args_use["vice_title"] = args.get("vice_title", "CoTan~数据处理:")
  573. # 图例设置global
  574. args_use["show_Legend"] = bool(args.get("show_Legend", True)) # 是否显示图例
  575. args_use["ori_Legend"] = args.get("ori_Legend", "horizontal") # 朝向
  576. # 视觉映射设置global
  577. args_use["show_Visual_mapping"] = bool(
  578. args.get("show_Visual_mapping", True)
  579. ) # 是否显示视觉映射
  580. args_use["is_color_Visual_mapping"] = bool(
  581. args.get("is_color_Visual_mapping", True)
  582. ) # 颜色 or 大小
  583. args_use["min_Visual_mapping"] = args.get(
  584. "min_Visual_mapping", None
  585. ) # 最小值(None表示现场计算)
  586. args_use["max_Visual_mapping"] = args.get(
  587. "max_Visual_mapping", None
  588. ) # 最大值(None表示现场计算)
  589. args_use["color_Visual_mapping"] = args.get(
  590. "color_Visual_mapping", None
  591. ) # 颜色列表
  592. args_use["size_Visual_mapping"] = args.get("size_Visual_mapping", None) # 大小列表
  593. args_use["text_Visual_mapping"] = args.get("text_Visual_mapping", None) # 文字
  594. args_use["is_Subsection"] = bool(args.get("is_Subsection", False)) # 分段类型
  595. args_use["Subsection_list"] = args.get("Subsection_list", []) # 分段列表
  596. args_use["ori_Visual"] = args.get("ori_Visual", "vertical") # 朝向
  597. # 工具箱设置global
  598. args_use["Tool_BOX"] = bool(args.get("Tool_BOX", True)) # 开启工具箱
  599. # Init设置global
  600. args_use["Theme"] = args.get("Theme", "white") # 设置style
  601. args_use["BG_Color"] = args.get("BG_Color", None) # 设置背景颜色
  602. args_use["width"] = args.get("width", "900px") # 设置宽度
  603. args_use["heigh"] = (
  604. args.get("heigh", "500px")
  605. if not bool(args.get("Square", False))
  606. else args.get("width", "900px")
  607. ) # 设置高度
  608. args_use["page_Title"] = args.get("page_Title", "") # 设置HTML标题
  609. args_use["show_Animation"] = args.get("show_Animation", True) # 设置HTML标题
  610. # 坐标轴设置,2D坐标图和3D坐标图
  611. args_use["show_Axis"] = bool(args.get("show_Axis", True)) # 显示坐标轴
  612. args_use["Axis_Zero"] = bool(args.get("Axis_Zero", False)) # 重叠于原点
  613. args_use["show_Axis_Scale"] = bool(args.get("show_Axis_Scale", True)) # 显示刻度
  614. args_use["x_type"] = args.get("x_type", None) # 坐标轴类型
  615. args_use["y_type"] = args.get("y_type", None)
  616. args_use["z_type"] = args.get("z_type", None)
  617. # Mark设置 坐标图专属
  618. args_use["make_Line"] = args.get("make_Line", []) # 设置直线
  619. # Datazoom设置 坐标图专属
  620. args_use["Datazoom"] = args.get("Datazoom", "N") # 设置Datazoom
  621. # 显示文字设置
  622. args_use["show_Text"] = bool(args.get("show_Text", False)) # 显示文字
  623. # 统一化的设置
  624. args_use["Size"] = args.get("Size", 10) # Size
  625. args_use["Symbol"] = args.get("Symbol", "circle") # 散点样式
  626. # Bar设置
  627. args_use["bar_Stacking"] = bool(args.get("bar_Stacking", False)) # 堆叠(2D和3D)
  628. # 散点图设置
  629. args_use["EffectScatter"] = bool(
  630. args.get("EffectScatter", False)
  631. ) # 开启特效(2D和3D)
  632. # 折线图设置
  633. args_use["connect_None"] = bool(args.get("connect_None", False)) # 连接None
  634. args_use["Smooth_Line"] = bool(args.get("Smooth_Line", False)) # 平滑曲线
  635. args_use["Area_chart"] = bool(args.get("Area_chart", False)) # 面积图
  636. args_use["paste_Y"] = bool(args.get("paste_Y", False)) # 紧贴Y轴
  637. args_use["step_Line"] = bool(args.get("step_Line", False)) # 阶梯式图
  638. args_use["size_PictorialBar"] = args.get("size_PictorialBar", None) # 象形柱状图大小
  639. args_use["Polar_units"] = args.get("Polar_units", "100") # 极坐标图单位制
  640. args_use["More"] = bool(args.get("More", False)) # 均绘制水球图、仪表图
  641. args_use["WordCould_Size"] = args.get("WordCould_Size", [20, 100]) # 开启特效
  642. args_use["WordCould_Shape"] = args.get("WordCould_Shape", "circle") # 开启特效
  643. args_use["symbol_Graph"] = args.get("symbol_Graph", "circle") # 关系点样式
  644. args_use["Repulsion"] = float(args.get("Repulsion", 8000)) # 斥力因子
  645. args_use["Area_radar"] = bool(args.get("Area_radar", True)) # 雷达图面积
  646. args_use["HTML_Type"] = args.get("HTML_Type", 2) # 输出Page的类型
  647. args_use["Map"] = args.get("Map", "china") # 输出Page的面积
  648. args_use["show_Map_Symbol"] = bool(
  649. args.get("show_Map_Symbol", False)
  650. ) # 输出Page的面积
  651. args_use["Geo_Type"] = {
  652. "heatmap": GeoType.HEATMAP,
  653. "scatter": "scatter",
  654. "EFFECT": GeoType.EFFECT_SCATTER,
  655. }.get(
  656. args.get("Geo_Type", "heatmap"), GeoType.HEATMAP
  657. ) # 输出Page的面积
  658. args_use["map_Type"] = args.get("map_Type", "2D") # 输出Page的面积
  659. args_use["is_Dark"] = bool(args.get("is_Dark", False)) # 输出Page的面积
  660. return args_use
  661. def global_set(
  662. self, args_use, title, min_, max_, data_zoom=False, visual_mapping=True, axis=()
  663. ):
  664. k = {}
  665. # 标题设置
  666. if args_use["title"] is None:
  667. args_use["title"] = title
  668. k["title_opts"] = opts.TitleOpts(
  669. title=args_use["title"], subtitle=args_use["vice_title"]
  670. )
  671. # 图例设置
  672. if not args_use["show_Legend"]:
  673. k["legend_opts"] = opts.LegendOpts(is_show=False)
  674. else:
  675. k["legend_opts"] = opts.LegendOpts(
  676. type_="scroll", orient=args_use["ori_Legend"], pos_bottom="2%"
  677. ) # 移动到底部,避免和标题冲突
  678. # 视觉映射
  679. if not args_use["show_Visual_mapping"]:
  680. pass
  681. elif not visual_mapping:
  682. pass
  683. else:
  684. if args_use["min_Visual_mapping"] is not None:
  685. min_ = args_use["min_Visual_mapping"]
  686. if args_use["max_Visual_mapping"] is not None:
  687. max_ = args_use["max_Visual_mapping"]
  688. k["visualmap_opts"] = opts.VisualMapOpts(
  689. type_="color" if args_use["is_color_Visual_mapping"] else "size",
  690. max_=max_,
  691. min_=min_,
  692. range_color=args_use["color_Visual_mapping"],
  693. range_size=args_use["size_Visual_mapping"],
  694. range_text=args_use["text_Visual_mapping"],
  695. is_piecewise=args_use["is_Subsection"],
  696. pieces=args_use["Subsection_list"],
  697. orient=args_use["ori_Visual"],
  698. )
  699. k["toolbox_opts"] = opts.ToolboxOpts(is_show=args_use["Tool_BOX"])
  700. if data_zoom:
  701. if args_use["Datazoom"] == "all":
  702. k["datazoom_opts"] = [
  703. opts.DataZoomOpts(),
  704. opts.DataZoomOpts(orient="horizontal"),
  705. ]
  706. elif args_use["Datazoom"] == "horizontal":
  707. k["datazoom_opts"] = opts.DataZoomOpts(type_="inside")
  708. elif args_use["Datazoom"] == "vertical":
  709. opts.DataZoomOpts(orient="vertical")
  710. elif args_use["Datazoom"] == "inside_vertical":
  711. opts.DataZoomOpts(type_="inside", orient="vertical")
  712. elif args_use["Datazoom"] == "inside_vertical":
  713. opts.DataZoomOpts(type_="inside", orient="horizontal")
  714. # 坐标轴设定,输入设定的坐标轴即可
  715. def axis_seeting(args_use, axis="x"):
  716. axis_k = {}
  717. if args_use[f"{axis[0]}_type"] == "Display" or not args_use["show_Axis"]:
  718. axis_k[f"{axis[0]}axis_opts"] = opts.AxisOpts(is_show=False)
  719. else:
  720. axis_k[f"{axis[0]}axis_opts"] = opts.AxisOpts(
  721. type_=args_use[f"{axis[0]}_type"],
  722. axisline_opts=opts.AxisLineOpts(is_on_zero=args_use["Axis_Zero"]),
  723. axistick_opts=opts.AxisTickOpts(
  724. is_show=args_use["show_Axis_Scale"]
  725. ),
  726. )
  727. return axis_k
  728. for i in axis:
  729. k.update(axis_seeting(args_use, i))
  730. return k
  731. def init_setting(self, args_use):
  732. k = {}
  733. # 设置标题
  734. if args_use["page_Title"] == "":
  735. title = "CoTan_数据处理"
  736. else:
  737. title = f"CoTan_数据处理:{args_use['page_Title']}"
  738. k["init_opts"] = opts.InitOpts(
  739. theme=args_use["Theme"],
  740. bg_color=args_use["BG_Color"],
  741. width=args_use["width"],
  742. height=args_use["heigh"],
  743. page_title=title,
  744. animation_opts=opts.AnimationOpts(animation=args_use["show_Animation"]),
  745. )
  746. return k
  747. def get_title(self, args_use):
  748. return f":{args_use['title']}"
  749. def mark(self, args_use):
  750. k = {}
  751. line = []
  752. for i in args_use["make_Line"]:
  753. try:
  754. if i[2] == "c" or i[0] in ("min", "max", "average"):
  755. line.append(opts.MarkLineItem(type_=i[0], name=i[1]))
  756. elif i[2] == "x":
  757. line.append(opts.MarkLineItem(x=i[0], name=i[1]))
  758. else:
  759. raise Exception
  760. except BaseException:
  761. line.append(opts.MarkLineItem(y=i[0], name=i[1]))
  762. if line == []:
  763. return k
  764. k["markline_opts"] = opts.MarkLineOpts(data=line)
  765. return k
  766. def yaxis_label(self, args_use, position="inside"):
  767. return {
  768. "label_opts": opts.LabelOpts(
  769. is_show=args_use["show_Text"], position=position
  770. )
  771. }
  772. def special_setting(self, args_use, type_): # 私人设定
  773. k = {}
  774. if type_ == "Bar": # 设置y的重叠
  775. if args_use["bar_Stacking"]:
  776. k = {"stack": "stack1"}
  777. elif type_ == "Scatter":
  778. k["Beautiful"] = args_use["EffectScatter"]
  779. k["symbol"] = args_use["Symbol"]
  780. k["symbol_size"] = args_use["Size"]
  781. elif type_ == "Line":
  782. k["is_connect_nones"] = args_use["connect_None"]
  783. # 平滑曲线或连接y轴
  784. k["is_smooth"] = (
  785. True if args_use["Smooth_Line"] or args_use["paste_Y"] else False
  786. )
  787. k["areastyle_opts"] = opts.AreaStyleOpts(
  788. opacity=0.5 if args_use["Area_chart"] else 0
  789. )
  790. if args_use["step_Line"]:
  791. del k["is_smooth"]
  792. k["is_step"] = True
  793. elif type_ == "PictorialBar":
  794. k["symbol_size"] = args_use["Size"]
  795. elif type_ == "Polar":
  796. return args_use["Polar_units"] # 回复的是单位制而不是设定
  797. elif type_ == "WordCloud":
  798. k["word_size_range"] = args_use["WordCould_Size"] # 放到x轴
  799. k["shape"] = args_use["Symbol"] # 放到x轴
  800. elif type_ == "Graph":
  801. k["symbol_Graph"] = args_use["Symbol"] # 放到x轴
  802. elif type_ == "Radar": # 雷达图
  803. k["areastyle_opts"] = opts.AreaStyleOpts(
  804. opacity=0.1 if args_use["Area_chart"] else 0
  805. )
  806. k["symbol"] = args_use["Symbol"] # 雷达图symbol
  807. return k
  808. class Render(PlotBase):
  809. def render_all(self, text, render_dir) -> Page:
  810. args = self.parsing_parameters(text)
  811. if args["page_Title"] == "":
  812. title = "CoTan_数据处理"
  813. else:
  814. title = f"CoTan_数据处理:{args['page_Title']}"
  815. if args["HTML_Type"] == 1:
  816. page = Page(page_title=title, layout=Page.DraggablePageLayout)
  817. page.add(*self.all_render.values())
  818. elif args["HTML_Type"] == 2:
  819. page = Page(page_title=title, layout=Page.SimplePageLayout)
  820. page.add(*self.all_render.values())
  821. else:
  822. page = Tab(page_title=title)
  823. for i in self.all_render:
  824. page.add(self.all_render[i], i)
  825. page.render(render_dir)
  826. return render_dir
  827. def overlap(self, down, up):
  828. over_down = self.all_render[down]
  829. over_up = self.all_render[up]
  830. over_down.overlap(over_up)
  831. return over_down
  832. def get_random_color(self):
  833. # 随机颜色,雷达图默认非随机颜色
  834. rgb = [randint(0, 255), randint(0, 255), randint(0, 255)]
  835. color = "#"
  836. for a in rgb:
  837. # 转换为16进制,upper表示小写(规范化)
  838. color += str(hex(a))[-2:].replace("x", "0").upper()
  839. return color
  840. class AxisPlot(Render):
  841. def to_bar(self, name, text) -> Bar: # Bar:数据堆叠
  842. get = self.get_sheet(name)
  843. x = self.get_index(name, True).tolist()
  844. args = self.parsing_parameters(text)
  845. c = Bar(**self.init_setting(args)).add_xaxis(
  846. list(map(str, list(set(x))))
  847. ) # 转变为str类型
  848. y = []
  849. for i in get.iteritems(): # 按列迭代
  850. q = i[1].tolist() # 转换为列表
  851. try:
  852. c.add_yaxis(
  853. f"{name}_{i[0]}",
  854. q,
  855. **self.special_setting(args, "Bar"),
  856. **self.yaxis_label(args),
  857. color=self.get_random_color(),
  858. ) # i[0]是名字,i是tuple,其中i[1]是data
  859. # q不需要float,因为应多不同的type他会自动变更,但是y是用来比较大小
  860. y += list(map(int, q))
  861. except BaseException:
  862. pass
  863. if y == []:
  864. args["show_Visual_mapping"] = False # 关闭视觉映射
  865. y = [0, 100]
  866. c.set_global_opts(
  867. **self.global_set(args, f"{name}柱状图", min(y), max(y), True, axis=["x", "y"])
  868. )
  869. c.set_series_opts(**self.mark(args))
  870. self.all_render[f"{name}柱状图[{len(self.all_render)}]{self.get_title(args)}"] = c
  871. return c
  872. def to_line(self, name, text) -> Line: # 折线图:连接空数据、显示数值、平滑曲线、面积图以及紧贴Y轴
  873. get = self.get_sheet(name)
  874. x = self.get_index(name, True).tolist()
  875. args = self.parsing_parameters(text)
  876. c = Line(**self.init_setting(args)).add_xaxis(
  877. list(map(str, list(set(x))))
  878. ) # 转变为str类型
  879. y = []
  880. for i in get.iteritems(): # 按列迭代
  881. q = i[1].tolist() # 转换为列表
  882. try:
  883. c.add_yaxis(
  884. f"{name}_{i[0]}",
  885. q,
  886. **self.special_setting(args, "Line"),
  887. **self.yaxis_label(args),
  888. color=self.get_random_color(),
  889. ) # i[0]是名字,i是tuple,其中i[1]是data
  890. # q不需要float,因为应多不同的type他会自动变更,但是y是用来比较大小
  891. y += list(map(int, q))
  892. except BaseException:
  893. pass
  894. if y == []:
  895. args["show_Visual_mapping"] = False # 关闭视觉映射
  896. y = [0, 100]
  897. c.set_global_opts(
  898. **self.global_set(args, f"{name}折线图", min(y), max(y), True, axis=["x", "y"])
  899. )
  900. c.set_series_opts(**self.mark(args))
  901. self.all_render[f"{name}折线图[{len(self.all_render)}]{self.get_title(args)}"] = c
  902. return c
  903. def to_scatter(self, name, text) -> Scatter: # 散点图标记形状和大小、特效、标记线
  904. get = self.get_sheet(name)
  905. args = self.parsing_parameters(text)
  906. x = self.get_index(name, True).tolist()
  907. type_ = self.special_setting(args, "Scatter")
  908. if type_["Beautiful"]:
  909. func = EffectScatter
  910. else:
  911. func = Scatter
  912. del type_["Beautiful"]
  913. c = func(**self.init_setting(args)).add_xaxis(
  914. list(map(str, list(set(x))))
  915. ) # 转变为str类型
  916. y = []
  917. for i in get.iteritems(): # 按列迭代
  918. q = i[1].tolist() # 转换为列表
  919. try:
  920. c.add_yaxis(
  921. f"{name}_{i[0]}",
  922. q,
  923. **type_,
  924. **self.yaxis_label(args),
  925. color=self.get_random_color(),
  926. ) # i[0]是名字,i是tuple,其中i[1]是data
  927. # q不需要float,因为应多不同的type他会自动变更,但是y是用来比较大小
  928. y += list(map(int, q))
  929. except BaseException:
  930. pass
  931. if y == []:
  932. args["show_Visual_mapping"] = False # 关闭视觉映射
  933. y = [0, 100]
  934. c.set_global_opts(
  935. **self.global_set(args, f"{name}散点图", min(y), max(y), True, axis=["x", "y"])
  936. )
  937. c.set_series_opts(**self.mark(args))
  938. self.all_render[f"{name}散点图[{len(self.all_render)}]{self.get_title(args)}"] = c
  939. return c
  940. def to_pictorialbar(self, name, text) -> PictorialBar: # 象形柱状图:图形、剪裁图像、元素重复和间隔
  941. get = self.get_sheet(name)
  942. x = self.get_index(name, True).tolist()
  943. args = self.parsing_parameters(text)
  944. c = (
  945. PictorialBar(**self.init_setting(args))
  946. .add_xaxis(list(map(str, list(set(x))))) # 转变为str类型
  947. .reversal_axis()
  948. )
  949. y = []
  950. k = self.special_setting(args, "PictorialBar")
  951. for i in get.iteritems(): # 按列迭代
  952. q = i[1].tolist() # 转换为列表
  953. try:
  954. c.add_yaxis(
  955. f"{name}_{i[0]}",
  956. q,
  957. label_opts=opts.LabelOpts(is_show=False),
  958. symbol_repeat=True,
  959. is_symbol_clip=True,
  960. symbol=SymbolType.ROUND_RECT,
  961. **k,
  962. color=self.get_random_color(),
  963. )
  964. # q不需要float,因为应多不同的type他会自动变更,但是y是用来比较大小
  965. y += list(map(int, q))
  966. except BaseException:
  967. pass
  968. if y == []:
  969. args["show_Visual_mapping"] = False # 关闭视觉映射
  970. y = [0, 100]
  971. c.set_global_opts(
  972. **self.global_set(
  973. args, f"{name}象形柱状图", min(y), max(y), True, axis=["x", "y"]
  974. )
  975. )
  976. c.set_series_opts(**self.mark(args))
  977. self.all_render[f"{name}[{len(self.all_render)}]{self.get_title(args)}"] = c
  978. return c
  979. def to_boxpolt(self, name, text) -> Boxplot:
  980. get = self.get_sheet(name)
  981. args = self.parsing_parameters(text)
  982. c = Boxplot(**self.init_setting(args)).add_xaxis([f"{name}"])
  983. y = []
  984. for i in get.iteritems(): # 按列迭代
  985. q = i[1].tolist() # 转换为列表
  986. try:
  987. c.add_yaxis(f"{name}_{i[0]}", [q], **self.yaxis_label(args))
  988. # q不需要float,因为应多不同的type他会自动变更,但是y是用来比较大小
  989. y += list(map(float, q))
  990. except BaseException:
  991. pass
  992. if y == []:
  993. args["show_Visual_mapping"] = False # 关闭视觉映射
  994. y = [0, 100]
  995. c.set_global_opts(
  996. **self.global_set(args, f"{name}箱形图", min(y), max(y), True, axis=["x", "y"])
  997. )
  998. c.set_series_opts(**self.mark(args))
  999. self.all_render[f"{name}箱形图[{len(self.all_render)}]{self.get_title(args)}"] = c
  1000. return c
  1001. def to_heatmap(self, name, text) -> HeatMap: # 显示数据
  1002. get = self.get_sheet(name)
  1003. x = self.get_column(name, True).tolist() # 图的x轴,下侧,列名
  1004. y = self.get_index(name, True).tolist() # 图的y轴,左侧,行名
  1005. value_list = []
  1006. q = []
  1007. for c in range(len(x)): # c-列,r-行
  1008. for r in range(len(y)):
  1009. try:
  1010. v = float(eval(f"get.iloc[{r},{c}]", {"get": get})) # 先行后列
  1011. except BaseException:
  1012. continue
  1013. q.append(v)
  1014. value_list.append([c, r, v])
  1015. args = self.parsing_parameters(text)
  1016. try:
  1017. max_, min_ = max(q), min(q)
  1018. except BaseException:
  1019. args["show_Visual_mapping"] = False # 关闭视觉映射
  1020. max_, min_ = 0, 100
  1021. c = (
  1022. HeatMap(**self.init_setting(args))
  1023. .add_xaxis(list(map(str, list(set(x))))) # 转变为str类型
  1024. .add_yaxis(
  1025. f"{name}", list(map(str, y)), value_list, **self.yaxis_label(args)
  1026. )
  1027. .set_global_opts(
  1028. **self.global_set(args, f"{name}热力图", min_, max_, True, axis=["x", "y"])
  1029. )
  1030. .set_series_opts(**self.mark(args))
  1031. )
  1032. self.all_render[f"{name}热力图[{len(self.all_render)}]{self.get_title(args)}"] = c
  1033. return c
  1034. class GeneralPlot(Render):
  1035. def to_format_graph(self, name, text) -> Graph:
  1036. get = self.get_sheet(name)
  1037. y_name = self.get_index(name, True).tolist() # 拿行名
  1038. nodes = []
  1039. link = []
  1040. for i in get.iterrows(): # 按行迭代
  1041. q = i[1].tolist() # 转换为列表
  1042. try:
  1043. nodes.append(
  1044. {"name": f"{i[0]}", "symbolSize": float(q[0]), "value": float(q[0])}
  1045. )
  1046. for a in q[1:]:
  1047. n = str(a).split(":")
  1048. try:
  1049. link.append(
  1050. {"source": f"{i[0]}", "target": n[0], "value": float(n[1])}
  1051. )
  1052. except BaseException:
  1053. pass
  1054. except BaseException:
  1055. pass
  1056. if link == []:
  1057. for i in nodes:
  1058. for j in nodes:
  1059. link.append(
  1060. {
  1061. "source": i.get("name"),
  1062. "target": j.get("name"),
  1063. "value": abs(i.get("value") - j.get("value")),
  1064. }
  1065. )
  1066. args = self.parsing_parameters(text)
  1067. c = (
  1068. Graph(**self.init_setting(args))
  1069. .add(
  1070. f"{y_name[0]}",
  1071. nodes,
  1072. link,
  1073. repulsion=args["Repulsion"],
  1074. **self.yaxis_label(args),
  1075. )
  1076. .set_global_opts(
  1077. **self.global_set(args, f"{name}关系图", 0, 100, False, False)
  1078. )
  1079. )
  1080. self.all_render[f"{name}关系图[{len(self.all_render)}]{self.get_title(args)}"] = c
  1081. return c
  1082. def to_graph(self, name, text) -> Graph: # XY关系图,新的书写方式
  1083. get = self.get_sheet(name)
  1084. args = self.parsing_parameters(text)
  1085. size = args["Size"] * 3
  1086. # 生成节点信息
  1087. y_name = self.get_index(name, True).tolist() # 拿行名
  1088. x_name = self.get_column(name, True).tolist() # 拿列名
  1089. nodes_list = list(set(y_name + x_name)) # 处理重复,作为nodes列表
  1090. nodes = []
  1091. for i in nodes_list:
  1092. nodes.append({"name": f"{i}", "symbolSize": size})
  1093. # 生成link信息
  1094. link = [] # 记录连接的信息
  1095. have = []
  1096. for y in range(len(y_name)): # 按行迭代
  1097. for x in range(len(x_name)):
  1098. y_n = y_name[y] # 节点1
  1099. x_n = x_name[x] # 节点2
  1100. if y_n == x_n:
  1101. continue
  1102. if (y_n, x_n) in have or (x_n, y_n) in have:
  1103. continue
  1104. else:
  1105. have.append((y_n, x_n))
  1106. try:
  1107. v = float(eval(f"get.iloc[{y},{x}]", {"get": get})) # 取得value
  1108. link.append({"source": y_n, "target": x_n, "value": v})
  1109. except BaseException:
  1110. pass
  1111. c = (
  1112. Graph(**self.init_setting(args))
  1113. .add(
  1114. f"{y_name[0]}",
  1115. nodes,
  1116. link,
  1117. repulsion=args["Repulsion"],
  1118. **self.yaxis_label(args),
  1119. )
  1120. .set_global_opts(
  1121. **self.global_set(args, f"{name}关系图", 0, 100, False, False)
  1122. )
  1123. )
  1124. self.all_render[f"{name}关系图[{len(self.all_render)}]{self.get_title(args)}"] = c
  1125. return c
  1126. def to_sankey(self, name, text):
  1127. get = self.get_sheet(name)
  1128. args = self.parsing_parameters(text)
  1129. # 生成节点信息
  1130. y_name = self.get_index(name, True).tolist() # 拿行名
  1131. x_name = self.get_column(name, True).tolist() # 拿列名
  1132. nodes_list = list(set(y_name + x_name)) # 处理重复,作为nodes列表
  1133. nodes = []
  1134. source = {}
  1135. target = {}
  1136. for i in nodes_list:
  1137. nodes.append({"name": f"{i}"})
  1138. source[i] = set() # 记录该元素source边连接的节点
  1139. target[i] = set() # 记录改元素target边连接的节点
  1140. # 生成link信息
  1141. link = [] # 记录连接的信息
  1142. have = []
  1143. for y in range(len(y_name)): # 按行迭代
  1144. for x in range(len(x_name)):
  1145. y_n = y_name[y] # 节点1
  1146. x_n = x_name[x] # 节点2
  1147. if y_n == x_n:
  1148. continue # 是否相同
  1149. if (y_n, x_n) in have or (x_n, y_n) in have:
  1150. continue # 是否重复
  1151. else:
  1152. have.append((y_n, x_n))
  1153. # 固定的,y在s而x在t,桑基图不可以绕环形,所以要做检查
  1154. if source[y_n] & target[x_n] != set():
  1155. continue
  1156. try:
  1157. v = float(eval(f"get.iloc[{y},{x}]", {"get": get})) # 取得value
  1158. link.append({"source": y_n, "target": x_n, "value": v})
  1159. target[y_n].add(x_n)
  1160. source[x_n].add(y_n)
  1161. except BaseException:
  1162. pass
  1163. c = (
  1164. Sankey()
  1165. .add(
  1166. f"{name}",
  1167. nodes,
  1168. link,
  1169. linestyle_opt=opts.LineStyleOpts(
  1170. opacity=0.2, curve=0.5, color="source"
  1171. ),
  1172. label_opts=opts.LabelOpts(position="right"),
  1173. )
  1174. .set_global_opts(
  1175. **self.global_set(args, f"{name}桑基图", 0, 100, False, False)
  1176. )
  1177. )
  1178. self.all_render[f"{name}桑基图[{len(self.all_render)}]{self.get_title(args)}"] = c
  1179. return c
  1180. def to_parallel(self, name, text) -> Parallel:
  1181. get = self.get_sheet(name)
  1182. dim = []
  1183. dim_list = self.get_index(name, True).tolist()
  1184. for i in range(len(dim_list)):
  1185. dim.append({"dim": i, "name": f"{dim_list[i]}"})
  1186. args = self.parsing_parameters(text)
  1187. c = (
  1188. Parallel(**self.init_setting(args))
  1189. .add_schema(dim)
  1190. .set_global_opts(
  1191. **self.global_set(args, f"{name}多轴图", 0, 100, False, False)
  1192. )
  1193. )
  1194. for i in get.iteritems(): # 按列迭代
  1195. q = i[1].tolist() # 转换为列表
  1196. c.add(f"{i[0]}", [q], **self.yaxis_label(args))
  1197. self.all_render[f"{name}多轴图[{len(self.all_render)}]{self.get_title(args)}"] = c
  1198. return c
  1199. def to_pie(self, name, text) -> Pie:
  1200. get = self.get_sheet(name)
  1201. data = []
  1202. for i in get.iterrows(): # 按行迭代
  1203. try:
  1204. data.append([f"{i[0]}", float(i[1].tolist()[0])])
  1205. except BaseException:
  1206. pass
  1207. args = self.parsing_parameters(text)
  1208. c = (
  1209. Pie(**self.init_setting(args))
  1210. .add(f"{name}", data, **self.yaxis_label(args, "top"))
  1211. .set_global_opts(**self.global_set(args, f"{name}饼图", 0, 100, False, False))
  1212. .set_series_opts(label_opts=opts.LabelOpts(formatter="{b}: {c}"))
  1213. )
  1214. self.all_render[f"{name}饼图[{len(self.all_render)}]{self.get_title(args)}"] = c
  1215. return c
  1216. def to_polar(self, name, text) -> Polar:
  1217. get = self.get_sheet(name)
  1218. data = []
  1219. args = self.parsing_parameters(text)
  1220. setting = self.special_setting(args, "Polar")
  1221. if setting == "rad": # 弧度制
  1222. convert = 0.0628
  1223. elif setting == "360": # 角度制
  1224. convert = 0.36
  1225. else:
  1226. convert = 1
  1227. for i in get.iterrows(): # 按行迭代
  1228. try:
  1229. q = i[1].tolist()
  1230. data.append((float(q[0]), float(q[1]) / convert))
  1231. except BaseException:
  1232. pass
  1233. c = (
  1234. Polar(**self.init_setting(args))
  1235. .add(f"{name}", data, type_="scatter", **self.yaxis_label(args))
  1236. .set_global_opts(
  1237. **self.global_set(args, f"{name}极坐标图", 0, 100, False, False)
  1238. )
  1239. )
  1240. self.all_render[f"{name}极坐标图[{len(self.all_render)}]{self.get_title(args)}"] = c
  1241. return c
  1242. def to_radar(self, name, text) -> Radar:
  1243. get = self.get_sheet(name)
  1244. x = self.get_index(name, True).tolist()
  1245. max_list = [[] for _ in range(len(x))] # 保存每个x栏目的最大值
  1246. data = [] # y的组成数据,包括name和list
  1247. x_list = [] # 保存x的数据
  1248. for i in get.iteritems(): # 按列迭代计算每一项的abcd
  1249. q = i[1].tolist()
  1250. add = []
  1251. for a in range(len(q)):
  1252. try:
  1253. f = float(q[a])
  1254. max_list[a].append(f)
  1255. add.append(f)
  1256. except BaseException:
  1257. pass
  1258. data.append([f"{i[0]}", [add]]) # add是包含在一个list中的
  1259. for i in range(len(max_list)): # 计算x_list
  1260. x_list.append(opts.RadarIndicatorItem(name=x[i], max_=max(max_list[i])))
  1261. args = self.parsing_parameters(text)
  1262. c = (
  1263. Radar(**self.init_setting(args))
  1264. .add_schema(schema=x_list)
  1265. .set_global_opts(
  1266. **self.global_set(args, f"{name}雷达图", 0, 100, False, False)
  1267. )
  1268. )
  1269. k = self.special_setting(args, "Radar")
  1270. for i in data:
  1271. c.add(
  1272. *i, **self.yaxis_label(args), color=self.get_random_color(), **k
  1273. ) # 对i解包,取得name和data 随机颜色
  1274. self.all_render[f"{name}雷达图[{len(self.all_render)}]{self.get_title(args)}"] = c
  1275. return c
  1276. def to_funnel(self, name, text) -> Funnel:
  1277. get = self.get_sheet(name)
  1278. y_name = self.get_index(name, True).tolist() # 拿行名
  1279. x = self.get_column(name, True).tolist()[0]
  1280. value = []
  1281. y = []
  1282. for r in range(len(y_name)):
  1283. try:
  1284. v = float(eval(f"get.iloc[{r},0]", {"get": get}))
  1285. except BaseException:
  1286. continue
  1287. value.append([f"{y_name[r]}", v])
  1288. y.append(v)
  1289. args = self.parsing_parameters(text)
  1290. c = (
  1291. Funnel(**self.init_setting(args))
  1292. .add(f"{name}", value, **self.yaxis_label(args, "top"))
  1293. .set_global_opts(
  1294. **self.global_set(args, f"{name}漏斗图", min(y), max(y), True, False)
  1295. )
  1296. )
  1297. self.all_render[f"{name}漏斗图[{len(self.all_render)}]{self.get_title(args)}"] = c
  1298. return c
  1299. def to_calendar(self, name, text) -> Calendar:
  1300. get = self.get_sheet(name)
  1301. data = [[] for _ in self.get_column(name, True)]
  1302. x_name = self.get_column(name, True).tolist()
  1303. y = []
  1304. for i in get.iterrows():
  1305. date = str(i[0]) # 时间数据
  1306. q = i[1].tolist()
  1307. for a in range(len(q)):
  1308. try:
  1309. data[a].append([date, q[a]])
  1310. y.append(float(q[a]))
  1311. except BaseException:
  1312. pass
  1313. args = self.parsing_parameters(text)
  1314. if y == []:
  1315. y = [0, 100]
  1316. args["show_Visual_mapping"] = False # 关闭视觉映射
  1317. c = Calendar(**self.init_setting(args)).set_global_opts(
  1318. **self.global_set(args, f"{name}日历图", min(y), max(y), True)
  1319. )
  1320. for i in range(len(x_name)):
  1321. start_date = data[i][0][0]
  1322. end_date = data[i][-1][0]
  1323. c.add(
  1324. str(x_name[i]),
  1325. data[i],
  1326. calendar_opts=opts.CalendarOpts(range_=[start_date, end_date]),
  1327. **self.yaxis_label(args),
  1328. )
  1329. self.all_render[f"{name}日历图[{len(self.all_render)}]{self.get_title(args)}"] = c
  1330. return c
  1331. def to_theme_river(self, name, text) -> ThemeRiver:
  1332. get = self.get_sheet(name)
  1333. data = []
  1334. x_name = self.get_column(name, True).tolist()
  1335. y = []
  1336. for i in get.iterrows():
  1337. date = str(i[0])
  1338. q = i[1].tolist()
  1339. for a in range(len(x_name)):
  1340. try:
  1341. data.append([date, q[a], x_name[a]])
  1342. y.append(float(q[a]))
  1343. except BaseException:
  1344. pass
  1345. args = self.parsing_parameters(text)
  1346. if y == []:
  1347. y = [0, 100]
  1348. args["show_Visual_mapping"] = False # 关闭视觉映射
  1349. c = (
  1350. ThemeRiver(**self.init_setting(args))
  1351. # 抑制大小
  1352. .add(
  1353. x_name,
  1354. data,
  1355. singleaxis_opts=opts.SingleAxisOpts(
  1356. type_=args["x_type"], pos_bottom="10%"
  1357. ),
  1358. ).set_global_opts(
  1359. **self.global_set(args, f"{name}河流图", min(y), max(y), True, False)
  1360. )
  1361. )
  1362. self.all_render[f"{name}河流图[{len(self.all_render)}]{self.get_title(args)}"] = c
  1363. return c
  1364. class RelationshipPlot(Render):
  1365. def to_sunburst(self, name, text) -> Sunburst:
  1366. get = self.get_sheet(name)
  1367. def convert_data(iter_object, name):
  1368. k = {"name": name, "children": []}
  1369. v = 0
  1370. for i in iter_object:
  1371. content = iter_object[i]
  1372. if isinstance(content, dict):
  1373. new_c = convert_data(content, str(i))
  1374. v += new_c["value"]
  1375. k["children"].append(new_c)
  1376. else:
  1377. try:
  1378. q = float(content)
  1379. except BaseException:
  1380. q = len(str(content))
  1381. v += q
  1382. k["children"].append({"name": f"{i}={content}", "value": q})
  1383. k["value"] = v
  1384. return k
  1385. data = convert_data(get.to_dict(), name)["children"]
  1386. args = self.parsing_parameters(text)
  1387. c = (
  1388. Sunburst()
  1389. .add(
  1390. series_name=f"{name}",
  1391. data_pair=data,
  1392. radius=[abs(args["Size"] - 10), "90%"],
  1393. )
  1394. .set_global_opts(
  1395. **self.global_set(args, f"{name}旭日图", 0, 100, False, False)
  1396. )
  1397. .set_series_opts(label_opts=opts.LabelOpts(formatter="{b}"))
  1398. )
  1399. self.all_render[f"{name}旭日图[{len(self.all_render)}]{self.get_title(args)}"] = c
  1400. return c
  1401. def to_tree(self, name, text) -> Tree:
  1402. get = self.get_sheet(name)
  1403. def convert_data(iter_object, name):
  1404. k = {"name": name, "children": []}
  1405. for i in iter_object:
  1406. content = iter_object[i]
  1407. if isinstance(content, dict):
  1408. new_children = convert_data(content, str(i))
  1409. k["children"].append(new_children)
  1410. else:
  1411. k["children"].append(
  1412. {"name": f"{i}", "children": [{"name": f"{content}"}]}
  1413. )
  1414. return k
  1415. data = [convert_data(get.to_dict(), name)]
  1416. args = self.parsing_parameters(text)
  1417. c = (
  1418. Tree()
  1419. .add(f"{name}", data)
  1420. .set_global_opts(
  1421. **self.global_set(args, f"{name}树状图", 0, 100, False, False)
  1422. )
  1423. )
  1424. self.all_render[f"{name}树状图[{len(self.all_render)}]{self.get_title(args)}"] = c
  1425. return c
  1426. def to_tree_map(self, name, text) -> TreeMap:
  1427. get = self.get_sheet(name)
  1428. def convert_data(iter_object, name):
  1429. k = {"name": name, "children": []}
  1430. v = 0
  1431. for i in iter_object:
  1432. content = iter_object[i]
  1433. if isinstance(content, dict):
  1434. new_c = convert_data(content, str(i))
  1435. v += new_c["value"]
  1436. k["children"].append(new_c)
  1437. else:
  1438. try:
  1439. q = float(content)
  1440. except BaseException:
  1441. q = len(str(content))
  1442. v += q
  1443. k["children"].append({"name": f"{i}={content}", "value": q})
  1444. k["value"] = v
  1445. return k
  1446. data = convert_data(get.to_dict(), name)["children"]
  1447. args = self.parsing_parameters(text)
  1448. c = (
  1449. TreeMap()
  1450. .add(
  1451. f"{name}",
  1452. data,
  1453. label_opts=opts.LabelOpts(is_show=True, position="inside"),
  1454. )
  1455. .set_global_opts(
  1456. **self.global_set(args, f"{name}矩形树图", 0, 100, False, False)
  1457. )
  1458. )
  1459. self.all_render[f"{name}矩形树图[{len(self.all_render)}]{self.get_title(args)}"] = c
  1460. return c
  1461. def to_scattergeo(self, name, text) -> Geo:
  1462. get = self.get_sheet(name)
  1463. column = self.get_column(name, True).tolist()
  1464. data_type = ["scatter" for _ in column]
  1465. data = [[] for _ in column]
  1466. y = []
  1467. for i in get.iterrows(): # 按行迭代
  1468. map = str(i[0])
  1469. q = i[1].tolist()
  1470. for a in range(len(q)):
  1471. try:
  1472. v = float(q[a])
  1473. y.append(v)
  1474. except BaseException:
  1475. v = str(q[a])
  1476. try:
  1477. if v[:5] == "[##S]":
  1478. # 特效图
  1479. v = float(v[5:])
  1480. y.append(v)
  1481. column.append(column[a])
  1482. data_type.append(GeoType.EFFECT_SCATTER)
  1483. data.append([])
  1484. a = -1
  1485. elif v[:5] == "[##H]":
  1486. # 特效图
  1487. v = float(v[5:])
  1488. y.append(v)
  1489. column.append(column[a])
  1490. data_type.append(GeoType.HEATMAP)
  1491. data.append([])
  1492. a = -1
  1493. else:
  1494. raise Exception
  1495. except BaseException:
  1496. data_type[a] = GeoType.LINES # 当前变为Line
  1497. data[a].append((map, v))
  1498. args = self.parsing_parameters(text)
  1499. args["show_Visual_mapping"] = True # 必须视觉映射
  1500. if y == []:
  1501. y = [0, 100]
  1502. if args["is_Dark"]:
  1503. g = {
  1504. "itemstyle_opts": opts.ItemStyleOpts(
  1505. color="#323c48", border_color="#111"
  1506. )
  1507. }
  1508. else:
  1509. g = {}
  1510. c = (
  1511. Geo().add_schema(maptype=str(args["Map"]), **g)
  1512. # 必须要有视觉映射(否则会显示奇怪的数据)
  1513. .set_global_opts(
  1514. **self.global_set(args, f"{name}Geo点地图", min(y), max(y), False)
  1515. )
  1516. )
  1517. for i in range(len(data)):
  1518. if data_type[i] != GeoType.LINES:
  1519. ka = dict(
  1520. symbol=args["Symbol"],
  1521. symbol_size=args["Size"],
  1522. color="#1E90FF" if args["is_Dark"] else "#0000FF",
  1523. )
  1524. else:
  1525. ka = dict(
  1526. symbol=SymbolType.ARROW,
  1527. symbol_size=6,
  1528. effect_opts=opts.EffectOpts(
  1529. symbol=SymbolType.ARROW, symbol_size=6, color="blue"
  1530. ),
  1531. linestyle_opts=opts.LineStyleOpts(
  1532. curve=0.2, color="#FFF8DC" if args["is_Dark"] else "#000000"
  1533. ),
  1534. )
  1535. c.add(f"{column[i]}", data[i], type_=data_type[i], **ka)
  1536. c.set_series_opts(label_opts=opts.LabelOpts(is_show=False)) # 不显示数据,必须放在add后面生效
  1537. self.all_render[
  1538. f"{name}Geo点地图[{len(self.all_render)}]{self.get_title(args)}"
  1539. ] = c
  1540. return c
  1541. class GeographyPlot(Render):
  1542. def to_map(self, name, text) -> Map:
  1543. get = self.get_sheet(name)
  1544. column = self.get_column(name, True).tolist()
  1545. data = [[] for _ in column]
  1546. y = []
  1547. for i in get.iterrows(): # 按行迭代
  1548. map = str(i[0])
  1549. q = i[1].tolist()
  1550. for a in range(len(q)):
  1551. try:
  1552. v = float(q[a])
  1553. y.append(v)
  1554. data[a].append((map, v))
  1555. except BaseException:
  1556. pass
  1557. args = self.parsing_parameters(text)
  1558. args["show_Visual_mapping"] = True # 必须视觉映射
  1559. if y == []:
  1560. y = [0, 100]
  1561. if args["map_Type"] == "GLOBE":
  1562. func = MapGlobe
  1563. else:
  1564. func = Map
  1565. c = func().set_global_opts(
  1566. **self.global_set(args, f"{name}Map地图", min(y), max(y), False)
  1567. ) # 必须要有视觉映射(否则会显示奇怪的数据)
  1568. for i in range(len(data)):
  1569. c.add(
  1570. f"{column[i]}",
  1571. data[i],
  1572. str(args["Map"]),
  1573. is_map_symbol_show=args["show_Map_Symbol"],
  1574. symbol=args["Symbol"],
  1575. **self.yaxis_label(args),
  1576. )
  1577. self.all_render[
  1578. f"{name}Map地图[{len(self.all_render)}]{self.get_title(args)}"
  1579. ] = c
  1580. return c
  def to_geo(self, name, text) -> Geo:
      """Plot sheet ``name`` as a pyecharts ``Geo`` chart and register it.

      Every cell is addressed by its row label ``x`` (used as longitude) and
      its column label ``y`` (used as latitude).  The cell content selects
      what is drawn at that coordinate:

      * a number -- a point of the series type ``args["Geo_Type"]``;
      * ``"[##S]<number>"`` -- a plain scatter point;
      * ``"[##E]<number>"`` -- an effect-scatter point;
      * ``"<lon>;<lat>"`` -- an arrowed line from the cell's coordinate to
        the given target coordinate.

      Cells that cannot be interpreted, or whose labels are not numeric, are
      skipped.

      Args:
          name: Name of the sheet to plot.
          text: Option text handed to ``self.parsing_parameters``.

      Returns:
          The configured ``Geo`` chart; it is also stored in
          ``self.all_render``.
      """
      get = self.get_sheet(name)
      column = self.get_column(name, True).tolist()
      index = self.get_index(name, True).tolist()
      args = self.parsing_parameters(text)
      args["show_Visual_mapping"] = True  # a visual mapping is mandatory here

      is_dark = args["is_Dark"]
      schema_style = {}
      if is_dark:
          schema_style["itemstyle_opts"] = opts.ItemStyleOpts(
              color="#323c48", border_color="#111"
          )
      c = Geo().add_schema(maptype=str(args["Map"]), **schema_style)

      point_color = "#1E90FF" if is_dark else "#0000FF"
      line_color = "#FFF8DC" if is_dark else "#000000"
      prefixed_types = {
          "[##S]": GeoType.SCATTER,  # plain point
          "[##E]": GeoType.EFFECT_SCATTER,  # point with ripple effect
      }

      m = []  # plotted values; min/max are handed to global_set below
      for y in column:  # column labels act as latitudes
          for x in index:  # row labels act as longitudes
              value = get.loc[x, y]
              here = f"({x},{y})"
              try:
                  v = float(value)
                  type_ = args["Geo_Type"]
              except Exception:
                  q = str(value)
                  marker = q[:5]
                  if marker not in prefixed_types:
                      # Not a number and not a marker: read it as a line
                      # target.  The numeric suffix is parsed only for marker
                      # cells; parsing it first (as before) rejected most
                      # "<lon>;<lat>" cells before they could be drawn.
                      try:
                          target = q.split(";")
                          there = f"({target[0]},{target[1]})"
                          c.add_coordinate(
                              name=there,
                              longitude=float(target[0]),
                              latitude=float(target[1]),
                          )
                          c.add_coordinate(
                              name=here, longitude=float(x), latitude=float(y)
                          )
                          c.add(
                              f"{name}",
                              [[here, there]],
                              type_=GeoType.LINES,
                              effect_opts=opts.EffectOpts(
                                  symbol=SymbolType.ARROW,
                                  symbol_size=6,
                                  color="blue",
                              ),
                              linestyle_opts=opts.LineStyleOpts(
                                  curve=0.2,
                                  color=line_color,
                              ),
                          )
                          c.add(
                              f"{name}_XY",
                              [[here, 5], [there, 5]],
                              type_=GeoType.EFFECT_SCATTER,
                              color=point_color,
                          )
                      except Exception:
                          # Best effort: a malformed line cell is ignored.
                          pass
                      continue
                  try:
                      v = float(q[5:])
                  except ValueError:
                      continue  # marker without a usable number
                  type_ = prefixed_types[marker]

              try:
                  c.add_coordinate(
                      name=here, longitude=float(x), latitude=float(y)
                  )
                  c.add(
                      f"{name}",
                      [[here, v]],
                      type_=type_,
                      symbol=args["Symbol"],
                      symbol_size=args["Size"],
                  )
                  if type_ == GeoType.HEATMAP:
                      # Heat-map cells additionally get a scatter marker.
                      c.add(
                          f"{name}_XY",
                          [[here, v]],
                          type_="scatter",
                          color=point_color,
                      )
                  m.append(v)
              except Exception:
                  # Best effort: labels that are not numeric coordinates (or
                  # a series pyecharts rejects) leave the cell unplotted.
                  pass

      if not m:
          m = [0, 100]  # fallback range when nothing numeric was plotted
      c.set_series_opts(label_opts=opts.LabelOpts(is_show=False))  # hide labels
      c.set_global_opts(
          **self.global_set(args, f"{name}Geo地图", min(m), max(m), False)
      )
      self.all_render[
          f"{name}Geo地图[{len(self.all_render)}]{self.get_title(args)}"
      ] = c
      return c
  class WordPlot(Render):
      """Charts built from a single column or a single cell of a sheet."""

      def to_word_cloud(self, name, text) -> WordCloud:
          """Build a word cloud from sheet ``name`` and register it.

          Each row contributes one word: the row label is the word and the
          first cell of the row is its weight.  Rows whose first cell is not
          numeric are skipped.

          Args:
              name: Name of the sheet to plot.
              text: Option text handed to ``self.parsing_parameters``.

          Returns:
              The ``WordCloud`` chart; it is also stored in ``self.all_render``.
          """
          get = self.get_sheet(name)
          data = []
          for label, row in get.iterrows():  # iterate row by row
              try:
                  data.append([str(label), float(row.tolist()[0])])
              except Exception:
                  # Best effort: a row without a numeric weight is left out.
                  continue
          args = self.parsing_parameters(text)
          c = (
              WordCloud(**self.init_setting(args))
              .add(f"{name}", data, **self.special_setting(args, "WordCloud"))
              .set_global_opts(
                  **self.global_set(args, f"{name}词云", 0, 100, False, False)
              )
          )
          self.all_render[
              f"{name}词云[{len(self.all_render)}]{self.get_title(args)}"
          ] = c
          return c

      def to_liquid(self, name, text) -> Liquid:
          """Build a liquid-fill chart from the top-left cell of sheet ``name``.

          The fill ratio is made from the digits after the decimal point of
          the cell (``12.35`` -> ``0.35``); a value written without a decimal
          point contributes all of its digits (``50`` -> ``0.5``).

          Args:
              name: Name of the sheet to plot.
              text: Option text handed to ``self.parsing_parameters``.

          Returns:
              The ``Liquid`` chart; it is also stored in ``self.all_render``.
          """
          get = self.get_sheet(name)
          raw = str(get.iloc[0, 0])
          parts = raw.split(".")
          try:
              data = float(f"0.{parts[1]}")
          except Exception:
              # No (usable) fractional part: reuse the leading digits instead.
              data = float(f"0.{parts[0]}")
          args = self.parsing_parameters(text)
          c = (
              Liquid(**self.init_setting(args))
              .add(f"{name}", [data, data])
              .set_global_opts(
                  title_opts=opts.TitleOpts(
                      title=f"{name}水球图", subtitle="CoTan~数据处理"
                  )
              )
          )
          self.all_render[
              f"{name}水球图[{len(self.all_render)}]{self.get_title(args)}"
          ] = c
          return c

      def to_gauge(self, name, text) -> Gauge:
          """Build a gauge chart from the top-left cell of sheet ``name``.

          Values above 100 are folded back below 100 by keeping the
          fractional part of ``value / 100`` (``250`` -> ``50.0``).

          Args:
              name: Name of the sheet to plot.
              text: Option text handed to ``self.parsing_parameters``.

          Returns:
              The ``Gauge`` chart; it is also stored in ``self.all_render``.
          """
          get = self.get_sheet(name)
          data = float(get.iloc[0, 0])
          # NOTE(review): the listing lost its indentation; the folding below
          # is assumed to apply only when the value exceeds 100 (the string
          # handling could not run on the float otherwise) -- confirm.
          if data > 100:
              scaled = str(data / 100)
              parts = scaled.split(".")
              try:
                  data = float(f"0.{parts[1]}") * 100
              except Exception:
                  # No "." in the text (e.g. exponent notation): reuse it all.
                  data = float(f"0.{scaled}") * 100
          args = self.parsing_parameters(text)
          c = (
              Gauge(**self.init_setting(args))
              .add(f"{name}", [(f"{name}", data)])
              .set_global_opts(
                  title_opts=opts.TitleOpts(
                      title=f"{name}仪表图", subtitle="CoTan~数据处理"
                  )
              )
          )
          self.all_render[
              f"{name}仪表图[{len(self.all_render)}]{self.get_title(args)}"
          ] = c
          return c
  class SolidPlot(Render):
      """3D charts (bar, scatter, line) drawn from every cell of a sheet."""

      def _prepare_3d(self, name, text):
          """Gather the data and options shared by the three 3D charts.

          Args:
              name: Name of the sheet to plot.
              text: Option text handed to ``self.parsing_parameters``.

          Returns:
              A tuple ``(args, value_list, q, axes)``:
              ``args`` are the parsed options, ``value_list`` holds
              ``[column_position, row_position, value]`` triples, ``q`` holds
              the values that convert to ``float`` (its min/max are passed to
              ``self.global_set`` by the callers) and ``axes`` are the three
              ``*axis3d_opts`` keyword arguments for ``.add``.
          """
          get = self.get_sheet(name)
          x = self.get_column(name, True).tolist()  # chart x axis: column labels
          y = self.get_index(name, True).tolist()  # chart y axis: row labels
          value_list = []
          q = []
          for c in range(len(x)):  # c: column position, r: row position
              for r in range(len(y)):
                  try:
                      v = get.iloc[r, c]  # iloc takes the row first
                      value_list.append([c, r, v])
                      q.append(float(v))
                  except Exception:
                      # Best effort: a non-numeric cell stays in value_list
                      # but does not take part in the min/max range.
                      pass
          args = self.parsing_parameters(text)
          if not q:
              q = [0, 100]
              # NOTE(review): the listing lost its indentation; switching the
              # visual mapping off is assumed to belong to this "no numeric
              # data" fallback -- confirm against the original file.
              args["show_Visual_mapping"] = False
          axes = {
              "xaxis3d_opts": opts.Axis3DOpts(
                  list(map(str, x)), type_=args["x_type"]
              ),
              "yaxis3d_opts": opts.Axis3DOpts(
                  list(map(str, y)), type_=args["y_type"]
              ),
              "zaxis3d_opts": opts.Axis3DOpts(type_=args["z_type"]),
          }
          return args, value_list, q, axes

      def to_bar3d(self, name, text) -> Bar3D:
          """Draw sheet ``name`` as a 3D bar chart and register it.

          Args:
              name: Name of the sheet to plot.
              text: Option text handed to ``self.parsing_parameters``.

          Returns:
              The ``Bar3D`` chart; it is also stored in ``self.all_render``.
          """
          args, value_list, q, axes = self._prepare_3d(name, text)
          c = (
              Bar3D(**self.init_setting(args))
              .add(f"{name}", value_list, **axes)
              .set_global_opts(
                  **self.global_set(args, f"{name}3D柱状图", min(q), max(q), True),
              )
          )
          if args["bar_Stacking"]:
              c.set_series_opts(**{"stack": "stack"})  # stacked bars
          self.all_render[
              f"{name}3D柱状图[{len(self.all_render)}]{self.get_title(args)}"
          ] = c
          return c

      def to_scatter3d(self, name, text) -> Scatter3D:
          """Draw sheet ``name`` as a 3D scatter chart and register it.

          Args:
              name: Name of the sheet to plot.
              text: Option text handed to ``self.parsing_parameters``.

          Returns:
              The ``Scatter3D`` chart; it is also stored in ``self.all_render``.
          """
          args, value_list, q, axes = self._prepare_3d(name, text)
          c = (
              Scatter3D(**self.init_setting(args))
              .add(f"{name}", value_list, **axes)
              .set_global_opts(
                  **self.global_set(args, f"{name}3D散点图", min(q), max(q), True)
              )
          )
          self.all_render[
              f"{name}3D散点图[{len(self.all_render)}]{self.get_title(args)}"
          ] = c
          return c

      def to_line3d(self, name, text) -> Line3D:
          """Draw sheet ``name`` as a 3D line chart and register it.

          Args:
              name: Name of the sheet to plot.
              text: Option text handed to ``self.parsing_parameters``.

          Returns:
              The ``Line3D`` chart; it is also stored in ``self.all_render``.
          """
          args, value_list, q, axes = self._prepare_3d(name, text)
          c = (
              Line3D(**self.init_setting(args))
              .add(
                  f"{name}",
                  value_list,
                  grid3d_opts=opts.Grid3DOpts(width=100, height=100, depth=100),
                  **axes,
              )
              .set_global_opts(
                  **self.global_set(args, f"{name}3D折线图", min(q), max(q), True)
              )
          )
          self.all_render[
              f"{name}3D折线图[{len(self.all_render)}]{self.get_title(args)}"
          ] = c
          return c
  class Plot(AxisPlot, GeneralPlot, RelationshipPlot, GeographyPlot, WordPlot, SolidPlot):
      """Combines every chart family and manages the registry of charts."""

      def custom_graph(self, text):
          """Run user-written chart code and register the chart it leaves in ``c``.

          The code runs in a namespace holding this module's globals plus
          ``self`` and ``text``.  ``c`` starts out as an empty ``Page`` so the
          code may either add charts to it or rebind ``c`` to its own chart.

          NOTE: ``text`` is executed with ``exec`` and therefore has the full
          privileges of this process; only pass code from a trusted user.

          Args:
              text: Python source to execute.

          Returns:
              The object bound to ``c`` after the code ran; it is also stored
              in ``self.all_render``.
          """
          named_domain = {}
          named_domain.update(locals())
          named_domain.update(globals())
          # Create the default page BEFORE running the user code.  Creating it
          # afterwards (as before) replaced whatever the code had built with
          # an empty page.
          named_domain["c"] = Page()
          exec(text, named_domain)
          chart = named_domain["c"]
          self.all_render[f"自定义图[{len(self.all_render)}]"] = chart
          return chart

      def get_all_render(self):
          """Return a shallow copy of the chart registry."""
          return self.all_render.copy()

      def del_render(self, key):
          """Remove the chart registered under ``key`` (KeyError if absent)."""
          del self.all_render[key]

      def clean_render(self):
          """Forget every registered chart."""
          self.all_render = {}
  class MachineLearnerBase(Plot):
      """Shared state and option parsing for the machine-learning helpers."""

      def __init__(self, *args, **kwargs):
          """Set up the learner registries on top of the ``Plot`` state."""
          super().__init__(*args, **kwargs)
          self.learner = {}  # stored learner objects, by key
          # Learner kind -> (estimator class, names of the options it takes).
          self.learn_dict = dict(
              Line=(LinearRegression, ()),
              Ridge=(Ridge, ("alpha", "max_iter")),
              Lasso=(Lasso, ("alpha", "max_iter")),
              LogisticRegression=(LogisticRegression, ("C",)),
              Knn=(KNeighborsClassifier, ("n_neighbors",)),
              Knn_class=(KNeighborsRegressor, ("n_neighbors",)),
          )
          self.learner_type = {}  # kind of each stored learner, by key

      def parsing(self, parameters):  # parse the option text
          """Execute ``parameters`` as Python and read the options it defines.

          NOTE: the text is run with ``exec``; it must come from a trusted
          source.

          Args:
              parameters: Python source assigning any of ``alpha``, ``C``,
                  ``max_iter``, ``K_knn`` and ``nDim_2``.

          Returns:
              A dict with the keys ``alpha``, ``C``, ``max_iter``,
              ``n_neighbors`` and ``nDim_2``; options that were not assigned
              get their defaults (1.0, 1.0, 1000, 5, True).
          """
          namespace = {}
          exec(parameters, namespace)
          pick = namespace.get
          return {
              "alpha": float(pick("alpha", 1.0)),
              "C": float(pick("C", 1.0)),
              "max_iter": int(pick("max_iter", 1000)),
              # The option is called K_knn in the text, n_neighbors here.
              "n_neighbors": int(pick("K_knn", 5)),
              "nDim_2": bool(pick("nDim_2", True)),
          }

      def get_learner(self, name):
          """Return the learner stored under ``name`` (KeyError if absent)."""
          return self.learner[name]

      def get_learner_type(self, name):
          """Return the kind recorded for ``name`` (KeyError if absent)."""
          return self.learner_type[name]
  class VisualLearner(MachineLearnerBase):
      """Reports the fitted attributes of a stored learner."""

      def visual_learner(self, learner, new=False):  # show the parameters
          """Describe the learner stored under the key ``learner``.

          Args:
              learner: Key of the learner in ``self.learner`` and
                  ``self.learner_type``.
              new: When true, the attribute table is also stored through
                  ``self.add_sheet`` under ``"<learner>:属性"``.

          Returns:
              ``(doc, data)``: a text summary and a one-column
              ``pandas.DataFrame`` of the attributes, or ``("", [])`` when
              the learner's kind is not handled here.
          """
          # Keep the key and the estimator in separate names: the kind lookup
          # and the sheet title need the key.  Rebinding ``learner`` to the
          # estimator (as before) made the kind lookup fail with KeyError.
          model = self.get_learner(learner)
          learner_type = self.get_learner_type(learner)
          distance_names = {1: "曼哈顿距离", 2: "欧几里得距离"}

          if learner_type in ("Ridge", "Lasso"):
              alpha = model.alpha
              w = model.coef_.tolist()  # weights
              b = model.intercept_  # intercept
              max_iter = model.max_iter
              w_name = [f"权重:W[{i}]" for i in range(len(w))]
              index = ["阿尔法:Alpha"] + w_name + ["截距:b", "最大迭代数"]
              data = [alpha] + w + [b] + [max_iter]
              doc = (
                  f"阿尔法:alpha = {alpha}\n\n权重:\nw = \n{pd.DataFrame(w)}\n\n截距:b = {b}\n\n最大迭代数:{max_iter}"
                  f"\n\n\nEND"
              )
          elif learner_type in ("Line",):
              w = model.coef_.tolist()  # weights
              b = model.intercept_  # intercept
              index = [f"权重:W[{i}]" for i in range(len(w))] + ["截距:b"]
              data = w + [b]
              doc = f"权重:w = \n{pd.DataFrame(w)}\n\n截距:b = {b}\n\n\nEND"
          elif learner_type in ("Knn",):
              classes = model.classes_.tolist()  # class labels
              n = model.n_neighbors
              p = distance_names.get(model.p)
              index = [f"类目[{i}]" for i in range(len(classes))] + ["邻居个数", "距离公式"]
              data = classes + [n, p]
              doc = f"分类类目:\n{pd.DataFrame(classes)}\n\n邻居个数:{n}\n\n计算距离的方式:{p}\n\n\nEND"
          elif learner_type in ("Knn_class",):
              n = model.n_neighbors
              p = distance_names.get(model.p)
              index = ["邻居个数", "距离公式"]
              data = [n, p]
              doc = f"邻居个数:{n}\n\n计算距离的方式:{p}\n\n\nEND"
          elif learner_type in ("LogisticRegression",):
              classes = model.classes_.tolist()  # class labels
              w = model.coef_.tolist()  # weights, one row per entry of coef_
              b = model.intercept_
              c = model.C
              index = (
                  [f"类目[{i}]" for i in range(len(classes))]
                  + [f"权重:W[{j}][{i}]" for i in range(len(w)) for j in range(len(w[i]))]
                  + [f"截距:b[{i}]" for i in range(len(b))]
                  + ["C"]
              )
              data = classes + [j for i in w for j in i] + [i for i in b] + [c]
              doc = f"分类类目:\n{pd.DataFrame(classes)}\n\n权重:w = \n{pd.DataFrame(w)}\n\n截距:b = {b}\n\nC={c}\n\n\n"
          else:
              return "", []

          data = pd.DataFrame(data, index=index)
          if new:
              self.add_sheet(data, f"{learner}:属性")
          return doc, data
  1941. class Learner(MachineLearnerBase):
  def decision_tree_classifier(self, name):  # feature extraction
      """Vectorise sheet ``name`` with ``DictVectorizer`` and store the result.

      The rows of the sheet are turned into dict records, fitted and
      transformed by a fresh ``DictVectorizer``, and the dense result is saved
      as a new sheet named ``"<name>:特征"``.

      Args:
          name: Name of the sheet to vectorise.

      Returns:
          The feature ``pandas.DataFrame``, with the vectorizer's feature
          names as columns.
      """
      records = self.get_sheet(name).to_dict(orient="records")
      vectorizer = DictVectorizer()
      matrix = vectorizer.fit_transform(records).toarray()
      features = pd.DataFrame(matrix, columns=vectorizer.feature_names_)
      self.add_sheet(features, f"{name}:特征")
      return features
  def training_machine_core(
      self, name, learner, score_only=False, down_ndim=True, split=0.3, **kwargs
  ):
      """Train and/or score the stored model ``learner`` on sheet ``name``.

      The sheet's cells are the features ``x`` and the sheet's index (as
      returned by ``self.get_index(name, True)``) is the target ``y``.

      Args:
          name: Name of the sheet holding the data.
          learner: Key of the model in the learner registry.
          score_only: When true the model is not fitted; it is only scored
              on the whole data set.
          down_ndim: When true every sample is flattened to one dimension.
              Flattening is always applied to 1-D input, which turns each
              scalar into a one-feature sample.
          split: ``test_size`` handed to ``train_test_split``.
          **kwargs: Accepted for call compatibility; not used.

      Returns:
          ``(train_score, test_score)``; ``train_score`` is ``0`` when
          ``score_only`` is true.
      """
      get = self.get_sheet(name)
      x = get.to_numpy()
      y = self.get_index(name, True)  # the index serves as y
      if down_ndim or x.ndim == 1:
          # np.ravel makes every sample 1-D, scalars included.  The previous
          # ``i.np.ravel(...)`` always raised, so nothing was ever reshaped.
          x = np.array([np.ravel(sample, order="C") for sample in x])
      model = self.get_learner(learner)
      if score_only:
          # Score only: all of the data is used for testing.
          return 0, model.score(x, y)
      train_x, test_x, train_y, test_y = train_test_split(x, y, test_size=split)
      model.fit(train_x, train_y)
      train_score = model.score(train_x, train_y)
      test_score = model.score(test_x, test_y)
      return train_score, test_score
  def training_machine(self, name, learnner, parameters="", **kwargs):
      """Train the stored learner ``learnner`` on sheet ``name``.

      Args:
          name: Name of the sheet holding the data.
          learnner: Key of the learner in the registry.
          parameters: Option text for ``self.parsing``; only ``nDim_2`` is
              used here.
          **kwargs: Forwarded to ``self.training_machine_core``.

      Returns:
          Whatever ``self.training_machine_core`` returns, or ``None`` when
          the learner's kind is not one of the supported kinds.
      """
      supported = (
          "Line",
          "Ridge",
          "Lasso",
          "LogisticRegression",
          "Knn",
          "Knn_class",
      )
      kind = self.get_learner_type(learnner)
      options = self.parsing(parameters)
      if kind not in supported:
          return None
      flatten = options["nDim_2"]
      return self.training_machine_core(name, learnner, down_ndim=flatten, **kwargs)
  def predict_simp(self, name, learner, down_ndim=True, **kwargs):
      """Predict with the stored model ``learner`` on sheet ``name``.

      The result is a new sheet, saved as ``"<name>:预测"``, holding the input
      features with the predictions as its index.

      Args:
          name: Name of the sheet holding the features.
          learner: Key of the model in the learner registry.
          down_ndim: When true every sample is flattened to one dimension.
              Flattening is always applied to 1-D input, which turns each
              scalar into a one-feature sample.
          **kwargs: Accepted for call compatibility; not used.

      Returns:
          The ``pandas.DataFrame`` that was stored.
      """
      get = self.get_sheet(name)
      column = self.get_column(name, True)
      x = get.to_numpy()
      if down_ndim or x.ndim == 1:
          # np.ravel makes every sample 1-D, scalars included.  The previous
          # ``i.np.ravel(...)`` always raised, so nothing was ever reshaped.
          x = np.array([np.ravel(sample, order="C") for sample in x])
      model = self.get_learner(learner)
      answer = model.predict(x)
      data = pd.DataFrame(x, index=answer, columns=column)
      self.add_sheet(data, f"{name}:预测")
      return data
  def predict(self, name, learner, parameters="", **kwargs):
      """Predict with the stored learner ``learner`` on sheet ``name``.

      Args:
          name: Name of the sheet holding the features.
          learner: Key of the learner in the registry.
          parameters: Option text for ``self.parsing``; only ``nDim_2`` is
              used here.
          **kwargs: Forwarded to ``self.predict_simp``.

      Returns:
          Whatever ``self.predict_simp`` returns, or ``None`` when the
          learner's kind is not one of the supported kinds.
      """
      supported = (
          "Line",
          "Ridge",
          "Lasso",
          "LogisticRegression",
          "Knn",
          "Knn_class",
      )
      kind = self.get_learner_type(learner)
      options = self.parsing(parameters)
      if kind not in supported:
          return None
      flatten = options["nDim_2"]
      return self.predict_simp(name, learner, down_ndim=flatten, **kwargs)