Learn.py 83 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160216121622163216421652166
  1. from random import randint
  2. import re
  3. from os import getcwd
  4. import numpy as np
  5. from sklearn.feature_extraction import DictVectorizer
  6. from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
  7. from sklearn.linear_model import *
  8. from sklearn.model_selection import train_test_split
  9. from pyecharts.components import Table
  10. from pyecharts.globals import SymbolType
  11. from pyecharts.charts import *
  12. from pyecharts import options as opts
  13. import pandas as pd
  14. import pandas_profiling as pp
  15. from pyecharts.globals import CurrentConfig
  16. from pyecharts.globals import GeoType # 地图推荐使用GeoType而不是str
  17. CurrentConfig.ONLINE_HOST = f"{getcwd()}/assets/"
  class Form:
      """Registry of named pandas sheets with viewing, cleaning, renaming and export helpers.

      Sheets live in ``self.sheet_dict`` keyed by a generated name.  Most
      transforming methods work on a copy of the stored sheet and register the
      result as a new sheet instead of overwriting the source.
      """

      def __init__(self, *args, **kwargs):
          """Create empty registries; ``args`` and ``kwargs`` are accepted but unused."""

          class Del:
              """Marker type; its single instance (``self.DEL``) requests a deletion in ``data_clean``."""

          self.sheet_dict = {}
          self.clean_func = {}
          self.clean_func_code = {}
          self.DEL = Del()
          # Namespace handed to user-supplied cleaning code (see ``add_clean_func``).
          self.Name = {"pd": pd, "DEL": self.DEL, "re": re, "Sheet": self.sheet_dict}
          self.all_render = {}  # holds every rendered chart

      # ------------------------------------------------------------------ helpers
      @staticmethod
      def _as_int(value):
          """Return ``int(value)`` when the conversion works, otherwise ``value`` unchanged."""
          try:
              return int(value)
          except Exception:
              return value

      def _build_table(self, name):
          """Build the pyecharts ``Table`` component that displays sheet ``name``."""
          get = self.get_sheet(name)
          headers = [f"{name}"] + self.get_column(name, True).tolist()
          rows = [[f"{label}"] + series.tolist() for label, series in get.iterrows()]
          table = Table()
          table.add(headers, rows).set_global_opts(
              title_opts=opts.ComponentTitleOpts(
                  title=f"表格:{name}", subtitle="CoTan~数据处理:查看表格"
              )
          )
          return table

      def _relabel(self, name, is_column, save, make_labels):
          """Relabel one axis of a copy of sheet ``name`` and register the copy.

          ``make_labels`` receives the current labels of the chosen axis and
          returns the new ones.  When ``save`` is true the old labels are kept as
          an extra row named "column" (column axis) or an extra column named
          "row" (row axis) before relabelling.
          """
          get = self.get_sheet(name).copy()
          if is_column:
              column = self.get_column(name, True)
              if save:
                  get.loc["column"] = column
              get.columns = make_labels(column)
          else:
              row = self.get_index(name, True)
              if save:
                  get.loc[:, "row"] = row
              get.index = make_labels(row)
          self.add_sheet(get, f"{name}")
          return get

      # ------------------------------------------------------------ sheet registry
      def get_sheet(self, name, all_row=None, all_colunm=None) -> pd.DataFrame:
          """Return the sheet stored under ``name`` (raises ``KeyError`` if absent).

          ``all_row`` / ``all_colunm`` are forwarded to the pandas display options
          ``display.max_rows`` / ``display.max_columns``; failures there are ignored.
          """
          try:
              pd.set_option("display.max_rows", all_row)
              pd.set_option("display.max_columns", all_colunm)
          except Exception:
              pass  # display tuning is best-effort only
          return self.sheet_dict[name]

      def describe(self, name, new=False):
          """Return a text summary of sheet ``name``; optionally store ``describe()`` as a sheet."""
          get = self.get_sheet(name)
          des = get.describe()
          if new:
              self.add_sheet(des, f"{name}_describe[{len(self.sheet_dict)}]")
          shape = get.shape
          dtype = get.dtypes
          n = get.ndim
          head = get.head()
          tail = get.tail(3)
          return (
              f"1)基本\n{des}\n\n2)形状:{shape}\n\n3)数据类型\n{dtype}\n\n4)数据维度:{n}\n\n5)头部数据\n{head}"
              f"\n\n6)尾部数据\n{tail}\n\n7)行名\n{get.index}\n\n8)列名\n{get.columns}"
          )

      def add_sheet(self, data, name=""):
          """Store ``data`` under a generated key and return ``data``.

          An empty ``name`` becomes ``Sheet[<count>]``; otherwise ``_[<count>]`` is
          appended, where ``<count>`` is the number of sheets already stored.
          """
          if name == "":
              name = f"Sheet[{len(self.sheet_dict)}]"
          else:
              name += f"_[{len(self.sheet_dict)}]"
          self.sheet_dict[name] = data
          return data

      def del_sheet(self, name):
          """Remove the sheet stored under ``name`` (raises ``KeyError`` if absent)."""
          del self.sheet_dict[name]

      def __add_sheet(self, data_dir, func, name="", index=True, **kwargs):
          """Load a sheet with ``func(data_dir, **kwargs)`` and register it.

          Returns ``False`` when the reader raises ``UnicodeDecodeError``.  When
          ``index`` is false, the first column is promoted to the index and dropped.
          """
          try:
              data = func(data_dir, **kwargs)
          except UnicodeDecodeError:  # the file could not be decoded
              return False
          if not index:
              data.index = data.iloc[:, 0].tolist()
              data.drop(data.columns.values.tolist()[0], inplace=True, axis=1)
          return self.add_sheet(data, name)

      def add_csv(
          self, data_dir, name="", sep=",", encodeding="utf-8", str_=True, index=True
      ):
          """Read a CSV file into a new sheet; ``str_`` keeps every cell as ``object`` dtype."""
          k = {"dtype": "object"} if str_ else {}
          return self.__add_sheet(
              data_dir, pd.read_csv, name, index, sep=sep, encoding=encodeding, **k
          )

      def add_python(self, python_file, sheet_name="") -> pd.DataFrame:
          """Execute Python source defining ``Creat()`` and store its result as a sheet.

          SECURITY: ``python_file`` is executed with ``exec``; only pass trusted code.

          The result is used as-is when it is a DataFrame.  A NumPy array is
          converted through ``tolist()``, or, when the script sets a truthy
          ``downNdim``, each first-axis item is flattened into one row.  Anything
          else is handed to ``pd.DataFrame`` (wrapped in a list as a fallback).
          """
          name = {"Sheet": self.get_sheet}
          name.update(globals().copy())
          name.update(locals().copy())
          exec(python_file, name)
          exec("get = Creat()", name)
          if isinstance(name["get"], pd.DataFrame):  # already a DataFrame
              get = name["get"]
          elif isinstance(name["get"], np.ndarray):  # np.array is a function, not a type
              if bool(name.get("downNdim", False)):  # flatten every first-axis item
                  array = []
                  for i in name["get"]:
                      try:
                          array.append(np.ravel(i, "C"))
                      except Exception:
                          array.append(i)
                  get = pd.DataFrame(array)
              else:
                  get = pd.DataFrame(name["get"].tolist())
          else:
              try:
                  get = pd.DataFrame(name["get"])
              except Exception:
                  get = pd.DataFrame([name["get"]])
          self.add_sheet(get, sheet_name)
          return get

      def add_html(self, data_dir, name="", encoding="utf-8", str_=True, index=True):
          """Read an HTML document through ``pd.read_html`` and register the result."""
          # NOTE(review): pd.read_html returns a list of DataFrames and, as far as
          # its documented signature shows, takes no ``dtype`` argument, so this
          # call likely fails when ``str_`` is true -- confirm and pick a table.
          k = {"dtype": "object"} if str_ else {}
          return self.__add_sheet(
              data_dir, pd.read_html, name, index, encoding=encoding, **k
          )

      def get_sheet_list(self):
          """Return the stored sheet names as a list."""
          return list(self.sheet_dict.keys())

      # ------------------------------------------------------------------ rendering
      def render_html_one(self, name, render_dir=""):
          """Render sheet ``name`` as an HTML table and return the output path."""
          if render_dir == "":
              render_dir = f"{name}.html"
          self._build_table(name).render(render_dir)
          return render_dir

      def render_html_all(self, name, tab_render_dir="", render_type=0):
          """Render every sheet into one HTML page, with sheet ``name`` first.

          ``render_type`` 0 uses a ``Tab`` container, 1 a draggable ``Page`` and
          any other value a simple ``Page``.  Sheets that fail to render are skipped.
          """
          if tab_render_dir == "":
              tab_render_dir = f"{name}.html"
          # Put the requested sheet first.
          sheet_dict = self.sheet_dict.copy()
          del sheet_dict[name]
          sheet_list = [name] + list(sheet_dict.keys())

          if render_type == 0:
              container = Tab(page_title="CoTan:查看表格")

              def add(table, title):
                  container.add(table, title)

          else:
              layout = (
                  Page.DraggablePageLayout if render_type == 1 else Page.SimplePageLayout
              )
              container = Page(page_title="CoTan:查看表格", layout=layout)

              def add(table, title):
                  container.add(table)  # a Page takes no tab title

          for sheet_name in sheet_list:
              try:
                  add(self._build_table(sheet_name), f"表格:{sheet_name}")
              except Exception:
                  pass  # skip sheets that cannot be rendered
          container.render(tab_render_dir)
          return tab_render_dir

      def sheet_profile_report_core(self, sheet, save_dir):
          """Write a pandas-profiling report for ``sheet`` to ``save_dir``."""
          report = pp.ProfileReport(sheet)
          report.to_file(save_dir)

      def to_report(self, name, save_dir=""):
          """Write a profiling report for sheet ``name`` and return the output path."""
          if save_dir == "":
              save_dir = f"{name}.html"
          sheet = self.get_sheet(name)
          self.sheet_profile_report_core(sheet, save_dir)
          return save_dir

      # ------------------------------------------------------------------ inspection
      def get_column(self, name, only=False):
          """Return the column labels (``only``) or one descriptive string per column."""
          get = self.get_sheet(name)
          if only:
              return get.columns.values
          return [
              f"[列号:{number}]{label} -> {get[label].to_list()}"
              for number, label in enumerate(get.columns.values)
          ]

      def get_index(self, name, only=False):
          """Return the row labels (``only``) or one descriptive string per row."""
          get = self.get_sheet(name)
          if only:
              return get.index.values
          return [
              f"[行号:{number}]{label} -> {get.iloc[number].to_list()}"
              for number, label in enumerate(get.index.values)
          ]

      # --------------------------------------------------------------- sort / slice
      def sorted_index(self, name, row: bool, new=False, a=True):
          """Sort by row labels (``row`` true) or column labels; ``a`` means ascending."""
          get = self.get_sheet(name)
          sorted_sheet = get.sort_index(axis=0 if row else 1, ascending=a)
          if new:
              self.add_sheet(sorted_sheet, f"{name}:排序")
          return sorted_sheet

      def stored_value(self, name, collation, new=False):
          """Sort by values; ``collation`` holds ``(column_position, ascending)`` pairs."""
          get = self.get_sheet(name)
          row = get.columns.values
          by = [row[item[0]] for item in collation]
          ascending = [item[1] for item in collation]
          if len(by) == 1:
              by = by[0]
              ascending = ascending[0]
          sorted_sheet = get.sort_values(by=by, ascending=ascending)
          if new:
              self.add_sheet(sorted_sheet, f"{name}:排序")
          return sorted_sheet

      def transpose(self, name, new=True):
          """Return a transposed copy of sheet ``name``."""
          get = self.get_sheet(name)
          t = get.T.copy()  # copy so the result is independent of the source
          if new:
              self.add_sheet(t, f"{name}.T")
          return t

      def get_slice(
          self, name, column, row, is_iloc=True, new=False
      ):  # iloc(Row,Column) or loc
          """Return ``iloc[row, column]`` (or ``loc`` when ``is_iloc`` is false)."""
          get = self.get_sheet(name)
          if is_iloc:
              new_sheet = get.iloc[row, column]
          else:
              new_sheet = get.loc[row, column]
          if new:
              self.add_sheet(new_sheet, f"{name}:切片")
          return new_sheet

      def del_slice(self, name, column, row, new):
          """Drop the columns and rows at the given positions; invalid positions are ignored."""
          new_sheet = self.get_sheet(name)
          column_list = new_sheet.columns.values
          for i in column:
              try:
                  new_sheet = new_sheet.drop(column_list[int(i)], axis=1)
              except Exception:
                  pass
          row_list = new_sheet.index.values
          for i in row:
              try:
                  new_sheet = new_sheet.drop(row_list[int(i)])
              except Exception:
                  pass
          if new:
              self.add_sheet(new_sheet, f"{name}:删减")
          return new_sheet

      def to_bool(self, name, exp, new=False):
          """Evaluate ``exp`` with ``S`` (the sheet) and ``Sheet`` (its ``iloc``) in scope.

          SECURITY: ``exp`` goes through ``eval``; only pass trusted expressions.
          Returns ``None`` when evaluation fails.
          """
          get = self.get_sheet(name)
          try:
              bool_sheet = eval(exp, {"S": get, "Sheet": get.iloc})
              if new:
                  self.add_sheet(bool_sheet, f"{name}:布尔")
              return bool_sheet
          except Exception:
              return None

      def is_nan(self, name):
          """Return the boolean missing-value mask of sheet ``name``."""
          return pd.isna(self.get_sheet(name))

      def del_nan(self, name, new):
          """Return sheet ``name`` without the rows that contain missing values."""
          get = self.get_sheet(name)
          clean_sheet = get.dropna(axis=0)
          if new:
              self.add_sheet(clean_sheet, f"{name}:清洗")
          return clean_sheet

      # ------------------------------------------------------------------- cleaning
      def add_clean_func(self, code):
          """Register a cleaning rule defined by Python source ``code``.

          SECURITY: ``code`` is executed with ``exec``; only pass trusted code.

          The code may define ``Done_Row``, ``Done_Column``, ``axis``, ``check``,
          ``done`` and ``name``.  Returns ``False`` when the code fails to execute,
          otherwise ``None``.
          """
          name = self.Name.copy()
          try:
              exec(code, name)
          except Exception:
              return False
          func_dict = {
              "Done_Row": name.get("Done_Row", []),
              "Done_Column": name.get("Done_Column", []),
              "axis": name.get("axis", True),
              "check": name.get("check", lambda data, x, b, c, d, e: True),
              "done": name.get("done", lambda data, x, b, c, d, e: data),
          }
          label = name.get("name", f"[{len(self.clean_func)}")
          title = (
              f"[{label}] Done_Row={func_dict['Done_Row']}_Done_Column="
              f"{func_dict['Done_Column']}_axis={func_dict['axis']}"
          )
          self.clean_func[title] = func_dict
          self.clean_func_code[title] = code

      def get_clean_func(self):
          """Return the titles of the registered cleaning rules."""
          return list(self.clean_func.keys())

      def del_clean_func(self, key):
          """Remove the cleaning rule titled ``key``; unknown keys are ignored."""
          try:
              del self.clean_func[key]
              del self.clean_func_code[key]
          except (KeyError, TypeError):
              pass

      def del_all_clean_func(self):
          """Remove every registered cleaning rule."""
          self.clean_func = {}
          self.clean_func_code = {}

      def get_clean_code(self, key):
          """Return the source code of the cleaning rule titled ``key``."""
          return self.clean_func_code[key]

      def data_clean(self, name):
          """Apply every registered cleaning rule to a copy of sheet ``name``.

          For each targeted cell, ``check`` is called with the cell value, the row
          and column positions, a copy of the sheet, a copy of ``iloc[row]`` and a
          copy of ``iloc[:, column]``.  When it returns a falsy value, ``done`` is
          called with the same arguments: returning ``self.DEL`` drops the row
          (rule ``axis`` truthy) or the column (``axis`` falsy); any other value
          replaces the cell.  Errors on a single cell are ignored so one bad cell
          does not abort the pass.  The cleaned copy is stored as a new sheet.
          """
          get = self.get_sheet(name).copy()
          for rule in list(self.clean_func.values()):
              done_row = rule["Done_Row"]
              done_column = rule["Done_Column"]
              if done_row == []:
                  done_row = range(get.shape[0])  # shape = (rows, columns)
              if done_column == []:
                  done_column = range(get.shape[1])
              axis = 0 if rule["axis"] else 1
              check = rule["check"]
              done = rule["done"]
              for row in done_row:
                  for column in done_column:
                      try:
                          # Positions are spliced into the expression as text, as
                          # before, so textual selectors keep working.
                          scope = {"get": get}
                          data = eval(f"get.iloc[{row},{column}]", scope)
                          column_data = eval(f"get.iloc[{row}]", scope)
                          row_data = eval(f"get.iloc[:,{column}]", scope)
                          if check(
                              data,
                              row,
                              column,
                              get.copy(),
                              column_data.copy(),
                              row_data.copy(),
                          ):
                              continue
                          d = done(
                              data,
                              row,
                              column,
                              get.copy(),
                              column_data.copy(),
                              row_data.copy(),
                          )
                          if d is self.DEL:
                              if axis == 0:  # drop the row
                                  get = get.drop(get.index.values[int(row)])
                              else:  # drop the column (index by column, not row)
                                  get = get.drop(
                                      get.columns.values[int(column)], axis=1
                                  )
                          else:
                              # Pass the value as an object; formatting it into the
                              # source text broke strings and other non-literals.
                              exec(
                                  f"get.iloc[{row},{column}] = value",
                                  {"get": get, "value": d},
                              )
                      except Exception:
                          pass
          self.add_sheet(get, f"{name}:清洗")
          return get

      # --------------------------------------------------------------------- dtypes
      def set_dtype(self, name, column, dtype, wrong):
          """Convert columns with ``pd.to_numeric`` / ``to_datetime`` / ``to_timedelta``.

          ``dtype`` is "Num", "Date" or "Time" (unknown values fall back to
          numeric); an empty string runs ``infer_objects`` instead.  ``column``
          lists column positions, an empty list meaning the whole sheet.
          ``wrong`` is forwarded as the converters' ``errors`` argument.
          """
          get = self.get_sheet(name).copy()
          column = [self._as_int(item) for item in column]  # leave the caller's list alone
          if dtype != "":
              func_dic = {
                  "Num": pd.to_numeric,
                  "Date": pd.to_datetime,
                  "Time": pd.to_timedelta,
              }
              converter = func_dic.get(dtype, pd.to_numeric)
              if column != []:
                  get.iloc[:, column] = get.iloc[:, column].apply(
                      converter, errors=wrong
                  )
              else:
                  get = get.apply(converter, errors=wrong)
          else:
              if column != []:
                  get.iloc[:, column] = get.iloc[:, column].infer_objects()
              else:
                  get = get.infer_objects()
          self.add_sheet(get, f"{name}")
          return get

      def as_dtype(self, name, column, dtype, wrong):
          """Cast columns with ``astype``.

          ``dtype`` may be "Int", "Float", "Str", "Date" or "TimeDelta"; any other
          value is passed to ``astype`` unchanged.  ``column`` lists column
          positions, an empty list meaning the whole sheet.
          """
          get = self.get_sheet(name).copy()
          column = [self._as_int(item) for item in column]  # leave the caller's list alone
          func_dic = {
              "Int": int,
              "Float": float,
              "Str": str,
              "Date": pd.Timestamp,
              "TimeDelta": pd.Timedelta,
          }
          target = func_dic.get(dtype, dtype)
          if column != []:
              get.iloc[:, column] = get.iloc[:, column].astype(target, errors=wrong)
          else:
              get = get.astype(target, errors=wrong)
          self.add_sheet(get, f"{name}")
          return get

      # ------------------------------------------------------------------- renaming
      def replace_index(self, name, is_column, rename, save):
          """Rename column or row labels through the mapping ``rename``.

          Works on a copy, so the stored source sheet is not modified.
          """
          get = self.get_sheet(name).copy()
          if is_column:
              if save:  # keep the old labels as an extra row
                  get.loc["column"] = self.get_column(name, True)
              new = get.rename(columns=rename)
          else:
              if save:  # keep the old labels as an extra column
                  get.loc[:, "row"] = self.get_index(name, True)
              new = get.rename(index=rename)
          self.add_sheet(new, f"{name}")
          return new

      def change_index(
          self,
          name: str,
          is_column: bool,
          iloc: int,
          save: bool = True,
          drop: bool = False,
      ):
          """Use the row (``is_column``) or column at position ``iloc`` as the new labels."""
          get = self.get_sheet(name).copy()
          if is_column:  # new column labels come from a row
              row = self.get_index(name, True)
              t = row.tolist()[iloc]
              if save:  # keep the old labels as an extra row
                  get.loc["column"] = self.get_column(name, True)
              get.columns = get.loc[t].values
              if drop:
                  get.drop(t, axis=0, inplace=True)  # drop the source row
          else:  # new row labels come from a column
              column = self.get_column(name, True)
              t = column.tolist()[iloc]
              if save:
                  get.loc[:, "row"] = self.get_index(name, True)
              get.index = get.loc[:, t].values
              if drop:
                  get.drop(t, axis=1, inplace=True)  # drop the source column
          self.add_sheet(get, f"{name}")
          return get

      def number_naming(self, name, is_column, save):
          """Replace the labels of one axis with 0..n-1."""
          return self._relabel(
              name, is_column, save, lambda old: list(range(len(old)))
          )

      def name_with_number(self, name, is_column, save):
          """Prefix every label of one axis with its position, as ``[i]label``."""
          return self._relabel(
              name,
              is_column,
              save,
              lambda old: [f"[{i}]{label}" for i, label in enumerate(old)],
          )

      def data_naming(self, name, is_column, save, **data_init):
          """Replace the labels of one axis with a ``pd.date_range``.

          ``data_init`` supplies ``start`` / ``end`` / ``freq``; ``periods`` is
          always set to the axis length.  Works on a copy, so the stored source
          sheet is not modified.
          """
          return self._relabel(
              name,
              is_column,
              save,
              lambda old: pd.date_range(**{**data_init, "periods": len(old)}),
          )

      def time_naming(self, name, is_column, save, **time_init):
          """Replace the labels of one axis with a ``pd.timedelta_range``.

          ``time_init`` supplies ``start`` / ``end`` / ``freq``; ``periods`` is
          always set to the axis length.  Works on a copy, so the stored source
          sheet is not modified.
          """
          return self._relabel(
              name,
              is_column,
              save,
              lambda old: pd.timedelta_range(**{**time_init, "periods": len(old)}),
          )

      # --------------------------------------------------------------------- export
      def sample(self, name, new):
          """Return sheet ``name`` with its rows shuffled (``sample(frac=1)``)."""
          get = self.get_sheet(name)
          sample = get.sample(frac=1)
          if new:
              self.add_sheet(sample, f"{name}:打乱")
          return sample

      def to_csv(self, name, save_dir, sep=","):
          """Write sheet ``name`` to ``save_dir`` as CSV; an empty ``sep`` means a comma."""
          if sep == "":
              sep = ","
          get = self.get_sheet(name)
          get.to_csv(save_dir, sep=sep, na_rep="")
  539. class Draw(Form):
  540. # 1)图例位置、朝向和是否显示
  541. # 2)视觉映射是否开启、是否有最大值和最小值、两端文本以及颜色、分段和朝向、size或color
  542. # 3)自动设置图标ID,标题
  543. # 4)工具箱显示
  544. # 5)title配置
  545. # 6)是否显示刻度线、数轴类型、分割线
  def parsing_parameters(self, text):  # parse the textual chart parameters
      """Execute ``text`` as Python and collect the chart options it defines.

      Every recognised option missing from ``text`` receives its default.
      Returns a dict of the effective options.
      """
      parsed = {}  # names defined by the executed text
      exec(text, parsed)
      fetch = parsed.get

      # Options copied through unchanged: name -> default.
      raw_defaults = {
          "title": None,  # chart title (None: filled in later)
          "vice_title": "CoTan~数据处理:",  # subtitle
          "ori_Legend": "horizontal",  # legend orientation
          "min_Visual_mapping": None,  # visual-map minimum (None: computed later)
          "max_Visual_mapping": None,  # visual-map maximum (None: computed later)
          "color_Visual_mapping": None,  # visual-map colour list
          "size_Visual_mapping": None,  # visual-map size list
          "text_Visual_mapping": None,  # visual-map end texts
          "Subsection_list": [],  # visual-map pieces
          "ori_Visual": "vertical",  # visual-map orientation
          "Theme": "white",  # chart theme
          "BG_Color": None,  # background colour
          "width": "900px",  # chart width
          "page_Title": "",  # HTML page title
          "show_Animation": True,  # animation switch (not coerced to bool)
          "x_type": None,  # axis types
          "y_type": None,
          "z_type": None,
          "make_Line": [],  # mark lines
          "Datazoom": "N",  # data-zoom mode
          "Size": 10,  # symbol size
          "Symbol": "circle",  # scatter symbol
          "size_PictorialBar": None,  # pictorial bar size
          "Polar_units": "100",  # polar unit system
          "WordCould_Size": [20, 100],  # word-cloud size range
          "WordCould_Shape": "circle",  # word-cloud shape
          "symbol_Graph": "circle",  # graph node symbol
          "HTML_Type": 2,  # output Page type
          "Map": "china",  # map name
          "map_Type": "2D",  # map type
      }
      # Options coerced with bool(): name -> default.
      flag_defaults = {
          "show_Legend": True,  # show the legend
          "show_Visual_mapping": True,  # show the visual map
          "is_color_Visual_mapping": True,  # map to colour (else size)
          "is_Subsection": False,  # piecewise visual map
          "Tool_BOX": True,  # show the toolbox
          "show_Axis": True,  # show the axes
          "Axis_Zero": False,  # axis line on zero
          "show_Axis_Scale": True,  # show axis ticks
          "show_Text": False,  # show labels
          "bar_Stacking": False,  # stacked bars (2D and 3D)
          "EffectScatter": False,  # effect scatter (2D and 3D)
          "connect_None": False,  # connect across None
          "Smooth_Line": False,  # smooth line
          "Area_chart": False,  # area chart
          "paste_Y": False,  # hug the Y axis
          "step_Line": False,  # step line
          "More": False,  # draw one liquid/gauge chart per value
          "Area_radar": True,  # filled radar
          "show_Map_Symbol": False,  # show map symbols
          "is_Dark": False,  # dark mode
      }

      args_use = {key: fetch(key, default) for key, default in raw_defaults.items()}
      for key, default in flag_defaults.items():
          args_use[key] = bool(fetch(key, default))

      # A truthy "Square" makes the height follow the width.
      if bool(fetch("Square", False)):
          args_use["heigh"] = fetch("width", "900px")
      else:
          args_use["heigh"] = fetch("heigh", "500px")

      args_use["Repulsion"] = float(fetch("Repulsion", 8000))  # repulsion factor

      geo_types = {
          "heatmap": GeoType.HEATMAP,
          "scatter": "scatter",
          "EFFECT": GeoType.EFFECT_SCATTER,
      }
      args_use["Geo_Type"] = geo_types.get(
          fetch("Geo_Type", "heatmap"), GeoType.HEATMAP
      )
      return args_use
  641. # 全局设定,返回一个全局设定的字典,解包即可使用
  def global_set(
      self, args_use, title, min_, max_, data_zoom=False, visual_mapping=True, axis=()
  ):
      """Build the keyword arguments for a chart's global options.

      Args:
          args_use: option dict as produced by ``parsing_parameters``; its
              "title" entry is filled with ``title`` when it is None.
          title: fallback chart title.
          min_, max_: visual-map bounds, used unless ``args_use`` overrides them.
          data_zoom: whether to add data-zoom options according to
              ``args_use["Datazoom"]``.
          visual_mapping: whether this chart supports a visual map at all.
          axis: iterable of axis names ("x", "y", ...) that need axis options.

      Returns:
          A dict meant to be unpacked into the chart's global-option call.
      """
      k = {}
      # Title
      if args_use["title"] is None:
          args_use["title"] = title
      k["title_opts"] = opts.TitleOpts(
          title=args_use["title"], subtitle=args_use["vice_title"]
      )
      # Legend
      if not args_use["show_Legend"]:
          k["legend_opts"] = opts.LegendOpts(is_show=False)
      else:
          k["legend_opts"] = opts.LegendOpts(
              type_="scroll", orient=args_use["ori_Legend"], pos_bottom="2%"
          )  # placed at the bottom so it does not collide with the title
      # Visual map
      if args_use["show_Visual_mapping"] and visual_mapping:
          if args_use["min_Visual_mapping"] is not None:
              min_ = args_use["min_Visual_mapping"]
          if args_use["max_Visual_mapping"] is not None:
              max_ = args_use["max_Visual_mapping"]
          k["visualmap_opts"] = opts.VisualMapOpts(
              type_="color" if args_use["is_color_Visual_mapping"] else "size",
              max_=max_,
              min_=min_,
              range_color=args_use["color_Visual_mapping"],
              range_size=args_use["size_Visual_mapping"],
              range_text=args_use["text_Visual_mapping"],
              is_piecewise=args_use["is_Subsection"],
              pieces=args_use["Subsection_list"],
              orient=args_use["ori_Visual"],
          )
      k["toolbox_opts"] = opts.ToolboxOpts(is_show=args_use["Tool_BOX"])
      if data_zoom:
          zoom = args_use["Datazoom"]
          if zoom == "all":
              k["datazoom_opts"] = [
                  opts.DataZoomOpts(),
                  opts.DataZoomOpts(orient="horizontal"),
              ]
          elif zoom == "horizontal":
              k["datazoom_opts"] = opts.DataZoomOpts(type_="inside")
          elif zoom == "vertical":
              # Previously the options object was built but never stored.
              k["datazoom_opts"] = opts.DataZoomOpts(orient="vertical")
          elif zoom == "inside_vertical":
              k["datazoom_opts"] = opts.DataZoomOpts(type_="inside", orient="vertical")
          elif zoom == "inside_horizontal":
              # This branch used to repeat the "inside_vertical" test and was unreachable.
              k["datazoom_opts"] = opts.DataZoomOpts(
                  type_="inside", orient="horizontal"
              )

      # Axis options for one axis name; only the first character of the name is used.
      def axis_seeting(args_use, axis="x"):
          axis_k = {}
          if args_use[f"{axis[0]}_type"] == "Display" or not args_use["show_Axis"]:
              axis_k[f"{axis[0]}axis_opts"] = opts.AxisOpts(is_show=False)
          else:
              axis_k[f"{axis[0]}axis_opts"] = opts.AxisOpts(
                  type_=args_use[f"{axis[0]}_type"],
                  axisline_opts=opts.AxisLineOpts(is_on_zero=args_use["Axis_Zero"]),
                  axistick_opts=opts.AxisTickOpts(
                      is_show=args_use["show_Axis_Scale"]
                  ),
              )
          return axis_k

      for i in axis:
          k.update(axis_seeting(args_use, i))
      return k
  712. # 初始化设定
  def init_setting(self, args_use):
      """Return the ``init_opts`` keyword argument used to construct a chart.

      The page title is the fixed application prefix, followed by the
      configured page title when that is not the empty string.
      """
      page_name = args_use["page_Title"]
      window_title = (
          "CoTan_数据处理" if page_name == "" else f"CoTan_数据处理:{page_name}"
      )
      init_options = opts.InitOpts(
          theme=args_use["Theme"],
          bg_color=args_use["BG_Color"],
          width=args_use["width"],
          height=args_use["heigh"],
          page_title=window_title,
          animation_opts=opts.AnimationOpts(animation=args_use["show_Animation"]),
      )
      return {"init_opts": init_options}
  729. # 获取title专用
  def get_title(self, args_use):
      """Return the configured title prefixed with a colon (render-key suffix)."""
      configured_title = args_use["title"]
      return f":{configured_title}"
  732. # 标记符,包含线标记、点
  def mark(self, args_use):
      """Build the mark-line series option from ``args_use["make_Line"]``.

      Each entry is a sequence ``(value, name[, kind])``:

      * ``kind == "c"``, or ``value`` being "min", "max" or "average",
        gives a statistical mark line (``type_=value``);
      * ``kind == "x"`` gives a line at x = ``value``;
      * anything else, including a missing ``kind``, gives a line at
        y = ``value``.

      Args:
          args_use: Parsed option dictionary.

      Returns:
          ``{"markline_opts": ...}`` when at least one line is defined,
          otherwise an empty dict.
      """
      statistic_kinds = ("min", "max", "average")
      items = []
      for entry in args_use["make_Line"]:
          value, label = entry[0], entry[1]
          # The kind is optional. It is tested explicitly so that a short
          # ("min", name) entry is still treated as a statistic, and so that
          # no exception is needed for control flow.
          kind = entry[2] if len(entry) > 2 else None
          if kind == "c" or value in statistic_kinds:
              items.append(opts.MarkLineItem(type_=value, name=label))
          elif kind == "x":
              items.append(opts.MarkLineItem(x=value, name=label))
          else:
              items.append(opts.MarkLineItem(y=value, name=label))
      if not items:
          return {}
      return {"markline_opts": opts.MarkLineOpts(data=items)}
  750. # 标签设定,可以放在系列设置中或者坐标轴y轴设置中
  def yaxis_label(self, args_use, position="inside"):
      """Return the ``label_opts`` keyword argument for a series.

      Label visibility comes from ``args_use["show_Text"]``; ``position`` is
      passed straight through to ``LabelOpts``.
      """
      label_options = opts.LabelOpts(
          is_show=args_use["show_Text"], position=position
      )
      return {"label_opts": label_options}
  757. # 放在不同的图~.add中的设定
  def special_setting(self, args_use, type_):
      """Return the chart-type specific settings for the given chart type.

      For every type except "Polar" the result is a dict (empty for unknown
      types). For "Polar" the configured angle unit itself is returned, not
      a dict.
      """
      if type_ == "Polar":
          # The polar caller wants the unit system, not keyword arguments.
          return args_use["Polar_units"]

      settings = {}
      if type_ == "Bar":
          # Stack all series on one another when requested.
          if args_use["bar_Stacking"]:
              settings["stack"] = "stack1"
      elif type_ == "Scatter":
          settings["Beautiful"] = args_use["EffectScatter"]
          settings["symbol"] = args_use["Symbol"]
          settings["symbol_size"] = args_use["Size"]
      elif type_ == "Line":
          settings["is_connect_nones"] = args_use["connect_None"]
          # Smooth when either smoothing or "stick to the y axis" is on.
          settings["is_smooth"] = bool(
              args_use["Smooth_Line"] or args_use["paste_Y"]
          )
          area_opacity = 0.5 if args_use["Area_chart"] else 0
          settings["areastyle_opts"] = opts.AreaStyleOpts(opacity=area_opacity)
          if args_use["step_Line"]:
              # A step line replaces the smoothing flag entirely.
              settings.pop("is_smooth")
              settings["is_step"] = True
      elif type_ == "PictorialBar":
          settings["symbol_size"] = args_use["Size"]
      elif type_ == "WordCloud":
          settings["word_size_range"] = args_use["WordCould_Size"]
          settings["shape"] = args_use["Symbol"]
      elif type_ == "Graph":
          settings["symbol_Graph"] = args_use["Symbol"]
      elif type_ == "Radar":
          area_opacity = 0.1 if args_use["Area_chart"] else 0
          settings["areastyle_opts"] = opts.AreaStyleOpts(opacity=area_opacity)
          settings["symbol"] = args_use["Symbol"]
      return settings
  def custom_graph(self, text):
      """Execute caller-supplied chart code and register the chart it yields.

      NOTE(review): ``text`` is executed with ``exec`` against a namespace
      that contains this module's globals and this method's locals. Only
      pass trusted input.
      """
      named_domain = {}
      # Expose the method's current locals (self, text and the namespace dict
      # itself), then the module globals. Globals win on a name clash because
      # they are merged last.
      named_domain.update(locals())
      named_domain.update(globals())
      exec(text, named_domain)
      # Calls whatever ``Page`` resolves to in the namespace after ``text``
      # has run, and binds the result to ``c``.
      # NOTE(review): presumably ``text`` is expected to define ``Page``
      # itself; confirm against the caller.
      exec("c = Page()", named_domain)
      self.all_render[f"自定义图[{len(self.all_render)}]"] = named_domain["c"]
      return named_domain["c"]
  def get_all_render(self):
      """Return a shallow copy of the registry of rendered charts."""
      snapshot = self.all_render.copy()
      return snapshot
  def del_render(self, key):
      """Remove the chart registered under ``key`` from the registry."""
      registry = self.all_render
      del registry[key]
  806. # 坐标系图像:水平和垂直的数据轴:DataZoom+inside
  def to_bar(self, name, text) -> Bar:
      """Render sheet ``name`` as a bar chart and register it.

      Each column of the sheet becomes one series; the sheet index supplies
      the x-axis categories. Assumes ``self.get_sheet`` returns a pandas
      DataFrame.

      Args:
          name: Sheet name, passed to ``self.get_sheet`` / ``self.get_index``.
          text: Option text, parsed by ``self.parsing_parameters``.

      Returns:
          The configured ``Bar`` chart, also stored in ``self.all_render``.
      """
      sheet = self.get_sheet(name)
      categories = self.get_index(name, True).tolist()
      args = self.parsing_parameters(text)
      # Keep the labels in sheet order. The y values are positional, so
      # routing the labels through set() scrambled and deduplicated them.
      chart = Bar(**self.init_setting(args)).add_xaxis(
          [str(label) for label in categories]
      )
      numeric_values = []  # only used to derive the visual-map range
      # DataFrame.items() replaces iteritems(), which pandas 2.0 removed.
      for column_name, column in sheet.items():
          values = column.tolist()
          try:
              chart.add_yaxis(
                  f"{name}_{column_name}",
                  values,
                  **self.special_setting(args, "Bar"),
                  **self.yaxis_label(args),
                  color=self.get_random_color(),
              )
              # The series is passed through untouched; the int copies exist
              # only so that min()/max() below can compare them.
              numeric_values += list(map(int, values))
          except Exception:
              # Best effort: a column that cannot be plotted or converted is
              # skipped instead of aborting the whole chart.
              pass
      if not numeric_values:
          args["show_Visual_mapping"] = False  # nothing to map
          numeric_values = [0, 100]
      chart.set_global_opts(
          **self.global_set(
              args,
              f"{name}柱状图",
              min(numeric_values),
              max(numeric_values),
              True,
              axis=["x", "y"],
          )
      )
      chart.set_series_opts(**self.mark(args))
      self.all_render[f"{name}柱状图[{len(self.all_render)}]{self.get_title(args)}"] = chart
      return chart
  838. # 坐标系图像:水平和垂直的数据轴:DataZoom+inside
  def to_line(self, name, text) -> Line:
      """Render sheet ``name`` as a line chart and register it.

      Each column of the sheet becomes one series; the sheet index supplies
      the x-axis categories. Line-specific options (connect nones, smoothing,
      area fill, step) come from ``self.special_setting(args, "Line")``.
      Assumes ``self.get_sheet`` returns a pandas DataFrame.

      Args:
          name: Sheet name, passed to ``self.get_sheet`` / ``self.get_index``.
          text: Option text, parsed by ``self.parsing_parameters``.

      Returns:
          The configured ``Line`` chart, also stored in ``self.all_render``.
      """
      sheet = self.get_sheet(name)
      categories = self.get_index(name, True).tolist()
      args = self.parsing_parameters(text)
      # Keep the labels in sheet order. The y values are positional, so
      # routing the labels through set() scrambled and deduplicated them.
      chart = Line(**self.init_setting(args)).add_xaxis(
          [str(label) for label in categories]
      )
      numeric_values = []  # only used to derive the visual-map range
      # DataFrame.items() replaces iteritems(), which pandas 2.0 removed.
      for column_name, column in sheet.items():
          values = column.tolist()
          try:
              chart.add_yaxis(
                  f"{name}_{column_name}",
                  values,
                  **self.special_setting(args, "Line"),
                  **self.yaxis_label(args),
                  color=self.get_random_color(),
              )
              # The series is passed through untouched; the int copies exist
              # only so that min()/max() below can compare them.
              numeric_values += list(map(int, values))
          except Exception:
              # Best effort: a column that cannot be plotted or converted is
              # skipped instead of aborting the whole chart.
              pass
      if not numeric_values:
          args["show_Visual_mapping"] = False  # nothing to map
          numeric_values = [0, 100]
      chart.set_global_opts(
          **self.global_set(
              args,
              f"{name}折线图",
              min(numeric_values),
              max(numeric_values),
              True,
              axis=["x", "y"],
          )
      )
      chart.set_series_opts(**self.mark(args))
      self.all_render[f"{name}折线图[{len(self.all_render)}]{self.get_title(args)}"] = chart
      return chart
  870. # 坐标系图像:水平和垂直的数据轴:DataZoom+inside
  def to_scatter(self, name, text) -> Scatter:
      """Render sheet ``name`` as a scatter chart and register it.

      An ``EffectScatter`` is used instead of a plain ``Scatter`` when the
      "Beautiful" flag returned by ``self.special_setting`` is truthy.
      Assumes ``self.get_sheet`` returns a pandas DataFrame.

      Args:
          name: Sheet name, passed to ``self.get_sheet`` / ``self.get_index``.
          text: Option text, parsed by ``self.parsing_parameters``.

      Returns:
          The configured chart, also stored in ``self.all_render``.
      """
      sheet = self.get_sheet(name)
      args = self.parsing_parameters(text)
      categories = self.get_index(name, True).tolist()
      series_settings = self.special_setting(args, "Scatter")
      # "Beautiful" only selects the chart class; it is not an add_yaxis
      # argument, so it is removed before the settings are unpacked.
      chart_cls = EffectScatter if series_settings.pop("Beautiful") else Scatter
      # Keep the labels in sheet order. The y values are positional, so
      # routing the labels through set() scrambled and deduplicated them.
      chart = chart_cls(**self.init_setting(args)).add_xaxis(
          [str(label) for label in categories]
      )
      numeric_values = []  # only used to derive the visual-map range
      # DataFrame.items() replaces iteritems(), which pandas 2.0 removed.
      for column_name, column in sheet.items():
          values = column.tolist()
          try:
              chart.add_yaxis(
                  f"{name}_{column_name}",
                  values,
                  **series_settings,
                  **self.yaxis_label(args),
                  color=self.get_random_color(),
              )
              # The series is passed through untouched; the int copies exist
              # only so that min()/max() below can compare them.
              numeric_values += list(map(int, values))
          except Exception:
              # Best effort: a column that cannot be plotted or converted is
              # skipped instead of aborting the whole chart.
              pass
      if not numeric_values:
          args["show_Visual_mapping"] = False  # nothing to map
          numeric_values = [0, 100]
      chart.set_global_opts(
          **self.global_set(
              args,
              f"{name}散点图",
              min(numeric_values),
              max(numeric_values),
              True,
              axis=["x", "y"],
          )
      )
      chart.set_series_opts(**self.mark(args))
      self.all_render[f"{name}散点图[{len(self.all_render)}]{self.get_title(args)}"] = chart
      return chart
  908. # 坐标系图像:水平和垂直的数据轴:DataZoom+inside
  def to_pictorialbar(self, name, text) -> PictorialBar:
      """Render sheet ``name`` as a pictorial bar chart and register it.

      The axes are reversed, symbols are repeated and clipped, and the
      symbol size comes from ``self.special_setting(args, "PictorialBar")``.
      Assumes ``self.get_sheet`` returns a pandas DataFrame.

      Args:
          name: Sheet name, passed to ``self.get_sheet`` / ``self.get_index``.
          text: Option text, parsed by ``self.parsing_parameters``.

      Returns:
          The configured ``PictorialBar`` chart, also stored in
          ``self.all_render``.
      """
      sheet = self.get_sheet(name)
      categories = self.get_index(name, True).tolist()
      args = self.parsing_parameters(text)
      # Keep the labels in sheet order. The y values are positional, so
      # routing the labels through set() scrambled and deduplicated them.
      chart = (
          PictorialBar(**self.init_setting(args))
          .add_xaxis([str(label) for label in categories])
          .reversal_axis()
      )
      numeric_values = []  # only used to derive the visual-map range
      series_settings = self.special_setting(args, "PictorialBar")
      # DataFrame.items() replaces iteritems(), which pandas 2.0 removed.
      for column_name, column in sheet.items():
          values = column.tolist()
          try:
              chart.add_yaxis(
                  f"{name}_{column_name}",
                  values,
                  label_opts=opts.LabelOpts(is_show=False),
                  symbol_repeat=True,
                  is_symbol_clip=True,
                  symbol=SymbolType.ROUND_RECT,
                  **series_settings,
                  color=self.get_random_color(),
              )
              # The series is passed through untouched; the int copies exist
              # only so that min()/max() below can compare them.
              numeric_values += list(map(int, values))
          except Exception:
              # Best effort: a column that cannot be plotted or converted is
              # skipped instead of aborting the whole chart.
              pass
      if not numeric_values:
          args["show_Visual_mapping"] = False  # nothing to map
          numeric_values = [0, 100]
      chart.set_global_opts(
          **self.global_set(
              args,
              f"{name}象形柱状图",
              min(numeric_values),
              max(numeric_values),
              True,
              axis=["x", "y"],
          )
      )
      chart.set_series_opts(**self.mark(args))
      # NOTE(review): unlike the sibling methods, this registry key carries
      # no chart-type label; kept unchanged in case callers rely on it.
      self.all_render[f"{name}[{len(self.all_render)}]{self.get_title(args)}"] = chart
      return chart
  948. # 坐标系图像:水平和垂直的数据轴:DataZoom+inside
  def to_boxpolt(self, name, text) -> Boxplot:
      """Render sheet ``name`` as a box plot and register it.

      The x axis holds a single category (the sheet name); every column is
      added as one series containing that column's values. Assumes
      ``self.get_sheet`` returns a pandas DataFrame.

      Args:
          name: Sheet name, passed to ``self.get_sheet``.
          text: Option text, parsed by ``self.parsing_parameters``.

      Returns:
          The configured ``Boxplot`` chart, also stored in ``self.all_render``.
      """
      sheet = self.get_sheet(name)
      args = self.parsing_parameters(text)
      chart = Boxplot(**self.init_setting(args)).add_xaxis([f"{name}"])
      numeric_values = []  # only used to derive the visual-map range
      # DataFrame.items() replaces iteritems(), which pandas 2.0 removed.
      for column_name, column in sheet.items():
          values = column.tolist()
          try:
              chart.add_yaxis(
                  f"{name}_{column_name}", [values], **self.yaxis_label(args)
              )
              # The float copies exist only for the min()/max() range below.
              numeric_values += list(map(float, values))
          except Exception:
              # Best effort: a column that cannot be plotted or converted is
              # skipped instead of aborting the whole chart.
              pass
      if not numeric_values:
          args["show_Visual_mapping"] = False  # nothing to map
          numeric_values = [0, 100]
      chart.set_global_opts(
          **self.global_set(
              args,
              f"{name}箱形图",
              min(numeric_values),
              max(numeric_values),
              True,
              axis=["x", "y"],
          )
      )
      chart.set_series_opts(**self.mark(args))
      self.all_render[f"{name}箱形图[{len(self.all_render)}]{self.get_title(args)}"] = chart
      return chart
  971. # 坐标系图像:水平和垂直的数据轴:DataZoom+inside
  def to_heatmap(self, name, text) -> HeatMap:
      """Render sheet ``name`` as a heat map and register it.

      Column labels form the x axis and index labels the y axis. Every cell
      that converts to ``float`` becomes one ``[column_pos, row_pos, value]``
      point; other cells are skipped.

      Args:
          name: Sheet name, passed to ``self.get_sheet`` and friends.
          text: Option text, parsed by ``self.parsing_parameters``.

      Returns:
          The configured ``HeatMap`` chart, also stored in ``self.all_render``.
      """
      sheet = self.get_sheet(name)
      column_labels = self.get_column(name, True).tolist()  # x axis (bottom)
      row_labels = self.get_index(name, True).tolist()  # y axis (left)
      points = []
      numeric_values = []
      for col_pos in range(len(column_labels)):
          for row_pos in range(len(row_labels)):
              try:
                  # Direct positional access (row first, then column); no
                  # eval() round trip is needed.
                  value = float(sheet.iloc[row_pos, col_pos])
              except Exception:
                  continue  # non-numeric cell
              numeric_values.append(value)
              points.append([col_pos, row_pos, value])
      args = self.parsing_parameters(text)
      if numeric_values:
          min_, max_ = min(numeric_values), max(numeric_values)
      else:
          args["show_Visual_mapping"] = False  # nothing to map
          # Placeholder range; these two were previously assigned swapped.
          min_, max_ = 0, 100
      chart = (
          HeatMap(**self.init_setting(args))
          # Points address the x axis by column position, so the labels must
          # keep sheet order and must not be deduplicated through set().
          .add_xaxis([str(label) for label in column_labels])
          .add_yaxis(
              f"{name}", list(map(str, row_labels)), points, **self.yaxis_label(args)
          )
          .set_global_opts(
              **self.global_set(args, f"{name}热力图", min_, max_, True, axis=["x", "y"])
          )
          .set_series_opts(**self.mark(args))
      )
      self.all_render[f"{name}热力图[{len(self.all_render)}]{self.get_title(args)}"] = chart
      return chart
  1005. # 数据哪部全,要设置More
  def to_funnel(self, name, text) -> Funnel:
      """Render the first column of sheet ``name`` as a funnel chart.

      Every row whose first cell converts to ``float`` becomes one
      ``[row_label, value]`` entry; other rows are skipped.

      Args:
          name: Sheet name, passed to ``self.get_sheet`` / ``self.get_index``.
          text: Option text, parsed by ``self.parsing_parameters``.

      Returns:
          The configured ``Funnel`` chart, also stored in ``self.all_render``.
      """
      sheet = self.get_sheet(name)
      row_labels = self.get_index(name, True).tolist()
      entries = []
      numeric_values = []
      for row_pos, row_label in enumerate(row_labels):
          try:
              # Direct positional access; no eval() round trip is needed.
              value = float(sheet.iloc[row_pos, 0])
          except Exception:
              continue  # non-numeric cell
          entries.append([f"{row_label}", value])
          numeric_values.append(value)
      args = self.parsing_parameters(text)
      if not numeric_values:
          # min()/max() of an empty list would raise. The range is only a
          # placeholder here because visual mapping is disabled in the
          # global_set call below.
          numeric_values = [0, 100]
      chart = (
          Funnel(**self.init_setting(args))
          .add(f"{name}", entries, **self.yaxis_label(args, "top"))
          .set_global_opts(
              **self.global_set(
                  args,
                  f"{name}漏斗图",
                  min(numeric_values),
                  max(numeric_values),
                  True,
                  False,
              )
          )
      )
      self.all_render[f"{name}漏斗图[{len(self.all_render)}]{self.get_title(args)}"] = chart
      return chart
  def to_format_graph(self, name, text) -> Graph:
      """Render sheet ``name`` as a relation graph described row by row.

      For each row, the first cell is the node's size and value. Every
      further cell is read as ``"target:weight"`` and becomes a link from
      the row's node. When no link could be parsed at all, every ordered
      pair of nodes is linked with the absolute difference of their values.
      """
      sheet = self.get_sheet(name)
      row_labels = self.get_index(name, True).tolist()
      nodes, links = [], []
      for row_label, row in sheet.iterrows():
          cells = row.tolist()
          try:
              weight = float(cells[0])
              nodes.append(
                  {"name": f"{row_label}", "symbolSize": weight, "value": weight}
              )
              for cell in cells[1:]:
                  parts = str(cell).split(":")
                  try:
                      links.append(
                          {
                              "source": f"{row_label}",
                              "target": parts[0],
                              "value": float(parts[1]),
                          }
                      )
                  except BaseException:
                      pass  # cell is not a "target:weight" pair
          except BaseException:
              pass  # row without a numeric first cell
      if links == []:
          # Fallback: fully connect the nodes.
          links = [
              {
                  "source": first.get("name"),
                  "target": second.get("name"),
                  "value": abs(first.get("value") - second.get("value")),
              }
              for first in nodes
              for second in nodes
          ]
      args = self.parsing_parameters(text)
      chart = Graph(**self.init_setting(args))
      chart = chart.add(
          f"{row_labels[0]}",
          nodes,
          links,
          repulsion=args["Repulsion"],
          **self.yaxis_label(args),
      )
      chart = chart.set_global_opts(
          **self.global_set(args, f"{name}关系图", 0, 100, False, False)
      )
      self.all_render[f"{name}关系图[{len(self.all_render)}]{self.get_title(args)}"] = chart
      return chart
  def to_graph(self, name, text) -> Graph:
      """Render sheet ``name`` as a relation graph from an x/y matrix.

      Row and column labels are the nodes. Each numeric cell becomes one
      link between its row label and its column label. A pair is visited at
      most once regardless of direction, and self pairs are ignored.

      Args:
          name: Sheet name, passed to ``self.get_sheet`` and friends.
          text: Option text, parsed by ``self.parsing_parameters``.

      Returns:
          The configured ``Graph`` chart, also stored in ``self.all_render``.
      """
      sheet = self.get_sheet(name)
      args = self.parsing_parameters(text)
      node_size = args["Size"] * 3
      row_labels = self.get_index(name, True).tolist()
      column_labels = self.get_column(name, True).tolist()
      # The union of both label sets, deduplicated, forms the node list.
      nodes = [
          {"name": f"{label}", "symbolSize": node_size}
          for label in set(row_labels + column_labels)
      ]
      links = []
      visited_pairs = set()  # set membership instead of scanning a list
      for row_pos, row_label in enumerate(row_labels):
          for col_pos, col_label in enumerate(column_labels):
              if row_label == col_label:
                  continue
              if (row_label, col_label) in visited_pairs or (
                  col_label,
                  row_label,
              ) in visited_pairs:
                  continue
              visited_pairs.add((row_label, col_label))
              try:
                  # Direct positional access; no eval() round trip is needed.
                  value = float(sheet.iloc[row_pos, col_pos])
              except Exception:
                  continue  # non-numeric cell: no link
              # Link endpoints are stringified exactly like the node names,
              # so that non-string labels still refer to nodes by name.
              links.append(
                  {"source": f"{row_label}", "target": f"{col_label}", "value": value}
              )
      chart = (
          Graph(**self.init_setting(args))
          .add(
              f"{row_labels[0]}",
              nodes,
              links,
              repulsion=args["Repulsion"],
              **self.yaxis_label(args),
          )
          .set_global_opts(
              **self.global_set(args, f"{name}关系图", 0, 100, False, False)
          )
      )
      self.all_render[f"{name}关系图[{len(self.all_render)}]{self.get_title(args)}"] = chart
      return chart
  def to_sankey(self, name, text):
      """Render sheet ``name`` as a Sankey diagram from an x/y matrix.

      Row and column labels are the nodes. Each numeric cell becomes a link
      from its row label to its column label. A pair is visited at most once
      regardless of direction, and self pairs are ignored.

      Args:
          name: Sheet name, passed to ``self.get_sheet`` and friends.
          text: Option text, parsed by ``self.parsing_parameters``.

      Returns:
          The configured ``Sankey`` chart, also stored in ``self.all_render``.
      """
      sheet = self.get_sheet(name)
      args = self.parsing_parameters(text)
      row_labels = self.get_index(name, True).tolist()
      column_labels = self.get_column(name, True).tolist()
      nodes = []
      incoming = {}  # label -> labels that already link into it
      outgoing = {}  # label -> labels it already links to
      for label in set(row_labels + column_labels):
          nodes.append({"name": f"{label}"})
          incoming[label] = set()
          outgoing[label] = set()
      links = []
      visited_pairs = set()  # set membership instead of scanning a list
      for row_pos, row_label in enumerate(row_labels):
          for col_pos, col_label in enumerate(column_labels):
              if row_label == col_label:
                  continue
              if (row_label, col_label) in visited_pairs or (
                  col_label,
                  row_label,
              ) in visited_pairs:
                  continue
              visited_pairs.add((row_label, col_label))
              # A Sankey diagram must not contain loops. Skip row -> column
              # when some node already feeds the row node and is already fed
              # by the column node, since that would close a three-node loop.
              # NOTE(review): longer loops are not detected by this check.
              if incoming[row_label] & outgoing[col_label]:
                  continue
              try:
                  # Direct positional access; no eval() round trip is needed.
                  value = float(sheet.iloc[row_pos, col_pos])
              except Exception:
                  continue  # non-numeric cell: no link
              # Endpoints are stringified exactly like the node names.
              links.append(
                  {"source": f"{row_label}", "target": f"{col_label}", "value": value}
              )
              outgoing[row_label].add(col_label)
              incoming[col_label].add(row_label)
      chart = (
          Sankey()
          .add(
              f"{name}",
              nodes,
              links,
              linestyle_opt=opts.LineStyleOpts(
                  opacity=0.2, curve=0.5, color="source"
              ),
              label_opts=opts.LabelOpts(position="right"),
          )
          .set_global_opts(
              **self.global_set(args, f"{name}桑基图", 0, 100, False, False)
          )
      )
      self.all_render[f"{name}桑基图[{len(self.all_render)}]{self.get_title(args)}"] = chart
      return chart
  def to_parallel(self, name, text) -> Parallel:
      """Render sheet ``name`` as a parallel-axis chart and register it.

      Each index label becomes one axis (dimension) and each column becomes
      one series holding that column's values. Assumes ``self.get_sheet``
      returns a pandas DataFrame.

      Args:
          name: Sheet name, passed to ``self.get_sheet`` / ``self.get_index``.
          text: Option text, parsed by ``self.parsing_parameters``.

      Returns:
          The configured ``Parallel`` chart, also stored in
          ``self.all_render``.
      """
      sheet = self.get_sheet(name)
      dimension_labels = self.get_index(name, True).tolist()
      schema = [
          {"dim": position, "name": f"{label}"}
          for position, label in enumerate(dimension_labels)
      ]
      args = self.parsing_parameters(text)
      chart = (
          Parallel(**self.init_setting(args))
          .add_schema(schema)
          .set_global_opts(
              **self.global_set(args, f"{name}多轴图", 0, 100, False, False)
          )
      )
      # DataFrame.items() replaces iteritems(), which pandas 2.0 removed.
      for column_name, column in sheet.items():
          chart.add(f"{column_name}", [column.tolist()], **self.yaxis_label(args))
      self.all_render[f"{name}多轴图[{len(self.all_render)}]{self.get_title(args)}"] = chart
      return chart
  def to_pie(self, name, text) -> Pie:
      """Render the first column of sheet ``name`` as a pie chart.

      Every row whose first cell converts to ``float`` yields one
      ``[row_label, value]`` slice; other rows are skipped.
      """
      sheet = self.get_sheet(name)
      slices = []
      for row_label, row in sheet.iterrows():
          try:
              slices.append([f"{row_label}", float(row.tolist()[0])])
          except BaseException:
              pass  # row without a numeric first cell
      args = self.parsing_parameters(text)
      chart = Pie(**self.init_setting(args))
      chart = chart.add(f"{name}", slices, **self.yaxis_label(args, "top"))
      chart = chart.set_global_opts(
          **self.global_set(args, f"{name}饼图", 0, 100, False, False)
      )
      chart = chart.set_series_opts(label_opts=opts.LabelOpts(formatter="{b}: {c}"))
      self.all_render[f"{name}饼图[{len(self.all_render)}]{self.get_title(args)}"] = chart
      return chart
  def to_polar(self, name, text) -> Polar:
      """Render sheet ``name`` as a polar scatter chart and register it.

      Each row supplies a point. The first cell is used as is; the second
      cell is divided by a factor chosen from the configured polar unit.
      Rows whose first two cells are not numeric are skipped.
      """
      sheet = self.get_sheet(name)
      points = []
      args = self.parsing_parameters(text)
      unit = self.special_setting(args, "Polar")
      # "rad" selects the radian factor, "360" the degree factor.
      # NOTE(review): 0.0628 maps 2*pi onto 100, whereas 0.36 maps 360 onto
      # 1000; confirm whether the degree factor was meant to be 3.6.
      divisor = 0.0628 if unit == "rad" else (0.36 if unit == "360" else 1)
      for _, row in sheet.iterrows():
          try:
              cells = row.tolist()
              points.append((float(cells[0]), float(cells[1]) / divisor))
          except BaseException:
              pass  # row without two numeric cells
      chart = Polar(**self.init_setting(args))
      chart = chart.add(
          f"{name}", points, type_="scatter", **self.yaxis_label(args)
      )
      chart = chart.set_global_opts(
          **self.global_set(args, f"{name}极坐标图", 0, 100, False, False)
      )
      self.all_render[f"{name}极坐标图[{len(self.all_render)}]{self.get_title(args)}"] = chart
      return chart
  def to_radar(self, name, text) -> Radar:
      """Render sheet ``name`` as a radar chart and register it.

      Each index label becomes one radar indicator whose maximum is the
      largest numeric value found in that row. Each column becomes one
      series made of its numeric cells. Assumes ``self.get_sheet`` returns a
      pandas DataFrame.

      Args:
          name: Sheet name, passed to ``self.get_sheet`` / ``self.get_index``.
          text: Option text, parsed by ``self.parsing_parameters``.

      Returns:
          The configured ``Radar`` chart, also stored in ``self.all_render``.
      """
      sheet = self.get_sheet(name)
      indicator_labels = self.get_index(name, True).tolist()
      # Numeric values seen per row, used for each indicator's maximum.
      values_per_row = [[] for _ in indicator_labels]
      series_list = []  # [series_name, [values]] pairs
      # DataFrame.items() replaces iteritems(), which pandas 2.0 removed.
      for column_name, column in sheet.items():
          numbers = []
          for position, raw in enumerate(column.tolist()):
              try:
                  number = float(raw)
              except (TypeError, ValueError):
                  continue  # non-numeric cell
              values_per_row[position].append(number)
              numbers.append(number)
          # The values are wrapped in one more list, as Radar.add expects.
          series_list.append([f"{column_name}", [numbers]])
      indicators = [
          opts.RadarIndicatorItem(
              name=label,
              # max() of an empty list raises; a row without numeric cells
              # now leaves its maximum unset instead of aborting the chart.
              max_=max(row_values) if row_values else None,
          )
          for label, row_values in zip(indicator_labels, values_per_row)
      ]
      args = self.parsing_parameters(text)
      chart = (
          Radar(**self.init_setting(args))
          .add_schema(schema=indicators)
          .set_global_opts(
              **self.global_set(args, f"{name}雷达图", 0, 100, False, False)
          )
      )
      series_settings = self.special_setting(args, "Radar")
      for series_name, series_values in series_list:
          chart.add(
              series_name,
              series_values,
              **self.yaxis_label(args),
              color=self.get_random_color(),
              **series_settings,
          )
      self.all_render[f"{name}雷达图[{len(self.all_render)}]{self.get_title(args)}"] = chart
      return chart
  def get_random_color(self):
      """Return a random colour as an upper-case ``#RRGGBB`` hex string."""
      # Three independent channels, each formatted as two upper-case hex
      # digits with a leading zero when needed.
      channels = [randint(0, 255), randint(0, 255), randint(0, 255)]
      return "#" + "".join(f"{channel:02X}" for channel in channels)
  def to_word_cloud(self, name, text) -> WordCloud:
      """Render the first column of sheet ``name`` as a word cloud.

      Every row whose first cell converts to ``float`` yields one
      ``[word, weight]`` pair, the word being the row label.
      """
      sheet = self.get_sheet(name)
      words = []
      for row_label, row in sheet.iterrows():
          try:
              words.append([str(row_label), float(row.tolist()[0])])
          except BaseException:
              pass  # row without a numeric first cell
      args = self.parsing_parameters(text)
      chart = WordCloud(**self.init_setting(args))
      chart = chart.add(f"{name}", words, **self.special_setting(args, "WordCloud"))
      chart = chart.set_global_opts(
          **self.global_set(args, f"{name}词云", 0, 100, False, False)
      )
      self.all_render[f"{name}词云[{len(self.all_render)}]{self.get_title(args)}"] = chart
      return chart
  def to_liquid(self, name, text) -> Liquid:
      """Render the top-left cell of sheet ``name`` as a liquid-fill chart.

      The cell is turned into a fraction: the digits after the first "."
      are used when present, otherwise the whole text is placed after "0.".
      """
      sheet = self.get_sheet(name)
      cell_text = str(sheet.iloc[0, 0])
      pieces = cell_text.split(".")
      try:
          level = float(f"0.{pieces[1]}")
      except BaseException:
          # No usable fractional part: reuse the leading piece as the digits.
          level = float(f"0.{pieces[0]}")
      args = self.parsing_parameters(text)
      chart = Liquid(**self.init_setting(args))
      chart = chart.add(f"{name}", [level, level])
      chart = chart.set_global_opts(
          title_opts=opts.TitleOpts(title=f"{name}水球图", subtitle="CoTan~数据处理")
      )
      self.all_render[f"{name}水球图[{len(self.all_render)}]{self.get_title(args)}"] = chart
      return chart
  def to_gauge(self, name, text) -> Gauge:
      """Render the top-left cell of sheet ``name`` as a gauge chart.

      Values up to 100 are shown as they are. Larger values are divided by
      100 and only the digits after the decimal point are kept, rescaled to
      a percentage.
      """
      sheet = self.get_sheet(name)
      reading = float(sheet.iloc[0, 0])
      if reading > 100:
          scaled_text = str(reading / 100)
          pieces = scaled_text.split(".")
          try:
              reading = float(f"0.{pieces[1]}") * 100
          except BaseException:
              # No usable fractional part: reuse the whole text as the digits.
              reading = float(f"0.{scaled_text}") * 100
      args = self.parsing_parameters(text)
      chart = Gauge(**self.init_setting(args))
      chart = chart.add(f"{name}", [(f"{name}", reading)])
      chart = chart.set_global_opts(
          title_opts=opts.TitleOpts(title=f"{name}仪表图", subtitle="CoTan~数据处理")
      )
      self.all_render[f"{name}仪表图[{len(self.all_render)}]{self.get_title(args)}"] = chart
      return chart
  def to_calendar(self, name, text) -> Calendar:
      """Render sheet ``name`` as a calendar chart and register it.

      The index supplies the dates (as strings) and each column becomes one
      calendar series of ``[date, value]`` pairs, ranging from the first to
      the last row's date.

      Args:
          name: Sheet name, passed to ``self.get_sheet`` / ``self.get_column``.
          text: Option text, parsed by ``self.parsing_parameters``.

      Returns:
          The configured ``Calendar`` chart, also stored in
          ``self.all_render``.
      """
      sheet = self.get_sheet(name)
      series_points = [[] for _ in self.get_column(name, True)]
      series_names = self.get_column(name, True).tolist()
      numeric_values = []  # only used to derive the visual-map range
      for row_label, row in sheet.iterrows():
          date = str(row_label)
          for position, cell in enumerate(row.tolist()):
              try:
                  # The pair is stored before the numeric conversion, so a
                  # non-numeric cell is still plotted but does not take part
                  # in the min/max range.
                  series_points[position].append([date, cell])
                  numeric_values.append(float(cell))
              except Exception:
                  pass
      args = self.parsing_parameters(text)
      if not numeric_values:
          numeric_values = [0, 100]
          args["show_Visual_mapping"] = False  # nothing to map
      chart = Calendar(**self.init_setting(args)).set_global_opts(
          **self.global_set(
              args, f"{name}日历图", min(numeric_values), max(numeric_values), True
          )
      )
      for series_name, points in zip(series_names, series_points):
          if not points:
              # A sheet without rows has no dates to build a range from;
              # indexing points[0] used to raise IndexError here.
              continue
          start_date = points[0][0]
          end_date = points[-1][0]
          chart.add(
              str(series_name),
              points,
              calendar_opts=opts.CalendarOpts(range_=[start_date, end_date]),
              **self.yaxis_label(args),
          )
      self.all_render[f"{name}日历图[{len(self.all_render)}]{self.get_title(args)}"] = chart
      return chart
  def to_theme_river(self, name, text) -> ThemeRiver:
      """Render sheet ``name`` as a theme-river chart and register it.

      Each cell yields one ``[date, value, series_name]`` record, the date
      being the row label and the series name the column label.
      """
      sheet = self.get_sheet(name)
      records = []
      series_names = self.get_column(name, True).tolist()
      numeric_values = []
      for row_label, row in sheet.iterrows():
          date = str(row_label)
          cells = row.tolist()
          for position, series_name in enumerate(series_names):
              try:
                  records.append([date, cells[position], series_name])
                  numeric_values.append(float(cells[position]))
              except BaseException:
                  pass
      args = self.parsing_parameters(text)
      if numeric_values == []:
          numeric_values = [0, 100]
          args["show_Visual_mapping"] = False  # nothing to map
      chart = ThemeRiver(**self.init_setting(args))
      # The single axis is lifted from the bottom to keep the river compact.
      chart = chart.add(
          series_names,
          records,
          singleaxis_opts=opts.SingleAxisOpts(
              type_=args["x_type"], pos_bottom="10%"
          ),
      )
      chart = chart.set_global_opts(
          **self.global_set(
              args,
              f"{name}河流图",
              min(numeric_values),
              max(numeric_values),
              True,
              False,
          )
      )
      self.all_render[f"{name}河流图[{len(self.all_render)}]{self.get_title(args)}"] = chart
      return chart
  def to_sunburst(self, name, text) -> Sunburst:
      """Render sheet ``name`` as a sunburst chart and register it.

      The sheet is converted with ``to_dict()`` and walked recursively. Leaf
      values that convert to ``float`` keep that value; any other leaf
      counts as the length of its string form. A node's value is the sum of
      its children's values.
      """
      sheet = self.get_sheet(name)

      def build_node(mapping, label):
          children = []
          total = 0
          for key, content in mapping.items():
              if isinstance(content, dict):
                  child = build_node(content, str(key))
                  total += child["value"]
                  children.append(child)
                  continue
              try:
                  weight = float(content)
              except BaseException:
                  weight = len(str(content))
              total += weight
              children.append({"name": f"{key}={content}", "value": weight})
          return {"name": label, "children": children, "value": total}

      # Only the children of the synthetic root are plotted.
      tree_data = build_node(sheet.to_dict(), name)["children"]
      args = self.parsing_parameters(text)
      chart = Sunburst()
      chart = chart.add(
          series_name=f"{name}",
          data_pair=tree_data,
          radius=[abs(args["Size"] - 10), "90%"],
      )
      chart = chart.set_global_opts(
          **self.global_set(args, f"{name}旭日图", 0, 100, False, False)
      )
      chart = chart.set_series_opts(label_opts=opts.LabelOpts(formatter="{b}"))
      self.all_render[f"{name}旭日图[{len(self.all_render)}]{self.get_title(args)}"] = chart
      return chart
  def to_tree(self, name, text) -> Tree:
      """Render sheet ``name`` as a tree chart and register it.

      The sheet is converted with ``to_dict()`` and walked recursively.
      Nested dicts become inner nodes; every other value becomes a node
      named after its key, with a single child named after the value.
      """
      sheet = self.get_sheet(name)

      def build_node(mapping, label):
          children = []
          for key, content in mapping.items():
              if isinstance(content, dict):
                  children.append(build_node(content, str(key)))
              else:
                  children.append(
                      {"name": f"{key}", "children": [{"name": f"{content}"}]}
                  )
          return {"name": label, "children": children}

      tree_data = [build_node(sheet.to_dict(), name)]
      args = self.parsing_parameters(text)
      chart = Tree()
      chart = chart.add(f"{name}", tree_data)
      chart = chart.set_global_opts(
          **self.global_set(args, f"{name}树状图", 0, 100, False, False)
      )
      self.all_render[f"{name}树状图[{len(self.all_render)}]{self.get_title(args)}"] = chart
      return chart
  def to_tree_map(self, name, text) -> TreeMap:
      """Render sheet ``name`` as a rectangular tree map and register it.

      The sheet is converted with ``to_dict()`` and walked recursively. Leaf
      values that convert to ``float`` keep that value; any other leaf
      counts as the length of its string form. A node's value is the sum of
      its children's values.
      """
      sheet = self.get_sheet(name)

      def build_node(mapping, label):
          children = []
          total = 0
          for key, content in mapping.items():
              if isinstance(content, dict):
                  child = build_node(content, str(key))
                  total += child["value"]
                  children.append(child)
                  continue
              try:
                  weight = float(content)
              except BaseException:
                  weight = len(str(content))
              total += weight
              children.append({"name": f"{key}={content}", "value": weight})
          return {"name": label, "children": children, "value": total}

      # Only the children of the synthetic root are plotted.
      tree_data = build_node(sheet.to_dict(), name)["children"]
      args = self.parsing_parameters(text)
      chart = TreeMap()
      chart = chart.add(
          f"{name}",
          tree_data,
          label_opts=opts.LabelOpts(is_show=True, position="inside"),
      )
      chart = chart.set_global_opts(
          **self.global_set(args, f"{name}矩形树图", 0, 100, False, False)
      )
      self.all_render[f"{name}矩形树图[{len(self.all_render)}]{self.get_title(args)}"] = chart
      return chart
  def to_scattergeo(self, name, text) -> Geo:
      """Render sheet ``name`` as a Geo point map and register it.

      Row labels are used as place names and each column starts out as one
      "scatter" series. A cell prefixed with "[##S]" or "[##H]" opens an
      extra series of type EFFECT_SCATTER or HEATMAP. Any other non-numeric
      cell turns its column into a LINES series.
      """
      get = self.get_sheet(name)
      column = self.get_column(name, True).tolist()
      data_type = ["scatter" for _ in column]  # series type per series
      data = [[] for _ in column]  # (place, value) pairs per series
      y = []  # numeric values, for the visual-map range
      for i in get.iterrows():  # iterate by row
          map = str(i[0])  # row label (note: shadows the builtin ``map``)
          q = i[1].tolist()
          for a in range(len(q)):
              try:
                  v = float(q[a])
                  y.append(v)
              except BaseException:
                  v = str(q[a])
                  try:
                      if v[:5] == "[##S]":
                          # Effect-scatter marker: open a new series named
                          # after the current column.
                          v = float(v[5:])
                          y.append(v)
                          column.append(column[a])
                          data_type.append(GeoType.EFFECT_SCATTER)
                          data.append([])
                          a = -1  # redirect the append below to the new series
                      elif v[:5] == "[##H]":
                          # Heat-map marker: same handling with type HEATMAP.
                          v = float(v[5:])
                          y.append(v)
                          column.append(column[a])
                          data_type.append(GeoType.HEATMAP)
                          data.append([])
                          a = -1  # redirect the append below to the new series
                      else:
                          raise Exception
                  except BaseException:
                      # Neither numeric nor a valid marker: the column becomes
                      # a LINES series and ``v`` stays the raw string.
                      data_type[a] = GeoType.LINES
              data[a].append((map, v))
      args = self.parsing_parameters(text)
      args["show_Visual_mapping"] = True  # visual mapping is mandatory here
      if y == []:
          y = [0, 100]
      if args["is_Dark"]:
          # Dark item style for the base map.
          g = {
              "itemstyle_opts": opts.ItemStyleOpts(
                  color="#323c48", border_color="#111"
              )
          }
      else:
          g = {}
      c = (
          Geo().add_schema(maptype=str(args["Map"]), **g)
          # Visual mapping is required (otherwise odd data is displayed).
          .set_global_opts(
              **self.global_set(args, f"{name}Geo点地图", min(y), max(y), False)
          )
      )
      for i in range(len(data)):
          if data_type[i] != GeoType.LINES:
              # Point-like series: configured symbol, size and a fixed colour.
              ka = dict(
                  symbol=args["Symbol"],
                  symbol_size=args["Size"],
                  color="#1E90FF" if args["is_Dark"] else "#0000FF",
              )
          else:
              # Line series: arrow symbol with an arrow effect on the line.
              ka = dict(
                  symbol=SymbolType.ARROW,
                  symbol_size=6,
                  effect_opts=opts.EffectOpts(
                      symbol=SymbolType.ARROW, symbol_size=6, color="blue"
                  ),
                  linestyle_opts=opts.LineStyleOpts(
                      curve=0.2, color="#FFF8DC" if args["is_Dark"] else "#000000"
                  ),
              )
          c.add(f"{column[i]}", data[i], type_=data_type[i], **ka)
      # Hide labels; this must come after add() to take effect.
      c.set_series_opts(label_opts=opts.LabelOpts(is_show=False))
      self.all_render[
          f"{name}Geo点地图[{len(self.all_render)}]{self.get_title(args)}"
      ] = c
      return c
  def to_map(self, name, text) -> Map:
      """Render sheet ``name`` as a Map (or MapGlobe) chart and register it.

      Row labels are used as region names. Each column becomes one series
      of ``(region, value)`` pairs built from its numeric cells.
      """
      sheet = self.get_sheet(name)
      series_names = self.get_column(name, True).tolist()
      series_points = [[] for _ in series_names]
      numeric_values = []
      for row_label, row in sheet.iterrows():
          region = str(row_label)
          cells = row.tolist()
          for position in range(len(cells)):
              try:
                  value = float(cells[position])
                  numeric_values.append(value)
                  series_points[position].append((region, value))
              except BaseException:
                  pass  # non-numeric cell
      args = self.parsing_parameters(text)
      args["show_Visual_mapping"] = True  # visual mapping is mandatory here
      if numeric_values == []:
          numeric_values = [0, 100]
      chart_cls = MapGlobe if args["map_Type"] == "GLOBE" else Map
      # Visual mapping is required (otherwise odd data is displayed).
      chart = chart_cls().set_global_opts(
          **self.global_set(
              args,
              f"{name}Map地图",
              min(numeric_values),
              max(numeric_values),
              False,
          )
      )
      for position in range(len(series_points)):
          chart.add(
              f"{series_names[position]}",
              series_points[position],
              str(args["Map"]),
              is_map_symbol_show=args["show_Map_Symbol"],
              symbol=args["Symbol"],
              **self.yaxis_label(args),
          )
      self.all_render[
          f"{name}Map地图[{len(self.all_render)}]{self.get_title(args)}"
      ] = chart
      return chart
  1612. def to_geo(self, name, text) -> Geo:
  1613. get = self.get_sheet(name)
  1614. column = self.get_column(name, True).tolist()
  1615. index = self.get_index(name, True).tolist()
  1616. args = self.parsing_parameters(text)
  1617. args["show_Visual_mapping"] = True # 必须视觉映射
  1618. if args["is_Dark"]:
  1619. g = {
  1620. "itemstyle_opts": opts.ItemStyleOpts(
  1621. color="#323c48", border_color="#111"
  1622. )
  1623. }
  1624. else:
  1625. g = {}
  1626. c = Geo().add_schema(maptype=str(args["Map"]), **g)
  1627. m = []
  1628. for y in column: # 维度
  1629. for x in index: # 精度
  1630. value = get.loc[x, y]
  1631. try:
  1632. v = float(value) # 数值
  1633. type_ = args["Geo_Type"]
  1634. except BaseException:
  1635. try:
  1636. q = str(value)
  1637. v = float(value[5:])
  1638. if q[:5] == "[##S]": # 点图
  1639. type_ = GeoType.SCATTER
  1640. elif q[:5] == "[##E]": # 带点特效
  1641. type_ = GeoType.EFFECT_SCATTER
  1642. else: # 画线
  1643. v = q.split(";")
  1644. c.add_coordinate(
  1645. name=f"({v[0]},{v[1]})",
  1646. longitude=float(v[0]),
  1647. latitude=float(v[1]),
  1648. )
  1649. c.add_coordinate(
  1650. name=f"({x},{y})", longitude=float(x), latitude=float(y)
  1651. )
  1652. c.add(
  1653. f"{name}",
  1654. [[f"({x},{y})", f"({v[0]},{v[1]})"]],
  1655. type_=GeoType.LINES,
  1656. effect_opts=opts.EffectOpts(
  1657. symbol=SymbolType.ARROW, symbol_size=6, color="blue"
  1658. ),
  1659. linestyle_opts=opts.LineStyleOpts(
  1660. curve=0.2,
  1661. color="#FFF8DC" if args["is_Dark"] else "#000000",
  1662. ),
  1663. )
  1664. c.add(
  1665. f"{name}_XY",
  1666. [[f"({x},{y})", 5], [f"({v[0]},{v[1]})", 5]],
  1667. type_=GeoType.EFFECT_SCATTER,
  1668. color="#1E90FF" if args["is_Dark"] else "#0000FF",
  1669. )
  1670. raise Exception # continue
  1671. except BaseException:
  1672. continue
  1673. try:
  1674. c.add_coordinate(
  1675. name=f"({x},{y})", longitude=float(x), latitude=float(y)
  1676. )
  1677. c.add(
  1678. f"{name}",
  1679. [[f"({x},{y})", v]],
  1680. type_=type_,
  1681. symbol=args["Symbol"],
  1682. symbol_size=args["Size"],
  1683. )
  1684. if type_ == GeoType.HEATMAP:
  1685. c.add(
  1686. f"{name}_XY",
  1687. [[f"({x},{y})", v]],
  1688. type_="scatter",
  1689. color="#1E90FF" if args["is_Dark"] else "#0000FF",
  1690. )
  1691. m.append(v)
  1692. except BaseException:
  1693. pass
  1694. if m == []:
  1695. m = [0, 100]
  1696. c.set_series_opts(label_opts=opts.LabelOpts(is_show=False)) # 不显示
  1697. c.set_global_opts(
  1698. **self.global_set(args, f"{name}Geo地图", min(m), max(m), False)
  1699. )
  1700. self.all_render[
  1701. f"{name}Geo地图[{len(self.all_render)}]{self.get_title(args)}"
  1702. ] = c
  1703. return c
  1704. def to_bar3d(self, name, text) -> Bar3D:
  1705. get = self.get_sheet(name)
  1706. x = self.get_column(name, True).tolist() # 图的x轴,下侧,列名
  1707. y = self.get_index(name, True).tolist() # 图的y轴,左侧,行名
  1708. value_list = []
  1709. q = []
  1710. for c in range(len(x)): # c-列,r-行
  1711. for r in range(len(y)):
  1712. try:
  1713. v = eval(f"get.iloc[{r},{c}]", {"get": get}) # 先行后列
  1714. value_list.append([c, r, v])
  1715. q.append(float(v))
  1716. except BaseException:
  1717. pass
  1718. args = self.parsing_parameters(text)
  1719. if q == []:
  1720. q = [0, 100]
  1721. args["show_Visual_mapping"] = False # 关闭视觉映射
  1722. c = (
  1723. Bar3D(**self.init_setting(args))
  1724. .add(
  1725. f"{name}",
  1726. value_list,
  1727. xaxis3d_opts=opts.Axis3DOpts(list(map(str, x)), type_=args["x_type"]),
  1728. yaxis3d_opts=opts.Axis3DOpts(list(map(str, y)), type_=args["y_type"]),
  1729. zaxis3d_opts=opts.Axis3DOpts(type_=args["z_type"]),
  1730. )
  1731. .set_global_opts(
  1732. **self.global_set(args, f"{name}3D柱状图", min(q), max(q), True),
  1733. )
  1734. )
  1735. if args["bar_Stacking"]:
  1736. c.set_series_opts(**{"stack": "stack"}) # 层叠
  1737. self.all_render[
  1738. f"{name}3D柱状图[{len(self.all_render)}]{self.get_title(args)}"
  1739. ] = c
  1740. return c
  1741. def to_scatter3d(self, name, text) -> Scatter3D:
  1742. get = self.get_sheet(name)
  1743. x = self.get_column(name, True).tolist() # 图的x轴,下侧,列名
  1744. y = self.get_index(name, True).tolist() # 图的y轴,左侧,行名
  1745. value_list = []
  1746. q = []
  1747. for c in range(len(x)): # c-列,r-行
  1748. for r in range(len(y)):
  1749. try:
  1750. v = eval(f"get.iloc[{r},{c}]", {"get": get}) # 先行后列
  1751. value_list.append([c, r, v])
  1752. q.append(float(v))
  1753. except BaseException:
  1754. pass
  1755. args = self.parsing_parameters(text)
  1756. if q == []:
  1757. q = [0, 100]
  1758. args["show_Visual_mapping"] = False # 关闭视觉映射
  1759. c = (
  1760. Scatter3D(**self.init_setting(args))
  1761. .add(
  1762. f"{name}",
  1763. value_list,
  1764. xaxis3d_opts=opts.Axis3DOpts(list(map(str, x)), type_=args["x_type"]),
  1765. yaxis3d_opts=opts.Axis3DOpts(list(map(str, y)), type_=args["y_type"]),
  1766. zaxis3d_opts=opts.Axis3DOpts(type_=args["z_type"]),
  1767. )
  1768. .set_global_opts(
  1769. **self.global_set(args, f"{name}3D散点图", min(q), max(q), True)
  1770. )
  1771. )
  1772. self.all_render[
  1773. f"{name}3D散点图[{len(self.all_render)}]{self.get_title(args)}"
  1774. ] = c
  1775. return c
  1776. def to_line3d(self, name, text) -> Line3D:
  1777. get = self.get_sheet(name)
  1778. x = self.get_column(name, True).tolist() # 图的x轴,下侧,列名
  1779. y = self.get_index(name, True).tolist() # 图的y轴,左侧,行名
  1780. value_list = []
  1781. q = []
  1782. for c in range(len(x)): # c-列,r-行
  1783. for r in range(len(y)):
  1784. try:
  1785. v = eval(f"get.iloc[{r},{c}]", {"get": get}) # 先行后列
  1786. value_list.append([c, r, v])
  1787. q.append(float(v))
  1788. except BaseException:
  1789. pass
  1790. args = self.parsing_parameters(text)
  1791. if q == []:
  1792. q = [0, 100]
  1793. args["show_Visual_mapping"] = False # 关闭视觉映射
  1794. c = (
  1795. Line3D(**self.init_setting(args))
  1796. .add(
  1797. f"{name}",
  1798. value_list,
  1799. xaxis3d_opts=opts.Axis3DOpts(list(map(str, x)), type_=args["x_type"]),
  1800. yaxis3d_opts=opts.Axis3DOpts(list(map(str, y)), type_=args["y_type"]),
  1801. zaxis3d_opts=opts.Axis3DOpts(type_=args["z_type"]),
  1802. grid3d_opts=opts.Grid3DOpts(width=100, height=100, depth=100),
  1803. )
  1804. .set_global_opts(
  1805. **self.global_set(args, f"{name}3D折线图", min(q), max(q), True)
  1806. )
  1807. )
  1808. self.all_render[
  1809. f"{name}3D折线图[{len(self.all_render)}]{self.get_title(args)}"
  1810. ] = c
  1811. return c
  1812. def clean_render(self):
  1813. self.all_render = {}
  1814. def render_all(self, text, render_dir) -> Page:
  1815. args = self.parsing_parameters(text)
  1816. if args["page_Title"] == "":
  1817. title = "CoTan_数据处理"
  1818. else:
  1819. title = f"CoTan_数据处理:{args['page_Title']}"
  1820. if args["HTML_Type"] == 1:
  1821. page = Page(page_title=title, layout=Page.DraggablePageLayout)
  1822. page.add(*self.all_render.values())
  1823. elif args["HTML_Type"] == 2:
  1824. page = Page(page_title=title, layout=Page.SimplePageLayout)
  1825. page.add(*self.all_render.values())
  1826. else:
  1827. page = Tab(page_title=title)
  1828. for i in self.all_render:
  1829. page.add(self.all_render[i], i)
  1830. page.render(render_dir)
  1831. return render_dir
  1832. def overlap(self, down, up):
  1833. over_down = self.all_render[down]
  1834. over_up = self.all_render[up]
  1835. over_down.overlap(over_up)
  1836. return over_down
  1837. class MachineLearner(Draw): # 数据处理者
  1838. def __init__(self, *args, **kwargs):
  1839. super().__init__(*args, **kwargs)
  1840. self.learner = {} # 记录机器
  1841. self.learn_dict = {
  1842. "Line": (LinearRegression, ()),
  1843. "Ridge": (Ridge, ("alpha", "max_iter",)),
  1844. "Lasso": (Lasso, ("alpha", "max_iter",)),
  1845. "LogisticRegression": (LogisticRegression, ("C",)),
  1846. "Knn": (KNeighborsClassifier, ("n_neighbors",)),
  1847. "Knn_class": (KNeighborsRegressor, ("n_neighbors",)),
  1848. }
  1849. self.learner_type = {} # 记录机器的类型
  1850. def decision_tree_classifier(self, name): # 特征提取
  1851. get = self.get_sheet(name)
  1852. dver = DictVectorizer()
  1853. get_dic = get.to_dict(orient="records")
  1854. new = dver.fit_transform(get_dic).toarray()
  1855. dec = pd.DataFrame(new, columns=dver.feature_names_)
  1856. self.add_sheet(dec, f"{name}:特征")
  1857. return dec
  1858. def parsing(self, parameters): # 解析参数
  1859. args = {}
  1860. args_use = {}
  1861. # 输入数据
  1862. exec(parameters, args)
  1863. # 处理数据
  1864. args_use["alpha"] = float(args.get("alpha", 1.0)) # L1和L2正则化用
  1865. args_use["C"] = float(args.get("C", 1.0)) # L1和L2正则化用
  1866. args_use["max_iter"] = int(args.get("max_iter", 1000)) # L1和L2正则化用
  1867. args_use["n_neighbors"] = int(args.get("K_knn", 5)) # knn邻居数 (命名不同)
  1868. args_use["nDim_2"] = bool(args.get("nDim_2", True)) # 数据是否降维
  1869. return args_use
  1870. def add_learner(self, learner, parameters=""):
  1871. get, args_tuple = self.learn_dict[learner]
  1872. name = f"Le[{len(self.learner)}]{learner}"
  1873. # 参数调节
  1874. args_use = self.parsing(parameters)
  1875. args = {}
  1876. for i in args_tuple:
  1877. args[i] = args_use[i]
  1878. # 生成学习器
  1879. self.learner[name] = get(**args)
  1880. self.learner_type[name] = learner
  1881. def return_learner(self):
  1882. return self.learner.copy()
  1883. def get_learner(self, name):
  1884. return self.learner[name]
  1885. def get_learner_type(self, name):
  1886. return self.learner_type[name]
  1887. def training_machine(self, name, learnner, parameters="", **kwargs):
  1888. type_ = self.get_learner_type(learnner)
  1889. args_use = self.parsing(parameters)
  1890. if type_ in (
  1891. "Line",
  1892. "Ridge",
  1893. "Lasso",
  1894. "LogisticRegression",
  1895. "Knn",
  1896. "Knn_class",
  1897. ):
  1898. return self.training_machine_core(
  1899. name, learnner, down_ndim=args_use["nDim_2"], **kwargs
  1900. )
  1901. # Score_Only表示仅评分
  1902. def training_machine_core(
  1903. self, name, learner, score_only=False, down_ndim=True, split=0.3, **kwargs
  1904. ):
  1905. get = self.get_sheet(name)
  1906. x = get.to_numpy()
  1907. y = self.get_index(name, True) # 获取y值(用index作为y)
  1908. if down_ndim or x.ndim == 1: # 执行降维处理(也包括升维,ravel让一切变成一维度,包括数字)
  1909. a = x
  1910. x = []
  1911. for i in a:
  1912. try:
  1913. c = i.np.ravel(a[i], "C")
  1914. x.append(c)
  1915. except BaseException:
  1916. x.append(i)
  1917. x = np.array(x)
  1918. model = self.get_learner(learner)
  1919. if not score_only: # 只计算得分,全部数据用于测试
  1920. train_x, test_x, train_y, test_y = train_test_split(x, y, test_size=split)
  1921. model.fit(train_x, train_y)
  1922. train_score = model.score(train_x, train_y)
  1923. test_score = model.score(test_x, test_y)
  1924. return train_score, test_score
  1925. test_score = model.score(x, y)
  1926. return 0, test_score
  1927. def predict(self, name, learner, parameters="", **kwargs):
  1928. type_ = self.get_learner_type(learner)
  1929. args_use = self.parsing(parameters)
  1930. if type_ in (
  1931. "Line",
  1932. "Ridge",
  1933. "Lasso",
  1934. "LogisticRegression",
  1935. "Knn",
  1936. "Knn_class",
  1937. ):
  1938. return self.predict_simp(
  1939. name, learner, down_ndim=args_use["nDim_2"], **kwargs
  1940. )
  1941. def predict_simp(self, name, learner, down_ndim=True, **kwargs):
  1942. get = self.get_sheet(name)
  1943. column = self.get_column(name, True)
  1944. x = get.to_numpy()
  1945. if down_ndim or x.ndim == 1: # 执行降维处理(也包括升维,ravel让一切变成一维度,包括数字)
  1946. a = x
  1947. x = []
  1948. for i in a:
  1949. try:
  1950. c = i.np.ravel(a[i], "C")
  1951. x.append(c)
  1952. except BaseException:
  1953. x.append(i)
  1954. x = np.array(x)
  1955. model = self.get_learner(learner)
  1956. answer = model.predict(x)
  1957. data = pd.DataFrame(x, index=answer, columns=column)
  1958. self.add_sheet(data, f"{name}:预测")
  1959. return data
  1960. def visual_learner(self, learner, new=False): # 显示参数
  1961. learner = self.get_learner(learner)
  1962. learner_type = self.get_learner_type(learner)
  1963. if learner_type in ("Ridge", "Lasso"):
  1964. alpha = learner.alpha # 阿尔法
  1965. w = learner.coef_.tolist() # w系数
  1966. b = learner.intercept_ # 截距
  1967. max_iter = learner.max_iter
  1968. w_name = [f"权重:W[{i}]" for i in range(len(w))]
  1969. index = ["阿尔法:Alpha"] + w_name + ["截距:b", "最大迭代数"]
  1970. data = [alpha] + w + [b] + [max_iter]
  1971. # 文档
  1972. doc = (
  1973. f"阿尔法:alpha = {alpha}\n\n权重:\nw = \n{pd.DataFrame(w)}\n\n截距:b = {b}\n\n最大迭代数:{max_iter}"
  1974. f"\n\n\nEND"
  1975. )
  1976. data = pd.DataFrame(data, index=index)
  1977. elif learner_type in ("Line",):
  1978. w = learner.coef_.tolist() # w系数
  1979. b = learner.intercept_
  1980. index = [f"权重:W[{i}]" for i in range(len(w))] + ["截距:b"]
  1981. data = w + [b] # 截距
  1982. # 文档
  1983. doc = f"权重:w = \n{pd.DataFrame(w)}\n\n截距:b = {b}\n\n\nEND"
  1984. data = pd.DataFrame(data, index=index)
  1985. elif learner_type in ("Knn",): # Knn_class
  1986. classes = learner.classes_.tolist() # 分类
  1987. n = learner.n_neighbors # 个数
  1988. p = {1: "曼哈顿距离", 2: "欧几里得距离"}.get(learner.p)
  1989. index = [f"类目[{i}]" for i in range(len(classes))] + ["邻居个数", "距离公式"]
  1990. data = classes + [n, p]
  1991. doc = f"分类类目:\n{pd.DataFrame(classes)}\n\n邻居个数:{n}\n\n计算距离的方式:{p}\n\n\nEND"
  1992. data = pd.DataFrame(data, index=index)
  1993. elif learner_type in ("Knn_class",):
  1994. n = learner.n_neighbors # 个数
  1995. p = {1: "曼哈顿距离", 2: "欧几里得距离"}.get(learner.p)
  1996. index = ["邻居个数", "距离公式"]
  1997. data = [n, p]
  1998. doc = f"邻居个数:{n}\n\n计算距离的方式:{p}\n\n\nEND"
  1999. data = pd.DataFrame(data, index=index)
  2000. elif learner_type in ("LogisticRegression",):
  2001. classes = learner.classes_.tolist() # 分类
  2002. w = learner.coef_.tolist() # w系数
  2003. b = learner.intercept_
  2004. c = learner.C
  2005. index = (
  2006. [f"类目[{i}]" for i in range(len(classes))]
  2007. + [f"权重:W[{j}][{i}]" for i in range(len(w)) for j in range(len(w[i]))]
  2008. + [f"截距:b[{i}]" for i in range(len(b))]
  2009. + ["C"]
  2010. )
  2011. data = classes + [j for i in w for j in i] + [i for i in b] + [c]
  2012. doc = f"分类类目:\n{pd.DataFrame(classes)}\n\n权重:w = \n{pd.DataFrame(w)}\n\n截距:b = {b}\n\nC={c}\n\n\n"
  2013. data = pd.DataFrame(data, index=index)
  2014. else:
  2015. return "", []
  2016. if new:
  2017. self.add_sheet(data, f"{learner}:属性")
  2018. return doc, data
  2019. def del_leaner(self, leaner):
  2020. del self.learner[leaner]
  2021. del self.learner_type[leaner]