Learn.py 74 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787
  1. import pandas as pd
  2. import re
  3. from os import getcwd
  4. from pyecharts.globals import CurrentConfig
  5. CurrentConfig.ONLINE_HOST = f"{getcwd()}/assets/"
  6. import pandas_profiling as pp
  7. from pyecharts import options as opts
  8. from pyecharts.charts import *
  9. from pyecharts.globals import SymbolType
  10. from pyecharts.components import Table
  11. from pyecharts.globals import GeoType #地图推荐使用GeoType而不是str
  12. from random import randint
  13. from sklearn.model_selection import train_test_split
  14. from sklearn.linear_model import *
  15. from sklearn.neighbors import KNeighborsClassifier,KNeighborsRegressor
  16. import sklearn as sk
  17. from sklearn.feature_extraction import DictVectorizer
  18. import numpy as np
  19. class Form:
  20. def __init__(self, *args, **kwargs):
  21. class DEL: pass
  22. self.Sheet_Dic = {}
  23. self.Clean_Func = {}
  24. self.Clean_Func_Exp = {}
  25. self.DEL = DEL()
  26. self.Name = {'pd': pd, 'DEL': self.DEL, 're': re, 'Sheet': self.Sheet_Dic}
  27. self.R_Dic = {} # 存放所有的图
  28. def get_Sheet(self, name, all_Row=None, all_Colunms=None) -> pd.DataFrame:
  29. try:
  30. pd.set_option('display.max_rows', all_Row)
  31. pd.set_option('display.max_columns', all_Colunms)
  32. except:
  33. pass
  34. return self.Sheet_Dic[name]
  35. def Describe(self, name, make_Sheet=False): # 生成描述
  36. get = self.get_Sheet(name)
  37. Des = get.describe()
  38. if make_Sheet: self.Add_Form(Des, f'{name}_describe[{len(self.Sheet_Dic)}]')
  39. shape = get.shape
  40. dtype = get.dtypes
  41. n = get.ndim
  42. head = get.head()
  43. tail = get.tail(3)
  44. return f'1)基本\n{Des}\n\n2)形状:{shape}\n\n3)数据类型\n{dtype}\n\n4)数据维度:{n}\n\n5)头部数据\n{head}\n\n6)尾部数据\n{tail}' \
  45. f'\n\n7)行名\n{get.index}\n\n8)列名\n{get.columns}'
  46. def Add_Form(self, Data, name=''):
  47. if name == '': name = f'Sheet[{len(self.Sheet_Dic)}]'
  48. else:name += f'_[{len(self.Sheet_Dic)}]'
  49. self.Sheet_Dic[name] = Data
  50. return Data
  51. def Del_Form(self,name):
  52. del self.Sheet_Dic[name]
  53. def __Add_Form(self, Dic, Func, name='', Index=True, **kwargs): # 新增表格的核心方式
  54. try:
  55. Data = Func(Dic, **kwargs)
  56. except UnicodeDecodeError: # 找不到编码方式
  57. return False
  58. if not Index:
  59. Data.index = Data.iloc[:, 0].tolist()
  60. Data.drop(Data.columns.values.tolist()[0], inplace=True, axis=1)
  61. return self.Add_Form(Data, name)
  62. def Add_CSV(self, Dic, name='', Sep=',', code='utf-8', str_=True, Index=True):
  63. if str_:
  64. k = {'dtype': 'object'}
  65. else:
  66. k = {}
  67. return self.__Add_Form(Dic, pd.read_csv, name, Index, sep=Sep, encoding=code, **k)
  68. def Add_Python(self, Text, sheet_name='') -> pd.DataFrame:
  69. name = {'Sheet': self.get_Sheet}
  70. name.update(globals().copy())
  71. name.update(locals().copy())
  72. exec(Text, name)
  73. exec('get = Creat()', name)
  74. if isinstance(name['get'], pd.DataFrame): # 已经是DataFram
  75. get = name['get']
  76. elif isinstance(name['get'], np.array):
  77. if bool(name.get('downNdim',False)):#执行降或升维操作
  78. a = name['get']
  79. array = []
  80. for i in a:
  81. try:
  82. c = i.np.ravel(a[i], 'C')
  83. array.append(c)
  84. except:
  85. array.append(i)
  86. get = pd.DataFrame(array)
  87. else:
  88. array = name['get'].tolist()
  89. get = pd.DataFrame(array)
  90. else:
  91. try:
  92. get = pd.DataFrame(name['get'])
  93. except:
  94. get = pd.DataFrame([name['get']])
  95. self.Add_Form(get, sheet_name)
  96. return get
  97. def Add_Html(self, Dic, name='', code='utf-8', str_=True, Index=True):
  98. if str_:
  99. k = {'dtype': 'object'}
  100. else:
  101. k = {}
  102. return self.__Add_Form(Dic, pd.read_html, name, Index, encoding=code, **k)
  103. def get_FormList(self):
  104. return list(self.Sheet_Dic.keys()) # 返回列表
  105. def to_Html_One(self,name,Dic=''):
  106. if Dic == '': Dic = f'{name}.html'
  107. get = self.get_Sheet(name)
  108. headers = [f'{name}'] + self.get_Column(name, True).tolist()
  109. rows = []
  110. table = Table()
  111. for i in get.iterrows(): # 按行迭代
  112. q = i[1].tolist()
  113. rows.append([f'{i[0]}'] + q)
  114. table.add(headers, rows).set_global_opts(
  115. title_opts=opts.ComponentTitleOpts(title=f"表格:{name}", subtitle="CoTan~数据处理:查看表格"))
  116. table.render(Dic)
  117. return Dic
  118. def to_Html(self, name, Dic='', type_=0):
  119. if Dic == '': Dic = f'{name}.html'
  120. # 把要画的sheet放到第一个
  121. Sheet_Dic = self.Sheet_Dic.copy()
  122. del Sheet_Dic[name]
  123. Sheet_list = [name] + list(Sheet_Dic.keys())
  124. class TAB_F:
  125. def __init__(self, q):
  126. self.tab = q # 一个Tab
  127. def render(self, Dic):
  128. return self.tab.render(Dic)
  129. # 生成一个显示页面
  130. if type_ == 0:
  131. class TAB(TAB_F):
  132. def add(self, table, k, *f):
  133. self.tab.add(table, k)
  134. tab = TAB(Tab(page_title='CoTan:查看表格')) # 一个Tab
  135. elif type_ == 1:
  136. class TAB(TAB_F):
  137. def add(self, table, *k):
  138. self.tab.add(table)
  139. tab = TAB(Page(page_title='CoTan:查看表格', layout=Page.DraggablePageLayout))
  140. else:
  141. class TAB(TAB_F):
  142. def add(self, table, *k):
  143. self.tab.add(table)
  144. tab = TAB(Page(page_title='CoTan:查看表格', layout=Page.SimplePageLayout))
  145. # 迭代添加内容
  146. for name in Sheet_list:
  147. get = self.get_Sheet(name)
  148. headers = [f'{name}'] + self.get_Column(name, True).tolist()
  149. rows = []
  150. table = Table()
  151. for i in get.iterrows(): # 按行迭代
  152. q = i[1].tolist()
  153. rows.append([f'{i[0]}'] + q)
  154. table.add(headers, rows).set_global_opts(
  155. title_opts=opts.ComponentTitleOpts(title=f"表格:{name}", subtitle="CoTan~数据处理:查看表格"))
  156. tab.add(table, f'表格:{name}')
  157. tab.render(Dic)
  158. return Dic
  159. def To_Sheet_Des(self, Sheet, Dic):
  160. re = pp.ProfileReport(Sheet)
  161. re.to_file(Dic)
  162. def to_Report(self, name, Dic=''):
  163. if Dic == '': Dic = f'{name}.html'
  164. Sheet = self.get_Sheet(name)
  165. self.To_Sheet_Des(Sheet, Dic)
  166. return Dic
  167. def get_Column(self, name, only=False): # 列名
  168. get = self.get_Sheet(name)
  169. if only:
  170. re = get.columns.values
  171. else:
  172. re = []
  173. loc_list = get.columns.values
  174. a = 0
  175. for i in loc_list:
  176. data = get[i].to_list()
  177. re.append(f'[列号:{a}]{i} -> {data}')
  178. a += 1
  179. return re
  180. def get_Index(self, name, only=False):
  181. get = self.get_Sheet(name)
  182. if only:
  183. re = get.index.values
  184. else:
  185. re = []
  186. loc_list = get.index.values
  187. a = 0
  188. for i in range(len(loc_list)):
  189. l = loc_list[i]
  190. data = get.iloc[i].to_list()
  191. re.append(f'[行号:{a}]{l} -> {data}')
  192. a += 1
  193. return re
  194. def Sorted(self, name, row: bool, new=False, a=True):
  195. get = self.get_Sheet(name)
  196. if row: # row-行名排序
  197. so = get.sort_index(axis=0, ascending=a)
  198. else:
  199. so = get.sort_index(axis=1, ascending=a)
  200. if new:
  201. self.Add_Form(so,f'{name}:排序')
  202. return so
  203. def Stored_Valuse(self, name, F, new=False):
  204. get = self.get_Sheet(name)
  205. row = get.columns.values
  206. a = []
  207. b = []
  208. for i in F:
  209. a.append(row[i[0]])
  210. b.append(i[1])
  211. if len(a) == 1:
  212. a = a[0]
  213. b = b[0]
  214. so = get.sort_values(by=a, ascending=b)
  215. if new:
  216. self.Add_Form(so,f'{name}:排序')
  217. return so
  218. def T(self, name, new=True):
  219. get = self.get_Sheet(name)
  220. re = get.T.copy()#复制一份,防止冲突
  221. if new:
  222. self.Add_Form(re,f'{name}.T')
  223. return re
  224. def get_Clice(self, name, Column, Row, U_iloc=True, new=False): # iloc(Row,Column) or loc
  225. get = self.get_Sheet(name)
  226. if U_iloc:
  227. Cli = get.iloc[Row, Column]
  228. else:
  229. Cli = get.loc[Row, Column]
  230. if new:
  231. self.Add_Form(Cli,f'{name}:切片')
  232. return Cli
  233. def Delete(self, name, Column, Row, new):
  234. get = self.get_Sheet(name)
  235. Column_List = get.columns.values
  236. for i in Column:
  237. try:
  238. get = get.drop(Column_List[int(i)], axis=1)
  239. except:
  240. pass
  241. Row_List = get.index.values
  242. for i in Row:
  243. try:
  244. get = get.drop(Row_List[int(i)])
  245. except:
  246. pass
  247. if new:
  248. self.Add_Form(get,f'{name}:删减')
  249. return get
  250. def Done_Bool(self, name, Exp, new=False):
  251. get = self.get_Sheet(name)
  252. try:
  253. re = eval(Exp, {'S': get, 'Sheet': get.iloc})
  254. if new:
  255. self.Add_Form(re,f'{name}:布尔')
  256. return re
  257. except:
  258. return None
  259. # raise
  260. def is_Na(self, name):
  261. get = self.get_Sheet(name)
  262. Na = pd.isna(get)
  263. return Na
  264. def Dropna(self, name, new):
  265. get = self.get_Sheet(name)
  266. Clean = get.dropna(axis=0)
  267. if new:
  268. self.Add_Form(Clean,f'{name}:清洗')
  269. return Clean
  270. def Add_CleanFunc(self, Exp):
  271. Name = self.Name.copy()
  272. try:
  273. exec(Exp, Name)
  274. except:
  275. return False
  276. Sava = {}
  277. Sava['Done_Row'] = Name.get('Done_Row', [])
  278. Sava['Done_Column'] = Name.get('Done_Column', [])
  279. Sava['axis'] = Name.get('axis', True)
  280. Sava['check'] = Name.get('check', lambda data, x, b, c, d, e: True)
  281. Sava['done'] = Name.get('done', lambda data, x, b, c, d, e: data)
  282. print(f'{len(self.Clean_Func)}')
  283. title = f"[{Name.get('name', f'[{len(self.Clean_Func)}')}] Done_Row={Sava['Done_Row']}_Done_Column={Sava['Done_Column']}_axis={Sava['axis']}"
  284. self.Clean_Func[title] = Sava
  285. self.Clean_Func_Exp[title] = Exp
  286. def Return_CleanFunc(self):
  287. return list(self.Clean_Func.keys())
  288. def Delete_CleanFunc(self, key):
  289. try:
  290. del self.Clean_Func[key]
  291. del self.Clean_Func_Exp[key]
  292. except:
  293. pass
  294. def Tra_Clean(self):
  295. self.Clean_Func = {}
  296. self.Clean_Func_Exp = {}
  297. def Return_CleanExp(self, key):
  298. return self.Clean_Func_Exp[key]
  299. def Done_CleanFunc(self, name):
  300. get = self.get_Sheet(name).copy()
  301. for i in list(self.Clean_Func.values()):
  302. Done_Row = i['Done_Row']
  303. Done_Column = i['Done_Column']
  304. if Done_Row == []:
  305. Done_Row = range(get.shape[0]) # shape=[行,列]#不需要回调
  306. if Done_Column == []:
  307. Done_Column = range(get.shape[1]) # shape=[行,列]#不需要回调
  308. if i['axis']:
  309. axis = 0
  310. else:
  311. axis = 1
  312. check = i['check']
  313. done = i['done']
  314. for r in Done_Row:
  315. for c in Done_Column:
  316. try:
  317. n = eval(f"get.iloc[{r},{c}]") # 第一个是行号,然后是列号
  318. r_h = eval(f"get.iloc[{r}]")
  319. c_h = eval(f"get.iloc[:,{c}]")
  320. if not check(n, r, c, get.copy(), r_h.copy(), c_h.copy()):
  321. d = done(n, r, c, get.copy(), r_h.copy(), c_h.copy())
  322. if d == self.DEL:
  323. if axis == 0: # 常规删除
  324. Row_List = get.index.values
  325. get = get.drop(Row_List[int(r)])
  326. else: # 常规删除
  327. Columns_List = get.columns.values
  328. get = get.drop(Columns_List[int(r)], axis=1)
  329. else:
  330. exec(f"get.iloc[{r},{c}] = {d}") # 第一个是行名,然后是列名
  331. except:
  332. pass
  333. self.Add_Form(get,f'{name}:清洗')
  334. return get
  335. def Import_c(self, text):
  336. Name = {}
  337. Name.update(locals())
  338. Name.update(globals())
  339. exec(text, Name)
  340. exec('c = Page()', Name)
  341. self.R_Dic[f'自定义图[{len(self.R_Dic)}]'] = Name['c']
  342. return Name['c']
  343. def retunr_RDic(self):
  344. return self.R_Dic.copy()
  345. def Delete_RDic(self, key):
  346. del self.R_Dic[key]
  347. def Reasonable_Type(self, name, column, dtype, wrong):
  348. get = self.get_Sheet(name).copy()
  349. for i in range(len(column)):
  350. try:
  351. column[i] = int(column[i])
  352. except:
  353. pass
  354. if dtype != '':
  355. func_Dic = {'Num': pd.to_numeric, 'Date': pd.to_datetime, 'Time': pd.to_timedelta}
  356. if column != []:
  357. get.iloc[:, column] = get.iloc[:, column].apply(func_Dic.get(dtype, pd.to_numeric), errors=wrong)
  358. else:
  359. get = get.apply(func_Dic.get(dtype, pd.to_numeric), errors=wrong)
  360. else:
  361. if column != []:
  362. get.iloc[:, column] = get.iloc[:, column].infer_objects()
  363. print('A')
  364. else:
  365. get = get.infer_objects()
  366. self.Add_Form(get,f'{name}')
  367. return get
  368. def as_Type(self, name, column, dtype, wrong):
  369. get = self.get_Sheet(name).copy()
  370. for i in range(len(column)):
  371. try:
  372. column[i] = int(column[i])
  373. except:
  374. pass
  375. func_Dic = {'Int': int, 'Float': float, 'Str': str, 'Date': pd.Timestamp, 'TimeDelta': pd.Timedelta}
  376. if column != []:
  377. get.iloc[:, column] = get.iloc[:, column].astype(func_Dic.get(dtype, dtype), errors=wrong)
  378. print('A')
  379. else:
  380. get = get.astype(func_Dic.get(dtype, dtype), errors=wrong)
  381. self.Add_Form(get,f'{name}')
  382. return get
  383. def Replace_Index(self, name, is_column, Dic, save):
  384. get = self.get_Sheet(name)
  385. if is_column:
  386. if save: # 保存原数据
  387. get.loc['column'] = self.get_Column(name, True)
  388. new = get.rename(columns=Dic)
  389. else:
  390. if save:
  391. get.loc[:, 'row'] = self.get_Index(name, True)
  392. new = get.rename(index=Dic)
  393. self.Add_Form(new,f'{name}')
  394. return new
  395. def Change_Index(self, name: str, is_column: bool, iloc: int, save: bool = True, drop: bool = False):
  396. get = self.get_Sheet(name).copy()
  397. if is_column: # 列名
  398. Row = self.get_Index(name, True)#行数据
  399. t = Row.tolist()[iloc]
  400. if save: # 保存原数据
  401. get.loc['column'] = self.get_Column(name,True)
  402. # new_colums = get.loc[t].values
  403. get.columns = get.loc[t].values
  404. if drop:
  405. get.drop(t, axis=0, inplace=True) # 删除行
  406. else:
  407. Col = self.get_Column(name, True)
  408. t = Col.tolist()[iloc]
  409. print(t)
  410. if save:
  411. get.loc[:, 'row'] = self.get_Index(name,True)
  412. get.index = get.loc[:, t].values # 调整
  413. if drop:
  414. get.drop(t, axis=1, inplace=True) # 删除行
  415. self.Add_Form(get,f'{name}')
  416. return get
  417. def num_toName(self, name, is_column, save):
  418. get = self.get_Sheet(name).copy()
  419. if is_column: # 处理列名
  420. Col = self.get_Column(name, True)
  421. if save: # 保存原数据
  422. get.loc['column'] = Col
  423. get.columns = [i for i in range(len(Col))]
  424. else:
  425. Row = self.get_Index(name, True)
  426. if save:
  427. get.loc[:, 'row'] = Row
  428. get.index = [i for i in range(len(Row))]
  429. self.Add_Form(get,f'{name}')
  430. return get
  431. def num_withName(self, name, is_column, save):
  432. get = self.get_Sheet(name).copy()
  433. if is_column: # 处理列名
  434. Col = self.get_Column(name, True)
  435. if save: # 保存原数据
  436. get.loc['column'] = Col
  437. get.columns = [f'[{i}]{Col[i]}' for i in range(len(Col))]
  438. else:
  439. Row = self.get_Index(name, True)
  440. if save:
  441. get.loc[:, 'row'] = Row
  442. get.index = [f'[{i}]{Row[i]}' for i in range(len(Row))]
  443. self.Add_Form(get,f'{name}')
  444. return get
  445. def Date_Index(self, name, is_column, save, **Date_Init):
  446. # Date_Init:start,end,freq 任意两样
  447. get = self.get_Sheet(name)
  448. if is_column: # 处理列名
  449. Col = self.get_Column(name, True)
  450. if save: # 保存原数据
  451. get.loc['column'] = Col
  452. Date_Init['periods'] = len(Col)
  453. get.columns = pd.date_range(**Date_Init)
  454. else:
  455. Row = self.get_Index(name, True)
  456. if save:
  457. get.loc[:, 'row'] = Row
  458. Date_Init['periods'] = len(Row)
  459. get.index = pd.date_range(**Date_Init)
  460. self.Add_Form(get,f'{name}')
  461. return get
  462. def Time_Index(self, name, is_column, save, **Time_Init):
  463. # Date_Init:start,end,freq 任意两样
  464. get = self.get_Sheet(name)
  465. if is_column: # 处理列名
  466. Col = self.get_Column(name, True)
  467. if save: # 保存原数据
  468. get.loc['column'] = Col
  469. Time_Init['periods'] = len(Col)
  470. get.columns = pd.timedelta_range(**Time_Init)
  471. else:
  472. Row = self.get_Index(name, True)
  473. if save:
  474. get.loc[:, 'row'] = Row
  475. Time_Init['periods'] = len(Row)
  476. get.index = pd.timedelta_range(**Time_Init)
  477. self.Add_Form(get,f'{name}')
  478. return get
  479. def Sample(self,name,new):
  480. get = self.get_Sheet(name)
  481. sample = get.sample(frac=1)#返回比,默认按行打乱
  482. if new:
  483. self.Add_Form(sample,f'{name}:打乱')
  484. return sample
  485. def to_CSV(self,name,Dic,Sep=','):
  486. if Sep == '':Sep = ','
  487. get = self.get_Sheet(name)
  488. get.to_csv(Dic,sep=Sep,na_rep='')
  489. class Draw(Form):
  490. # 1)图例位置、朝向和是否显示
  491. # 2)视觉映射是否开启、是否有最大值和最小值、两端文本以及颜色、分段和朝向、size或color
  492. # 3)自动设置图标ID,标题
  493. # 4)工具箱显示
  494. # 5)title配置
  495. # 6)是否显示刻度线、数轴类型、分割线
  496. def Parsing_Parameters(self,text):#解析文本参数
  497. args = {}#解析到的参数
  498. exec(text,args)
  499. args_use = {}#真实的参数
  500. #标题设置,global
  501. args_use['title'] = args.get('title',None)
  502. args_use['vice_title'] = args.get('vice_title', 'CoTan~数据处理:')
  503. #图例设置global
  504. args_use['show_Legend'] = bool(args.get('show_Legend', True))#是否显示图例
  505. args_use['ori_Legend'] = args.get('ori_Legend', 'horizontal')#朝向
  506. #视觉映射设置global
  507. args_use['show_Visual_mapping'] = bool(args.get('show_Visual_mapping', True))#是否显示视觉映射
  508. args_use['is_color_Visual_mapping'] = bool(args.get('is_color_Visual_mapping', True))#颜色 or 大小
  509. args_use['min_Visual_mapping'] = args.get('min_Visual_mapping', None)#最小值(None表示现场计算)
  510. args_use['max_Visual_mapping'] = args.get('max_Visual_mapping', None)#最大值(None表示现场计算)
  511. args_use['color_Visual_mapping'] = args.get('color_Visual_mapping', None)#颜色列表
  512. args_use['size_Visual_mapping'] = args.get('size_Visual_mapping', None)#大小列表
  513. args_use['text_Visual_mapping'] = args.get('text_Visual_mapping', None)#文字
  514. args_use['is_Subsection'] = bool(args.get('is_Subsection', False)) # 分段类型
  515. args_use['Subsection_list'] = args.get('Subsection_list', []) # 分段列表
  516. args_use['ori_Visual'] = args.get('ori_Visual', 'vertical') # 朝向
  517. #工具箱设置global
  518. args_use['Tool_BOX'] = bool(args.get('Tool_BOX', True)) # 开启工具箱
  519. #Init设置global
  520. args_use['Theme'] = args.get('Theme', 'white') # 设置style
  521. args_use['BG_Color'] = args.get('BG_Color', None) # 设置背景颜色
  522. args_use['width'] = args.get('width', '900px') # 设置宽度
  523. args_use['heigh'] = args.get('heigh', '500px') if not bool(args.get('Square', False)) else args.get('width', '900px') # 设置高度
  524. args_use['page_Title'] = args.get('page_Title', '') # 设置HTML标题
  525. args_use['show_Animation'] = args.get('show_Animation', True) # 设置HTML标题
  526. #坐标轴设置,2D坐标图和3D坐标图
  527. args_use['show_Axis'] = bool(args.get('show_Axis', True)) # 显示坐标轴
  528. args_use['Axis_Zero'] = bool(args.get('Axis_Zero', False)) # 重叠于原点
  529. args_use['show_Axis_Scale'] = bool(args.get('show_Axis_Scale', True)) # 显示刻度
  530. args_use['x_type'] = args.get('x_type', None) # 坐标轴类型
  531. args_use['y_type'] = args.get('y_type', None)
  532. args_use['z_type'] = args.get('z_type', None)
  533. #Mark设置 坐标图专属
  534. args_use['make_Line'] = args.get('make_Line', []) # 设置直线
  535. #Datazoom设置 坐标图专属
  536. args_use['Datazoom'] = args.get('Datazoom', 'N') # 设置Datazoom
  537. #显示文字设置
  538. args_use['show_Text'] = bool(args.get('show_Text', False)) # 显示文字
  539. #统一化的设置
  540. args_use['Size'] = args.get('Size', 10) # Size
  541. args_use['Symbol'] = args.get('Symbol', 'circle') # 散点样式
  542. #Bar设置
  543. args_use['bar_Stacking'] = bool(args.get('bar_Stacking', False)) # 堆叠(2D和3D)
  544. #散点图设置
  545. args_use['EffectScatter'] = bool(args.get('EffectScatter', False)) # 开启特效(2D和3D)
  546. # 折线图设置
  547. args_use['connect_None'] = bool(args.get('connect_None', False)) # 连接None
  548. args_use['Smooth_Line'] = bool(args.get('Smooth_Line', False)) # 平滑曲线
  549. args_use['Area_chart'] = bool(args.get('Area_chart', False)) # 面积图
  550. args_use['paste_Y'] = bool(args.get('paste_Y', False)) # 紧贴Y轴
  551. args_use['step_Line'] = bool(args.get('step_Line', False)) # 阶梯式图
  552. args_use['size_PictorialBar'] = args.get('size_PictorialBar', None) # 象形柱状图大小
  553. args_use['Polar_units'] = args.get('Polar_units', '100') # 极坐标图单位制
  554. args_use['More'] = bool(args.get('More', False)) # 均绘制水球图、仪表图
  555. args_use['WordCould_Size'] = args.get('WordCould_Size', [20,100]) # 开启特效
  556. args_use['WordCould_Shape'] = args.get('WordCould_Shape', "circle") # 开启特效
  557. args_use['symbol_Graph'] = args.get('symbol_Graph', 'circle') # 关系点样式
  558. args_use['Repulsion'] = float(args.get('Repulsion', 8000)) # 斥力因子
  559. args_use['Area_radar'] = bool(args.get('Area_radar', True)) # 雷达图面积
  560. args_use['HTML_Type'] = args.get('HTML_Type', 2) # 输出Page的类型
  561. args_use['Map'] = args.get('Map', 'china') # 输出Page的面积
  562. args_use['show_Map_Symbol'] = bool(args.get('show_Map_Symbol', False)) # 输出Page的面积
  563. args_use['Geo_Type'] = {'heatmap':GeoType.HEATMAP,'scatter':'scatter','EFFECT':GeoType.EFFECT_SCATTER
  564. }.get(args.get('Geo_Type', 'heatmap'),GeoType.HEATMAP) # 输出Page的面积
  565. args_use['map_Type'] = args.get('map_Type', '2D') # 输出Page的面积
  566. args_use['is_Dark'] = bool(args.get('is_Dark', False)) # 输出Page的面积
  567. return args_use
  568. #全局设定,返回一个全局设定的字典,解包即可使用
  569. def global_set(self,args_use,title,Min,Max,DataZoom=False,Visual_mapping=True,axis=()):
  570. k = {}
  571. #标题设置
  572. if args_use['title'] == None:args_use['title'] = title
  573. k['title_opts']=opts.TitleOpts(title=args_use['title'], subtitle=args_use['vice_title'])
  574. #图例设置
  575. if not args_use['show_Legend']:k['legend_opts']=opts.LegendOpts(is_show=False)
  576. else:
  577. k['legend_opts'] = opts.LegendOpts(type_='scroll',orient=args_use['ori_Legend'],pos_bottom='2%')#移动到底部,避免和标题冲突
  578. #视觉映射
  579. if not args_use['show_Visual_mapping']:
  580. pass
  581. elif not Visual_mapping:
  582. pass
  583. else:
  584. if args_use['min_Visual_mapping'] != None:Min = args_use['min_Visual_mapping']
  585. if args_use['max_Visual_mapping'] != None:Max = args_use['max_Visual_mapping']
  586. k['visualmap_opts'] = opts.VisualMapOpts(type_= 'color'if args_use['is_color_Visual_mapping'] else 'size',
  587. max_=Max,min_=Min,range_color=args_use['color_Visual_mapping'],
  588. range_size=args_use['size_Visual_mapping'],range_text=args_use['text_Visual_mapping'],
  589. is_piecewise=args_use['is_Subsection'],pieces=args_use['Subsection_list'],
  590. orient=args_use['ori_Visual'])
  591. k['toolbox_opts']=opts.ToolboxOpts(is_show=args_use['Tool_BOX'])
  592. if DataZoom:
  593. if args_use['Datazoom'] == 'all':
  594. k['datazoom_opts'] = [opts.DataZoomOpts(), opts.DataZoomOpts(orient = "horizontal")]
  595. elif args_use['Datazoom'] == 'horizontal':
  596. k['datazoom_opts'] = opts.DataZoomOpts(type_="inside")
  597. elif args_use['Datazoom'] == 'vertical':
  598. opts.DataZoomOpts(orient="vertical")
  599. elif args_use['Datazoom'] == 'inside_vertical':
  600. opts.DataZoomOpts(type_="inside", orient="vertical")
  601. elif args_use['Datazoom'] == 'inside_vertical':
  602. opts.DataZoomOpts(type_="inside", orient="horizontal")
  603. # 坐标轴设定,输入设定的坐标轴即可
  604. def axis_Seeting(args_use, axis='x'):
  605. axis_k = {}
  606. if args_use[f'{axis[0]}_type'] == 'Display' or not args_use['show_Axis']:
  607. axis_k[f'{axis[0]}axis_opts'] = opts.AxisOpts(is_show=False)
  608. else:
  609. axis_k[f'{axis[0]}axis_opts'] = opts.AxisOpts(type_=args_use[f'{axis[0]}_type'],
  610. axisline_opts=opts.AxisLineOpts(
  611. is_on_zero=args_use['Axis_Zero']),
  612. axistick_opts=opts.AxisTickOpts(
  613. is_show=args_use['show_Axis_Scale']))
  614. return axis_k
  615. for i in axis:
  616. k.update(axis_Seeting(args_use, i))
  617. return k
  618. #初始化设定
  619. def initSetting(self,args_use):
  620. k = {}
  621. #设置标题
  622. if args_use['page_Title'] == '':title = 'CoTan_数据处理'
  623. else:title = f"CoTan_数据处理:{args_use['page_Title']}"
  624. k['init_opts'] = opts.InitOpts(theme=args_use['Theme'],bg_color=args_use['BG_Color'],width=args_use['width'],
  625. height=args_use['heigh'],page_title=title,
  626. animation_opts=opts.AnimationOpts(animation=args_use['show_Animation']))
  627. return k
  628. #获取title专用
  629. def get_name(self,args_use):
  630. return f":{args_use['title']}"
  631. #标记符,包含线标记、点
  632. def Mark(self,args_use):
  633. k = {}
  634. line = []
  635. for i in args_use['make_Line']:
  636. try:
  637. if i[2] == 'c' or i[0] in ('min', 'max', 'average'):
  638. line.append(opts.MarkLineItem(type_=i[0], name=i[1]))
  639. elif i[2] == 'x':
  640. line.append(opts.MarkLineItem(x=i[0], name=i[1]))
  641. else:
  642. raise Exception
  643. except:
  644. line.append(opts.MarkLineItem(y=i[0], name=i[1]))
  645. if line == []:return k
  646. k['markline_opts'] = opts.MarkLineOpts(data=line)
  647. return k
  648. #标签设定,可以放在系列设置中或者坐标轴y轴设置中
  649. def y_Label(self,args_use,position="inside"):
  650. return {'label_opts':opts.LabelOpts(is_show=args_use['show_Text'],position=position)}
  651. #放在不同的图~.add中的设定
  def Per_Seeting(self, args_use, type_):
      """Return the chart-type specific settings ("private settings").

      Args:
          args_use: Parsed options mapping.
          type_: Chart kind, e.g. ``'Bar'``, ``'Scatter'``, ``'Line'``.

      Returns:
          A dict of keyword arguments for the chart's ``add`` call. For
          ``'Polar'`` the configured unit system is returned instead of a
          dict. Unknown kinds yield an empty dict.
      """
      if type_ == 'Polar':
          # Polar charts only need the unit system, not a settings dict.
          return args_use['Polar_units']

      settings = {}
      if type_ == 'Bar':
          # Stack the y series on top of each other when requested.
          if args_use['bar_Stacking']:
              settings['stack'] = 'stack1'
      elif type_ == 'Scatter':
          settings.update(
              Beautiful=args_use['EffectScatter'],
              symbol=args_use['Symbol'],
              symbol_size=args_use['Size'],
          )
      elif type_ == 'Line':
          settings['is_connect_nones'] = args_use['connect_None']
          # Smooth curve, also used when the line should hug the y axis.
          smooth = bool(args_use['Smooth_Line'] or args_use['paste_Y'])
          area_opacity = 0.5 if args_use['Area_chart'] else 0
          settings['areastyle_opts'] = opts.AreaStyleOpts(opacity=area_opacity)
          if args_use['step_Line']:
              # A step line replaces the smoothing flag entirely.
              settings['is_step'] = True
          else:
              settings['is_smooth'] = smooth
      elif type_ == 'PictorialBar':
          settings['symbol_size'] = args_use['Size']
      elif type_ == 'WordCloud':
          settings['word_size_range'] = args_use['WordCould_Size']
          settings['shape'] = args_use['Symbol']
      elif type_ == 'Graph':
          settings['symbol_Graph'] = args_use['Symbol']
      elif type_ == 'Radar':
          area_opacity = 0.1 if args_use['Area_chart'] else 0
          settings['areastyle_opts'] = opts.AreaStyleOpts(opacity=area_opacity)
          settings['symbol'] = args_use['Symbol']
      return settings
  681. #坐标系图像:水平和垂直的数据轴:DataZoom+inside
  def to_Bar(self, name, text) -> Bar:
      """Render sheet ``name`` as a bar chart and register it in ``self.R_Dic``.

      Args:
          name: Sheet name passed to ``get_Sheet`` / ``get_Index``.
          text: Option text parsed by ``Parsing_Parameters``.

      Returns:
          The configured ``Bar`` chart.
      """
      sheet = self.get_Sheet(name)
      index = self.get_Index(name, True).tolist()
      args = self.Parsing_Parameters(text)
      chart = Bar(**self.initSetting(args))
      # De-duplicate the labels but keep their sheet order: a plain set()
      # returns them in arbitrary order, which no longer matches the
      # positional y values.
      chart.add_xaxis([str(label) for label in dict.fromkeys(index)])
      y = []
      # DataFrame.items() replaces iteritems(), which pandas 2.0 removed.
      for column, series in sheet.items():
          q = series.tolist()
          try:
              chart.add_yaxis(
                  f'{name}_{column}', q,
                  **self.Per_Seeting(args, 'Bar'),
                  **self.y_Label(args),
                  color=self.get_Color(),
              )
              # y is only used to derive the visual-map range.
              y += list(map(int, q))
          except Exception:
              # Best effort: a column that cannot be plotted or converted is
              # skipped instead of aborting the whole chart.
              continue
      if not y:
          args['show_Visual_mapping'] = False  # no numeric data to map
          y = [0, 100]
      chart.set_global_opts(**self.global_set(args, f"{name}柱状图", min(y), max(y), True, axis=['x', 'y']))
      chart.set_series_opts(**self.Mark(args))
      self.R_Dic[f'{name}柱状图[{len(self.R_Dic)}]{self.get_name(args)}'] = chart
      return chart
  705. # 坐标系图像:水平和垂直的数据轴:DataZoom+inside
  def to_Line(self, name, text) -> Line:
      """Render sheet ``name`` as a line chart and register it in ``self.R_Dic``.

      Line-specific options (connect empty data, smoothing, area fill,
      step line) come from ``Per_Seeting(args, 'Line')``.

      Args:
          name: Sheet name passed to ``get_Sheet`` / ``get_Index``.
          text: Option text parsed by ``Parsing_Parameters``.

      Returns:
          The configured ``Line`` chart.
      """
      sheet = self.get_Sheet(name)
      index = self.get_Index(name, True).tolist()
      args = self.Parsing_Parameters(text)
      chart = Line(**self.initSetting(args))
      # Order-preserving de-duplication; set() would shuffle the labels
      # relative to the positional y values.
      chart.add_xaxis([str(label) for label in dict.fromkeys(index)])
      y = []
      # DataFrame.items() replaces iteritems(), which pandas 2.0 removed.
      for column, series in sheet.items():
          q = series.tolist()
          try:
              chart.add_yaxis(
                  f'{name}_{column}', q,
                  **self.Per_Seeting(args, 'Line'),
                  **self.y_Label(args),
                  color=self.get_Color(),
              )
              # y is only used to derive the visual-map range.
              y += list(map(int, q))
          except Exception:
              # Best effort: skip columns that cannot be plotted/converted.
              continue
      if not y:
          args['show_Visual_mapping'] = False  # no numeric data to map
          y = [0, 100]
      chart.set_global_opts(**self.global_set(args, f"{name}折线图", min(y), max(y), True, axis=['x', 'y']))
      chart.set_series_opts(**self.Mark(args))
      self.R_Dic[f'{name}折线图[{len(self.R_Dic)}]{self.get_name(args)}'] = chart
      return chart
  729. # 坐标系图像:水平和垂直的数据轴:DataZoom+inside
  def to_Scatter(self, name, text) -> Scatter:
      """Render sheet ``name`` as a scatter chart and register it in ``self.R_Dic``.

      When the ``'Beautiful'`` switch returned by ``Per_Seeting`` is truthy
      an ``EffectScatter`` chart is created instead of a plain ``Scatter``.

      Args:
          name: Sheet name passed to ``get_Sheet`` / ``get_Index``.
          text: Option text parsed by ``Parsing_Parameters``.

      Returns:
          The configured chart.
      """
      sheet = self.get_Sheet(name)
      args = self.Parsing_Parameters(text)
      index = self.get_Index(name, True).tolist()
      series_opts = self.Per_Seeting(args, 'Scatter')
      # 'Beautiful' only selects the chart class; it must not reach add_yaxis.
      chart_cls = EffectScatter if series_opts.pop('Beautiful') else Scatter
      chart = chart_cls(**self.initSetting(args))
      # Order-preserving de-duplication; set() would shuffle the labels
      # relative to the positional y values.
      chart.add_xaxis([str(label) for label in dict.fromkeys(index)])
      y = []
      # DataFrame.items() replaces iteritems(), which pandas 2.0 removed.
      for column, series in sheet.items():
          q = series.tolist()
          try:
              chart.add_yaxis(
                  f'{name}_{column}', q,
                  **series_opts,
                  **self.y_Label(args),
                  color=self.get_Color(),
              )
              # y is only used to derive the visual-map range.
              y += list(map(int, q))
          except Exception:
              # Best effort: skip columns that cannot be plotted/converted.
              continue
      if not y:
          args['show_Visual_mapping'] = False  # no numeric data to map
          y = [0, 100]
      chart.set_global_opts(**self.global_set(args, f"{name}散点图", min(y), max(y), True, axis=['x', 'y']))
      chart.set_series_opts(**self.Mark(args))
      self.R_Dic[f'{name}散点图[{len(self.R_Dic)}]{self.get_name(args)}'] = chart
      return chart
  757. # 坐标系图像:水平和垂直的数据轴:DataZoom+inside
  def to_Pictorialbar(self, name, text) -> PictorialBar:
      """Render sheet ``name`` as a pictorial bar chart and register it.

      The axes are reversed and every series uses a repeated, clipped
      ``ROUND_RECT`` symbol sized by ``Per_Seeting(args, 'PictorialBar')``.

      Args:
          name: Sheet name passed to ``get_Sheet`` / ``get_Index``.
          text: Option text parsed by ``Parsing_Parameters``.

      Returns:
          The configured ``PictorialBar`` chart.
      """
      sheet = self.get_Sheet(name)
      index = self.get_Index(name, True).tolist()
      args = self.Parsing_Parameters(text)
      chart = PictorialBar(**self.initSetting(args))
      # Order-preserving de-duplication; set() would shuffle the labels
      # relative to the positional y values.
      chart.add_xaxis([str(label) for label in dict.fromkeys(index)])
      chart.reversal_axis()
      y = []
      symbol_opts = self.Per_Seeting(args, 'PictorialBar')
      # DataFrame.items() replaces iteritems(), which pandas 2.0 removed.
      for column, series in sheet.items():
          q = series.tolist()
          try:
              chart.add_yaxis(
                  f'{name}_{column}', q,
                  label_opts=opts.LabelOpts(is_show=False),
                  symbol_repeat=True,
                  is_symbol_clip=True,
                  symbol=SymbolType.ROUND_RECT,
                  **symbol_opts,
                  color=self.get_Color(),
              )
              # y is only used to derive the visual-map range.
              y += list(map(int, q))
          except Exception:
              # Best effort: skip columns that cannot be plotted/converted.
              continue
      if not y:
          args['show_Visual_mapping'] = False  # no numeric data to map
          y = [0, 100]
      chart.set_global_opts(**self.global_set(args, f"{name}象形柱状图", min(y), max(y), True, axis=['x', 'y']))
      chart.set_series_opts(**self.Mark(args))
      # The registry key now carries the chart type like every sibling method.
      self.R_Dic[f'{name}象形柱状图[{len(self.R_Dic)}]{self.get_name(args)}'] = chart
      return chart
  789. # 坐标系图像:水平和垂直的数据轴:DataZoom+inside
  def to_Boxpolt(self, name, text) -> Boxplot:
      """Render every column of sheet ``name`` as a box plot and register it.

      Args:
          name: Sheet name passed to ``get_Sheet``; also the single x label.
          text: Option text parsed by ``Parsing_Parameters``.

      Returns:
          The configured ``Boxplot`` chart.
      """
      sheet = self.get_Sheet(name)
      args = self.Parsing_Parameters(text)
      chart = Boxplot(**self.initSetting(args))
      chart.add_xaxis([f'{name}'])
      y = []
      # DataFrame.items() replaces iteritems(), which pandas 2.0 removed.
      for column, series in sheet.items():
          q = series.tolist()
          try:
              chart.add_yaxis(f'{name}_{column}', [q], **self.y_Label(args))
              # y is only used to derive the visual-map range.
              y += list(map(float, q))
          except Exception:
              # Best effort: skip columns that cannot be plotted/converted.
              continue
      if not y:
          args['show_Visual_mapping'] = False  # no numeric data to map
          y = [0, 100]
      chart.set_global_opts(**self.global_set(args, f"{name}箱形图", min(y), max(y), True, axis=['x', 'y']))
      chart.set_series_opts(**self.Mark(args))
      self.R_Dic[f'{name}箱形图[{len(self.R_Dic)}]{self.get_name(args)}'] = chart
      return chart
  812. # 坐标系图像:水平和垂直的数据轴:DataZoom+inside
  def to_HeatMap(self, name, text) -> HeatMap:
      """Render sheet ``name`` as a heat map and register it in ``self.R_Dic``.

      Column names form the x axis, row names the y axis. Cells that cannot
      be converted to ``float`` are left out.

      Args:
          name: Sheet name passed to ``get_Sheet`` and friends.
          text: Option text parsed by ``Parsing_Parameters``.

      Returns:
          The configured ``HeatMap`` chart.
      """
      sheet = self.get_Sheet(name)
      x = self.get_Column(name, True).tolist()  # x axis: column names
      y = self.get_Index(name, True).tolist()  # y axis: row names
      value_list = []
      numbers = []
      for col in range(len(x)):
          for row in range(len(y)):
              try:
                  v = float(sheet.iloc[row, col])  # row first, then column
              except (TypeError, ValueError, IndexError):
                  continue
              numbers.append(v)
              value_list.append([col, row, v])
      args = self.Parsing_Parameters(text)
      if numbers:
          MIN, MAX = min(numbers), max(numbers)
      else:
          args['show_Visual_mapping'] = False  # no numeric data to map
          MIN, MAX = 0, 100
      chart = (
          HeatMap(**self.initSetting(args))
          # value_list refers to columns by position, so the labels must keep
          # their exact order and multiplicity (no set()).
          .add_xaxis(list(map(str, x)))
          .add_yaxis(f'{name}', list(map(str, y)), value_list, **self.y_Label(args))
          .set_global_opts(**self.global_set(args, f"{name}热力图", MIN, MAX, True, axis=['x', 'y']))
          .set_series_opts(**self.Mark(args))
      )
      self.R_Dic[f'{name}热力图[{len(self.R_Dic)}]{self.get_name(args)}'] = chart
      return chart
  841. #数据哪部全,要设置More
  def to_Funnel(self, name, text) -> Funnel:
      """Render the first column of sheet ``name`` as a funnel chart.

      Row names become the stage labels; rows whose first cell is not
      numeric are skipped.

      Args:
          name: Sheet name passed to ``get_Sheet`` / ``get_Index``.
          text: Option text parsed by ``Parsing_Parameters``.

      Returns:
          The configured ``Funnel`` chart (also stored in ``self.R_Dic``).
      """
      sheet = self.get_Sheet(name)
      y_name = self.get_Index(name, True).tolist()  # row names
      value = []
      y = []
      for row, label in enumerate(y_name):
          try:
              v = float(sheet.iloc[row, 0])
          except (TypeError, ValueError, IndexError):
              continue
          value.append([f'{label}', v])
          y.append(v)
      args = self.Parsing_Parameters(text)
      if not y:
          # Without numeric data min()/max() would raise; fall back to the
          # same neutral range the other chart builders use.
          args['show_Visual_mapping'] = False
          y = [0, 100]
      chart = (
          Funnel(**self.initSetting(args))
          .add(f'{name}', value, **self.y_Label(args, 'top'))
          .set_global_opts(**self.global_set(args, f"{name}漏斗图", min(y), max(y), True, False))
      )
      self.R_Dic[f'{name}漏斗图[{len(self.R_Dic)}]{self.get_name(args)}'] = chart
      return chart
  def to_Graph(self, name, text) -> Graph:
      """Render sheet ``name`` as a relationship graph and register it.

      Every row is a node: the row name is the node name and the first cell
      its size/value. Remaining cells may hold ``"target:value"`` strings
      describing links. When no link is given at all, every pair of nodes
      (self pairs included) is linked with the absolute difference of
      their values.

      Args:
          name: Sheet name passed to ``get_Sheet`` / ``get_Index``.
          text: Option text parsed by ``Parsing_Parameters``.

      Returns:
          The configured ``Graph`` chart.
      """
      sheet = self.get_Sheet(name)
      y_name = self.get_Index(name, True).tolist()  # row names
      nodes = []
      link = []
      for idx, row in sheet.iterrows():
          cells = row.tolist()
          try:
              weight = float(cells[0])
          except (TypeError, ValueError, IndexError):
              continue  # rows without a numeric first cell are not nodes
          nodes.append({"name": f"{idx}", "symbolSize": weight, "value": weight})
          for cell in cells[1:]:
              parts = str(cell).split(':')
              try:
                  link.append({"source": f"{idx}", "target": parts[0], "value": float(parts[1])})
              except (ValueError, IndexError):
                  pass  # cell is not a "target:value" link description
      if not link:
          link = [
              {
                  "source": a.get("name"),
                  "target": b.get("name"),
                  "value": abs(a.get("value") - b.get("value")),
              }
              for a in nodes
              for b in nodes
          ]
      args = self.Parsing_Parameters(text)
      # An empty sheet has no first row name; fall back to the sheet name.
      series_name = f"{y_name[0]}" if y_name else f"{name}"
      chart = (
          Graph(**self.initSetting(args))
          .add(series_name, nodes, link, repulsion=args['Repulsion'], **self.y_Label(args))
          .set_global_opts(**self.global_set(args, f"{name}关系图", 0, 100, False, False))
      )
      self.R_Dic[f'{name}关系图[{len(self.R_Dic)}]{self.get_name(args)}'] = chart
      return chart
  def to_XY_Graph(self, name, text) -> Graph:
      """Render sheet ``name`` as a graph whose links come from the cells.

      Row and column names together form the node set; a numeric cell at
      (row, column) links those two nodes with that value. Each unordered
      pair is considered once and self pairs are ignored.

      Args:
          name: Sheet name passed to ``get_Sheet`` and friends.
          text: Option text parsed by ``Parsing_Parameters``.

      Returns:
          The configured ``Graph`` chart (also stored in ``self.R_Dic``).
      """
      sheet = self.get_Sheet(name)
      args = self.Parsing_Parameters(text)
      size = args['Size'] * 3
      y_name = self.get_Index(name, True).tolist()  # row names
      x_name = self.get_Column(name, True).tolist()  # column names
      # Unique labels in first-seen order.
      nodes = [{"name": f"{label}", "symbolSize": size} for label in dict.fromkeys(y_name + x_name)]
      link = []
      seen = set()  # pairs already examined, O(1) membership
      for row, y_n in enumerate(y_name):
          for col, x_n in enumerate(x_name):
              if y_n == x_n:
                  continue
              if (y_n, x_n) in seen or (x_n, y_n) in seen:
                  continue
              seen.add((y_n, x_n))
              try:
                  v = float(sheet.iloc[row, col])
              except (TypeError, ValueError, IndexError):
                  continue
              # Nodes are registered under str(label); use the same strings so
              # the link endpoints match the node names.
              link.append({"source": str(y_n), "target": str(x_n), "value": v})
      # An empty sheet has no first row name; fall back to the sheet name.
      series_name = f"{y_name[0]}" if y_name else f"{name}"
      chart = (
          Graph(**self.initSetting(args))
          .add(series_name, nodes, link, repulsion=args['Repulsion'], **self.y_Label(args))
          .set_global_opts(**self.global_set(args, f"{name}关系图", 0, 100, False, False))
      )
      self.R_Dic[f'{name}关系图[{len(self.R_Dic)}]{self.get_name(args)}'] = chart
      return chart
  def to_Sankey(self, name, text) -> Sankey:
      """Render sheet ``name`` as a Sankey diagram and register it.

      Row and column names together form the node set; a numeric cell at
      (row, column) becomes a flow from the row node to the column node.
      Each unordered pair is considered once, self pairs are ignored and a
      flow that would close a cycle is dropped.

      Args:
          name: Sheet name passed to ``get_Sheet`` and friends.
          text: Option text parsed by ``Parsing_Parameters``.

      Returns:
          The configured ``Sankey`` chart.
      """
      sheet = self.get_Sheet(name)
      args = self.Parsing_Parameters(text)
      y_name = self.get_Index(name, True).tolist()  # row names
      x_name = self.get_Column(name, True).tolist()  # column names
      labels = list(dict.fromkeys(y_name + x_name))  # unique, first-seen order
      nodes = [{"name": f"{label}"} for label in labels]
      downstream = {label: set() for label in labels}  # node -> direct targets

      def reaches(start, goal):
          """Return True when ``goal`` is reachable from ``start`` via accepted links."""
          stack = [start]
          visited = set()
          while stack:
              node = stack.pop()
              if node == goal:
                  return True
              if node in visited:
                  continue
              visited.add(node)
              stack.extend(downstream[node])
          return False

      link = []
      seen = set()  # pairs already examined, O(1) membership
      for row, y_n in enumerate(y_name):
          for col, x_n in enumerate(x_name):
              if y_n == x_n:
                  continue
              if (y_n, x_n) in seen or (x_n, y_n) in seen:
                  continue
              seen.add((y_n, x_n))
              # The row node is always the source. Adding y -> x while x
              # already leads to y (over any number of hops) would close a
              # cycle, so the flow is skipped.
              if reaches(x_n, y_n):
                  continue
              try:
                  v = float(sheet.iloc[row, col])
              except (TypeError, ValueError, IndexError):
                  continue
              # Nodes are registered under str(label); use the same strings.
              link.append({"source": str(y_n), "target": str(x_n), "value": v})
              downstream[y_n].add(x_n)
      chart = (
          Sankey()
          .add(
              f"{name}",
              nodes,
              link,
              linestyle_opt=opts.LineStyleOpts(opacity=0.2, curve=0.5, color="source"),
              label_opts=opts.LabelOpts(position="right"),
          )
          .set_global_opts(**self.global_set(args, f"{name}桑基图", 0, 100, False, False))
      )
      self.R_Dic[f'{name}桑基图[{len(self.R_Dic)}]{self.get_name(args)}'] = chart
      return chart
  def to_Parallel(self, name, text) -> Parallel:
      """Render sheet ``name`` as a parallel-axes chart and register it.

      Each row name becomes one axis (dimension) and each column one series.

      Args:
          name: Sheet name passed to ``get_Sheet`` / ``get_Index``.
          text: Option text parsed by ``Parsing_Parameters``.

      Returns:
          The configured ``Parallel`` chart.
      """
      sheet = self.get_Sheet(name)
      dim_list = self.get_Index(name, True).tolist()
      dim = [{"dim": pos, "name": f"{label}"} for pos, label in enumerate(dim_list)]
      args = self.Parsing_Parameters(text)
      chart = (
          Parallel(**self.initSetting(args))
          .add_schema(dim)
          .set_global_opts(**self.global_set(args, f"{name}多轴图", 0, 100, False, False))
      )
      # DataFrame.items() replaces iteritems(), which pandas 2.0 removed.
      for column, series in sheet.items():
          chart.add(f"{column}", [series.tolist()], **self.y_Label(args))
      self.R_Dic[f'{name}多轴图[{len(self.R_Dic)}]{self.get_name(args)}'] = chart
      return chart
  def to_Pie(self, name, text) -> Pie:
      """Render the first column of sheet ``name`` as a pie chart.

      Row names are the slice labels; rows whose first cell is not numeric
      are skipped.

      Args:
          name: Sheet name passed to ``get_Sheet``.
          text: Option text parsed by ``Parsing_Parameters``.

      Returns:
          The configured ``Pie`` chart (also stored in ``self.R_Dic``).
      """
      sheet = self.get_Sheet(name)
      data = []
      for idx, row in sheet.iterrows():
          try:
              data.append([f'{idx}', float(row.tolist()[0])])
          except (TypeError, ValueError, IndexError):
              continue  # non-numeric or missing first cell
      args = self.Parsing_Parameters(text)
      chart = (
          Pie(**self.initSetting(args))
          .add(f"{name}", data, **self.y_Label(args, 'top'))
          .set_global_opts(**self.global_set(args, f"{name}饼图", 0, 100, False, False))
          .set_series_opts(label_opts=opts.LabelOpts(formatter="{b}: {c}"))
      )
      self.R_Dic[f'{name}饼图[{len(self.R_Dic)}]{self.get_name(args)}'] = chart
      return chart
  def to_Polar(self, name, text) -> Polar:
      """Render the first two columns of sheet ``name`` as a polar scatter.

      Each row contributes ``(first cell, second cell / D)`` where ``D``
      depends on the unit system returned by ``Per_Seeting(args, 'Polar')``.
      Rows that are not numeric are skipped.

      Args:
          name: Sheet name passed to ``get_Sheet``.
          text: Option text parsed by ``Parsing_Parameters``.

      Returns:
          The configured ``Polar`` chart (also stored in ``self.R_Dic``).
      """
      sheet = self.get_Sheet(name)
      data = []
      args = self.Parsing_Parameters(text)
      setting = self.Per_Seeting(args, 'Polar')
      if setting == 'rad':  # radians
          D = 0.0628
      elif setting == '360':  # degrees
          # NOTE(review): 0.0628 looks like 2*pi/100, which would suggest
          # 3.6 (360/100) here rather than 0.36 -- confirm the intended scale.
          D = 0.36
      else:
          D = 1
      for _, row in sheet.iterrows():
          try:
              cells = row.tolist()
              data.append((float(cells[0]), float(cells[1]) / D))
          except (TypeError, ValueError, IndexError):
              continue  # non-numeric or missing cell
      chart = (
          Polar(**self.initSetting(args))
          .add(f"{name}", data, type_="scatter", **self.y_Label(args))
          .set_global_opts(**self.global_set(args, f"{name}极坐标图", 0, 100, False, False))
      )
      self.R_Dic[f'{name}极坐标图[{len(self.R_Dic)}]{self.get_name(args)}'] = chart
      return chart
  def to_Radar(self, name, text) -> Radar:
      """Render sheet ``name`` as a radar chart and register it.

      Row names are the indicators; every column is one series. The maximum
      of each indicator is the largest numeric value found in that row.

      Args:
          name: Sheet name passed to ``get_Sheet`` / ``get_Index``.
          text: Option text parsed by ``Parsing_Parameters``.

      Returns:
          The configured ``Radar`` chart.
      """
      sheet = self.get_Sheet(name)
      x = self.get_Index(name, True).tolist()
      per_indicator = [[] for _ in x]  # numeric values seen per indicator
      data = []  # [series name, [values]] pairs
      # DataFrame.items() replaces iteritems(), which pandas 2.0 removed.
      for column, series in sheet.items():
          numbers = []
          for pos, cell in enumerate(series.tolist()):
              try:
                  f = float(cell)
                  per_indicator[pos].append(f)
              except (TypeError, ValueError, IndexError):
                  # NOTE(review): skipped cells shift the later values of this
                  # series onto earlier indicators -- confirm whether a
                  # placeholder is wanted here.
                  continue
              numbers.append(f)
          data.append([f'{column}', [numbers]])  # values are wrapped in a list
      x_list = [
          # An indicator without any numeric value has no maximum; None lets
          # the renderer choose instead of crashing on max([]).
          opts.RadarIndicatorItem(name=x[pos], max_=max(values) if values else None)
          for pos, values in enumerate(per_indicator)
      ]
      args = self.Parsing_Parameters(text)
      chart = (
          Radar(**self.initSetting(args))
          .add_schema(schema=x_list)
          .set_global_opts(**self.global_set(args, f"{name}雷达图", 0, 100, False, False))
      )
      k = self.Per_Seeting(args, 'Radar')
      for item in data:
          # item unpacks into (series name, data); each series gets a random colour.
          chart.add(*item, **self.y_Label(args), color=self.get_Color(), **k)
      self.R_Dic[f'{name}雷达图[{len(self.R_Dic)}]{self.get_name(args)}'] = chart
      return chart
  def get_Color(self):
      """Return a random colour as an upper-case ``#RRGGBB`` hex string.

      Three independent channel values in ``0..255`` are drawn with
      ``randint`` and each is rendered as two hexadecimal digits.
      """
      channels = [randint(0, 255), randint(0, 255), randint(0, 255)]
      return '#' + ''.join(f'{channel:02X}' for channel in channels)
  def to_WordCloud(self, name, text) -> WordCloud:
      """Render the first column of sheet ``name`` as a word cloud.

      Row names are the words and the first cell of each row its weight;
      rows with a non-numeric weight are skipped.

      Args:
          name: Sheet name passed to ``get_Sheet``.
          text: Option text parsed by ``Parsing_Parameters``.

      Returns:
          The configured ``WordCloud`` chart (also stored in ``self.R_Dic``).
      """
      sheet = self.get_Sheet(name)
      data = []
      for idx, row in sheet.iterrows():
          try:
              data.append([str(idx), float(row.tolist()[0])])
          except (TypeError, ValueError, IndexError):
              continue  # non-numeric or missing weight
      args = self.Parsing_Parameters(text)
      chart = (
          WordCloud(**self.initSetting(args))
          .add(f"{name}", data, **self.Per_Seeting(args, 'WordCloud'))
          .set_global_opts(**self.global_set(args, f"{name}词云", 0, 100, False, False))
      )
      self.R_Dic[f'{name}词云[{len(self.R_Dic)}]{self.get_name(args)}'] = chart
      return chart
  def to_Liquid(self, name, text) -> Liquid:
      """Render the top-left cell of sheet ``name`` as a liquid-fill chart.

      The cell is turned into a fraction by re-reading its digits behind
      ``"0."``: the part after the decimal point when there is one
      (``"0.35"`` -> 0.35), otherwise the whole text (``"35"`` -> 0.35).

      Args:
          name: Sheet name passed to ``get_Sheet``.
          text: Option text parsed by ``Parsing_Parameters``.

      Returns:
          The configured ``Liquid`` chart (also stored in ``self.R_Dic``).
      """
      sheet = self.get_Sheet(name)
      raw = str(sheet.iloc[0, 0])
      parts = raw.split('.')
      try:
          # NOTE(review): the integer part is discarded ("12.5" -> 0.5);
          # confirm that this is intended.
          data = float(f'0.{parts[1]}')
      except (IndexError, ValueError):
          # No decimal point (or an unusable fraction): use the leading part.
          data = float(f'0.{parts[0]}')
      args = self.Parsing_Parameters(text)
      chart = (
          Liquid(**self.initSetting(args))
          .add(f"{name}", [data, data])
          .set_global_opts(title_opts=opts.TitleOpts(title=f"{name}水球图", subtitle="CoTan~数据处理"))
      )
      self.R_Dic[f'{name}水球图[{len(self.R_Dic)}]{self.get_name(args)}'] = chart
      return chart
  def to_Gauge(self, name, text) -> Gauge:
      """Render the top-left cell of sheet ``name`` as a gauge chart.

      Values up to 100 are shown unchanged. Larger values are divided by
      100 and only the fractional digits of that quotient are kept, scaled
      back to a percentage (``250`` -> ``"2.5"`` -> 50.0).

      Args:
          name: Sheet name passed to ``get_Sheet``.
          text: Option text parsed by ``Parsing_Parameters``.

      Returns:
          The configured ``Gauge`` chart (also stored in ``self.R_Dic``).
      """
      sheet = self.get_Sheet(name)
      data = float(sheet.iloc[0, 0])
      if data > 100:
          scaled = str(data / 100)
          parts = scaled.split('.')
          try:
              data = float(f'0.{parts[1]}') * 100
          except (IndexError, ValueError):
              # No usable fractional part (e.g. exponent notation).
              data = float(f'0.{scaled}') * 100
      args = self.Parsing_Parameters(text)
      chart = (
          Gauge(**self.initSetting(args))
          .add(f"{name}", [(f"{name}", data)])
          .set_global_opts(title_opts=opts.TitleOpts(title=f"{name}仪表图", subtitle="CoTan~数据处理"))
      )
      self.R_Dic[f'{name}仪表图[{len(self.R_Dic)}]{self.get_name(args)}'] = chart
      return chart
  def to_Calendar(self, name, text) -> Calendar:
      """Render sheet ``name`` as calendar heat maps, one per column.

      Row names are used as dates. The calendar range of each series runs
      from its first to its last row, so rows are presumably expected in
      date order (verify against callers).

      Args:
          name: Sheet name passed to ``get_Sheet`` / ``get_Column``.
          text: Option text parsed by ``Parsing_Parameters``.

      Returns:
          The configured ``Calendar`` chart (also stored in ``self.R_Dic``).
      """
      sheet = self.get_Sheet(name)
      x_name = self.get_Column(name, True).tolist()
      data = [[] for _ in x_name]
      y = []
      for idx, row in sheet.iterrows():
          date = str(idx)
          for pos, cell in enumerate(row.tolist()):
              try:
                  data[pos].append([date, cell])
                  y.append(float(cell))
              except (TypeError, ValueError, IndexError):
                  pass  # non-numeric cell: not counted for the value range
      args = self.Parsing_Parameters(text)
      if not y:
          y = [0, 100]
          args['show_Visual_mapping'] = False  # no numeric data to map
      chart = (
          Calendar(**self.initSetting(args))
          .set_global_opts(**self.global_set(args, f"{name}日历图", min(y), max(y), True))
      )
      for label, points in zip(x_name, data):
          if not points:
              continue  # a sheet without rows has no date range to show
          start_date = points[0][0]
          end_date = points[-1][0]
          chart.add(
              str(label), points,
              calendar_opts=opts.CalendarOpts(range_=[start_date, end_date]),
              **self.y_Label(args),
          )
      self.R_Dic[f'{name}日历图[{len(self.R_Dic)}]{self.get_name(args)}'] = chart
      return chart
  def to_ThemeRiver(self, name, text) -> ThemeRiver:
      """Render sheet ``name`` as a theme-river chart and register it.

      Every cell becomes a ``[row name, value, column name]`` triple; the
      column names are the series.

      Args:
          name: Sheet name passed to ``get_Sheet`` / ``get_Column``.
          text: Option text parsed by ``Parsing_Parameters``.

      Returns:
          The configured ``ThemeRiver`` chart.
      """
      sheet = self.get_Sheet(name)
      data = []
      x_name = self.get_Column(name, True).tolist()
      y = []
      for idx, row in sheet.iterrows():
          date = str(idx)
          cells = row.tolist()
          for pos, label in enumerate(x_name):
              try:
                  data.append([date, cells[pos], label])
                  y.append(float(cells[pos]))
              except (TypeError, ValueError, IndexError):
                  pass  # non-numeric cell: not counted for the value range
      args = self.Parsing_Parameters(text)
      if not y:
          y = [0, 100]
          args['show_Visual_mapping'] = False  # no numeric data to map
      chart = (
          ThemeRiver(**self.initSetting(args))
          # pos_bottom keeps the river from filling the whole canvas.
          .add(x_name, data, singleaxis_opts=opts.SingleAxisOpts(type_=args['x_type'], pos_bottom="10%"))
          .set_global_opts(**self.global_set(args, f"{name}河流图", min(y), max(y), True, False))
      )
      self.R_Dic[f'{name}河流图[{len(self.R_Dic)}]{self.get_name(args)}'] = chart
      return chart
  def to_Sunburst(self, name, text) -> Sunburst:
      """Render sheet ``name`` as a sunburst chart and register it.

      The sheet is converted with ``to_dict()`` and walked recursively:
      nested dicts become inner rings, leaves are weighted by their numeric
      value or, when not numeric, by the length of their text.

      Args:
          name: Sheet name passed to ``get_Sheet``.
          text: Option text parsed by ``Parsing_Parameters``.

      Returns:
          The configured ``Sunburst`` chart.
      """
      sheet = self.get_Sheet(name)

      def build(mapping, label):
          """Return the ``{'name', 'children', 'value'}`` node for ``mapping``."""
          children = []
          total = 0
          for key in mapping:
              content = mapping[key]
              if isinstance(content, dict):
                  child = build(content, str(key))
              else:
                  try:
                      weight = float(content)
                  except (TypeError, ValueError):
                      weight = len(str(content))  # non-numeric leaf
                  child = {'name': f'{key}={content}', 'value': weight}
              total += child['value']
              children.append(child)
          return {'name': label, 'children': children, 'value': total}

      data = build(sheet.to_dict(), name)['children']
      args = self.Parsing_Parameters(text)
      chart = (
          Sunburst()
          .add(series_name=f'{name}', data_pair=data, radius=[abs(args['Size'] - 10), "90%"])
          .set_global_opts(**self.global_set(args, f"{name}旭日图", 0, 100, False, False))
          .set_series_opts(label_opts=opts.LabelOpts(formatter="{b}"))
      )
      self.R_Dic[f'{name}旭日图[{len(self.R_Dic)}]{self.get_name(args)}'] = chart
      return chart
  def to_Tree(self, name, text) -> Tree:
      """Render sheet ``name`` as a tree chart and register it.

      The sheet is converted with ``to_dict()``; nested dicts become
      branches and every other value becomes a leaf hanging below a node
      named after its key.

      Args:
          name: Sheet name passed to ``get_Sheet``; also the root label.
          text: Option text parsed by ``Parsing_Parameters``.

      Returns:
          The configured ``Tree`` chart.
      """
      sheet = self.get_Sheet(name)

      def build(mapping, label):
          """Return the ``{'name', 'children'}`` node for ``mapping``."""
          branch = {'name': label, 'children': []}
          for key, content in ((k, mapping[k]) for k in mapping):
              if isinstance(content, dict):
                  node = build(content, str(key))
              else:
                  node = {'name': f'{key}', 'children': [{'name': f'{content}'}]}
              branch['children'].append(node)
          return branch

      root = build(sheet.to_dict(), name)
      args = self.Parsing_Parameters(text)
      chart = Tree().add(f"{name}", [root])
      chart = chart.set_global_opts(**self.global_set(args, f"{name}树状图", 0, 100, False, False))
      registry_key = f'{name}树状图[{len(self.R_Dic)}]{self.get_name(args)}'
      self.R_Dic[registry_key] = chart
      return chart
  def to_TreeMap(self, name, text) -> TreeMap:
      """Render sheet ``name`` as a tree-map chart and register it.

      The sheet is converted with ``to_dict()`` and walked recursively:
      nested dicts become groups, leaves are weighted by their numeric value
      or, when not numeric, by the length of their text.

      Args:
          name: Sheet name passed to ``get_Sheet``.
          text: Option text parsed by ``Parsing_Parameters``.

      Returns:
          The configured ``TreeMap`` chart.
      """
      sheet = self.get_Sheet(name)

      def build(mapping, label):
          """Return the ``{'name', 'children', 'value'}`` node for ``mapping``."""
          children = []
          total = 0
          for key in mapping:
              content = mapping[key]
              if isinstance(content, dict):
                  child = build(content, str(key))
              else:
                  try:
                      weight = float(content)
                  except (TypeError, ValueError):
                      weight = len(str(content))  # non-numeric leaf
                  child = {'name': f'{key}={content}', 'value': weight}
              total += child['value']
              children.append(child)
          return {'name': label, 'children': children, 'value': total}

      data = build(sheet.to_dict(), name)['children']
      args = self.Parsing_Parameters(text)
      chart = (
          TreeMap()
          .add(f"{name}", data, label_opts=opts.LabelOpts(is_show=True, position='inside'))
          .set_global_opts(**self.global_set(args, f"{name}矩形树图", 0, 100, False, False))
      )
      self.R_Dic[f'{name}矩形树图[{len(self.R_Dic)}]{self.get_name(args)}'] = chart
      return chart
  def to_ScatterGeo(self, name, text) -> Geo:
      """Render sheet ``name`` on a Geo map, one series per column.

      Row names are place names. A cell is interpreted as follows:

      * numeric: a scatter point with that value;
      * ``"[##S]<number>"``: an extra effect-scatter series with one point;
      * ``"[##H]<number>"``: an extra heat-map series with one point;
      * any other text: the column becomes a LINES series and the text is
        stored as the second element of the (place, text) pair.

      Args:
          name: Sheet name passed to ``get_Sheet`` / ``get_Column``.
          text: Option text parsed by ``Parsing_Parameters``.

      Returns:
          The configured ``Geo`` chart (also stored in ``self.R_Dic``).
      """
      sheet = self.get_Sheet(name)
      column = self.get_Column(name, True).tolist()
      data_Type = ["scatter" for _ in column]
      data = [[] for _ in column]
      y = []
      markers = {'[##S]': GeoType.EFFECT_SCATTER, '[##H]': GeoType.HEATMAP}
      for idx, row in sheet.iterrows():
          place = str(idx)
          cells = row.tolist()
          for a, cell in enumerate(cells):
              target = a  # series that receives this point
              try:
                  v = float(cell)
                  y.append(v)
              except (TypeError, ValueError):
                  v = str(cell)
                  special = markers.get(v[:5])
                  if special is not None:
                      try:
                          v = float(v[5:])
                      except ValueError:
                          special = None  # bad number: treat the text as a line target
                  if special is not None:
                      # A marked value gets a series of its own, appended at the end.
                      y.append(v)
                      column.append(column[a])
                      data_Type.append(special)
                      data.append([])
                      target = -1
                  else:
                      data_Type[a] = GeoType.LINES  # the column turns into lines
              data[target].append((place, v))
      args = self.Parsing_Parameters(text)
      args['show_Visual_mapping'] = True  # the visual map is mandatory here
      if not y:
          y = [0, 100]
      schema = {}
      if args['is_Dark']:
          schema['itemstyle_opts'] = opts.ItemStyleOpts(color="#323c48", border_color="#111")
      chart = (
          Geo()
          .add_schema(maptype=str(args['Map']), **schema)
          # Without a visual map the chart shows odd values.
          .set_global_opts(**self.global_set(args, f"{name}Geo点地图", min(y), max(y), False))
      )
      for label, points, kind in zip(column, data, data_Type):
          if kind != GeoType.LINES:
              ka = dict(
                  symbol=args['Symbol'],
                  symbol_size=args['Size'],
                  color='#1E90FF' if args['is_Dark'] else '#0000FF',
              )
          else:
              ka = dict(
                  symbol=SymbolType.ARROW,
                  symbol_size=6,
                  effect_opts=opts.EffectOpts(symbol=SymbolType.ARROW, symbol_size=6, color="blue"),
                  linestyle_opts=opts.LineStyleOpts(curve=0.2, color='#FFF8DC' if args['is_Dark'] else '#000000'),
              )
          chart.add(f'{label}', points, type_=kind, **ka)
      # Labels are hidden; this only takes effect after the series are added.
      chart.set_series_opts(label_opts=opts.LabelOpts(is_show=False))
      self.R_Dic[f'{name}Geo点地图[{len(self.R_Dic)}]{self.get_name(args)}'] = chart
      return chart
  def to_Map(self, name, text) -> Map:
      """Render sheet ``name`` as a region map, one series per column.

      Row names are region names; non-numeric cells are skipped. A
      ``MapGlobe`` is used when ``args['map_Type']`` is ``'GLOBE'``.

      Args:
          name: Sheet name passed to ``get_Sheet`` / ``get_Column``.
          text: Option text parsed by ``Parsing_Parameters``.

      Returns:
          The configured chart (also stored in ``self.R_Dic``).
      """
      sheet = self.get_Sheet(name)
      column = self.get_Column(name, True).tolist()
      data = [[] for _ in column]
      y = []
      for idx, row in sheet.iterrows():
          region = str(idx)
          for pos, cell in enumerate(row.tolist()):
              try:
                  v = float(cell)
                  y.append(v)
                  data[pos].append((region, v))
              except (TypeError, ValueError, IndexError):
                  pass  # non-numeric cell
      args = self.Parsing_Parameters(text)
      args['show_Visual_mapping'] = True  # the visual map is mandatory here
      if not y:
          y = [0, 100]
      chart_cls = MapGlobe if args['map_Type'] == 'GLOBE' else Map
      # Without a visual map the chart shows odd values.
      chart = chart_cls().set_global_opts(**self.global_set(args, f"{name}Map地图", min(y), max(y), False))
      for label, points in zip(column, data):
          chart.add(
              f'{label}', points, str(args['Map']),
              is_map_symbol_show=args['show_Map_Symbol'],
              symbol=args['Symbol'],
              **self.y_Label(args),
          )
      self.R_Dic[f'{name}Map地图[{len(self.R_Dic)}]{self.get_name(args)}'] = chart
      return chart
  def to_Geo(self, name, text) -> Geo:
      """Render sheet ``name`` on a Geo map using coordinates as labels.

      Row names are longitudes and column names latitudes. A cell is
      interpreted as follows:

      * numeric: a point of type ``args['Geo_Type']`` with that value;
      * ``"[##S]<number>"``: a scatter point;
      * ``"[##E]<number>"``: an effect-scatter point;
      * ``"<lon>;<lat>"``: a line from this cell's coordinate to the given
        one, plus effect-scatter markers on both ends.

      Cells that match none of these are skipped.

      Args:
          name: Sheet name passed to ``get_Sheet`` and friends.
          text: Option text parsed by ``Parsing_Parameters``.

      Returns:
          The configured ``Geo`` chart (also stored in ``self.R_Dic``).
      """
      sheet = self.get_Sheet(name)
      column = self.get_Column(name, True).tolist()
      index = self.get_Index(name, True).tolist()
      args = self.Parsing_Parameters(text)
      args['show_Visual_mapping'] = True  # the visual map is mandatory here
      schema = {}
      if args['is_Dark']:
          schema['itemstyle_opts'] = opts.ItemStyleOpts(color="#323c48", border_color="#111")
      chart = Geo().add_schema(maptype=str(args['Map']), **schema)
      point_color = '#1E90FF' if args['is_Dark'] else '#0000FF'
      line_color = '#FFF8DC' if args['is_Dark'] else '#000000'
      m = []
      for lat in column:  # latitude
          for lon in index:  # longitude
              value = sheet.loc[lon, lat]
              here = f'({lon},{lat})'
              try:
                  v = float(value)
                  type_ = args['Geo_Type']
              except (TypeError, ValueError):
                  q = str(value)
                  marker = q[:5]
                  if marker in ('[##S]', '[##E]'):
                      # Parse the number only for marked cells; doing it up
                      # front rejected nearly every "lon;lat" line description.
                      try:
                          v = float(q[5:])
                      except ValueError:
                          continue
                      type_ = GeoType.SCATTER if marker == '[##S]' else GeoType.EFFECT_SCATTER
                  else:
                      # Line description "lon;lat".
                      try:
                          end = q.split(';')
                          there = f'({end[0]},{end[1]})'
                          chart.add_coordinate(name=there, longitude=float(end[0]), latitude=float(end[1]))
                          chart.add_coordinate(name=here, longitude=float(lon), latitude=float(lat))
                          chart.add(
                              f'{name}', [[here, there]], type_=GeoType.LINES,
                              effect_opts=opts.EffectOpts(symbol=SymbolType.ARROW, symbol_size=6, color="blue"),
                              linestyle_opts=opts.LineStyleOpts(curve=0.2, color=line_color),
                          )
                          chart.add(
                              f'{name}_XY', [[here, 5], [there, 5]],
                              type_=GeoType.EFFECT_SCATTER, color=point_color,
                          )
                      except Exception:
                          pass  # not a usable line description: skip the cell
                      continue
              try:
                  chart.add_coordinate(name=here, longitude=float(lon), latitude=float(lat))
                  chart.add(f'{name}', [[here, v]], type_=type_, symbol=args['Symbol'], symbol_size=args['Size'])
                  if type_ == GeoType.HEATMAP:
                      chart.add(f'{name}_XY', [[here, v]], type_='scatter', color=point_color)
                  m.append(v)
              except Exception:
                  pass  # best effort: an unusable coordinate skips this point
      if not m:
          m = [0, 100]
      chart.set_series_opts(label_opts=opts.LabelOpts(is_show=False))  # hide labels
      chart.set_global_opts(**self.global_set(args, f"{name}Geo地图", min(m), max(m), False))
      self.R_Dic[f'{name}Geo地图[{len(self.R_Dic)}]{self.get_name(args)}'] = chart
      return chart
  def to_Bar3d(self, name, text) -> Bar3D:
      """Render sheet ``name`` as a 3D bar chart and register it.

      Column names form the x axis, row names the y axis and the cell
      values the z axis.

      Args:
          name: Sheet name passed to ``get_Sheet`` and friends.
          text: Option text parsed by ``Parsing_Parameters``.

      Returns:
          The configured ``Bar3D`` chart.
      """
      sheet = self.get_Sheet(name)
      x = self.get_Column(name, True).tolist()  # x axis: column names
      y = self.get_Index(name, True).tolist()  # y axis: row names
      value_list = []
      q = []
      for col in range(len(x)):
          for row in range(len(y)):
              try:
                  v = sheet.iloc[row, col]  # row first, then column
                  value_list.append([col, row, v])
                  q.append(float(v))
              except (TypeError, ValueError, IndexError):
                  pass  # non-numeric cell: not counted for the value range
      args = self.Parsing_Parameters(text)
      if not q:
          q = [0, 100]
          args['show_Visual_mapping'] = False  # no numeric data to map
      chart = (
          Bar3D(**self.initSetting(args))
          .add(
              f"{name}", value_list,
              xaxis3d_opts=opts.Axis3DOpts(list(map(str, x)), type_=args["x_type"]),
              yaxis3d_opts=opts.Axis3DOpts(list(map(str, y)), type_=args["y_type"]),
              zaxis3d_opts=opts.Axis3DOpts(type_=args["z_type"]),
          )
          .set_global_opts(**self.global_set(args, f"{name}3D柱状图", min(q), max(q), True))
      )
      if args['bar_Stacking']:
          chart.set_series_opts(**{"stack": "stack"})  # stack the bars
      self.R_Dic[f'{name}3D柱状图[{len(self.R_Dic)}]{self.get_name(args)}'] = chart
      return chart
  def to_Scatter3D(self, name, text) -> Scatter3D:
      """Render sheet ``name`` as a 3D scatter chart and register it.

      Column names form the x axis, row names the y axis and the cell
      values the z axis.

      Args:
          name: Sheet name passed to ``get_Sheet`` and friends.
          text: Option text parsed by ``Parsing_Parameters``.

      Returns:
          The configured ``Scatter3D`` chart.
      """
      sheet = self.get_Sheet(name)
      x = self.get_Column(name, True).tolist()  # x axis: column names
      y = self.get_Index(name, True).tolist()  # y axis: row names
      value_list = []
      q = []
      for col in range(len(x)):
          for row in range(len(y)):
              try:
                  v = sheet.iloc[row, col]  # row first, then column
                  value_list.append([col, row, v])
                  q.append(float(v))
              except (TypeError, ValueError, IndexError):
                  pass  # non-numeric cell: not counted for the value range
      args = self.Parsing_Parameters(text)
      if not q:
          q = [0, 100]
          args['show_Visual_mapping'] = False  # no numeric data to map
      chart = (
          Scatter3D(**self.initSetting(args))
          .add(
              f"{name}", value_list,
              xaxis3d_opts=opts.Axis3DOpts(list(map(str, x)), type_=args["x_type"]),
              yaxis3d_opts=opts.Axis3DOpts(list(map(str, y)), type_=args["y_type"]),
              zaxis3d_opts=opts.Axis3DOpts(type_=args["z_type"]),
          )
          .set_global_opts(**self.global_set(args, f"{name}3D散点图", min(q), max(q), True))
      )
      self.R_Dic[f'{name}3D散点图[{len(self.R_Dic)}]{self.get_name(args)}'] = chart
      return chart
  def to_Line3D(self, name, text) -> Line3D:
      """Render sheet ``name`` as a 3D line chart and register it.

      Column names form the x axis, row names the y axis and the cell
      values the z axis; the grid is a 100 x 100 x 100 box.

      Args:
          name: Sheet name passed to ``get_Sheet`` and friends.
          text: Option text parsed by ``Parsing_Parameters``.

      Returns:
          The configured ``Line3D`` chart.
      """
      sheet = self.get_Sheet(name)
      x = self.get_Column(name, True).tolist()  # x axis: column names
      y = self.get_Index(name, True).tolist()  # y axis: row names
      value_list = []
      q = []
      for col in range(len(x)):
          for row in range(len(y)):
              try:
                  v = sheet.iloc[row, col]  # row first, then column
                  value_list.append([col, row, v])
                  q.append(float(v))
              except (TypeError, ValueError, IndexError):
                  pass  # non-numeric cell: not counted for the value range
      args = self.Parsing_Parameters(text)
      if not q:
          q = [0, 100]
          args['show_Visual_mapping'] = False  # no numeric data to map
      chart = (
          Line3D(**self.initSetting(args))
          .add(
              f"{name}", value_list,
              xaxis3d_opts=opts.Axis3DOpts(list(map(str, x)), type_=args["x_type"]),
              yaxis3d_opts=opts.Axis3DOpts(list(map(str, y)), type_=args["y_type"]),
              zaxis3d_opts=opts.Axis3DOpts(type_=args["z_type"]),
              grid3d_opts=opts.Grid3DOpts(width=100, height=100, depth=100),
          )
          .set_global_opts(**self.global_set(args, f"{name}3D折线图", min(q), max(q), True))
      )
      self.R_Dic[f'{name}3D折线图[{len(self.R_Dic)}]{self.get_name(args)}'] = chart
      return chart
  def Tra_RDic(self):
      """Discard every registered chart by rebinding ``self.R_Dic`` to a new empty dict."""
      self.R_Dic = dict()
  def Draw_Page(self, text, Dic):
      """Render every registered chart into one HTML document.

      ``args['HTML_Type']`` selects the container: ``1`` is a draggable
      page, ``2`` a simple page and anything else a tabbed page whose tab
      titles are the ``self.R_Dic`` keys.

      Args:
          text: Option text parsed by ``Parsing_Parameters``.
          Dic: Output target handed to the container's ``render`` method.

      Returns:
          ``Dic`` unchanged. The previous ``-> Page`` annotation was dropped
          because the container itself is never returned.
      """
      args = self.Parsing_Parameters(text)
      if args['page_Title'] == '':
          title = 'CoTan_数据处理'
      else:
          title = f"CoTan_数据处理:{args['page_Title']}"
      html_type = args['HTML_Type']
      if html_type == 1 or html_type == 2:
          layout = Page.DraggablePageLayout if html_type == 1 else Page.SimplePageLayout
          page = Page(page_title=title, layout=layout)
          page.add(*self.R_Dic.values())
      else:
          page = Tab(page_title=title)
          for key, chart in self.R_Dic.items():
              page.add(chart, key)
      page.render(Dic)
      return Dic
  def Overlap(self, down, up):
      """Overlay chart ``up`` on top of chart ``down``.

      Args:
          down: ``self.R_Dic`` key of the base chart.
          up: ``self.R_Dic`` key of the chart drawn on top.

      Returns:
          The base chart after its ``overlap`` method was called.
      """
      base, top = self.R_Dic[down], self.R_Dic[up]
      base.overlap(top)
      return base
  1502. class Machine_Learner(Draw):#数据处理者
  def __init__(self, *args, **kwargs):
      """Set up the learner registries on top of the parent initialisation.

      ``Learn_Dic`` maps a learner key to ``(estimator class, names of the
      parsed options forwarded to its constructor)``.
      """
      super().__init__(*args, **kwargs)
      self.Learner = {}  # learner name -> estimator instance
      self.Learn_Dic = {
          'Line': (LinearRegression, ()),
          'Ridge': (Ridge, ('alpha', 'max_iter',)),
          'Lasso': (Lasso, ('alpha', 'max_iter',)),
          # ('C') is just the string 'C'; the trailing comma makes it a tuple
          # like the other entries.
          'LogisticRegression': (LogisticRegression, ('C',)),
          # NOTE(review): 'Knn' maps to the classifier and 'Knn_class' to the
          # regressor -- confirm the two are not swapped.
          'Knn': (KNeighborsClassifier, ('n_neighbors',)),
          'Knn_class': (KNeighborsRegressor, ('n_neighbors',)),
      }
      self.Learner_Type = {}  # learner name -> key in Learn_Dic
  def DecisionTreeClassifier(self, name):  # feature extraction
      """Vectorise the sheet ``name`` with ``DictVectorizer`` and store the result.

      The sheet is converted to a list of per-row dicts, run through
      ``DictVectorizer.fit_transform`` and densified. The resulting frame,
      with the vectoriser's feature names as columns, is registered through
      ``self.Add_Form`` under ``'<name>:特征'`` and returned.
      """
      records = self.get_Sheet(name).to_dict(orient='records')
      vectorizer = DictVectorizer()
      dense = vectorizer.fit_transform(records).toarray()
      features = pd.DataFrame(dense, columns=vectorizer.feature_names_)
      self.Add_Form(features, f'{name}:特征')
      return features
  def p_Args(self, Text):  # parse learner options
      """Execute ``Text`` as Python and extract the learner options it defines.

      Names assigned by ``Text`` are read back with these defaults and
      conversions:

      * ``alpha``    -> ``float``, default 1.0
      * ``C``        -> ``float``, default 1.0
      * ``max_iter`` -> ``int``, default 1000
      * ``K_knn``    -> ``int``, default 5, returned under ``'n_neighbors'``
      * ``nDim_2``   -> ``bool``, default True

      Returns:
          A dict with the keys ``alpha``, ``C``, ``max_iter``,
          ``n_neighbors`` and ``nDim_2``.
      """
      namespace = {}
      # NOTE(security): exec runs whatever code Text contains; only pass
      # trusted input here.
      exec(Text, namespace)
      return {
          'alpha': float(namespace.get('alpha', 1.0)),
          'C': float(namespace.get('C', 1.0)),
          'max_iter': int(namespace.get('max_iter', 1000)),
          # The option is spelled K_knn in the text but stored as n_neighbors.
          'n_neighbors': int(namespace.get('K_knn', 5)),
          'nDim_2': bool(namespace.get('nDim_2', True)),
      }
  def Add_Learner(self, Learner, Text=''):
      """Create an estimator of type ``Learner`` and register it.

      ``Learner`` must be a key of ``self.Learn_Dic`` (``KeyError`` otherwise).
      ``Text`` is parsed by ``self.p_Args``; only the options listed for this
      learner type are passed to the estimator constructor. The new estimator
      is stored under ``'Le[<current learner count>]<Learner>'`` in
      ``self.Learner`` and its type key in ``self.Learner_Type``.
      """
      factory, option_names = self.Learn_Dic[Learner]
      key = f'Le[{len(self.Learner)}]{Learner}'
      # Keep only the options this estimator type accepts.
      parsed = self.p_Args(Text)
      ctor_kwargs = {option: parsed[option] for option in option_names}
      # Build and register the estimator.
      self.Learner[key] = factory(**ctor_kwargs)
      self.Learner_Type[key] = Learner
  def Return_Learner(self):
      """Return a shallow copy of the learner registry (name -> estimator)."""
      return dict(self.Learner)
  def get_Learner(self, name):
      """Return the estimator registered under ``name`` (``KeyError`` if absent)."""
      registry = self.Learner
      return registry[name]
  def get_Learner_Type(self, name):
      """Return the type key recorded for learner ``name`` (``KeyError`` if absent)."""
      type_registry = self.Learner_Type
      return type_registry[name]
  def Fit(self, name, Learnner, Text='', **kwargs):
      """Train learner ``Learnner`` on sheet ``name`` via ``Fit_Simp``.

      ``Text`` is parsed by ``self.p_Args``; its ``nDim_2`` value is passed on
      as ``Down_Ndim``. Extra keyword arguments are forwarded to ``Fit_Simp``.

      Returns:
          Whatever ``Fit_Simp`` returns, or ``None`` when the learner's type
          is not one of the supported type keys.
      """
      Type = self.get_Learner_Type(Learnner)
      options = self.p_Args(Text)
      simple_types = ('Line', 'Ridge', 'Lasso', 'LogisticRegression', 'Knn', 'Knn_class')
      if Type not in simple_types:
          return None
      return self.Fit_Simp(name, Learnner, Down_Ndim=options['nDim_2'], **kwargs)
  def Fit_Simp(self, name, Learner, Score_Only=False, Down_Ndim=True, split=0.3, **kwargs):
      """Fit and/or score the learner ``Learner`` on the sheet ``name``.

      The sheet's values are the features; the targets come from
      ``self.get_Index(name, True)``.

      Args:
          name: Sheet name, looked up with ``self.get_Sheet``.
          Learner: Name of a registered estimator (see ``self.get_Learner``).
          Score_Only: When true, skip training and score the estimator on the
              whole data set.
          Down_Ndim: When true (or when the feature array is 1-D), every row
              is flattened with ``np.ravel`` so that scalars become length-1
              rows and the estimator receives a 2-D array.
          split: ``test_size`` passed to ``train_test_split``.
          **kwargs: Accepted for call compatibility; unused.

      Returns:
          ``(train_score, test_score)`` after training, or ``(0, score)`` when
          ``Score_Only`` is true.
      """
      get = self.get_Sheet(name)
      x = get.to_numpy()
      y = self.get_Index(name, True)  # targets (the index is used as y)
      if Down_Ndim or x.ndim == 1:
          # Flatten each sample to 1-D. The previous ``i.np.ravel(a[i], 'C')``
          # always raised and a bare ``except`` hid it, so nothing was ever
          # flattened and 1-D input was never lifted to 2-D.
          rows = []
          for row in x:
              try:
                  rows.append(np.ravel(row, 'C'))
              except (TypeError, ValueError):
                  # Best effort: keep the sample as-is if it cannot be flattened.
                  rows.append(row)
          x = np.array(rows)
      model = self.get_Learner(Learner)
      if not Score_Only:
          # Train on a random split and report both scores.
          train_x, test_x, train_y, test_y = train_test_split(x, y, test_size=split)
          model.fit(train_x, train_y)
          train_Score = model.score(train_x, train_y)
          test_Score = model.score(test_x, test_y)
          return train_Score, test_Score
      # Score only: all data is used for testing.
      test_Score = model.score(x, y)
      return 0, test_Score
  def Predict(self, name, Learner, Text='', **kwargs):
      """Predict with learner ``Learner`` on sheet ``name`` via ``Predict_Simp``.

      ``Text`` is parsed by ``self.p_Args``; its ``nDim_2`` value is passed on
      as ``Down_Ndim``. Extra keyword arguments are forwarded to
      ``Predict_Simp``.

      Returns:
          Whatever ``Predict_Simp`` returns, or ``None`` when the learner's
          type is not one of the supported type keys.
      """
      Type = self.get_Learner_Type(Learner)
      options = self.p_Args(Text)
      simple_types = ('Line', 'Ridge', 'Lasso', 'LogisticRegression', 'Knn', 'Knn_class')
      if Type not in simple_types:
          return None
      return self.Predict_Simp(name, Learner, Down_Ndim=options['nDim_2'], **kwargs)
  def Predict_Simp(self, name, Learner, Down_Ndim=True, **kwargs):
      """Predict targets for the sheet ``name`` and store the result as a new sheet.

      Args:
          name: Sheet name, looked up with ``self.get_Sheet``.
          Learner: Name of a registered estimator (see ``self.get_Learner``).
          Down_Ndim: When true (or when the feature array is 1-D), every row
              is flattened with ``np.ravel`` so that scalars become length-1
              rows and the estimator receives a 2-D array.
          **kwargs: Accepted for call compatibility; unused.

      Returns:
          A ``DataFrame`` holding the features, indexed by the predicted
          values and using the columns from ``self.get_Column(name, True)``.
          It is also registered through ``self.Add_Form`` as ``'<name>:预测'``.
      """
      get = self.get_Sheet(name)
      column = self.get_Column(name, True)
      x = get.to_numpy()
      if Down_Ndim or x.ndim == 1:
          # Flatten each sample to 1-D. The previous ``i.np.ravel(a[i], 'C')``
          # always raised and a bare ``except`` hid it, so nothing was ever
          # flattened and 1-D input was never lifted to 2-D.
          rows = []
          for row in x:
              try:
                  rows.append(np.ravel(row, 'C'))
              except (TypeError, ValueError):
                  # Best effort: keep the sample as-is if it cannot be flattened.
                  rows.append(row)
          x = np.array(rows)
      model = self.get_Learner(Learner)
      answer = model.predict(x)
      # The predictions become the index of the resulting sheet.
      data = pd.DataFrame(x, index=answer, columns=column)
      self.Add_Form(data, f'{name}:预测')
      return data
  def Show_Args(self, Learner, new=False):  # show learner parameters
      """Summarise the fitted parameters of the learner named ``Learner``.

      Which attributes are read depends on the learner's recorded type key:

      * ``Ridge`` / ``Lasso``: alpha, coefficients, intercept, max_iter
      * ``Line``: coefficients, intercept
      * ``Knn``: classes, neighbour count, distance metric
      * ``Knn_class``: neighbour count, distance metric
      * ``LogisticRegression``: classes, coefficients, intercepts, C

      Args:
          Learner: Name of a registered estimator.
          new: When true, the summary frame is also registered through
              ``self.Add_Form`` under ``'<Learner>:属性'``.

      Returns:
          ``(doc, data)`` where ``doc`` is a text report and ``data`` is a
          single-column ``DataFrame`` of the values. For any other type key
          ``('', [])`` is returned.
      """
      learner = self.get_Learner(Learner)
      learner_Type = self.get_Learner_Type(Learner)
      # Type keys are compared for equality. The previous ``in ('Line')`` form
      # was a substring test against a plain string, so keys such as '' or
      # 'in' were wrongly treated as 'Line'.
      if learner_Type in ('Ridge', 'Lasso'):
          Alpha = learner.alpha  # regularisation strength
          w = learner.coef_.tolist()  # weights
          b = learner.intercept_  # intercept
          max_iter = learner.max_iter
          w_name = [f'权重:W[{i}]' for i in range(len(w))]
          index = ['阿尔法:Alpha'] + w_name + ['截距:b', '最大迭代数']
          data = [Alpha] + w + [b] + [max_iter]
          # Text report
          doc = (f'阿尔法:alpha = {Alpha}\n\n权重:\nw = \n{pd.DataFrame(w)}\n\n截距:b = {b}\n\n最大迭代数:{max_iter}\n\n\nEND')
          data = pd.DataFrame(data, index=index)
      elif learner_Type == 'Line':
          w = learner.coef_.tolist()  # weights
          b = learner.intercept_  # intercept
          index = [f'权重:W[{i}]' for i in range(len(w))] + ['截距:b']
          data = w + [b]
          # Text report
          doc = (f'权重:w = \n{pd.DataFrame(w)}\n\n截距:b = {b}\n\n\nEND')
          data = pd.DataFrame(data, index=index)
      elif learner_Type == 'Knn':
          classes = learner.classes_.tolist()  # class labels
          n = learner.n_neighbors  # neighbour count
          # Only p == 1 and p == 2 have a display name; anything else gives None.
          p = {1: '曼哈顿距离', 2: '欧几里得距离'}.get(learner.p)
          index = [f'类目[{i}]' for i in range(len(classes))] + ['邻居个数', '距离公式']
          data = classes + [n, p]
          doc = f'分类类目:\n{pd.DataFrame(classes)}\n\n邻居个数:{n}\n\n计算距离的方式:{p}\n\n\nEND'
          data = pd.DataFrame(data, index=index)
      elif learner_Type == 'Knn_class':
          n = learner.n_neighbors  # neighbour count
          p = {1: '曼哈顿距离', 2: '欧几里得距离'}.get(learner.p)
          index = ['邻居个数', '距离公式']
          data = [n, p]
          doc = f'邻居个数:{n}\n\n计算距离的方式:{p}\n\n\nEND'
          data = pd.DataFrame(data, index=index)
      elif learner_Type == 'LogisticRegression':
          classes = learner.classes_.tolist()  # class labels
          w = learner.coef_.tolist()  # weights, one row per entry of coef_
          b = learner.intercept_
          C = learner.C
          index = (
              [f'类目[{i}]' for i in range(len(classes))]
              + [f'权重:W[{j}][{i}]' for i in range(len(w)) for j in range(len(w[i]))]
              + [f'截距:b[{i}]' for i in range(len(b))]
              + ['C']
          )
          data = classes + [j for i in w for j in i] + list(b) + [C]
          doc = f'分类类目:\n{pd.DataFrame(classes)}\n\n权重:w = \n{pd.DataFrame(w)}\n\n截距:b = {b}\n\nC={C}\n\n\n'
          data = pd.DataFrame(data, index=index)
      else:
          return '', []
      if new:
          self.Add_Form(data, f'{Learner}:属性')
      return doc, data
  def Del_Leaner(self, Leaner):
      """Remove learner ``Leaner`` from both registries (``KeyError`` if absent)."""
      self.Learner.pop(Leaner)
      self.Learner_Type.pop(Leaner)