# Learn_Numpy.py

from pyecharts.components import Table  # table rendering
from pyecharts import options as opts
from pyecharts.charts import Tab,Page
from pandas import DataFrame,read_csv
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import *
from sklearn.neighbors import KNeighborsClassifier,KNeighborsRegressor
from sklearn.tree import DecisionTreeClassifier,DecisionTreeRegressor
from sklearn.metrics import accuracy_score
from sklearn.feature_selection import *
from sklearn.preprocessing import *
from sklearn.impute import SimpleImputer
from sklearn.decomposition import PCA, IncrementalPCA,KernelPCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
# import sklearn as sk
# settings
np.set_printoptions(threshold=np.inf)
class Learner:
    def __init__(self,*args,**kwargs):
        self.numpy_Dic = {}  # name -> numpy array
    def Add_Form(self,data:np.ndarray,name):
        name = f'{name}[{len(self.numpy_Dic)}]'
        self.numpy_Dic[name] = data
    def read_csv(self,Dic,name,encoding='utf-8',str_must=False,sep=','):
        type_ = str if str_must else np.float64
        pf_data = read_csv(Dic,encoding=encoding,delimiter=sep,header=None)
        try:
            data = pf_data.to_numpy(dtype=type_)
        except ValueError:
            data = pf_data.to_numpy(dtype=str)
        if data.ndim == 1: data = np.expand_dims(data, axis=1)
        self.Add_Form(data,name)
        return data
    def Add_Python(self, Text, sheet_name):
        name = {}
        name.update(globals().copy())
        name.update(locals().copy())
        exec(Text, name)
        exec('get = Creat()', name)
        if isinstance(name['get'], np.ndarray):  # already a numpy array
            get = name['get']
        else:
            try:
                get = np.array(name['get'])
            except:
                get = np.array([name['get']])
        self.Add_Form(get, sheet_name)
        return get
    def get_Form(self) -> dict:
        return self.numpy_Dic.copy()
    def get_Sheet(self,name) -> np.ndarray:
        return self.numpy_Dic[name].copy()
    def to_CSV(self,Dic:str,name,sep) -> str:
        get = self.get_Sheet(name)
        np.savetxt(Dic, get, delimiter=sep)
        return Dic
    def to_Html_One(self,name,Dic=''):
        if Dic == '': Dic = f'{name}.html'
        get = self.get_Sheet(name)
        if get.ndim == 1: get = np.expand_dims(get, axis=1)
        get = get.tolist()
        for i in range(len(get)):
            get[i] = [i+1] + get[i]
        headers = [i for i in range(len(get[0]))]
        table = Table()
        table.add(headers, get).set_global_opts(
            title_opts=opts.ComponentTitleOpts(title=f"表格:{name}", subtitle="CoTan~机器学习:查看数据"))
        table.render(Dic)
        return Dic
    def to_Html(self, name, Dic='', type_=0):
        if Dic == '': Dic = f'{name}.html'
        # put the sheet being rendered first
        Sheet_Dic = self.get_Form()
        del Sheet_Dic[name]
        Sheet_list = [name] + list(Sheet_Dic.keys())
        class TAB_F:
            def __init__(self, q):
                self.tab = q  # a Tab/Page container
            def render(self, Dic):
                return self.tab.render(Dic)
        # build the display page
        if type_ == 0:
            class TAB(TAB_F):
                def add(self, table, k, *f):
                    self.tab.add(table, k)
            tab = TAB(Tab(page_title='CoTan:查看表格'))  # a Tab
        elif type_ == 1:
            class TAB(TAB_F):
                def add(self, table, *k):
                    self.tab.add(table)
            tab = TAB(Page(page_title='CoTan:查看表格', layout=Page.DraggablePageLayout))
        else:
            class TAB(TAB_F):
                def add(self, table, *k):
                    self.tab.add(table)
            tab = TAB(Page(page_title='CoTan:查看表格', layout=Page.SimplePageLayout))
        # add each sheet as a table
        for name in Sheet_list:
            get = self.get_Sheet(name)
            if get.ndim == 1: get = np.expand_dims(get, axis=1)
            get = get.tolist()
            for i in range(len(get)):
                get[i] = [i+1] + get[i]
            headers = [i for i in range(len(get[0]))]
            table = Table()
            table.add(headers, get).set_global_opts(
                title_opts=opts.ComponentTitleOpts(title=f"表格:{name}", subtitle="CoTan~机器学习:查看数据"))
            tab.add(table, f'表格:{name}')
        tab.render(Dic)
        return Dic
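# Usage sketch for the Learner container (added for illustration, not part of the
# original file; 'data.csv' is a hypothetical path):
#   learner = Learner()
#   learner.read_csv('data.csv', 'raw')        # stored under the key 'raw[0]'
#   print(learner.get_Form().keys())           # sheet name -> numpy array
#   learner.to_Html_One('raw[0]', 'raw.html')  # render one sheet as a pyecharts Table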
class Study_MachineBase:
    def __init__(self,*args,**kwargs):
        self.Model = None
        self.have_Fit = False
        # these attributes are recorded so the learner can be cloned
    def Accuracy(self,y_Predict,y_Really):
        return accuracy_score(y_Predict, y_Really)
    def Fit(self,x_data,y_data,split=0.3,**kwargs):
        self.have_Fit = True
        y_data = y_data.ravel()
        x_train,x_test,y_train,y_test = train_test_split(x_data,y_data,test_size=split)
        self.Model.fit(x_data,y_data)  # note: the model is fitted on the full data; the split is only used for scoring
        train_score = self.Model.score(x_train,y_train)
        test_score = self.Model.score(x_test,y_test)
        return train_score,test_score
    def Score(self,x_data,y_data):
        Score = self.Model.score(x_data,y_data)
        return Score
    def Predict(self,x_data):
        y_Predict = self.Model.predict(x_data)
        return y_Predict,'预测'
class prep_Base(Study_MachineBase):
    def __init__(self,*args,**kwargs):
        super(prep_Base, self).__init__(*args,**kwargs)
        self.Model = None
    def Fit(self, x_data,y_data, *args, **kwargs):
        if not self.have_Fit:  # a second fit is not allowed
            self.Model.fit(x_data,y_data)
        return 'None', 'None'
    def Predict(self, x_data):
        x_Predict = self.Model.transform(x_data)
        return x_Predict,'特征工程'
    def Score(self, x_data, y_data):
        return 'None'  # preprocessors have no score
class Line_Model(Study_MachineBase):
    def __init__(self,args_use,model,*args,**kwargs):  # model selects LinearRegression/Ridge/Lasso; alpha is the regularisation strength
        super(Line_Model, self).__init__(*args,**kwargs)
        Model = {'Line':LinearRegression,'Ridge':Ridge,'Lasso':Lasso}[model]
        if model == 'Line':
            self.Model = Model()
            self.k = {}
        else:
            self.Model = Model(alpha=args_use['alpha'],max_iter=args_use['max_iter'])
            self.k = {'alpha':args_use['alpha'],'max_iter':args_use['max_iter']}
        # recorded so the learner can be cloned
        self.Alpha = args_use['alpha']
        self.max_iter = args_use['max_iter']
        self.Model_Name = model
class LogisticRegression_Model(Study_MachineBase):
    def __init__(self,args_use,model,*args,**kwargs):  # C is the inverse regularisation strength
        super(LogisticRegression_Model, self).__init__(*args,**kwargs)
        self.Model = LogisticRegression(C=args_use['C'],max_iter=args_use['max_iter'])
        # recorded so the learner can be cloned
        self.C = args_use['C']
        self.max_iter = args_use['max_iter']
        self.k = {'C':args_use['C'],'max_iter':args_use['max_iter']}
        self.Model_Name = model
class Knn_Model(Study_MachineBase):
    def __init__(self,args_use,model,*args,**kwargs):  # model selects the KNN classifier or regressor
        super(Knn_Model, self).__init__(*args,**kwargs)
        Model = {'Knn_class':KNeighborsClassifier,'Knn':KNeighborsRegressor}[model]
        self.Model = Model(p=args_use['p'],n_neighbors=args_use['n_neighbors'])
        # recorded so the learner can be cloned
        self.n_neighbors = args_use['n_neighbors']
        self.p = args_use['p']
        self.k = {'n_neighbors':args_use['n_neighbors'],'p':args_use['p']}
        self.Model_Name = model
class Tree_Model(Study_MachineBase):
    def __init__(self,args_use,model,*args,**kwargs):  # model selects the decision-tree classifier or regressor
        super(Tree_Model, self).__init__(*args,**kwargs)
        Model = {'Tree_class':DecisionTreeClassifier,'Tree':DecisionTreeRegressor}[model]
        self.Model = Model(criterion=args_use['criterion'],splitter=args_use['splitter'],max_features=args_use['max_features']
                           ,max_depth=args_use['max_depth'],min_samples_split=args_use['min_samples_split'])
        # recorded so the learner can be cloned
        self.criterion = args_use['criterion']
        self.splitter = args_use['splitter']
        self.max_features = args_use['max_features']
        self.max_depth = args_use['max_depth']
        self.min_samples_split = args_use['min_samples_split']
        self.k = {'criterion':args_use['criterion'],'splitter':args_use['splitter'],'max_features':args_use['max_features'],
                  'max_depth':args_use['max_depth'],'min_samples_split':args_use['min_samples_split']}
        self.Model_Name = model
class Variance_Model(prep_Base):  # variance-threshold feature selection
    def __init__(self,args_use,model,*args,**kwargs):
        super(Variance_Model, self).__init__(*args,**kwargs)
        self.Model = VarianceThreshold(threshold=(args_use['P'] * (1 - args_use['P'])))
        # recorded so the learner can be cloned
        self.threshold = args_use['P']
        self.k = {'threshold':args_use['P']}
        self.Model_Name = model
class SelectKBest_Model(prep_Base):  # univariate feature selection
    def __init__(self, args_use, model, *args, **kwargs):
        super(SelectKBest_Model, self).__init__(*args, **kwargs)
        self.Model = SelectKBest(k=args_use['k'],score_func=args_use['score_func'])
        # recorded so the learner can be cloned
        self.k_ = args_use['k']
        self.score_func=args_use['score_func']
        self.k = {'k':args_use['k'],'score_func':args_use['score_func']}
        self.Model_Name = model
class SelectFrom_Model(prep_Base):  # feature selection from a fitted estimator
    def __init__(self, args_use, Learner, *args, **kwargs):  # Learner is an already built learner object
        super(SelectFrom_Model, self).__init__(*args, **kwargs)
        self.Model = Learner.Model
        self.Select_Model = SelectFromModel(estimator=Learner.Model,max_features=args_use['k'],prefit=Learner.have_Fit)
        self.max_features = args_use['k']
        self.estimator=Learner.Model
        self.k = {'max_features':args_use['k'],'estimator':Learner.Model}
        self.Model_Name = 'SelectFrom_Model'
    def Fit(self, x_data,y_data, *args, **kwargs):
        if not self.have_Fit:  # a second fit is not allowed
            self.Select_Model.fit(x_data,y_data)
        return 'None', 'None'
    def Predict(self, x_data):
        try:
            x_Predict = self.Select_Model.transform(x_data)
            return x_Predict,'模型特征工程'
        except:
            return np.array([]),'无结果工程'
class Standardization_Model(prep_Base):  # z-score standardisation
    def __init__(self, args_use, model, *args, **kwargs):
        super(Standardization_Model, self).__init__(*args, **kwargs)
        self.Model = StandardScaler()
        self.k = {}
        self.Model_Name = 'StandardScaler'
class MinMaxScaler_Model(prep_Base):  # min-max scaling
    def __init__(self, args_use, model, *args, **kwargs):
        super(MinMaxScaler_Model, self).__init__(*args, **kwargs)
        self.Model = MinMaxScaler(feature_range=args_use['feature_range'])
        self.k = {}
        self.Model_Name = 'MinMaxScaler'
class LogScaler_Model(prep_Base):  # logarithmic scaling
    def __init__(self, args_use, model, *args, **kwargs):
        super(LogScaler_Model, self).__init__(*args, **kwargs)
        self.Model = None
        self.k = {}
        self.Model_Name = 'LogScaler'
    def Fit(self, x_data, *args, **kwargs):
        if not self.have_Fit:  # a second fit is not allowed
            self.max_logx = np.log(x_data.max())
        return 'None', 'None'
    def Predict(self, x_data):
        try:
            max_logx = self.max_logx
        except AttributeError:  # not fitted yet
            self.have_Fit = False
            self.Fit(x_data)
            max_logx = self.max_logx
        x_Predict = (np.log(x_data)/max_logx)
        return x_Predict,'对数变换'
class atanScaler_Model(prep_Base):  # arctangent scaling
    def __init__(self, args_use, model, *args, **kwargs):
        super(atanScaler_Model, self).__init__(*args, **kwargs)
        self.Model = None
        self.k = {}
        self.Model_Name = 'atanScaler'
    def Fit(self, x_data, *args, **kwargs):
        return 'None', 'None'
    def Predict(self, x_data):
        x_Predict = (np.arctan(x_data)*(2/np.pi))
        return x_Predict,'atan变换'
class decimalScaler_Model(prep_Base):  # decimal scaling
    def __init__(self, args_use, model, *args, **kwargs):
        super(decimalScaler_Model, self).__init__(*args, **kwargs)
        self.Model = None
        self.k = {}
        self.Model_Name = 'Decimal_normalization'
    def Fit(self, x_data, *args, **kwargs):
        if not self.have_Fit:  # a second fit is not allowed
            self.j = max([judging_Digits(x_data.max()),judging_Digits(x_data.min())])
        return 'None', 'None'
    def Predict(self, x_data):
        try:
            j = self.j
        except AttributeError:  # not fitted yet
            self.have_Fit = False
            self.Fit(x_data)
            j = self.j
        x_Predict = (x_data/(10**j))
        return x_Predict,'小数定标标准化'
class Mapzoom_Model(prep_Base):  # rescaling to a target range
    def __init__(self, args_use, model, *args, **kwargs):
        super(Mapzoom_Model, self).__init__(*args, **kwargs)
        self.Model = None
        self.feature_range = args_use['feature_range']
        self.k = {}
        self.Model_Name = 'Mapzoom'
    def Fit(self, x_data, *args, **kwargs):
        if not self.have_Fit:  # a second fit is not allowed
            self.max = x_data.max()
            self.min = x_data.min()
        return 'None', 'None'
    def Predict(self, x_data):
        try:
            max = self.max
            min = self.min
        except AttributeError:  # not fitted yet
            self.have_Fit = False
            self.Fit(x_data)
            max = self.max
            min = self.min
        x_Predict = (x_data * (self.feature_range[1] - self.feature_range[0])) / (max - min)
        return x_Predict,'映射标准化'
class sigmodScaler_Model(prep_Base):  # sigmoid transform
    def __init__(self, args_use, model, *args, **kwargs):
        super(sigmodScaler_Model, self).__init__(*args, **kwargs)
        self.Model = None
        self.k = {}
        self.Model_Name = 'sigmodScaler_Model'
    def Fit(self, x_data, *args, **kwargs):
        return 'None', 'None'
    def Predict(self, x_data:np.ndarray):
        x_Predict = (1/(1+np.exp(-x_data)))
        return x_Predict,'Sigmod变换'
class Fuzzy_quantization_Model(prep_Base):  # fuzzy quantisation
    def __init__(self, args_use, model, *args, **kwargs):
        super(Fuzzy_quantization_Model, self).__init__(*args, **kwargs)
        self.Model = None
        self.feature_range = args_use['feature_range']
        self.k = {}
        self.Model_Name = 'Fuzzy_quantization'
    def Fit(self, x_data, *args, **kwargs):
        if not self.have_Fit:  # a second fit is not allowed
            self.max = x_data.max()
            self.min = x_data.min()
        return 'None', 'None'
    def Predict(self, x_data,*args,**kwargs):
        try:
            max = self.max
            min = self.min
        except AttributeError:  # not fitted yet
            self.have_Fit = False
            self.Fit(x_data)
            max = self.max
            min = self.min
        x_Predict = 1 / 2 + (1 / 2) * np.sin(np.pi / (max - min) * (x_data - (max-min) / 2))
        return x_Predict,'映射标准化'
class Regularization_Model(prep_Base):  # sample-wise normalisation (Normalizer)
    def __init__(self, args_use, model, *args, **kwargs):
        super(Regularization_Model, self).__init__(*args, **kwargs)
        self.Model = Normalizer(norm=args_use['norm'])
        self.k = {'norm':args_use['norm']}
        self.Model_Name = 'Regularization'
class Binarizer_Model(prep_Base):  # binarisation
    def __init__(self, args_use, model, *args, **kwargs):
        super(Binarizer_Model, self).__init__(*args, **kwargs)
        self.Model = Binarizer(threshold=args_use['threshold'])
        self.k = {}
        self.Model_Name = 'Binarizer'
class Discretization_Model(prep_Base):  # n-value discretisation
    def __init__(self, args_use, model, *args, **kwargs):
        super(Discretization_Model, self).__init__(*args, **kwargs)
        self.Model = None
        range_ = args_use['split_range']
        if range_ == []:raise Exception
        elif len(range_) == 1:range_.append(range_[0])
        self.range = range_
        self.k = {}
        self.Model_Name = 'Discretization'
    def Fit(self,*args,**kwargs):
        return 'None','None'
    def Predict(self,x_data):
        x_Predict = x_data.copy()  # work on a copy
        range_ = self.range
        bool_list = []
        max_ = len(range_) - 1
        o_t = None
        for i in range(len(range_)):
            try:
                t = float(range_[i])
            except:continue
            if o_t is None:  # first split point
                bool_list.append(x_Predict <= t)
            else:
                bool_list.append((o_t <= x_Predict) == (x_Predict < t))
            if i == max_:
                bool_list.append(t <= x_Predict)
            o_t = t
        for i in range(len(bool_list)):
            x_Predict[bool_list[i]] = i
        return x_Predict,f'{len(bool_list)}值离散化'
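    # Worked example (added for illustration): with split_range = [0, 10], Predict
    # builds three masks, so values below 0 become 0, values in [0, 10) become 1,
    # and values of 10 or more become 2, i.e. len(split_range) + 1 discrete labels.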
class Label_Model(prep_Base):  # integer label encoding
    def __init__(self, args_use, model, *args, **kwargs):
        super(Label_Model, self).__init__(*args, **kwargs)
        self.Model = []
        self.k = {}
        self.Model_Name = 'LabelEncoder'
    def Fit(self,x_data,*args, **kwargs):
        if not self.have_Fit:  # a second fit is not allowed
            if x_data.ndim == 1:x_data = np.array([x_data])
            for i in range(x_data.shape[1]):
                self.Model.append(LabelEncoder().fit(np.ravel(x_data[:,i])))  # one encoder per column
        return 'None', 'None'
    def Predict(self, x_data):
        if x_data.ndim == 1: x_data = np.array([x_data])
        x_Predict = x_data.copy()
        for i in range(x_data.shape[1]):
            x_Predict[:,i] = self.Model[i].transform(x_data[:,i])
        return x_Predict,'数字编码'
class OneHotEncoder_Model(prep_Base):  # one-hot encoding
    def __init__(self, args_use, model, *args, **kwargs):
        super(OneHotEncoder_Model, self).__init__(*args, **kwargs)
        self.Model = []
        self.ndim_up = args_use['ndim_up']
        self.k = {}
        self.Model_Name = 'OneHotEncoder'
    def Fit(self,x_data,*args, **kwargs):
        if not self.have_Fit:  # a second fit is not allowed
            if x_data.ndim == 1:x_data = np.array([x_data])
            for i in range(x_data.shape[1]):
                data = np.expand_dims(x_data[:,i], axis=1)  # one-hot encoding needs a 2D input
                self.Model.append(OneHotEncoder().fit(data))  # one encoder per column
        return 'None', 'None'
    def Predict(self, x_data):
        x_new = []
        for i in range(x_data.shape[1]):
            data = np.expand_dims(x_data[:, i], axis=1)  # one-hot encoding needs a 2D input
            oneHot = self.Model[i].transform(data).toarray().tolist()
            x_new.append(oneHot)  # collect the per-column one-hot codes
        x_new = DataFrame(x_new).to_numpy()  # each row holds the one-hot codes of one original column (kept at ndim=2; no pure-numpy version yet)
        x_Predict = []
        for i in range(x_new.shape[1]):
            x_Predict.append(x_new[:,i])
        x_Predict = np.array(x_Predict)  # convert back to an array
        if not self.ndim_up:  # flatten the per-sample codes
            new_xPredict = []
            for i in x_Predict:
                new_list = []
                list_ = i.tolist()
                for a in list_:
                    new_list += a
                new = np.array(new_list)
                new_xPredict.append(new)
            return np.array(new_xPredict),'独热编码'
        return x_Predict,'独热编码'  # keep the extra dimension
class Missed_Model(prep_Base):  # missing-value imputation
    def __init__(self, args_use, model, *args, **kwargs):
        super(Missed_Model, self).__init__(*args, **kwargs)
        self.Model = SimpleImputer(missing_values=args_use['miss_value'], strategy=args_use['fill_method'],
                                   fill_value=args_use['fill_value'])
        self.k = {}
        self.Model_Name = 'Missed'
    def Fit(self, x_data, *args, **kwargs):
        if not self.have_Fit:  # a second fit is not allowed
            self.Model.fit(x_data)
        return 'None', 'None'
    def Predict(self, x_data):
        x_Predict = self.Model.transform(x_data)
        return x_Predict,'填充缺失'
class PCA_Model(prep_Base):
    def __init__(self, args_use, model, *args, **kwargs):
        super(PCA_Model, self).__init__(*args, **kwargs)
        self.Model = PCA(n_components=args_use['n_components'])
        self.n_components = args_use['n_components']
        self.k = {'n_components':args_use['n_components']}
        self.Model_Name = 'PCA'
    def Fit(self, x_data, *args, **kwargs):
        if not self.have_Fit:  # a second fit is not allowed
            self.Model.fit(x_data)
        return 'None', 'None'
    def Predict(self, x_data):
        x_Predict = self.Model.transform(x_data)
        return x_Predict,'PCA'
class RPCA_Model(prep_Base):
    def __init__(self, args_use, model, *args, **kwargs):
        super(RPCA_Model, self).__init__(*args, **kwargs)
        self.Model = IncrementalPCA(n_components=args_use['n_components'])
        self.n_components = args_use['n_components']
        self.k = {'n_components': args_use['n_components']}
        self.Model_Name = 'RPCA'
    def Fit(self, x_data, *args, **kwargs):
        if not self.have_Fit:  # a second fit is not allowed
            self.Model.fit(x_data)
        return 'None', 'None'
    def Predict(self, x_data):
        x_Predict = self.Model.transform(x_data)
        return x_Predict,'RPCA'
class KPCA_Model(prep_Base):
    def __init__(self, args_use, model, *args, **kwargs):
        super(KPCA_Model, self).__init__(*args, **kwargs)
        self.Model = KernelPCA(n_components=args_use['n_components'], kernel=args_use['kernel'])
        self.n_components = args_use['n_components']
        self.kernel = args_use['kernel']
        self.k = {'n_components': args_use['n_components'],'kernel':args_use['kernel']}
        self.Model_Name = 'KPCA'
    def Fit(self, x_data, *args, **kwargs):
        if not self.have_Fit:  # a second fit is not allowed
            self.Model.fit(x_data)
        return 'None', 'None'
    def Predict(self, x_data):
        x_Predict = self.Model.transform(x_data)
        return x_Predict,'KPCA'
class LDA_Model(prep_Base):
    def __init__(self, args_use, model, *args, **kwargs):
        super(LDA_Model, self).__init__(*args, **kwargs)
        self.Model = LDA(n_components=args_use['n_components'])
        self.n_components = args_use['n_components']
        self.k = {'n_components': args_use['n_components']}
        self.Model_Name = 'LDA'
    def Fit(self, x_data,y_data, *args, **kwargs):
        if not self.have_Fit:  # a second fit is not allowed
            self.Model.fit(x_data,y_data)
        return 'None', 'None'
    def Predict(self, x_data):
        x_Predict = self.Model.transform(x_data)
        return x_Predict,'LDA'
class Machine_Learner(Learner):  # data handler that owns the learners
    def __init__(self,*args, **kwargs):
        super().__init__(*args, **kwargs)
        self.Learner = {}  # registered learners
        self.Learn_Dic = {'Line':Line_Model,
                          'Ridge':Line_Model,
                          'Lasso':Line_Model,
                          'LogisticRegression':LogisticRegression_Model,
                          'Knn_class':Knn_Model,
                          'Knn': Knn_Model,
                          'Tree_class': Tree_Model,
                          'Tree': Tree_Model,
                          'Variance':Variance_Model,
                          'SelectKBest':SelectKBest_Model,
                          'Z-Score':Standardization_Model,
                          'MinMaxScaler':MinMaxScaler_Model,
                          'LogScaler':LogScaler_Model,
                          'atanScaler':atanScaler_Model,
                          'decimalScaler':decimalScaler_Model,
                          'sigmodScaler':sigmodScaler_Model,
                          'Mapzoom':Mapzoom_Model,
                          'Fuzzy_quantization':Fuzzy_quantization_Model,
                          'Regularization':Regularization_Model,
                          'Binarizer':Binarizer_Model,
                          'Discretization':Discretization_Model,
                          'Label':Label_Model,
                          'OneHotEncoder':OneHotEncoder_Model,
                          'Missed':Missed_Model,
                          'PCA':PCA_Model,
                          'RPCA':RPCA_Model,
                          'KPCA':KPCA_Model,
                          'LDA':LDA_Model,
                          }
        self.Learner_Type = {}  # type of each registered learner
    def p_Args(self,Text,Type):  # parse the argument text
        args = {}
        args_use = {}
        # load the user-supplied assignments
        exec(Text,args)
        # normalise the values
        args_use['alpha'] = float(args.get('alpha',1.0))  # L1/L2 regularisation strength
        args_use['C'] = float(args.get('C', 1.0))  # inverse regularisation strength (LogisticRegression)
        args_use['max_iter'] = int(args.get('max_iter', 1000))  # maximum number of iterations
        args_use['n_neighbors'] = int(args.get('K_knn', 5))  # number of KNN neighbours (passed under the name K_knn)
        args_use['p'] = int(args.get('p', 2))  # Minkowski distance parameter
        args_use['nDim_2'] = bool(args.get('nDim_2', True))  # whether to reduce the data dimensionality
        if Type == 'Tree':
            args_use['criterion'] = 'mse' if bool(args.get('is_MSE', True)) else 'mae'  # regression-tree criterion: MSE or MAE
        else:
            args_use['criterion'] = 'gini' if bool(args.get('is_Gini', True)) else 'entropy'  # classification criterion: Gini impurity or entropy
        args_use['splitter'] = 'random' if bool(args.get('is_random', False)) else 'best'  # whether tree splits are chosen randomly instead of best
        args_use['max_features'] = args.get('max_features', None)  # maximum number of features considered
        args_use['max_depth'] = args.get('max_depth', None)  # maximum tree depth
        args_use['min_samples_split'] = int(args.get('min_samples_split', 2))  # minimum samples needed to keep splitting (small values overfit easily)
        args_use['P'] = float(args.get('P', 0.8))  # Bernoulli p for the VarianceThreshold threshold p*(1-p)
        args_use['k'] = args.get('k',1)
        args_use['score_func'] = ({'chi2':chi2,'f_classif':f_classif,'mutual_info_classif':mutual_info_classif,
                                   'f_regression':f_regression,'mutual_info_regression':mutual_info_regression}.
                                  get(args.get('score_func','f_classif'),f_classif))
        args_use['feature_range'] = tuple(args.get('feature_range',(0,1)))
        args_use['norm'] = args.get('norm','l2')  # normalisation norm, l1 or l2
        args_use['threshold'] = float(args.get('threshold', 0.0))  # binarisation threshold
        args_use['split_range'] = list(args.get('split_range', [0]))  # discretisation split points
        args_use['ndim_up'] = bool(args.get('ndim_up', True))
        args_use['miss_value'] = args.get('miss_value',np.nan)
        args_use['fill_method'] = args.get('fill_method','mean')
        args_use['fill_value'] = args.get('fill_value',None)
        args_use['n_components'] = args.get('n_components',1)
        args_use['kernel'] = args.get('kernel','linear')
        return args_use
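    # Example of the Text block p_Args expects (an illustrative assumption): plain
    # Python assignments that exec() loads into a namespace, for instance
    #   "alpha = 0.5\nmax_iter = 2000\nK_knn = 3"
    # Any name that is not assigned falls back to the defaults above.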
    def Add_Learner(self,Learner,Text=''):
        get = self.Learn_Dic[Learner]
        name = f'Le[{len(self.Learner)}]{Learner}'
        # parse the arguments
        args_use = self.p_Args(Text,Learner)
        # build the learner
        self.Learner[name] = get(model=Learner,args_use=args_use)
        self.Learner_Type[name] = Learner
    def Add_SelectFrom_Model(self,Learner,Text=''):  # Learner is the name of the selected learner
        model = self.get_Learner(Learner)
        name = f'Le[{len(self.Learner)}]SelectFrom_Model'
        # parse the arguments
        args_use = self.p_Args(Text,'SelectFrom_Model')
        # build the learner
        self.Learner[name] = SelectFrom_Model(Learner=model,args_use=args_use,Dic=self.Learn_Dic)
        self.Learner_Type[name] = 'SelectFrom_Model'
    def Return_Learner(self):
        return self.Learner.copy()
    def get_Learner(self,name):
        return self.Learner[name]
    def get_Learner_Type(self,name):
        return self.Learner_Type[name]
    def Fit(self,x_name,y_name,Learner,split=0.3,*args,**kwargs):
        x_data = self.get_Sheet(x_name)
        y_data = self.get_Sheet(y_name)
        model = self.get_Learner(Learner)
        return model.Fit(x_data,y_data,split)
    def Predict(self,x_name,Learner,Text='',**kwargs):
        x_data = self.get_Sheet(x_name)
        model = self.get_Learner(Learner)
        y_data,name = model.Predict(x_data)
        self.Add_Form(y_data,f'{x_name}:{name}')
        return y_data
    def Score(self,name_x,name_y,Learner):  # Score_Only means scoring only; Fit_Simp is the generic operation
        model = self.get_Learner(Learner)
        x = self.get_Sheet(name_x)
        y = self.get_Sheet(name_y)
        return model.Score(x,y)
    def Show_Args(self,Learner,Dic):  # show the parameters
        pass
    def Del_Leaner(self,Leaner):  # remove a learner
        del self.Learner[Leaner]
        del self.Learner_Type[Leaner]
def judging_Digits(num:(int,float)):
    a = str(abs(num)).split('.')[0]
    if a == '':raise ValueError
    return len(a)
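if __name__ == '__main__':
    # Minimal end-to-end sketch (added for illustration, not part of the original
    # file): it drives the API with a synthetic regression problem instead of a CSV,
    # so the sheet and learner names below are assumptions that follow the naming
    # conventions of Add_Form ('name[index]') and Add_Learner ('Le[index]name').
    demo = Machine_Learner()
    x = np.random.rand(100, 3)
    y = x @ np.array([1.5, -2.0, 0.5]) + 0.1 * np.random.rand(100)
    demo.Add_Form(x, 'x')        # stored under the key 'x[0]'
    demo.Add_Form(y, 'y')        # stored under the key 'y[1]'
    demo.Add_Learner('Line')     # plain LinearRegression, registered as 'Le[0]Line'
    train_score, test_score = demo.Fit('x[0]', 'y[1]', 'Le[0]Line')
    print('train R^2:', train_score, 'test R^2:', test_score)
    print('R^2 on the full data:', demo.Score('x[0]', 'y[1]', 'Le[0]Line'))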