template.py

import joblib
import re
import tarfile
from abc import ABCMeta, abstractmethod
from os import getcwd, mkdir
from os.path import split as path_split, splitext, basename, exists
from sklearn.feature_selection import (
    chi2,
    f_classif,
    mutual_info_classif,
    f_regression,
    mutual_info_regression,
)
from sklearn.svm import SVC, SVR  # SVC is SVM classification, SVR is SVM regression
from sklearn.cluster import KMeans, AgglomerativeClustering, DBSCAN
from sklearn.manifold import TSNE
from sklearn.neural_network import MLPClassifier, MLPRegressor
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as Lda
from sklearn.decomposition import PCA, IncrementalPCA, KernelPCA, NMF
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import *
from sklearn.feature_selection import *
from sklearn.metrics import *
from sklearn.ensemble import (
    RandomForestClassifier,
    RandomForestRegressor,
    GradientBoostingClassifier,
    GradientBoostingRegressor,
)
import numpy as np
import matplotlib.pyplot as plt
from pandas import DataFrame, read_csv
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor, export_graphviz
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
from sklearn.linear_model import *
from sklearn.model_selection import train_test_split
from scipy.fftpack import fft, ifft  # fast Fourier transform
from scipy import optimize
from scipy.cluster.hierarchy import dendrogram, ward
from pyecharts.components import Table as TableFirst  # table rendering
from pyecharts.options.series_options import JsCode
from pyecharts.charts import Tab as tab_First, Line, Scatter, Bar
from pyecharts.charts import *
from pyecharts import options as opts
from pyecharts.components import Image
from pyecharts.globals import CurrentConfig
from system import plugin_class_loading, get_path, plugin_func_loading

CurrentConfig.ONLINE_HOST = f"{getcwd()}/assets/"

# settings
np.set_printoptions(threshold=np.inf)
global_setting = dict(
    toolbox_opts=opts.ToolboxOpts(is_show=True),
    legend_opts=opts.LegendOpts(pos_bottom="3%", type_="scroll"),
)
global_not_legend = dict(
    toolbox_opts=opts.ToolboxOpts(is_show=True),
    legend_opts=opts.LegendOpts(is_show=False),
)
label_setting = dict(label_opts=opts.LabelOpts(is_show=False))
more_global = False  # whether to plot with all features
all_global = True  # whether to export charts
csv_global = True  # whether to export CSV
clf_global = True  # whether to export the model
tar_global = True  # whether to pack a tar archive
new_dir_global = True  # whether to create a new directory
class LearnBase:
    def __init__(self, *args, **kwargs):
        self.numpy_dict = {}  # name -> numpy array
        self.func_add()  # build func_dict

    def func_add(self):
        self.func_dict = {
            "abs": lambda x, y: np.abs(x),
            "sqrt": lambda x, y: np.sqrt(x),
            "pow": lambda x, y: x ** y,
            "loge": lambda x, y: np.log(x),
            "log10": lambda x, y: np.log10(x),
            "ceil": lambda x, y: np.ceil(x),
            "floor": lambda x, y: np.floor(x),
            "rint": lambda x, y: np.rint(x),
            "sin": lambda x, y: np.sin(x),
            "cos": lambda x, y: np.cos(x),
            "tan": lambda x, y: np.tan(x),
            "tanh": lambda x, y: np.tanh(x),
            "sinh": lambda x, y: np.sinh(x),
            "cosh": lambda x, y: np.cosh(x),
            "asin": lambda x, y: np.arcsin(x),
            "acos": lambda x, y: np.arccos(x),
            "atan": lambda x, y: np.arctan(x),
            "atanh": lambda x, y: np.arctanh(x),
            "asinh": lambda x, y: np.arcsinh(x),
            "acosh": lambda x, y: np.arccosh(x),
            "add": lambda x, y: x + y,  # matrix or element-wise
            "sub": lambda x, y: x - y,  # matrix or element-wise
            "mul": lambda x, y: np.multiply(x, y),  # element-wise
            "matmul": lambda x, y: np.matmul(x, y),  # matrix
            "dot": lambda x, y: np.dot(x, y),  # matrix
            "div": lambda x, y: x / y,
            "div_floor": lambda x, y: np.floor_divide(x, y),
            "power": lambda x, y: np.power(x, y),  # element-wise
        }

    def get_form(self) -> dict:
        return self.numpy_dict.copy()

    def get_sheet(self, name) -> np.ndarray:
        return self.numpy_dict[name].copy()
@plugin_class_loading(get_path(r"template/machinelearning"))
class LearnerIO(LearnBase):
    def add_form(self, data: np.ndarray, name):
        name = f"{name}[{len(self.numpy_dict)}]"
        self.numpy_dict[name] = data

    def read_csv(self, file_dir, name, encoding="utf-8", str_must=False, sep=","):
        dtype = str if str_must else np.float64
        dataframe = read_csv(file_dir, encoding=encoding, delimiter=sep, header=None)
        try:
            data = dataframe.to_numpy(dtype=dtype)
        except ValueError:
            data = dataframe.to_numpy(dtype=str)
        if data.ndim == 1:
            data = np.expand_dims(data, axis=1)
        self.add_form(data, name)
        return data

    def add_python(self, python_file, sheet_name):
        name = {}
        name.update(globals().copy())
        name.update(locals().copy())
        exec(python_file, name)
        exec("get = Creat()", name)  # the user script is expected to define a Creat class
        if isinstance(name["get"], np.ndarray):
            get = name["get"]
        else:
            try:
                get = np.array(name["get"])
            except BaseException:
                get = np.array([name["get"]])
        self.add_form(get, sheet_name)
        return get

    def to_csv(self, save_dir: str, name, sep) -> str:
        get = self.get_sheet(name)
        np.savetxt(save_dir, get, delimiter=sep)
        return save_dir

    def to_html_one(self, name, html_dir=""):
        if html_dir == "":
            html_dir = f"{name}.html"
        get = self.get_sheet(name)
        if get.ndim == 1:
            get = np.expand_dims(get, axis=1)
        get = get.tolist()
        for i in range(len(get)):
            get[i] = [i + 1] + get[i]
        headers = [i for i in range(len(get[0]))]
        table = TableFirst()
        table.add(headers, get).set_global_opts(
            title_opts=opts.ComponentTitleOpts(
                title=f"表格:{name}", subtitle="CoTan~机器学习:查看数据"
            )
        )
        table.render(html_dir)
        return html_dir

    def to_html(self, name, html_dir="", html_type=0):
        if html_dir == "":
            html_dir = f"{name}.html"
        # put the sheet to draw first
        sheet_dict = self.get_form()
        del sheet_dict[name]
        sheet_list = [name] + list(sheet_dict.keys())

        class TabBase:
            def __init__(self, q):
                self.tab = q  # a single Tab

            def render(self, render_dir):
                return self.tab.render(render_dir)

        # build the display page
        if html_type == 0:

            class NewTab(TabBase):
                def add(self, table, k, *f):
                    self.tab.add(table, k)

            tab = NewTab(tab_First(page_title="CoTan:查看表格"))  # a single Tab
        elif html_type == 1:

            class NewTab(TabBase):
                def add(self, table, *k):
                    self.tab.add(table)

            tab = NewTab(Page(page_title="CoTan:查看表格", layout=Page.DraggablePageLayout))
        else:

            class NewTab(TabBase):
                def add(self, table, *k):
                    self.tab.add(table)

            tab = NewTab(Page(page_title="CoTan:查看表格", layout=Page.SimplePageLayout))

        # add each sheet in turn
        for name in sheet_list:
            get = self.get_sheet(name)
            if get.ndim == 1:
                get = np.expand_dims(get, axis=1)
            get = get.tolist()
            for i in range(len(get)):
                get[i] = [i + 1] + get[i]
            headers = [i for i in range(len(get[0]))]
            table = TableFirst()
            table.add(headers, get).set_global_opts(
                title_opts=opts.ComponentTitleOpts(
                    title=f"表格:{name}", subtitle="CoTan~机器学习:查看数据"
                )
            )
            tab.add(table, f"表格:{name}")
        tab.render(html_dir)
        return html_dir
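# A minimal usage sketch (assumptions: a local "data.csv" exists; the sheet
# names are hypothetical, and add_form suffixes every name with an index):
#
#   io = LearnerIO()
#   io.read_csv("data.csv", "sheet")                 # stored as "sheet[0]"
#   io.to_html_one("sheet[0]", "sheet.html")         # single-table page
#   io.to_html("sheet[0]", "all.html", html_type=0)  # tabbed page of all sheets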
@plugin_class_loading(get_path(r"template/machinelearning"))
class LearnerMerge(LearnerIO):
    def merge(self, name, axis=0):  # axis: 0 = horizontal (hstack), 1 = vertical (vstack), 2 = depth (dstack)
        sheet_list = []
        for i in name:
            sheet_list.append(self.get_sheet(i))
        get = {0: np.hstack, 1: np.vstack, 2: np.dstack}[axis](sheet_list)
        self.add_form(np.array(get), f"{name[0]}合成")


@plugin_class_loading(get_path(r"template/machinelearning"))
class LearnerSplit(LearnerIO):
    def split(self, name, split=2, axis=0):  # axis: 0 = horizontal (hsplit), 1 = vertical (vsplit), 2 = depth (dsplit)
        sheet = self.get_sheet(name)
        get = {0: np.hsplit, 1: np.vsplit, 2: np.dsplit}[axis](sheet, split)
        for i in get:
            self.add_form(i, f"{name}分割")

    def two_split(self, name, split, axis):  # binary split (0 = horizontal, 1 = vertical)
        sheet = self.get_sheet(name)
        try:
            split = float(eval(split))
            if split < 1:  # a fraction is scaled to an absolute index
                split = int(split * (len(sheet) if axis == 1 else len(sheet[0])))
            else:
                raise Exception
        except BaseException:
            split = int(split)
        if axis == 0:
            self.add_form(sheet[:, split:], f"{name}分割")
            self.add_form(sheet[:, :split], f"{name}分割")
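# A short sketch of the split semantics; "sheet[0]" is a hypothetical name and
# a 4-row x 6-column sheet is assumed:
#
#   learner.two_split("sheet[0]", "0.5", axis=0)  # yields columns [3:] and [:3]
#   learner.two_split("sheet[0]", "3", axis=0)    # the same split by index
#
# merge() goes the other way: merge(["a[0]", "b[1]"], axis=0) hstack-joins the
# named sheets into a new sheet called "a[0]合成".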
@plugin_class_loading(get_path(r"template/machinelearning"))
class LearnerDimensions(LearnerIO):
    def deep(self, sheet: np.ndarray):
        return sheet.ravel()

    def down_ndim(self, sheet: np.ndarray):  # horizontal
        down_list = []
        for i in sheet:
            down_list.append(i.ravel())
        return np.array(down_list)

    def longitudinal_down_ndim(self, sheet: np.ndarray):  # vertical
        down_list = []
        for i in range(len(sheet[0])):
            down_list.append(sheet[:, i].ravel())
        return np.array(down_list).T

    def reval(self, name, axis):  # axis: 0 = horizontal, 1 = vertical (with .T), 2 = deep
        sheet = self.get_sheet(name)
        self.add_form(
            {0: self.down_ndim, 1: self.longitudinal_down_ndim, 2: self.deep}[axis](
                sheet
            ).copy(),
            f"{name}伸展",
        )

    def del_ndim(self, name):  # drop size-1 dimensions
        sheet = self.get_sheet(name)
        self.add_form(np.squeeze(sheet), f"{name}降维")


@plugin_class_loading(get_path(r"template/machinelearning"))
class LearnerShape(LearnerIO):
    def transpose(self, name, func: list):
        sheet = self.get_sheet(name)
        if sheet.ndim <= 2:
            self.add_form(sheet.transpose().copy(), f"{name}.T")
        else:
            self.add_form(np.transpose(sheet, func).copy(), f"{name}.T")

    def reshape(self, name, shape: list):
        sheet = self.get_sheet(name)
        self.add_form(sheet.reshape(shape).copy(), f"{name}.r")


@plugin_class_loading(get_path(r"template/machinelearning"))
class Learner(LearnerMerge, LearnerSplit, LearnerDimensions, LearnerShape):
    def calculation_matrix(self, data, data_type, func):
        if 1 not in data_type:  # at least one argument must be a sheet
            raise Exception
        func_name = func
        func = self.func_dict.get(func, lambda x, y: x)
        args_data = []
        for i in range(len(data)):
            if data_type[i] == 0:  # 0 = literal value, 1 = sheet name
                args_data.append(data[i])
            else:
                args_data.append(self.get_sheet(data[i]))
        get = func(*args_data)
        self.add_form(get, f"{func_name}({data[0]},{data[1]})")
        return get
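# A minimal sketch of calculation_matrix with hypothetical sheet names:
# data_type flags each argument as a literal (0) or a sheet name (1), and at
# least one sheet is required.
#
#   learner.calculation_matrix(["sheet[0]", 2.0], [1, 0], "power")
#
# computes np.power(sheet, 2.0) element-wise and stores the result as a new
# sheet named "power(sheet[0],2.0)".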
class Machinebase(metaclass=ABCMeta):
    def __init__(self, *args, **kwargs):
        self.model = None
        self.have_fit = False
        self.have_predict = False
        self.x_traindata = None
        self.y_traindata = None
        # test data, specific to supervised learning
        self.x_testdata = None
        self.y_testdata = None
        # the two above are recorded for cloning

    @abstractmethod
    def fit_model(self, x_data, y_data, split, increment, kwargs):
        pass

    @abstractmethod
    def score(self, x_data, y_data):
        pass

    @abstractmethod
    def class_score(self, save_dir, x_data, y_really):
        pass

    def _accuracy(self, y_predict, y_really):  # accuracy
        return accuracy_score(y_really, y_predict)

    def _macro(self, y_predict, y_really, func_num=0):
        func = [recall_score, precision_score, f1_score]  # recall, precision and f1
        class_ = np.unique(y_really).tolist()
        result = func[func_num](y_really, y_predict, labels=class_, average=None)
        return result, class_

    def _confusion_matrix(self, y_predict, y_really):  # confusion matrix
        class_ = np.unique(y_really).tolist()
        return confusion_matrix(y_really, y_predict), class_

    def _kappa_score(self, y_predict, y_really):
        return cohen_kappa_score(y_really, y_predict)

    @abstractmethod
    def regression_score(self, save_dir, x_data, y_really):
        pass

    @abstractmethod
    def clusters_score(self, save_dir, x_data, args):
        pass

    def _mse(self, y_predict, y_really):  # mean squared error
        return mean_squared_error(y_really, y_predict)

    def _mae(self, y_predict, y_really):  # median absolute error
        return median_absolute_error(y_really, y_predict)

    def _r2_score(self, y_predict, y_really):  # r2 score
        return r2_score(y_really, y_predict)

    def _rmse(self, y_predict, y_really):  # root mean squared error
        return self._mse(y_predict, y_really) ** 0.5

    def _coefficient_clustering(self, x_data, y_predict):  # silhouette coefficients
        means_score = silhouette_score(x_data, y_predict)
        outline_score = silhouette_samples(x_data, y_predict)
        return means_score, outline_score

    @abstractmethod
    def predict(self, x_data, args, kwargs):
        pass

    @abstractmethod
    def data_visualization(self, save_dir, args, kwargs):
        pass
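# A quick sketch of the metric helpers, assuming y_really = [0, 1, 1, 0] and
# y_predict = [0, 1, 0, 0]: _accuracy returns 0.75, _macro(..., func_num=2)
# returns the per-class F1 array, and _rmse is simply sqrt(_mse).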
@plugin_class_loading(get_path(r"template/machinelearning"))
class StudyMachinebase(Machinebase):
    def fit_model(self, x_data, y_data, split=0.3, increment=True, **kwargs):
        y_data = y_data.ravel()
        try:
            if self.x_traindata is None or not increment:
                raise Exception
            self.x_traindata = np.vstack((x_data, self.x_traindata))
            self.y_traindata = np.hstack((y_data, self.y_traindata))
        except BaseException:
            self.x_traindata = x_data.copy()
            self.y_traindata = y_data.copy()
        x_train, x_test, y_train, y_test = train_test_split(
            x_data, y_data, test_size=split
        )
        try:  # incremental training
            if not increment:
                raise Exception
            self.model.partial_fit(x_data, y_data)
        except BaseException:
            self.model.fit(self.x_traindata, self.y_traindata)
        train_score = self.model.score(x_train, y_train)
        test_score = self.model.score(x_test, y_test)
        self.have_fit = True
        return train_score, test_score

    def score(self, x_data, y_data):
        score = self.model.score(x_data, y_data)
        return score

    def class_score(self, save_dir, x_data: np.ndarray, y_really: np.ndarray):
        y_really = y_really.ravel()
        y_predict = self.predict(x_data)[0]
        accuracy = self._accuracy(y_predict, y_really)
        recall, class_list = self._macro(y_predict, y_really, 0)
        precision, class_list = self._macro(y_predict, y_really, 1)
        f1, class_list = self._macro(y_predict, y_really, 2)
        confusion_matrix, class_list = self._confusion_matrix(y_predict, y_really)
        kappa = self._kappa_score(y_predict, y_really)
        tab = Tab()

        def gauge_base(name: str, value: float) -> Gauge:
            c = (
                Gauge()
                .add("", [(name, round(value * 100, 2))], min_=0, max_=100)
                .set_global_opts(title_opts=opts.TitleOpts(title=name))
            )
            return c

        tab.add(gauge_base("准确率", accuracy), "准确率")
        tab.add(gauge_base("kappa", kappa), "kappa")

        def bar_base(name, value) -> Bar:
            c = (
                Bar()
                .add_xaxis(class_list)
                .add_yaxis(name, value, **label_setting)
                .set_global_opts(
                    title_opts=opts.TitleOpts(title=name), **global_setting
                )
            )
            return c

        tab.add(bar_base("精确率", precision.tolist()), "精确率")
        tab.add(bar_base("召回率", recall.tolist()), "召回率")
        tab.add(bar_base("F1", f1.tolist()), "F1")

        def heatmap_base(name, value, max_, min_, show) -> HeatMap:
            c = (
                HeatMap()
                .add_xaxis(class_list)
                .add_yaxis(
                    name,
                    class_list,
                    value,
                    label_opts=opts.LabelOpts(is_show=show, position="inside"),
                )
                .set_global_opts(
                    title_opts=opts.TitleOpts(title=name),
                    **global_setting,
                    visualmap_opts=opts.VisualMapOpts(
                        max_=max_, min_=min_, pos_right="3%"
                    ),
                )
            )
            return c

        value = [
            [class_list[i], class_list[j], float(confusion_matrix[i, j])]
            for i in range(len(class_list))
            for j in range(len(class_list))
        ]
        tab.add(
            heatmap_base(
                "混淆矩阵",
                value,
                float(confusion_matrix.max()),
                float(confusion_matrix.min()),
                len(class_list) < 7,  # hide labels when there are many classes
            ),
            "混淆矩阵",
        )
        des_to_csv(save_dir, "混淆矩阵", confusion_matrix, class_list, class_list)
        des_to_csv(
            save_dir, "评分", [precision, recall, f1], class_list, ["精确率", "召回率", "F1"]
        )
        save = save_dir + r"/分类模型评估.HTML"
        tab.render(save)
        return save,

    def regression_score(self, save_dir, x_data: np.ndarray, y_really: np.ndarray):
        y_really = y_really.ravel()
        y_predict = self.predict(x_data)[0]
        tab = Tab()
        mse = self._mse(y_predict, y_really)
        mae = self._mae(y_predict, y_really)
        r2_score = self._r2_score(y_predict, y_really)
        rmse = self._rmse(y_predict, y_really)
        tab.add(
            make_tab(["MSE", "MAE", "RMSE", "r2_Score"], [[mse, mae, rmse, r2_score]]),
            "评估数据",
        )
        save = save_dir + r"/回归模型评估.HTML"
        tab.render(save)
        return save,

    def clusters_score(self, save_dir, x_data: np.ndarray, *args):
        y_predict = self.predict(x_data)[0]
        tab = Tab()
        coefficient, coefficient_array = self._coefficient_clustering(x_data, y_predict)

        def gauge_base(name: str, value: float) -> Gauge:
            c = (
                Gauge()
                .add(
                    "",
                    [(name, round(value * 100, 2))],
                    min_=0,
                    max_=10 ** (judging_digits(value * 100)),
                )
                .set_global_opts(title_opts=opts.TitleOpts(title=name))
            )
            return c

        def bar_base(name, value, xaxis) -> Bar:
            c = (
                Bar()
                .add_xaxis(xaxis)
                .add_yaxis(name, value, **label_setting)
                .set_global_opts(
                    title_opts=opts.TitleOpts(title=name), **global_setting
                )
            )
            return c

        tab.add(gauge_base("平均轮廓系数", coefficient), "平均轮廓系数")

        def bar_(coefficient_array, name="数据轮廓系数"):
            xaxis = [f"数据{i}" for i in range(len(coefficient_array))]
            value = coefficient_array.tolist()
            tab.add(bar_base(name, value, xaxis), name)

        n = 20
        if len(coefficient_array) <= n:
            bar_(coefficient_array)
        elif len(coefficient_array) <= n ** 2:  # plot in batches of n samples
            a = 0
            while a <= len(coefficient_array):
                b = a + n
                if b >= len(coefficient_array):
                    b = len(coefficient_array) + 1
                cofe_array = coefficient_array[a:b]
                bar_(cofe_array, f"{a}-{b}数据轮廓系数")
                a += n
        else:  # plot in n equal batches
            split = np.hsplit(coefficient_array, n)
            a = 0
            for cofe_array in split:
                bar_(cofe_array, f"{a}%-{a + n}%数据轮廓系数")
                a += n
        save = save_dir + r"/聚类模型评估.HTML"
        tab.render(save)
        return save,

    def predict(self, x_data, *args, **kwargs):
        self.x_testdata = x_data.copy()
        y_predict = self.model.predict(x_data)
        self.y_testdata = y_predict.copy()
        self.have_predict = True
        return y_predict, "预测"

    def data_visualization(self, save_dir, *args, **kwargs):
        return save_dir,
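# A rough end-to-end sketch of the StudyMachinebase workflow, assuming a
# concrete subclass that sets self.model (LineModel below, for example):
#
#   learner = LineModel({"alpha": 1.0, "max_iter": 1000}, "Line")
#   train_score, test_score = learner.fit_model(x, y, split=0.3, increment=False)
#   y_predict, tip = learner.predict(x_new)
#   learner.regression_score(save_dir, x_test, y_test)  # writes 回归模型评估.HTML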
class PrepBase(StudyMachinebase):  # no second training allowed
    def __init__(self, *args, **kwargs):
        super(PrepBase, self).__init__(*args, **kwargs)
        self.model = None

    def fit_model(self, x_data, y_data, increment=True, *args, **kwargs):
        if not self.have_predict:  # no second training allowed
            y_data = y_data.ravel()
            try:
                if self.x_traindata is None or not increment:
                    raise Exception
                self.x_traindata = np.vstack((x_data, self.x_traindata))
                self.y_traindata = np.hstack((y_data, self.y_traindata))
            except BaseException:
                self.x_traindata = x_data.copy()
                self.y_traindata = y_data.copy()
            try:  # incremental training
                if not increment:
                    raise Exception
                self.model.partial_fit(x_data, y_data)
            except BaseException:
                self.model.fit(self.x_traindata, self.y_traindata)
            self.have_fit = True
        return "None", "None"

    def predict(self, x_data, *args, **kwargs):
        self.x_testdata = x_data.copy()
        x_predict = self.model.transform(x_data)
        self.y_testdata = x_predict.copy()
        self.have_predict = True
        return x_predict, "特征工程"

    def score(self, x_data, y_data):
        return "None"  # no score available


class Unsupervised(PrepBase):  # unsupervised, no second training allowed
    def fit_model(self, x_data, increment=True, *args, **kwargs):
        if not self.have_predict:  # no second training allowed
            self.y_traindata = None
            try:
                if self.x_traindata is None or not increment:
                    raise Exception
                self.x_traindata = np.vstack((x_data, self.x_traindata))
            except BaseException:
                self.x_traindata = x_data.copy()
            try:  # incremental training
                if not increment:
                    raise Exception
                self.model.partial_fit(x_data)
            except BaseException:
                self.model.fit(self.x_traindata, self.y_traindata)
            self.have_fit = True
        return "None", "None"


class UnsupervisedModel(PrepBase):  # unsupervised
    def fit_model(self, x_data, increment=True, *args, **kwargs):
        self.y_traindata = None
        try:
            if self.x_traindata is None or not increment:
                raise Exception
            self.x_traindata = np.vstack((x_data, self.x_traindata))
        except BaseException:
            self.x_traindata = x_data.copy()
        try:  # incremental training
            if not increment:
                raise Exception
            self.model.partial_fit(x_data)
        except BaseException:
            self.model.fit(self.x_traindata, self.y_traindata)
        self.have_fit = True
        return "None", "None"
@plugin_class_loading(get_path(r"template/machinelearning"))
class ToPyebase(StudyMachinebase):
    def __init__(self, model, *args, **kwargs):
        super(ToPyebase, self).__init__(*args, **kwargs)
        self.model = None
        # recorded for cloning
        self.k = {}
        self.model_Name = model

    def fit_model(self, x_data, y_data, *args, **kwargs):
        self.x_traindata = x_data.copy()
        self.y_traindata = y_data.ravel().copy()
        self.have_fit = True
        return "None", "None"

    def predict(self, x_data, *args, **kwargs):
        self.have_predict = True
        return np.array([]), "请使用训练"

    def score(self, x_data, y_data):
        return "None"  # no score available
class DataAnalysis(ToPyebase):  # data analysis
    def data_visualization(self, save_dir, *args, **kwargs):
        tab = Tab()
        data = self.x_traindata

        def cumulative_calculation(tab_data, func, name, render_tab):
            sum_list = []
            for i in range(len(tab_data)):  # iterate over rows
                sum_list.append([])
                for a in range(len(tab_data[i])):
                    s = num_str(func(tab_data[: i + 1, a]), 8)
                    sum_list[-1].append(s)
            des_to_csv(save_dir, f"{name}", sum_list)
            render_tab.add(
                make_tab([f"[{i}]" for i in range(len(sum_list[0]))], sum_list),
                f"{name}",
            )

        def geometric_mean(x):
            return np.power(np.prod(x), 1 / len(x))  # geometric mean

        def square_mean(x):
            return np.sqrt(np.sum(np.power(x, 2)) / len(x))  # quadratic mean

        def harmonic_mean(x):
            return len(x) / np.sum(np.power(x, -1))  # harmonic mean

        cumulative_calculation(data, np.sum, "累计求和", tab)
        cumulative_calculation(data, np.var, "累计方差", tab)
        cumulative_calculation(data, np.std, "累计标准差", tab)
        cumulative_calculation(data, np.mean, "累计算术平均值", tab)
        cumulative_calculation(data, geometric_mean, "累计几何平均值", tab)
        cumulative_calculation(data, square_mean, "累计平方平均值", tab)
        cumulative_calculation(data, harmonic_mean, "累计调和平均值", tab)
        cumulative_calculation(data, np.median, "累计中位数", tab)
        cumulative_calculation(data, np.max, "累计最大值", tab)
        cumulative_calculation(data, np.min, "累计最小值", tab)
        save = save_dir + r"/数据分析.HTML"
        tab.render(save)  # generate HTML
        return save,
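# A worked example of the three non-arithmetic means above, assuming
# x = [1, 2, 4]: geometric_mean -> (1*2*4) ** (1/3) = 2.0,
# square_mean -> sqrt((1 + 4 + 16) / 3) ≈ 2.65,
# harmonic_mean -> 3 / (1 + 0.5 + 0.25) ≈ 1.71.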
class Corr(ToPyebase):  # correlation and covariance
    def data_visualization(self, save_dir, *args, **kwargs):
        tab = Tab()
        data = DataFrame(self.x_traindata)
        corr = data.corr().to_numpy()  # correlation
        cov = data.cov().to_numpy()  # covariance

        def heat_map(data, name: str, max_, min_):
            x = [f"特征[{i}]" for i in range(len(data))]
            y = [f"特征[{i}]" for i in range(len(data[0]))]
            value = [
                (f"特征[{i}]", f"特征[{j}]", float(data[i][j]))
                for i in range(len(data))
                for j in range(len(data[i]))
            ]
            c = (
                HeatMap()
                .add_xaxis(x)
                # hide the labels when there are too many features
                .add_yaxis(
                    "数据",
                    y,
                    value,
                    label_opts=opts.LabelOpts(
                        is_show=True if len(x) <= 10 else False, position="inside"
                    ),
                )
                .set_global_opts(
                    title_opts=opts.TitleOpts(title="矩阵热力图"),
                    **global_not_legend,
                    yaxis_opts=opts.AxisOpts(
                        is_scale=True, type_="category"
                    ),  # 'category'
                    xaxis_opts=opts.AxisOpts(is_scale=True, type_="category"),
                    visualmap_opts=opts.VisualMapOpts(
                        is_show=True, max_=max_, min_=min_, pos_right="3%"
                    ),
                )  # display
            )
            tab.add(c, name)

        heat_map(corr, "相关性热力图", 1, -1)
        heat_map(cov, "协方差热力图", float(cov.max()), float(cov.min()))
        des_to_csv(save_dir, "相关性矩阵", corr)
        des_to_csv(save_dir, "协方差矩阵", cov)
        save = save_dir + r"/数据相关性.HTML"
        tab.render(save)  # generate HTML
        return save,
class ViewData(ToPyebase):  # view a learner's training and test data
    def __init__(
        self, args_use, learner, *args, **kwargs
    ):  # model is the selected model type; Alpha is the regularization parameter
        super(ViewData, self).__init__(args_use, learner, *args, **kwargs)
        self.model = learner.model
        self.Select_Model = None
        self.have_fit = learner.have_fit
        self.model_Name = "Select_Model"
        self.learner = learner
        self.learner_name = learner.model_Name

    def fit_model(self, *args, **kwargs):
        self.have_fit = True
        return "None", "None"

    def predict(self, x_data, add_func=None, *args, **kwargs):
        x_traindata = self.learner.x_traindata
        y_traindata = self.learner.y_traindata
        x_name = self.learner_name
        if x_traindata is not None:
            add_func(x_traindata, f"{x_name}:x训练数据")
        try:
            x_testdata = self.x_testdata
            if x_testdata is not None:
                add_func(x_testdata, f"{x_name}:x测试数据")
        except BaseException:
            pass
        try:
            y_testdata = self.y_testdata.copy()
            if y_testdata is not None:
                add_func(y_testdata, f"{x_name}:y测试数据")
        except BaseException:
            pass
        self.have_fit = True
        if y_traindata is None:
            return np.array([]), "y训练数据"
        return y_traindata, "y训练数据"

    def data_visualization(self, save_dir, *args, **kwargs):
        return save_dir,
class MatrixScatter(ToPyebase):  # matrix scatter plot
    def data_visualization(self, save_dir, *args, **kwargs):
        tab = Tab()
        data = self.x_traindata
        if data.ndim <= 2:  # up to 2-D
            c = (
                Scatter()
                .add_xaxis([f"{i}" for i in range(data.shape[1])])
                .set_global_opts(
                    title_opts=opts.TitleOpts(title="矩阵散点图"), **global_not_legend
                )
            )
            if data.ndim == 2:
                for num in range(len(data)):
                    i = data[num]
                    c.add_yaxis(f"{num}", [[f"{num}", x] for x in i], color="#FFFFFF")
            else:
                c.add_yaxis("0", [[0, x] for x in data], color="#FFFFFF")
            c.set_series_opts(
                label_opts=opts.LabelOpts(
                    is_show=True,
                    color="#000000",
                    position="inside",
                    formatter=JsCode("function(params){return params.data[2];}"),
                )
            )
        elif data.ndim == 3:
            c = Scatter3D().set_global_opts(
                title_opts=opts.TitleOpts(title="矩阵散点图"), **global_not_legend
            )
            for num in range(len(data)):
                i = data[num]
                for s_num in range(len(i)):
                    s = i[s_num]
                    y_data = [[num, s_num, x, float(s[x])] for x in range(len(s))]
                    c.add(
                        f"{num}", y_data, zaxis3d_opts=opts.Axis3DOpts(type_="category")
                    )
            c.set_series_opts(
                label_opts=opts.LabelOpts(
                    is_show=True,
                    color="#000000",
                    position="inside",
                    formatter=JsCode("function(params){return params.data[3];}"),
                )
            )
        else:
            c = Scatter()
        tab.add(c, "矩阵散点图")
        save = save_dir + r"/矩阵散点图.HTML"
        tab.render(save)  # generate HTML
        return save,
class ClusterTree(ToPyebase):  # cluster dendrogram
    def data_visualization(self, save_dir, *args, **kwargs):
        tab = Tab()
        x_data = self.x_traindata
        linkage_array = ward(x_data)  # self.y_traindata holds the result
        dendrogram(linkage_array)
        plt.savefig(save_dir + r"/Cluster_graph.png")
        image = Image()
        image.add(src=save_dir + r"/Cluster_graph.png").set_global_opts(
            title_opts=opts.ComponentTitleOpts(title="聚类树状图")
        )
        tab.add(image, "聚类树状图")
        save = save_dir + r"/聚类树状图.HTML"
        tab.render(save)  # generate HTML
        return save,
class ClassBar(ToPyebase):  # per-class bar chart
    def data_visualization(self, save_dir, *args, **kwargs):
        tab = Tab()
        x_data = self.x_traindata.transpose()
        y_data = self.y_traindata
        class_ = np.unique(y_data).tolist()  # class labels
        class_list = []
        for n_class in class_:  # build class_list (boolean masks, flattened below)
            class_list.append(y_data == n_class)
        for num_i in range(len(x_data)):  # iterate over features
            i = x_data[num_i]
            i_con = is_continuous(i)
            if i_con and len(i) >= 11:
                # plot data: one outer list per class (legend), one entry per bucket
                c_list = [[0] * 10 for _ in class_list]
                start = i.min()
                end = i.max()
                n = (end - start) / 10  # 10 bars
                x_axis = []  # x axis
                iter_num = 0  # current bucket
                while iter_num <= 9:  # split the feature into 10 buckets
                    # extend the x axis
                    x_axis.append(
                        f"({iter_num})[{round(start, 2)}-"
                        f"{round((start + n) if (start + n) <= end or not iter_num == 9 else end, 2)}]"
                    )
                    try:
                        if iter_num == 9:
                            raise Exception  # on the 10th pass, take everything left
                        s = (start <= i) & (i < start + n)  # boolean mask
                    except BaseException:  # start + n may overshoot end
                        s = (start <= i) & (i <= end)  # boolean mask
                    # n_data = i[s]  # current feature bucket
                    for num in range(len(class_list)):  # iterate over classes
                        # boolean mask: y_data == n_class, used for slicing
                        now_class: list = class_list[num]
                        # slice to the same positions as the bucket (now_class is a bool matrix)
                        bool_class = now_class[s].ravel()
                        # count via sum: c_list = [[class1 data], [class2 data], ...]
                        c_list[num][iter_num] = int(np.sum(bool_class))
                    iter_num += 1
                    start += n
            else:
                iter_np = np.unique(i)
                # plot data: one outer list per class (legend), one entry per value
                c_list = [[0] * len(iter_np) for _ in class_list]
                x_axis = []  # x axis data
                for i_num in range(len(iter_np)):  # iterate over distinct values of i
                    i_data = iter_np[i_num]
                    # n_data = i[i == i_data]  # current feature bucket
                    x_axis.append(f"[{i_data}]")
                    for num in range(len(class_list)):  # iterate over classes
                        now_class = class_list[num]  # boolean mask for this class
                        # slice to the same positions as the bucket (now_class is a bool matrix)
                        bool_class = now_class[i == i_data]
                        # count via sum: c_list = [[class1 data], [class2 data], ...]
                        c_list[num][i_num] = int(np.sum(bool_class))
            c = (
                Bar()
                .add_xaxis(x_axis)
                .set_global_opts(
                    title_opts=opts.TitleOpts(title="类型-特征统计柱状图"),
                    **global_setting,
                    xaxis_opts=opts.AxisOpts(type_="category"),
                    yaxis_opts=opts.AxisOpts(type_="value"),
                )
            )
            y_axis = []
            for i in range(len(c_list)):
                y_axis.append(f"{class_[i]}")
                c.add_yaxis(f"{class_[i]}", c_list[i], **label_setting)
            des_to_csv(save_dir, f"类型-[{num_i}]特征统计柱状图", c_list, x_axis, y_axis)
            tab.add(c, f"类型-[{num_i}]特征统计柱状图")
        # unfinished
        save = save_dir + r"/特征统计.HTML"
        tab.render(save)  # generate HTML
        return save,
class NumpyHeatMap(ToPyebase):  # heat map of a NumPy matrix
    def data_visualization(self, save_dir, *args, **kwargs):
        tab = Tab()
        data = self.x_traindata
        x = [f"横[{i}]" for i in range(len(data))]
        y = [f"纵[{i}]" for i in range(len(data[0]))]
        value = [
            (f"横[{i}]", f"纵[{j}]", float(data[i][j]))
            for i in range(len(data))
            for j in range(len(data[i]))
        ]
        c = (
            HeatMap()
            .add_xaxis(x)
            .add_yaxis("数据", y, value, **label_setting)  # the first entry of value is x
            .set_global_opts(
                title_opts=opts.TitleOpts(title="矩阵热力图"),
                **global_not_legend,
                yaxis_opts=opts.AxisOpts(is_scale=True, type_="category"),  # 'category'
                xaxis_opts=opts.AxisOpts(is_scale=True, type_="category"),
                visualmap_opts=opts.VisualMapOpts(
                    is_show=True,
                    max_=float(data.max()),
                    min_=float(data.min()),
                    pos_right="3%",
                ),
            )  # display
        )
        tab.add(c, "矩阵热力图")
        tab.add(make_tab(x, data.transpose().tolist()), "矩阵热力图:表格")
        save = save_dir + r"/矩阵热力图.HTML"
        tab.render(save)  # generate HTML
        return save,
class PredictiveHeatmapBase(ToPyebase):  # predictive heat map
    def __init__(
        self, args_use, learner, *args, **kwargs
    ):  # model is the selected model type; Alpha is the regularization parameter
        super(PredictiveHeatmapBase, self).__init__(args_use, learner, *args, **kwargs)
        self.model = learner.model
        self.select_model = None
        self.have_fit = learner.have_fit
        self.model_Name = "Select_Model"
        self.learner = learner
        self.x_traindata = learner.x_traindata.copy()
        self.y_traindata = learner.y_traindata.copy()
        self.means = []

    def fit_model(self, x_data, *args, **kwargs):
        try:
            self.means = x_data.ravel()
        except BaseException:
            pass
        self.have_fit = True
        return "None", "None"

    def data_visualization(
        self,
        save_dir,
        decision_boundary_func=None,
        prediction_boundary_func=None,
        *args,
        **kwargs,
    ):
        tab = Tab()
        y = self.y_traindata
        x_data = self.x_traindata
        try:  # falls through when the model has no classes_
            class_ = self.model.classes_.tolist()
            class_heard = [f"类别[{i}]" for i in range(len(class_))]
            # fetch the data
            get, x_means, x_range, data_type = training_visualization(x_data, class_, y)
            # user-supplied means may be used; NaN means "skip this feature"
            for i in range(min([len(x_means), len(self.means)])):
                try:
                    g = self.means[i]
                    if np.isnan(g):
                        raise Exception
                    x_means[i] = g
                except BaseException:
                    pass
            get = decision_boundary_func(
                x_range, x_means, self.learner.predict, class_, data_type
            )
            for i in range(len(get)):
                tab.add(get[i], f"{i}预测热力图")
            heard = class_heard + [f"普适预测第{i}特征" for i in range(len(x_means))]
            data = class_ + [f"{i}" for i in x_means]
            c = Table().add(headers=heard, rows=[data])
            tab.add(c, "数据表")
        except BaseException:
            get, x_means, x_range, data_type = regress_visualization(x_data, y)
            get = prediction_boundary_func(
                x_range, x_means, self.learner.predict, data_type
            )
            for i in range(len(get)):
                tab.add(get[i], f"{i}预测热力图")
            heard = [f"普适预测第{i}特征" for i in range(len(x_means))]
            data = [f"{i}" for i in x_means]
            c = Table().add(headers=heard, rows=[data])
            tab.add(c, "数据表")
        save = save_dir + r"/预测热力图.HTML"
        tab.render(save)  # generate HTML
        return save,


class PredictiveHeatmap(PredictiveHeatmapBase):  # predictive heat map
    def data_visualization(self, save_dir, *args, **kwargs):
        return super().data_visualization(
            save_dir, decision_boundary, prediction_boundary
        )


class PredictiveHeatmapMore(PredictiveHeatmapBase):  # predictive heat map (all features)
    def data_visualization(self, save_dir, *args, **kwargs):
        return super().data_visualization(
            save_dir, decision_boundary_more, prediction_boundary_more
        )
@plugin_class_loading(get_path(r"template/machinelearning"))
class NearFeatureScatterClassMore(ToPyebase):
    def data_visualization(self, save_dir, *args, **kwargs):
        tab = Tab()
        x_data = self.x_traindata
        y = self.y_traindata
        class_ = np.unique(y).ravel().tolist()
        class_heard = [f"簇[{i}]" for i in range(len(class_))]
        get, x_means, x_range, data_type = training_visualization_more_no_center(
            x_data, class_, y
        )
        for i in range(len(get)):
            tab.add(get[i], f"{i}训练数据散点图")
        heard = class_heard + [f"普适预测第{i}特征" for i in range(len(x_means))]
        data = class_ + [f"{i}" for i in x_means]
        c = Table().add(headers=heard, rows=[data])
        tab.add(c, "数据表")
        save = save_dir + r"/数据特征散点图(分类).HTML"
        tab.render(save)  # generate HTML
        return save,


@plugin_class_loading(get_path(r"template/machinelearning"))
class NearFeatureScatterMore(ToPyebase):
    def data_visualization(self, save_dir, *args, **kwargs):
        tab = Tab()
        x_data = self.x_traindata
        x_means = quick_stats(x_data).get()[0]
        get_y = feature_visualization(x_data, "数据散点图")  # transform
        for i in range(len(get_y)):
            tab.add(get_y[i], f"[{i}]数据x-x散点图")
        heard = [f"普适预测第{i}特征" for i in range(len(x_means))]
        data = [f"{i}" for i in x_means]
        c = Table().add(headers=heard, rows=[data])
        tab.add(c, "数据表")
        save = save_dir + r"/数据特征散点图.HTML"
        tab.render(save)  # generate HTML
        return save,


class NearFeatureScatterClass(ToPyebase):  # adjacent-feature scatter plot: class data
    def data_visualization(self, save_dir, *args, **kwargs):
        # fetch the data
        class_ = np.unique(self.y_traindata).ravel().tolist()
        class_heard = [f"类别[{i}]" for i in range(len(class_))]
        tab = Tab()
        y = self.y_traindata
        x_data = self.x_traindata
        get, x_means, x_range, data_type = training_visualization(x_data, class_, y)
        for i in range(len(get)):
            tab.add(get[i], f"{i}临近特征散点图")
        heard = class_heard + [f"普适预测第{i}特征" for i in range(len(x_means))]
        data = class_ + [f"{i}" for i in x_means]
        c = Table().add(headers=heard, rows=[data])
        tab.add(c, "数据表")
        save = save_dir + r"/临近数据特征散点图(分类).HTML"
        tab.render(save)  # generate HTML
        return save,


class NearFeatureScatter(ToPyebase):  # adjacent-feature scatter plot: continuous data
    def data_visualization(self, save_dir, *args, **kwargs):
        tab = Tab()
        x_data = self.x_traindata.transpose()
        get, x_means, x_range, data_type = training_visualization_no_class(x_data)
        for i in range(len(get)):
            tab.add(get[i], f"{i}临近特征散点图")
        columns = [f"普适预测第{i}特征" for i in range(len(x_means))]
        data = [f"{i}" for i in x_means]
        tab.add(make_tab(columns, [data]), "数据表")
        save = save_dir + r"/临近数据特征散点图.HTML"
        tab.render(save)  # generate HTML
        return save,


class FeatureScatterYX(ToPyebase):  # y-x plot
    def data_visualization(self, save_dir, *args, **kwargs):
        tab = Tab()
        x_data = self.x_traindata
        y = self.y_traindata
        get, x_means, x_range, data_type = regress_visualization(x_data, y)
        for i in range(len(get)):
            tab.add(get[i], f"{i}特征x-y散点图")
        columns = [f"普适预测第{i}特征" for i in range(len(x_means))]
        data = [f"{i}" for i in x_means]
        tab.add(make_tab(columns, [data]), "数据表")
        save = save_dir + r"/特征y-x图像.HTML"
        tab.render(save)  # generate HTML
        return save,
  1054. @plugin_class_loading(get_path(r"template/machinelearning"))
  1055. class LineModel(StudyMachinebase):
  1056. def __init__(
  1057. self, args_use, model, *args, **kwargs
  1058. ): # model表示当前选用的模型类型,Alpha针对正则化的参数
  1059. super(LineModel, self).__init__(*args, **kwargs)
  1060. all_model = {"Line": LinearRegression, "Ridge": Ridge, "Lasso": Lasso}[model]
  1061. if model == "Line":
  1062. self.model = all_model()
  1063. self.k = {}
  1064. else:
  1065. self.model = all_model(
  1066. alpha=args_use["alpha"], max_iter=args_use["max_iter"]
  1067. )
  1068. self.k = {"alpha": args_use["alpha"], "max_iter": args_use["max_iter"]}
  1069. # 记录这两个是为了克隆
  1070. self.Alpha = args_use["alpha"]
  1071. self.max_iter = args_use["max_iter"]
  1072. self.model_Name = model
  1073. def data_visualization(self, save_dir, *args, **kwargs):
  1074. tab = Tab()
  1075. x_data = self.x_traindata
  1076. y = self.y_traindata
  1077. w_list = self.model.coef_.tolist()
  1078. w_heard = [f"系数w[{i}]" for i in range(len(w_list))]
  1079. b = self.model.intercept_.tolist()
  1080. get, x_means, x_range, data_type = regress_visualization(x_data, y)
  1081. get_line = regress_w(x_data, w_list, b, x_means.copy())
  1082. for i in range(len(get)):
  1083. tab.add(get[i].overlap(get_line[i]), f"{i}预测类型图")
  1084. get = prediction_boundary(x_range, x_means, self.predict, data_type)
  1085. for i in range(len(get)):
  1086. tab.add(get[i], f"{i}预测热力图")
  1087. tab.add(coefficient_scatter_plot(w_heard, w_list), "系数w散点图")
  1088. tab.add(coefficient_bar_plot(w_heard, self.model.coef_), "系数柱状图")
  1089. columns = [f"普适预测第{i}特征" for i in range(len(x_means))] + w_heard + ["截距b"]
  1090. data = [f"{i}" for i in x_means] + w_list + [b]
  1091. if self.model_Name != "Line":
  1092. columns += ["阿尔法", "最大迭代次数"]
  1093. data += [self.model.alpha, self.model.max_iter]
  1094. tab.add(make_tab(columns, [data]), "数据表")
  1095. des_to_csv(
  1096. save_dir,
  1097. "系数表",
  1098. [w_list + [b]],
  1099. [f"系数W[{i}]" for i in range(len(w_list))] + ["截距"],
  1100. )
  1101. des_to_csv(
  1102. save_dir,
  1103. "预测表",
  1104. [[f"{i}" for i in x_means]],
  1105. [f"普适预测第{i}特征" for i in range(len(x_means))],
  1106. )
  1107. save = save_dir + r"/线性回归模型.HTML"
  1108. tab.render(save) # 生成HTML
  1109. return save,
  1110. @plugin_class_loading(get_path(r"template/machinelearning"))
  1111. class LogisticregressionModel(StudyMachinebase):
  1112. def __init__(
  1113. self, args_use, model, *args, **kwargs
  1114. ): # model表示当前选用的模型类型,Alpha针对正则化的参数
  1115. super(LogisticregressionModel, self).__init__(*args, **kwargs)
  1116. self.model = LogisticRegression(C=args_use["C"], max_iter=args_use["max_iter"])
  1117. # 记录这两个是为了克隆
  1118. self.C = args_use["C"]
  1119. self.max_iter = args_use["max_iter"]
  1120. self.k = {"C": args_use["C"], "max_iter": args_use["max_iter"]}
  1121. self.model_Name = model
  1122. def data_visualization(self, save_dir="render.html", *args, **kwargs):
  1123. # 获取数据
  1124. w_array = self.model.coef_
  1125. w_list = w_array.tolist() # 变为表格
  1126. b = self.model.intercept_
  1127. c = self.model.C
  1128. max_iter = self.model.max_iter
  1129. class_ = self.model.classes_.tolist()
  1130. class_heard = [f"类别[{i}]" for i in range(len(class_))]
  1131. tab = Tab()
  1132. y = self.y_traindata
  1133. x_data = self.x_traindata
  1134. get, x_means, x_range, data_type = training_visualization(x_data, class_, y)
  1135. get_line = training_w(x_data, class_, y, w_list, b, x_means.copy())
  1136. for i in range(len(get)):
  1137. tab.add(get[i].overlap(get_line[i]), f"{i}决策边界散点图")
  1138. for i in range(len(w_list)):
  1139. w = w_list[i]
  1140. w_heard = [f"系数w[{i},{j}]" for j in range(len(w))]
  1141. tab.add(coefficient_scatter_plot(w_heard, w), f"系数w[{i}]散点图")
  1142. tab.add(coefficient_bar_plot(w_heard, w_array[i]), f"系数w[{i}]柱状图")
  1143. columns = class_heard + [f"截距{i}" for i in range(len(b))] + ["C", "最大迭代数"]
  1144. data = class_ + b.tolist() + [c, max_iter]
  1145. c = Table().add(headers=columns, rows=[data])
  1146. tab.add(c, "数据表")
  1147. c = Table().add(
  1148. headers=[f"系数W[{i}]" for i in range(len(w_list[0]))], rows=w_list
  1149. )
  1150. tab.add(c, "系数数据表")
  1151. c = Table().add(
  1152. headers=[f"普适预测第{i}特征" for i in range(len(x_means))],
  1153. rows=[[f"{i}" for i in x_means]],
  1154. )
  1155. tab.add(c, "普适预测数据表")
  1156. des_to_csv(
  1157. save_dir, "系数表", w_list, [f"系数W[{i}]" for i in range(len(w_list[0]))]
  1158. )
  1159. des_to_csv(save_dir, "截距表", [b], [f"截距{i}" for i in range(len(b))])
  1160. des_to_csv(
  1161. save_dir,
  1162. "预测表",
  1163. [[f"{i}" for i in x_means]],
  1164. [f"普适预测第{i}特征" for i in range(len(x_means))],
  1165. )
  1166. save = save_dir + r"/逻辑回归.HTML"
  1167. tab.render(save) # 生成HTML
  1168. return save,
  1169. class CategoricalData: # 数据统计助手
  1170. def __init__(self):
  1171. self.x_means = []
  1172. self.x_range = []
  1173. self.data_type = []
  1174. def __call__(self, x1, *args, **kwargs):
  1175. get = self.is_continuous(x1)
  1176. return get
def is_continuous(self, x1: np.ndarray):
  1178. try:
  1179. x1_con = is_continuous(x1)
  1180. if x1_con:
  1181. self.x_means.append(np.mean(x1))
  1182. self.add_range(x1)
  1183. else:
  1184. raise Exception
  1185. return x1_con
except BaseException:  # fall back to the most frequent element (the mode)
new = np.unique(x1)  # distinct values
count_list = []
for i in new:
count_list.append(np.sum(x1 == i))
index = count_list.index(max(count_list))  # position of the highest count
self.x_means.append(new[index])  # index into the unique values, not into x1
self.add_range(x1, False)
  1194. return False
  1195. def add_range(self, x1: np.array, range_=True):
  1196. try:
  1197. if not range_:
  1198. raise Exception
  1199. min_ = int(x1.min()) - 1
  1200. max_ = int(x1.max()) + 1
  1201. # 不需要复制列表
  1202. self.x_range.append([min_, max_])
  1203. self.data_type.append(1)
  1204. except BaseException:
  1205. self.x_range.append(list(set(x1.tolist()))) # 去除多余元素
  1206. self.data_type.append(2)
  1207. def get(self):
  1208. return self.x_means, self.x_range, self.data_type
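# A minimal sketch (illustrative only, not part of the original pipeline) of how
# CategoricalData accumulates statistics column by column: a continuous column
# contributes its mean and a widened [min-1, max+1] range (data_type 1), while a
# discrete column contributes its mode and its unique values (data_type 2).
# Assumes the module-level is_continuous helper rejects non-numeric columns.
def _categorical_data_sketch():
    import numpy as np
    helper = CategoricalData()
    helper(np.array([0.1, 0.5, 0.9, 1.3]))   # continuous -> mean 0.7, range [-1, 2]
    helper(np.array(["a", "b", "b", "c"]))   # discrete  -> mode "b", unique values
    return helper.get()                      # -> (x_means, x_range, data_type)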
  1209. @plugin_class_loading(get_path(r"template/machinelearning"))
  1210. class KnnModel(StudyMachinebase):
  1211. def __init__(
  1212. self, args_use, model, *args, **kwargs
  1213. ): # model表示当前选用的模型类型,Alpha针对正则化的参数
  1214. super(KnnModel, self).__init__(*args, **kwargs)
  1215. all_model = {"Knn_class": KNeighborsClassifier, "Knn": KNeighborsRegressor}[
  1216. model
  1217. ]
  1218. self.model = all_model(p=args_use["p"], n_neighbors=args_use["n_neighbors"])
  1219. # 记录这两个是为了克隆
  1220. self.n_neighbors = args_use["n_neighbors"]
  1221. self.p = args_use["p"]
  1222. self.k = {"n_neighbors": args_use["n_neighbors"], "p": args_use["p"]}
  1223. self.model_Name = model
  1224. def data_visualization(self, save_dir, *args, **kwargs):
  1225. tab = Tab()
  1226. y = self.y_traindata
  1227. x_data = self.x_traindata
  1228. y_test = self.y_testdata
  1229. x_test = self.x_testdata
  1230. if self.model_Name == "Knn_class":
  1231. class_ = self.model.classes_.tolist()
  1232. class_heard = [f"类别[{i}]" for i in range(len(class_))]
  1233. get, x_means, x_range, data_type = training_visualization(x_data, class_, y)
  1234. for i in range(len(get)):
  1235. tab.add(get[i], f"{i}训练数据散点图")
  1236. if y_test is not None:
  1237. get = training_visualization(x_test, class_, y_test)[0]
  1238. for i in range(len(get)):
  1239. tab.add(get[i], f"{i}测试数据散点图")
  1240. get = decision_boundary(x_range, x_means, self.predict, class_, data_type)
  1241. for i in range(len(get)):
  1242. tab.add(get[i], f"{i}预测热力图")
  1243. heard = class_heard + [f"普适预测第{i}特征" for i in range(len(x_means))]
  1244. data = class_ + [f"{i}" for i in x_means]
  1245. c = Table().add(headers=heard, rows=[data])
  1246. tab.add(c, "数据表")
  1247. else:
  1248. get, x_means, x_range, data_type = regress_visualization(x_data, y)
  1249. for i in range(len(get)):
  1250. tab.add(get[i], f"{i}训练数据散点图")
if y_test is not None:
get = regress_visualization(x_test, y_test)[0]
for i in range(len(get)):
tab.add(get[i], f"{i}测试数据类型图")
  1254. get = prediction_boundary(x_range, x_means, self.predict, data_type)
  1255. for i in range(len(get)):
  1256. tab.add(get[i], f"{i}预测热力图")
  1257. heard = [f"普适预测第{i}特征" for i in range(len(x_means))]
  1258. data = [f"{i}" for i in x_means]
  1259. c = Table().add(headers=heard, rows=[data])
  1260. tab.add(c, "数据表")
  1261. des_to_csv(
  1262. save_dir,
  1263. "预测表",
  1264. [[f"{i}" for i in x_means]],
  1265. [f"普适预测第{i}特征" for i in range(len(x_means))],
  1266. )
  1267. save = save_dir + r"/K.HTML"
  1268. tab.render(save) # 生成HTML
  1269. return save,
  1270. @plugin_class_loading(get_path(r"template/machinelearning"))
  1271. class TreeModel(StudyMachinebase):
  1272. def __init__(
  1273. self, args_use, model, *args, **kwargs
  1274. ): # model表示当前选用的模型类型,Alpha针对正则化的参数
  1275. super(TreeModel, self).__init__(*args, **kwargs)
  1276. all_model = {
  1277. "Tree_class": DecisionTreeClassifier,
  1278. "Tree": DecisionTreeRegressor,
  1279. }[model]
  1280. self.model = all_model(
  1281. criterion=args_use["criterion"],
  1282. splitter=args_use["splitter"],
  1283. max_features=args_use["max_features"],
  1284. max_depth=args_use["max_depth"],
  1285. min_samples_split=args_use["min_samples_split"],
  1286. )
  1287. # 记录这两个是为了克隆
  1288. self.criterion = args_use["criterion"]
  1289. self.splitter = args_use["splitter"]
  1290. self.max_features = args_use["max_features"]
  1291. self.max_depth = args_use["max_depth"]
  1292. self.min_samples_split = args_use["min_samples_split"]
  1293. self.k = {
  1294. "criterion": args_use["criterion"],
  1295. "splitter": args_use["splitter"],
  1296. "max_features": args_use["max_features"],
  1297. "max_depth": args_use["max_depth"],
  1298. "min_samples_split": args_use["min_samples_split"],
  1299. }
  1300. self.model_Name = model
  1301. def data_visualization(self, save_dir, *args, **kwargs):
  1302. tab = Tab()
  1303. importance = self.model.feature_importances_.tolist()
  1304. with open(save_dir + r"\Tree_Gra.dot", "w") as f:
  1305. export_graphviz(self.model, out_file=f)
  1306. make_bar("特征重要性", importance, tab)
  1307. des_to_csv(
  1308. save_dir,
  1309. "特征重要性",
  1310. [importance],
  1311. [f"[{i}]特征" for i in range(len(importance))],
  1312. )
  1313. tab.add(see_tree(save_dir + r"\Tree_Gra.dot"), "决策树可视化")
  1314. y = self.y_traindata
  1315. x_data = self.x_traindata
  1316. y_test = self.y_testdata
  1317. x_test = self.x_testdata
  1318. if self.model_Name == "Tree_class":
  1319. class_ = self.model.classes_.tolist()
  1320. class_heard = [f"类别[{i}]" for i in range(len(class_))]
  1321. get, x_means, x_range, data_type = training_visualization(x_data, class_, y)
  1322. for i in range(len(get)):
  1323. tab.add(get[i], f"{i}训练数据散点图")
  1324. get = training_visualization(x_test, class_, y_test)[0]
  1325. for i in range(len(get)):
  1326. tab.add(get[i], f"{i}测试数据散点图")
  1327. get = decision_boundary(x_range, x_means, self.predict, class_, data_type)
  1328. for i in range(len(get)):
  1329. tab.add(get[i], f"{i}预测热力图")
  1330. tab.add(
  1331. make_tab(
  1332. class_heard
  1333. + [f"普适预测第{i}特征" for i in range(len(x_means))]
  1334. + [f"特征{i}重要性" for i in range(len(importance))],
  1335. [class_ + [f"{i}" for i in x_means] + importance],
  1336. ),
  1337. "数据表",
  1338. )
  1339. else:
  1340. get, x_means, x_range, data_type = regress_visualization(x_data, y)
  1341. for i in range(len(get)):
  1342. tab.add(get[i], f"{i}训练数据散点图")
  1343. get = regress_visualization(x_test, y_test)[0]
  1344. for i in range(len(get)):
  1345. tab.add(get[i], f"{i}测试数据类型图")
  1346. get = prediction_boundary(x_range, x_means, self.predict, data_type)
  1347. for i in range(len(get)):
  1348. tab.add(get[i], f"{i}预测热力图")
  1349. tab.add(
  1350. make_tab(
  1351. [f"普适预测第{i}特征" for i in range(len(x_means))]
  1352. + [f"特征{i}重要性" for i in range(len(importance))],
  1353. [[f"{i}" for i in x_means] + importance],
  1354. ),
  1355. "数据表",
  1356. )
  1357. des_to_csv(
  1358. save_dir,
  1359. "预测表",
  1360. [[f"{i}" for i in x_means]],
  1361. [f"普适预测第{i}特征" for i in range(len(x_means))],
  1362. )
  1363. save = save_dir + r"/决策树.HTML"
  1364. tab.render(save) # 生成HTML
  1365. return save,
  1366. @plugin_class_loading(get_path(r"template/machinelearning"))
  1367. class ForestModel(StudyMachinebase):
  1368. def __init__(
  1369. self, args_use, model, *args, **kwargs
):  # model names the selected model type
super(ForestModel, self).__init__(*args, **kwargs)
model_class = {
"Forest_class": RandomForestClassifier,
"Forest": RandomForestRegressor,
}[model]
self.model = model_class(
n_estimators=args_use["n_Tree"],
criterion=args_use["criterion"],
max_features=args_use["max_features"],
max_depth=args_use["max_depth"],
min_samples_split=args_use["min_samples_split"],
)
  1383. # 记录这两个是为了克隆
  1384. self.n_estimators = args_use["n_Tree"]
  1385. self.criterion = args_use["criterion"]
  1386. self.max_features = args_use["max_features"]
  1387. self.max_depth = args_use["max_depth"]
  1388. self.min_samples_split = args_use["min_samples_split"]
  1389. self.k = {
  1390. "n_estimators": args_use["n_Tree"],
  1391. "criterion": args_use["criterion"],
  1392. "max_features": args_use["max_features"],
  1393. "max_depth": args_use["max_depth"],
  1394. "min_samples_split": args_use["min_samples_split"],
  1395. }
  1396. self.model_Name = model
  1397. def data_visualization(self, save_dir, *args, **kwargs):
  1398. tab = Tab()
  1399. # 多个决策树可视化
  1400. for i in range(len(self.model.estimators_)):
  1401. with open(save_dir + rf"\Tree_Gra[{i}].dot", "w") as f:
  1402. export_graphviz(self.model.estimators_[i], out_file=f)
  1403. tab.add(see_tree(save_dir + rf"\Tree_Gra[{i}].dot"), f"[{i}]决策树可视化")
  1404. y = self.y_traindata
  1405. x_data = self.x_traindata
  1406. if self.model_Name == "Forest_class":
  1407. class_ = self.model.classes_.tolist()
  1408. class_heard = [f"类别[{i}]" for i in range(len(class_))]
  1409. get, x_means, x_range, data_type = training_visualization(x_data, class_, y)
  1410. for i in range(len(get)):
  1411. tab.add(get[i], f"{i}训练数据散点图")
  1412. get = decision_boundary(x_range, x_means, self.predict, class_, data_type)
  1413. for i in range(len(get)):
  1414. tab.add(get[i], f"{i}预测热力图")
  1415. tab.add(
  1416. make_tab(
  1417. class_heard + [f"普适预测第{i}特征" for i in range(len(x_means))],
  1418. [class_ + [f"{i}" for i in x_means]],
  1419. ),
  1420. "数据表",
  1421. )
  1422. else:
  1423. get, x_means, x_range, data_type = regress_visualization(x_data, y)
  1424. for i in range(len(get)):
  1425. tab.add(get[i], f"{i}预测类型图")
  1426. get = prediction_boundary(x_range, x_means, self.predict, data_type)
  1427. for i in range(len(get)):
  1428. tab.add(get[i], f"{i}预测热力图")
  1429. tab.add(
  1430. make_tab(
  1431. [f"普适预测第{i}特征" for i in range(len(x_means))],
  1432. [[f"{i}" for i in x_means]],
  1433. ),
  1434. "数据表",
  1435. )
  1436. des_to_csv(
  1437. save_dir,
  1438. "预测表",
  1439. [[f"{i}" for i in x_means]],
  1440. [f"普适预测第{i}特征" for i in range(len(x_means))],
  1441. )
  1442. save = save_dir + r"/随机森林.HTML"
  1443. tab.render(save) # 生成HTML
  1444. return save,
class GradienttreeModel(StudyMachinebase):  # gradient-boosted trees; inherits StudyMachinebase directly
  1446. def __init__(
  1447. self, args_use, model, *args, **kwargs
):  # model names the selected model type
super(GradienttreeModel, self).__init__(*args, **kwargs)  # TreeModel's initialiser is not needed
model_class = {
"GradientTree_class": GradientBoostingClassifier,
"GradientTree": GradientBoostingRegressor,
}[model]
self.model = model_class(
n_estimators=args_use["n_Tree"],
max_features=args_use["max_features"],
max_depth=args_use["max_depth"],
min_samples_split=args_use["min_samples_split"],
)
# recorded for cloning; only parameters the constructor actually accepts
self.n_estimators = args_use["n_Tree"]
self.max_features = args_use["max_features"]
self.max_depth = args_use["max_depth"]
self.min_samples_split = args_use["min_samples_split"]
self.k = {
"n_estimators": args_use["n_Tree"],
"max_features": args_use["max_features"],
"max_depth": args_use["max_depth"],
"min_samples_split": args_use["min_samples_split"],
}
self.model_Name = model
  1474. def data_visualization(self, save_dir, *args, **kwargs):
  1475. tab = Tab()
  1476. # 多个决策树可视化
  1477. for a in range(len(self.model.estimators_)):
  1478. for i in range(len(self.model.estimators_[a])):
  1479. with open(save_dir + rf"\Tree_Gra[{a},{i}].dot", "w") as f:
  1480. export_graphviz(self.model.estimators_[a][i], out_file=f)
  1481. tab.add(
  1482. see_tree(save_dir + rf"\Tree_Gra[{a},{i}].dot"), f"[{a},{i}]决策树可视化"
  1483. )
  1484. y = self.y_traindata
  1485. x_data = self.x_traindata
if self.model_Name == "GradientTree_class":
  1487. class_ = self.model.classes_.tolist()
  1488. class_heard = [f"类别[{i}]" for i in range(len(class_))]
  1489. get, x_means, x_range, data_type = training_visualization(x_data, class_, y)
  1490. for i in range(len(get)):
  1491. tab.add(get[i], f"{i}训练数据散点图")
  1492. get = decision_boundary(x_range, x_means, self.predict, class_, data_type)
  1493. for i in range(len(get)):
  1494. tab.add(get[i], f"{i}预测热力图")
  1495. tab.add(
  1496. make_tab(
  1497. class_heard + [f"普适预测第{i}特征" for i in range(len(x_means))],
  1498. [class_ + [f"{i}" for i in x_means]],
  1499. ),
  1500. "数据表",
  1501. )
  1502. else:
  1503. get, x_means, x_range, data_type = regress_visualization(x_data, y)
  1504. for i in range(len(get)):
  1505. tab.add(get[i], f"{i}预测类型图")
  1506. get = prediction_boundary(x_range, x_means, self.predict, data_type)
  1507. for i in range(len(get)):
  1508. tab.add(get[i], f"{i}预测热力图")
  1509. tab.add(
  1510. make_tab(
  1511. [f"普适预测第{i}特征" for i in range(len(x_means))],
  1512. [[f"{i}" for i in x_means]],
  1513. ),
  1514. "数据表",
  1515. )
  1516. des_to_csv(
  1517. save_dir,
  1518. "预测表",
  1519. [[f"{i}" for i in x_means]],
  1520. [f"普适预测第{i}特征" for i in range(len(x_means))],
  1521. )
  1522. save = save_dir + r"/梯度提升回归树.HTML"
  1523. tab.render(save) # 生成HTML
  1524. return save,
  1525. @plugin_class_loading(get_path(r"template/machinelearning"))
  1526. class SvcModel(StudyMachinebase):
  1527. def __init__(
  1528. self, args_use, model, *args, **kwargs
  1529. ): # model表示当前选用的模型类型,Alpha针对正则化的参数
  1530. super(SvcModel, self).__init__(*args, **kwargs)
  1531. self.model = SVC(
  1532. C=args_use["C"], gamma=args_use["gamma"], kernel=args_use["kernel"]
  1533. )
  1534. # 记录这两个是为了克隆
  1535. self.C = args_use["C"]
  1536. self.gamma = args_use["gamma"]
  1537. self.kernel = args_use["kernel"]
  1538. self.k = {
  1539. "C": args_use["C"],
  1540. "gamma": args_use["gamma"],
  1541. "kernel": args_use["kernel"],
  1542. }
  1543. self.model_Name = model
  1544. def data_visualization(self, save_dir, *args, **kwargs):
  1545. tab = Tab()
  1546. try:
  1547. w_list = self.model.coef_.tolist() # 未必有这个属性
  1548. b = self.model.intercept_.tolist()
  1549. except BaseException:
  1550. have_w = False
  1551. else:
  1552. have_w = True
  1553. class_ = self.model.classes_.tolist()
  1554. class_heard = [f"类别[{i}]" for i in range(len(class_))]
  1555. y = self.y_traindata
  1556. x_data = self.x_traindata
  1557. get, x_means, x_range, data_type = training_visualization(x_data, class_, y)
  1558. if have_w:
  1559. get_line: list = training_w(x_data, class_, y, w_list, b, x_means.copy())
  1560. for i in range(len(get)):
  1561. if have_w:
  1562. tab.add(get[i].overlap(get_line[i]), f"{i}决策边界散点图")
  1563. else:
  1564. tab.add(get[i], f"{i}决策边界散点图")
  1565. get = decision_boundary(x_range, x_means, self.predict, class_, data_type)
  1566. for i in range(len(get)):
  1567. tab.add(get[i], f"{i}预测热力图")
  1568. dic = {2: "离散", 1: "连续"}
  1569. tab.add(
  1570. make_tab(
  1571. class_heard
  1572. + [f"普适预测第{i}特征:{dic[data_type[i]]}" for i in range(len(x_means))],
  1573. [class_ + [f"{i}" for i in x_means]],
  1574. ),
  1575. "数据表",
  1576. )
if have_w:
des_to_csv(
save_dir, "系数表", w_list, [f"系数W[{i}]" for i in range(len(w_list[0]))]
)
des_to_csv(save_dir, "截距表", [b], [f"截距{i}" for i in range(len(b))])
  1583. des_to_csv(
  1584. save_dir,
  1585. "预测表",
  1586. [[f"{i}" for i in x_means]],
  1587. [f"普适预测第{i}特征" for i in range(len(x_means))],
  1588. )
  1589. save = save_dir + r"/支持向量机分类.HTML"
  1590. tab.render(save) # 生成HTML
  1591. return save,
  1592. @plugin_class_loading(get_path(r"template/machinelearning"))
  1593. class SvrModel(StudyMachinebase):
  1594. def __init__(
  1595. self, args_use, model, *args, **kwargs
  1596. ): # model表示当前选用的模型类型,Alpha针对正则化的参数
  1597. super(SvrModel, self).__init__(*args, **kwargs)
  1598. self.model = SVR(
  1599. C=args_use["C"], gamma=args_use["gamma"], kernel=args_use["kernel"]
  1600. )
  1601. # 记录这两个是为了克隆
  1602. self.C = args_use["C"]
  1603. self.gamma = args_use["gamma"]
  1604. self.kernel = args_use["kernel"]
  1605. self.k = {
  1606. "C": args_use["C"],
  1607. "gamma": args_use["gamma"],
  1608. "kernel": args_use["kernel"],
  1609. }
  1610. self.model_Name = model
  1611. def data_visualization(self, save_dir, *args, **kwargs):
  1612. tab = Tab()
  1613. x_data = self.x_traindata
  1614. y = self.y_traindata
  1615. try:
  1616. w_list = self.model.coef_.tolist() # 未必有这个属性
  1617. b = self.model.intercept_.tolist()
  1618. have_w = True
  1619. except BaseException:
  1620. have_w = False
  1621. get, x_means, x_range, data_type = regress_visualization(x_data, y)
  1622. if have_w:
  1623. get_line = regress_w(x_data, w_list, b, x_means.copy())
  1624. for i in range(len(get)):
  1625. if have_w:
  1626. tab.add(get[i].overlap(get_line[i]), f"{i}预测类型图")
  1627. else:
  1628. tab.add(get[i], f"{i}预测类型图")
  1629. get = prediction_boundary(x_range, x_means, self.predict, data_type)
  1630. for i in range(len(get)):
  1631. tab.add(get[i], f"{i}预测热力图")
if have_w:
des_to_csv(
save_dir, "系数表", w_list, [f"系数W[{i}]" for i in range(len(w_list[0]))]
)
des_to_csv(save_dir, "截距表", [b], [f"截距{i}" for i in range(len(b))])
  1638. des_to_csv(
  1639. save_dir,
  1640. "预测表",
  1641. [[f"{i}" for i in x_means]],
  1642. [f"普适预测第{i}特征" for i in range(len(x_means))],
  1643. )
  1644. tab.add(
  1645. make_tab(
  1646. [f"普适预测第{i}特征" for i in range(len(x_means))],
  1647. [[f"{i}" for i in x_means]],
  1648. ),
  1649. "数据表",
  1650. )
  1651. save = save_dir + r"/支持向量机回归.HTML"
  1652. tab.render(save) # 生成HTML
  1653. return save,
  1654. class VarianceModel(Unsupervised): # 无监督
  1655. def __init__(
  1656. self, args_use, model, *args, **kwargs
  1657. ): # model表示当前选用的模型类型,Alpha针对正则化的参数
  1658. super(VarianceModel, self).__init__(*args, **kwargs)
  1659. self.model = VarianceThreshold(threshold=(args_use["P"] * (1 - args_use["P"])))
  1660. # 记录这两个是为了克隆
  1661. self.threshold = args_use["P"]
  1662. self.k = {"threshold": args_use["P"]}
  1663. self.model_Name = model
  1664. def data_visualization(self, save_dir, *args, **kwargs):
  1665. tab = Tab()
var = self.model.variances_  # per-feature variances (not standard deviations)
  1667. y_data = self.y_testdata
  1668. if isinstance(y_data, np.ndarray):
  1669. get = feature_visualization(self.y_testdata)
  1670. for i in range(len(get)):
  1671. tab.add(get[i], f"[{i}]数据x-x散点图")
c = (
Bar()
.add_xaxis([f"[{i}]特征" for i in range(len(var))])
.add_yaxis("方差", var.tolist(), **label_setting)
.set_global_opts(
title_opts=opts.TitleOpts(title="方差柱状图"), **global_setting
)
)
tab.add(c, "数据方差")
  1681. save = save_dir + r"/方差特征选择.HTML"
  1682. tab.render(save) # 生成HTML
  1683. return save,
  1684. class SelectkbestModel(PrepBase): # 有监督
  1685. def __init__(self, args_use, model, *args, **kwargs):
  1686. super(SelectkbestModel, self).__init__(*args, **kwargs)
  1687. self.model = SelectKBest(k=args_use["k"], score_func=args_use["score_func"])
  1688. # 记录这两个是为了克隆
  1689. self.k_ = args_use["k"]
  1690. self.score_func = args_use["score_func"]
  1691. self.k = {"k": args_use["k"], "score_func": args_use["score_func"]}
  1692. self.model_Name = model
  1693. def data_visualization(self, save_dir, *args, **kwargs):
  1694. tab = Tab()
  1695. score = self.model.scores_.tolist()
  1696. support = self.model.get_support()
  1697. y_data = self.y_traindata
  1698. x_data = self.x_traindata
  1699. if isinstance(x_data, np.ndarray):
  1700. get = feature_visualization(x_data)
  1701. for i in range(len(get)):
  1702. tab.add(get[i], f"[{i}]训练数据x-x散点图")
  1703. if isinstance(y_data, np.ndarray):
  1704. get = feature_visualization(y_data)
  1705. for i in range(len(get)):
  1706. tab.add(get[i], f"[{i}]保留训练数据x-x散点图")
  1707. y_data = self.y_testdata
  1708. x_data = self.x_testdata
  1709. if isinstance(x_data, np.ndarray):
  1710. get = feature_visualization(x_data)
  1711. for i in range(len(get)):
  1712. tab.add(get[i], f"[{i}]数据x-x散点图")
  1713. if isinstance(y_data, np.ndarray):
  1714. get = feature_visualization(y_data)
  1715. for i in range(len(get)):
  1716. tab.add(get[i], f"[{i}]保留数据x-x散点图")
  1717. choose = []
  1718. un_choose = []
  1719. for i in range(len(score)):
  1720. if support[i]:
  1721. choose.append(score[i])
  1722. un_choose.append(0) # 占位
  1723. else:
  1724. un_choose.append(score[i])
  1725. choose.append(0)
  1726. c = (
  1727. Bar()
  1728. .add_xaxis([f"[{i}]特征" for i in range(len(score))])
  1729. .add_yaxis("选中特征", choose, **label_setting)
  1730. .add_yaxis("抛弃特征", un_choose, **label_setting)
  1731. .set_global_opts(
title_opts=opts.TitleOpts(title="特征得分柱状图"), **global_setting
  1733. )
  1734. )
  1735. tab.add(c, "单变量重要程度")
  1736. save = save_dir + r"/单一变量特征选择.HTML"
  1737. tab.render(save) # 生成HTML
  1738. return save,
class SelectFromModel(PrepBase):  # supervised feature selection
def __init__(
self, args_use, learner, *args, **kwargs
):  # learner supplies the estimator whose coefficients rank the features
super(SelectFromModel, self).__init__(*args, **kwargs)
self.model = learner.Model
# this class shadows sklearn's SelectFromModel, so alias the sklearn class explicitly
from sklearn.feature_selection import SelectFromModel as SklearnSelectFromModel
self.Select_Model = SklearnSelectFromModel(
estimator=learner.Model, max_features=args_use["k"], prefit=learner.have_Fit
)
  1748. self.max_features = args_use["k"]
  1749. self.estimator = learner.Model
  1750. self.k = {
  1751. "max_features": args_use["k"],
  1752. "estimator": learner.Model,
  1753. "have_Fit": learner.have_Fit,
  1754. }
  1755. self.have_fit = learner.have_Fit
  1756. self.model_Name = "SelectFrom_Model"
  1757. self.learner = learner
  1758. def fit_model(self, x_data, y_data, split=0.3, *args, **kwargs):
  1759. y_data = y_data.ravel()
  1760. if not self.have_fit: # 不允许第二次训练
  1761. self.Select_Model.fit(x_data, y_data)
  1762. self.have_fit = True
  1763. return "None", "None"
  1764. def predict(self, x_data, *args, **kwargs):
  1765. try:
  1766. self.x_testdata = x_data.copy()
  1767. x_predict = self.Select_Model.transform(x_data)
  1768. self.y_testdata = x_predict.copy()
  1769. self.have_predict = True
  1770. return x_predict, "模型特征工程"
  1771. except BaseException:
  1772. self.have_predict = True
  1773. return np.array([]), "无结果工程"
  1774. def data_visualization(self, save_dir, *args, **kwargs):
  1775. tab = Tab()
  1776. support = self.Select_Model.get_support()
  1777. y_data = self.y_testdata
  1778. x_data = self.x_testdata
  1779. if isinstance(x_data, np.ndarray):
  1780. get = feature_visualization(x_data)
  1781. for i in range(len(get)):
  1782. tab.add(get[i], f"[{i}]数据x-x散点图")
  1783. if isinstance(y_data, np.ndarray):
  1784. get = feature_visualization(y_data)
  1785. for i in range(len(get)):
  1786. tab.add(get[i], f"[{i}]保留数据x-x散点图")
  1787. def make_bar(score):
  1788. choose = []
  1789. un_choose = []
  1790. for i in range(len(score)):
  1791. if support[i]:
  1792. choose.append(abs(score[i]))
  1793. un_choose.append(0) # 占位
  1794. else:
  1795. un_choose.append(abs(score[i]))
  1796. choose.append(0)
  1797. c = (
  1798. Bar()
  1799. .add_xaxis([f"[{i}]特征" for i in range(len(score))])
  1800. .add_yaxis("选中特征", choose, **label_setting)
  1801. .add_yaxis("抛弃特征", un_choose, **label_setting)
  1802. .set_global_opts(
title_opts=opts.TitleOpts(title="特征重要性柱状图"), **global_setting
  1804. )
  1805. )
  1806. tab.add(c, "单变量重要程度")
  1807. try:
  1808. make_bar(self.model.coef_)
  1809. except BaseException:
  1810. try:
  1811. make_bar(self.model.feature_importances_)
  1812. except BaseException:
  1813. pass
  1814. save = save_dir + r"/模型特征选择.HTML"
  1815. tab.render(save) # 生成HTML
  1816. return save,
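# A minimal sketch of the scikit-learn machinery wrapped above (nothing beyond the
# public sklearn API is assumed): the estimator's coefficients rank the features,
# get_support() marks the kept ones and transform() drops the rest, which is what
# SelectFromModel.predict returns as its "feature engineering" result.
def _select_from_model_sketch():
    import numpy as np
    from sklearn.feature_selection import SelectFromModel as SklearnSelectFromModel
    from sklearn.linear_model import LogisticRegression
    x = np.array([[1.0, 0.0, 5.0], [2.0, 0.1, 4.0], [3.0, 0.0, 3.0], [4.0, 0.1, 2.0]])
    y = np.array([0, 0, 1, 1])
    selector = SklearnSelectFromModel(LogisticRegression(), max_features=2).fit(x, y)
    return selector.get_support(), selector.transform(x)  # mask + reduced matrix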
  1817. class StandardizationModel(Unsupervised): # z-score标准化 无监督
  1818. def __init__(self, *args, **kwargs):
  1819. super(StandardizationModel, self).__init__(*args, **kwargs)
  1820. self.model = StandardScaler()
  1821. self.k = {}
  1822. self.model_Name = "StandardScaler"
  1823. def data_visualization(self, save_dir, *args, **kwargs):
  1824. tab = Tab()
  1825. y_data = self.y_testdata
  1826. x_data = self.x_testdata
var = self.model.var_.tolist()
means = self.model.mean_.tolist()
scale = self.model.scale_.tolist()
conversion_control(y_data, x_data, tab)
make_bar("方差", var, tab)
make_bar("均值", means, tab)
make_bar("Scale", scale, tab)
  1834. save = save_dir + r"/z-score标准化.HTML"
  1835. tab.render(save) # 生成HTML
  1836. return save,
  1837. class MinmaxscalerModel(Unsupervised): # 离差标准化
  1838. def __init__(self, args_use, *args, **kwargs):
  1839. super(MinmaxscalerModel, self).__init__(*args, **kwargs)
  1840. self.model = MinMaxScaler(feature_range=args_use["feature_range"])
  1841. self.k = {}
  1842. self.model_Name = "MinMaxScaler"
  1843. def data_visualization(self, save_dir, *args, **kwargs):
  1844. tab = Tab()
  1845. y_data = self.y_testdata
  1846. x_data = self.x_testdata
  1847. scale = self.model.scale_.tolist()
  1848. max_ = self.model.data_max_.tolist()
  1849. min_ = self.model.data_min_.tolist()
  1850. conversion_control(y_data, x_data, tab)
  1851. make_bar("Scale", scale, tab)
  1852. tab.add(
  1853. make_tab(
  1854. heard=[f"[{i}]特征最大值" for i in range(len(max_))]
  1855. + [f"[{i}]特征最小值" for i in range(len(min_))],
  1856. row=[max_ + min_],
  1857. ),
  1858. "数据表格",
  1859. )
  1860. save = save_dir + r"/离差标准化.HTML"
  1861. tab.render(save) # 生成HTML
  1862. return save,
  1863. class LogscalerModel(PrepBase): # 对数标准化
  1864. def __init__(self, *args, **kwargs):
  1865. super(LogscalerModel, self).__init__(*args, **kwargs)
  1866. self.model = None
  1867. self.k = {}
  1868. self.model_Name = "LogScaler"
  1869. def fit_model(self, x_data, *args, **kwargs):
  1870. if not self.have_predict: # 不允许第二次训练
  1871. self.max_logx = np.log(x_data.max())
  1872. self.have_fit = True
  1873. return "None", "None"
  1874. def predict(self, x_data, *args, **kwargs):
  1875. try:
  1876. max_logx = self.max_logx
  1877. except BaseException:
  1878. self.have_fit = False
  1879. self.fit_model(x_data)
  1880. max_logx = self.max_logx
  1881. self.x_testdata = x_data.copy()
  1882. x_predict = np.log(x_data) / max_logx
  1883. self.y_testdata = x_predict.copy()
  1884. self.have_predict = True
  1885. return x_predict, "对数变换"
  1886. def data_visualization(self, save_dir, *args, **kwargs):
  1887. tab = Tab()
  1888. y_data = self.y_testdata
  1889. x_data = self.x_testdata
  1890. conversion_control(y_data, x_data, tab)
  1891. tab.add(make_tab(heard=["最大对数值(自然对数)"], row=[[str(self.max_logx)]]), "数据表格")
  1892. save = save_dir + r"/对数标准化.HTML"
  1893. tab.render(save) # 生成HTML
  1894. return save,
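# A quick numeric check (illustrative only) of the log scaling above:
# x' = ln(x) / ln(x_max), so the maximum maps to 1 and smaller values shrink
# monotonically. Inputs must be strictly positive for the logarithm to exist.
def _log_scaler_sketch():
    import numpy as np
    x = np.array([1.0, 10.0, 100.0])
    return np.log(x) / np.log(x.max())  # -> [0.0, 0.5, 1.0]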
  1895. class AtanscalerModel(PrepBase): # atan标准化
  1896. def __init__(self, *args, **kwargs):
  1897. super(AtanscalerModel, self).__init__(*args, **kwargs)
  1898. self.model = None
  1899. self.k = {}
  1900. self.model_Name = "atanScaler"
  1901. def fit_model(self, x_data, *args, **kwargs):
  1902. self.have_fit = True
  1903. return "None", "None"
  1904. def predict(self, x_data, *args, **kwargs):
  1905. self.x_testdata = x_data.copy()
  1906. x_predict = np.arctan(x_data) * (2 / np.pi)
  1907. self.y_testdata = x_predict.copy()
  1908. self.have_predict = True
  1909. return x_predict, "atan变换"
  1910. def data_visualization(self, save_dir, *args, **kwargs):
  1911. tab = Tab()
  1912. y_data = self.y_testdata
  1913. x_data = self.x_testdata
  1914. conversion_control(y_data, x_data, tab)
  1915. save = save_dir + r"/反正切函数标准化.HTML"
  1916. tab.render(save) # 生成HTML
  1917. return save,
  1918. class DecimalscalerModel(PrepBase): # 小数定标准化
  1919. def __init__(self, *args, **kwargs):
  1920. super(DecimalscalerModel, self).__init__(*args, **kwargs)
  1921. self.model = None
  1922. self.k = {}
  1923. self.model_Name = "Decimal_normalization"
  1924. def fit_model(self, x_data, *args, **kwargs):
  1925. if not self.have_predict: # 不允许第二次训练
  1926. self.j = max([judging_digits(x_data.max()), judging_digits(x_data.min())])
  1927. self.have_fit = True
  1928. return "None", "None"
  1929. def predict(self, x_data, *args, **kwargs):
  1930. self.x_testdata = x_data.copy()
  1931. try:
  1932. j = self.j
  1933. except BaseException:
  1934. self.have_fit = False
  1935. self.fit_model(x_data)
  1936. j = self.j
  1937. x_predict = x_data / (10 ** j)
  1938. self.y_testdata = x_predict.copy()
  1939. self.have_predict = True
  1940. return x_predict, "小数定标标准化"
  1941. def data_visualization(self, save_dir, *args, **kwargs):
  1942. tab = Tab()
  1943. y_data = self.y_testdata
  1944. x_data = self.x_testdata
  1945. j = self.j
  1946. conversion_control(y_data, x_data, tab)
  1947. tab.add(make_tab(heard=["小数位数:j"], row=[[j]]), "数据表格")
  1948. save = save_dir + r"/小数定标标准化.HTML"
  1949. tab.render(save) # 生成HTML
  1950. return save,
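# A quick numeric check (illustrative only) of decimal scaling: j is the digit
# count of the largest absolute value (judging_digits above; hard-coded here),
# and dividing by 10**j squeezes every value into (-1, 1).
def _decimal_scaler_sketch():
    import numpy as np
    x = np.array([-150.0, 25.0, 999.0])
    j = 3                        # 999 has three digits
    return x / (10 ** j)         # -> [-0.15, 0.025, 0.999]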
  1951. class MapzoomModel(PrepBase): # 映射标准化
  1952. def __init__(self, args_use, *args, **kwargs):
  1953. super(MapzoomModel, self).__init__(*args, **kwargs)
  1954. self.model = None
  1955. self.feature_range = args_use["feature_range"]
  1956. self.k = {}
self.model_Name = "Mapzoom"
  1958. def fit_model(self, x_data, *args, **kwargs):
  1959. if not self.have_predict: # 不允许第二次训练
  1960. self.max_ = x_data.max()
  1961. self.min_ = x_data.min()
  1962. self.have_fit = True
  1963. return "None", "None"
  1964. def predict(self, x_data, *args, **kwargs):
  1965. self.x_testdata = x_data.copy()
  1966. try:
  1967. max_ = self.max_
  1968. min_ = self.min_
  1969. except BaseException:
  1970. self.have_fit = False
  1971. self.fit_model(x_data)
  1972. max_ = self.max_
  1973. min_ = self.min_
# affine map of [min_, max_] onto feature_range
x_predict = self.feature_range[0] + (x_data - min_) * (
self.feature_range[1] - self.feature_range[0]
) / (max_ - min_)
  1977. self.y_testdata = x_predict.copy()
  1978. self.have_predict = True
  1979. return x_predict, "映射标准化"
  1980. def data_visualization(self, save_dir, *args, **kwargs):
  1981. tab = Tab()
  1982. y_data = self.y_testdata
  1983. x_data = self.x_testdata
  1984. max_ = self.max_
  1985. min_ = self.min_
  1986. conversion_control(y_data, x_data, tab)
  1987. tab.add(make_tab(heard=["最大值", "最小值"], row=[[max_, min_]]), "数据表格")
  1988. save = save_dir + r"/映射标准化.HTML"
  1989. tab.render(save) # 生成HTML
  1990. return save,
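# A quick numeric check (illustrative only) of the affine mapping used by
# MapzoomModel.predict: the fitted interval [min_, max_] is sent onto feature_range.
def _map_zoom_sketch():
    import numpy as np
    x = np.array([2.0, 4.0, 6.0])   # fitted: min_ = 2, max_ = 6
    low, high = 0.0, 1.0            # feature_range
    return low + (x - x.min()) * (high - low) / (x.max() - x.min())  # -> [0, 0.5, 1]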
  1991. class SigmodscalerModel(PrepBase): # sigmod变换
  1992. def __init__(self, *args, **kwargs):
  1993. super(SigmodscalerModel, self).__init__(*args, **kwargs)
  1994. self.model = None
  1995. self.k = {}
  1996. self.model_Name = "sigmodScaler_Model"
  1997. def fit_model(self, x_data, *args, **kwargs):
  1998. self.have_fit = True
  1999. return "None", "None"
def predict(self, x_data: np.ndarray, *args, **kwargs):
  2001. self.x_testdata = x_data.copy()
  2002. x_predict = 1 / (1 + np.exp(-x_data))
  2003. self.y_testdata = x_predict.copy()
  2004. self.have_predict = True
  2005. return x_predict, "Sigmod变换"
  2006. def data_visualization(self, save_dir, *args, **kwargs):
  2007. tab = Tab()
  2008. y_data = self.y_testdata
  2009. x_data = self.x_testdata
  2010. conversion_control(y_data, x_data, tab)
  2011. save = save_dir + r"/Sigmoid变换.HTML"
  2012. tab.render(save) # 生成HTML
  2013. return save,
  2014. class FuzzyQuantizationModel(PrepBase): # 模糊量化标准化
  2015. def __init__(self, args_use, *args, **kwargs):
  2016. super(FuzzyQuantizationModel, self).__init__(*args, **kwargs)
  2017. self.model = None
  2018. self.feature_range = args_use["feature_range"]
  2019. self.k = {}
  2020. self.model_Name = "Fuzzy_quantization"
  2021. def fit_model(self, x_data, *args, **kwargs):
  2022. if not self.have_predict: # 不允许第二次训练
self.max_ = x_data.max()
self.min_ = x_data.min()
  2025. self.have_fit = True
  2026. return "None", "None"
  2027. def predict(self, x_data, *args, **kwargs):
  2028. self.x_testdata = x_data.copy()
  2029. try:
max_ = self.max_
min_ = self.min_
except BaseException:
self.have_fit = False
self.fit_model(x_data)
max_ = self.max_
min_ = self.min_
  2037. x_predict = 1 / 2 + (1 / 2) * np.sin(
  2038. np.pi / (max_ - min_) * (x_data - (max_ - min_) / 2)
  2039. )
  2040. self.y_testdata = x_predict.copy()
  2041. self.have_predict = True
  2042. return x_predict, "模糊量化标准化"
  2043. def data_visualization(self, save_dir, *args, **kwargs):
  2044. tab = Tab()
y_data = self.y_testdata
x_data = self.x_testdata
max_ = self.max_
min_ = self.min_
  2049. conversion_control(y_data, x_data, tab)
  2050. tab.add(make_tab(heard=["最大值", "最小值"], row=[[max_, min_]]), "数据表格")
  2051. save = save_dir + r"/模糊量化标准化.HTML"
  2052. tab.render(save) # 生成HTML
  2053. return save,
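# A quick numeric check (illustrative only) of the sine membership curve used by
# FuzzyQuantizationModel: with data starting at 0 the endpoints land exactly on
# 0 and 1. Note the curve centres on (max_ - min_) / 2 rather than the midpoint
# (max_ + min_) / 2, so it is exact only when min_ is 0.
def _fuzzy_quantization_sketch():
    import numpy as np
    x = np.array([0.0, 5.0, 10.0])   # min_ = 0, max_ = 10
    max_, min_ = x.max(), x.min()
    return 1 / 2 + (1 / 2) * np.sin(np.pi / (max_ - min_) * (x - (max_ - min_) / 2))
    # -> [0.0, 0.5, 1.0]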
  2054. class RegularizationModel(Unsupervised): # 正则化
  2055. def __init__(self, args_use, *args, **kwargs):
  2056. super(RegularizationModel, self).__init__(*args, **kwargs)
  2057. self.model = Normalizer(norm=args_use["norm"])
  2058. self.k = {"norm": args_use["norm"]}
  2059. self.model_Name = "Regularization"
  2060. def data_visualization(self, save_dir, *args, **kwargs):
  2061. tab = Tab()
  2062. y_data = self.y_testdata.copy()
  2063. x_data = self.x_testdata.copy()
  2064. conversion_control(y_data, x_data, tab)
  2065. save = save_dir + r"/正则化.HTML"
  2066. tab.render(save) # 生成HTML
  2067. return save,
  2068. # 离散数据
  2069. class BinarizerModel(Unsupervised): # 二值化
  2070. def __init__(self, args_use, *args, **kwargs):
  2071. super(BinarizerModel, self).__init__(*args, **kwargs)
  2072. self.model = Binarizer(threshold=args_use["threshold"])
  2073. self.k = {}
  2074. self.model_Name = "Binarizer"
  2075. def data_visualization(self, save_dir, *args, **kwargs):
  2076. tab = Tab()
  2077. y_data = self.y_testdata
  2078. x_data = self.x_testdata
  2079. get_y = discrete_feature_visualization(y_data, "转换数据") # 转换
  2080. for i in range(len(get_y)):
  2081. tab.add(get_y[i], f"[{i}]数据x-x离散散点图")
  2082. heard = [f"特征:{i}" for i in range(len(x_data[0]))]
  2083. tab.add(make_tab(heard, x_data.tolist()), f"原数据")
  2084. tab.add(make_tab(heard, y_data.tolist()), f"编码数据")
  2085. tab.add(make_tab(heard, np.dstack((x_data, y_data)).tolist()), f"合成[原数据,编码]数据")
  2086. save = save_dir + r"/二值离散化.HTML"
  2087. tab.render(save) # 生成HTML
  2088. return save,
  2089. class DiscretizationModel(PrepBase): # n值离散
  2090. def __init__(self, args_use, *args, **kwargs):
  2091. super(DiscretizationModel, self).__init__(*args, **kwargs)
  2092. self.model = None
  2093. range_ = args_use["split_range"]
if not range_:
raise ValueError("split_range must contain at least one threshold")
  2096. elif len(range_) == 1:
  2097. range_.append(range_[0])
  2098. self.range = range_
  2099. self.k = {}
  2100. self.model_Name = "Discretization"
  2101. def fit_model(self, *args, **kwargs):
  2102. # t值在模型创建时已经保存
  2103. self.have_fit = True
  2104. return "None", "None"
  2105. def predict(self, x_data, *args, **kwargs):
  2106. self.x_testdata = x_data.copy()
  2107. x_predict = x_data.copy() # 复制
  2108. range_ = self.range
  2109. bool_list = []
  2110. max_ = len(range_) - 1
  2111. o_t = None
  2112. for i in range(len(range_)):
  2113. try:
  2114. t = float(range_[i])
  2115. except BaseException:
  2116. continue
  2117. if o_t is None: # 第一个参数
  2118. bool_list.append(x_predict <= t)
  2119. else:
bool_list.append((o_t <= x_predict) & (x_predict < t))  # elementwise AND of both bounds
  2121. if i == max_:
  2122. bool_list.append(t <= x_predict)
  2123. o_t = t
  2124. for i in range(len(bool_list)):
  2125. x_predict[bool_list[i]] = i
  2126. self.y_testdata = x_predict.copy()
  2127. self.have_predict = True
  2128. return x_predict, f"{len(bool_list)}值离散化"
  2129. def data_visualization(self, save_dir, *args, **kwargs):
  2130. tab = Tab()
  2131. y_data = self.y_testdata
  2132. x_data = self.x_testdata
  2133. get_y = discrete_feature_visualization(y_data, "转换数据") # 转换
  2134. for i in range(len(get_y)):
  2135. tab.add(get_y[i], f"[{i}]数据x-x离散散点图")
  2136. heard = [f"特征:{i}" for i in range(len(x_data[0]))]
  2137. tab.add(make_tab(heard, x_data.tolist()), f"原数据")
  2138. tab.add(make_tab(heard, y_data.tolist()), f"编码数据")
  2139. tab.add(make_tab(heard, np.dstack((x_data, y_data)).tolist()), f"合成[原数据,编码]数据")
  2140. save = save_dir + r"/多值离散化.HTML"
  2141. tab.render(save) # 生成HTML
  2142. return save,
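# A minimal usage sketch of the discretizer above (assumption: PrepBase.__init__
# accepts being called with no extra required arguments). Thresholds [1, 3] build
# three bins: x <= 1 -> 0, 1 <= x < 3 -> 1, x >= 3 -> 2.
def _discretization_sketch():
    import numpy as np
    model = DiscretizationModel({"split_range": [1, 3]})
    codes, name = model.predict(np.array([0.5, 2.0, 5.0]))
    return codes, name               # -> [0.0, 1.0, 2.0], "3值离散化"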
  2143. class LabelModel(PrepBase): # 数字编码
  2144. def __init__(self, *args, **kwargs):
  2145. super(LabelModel, self).__init__(*args, **kwargs)
  2146. self.model = []
  2147. self.k = {}
  2148. self.model_Name = "LabelEncoder"
  2149. def fit_model(self, x_data, *args, **kwargs):
  2150. if not self.have_predict: # 不允许第二次训练
  2151. self.model = []
  2152. if x_data.ndim == 1:
  2153. x_data = np.array([x_data])
  2154. for i in range(x_data.shape[1]):
  2155. self.model.append(
  2156. LabelEncoder().fit(np.ravel(x_data[:, i]))
  2157. ) # 训练机器(每个特征一个学习器)
  2158. self.have_fit = True
  2159. return "None", "None"
  2160. def predict(self, x_data, *args, **kwargs):
  2161. self.x_testdata = x_data.copy()
  2162. x_predict = x_data.copy()
  2163. if x_data.ndim == 1:
  2164. x_data = np.array([x_data])
  2165. for i in range(x_data.shape[1]):
  2166. x_predict[:, i] = self.model[i].transform(x_data[:, i])
  2167. self.y_testdata = x_predict.copy()
  2168. self.have_predict = True
  2169. return x_predict, "数字编码"
  2170. def data_visualization(self, save_dir, *args, **kwargs):
  2171. tab = Tab()
  2172. x_data = self.x_testdata
  2173. y_data = self.y_testdata
  2174. get_y = discrete_feature_visualization(y_data, "转换数据") # 转换
  2175. for i in range(len(get_y)):
  2176. tab.add(get_y[i], f"[{i}]数据x-x离散散点图")
  2177. heard = [f"特征:{i}" for i in range(len(x_data[0]))]
  2178. tab.add(make_tab(heard, x_data.tolist()), f"原数据")
  2179. tab.add(make_tab(heard, y_data.tolist()), f"编码数据")
  2180. tab.add(make_tab(heard, np.dstack((x_data, y_data)).tolist()), f"合成[原数据,编码]数据")
  2181. save = save_dir + r"/数字编码.HTML"
  2182. tab.render(save) # 生成HTML
  2183. return save,
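# A minimal sketch (plain scikit-learn, illustrative only) of what LabelModel does:
# one LabelEncoder per column, each mapping that column's values onto 0..n-1.
def _label_encoding_sketch():
    import numpy as np
    from sklearn.preprocessing import LabelEncoder
    x = np.array([[10, 500], [20, 500], [10, 700]])
    encoders = [LabelEncoder().fit(x[:, i]) for i in range(x.shape[1])]
    return np.column_stack(
        [encoders[i].transform(x[:, i]) for i in range(x.shape[1])]
    )                                # -> [[0, 0], [1, 0], [0, 1]]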
  2184. class OneHotEncoderModel(PrepBase): # 独热编码
  2185. def __init__(self, args_use, *args, **kwargs):
  2186. super(OneHotEncoderModel, self).__init__(*args, **kwargs)
  2187. self.model = []
  2188. self.ndim_up = args_use["ndim_up"]
  2189. self.k = {}
  2190. self.model_Name = "OneHotEncoder"
  2191. self.OneHot_Data = None # 三维独热编码
  2192. def fit_model(self, x_data, *args, **kwargs):
  2193. if not self.have_predict: # 不允许第二次训练
if x_data.ndim == 1:
x_data = np.array([x_data])  # promote to 2D so .shape[1] below exists
  2196. for i in range(x_data.shape[1]):
  2197. data = np.expand_dims(x_data[:, i], axis=1) # 独热编码需要升维
  2198. self.model.append(OneHotEncoder().fit(data)) # 训练机器
  2199. self.have_fit = True
  2200. return "None", "None"
  2201. def predict(self, x_data, *args, **kwargs):
  2202. self.x_testdata = x_data.copy()
  2203. x_new = []
  2204. for i in range(x_data.shape[1]):
  2205. data = np.expand_dims(x_data[:, i], axis=1) # 独热编码需要升维
  2206. one_hot = self.model[i].transform(data).toarray().tolist()
  2207. x_new.append(one_hot) # 添加到列表中
# each row of x_new holds the one-hot codes of one original column (done with plain loops; see the reshape sketch after this class)
  2209. x_new = np.array(x_new)
  2210. x_predict = []
  2211. for i in range(x_new.shape[1]):
  2212. x_predict.append(x_new[:, i])
  2213. x_predict = np.array(x_predict) # 转换回array
  2214. self.OneHot_Data = x_predict.copy() # 保存未降维数据
  2215. if not self.ndim_up: # 压缩操作
  2216. new_x_predict = []
  2217. for i in x_predict:
  2218. new_list = []
  2219. list_ = i.tolist()
  2220. for a in list_:
  2221. new_list += a
  2222. new = np.array(new_list)
  2223. new_x_predict.append(new)
  2224. self.y_testdata = np.array(new_x_predict)
  2225. return self.y_testdata.copy(), "独热编码"
  2226. self.y_testdata = self.OneHot_Data
  2227. self.have_predict = True
  2228. return x_predict, "独热编码"
  2229. def data_visualization(self, save_dir, *args, **kwargs):
  2230. tab = Tab()
  2231. y_data = self.y_testdata
  2232. x_data = self.x_testdata
  2233. oh_data = self.OneHot_Data
  2234. if not self.ndim_up:
  2235. get_y = discrete_feature_visualization(y_data, "转换数据") # 转换
  2236. for i in range(len(get_y)):
  2237. tab.add(get_y[i], f"[{i}]数据x-x离散散点图")
  2238. heard = [f"特征:{i}" for i in range(len(x_data[0]))]
  2239. tab.add(make_tab(heard, x_data.tolist()), f"原数据")
  2240. tab.add(make_tab(heard, oh_data.tolist()), f"编码数据")
  2241. tab.add(make_tab(heard, np.dstack((oh_data, x_data)).tolist()), f"合成[原数据,编码]数据")
  2242. tab.add(
  2243. make_tab([f"编码:{i}" for i in range(len(y_data[0]))], y_data.tolist()), f"数据"
  2244. )
  2245. save = save_dir + r"/独热编码.HTML"
  2246. tab.render(save) # 生成HTML
  2247. return save,
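# A minimal sketch (illustrative only) of the flattening performed when ndim_up is
# False: the stacked one-hot blocks of shape (samples, features, width) collapse to
# (samples, features * width). numpy's reshape gives the same result as the Python
# loops in predict above.
def _one_hot_flatten_sketch():
    import numpy as np
    one_hot_3d = np.array(
        [[[1, 0], [0, 1]],   # sample 0: codes for feature 0 and feature 1
         [[0, 1], [1, 0]]]   # sample 1
    )
    return one_hot_3d.reshape(one_hot_3d.shape[0], -1)  # -> [[1, 0, 0, 1], [0, 1, 1, 0]]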
  2248. class MissedModel(Unsupervised): # 缺失数据补充
  2249. def __init__(self, args_use, *args, **kwargs):
  2250. super(MissedModel, self).__init__(*args, **kwargs)
  2251. self.model = SimpleImputer(
  2252. missing_values=args_use["miss_value"],
  2253. strategy=args_use["fill_method"],
  2254. fill_value=args_use["fill_value"],
  2255. )
  2256. self.k = {}
  2257. self.model_Name = "Missed"
  2258. def predict(self, x_data, *args, **kwargs):
  2259. self.x_testdata = x_data.copy()
  2260. x_predict = self.model.transform(x_data)
  2261. self.y_testdata = x_predict.copy()
  2262. self.have_predict = True
  2263. return x_predict, "填充缺失"
  2264. def data_visualization(self, save_dir, *args, **kwargs):
  2265. tab = Tab()
  2266. y_data = self.y_testdata
  2267. x_data = self.x_testdata
  2268. statistics = self.model.statistics_.tolist()
  2269. conversion_control(y_data, x_data, tab)
  2270. tab.add(
  2271. make_tab([f"特征[{i}]" for i in range(len(statistics))], [statistics]), "填充值"
  2272. )
  2273. save = save_dir + r"/缺失数据填充.HTML"
  2274. tab.render(save) # 生成HTML
  2275. return save,
  2276. @plugin_class_loading(get_path(r"template/machinelearning"))
  2277. class PcaModel(Unsupervised):
  2278. def __init__(self, args_use, *args, **kwargs):
  2279. super(PcaModel, self).__init__(*args, **kwargs)
  2280. self.model = PCA(
  2281. n_components=args_use["n_components"], whiten=args_use["white_PCA"]
  2282. )
  2283. self.whiten = args_use["white_PCA"]
  2284. self.n_components = args_use["n_components"]
  2285. self.k = {
  2286. "n_components": args_use["n_components"],
  2287. "whiten": args_use["white_PCA"],
  2288. }
  2289. self.model_Name = "PCA"
  2290. def predict(self, x_data, *args, **kwargs):
  2291. self.x_testdata = x_data.copy()
  2292. x_predict = self.model.transform(x_data)
  2293. self.y_testdata = x_predict.copy()
  2294. self.have_predict = True
  2295. return x_predict, "PCA"
  2296. def data_visualization(self, save_dir, *args, **kwargs):
  2297. tab = Tab()
  2298. y_data = self.y_testdata
  2299. importance = self.model.components_.tolist()
  2300. var = self.model.explained_variance_.tolist() # 方量差
  2301. conversion_separate_format(y_data, tab)
  2302. x_data = [f"第{i+1}主成分" for i in range(len(importance))] # 主成分
  2303. y_data = [f"特征[{i}]" for i in range(len(importance[0]))] # 主成分
  2304. value = [
  2305. (f"第{i+1}主成分", f"特征[{j}]", importance[i][j])
  2306. for i in range(len(importance))
  2307. for j in range(len(importance[i]))
  2308. ]
  2309. c = (
  2310. HeatMap()
  2311. .add_xaxis(x_data)
  2312. .add_yaxis(f"", y_data, value, **label_setting) # value的第一个数值是x
  2313. .set_global_opts(
  2314. title_opts=opts.TitleOpts(title="预测热力图"),
  2315. **global_not_legend,
  2316. yaxis_opts=opts.AxisOpts(is_scale=True), # 'category'
  2317. xaxis_opts=opts.AxisOpts(is_scale=True),
  2318. visualmap_opts=opts.VisualMapOpts(
  2319. is_show=True,
  2320. max_=int(self.model.components_.max()) + 1,
  2321. min_=int(self.model.components_.min()),
  2322. pos_right="3%",
  2323. ),
  2324. ) # 显示
  2325. )
  2326. tab.add(c, "成分热力图")
  2327. c = (
  2328. Bar()
  2329. .add_xaxis([f"第[{i}]主成分" for i in range(len(var))])
  2330. .add_yaxis("方量差", var, **label_setting)
  2331. .set_global_opts(
  2332. title_opts=opts.TitleOpts(title="方量差柱状图"), **global_setting
  2333. )
  2334. )
  2335. des_to_csv(save_dir, "成分重要性", importance, [x_data], [y_data])
  2336. des_to_csv(save_dir, "方量差", [var], [f"第[{i}]主成分" for i in range(len(var))])
  2337. tab.add(c, "方量差柱状图")
  2338. save = save_dir + r"/主成分分析.HTML"
  2339. tab.render(save) # 生成HTML
  2340. return save,
  2341. @plugin_class_loading(get_path(r"template/machinelearning"))
  2342. class RpcaModel(Unsupervised):
  2343. def __init__(self, args_use, *args, **kwargs):
  2344. super(RpcaModel, self).__init__(*args, **kwargs)
  2345. self.model = IncrementalPCA(
  2346. n_components=args_use["n_components"], whiten=args_use["white_PCA"]
  2347. )
  2348. self.n_components = args_use["n_components"]
  2349. self.whiten = args_use["white_PCA"]
  2350. self.k = {
  2351. "n_components": args_use["n_components"],
  2352. "whiten": args_use["white_PCA"],
  2353. }
  2354. self.model_Name = "RPCA"
  2355. def predict(self, x_data, *args, **kwargs):
  2356. self.x_testdata = x_data.copy()
  2357. x_predict = self.model.transform(x_data)
  2358. self.y_testdata = x_predict.copy()
  2359. self.have_predict = True
  2360. return x_predict, "RPCA"
  2361. def data_visualization(self, save_dir, *args, **kwargs):
  2362. tab = Tab()
y_data = self.y_testdata
  2364. importance = self.model.components_.tolist()
  2365. var = self.model.explained_variance_.tolist() # 方量差
  2366. conversion_separate_format(y_data, tab)
  2367. x_data = [f"第{i + 1}主成分" for i in range(len(importance))] # 主成分
  2368. y_data = [f"特征[{i}]" for i in range(len(importance[0]))] # 主成分
  2369. value = [
  2370. (f"第{i + 1}主成分", f"特征[{j}]", importance[i][j])
  2371. for i in range(len(importance))
  2372. for j in range(len(importance[i]))
  2373. ]
  2374. c = (
  2375. HeatMap()
  2376. .add_xaxis(x_data)
  2377. .add_yaxis(f"", y_data, value, **label_setting) # value的第一个数值是x
  2378. .set_global_opts(
  2379. title_opts=opts.TitleOpts(title="预测热力图"),
  2380. **global_not_legend,
  2381. yaxis_opts=opts.AxisOpts(is_scale=True), # 'category'
  2382. xaxis_opts=opts.AxisOpts(is_scale=True),
  2383. visualmap_opts=opts.VisualMapOpts(
  2384. is_show=True,
  2385. max_=int(self.model.components_.max()) + 1,
  2386. min_=int(self.model.components_.min()),
  2387. pos_right="3%",
  2388. ),
  2389. ) # 显示
  2390. )
  2391. tab.add(c, "成分热力图")
  2392. c = (
  2393. Bar()
  2394. .add_xaxis([f"第[{i}]主成分" for i in range(len(var))])
  2395. .add_yaxis("放量差", var, **label_setting)
  2396. .set_global_opts(
  2397. title_opts=opts.TitleOpts(title="方量差柱状图"), **global_setting
  2398. )
  2399. )
  2400. tab.add(c, "方量差柱状图")
  2401. des_to_csv(save_dir, "成分重要性", importance, [x_data], [y_data])
  2402. des_to_csv(save_dir, "方量差", [var], [f"第[{i}]主成分" for i in range(len(var))])
  2403. save = save_dir + r"/RPCA(主成分分析).HTML"
  2404. tab.render(save) # 生成HTML
  2405. return save,
  2406. @plugin_class_loading(get_path(r"template/machinelearning"))
  2407. class KpcaModel(Unsupervised):
  2408. def __init__(self, args_use, *args, **kwargs):
  2409. super(KpcaModel, self).__init__(*args, **kwargs)
  2410. self.model = KernelPCA(
  2411. n_components=args_use["n_components"], kernel=args_use["kernel"]
  2412. )
  2413. self.n_components = args_use["n_components"]
  2414. self.kernel = args_use["kernel"]
  2415. self.k = {
  2416. "n_components": args_use["n_components"],
  2417. "kernel": args_use["kernel"],
  2418. }
  2419. self.model_Name = "KPCA"
  2420. def predict(self, x_data, *args, **kwargs):
  2421. self.x_testdata = x_data.copy()
  2422. x_predict = self.model.transform(x_data)
  2423. self.y_testdata = x_predict.copy()
  2424. self.have_predict = True
  2425. return x_predict, "KPCA"
  2426. def data_visualization(self, save_dir, *args, **kwargs):
  2427. tab = Tab()
  2428. y_data = self.y_testdata
  2429. conversion_separate_format(y_data, tab)
  2430. save = save_dir + r"/KPCA(主成分分析).HTML"
  2431. tab.render(save) # 生成HTML
  2432. return save,
  2433. class LdaModel(PrepBase): # 有监督学习
  2434. def __init__(self, args_use, *args, **kwargs):
  2435. super(LdaModel, self).__init__(*args, **kwargs)
  2436. self.model = Lda(n_components=args_use["n_components"])
  2437. self.n_components = args_use["n_components"]
  2438. self.k = {"n_components": args_use["n_components"]}
  2439. self.model_Name = "LDA"
  2440. def predict(self, x_data, *args, **kwargs):
  2441. self.x_testdata = x_data.copy()
  2442. x_predict = self.model.transform(x_data)
  2443. self.y_testdata = x_predict.copy()
  2444. self.have_predict = True
  2445. return x_predict, "LDA"
  2446. def data_visualization(self, save_dir, *args, **kwargs):
  2447. tab = Tab()
  2448. x_data = self.x_testdata
  2449. y_data = self.y_testdata
  2450. conversion_separate_format(y_data, tab)
w_list = self.model.coef_.tolist()  # tabular form
b = self.model.intercept_
x_means = quick_stats(x_data).get()[0]
# the regression helper is a historical leftover: predict returns the reduced
# dimensions rather than class labels, so a classification plot is impossible
get = regress_w(x_data, w_list, b, x_means.copy())
for i in range(len(get)):
tab.add(get[i], f"类别:{i}LDA映射曲线")
save = save_dir + r"/线性判别分析.HTML"
  2460. tab.render(save) # 生成HTML
  2461. return save,
@plugin_class_loading(get_path(r"template/machinelearning"))
class NmfModel(Unsupervised):
    def __init__(self, args_use, *args, **kwargs):
        super(NmfModel, self).__init__(*args, **kwargs)
        self.model = NMF(n_components=args_use["n_components"])
        self.n_components = args_use["n_components"]
        self.k = {"n_components": args_use["n_components"]}
        self.model_Name = "NMF"
        self.h_testdata = None
        # x_traindata holds W; h_traindata and y_traindata are filled in later

    def predict(self, x_data, x_name="", add_func=None, *args, **kwargs):
        self.x_testdata = x_data.copy()
        x_predict = self.model.transform(x_data)
        self.y_testdata = x_predict.copy()
        self.h_testdata = self.model.components_
        if add_func is not None and x_name != "":
            add_func(self.h_testdata, f"{x_name}:V->NMF[H]")
        self.have_predict = True
        return x_predict, "V->NMF[W]"

    def data_visualization(self, save_dir, *args, **kwargs):
        tab = Tab()
        y_data = self.y_testdata
        x_data = self.x_testdata
        h_data = self.h_testdata
        conversion_separate_wh(y_data, h_data, tab)
        wh_data = np.matmul(y_data, h_data)
        difference_data = x_data - wh_data

        def make_heat_map(data, name, max_, min_):
            x = [f"数据[{i}]" for i in range(len(data))]
            y = [f"特征[{i}]" for i in range(len(data[0]))]
            value = [
                (f"数据[{i}]", f"特征[{j}]", float(data[i][j]))
                for i in range(len(data))
                for j in range(len(data[i]))
            ]
            c = (
                HeatMap()
                .add_xaxis(x)
                .add_yaxis("数据", y, value, **label_setting)  # the first entry of each value tuple is the x value
                .set_global_opts(
                    title_opts=opts.TitleOpts(title=name),
                    **global_not_legend,
                    yaxis_opts=opts.AxisOpts(is_scale=True, type_="category"),
                    xaxis_opts=opts.AxisOpts(is_scale=True, type_="category"),
                    visualmap_opts=opts.VisualMapOpts(
                        is_show=True, max_=max_, min_=min_, pos_right="3%"
                    ),
                )
            )
            tab.add(c, name)

        max_ = (
            max(int(x_data.max()), int(wh_data.max()), int(difference_data.max())) + 1
        )
        min_ = min(int(x_data.min()), int(wh_data.min()), int(difference_data.min()))
        make_heat_map(x_data, "原始数据热力图", max_, min_)
        make_heat_map(wh_data, "W * H数据热力图", max_, min_)
        make_heat_map(difference_data, "数据差热力图", max_, min_)
        des_to_csv(save_dir, "权重矩阵", y_data)
        des_to_csv(save_dir, "系数矩阵", h_data)
        des_to_csv(save_dir, "系数*权重矩阵", wh_data)
        save = save_dir + r"/非负矩阵分解.HTML"
        tab.render(save)  # write the HTML report
        return save,
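
# Illustrative sketch of the W * H reconstruction that the three heatmaps
# above compare; assuming scikit-learn and non-negative input data:
def _example_nmf_reconstruction():
    import numpy as np
    from sklearn.decomposition import NMF

    rng = np.random.default_rng(0)
    v = rng.random((20, 6))  # non-negative data matrix V
    model = NMF(n_components=3, max_iter=500)
    w = model.fit_transform(v)  # weights W, shape (20, 3)
    h = model.components_  # components H, shape (3, 6)
    error = np.abs(v - w @ h).max()  # the "difference" heatmap above
    print(w.shape, h.shape, error)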
@plugin_class_loading(get_path(r"template/machinelearning"))
class TsneModel(Unsupervised):
    def __init__(self, args_use, *args, **kwargs):
        super(TsneModel, self).__init__(*args, **kwargs)
        self.model = TSNE(n_components=args_use["n_components"])
        self.n_components = args_use["n_components"]
        self.k = {"n_components": args_use["n_components"]}
        self.model_Name = "t-SNE"

    def fit_model(self, *args, **kwargs):
        # TSNE has no separate transform(), so fitting is deferred to predict()
        self.have_fit = True
        return "None", "None"

    def predict(self, x_data, *args, **kwargs):
        self.x_testdata = x_data.copy()
        x_predict = self.model.fit_transform(x_data)
        self.y_testdata = x_predict.copy()
        self.have_predict = True
        return x_predict, "t-SNE"

    def data_visualization(self, save_dir, *args, **kwargs):
        tab = Tab()
        y_data = self.y_testdata
        conversion_separate_format(y_data, tab)
        save = save_dir + r"/T-SNE.HTML"
        tab.render(save)  # write the HTML report
        return save,
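
# Illustrative sketch: unlike PCA, sklearn's TSNE exposes no transform() for
# unseen data, which is why TsneModel.fit_model above is a no-op and predict()
# calls fit_transform(); assuming scikit-learn:
def _example_tsne():
    import numpy as np
    from sklearn.manifold import TSNE

    rng = np.random.default_rng(0)
    x = rng.normal(size=(60, 8))
    embedded = TSNE(n_components=2, perplexity=10).fit_transform(x)
    print(embedded.shape)  # (60, 2)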
class MlpModel(StudyMachinebase):  # multilayer perceptron (neural network), supervised
    def __init__(self, args_use, model, *args, **kwargs):
        super(MlpModel, self).__init__(*args, **kwargs)
        all_model = {"MLP": MLPRegressor, "MLP_class": MLPClassifier}[model]
        self.model = all_model(
            hidden_layer_sizes=args_use["hidden_size"],
            activation=args_use["activation"],
            solver=args_use["solver"],
            alpha=args_use["alpha"],
            max_iter=args_use["max_iter"],
        )
        # recorded so the model can be cloned
        self.hidden_layer_sizes = args_use["hidden_size"]
        self.activation = args_use["activation"]
        self.max_iter = args_use["max_iter"]
        self.solver = args_use["solver"]
        self.alpha = args_use["alpha"]
        self.k = {
            "hidden_layer_sizes": args_use["hidden_size"],
            "activation": args_use["activation"],
            "max_iter": args_use["max_iter"],
            "solver": args_use["solver"],
            "alpha": args_use["alpha"],
        }
        self.model_Name = model

    def data_visualization(self, save_dir, *args, **kwargs):
        tab = Tab()
        x_data = self.x_testdata
        y_data = self.y_testdata
        coefs = self.model.coefs_
        n_layers_ = self.model.n_layers_

        def make_heat_map(data, name):
            x = [f"特征(节点)[{i}]" for i in range(len(data))]
            y = [f"节点[{i}]" for i in range(len(data[0]))]
            value = [
                (f"特征(节点)[{i}]", f"节点[{j}]", float(data[i][j]))
                for i in range(len(data))
                for j in range(len(data[i]))
            ]
            c = (
                HeatMap()
                .add_xaxis(x)
                .add_yaxis("数据", y, value, **label_setting)  # the first entry of each value tuple is the x value
                .set_global_opts(
                    title_opts=opts.TitleOpts(title=name),
                    **global_not_legend,
                    yaxis_opts=opts.AxisOpts(is_scale=True, type_="category"),
                    xaxis_opts=opts.AxisOpts(is_scale=True, type_="category"),
                    visualmap_opts=opts.VisualMapOpts(
                        is_show=True,
                        max_=float(data.max()),
                        min_=float(data.min()),
                        pos_right="3%",
                    ),
                )
            )
            tab.add(c, name)
            tab.add(make_tab(x, data.transpose().tolist()), f"{name}:表格")
            des_to_csv(save_dir, f"{name}:表格", data.transpose().tolist(), x, y)

        get, x_means, x_range, data_type = regress_visualization(x_data, y_data)
        for i in range(len(get)):
            tab.add(get[i], f"{i}训练数据散点图")
        get = prediction_boundary(x_range, x_means, self.predict, data_type)
        for i in range(len(get)):
            tab.add(get[i], f"{i}预测热力图")
        heard = ["神经网络层数"]
        data = [n_layers_]
        for i in range(len(coefs)):
            make_heat_map(coefs[i], f"{i}层权重矩阵")
            heard.append(f"第{i}层节点数")
            data.append(len(coefs[i][0]))
        if self.model_Name == "MLP_class":
            # classes_ only exists on the classifier, not on MLPRegressor
            class_ = self.model.classes_
            heard += [f"[{i}]类型" for i in range(len(class_))]
            data += class_.tolist()
        tab.add(make_tab(heard, [data]), "数据表")
        save = save_dir + r"/多层感知机.HTML"
        tab.render(save)  # write the HTML report
        return save,
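
# Illustrative sketch of the fitted attributes the heatmaps above rely on:
# coefs_[i] is the weight matrix between layer i and layer i + 1; assuming
# scikit-learn:
def _example_mlp_weights():
    import numpy as np
    from sklearn.neural_network import MLPClassifier

    rng = np.random.default_rng(0)
    x = rng.normal(size=(80, 4))
    y = (x[:, 0] > 0).astype(int)
    model = MLPClassifier(hidden_layer_sizes=(5,), max_iter=500).fit(x, y)
    print(model.n_layers_)  # input + hidden + output = 3
    for w in model.coefs_:
        print(w.shape)  # (4, 5) then (5, 1)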
@plugin_class_loading(get_path(r"template/machinelearning"))
class KmeansModel(UnsupervisedModel):
    def __init__(self, args_use, *args, **kwargs):
        super(KmeansModel, self).__init__(*args, **kwargs)
        self.model = KMeans(n_clusters=args_use["n_clusters"])
        self.class_ = []
        self.n_clusters = args_use["n_clusters"]
        self.k = {"n_clusters": args_use["n_clusters"]}
        self.model_Name = "k-means"

    def fit_model(self, x_data, *args, **kwargs):
        re = super().fit_model(x_data, *args, **kwargs)
        self.class_ = list(set(self.model.labels_.tolist()))
        self.have_fit = True
        return re

    def predict(self, x_data, *args, **kwargs):
        self.x_testdata = x_data.copy()
        y_predict = self.model.predict(x_data)
        self.y_testdata = y_predict.copy()
        self.have_predict = True
        return y_predict, "k-means"

    def data_visualization(self, save_dir, *args, **kwargs):
        tab = Tab()
        y = self.y_testdata
        x_data = self.x_testdata
        class_ = self.class_
        center = self.model.cluster_centers_
        class_heard = [f"簇[{i}]" for i in range(len(class_))]
        func = (
            training_visualization_more
            if more_global
            else training_visualization_center
        )
        get, x_means, x_range, data_type = func(x_data, class_, y, center)
        for i in range(len(get)):
            tab.add(get[i], f"{i}数据散点图")
        get = decision_boundary(x_range, x_means, self.predict, class_, data_type)
        for i in range(len(get)):
            tab.add(get[i], f"{i}预测热力图")
        heard = class_heard + [f"普适预测第{i}特征" for i in range(len(x_means))]
        data = class_ + [f"{i}" for i in x_means]
        c = Table().add(headers=heard, rows=[data])
        tab.add(c, "数据表")
        des_to_csv(
            save_dir,
            "预测表",
            [[f"{i}" for i in x_means]],
            [f"普适预测第{i}特征" for i in range(len(x_means))],
        )
        save = save_dir + r"/k-means聚类.HTML"
        tab.render(save)  # write the HTML report
        return save,
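
# Illustrative sketch: KMeans supports predict() on unseen points, and
# cluster_centers_ feeds the triangle markers drawn by
# training_visualization_center below; assuming scikit-learn:
def _example_kmeans():
    import numpy as np
    from sklearn.cluster import KMeans

    rng = np.random.default_rng(0)
    x = rng.normal(size=(100, 2))
    model = KMeans(n_clusters=3, n_init=10).fit(x)
    print(model.cluster_centers_.shape)  # (3, 2)
    print(model.predict(x[:5]))  # cluster index per point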
@plugin_class_loading(get_path(r"template/machinelearning"))
class AgglomerativeModel(UnsupervisedModel):
    def __init__(self, args_use, *args, **kwargs):
        super(AgglomerativeModel, self).__init__(*args, **kwargs)
        self.model = AgglomerativeClustering(
            n_clusters=args_use["n_clusters"]
        )  # defaults to 2 clusters, unlike k-means
        self.class_ = []
        self.n_clusters = args_use["n_clusters"]
        self.k = {"n_clusters": args_use["n_clusters"]}
        self.model_Name = "Agglomerative"

    def fit_model(self, x_data, *args, **kwargs):
        re = super().fit_model(x_data, *args, **kwargs)
        self.class_ = list(set(self.model.labels_.tolist()))
        self.have_fit = True
        return re

    def predict(self, x_data, *args, **kwargs):
        self.x_testdata = x_data.copy()
        y_predict = self.model.fit_predict(x_data)
        self.y_testdata = y_predict.copy()
        self.have_predict = True
        return y_predict, "Agglomerative"

    def data_visualization(self, save_dir, *args, **kwargs):
        tab = Tab()
        y = self.y_testdata
        x_data = self.x_testdata
        class_ = self.class_
        class_heard = [f"簇[{i}]" for i in range(len(class_))]
        func = (
            training_visualization_more_no_center
            if more_global
            else training_visualization
        )
        get, x_means, x_range, data_type = func(x_data, class_, y)
        for i in range(len(get)):
            tab.add(get[i], f"{i}训练数据散点图")
        get = decision_boundary(x_range, x_means, self.predict, class_, data_type)
        for i in range(len(get)):
            tab.add(get[i], f"{i}预测热力图")
        linkage_array = ward(self.x_traindata)  # merge history; y_traindata holds the labels
        dendrogram(linkage_array)
        plt.savefig(save_dir + r"/Cluster_graph.png")
        image = Image()
        image.add(src=save_dir + r"/Cluster_graph.png").set_global_opts(
            title_opts=opts.ComponentTitleOpts(title="聚类树状图")
        )
        tab.add(image, "聚类树状图")
        heard = class_heard + [f"普适预测第{i}特征" for i in range(len(x_means))]
        data = class_ + [f"{i}" for i in x_means]
        c = Table().add(headers=heard, rows=[data])
        tab.add(c, "数据表")
        des_to_csv(
            save_dir,
            "预测表",
            [[f"{i}" for i in x_means]],
            [f"普适预测第{i}特征" for i in range(len(x_means))],
        )
        save = save_dir + r"/层次聚类.HTML"
        tab.render(save)  # write the HTML report
        return save,
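
# Illustrative sketch of the dendrogram step above: scipy's ward() builds the
# linkage matrix directly from the raw observations; assuming scipy and
# matplotlib are available, and the output path is hypothetical:
def _example_dendrogram(png_path="dendrogram.png"):
    import numpy as np
    import matplotlib.pyplot as plt
    from scipy.cluster.hierarchy import dendrogram, ward

    rng = np.random.default_rng(0)
    x = rng.normal(size=(20, 3))
    linkage_array = ward(x)  # shape (n - 1, 4) merge history
    dendrogram(linkage_array)
    plt.savefig(png_path)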
@plugin_class_loading(get_path(r"template/machinelearning"))
class DbscanModel(UnsupervisedModel):
    def __init__(self, args_use, *args, **kwargs):
        super(DbscanModel, self).__init__(*args, **kwargs)
        self.model = DBSCAN(eps=args_use["eps"], min_samples=args_use["min_samples"])
        # eps is the neighborhood radius (default 0.5); min_samples (default 5)
        # separates clusters from noise (minimum points per cluster)
        self.eps = args_use["eps"]
        self.min_samples = args_use["min_samples"]
        self.k = {"min_samples": args_use["min_samples"], "eps": args_use["eps"]}
        self.class_ = []
        self.model_Name = "DBSCAN"

    def fit_model(self, x_data, *args, **kwargs):
        re = super().fit_model(x_data, *args, **kwargs)
        self.class_ = list(set(self.model.labels_.tolist()))
        self.have_fit = True
        return re

    def predict(self, x_data, *args, **kwargs):
        self.x_testdata = x_data.copy()
        y_predict = self.model.fit_predict(x_data)
        self.y_testdata = y_predict.copy()
        self.have_predict = True
        return y_predict, "DBSCAN"

    def data_visualization(self, save_dir, *args, **kwargs):
        # DBSCAN has nothing extra to predict, so only the training view is drawn
        tab = Tab()
        y = self.y_testdata.copy()
        x_data = self.x_testdata.copy()
        class_ = self.class_
        class_heard = [f"簇[{i}]" for i in range(len(class_))]
        func = (
            training_visualization_more_no_center
            if more_global
            else training_visualization
        )
        get, x_means, x_range, data_type = func(x_data, class_, y)
        for i in range(len(get)):
            tab.add(get[i], f"{i}训练数据散点图")
        heard = class_heard + [f"普适预测第{i}特征" for i in range(len(x_means))]
        data = class_ + [f"{i}" for i in x_means]
        c = Table().add(headers=heard, rows=[data])
        tab.add(c, "数据表")
        des_to_csv(
            save_dir,
            "预测表",
            [[f"{i}" for i in x_means]],
            [f"普适预测第{i}特征" for i in range(len(x_means))],
        )
        save = save_dir + r"/密度聚类.HTML"
        tab.render(save)  # write the HTML report
        return save,
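
# Illustrative sketch: DBSCAN has no predict(); fit_predict() labels points and
# uses -1 for noise, which is why the "未知" segment exists in the
# decision-boundary legends below; assuming scikit-learn:
def _example_dbscan():
    import numpy as np
    from sklearn.cluster import DBSCAN

    rng = np.random.default_rng(0)
    x = rng.normal(size=(100, 2))
    labels = DBSCAN(eps=0.5, min_samples=5).fit_predict(x)
    print(sorted(set(labels.tolist())))  # e.g. [-1, 0, 1]; -1 marks noise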
class FastFourier(StudyMachinebase):  # fast Fourier transform
    def __init__(self, *args, **kwargs):
        super(FastFourier, self).__init__(*args, **kwargs)
        self.model = None
        self.fourier = None  # complex FFT output
        self.frequency = None  # frequency range
        self.angular_Frequency = None  # angular frequency range
        self.phase = None  # phase range
        self.breadth = None  # amplitude range
        self.sample_size = None  # number of samples

    def fit_model(self, y_data, *args, **kwargs):
        y_data = y_data.ravel()  # flatten to a 1-D array
        try:
            if self.y_traindata is None:
                raise Exception
            self.y_traindata = np.hstack((y_data, self.y_traindata))
        except BaseException:
            self.y_traindata = y_data.copy()
        fourier = fft(y_data)
        self.sample_size = len(y_data)
        self.frequency = np.linspace(0, 1, self.sample_size)  # frequency bins
        self.angular_Frequency = self.frequency * (np.pi * 2)  # angular frequency w = 2*pi*f
        self.phase = np.angle(fourier)
        self.breadth = np.abs(fourier)
        self.fourier = fourier
        self.have_fit = True
        return "None", "None"

    def predict(self, x_data, *args, **kwargs):
        return np.array([]), ""

    def data_visualization(self, save_dir, *args, **kwargs):
        tab = Tab()
        y = self.y_traindata.copy()
        n = self.sample_size
        phase = self.phase  # phase range
        breadth = self.breadth  # amplitude range
        normalization_breadth = breadth / n

        def line(name, value, s=slice(0, None)) -> Line:
            c = (
                Line()
                .add_xaxis(self.frequency[s].tolist())
                .add_yaxis(
                    "",
                    value,
                    **label_setting,
                    symbol="none" if self.sample_size >= 500 else None,
                )
                .set_global_opts(
                    title_opts=opts.TitleOpts(title=name),
                    **global_not_legend,
                    xaxis_opts=opts.AxisOpts(type_="value"),
                    yaxis_opts=opts.AxisOpts(type_="value"),
                )
            )
            return c

        tab.add(line("原始数据", y.tolist()), "原始数据")
        tab.add(line("双边振幅谱", breadth.tolist()), "双边振幅谱")
        tab.add(line("双边振幅谱(归一化)", normalization_breadth.tolist()), "双边振幅谱(归一化)")
        tab.add(
            line("单边振幅谱", breadth[: int(n / 2)].tolist(), slice(0, int(n / 2))), "单边振幅谱"
        )
        tab.add(
            line(
                "单边振幅谱(归一化)",
                normalization_breadth[: int(n / 2)].tolist(),
                slice(0, int(n / 2)),
            ),
            "单边振幅谱(归一化)",
        )
        tab.add(line("双边相位谱", phase.tolist()), "双边相位谱")
        tab.add(
            line("单边相位谱", phase[: int(n / 2)].tolist(), slice(0, int(n / 2))), "单边相位谱"
        )
        tab.add(make_tab(self.frequency.tolist(), [breadth.tolist()]), "双边振幅谱表")
        tab.add(make_tab(self.frequency.tolist(), [phase.tolist()]), "双边相位谱表")
        tab.add(make_tab(self.frequency.tolist(), [self.fourier.tolist()]), "快速傅里叶变换")
        save = save_dir + r"/快速傅里叶.HTML"
        tab.render(save)  # write the HTML report
        return save,
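
# Illustrative sketch of the spectra computed above: amplitude is abs(fft),
# phase is angle(fft), and the single-sided view keeps the first n // 2 bins;
# assuming numpy only:
def _example_fft_spectra():
    import numpy as np
    from numpy.fft import fft

    n = 256
    t = np.arange(n)
    y = np.sin(2 * np.pi * 8 * t / n)  # 8 cycles across the window
    fourier = fft(y)
    breadth = np.abs(fourier)  # two-sided amplitude spectrum
    phase = np.angle(fourier)  # two-sided phase spectrum
    one_sided = breadth[: n // 2] / n  # normalized single-sided view
    print(one_sided.argmax())  # 8: the bin of the input frequency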
class ReverseFastFourier(StudyMachinebase):  # inverse fast Fourier transform
    def __init__(self, *args, **kwargs):
        super(ReverseFastFourier, self).__init__(*args, **kwargs)
        self.model = None
        self.sample_size = None
        self.y_testdata_real = None
        self.phase = None
        self.breadth = None

    def fit_model(self, y_data, *args, **kwargs):
        return "None", "None"

    def predict(self, x_data, x_name="", add_func=None, *args, **kwargs):
        self.x_testdata = x_data.ravel().astype(np.complex128)
        fourier = ifft(self.x_testdata)
        self.y_testdata = fourier.copy()
        self.y_testdata_real = np.real(fourier)
        self.sample_size = len(self.y_testdata_real)
        self.phase = np.angle(self.x_testdata)
        self.breadth = np.abs(self.x_testdata)
        if add_func is not None:
            add_func(self.y_testdata_real.copy(), f"{x_name}:逆向快速傅里叶变换[实数]")
        return fourier, "逆向快速傅里叶变换"

    def data_visualization(self, save_dir, *args, **kwargs):
        tab = Tab()
        y = self.y_testdata_real.copy()
        y_data = self.y_testdata.copy()
        n = self.sample_size
        range_n = np.linspace(0, 1, n).tolist()
        phase = self.phase  # phase range
        breadth = self.breadth  # amplitude range

        def line(name, value, s=slice(0, None)) -> Line:
            c = (
                Line()
                .add_xaxis(range_n[s])
                .add_yaxis(
                    "", value, **label_setting, symbol="none" if n >= 500 else None
                )
                .set_global_opts(
                    title_opts=opts.TitleOpts(title=name),
                    **global_not_legend,
                    xaxis_opts=opts.AxisOpts(type_="value"),
                    yaxis_opts=opts.AxisOpts(type_="value"),
                )
            )
            return c

        tab.add(line("逆向傅里叶变换", y.tolist()), "逆向傅里叶变换[实数]")
        tab.add(make_tab(range_n, [y_data.tolist()]), "逆向傅里叶变换数据")
        tab.add(make_tab(range_n, [y.tolist()]), "逆向傅里叶变换数据[实数]")
        tab.add(line("双边振幅谱", breadth.tolist()), "双边振幅谱")
        tab.add(
            line("单边振幅谱", breadth[: int(n / 2)].tolist(), slice(0, int(n / 2))), "单边振幅谱"
        )
        tab.add(line("双边相位谱", phase.tolist()), "双边相位谱")
        tab.add(
            line("单边相位谱", phase[: int(n / 2)].tolist(), slice(0, int(n / 2))), "单边相位谱"
        )
        save = save_dir + r"/逆快速傅里叶.HTML"
        tab.render(save)  # write the HTML report
        return save,


class ReverseFastFourierTwonumpy(ReverseFastFourier):  # inverse FFT from two arrays (phase, amplitude)
    def fit_model(self, x_data, y_data=None, x_name="", add_func=None, *args, **kwargs):
        r = np.multiply(np.cos(x_data), y_data)
        j = np.multiply(np.sin(x_data), y_data) * 1j
        super(ReverseFastFourierTwonumpy, self).predict(
            r + j, *args, x_name=x_name, add_func=add_func, **kwargs
        )
        return "None", "None"
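
# Illustrative sketch of what ReverseFastFourierTwonumpy does: rebuild the
# complex spectrum from a phase array and an amplitude array, then invert it;
# assuming numpy only:
def _example_ifft_from_phase_amplitude():
    import numpy as np
    from numpy.fft import fft, ifft

    y = np.sin(np.linspace(0, 8 * np.pi, 64))
    spectrum = fft(y)
    phase, breadth = np.angle(spectrum), np.abs(spectrum)
    rebuilt = breadth * np.cos(phase) + 1j * breadth * np.sin(phase)
    restored = np.real(ifft(rebuilt))
    print(np.allclose(restored, y))  # True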
class CurveFitting(StudyMachinebase):  # curve fitting
    def __init__(self, name, str_, model, *args, **kwargs):
        super(CurveFitting, self).__init__(*args, **kwargs)

        def ndim_down(data: np.ndarray):
            if data.ndim == 1:
                return data
            new_data = []
            for i in data:
                new_data.append(np.sum(i))
            return np.array(new_data)

        named_domain = {"np": np, "Func": model, "ndimDown": ndim_down}
        protection_func = f"""
def FUNC({",".join(model.__code__.co_varnames)}):
    answer = Func({",".join(model.__code__.co_varnames)})
    return ndimDown(answer)
"""
        exec(protection_func, named_domain)
        self.func = named_domain["FUNC"]
        self.fit_data = None
        self.name = name
        self.func_str = str_

    def fit_model(self, x_data: np.ndarray, y_data: np.ndarray, *args, **kwargs):
        y_data = y_data.ravel()
        x_data = x_data.astype(np.float64)
        try:
            if self.x_traindata is None:
                raise Exception
            self.x_traindata = np.vstack((x_data, self.x_traindata))
            self.y_traindata = np.hstack((y_data, self.y_traindata))
        except BaseException:
            self.x_traindata = x_data.copy()
            self.y_traindata = y_data.copy()
        self.fit_data = optimize.curve_fit(
            self.func, self.x_traindata, self.y_traindata
        )
        self.model = self.fit_data[0].copy()  # the fitted parameters
        return "None", "None"

    def predict(self, x_data, *args, **kwargs):
        self.x_testdata = x_data.copy()
        predict = self.func(x_data, *self.model)
        y_predict = []
        for i in predict:
            y_predict.append(np.sum(i))
        y_predict = np.array(y_predict)
        self.y_testdata = y_predict.copy()
        self.have_predict = True
        return y_predict, self.name

    def data_visualization(self, save_dir, *args, **kwargs):
        tab = Tab()
        y = self.y_testdata.copy()
        x_data = self.x_testdata.copy()
        get, x_means, x_range, data_type = regress_visualization(x_data, y)
        for i in range(len(get)):
            tab.add(get[i], f"{i}预测类型图")
        get = prediction_boundary(x_range, x_means, self.predict, data_type)
        for i in range(len(get)):
            tab.add(get[i], f"{i}预测热力图")
        tab.add(
            make_tab(
                [f"普适预测第{i}特征" for i in range(len(x_means))],
                [[f"{i}" for i in x_means]],
            ),
            "普适预测特征数据",
        )
        tab.add(
            make_tab(
                [f"参数[{i}]" for i in range(len(self.model))],
                [[f"{i}" for i in self.model]],
            ),
            "拟合参数",
        )
        save = save_dir + r"/曲线拟合.HTML"
        tab.render(save)  # write the HTML report
        return save,
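
# Illustrative sketch of the scipy call wrapped by CurveFitting: curve_fit
# returns the fitted parameters first, then the covariance matrix; assuming
# scipy is available, with a hypothetical linear model:
def _example_curve_fit():
    import numpy as np
    from scipy import optimize

    def func(x, a, b):  # model: y = a * x + b
        return a * x + b

    x = np.linspace(0, 10, 50)
    y = 2.0 * x + 1.0
    params, _covariance = optimize.curve_fit(func, x, y)
    print(params)  # close to [2.0, 1.0]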
@plugin_class_loading(get_path(r"template/machinelearning"))
class Tab(tab_First):
    def __init__(self, *args, **kwargs):
        super(Tab, self).__init__(*args, **kwargs)
        self.element = {}  # the charts that make up this tab, keyed by name

    def add(self, chart, tab_name):
        self.element[tab_name] = chart
        return super(Tab, self).add(chart, tab_name)

    def render(
        self,
        path: str = "render.html",
        template_name: str = "simple_tab.html",
        *args,
        **kwargs,
    ) -> str:
        if all_global:
            render_dir = path_split(path)[0]
            for i in self.element:
                self.element[i].render(render_dir + "/" + i + ".html")
        return super(Tab, self).render(path, template_name, *args, **kwargs)


@plugin_class_loading(get_path(r"template/machinelearning"))
class Table(TableFisrt):
    def __init__(self, *args, **kwargs):
        super(Table, self).__init__(*args, **kwargs)
        self.HEADERS = []
        self.ROWS = [[]]

    def add(self, headers, rows, attributes=None):
        if len(rows) == 1:
            new_headers = ["数据类型", "数据"]
            new_rows = list(zip(headers, rows[0]))
            self.HEADERS = new_headers
            self.ROWS = new_rows
            return super().add(new_headers, new_rows, attributes)
        else:
            self.HEADERS = headers
            self.ROWS = rows
            return super().add(headers, rows, attributes)

    def render(self, path="render.html", *args, **kwargs) -> str:
        if csv_global:
            save_dir, name = path_split(path)
            name = splitext(name)[0]
            try:
                DataFrame(self.ROWS, columns=self.HEADERS).to_csv(
                    save_dir + "/" + name + ".csv"
                )
            except BaseException:
                pass
        return super().render(path, *args, **kwargs)


@plugin_func_loading(get_path(r'template/machinelearning'))
def make_list(first, end, num=35):
    if end == first:  # avoid dividing by zero on a degenerate range
        return [round(first, 2)]
    n = num / (end - first)
    if n == 0:
        n = 1
    re = []
    n_first = first * n
    n_end = end * n
    while n_first <= n_end:
        cul = n_first / n
        re.append(round(cul, 2))
        n_first += 1
    return re


@plugin_func_loading(get_path(r'template/machinelearning'))
def list_filter(original_list, num=70):
    if len(original_list) <= num:
        return original_list
    n = -(-len(original_list) // num)  # ceiling division: keep at most num items
    re = original_list[::n]
    return re


@plugin_func_loading(get_path(r'template/machinelearning'))
def prediction_boundary(x_range, x_means, predict_func, data_type):  # regression x-x heatmaps
    # x_range gives the plotting ranges, x_means the values held fixed,
    # predict_func is the prediction callback
    # a: feature x, b: feature x-1, c: the remaining features
    render_list = []
    if len(x_means) == 1:
        return render_list
    for i in range(len(x_means)):
        for j in range(len(x_means)):
            if j <= i:
                continue
            a_range = x_range[j]
            a_type = data_type[j]
            b_range = x_range[i]
            b_type = data_type[i]
            if a_type == 1:
                a_list = make_list(a_range[0], a_range[1], 70)
            else:
                a_list = list_filter(a_range)  # keeps at most 70 entries
            if b_type == 1:
                b_list = make_list(b_range[0], b_range[1], 35)
            else:
                b_list = list_filter(b_range)  # keeps at most 70 entries
            a = np.array([i for i in a_list for _ in b_list]).T
            b = np.array([i for _ in a_list for i in b_list]).T
            data = np.array([x_means for _ in a_list for _ in b_list])
            data[:, j] = a
            data[:, i] = b
            y_data = predict_func(data)[0].tolist()
            value = [[float(a[i]), float(b[i]), y_data[i]] for i in range(len(a))]
            c = (
                HeatMap()
                .add_xaxis(np.unique(a))
                # the first entry of each value triple is the x value
                .add_yaxis("数据", np.unique(b), value, **label_setting)
                .set_global_opts(
                    title_opts=opts.TitleOpts(title="预测热力图"),
                    **global_not_legend,
                    yaxis_opts=opts.AxisOpts(is_scale=True, type_="category"),
                    xaxis_opts=opts.AxisOpts(is_scale=True, type_="category"),
                    visualmap_opts=opts.VisualMapOpts(
                        is_show=True,
                        max_=int(max(y_data)) + 1,
                        min_=int(min(y_data)),
                        pos_right="3%",
                    ),
                )
            )
            render_list.append(c)
    return render_list
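
# Illustrative sketch of the grid trick used by prediction_boundary above and
# the decision_boundary functions below: hold every feature at its mean, sweep
# two features over a grid, and query the model once per grid point; the
# predict_func callback and index arguments here are hypothetical:
def _example_boundary_grid(predict_func, x_means, a_list, b_list, i, j):
    import numpy as np

    a = np.array([v for v in a_list for _ in b_list])
    b = np.array([v for _ in a_list for v in b_list])
    data = np.array([x_means for _ in range(len(a))], dtype=float)
    data[:, j] = a  # feature j varies along the grid
    data[:, i] = b  # feature i varies along the grid
    return predict_func(data)  # one value per (a, b) grid point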
@plugin_func_loading(get_path(r'template/machinelearning'))
def prediction_boundary_more(x_range, x_means, predict_func, data_type):
    # x_range gives the plotting ranges, x_means the values held fixed,
    # predict_func is the prediction callback
    # a: feature x, b: feature x-1, c: the remaining features
    render_list = []
    if len(x_means) == 1:
        return render_list
    for i in range(len(x_means)):
        if i == 0:
            continue
        a_range = x_range[i - 1]
        a_type = data_type[i - 1]
        b_range = x_range[i]
        b_type = data_type[i]
        if a_type == 1:
            a_list = make_list(a_range[0], a_range[1], 70)
        else:
            a_list = list_filter(a_range)  # keeps at most 70 entries
        if b_type == 1:
            b_list = make_list(b_range[0], b_range[1], 35)
        else:
            b_list = list_filter(b_range)  # keeps at most 70 entries
        a = np.array([i for i in a_list for _ in b_list]).T
        b = np.array([i for _ in a_list for i in b_list]).T
        data = np.array([x_means for _ in a_list for _ in b_list])
        data[:, i - 1] = a
        data[:, i] = b
        y_data = predict_func(data)[0].tolist()
        value = [[float(a[i]), float(b[i]), y_data[i]] for i in range(len(a))]
        c = (
            HeatMap()
            .add_xaxis(np.unique(a))
            # the first entry of each value triple is the x value
            .add_yaxis("数据", np.unique(b), value, **label_setting)
            .set_global_opts(
                title_opts=opts.TitleOpts(title="预测热力图"),
                **global_not_legend,
                yaxis_opts=opts.AxisOpts(is_scale=True, type_="category"),
                xaxis_opts=opts.AxisOpts(is_scale=True, type_="category"),
                visualmap_opts=opts.VisualMapOpts(
                    is_show=True,
                    max_=int(max(y_data)) + 1,
                    min_=int(min(y_data)),
                    pos_right="3%",
                ),
            )
        )
        render_list.append(c)
    return render_list


def decision_boundary(
    x_range, x_means, predict_func, class_list, data_type, no_unknow=False
):  # classification x-x heatmaps
    # x_range gives the plotting ranges, x_means the values held fixed,
    # predict_func is the prediction callback, class_list the known classes
    # a: feature x, b: feature x-1, c: the remaining features
    # convention: index i - 1 (a, x_1) is the x axis
    class_dict = dict(zip(class_list, [i for i in range(len(class_list))]))
    if not no_unknow:
        map_dict = [{"min": -1.5, "max": -0.5, "label": "未知"}]  # piecewise legend
    else:
        map_dict = []
    for i in class_dict:
        map_dict.append(
            {"min": class_dict[i] - 0.5, "max": class_dict[i] + 0.5, "label": str(i)}
        )
    render_list = []
    if len(x_means) == 1:
        a_range = x_range[0]
        if data_type[0] == 1:
            a_list = make_list(a_range[0], a_range[1], 70)
        else:
            a_list = a_range
        a = np.array([i for i in a_list]).reshape(-1, 1)
        y_data = predict_func(a)[0].tolist()
        value = [[0, float(a[i]), class_dict.get(y_data[i], -1)] for i in range(len(a))]
        c = (
            HeatMap()
            .add_xaxis(["None"])
            # the first entry of each value triple is the x value
            .add_yaxis("数据", np.unique(a), value, **label_setting)
            .set_global_opts(
                title_opts=opts.TitleOpts(title="预测热力图"),
                **global_not_legend,
                yaxis_opts=opts.AxisOpts(is_scale=True, type_="category"),
                xaxis_opts=opts.AxisOpts(is_scale=True, type_="category"),
                visualmap_opts=opts.VisualMapOpts(
                    is_show=True,
                    max_=max(class_dict.values()),
                    min_=-1,
                    is_piecewise=True,
                    pieces=map_dict,
                    orient="horizontal",
                    pos_bottom="3%",
                ),
            )
        )
        render_list.append(c)
        return render_list
    # more than one feature: plot each adjacent feature pair
    for i in range(len(x_means)):
        if i == 0:
            continue
        a_range = x_range[i - 1]
        a_type = data_type[i - 1]
        b_range = x_range[i]
        b_type = data_type[i]
        if a_type == 1:
            a_list = make_list(a_range[0], a_range[1], 70)
        else:
            a_list = a_range
        if b_type == 1:
            rb = make_list(b_range[0], b_range[1], 35)
        else:
            rb = b_range
        a = np.array([i for i in a_list for _ in rb]).T
        b = np.array([i for _ in a_list for i in rb]).T
        data = np.array([x_means for _ in a_list for _ in rb])
        data[:, i - 1] = a
        data[:, i] = b
        y_data = predict_func(data)[0].tolist()
        value = [
            [float(a[i]), float(b[i]), class_dict.get(y_data[i], -1)]
            for i in range(len(a))
        ]
        c = (
            HeatMap()
            .add_xaxis(np.unique(a))
            # the first entry of each value triple is the x value
            .add_yaxis("数据", np.unique(b), value, **label_setting)
            .set_global_opts(
                title_opts=opts.TitleOpts(title="预测热力图"),
                **global_not_legend,
                yaxis_opts=opts.AxisOpts(is_scale=True, type_="category"),
                xaxis_opts=opts.AxisOpts(is_scale=True, type_="category"),
                visualmap_opts=opts.VisualMapOpts(
                    is_show=True,
                    max_=max(class_dict.values()),
                    min_=-1,
                    is_piecewise=True,
                    pieces=map_dict,
                    orient="horizontal",
                    pos_bottom="3%",
                ),
            )
        )
        render_list.append(c)
    return render_list


def decision_boundary_more(
    x_range, x_means, predict_func, class_list, data_type, no_unknow=False
):
    # x_range gives the plotting ranges, x_means the values held fixed,
    # predict_func is the prediction callback, class_list the known classes
    # a: feature x, b: feature x-1, c: the remaining features
    # convention: index i - 1 (a, x_1) is the x axis
    class_dict = dict(zip(class_list, [i for i in range(len(class_list))]))
    if not no_unknow:
        map_dict = [{"min": -1.5, "max": -0.5, "label": "未知"}]  # piecewise legend
    else:
        map_dict = []
    for i in class_dict:
        map_dict.append(
            {"min": class_dict[i] - 0.5, "max": class_dict[i] + 0.5, "label": str(i)}
        )
    render_list = []
    if len(x_means) == 1:
        return decision_boundary(
            x_range, x_means, predict_func, class_list, data_type, no_unknow
        )
    # more than one feature: plot every feature pair
    for i in range(len(x_means)):
        for j in range(len(x_means)):
            if j <= i:
                continue
            a_range = x_range[j]
            a_type = data_type[j]
            b_range = x_range[i]
            b_type = data_type[i]
            if a_type == 1:
                a_range = make_list(a_range[0], a_range[1], 70)
            if b_type == 1:
                b_range = make_list(b_range[0], b_range[1], 35)
            a = np.array([i for i in a_range for _ in b_range]).T
            b = np.array([i for _ in a_range for i in b_range]).T
            data = np.array([x_means for _ in a_range for _ in b_range])
            data[:, j] = a
            data[:, i] = b
            y_data = predict_func(data)[0].tolist()
            value = [
                [float(a[i]), float(b[i]), class_dict.get(y_data[i], -1)]
                for i in range(len(a))
            ]
            c = (
                HeatMap()
                .add_xaxis(np.unique(a))
                # the first entry of each value triple is the x value
                .add_yaxis("数据", np.unique(b), value, **label_setting)
                .set_global_opts(
                    title_opts=opts.TitleOpts(title="预测热力图"),
                    **global_not_legend,
                    yaxis_opts=opts.AxisOpts(is_scale=True, type_="category"),
                    xaxis_opts=opts.AxisOpts(is_scale=True, type_="category"),
                    visualmap_opts=opts.VisualMapOpts(
                        is_show=True,
                        max_=max(class_dict.values()),
                        min_=-1,
                        is_piecewise=True,
                        pieces=map_dict,
                        orient="horizontal",
                        pos_bottom="3%",
                    ),
                )
            )
            render_list.append(c)
    return render_list
@plugin_func_loading(get_path(r'template/machinelearning'))
def see_tree(tree_file_dir):
    node_regex = re.compile(r'^([0-9]+) \[label="(.+)"\] ;$')  # matches node lines
    link_regex = re.compile("^([0-9]+) -> ([0-9]+) (.*);$")  # matches edge lines
    node_dict = {}
    link_list = []
    with open(tree_file_dir, "r") as f:  # the file must be written and read in separate passes
        for i in f:
            try:
                regex_result = re.findall(node_regex, i)[0]
                if regex_result[0] != "":
                    try:
                        v = float(regex_result[0])
                    except BaseException:
                        v = 0
                    node_dict[regex_result[0]] = {
                        "name": regex_result[1].replace("\\n", "\n"),
                        "value": v,
                        "children": [],
                    }
                    continue
            except BaseException:
                pass
            try:
                regex_result = re.findall(link_regex, i)[0]
                if regex_result[0] != "" and regex_result[1] != "":
                    link_list.append((regex_result[0], regex_result[1]))
            except BaseException:
                pass
    father_list = []  # nodes that already have a parent
    for i in link_list:
        father = i[0]  # parent node
        son = i[1]  # child node
        try:
            node_dict[father]["children"].append(node_dict[son])
            father_list.append(son)
        except BaseException:
            pass
    father = list(set(node_dict.keys()) - set(father_list))
    c = (
        Tree()
        .add("", [node_dict[father[0]]], is_roam=True)
        .set_global_opts(
            title_opts=opts.TitleOpts(title="决策树可视化"),
            toolbox_opts=opts.ToolboxOpts(is_show=True),
        )
    )
    return c


@plugin_func_loading(get_path(r'template/machinelearning'))
def make_tab(heard, row):
    return Table().add(headers=heard, rows=row)


@plugin_func_loading(get_path(r'template/machinelearning'))
def coefficient_scatter_plot(w_heard, w):
    c = (
        Scatter()
        .add_xaxis(w_heard)
        .add_yaxis("", w, **label_setting)
        .set_global_opts(title_opts=opts.TitleOpts(title="系数w散点图"), **global_setting)
    )
    return c


@plugin_func_loading(get_path(r'template/machinelearning'))
def coefficient_bar_plot(w_heard, w):
    c = (
        Bar()
        .add_xaxis(w_heard)
        .add_yaxis("", abs(w).tolist(), **label_setting)
        .set_global_opts(title_opts=opts.TitleOpts(title="系数w柱状图"), **global_setting)
    )
    return c


@plugin_func_loading(get_path(r'template/machinelearning'))
def is_continuous(data: np.ndarray, f: float = 0.1):
    data = data.tolist()
    unique_values: list = np.unique(data).tolist()
    try:
        re = len(unique_values) / len(data) >= f or len(data) <= 3
        return re
    except BaseException:
        return False


@plugin_func_loading(get_path(r'template/machinelearning'))
def quick_stats(x_data):
    statistics_assistant = CategoricalData()
    for i in range(len(x_data)):
        x1 = x_data[i]  # one feature column
        statistics_assistant(x1)
    return statistics_assistant
@plugin_func_loading(get_path(r'template/machinelearning'))
def training_visualization_more_no_center(x_data, class_list, y_data):
    x_data = x_data.transpose()
    if len(x_data) == 1:
        x_data = np.array([x_data[0], np.zeros(len(x_data[0]))])
    statistics_assistant = quick_stats(x_data)
    render_list = []
    for i in range(len(x_data)):
        for a in range(len(x_data)):
            if a <= i:
                continue
            x1 = x_data[i]  # plotted on the y axis
            x1_is_continuous = is_continuous(x1)
            x2 = x_data[a]  # plotted on the x axis
            x2_is_continuous = is_continuous(x2)
            base_render = None  # the chart accumulated so far
            for class_num in range(len(class_list)):
                now_class = class_list[class_num]
                plot_x1 = x1[y_data == now_class].tolist()
                plot_x2 = x2[y_data == now_class]
                axis_x2 = np.unique(plot_x2)
                plot_x2 = x2[y_data == now_class].tolist()
                # unlike a plain scatter, x1 ends up on the vertical axis
                c = (
                    Scatter()
                    .add_xaxis(plot_x2)
                    .add_yaxis(f"{now_class}", plot_x1, **label_setting)
                    .set_global_opts(
                        title_opts=opts.TitleOpts(title=f"[{a}-{i}]训练数据散点图"),
                        **global_setting,
                        yaxis_opts=opts.AxisOpts(
                            type_="value" if x1_is_continuous else "category",
                            is_scale=True,
                        ),
                        xaxis_opts=opts.AxisOpts(
                            type_="value" if x2_is_continuous else "category",
                            is_scale=True,
                        ),
                    )
                )
                c.add_xaxis(axis_x2)
                if base_render is None:
                    base_render = c
                else:
                    base_render = base_render.overlap(c)
            render_list.append(base_render)
    means, x_range, data_type = statistics_assistant.get()
    return render_list, means, x_range, data_type


@plugin_func_loading(get_path(r'template/machinelearning'))
def training_visualization_more(x_data, class_list, y_data, center):
    x_data = x_data.transpose()
    if len(x_data) == 1:
        x_data = np.array([x_data[0], np.zeros(len(x_data[0]))])
    statistics_assistant = quick_stats(x_data)
    render_list = []
    for i in range(len(x_data)):
        for a in range(len(x_data)):
            if a <= i:
                continue
            x1 = x_data[i]  # plotted on the y axis
            x1_is_continuous = is_continuous(x1)
            x2 = x_data[a]  # plotted on the x axis
            x2_is_continuous = is_continuous(x2)
            base_render = None  # the chart accumulated so far
            for class_num in range(len(class_list)):
                now_class = class_list[class_num]
                plot_x1 = x1[y_data == now_class].tolist()
                plot_x2 = x2[y_data == now_class]
                axis_x2 = np.unique(plot_x2)
                plot_x2 = x2[y_data == now_class].tolist()
                # unlike a plain scatter, x1 ends up on the vertical axis
                c = (
                    Scatter()
                    .add_xaxis(plot_x2)
                    .add_yaxis(f"{now_class}", plot_x1, **label_setting)
                    .set_global_opts(
                        title_opts=opts.TitleOpts(title=f"[{a}-{i}]训练数据散点图"),
                        **global_setting,
                        yaxis_opts=opts.AxisOpts(
                            type_="value" if x1_is_continuous else "category",
                            is_scale=True,
                        ),
                        xaxis_opts=opts.AxisOpts(
                            type_="value" if x2_is_continuous else "category",
                            is_scale=True,
                        ),
                    )
                )
                c.add_xaxis(axis_x2)
                # overlay the cluster center
                try:
                    center_x2 = [center[class_num][a]]
                except BaseException:
                    center_x2 = [0]
                b = (
                    Scatter()
                    .add_xaxis(center_x2)
                    .add_yaxis(
                        f"[{now_class}]中心",
                        [center[class_num][i]],
                        **label_setting,
                        symbol="triangle",
                    )
                    .set_global_opts(
                        title_opts=opts.TitleOpts(title="簇中心"),
                        **global_setting,
                        yaxis_opts=opts.AxisOpts(
                            type_="value" if x1_is_continuous else "category",
                            is_scale=True,
                        ),
                        xaxis_opts=opts.AxisOpts(
                            type_="value" if x2_is_continuous else "category",
                            is_scale=True,
                        ),
                    )
                )
                c.overlap(b)
                if base_render is None:
                    base_render = c
                else:
                    base_render = base_render.overlap(c)
            render_list.append(base_render)
    means, x_range, data_type = statistics_assistant.get()
    return render_list, means, x_range, data_type


@plugin_func_loading(get_path(r'template/machinelearning'))
def training_visualization_center(x_data, class_data, y_data, center):
    x_data = x_data.transpose()
    if len(x_data) == 1:
        x_data = np.array([x_data[0], np.zeros(len(x_data[0]))])
    statistics_assistant = quick_stats(x_data)
    render_list = []
    for i in range(len(x_data)):
        if i == 0:
            continue
        x1 = x_data[i]  # plotted on the y axis
        x1_is_continuous = is_continuous(x1)
        x2 = x_data[i - 1]  # plotted on the x axis
        x2_is_continuous = is_continuous(x2)
        base_render = None  # the chart accumulated so far
        for class_num in range(len(class_data)):
            n_class = class_data[class_num]
            x_1 = x1[y_data == n_class].tolist()
            x_2 = x2[y_data == n_class]
            x_2_new = np.unique(x_2)
            x_2 = x2[y_data == n_class].tolist()
            # unlike a plain scatter, x1 ends up on the vertical axis
            c = (
                Scatter()
                .add_xaxis(x_2)
                .add_yaxis(f"{n_class}", x_1, **label_setting)
                .set_global_opts(
                    title_opts=opts.TitleOpts(title=f"[{i-1}-{i}]训练数据散点图"),
                    **global_setting,
                    yaxis_opts=opts.AxisOpts(
                        type_="value" if x1_is_continuous else "category", is_scale=True
                    ),
                    xaxis_opts=opts.AxisOpts(
                        type_="value" if x2_is_continuous else "category", is_scale=True
                    ),
                )
            )
            c.add_xaxis(x_2_new)
            # overlay the cluster center
            try:
                center_x_2 = [center[class_num][i - 1]]
            except BaseException:
                center_x_2 = [0]
            b = (
                Scatter()
                .add_xaxis(center_x_2)
                .add_yaxis(
                    f"[{n_class}]中心",
                    [center[class_num][i]],
                    **label_setting,
                    symbol="triangle",
                )
                .set_global_opts(
                    title_opts=opts.TitleOpts(title="簇中心"),
                    **global_setting,
                    yaxis_opts=opts.AxisOpts(
                        type_="value" if x1_is_continuous else "category", is_scale=True
                    ),
                    xaxis_opts=opts.AxisOpts(
                        type_="value" if x2_is_continuous else "category", is_scale=True
                    ),
                )
            )
            c.overlap(b)
            if base_render is None:
                base_render = c
            else:
                base_render = base_render.overlap(c)
        render_list.append(base_render)
    means, x_range, data_type = statistics_assistant.get()
    return render_list, means, x_range, data_type
@plugin_func_loading(get_path(r'template/machinelearning'))
def training_visualization(x_data, class_, y_data):  # class-colored x-x scatter plots
    x_data = x_data.transpose()
    if len(x_data) == 1:
        x_data = np.array([x_data[0], np.zeros(len(x_data[0]))])
    statistics_assistant = quick_stats(x_data)
    render_list = []
    for i in range(len(x_data)):
        if i == 0:
            continue
        x1 = x_data[i]  # plotted on the y axis
        x1_is_continuous = is_continuous(x1)
        x2 = x_data[i - 1]  # plotted on the x axis
        x2_is_continuous = is_continuous(x2)
        base_render = None  # the chart accumulated so far
        for now_class in class_:
            plot_x1 = x1[y_data == now_class].tolist()
            plot_x2 = x2[y_data == now_class]
            axis_x2 = np.unique(plot_x2)
            plot_x2 = x2[y_data == now_class].tolist()
            # unlike a plain scatter, x1 ends up on the vertical axis
            c = (
                Scatter()
                .add_xaxis(plot_x2)
                .add_yaxis(f"{now_class}", plot_x1, **label_setting)
                .set_global_opts(
                    title_opts=opts.TitleOpts(title="训练数据散点图"),
                    **global_setting,
                    yaxis_opts=opts.AxisOpts(
                        type_="value" if x1_is_continuous else "category", is_scale=True
                    ),
                    xaxis_opts=opts.AxisOpts(
                        type_="value" if x2_is_continuous else "category", is_scale=True
                    ),
                )
            )
            c.add_xaxis(axis_x2)
            if base_render is None:
                base_render = c
            else:
                base_render = base_render.overlap(c)
        render_list.append(base_render)
    means, x_range, data_type = statistics_assistant.get()
    return render_list, means, x_range, data_type


@plugin_func_loading(get_path(r'template/machinelearning'))
def training_visualization_no_class(x_data):  # x-x scatter plots without class labels
    x_data = x_data.transpose()
    if len(x_data) == 1:
        x_data = np.array([x_data[0], np.zeros(len(x_data[0]))])
    statistics_assistant = quick_stats(x_data)
    render_list = []
    for i in range(len(x_data)):
        if i == 0:
            continue
        x1 = x_data[i]  # plotted on the y axis
        x1_is_continuous = is_continuous(x1)
        x2 = x_data[i - 1]  # plotted on the x axis
        x2_is_continuous = is_continuous(x2)
        x2_only = np.unique(x2)
        # unlike a plain scatter, x1 ends up on the vertical axis
        c = (
            Scatter()
            .add_xaxis(x2)
            .add_yaxis("", x1.tolist(), **label_setting)
            .set_global_opts(
                title_opts=opts.TitleOpts(title="训练数据散点图"),
                **global_not_legend,
                yaxis_opts=opts.AxisOpts(
                    type_="value" if x1_is_continuous else "category", is_scale=True
                ),
                xaxis_opts=opts.AxisOpts(
                    type_="value" if x2_is_continuous else "category", is_scale=True
                ),
            )
        )
        c.add_xaxis(x2_only)
        render_list.append(c)
    means, x_range, data_type = statistics_assistant.get()
    return render_list, means, x_range, data_type
def training_w(
    x_data, class_list, y_data, w_list, b_list, x_means: list
):  # draws decision boundaries for classification
    x_data = x_data.transpose()
    if len(x_data) == 1:
        x_data = np.array([x_data[0], np.zeros(len(x_data[0]))])
    render_list = []
    x_means.append(0)
    x_means = np.array(x_means)
    for i in range(len(x_data)):
        if i == 0:
            continue
        x1_is_continuous = is_continuous(x_data[i])
        x2 = x_data[i - 1]  # plotted on the x axis
        x2_is_continuous = is_continuous(x2)
        o_c = None  # the chart accumulated so far
        for class_num in range(len(class_list)):
            n_class = class_list[class_num]
            x2_only = np.unique(x2[y_data == n_class])
            # this branch works around sklearn's two-class coefficient layout
            if len(class_list) == 2:  # binary classification
                if class_num == 0:
                    continue
                w = w_list[0]
                b = b_list[0]
            else:  # multi-class classification
                w = w_list[class_num]
                b = b_list[class_num]
            if x2_is_continuous:
                x2_only = np.array(make_list(x2_only.min(), x2_only.max(), 5))
            w = np.append(w, 0)
            # every feature except the plotted pair is pinned to its mean
            boundary = (
                -(x2_only * w[i - 1]) / w[i]
                + b
                + (x_means[: i - 1] * w[: i - 1]).sum()
                + (x_means[i + 1 :] * w[i + 1 :]).sum()
            )
            c = (
                Line()
                .add_xaxis(x2_only)
                .add_yaxis(
                    f"决策边界:{n_class}=>[{i}]",
                    boundary.tolist(),
                    is_smooth=True,
                    **label_setting,
                )
                .set_global_opts(
                    title_opts=opts.TitleOpts(title="系数w曲线"),
                    **global_setting,
                    yaxis_opts=opts.AxisOpts(
                        type_="value" if x1_is_continuous else "category", is_scale=True
                    ),
                    xaxis_opts=opts.AxisOpts(
                        type_="value" if x2_is_continuous else "category", is_scale=True
                    ),
                )
            )
            if o_c is None:
                o_c = c
            else:
                o_c = o_c.overlap(c)
        # nothing may follow inside the loop: the binary branch can continue past it
        render_list.append(o_c)
    return render_list


@plugin_func_loading(get_path(r'template/machinelearning'))
def regress_w(x_data, w_data: np.ndarray, intercept_b, x_means: list):  # regression y-x plots
    x_data = x_data.transpose()
    if len(x_data) == 1:
        x_data = np.array([x_data[0], np.zeros(len(x_data[0]))])
    render_list = []
    x_means.append(0)  # make sure x_means[i + 1:] never runs out of bounds
    x_means = np.array(x_means)
    w_data = np.append(w_data, 0)
    for i in range(len(x_data)):
        x1 = x_data[i]
        x1_is_continuous = is_continuous(x1)
        if x1_is_continuous:
            x1 = np.array(make_list(x1.min(), x1.max(), 5))
        x1_only = np.unique(x1)
        # every feature except the plotted one is pinned to its mean
        y_data = (
            x1_only * w_data[i]
            + intercept_b
            + (x_means[:i] * w_data[:i]).sum()
            + (x_means[i + 1 :] * w_data[i + 1 :]).sum()
        )
        y_is_continuous = is_continuous(y_data)
        c = (
            Line()
            .add_xaxis(x1_only)
            .add_yaxis(f"拟合结果=>[{i}]", y_data.tolist(), is_smooth=True, **label_setting)
            .set_global_opts(
                title_opts=opts.TitleOpts(title="系数w曲线"),
                **global_setting,
                yaxis_opts=opts.AxisOpts(
                    type_="value" if y_is_continuous else None, is_scale=True
                ),
                xaxis_opts=opts.AxisOpts(
                    type_="value" if x1_is_continuous else None, is_scale=True
                ),
            )
        )
        render_list.append(c)
    return render_list
@plugin_func_loading(get_path(r'template/machinelearning'))
def regress_visualization(x_data, y_data):  # y-x scatter plots
    x_data = x_data.transpose()
    y_is_continuous = is_continuous(y_data)
    statistics_assistant = quick_stats(x_data)
    render_list = []
    try:
        visualmap_opts = opts.VisualMapOpts(
            is_show=True,
            max_=int(y_data.max()) + 1,
            min_=int(y_data.min()),
            pos_right="3%",
        )
    except BaseException:
        visualmap_opts = None
        y_is_continuous = False
    for i in range(len(x_data)):
        x1 = x_data[i]  # x-axis values
        x1_is_continuous = is_continuous(x1)
        # keep the ndarray dtype here: converting to list loses precision and
        # produces duplicated values in the plot
        if not y_is_continuous and x1_is_continuous:
            y_is_continuous, x1_is_continuous = x1_is_continuous, y_is_continuous
            x1, y_data = y_data, x1
        c = (
            Scatter()
            .add_xaxis(x1.tolist())  # this is the horizontal axis
            .add_yaxis("数据", y_data.tolist(), **label_setting)
            .set_global_opts(
                title_opts=opts.TitleOpts(title="预测类型图"),
                **global_setting,
                yaxis_opts=opts.AxisOpts(
                    type_="value" if y_is_continuous else "category", is_scale=True
                ),
                xaxis_opts=opts.AxisOpts(
                    type_="value" if x1_is_continuous else "category", is_scale=True
                ),
                visualmap_opts=visualmap_opts,
            )
        )
        c.add_xaxis(np.unique(x1))
        render_list.append(c)
    means, x_range, data_type = statistics_assistant.get()
    return render_list, means, x_range, data_type


@plugin_func_loading(get_path(r'template/machinelearning'))
def feature_visualization(x_data, data_name=""):  # x-x scatter plots
    setting = global_setting if data_name else global_not_legend
    x_data = x_data.transpose()
    only = False
    if len(x_data) == 1:
        x_data = np.array([x_data[0], np.zeros(len(x_data[0]))])
        only = True
    render_list = []
    for i in range(len(x_data)):
        for a in range(len(x_data)):
            if a <= i:
                continue  # skip duplicate pairs
            x1 = x_data[i]  # plotted on the y axis
            x1_is_continuous = is_continuous(x1)
            x2 = x_data[a]  # plotted on the x axis
            x2_is_continuous = is_continuous(x2)
            x2_only = np.unique(x2)
            if only:
                x2_is_continuous = False
            # unlike a plain scatter, x1 ends up on the vertical axis
            c = (
                Scatter()
                .add_xaxis(x2)
                .add_yaxis(data_name, x1, **label_setting)
                .set_global_opts(
                    title_opts=opts.TitleOpts(title=f"[{i}-{a}]数据散点图"),
                    **setting,
                    yaxis_opts=opts.AxisOpts(
                        type_="value" if x1_is_continuous else "category", is_scale=True
                    ),
                    xaxis_opts=opts.AxisOpts(
                        type_="value" if x2_is_continuous else "category", is_scale=True
                    ),
                )
            )
            c.add_xaxis(x2_only)
            render_list.append(c)
    return render_list


@plugin_func_loading(get_path(r'template/machinelearning'))
def feature_visualization_format(x_data, data_name=""):  # x-x scatter plots with point labels
    setting = global_setting if data_name else global_not_legend
    x_data = x_data.transpose()
    only = False
    if len(x_data) == 1:
        x_data = np.array([x_data[0], np.zeros(len(x_data[0]))])
        only = True
    render_list = []
    for i in range(len(x_data)):
        for a in range(len(x_data)):
            if a <= i:
                continue  # skip duplicate pairs (a only reads past i)
            x1 = x_data[i]  # plotted on the y axis
            x1_is_continuous = is_continuous(x1)
            x2 = x_data[a]  # plotted on the x axis
            x2_is_continuous = is_continuous(x2)
            x2_only = np.unique(x2)
            x1_list = x1.astype(str).tolist()
            for k in range(len(x1_list)):
                x1_list[k] = [x1_list[k], f"特征{k}"]
            if only:
                x2_is_continuous = False
            # unlike a plain scatter, x1 ends up on the vertical axis
            c = (
                Scatter()
                .add_xaxis(x2)
                .add_yaxis(data_name, x1_list, **label_setting)
                .set_global_opts(
                    title_opts=opts.TitleOpts(title=f"[{i}-{a}]数据散点图"),
                    **setting,
                    yaxis_opts=opts.AxisOpts(
                        type_="value" if x1_is_continuous else "category", is_scale=True
                    ),
                    xaxis_opts=opts.AxisOpts(
                        type_="value" if x2_is_continuous else "category", is_scale=True
                    ),
                    tooltip_opts=opts.TooltipOpts(
                        is_show=True, axis_pointer_type="cross", formatter="{c}"
                    ),
                )
            )
            c.add_xaxis(x2_only)
            render_list.append(c)
    return render_list
  3945. @plugin_func_loading(get_path(r'template/machinelearning'))
  3946. def discrete_feature_visualization(x_data, data_name=""): # 必定离散x-x数据图
  3947. seeting = global_setting if data_name else global_not_legend
  3948. x_data = x_data.transpose()
  3949. if len(x_data) == 1:
  3950. x_data = np.array([x_data[0], np.zeros(len(x_data[0]))])
  3951. render_list = []
  3952. for i in range(len(x_data)):
  3953. for a in range(len(x_data)):
  3954. if a <= i:
  3955. continue # 重复内容,跳过
  3956. x1 = x_data[i] # x坐标
  3957. x2 = x_data[a] # y坐标
  3958. x2_only = np.unique(x2)
  3959. # x与散点图不同,这里是纵坐标
  3960. c = (
  3961. Scatter()
  3962. .add_xaxis(x2)
  3963. .add_yaxis(data_name, x1, **label_setting)
  3964. .set_global_opts(
  3965. title_opts=opts.TitleOpts(title=f"[{i}-{a}]数据散点图"),
  3966. **seeting,
  3967. yaxis_opts=opts.AxisOpts(type_="category", is_scale=True),
  3968. xaxis_opts=opts.AxisOpts(type_="category", is_scale=True),
  3969. )
  3970. )
  3971. c.add_xaxis(x2_only)
  3972. render_list.append(c)
  3973. return render_list
  3974. @plugin_func_loading(get_path(r'template/machinelearning'))
  3975. def conversion_control(y_data, x_data, tab): # 合并两x-x图
  3976. if isinstance(x_data, np.ndarray) and isinstance(y_data, np.ndarray):
  3977. get_x = feature_visualization(x_data, "原数据") # 原来
  3978. get_y = feature_visualization(y_data, "转换数据") # 转换
  3979. for i in range(len(get_x)):
  3980. tab.add(get_x[i].overlap(get_y[i]), f"[{i}]数据x-x散点图")
  3981. return tab
  3982. @plugin_func_loading(get_path(r'template/machinelearning'))
  3983. def conversion_separate(y_data, x_data, tab): # 并列显示两x-x图
  3984. if isinstance(x_data, np.ndarray) and isinstance(y_data, np.ndarray):
  3985. get_x = feature_visualization(x_data, "原数据") # 原来
  3986. get_y = feature_visualization(y_data, "转换数据") # 转换
  3987. for i in range(len(get_x)):
  3988. try:
  3989. tab.add(get_x[i], f"[{i}]数据x-x散点图")
  3990. except IndexError:
  3991. pass
  3992. try:
  3993. tab.add(get_y[i], f"[{i}]变维数据x-x散点图")
  3994. except IndexError:
  3995. pass
  3996. return tab
@plugin_func_loading(get_path(r'template/machinelearning'))
def conversion_separate_format(y_data, tab):  # show only the transformed x-x charts
    if isinstance(y_data, np.ndarray):
        get_y = feature_visualization_format(y_data, "转换数据")  # transformed data
        for i in range(len(get_y)):
            tab.add(get_y[i], f"[{i}]变维数据x-x散点图")
    return tab
@plugin_func_loading(get_path(r'template/machinelearning'))
def conversion_separate_wh(w_array, h_array, tab):  # show the W and H x-x chart sets side by side
    if isinstance(w_array, np.ndarray) and isinstance(h_array, np.ndarray):
        get_x = feature_visualization_format(w_array, "W矩阵数据")  # original
        get_y = feature_visualization(
            h_array.transpose(), "H矩阵数据"
        )  # transformed (transpose here; it is transposed back inside, since W*H pairs rows with columns)
        for i in range(len(get_x)):
            try:
                tab.add(get_x[i], f"[{i}]W矩阵x-x散点图")
            except IndexError:
                pass
            try:
                tab.add(get_y[i], f"[{i}]H.T矩阵x-x散点图")
            except IndexError:
                pass
    return tab
@plugin_func_loading(get_path(r'template/machinelearning'))
def make_bar(name, value, tab):  # draw a bar chart of coefficients
    c = (
        Bar()
        .add_xaxis([f"[{i}]特征" for i in range(len(value))])
        .add_yaxis(name, value, **label_setting)
        .set_global_opts(title_opts=opts.TitleOpts(title="系数w柱状图"), **global_setting)
    )
    tab.add(c, name)
@plugin_func_loading(get_path(r'template/machinelearning'))
def judging_digits(num: (int, float)):  # count the digits before the decimal point
    a = str(abs(num)).split(".")[0]
    if a == "":
        raise ValueError
    return len(a)
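# Example (illustrative): judging_digits counts the digits left of the decimal
# point, so judging_digits(123.45) == 3 and judging_digits(-7) == 1.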
@plugin_func_loading(get_path(r'template/machinelearning'))
def num_str(num, accuracy):  # format num, zero-padded out to `accuracy` digits
    num = str(round(float(num), accuracy))
    if len(num.replace(".", "")) == accuracy:
        return num
    n = num.split(".")
    if len(n) == 1:  # no decimal point (str.split never yields an empty list)
        return num + "." + "0" * (accuracy - len(num))
    else:
        return num + "0" * (accuracy - len(num) + 1)  # +1 because len(num) counts the decimal point
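# Example (illustrative): num_str pads the rounded value out to `accuracy` digits,
# e.g. num_str(3.1, 3) == "3.10" while num_str(3.14159, 3) == "3.142".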
@plugin_func_loading(get_path(r'template/machinelearning'))
def des_to_csv(save_dir, name, data, columns=None, row=None):
    save_dir = save_dir + "/" + name + ".csv"
    DataFrame(data, columns=columns, index=row).to_csv(
        save_dir,
        header=columns is not None,
        index=row is not None,
    )
    return data
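# Usage sketch (illustrative, hypothetical arguments): write a 1x2 table with a
# header row and no index column:
#
#     des_to_csv("/tmp/out", "stats", [[1.0, 2.0]], columns=["mean", "std"])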
@plugin_func_loading(get_path(r'template/machinelearning'))
def pack(output_filename, source_dir):
    with tarfile.open(output_filename, "w:gz") as tar:
        tar.add(source_dir, arcname=basename(source_dir))
    return output_filename
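# Usage sketch (illustrative): pack("/tmp/out.tar.gz", "/tmp/out") creates a gzipped
# tarball whose single top-level entry is the directory's base name ("out").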
def set_global(
    more=more_global,
    all=all_global,
    csv=csv_global,
    clf=clf_global,
    tar=tar_global,
    new=new_dir_global,
):
    global more_global, all_global, csv_global, clf_global, tar_global, new_dir_global
    more_global = more  # plot with all features
    all_global = all  # export charts
    csv_global = csv  # export CSV files
    clf_global = clf  # export the fitted model
    tar_global = tar  # pack the output into a tar archive
    new_dir_global = new  # create a fresh output directory
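# Usage sketch (illustrative): set_global(csv=False, tar=False) keeps charts and
# models but skips the CSV export and the tarball. Note that the defaults above are
# evaluated once at definition time, so omitted switches are reset to the values the
# globals held when the module was imported, not left unchanged.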
class MachineLearnerInit(Learner):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.learner = {}  # registered learners
        self.learn_dict = {
            "Line": LineModel,
            "Ridge": LineModel,
            "Lasso": LineModel,
            "LogisticRegression": LogisticregressionModel,
            "Knn_class": KnnModel,
            "Knn": KnnModel,
            "Tree_class": TreeModel,
            "Tree": TreeModel,
            "Forest": ForestModel,
            "Forest_class": ForestModel,
            "GradientTree_class": GradienttreeModel,
            "GradientTree": GradienttreeModel,
            "Variance": VarianceModel,
            "SelectKBest": SelectkbestModel,
            "Z-Score": StandardizationModel,
            "MinMaxScaler": MinmaxscalerModel,
            "LogScaler": LogscalerModel,
            "atanScaler": AtanscalerModel,
            "decimalScaler": DecimalscalerModel,
            "sigmodScaler": SigmodscalerModel,
            "Mapzoom": MapzoomModel,
            "Fuzzy_quantization": FuzzyQuantizationModel,
            "Regularization": RegularizationModel,
            "Binarizer": BinarizerModel,
            "Discretization": DiscretizationModel,
            "Label": LabelModel,
            "OneHotEncoder": OneHotEncoderModel,
            "Missed": MissedModel,
            "PCA": PcaModel,
            "RPCA": RpcaModel,
            "KPCA": KpcaModel,
            "LDA": LdaModel,
            "SVC": SvcModel,
            "SVR": SvrModel,
            "MLP": MlpModel,
            "MLP_class": MlpModel,
            "NMF": NmfModel,
            "t-SNE": TsneModel,
            "k-means": KmeansModel,
            "Agglomerative": AgglomerativeModel,
            "DBSCAN": DbscanModel,
            "ClassBar": ClassBar,
            "FeatureScatter": NearFeatureScatter,
            "FeatureScatterClass": NearFeatureScatterClass,
            "FeatureScatter_all": NearFeatureScatterMore,
            "FeatureScatterClass_all": NearFeatureScatterClassMore,
            "HeatMap": NumpyHeatMap,
            "FeatureY-X": FeatureScatterYX,
            "ClusterTree": ClusterTree,
            "MatrixScatter": MatrixScatter,
            "Correlation": Corr,
            "Statistics": DataAnalysis,
            "Fast_Fourier": FastFourier,
            "Reverse_Fast_Fourier": ReverseFastFourier,
            "[2]Reverse_Fast_Fourier": ReverseFastFourierTwonumpy,
        }
        self.data_type = {}  # learner name -> model type
    def learner_parameters(self, parameters, data_type):  # parse the parameter string
        original_parameter = {}
        target_parameter = {}
        # read the user-supplied parameters
        exec(parameters, original_parameter)
        # normalize them
        if data_type in ("MLP", "MLP_class"):
            target_parameter["alpha"] = float(
                original_parameter.get("alpha", 0.0001)
            )  # MLP regularization strength
        else:
            target_parameter["alpha"] = float(
                original_parameter.get("alpha", 1.0)
            )  # L1/L2 regularization strength
        target_parameter["C"] = float(original_parameter.get("C", 1.0))  # L1/L2 regularization (inverse strength)
        if data_type in ("MLP", "MLP_class"):
            target_parameter["max_iter"] = int(
                original_parameter.get("max_iter", 200)
            )  # iteration cap
        else:
            target_parameter["max_iter"] = int(
                original_parameter.get("max_iter", 1000)
            )  # iteration cap
        target_parameter["n_neighbors"] = int(
            original_parameter.get("K_knn", 5)
        )  # number of KNN neighbours (input key is named differently)
        target_parameter["p"] = int(original_parameter.get("p", 2))  # distance metric exponent
        target_parameter["nDim_2"] = bool(
            original_parameter.get("nDim_2", True)
        )  # whether to reduce the data's dimensionality
        if data_type in ("Tree", "Forest", "GradientTree"):
            target_parameter["criterion"] = (
                "mse" if bool(original_parameter.get("is_MSE", True)) else "mae"
            )  # regression split criterion
        else:
            target_parameter["criterion"] = (
                "gini" if bool(original_parameter.get("is_Gini", True)) else "entropy"
            )  # whether to use Gini impurity
        target_parameter["splitter"] = (
            "random" if bool(original_parameter.get("is_random", False)) else "best"
        )  # whether tree nodes choose splits randomly rather than best-first
        target_parameter["max_features"] = original_parameter.get(
            "max_features", None
        )  # maximum number of features to consider
        target_parameter["max_depth"] = original_parameter.get(
            "max_depth", None
        )  # maximum tree depth
        target_parameter["min_samples_split"] = int(
            original_parameter.get("min_samples_split", 2)
        )  # minimum samples required to keep splitting (small values overfit easily)
        target_parameter["P"] = float(original_parameter.get("P", 0.8))
        target_parameter["k"] = original_parameter.get("k", 1)
        target_parameter["score_func"] = {
            "chi2": chi2,
            "f_classif": f_classif,
            "mutual_info_classif": mutual_info_classif,
            "f_regression": f_regression,
            "mutual_info_regression": mutual_info_regression,
        }.get(original_parameter.get("score_func", "f_classif"), f_classif)
        target_parameter["feature_range"] = tuple(
            original_parameter.get("feature_range", (0, 1))
        )
        target_parameter["norm"] = original_parameter.get("norm", "l2")  # regularization norm, l1 or l2
        target_parameter["threshold"] = float(
            original_parameter.get("threshold", 0.0)
        )  # binarization threshold
        target_parameter["split_range"] = list(
            original_parameter.get("split_range", [0])
        )  # discretization split points
        target_parameter["ndim_up"] = bool(original_parameter.get("ndim_up", False))
        target_parameter["miss_value"] = original_parameter.get("miss_value", np.nan)
        target_parameter["fill_method"] = original_parameter.get("fill_method", "mean")
        target_parameter["fill_value"] = original_parameter.get("fill_value", None)
        target_parameter["n_components"] = original_parameter.get("n_components", 1)
        target_parameter["kernel"] = original_parameter.get(
            "kernel", "rbf" if data_type in ("SVR", "SVC") else "linear"
        )
        target_parameter["n_Tree"] = original_parameter.get("n_Tree", 100)
        target_parameter["gamma"] = original_parameter.get("gamma", 1)
        target_parameter["hidden_size"] = tuple(
            original_parameter.get("hidden_size", (100,))
        )
        target_parameter["activation"] = str(
            original_parameter.get("activation", "relu")
        )
        target_parameter["solver"] = str(original_parameter.get("solver", "adam"))
        if data_type in ("k-means",):
            target_parameter["n_clusters"] = int(
                original_parameter.get("n_clusters", 8)
            )
        else:
            target_parameter["n_clusters"] = int(
                original_parameter.get("n_clusters", 2)
            )
        target_parameter["eps"] = float(original_parameter.get("eps", 0.5))
        target_parameter["min_samples"] = int(original_parameter.get("min_samples", 5))
        target_parameter["white_PCA"] = bool(original_parameter.get("white_PCA", False))
        return target_parameter
    def get_learner(self, name):
        return self.learner[name]
    def get_learner_type(self, name):
        return self.data_type[name]
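    # Note (illustrative): `parameters` is a newline-separated Python snippet run via
    # exec(), so callers pass literal assignments such as:
    #
    #     self.learner_parameters("alpha=0.5\nmax_iter=500", "Lasso")
    #
    # Any key the snippet does not set falls back to the defaults above.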
@plugin_class_loading(get_path(r"template/machinelearning"))
class MachineLearnerAdd(MachineLearnerInit):
    def add_learner(self, learner_str, parameters=""):
        get = self.learn_dict[learner_str]
        name = f"Le[{len(self.learner)}]{learner_str}"
        # parse the parameters
        args_use = self.learner_parameters(parameters, learner_str)
        # build the learner
        self.learner[name] = get(model=learner_str, args_use=args_use)
        self.data_type[name] = learner_str
    def add_curve_fitting(self, learner):
        named_domain = {}
        exec(learner, named_domain)
        name = f'Le[{len(self.learner)}]{named_domain.get("name", "SELF")}'
        func = named_domain.get("f", lambda x, k, b: k * x + b)
        self.learner[name] = CurveFitting(name, learner, func)
        self.data_type[name] = "Curve_fitting"
    def add_select_from_model(self, learner, parameters=""):
        model = self.get_learner(learner)
        name = f"Le[{len(self.learner)}]SelectFrom_Model:{learner}"
        # parse the parameters
        args_use = self.learner_parameters(parameters, "SelectFrom_Model")
        # build the learner
        self.learner[name] = SelectFromModel(
            learner=model, args_use=args_use, Dic=self.learn_dict
        )
        self.data_type[name] = "SelectFrom_Model"
    def add_predictive_heat_map(self, learner, parameters=""):
        model = self.get_learner(learner)
        name = f"Le[{len(self.learner)}]Predictive_HeatMap:{learner}"
        # build the learner
        args_use = self.learner_parameters(parameters, "Predictive_HeatMap")
        self.learner[name] = PredictiveHeatmap(learner=model, args_use=args_use)
        self.data_type[name] = "Predictive_HeatMap"
    def add_predictive_heat_map_more(self, learner, parameters=""):
        model = self.get_learner(learner)
        name = f"Le[{len(self.learner)}]Predictive_HeatMap_More:{learner}"
        # build the learner
        args_use = self.learner_parameters(parameters, "Predictive_HeatMap_More")
        self.learner[name] = PredictiveHeatmapMore(learner=model, args_use=args_use)
        self.data_type[name] = "Predictive_HeatMap_More"
    def add_view_data(self, learner, parameters=""):
        model = self.get_learner(learner)
        name = f"Le[{len(self.learner)}]View_data:{learner}"
        # build the learner
        args_use = self.learner_parameters(parameters, "View_data")
        self.learner[name] = ViewData(learner=model, args_use=args_use)
        self.data_type[name] = "View_data"
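    # Usage sketch (illustrative): add_curve_fitting executes its argument, so the
    # caller supplies source code that binds `name` and a fit function `f`:
    #
    #     ml.add_curve_fitting("name = 'quad'\ndef f(x, a, b, c):\n    return a * x**2 + b * x + c")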
@plugin_class_loading(get_path(r"template/machinelearning"))
class MachineLearnerScore(MachineLearnerInit):
    def score(self, name_x, name_y, learner):  # scoring only; Fit_Simp handles the general fit path
        model = self.get_learner(learner)
        x = self.get_sheet(name_x)
        y = self.get_sheet(name_y)
        return model.score(x, y)
    def model_evaluation(self, learner, save_dir, name_x, name_y, func=0):  # score the model and export the report
        x = self.get_sheet(name_x)
        y = self.get_sheet(name_y)
        if new_dir_global:
            dic = save_dir + f"/{learner}分类评分[CoTan]"
            new_dic = dic
            a = 0
            while exists(new_dic):  # keep suffixing until the directory name is unused
                new_dic = dic + f"[{a}]"
                a += 1
            mkdir(new_dic)
        else:
            new_dic = save_dir
        model = self.get_learner(learner)
        # choose the scoring routine: 0 = classification, 1 = regression, 2 = clustering
        func = [model.class_score, model.regression_score, model.clusters_score][func]
        save = func(new_dic, x, y)[0]
        if tar_global:
            pack(f"{new_dic}.tar.gz", new_dic)
        return save, new_dic
    def model_visualization(self, learner, save_dir):  # export the model's visualization data
        if new_dir_global:
            dic = save_dir + f"/{learner}数据[CoTan]"
            new_dic = dic
            a = 0
            while exists(new_dic):  # keep suffixing until the directory name is unused
                new_dic = dic + f"[{a}]"
                a += 1
            mkdir(new_dic)
        else:
            new_dic = save_dir
        model = self.get_learner(learner)
        if model.model is not None and not isinstance(model.model, list) and clf_global:
            joblib.dump(model.model, new_dic + "/MODEL.model")  # persist the fitted model
        save = model.data_visualization(new_dic)[0]
        # pack the output directory
        if tar_global:
            pack(f"{new_dic}.tar.gz", new_dic)
        return save, new_dic
@plugin_class_loading(get_path(r"template/machinelearning"))
class LearnerActions(MachineLearnerInit):
    def fit_model(self, x_name, y_name, learner, split=0.3, *args, **kwargs):
        x_data = self.get_sheet(x_name)
        y_data = self.get_sheet(y_name)
        model = self.get_learner(learner)
        return model.fit_model(
            x_data, y_data, split=split, x_name=x_name, add_func=self.add_form
        )
    def predict(self, x_name, learner, **kwargs):
        x_data = self.get_sheet(x_name)
        model = self.get_learner(learner)
        y_data, name = model.predict(x_data, x_name=x_name, add_func=self.add_form)
        self.add_form(y_data, f"{x_name}:{name}")
        return y_data
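    # Usage sketch (illustrative, assuming a concrete class that mixes in
    # MachineLearnerAdd and LearnerActions and has sheets registered):
    #
    #     ml.add_learner("Line")          # registered as "Le[0]Line"
    #     ml.fit_model("train_x", "train_y", "Le[0]Line", split=0.3)
    #     ml.predict("test_x", "Le[0]Line")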