# template.py
import joblib
import re
import tarfile
from abc import ABCMeta, abstractmethod
from os import getcwd, mkdir
from os.path import split as path_split, splitext, basename, exists
import os
import logging
from sklearn.svm import SVC, SVR  # SVC is SVM classification, SVR is SVM regression
from sklearn.cluster import KMeans, AgglomerativeClustering, DBSCAN
from sklearn.manifold import TSNE
from sklearn.neural_network import MLPClassifier, MLPRegressor
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as Lda
from sklearn.decomposition import PCA, IncrementalPCA, KernelPCA, NMF
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import *
from sklearn.feature_selection import *
from sklearn.metrics import *
from sklearn.ensemble import (
    RandomForestClassifier,
    RandomForestRegressor,
    GradientBoostingClassifier,
    GradientBoostingRegressor,
)
import numpy as np
import matplotlib.pyplot as plt
from pandas import DataFrame, read_csv
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor, export_graphviz
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
from sklearn.linear_model import *
from sklearn.model_selection import train_test_split
from scipy.fftpack import fft, ifft  # fast Fourier transform
from scipy import optimize
from scipy.cluster.hierarchy import dendrogram, ward
from pyecharts.components import Table as TableFisrt  # table rendering
from pyecharts.options.series_options import JsCode
from pyecharts.charts import Tab as tab_First
from pyecharts.charts import *
from pyecharts import options as opts
from pyecharts.components import Image
from pyecharts.globals import CurrentConfig
from system import plugin_class_loading, get_path, plugin_func_loading, basicConfig

logging.basicConfig(**basicConfig)
CurrentConfig.ONLINE_HOST = f"{getcwd()}{os.sep}assets{os.sep}"
# Settings
np.set_printoptions(threshold=np.inf)
global_setting = dict(
    toolbox_opts=opts.ToolboxOpts(is_show=True),
    legend_opts=opts.LegendOpts(pos_bottom="3%", type_="scroll"),
)
global_not_legend = dict(
    toolbox_opts=opts.ToolboxOpts(is_show=True),
    legend_opts=opts.LegendOpts(is_show=False),
)
label_setting = dict(label_opts=opts.LabelOpts(is_show=False))
more_global = False  # whether to plot with all features
all_global = True  # whether to export charts
csv_global = True  # whether to export CSV
clf_global = True  # whether to export the model
tar_global = True  # whether to pack everything into a tar archive
new_dir_global = True  # whether to create a new directory

class LearnBase(metaclass=ABCMeta):
    def __init__(self, *args, **kwargs):
        self.numpy_dict = {}  # name -> numpy array
        self.fucn_add()  # build func_dict

    def fucn_add(self):
        self.func_dict = {
            "abs": lambda x, y: np.abs(x),
            "sqrt": lambda x, y: np.sqrt(x),
            "pow": lambda x, y: x ** y,
            "loge": lambda x, y: np.log(x),
            "log10": lambda x, y: np.log10(x),
            "ceil": lambda x, y: np.ceil(x),
            "floor": lambda x, y: np.floor(x),
            "rint": lambda x, y: np.rint(x),
            "sin": lambda x, y: np.sin(x),
            "cos": lambda x, y: np.cos(x),
            "tan": lambda x, y: np.tan(x),
            "tanh": lambda x, y: np.tanh(x),
            "sinh": lambda x, y: np.sinh(x),
            "cosh": lambda x, y: np.cosh(x),
            "asin": lambda x, y: np.arcsin(x),
            "acos": lambda x, y: np.arccos(x),
            "atan": lambda x, y: np.arctan(x),
            "atanh": lambda x, y: np.arctanh(x),
            "asinh": lambda x, y: np.arcsinh(x),
            "acosh": lambda x, y: np.arccosh(x),
            "add": lambda x, y: x + y,  # matrix or element-wise
            "sub": lambda x, y: x - y,  # matrix or element-wise
            "mul": lambda x, y: np.multiply(x, y),  # element-wise
            "matmul": lambda x, y: np.matmul(x, y),  # matrix
            "dot": lambda x, y: np.dot(x, y),  # matrix
            "div": lambda x, y: x / y,
            "div_floor": lambda x, y: np.floor_divide(x, y),
            "power": lambda x, y: np.power(x, y),  # element-wise
        }

    def get_form(self) -> dict:
        return self.numpy_dict.copy()

    def get_sheet(self, name) -> np.ndarray:
        return self.numpy_dict[name].copy()

    @abstractmethod
    def add_form(self, data, name):
        pass

@plugin_class_loading(get_path(r"template/machinelearning"))
class LearnerIO(LearnBase):
    def add_form(self, data: np.ndarray, name):
        name = f"{name}[{len(self.numpy_dict)}]"
        self.numpy_dict[name] = data

    def del_sheet(self, name):
        del self.numpy_dict[name]

    def read_csv(
            self,
            file_dir,
            name,
            encoding="utf-8",
            str_must=False,
            sep=","):
        dtype = str if str_must else float  # np.str/np.float were removed from NumPy
        dataframe = read_csv(
            file_dir,
            encoding=encoding,
            delimiter=sep,
            header=None)
        try:
            data = dataframe.to_numpy(dtype=dtype)
        except ValueError:
            data = dataframe.to_numpy(dtype=str)
        if data.ndim == 1:
            data = np.expand_dims(data, axis=1)
        self.add_form(data, name)
        return data
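    # Illustrative usage sketch (not part of the original file; assumes the
    # plugin decorator returns the class unchanged and that "data.csv" exists):
    #   io = LearnerIO()
    #   io.read_csv("data.csv", "sheet")  # stored as "sheet[0]": add_form
    #                                     # appends the running sheet index
    #   arr = io.get_sheet("sheet[0]")    # returns a copy of the ndarray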
    def add_python(self, python_file, sheet_name):
        name = {}
        name.update(globals().copy())
        name.update(locals().copy())
        exec(python_file, name)
        exec("get = Creat()", name)  # the user script must define a Creat class
        if isinstance(name["get"], np.ndarray):  # np.array is a function, not a type
            get = name["get"]
        else:
            get = np.array(name["get"])
        self.add_form(get, sheet_name)
        return get

    def to_csv(self, save_dir: str, name, sep) -> str:
        get: np.ndarray = self.get_sheet(name)
        np.savetxt(save_dir, get, delimiter=sep)
        return save_dir

    def to_html_one(self, name, html_dir=""):
        if html_dir == "":
            html_dir = f"{name}.html"
        get: np.ndarray = self.get_sheet(name)
        if get.ndim == 1:
            get = np.expand_dims(get, axis=1)
        get: list = get.tolist()
        for i in range(len(get)):
            get[i] = [i + 1] + get[i]
        headers = [i for i in range(len(get[0]))]
        table = TableFisrt()
        table.add(headers, get).set_global_opts(
            title_opts=opts.ComponentTitleOpts(
                title=f"表格:{name}", subtitle="CoTan~机器学习:查看数据"
            )
        )
        table.render(html_dir)
        return html_dir
    def to_html(self, name, html_dir="", html_type=0):
        if html_dir == "":
            html_dir = f"{name}.html"
        # put the sheet to draw first
        sheet_dict = self.get_form()
        del sheet_dict[name]
        sheet_list = [name] + list(sheet_dict.keys())

        class TabBase:
            def __init__(self, q):
                self.tab = q  # a Tab

            def render(self, render_dir):
                return self.tab.render(render_dir)

        # build the display page
        if html_type == 0:
            class NewTab(TabBase):
                def add(self, table_, k, *f):
                    self.tab.add(table_, k)

            tab = NewTab(tab_First(page_title="CoTan:查看表格"))  # a Tab
        elif html_type == 1:
            class NewTab(TabBase):
                def add(self, table_, *k):
                    self.tab.add(table_)

            tab = NewTab(
                Page(
                    page_title="CoTan:查看表格",
                    layout=Page.DraggablePageLayout))
        else:
            class NewTab(TabBase):
                def add(self, table_, *k):
                    self.tab.add(table_)

            tab = NewTab(
                Page(
                    page_title="CoTan:查看表格",
                    layout=Page.SimplePageLayout))
        # iterate and add the content
        for name in sheet_list:
            get: np.ndarray = self.get_sheet(name)
            if get.ndim == 1:
                get = np.expand_dims(get, axis=1)
            get: list = get.tolist()
            for i in range(len(get)):
                get[i] = [i + 1] + get[i]
            headers = [i for i in range(len(get[0]))]
            table = TableFisrt()
            table.add(headers, get).set_global_opts(
                title_opts=opts.ComponentTitleOpts(
                    title=f"表格:{name}", subtitle="CoTan~机器学习:查看数据"
                )
            )
            tab.add(table, f"表格:{name}")
        tab.render(html_dir)
        return html_dir

@plugin_class_loading(get_path(r"template/machinelearning"))
class LearnerMerge(LearnBase, metaclass=ABCMeta):
    def merge(self, name, axis=0):  # axis: 0 - horizontal (hstack), 1 - vertical (vstack), 2 - depth (dstack)
        sheet_list = []
        for i in name:
            sheet_list.append(self.get_sheet(i))
        get = {0: np.hstack, 1: np.vstack, 2: np.dstack}[axis](sheet_list)
        self.add_form(np.array(get), f"{name[0]}合成")

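# Axis semantics of merge, for reference (illustrative comment, not part of
# the original file; the sheet names are hypothetical):
#   merger.merge(["a[0]", "b[1]"], axis=0)  # np.hstack: join side by side
#   merger.merge(["a[0]", "b[1]"], axis=1)  # np.vstack: stack rows
#   merger.merge(["a[0]", "b[1]"], axis=2)  # np.dstack: stack along depth
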
@plugin_class_loading(get_path(r"template/machinelearning"))
class LearnerSplit(LearnBase, metaclass=ABCMeta):
    def split(self, name, split=2, axis=0):  # axis: 0 - horizontal (hsplit), 1 - vertical (vsplit), 2 - depth (dsplit)
        sheet = self.get_sheet(name)
        get = {0: np.hsplit, 1: np.vsplit, 2: np.dsplit}[axis](sheet, split)
        for i in get:
            self.add_form(i, f"{name[0]}分割")

    def two_split(self, name, split, axis):  # binary split (0 - horizontal, 1 - vertical)
        sheet = self.get_sheet(name)
        try:
            split = float(eval(split))
            if split < 1:
                # a fraction of the row or column count
                split = int(split * (len(sheet) if axis == 1 else len(sheet[0])))
            else:
                assert False
        except (ValueError, AssertionError):
            split = int(split)
        if axis == 0:  # only the horizontal (column) case is implemented here
            self.add_form(sheet[:, split:], f"{name[0]}分割")
            self.add_form(sheet[:, :split], f"{name[0]}分割")

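# two_split accepts either a fraction or an index (illustrative comment, not
# part of the original file; the sheet name is hypothetical):
#   splitter.two_split("a[0]", "0.25", axis=0)  # fraction < 1: 25% of the columns
#   splitter.two_split("a[0]", "3", axis=0)     # integer: split at column 3
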
@plugin_class_loading(get_path(r"template/machinelearning"))
class LearnerDimensions(LearnBase, metaclass=ABCMeta):
    @staticmethod
    def deep(sheet: np.ndarray):
        return sheet.ravel()

    @staticmethod
    def down_ndim(sheet: np.ndarray):  # horizontal
        down_list = []
        for i in sheet:
            down_list.append(i.ravel())
        return np.array(down_list)

    @staticmethod
    def longitudinal_down_ndim(sheet: np.ndarray):  # vertical
        down_list = []
        for i in range(len(sheet[0])):
            down_list.append(sheet[:, i].ravel())
        return np.array(down_list).T

    def reval(self, name, axis):  # axis: 0 - horizontal, 1 - vertical (with .T), 2 - deep
        sheet = self.get_sheet(name)
        self.add_form(
            {0: self.down_ndim, 1: self.longitudinal_down_ndim, 2: self.deep}[axis](
                sheet
            ).copy(),
            f"{name}伸展",
        )

    def del_ndim(self, name):  # drop size-1 dimensions
        sheet = self.get_sheet(name)
        self.add_form(np.squeeze(sheet), f"{name}降维")

@plugin_class_loading(get_path(r"template/machinelearning"))
class LearnerShape(LearnBase, metaclass=ABCMeta):
    def transpose(self, name, func: list):
        sheet = self.get_sheet(name)
        if sheet.ndim <= 2:
            self.add_form(sheet.transpose().copy(), f"{name}.T")
        else:
            self.add_form(np.transpose(sheet, func).copy(), f"{name}.T")

    def reshape(self, name, shape: list):
        sheet = self.get_sheet(name)
        self.add_form(sheet.reshape(shape).copy(), f"{name}.r")

@plugin_class_loading(get_path(r"template/machinelearning"))
class Calculation(LearnBase, metaclass=ABCMeta):
    def calculation_matrix(self, data, data_type, func):
        if 1 not in data_type:  # at least one operand must be a stored sheet
            raise Exception
        func = self.func_dict.get(func, lambda x, y: x)
        args_data = []
        for i in range(len(data)):
            if data_type[i] == 0:  # 0 marks a literal value
                args_data.append(data[i])
            else:  # 1 marks the name of a stored sheet
                args_data.append(self.get_sheet(data[i]))
        get = func(*args_data)
        self.add_form(get, f"{func}({data[0]},{data[1]})")
        return get

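# Illustrative sketch of calculation_matrix (not part of the original file;
# the sheet names are hypothetical). data holds the operands and data_type
# marks each one as a literal value (0) or a stored sheet name (1):
#   calc.calculation_matrix(["a[0]", 2], [1, 0], "power")        # element-wise a ** 2
#   calc.calculation_matrix(["a[0]", "b[1]"], [1, 1], "matmul")  # matrix product
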
class Machinebase(metaclass=ABCMeta):  # base class of all learners
    def __init__(self, *args, **kwargs):
        self.model = None
        self.have_fit = False
        self.have_predict = False
        self.x_traindata = None
        self.y_traindata = None
        # test data, only used by supervised learning
        self.x_testdata = None
        self.y_testdata = None
        # these two are recorded for cloning

    @abstractmethod
    def fit_model(self, x_data, y_data, split, increment, kwargs):
        pass

    @abstractmethod
    def score(self, x_data, y_data):
        pass

    @abstractmethod
    def class_score(self, save_dir, x_data, y_really):
        pass

    @staticmethod
    def _accuracy(y_predict, y_really):  # accuracy
        return accuracy_score(y_really, y_predict)

    @staticmethod
    def _macro(y_predict, y_really, func_num=0):
        func = [recall_score, precision_score, f1_score]  # recall, precision and F1
        class_ = np.unique(y_really).tolist()
        result = func[func_num](y_really, y_predict, labels=class_, average=None)
        return result, class_

    @staticmethod
    def _confusion_matrix(y_predict, y_really):  # confusion matrix
        class_ = np.unique(y_really).tolist()
        return confusion_matrix(y_really, y_predict), class_

    @staticmethod
    def _kappa_score(y_predict, y_really):
        return cohen_kappa_score(y_really, y_predict)

    @abstractmethod
    def regression_score(self, save_dir, x_data, y_really):
        pass

    @abstractmethod
    def clusters_score(self, save_dir, x_data, args):
        pass

    @staticmethod
    def _mse(y_predict, y_really):  # mean squared error
        return mean_squared_error(y_really, y_predict)

    @staticmethod
    def _mae(y_predict, y_really):  # median absolute error
        return median_absolute_error(y_really, y_predict)

    @staticmethod
    def _r2_score(y_predict, y_really):  # R^2 score
        return r2_score(y_really, y_predict)

    def _rmse(self, y_predict, y_really):  # root mean squared error
        return self._mse(y_predict, y_really) ** 0.5

    @staticmethod
    def _coefficient_clustering(x_data, y_predict):
        means_score = silhouette_score(x_data, y_predict)
        outline_score = silhouette_samples(x_data, y_predict)
        return means_score, outline_score

    @abstractmethod
    def predict(self, x_data, args, kwargs):
        pass

    @abstractmethod
    def data_visualization(self, save_dir, args, kwargs):
        pass

@plugin_class_loading(get_path(r"template/machinelearning"))
class StudyMachinebase(Machinebase):
    def fit_model(self, x_data, y_data, split=0.3, increment=True, **kwargs):
        y_data = y_data.ravel()
        try:
            # accumulate only when training incrementally on top of old data
            assert increment and self.x_traindata is not None
            self.x_traindata = np.vstack((x_data, self.x_traindata))
            # labels are 1-D after ravel, so concatenate with hstack
            self.y_traindata = np.hstack((y_data, self.y_traindata))
        except (AssertionError, ValueError):
            self.x_traindata = x_data.copy()
            self.y_traindata = y_data.copy()
        x_train, x_test, y_train, y_test = train_test_split(
            x_data, y_data, test_size=split
        )
        try:  # incremental training
            assert increment
            self.model.partial_fit(x_data, y_data)
        except (AssertionError, AttributeError):
            self.model.fit(self.x_traindata, self.y_traindata)
        train_score = self.model.score(x_train, y_train)
        test_score = self.model.score(x_test, y_test)
        self.have_fit = True
        return train_score, test_score
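    # Illustrative sketch of the two fitting paths (not part of the original
    # file; behavior depends on whether self.model implements partial_fit):
    #   learner.fit_model(x1, y1, split=0.3, increment=True)   # incremental: partial_fit on the new batch
    #   learner.fit_model(x2, y2, split=0.3, increment=False)  # full refit; accumulated data is reset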
    def score(self, x_data, y_data):
        score = self.model.score(x_data, y_data)
        return score

    def class_score(self, save_dir, x_data: np.ndarray, y_really: np.ndarray):
        y_really: np.ndarray = y_really.ravel()
        y_predict: np.ndarray = self.predict(x_data)[0]
        accuracy = self._accuracy(y_predict, y_really)
        recall, class_list = self._macro(y_predict, y_really, 0)
        precision, class_list = self._macro(y_predict, y_really, 1)
        f1, class_list = self._macro(y_predict, y_really, 2)
        confusion_matrix_, class_list = self._confusion_matrix(
            y_predict, y_really)
        kappa = self._kappa_score(y_predict, y_really)
        class_list: list
        tab = Tab()

        def gauge_base(name: str, value_: float) -> Gauge:
            c = (
                Gauge()
                .add("", [(name, round(value_ * 100, 2))], min_=0, max_=100)
                .set_global_opts(title_opts=opts.TitleOpts(title=name))
            )
            return c

        tab.add(gauge_base("准确率", accuracy), "准确率")
        tab.add(gauge_base("kappa", kappa), "kappa")

        def bar_base(name, value_) -> Bar:
            c = (
                Bar()
                .add_xaxis(class_list)
                .add_yaxis(name, value_, **label_setting)
                .set_global_opts(
                    title_opts=opts.TitleOpts(title=name), **global_setting
                )
            )
            return c

        tab.add(bar_base("精确率", precision.tolist()), "精确率")
        tab.add(bar_base("召回率", recall.tolist()), "召回率")
        tab.add(bar_base("F1", f1.tolist()), "F1")

        def heatmap_base(name, value_, max_, min_, show) -> HeatMap:
            c = (
                HeatMap()
                .add_xaxis(class_list)
                .add_yaxis(
                    name,
                    class_list,
                    value_,
                    label_opts=opts.LabelOpts(is_show=show, position="inside"),
                )
                .set_global_opts(
                    title_opts=opts.TitleOpts(title=name),
                    **global_setting,
                    visualmap_opts=opts.VisualMapOpts(
                        max_=max_, min_=min_, pos_right="3%"
                    ),
                )
            )
            return c

        value = [
            [class_list[i], class_list[j], float(confusion_matrix_[i, j])]
            for i in range(len(class_list))
            for j in range(len(class_list))
        ]
        tab.add(
            heatmap_base(
                "混淆矩阵",
                value,
                float(confusion_matrix_.max()),
                float(confusion_matrix_.min()),
                len(class_list) < 7,
            ),
            "混淆矩阵",
        )
        Statistics.des_to_csv(
            save_dir,
            "混淆矩阵",
            confusion_matrix_,
            class_list,
            class_list)
        Statistics.des_to_csv(
            save_dir, "评分",
            [precision, recall, f1],
            class_list,
            ["精确率", "召回率", "F1"])
        save = save_dir + rf"{os.sep}分类模型评估.HTML"
        tab.render(save)
        return save,
    def regression_score(
            self,
            save_dir,
            x_data: np.ndarray,
            y_really: np.ndarray):
        y_really = y_really.ravel()
        y_predict = self.predict(x_data)[0]
        tab = Tab()
        mse = self._mse(y_predict, y_really)
        mae = self._mae(y_predict, y_really)
        r2_score_ = self._r2_score(y_predict, y_really)
        rmse = self._rmse(y_predict, y_really)
        tab.add(
            MakePyecharts.make_tab(
                ["MSE", "MAE", "RMSE", "r2_Score"],
                [[mse, mae, rmse, r2_score_]]),
            "评估数据",
        )
        save = save_dir + rf"{os.sep}回归模型评估.HTML"
        tab.render(save)
        return save,
    def clusters_score(self, save_dir, x_data: np.ndarray, *args):
        y_predict = self.predict(x_data)[0]
        tab = Tab()
        coefficient, coefficient_array = self._coefficient_clustering(
            x_data, y_predict)

        def gauge_base(name: str, value: float) -> Gauge:
            c = (
                Gauge()
                .add(
                    "",
                    [(name, round(value * 100, 2))],
                    min_=0,
                    max_=10 ** (DataOperations.judging_digits(value * 100)),
                )
                .set_global_opts(title_opts=opts.TitleOpts(title=name))
            )
            return c

        def bar_base(name, value, xaxis) -> Bar:
            c = (
                Bar()
                .add_xaxis(xaxis)
                .add_yaxis(name, value, **label_setting)
                .set_global_opts(
                    title_opts=opts.TitleOpts(title=name), **global_setting
                )
            )
            return c

        tab.add(gauge_base("平均轮廓系数", coefficient), "平均轮廓系数")

        def bar_(coefficient_array_, name="数据轮廓系数"):
            xaxis = [f"数据{i}" for i in range(len(coefficient_array_))]
            value = coefficient_array_.tolist()
            tab.add(bar_base(name, value, xaxis), name)

        n = 20
        if len(coefficient_array) <= n:
            bar_(coefficient_array)
        elif len(coefficient_array) <= n ** 2:
            a = 0
            while a < len(coefficient_array):  # <= would add an empty trailing chart
                b = a + n
                if b >= len(coefficient_array):
                    b = len(coefficient_array) + 1
                cofe_array = coefficient_array[a:b]
                bar_(cofe_array, f"{a}-{b}数据轮廓系数")
                a += n
        else:
            # array_split tolerates lengths not divisible by n, unlike hsplit
            split = np.array_split(coefficient_array, n)
            percent = 100 // n  # each chunk covers 100/n percent of the samples
            a = 0
            for cofe_array in split:
                bar_(cofe_array, f"{a}%-{a + percent}%数据轮廓系数")
                a += percent
        save = save_dir + rf"{os.sep}聚类模型评估.HTML"
        tab.render(save)
        return save,
    def predict(self, x_data, *args, **kwargs):
        self.x_testdata = x_data.copy()
        y_predict = self.model.predict(x_data)
        self.y_testdata = y_predict.copy()
        self.have_predict = True
        return y_predict, "预测"

    def data_visualization(self, save_dir, *args, **kwargs):
        return save_dir,

class PrepBase(StudyMachinebase):  # a second training run is not allowed
    def __init__(self, *args, **kwargs):
        super(PrepBase, self).__init__(*args, **kwargs)
        self.model = None

    def fit_model(self, x_data, y_data, increment=True, *args, **kwargs):
        if not self.have_predict:  # no second training run allowed
            y_data = y_data.ravel()
            try:
                # accumulate only when training incrementally on top of old data
                assert increment and self.x_traindata is not None
                self.x_traindata = np.vstack((x_data, self.x_traindata))
                # labels are 1-D after ravel, so concatenate with hstack
                self.y_traindata = np.hstack((y_data, self.y_traindata))
            except (AssertionError, ValueError):
                self.x_traindata = x_data.copy()
                self.y_traindata = y_data.copy()
            try:  # incremental training
                assert increment
                self.model.partial_fit(x_data, y_data)
            except (AssertionError, AttributeError):
                self.model.fit(self.x_traindata, self.y_traindata)
            self.have_fit = True
        return "None", "None"

    def predict(self, x_data, *args, **kwargs):
        self.x_testdata = x_data.copy()
        x_predict = self.model.transform(x_data)
        self.y_testdata = x_predict.copy()
        self.have_predict = True
        return x_predict, "特征工程"

    def score(self, x_data, y_data):
        return "None"  # no score available

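# Illustrative sketch of the preprocessing flow (not part of the original
# file; a concrete subclass would assign e.g. a scaler to self.model):
#   prep.fit_model(x_train, y_train)    # fits the underlying transformer
#   x_new, tag = prep.predict(x_train)  # "predict" actually calls transform()
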
class Unsupervised(PrepBase):  # unsupervised; a second training run is not allowed
    def fit_model(self, x_data, increment=True, *args, **kwargs):
        if not self.have_predict:  # no second training run allowed
            self.y_traindata = None
            try:
                # accumulate only when training incrementally on top of old data
                assert increment and self.x_traindata is not None
                self.x_traindata = np.vstack((x_data, self.x_traindata))
            except (AssertionError, ValueError):
                self.x_traindata = x_data.copy()
            try:  # incremental training
                assert increment
                self.model.partial_fit(x_data)
            except (AssertionError, AttributeError):
                self.model.fit(self.x_traindata, self.y_traindata)
            self.have_fit = True
        return "None", "None"

class UnsupervisedModel(PrepBase):  # unsupervised
    def fit_model(self, x_data, increment=True, *args, **kwargs):
        self.y_traindata = None
        try:
            # accumulate only when training incrementally on top of old data
            assert increment and self.x_traindata is not None
            self.x_traindata = np.vstack((x_data, self.x_traindata))
        except (AssertionError, ValueError):
            self.x_traindata = x_data.copy()
        try:  # incremental training
            # a bare raise Exception here would escape the except clause below
            assert increment
            self.model.partial_fit(x_data)
        except (AssertionError, AttributeError):
            self.model.fit(self.x_traindata, self.y_traindata)
        self.have_fit = True
        return "None", "None"

@plugin_class_loading(get_path(r"template/machinelearning"))
class ToPyebase(StudyMachinebase):
    def __init__(self, model, *args, **kwargs):
        super(ToPyebase, self).__init__(*args, **kwargs)
        self.model = None
        # these two are recorded for cloning
        self.k = {}
        self.model_Name = model

    def fit_model(self, x_data, y_data, *args, **kwargs):
        self.x_traindata = x_data.copy()
        self.y_traindata = y_data.ravel().copy()
        self.have_fit = True
        return "None", "None"

    def predict(self, x_data, *args, **kwargs):
        self.have_predict = True
        return np.array([]), "请使用训练"

    def score(self, x_data, y_data):
        return "None"  # no score available

class DataAnalysis(ToPyebase):  # data analysis
    def data_visualization(self, save_dir, *args, **kwargs):
        tab = Tab()
        data = self.x_traindata

        def cumulative_calculation(tab_data, func, name, render_tab):
            sum_list = []
            for i in range(len(tab_data)):  # iterate the data row by row
                sum_list.append([])
                for a in range(len(tab_data[i])):
                    s = DataOperations.num_str(func(tab_data[: i + 1, a]), 8)
                    sum_list[-1].append(s)
            Statistics.des_to_csv(save_dir, f"{name}", sum_list)
            render_tab.add(
                MakePyecharts.make_tab(
                    [f"[{i}]" for i in range(len(sum_list[0]))], sum_list),
                f"{name}",
            )

        def geometric_mean(x):
            return np.power(np.prod(x), 1 / len(x))  # geometric mean

        def square_mean(x):
            return np.sqrt(np.sum(np.power(x, 2)) / len(x))  # quadratic (RMS) mean

        def harmonic_mean(x):
            return len(x) / np.sum(np.power(x, -1))  # harmonic mean

        cumulative_calculation(data, np.sum, "累计求和", tab)
        cumulative_calculation(data, np.var, "累计方差", tab)
        cumulative_calculation(data, np.std, "累计标准差", tab)
        cumulative_calculation(data, np.mean, "累计算术平均值", tab)
        cumulative_calculation(data, geometric_mean, "累计几何平均值", tab)
        cumulative_calculation(data, square_mean, "累计平方平均值", tab)
        cumulative_calculation(data, harmonic_mean, "累计调和平均值", tab)
        cumulative_calculation(data, np.median, "累计中位数", tab)
        cumulative_calculation(data, np.max, "累计最大值", tab)
        cumulative_calculation(data, np.min, "累计最小值", tab)
        save = save_dir + rf"{os.sep}数据分析.HTML"
        tab.render(save)  # render the HTML
        return save,

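# For reference, the three non-arithmetic means used above (illustrative
# comment, not part of the original file):
#   geometric mean:          (x1 * x2 * ... * xn) ** (1 / n)
#   quadratic (square) mean: sqrt((x1**2 + ... + xn**2) / n)
#   harmonic mean:           n / (1/x1 + 1/x2 + ... + 1/xn)
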
class Corr(ToPyebase):  # correlation and covariance
    def data_visualization(self, save_dir, *args, **kwargs):
        tab = Tab()
        data = DataFrame(self.x_traindata)
        corr: np.ndarray = data.corr().to_numpy()  # correlation
        cov: np.ndarray = data.cov().to_numpy()  # covariance

        def heat_map(data_, name: str, max_, min_):
            x = [f"特征[{i}]" for i in range(len(data_))]
            y = [f"特征[{i}]" for i in range(len(data_[0]))]
            value = [
                (f"特征[{i}]", f"特征[{j}]", float(data_[i][j]))
                for i in range(len(data_))
                for j in range(len(data_[i]))
            ]
            c = (
                HeatMap()
                .add_xaxis(x)
                # hide the labels if there are too many features
                .add_yaxis(
                    f"数据",
                    y,
                    value,
                    label_opts=opts.LabelOpts(
                        is_show=len(x) <= 10, position="inside"
                    ),
                )
                .set_global_opts(
                    title_opts=opts.TitleOpts(title="矩阵热力图"),
                    **global_not_legend,
                    yaxis_opts=opts.AxisOpts(
                        is_scale=True, type_="category"
                    ),  # 'category'
                    xaxis_opts=opts.AxisOpts(is_scale=True, type_="category"),
                    visualmap_opts=opts.VisualMapOpts(
                        is_show=True, max_=max_, min_=min_, pos_right="3%"
                    ),
                )  # display
            )
            tab.add(c, name)

        heat_map(corr, "相关性热力图", 1, -1)
        heat_map(cov, "协方差热力图", float(cov.max()), float(cov.min()))
        Statistics.des_to_csv(save_dir, f"相关性矩阵", corr)
        Statistics.des_to_csv(save_dir, f"协方差矩阵", cov)
        save = save_dir + rf"{os.sep}数据相关性.HTML"
        tab.render(save)  # render the HTML
        return save,

class ViewData(ToPyebase):  # view a learner's stored data
    def __init__(
        self, args_use, learner, *args, **kwargs
    ):  # model is the currently selected model type; Alpha is the regularization parameter
        super(ViewData, self).__init__(args_use, learner, *args, **kwargs)
        self.model = learner.Model
        self.Select_Model = None
        self.have_fit = learner.have_Fit
        self.model_Name = "Select_Model"
        self.learner = learner
        self.learner_name = learner.Model_Name

    def fit_model(self, *args, **kwargs):
        self.have_fit = True
        return "None", "None"

    def predict(self, x_data, add_func=None, *args, **kwargs):
        x_traindata = self.learner.x_traindata
        y_traindata = self.learner.y_traindata
        x_name = self.learner_name
        if x_traindata is not None:
            add_func(x_traindata, f"{x_name}:x训练数据")
        try:
            x_testdata = self.x_testdata
            if x_testdata is not None:
                add_func(x_testdata, f"{x_name}:x测试数据")
        except BaseException as e:
            logging.warning(str(e))
        try:
            y_testdata = self.y_testdata.copy()
            if y_testdata is not None:
                add_func(y_testdata, f"{x_name}:y测试数据")
        except BaseException as e:
            logging.warning(str(e))
        self.have_fit = True
        if y_traindata is None:
            return np.array([]), "y训练数据"
        return y_traindata, "y训练数据"

    def data_visualization(self, save_dir, *args, **kwargs):
        return save_dir,

class MatrixScatter(ToPyebase):  # matrix scatter plot
    def data_visualization(self, save_dir, *args, **kwargs):
        tab = Tab()
        data = self.x_traindata
        if data.ndim <= 2:  # 1-D or 2-D data
            c = (
                Scatter()
                # a 1-D array has no second shape entry, so fall back to its length
                .add_xaxis([f"{i}" for i in range(
                    data.shape[1] if data.ndim == 2 else data.shape[0])])
                .set_global_opts(
                    title_opts=opts.TitleOpts(title=f"矩阵散点图"), **global_not_legend
                )
            )
            if data.ndim == 2:
                for num in range(len(data)):
                    i = data[num]
                    c.add_yaxis(f"{num}", [[f"{num}", x] for x in i], color="#FFFFFF")
            else:
                c.add_yaxis(f"0", [[0, x] for x in data], color="#FFFFFF")
            c.set_series_opts(
                label_opts=opts.LabelOpts(
                    is_show=True,
                    color="#000000",
                    position="inside",
                    formatter=JsCode("function(params){return params.data[2];}"),
                ))
        elif data.ndim == 3:
            c = Scatter3D().set_global_opts(
                title_opts=opts.TitleOpts(title=f"矩阵散点图"), **global_not_legend
            )
            for num in range(len(data)):
                i = data[num]
                for s_num in range(len(i)):
                    s = i[s_num]
                    y_data = [[num, s_num, x, float(s[x])] for x in range(len(s))]
                    c.add(
                        f"{num}",
                        y_data,
                        zaxis3d_opts=opts.Axis3DOpts(type_="category"))
            c.set_series_opts(
                label_opts=opts.LabelOpts(
                    is_show=True,
                    color="#000000",
                    position="inside",
                    formatter=JsCode("function(params){return params.data[3];}"),
                ))
        else:
            c = Scatter()
        tab.add(c, "矩阵散点图")
        save = save_dir + rf"{os.sep}矩阵散点图.HTML"
        tab.render(save)  # render the HTML
        return save,

class ClusterTree(ToPyebase):  # cluster dendrogram
    def data_visualization(self, save_dir, *args, **kwargs):
        tab = Tab()
        x_data = self.x_traindata
        linkage_array = ward(x_data)  # self.y_traindata holds the result
        dendrogram(linkage_array)
        plt.savefig(save_dir + rf"{os.sep}Cluster_graph.png")
        image = Image()
        image.add(
            src=save_dir + rf"{os.sep}Cluster_graph.png",
        ).set_global_opts(
            title_opts=opts.ComponentTitleOpts(title="聚类树状图"))
        tab.add(image, "聚类树状图")
        save = save_dir + rf"{os.sep}聚类树状图.HTML"
        tab.render(save)  # render the HTML
        return save,

class ClassBar(ToPyebase):  # per-class bar chart
    def data_visualization(self, save_dir, *args, **kwargs):
        tab = Tab()
        x_data: np.ndarray = self.x_traindata.transpose()
        y_data: np.ndarray = self.y_traindata
        class_: list = np.unique(y_data).tolist()  # class labels
        class_list = []
        for n_class in class_:  # build class_list: one boolean mask per class
            class_list.append(y_data == n_class)
        for num_i in range(len(x_data)):  # iterate over every feature
            i = x_data[num_i]
            i_con = Statistics.is_continuous(i)
            if i_con and len(i) >= 11:
                # plotting data: one outer list per class (legend), one entry per bin
                c_list = [[0] * 10 for _ in class_list]
                start = i.min()
                end = i.max()
                n = (end - start) / 10  # 10 bars
                x_axis = []  # x axis
                iter_num = 0  # current bin index
                while iter_num <= 9:  # split the feature into 10 bins
                    # label the bin on the x axis
                    x_axis.append(
                        f"({iter_num})[{round(start, 2)}-"
                        f"{round((start + n) if (start + n) <= end or not iter_num == 9 else end, 2)}]")
                    try:
                        assert not iter_num == 9  # the 10th pass takes everything left
                        # boolean mask for this bin; & intersects the two conditions
                        s = (start <= i) & (i < start + n)
                    except (AssertionError, IndexError):  # start + n may overshoot end
                        s = (start <= i) & (i <= end)  # boolean mask for the last bin
                    # n_data = i[s]  # feature values in this bin
                    for num in range(len(class_list)):  # iterate over the classes
                        # boolean mask (y_data == n_class), used for slicing
                        now_class: list = class_list[num]
                        # slice to the same positions as this bin's data
                        bool_class = now_class[s].ravel()
                        # count via sum: c_list = [[class1 counts], [class2 counts], ...]
                        c_list[num][iter_num] = int(np.sum(bool_class))
                    iter_num += 1
                    start += n
            else:
                iter_np = np.unique(i)
                # plotting data: one outer list per class (legend), one entry per value
                c_list = [[0] * len(iter_np) for _ in class_list]
                x_axis = []  # x axis data
                for i_num in range(len(iter_np)):  # iterate the distinct values of i
                    i_data = iter_np[i_num]
                    # n_data = i[i == i_data]  # feature values equal to i_data
                    x_axis.append(f"[{i_data}]")
                    for num in range(len(class_list)):  # iterate over the classes
                        now_class = class_list[num]  # boolean mask for this class
                        # slice to the same positions as this value's data
                        bool_class = now_class[i == i_data]
                        # count via sum(bool): c_list = [[class1 counts], [class2 counts], ...]
                        c_list[num][i_num] = np.sum(bool_class)
            c = (
                Bar()
                .add_xaxis(x_axis)
                .set_global_opts(
                    title_opts=opts.TitleOpts(title="类型-特征统计柱状图"),
                    **global_setting,
                    xaxis_opts=opts.AxisOpts(type_="category"),
                    yaxis_opts=opts.AxisOpts(type_="value"),
                )
            )
            y_axis = []
            for i in range(len(c_list)):
                y_axis.append(f"{class_[i]}")
                c.add_yaxis(f"{class_[i]}", c_list[i], **label_setting)
            Statistics.des_to_csv(
                save_dir,
                f"类型-[{num_i}]特征统计柱状图",
                c_list,
                x_axis,
                y_axis)
            tab.add(c, f"类型-[{num_i}]特征统计柱状图")
        # unfinished
        save = save_dir + rf"{os.sep}特征统计.HTML"
        tab.render(save)  # render the HTML
        return save,

class NumpyHeatMap(ToPyebase):  # heat map of a NumPy matrix
    def data_visualization(self, save_dir, *args, **kwargs):
        tab = Tab()
        data = self.x_traindata
        x = [f"横[{i}]" for i in range(len(data))]
        y = [f"纵[{i}]" for i in range(len(data[0]))]
        value = [
            (f"横[{i}]", f"纵[{j}]", float(data[i][j]))
            for i in range(len(data))
            for j in range(len(data[i]))
        ]
        c = (
            HeatMap()
            .add_xaxis(x)
            .add_yaxis(f"数据", y, value, **label_setting)  # the first entry of each value is x
            .set_global_opts(
                title_opts=opts.TitleOpts(title="矩阵热力图"),
                **global_not_legend,
                yaxis_opts=opts.AxisOpts(
                    is_scale=True, type_="category"),  # 'category'
                xaxis_opts=opts.AxisOpts(is_scale=True, type_="category"),
                visualmap_opts=opts.VisualMapOpts(
                    is_show=True,
                    max_=float(data.max()),
                    min_=float(data.min()),
                    pos_right="3%",
                ),
            )  # display
        )
        tab.add(c, "矩阵热力图")
        tab.add(
            MakePyecharts.make_tab(
                x,
                data.transpose().tolist()),
            f"矩阵热力图:表格")
        save = save_dir + rf"{os.sep}矩阵热力图.HTML"
        tab.render(save)  # render the HTML
        return save,

class PredictiveHeatmapBase(ToPyebase):  # predictive heat map
    def __init__(
        self, args_use, learner, *args, **kwargs
    ):  # model is the currently selected model type; Alpha is the regularization parameter
        super(PredictiveHeatmapBase, self).__init__(
            args_use, learner, *args, **kwargs)
        self.model = learner.Model
        self.select_model = None
        self.have_fit = learner.have_Fit
        self.model_Name = "Select_Model"
        self.learner = learner
        self.x_traindata = learner.x_traindata.copy()
        self.y_traindata = learner.y_traindata.copy()
        self.means = []

    def fit_model(self, x_data, *args, **kwargs):
        try:
            self.means = x_data.ravel()
        except BaseException as e:
            logging.warning(str(e))
        self.have_fit = True
        return "None", "None"

    def data_visualization(
            self,
            save_dir,
            decision_boundary_func=None,
            prediction_boundary_func=None,
            *args,
            **kwargs,
    ):
        tab = Tab()
        y = self.y_traindata
        x_data = self.x_traindata
        try:  # falls through if the model has no classes_
            class_ = self.model.classes_.tolist()
            class_heard = [f"类别[{i}]" for i in range(len(class_))]
            # fetch the data
            get, x_means, x_range, data_type = TrainingVisualization.training_visualization(
                x_data, class_, y)
            # the model's own means can be used; nan means skip
            for i in range(min([len(x_means), len(self.means)])):
                try:
                    g = self.means[i]
                    if np.isnan(g):  # g == np.nan is always False, so test with np.isnan
                        raise Exception
                    x_means[i] = g
                except BaseException as e:
                    logging.warning(str(e))
            get = decision_boundary_func(
                x_range, x_means, self.learner.predict, class_, data_type
            )
            for i in range(len(get)):
                tab.add(get[i], f"{i}预测热力图")
            heard = class_heard + [f"普适预测第{i}特征" for i in range(len(x_means))]
            data = class_ + [f"{i}" for i in x_means]
            c = Table().add(headers=heard, rows=[data])
            tab.add(c, "数据表")
        except AttributeError:
            get, x_means, x_range, data_type = TrainingVisualization.regress_visualization(
                x_data, y)
            get = prediction_boundary_func(
                x_range, x_means, self.learner.predict, data_type
            )
            for i in range(len(get)):
                tab.add(get[i], f"{i}预测热力图")
            heard = [f"普适预测第{i}特征" for i in range(len(x_means))]
            data = [f"{i}" for i in x_means]
            c = Table().add(headers=heard, rows=[data])
            tab.add(c, "数据表")
        save = save_dir + rf"{os.sep}预测热力图.HTML"
        tab.render(save)  # render the HTML
        return save,

  1012. class PredictiveHeatmap(PredictiveHeatmapBase): # 绘制预测型热力图
  1013. def data_visualization(self, save_dir, *args, **kwargs):
  1014. return super().data_visualization(
  1015. save_dir, Boundary.decision_boundary, Boundary.prediction_boundary
  1016. )
  1017. class PredictiveHeatmapMore(PredictiveHeatmapBase): # 绘制预测型热力图_More
  1018. def data_visualization(self, save_dir, *args, **kwargs):
  1019. return super().data_visualization(
  1020. save_dir,
  1021. Boundary.decision_boundary_more,
  1022. Boundary.prediction_boundary_more)
  1023. @plugin_class_loading(get_path(r"template/machinelearning"))
  1024. class NearFeatureScatterClassMore(ToPyebase):
  1025. def data_visualization(self, save_dir, *args, **kwargs):
  1026. tab = Tab()
  1027. x_data = self.x_traindata
  1028. y = self.y_traindata
  1029. class_ = np.unique(y).ravel().tolist()
  1030. class_heard = [f"簇[{i}]" for i in range(len(class_))]
  1031. get, x_means, x_range, data_type = TrainingVisualization.training_visualization_more_no_center(
  1032. x_data, class_, y)
  1033. for i in range(len(get)):
  1034. tab.add(get[i], f"{i}训练数据散点图")
  1035. heard = class_heard + [f"普适预测第{i}特征" for i in range(len(x_means))]
  1036. data = class_ + [f"{i}" for i in x_means]
  1037. c = Table().add(headers=heard, rows=[data])
  1038. tab.add(c, "数据表")
  1039. save = save_dir + rf"{os.sep}数据特征散点图(分类).HTML"
  1040. tab.render(save) # 生成HTML
  1041. return save,
  1042. @plugin_class_loading(get_path(r"template/machinelearning"))
  1043. class NearFeatureScatterMore(ToPyebase):
  1044. def data_visualization(self, save_dir, *args, **kwargs):
  1045. tab = Tab()
  1046. x_data = self.x_traindata
  1047. x_means = Statistics.quick_stats(x_data).get()[0]
  1048. get_y = TrainingVisualization.training_visualization_no_class_more(
  1049. x_data, "数据散点图") # 转换
  1050. for i in range(len(get_y)):
  1051. tab.add(get_y[i], f"[{i}]数据x-x散点图")
  1052. heard = [f"普适预测第{i}特征" for i in range(len(x_means))]
  1053. data = [f"{i}" for i in x_means]
  1054. c = Table().add(headers=heard, rows=[data])
  1055. tab.add(c, "数据表")
  1056. save = save_dir + rf"{os.sep}数据特征散点图.HTML"
  1057. tab.render(save) # 生成HTML
  1058. return save,
  1059. class NearFeatureScatterClass(ToPyebase): # 临近特征散点图:分类数据
  1060. def data_visualization(self, save_dir, *args, **kwargs):
  1061. # 获取数据
  1062. class_ = np.unique(self.y_traindata).ravel().tolist()
  1063. class_heard = [f"类别[{i}]" for i in range(len(class_))]
  1064. tab = Tab()
  1065. y = self.y_traindata
  1066. x_data = self.x_traindata
  1067. get, x_means, x_range, data_type = TrainingVisualization.training_visualization(
  1068. x_data, class_, y)
  1069. for i in range(len(get)):
  1070. tab.add(get[i], f"{i}临近特征散点图")
  1071. heard = class_heard + [f"普适预测第{i}特征" for i in range(len(x_means))]
  1072. data = class_ + [f"{i}" for i in x_means]
  1073. c = Table().add(headers=heard, rows=[data])
  1074. tab.add(c, "数据表")
  1075. save = save_dir + rf"{os.sep}临近数据特征散点图(分类).HTML"
  1076. tab.render(save) # 生成HTML
  1077. return save,
  1078. class NearFeatureScatter(ToPyebase): # 临近特征散点图:连续数据
  1079. def data_visualization(self, save_dir, *args, **kwargs):
  1080. tab = Tab()
  1081. x_data = self.x_traindata.transpose()
  1082. get, x_means, x_range, data_type = TrainingVisualization.training_visualization_no_class(
  1083. x_data)
  1084. for i in range(len(get)):
  1085. tab.add(get[i], f"{i}临近特征散点图")
  1086. columns = [f"普适预测第{i}特征" for i in range(len(x_means))]
  1087. data = [f"{i}" for i in x_means]
  1088. tab.add(MakePyecharts.make_tab(columns, [data]), "数据表")
  1089. save = save_dir + rf"{os.sep}临近数据特征散点图.HTML"
  1090. tab.render(save) # 生成HTML
  1091. return save,
  1092. class FeatureScatterYX(ToPyebase): # y-x图
  1093. def data_visualization(self, save_dir, *args, **kwargs):
  1094. tab = Tab()
  1095. x_data = self.x_traindata
  1096. y = self.y_traindata
  1097. get, x_means, x_range, data_type = TrainingVisualization.regress_visualization(
  1098. x_data, y)
  1099. for i in range(len(get)):
  1100. tab.add(get[i], f"{i}特征x-y散点图")
  1101. columns = [f"普适预测第{i}特征" for i in range(len(x_means))]
  1102. data = [f"{i}" for i in x_means]
  1103. tab.add(MakePyecharts.make_tab(columns, [data]), "数据表")
  1104. save = save_dir + rf"{os.sep}特征y-x图像.HTML"
  1105. tab.render(save) # 生成HTML
  1106. return save,
  1107. @plugin_class_loading(get_path(r"template/machinelearning"))
  1108. class LineModel(StudyMachinebase):
  1109. def __init__(
  1110. self, args_use, model, *args, **kwargs
  1111. ): # model表示当前选用的模型类型,Alpha针对正则化的参数
  1112. super(LineModel, self).__init__(*args, **kwargs)
  1113. all_model = {
  1114. "Line": LinearRegression,
  1115. "Ridge": Ridge,
  1116. "Lasso": Lasso}[model]
  1117. if model == "Line":
  1118. self.model = all_model()
  1119. self.k = {}
  1120. else:
  1121. self.model = all_model(
  1122. alpha=args_use["alpha"], max_iter=args_use["max_iter"]
  1123. )
  1124. self.k = {
  1125. "alpha": args_use["alpha"],
  1126. "max_iter": args_use["max_iter"]}
  1127. # 记录这两个是为了克隆
  1128. self.Alpha = args_use["alpha"]
  1129. self.max_iter = args_use["max_iter"]
  1130. self.model_Name = model
  1131. def data_visualization(self, save_dir, *args, **kwargs):
  1132. tab = Tab()
  1133. x_data = self.x_traindata
  1134. y = self.y_traindata
  1135. w_list = self.model.coef_.tolist()
  1136. w_heard = [f"系数w[{i}]" for i in range(len(w_list))]
  1137. b = self.model.intercept_.tolist()
  1138. get, x_means, x_range, data_type = TrainingVisualization.regress_visualization(
  1139. x_data, y)
  1140. get_line = Curve.regress_w(x_data, w_list, b, x_means.copy())
  1141. for i in range(len(get)):
  1142. tab.add(get[i].overlap(get_line[i]), f"{i}预测类型图")
  1143. get = Boundary.prediction_boundary(
  1144. x_range, x_means, self.predict, data_type)
  1145. for i in range(len(get)):
  1146. tab.add(get[i], f"{i}预测热力图")
  1147. tab.add(
  1148. MakePyecharts.coefficient_scatter_plot(
  1149. w_heard, w_list), "系数w散点图")
  1150. tab.add(
  1151. MakePyecharts.coefficient_bar_plot(
  1152. w_heard,
  1153. self.model.coef_),
  1154. "系数柱状图")
  1155. columns = [
  1156. f"普适预测第{i}特征" for i in range(
  1157. len(x_means))] + w_heard + ["截距b"]
  1158. data = [f"{i}" for i in x_means] + w_list + [b]
  1159. if self.model_Name != "Line":
  1160. columns += ["阿尔法", "最大迭代次数"]
  1161. data += [self.model.alpha, self.model.max_iter]
  1162. tab.add(MakePyecharts.make_tab(columns, [data]), "数据表")
  1163. Statistics.des_to_csv(
  1164. save_dir,
  1165. "系数表",
  1166. [w_list + [b]],
  1167. [f"系数W[{i}]" for i in range(len(w_list))] + ["截距"],
  1168. )
  1169. Statistics.des_to_csv(
  1170. save_dir,
  1171. "预测表",
  1172. [[f"{i}" for i in x_means]],
  1173. [f"普适预测第{i}特征" for i in range(len(x_means))],
  1174. )
  1175. save = save_dir + rf"{os.sep}线性回归模型.HTML"
  1176. tab.render(save) # 生成HTML
  1177. return save,
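# Hedged usage sketch for the visualization above: fit a plain sklearn
# LinearRegression on toy data and read back the w/b values that
# LineModel.data_visualization turns into tables and curves. The helper name
# and data are invented for illustration.
def _demo_line_coefficients():
    import numpy as np
    from sklearn.linear_model import LinearRegression

    x = np.array([[0.0, 1.0], [1.0, 2.0], [2.0, 3.0], [3.0, 5.0]])
    y = np.array([1.0, 3.0, 5.0, 8.0])
    model = LinearRegression().fit(x, y)
    w_list = model.coef_.tolist()  # one weight per feature
    b = float(model.intercept_)    # scalar intercept for 1-D targets
    return w_list, b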
  1178. @plugin_class_loading(get_path(r"template/machinelearning"))
  1179. class LogisticregressionModel(StudyMachinebase):
  1180. def __init__(
  1181. self, args_use, model, *args, **kwargs
  1182. ): # model表示当前选用的模型类型,Alpha针对正则化的参数
  1183. super(LogisticregressionModel, self).__init__(*args, **kwargs)
  1184. self.model = LogisticRegression(
  1185. C=args_use["C"], max_iter=args_use["max_iter"])
  1186. # 记录这两个是为了克隆
  1187. self.C = args_use["C"]
  1188. self.max_iter = args_use["max_iter"]
  1189. self.k = {"C": args_use["C"], "max_iter": args_use["max_iter"]}
  1190. self.model_Name = model
def data_visualization(self, save_dir, *args, **kwargs): # save_dir is used as a directory, so no file-name default
  1192. # 获取数据
  1193. w_array = self.model.coef_
  1194. w_list = w_array.tolist() # 变为表格
  1195. b = self.model.intercept_
  1196. c = self.model.C
  1197. max_iter = self.model.max_iter
  1198. class_ = self.model.classes_.tolist()
  1199. class_heard = [f"类别[{i}]" for i in range(len(class_))]
  1200. tab = Tab()
  1201. y = self.y_traindata
  1202. x_data = self.x_traindata
  1203. get, x_means, x_range, data_type = TrainingVisualization.training_visualization(
  1204. x_data, class_, y)
  1205. get_line = Curve.training_w(
  1206. x_data, class_, y, w_list, b, x_means.copy())
  1207. for i in range(len(get)):
  1208. tab.add(get[i].overlap(get_line[i]), f"{i}决策边界散点图")
  1209. for i in range(len(w_list)):
  1210. w = w_list[i]
  1211. w_heard = [f"系数w[{i},{j}]" for j in range(len(w))]
  1212. tab.add(
  1213. MakePyecharts.coefficient_scatter_plot(
  1214. w_heard, w), f"系数w[{i}]散点图")
  1215. tab.add(
  1216. MakePyecharts.coefficient_bar_plot(
  1217. w_heard,
  1218. w_array[i]),
  1219. f"系数w[{i}]柱状图")
  1220. columns = class_heard + \
  1221. [f"截距{i}" for i in range(len(b))] + ["C", "最大迭代数"]
  1222. data = class_ + b.tolist() + [c, max_iter]
  1223. c = Table().add(headers=columns, rows=[data])
  1224. tab.add(c, "数据表")
  1225. c = Table().add(
  1226. headers=[f"系数W[{i}]" for i in range(len(w_list[0]))], rows=w_list
  1227. )
  1228. tab.add(c, "系数数据表")
  1229. c = Table().add(
  1230. headers=[f"普适预测第{i}特征" for i in range(len(x_means))],
  1231. rows=[[f"{i}" for i in x_means]],
  1232. )
  1233. tab.add(c, "普适预测数据表")
  1234. Statistics.des_to_csv(save_dir, "系数表", w_list, [
  1235. f"系数W[{i}]" for i in range(len(w_list[0]))])
  1236. Statistics.des_to_csv(
  1237. save_dir, "截距表", [b], [
  1238. f"截距{i}" for i in range(
  1239. len(b))])
  1240. Statistics.des_to_csv(
  1241. save_dir,
  1242. "预测表",
  1243. [[f"{i}" for i in x_means]],
  1244. [f"普适预测第{i}特征" for i in range(len(x_means))],
  1245. )
  1246. save = save_dir + rf"{os.sep}逻辑回归.HTML"
  1247. tab.render(save) # 生成HTML
  1248. return save,
  1249. class CategoricalData: # 数据统计助手
  1250. def __init__(self):
  1251. self.x_means = []
  1252. self.x_range = []
  1253. self.data_type = []
  1254. def __call__(self, x1, *args, **kwargs):
  1255. get = self.is_continuous(x1)
  1256. return get
  1257. def is_continuous(self, x1: np.array):
  1258. try:
  1259. x1_con = Statistics.is_continuous(x1)
  1260. if x1_con:
  1261. self.x_means.append(np.mean(x1))
  1262. self.add_range(x1)
  1263. else:
  1264. assert False
  1265. return x1_con
except (TypeError, AssertionError): # fall back to the most frequent element
new = np.unique(x1) # deduplicated values
count_list = []
for i in new:
count_list.append(np.sum(x1 == i))
index = count_list.index(max(count_list)) # index of the highest count
self.x_means.append(new[index]) # the mode lives in `new`, not in `x1`
self.add_range(x1, False)
return False
  1275. def add_range(self, x1: np.array, range_=True):
  1276. try:
  1277. assert range_
  1278. min_ = int(x1.min()) - 1
  1279. max_ = int(x1.max()) + 1
  1280. # 不需要复制列表
  1281. self.x_range.append([min_, max_])
  1282. self.data_type.append(1)
  1283. except AssertionError:
  1284. self.x_range.append(list(set(x1.tolist()))) # 去除多余元素
  1285. self.data_type.append(2)
  1286. def get(self):
  1287. return self.x_means, self.x_range, self.data_type
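# Minimal sketch of the per-column statistics CategoricalData gathers, without
# the Statistics.is_continuous dependency: a column is treated as continuous
# when every value converts to float; otherwise its mode is recorded and its
# value set kept as the range. The helper name and rule are illustrative only.
def _demo_categorical_summary(column):
    import numpy as np

    arr = np.asarray(column)
    try:
        nums = arr.astype(float)  # continuous column
        return float(nums.mean()), [int(nums.min()) - 1, int(nums.max()) + 1], 1
    except ValueError:
        values, counts = np.unique(arr, return_counts=True)
        mode = values[counts.argmax()]  # most frequent element
        return mode, values.tolist(), 2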
  1288. @plugin_class_loading(get_path(r"template/machinelearning"))
  1289. class KnnModel(StudyMachinebase):
  1290. def __init__(
  1291. self, args_use, model, *args, **kwargs
  1292. ): # model表示当前选用的模型类型,Alpha针对正则化的参数
  1293. super(KnnModel, self).__init__(*args, **kwargs)
  1294. all_model = {
  1295. "Knn_class": KNeighborsClassifier,
  1296. "Knn": KNeighborsRegressor}[model]
  1297. self.model = all_model(
  1298. p=args_use["p"],
  1299. n_neighbors=args_use["n_neighbors"])
  1300. # 记录这两个是为了克隆
  1301. self.n_neighbors = args_use["n_neighbors"]
  1302. self.p = args_use["p"]
  1303. self.k = {"n_neighbors": args_use["n_neighbors"], "p": args_use["p"]}
  1304. self.model_Name = model
  1305. def data_visualization(self, save_dir, *args, **kwargs):
  1306. tab = Tab()
  1307. y = self.y_traindata
  1308. x_data = self.x_traindata
  1309. y_test = self.y_testdata
  1310. x_test = self.x_testdata
  1311. if self.model_Name == "Knn_class":
  1312. class_ = self.model.classes_.tolist()
  1313. class_heard = [f"类别[{i}]" for i in range(len(class_))]
  1314. get, x_means, x_range, data_type = TrainingVisualization.training_visualization(
  1315. x_data, class_, y)
  1316. for i in range(len(get)):
  1317. tab.add(get[i], f"{i}训练数据散点图")
  1318. if y_test is not None:
  1319. get = TrainingVisualization.training_visualization(
  1320. x_test, class_, y_test)[0]
  1321. for i in range(len(get)):
  1322. tab.add(get[i], f"{i}测试数据散点图")
  1323. get = Boundary.decision_boundary(
  1324. x_range, x_means, self.predict, class_, data_type)
  1325. for i in range(len(get)):
  1326. tab.add(get[i], f"{i}预测热力图")
  1327. heard = class_heard + [f"普适预测第{i}特征" for i in range(len(x_means))]
  1328. data = class_ + [f"{i}" for i in x_means]
  1329. c = Table().add(headers=heard, rows=[data])
  1330. tab.add(c, "数据表")
  1331. else:
  1332. get, x_means, x_range, data_type = TrainingVisualization.regress_visualization(
  1333. x_data, y)
  1334. for i in range(len(get)):
  1335. tab.add(get[i], f"{i}训练数据散点图")
get = TrainingVisualization.regress_visualization(x_test, y_test)[0]
  1338. for i in range(len(get)):
  1339. tab.add(get[i], f"{i}测试数据类型图")
  1340. get = Boundary.prediction_boundary(
  1341. x_range, x_means, self.predict, data_type)
  1342. for i in range(len(get)):
  1343. tab.add(get[i], f"{i}预测热力图")
  1344. heard = [f"普适预测第{i}特征" for i in range(len(x_means))]
  1345. data = [f"{i}" for i in x_means]
  1346. c = Table().add(headers=heard, rows=[data])
  1347. tab.add(c, "数据表")
  1348. Statistics.des_to_csv(
  1349. save_dir,
  1350. "预测表",
  1351. [[f"{i}" for i in x_means]],
  1352. [f"普适预测第{i}特征" for i in range(len(x_means))],
  1353. )
  1354. save = save_dir + rf"{os.sep}K.HTML"
  1355. tab.render(save) # 生成HTML
  1356. return save,
  1357. @plugin_class_loading(get_path(r"template/machinelearning"))
  1358. class TreeModel(StudyMachinebase):
  1359. def __init__(
  1360. self, args_use, model, *args, **kwargs
  1361. ): # model表示当前选用的模型类型,Alpha针对正则化的参数
  1362. super(TreeModel, self).__init__(*args, **kwargs)
  1363. all_model = {
  1364. "Tree_class": DecisionTreeClassifier,
  1365. "Tree": DecisionTreeRegressor,
  1366. }[model]
  1367. self.model = all_model(
  1368. criterion=args_use["criterion"],
  1369. splitter=args_use["splitter"],
  1370. max_features=args_use["max_features"],
  1371. max_depth=args_use["max_depth"],
  1372. min_samples_split=args_use["min_samples_split"],
  1373. )
  1374. # 记录这两个是为了克隆
  1375. self.criterion = args_use["criterion"]
  1376. self.splitter = args_use["splitter"]
  1377. self.max_features = args_use["max_features"]
  1378. self.max_depth = args_use["max_depth"]
  1379. self.min_samples_split = args_use["min_samples_split"]
  1380. self.k = {
  1381. "criterion": args_use["criterion"],
  1382. "splitter": args_use["splitter"],
  1383. "max_features": args_use["max_features"],
  1384. "max_depth": args_use["max_depth"],
  1385. "min_samples_split": args_use["min_samples_split"],
  1386. }
  1387. self.model_Name = model
  1388. def data_visualization(self, save_dir, *args, **kwargs):
  1389. tab = Tab()
  1390. importance = self.model.feature_importances_.tolist()
  1391. with open(save_dir + fr"{os.sep}Tree_Gra.dot", "w") as f:
  1392. export_graphviz(self.model, out_file=f)
  1393. MakePyecharts.make_bar("特征重要性", importance, tab)
  1394. Statistics.des_to_csv(
  1395. save_dir,
  1396. "特征重要性",
  1397. [importance],
  1398. [f"[{i}]特征" for i in range(len(importance))],
  1399. )
  1400. tab.add(
  1401. TreePlot.see_tree(
  1402. save_dir +
  1403. fr"{os.sep}Tree_Gra.dot"),
  1404. "决策树可视化")
  1405. y = self.y_traindata
  1406. x_data = self.x_traindata
  1407. y_test = self.y_testdata
  1408. x_test = self.x_testdata
  1409. if self.model_Name == "Tree_class":
  1410. class_ = self.model.classes_.tolist()
  1411. class_heard = [f"类别[{i}]" for i in range(len(class_))]
  1412. get, x_means, x_range, data_type = TrainingVisualization.training_visualization(
  1413. x_data, class_, y)
  1414. for i in range(len(get)):
  1415. tab.add(get[i], f"{i}训练数据散点图")
  1416. get = TrainingVisualization.training_visualization(
  1417. x_test, class_, y_test)[0]
  1418. for i in range(len(get)):
  1419. tab.add(get[i], f"{i}测试数据散点图")
  1420. get = Boundary.decision_boundary(
  1421. x_range, x_means, self.predict, class_, data_type)
  1422. for i in range(len(get)):
  1423. tab.add(get[i], f"{i}预测热力图")
  1424. tab.add(
  1425. MakePyecharts.make_tab(
  1426. class_heard
  1427. + [f"普适预测第{i}特征" for i in range(len(x_means))]
  1428. + [f"特征{i}重要性" for i in range(len(importance))],
  1429. [class_ + [f"{i}" for i in x_means] + importance],
  1430. ),
  1431. "数据表",
  1432. )
  1433. else:
  1434. get, x_means, x_range, data_type = TrainingVisualization.regress_visualization(
  1435. x_data, y)
  1436. for i in range(len(get)):
  1437. tab.add(get[i], f"{i}训练数据散点图")
get = TrainingVisualization.regress_visualization(x_test, y_test)[0]
  1440. for i in range(len(get)):
  1441. tab.add(get[i], f"{i}测试数据类型图")
  1442. get = Boundary.prediction_boundary(
  1443. x_range, x_means, self.predict, data_type)
  1444. for i in range(len(get)):
  1445. tab.add(get[i], f"{i}预测热力图")
  1446. tab.add(
  1447. MakePyecharts.make_tab(
  1448. [f"普适预测第{i}特征" for i in range(len(x_means))]
  1449. + [f"特征{i}重要性" for i in range(len(importance))],
  1450. [[f"{i}" for i in x_means] + importance],
  1451. ),
  1452. "数据表",
  1453. )
  1454. Statistics.des_to_csv(
  1455. save_dir,
  1456. "预测表",
  1457. [[f"{i}" for i in x_means]],
  1458. [f"普适预测第{i}特征" for i in range(len(x_means))],
  1459. )
  1460. save = save_dir + rf"{os.sep}决策树.HTML"
  1461. tab.render(save) # 生成HTML
  1462. return save,
  1463. @plugin_class_loading(get_path(r"template/machinelearning"))
  1464. class ForestModel(StudyMachinebase):
  1465. def __init__(
  1466. self, args_use, model, *args, **kwargs
  1467. ): # model表示当前选用的模型类型,Alpha针对正则化的参数
  1468. super(ForestModel, self).__init__(*args, **kwargs)
model_class = { # keep `model` as the name string for model_Name below
"Forest_class": RandomForestClassifier,
"Forest": RandomForestRegressor,
}[model]
self.model = model_class(
  1474. n_estimators=args_use["n_Tree"],
  1475. criterion=args_use["criterion"],
  1476. max_features=args_use["max_features"],
  1477. max_depth=args_use["max_depth"],
  1478. min_samples_split=args_use["min_samples_split"],
  1479. )
  1480. # 记录这两个是为了克隆
  1481. self.n_estimators = args_use["n_Tree"]
  1482. self.criterion = args_use["criterion"]
  1483. self.max_features = args_use["max_features"]
  1484. self.max_depth = args_use["max_depth"]
  1485. self.min_samples_split = args_use["min_samples_split"]
  1486. self.k = {
  1487. "n_estimators": args_use["n_Tree"],
  1488. "criterion": args_use["criterion"],
  1489. "max_features": args_use["max_features"],
  1490. "max_depth": args_use["max_depth"],
  1491. "min_samples_split": args_use["min_samples_split"],
  1492. }
  1493. self.model_Name = model
  1494. def data_visualization(self, save_dir, *args, **kwargs):
  1495. tab = Tab()
  1496. # 多个决策树可视化
  1497. for i in range(len(self.model.estimators_)):
  1498. with open(save_dir + rf"{os.sep}Tree_Gra[{i}].dot", "w") as f:
  1499. export_graphviz(self.model.estimators_[i], out_file=f)
  1500. tab.add(
  1501. TreePlot.see_tree(
  1502. save_dir +
  1503. rf"{os.sep}Tree_Gra[{i}].dot"),
  1504. f"[{i}]决策树可视化")
  1505. y = self.y_traindata
  1506. x_data = self.x_traindata
  1507. if self.model_Name == "Forest_class":
  1508. class_ = self.model.classes_.tolist()
  1509. class_heard = [f"类别[{i}]" for i in range(len(class_))]
  1510. get, x_means, x_range, data_type = TrainingVisualization.training_visualization(
  1511. x_data, class_, y)
  1512. for i in range(len(get)):
  1513. tab.add(get[i], f"{i}训练数据散点图")
  1514. get = Boundary.decision_boundary(
  1515. x_range, x_means, self.predict, class_, data_type)
  1516. for i in range(len(get)):
  1517. tab.add(get[i], f"{i}预测热力图")
  1518. tab.add(
  1519. MakePyecharts.make_tab(
  1520. class_heard + [f"普适预测第{i}特征" for i in range(len(x_means))],
  1521. [class_ + [f"{i}" for i in x_means]],
  1522. ),
  1523. "数据表",
  1524. )
  1525. else:
  1526. get, x_means, x_range, data_type = TrainingVisualization.regress_visualization(
  1527. x_data, y)
  1528. for i in range(len(get)):
  1529. tab.add(get[i], f"{i}预测类型图")
  1530. get = Boundary.prediction_boundary(
  1531. x_range, x_means, self.predict, data_type)
  1532. for i in range(len(get)):
  1533. tab.add(get[i], f"{i}预测热力图")
  1534. tab.add(
  1535. MakePyecharts.make_tab(
  1536. [f"普适预测第{i}特征" for i in range(len(x_means))],
  1537. [[f"{i}" for i in x_means]],
  1538. ),
  1539. "数据表",
  1540. )
  1541. Statistics.des_to_csv(
  1542. save_dir,
  1543. "预测表",
  1544. [[f"{i}" for i in x_means]],
  1545. [f"普适预测第{i}特征" for i in range(len(x_means))],
  1546. )
  1547. save = save_dir + rf"{os.sep}随机森林.HTML"
  1548. tab.render(save) # 生成HTML
  1549. return save,
class GradienttreeModel(StudyMachinebase): # gradient-boosted trees
def __init__(
self, args_use, model, *args, **kwargs
): # model is the name of the selected model type
super(GradienttreeModel, self).__init__(
*args, **kwargs) # Tree_Model's initializer is intentionally skipped
model_class = { # keep `model` as the name string for model_Name below
"GradientTree_class": GradientBoostingClassifier,
"GradientTree": GradientBoostingRegressor,
}[model]
self.model = model_class(
  1564. n_estimators=args_use["n_Tree"],
  1565. max_features=args_use["max_features"],
  1566. max_depth=args_use["max_depth"],
  1567. min_samples_split=args_use["min_samples_split"],
  1568. )
  1569. # 记录这两个是为了克隆
  1570. self.criterion = args_use["criterion"]
  1571. self.splitter = args_use["splitter"]
  1572. self.max_features = args_use["max_features"]
  1573. self.max_depth = args_use["max_depth"]
  1574. self.min_samples_split = args_use["min_samples_split"]
  1575. self.k = {
  1576. "criterion": args_use["criterion"],
  1577. "splitter": args_use["splitter"],
  1578. "max_features": args_use["max_features"],
  1579. "max_depth": args_use["max_depth"],
  1580. "min_samples_split": args_use["min_samples_split"],
  1581. }
  1582. self.model_Name = model
  1583. def data_visualization(self, save_dir, *args, **kwargs):
  1584. tab = Tab()
  1585. # 多个决策树可视化
  1586. for a in range(len(self.model.estimators_)):
  1587. for i in range(len(self.model.estimators_[a])):
  1588. with open(save_dir + rf"{os.sep}Tree_Gra[{a},{i}].dot", "w") as f:
  1589. export_graphviz(self.model.estimators_[a][i], out_file=f)
  1590. tab.add(
  1591. TreePlot.see_tree(
  1592. save_dir +
  1593. rf"{os.sep}Tree_Gra[{a},{i}].dot"),
  1594. f"[{a},{i}]决策树可视化")
  1595. y = self.y_traindata
  1596. x_data = self.x_traindata
if self.model_Name == "GradientTree_class": # was "Tree_class", a name this class never receives
  1598. class_ = self.model.classes_.tolist()
  1599. class_heard = [f"类别[{i}]" for i in range(len(class_))]
  1600. get, x_means, x_range, data_type = TrainingVisualization.training_visualization(
  1601. x_data, class_, y)
  1602. for i in range(len(get)):
  1603. tab.add(get[i], f"{i}训练数据散点图")
  1604. get = Boundary.decision_boundary(
  1605. x_range, x_means, self.predict, class_, data_type)
  1606. for i in range(len(get)):
  1607. tab.add(get[i], f"{i}预测热力图")
  1608. tab.add(
  1609. MakePyecharts.make_tab(
  1610. class_heard + [f"普适预测第{i}特征" for i in range(len(x_means))],
  1611. [class_ + [f"{i}" for i in x_means]],
  1612. ),
  1613. "数据表",
  1614. )
  1615. else:
  1616. get, x_means, x_range, data_type = TrainingVisualization.regress_visualization(
  1617. x_data, y)
  1618. for i in range(len(get)):
  1619. tab.add(get[i], f"{i}预测类型图")
  1620. get = Boundary.prediction_boundary(
  1621. x_range, x_means, self.predict, data_type)
  1622. for i in range(len(get)):
  1623. tab.add(get[i], f"{i}预测热力图")
  1624. tab.add(
  1625. MakePyecharts.make_tab(
  1626. [f"普适预测第{i}特征" for i in range(len(x_means))],
  1627. [[f"{i}" for i in x_means]],
  1628. ),
  1629. "数据表",
  1630. )
  1631. Statistics.des_to_csv(
  1632. save_dir,
  1633. "预测表",
  1634. [[f"{i}" for i in x_means]],
  1635. [f"普适预测第{i}特征" for i in range(len(x_means))],
  1636. )
  1637. save = save_dir + rf"{os.sep}梯度提升回归树.HTML"
  1638. tab.render(save) # 生成HTML
  1639. return save,
  1640. @plugin_class_loading(get_path(r"template/machinelearning"))
  1641. class SvcModel(StudyMachinebase):
  1642. def __init__(
  1643. self, args_use, model, *args, **kwargs
  1644. ): # model表示当前选用的模型类型,Alpha针对正则化的参数
  1645. super(SvcModel, self).__init__(*args, **kwargs)
  1646. self.model = SVC(
  1647. C=args_use["C"], gamma=args_use["gamma"], kernel=args_use["kernel"]
  1648. )
  1649. # 记录这两个是为了克隆
  1650. self.C = args_use["C"]
  1651. self.gamma = args_use["gamma"]
  1652. self.kernel = args_use["kernel"]
  1653. self.k = {
  1654. "C": args_use["C"],
  1655. "gamma": args_use["gamma"],
  1656. "kernel": args_use["kernel"],
  1657. }
  1658. self.model_Name = model
  1659. def data_visualization(self, save_dir, *args, **kwargs):
  1660. tab = Tab()
  1661. try:
  1662. w_list = self.model.coef_.tolist() # 未必有这个属性
  1663. b = self.model.intercept_.tolist()
  1664. except AttributeError:
  1665. w_list = [] # 未必有这个属性
  1666. b = []
  1667. class_ = self.model.classes_.tolist()
  1668. class_heard = [f"类别[{i}]" for i in range(len(class_))]
  1669. y = self.y_traindata
  1670. x_data = self.x_traindata
  1671. get, x_means, x_range, data_type = TrainingVisualization.training_visualization(
  1672. x_data, class_, y)
  1673. if w_list:
  1674. get_line: list = Curve.training_w(
  1675. x_data, class_, y, w_list, b, x_means.copy())
  1676. else:
  1677. get_line = []
  1678. for i in range(len(get)):
  1679. if get_line:
  1680. tab.add(get[i].overlap(get_line[i]), f"{i}决策边界散点图")
  1681. else:
  1682. tab.add(get[i], f"{i}决策边界散点图")
  1683. get = Boundary.decision_boundary(
  1684. x_range,
  1685. x_means,
  1686. self.predict,
  1687. class_,
  1688. data_type)
  1689. for i in range(len(get)):
  1690. tab.add(get[i], f"{i}预测热力图")
  1691. dic = {2: "离散", 1: "连续"}
  1692. tab.add(MakePyecharts.make_tab(class_heard +
  1693. [f"普适预测第{i}特征:{dic[data_type[i]]}" for i in range(len(x_means))],
  1694. [class_ + [f"{i}" for i in x_means]], ), "数据表", )
  1695. if w_list:
  1696. Statistics.des_to_csv(save_dir, "系数表", w_list, [
  1697. f"系数W[{i}]" for i in range(len(w_list[0]))])
  1698. if w_list:
  1699. Statistics.des_to_csv(
  1700. save_dir, "截距表", [b], [
  1701. f"截距{i}" for i in range(
  1702. len(b))])
  1703. Statistics.des_to_csv(
  1704. save_dir,
  1705. "预测表",
  1706. [[f"{i}" for i in x_means]],
  1707. [f"普适预测第{i}特征" for i in range(len(x_means))],
  1708. )
  1709. save = save_dir + rf"{os.sep}支持向量机分类.HTML"
  1710. tab.render(save) # 生成HTML
  1711. return save,
  1712. @plugin_class_loading(get_path(r"template/machinelearning"))
  1713. class SvrModel(StudyMachinebase):
  1714. def __init__(
  1715. self, args_use, model, *args, **kwargs
  1716. ): # model表示当前选用的模型类型,Alpha针对正则化的参数
  1717. super(SvrModel, self).__init__(*args, **kwargs)
  1718. self.model = SVR(
  1719. C=args_use["C"], gamma=args_use["gamma"], kernel=args_use["kernel"]
  1720. )
  1721. # 记录这两个是为了克隆
  1722. self.C = args_use["C"]
  1723. self.gamma = args_use["gamma"]
  1724. self.kernel = args_use["kernel"]
  1725. self.k = {
  1726. "C": args_use["C"],
  1727. "gamma": args_use["gamma"],
  1728. "kernel": args_use["kernel"],
  1729. }
  1730. self.model_Name = model
  1731. def data_visualization(self, save_dir, *args, **kwargs):
  1732. tab = Tab()
  1733. x_data = self.x_traindata
  1734. y = self.y_traindata
  1735. try:
  1736. w_list = self.model.coef_.tolist() # 未必有这个属性
  1737. b = self.model.intercept_.tolist()
  1738. except AttributeError:
  1739. w_list = [] # 未必有这个属性
  1740. b = []
  1741. get, x_means, x_range, data_type = TrainingVisualization.regress_visualization(
  1742. x_data, y)
  1743. if w_list:
  1744. get_line = Curve.regress_w(x_data, w_list, b, x_means.copy())
  1745. else:
  1746. get_line = []
  1747. for i in range(len(get)):
  1748. if get_line:
  1749. tab.add(get[i].overlap(get_line[i]), f"{i}预测类型图")
  1750. else:
  1751. tab.add(get[i], f"{i}预测类型图")
  1752. get = Boundary.prediction_boundary(
  1753. x_range, x_means, self.predict, data_type)
  1754. for i in range(len(get)):
  1755. tab.add(get[i], f"{i}预测热力图")
  1756. if w_list:
  1757. Statistics.des_to_csv(save_dir, "系数表", w_list, [
  1758. f"系数W[{i}]" for i in range(len(w_list[0]))])
  1759. if w_list:
  1760. Statistics.des_to_csv(
  1761. save_dir, "截距表", [b], [
  1762. f"截距{i}" for i in range(
  1763. len(b))])
  1764. Statistics.des_to_csv(
  1765. save_dir,
  1766. "预测表",
  1767. [[f"{i}" for i in x_means]],
  1768. [f"普适预测第{i}特征" for i in range(len(x_means))],
  1769. )
  1770. tab.add(
  1771. MakePyecharts.make_tab(
  1772. [f"普适预测第{i}特征" for i in range(len(x_means))],
  1773. [[f"{i}" for i in x_means]],
  1774. ),
  1775. "数据表",
  1776. )
  1777. save = save_dir + rf"{os.sep}支持向量机回归.HTML"
  1778. tab.render(save) # 生成HTML
  1779. return save,
class VarianceModel(Unsupervised): # unsupervised variance-threshold feature selection
  1781. def __init__(
  1782. self, args_use, model, *args, **kwargs
  1783. ): # model表示当前选用的模型类型,Alpha针对正则化的参数
  1784. super(VarianceModel, self).__init__(*args, **kwargs)
  1785. self.model = VarianceThreshold(
  1786. threshold=(args_use["P"] * (1 - args_use["P"])))
  1787. # 记录这两个是为了克隆
  1788. self.threshold = args_use["P"]
  1789. self.k = {"threshold": args_use["P"]}
  1790. self.model_Name = model
  1791. def data_visualization(self, save_dir, *args, **kwargs):
  1792. tab = Tab()
var = self.model.variances_ # per-feature variance (not standard deviation)
  1794. y_data = self.y_testdata
  1795. if isinstance(y_data, np.ndarray):
  1796. get = TrainingVisualization.training_visualization_no_class_more(
  1797. self.y_testdata)
  1798. for i in range(len(get)):
  1799. tab.add(get[i], f"[{i}]数据x-x散点图")
c = (
Bar()
.add_xaxis([f"[{i}]特征" for i in range(len(var))])
.add_yaxis("方差", var.tolist(), **label_setting)
.set_global_opts(
title_opts=opts.TitleOpts(title="方差柱状图"), **global_setting
)
)
tab.add(c, "数据方差")
  1809. save = save_dir + rf"{os.sep}方差特征选择.HTML"
  1810. tab.render(save) # 生成HTML
  1811. return save,
class SelectkbestModel(PrepBase): # supervised univariate feature selection
  1813. def __init__(self, args_use, model, *args, **kwargs):
  1814. super(SelectkbestModel, self).__init__(*args, **kwargs)
  1815. self.model = SelectKBest(
  1816. k=args_use["k"],
  1817. score_func=args_use["score_func"])
  1818. # 记录这两个是为了克隆
  1819. self.k_ = args_use["k"]
  1820. self.score_func = args_use["score_func"]
  1821. self.k = {"k": args_use["k"], "score_func": args_use["score_func"]}
  1822. self.model_Name = model
  1823. def data_visualization(self, save_dir, *args, **kwargs):
  1824. tab = Tab()
  1825. score = self.model.scores_.tolist()
  1826. support: np.ndarray = self.model.get_support()
  1827. y_data = self.y_traindata
  1828. x_data = self.x_traindata
  1829. if isinstance(x_data, np.ndarray):
  1830. get = TrainingVisualization.training_visualization_no_class_more(
  1831. x_data)
  1832. for i in range(len(get)):
  1833. tab.add(get[i], f"[{i}]训练数据x-x散点图")
  1834. if isinstance(y_data, np.ndarray):
  1835. get = TrainingVisualization.training_visualization_no_class_more(
  1836. y_data)
  1837. for i in range(len(get)):
  1838. tab.add(get[i], f"[{i}]保留训练数据x-x散点图")
  1839. y_data = self.y_testdata
  1840. x_data = self.x_testdata
  1841. if isinstance(x_data, np.ndarray):
  1842. get = TrainingVisualization.training_visualization_no_class_more(
  1843. x_data)
  1844. for i in range(len(get)):
  1845. tab.add(get[i], f"[{i}]数据x-x散点图")
  1846. if isinstance(y_data, np.ndarray):
  1847. get = TrainingVisualization.training_visualization_no_class_more(
  1848. y_data)
  1849. for i in range(len(get)):
  1850. tab.add(get[i], f"[{i}]保留数据x-x散点图")
choose = []
un_choose = []
for i in range(len(score)):
if support[i]:
choose.append(score[i])
un_choose.append(0) # placeholder keeps both series the same length
else:
un_choose.append(score[i])
choose.append(0)
  1860. c = (
  1861. Bar()
  1862. .add_xaxis([f"[{i}]特征" for i in range(len(score))])
  1863. .add_yaxis("选中特征", choose, **label_setting)
  1864. .add_yaxis("抛弃特征", un_choose, **label_setting)
.set_global_opts(
title_opts=opts.TitleOpts(title="单变量得分柱状图"), **global_setting
)
  1868. )
  1869. tab.add(c, "单变量重要程度")
  1870. save = save_dir + rf"{os.sep}单一变量特征选择.HTML"
  1871. tab.render(save) # 生成HTML
  1872. return save,
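# Hedged sketch of the SelectKBest bookkeeping used above: scores_ gives one
# score per feature and get_support() marks the survivors, which is exactly
# what feeds the "选中特征"/"抛弃特征" bars. Toy data, illustrative only.
def _demo_selectkbest():
    import numpy as np
    from sklearn.feature_selection import SelectKBest, f_classif

    x = np.array([[1.0, 0.0, 5.0], [2.0, 0.1, 4.0],
                  [8.0, 0.0, 1.0], [9.0, 0.1, 0.5]])
    y = np.array([0, 0, 1, 1])
    selector = SelectKBest(score_func=f_classif, k=2).fit(x, y)
    return selector.scores_.tolist(), selector.get_support().tolist()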
class SelectFromModel(PrepBase): # supervised; note that this class shadows sklearn's SelectFromModel
  1874. def __init__(
  1875. self, args_use, learner, *args, **kwargs
  1876. ): # model表示当前选用的模型类型,Alpha针对正则化的参数
  1877. super(SelectFromModel, self).__init__(*args, **kwargs)
  1878. self.model = learner.Model
# import under an alias: by instantiation time the bare name
# `SelectFromModel` refers to this class, not to sklearn's
from sklearn.feature_selection import SelectFromModel as SklearnSelectFromModel
self.Select_Model = SklearnSelectFromModel(
estimator=learner.Model,
max_features=args_use["k"],
prefit=learner.have_Fit)
  1883. self.max_features = args_use["k"]
  1884. self.estimator = learner.Model
  1885. self.k = {
  1886. "max_features": args_use["k"],
  1887. "estimator": learner.Model,
  1888. "have_Fit": learner.have_Fit,
  1889. }
  1890. self.have_fit = learner.have_Fit
  1891. self.model_Name = "SelectFrom_Model"
  1892. self.learner = learner
  1893. def fit_model(self, x_data, y_data, split=0.3, *args, **kwargs):
  1894. y_data = y_data.ravel()
if not self.have_fit: # a second fit is not allowed
  1896. self.Select_Model.fit(x_data, y_data)
  1897. self.have_fit = True
  1898. return "None", "None"
  1899. def predict(self, x_data, *args, **kwargs):
  1900. try:
  1901. self.x_testdata = x_data.copy()
  1902. x_predict = self.Select_Model.transform(x_data)
  1903. self.y_testdata = x_predict.copy()
  1904. self.have_predict = True
  1905. return x_predict, "模型特征工程"
  1906. except BaseException as e:
  1907. logging.debug(str(e))
  1908. self.have_predict = True
  1909. return np.array([]), "无结果工程"
  1910. def data_visualization(self, save_dir, *args, **kwargs):
  1911. tab = Tab()
  1912. support: np.ndarray = self.Select_Model.get_support()
  1913. y_data = self.y_testdata
  1914. x_data = self.x_testdata
  1915. if isinstance(x_data, np.ndarray):
  1916. get = TrainingVisualization.training_visualization_no_class_more(
  1917. x_data)
  1918. for i in range(len(get)):
  1919. tab.add(get[i], f"[{i}]数据x-x散点图")
  1920. if isinstance(y_data, np.ndarray):
  1921. get = TrainingVisualization.training_visualization_no_class_more(
  1922. y_data)
  1923. for i in range(len(get)):
  1924. tab.add(get[i], f"[{i}]保留数据x-x散点图")
  1925. def make_bar_(score):
  1926. choose = []
  1927. un_choose = []
  1928. for i in range(len(score)):
  1929. if support[i]:
  1930. choose.append(abs(score[i]))
  1931. un_choose.append(0) # 占位
  1932. else:
  1933. un_choose.append(abs(score[i]))
  1934. choose.append(0)
  1935. c = (
  1936. Bar()
  1937. .add_xaxis([f"[{i}]特征" for i in range(len(score))])
  1938. .add_yaxis("选中特征", choose, **label_setting)
  1939. .add_yaxis("抛弃特征", un_choose, **label_setting)
  1940. .set_global_opts(
  1941. title_opts=opts.TitleOpts(title="系数w柱状图"), **global_setting
  1942. )
  1943. )
  1944. tab.add(c, "单变量重要程度")
  1945. try:
  1946. make_bar_(self.model.coef_)
  1947. except AttributeError:
  1948. try:
  1949. make_bar_(self.model.feature_importances_)
  1950. except BaseException as e:
  1951. logging.warning(str(e))
  1952. save = save_dir + rf"{os.sep}模型特征选择.HTML"
  1953. tab.render(save) # 生成HTML
  1954. return save,
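# Sketch of the sklearn call the class above wraps, imported under an alias so
# it cannot collide with the local class of the same name. Toy data; threshold
# behaviour follows sklearn defaults, so the exact selection may vary.
def _demo_select_from_model():
    import numpy as np
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.feature_selection import SelectFromModel as SkSelectFromModel

    x = np.array([[1.0, 0.0, 5.0], [2.0, 0.1, 4.0],
                  [8.0, 0.0, 1.0], [9.0, 0.1, 0.5]])
    y = np.array([0, 0, 1, 1])
    selector = SkSelectFromModel(
        RandomForestClassifier(n_estimators=10, random_state=0),
        max_features=2,
    )
    reduced = selector.fit_transform(x, y)  # keeps only the important columns
    return reduced, selector.get_support()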
class StandardizationModel(Unsupervised): # z-score standardization (unsupervised)
  1956. def __init__(self, *args, **kwargs):
  1957. super(StandardizationModel, self).__init__(*args, **kwargs)
  1958. self.model = StandardScaler()
  1959. self.k = {}
  1960. self.model_Name = "StandardScaler"
  1961. def data_visualization(self, save_dir, *args, **kwargs):
  1962. tab = Tab()
  1963. y_data = self.y_testdata
  1964. x_data = self.x_testdata
var = self.model.var_.tolist() # per-feature variance
means = self.model.mean_.tolist() # per-feature mean
scale_ = self.model.scale_.tolist() # standard deviation actually used for scaling
MultiMap.conversion_control(y_data, x_data, tab)
MakePyecharts.make_bar("方差", var, tab)
MakePyecharts.make_bar("均值", means, tab)
MakePyecharts.make_bar("Scale", scale_, tab)
  1972. save = save_dir + rf"{os.sep}z-score标准化.HTML"
  1973. tab.render(save) # 生成HTML
  1974. return save,
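# Quick check of the StandardScaler attributes charted above: mean_, var_ and
# scale_ (scale_ == sqrt(var_), the divisor in (x - mean_) / scale_). Toy data.
def _demo_standard_scaler():
    import numpy as np
    from sklearn.preprocessing import StandardScaler

    x = np.array([[1.0, 10.0], [2.0, 20.0], [3.0, 30.0]])
    scaler = StandardScaler().fit(x)
    return scaler.mean_.tolist(), scaler.var_.tolist(), scaler.scale_.tolist()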
  1975. class MinmaxscalerModel(Unsupervised): # 离差标准化
  1976. def __init__(self, args_use, *args, **kwargs):
  1977. super(MinmaxscalerModel, self).__init__(*args, **kwargs)
  1978. self.model = MinMaxScaler(feature_range=args_use["feature_range"])
  1979. self.k = {}
  1980. self.model_Name = "MinMaxScaler"
  1981. def data_visualization(self, save_dir, *args, **kwargs):
  1982. tab = Tab()
  1983. y_data = self.y_testdata
  1984. x_data = self.x_testdata
  1985. scale_ = self.model.scale_.tolist()
  1986. max_ = self.model.data_max_.tolist()
  1987. min_ = self.model.data_min_.tolist()
  1988. MultiMap.conversion_control(y_data, x_data, tab)
  1989. MakePyecharts.make_bar("Scale", scale_, tab)
  1990. tab.add(
  1991. MakePyecharts.make_tab(
  1992. heard=[f"[{i}]特征最大值" for i in range(len(max_))]
  1993. + [f"[{i}]特征最小值" for i in range(len(min_))],
  1994. row=[max_ + min_],
  1995. ),
  1996. "数据表格",
  1997. )
  1998. save = save_dir + rf"{os.sep}离差标准化.HTML"
  1999. tab.render(save) # 生成HTML
  2000. return save,
  2001. class LogscalerModel(PrepBase): # 对数标准化
  2002. def __init__(self, *args, **kwargs):
  2003. super(LogscalerModel, self).__init__(*args, **kwargs)
  2004. self.model = None
  2005. self.k = {}
  2006. self.model_Name = "LogScaler"
  2007. def fit_model(self, x_data, *args, **kwargs):
  2008. if not self.have_predict: # 不允许第二次训练
  2009. self.max_logx = np.log(x_data.max())
  2010. self.have_fit = True
  2011. return "None", "None"
  2012. def predict(self, x_data, *args, **kwargs):
  2013. try:
  2014. max_logx = self.max_logx
  2015. except AttributeError:
  2016. self.have_fit = False
  2017. self.fit_model(x_data)
  2018. max_logx = self.max_logx
  2019. self.x_testdata = x_data.copy()
  2020. x_predict = np.log(x_data) / max_logx
  2021. self.y_testdata = x_predict.copy()
  2022. self.have_predict = True
  2023. return x_predict, "对数变换"
  2024. def data_visualization(self, save_dir, *args, **kwargs):
  2025. tab = Tab()
  2026. y_data = self.y_testdata
  2027. x_data = self.x_testdata
  2028. MultiMap.conversion_control(y_data, x_data, tab)
  2029. tab.add(MakePyecharts.make_tab(heard=["最大对数值(自然对数)"],
  2030. row=[[str(self.max_logx)]]), "数据表格")
  2031. save = save_dir + rf"{os.sep}对数标准化.HTML"
  2032. tab.render(save) # 生成HTML
  2033. return save,
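# Worked example of the log scaling above: every value maps to
# log(x) / log(x_max), so the training maximum lands exactly at 1.0.
# Assumes strictly positive input, as the class itself implicitly does.
def _demo_log_scaler():
    import numpy as np

    x = np.array([1.0, 10.0, 100.0, 1000.0])
    scaled = np.log(x) / np.log(x.max())
    return scaled  # -> [0., 1/3, 2/3, 1.]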
  2034. class AtanscalerModel(PrepBase): # atan标准化
  2035. def __init__(self, *args, **kwargs):
  2036. super(AtanscalerModel, self).__init__(*args, **kwargs)
  2037. self.model = None
  2038. self.k = {}
  2039. self.model_Name = "atanScaler"
  2040. def fit_model(self, x_data, *args, **kwargs):
  2041. self.have_fit = True
  2042. return "None", "None"
  2043. def predict(self, x_data, *args, **kwargs):
  2044. self.x_testdata = x_data.copy()
  2045. x_predict = np.arctan(x_data) * (2 / np.pi)
  2046. self.y_testdata = x_predict.copy()
  2047. self.have_predict = True
  2048. return x_predict, "atan变换"
  2049. def data_visualization(self, save_dir, *args, **kwargs):
  2050. tab = Tab()
  2051. y_data = self.y_testdata
  2052. x_data = self.x_testdata
  2053. MultiMap.conversion_control(y_data, x_data, tab)
  2054. save = save_dir + rf"{os.sep}反正切函数标准化.HTML"
  2055. tab.render(save) # 生成HTML
  2056. return save,
  2057. class DecimalscalerModel(PrepBase): # 小数定标准化
  2058. def __init__(self, *args, **kwargs):
  2059. super(DecimalscalerModel, self).__init__(*args, **kwargs)
  2060. self.model = None
  2061. self.k = {}
  2062. self.model_Name = "Decimal_normalization"
  2063. def fit_model(self, x_data, *args, **kwargs):
  2064. if not self.have_predict: # 不允许第二次训练
  2065. self.j = max([DataOperations.judging_digits(x_data.max()),
  2066. DataOperations.judging_digits(x_data.min())])
  2067. self.have_fit = True
  2068. return "None", "None"
  2069. def predict(self, x_data, *args, **kwargs):
  2070. self.x_testdata = x_data.copy()
  2071. try:
  2072. j = self.j
  2073. except AttributeError:
  2074. self.have_fit = False
  2075. self.fit_model(x_data)
  2076. j = self.j
  2077. x_predict = x_data / (10 ** j)
  2078. self.y_testdata = x_predict.copy()
  2079. self.have_predict = True
  2080. return x_predict, "小数定标标准化"
  2081. def data_visualization(self, save_dir, *args, **kwargs):
  2082. tab = Tab()
  2083. y_data = self.y_testdata
  2084. x_data = self.x_testdata
  2085. j = self.j
  2086. MultiMap.conversion_control(y_data, x_data, tab)
  2087. tab.add(MakePyecharts.make_tab(heard=["小数位数:j"], row=[[j]]), "数据表格")
  2088. save = save_dir + rf"{os.sep}小数定标标准化.HTML"
  2089. tab.render(save) # 生成HTML
  2090. return save,
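# Worked example of decimal scaling: with j digits in the largest absolute
# value, dividing by 10**j pulls every sample into (-1, 1). The digit count is
# computed inline here instead of via DataOperations.judging_digits.
def _demo_decimal_scaler():
    import numpy as np

    x = np.array([-310.0, 45.0, 999.0])
    j = len(str(int(abs(x).max())))  # 999 -> 3 digits
    return x / (10 ** j)             # -> [-0.31, 0.045, 0.999]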
  2091. class MapzoomModel(PrepBase): # 映射标准化
  2092. def __init__(self, args_use, *args, **kwargs):
  2093. super(MapzoomModel, self).__init__(*args, **kwargs)
  2094. self.model = None
  2095. self.feature_range = args_use["feature_range"]
  2096. self.k = {}
self.model_Name = "Decimal_normalization" # note: same name string as DecimalscalerModel
  2098. def fit_model(self, x_data, *args, **kwargs):
  2099. if not self.have_predict: # 不允许第二次训练
  2100. self.max_ = x_data.max()
  2101. self.min_ = x_data.min()
  2102. self.have_fit = True
  2103. return "None", "None"
  2104. def predict(self, x_data, *args, **kwargs):
  2105. self.x_testdata = x_data.copy()
  2106. try:
  2107. max_ = self.max_
  2108. min_ = self.min_
  2109. except AttributeError:
  2110. self.have_fit = False
  2111. self.fit_model(x_data)
  2112. max_ = self.max_
  2113. min_ = self.min_
# note: this only rescales by the span ratio; a full min-max mapping would
# also subtract min_ and add feature_range[0]
x_predict = (x_data * (self.feature_range[1] - self.feature_range[0])) / (
max_ - min_
)
  2117. self.y_testdata = x_predict.copy()
  2118. self.have_predict = True
  2119. return x_predict, "映射标准化"
  2120. def data_visualization(self, save_dir, *args, **kwargs):
  2121. tab = Tab()
  2122. y_data = self.y_testdata
  2123. x_data = self.x_testdata
  2124. max_ = self.max_
  2125. min_ = self.min_
  2126. MultiMap.conversion_control(y_data, x_data, tab)
  2127. tab.add(MakePyecharts.make_tab(
  2128. heard=["最大值", "最小值"], row=[[max_, min_]]), "数据表格")
  2129. save = save_dir + rf"{os.sep}映射标准化.HTML"
  2130. tab.render(save) # 生成HTML
  2131. return save,
  2132. class SigmodscalerModel(PrepBase): # sigmod变换
  2133. def __init__(self, *args, **kwargs):
  2134. super(SigmodscalerModel, self).__init__(*args, **kwargs)
  2135. self.model = None
  2136. self.k = {}
  2137. self.model_Name = "sigmodScaler_Model"
  2138. def fit_model(self, x_data, *args, **kwargs):
  2139. self.have_fit = True
  2140. return "None", "None"
  2141. def predict(self, x_data: np.array, *args, **kwargs):
  2142. self.x_testdata = x_data.copy()
  2143. x_predict = 1 / (1 + np.exp(-x_data))
  2144. self.y_testdata = x_predict.copy()
  2145. self.have_predict = True
  2146. return x_predict, "Sigmod变换"
  2147. def data_visualization(self, save_dir, *args, **kwargs):
  2148. tab = Tab()
  2149. y_data = self.y_testdata
  2150. x_data = self.x_testdata
  2151. MultiMap.conversion_control(y_data, x_data, tab)
  2152. save = save_dir + rf"{os.sep}Sigmoid变换.HTML"
  2153. tab.render(save) # 生成HTML
  2154. return save,
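# Sanity check for the sigmoid transform above: 1 / (1 + e^-x) maps 0 to 0.5
# and squashes large magnitudes towards 0 or 1.
def _demo_sigmoid_scaler():
    import numpy as np

    x = np.array([-5.0, 0.0, 5.0])
    return 1 / (1 + np.exp(-x))  # ~ [0.0067, 0.5, 0.9933]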
class FuzzyQuantizationModel(PrepBase): # fuzzy quantization normalization
  2156. def __init__(self, args_use, *args, **kwargs):
  2157. super(FuzzyQuantizationModel, self).__init__(*args, **kwargs)
  2158. self.model = None
  2159. self.feature_range = args_use["feature_range"]
  2160. self.k = {}
  2161. self.model_Name = "Fuzzy_quantization"
def fit_model(self, x_data, *args, **kwargs):
if not self.have_predict: # a second fit is not allowed
self.max_ = x_data.max()
self.min_ = x_data.min() # was `self.max_`, which overwrote the maximum
self.have_fit = True
return "None", "None"
  2168. def predict(self, x_data, *args, **kwargs):
  2169. self.x_testdata = x_data.copy()
try:
max_ = self.max_
min_ = self.min_
except AttributeError:
self.have_fit = False
self.fit_model(x_data)
max_ = self.max_
min_ = self.min_
  2178. x_predict = 1 / 2 + (1 / 2) * np.sin(
  2179. np.pi / (max_ - min_) * (x_data - (max_ - min_) / 2)
  2180. )
  2181. self.y_testdata = x_predict.copy()
  2182. self.have_predict = True
  2183. return x_predict, "模糊量化标准化"
  2184. def data_visualization(self, save_dir, *args, **kwargs):
  2185. tab = Tab()
  2186. y_data = self.y_traindata
  2187. x_data = self.x_traindata
max_ = self.max_
min_ = self.min_
  2190. MultiMap.conversion_control(y_data, x_data, tab)
  2191. tab.add(MakePyecharts.make_tab(
  2192. heard=["最大值", "最小值"], row=[[max_, min_]]), "数据表格")
  2193. save = save_dir + rf"{os.sep}模糊量化标准化.HTML"
  2194. tab.render(save) # 生成HTML
  2195. return save,
  2196. class RegularizationModel(Unsupervised): # 正则化
  2197. def __init__(self, args_use, *args, **kwargs):
  2198. super(RegularizationModel, self).__init__(*args, **kwargs)
  2199. self.model = Normalizer(norm=args_use["norm"])
  2200. self.k = {"norm": args_use["norm"]}
  2201. self.model_Name = "Regularization"
  2202. def data_visualization(self, save_dir, *args, **kwargs):
  2203. tab = Tab()
  2204. y_data = self.y_testdata.copy()
  2205. x_data = self.x_testdata.copy()
  2206. MultiMap.conversion_control(y_data, x_data, tab)
  2207. save = save_dir + rf"{os.sep}正则化.HTML"
  2208. tab.render(save) # 生成HTML
  2209. return save,
# discrete-data models
class BinarizerModel(Unsupervised): # binarization
  2212. def __init__(self, args_use, *args, **kwargs):
  2213. super(BinarizerModel, self).__init__(*args, **kwargs)
  2214. self.model = Binarizer(threshold=args_use["threshold"])
  2215. self.k = {}
  2216. self.model_Name = "Binarizer"
  2217. def data_visualization(self, save_dir, *args, **kwargs):
  2218. tab = Tab()
  2219. y_data = self.y_testdata
  2220. x_data = self.x_testdata
  2221. get_y = TrainingVisualization.discrete_training_visualization_no_class_more(
  2222. y_data, "转换数据") # 转换
  2223. for i in range(len(get_y)):
  2224. tab.add(get_y[i], f"[{i}]数据x-x离散散点图")
  2225. heard = [f"特征:{i}" for i in range(len(x_data[0]))]
  2226. tab.add(MakePyecharts.make_tab(heard, x_data.tolist()), f"原数据")
  2227. tab.add(MakePyecharts.make_tab(heard, y_data.tolist()), f"编码数据")
  2228. tab.add(
  2229. MakePyecharts.make_tab(
  2230. heard, np.dstack(
  2231. (x_data, y_data)).tolist()), f"合成[原数据,编码]数据")
  2232. save = save_dir + rf"{os.sep}二值离散化.HTML"
  2233. tab.render(save) # 生成HTML
  2234. return save,
  2235. class DiscretizationModel(PrepBase): # n值离散
  2236. def __init__(self, args_use, *args, **kwargs):
  2237. super(DiscretizationModel, self).__init__(*args, **kwargs)
  2238. self.model = None
  2239. range_ = args_use["split_range"]
  2240. if not range_:
  2241. raise Exception
  2242. elif len(range_) == 1:
  2243. range_.append(range_[0])
  2244. self.range = range_
  2245. self.k = {}
  2246. self.model_Name = "Discretization"
  2247. def fit_model(self, *args, **kwargs):
  2248. # t值在模型创建时已经保存
  2249. self.have_fit = True
  2250. return "None", "None"
  2251. def predict(self, x_data, *args, **kwargs):
  2252. self.x_testdata = x_data.copy()
  2253. x_predict = x_data.copy() # 复制
  2254. range_ = self.range
  2255. bool_list = []
  2256. max_ = len(range_) - 1
  2257. o_t = None
  2258. for i in range(len(range_)):
  2259. try:
  2260. t = float(range_[i])
  2261. except ValueError:
  2262. continue
  2263. if o_t is None: # 第一个参数
  2264. bool_list.append(x_predict <= t)
  2265. else:
  2266. bool_list.append((o_t <= x_predict) == (x_predict < t))
  2267. if i == max_:
  2268. bool_list.append(t <= x_predict)
  2269. o_t = t
  2270. for i in range(len(bool_list)):
  2271. x_predict[bool_list[i]] = i
  2272. self.y_testdata = x_predict.copy()
  2273. self.have_predict = True
  2274. return x_predict, f"{len(bool_list)}值离散化"
  2275. def data_visualization(self, save_dir, *args, **kwargs):
  2276. tab = Tab()
  2277. y_data = self.y_testdata
  2278. x_data = self.x_testdata
  2279. get_y = TrainingVisualization.discrete_training_visualization_no_class_more(
  2280. y_data, "转换数据") # 转换
  2281. for i in range(len(get_y)):
  2282. tab.add(get_y[i], f"[{i}]数据x-x离散散点图")
  2283. heard = [f"特征:{i}" for i in range(len(x_data[0]))]
  2284. tab.add(MakePyecharts.make_tab(heard, x_data.tolist()), f"原数据")
  2285. tab.add(MakePyecharts.make_tab(heard, y_data.tolist()), f"编码数据")
  2286. tab.add(
  2287. MakePyecharts.make_tab(
  2288. heard, np.dstack(
  2289. (x_data, y_data)).tolist()), f"合成[原数据,编码]数据")
  2290. save = save_dir + rf"{os.sep}多值离散化.HTML"
  2291. tab.render(save) # 生成HTML
  2292. return save,
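# Worked example of the multi-threshold discretization implemented above:
# thresholds [0, 10] split the axis into three intervals relabelled 0, 1, 2.
# np.digitize is used as a compact stand-in; its boundary handling differs
# slightly from the class's own `<=` comparisons at the first threshold.
def _demo_discretization():
    import numpy as np

    x = np.array([-3.0, 0.5, 4.0, 10.0, 42.0])
    thresholds = [0.0, 10.0]
    bins = np.digitize(x, thresholds)
    return bins  # -> [0, 1, 1, 2, 2]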
  2293. class LabelModel(PrepBase): # 数字编码
  2294. def __init__(self, *args, **kwargs):
  2295. super(LabelModel, self).__init__(*args, **kwargs)
  2296. self.model = []
  2297. self.k = {}
  2298. self.model_Name = "LabelEncoder"
  2299. def fit_model(self, x_data, *args, **kwargs):
  2300. if not self.have_predict: # 不允许第二次训练
  2301. self.model = []
  2302. if x_data.ndim == 1:
  2303. x_data = np.array([x_data])
  2304. for i in range(x_data.shape[1]):
  2305. self.model.append(
  2306. LabelEncoder().fit(np.ravel(x_data[:, i]))
  2307. ) # 训练机器(每个特征一个学习器)
  2308. self.have_fit = True
  2309. return "None", "None"
  2310. def predict(self, x_data, *args, **kwargs):
  2311. self.x_testdata = x_data.copy()
  2312. x_predict = x_data.copy()
  2313. if x_data.ndim == 1:
  2314. x_data = np.array([x_data])
  2315. for i in range(x_data.shape[1]):
  2316. x_predict[:, i] = self.model[i].transform(x_data[:, i])
  2317. self.y_testdata = x_predict.copy()
  2318. self.have_predict = True
  2319. return x_predict, "数字编码"
  2320. def data_visualization(self, save_dir, *args, **kwargs):
  2321. tab = Tab()
  2322. x_data = self.x_testdata
  2323. y_data = self.y_testdata
  2324. get_y = TrainingVisualization.discrete_training_visualization_no_class_more(
  2325. y_data, "转换数据") # 转换
  2326. for i in range(len(get_y)):
  2327. tab.add(get_y[i], f"[{i}]数据x-x离散散点图")
  2328. heard = [f"特征:{i}" for i in range(len(x_data[0]))]
  2329. tab.add(MakePyecharts.make_tab(heard, x_data.tolist()), f"原数据")
  2330. tab.add(MakePyecharts.make_tab(heard, y_data.tolist()), f"编码数据")
  2331. tab.add(
  2332. MakePyecharts.make_tab(
  2333. heard, np.dstack(
  2334. (x_data, y_data)).tolist()), f"合成[原数据,编码]数据")
  2335. save = save_dir + rf"{os.sep}数字编码.HTML"
  2336. tab.render(save) # 生成HTML
  2337. return save,
class OneHotEncoderModel(PrepBase): # one-hot encoding
  2339. def __init__(self, args_use, *args, **kwargs):
  2340. super(OneHotEncoderModel, self).__init__(*args, **kwargs)
  2341. self.model = []
  2342. self.ndim_up = args_use["ndim_up"]
  2343. self.k = {}
  2344. self.model_Name = "OneHotEncoder"
  2345. self.OneHot_Data = None # 三维独热编码
  2346. def fit_model(self, x_data, *args, **kwargs):
  2347. if not self.have_predict: # 不允许第二次训练
if x_data.ndim == 1:
x_data = np.array([x_data]) # a plain list has no .shape; keep it an ndarray
  2350. for i in range(x_data.shape[1]):
  2351. data = np.expand_dims(x_data[:, i], axis=1) # 独热编码需要升维
  2352. self.model.append(OneHotEncoder().fit(data)) # 训练机器
  2353. self.have_fit = True
  2354. return "None", "None"
  2355. def predict(self, x_data, *args, **kwargs):
  2356. self.x_testdata = x_data.copy()
  2357. x_new = []
  2358. for i in range(x_data.shape[1]):
  2359. data = np.expand_dims(x_data[:, i], axis=1) # 独热编码需要升维
  2360. one_hot = self.model[i].transform(data).toarray().tolist()
  2361. x_new.append(one_hot) # 添加到列表中
  2362. # 新列表的行数据是原data列数据的独热码(只需要ndim=2,暂时没想到numpy的做法)
  2363. x_new = np.array(x_new)
  2364. x_predict = []
  2365. for i in range(x_new.shape[1]):
  2366. x_predict.append(x_new[:, i])
  2367. x_predict = np.array(x_predict) # 转换回array
  2368. self.OneHot_Data = x_predict.copy() # 保存未降维数据
  2369. if not self.ndim_up: # 压缩操作
  2370. new_x_predict = []
  2371. for i in x_predict:
  2372. new_list = []
  2373. list_ = i.tolist()
  2374. for a in list_:
  2375. new_list += a
  2376. new = np.array(new_list)
  2377. new_x_predict.append(new)
self.y_testdata = np.array(new_x_predict)
self.have_predict = True # this early return previously skipped the flag
return self.y_testdata.copy(), "独热编码"
  2380. self.y_testdata = self.OneHot_Data
  2381. self.have_predict = True
  2382. return x_predict, "独热编码"
  2383. def data_visualization(self, save_dir, *args, **kwargs):
  2384. tab = Tab()
  2385. y_data = self.y_testdata
  2386. x_data = self.x_testdata
  2387. oh_data = self.OneHot_Data
  2388. if not self.ndim_up:
  2389. get_y = TrainingVisualization.discrete_training_visualization_no_class_more(
  2390. y_data, "转换数据") # 转换
  2391. for i in range(len(get_y)):
  2392. tab.add(get_y[i], f"[{i}]数据x-x离散散点图")
  2393. heard = [f"特征:{i}" for i in range(len(x_data[0]))]
  2394. tab.add(MakePyecharts.make_tab(heard, x_data.tolist()), f"原数据")
  2395. tab.add(MakePyecharts.make_tab(heard, oh_data.tolist()), f"编码数据")
  2396. tab.add(
  2397. MakePyecharts.make_tab(
  2398. heard, np.dstack(
  2399. (oh_data, x_data)).tolist()), f"合成[原数据,编码]数据")
  2400. tab.add(MakePyecharts.make_tab([f"编码:{i}" for i in range(
  2401. len(y_data[0]))], y_data.tolist()), f"数据")
  2402. save = save_dir + rf"{os.sep}独热编码.HTML"
  2403. tab.render(save) # 生成HTML
  2404. return save,
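# Hedged sketch of the per-column one-hot encoding above: each feature column
# is reshaped to (n, 1) before fitting its own OneHotEncoder, exactly because
# the encoder expects 2-D input. Toy data, illustrative only.
def _demo_one_hot_column():
    import numpy as np
    from sklearn.preprocessing import OneHotEncoder

    column = np.array([0, 1, 2, 1])
    data = np.expand_dims(column, axis=1)     # one-hot needs a 2-D array
    encoder = OneHotEncoder().fit(data)
    return encoder.transform(data).toarray()  # shape (4, 3)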
  2405. class MissedModel(Unsupervised): # 缺失数据补充
  2406. def __init__(self, args_use, *args, **kwargs):
  2407. super(MissedModel, self).__init__(*args, **kwargs)
  2408. self.model = SimpleImputer(
  2409. missing_values=args_use["miss_value"],
  2410. strategy=args_use["fill_method"],
  2411. fill_value=args_use["fill_value"],
  2412. )
  2413. self.k = {}
  2414. self.model_Name = "Missed"
  2415. def predict(self, x_data, *args, **kwargs):
  2416. self.x_testdata = x_data.copy()
  2417. x_predict = self.model.transform(x_data)
  2418. self.y_testdata = x_predict.copy()
  2419. self.have_predict = True
  2420. return x_predict, "填充缺失"
  2421. def data_visualization(self, save_dir, *args, **kwargs):
  2422. tab = Tab()
  2423. y_data = self.y_testdata
  2424. x_data = self.x_testdata
  2425. statistics = self.model.statistics_.tolist()
  2426. MultiMap.conversion_control(y_data, x_data, tab)
  2427. tab.add(MakePyecharts.make_tab([f"特征[{i}]" for i in range(
  2428. len(statistics))], [statistics]), "填充值")
  2429. save = save_dir + rf"{os.sep}缺失数据填充.HTML"
  2430. tab.render(save) # 生成HTML
  2431. return save,
  2432. @plugin_class_loading(get_path(r"template/machinelearning"))
  2433. class PcaModel(Unsupervised):
  2434. def __init__(self, args_use, *args, **kwargs):
  2435. super(PcaModel, self).__init__(*args, **kwargs)
  2436. self.model = PCA(
  2437. n_components=args_use["n_components"], whiten=args_use["white_PCA"]
  2438. )
  2439. self.whiten = args_use["white_PCA"]
  2440. self.n_components = args_use["n_components"]
  2441. self.k = {
  2442. "n_components": args_use["n_components"],
  2443. "whiten": args_use["white_PCA"],
  2444. }
  2445. self.model_Name = "PCA"
  2446. def predict(self, x_data, *args, **kwargs):
  2447. self.x_testdata = x_data.copy()
  2448. x_predict = self.model.transform(x_data)
  2449. self.y_testdata = x_predict.copy()
  2450. self.have_predict = True
  2451. return x_predict, "PCA"
  2452. def data_visualization(self, save_dir, *args, **kwargs):
  2453. tab = Tab()
  2454. y_data = self.y_testdata
  2455. importance = self.model.components_.tolist()
var = self.model.explained_variance_.tolist() # explained variance per component
  2457. MultiMap.conversion_separate_format(y_data, tab)
  2458. x_data = [f"第{i + 1}主成分" for i in range(len(importance))] # 主成分
  2459. y_data = [f"特征[{i}]" for i in range(len(importance[0]))] # 主成分
  2460. value = [
  2461. (f"第{i + 1}主成分", f"特征[{j}]", importance[i][j])
  2462. for i in range(len(importance))
  2463. for j in range(len(importance[i]))
  2464. ]
  2465. c = (
  2466. HeatMap()
  2467. .add_xaxis(x_data)
  2468. .add_yaxis(f"", y_data, value, **label_setting) # value的第一个数值是x
  2469. .set_global_opts(
  2470. title_opts=opts.TitleOpts(title="预测热力图"),
  2471. **global_not_legend,
  2472. yaxis_opts=opts.AxisOpts(is_scale=True), # 'category'
  2473. xaxis_opts=opts.AxisOpts(is_scale=True),
  2474. visualmap_opts=opts.VisualMapOpts(
  2475. is_show=True,
  2476. max_=int(self.model.components_.max()) + 1,
  2477. min_=int(self.model.components_.min()),
  2478. pos_right="3%",
  2479. ),
  2480. ) # 显示
  2481. )
  2482. tab.add(c, "成分热力图")
  2483. c = (
  2484. Bar()
2485. .add_xaxis([f"第{i + 1}主成分" for i in range(len(var))])
  2486. .add_yaxis("方量差", var, **label_setting)
  2487. .set_global_opts(
  2488. title_opts=opts.TitleOpts(title="方量差柱状图"), **global_setting
  2489. )
  2490. )
  2491. Statistics.des_to_csv(
  2492. save_dir,
  2493. "成分重要性",
  2494. importance,
  2495. [x_data],
  2496. [y_data])
2497. Statistics.des_to_csv(
2498. save_dir, "方量差", [var], [
2499. f"第{i + 1}主成分" for i in range(
2500. len(var))])
  2501. tab.add(c, "方量差柱状图")
  2502. save = save_dir + rf"{os.sep}主成分分析.HTML"
  2503. tab.render(save) # 生成HTML
  2504. return save,
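# Hedged sketch (toy data) of the quantities PcaModel plots: components_ feeds the
# heat map (one principal axis per row, one column per feature) and
# explained_variance_ feeds the bar chart.
def _demo_pca_quantities():
    import numpy as np
    from sklearn.decomposition import PCA
    x = np.random.default_rng(0).normal(size=(50, 3))
    model = PCA(n_components=2, whiten=False).fit(x)
    assert model.components_.shape == (2, 3)
    return model.components_, model.explained_variance_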
  2505. @plugin_class_loading(get_path(r"template/machinelearning"))
  2506. class RpcaModel(Unsupervised):
  2507. def __init__(self, args_use, *args, **kwargs):
  2508. super(RpcaModel, self).__init__(*args, **kwargs)
  2509. self.model = IncrementalPCA(
  2510. n_components=args_use["n_components"], whiten=args_use["white_PCA"]
  2511. )
  2512. self.n_components = args_use["n_components"]
  2513. self.whiten = args_use["white_PCA"]
  2514. self.k = {
  2515. "n_components": args_use["n_components"],
  2516. "whiten": args_use["white_PCA"],
  2517. }
  2518. self.model_Name = "RPCA"
  2519. def predict(self, x_data, *args, **kwargs):
  2520. self.x_testdata = x_data.copy()
  2521. x_predict = self.model.transform(x_data)
  2522. self.y_testdata = x_predict.copy()
  2523. self.have_predict = True
  2524. return x_predict, "RPCA"
  2525. def data_visualization(self, save_dir, *args, **kwargs):
  2526. tab = Tab()
2527. y_data = self.y_testdata # as in PcaModel: visualize the transformed test data
  2528. importance = self.model.components_.tolist()
2529. var = self.model.explained_variance_.tolist() # explained variance
2530. MultiMap.conversion_separate_format(y_data, tab)
2531. x_data = [f"第{i + 1}主成分" for i in range(len(importance))] # principal-component labels
2532. y_data = [f"特征[{i}]" for i in range(len(importance[0]))] # feature labels
  2533. value = [
  2534. (f"第{i + 1}主成分", f"特征[{j}]", importance[i][j])
  2535. for i in range(len(importance))
  2536. for j in range(len(importance[i]))
  2537. ]
  2538. c = (
  2539. HeatMap()
  2540. .add_xaxis(x_data)
  2541. .add_yaxis(f"", y_data, value, **label_setting) # value的第一个数值是x
  2542. .set_global_opts(
2543. title_opts=opts.TitleOpts(title="成分热力图"),
  2544. **global_not_legend,
  2545. yaxis_opts=opts.AxisOpts(is_scale=True), # 'category'
  2546. xaxis_opts=opts.AxisOpts(is_scale=True),
  2547. visualmap_opts=opts.VisualMapOpts(
  2548. is_show=True,
  2549. max_=int(self.model.components_.max()) + 1,
  2550. min_=int(self.model.components_.min()),
  2551. pos_right="3%",
  2552. ),
  2553. ) # 显示
  2554. )
  2555. tab.add(c, "成分热力图")
  2556. c = (
  2557. Bar()
2558. .add_xaxis([f"第{i + 1}主成分" for i in range(len(var))])
2559. .add_yaxis("方量差", var, **label_setting)
  2560. .set_global_opts(
  2561. title_opts=opts.TitleOpts(title="方量差柱状图"), **global_setting
  2562. )
  2563. )
  2564. tab.add(c, "方量差柱状图")
  2565. Statistics.des_to_csv(
  2566. save_dir,
  2567. "成分重要性",
  2568. importance,
  2569. [x_data],
  2570. [y_data])
2571. Statistics.des_to_csv(
2572. save_dir, "方量差", [var], [
2573. f"第{i + 1}主成分" for i in range(
2574. len(var))])
  2575. save = save_dir + rf"{os.sep}RPCA(主成分分析).HTML"
  2576. tab.render(save) # 生成HTML
  2577. return save,
  2578. @plugin_class_loading(get_path(r"template/machinelearning"))
  2579. class KpcaModel(Unsupervised):
  2580. def __init__(self, args_use, *args, **kwargs):
  2581. super(KpcaModel, self).__init__(*args, **kwargs)
  2582. self.model = KernelPCA(
  2583. n_components=args_use["n_components"], kernel=args_use["kernel"]
  2584. )
  2585. self.n_components = args_use["n_components"]
  2586. self.kernel = args_use["kernel"]
  2587. self.k = {
  2588. "n_components": args_use["n_components"],
  2589. "kernel": args_use["kernel"],
  2590. }
  2591. self.model_Name = "KPCA"
  2592. def predict(self, x_data, *args, **kwargs):
  2593. self.x_testdata = x_data.copy()
  2594. x_predict = self.model.transform(x_data)
  2595. self.y_testdata = x_predict.copy()
  2596. self.have_predict = True
  2597. return x_predict, "KPCA"
  2598. def data_visualization(self, save_dir, *args, **kwargs):
  2599. tab = Tab()
  2600. y_data = self.y_testdata
  2601. MultiMap.conversion_separate_format(y_data, tab)
  2602. save = save_dir + rf"{os.sep}KPCA(主成分分析).HTML"
  2603. tab.render(save) # 生成HTML
  2604. return save,
2605. class LdaModel(PrepBase): # supervised learning
  2606. def __init__(self, args_use, *args, **kwargs):
  2607. super(LdaModel, self).__init__(*args, **kwargs)
  2608. self.model = Lda(n_components=args_use["n_components"])
  2609. self.n_components = args_use["n_components"]
  2610. self.k = {"n_components": args_use["n_components"]}
  2611. self.model_Name = "LDA"
  2612. def predict(self, x_data, *args, **kwargs):
  2613. self.x_testdata = x_data.copy()
  2614. x_predict = self.model.transform(x_data)
  2615. self.y_testdata = x_predict.copy()
  2616. self.have_predict = True
  2617. return x_predict, "LDA"
  2618. def data_visualization(self, save_dir, *args, **kwargs):
  2619. tab = Tab()
  2620. x_data = self.x_testdata
  2621. y_data = self.y_testdata
  2622. MultiMap.conversion_separate_format(y_data, tab)
2623. w_list = self.model.coef_.tolist() # coefficients, rendered as a table
2624. b = self.model.intercept_
2626. x_means = Statistics.quick_stats(x_data).get()[0]
2627. # Regression-style curves are a legacy workaround: a classification plot is impossible here because predict() returns dimension-reduced data rather than class labels
2628. get = Curve.regress_w(x_data, w_list, b, x_means.copy())
2629. for i in range(len(get)):
2630. tab.add(get[i], f"类别:{i}LDA映射曲线")
  2631. save = save_dir + rf"{os.sep}render.HTML"
  2632. tab.render(save) # 生成HTML
  2633. return save,
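# Why the comment above calls the regression curves a legacy workaround:
# transform() returns reduced coordinates while predict() would return labels.
# Sketch assuming Lda is sklearn's LinearDiscriminantAnalysis (toy data).
def _demo_lda_transform_vs_predict():
    import numpy as np
    from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
    x = np.array([[0.0, 0.0], [0.1, 0.2], [3.0, 3.1], [2.9, 3.3]])
    y = np.array([0, 0, 1, 1])
    lda = LinearDiscriminantAnalysis(n_components=1).fit(x, y)
    return lda.transform(x), lda.predict(x)  # reduced data vs class labels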
  2634. @plugin_class_loading(get_path(r"template/machinelearning"))
  2635. class NmfModel(Unsupervised):
  2636. def __init__(self, args_use, *args, **kwargs):
  2637. super(NmfModel, self).__init__(*args, **kwargs)
  2638. self.model = NMF(n_components=args_use["n_components"])
  2639. self.n_components = args_use["n_components"]
  2640. self.k = {"n_components": args_use["n_components"]}
  2641. self.model_Name = "NFM"
  2642. self.h_testdata = None
2643. # x_traindata holds W after fitting; h_testdata and y_testdata are filled in later by predict()
  2644. def predict(self, x_data, x_name="", add_func=None, *args, **kwargs):
  2645. self.x_testdata = x_data.copy()
  2646. x_predict = self.model.transform(x_data)
  2647. self.y_testdata = x_predict.copy()
  2648. self.h_testdata = self.model.components_
  2649. if add_func is not None and x_name != "":
  2650. add_func(self.h_testdata, f"{x_name}:V->NMF[H]")
  2651. self.have_predict = True
  2652. return x_predict, "V->NMF[W]"
  2653. def data_visualization(self, save_dir, *args, **kwargs):
  2654. tab = Tab()
  2655. y_data = self.y_testdata
  2656. x_data = self.x_testdata
  2657. h_data = self.h_testdata
  2658. MultiMap.conversion_separate_wh(y_data, h_data, tab)
  2659. wh_data = np.matmul(y_data, h_data)
  2660. difference_data = x_data - wh_data
  2661. def make_heat_map(data, name, data_max, data_min):
2662. x = [f"数据[{i}]" for i in range(len(data))] # sample labels
2663. y = [f"特征[{i}]" for i in range(len(data[0]))] # feature labels
  2664. value = [
  2665. (f"数据[{i}]", f"特征[{j}]", float(data[i][j]))
  2666. for i in range(len(data))
  2667. for j in range(len(data[i]))
  2668. ]
  2669. c = (
  2670. HeatMap()
  2671. .add_xaxis(x)
  2672. .add_yaxis(f"数据", y, value, **label_setting) # value的第一个数值是x
  2673. .set_global_opts(
2674. title_opts=opts.TitleOpts(title=name),
  2675. **global_not_legend,
  2676. yaxis_opts=opts.AxisOpts(
  2677. is_scale=True, type_="category"
  2678. ), # 'category'
  2679. xaxis_opts=opts.AxisOpts(is_scale=True, type_="category"),
  2680. visualmap_opts=opts.VisualMapOpts(
  2681. is_show=True, max_=data_max, min_=data_min, pos_right="3%"
  2682. ),
  2683. ) # 显示
  2684. )
  2685. tab.add(c, name)
  2686. max_ = (max(int(x_data.max()), int(wh_data.max()),
  2687. int(difference_data.max())) + 1)
  2688. min_ = min(int(x_data.min()), int(wh_data.min()),
  2689. int(difference_data.min()))
  2690. make_heat_map(x_data, "原始数据热力图", max_, min_)
  2691. make_heat_map(wh_data, "W * H数据热力图", max_, min_)
  2692. make_heat_map(difference_data, "数据差热力图", max_, min_)
  2693. Statistics.des_to_csv(save_dir, "权重矩阵", y_data)
  2694. Statistics.des_to_csv(save_dir, "系数矩阵", h_data)
  2695. Statistics.des_to_csv(save_dir, "系数*权重矩阵", wh_data)
  2696. save = save_dir + rf"{os.sep}非负矩阵分解.HTML"
  2697. tab.render(save) # 生成HTML
  2698. return save,
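# Sketch (toy data) of the V ≈ W·H factorization behind the three heat maps above;
# the returned difference is the "数据差热力图" quantity.
def _demo_nmf_reconstruction():
    import numpy as np
    from sklearn.decomposition import NMF
    v = np.abs(np.random.default_rng(0).normal(size=(6, 4)))
    model = NMF(n_components=2, init="random", random_state=0, max_iter=500)
    w = model.fit_transform(v)  # weights, stored above as y_testdata
    h = model.components_  # coefficients, stored above as h_testdata
    return v - np.matmul(w, h)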
  2699. @plugin_class_loading(get_path(r"template/machinelearning"))
  2700. class TsneModel(Unsupervised):
  2701. def __init__(self, args_use, *args, **kwargs):
  2702. super(TsneModel, self).__init__(*args, **kwargs)
  2703. self.model = TSNE(n_components=args_use["n_components"])
  2704. self.n_components = args_use["n_components"]
  2705. self.k = {"n_components": args_use["n_components"]}
  2706. self.model_Name = "t-SNE"
  2707. def fit_model(self, *args, **kwargs):
  2708. self.have_fit = True
  2709. return "None", "None"
  2710. def predict(self, x_data, *args, **kwargs):
  2711. self.x_testdata = x_data.copy()
  2712. x_predict = self.model.fit_transform(x_data)
  2713. self.y_testdata = x_predict.copy()
  2714. self.have_predict = True
  2715. return x_predict, "SNE"
  2716. def data_visualization(self, save_dir, *args, **kwargs):
  2717. tab = Tab()
  2718. y_data = self.y_testdata
  2719. MultiMap.conversion_separate_format(y_data, tab)
  2720. save = save_dir + rf"{os.sep}T-SNE.HTML"
  2721. tab.render(save) # 生成HTML
  2722. return save,
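# t-SNE has no out-of-sample transform(), which is why fit_model above is a no-op
# and predict() calls fit_transform on every input. Minimal sketch (toy data):
def _demo_tsne():
    import numpy as np
    from sklearn.manifold import TSNE
    x = np.random.default_rng(0).normal(size=(30, 4))
    return TSNE(n_components=2, perplexity=5.0).fit_transform(x)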
2723. class MlpModel(StudyMachinebase): # neural network (multi-layer perceptron), supervised learning
  2724. def __init__(self, args_use, model, *args, **kwargs):
  2725. super(MlpModel, self).__init__(*args, **kwargs)
  2726. all_model = {"MLP": MLPRegressor, "MLP_class": MLPClassifier}[model]
  2727. self.model = all_model(
  2728. hidden_layer_sizes=args_use["hidden_size"],
  2729. activation=args_use["activation"],
  2730. solver=args_use["solver"],
  2731. alpha=args_use["alpha"],
  2732. max_iter=args_use["max_iter"],
  2733. )
2734. # recorded so the model can be cloned
  2735. self.hidden_layer_sizes = args_use["hidden_size"]
  2736. self.activation = args_use["activation"]
  2737. self.max_iter = args_use["max_iter"]
  2738. self.solver = args_use["solver"]
  2739. self.alpha = args_use["alpha"]
  2740. self.k = {
  2741. "hidden_layer_sizes": args_use["hidden_size"],
  2742. "activation": args_use["activation"],
  2743. "max_iter": args_use["max_iter"],
  2744. "solver": args_use["solver"],
  2745. "alpha": args_use["alpha"],
  2746. }
  2747. self.model_Name = model
  2748. def data_visualization(self, save_dir, *args, **kwargs):
  2749. tab = Tab()
  2750. x_data = self.x_testdata
  2751. y_data = self.y_testdata
  2752. coefs = self.model.coefs_
2753. class_ = getattr(self.model, "classes_", []) # MLPRegressor has no classes_ attribute
  2754. n_layers_ = self.model.n_layers_
  2755. def make_heat_map(data_, name):
  2756. x = [f"特征(节点)[{i}]" for i in range(len(data_))]
  2757. y = [f"节点[{i}]" for i in range(len(data_[0]))]
  2758. value = [
  2759. (f"特征(节点)[{i}]", f"节点[{j}]", float(data_[i][j]))
  2760. for i in range(len(data_))
  2761. for j in range(len(data_[i]))
  2762. ]
  2763. c = (
  2764. HeatMap()
  2765. .add_xaxis(x)
  2766. .add_yaxis(f"数据", y, value, **label_setting) # value的第一个数值是x
  2767. .set_global_opts(
  2768. title_opts=opts.TitleOpts(title=name),
  2769. **global_not_legend,
  2770. yaxis_opts=opts.AxisOpts(
  2771. is_scale=True, type_="category"
  2772. ), # 'category'
  2773. xaxis_opts=opts.AxisOpts(is_scale=True, type_="category"),
  2774. visualmap_opts=opts.VisualMapOpts(
  2775. is_show=True,
  2776. max_=float(data_.max()),
  2777. min_=float(data_.min()),
  2778. pos_right="3%",
  2779. ),
  2780. ) # 显示
  2781. )
  2782. tab.add(c, name)
  2783. tab.add(
  2784. MakePyecharts.make_tab(
  2785. x,
  2786. data_.transpose().tolist()),
  2787. f"{name}:表格")
  2788. Statistics.des_to_csv(
  2789. save_dir,
  2790. f"{name}:表格",
  2791. data_.transpose().tolist(),
  2792. x,
  2793. y)
  2794. get, x_means, x_range, data_type = TrainingVisualization.regress_visualization(
  2795. x_data, y_data)
  2796. for i in range(len(get)):
  2797. tab.add(get[i], f"{i}训练数据散点图")
  2798. get = Boundary.prediction_boundary(
  2799. x_range, x_means, self.predict, data_type)
  2800. for i in range(len(get)):
  2801. tab.add(get[i], f"{i}预测热力图")
  2802. heard = ["神经网络层数"]
  2803. data = [n_layers_]
  2804. for i in range(len(coefs)):
  2805. make_heat_map(coefs[i], f"{i}层权重矩阵")
  2806. heard.append(f"第{i}层节点数")
  2807. data.append(len(coefs[i][0]))
  2808. if self.model_Name == "MLP_class":
  2809. heard += [f"[{i}]类型" for i in range(len(class_))]
  2810. data += class_.tolist()
  2811. tab.add(MakePyecharts.make_tab(heard, [data]), "数据表")
  2812. save = save_dir + rf"{os.sep}多层感知机.HTML"
  2813. tab.render(save) # 生成HTML
  2814. return save,
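# Shape sketch for the per-layer weight heat maps above: coefs_[i] has one row per
# node of layer i and one column per node of layer i + 1, which is why the code
# reads the next layer's width from len(coefs[i][0]). Toy data only.
def _demo_mlp_coef_shapes():
    import numpy as np
    from sklearn.neural_network import MLPClassifier
    x = np.random.default_rng(0).normal(size=(40, 3))
    y = (x[:, 0] > 0).astype(int)
    model = MLPClassifier(hidden_layer_sizes=(5,), max_iter=300).fit(x, y)
    return [w.shape for w in model.coefs_]  # [(3, 5), (5, 1)] here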
  2815. @plugin_class_loading(get_path(r"template/machinelearning"))
  2816. class KmeansModel(UnsupervisedModel):
  2817. def __init__(self, args_use, *args, **kwargs):
  2818. super(KmeansModel, self).__init__(*args, **kwargs)
  2819. self.model = KMeans(n_clusters=args_use["n_clusters"])
  2820. self.class_ = []
  2821. self.n_clusters = args_use["n_clusters"]
  2822. self.k = {"n_clusters": args_use["n_clusters"]}
  2823. self.model_Name = "k-means"
  2824. def fit_model(self, x_data, *args, **kwargs):
  2825. return_ = super().fit_model(x_data, *args, **kwargs)
  2826. self.class_ = list(set(self.model.labels_.tolist()))
  2827. self.have_fit = True
  2828. return return_
  2829. def predict(self, x_data, *args, **kwargs):
  2830. self.x_testdata = x_data.copy()
  2831. y_predict = self.model.predict(x_data)
  2832. self.y_testdata = y_predict.copy()
  2833. self.have_predict = True
  2834. return y_predict, "k-means"
  2835. def data_visualization(self, save_dir, *args, **kwargs):
  2836. tab = Tab()
  2837. y = self.y_testdata
  2838. x_data = self.x_testdata
  2839. class_ = self.class_
  2840. center = self.model.cluster_centers_
  2841. class_heard = [f"簇[{i}]" for i in range(len(class_))]
  2842. func = (
  2843. TrainingVisualization.training_visualization_more
  2844. if more_global
  2845. else TrainingVisualization.training_visualization_center
  2846. )
  2847. get, x_means, x_range, data_type = func(x_data, class_, y, center)
  2848. for i in range(len(get)):
  2849. tab.add(get[i], f"{i}数据散点图")
  2850. get = Boundary.decision_boundary(
  2851. x_range,
  2852. x_means,
  2853. self.predict,
  2854. class_,
  2855. data_type)
  2856. for i in range(len(get)):
  2857. tab.add(get[i], f"{i}预测热力图")
  2858. heard = class_heard + [f"普适预测第{i}特征" for i in range(len(x_means))]
  2859. data = class_ + [f"{i}" for i in x_means]
  2860. c = Table().add(headers=heard, rows=[data])
  2861. tab.add(c, "数据表")
  2862. Statistics.des_to_csv(
  2863. save_dir,
  2864. "预测表",
  2865. [[f"{i}" for i in x_means]],
  2866. [f"普适预测第{i}特征" for i in range(len(x_means))],
  2867. )
  2868. save = save_dir + rf"{os.sep}k-means聚类.HTML"
  2869. tab.render(save) # 生成HTML
  2870. return save,
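# Minimal sketch (toy data) of the labels and centres that KmeansModel plots.
def _demo_kmeans_centers():
    import numpy as np
    from sklearn.cluster import KMeans
    x = np.vstack([np.zeros((10, 2)), np.ones((10, 2))])
    model = KMeans(n_clusters=2, n_init=10).fit(x)
    return model.cluster_centers_, sorted(set(model.labels_.tolist()))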
  2871. @plugin_class_loading(get_path(r"template/machinelearning"))
  2872. class AgglomerativeModel(UnsupervisedModel):
  2873. def __init__(self, args_use, *args, **kwargs):
  2874. super(AgglomerativeModel, self).__init__(*args, **kwargs)
  2875. self.model = AgglomerativeClustering(
  2876. n_clusters=args_use["n_clusters"]
  2877. ) # 默认为2,不同于k-means
  2878. self.class_ = []
  2879. self.n_clusters = args_use["n_clusters"]
  2880. self.k = {"n_clusters": args_use["n_clusters"]}
  2881. self.model_Name = "Agglomerative"
  2882. def fit_model(self, x_data, *args, **kwargs):
  2883. return_ = super().fit_model(x_data, *args, **kwargs)
  2884. self.class_ = list(set(self.model.labels_.tolist()))
  2885. self.have_fit = True
  2886. return return_
  2887. def predict(self, x_data, *args, **kwargs):
  2888. self.x_testdata = x_data.copy()
  2889. y_predict = self.model.fit_predict(x_data)
2890. self.y_testdata = y_predict.copy() # data_visualization reads y_testdata
  2891. self.have_predict = True
  2892. return y_predict, "Agglomerative"
  2893. def data_visualization(self, save_dir, *args, **kwargs):
  2894. tab = Tab()
  2895. y = self.y_testdata
  2896. x_data = self.x_testdata
  2897. class_ = self.class_
  2898. class_heard = [f"簇[{i}]" for i in range(len(class_))]
  2899. func = (
  2900. TrainingVisualization.training_visualization_more_no_center
  2901. if more_global
  2902. else TrainingVisualization.training_visualization
  2903. )
  2904. get, x_means, x_range, data_type = func(x_data, class_, y)
  2905. for i in range(len(get)):
  2906. tab.add(get[i], f"{i}训练数据散点图")
  2907. get = Boundary.decision_boundary(
  2908. x_range,
  2909. x_means,
  2910. self.predict,
  2911. class_,
  2912. data_type)
  2913. for i in range(len(get)):
  2914. tab.add(get[i], f"{i}预测热力图")
2915. linkage_array = ward(self.x_traindata) # Ward linkage over the training data; the cluster labels live in self.model.labels_
  2916. dendrogram(linkage_array)
  2917. plt.savefig(save_dir + rf"{os.sep}Cluster_graph.png")
  2918. image = Image()
  2919. image.add(
  2920. src=save_dir +
  2921. rf"{os.sep}Cluster_graph.png",
  2922. ).set_global_opts(
  2923. title_opts=opts.ComponentTitleOpts(
  2924. title="聚类树状图"))
  2925. tab.add(image, "聚类树状图")
  2926. heard = class_heard + [f"普适预测第{i}特征" for i in range(len(x_means))]
  2927. data = class_ + [f"{i}" for i in x_means]
  2928. c = Table().add(headers=heard, rows=[data])
  2929. tab.add(c, "数据表")
  2930. Statistics.des_to_csv(
  2931. save_dir,
  2932. "预测表",
  2933. [[f"{i}" for i in x_means]],
  2934. [f"普适预测第{i}特征" for i in range(len(x_means))],
  2935. )
  2936. save = save_dir + rf"{os.sep}层次聚类.HTML"
  2937. tab.render(save) # 生成HTML
  2938. return save,
  2939. @plugin_class_loading(get_path(r"template/machinelearning"))
  2940. class DbscanModel(UnsupervisedModel):
  2941. def __init__(self, args_use, *args, **kwargs):
  2942. super(DbscanModel, self).__init__(*args, **kwargs)
  2943. self.model = DBSCAN(
  2944. eps=args_use["eps"],
  2945. min_samples=args_use["min_samples"])
2946. # eps is the neighbourhood radius (default 0.5); min_samples (default 5) is the cluster/noise threshold (minimum points per cluster)
  2948. self.eps = args_use["eps"]
  2949. self.min_samples = args_use["min_samples"]
  2950. self.k = {
  2951. "min_samples": args_use["min_samples"],
  2952. "eps": args_use["eps"]}
  2953. self.class_ = []
  2954. self.model_Name = "DBSCAN"
  2955. def fit_model(self, x_data, *args, **kwargs):
  2956. return_ = super().fit_model(x_data, *args, **kwargs)
  2957. self.class_ = list(set(self.model.labels_.tolist()))
  2958. self.have_fit = True
  2959. return return_
  2960. def predict(self, x_data, *args, **kwargs):
  2961. self.x_testdata = x_data.copy()
  2962. y_predict = self.model.fit_predict(x_data)
  2963. self.y_testdata = y_predict.copy()
  2964. self.have_predict = True
  2965. return y_predict, "DBSCAN"
  2966. def data_visualization(self, save_dir, *args, **kwargs):
2967. # DBSCAN has no separate prediction step worth plotting
  2968. tab = Tab()
  2969. y = self.y_testdata.copy()
  2970. x_data = self.x_testdata.copy()
  2971. class_ = self.class_
  2972. class_heard = [f"簇[{i}]" for i in range(len(class_))]
  2973. func = (
  2974. TrainingVisualization.training_visualization_more_no_center
  2975. if more_global
  2976. else TrainingVisualization.training_visualization
  2977. )
  2978. get, x_means, x_range, data_type = func(x_data, class_, y)
  2979. for i in range(len(get)):
  2980. tab.add(get[i], f"{i}训练数据散点图")
  2981. heard = class_heard + [f"普适预测第{i}特征" for i in range(len(x_means))]
  2982. data = class_ + [f"{i}" for i in x_means]
  2983. c = Table().add(headers=heard, rows=[data])
  2984. tab.add(c, "数据表")
  2985. Statistics.des_to_csv(
  2986. save_dir,
  2987. "预测表",
  2988. [[f"{i}" for i in x_means]],
  2989. [f"普适预测第{i}特征" for i in range(len(x_means))],
  2990. )
  2991. save = save_dir + rf"{os.sep}密度聚类.HTML"
  2992. tab.render(save) # 生成HTML
  2993. return save,
2994. class FastFourier(StudyMachinebase): # fast Fourier transform
  2995. def __init__(self, *args, **kwargs):
  2996. super(FastFourier, self).__init__(*args, **kwargs)
  2997. self.model = None
2998. self.fourier = None # complex FFT output
2999. self.frequency = None # frequency range
3000. self.angular_Frequency = None # angular-frequency range
3001. self.phase = None # phase range
3002. self.breadth = None # amplitude range
3003. self.sample_size = None # number of samples
  3004. def fit_model(self, y_data, *args, **kwargs):
3005. y_data = y_data.ravel() # flatten to a 1-D array
3006. try:
3007. assert self.y_traindata is not None
3008. self.y_traindata = np.hstack((y_data, self.y_traindata)) # append to the previously fitted samples
3009. except (AssertionError, ValueError):
3010. self.y_traindata = y_data.copy()
  3011. fourier = fft(y_data)
  3012. self.sample_size = len(y_data)
3013. self.frequency = np.linspace(0, 1, self.sample_size) # normalized frequency range
3014. self.angular_Frequency = self.frequency * (np.pi * 2) # angular frequency ω = 2πf
  3015. self.phase = np.angle(fourier)
  3016. self.breadth = np.abs(fourier)
  3017. self.fourier = fourier
  3018. self.have_fit = True
  3019. return "None", "None"
  3020. def predict(self, x_data, *args, **kwargs):
  3021. return np.array([]), ""
  3022. def data_visualization(self, save_dir, *args, **kwargs):
3023. # spectra were computed in fit_model; predict() is a no-op here
  3024. tab = Tab()
  3025. y = self.y_traindata.copy()
  3026. n = self.sample_size
3027. phase = self.phase # phase spectrum
3028. breadth = self.breadth # amplitude spectrum
  3029. normalization_breadth = breadth / n
  3030. def line(name, value, s=slice(0, None)) -> Line:
  3031. c = (
  3032. Line()
  3033. .add_xaxis(self.frequency[s].tolist())
  3034. .add_yaxis(
  3035. "",
  3036. value,
  3037. **label_setting,
  3038. symbol="none" if self.sample_size >= 500 else None,
  3039. )
  3040. .set_global_opts(
  3041. title_opts=opts.TitleOpts(title=name),
  3042. **global_not_legend,
  3043. xaxis_opts=opts.AxisOpts(type_="value"),
  3044. yaxis_opts=opts.AxisOpts(type_="value"),
  3045. )
  3046. )
  3047. return c
  3048. tab.add(line("原始数据", y.tolist()), "原始数据")
  3049. tab.add(line("双边振幅谱", breadth.tolist()), "双边振幅谱")
  3050. tab.add(
  3051. line(
  3052. "双边振幅谱(归一化)",
  3053. normalization_breadth.tolist()),
  3054. "双边振幅谱(归一化)")
3055. tab.add(
3056. line("单边振幅谱", breadth[: int(n / 2)].tolist(), slice(0, int(n / 2))), "单边振幅谱"
3057. )
3058. tab.add(
3059. line(
3060. "单边振幅谱(归一化)",
3061. normalization_breadth[: int(n / 2)].tolist(),
3062. slice(0, int(n / 2)),
3063. ),
3064. "单边振幅谱(归一化)",
3065. )
  3066. tab.add(line("双边相位谱", phase.tolist()), "双边相位谱")
  3067. tab.add(
  3068. line("单边相位谱", phase[: int(n / 2)].tolist(), slice(0, int(n / 2))), "单边相位谱"
  3069. )
  3070. tab.add(
  3071. MakePyecharts.make_tab(
  3072. self.frequency.tolist(), [
  3073. breadth.tolist()]), "双边振幅谱")
  3074. tab.add(
  3075. MakePyecharts.make_tab(
  3076. self.frequency.tolist(), [
  3077. phase.tolist()]), "双边相位谱")
  3078. tab.add(
  3079. MakePyecharts.make_tab(
  3080. self.frequency.tolist(), [
  3081. self.fourier.tolist()]), "快速傅里叶变换")
  3082. save = save_dir + rf"{os.sep}快速傅里叶.HTML"
  3083. tab.render(save) # 生成HTML
  3084. return save,
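# Worked example of the spectra rendered above: a unit-amplitude sine sampled n
# times produces a two-sided amplitude peak of n / 2 at its frequency bin, so
# breadth / n * 2 recovers the physical amplitude on the single-sided plot.
def _demo_fft_amplitude():
    import numpy as np
    n = 256
    t = np.arange(n)
    y = np.sin(2 * np.pi * 8 * t / n)  # 8 cycles across the window
    breadth = np.abs(np.fft.fft(y))
    assert round(breadth[8] / n * 2, 6) == 1.0
    return breadth[: n // 2]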
3085. class ReverseFastFourier(StudyMachinebase): # inverse fast Fourier transform
  3086. def __init__(self, *args, **kwargs):
  3087. super(ReverseFastFourier, self).__init__(*args, **kwargs)
  3088. self.model = None
  3089. self.sample_size = None
  3090. self.y_testdata_real = None
  3091. self.phase = None
  3092. self.breadth = None
  3093. def fit_model(self, y_data, *args, **kwargs):
  3094. return "None", "None"
  3095. def predict(self, x_data, x_name="", add_func=None, *args, **kwargs):
3096. self.x_testdata = x_data.ravel().astype(np.complex128) # np.complex_ is deprecated; complex128 is the portable spelling
  3097. fourier = ifft(self.x_testdata)
  3098. self.y_testdata = fourier.copy()
  3099. self.y_testdata_real = np.real(fourier)
  3100. self.sample_size = len(self.y_testdata_real)
  3101. self.phase = np.angle(self.x_testdata)
  3102. self.breadth = np.abs(self.x_testdata)
3103. if add_func is not None: add_func(self.y_testdata_real.copy(), f"{x_name}:逆向快速傅里叶变换[实数]") # add_func defaults to None
  3104. return fourier, "逆向快速傅里叶变换"
  3105. def data_visualization(self, save_dir, *args, **kwargs):
3106. # everything shown here was computed in predict()
  3107. tab = Tab()
  3108. y = self.y_testdata_real.copy()
  3109. y_data = self.y_testdata.copy()
  3110. n = self.sample_size
  3111. range_n: list = np.linspace(0, 1, n).tolist()
3112. phase = self.phase # phase spectrum
3113. breadth = self.breadth # amplitude spectrum
  3114. def line(name, value, s=slice(0, None)) -> Line:
  3115. c = (
  3116. Line().add_xaxis(
  3117. range_n[s]).add_yaxis(
  3118. "",
  3119. value,
  3120. **label_setting,
  3121. symbol="none" if n >= 500 else None).set_global_opts(
  3122. title_opts=opts.TitleOpts(
  3123. title=name),
  3124. **global_not_legend,
  3125. xaxis_opts=opts.AxisOpts(
  3126. type_="value"),
  3127. yaxis_opts=opts.AxisOpts(
  3128. type_="value"),
  3129. ))
  3130. return c
  3131. tab.add(line("逆向傅里叶变换", y.tolist()), "逆向傅里叶变换[实数]")
  3132. tab.add(
  3133. MakePyecharts.make_tab(
  3134. range_n, [
  3135. y_data.tolist()]), "逆向傅里叶变换数据")
  3136. tab.add(MakePyecharts.make_tab(range_n, [y.tolist()]), "逆向傅里叶变换数据[实数]")
  3137. tab.add(line("双边振幅谱", breadth.tolist()), "双边振幅谱")
3138. tab.add(
3139. line("单边振幅谱", breadth[: int(n / 2)].tolist(), slice(0, int(n / 2))), "单边振幅谱"
3140. )
  3141. tab.add(line("双边相位谱", phase.tolist()), "双边相位谱")
  3142. tab.add(
  3143. line("单边相位谱", phase[: int(n / 2)].tolist(), slice(0, int(n / 2))), "单边相位谱"
  3144. )
3145. save = save_dir + rf"{os.sep}逆向快速傅里叶.HTML" # kept distinct from the FastFourier output file
  3146. tab.render(save) # 生成HTML
  3147. return save,
3148. class ReverseFastFourierTwonumpy(ReverseFastFourier): # inverse FFT from separate phase and amplitude arrays
  3149. def fit_model(
  3150. self,
  3151. x_data,
  3152. y_data=None,
  3153. x_name="",
  3154. add_func=None,
  3155. *args,
  3156. **kwargs):
  3157. r = np.multiply(np.cos(x_data), y_data)
  3158. j = np.multiply(np.sin(x_data), y_data) * 1j
  3159. super(ReverseFastFourierTwonumpy, self).predict(
  3160. r + j, x_name=x_name, add_func=add_func, *args, **kwargs
  3161. )
  3162. return "None", "None"
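# Round-trip sketch for the reconstruction above: A·cos(φ) + i·A·sin(φ) rebuilds
# the complex spectrum from amplitude and phase, and ifft recovers the signal.
def _demo_ifft_roundtrip():
    import numpy as np
    y = np.random.default_rng(0).normal(size=64)
    spectrum = np.fft.fft(y)
    breadth, phase = np.abs(spectrum), np.angle(spectrum)
    rebuilt = breadth * np.cos(phase) + 1j * breadth * np.sin(phase)
    assert np.allclose(np.real(np.fft.ifft(rebuilt)), y)
    return rebuilt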
3163. class CurveFitting(StudyMachinebase): # curve fitting
  3164. def __init__(self, name, str_, model, *args, **kwargs):
  3165. super(CurveFitting, self).__init__(*args, **kwargs)
  3166. def ndim_down(data: np.ndarray):
  3167. if data.ndim == 1:
  3168. return data
  3169. new_data = []
  3170. for i in data:
  3171. new_data.append(np.sum(i))
  3172. return np.array(new_data)
  3173. named_domain = {"np": np, "Func": model, "ndimDown": ndim_down}
# FUNC is exec'd against named_domain, which only defines np, Func and ndimDown, so the generated source must not reference module-level decorators; slicing co_varnames by co_argcount keeps local variables out of the signature
3174. protection_func = f"""
3176. def FUNC({",".join(model.__code__.co_varnames[: model.__code__.co_argcount])}):
3177.     answer = Func({",".join(model.__code__.co_varnames[: model.__code__.co_argcount])})
3178.     return ndimDown(answer)
3179. """
  3180. exec(protection_func, named_domain)
  3181. self.func = named_domain["FUNC"]
  3182. self.fit_data = None
  3183. self.name = name
  3184. self.func_str = str_
  3185. def fit_model(
  3186. self,
  3187. x_data: np.ndarray,
  3188. y_data: np.ndarray,
  3189. *args,
  3190. **kwargs):
  3191. y_data = y_data.ravel()
  3192. x_data = x_data.astype(np.float64)
  3193. try:
  3194. assert self.x_traindata is not None
  3195. self.x_traindata = np.vstack((x_data, self.x_traindata))
  3196. self.y_traindata = np.vstack((y_data, self.y_traindata))
  3197. except (AssertionError, ValueError):
  3198. self.x_traindata = x_data.copy()
  3199. self.y_traindata = y_data.copy()
  3200. self.fit_data = optimize.curve_fit(
  3201. self.func, self.x_traindata, self.y_traindata
  3202. )
  3203. self.model = self.fit_data[0].copy()
  3204. return "None", "None"
  3205. def predict(self, x_data, *args, **kwargs):
  3206. self.x_testdata = x_data.copy()
  3207. predict = self.func(x_data, *self.model)
  3208. y_predict = []
  3209. for i in predict:
  3210. y_predict.append(np.sum(i))
  3211. y_predict = np.array(y_predict)
  3212. self.y_testdata = y_predict.copy()
  3213. self.have_predict = True
  3214. return y_predict, self.name
  3215. def data_visualization(self, save_dir, *args, **kwargs):
3216. # visualizes the fitted curve against the test data
  3217. tab = Tab()
  3218. y = self.y_testdata.copy()
  3219. x_data = self.x_testdata.copy()
  3220. get, x_means, x_range, data_type = TrainingVisualization.regress_visualization(
  3221. x_data, y)
  3222. for i in range(len(get)):
  3223. tab.add(get[i], f"{i}预测类型图")
  3224. get = Boundary.prediction_boundary(
  3225. x_range, x_means, self.predict, data_type)
  3226. for i in range(len(get)):
  3227. tab.add(get[i], f"{i}预测热力图")
  3228. tab.add(
  3229. MakePyecharts.make_tab(
  3230. [f"普适预测第{i}特征" for i in range(len(x_means))],
  3231. [[f"{i}" for i in x_means]],
  3232. ),
  3233. "普适预测特征数据",
  3234. )
  3235. tab.add(
  3236. MakePyecharts.make_tab(
  3237. [f"参数[{i}]" for i in range(len(self.model))],
  3238. [[f"{i}" for i in self.model]],
  3239. ),
  3240. "拟合参数",
  3241. )
  3242. save = save_dir + rf"{os.sep}曲线拟合.HTML"
  3243. tab.render(save) # 生成HTML
  3244. return save,
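# Hypothetical usage sketch for the fitting path above: the model callable takes
# x first and the fit parameters after it, exactly what scipy.optimize.curve_fit
# expects.
def _demo_curve_fit():
    import numpy as np
    from scipy import optimize
    def linear(x, a, b):
        return a * x + b
    x = np.linspace(0.0, 1.0, 20)
    y = 2.0 * x + 1.0
    params, _ = optimize.curve_fit(linear, x, y)
    return params  # approximately [2.0, 1.0]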
  3245. @plugin_class_loading(get_path(r"template/machinelearning"))
  3246. class Tab(tab_First):
  3247. def __init__(self, *args, **kwargs):
  3248. super(Tab, self).__init__(*args, **kwargs)
3249. self.element = {} # tracks the tab's elements: name -> chart
  3250. def add(self, chart, tab_name):
  3251. self.element[tab_name] = chart
  3252. return super(Tab, self).add(chart, tab_name)
  3253. def render(
  3254. self,
  3255. path: str = "render.html",
  3256. template_name: str = "simple_tab.html",
  3257. *args,
3258. **kwargs
  3259. ) -> str:
  3260. if all_global:
  3261. render_dir = path_split(path)[0]
  3262. for i in self.element:
  3263. self.element[i].render(render_dir + os.sep + i + ".html")
  3264. return super(Tab, self).render(path, template_name, *args, **kwargs)
  3265. @plugin_class_loading(get_path(r"template/machinelearning"))
  3266. class Table(TableFisrt):
  3267. def __init__(self, *args, **kwargs):
  3268. super(Table, self).__init__(*args, **kwargs)
  3269. self.HEADERS = []
  3270. self.ROWS = [[]]
  3271. def add(self, headers, rows, attributes=None):
  3272. if len(rows) == 1:
  3273. new_headers = ["数据类型", "数据"]
  3274. new_rows = list(zip(headers, rows[0]))
  3275. self.HEADERS = new_headers
  3276. self.ROWS = new_rows
  3277. return super().add(new_headers, new_rows, attributes)
  3278. else:
  3279. self.HEADERS = headers
  3280. self.ROWS = rows
  3281. return super().add(headers, rows, attributes)
3282. def render(self, path="render.html", *args, **kwargs) -> str:
  3283. if csv_global:
  3284. save_dir, name = path_split(path)
  3285. name = splitext(name)[0]
  3286. try:
  3287. DataFrame(self.ROWS, columns=self.HEADERS).to_csv(
  3288. save_dir + os.sep + name + ".csv"
  3289. )
  3290. except BaseException as e:
  3291. logging.warning(str(e))
  3292. return super().render(path, *args, **kwargs)
  3293. class DataOperations:
  3294. @staticmethod
  3295. @plugin_func_loading(get_path(r"template/machinelearning"))
3296. def judging_digits(num: (int, float)): # count the digits of the integer part
  3297. a = str(abs(num)).split(".")[0]
  3298. if a == "":
  3299. raise ValueError
  3300. return len(a)
  3301. @staticmethod
  3302. @plugin_func_loading(get_path(r"template/machinelearning"))
  3303. def num_str(num, accuracy):
  3304. num = str(round(float(num), accuracy))
  3305. if len(num.replace(".", "")) == accuracy:
  3306. return num
  3307. n = num.split(".")
3308. if len(n) == 1: # no decimal point
  3309. return num + "." + "0" * (accuracy - len(num))
  3310. else:
3311. return num + "0" * (accuracy - len(num) + 1) # +1 because len(num) counted the decimal point
  3312. @staticmethod
  3313. @plugin_func_loading(get_path(r"template/machinelearning"))
  3314. def make_list(first, end, num=35):
3315. n = num / (end - first) if end != first else 1 # guard against a zero-width range
  3316. if n == 0:
  3317. n = 1
  3318. return_ = []
  3319. n_first = first * n
  3320. n_end = end * n
  3321. while n_first <= n_end:
  3322. cul = n_first / n
  3323. return_.append(round(cul, 2))
  3324. n_first += 1
  3325. return return_
  3326. @staticmethod
  3327. @plugin_func_loading(get_path(r"template/machinelearning"))
  3328. def list_filter(original_list, num=70):
  3329. if len(original_list) <= num:
  3330. return original_list
3331. n = -(-len(original_list) // num) # ceil division keeps the slice step >= 1 and the output length <= num
  3332. return_ = original_list[::n]
  3333. return return_
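# Behavioural sketch for the two samplers above: make_list spaces roughly `num`
# points across [first, end], and list_filter thins a long list to at most `num`
# entries via the ceil-division step.
def _demo_data_operations():
    grid = DataOperations.make_list(0, 7, num=35)  # 36 evenly spaced values
    thinned = DataOperations.list_filter(list(range(700)), num=70)
    return len(grid), len(thinned)  # (36, 70)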
  3334. class Boundary:
  3335. @staticmethod
  3336. @plugin_func_loading(get_path(r"template/machinelearning"))
3337. def prediction_boundary(x_range, x_means, predict_func, data_type): # regression-style x-x heat maps
3338. # x_range lists the plotting ranges, x_means holds the fixed values of the remaining features, predict_func is the prediction callback
3339. # a: feature x, b: feature x-1, c: the other features
  3340. render_list = []
  3341. if len(x_means) == 1:
  3342. return render_list
  3343. for i in range(len(x_means)):
  3344. for j in range(len(x_means)):
  3345. if j <= i:
  3346. continue
  3347. a_range = x_range[j]
  3348. a_type = data_type[j]
  3349. b_range = x_range[i]
  3350. b_type = data_type[i]
  3351. if a_type == 1:
  3352. a_list = DataOperations.make_list(
  3353. a_range[0], a_range[1], 70)
  3354. else:
  3355. a_list = DataOperations.list_filter(a_range) # 可以接受最大为70
  3356. if b_type == 1:
  3357. b_list = DataOperations.make_list(
  3358. b_range[0], b_range[1], 35)
  3359. else:
  3360. b_list = DataOperations.list_filter(b_range) # 可以接受最大为70
  3361. a = np.array([i for i in a_list for _ in b_list]).T
  3362. b = np.array([i for _ in a_list for i in b_list]).T
  3363. data = np.array([x_means for _ in a_list for i in b_list])
  3364. data[:, j] = a
  3365. data[:, i] = b
  3366. y_data = predict_func(data)[0].tolist()
  3367. value = [[float(a[i]), float(b[i]), y_data[i]]
  3368. for i in range(len(a))]
  3369. c = (
  3370. HeatMap()
  3371. .add_xaxis(np.unique(a))
  3372. # value的第一个数值是x
  3373. .add_yaxis(f"数据", np.unique(b), value, **label_setting)
  3374. .set_global_opts(
  3375. title_opts=opts.TitleOpts(title="预测热力图"),
  3376. **global_not_legend,
  3377. yaxis_opts=opts.AxisOpts(
  3378. is_scale=True, type_="category"
  3379. ), # 'category'
  3380. xaxis_opts=opts.AxisOpts(
  3381. is_scale=True, type_="category"),
  3382. visualmap_opts=opts.VisualMapOpts(
  3383. is_show=True,
  3384. max_=int(max(y_data)) + 1,
  3385. min_=int(min(y_data)),
  3386. pos_right="3%",
  3387. ),
  3388. ) # 显示
  3389. )
  3390. render_list.append(c)
  3391. return render_list
  3392. @staticmethod
  3393. @plugin_func_loading(get_path(r"template/machinelearning"))
  3394. # 回归型x-x热力图(more)
  3395. def prediction_boundary_more(x_range, x_means, predict_func, data_type):
3396. # x_range lists the plotting ranges, x_means holds the fixed values of the remaining features, predict_func is the prediction callback
3397. # a: feature x, b: feature x-1, c: the other features
  3398. render_list = []
  3399. if len(x_means) == 1:
  3400. return render_list
  3401. for i in range(len(x_means)):
  3402. if i == 0:
  3403. continue
  3404. a_range = x_range[i - 1]
  3405. a_type = data_type[i - 1]
  3406. b_range = x_range[i]
  3407. b_type = data_type[i]
  3408. if a_type == 1:
  3409. a_list = DataOperations.make_list(a_range[0], a_range[1], 70)
  3410. else:
  3411. a_list = DataOperations.list_filter(a_range) # 可以接受最大为70
  3412. if b_type == 1:
  3413. b_list = DataOperations.make_list(b_range[0], b_range[1], 35)
  3414. else:
  3415. b_list = DataOperations.list_filter(b_range) # 可以接受最大为70
  3416. a = np.array([i for i in a_list for _ in b_list]).T
  3417. b = np.array([i for _ in a_list for i in b_list]).T
  3418. data = np.array([x_means for _ in a_list for i in b_list])
  3419. data[:, i - 1] = a
  3420. data[:, i] = b
  3421. y_data = predict_func(data)[0].tolist()
  3422. value = [[float(a[i]), float(b[i]), y_data[i]]
  3423. for i in range(len(a))]
  3424. c = (
  3425. HeatMap()
  3426. .add_xaxis(np.unique(a))
  3427. # value的第一个数值是x
  3428. .add_yaxis(f"数据", np.unique(b), value, **label_setting)
  3429. .set_global_opts(
  3430. title_opts=opts.TitleOpts(title="预测热力图"),
  3431. **global_not_legend,
  3432. yaxis_opts=opts.AxisOpts(
  3433. is_scale=True, type_="category"), # 'category'
  3434. xaxis_opts=opts.AxisOpts(is_scale=True, type_="category"),
  3435. visualmap_opts=opts.VisualMapOpts(
  3436. is_show=True,
  3437. max_=int(max(y_data)) + 1,
  3438. min_=int(min(y_data)),
  3439. pos_right="3%",
  3440. ),
  3441. ) # 显示
  3442. )
  3443. render_list.append(c)
  3444. return render_list
  3445. @staticmethod
  3446. def decision_boundary(
  3447. x_range, x_means, predict_func, class_list, data_type, no_unknow=False
3448. ): # classification-style x-x heat maps
3449. # x_range lists the plotting ranges, x_means the fixed values of the other features, predict_func is the prediction callback, class_list the classes
3450. # a: feature x, b: feature x-1, c: the other features
3451. # convention: i-1, a and x_1 all denote the x axis
  3452. class_dict = dict(zip(class_list, [i for i in range(len(class_list))]))
  3453. if not no_unknow:
3454. map_dict = [{"min": -1.5, "max": -0.5, "label": "未知"}] # piecewise legend
  3455. else:
  3456. map_dict = []
  3457. for i in class_dict:
  3458. map_dict.append(
  3459. {"min": class_dict[i] - 0.5, "max": class_dict[i] + 0.5, "label": str(i)}
  3460. )
  3461. render_list = []
  3462. if len(x_means) == 1:
  3463. a_range = x_range[0]
  3464. if data_type[0] == 1:
  3465. a_list = DataOperations.make_list(a_range[0], a_range[1], 70)
  3466. else:
  3467. a_list = a_range
  3468. a = np.array([i for i in a_list]).reshape(-1, 1)
  3469. y_data = predict_func(a)[0].tolist()
  3470. value = [[0, float(a[i]), class_dict.get(y_data[i], -1)]
  3471. for i in range(len(a))]
  3472. c = (
  3473. HeatMap()
  3474. .add_xaxis(["None"])
  3475. # value的第一个数值是x
  3476. .add_yaxis(f"数据", np.unique(a), value, **label_setting)
  3477. .set_global_opts(
  3478. title_opts=opts.TitleOpts(title="预测热力图"),
  3479. **global_not_legend,
  3480. yaxis_opts=opts.AxisOpts(
  3481. is_scale=True, type_="category"), # 'category'
  3482. xaxis_opts=opts.AxisOpts(is_scale=True, type_="category"),
  3483. visualmap_opts=opts.VisualMapOpts(
  3484. is_show=True,
  3485. max_=max(class_dict.values()),
  3486. min_=-1,
  3487. is_piecewise=True,
  3488. pieces=map_dict,
  3489. orient="horizontal",
  3490. pos_bottom="3%",
  3491. ),
  3492. )
  3493. )
  3494. render_list.append(c)
  3495. return render_list
3496. # reached only when x_means has more than one feature
  3497. for i in range(len(x_means)):
  3498. if i == 0:
  3499. continue
  3500. a_range = x_range[i - 1]
  3501. a_type = data_type[i - 1]
  3502. b_range = x_range[i]
  3503. b_type = data_type[i]
  3504. if a_type == 1:
  3505. a_list = DataOperations.make_list(a_range[0], a_range[1], 70)
  3506. else:
  3507. a_list = a_range
  3508. if b_type == 1:
  3509. rb = DataOperations.make_list(b_range[0], b_range[1], 35)
  3510. else:
  3511. rb = b_range
  3512. a = np.array([i for i in a_list for _ in rb]).T
  3513. b = np.array([i for _ in a_list for i in rb]).T
  3514. data = np.array([x_means for _ in a_list for i in rb])
  3515. data[:, i - 1] = a
  3516. data[:, i] = b
  3517. y_data = predict_func(data)[0].tolist()
  3518. value = [
  3519. [float(a[i]), float(b[i]), class_dict.get(y_data[i], -1)]
  3520. for i in range(len(a))
  3521. ]
  3522. c = (
  3523. HeatMap()
  3524. .add_xaxis(np.unique(a))
  3525. # value的第一个数值是x
  3526. .add_yaxis(f"数据", np.unique(b), value, **label_setting)
  3527. .set_global_opts(
  3528. title_opts=opts.TitleOpts(title="预测热力图"),
  3529. **global_not_legend,
  3530. yaxis_opts=opts.AxisOpts(
  3531. is_scale=True, type_="category"), # 'category'
  3532. xaxis_opts=opts.AxisOpts(is_scale=True, type_="category"),
  3533. visualmap_opts=opts.VisualMapOpts(
  3534. is_show=True,
  3535. max_=max(class_dict.values()),
  3536. min_=-1,
  3537. is_piecewise=True,
  3538. pieces=map_dict,
  3539. orient="horizontal",
  3540. pos_bottom="3%",
  3541. ),
  3542. )
  3543. )
  3544. render_list.append(c)
  3545. return render_list
  3546. @staticmethod
  3547. def decision_boundary_more(
  3548. x_range, x_means, predict_func, class_list, data_type, no_unknow=False
3549. ): # classification-style x-x heat maps (all feature pairs)
3550. # x_range lists the plotting ranges, x_means the fixed values of the other features, predict_func is the prediction callback, class_list the classes
3551. # a: feature x, b: feature x-1, c: the other features
3552. # convention: i-1, a and x_1 all denote the x axis
  3553. class_dict = dict(zip(class_list, [i for i in range(len(class_list))]))
  3554. if not no_unknow:
3555. map_dict = [{"min": -1.5, "max": -0.5, "label": "未知"}] # piecewise legend
  3556. else:
  3557. map_dict = []
  3558. for i in class_dict:
  3559. map_dict.append(
  3560. {"min": class_dict[i] - 0.5, "max": class_dict[i] + 0.5, "label": str(i)}
  3561. )
  3562. render_list = []
  3563. if len(x_means) == 1:
  3564. return Boundary.decision_boundary(
  3565. x_range, x_means, predict_func, class_list, data_type, no_unknow)
3566. # reached only when x_means has more than one feature
  3567. for i in range(len(x_means)):
  3568. for j in range(len(x_means)):
  3569. if j <= i:
  3570. continue
  3571. a_range = x_range[j]
  3572. a_type = data_type[j]
  3573. b_range = x_range[i]
  3574. b_type = data_type[i]
  3575. if a_type == 1:
  3576. a_range = DataOperations.make_list(
  3577. a_range[0], a_range[1], 70)
  3578. else:
  3579. a_range = a_range
  3580. if b_type == 1:
  3581. b_range = DataOperations.make_list(
  3582. b_range[0], b_range[1], 35)
  3583. else:
  3584. b_range = b_range
  3585. a = np.array([i for i in a_range for _ in b_range]).T
  3586. b = np.array([i for _ in a_range for i in b_range]).T
  3587. data = np.array([x_means for _ in a_range for i in b_range])
  3588. data[:, j] = a
  3589. data[:, i] = b
  3590. y_data = predict_func(data)[0].tolist()
  3591. value = [
  3592. [float(a[i]), float(b[i]), class_dict.get(y_data[i], -1)]
  3593. for i in range(len(a))
  3594. ]
  3595. c = (
  3596. HeatMap()
  3597. .add_xaxis(np.unique(a))
  3598. # value的第一个数值是x
  3599. .add_yaxis(f"数据", np.unique(b), value, **label_setting)
  3600. .set_global_opts(
  3601. title_opts=opts.TitleOpts(title="预测热力图"),
  3602. **global_not_legend,
  3603. yaxis_opts=opts.AxisOpts(
  3604. is_scale=True, type_="category"
  3605. ), # 'category'
  3606. xaxis_opts=opts.AxisOpts(
  3607. is_scale=True, type_="category"),
  3608. visualmap_opts=opts.VisualMapOpts(
  3609. is_show=True,
  3610. max_=max(class_dict.values()),
  3611. min_=-1,
  3612. is_piecewise=True,
  3613. pieces=map_dict,
  3614. orient="horizontal",
  3615. pos_bottom="3%",
  3616. ),
  3617. )
  3618. )
  3619. render_list.append(c)
  3620. return render_list
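# The paired comprehensions used throughout Boundary are a hand-rolled grid:
# `a` repeats each a_list entry once per b_list entry while `b` cycles through
# b_list, matching np.meshgrid(..., indexing="ij") once flattened. Sketch:
def _demo_grid_equivalence():
    import numpy as np
    a_list, b_list = [1, 2, 3], [10, 20]
    a = np.array([i for i in a_list for _ in b_list])
    b = np.array([i for _ in a_list for i in b_list])
    aa, bb = np.meshgrid(a_list, b_list, indexing="ij")
    assert (a == aa.ravel()).all() and (b == bb.ravel()).all()
    return a, b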
  3621. class TreePlot:
  3622. @staticmethod
  3623. @plugin_func_loading(get_path(r"template/machinelearning"))
  3624. def see_tree(tree_file_dir):
3625. node_regex = re.compile(r'^([0-9]+) \[label="(.+)"\] ;$') # regex matching node lines
3626. link_regex = re.compile("^([0-9]+) -> ([0-9]+) (.*);$") # regex matching edge lines
  3627. node_dict = {}
  3628. link_list = []
3629. with open(tree_file_dir, "r") as f: # apparently writing and reading must use separate handles
  3630. for i in f:
  3631. try:
  3632. regex_result = re.findall(node_regex, i)[0]
  3633. if regex_result[0] != "":
  3634. try:
  3635. v = float(regex_result[0])
  3636. except ValueError:
  3637. v = 0
  3638. node_dict[regex_result[0]] = {
  3639. "name": regex_result[1].replace("\\n", "\n"),
  3640. "value": v,
  3641. "children": [],
  3642. }
  3643. continue
  3644. except BaseException as e:
  3645. logging.warning(str(e))
  3646. try:
  3647. regex_result = re.findall(link_regex, i)[0]
  3648. if regex_result[0] != "" and regex_result[1] != "":
  3649. link_list.append((regex_result[0], regex_result[1]))
  3650. except BaseException as e:
  3651. logging.warning(str(e))
3652. father_list = [] # nodes that already have a parent
3653. for i in link_list:
3654. father = i[0] # parent node
3655. son = i[1] # child node
  3656. try:
  3657. node_dict[father]["children"].append(node_dict[son])
  3658. father_list.append(son)
  3659. except BaseException as e:
  3660. logging.warning(str(e))
  3661. father = list(set(node_dict.keys()) - set(father_list))
  3662. c = (
  3663. Tree()
  3664. .add("", [node_dict[father[0]]], is_roam=True)
  3665. .set_global_opts(
  3666. title_opts=opts.TitleOpts(title="决策树可视化"),
  3667. toolbox_opts=opts.ToolboxOpts(is_show=True),
  3668. )
  3669. )
  3670. return c
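# Sample of the Graphviz .dot lines the two regexes above expect, in the style
# sklearn's export_graphviz emits (hypothetical node contents):
#   0 [label="x[0] <= 0.5\ngini = 0.5"] ;
#   1 [label="gini = 0.0"] ;
#   0 -> 1 [labeldistance=2.5] ;
# node_regex captures the id and label; link_regex captures the parent -> child pair.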
  3671. class MakePyecharts:
  3672. @staticmethod
  3673. @plugin_func_loading(get_path(r"template/machinelearning"))
  3674. def make_tab(heard, row):
  3675. return Table().add(headers=heard, rows=row)
  3676. @staticmethod
  3677. @plugin_func_loading(get_path(r"template/machinelearning"))
  3678. def coefficient_scatter_plot(w_heard, w):
  3679. c = (
  3680. Scatter() .add_xaxis(w_heard) .add_yaxis(
  3681. "", w, **label_setting) .set_global_opts(
  3682. title_opts=opts.TitleOpts(
  3683. title="系数w散点图"), **global_setting))
  3684. return c
  3685. @staticmethod
  3686. @plugin_func_loading(get_path(r"template/machinelearning"))
  3687. def coefficient_bar_plot(w_heard, w):
  3688. c = (
  3689. Bar() .add_xaxis(w_heard) .add_yaxis(
  3690. "",
  3691. abs(w).tolist(),
  3692. **label_setting) .set_global_opts(
  3693. title_opts=opts.TitleOpts(
  3694. title="系数w柱状图"),
  3695. **global_setting))
  3696. return c
  3697. @staticmethod
  3698. @plugin_func_loading(get_path(r"template/machinelearning"))
3699. def make_bar(name, value, tab): # draw a bar chart into the given tab
  3700. c = (
  3701. Bar()
  3702. .add_xaxis([f"[{i}]特征" for i in range(len(value))])
  3703. .add_yaxis(name, value, **label_setting)
  3704. .set_global_opts(title_opts=opts.TitleOpts(title="系数w柱状图"), **global_setting)
  3705. )
  3706. tab.add(c, name)
  3707. class TrainingVisualization:
  3708. @staticmethod
  3709. @plugin_func_loading(get_path(r"template/machinelearning"))
3710. # centreless training-data scatter plots (clustering, all feature pairs)
  3711. def training_visualization_more_no_center(x_data, class_list, y_data):
  3712. x_data = x_data.transpose()
  3713. if len(x_data) == 1:
  3714. x_data = np.array([x_data[0], np.zeros(len(x_data[0]))])
  3715. statistics_assistant = Statistics.quick_stats(x_data)
  3716. render_list = []
  3717. for i in range(len(x_data)):
  3718. for a in range(len(x_data)):
  3719. if a <= i:
  3720. continue
  3721. x1 = x_data[i] # x坐标
  3722. x1_is_continuous = Statistics.is_continuous(x1)
  3723. x2 = x_data[a] # y坐标
  3724. x2_is_continuous = Statistics.is_continuous(x2)
3725. base_render = None # the chart accumulated so far via overlap
  3726. for class_num in range(len(class_list)):
  3727. now_class = class_list[class_num]
  3728. plot_x1 = x1[y_data == now_class].tolist()
  3729. plot_x2 = x2[y_data == now_class]
  3730. axis_x2 = np.unique(plot_x2)
  3731. plot_x2 = x2[y_data == now_class].tolist()
3732. # unlike an ordinary scatter chart, x1 is the vertical coordinate here
  3733. c = (
  3734. Scatter() .add_xaxis(plot_x2) .add_yaxis(
  3735. f"{now_class}",
  3736. plot_x1,
  3737. **label_setting) .set_global_opts(
  3738. title_opts=opts.TitleOpts(
  3739. title=f"[{a}-{i}]训练数据散点图"),
  3740. **global_setting,
  3741. yaxis_opts=opts.AxisOpts(
  3742. type_="value" if x1_is_continuous else "category",
  3743. is_scale=True,
  3744. ),
  3745. xaxis_opts=opts.AxisOpts(
  3746. type_="value" if x2_is_continuous else "category",
  3747. is_scale=True,
  3748. ),
  3749. ))
  3750. c.add_xaxis(axis_x2)
  3751. if base_render is None:
  3752. base_render = c
  3753. else:
  3754. base_render = base_render.overlap(c)
  3755. render_list.append(base_render)
  3756. means, x_range, data_type = statistics_assistant.get()
  3757. return render_list, means, x_range, data_type
  3758. @staticmethod
  3759. @plugin_func_loading(get_path(r"template/machinelearning"))
3760. # centred training-data scatter plots (all feature pairs)
  3761. def training_visualization_more(x_data, class_list, y_data, center):
  3762. x_data = x_data.transpose()
  3763. if len(x_data) == 1:
  3764. x_data = np.array([x_data[0], np.zeros(len(x_data[0]))])
  3765. statistics_assistant = Statistics.quick_stats(x_data)
  3766. render_list = []
  3767. for i in range(len(x_data)):
  3768. for a in range(len(x_data)):
  3769. if a <= i:
  3770. continue
  3771. x1 = x_data[i] # x坐标
  3772. x1_is_continuous = Statistics.is_continuous(x1)
  3773. x2 = x_data[a] # y坐标
  3774. x2_is_continuous = Statistics.is_continuous(x2)
3775. base_render = None # the chart accumulated so far via overlap
  3776. for class_num in range(len(class_list)):
  3777. now_class = class_list[class_num]
  3778. plot_x1 = x1[y_data == now_class].tolist()
  3779. plot_x2 = x2[y_data == now_class]
  3780. axis_x2 = np.unique(plot_x2)
  3781. plot_x2 = x2[y_data == now_class].tolist()
3782. # unlike an ordinary scatter chart, x1 is the vertical coordinate here
  3783. c = (
  3784. Scatter() .add_xaxis(plot_x2) .add_yaxis(
  3785. f"{now_class}",
  3786. plot_x1,
  3787. **label_setting) .set_global_opts(
  3788. title_opts=opts.TitleOpts(
  3789. title=f"[{a}-{i}]训练数据散点图"),
  3790. **global_setting,
  3791. yaxis_opts=opts.AxisOpts(
  3792. type_="value" if x1_is_continuous else "category",
  3793. is_scale=True,
  3794. ),
  3795. xaxis_opts=opts.AxisOpts(
  3796. type_="value" if x2_is_continuous else "category",
  3797. is_scale=True,
  3798. ),
  3799. ))
  3800. c.add_xaxis(axis_x2)
3801. # add the cluster-centre marker
  3802. try:
  3803. center_x2 = [center[class_num][a]]
  3804. except IndexError:
  3805. center_x2 = [0]
  3806. b = (
  3807. Scatter() .add_xaxis(center_x2) .add_yaxis(
  3808. f"[{now_class}]中心",
  3809. [
  3810. center[class_num][i]],
  3811. **label_setting,
  3812. symbol="triangle",
  3813. ) .set_global_opts(
  3814. title_opts=opts.TitleOpts(
  3815. title="簇中心"),
  3816. **global_setting,
  3817. yaxis_opts=opts.AxisOpts(
  3818. type_="value" if x1_is_continuous else "category",
  3819. is_scale=True,
  3820. ),
  3821. xaxis_opts=opts.AxisOpts(
  3822. type_="value" if x2_is_continuous else "category",
  3823. is_scale=True,
  3824. ),
  3825. ))
  3826. c.overlap(b)
  3827. if base_render is None:
  3828. base_render = c
  3829. else:
  3830. base_render = base_render.overlap(c)
  3831. render_list.append(base_render)
  3832. means, x_range, data_type = statistics_assistant.get()
  3833. return render_list, means, x_range, data_type
  3834. @staticmethod
  3835. @plugin_func_loading(get_path(r"template/machinelearning"))
3836. # centred training-data scatter plots
  3837. def training_visualization_center(x_data, class_data, y_data, center):
  3838. x_data = x_data.transpose()
  3839. if len(x_data) == 1:
  3840. x_data = np.array([x_data[0], np.zeros(len(x_data[0]))])
  3841. statistics_assistant = Statistics.quick_stats(x_data)
  3842. render_list = []
  3843. for i in range(len(x_data)):
  3844. if i == 0:
  3845. continue
  3846. x1 = x_data[i] # x坐标
  3847. x1_is_continuous = Statistics.is_continuous(x1)
  3848. x2 = x_data[i - 1] # y坐标
  3849. x2_is_continuous = Statistics.is_continuous(x2)
3850. base_render = None # the chart accumulated so far via overlap
  3851. for class_num in range(len(class_data)):
  3852. n_class = class_data[class_num]
  3853. x_1 = x1[y_data == n_class].tolist()
  3854. x_2 = x2[y_data == n_class]
  3855. x_2_new = np.unique(x_2)
  3856. x_2 = x2[y_data == n_class].tolist()
3857. # unlike an ordinary scatter chart, x1 is the vertical coordinate here
  3858. c = (
  3859. Scatter().add_xaxis(x_2).add_yaxis(
  3860. f"{n_class}",
  3861. x_1,
  3862. **label_setting).set_global_opts(
  3863. title_opts=opts.TitleOpts(
  3864. title=f"[{i - 1}-{i}]训练数据散点图"),
  3865. **global_setting,
  3866. yaxis_opts=opts.AxisOpts(
  3867. type_="value" if x1_is_continuous else "category",
  3868. is_scale=True),
  3869. xaxis_opts=opts.AxisOpts(
  3870. type_="value" if x2_is_continuous else "category",
  3871. is_scale=True),
  3872. ))
  3873. c.add_xaxis(x_2_new)
3874. # add the cluster-centre marker
  3875. try:
  3876. center_x_2 = [center[class_num][i - 1]]
  3877. except IndexError:
  3878. center_x_2 = [0]
  3879. b = (
  3880. Scatter().add_xaxis(center_x_2).add_yaxis(
  3881. f"[{n_class}]中心",
  3882. [
  3883. center[class_num][i]],
  3884. **label_setting,
  3885. symbol="triangle",
  3886. ).set_global_opts(
  3887. title_opts=opts.TitleOpts(
  3888. title="簇中心"),
  3889. **global_setting,
  3890. yaxis_opts=opts.AxisOpts(
  3891. type_="value" if x1_is_continuous else "category",
  3892. is_scale=True),
  3893. xaxis_opts=opts.AxisOpts(
  3894. type_="value" if x2_is_continuous else "category",
  3895. is_scale=True),
  3896. ))
  3897. c.overlap(b)
  3898. if base_render is None:
  3899. base_render = c
  3900. else:
  3901. base_render = base_render.overlap(c)
  3902. render_list.append(base_render)
  3903. means, x_range, data_type = statistics_assistant.get()
  3904. return render_list, means, x_range, data_type
  3905. @staticmethod
  3906. @plugin_func_loading(get_path(r"template/machinelearning"))
3907. def training_visualization(x_data, class_, y_data): # centreless training-data scatter plots (clustering / classification)
  3908. x_data = x_data.transpose()
  3909. if len(x_data) == 1:
  3910. x_data = np.array([x_data[0], np.zeros(len(x_data[0]))])
  3911. statistics_assistant = Statistics.quick_stats(x_data)
  3912. render_list = []
  3913. for i in range(len(x_data)):
  3914. if i == 0:
  3915. continue
  3916. x1 = x_data[i] # x坐标
  3917. x1_is_continuous = Statistics.is_continuous(x1)
  3918. x2 = x_data[i - 1] # y坐标
  3919. x2_is_continuous = Statistics.is_continuous(x2)
3921. base_render = None # the chart accumulated so far via overlap
  3922. for now_class in class_:
  3923. plot_x1 = x1[y_data == now_class].tolist()
  3924. plot_x2 = x2[y_data == now_class]
  3925. axis_x2 = np.unique(plot_x2)
  3926. plot_x2 = x2[y_data == now_class].tolist()
3927. # unlike an ordinary scatter chart, x1 is the vertical coordinate here
  3928. c = (
  3929. Scatter().add_xaxis(plot_x2).add_yaxis(
  3930. f"{now_class}",
  3931. plot_x1,
  3932. **label_setting).set_global_opts(
  3933. title_opts=opts.TitleOpts(
  3934. title="训练数据散点图"),
  3935. **global_setting,
  3936. yaxis_opts=opts.AxisOpts(
  3937. type_="value" if x1_is_continuous else "category",
  3938. is_scale=True),
  3939. xaxis_opts=opts.AxisOpts(
  3940. type_="value" if x2_is_continuous else "category",
  3941. is_scale=True),
  3942. ))
  3943. c.add_xaxis(axis_x2)
  3944. if base_render is None:
  3945. base_render = c
  3946. else:
  3947. base_render = base_render.overlap(c)
  3948. render_list.append(base_render)
  3949. means, x_range, data_type = statistics_assistant.get()
  3950. return render_list, means, x_range, data_type
  3951. @staticmethod
  3952. @plugin_func_loading(get_path(r"template/machinelearning"))
3953. def training_visualization_no_class(x_data): # x-x scatter plots without class labels
  3954. x_data = x_data.transpose()
  3955. if len(x_data) == 1:
  3956. x_data = np.array([x_data[0], np.zeros(len(x_data[0]))])
  3957. statistics_assistant = Statistics.quick_stats(x_data)
  3958. render_list = []
  3959. for i in range(len(x_data)):
  3960. if i == 0:
  3961. continue
  3962. x1 = x_data[i] # x坐标
  3963. x1_is_continuous = Statistics.is_continuous(x1)
  3964. x2 = x_data[i - 1] # y坐标
  3965. x2_is_continuous = Statistics.is_continuous(x2)
  3966. x2_only = np.unique(x2)
3967. # unlike an ordinary scatter chart, x1 is the vertical coordinate here
  3968. c = (
  3969. Scatter().add_xaxis(x2).add_yaxis(
  3970. "",
  3971. x1.tolist(),
  3972. **label_setting).set_global_opts(
  3973. title_opts=opts.TitleOpts(
  3974. title="训练数据散点图"),
  3975. **global_not_legend,
  3976. yaxis_opts=opts.AxisOpts(
  3977. type_="value" if x1_is_continuous else "category",
  3978. is_scale=True),
  3979. xaxis_opts=opts.AxisOpts(
  3980. type_="value" if x2_is_continuous else "category",
  3981. is_scale=True),
  3982. ))
  3983. c.add_xaxis(x2_only)
  3984. render_list.append(c)
  3985. means, x_range, data_type = statistics_assistant.get()
  3986. return render_list, means, x_range, data_type
  3987. @staticmethod
  3988. @plugin_func_loading(get_path(r"template/machinelearning"))
3989. # class-free x-x scatter plots (all feature pairs)
  3990. def training_visualization_no_class_more(x_data, data_name=""):
3991. setting = global_setting if data_name else global_not_legend
  3992. x_data = x_data.transpose()
  3993. only = False
  3994. if len(x_data) == 1:
  3995. x_data = np.array([x_data[0], np.zeros(len(x_data[0]))])
  3996. only = True
  3997. render_list = []
  3998. for i in range(len(x_data)):
  3999. for a in range(len(x_data)):
  4000. if a <= i:
  4001. continue # 重复内容,跳过
  4002. x1 = x_data[i] # x坐标
  4003. x1_is_continuous = Statistics.is_continuous(x1)
  4004. x2 = x_data[a] # y坐标
  4005. x2_is_continuous = Statistics.is_continuous(x2)
  4006. x2_only = np.unique(x2)
  4007. if only:
  4008. x2_is_continuous = False
4009. # unlike an ordinary scatter chart, x1 is the vertical coordinate here
  4010. c = (
  4011. Scatter().add_xaxis(x2).add_yaxis(
  4012. data_name,
  4013. x1,
  4014. **label_setting).set_global_opts(
  4015. title_opts=opts.TitleOpts(
  4016. title=f"[{i}-{a}]数据散点图"),
4017. **setting,
  4018. yaxis_opts=opts.AxisOpts(
  4019. type_="value" if x1_is_continuous else "category",
  4020. is_scale=True),
  4021. xaxis_opts=opts.AxisOpts(
  4022. type_="value" if x2_is_continuous else "category",
  4023. is_scale=True),
  4024. ))
  4025. c.add_xaxis(x2_only)
  4026. render_list.append(c)
  4027. return render_list
  4028. @staticmethod
  4029. @plugin_func_loading(get_path(r"template/machinelearning"))
4030. # x-x scatter plots with per-point tooltip labels
  4031. def training_visualization_no_class_more_format(x_data, data_name=""):
4032. setting = global_setting if data_name else global_not_legend
  4033. x_data = x_data.transpose()
  4034. only = False
  4035. if len(x_data) == 1:
  4036. x_data = np.array([x_data[0], np.zeros(len(x_data[0]))])
  4037. only = True
  4038. render_list = []
  4039. for i in range(len(x_data)):
  4040. for a in range(len(x_data)):
  4041. if a <= i:
  4042. continue # 重复内容,跳过(a读取的是i后面的)
  4043. x1 = x_data[i] # x坐标
  4044. x1_is_continuous = Statistics.is_continuous(x1)
  4045. x2 = x_data[a] # y坐标
  4046. x2_is_continuous = Statistics.is_continuous(x2)
  4047. x2_only = np.unique(x2)
  4048. x1_list = x1.astype(np.str).tolist()
  4049. for j in range(len(x1_list)):
  4050. x1_list[j] = [x1_list[j], f"特征{j}"]
  4051. if only:
  4052. x2_is_continuous = False
  4053. # x与散点图不同,这里是纵坐标
  4054. c = (
  4055. Scatter().add_xaxis(x2).add_yaxis(
  4056. data_name,
  4057. x1_list,
  4058. **label_setting).set_global_opts(
  4059. title_opts=opts.TitleOpts(
  4060. title=f"[{i}-{a}]数据散点图"),
  4061. **seeting,
  4062. yaxis_opts=opts.AxisOpts(
  4063. type_="value" if x1_is_continuous else "category",
  4064. is_scale=True),
  4065. xaxis_opts=opts.AxisOpts(
  4066. type_="value" if x2_is_continuous else "category",
  4067. is_scale=True),
  4068. tooltip_opts=opts.TooltipOpts(
  4069. is_show=True,
  4070. axis_pointer_type="cross",
  4071. formatter="{c}"),
  4072. ))
  4073. c.add_xaxis(x2_only)
  4074. render_list.append(c)
  4075. return render_list

    @staticmethod
    @plugin_func_loading(get_path(r"template/machinelearning"))
    # always-discrete x-x data charts
    def discrete_training_visualization_no_class_more(x_data, data_name=""):
        setting = global_setting if data_name else global_not_legend
        x_data = x_data.transpose()
        if len(x_data) == 1:
            x_data = np.array([x_data[0], np.zeros(len(x_data[0]))])
        render_list = []
        for i in range(len(x_data)):
            for a in range(len(x_data)):
                if a <= i:
                    continue  # duplicate pair, skip
                x1 = x_data[i]  # x coordinate
                x2 = x_data[a]  # y coordinate
                x2_only = np.unique(x2)
                # unlike the scatter chart above, x is the vertical axis here
                c = (
                    Scatter().add_xaxis(x2).add_yaxis(
                        data_name,
                        x1,
                        **label_setting).set_global_opts(
                        title_opts=opts.TitleOpts(
                            title=f"[{i}-{a}]数据散点图"),
                        **setting,
                        yaxis_opts=opts.AxisOpts(
                            type_="category",
                            is_scale=True),
                        xaxis_opts=opts.AxisOpts(
                            type_="category",
                            is_scale=True),
                    ))
                c.add_xaxis(x2_only)
                render_list.append(c)
        return render_list

    @staticmethod
    @plugin_func_loading(get_path(r"template/machinelearning"))
    def regress_visualization(x_data, y_data):  # plot regression scatter charts
        x_data = x_data.transpose()
        y_is_continuous = Statistics.is_continuous(y_data)
        statistics_assistant = Statistics.quick_stats(x_data)
        render_list = []
        try:
            visualmap_opts = opts.VisualMapOpts(
                is_show=True,
                max_=int(y_data.max()) + 1,
                min_=int(y_data.min()),
                pos_right="3%",
            )
        except ValueError:
            visualmap_opts = None
            y_is_continuous = False
        for i in range(len(x_data)):
            x1 = x_data[i]  # x coordinate
            x1_is_continuous = Statistics.is_continuous(x1)
            # keep the numpy dtype for precision; converting to list too early
            # causes assorted plotting glitches (duplicated values)
            if not y_is_continuous and x1_is_continuous:
                y_is_continuous, x1_is_continuous = x1_is_continuous, y_is_continuous
                x1, y_data = y_data, x1
            c = (
                Scatter()
                .add_xaxis(x1.tolist())  # this is the horizontal axis
                .add_yaxis("数据", y_data.tolist(), **label_setting)
                .set_global_opts(
                    title_opts=opts.TitleOpts(title="预测类型图"),
                    **global_setting,
                    yaxis_opts=opts.AxisOpts(
                        type_="value" if y_is_continuous else "category", is_scale=True
                    ),
                    xaxis_opts=opts.AxisOpts(
                        type_="value" if x1_is_continuous else "category", is_scale=True
                    ),
                    visualmap_opts=visualmap_opts,
                )
            )
            c.add_xaxis(np.unique(x1))
            render_list.append(c)
        means, x_range, data_type = statistics_assistant.get()
        return render_list, means, x_range, data_type
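
    # A hedged usage sketch (assumed shapes): x_data is (n_samples, n_features),
    # y_data the regression target; one chart per feature is produced, with the
    # visual map colouring points by the target value:
    #     charts, means, x_range, data_type = (
    #         TrainingVisualization.regress_visualization(
    #             np.random.rand(50, 2), np.random.rand(50)))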

class Curve:
    @staticmethod
    @plugin_func_loading(get_path(r"template/machinelearning"))
    def training_w(
        x_data, class_list, y_data, w_list, b_list, x_means: list
    ):  # plot classification decision boundaries
        x_data = x_data.transpose()
        if len(x_data) == 1:
            x_data = np.array([x_data[0], np.zeros(len(x_data[0]))])
        render_list = []
        x_means.append(0)
        x_means = np.array(x_means)
        for i in range(len(x_data)):
            if i == 0:
                continue
            x1_is_continuous = Statistics.is_continuous(x_data[i])
            x2 = x_data[i - 1]  # y coordinate
            x2_is_continuous = Statistics.is_continuous(x2)
            o_c = None  # previously accumulated chart
            for class_num in range(len(class_list)):
                n_class = class_list[class_num]
                x2_only = np.unique(x2[y_data == n_class])
                # unlike the scatter chart, x is the vertical axis here
                # this branch works around sklearn's convention: binary
                # classifiers expose a single weight vector
                if len(class_list) == 2:  # binary classification
                    if class_num == 0:
                        continue
                    w = w_list[0]
                    b = b_list[0]
                else:  # multi-class classification
                    w = w_list[class_num]
                    b = b_list[class_num]
                if x2_is_continuous:
                    try:
                        x2_only = np.array(
                            DataOperations.make_list(
                                x2_only.min(), x2_only.max(), 5))
                    except ValueError:  # x2_only is empty, nothing to draw
                        continue
                # y_data would shadow the argument above, hence y_data_
                w = np.append(w, 0)
                # from the rule class = w0*x0 + w1*x1 + ... + b: setting it to 0
                # and holding every other feature at its mean gives the line
                # x_i = -(w[i-1]*x2 + b + sum_others) / w[i]
                y_data_ = -(
                    x2_only * w[i - 1]
                    + b
                    + (x_means[: i - 1] * w[: i - 1]).sum()
                    + (x_means[i + 1:] * w[i + 1:]).sum()
                ) / w[i]  # y_data_ here is really the i-th feature
                c = (
                    Line().add_xaxis(x2_only).add_yaxis(
                        f"决策边界:{n_class}=>[{i}]",
                        y_data_.tolist(),
                        is_smooth=True,
                        **label_setting,
                    ).set_global_opts(
                        title_opts=opts.TitleOpts(
                            title="系数w曲线"),
                        **global_setting,
                        yaxis_opts=opts.AxisOpts(
                            type_="value" if x1_is_continuous else "category",  # y here is really x_1
                            is_scale=True),
                        xaxis_opts=opts.AxisOpts(
                            type_="value" if x2_is_continuous else "category",
                            is_scale=True),
                    ))
                if o_c is None:
                    o_c = c
                else:
                    o_c = o_c.overlap(c)
            # don't attach code below here; the loop above may continue
            render_list.append(o_c)
        return render_list
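
    # A hedged worked example (assumed values): for classes [0, 1] with the
    # single weight vector w_list=[np.array([1.0, -1.0])] and b_list=[0.0],
    # the boundary 1.0*x0 - 1.0*x1 + 0 = 0 is drawn, i.e. the line x1 = x0,
    # ready to be overlapped onto the training scatter chart.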

    @staticmethod
    @plugin_func_loading(get_path(r"template/machinelearning"))
    def regress_w(x_data, w_data: np.array, intercept_b, x_means: list):  # plot regression curves
        x_data = x_data.transpose()
        if len(x_data) == 1:
            x_data = np.array([x_data[0], np.zeros(len(x_data[0]))])
        render_list = []
        x_means.append(0)  # make sure x_means[i + 1:] never goes out of range
        x_means = np.array(x_means)
        w_data = np.append(w_data, 0)
        for i in range(len(x_data)):
            x1 = x_data[i]
            x1_is_continuous = Statistics.is_continuous(x1)
            if x1_is_continuous:
                x1 = np.array(DataOperations.make_list(x1.min(), x1.max(), 5))
            x1_only = np.unique(x1)
            # every feature other than the i-th is held at its mean
            y_data = (
                x1_only * w_data[i]
                + intercept_b
                + (x_means[:i] * w_data[:i]).sum()
                + (x_means[i + 1:] * w_data[i + 1:]).sum()
            )
            y_is_continuous = Statistics.is_continuous(y_data)
            c = (
                Line().add_xaxis(x1_only).add_yaxis(
                    f"拟合结果=>[{i}]",
                    y_data.tolist(),
                    is_smooth=True,
                    **label_setting).set_global_opts(
                    title_opts=opts.TitleOpts(
                        title="系数w曲线"),
                    **global_setting,
                    yaxis_opts=opts.AxisOpts(
                        type_="value" if y_is_continuous else None,
                        is_scale=True),
                    xaxis_opts=opts.AxisOpts(
                        type_="value" if x1_is_continuous else None,
                        is_scale=True),
                ))
            render_list.append(c)
        return render_list
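
    # A hedged worked example (assumed values): with a single feature,
    # w_data=np.array([2.0]) and intercept_b=1.0, the first chart draws
    # y = 2*x + 1 sampled across the feature's range; with more features the
    # held-at-mean terms only shift the line by the constant sum(x_means[j]*w[j]).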

class MultiMap:
    @staticmethod
    @plugin_func_loading(get_path(r"template/machinelearning"))
    def conversion_control(y_data, x_data, tab):  # overlay the two x-x charts
        if isinstance(x_data, np.ndarray) and isinstance(y_data, np.ndarray):
            get_x = TrainingVisualization.training_visualization_no_class_more(
                x_data, "原数据")  # original
            get_y = TrainingVisualization.training_visualization_no_class_more(
                y_data, "转换数据")  # transformed
            for i in range(len(get_x)):
                tab.add(get_x[i].overlap(get_y[i]), f"[{i}]数据x-x散点图")
        return tab

    @staticmethod
    @plugin_func_loading(get_path(r"template/machinelearning"))
    def conversion_separate(y_data, x_data, tab):  # show the two x-x charts side by side
        if isinstance(x_data, np.ndarray) and isinstance(y_data, np.ndarray):
            get_x = TrainingVisualization.training_visualization_no_class_more(
                x_data, "原数据")  # original
            get_y = TrainingVisualization.training_visualization_no_class_more(
                y_data, "转换数据")  # transformed
            for i in range(len(get_x)):
                try:
                    tab.add(get_x[i], f"[{i}]数据x-x散点图")
                except IndexError:
                    pass
                try:
                    tab.add(get_y[i], f"[{i}]变维数据x-x散点图")
                except IndexError:
                    pass
        return tab

    @staticmethod
    @plugin_func_loading(get_path(r"template/machinelearning"))
    def conversion_separate_format(y_data, tab):  # show the transformed x-x charts
        if isinstance(y_data, np.ndarray):
            get_y = TrainingVisualization.training_visualization_no_class_more_format(
                y_data, "转换数据")  # transformed
            for i in range(len(get_y)):
                tab.add(get_y[i], f"[{i}]变维数据x-x散点图")
        return tab

    @staticmethod
    @plugin_func_loading(get_path(r"template/machinelearning"))
    def conversion_separate_wh(w_array, h_array, tab):  # show the W and H charts side by side
        if isinstance(w_array, np.ndarray) and isinstance(h_array, np.ndarray):
            get_x = TrainingVisualization.training_visualization_no_class_more_format(
                w_array, "W矩阵数据")  # original
            get_y = TrainingVisualization.training_visualization_no_class_more(
                h_array.transpose(), "H矩阵数据"
            )  # transposed here and back inside; W*H pairs rows with columns
            for i in range(len(get_x)):
                try:
                    tab.add(get_x[i], f"[{i}]W矩阵x-x散点图")
                except IndexError:
                    pass
                try:
                    tab.add(get_y[i], f"[{i}]H.T矩阵x-x散点图")
                except IndexError:
                    pass
        return tab

class Statistics:
    @staticmethod
    @plugin_func_loading(get_path(r"template/machinelearning"))
    def is_continuous(data: np.ndarray, f: float = 0.1):
        # treat data as continuous when the share of unique values is at
        # least f, or when there are too few samples to judge
        unique_values: list = np.unique(data).tolist()
        return len(unique_values) / len(data) >= f or len(data) <= 3
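
    # Worked example of the heuristic: np.array([1, 1, 2, 2, 3]) has 3 unique
    # values over 5 samples (0.6 >= 0.1), so it counts as continuous, while
    # 100 samples drawn from {0, 1} give a ratio of 0.02 and count as discrete.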

    @staticmethod
    @plugin_func_loading(get_path(r"template/machinelearning"))
    def quick_stats(x_data):
        statistics_assistant = CategoricalData()
        for i in range(len(x_data)):
            x1 = x_data[i]  # x coordinate
            statistics_assistant(x1)
        return statistics_assistant

    @staticmethod
    @plugin_func_loading(get_path(r"template/machinelearning"))
    def des_to_csv(save_dir, name, data, columns=None, row=None):
        save_dir = save_dir + os.sep + name + ".csv"
        DataFrame(data, columns=columns, index=row).to_csv(
            save_dir,
            header=columns is not None,
            index=row is not None,
        )
        return data

class Packing:
    @staticmethod
    @plugin_func_loading(get_path(r"template/machinelearning"))
    def pack(output_filename, source_dir):
        with tarfile.open(output_filename, "w:gz") as tar:
            tar.add(source_dir, arcname=basename(source_dir))
        return output_filename
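
    # A hedged usage sketch: bundle an output directory into a gzip tarball
    # whose top-level entry is the directory's basename:
    #     Packing.pack("/tmp/results.tar.gz", "/tmp/results")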

class MachineLearnerInit(
        LearnerIO,
        Calculation,
        LearnerMerge,
        LearnerSplit,
        LearnerDimensions,
        LearnerShape,
        metaclass=ABCMeta):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.learner = {}  # registry of learner instances
        self.learn_dict = {
            "Line": LineModel,
            "Ridge": LineModel,
            "Lasso": LineModel,
            "LogisticRegression": LogisticregressionModel,
            "Knn_class": KnnModel,
            "Knn": KnnModel,
            "Tree_class": TreeModel,
            "Tree": TreeModel,
            "Forest": ForestModel,
            "Forest_class": ForestModel,
            "GradientTree_class": GradienttreeModel,
            "GradientTree": GradienttreeModel,
            "Variance": VarianceModel,
            "SelectKBest": SelectkbestModel,
            "Z-Score": StandardizationModel,
            "MinMaxScaler": MinmaxscalerModel,
            "LogScaler": LogscalerModel,
            "atanScaler": AtanscalerModel,
            "decimalScaler": DecimalscalerModel,
            "sigmodScaler": SigmodscalerModel,
            "Mapzoom": MapzoomModel,
            "Fuzzy_quantization": FuzzyQuantizationModel,
            "Regularization": RegularizationModel,
            "Binarizer": BinarizerModel,
            "Discretization": DiscretizationModel,
            "Label": LabelModel,
            "OneHotEncoder": OneHotEncoderModel,
            "Missed": MissedModel,
            "PCA": PcaModel,
            "RPCA": RpcaModel,
            "KPCA": KpcaModel,
            "LDA": LdaModel,
            "SVC": SvcModel,
            "SVR": SvrModel,
            "MLP": MlpModel,
            "MLP_class": MlpModel,
            "NMF": NmfModel,
            "t-SNE": TsneModel,
            "k-means": KmeansModel,
            "Agglomerative": AgglomerativeModel,
            "DBSCAN": DbscanModel,
            "ClassBar": ClassBar,
            "FeatureScatter": NearFeatureScatter,
            "FeatureScatterClass": NearFeatureScatterClass,
            "FeatureScatter_all": NearFeatureScatterMore,
            "FeatureScatterClass_all": NearFeatureScatterClassMore,
            "HeatMap": NumpyHeatMap,
            "FeatureY-X": FeatureScatterYX,
            "ClusterTree": ClusterTree,
            "MatrixScatter": MatrixScatter,
            "Correlation": Corr,
            "Statistics": DataAnalysis,
            "Fast_Fourier": FastFourier,
            "Reverse_Fast_Fourier": ReverseFastFourier,
            "[2]Reverse_Fast_Fourier": ReverseFastFourierTwonumpy,
        }
        self.data_type = {}  # records each learner's model type

    @staticmethod
    def learner_parameters(parameters, data_type):  # parse the parameter string
        original_parameter = {}
        target_parameter = {}
        # read the input: the string is executed as Python assignments
        exec(parameters, original_parameter)
        # map the raw values onto model arguments
        if data_type in ("MLP", "MLP_class"):
            target_parameter["alpha"] = float(
                original_parameter.get("alpha", 0.0001)
            )  # MLP regularization strength
        else:
            target_parameter["alpha"] = float(
                original_parameter.get("alpha", 1.0)
            )  # for L1/L2 regularization
        target_parameter["C"] = float(
            original_parameter.get(
                "C", 1.0))  # for L1/L2 regularization
        if data_type in ("MLP", "MLP_class"):
            target_parameter["max_iter"] = int(
                original_parameter.get("max_iter", 200)
            )  # iteration cap
        else:
            target_parameter["max_iter"] = int(
                original_parameter.get("max_iter", 1000)
            )  # iteration cap
        target_parameter["n_neighbors"] = int(
            original_parameter.get("K_knn", 5)
        )  # number of KNN neighbours (exposed under a different name)
        target_parameter["p"] = int(
            original_parameter.get("p", 2))  # Minkowski distance power
        target_parameter["nDim_2"] = bool(
            original_parameter.get("nDim_2", True)
        )  # whether to reduce the data to 2 dimensions
        if data_type in ("Tree", "Forest", "GradientTree"):
            target_parameter["criterion"] = (
                "mse" if bool(
                    original_parameter.get(
                        "is_MSE",
                        True)) else "mae")  # regression split criterion
        else:
            target_parameter["criterion"] = (
                "gini" if bool(
                    original_parameter.get(
                        "is_Gini",
                        True)) else "entropy")  # whether to use Gini impurity
        target_parameter["splitter"] = (
            "random" if bool(
                original_parameter.get(
                    "is_random",
                    False)) else "best")  # whether tree nodes split randomly instead of best-first
        target_parameter["max_features"] = original_parameter.get(
            "max_features", None
        )  # maximum number of features considered
        target_parameter["max_depth"] = original_parameter.get(
            "max_depth", None
        )  # maximum tree depth
        target_parameter["min_samples_split"] = int(
            original_parameter.get("min_samples_split", 2)
        )  # minimum samples needed to keep splitting (small values overfit easily)
        target_parameter["P"] = float(
            original_parameter.get(
                "min_samples_split", 0.8))  # note: reads the min_samples_split key
        target_parameter["k"] = original_parameter.get("k", 1)
        target_parameter["score_func"] = {
            "chi2": chi2,
            "f_classif": f_classif,
            "mutual_info_classif": mutual_info_classif,
            "f_regression": f_regression,
            "mutual_info_regression": mutual_info_regression,
        }.get(original_parameter.get("score_func", "f_classif"), f_classif)
        target_parameter["feature_range"] = tuple(
            original_parameter.get("feature_range", (0, 1))
        )
        target_parameter["norm"] = original_parameter.get(
            "norm", "l2")  # normalization norm, l1 or l2
        target_parameter["threshold"] = float(
            original_parameter.get("threshold", 0.0)
        )  # binarization threshold
        target_parameter["split_range"] = list(
            original_parameter.get("split_range", [0])
        )  # discretization split points
        target_parameter["ndim_up"] = bool(
            original_parameter.get("ndim_up", False))
        target_parameter["miss_value"] = original_parameter.get(
            "miss_value", np.nan)
        target_parameter["fill_method"] = original_parameter.get(
            "fill_method", "mean")
        target_parameter["fill_value"] = original_parameter.get(
            "fill_value", None)
        target_parameter["n_components"] = original_parameter.get(
            "n_components", 1)
        target_parameter["kernel"] = original_parameter.get(
            "kernel", "rbf" if data_type in ("SVR", "SVC") else "linear"
        )
        target_parameter["n_Tree"] = original_parameter.get("n_Tree", 100)
        target_parameter["gamma"] = original_parameter.get("gamma", 1)
        target_parameter["hidden_size"] = tuple(
            original_parameter.get("hidden_size", (100,))
        )
        target_parameter["activation"] = str(
            original_parameter.get("activation", "relu")
        )
        target_parameter["solver"] = str(
            original_parameter.get("solver", "adam"))
        if data_type in ("k-means",):
            target_parameter["n_clusters"] = int(
                original_parameter.get("n_clusters", 8)
            )
        else:
            target_parameter["n_clusters"] = int(
                original_parameter.get("n_clusters", 2)
            )
        target_parameter["eps"] = float(
            original_parameter.get(
                "eps", 0.5))  # DBSCAN radius (assumes the intended key is "eps")
        target_parameter["min_samples"] = int(
            original_parameter.get(
                "min_samples", 5))  # DBSCAN minimum samples (assumes the intended key)
        target_parameter["white_PCA"] = bool(
            original_parameter.get("white_PCA", False))
        return target_parameter
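
    # A hedged usage sketch (assumed values): `parameters` is a string of
    # Python assignments consumed by exec(), e.g.
    #     args = MachineLearnerInit.learner_parameters(
    #         "alpha=0.5\nmax_iter=500", "Lasso")
    # yields args["alpha"] == 0.5 and args["max_iter"] == 500, with every
    # other key at its default.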

    def get_learner(self, name):
        return self.learner[name]

    def get_learner_type(self, name):
        return self.data_type[name]

@plugin_class_loading(get_path(r"template/machinelearning"))
class MachineLearnerAdd(MachineLearnerInit, metaclass=ABCMeta):
    def add_learner(self, learner_str, parameters=""):
        get = self.learn_dict[learner_str]
        name = f"Le[{len(self.learner)}]{learner_str}"
        # tune the parameters
        args_use = self.learner_parameters(parameters, learner_str)
        # build the learner
        self.learner[name] = get(model=learner_str, args_use=args_use)
        self.data_type[name] = learner_str

    def add_learner_from_python(self, learner, name):
        name = f"Le[{len(self.learner)}]{name}"
        # register the learner
        self.learner[name] = learner
        self.data_type[name] = 'from_python'

    def add_curve_fitting(self, learner):
        named_domain = {}
        exec(learner, named_domain)
        name = f'Le[{len(self.learner)}]{named_domain.get("name", "SELF")}'
        func = named_domain.get("f", lambda x, k, b: k * x + b)
        self.learner[name] = CurveFitting(name, learner, func)
        self.data_type[name] = "Curve_fitting"
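
    # A hedged sketch (assumed input): `learner` is Python source defining a
    # display name and the function to fit, consumed by exec(), e.g.
    #     ml.add_curve_fitting(
    #         'name = "linear"\n'
    #         'def f(x, k, b):\n'
    #         '    return k * x + b\n')
    # Missing names fall back to "SELF" and the default linear function.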

    def add_select_from_model(self, learner, parameters=""):
        model = self.get_learner(learner)
        name = f"Le[{len(self.learner)}]SelectFrom_Model:{learner}"
        # tune the parameters
        args_use = self.learner_parameters(parameters, "SelectFrom_Model")
        # build the learner
        self.learner[name] = SelectFromModel(
            learner=model, args_use=args_use, Dic=self.learn_dict
        )
        self.data_type[name] = "SelectFrom_Model"

    def add_predictive_heat_map(self, learner, parameters=""):
        model = self.get_learner(learner)
        name = f"Le[{len(self.learner)}]Predictive_HeatMap:{learner}"
        # build the learner
        args_use = self.learner_parameters(parameters, "Predictive_HeatMap")
        self.learner[name] = PredictiveHeatmap(
            learner=model, args_use=args_use)
        self.data_type[name] = "Predictive_HeatMap"

    def add_predictive_heat_map_more(self, learner, parameters=""):
        model = self.get_learner(learner)
        name = f"Le[{len(self.learner)}]Predictive_HeatMap_More:{learner}"
        # build the learner
        args_use = self.learner_parameters(
            parameters, "Predictive_HeatMap_More")
        self.learner[name] = PredictiveHeatmapMore(
            learner=model, args_use=args_use)
        self.data_type[name] = "Predictive_HeatMap_More"

    def add_view_data(self, learner, parameters=""):
        model = self.get_learner(learner)
        name = f"Le[{len(self.learner)}]View_data:{learner}"
        # build the learner
        args_use = self.learner_parameters(parameters, "View_data")
        self.learner[name] = ViewData(learner=model, args_use=args_use)
        self.data_type[name] = "View_data"

@plugin_class_loading(get_path(r"template/machinelearning"))
class MachineLearnerScore(MachineLearnerInit, metaclass=ABCMeta):
    def score(self, name_x, name_y, learner):  # Score_Only means scoring only; Fit_Simp is the generic fit operation
        model = self.get_learner(learner)
        x = self.get_sheet(name_x)
        y = self.get_sheet(name_y)
        return model.score(x, y)

    def model_evaluation(self, learner, save_dir, name_x, name_y, func=0):  # evaluate and export scores
        x = self.get_sheet(name_x)
        y = self.get_sheet(name_y)
        if new_dir_global:
            dic = save_dir + f"{os.sep}{learner}分类评分[CoTan]"
            new_dic = dic
            a = 0
            while exists(new_dic):  # loop until the directory name is unused
                new_dic = dic + f"[{a}]"
                a += 1
            mkdir(new_dic)
        else:
            new_dic = save_dir
        model = self.get_learner(learner)
        # pick the scorer: 0 = classification, 1 = regression, 2 = clustering
        func = [
            model.class_score,
            model.regression_score,
            model.clusters_score][func]
        save = func(new_dic, x, y)[0]
        # bundle the outputs into a tarball
        if tar_global:
            Packing.pack(f"{new_dic}.tar.gz", new_dic)
        return save, new_dic

    def model_visualization(self, learner, save_dir):  # export visualizations
        if new_dir_global:
            dic = save_dir + f"{os.sep}{learner}数据[CoTan]"
            new_dic = dic
            a = 0
            while exists(new_dic):  # loop until the directory name is unused
                new_dic = dic + f"[{a}]"
                a += 1
            mkdir(new_dic)
        else:
            new_dic = save_dir
        model = self.get_learner(learner)
        # only dump real estimators (not None and not a bare list of charts)
        if model.model is not None and not isinstance(
                model.model, list) and clf_global:
            joblib.dump(model.model, new_dic + f"{os.sep}MODEL.model")  # save the model
        save = model.data_visualization(new_dic)[0]
        # bundle the outputs into a tarball
        if tar_global:
            Packing.pack(f"{new_dic}.tar.gz", new_dic)
        return save, new_dic
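
    # A hedged usage sketch (assumed names): with a learner registered as
    # "Le[0]Tree_class", evaluating it as a classifier writes its scores under
    # save_dir and optionally tars them:
    #     save, out_dir = ml.model_evaluation(
    #         "Le[0]Tree_class", "/tmp/out", "x_sheet", "y_sheet", func=0)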

@plugin_class_loading(get_path(r"template/machinelearning"))
class LearnerActions(MachineLearnerInit, metaclass=ABCMeta):
    def fit_model(self, x_name, y_name, learner, split=0.3, *args, **kwargs):
        x_data = self.get_sheet(x_name)
        y_data = self.get_sheet(y_name)
        model = self.get_learner(learner)
        return model.fit_model(
            x_data, y_data, split=split, x_name=x_name, add_func=self.add_form
        )

    def predict(self, x_name, learner, **kwargs):
        x_data = self.get_sheet(x_name)
        model = self.get_learner(learner)
        y_data, name = model.predict(
            x_data, x_name=x_name, add_func=self.add_form)
        self.add_form(y_data, f"{x_name}:{name}")
        return y_data

def set_global(
        more=more_global,
        all_=all_global,
        csv=csv_global,
        clf=clf_global,
        tar=tar_global,
        new=new_dir_global,
):
    global more_global, all_global, csv_global, clf_global, tar_global, new_dir_global
    more_global = more  # whether to plot with every feature
    all_global = all_  # whether to export charts
    csv_global = csv  # whether to export CSV
    clf_global = clf  # whether to export the model
    tar_global = tar  # whether to bundle a tar archive
    new_dir_global = new  # whether to create a new directory
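
# A hedged usage sketch: these module-level flags gate what fit/evaluation
# runs export; disabling the tar bundle while keeping the other defaults:
#     set_global(tar=False)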