template.py 188 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
7477847794780478147824783478447854786478747884789479047914792479347944795479647974798479948004801480248034804480548064807480848094810481148124813481448154816481748184819482048214822482348244825482648274828482948304831483248334834483548364837483848394840484148424843484448454846484748484849485048514852485348544855485648574858485948604861486248634864486548664867486848694870487148724873487448754876487748784879488048814882488348844885488648874888488948904891489248934894489548964897489848994900490149024903490449054906490749084909491049114912491349144915491649174918491949204921492249234924492549264927492849294930493149324933493449354936493749384939494049414942494349444945494649474948494949504951495249534954495549564957495849594960496149624963496449654966496749684969497049714972497349744975497649774978497949804981498249834984498549864987498849894990499149924993499449954996499749984999500050015002500350045005500650075008500950105011501250135014501550165017501850195020502150225023502450255026502750285029503050315032503350345035503650375038503950405041504250435044504550465047504850495050505150525053505450555056505750585059506050615062506350645065506650675068506950705071507250735074507550765077507850795080508150825083508450855086508750885089509050915092509350945095509650975098509951005101510251035104510551065107510851095110511151125113511451155116511751185119512051215122512351245125512651275128512951305131513251335134513551365137513851395140514151425143
  1. import joblib
  2. import re
  3. import tarfile
  4. from abc import ABCMeta, abstractmethod
  5. from os import getcwd, mkdir
  6. from os.path import split as path_split, splitext, basename, exists
  7. import os
  8. import logging
  9. from sklearn.svm import SVC, SVR # SVC是svm分类,SVR是svm回归
  10. from sklearn.cluster import KMeans, AgglomerativeClustering, DBSCAN
  11. from sklearn.manifold import TSNE
  12. from sklearn.neural_network import MLPClassifier, MLPRegressor
  13. from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as Lda
  14. from sklearn.decomposition import PCA, IncrementalPCA, KernelPCA, NMF
  15. from sklearn.impute import SimpleImputer
  16. from sklearn.preprocessing import *
  17. from sklearn.feature_selection import *
  18. from sklearn.metrics import *
  19. from sklearn.ensemble import (
  20. RandomForestClassifier,
  21. RandomForestRegressor,
  22. GradientBoostingClassifier,
  23. GradientBoostingRegressor,
  24. )
  25. import numpy as np
  26. import matplotlib.pyplot as plt
  27. from pandas import DataFrame, read_csv
  28. from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor, export_graphviz
  29. from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
  30. from sklearn.linear_model import *
  31. from sklearn.model_selection import train_test_split
  32. from scipy.fftpack import fft, ifft # 快速傅里叶变换
  33. from scipy import optimize
  34. from scipy.cluster.hierarchy import dendrogram, ward
  35. from pyecharts.components import Table as TableFisrt # 绘制表格
  36. from pyecharts.options.series_options import JsCode
  37. from pyecharts.charts import Tab as tab_First
  38. from pyecharts.charts import *
  39. from pyecharts import options as opts
  40. from pyecharts.components import Image
  41. from pyecharts.globals import CurrentConfig
  42. from system import plugin_class_loading, get_path, plugin_func_loading, basicConfig
  43. logging.basicConfig(**basicConfig)
  44. CurrentConfig.ONLINE_HOST = f"{getcwd()}{os.sep}assets{os.sep}"
  45. # 设置
  46. np.set_printoptions(threshold=np.inf)
  47. global_setting = dict(
  48. toolbox_opts=opts.ToolboxOpts(is_show=True),
  49. legend_opts=opts.LegendOpts(pos_bottom="3%", type_="scroll"),
  50. )
  51. global_not_legend = dict(
  52. toolbox_opts=opts.ToolboxOpts(is_show=True),
  53. legend_opts=opts.LegendOpts(is_show=False),
  54. )
  55. label_setting = dict(label_opts=opts.LabelOpts(is_show=False))
  56. more_global = False # 是否使用全部特征绘图
  57. all_global = True # 是否导出charts
  58. csv_global = True # 是否导出CSV
  59. clf_global = True # 是否导出模型
  60. tar_global = True # 是否打包tar
  61. new_dir_global = True # 是否新建目录
  62. class LearnBase(metaclass=ABCMeta):
  63. def __init__(self, *args, **kwargs):
  64. self.numpy_dict = {} # name:numpy
  65. self.fucn_add() # 制作Func_Dic
  66. def fucn_add(self):
  67. self.func_dict = {
  68. "abs": lambda x, y: np.abs(x),
  69. "sqrt": lambda x, y: np.sqrt(x),
  70. "pow": lambda x, y: x ** y,
  71. "loge": lambda x, y: np.log(x),
  72. "log10": lambda x, y: np.log10(x),
  73. "ceil": lambda x, y: np.ceil(x),
  74. "floor": lambda x, y: np.floor(x),
  75. "rint": lambda x, y: np.rint(x),
  76. "sin": lambda x, y: np.sin(x),
  77. "cos": lambda x, y: np.cos(x),
  78. "tan": lambda x, y: np.tan(x),
  79. "tanh": lambda x, y: np.tanh(x),
  80. "sinh": lambda x, y: np.sinh(x),
  81. "cosh": lambda x, y: np.cosh(x),
  82. "asin": lambda x, y: np.arcsin(x),
  83. "acos": lambda x, y: np.arccos(x),
  84. "atan": lambda x, y: np.arctan(x),
  85. "atanh": lambda x, y: np.arctanh(x),
  86. "asinh": lambda x, y: np.arcsinh(x),
  87. "acosh": lambda x, y: np.arccosh(x),
  88. "add": lambda x, y: x + y, # 矩阵或元素
  89. "sub": lambda x, y: x - y, # 矩阵或元素
  90. "mul": lambda x, y: np.multiply(x, y), # 元素级别
  91. "matmul": lambda x, y: np.matmul(x, y), # 矩阵
  92. "dot": lambda x, y: np.dot(x, y), # 矩阵
  93. "div": lambda x, y: x / y,
  94. "div_floor": lambda x, y: np.floor_divide(x, y),
  95. "power": lambda x, y: np.power(x, y), # 元素级
  96. }
  97. def get_form(self) -> dict:
  98. return self.numpy_dict.copy()
  99. def get_sheet(self, name) -> np.ndarray:
  100. return self.numpy_dict[name].copy()
  101. @abstractmethod
  102. def add_form(self, data, name):
  103. pass
  104. @plugin_class_loading(get_path(r"template/machinelearning"))
  105. class LearnerIO(LearnBase):
  106. def add_form(self, data: np.array, name):
  107. name = f"{name}[{len(self.numpy_dict)}]"
  108. self.numpy_dict[name] = data
  109. def read_csv(
  110. self,
  111. file_dir,
  112. name,
  113. encoding="utf-8",
  114. str_must=False,
  115. sep=","):
  116. dtype = np.str if str_must else np.float
  117. dataframe = read_csv(
  118. file_dir,
  119. encoding=encoding,
  120. delimiter=sep,
  121. header=None)
  122. try:
  123. data = dataframe.to_numpy(dtype=dtype)
  124. except ValueError:
  125. data = dataframe.to_numpy(dtype=np.str)
  126. if data.ndim == 1:
  127. data = np.expand_dims(data, axis=1)
  128. self.add_form(data, name)
  129. return data
  130. def add_python(self, python_file, sheet_name):
  131. name = {}
  132. name.update(globals().copy())
  133. name.update(locals().copy())
  134. exec(python_file, name)
  135. exec("get = Creat()", name)
  136. if isinstance(name["get"], np.array):
  137. get = name["get"]
  138. else:
  139. get = np.array(name["get"])
  140. self.add_form(get, sheet_name)
  141. return get
  142. def to_csv(self, save_dir: str, name, sep) -> str:
  143. get: np.ndarray = self.get_sheet(name)
  144. np.savetxt(save_dir, get, delimiter=sep)
  145. return save_dir
  146. def to_html_one(self, name, html_dir=""):
  147. if html_dir == "":
  148. html_dir = f"{name}.html"
  149. get: np.ndarray = self.get_sheet(name)
  150. if get.ndim == 1:
  151. get = np.expand_dims(get, axis=1)
  152. get: list = get.tolist()
  153. for i in range(len(get)):
  154. get[i] = [i + 1] + get[i]
  155. headers = [i for i in range(len(get[0]))]
  156. table = TableFisrt()
  157. table.add(headers, get).set_global_opts(
  158. title_opts=opts.ComponentTitleOpts(
  159. title=f"表格:{name}", subtitle="CoTan~机器学习:查看数据"
  160. )
  161. )
  162. table.render(html_dir)
  163. return html_dir
  164. def to_html(self, name, html_dir="", html_type=0):
  165. if html_dir == "":
  166. html_dir = f"{name}.html"
  167. # 把要画的sheet放到第一个
  168. sheet_dict = self.get_form()
  169. del sheet_dict[name]
  170. sheet_list = [name] + list(sheet_dict.keys())
  171. class TabBase:
  172. def __init__(self, q):
  173. self.tab = q # 一个Tab
  174. def render(self, render_dir):
  175. return self.tab.render(render_dir)
  176. # 生成一个显示页面
  177. if html_type == 0:
  178. class NewTab(TabBase):
  179. def add(self, table_, k, *f):
  180. self.tab.add(table_, k)
  181. tab = NewTab(tab_First(page_title="CoTan:查看表格")) # 一个Tab
  182. elif html_type == 1:
  183. class NewTab(TabBase):
  184. def add(self, table_, *k):
  185. self.tab.add(table_)
  186. tab = NewTab(
  187. Page(
  188. page_title="CoTan:查看表格",
  189. layout=Page.DraggablePageLayout))
  190. else:
  191. class NewTab(TabBase):
  192. def add(self, table_, *k):
  193. self.tab.add(table_)
  194. tab = NewTab(
  195. Page(
  196. page_title="CoTan:查看表格",
  197. layout=Page.SimplePageLayout))
  198. # 迭代添加内容
  199. for name in sheet_list:
  200. get: np.ndarray = self.get_sheet(name)
  201. if get.ndim == 1:
  202. get = np.expand_dims(get, axis=1)
  203. get: list = get.tolist()
  204. for i in range(len(get)):
  205. get[i] = [i + 1] + get[i]
  206. headers = [i for i in range(len(get[0]))]
  207. table = TableFisrt()
  208. table.add(headers, get).set_global_opts(
  209. title_opts=opts.ComponentTitleOpts(
  210. title=f"表格:{name}", subtitle="CoTan~机器学习:查看数据"
  211. )
  212. )
  213. tab.add(table, f"表格:{name}")
  214. tab.render(html_dir)
  215. return html_dir
  216. @plugin_class_loading(get_path(r"template/machinelearning"))
  217. class LearnerMerge(LearnBase, metaclass=ABCMeta):
  218. def merge(self, name, axis=0): # aiis:0-横向合并(hstack),1-纵向合并(vstack),2-深度合并
  219. sheet_list = []
  220. for i in name:
  221. sheet_list.append(self.get_sheet(i))
  222. get = {0: np.hstack, 1: np.vstack, 2: np.dstack}[axis](sheet_list)
  223. self.add_form(np.array(get), f"{name[0]}合成")
  224. @plugin_class_loading(get_path(r"template/machinelearning"))
  225. class LearnerSplit(LearnBase, metaclass=ABCMeta):
  226. def split(self, name, split=2, axis=0): # aiis:0-横向分割(hsplit),1-纵向分割(vsplit)
  227. sheet = self.get_sheet(name)
  228. get = {0: np.hsplit, 1: np.vsplit, 2: np.dsplit}[axis](sheet, split)
  229. for i in get:
  230. self.add_form(i, f"{name[0]}分割")
  231. def two_split(self, name, split, axis): # 二分切割(0-横向,1-纵向)
  232. sheet = self.get_sheet(name)
  233. try:
  234. split = float(eval(split))
  235. if split < 1:
  236. split = int(split * len(sheet) if axis == 1 else len(sheet[0]))
  237. else:
  238. assert False
  239. except (ValueError, AssertionError):
  240. split = int(split)
  241. if axis == 0:
  242. self.add_form(sheet[:, split:], f"{name[0]}分割")
  243. self.add_form(sheet[:, :split], f"{name[0]}分割")
  244. @plugin_class_loading(get_path(r"template/machinelearning"))
  245. class LearnerDimensions(LearnBase, metaclass=ABCMeta):
  246. @staticmethod
  247. def deep(sheet: np.ndarray):
  248. return sheet.ravel()
  249. @staticmethod
  250. def down_ndim(sheet: np.ndarray): # 横向
  251. down_list = []
  252. for i in sheet:
  253. down_list.append(i.ravel())
  254. return np.array(down_list)
  255. @staticmethod
  256. def longitudinal_down_ndim(sheet: np.ndarray): # 纵向
  257. down_list = []
  258. for i in range(len(sheet[0])):
  259. down_list.append(sheet[:, i].ravel())
  260. return np.array(down_list).T
  261. def reval(self, name, axis): # axis:0-横向,1-纵向(带.T),2-深度
  262. sheet = self.get_sheet(name)
  263. self.add_form(
  264. {0: self.down_ndim, 1: self.longitudinal_down_ndim, 2: self.deep}[axis](
  265. sheet
  266. ).copy(),
  267. f"{name}伸展",
  268. )
  269. def del_ndim(self, name): # 删除无用维度
  270. sheet = self.get_sheet(name)
  271. self.add_form(np.squeeze(sheet), f"{name}降维")
  272. @plugin_class_loading(get_path(r"template/machinelearning"))
  273. class LearnerShape(LearnBase, metaclass=ABCMeta):
  274. def transpose(self, name, func: list):
  275. sheet = self.get_sheet(name)
  276. if sheet.ndim <= 2:
  277. self.add_form(sheet.transpose().copy(), f"{name}.T")
  278. else:
  279. self.add_form(np.transpose(sheet, func).copy(), f"{name}.T")
  280. def reshape(self, name, shape: list):
  281. sheet = self.get_sheet(name)
  282. self.add_form(sheet.reshape(shape).copy(), f"{name}.r")
  283. @plugin_class_loading(get_path(r"template/machinelearning"))
  284. class Calculation(LearnBase, metaclass=ABCMeta):
  285. def calculation_matrix(self, data, data_type, func):
  286. if 1 not in data_type:
  287. raise Exception
  288. func = self.func_dict.get(func, lambda x, y: x)
  289. args_data = []
  290. for i in range(len(data)):
  291. if data_type[i] == 0:
  292. args_data.append(data[i])
  293. else:
  294. args_data.append(self.get_sheet(data[i]))
  295. get = func(*args_data)
  296. self.add_form(get, f"{func}({data[0]},{data[1]})")
  297. return get
  298. class Machinebase(metaclass=ABCMeta): # 学习器的基类
  299. def __init__(self, *args, **kwargs):
  300. self.model = None
  301. self.have_fit = False
  302. self.have_predict = False
  303. self.x_traindata = None
  304. self.y_traindata = None
  305. # 有监督学习专有的testData
  306. self.x_testdata = None
  307. self.y_testdata = None
  308. # 记录这两个是为了克隆
  309. @abstractmethod
  310. def fit_model(self, x_data, y_data, split, increment, kwargs):
  311. pass
  312. @abstractmethod
  313. def score(self, x_data, y_data):
  314. pass
  315. @abstractmethod
  316. def class_score(self, save_dir, x_data, y_really):
  317. pass
  318. @staticmethod
  319. def _accuracy(y_predict, y_really): # 准确率
  320. return accuracy_score(y_really, y_predict)
  321. @staticmethod
  322. def _macro(y_predict, y_really, func_num=0):
  323. func = [recall_score, precision_score, f1_score] # 召回率,精确率和f1
  324. class_ = np.unique(y_really).tolist()
  325. result = func[func_num](y_really, y_predict, class_, average=None)
  326. return result, class_
  327. @staticmethod
  328. def _confusion_matrix(y_predict, y_really): # 混淆矩阵
  329. class_ = np.unique(y_really).tolist()
  330. return confusion_matrix(y_really, y_predict), class_
  331. @staticmethod
  332. def _kappa_score(y_predict, y_really):
  333. return cohen_kappa_score(y_really, y_predict)
  334. @abstractmethod
  335. def regression_score(self, save_dir, x_data, y_really):
  336. pass
  337. @abstractmethod
  338. def clusters_score(self, save_dir, x_data, args):
  339. pass
  340. @staticmethod
  341. def _mse(y_predict, y_really): # 均方误差
  342. return mean_squared_error(y_really, y_predict)
  343. @staticmethod
  344. def _mae(y_predict, y_really): # 中值绝对误差
  345. return median_absolute_error(y_really, y_predict)
  346. @staticmethod
  347. def _r2_score(y_predict, y_really): # 中值绝对误差
  348. return r2_score(y_really, y_predict)
  349. def _rmse(self, y_predict, y_really): # 中值绝对误差
  350. return self._mse(y_predict, y_really) ** 0.5
  351. @staticmethod
  352. def _coefficient_clustering(x_data, y_predict):
  353. means_score = silhouette_score(x_data, y_predict)
  354. outline_score = silhouette_samples(x_data, y_predict)
  355. return means_score, outline_score
  356. @abstractmethod
  357. def predict(self, x_data, args, kwargs):
  358. pass
  359. @abstractmethod
  360. def data_visualization(self, save_dir, args, kwargs):
  361. pass
@plugin_class_loading(get_path(r"template/machinelearning"))
class StudyMachinebase(Machinebase):
    """Concrete learner base: training, scoring and HTML evaluation reports."""

    def fit_model(self, x_data, y_data, split=0.3, increment=True, **kwargs):
        """Train the wrapped model.

        split: fraction of the given data held out for the test score.
        increment: try partial_fit (incremental) before falling back to fit.
        Returns (train_score, test_score).
        """
        y_data = y_data.ravel()
        try:
            # Append to previously stored data when incrementing; the assert
            # fails on the first call (nothing stored yet) and a shape
            # mismatch raises ValueError -- both fall back to a fresh copy.
            assert not self.x_traindata is None or not increment
            self.x_traindata = np.vstack((x_data, self.x_traindata))
            self.y_traindata = np.vstack((y_data, self.y_traindata))
        except (AssertionError, ValueError):
            self.x_traindata = x_data.copy()
            self.y_traindata = y_data.copy()
        x_train, x_test, y_train, y_test = train_test_split(
            x_data, y_data, test_size=split
        )
        try:  # incremental training
            assert increment
            self.model.partial_fit(x_data, y_data)
        except (AssertionError, AttributeError):
            # model lacks partial_fit, or increment is off: full retrain
            # on all accumulated training data
            self.model.fit(self.x_traindata, self.y_traindata)
        train_score = self.model.score(x_train, y_train)
        test_score = self.model.score(x_test, y_test)
        self.have_fit = True
        return train_score, test_score

    def score(self, x_data, y_data):
        """Delegate scoring to the wrapped model."""
        score = self.model.score(x_data, y_data)
        return score

    def class_score(self, save_dir, x_data: np.ndarray, y_really: np.ndarray):
        """Evaluate classification quality and render an HTML report.

        Produces accuracy/kappa gauges, per-class precision/recall/F1 bars
        and a confusion-matrix heat map; matrices are also saved as CSV.
        Returns a 1-tuple with the rendered HTML path.
        """
        y_really: np.ndarray = y_really.ravel()
        y_predict: np.ndarray = self.predict(x_data)[0]
        accuracy = self._accuracy(y_predict, y_really)
        recall, class_list = self._macro(y_predict, y_really, 0)
        precision, class_list = self._macro(y_predict, y_really, 1)
        f1, class_list = self._macro(y_predict, y_really, 2)
        confusion_matrix_, class_list = self._confusion_matrix(
            y_predict, y_really)
        kappa = self._kappa_score(y_predict, y_really)
        class_list: list
        tab = Tab()

        def gauge_base(name: str, value_: float) -> Gauge:
            # percentage gauge for one scalar metric
            c = (
                Gauge()
                .add("", [(name, round(value_ * 100, 2))], min_=0, max_=100)
                .set_global_opts(title_opts=opts.TitleOpts(title=name))
            )
            return c

        tab.add(gauge_base("准确率", accuracy), "准确率")
        tab.add(gauge_base("kappa", kappa), "kappa")

        def bar_base(name, value_) -> Bar:
            # per-class bar chart
            c = (
                Bar()
                .add_xaxis(class_list)
                .add_yaxis(name, value_, **label_setting)
                .set_global_opts(
                    title_opts=opts.TitleOpts(title=name), **global_setting
                )
            )
            return c

        tab.add(bar_base("精确率", precision.tolist()), "精确率")
        tab.add(bar_base("召回率", recall.tolist()), "召回率")
        tab.add(bar_base("F1", f1.tolist()), "F1")

        def heatmap_base(name, value_, max_, min_, show) -> HeatMap:
            # confusion-matrix heat map; cell labels hidden for many classes
            c = (
                HeatMap()
                .add_xaxis(class_list)
                .add_yaxis(
                    name,
                    class_list,
                    value_,
                    label_opts=opts.LabelOpts(is_show=show, position="inside"),
                )
                .set_global_opts(
                    title_opts=opts.TitleOpts(title=name),
                    **global_setting,
                    visualmap_opts=opts.VisualMapOpts(
                        max_=max_, min_=min_, pos_right="3%"
                    ),
                )
            )
            return c

        value = [
            [class_list[i], class_list[j], float(confusion_matrix_[i, j])]
            for i in range(len(class_list))
            for j in range(len(class_list))
        ]
        tab.add(
            heatmap_base(
                "混淆矩阵",
                value,
                float(confusion_matrix_.max()),
                float(confusion_matrix_.min()),
                len(class_list) < 7,
            ),
            "混淆矩阵",
        )
        des_to_csv(save_dir, "混淆矩阵", confusion_matrix_, class_list, class_list)
        des_to_csv(
            save_dir, "评分", [
                precision, recall, f1], class_list, [
                "精确率", "召回率", "F1"])
        save = save_dir + rf"{os.sep}分类模型评估.HTML"
        tab.render(save)
        return save,

    def regression_score(
            self,
            save_dir,
            x_data: np.ndarray,
            y_really: np.ndarray):
        """Evaluate regression quality (MSE/MAE/RMSE/R^2) into an HTML table."""
        y_really = y_really.ravel()
        y_predict = self.predict(x_data)[0]
        tab = Tab()
        mse = self._mse(y_predict, y_really)
        mae = self._mae(y_predict, y_really)
        r2_score_ = self._r2_score(y_predict, y_really)
        rmse = self._rmse(y_predict, y_really)
        tab.add(make_tab(["MSE", "MAE", "RMSE", "r2_Score"], [
            [mse, mae, rmse, r2_score_]]), "评估数据", )
        save = save_dir + rf"{os.sep}回归模型评估.HTML"
        tab.render(save)
        return save,

    def clusters_score(self, save_dir, x_data: np.ndarray, *args):
        """Evaluate clustering with silhouette scores and render charts."""
        y_predict = self.predict(x_data)[0]
        tab = Tab()
        coefficient, coefficient_array = self._coefficient_clustering(
            x_data, y_predict)

        def gauge_base(name: str, value: float) -> Gauge:
            # gauge whose scale adapts to the magnitude of the value
            c = (
                Gauge()
                .add(
                    "",
                    [(name, round(value * 100, 2))],
                    min_=0,
                    max_=10 ** (judging_digits(value * 100)),
                )
                .set_global_opts(title_opts=opts.TitleOpts(title=name))
            )
            return c

        def bar_base(name, value, xaxis) -> Bar:
            c = (
                Bar()
                .add_xaxis(xaxis)
                .add_yaxis(name, value, **label_setting)
                .set_global_opts(
                    title_opts=opts.TitleOpts(title=name), **global_setting
                )
            )
            return c

        tab.add(gauge_base("平均轮廓系数", coefficient), "平均轮廓系数")

        def bar_(coefficient_array_, name="数据轮廓系数"):
            # one bar per sample silhouette value
            xaxis = [f"数据{i}" for i in range(len(coefficient_array_))]
            value = coefficient_array_.tolist()
            tab.add(bar_base(name, value, xaxis), name)

        n = 20
        # few samples: one chart; medium: chunks of 20 samples;
        # many: 20 equal percentile slices
        if len(coefficient_array) <= n:
            bar_(coefficient_array)
        elif len(coefficient_array) <= n ** 2:
            a = 0
            while a <= len(coefficient_array):
                b = a + n
                if b >= len(coefficient_array):
                    b = len(coefficient_array) + 1
                cofe_array = coefficient_array[a:b]
                bar_(cofe_array, f"{a}-{b}数据轮廓系数")
                a += n
        else:
            split = np.hsplit(coefficient_array, n)
            a = 0
            for cofe_array in split:
                bar_(cofe_array, f"{a}%-{a + n}%数据轮廓系数")
                a += n
        save = save_dir + rf"{os.sep}聚类模型评估.HTML"
        tab.render(save)
        return save,

    def predict(self, x_data, *args, **kwargs):
        """Predict with the wrapped model; caches input/output copies."""
        self.x_testdata = x_data.copy()
        y_predict = self.model.predict(x_data,)
        self.y_testdata = y_predict.copy()
        self.have_predict = True
        return y_predict, "预测"

    def data_visualization(self, save_dir, *args, **kwargs):
        # the generic base has nothing to visualize
        return save_dir,
class PrepBase(StudyMachinebase):  # preprocessing learner; no second training pass
    """Wraps a transformer: fit once, then predict() runs transform()."""

    def __init__(self, *args, **kwargs):
        super(PrepBase, self).__init__(*args, **kwargs)
        self.model = None

    def fit_model(self, x_data, y_data, increment=True, *args, **kwargs):
        # training is refused once predict() has already been called
        if not self.have_predict:
            y_data = y_data.ravel()
            try:
                # accumulate onto stored data only when increment is on and
                # data already exists; shape mismatches fall through below
                assert not self.x_traindata is None or not increment
                self.x_traindata = np.vstack((x_data, self.x_traindata))
                self.y_traindata = np.vstack((y_data, self.y_traindata))
            except (AssertionError, ValueError):
                self.x_traindata = x_data.copy()
                self.y_traindata = y_data.copy()
            try:  # incremental training
                assert increment
                self.model.partial_fit(x_data, y_data)
            except (AssertionError, AttributeError):
                self.model.fit(self.x_traindata, self.y_traindata)
            self.have_fit = True
        return "None", "None"

    def predict(self, x_data, *args, **kwargs):
        # "prediction" here is a feature transform, not a model predict
        self.x_testdata = x_data.copy()
        x_predict = self.model.transform(x_data)
        self.y_testdata = x_predict.copy()
        self.have_predict = True
        return x_predict, "特征工程"

    def score(self, x_data, y_data):
        return "None"  # scoring is not supported for preprocessing
  571. class Unsupervised(PrepBase): # 无监督,不允许第二次训练
  572. def fit_model(self, x_data, increment=True, *args, **kwargs):
  573. if not self.have_predict: # 不允许第二次训练
  574. self.y_traindata = None
  575. try:
  576. assert not self.x_traindata is None or not increment
  577. self.x_traindata = np.vstack((x_data, self.x_traindata))
  578. except (AssertionError, ValueError):
  579. self.x_traindata = x_data.copy()
  580. try: # 增量式训练
  581. assert increment
  582. self.model.partial_fit(x_data)
  583. except (AssertionError, AttributeError):
  584. self.model.fit(self.x_traindata, self.y_traindata)
  585. self.have_fit = True
  586. return "None", "None"
  587. class UnsupervisedModel(PrepBase): # 无监督
  588. def fit_model(self, x_data, increment=True, *args, **kwargs):
  589. self.y_traindata = None
  590. try:
  591. assert not self.x_traindata is None or not increment
  592. self.x_traindata = np.vstack((x_data, self.x_traindata))
  593. except (AssertionError, ValueError):
  594. self.x_traindata = x_data.copy()
  595. try: # 增量式训练
  596. if not increment:
  597. raise Exception
  598. self.model.partial_fit(x_data)
  599. except (AssertionError, AttributeError):
  600. self.model.fit(self.x_traindata, self.y_traindata)
  601. self.have_fit = True
  602. return "None", "None"
  603. @plugin_class_loading(get_path(r"template/machinelearning"))
  604. class ToPyebase(StudyMachinebase):
  605. def __init__(self, model, *args, **kwargs):
  606. super(ToPyebase, self).__init__(*args, **kwargs)
  607. self.model = None
  608. # 记录这两个是为了克隆
  609. self.k = {}
  610. self.model_Name = model
  611. def fit_model(self, x_data, y_data, *args, **kwargs):
  612. self.x_traindata = x_data.copy()
  613. self.y_traindata = y_data.ravel().copy()
  614. self.have_fit = True
  615. return "None", "None"
  616. def predict(self, x_data, *args, **kwargs):
  617. self.have_predict = True
  618. return np.array([]), "请使用训练"
  619. def score(self, x_data, y_data):
  620. return "None" # 没有score
class DataAnalysis(ToPyebase):  # data analysis
    def data_visualization(self, save_dir, *args, **kwargs):
        """Build cumulative-statistics tables over the training data.

        For each statistic (sum, variance, std, several means, median,
        max, min) a running value over rows 1..i is computed per column,
        saved as CSV and added to one HTML tab page.
        Returns a 1-tuple with the rendered HTML path.
        """
        tab = Tab()
        data = self.x_traindata

        def cumulative_calculation(tab_data, func, name, render_tab):
            # apply `func` column-wise over the first i+1 rows, yielding a
            # cumulative statistic for every row/column position
            sum_list = []
            for i in range(len(tab_data)):  # iterate over rows
                sum_list.append([])
                for a in range(len(tab_data[i])):
                    s = num_str(func(tab_data[: i + 1, a]), 8)
                    sum_list[-1].append(s)
            des_to_csv(save_dir, f"{name}", sum_list)
            render_tab.add(
                make_tab([f"[{i}]" for i in range(len(sum_list[0]))], sum_list),
                f"{name}",
            )

        def geometric_mean(x):
            return np.power(np.prod(x), 1 / len(x))  # geometric mean

        def square_mean(x):
            return np.sqrt(np.sum(np.power(x, 2)) / len(x))  # quadratic mean

        def harmonic_mean(x):
            return len(x) / np.sum(np.power(x, -1))  # harmonic mean

        cumulative_calculation(data, np.sum, "累计求和", tab)
        cumulative_calculation(data, np.var, "累计方差", tab)
        cumulative_calculation(data, np.std, "累计标准差", tab)
        cumulative_calculation(data, np.mean, "累计算术平均值", tab)
        cumulative_calculation(data, geometric_mean, "累计几何平均值", tab)
        cumulative_calculation(data, square_mean, "累计平方平均值", tab)
        cumulative_calculation(data, harmonic_mean, "累计调和平均值", tab)
        cumulative_calculation(data, np.median, "累计中位数", tab)
        cumulative_calculation(data, np.max, "累计最大值", tab)
        cumulative_calculation(data, np.min, "累计最小值", tab)
        save = save_dir + rf"{os.sep}数据分析.HTML"
        tab.render(save)  # generate the HTML page
        return save,
class Corr(ToPyebase):  # correlation and covariance
    def data_visualization(self, save_dir, *args, **kwargs):
        """Render correlation and covariance heat maps of the training data.

        Both matrices are also written to CSV.  Returns (html_path,).
        """
        tab = Tab()
        data = DataFrame(self.x_traindata)
        corr: np.ndarray = data.corr().to_numpy()  # correlation matrix
        cov: np.ndarray = data.cov().to_numpy()  # covariance matrix

        def heat_map(data_, name: str, max_, min_):
            # feature-by-feature heat map of one square matrix
            x = [f"特征[{i}]" for i in range(len(data_))]
            y = [f"特征[{i}]" for i in range(len(data_[0]))]
            value = [
                (f"特征[{i}]", f"特征[{j}]", float(data_[i][j]))
                for i in range(len(data_))
                for j in range(len(data_[i]))
            ]
            c = (
                HeatMap()
                .add_xaxis(x)
                # hide cell labels when there are too many features
                .add_yaxis(
                    f"数据",
                    y,
                    value,
                    label_opts=opts.LabelOpts(
                        is_show=True if len(x) <= 10 else False, position="inside"
                    ),
                )
                .set_global_opts(
                    title_opts=opts.TitleOpts(title="矩阵热力图"),
                    **global_not_legend,
                    yaxis_opts=opts.AxisOpts(
                        is_scale=True, type_="category"
                    ),  # 'category'
                    xaxis_opts=opts.AxisOpts(is_scale=True, type_="category"),
                    visualmap_opts=opts.VisualMapOpts(
                        is_show=True, max_=max_, min_=min_, pos_right="3%"
                    ),
                )  # display options
            )
            tab.add(c, name)

        heat_map(corr, "相关性热力图", 1, -1)
        heat_map(cov, "协方差热力图", float(cov.max()), float(cov.min()))
        des_to_csv(save_dir, f"相关性矩阵", corr)
        des_to_csv(save_dir, f"协方差矩阵", cov)
        save = save_dir + rf"{os.sep}数据相关性.HTML"
        tab.render(save)  # generate the HTML page
        return save,
  702. class ViewData(ToPyebase): # 绘制预测型热力图
  703. def __init__(
  704. self, args_use, learner, *args, **kwargs
  705. ): # model表示当前选用的模型类型,Alpha针对正则化的参数
  706. super(ViewData, self).__init__(args_use, learner, *args, **kwargs)
  707. self.model = learner.Model
  708. self.Select_Model = None
  709. self.have_fit = learner.have_Fit
  710. self.model_Name = "Select_Model"
  711. self.learner = learner
  712. self.learner_name = learner.Model_Name
  713. def fit_model(self, *args, **kwargs):
  714. self.have_fit = True
  715. return "None", "None"
  716. def predict(self, x_data, add_func=None, *args, **kwargs):
  717. x_traindata = self.learner.x_traindata
  718. y_traindata = self.learner.y_traindata
  719. x_name = self.learner_name
  720. if x_traindata is not None:
  721. add_func(x_traindata, f"{x_name}:x训练数据")
  722. try:
  723. x_testdata = self.x_testdata
  724. if x_testdata is not None:
  725. add_func(x_testdata, f"{x_name}:x测试数据")
  726. except BaseException as e:
  727. logging.warning(str(e))
  728. try:
  729. y_testdata = self.y_testdata.copy()
  730. if y_testdata is not None:
  731. add_func(y_testdata, f"{x_name}:y测试数据")
  732. except BaseException as e:
  733. logging.warning(str(e))
  734. self.have_fit = True
  735. if y_traindata is None:
  736. return np.array([]), "y训练数据"
  737. return y_traindata, "y训练数据"
  738. def data_visualization(self, save_dir, *args, **kwargs):
  739. return save_dir,
class MatrixScatter(ToPyebase):  # matrix scatter chart
    def data_visualization(self, save_dir, *args, **kwargs):
        """Render the training matrix as a scatter chart.

        1-D/2-D data becomes a flat Scatter (one series per row); 3-D data
        becomes a Scatter3D; anything else yields an empty chart.
        Returns (html_path,).
        """
        tab = Tab()
        data = self.x_traindata
        if data.ndim <= 2:  # 1-D or 2-D data
            # NOTE(review): data.shape[1] assumes 2-D input; a 1-D array
            # would raise IndexError here -- confirm upstream guarantees.
            c = (
                Scatter()
                .add_xaxis([f"{i}" for i in range(data.shape[1])])
                .set_global_opts(
                    title_opts=opts.TitleOpts(title=f"矩阵散点图"), **global_not_legend
                )
            )
            if data.ndim == 2:
                # one series per matrix row; points drawn white so only the
                # in-point value label is visible
                for num in range(len(data)):
                    i = data[num]
                    c.add_yaxis(f"{num}", [[f"{num}", x]
                                           for x in i], color="#FFFFFF")
            else:
                c.add_yaxis(f"0", [[0, x] for x in data], color="#FFFFFF")
            # show the cell value itself as the point label
            c.set_series_opts(
                label_opts=opts.LabelOpts(
                    is_show=True,
                    color="#000000",
                    position="inside",
                    formatter=JsCode("function(params){return params.data[2];}"),
                ))
        elif data.ndim == 3:
            c = Scatter3D().set_global_opts(
                title_opts=opts.TitleOpts(title=f"矩阵散点图"), **global_not_legend
            )
            # one 3-D series per outermost slice, point per cell
            for num in range(len(data)):
                i = data[num]
                for s_num in range(len(i)):
                    s = i[s_num]
                    y_data = [[num, s_num, x, float(s[x])]
                              for x in range(len(s))]
                    c.add(
                        f"{num}",
                        y_data,
                        zaxis3d_opts=opts.Axis3DOpts(
                            type_="category"))
            c.set_series_opts(
                label_opts=opts.LabelOpts(
                    is_show=True,
                    color="#000000",
                    position="inside",
                    formatter=JsCode("function(params){return params.data[3];}"),
                ))
        else:
            # unsupported dimensionality: fall back to an empty chart
            c = Scatter()
        tab.add(c, "矩阵散点图")
        save = save_dir + rf"{os.sep}矩阵散点图.HTML"
        tab.render(save)  # generate the HTML page
        return save,
class ClusterTree(ToPyebase):  # hierarchical-clustering dendrogram
    def data_visualization(self, save_dir, *args, **kwargs):
        """Ward-link the training data and render its dendrogram.

        The matplotlib figure is saved as a PNG and embedded into the HTML
        page.  Returns (html_path,).
        """
        tab = Tab()
        x_data = self.x_traindata
        linkage_array = ward(x_data)  # self.y_traindata would hold the labels
        dendrogram(linkage_array)
        plt.savefig(save_dir + rf"{os.sep}Cluster_graph.png")
        image = Image()
        image.add(src=save_dir + rf"{os.sep}Cluster_graph.png",).set_global_opts(
            title_opts=opts.ComponentTitleOpts(title="聚类树状图")
        )
        tab.add(image, "聚类树状图")
        save = save_dir + rf"{os.sep}聚类树状图.HTML"
        tab.render(save)  # generate the HTML page
        return save,
  809. class ClassBar(ToPyebase): # 类型柱状图
  810. def data_visualization(self, save_dir, *args, **kwargs):
  811. tab = Tab()
  812. x_data: np.ndarray = self.x_traindata.transpose()
  813. y_data: np.ndarray = self.y_traindata
  814. class_: list = np.unique(y_data).tolist() # 类型
  815. class_list = []
  816. for n_class in class_: # 生成class_list(class是1,,也就是二维的,下面会压缩成一维)
  817. class_list.append(y_data == n_class)
  818. for num_i in range(len(x_data)): # 迭代每一个特征
  819. i = x_data[num_i]
  820. i_con = is_continuous(i)
  821. if i_con and len(i) >= 11:
  822. # 存放绘图数据,每一层列表是一个类(leg),第二层是每个x_data
  823. c_list = [[0] * 10 for _ in class_list]
  824. start = i.min()
  825. end = i.max()
  826. n = (end - start) / 10 # 生成10条柱子
  827. x_axis = [] # x轴
  828. iter_num = 0 # 迭代到第n个
  829. while iter_num <= 9: # 把每个特征分为10类进行迭代
  830. # x_axis添加数据
  831. x_axis.append(
  832. f"({iter_num})[{round(start, 2)}-"
  833. f"{round((start + n) if (start + n) <= end or not iter_num == 9 else end, 2)}]")
  834. try:
  835. assert not iter_num == 9 # 执行到第10次时,直接获取剩下的所有
  836. s = (start <= i) == (i < end) # 布尔索引
  837. except AssertionError: # 因为start + n有超出end的风险
  838. s = (start <= i) == (i <= end) # 布尔索引
  839. # n_data = i[s] # 取得现在的特征数据
  840. for num in range(len(class_list)): # 根据类别进行迭代
  841. # 取得布尔数组:y_data == n_class也就是输出值为指定类型的bool矩阵,用于切片
  842. now_class: list = class_list[num]
  843. # 切片成和n_data一样的位置一样的形状(now_class就是一个bool矩阵)
  844. bool_class = now_class[s].ravel()
  845. # 用len计数 c_list = [[class1的数据],[class2的数据],[]]
  846. c_list[num][iter_num] = int(np.sum(bool_class))
  847. iter_num += 1
  848. start += n
  849. else:
  850. iter_np = np.unique(i)
  851. # 存放绘图数据,每一层列表是一个类(leg),第二层是每个x_data
  852. c_list = [[0] * len(iter_np) for _ in class_list]
  853. x_axis = [] # 添加x轴数据
  854. for i_num in range(len(iter_np)): # 迭代每一个i(不重复)
  855. i_data = iter_np[i_num]
  856. # n_data= i[i == i_data]#取得现在特征数据
  857. x_axis.append(f"[{i_data}]")
  858. for num in range(len(class_list)): # 根据类别进行迭代
  859. now_class = class_list[num] # 取得class_list的布尔数组
  860. # 切片成和n_data一样的位置一样的形状(now_class就是一个bool矩阵)
  861. bool_class = now_class[i == i_data]
  862. # 用len计数 c_list = [[class1的数据],[class2的数据],[]]
  863. c_list[num][i_num] = len(np.sum(bool_class).tolist())
  864. c = (
  865. Bar()
  866. .add_xaxis(x_axis)
  867. .set_global_opts(
  868. title_opts=opts.TitleOpts(title="类型-特征统计柱状图"),
  869. **global_setting,
  870. xaxis_opts=opts.AxisOpts(type_="category"),
  871. yaxis_opts=opts.AxisOpts(type_="value"),
  872. )
  873. )
  874. y_axis = []
  875. for i in range(len(c_list)):
  876. y_axis.append(f"{class_[i]}")
  877. c.add_yaxis(f"{class_[i]}", c_list[i], **label_setting)
  878. des_to_csv(
  879. save_dir,
  880. f"类型-[{num_i}]特征统计柱状图",
  881. c_list,
  882. x_axis,
  883. y_axis)
  884. tab.add(c, f"类型-[{num_i}]特征统计柱状图")
  885. # 未完成
  886. save = save_dir + rf"{os.sep}特征统计.HTML"
  887. tab.render(save) # 生成HTML
  888. return save,
class NumpyHeatMap(ToPyebase):  # heat map of a raw NumPy matrix
    def data_visualization(self, save_dir, *args, **kwargs):
        """Render the training matrix as a heat map plus a value table.

        Returns (html_path,).
        """
        tab = Tab()
        data = self.x_traindata
        x = [f"横[{i}]" for i in range(len(data))]
        y = [f"纵[{i}]" for i in range(len(data[0]))]
        value = [
            (f"横[{i}]", f"纵[{j}]", float(data[i][j]))
            for i in range(len(data))
            for j in range(len(data[i]))
        ]
        c = (
            HeatMap()
            .add_xaxis(x)
            .add_yaxis(f"数据", y, value, **label_setting)  # value[0] is the x key
            .set_global_opts(
                title_opts=opts.TitleOpts(title="矩阵热力图"),
                **global_not_legend,
                yaxis_opts=opts.AxisOpts(
                    is_scale=True, type_="category"),  # 'category'
                xaxis_opts=opts.AxisOpts(is_scale=True, type_="category"),
                visualmap_opts=opts.VisualMapOpts(
                    is_show=True,
                    max_=float(data.max()),
                    min_=float(data.min()),
                    pos_right="3%",
                ),
            )  # display options
        )
        tab.add(c, "矩阵热力图")
        tab.add(make_tab(x, data.transpose().tolist()), f"矩阵热力图:表格")
        save = save_dir + rf"{os.sep}矩阵热力图.HTML"
        tab.render(save)  # generate the HTML page
        return save,
  923. class PredictiveHeatmapBase(ToPyebase): # 绘制预测型热力图
  924. def __init__(
  925. self, args_use, learner, *args, **kwargs
  926. ): # model表示当前选用的模型类型,Alpha针对正则化的参数
  927. super(
  928. PredictiveHeatmapBase,
  929. self).__init__(
  930. args_use,
  931. learner,
  932. *
  933. args,
  934. **kwargs)
  935. self.model = learner.Model
  936. self.select_model = None
  937. self.have_fit = learner.have_Fit
  938. self.model_Name = "Select_Model"
  939. self.learner = learner
  940. self.x_traindata = learner.x_traindata.copy()
  941. self.y_traindata = learner.y_traindata.copy()
  942. self.means = []
  943. def fit_model(self, x_data, *args, **kwargs):
  944. try:
  945. self.means = x_data.ravel()
  946. except BaseException as e:
  947. logging.warning(str(e))
  948. self.have_fit = True
  949. return "None", "None"
  950. def data_visualization(
  951. self,
  952. save_dir,
  953. decision_boundary_func=None,
  954. prediction_boundary_func=None,
  955. *args,
  956. **kwargs,
  957. ):
  958. tab = Tab()
  959. y = self.y_traindata
  960. x_data = self.x_traindata
  961. try: # 如果没有class
  962. class_ = self.model.classes_.tolist()
  963. class_heard = [f"类别[{i}]" for i in range(len(class_))]
  964. # 获取数据
  965. get, x_means, x_range, data_type = training_visualization(
  966. x_data, class_, y)
  967. # 可使用自带的means,并且nan表示跳过
  968. for i in range(min([len(x_means), len(self.means)])):
  969. try:
  970. g = self.means[i]
  971. if g == np.nan:
  972. raise Exception
  973. x_means[i] = g
  974. except BaseException as e:
  975. logging.warning(str(e))
  976. get = decision_boundary_func(
  977. x_range, x_means, self.learner.predict, class_, data_type
  978. )
  979. for i in range(len(get)):
  980. tab.add(get[i], f"{i}预测热力图")
  981. heard = class_heard + [f"普适预测第{i}特征" for i in range(len(x_means))]
  982. data = class_ + [f"{i}" for i in x_means]
  983. c = Table().add(headers=heard, rows=[data])
  984. tab.add(c, "数据表")
  985. except AttributeError:
  986. get, x_means, x_range, data_type = regress_visualization(x_data, y)
  987. get = prediction_boundary_func(
  988. x_range, x_means, self.learner.predict, data_type
  989. )
  990. for i in range(len(get)):
  991. tab.add(get[i], f"{i}预测热力图")
  992. heard = [f"普适预测第{i}特征" for i in range(len(x_means))]
  993. data = [f"{i}" for i in x_means]
  994. c = Table().add(headers=heard, rows=[data])
  995. tab.add(c, "数据表")
  996. save = save_dir + rf"{os.sep}预测热力图.HTML"
  997. tab.render(save) # 生成HTML
  998. return save,
  999. class PredictiveHeatmap(PredictiveHeatmapBase): # 绘制预测型热力图
  1000. def data_visualization(self, save_dir, *args, **kwargs):
  1001. return super().data_visualization(
  1002. save_dir, decision_boundary, prediction_boundary
  1003. )
  1004. class PredictiveHeatmapMore(PredictiveHeatmapBase): # 绘制预测型热力图_More
  1005. def data_visualization(self, save_dir, *args, **kwargs):
  1006. return super().data_visualization(
  1007. save_dir, decision_boundary_more, prediction_boundary_more
  1008. )
@plugin_class_loading(get_path(r"template/machinelearning"))
class NearFeatureScatterClassMore(ToPyebase):
    def data_visualization(self, save_dir, *args, **kwargs):
        """Scatter-plot the training data per cluster/class (no centers).

        Returns (html_path,).
        """
        tab = Tab()
        x_data = self.x_traindata
        y = self.y_traindata
        class_ = np.unique(y).ravel().tolist()
        class_heard = [f"簇[{i}]" for i in range(len(class_))]
        get, x_means, x_range, data_type = training_visualization_more_no_center(
            x_data, class_, y)
        for i in range(len(get)):
            tab.add(get[i], f"{i}训练数据散点图")
        heard = class_heard + [f"普适预测第{i}特征" for i in range(len(x_means))]
        data = class_ + [f"{i}" for i in x_means]
        c = Table().add(headers=heard, rows=[data])
        tab.add(c, "数据表")
        save = save_dir + rf"{os.sep}数据特征散点图(分类).HTML"
        tab.render(save)  # generate the HTML page
        return save,
  1028. @plugin_class_loading(get_path(r"template/machinelearning"))
  1029. class NearFeatureScatterMore(ToPyebase):
  1030. def data_visualization(self, save_dir, *args, **kwargs):
  1031. tab = Tab()
  1032. x_data = self.x_traindata
  1033. x_means = quick_stats(x_data).get()[0]
  1034. get_y = feature_visualization(x_data, "数据散点图") # 转换
  1035. for i in range(len(get_y)):
  1036. tab.add(get_y[i], f"[{i}]数据x-x散点图")
  1037. heard = [f"普适预测第{i}特征" for i in range(len(x_means))]
  1038. data = [f"{i}" for i in x_means]
  1039. c = Table().add(headers=heard, rows=[data])
  1040. tab.add(c, "数据表")
  1041. save = save_dir + rf"{os.sep}数据特征散点图.HTML"
  1042. tab.render(save) # 生成HTML
  1043. return save,
class NearFeatureScatterClass(ToPyebase):  # neighboring-feature scatter: classified data
    def data_visualization(self, save_dir, *args, **kwargs):
        """Scatter-plot neighboring features colored by class label.

        Returns (html_path,).
        """
        # collect the class labels first
        class_ = np.unique(self.y_traindata).ravel().tolist()
        class_heard = [f"类别[{i}]" for i in range(len(class_))]
        tab = Tab()
        y = self.y_traindata
        x_data = self.x_traindata
        get, x_means, x_range, data_type = training_visualization(
            x_data, class_, y)
        for i in range(len(get)):
            tab.add(get[i], f"{i}临近特征散点图")
        heard = class_heard + [f"普适预测第{i}特征" for i in range(len(x_means))]
        data = class_ + [f"{i}" for i in x_means]
        c = Table().add(headers=heard, rows=[data])
        tab.add(c, "数据表")
        save = save_dir + rf"{os.sep}临近数据特征散点图(分类).HTML"
        tab.render(save)  # generate the HTML page
        return save,
  1063. class NearFeatureScatter(ToPyebase): # 临近特征散点图:连续数据
  1064. def data_visualization(self, save_dir, *args, **kwargs):
  1065. tab = Tab()
  1066. x_data = self.x_traindata.transpose()
  1067. get, x_means, x_range, data_type = training_visualization_no_class(
  1068. x_data)
  1069. for i in range(len(get)):
  1070. tab.add(get[i], f"{i}临近特征散点图")
  1071. columns = [f"普适预测第{i}特征" for i in range(len(x_means))]
  1072. data = [f"{i}" for i in x_means]
  1073. tab.add(make_tab(columns, [data]), "数据表")
  1074. save = save_dir + rf"{os.sep}临近数据特征散点图.HTML"
  1075. tab.render(save) # 生成HTML
  1076. return save,
  1077. class FeatureScatterYX(ToPyebase): # y-x图
  1078. def data_visualization(self, save_dir, *args, **kwargs):
  1079. tab = Tab()
  1080. x_data = self.x_traindata
  1081. y = self.y_traindata
  1082. get, x_means, x_range, data_type = regress_visualization(x_data, y)
  1083. for i in range(len(get)):
  1084. tab.add(get[i], f"{i}特征x-y散点图")
  1085. columns = [f"普适预测第{i}特征" for i in range(len(x_means))]
  1086. data = [f"{i}" for i in x_means]
  1087. tab.add(make_tab(columns, [data]), "数据表")
  1088. save = save_dir + rf"{os.sep}特征y-x图像.HTML"
  1089. tab.render(save) # 生成HTML
  1090. return save,
@plugin_class_loading(get_path(r"template/machinelearning"))
class LineModel(StudyMachinebase):
    """Linear-regression family wrapper: plain Line, Ridge or Lasso."""

    def __init__(
            self, args_use, model, *args, **kwargs
    ):  # `model` selects the estimator; alpha is the regularization strength
        super(LineModel, self).__init__(*args, **kwargs)
        all_model = {
            "Line": LinearRegression,
            "Ridge": Ridge,
            "Lasso": Lasso}[model]
        if model == "Line":
            self.model = all_model()
            self.k = {}
        else:
            # regularized variants take alpha and max_iter
            self.model = all_model(
                alpha=args_use["alpha"], max_iter=args_use["max_iter"]
            )
            self.k = {
                "alpha": args_use["alpha"],
                "max_iter": args_use["max_iter"]}
        # recorded so the learner can be cloned
        self.Alpha = args_use["alpha"]
        self.max_iter = args_use["max_iter"]
        self.model_Name = model

    def data_visualization(self, save_dir, *args, **kwargs):
        """Render fitted-line overlays, heat maps and coefficient tables.

        Returns (html_path,).
        """
        tab = Tab()
        x_data = self.x_traindata
        y = self.y_traindata
        w_list = self.model.coef_.tolist()
        w_heard = [f"系数w[{i}]" for i in range(len(w_list))]
        b = self.model.intercept_.tolist()
        get, x_means, x_range, data_type = regress_visualization(x_data, y)
        get_line = regress_w(x_data, w_list, b, x_means.copy())
        for i in range(len(get)):
            tab.add(get[i].overlap(get_line[i]), f"{i}预测类型图")
        get = prediction_boundary(x_range, x_means, self.predict, data_type)
        for i in range(len(get)):
            tab.add(get[i], f"{i}预测热力图")
        tab.add(coefficient_scatter_plot(w_heard, w_list), "系数w散点图")
        tab.add(coefficient_bar_plot(w_heard, self.model.coef_), "系数柱状图")
        columns = [
            f"普适预测第{i}特征" for i in range(
                len(x_means))] + w_heard + ["截距b"]
        data = [f"{i}" for i in x_means] + w_list + [b]
        if self.model_Name != "Line":
            # regularized models additionally report alpha and max_iter
            columns += ["阿尔法", "最大迭代次数"]
            data += [self.model.alpha, self.model.max_iter]
        tab.add(make_tab(columns, [data]), "数据表")
        des_to_csv(
            save_dir,
            "系数表",
            [w_list + [b]],
            [f"系数W[{i}]" for i in range(len(w_list))] + ["截距"],
        )
        des_to_csv(
            save_dir,
            "预测表",
            [[f"{i}" for i in x_means]],
            [f"普适预测第{i}特征" for i in range(len(x_means))],
        )
        save = save_dir + rf"{os.sep}线性回归模型.HTML"
        tab.render(save)  # generate the HTML page
        return save,
@plugin_class_loading(get_path(r"template/machinelearning"))
class LogisticregressionModel(StudyMachinebase):
    """Logistic-regression classifier wrapper."""

    def __init__(
            self, args_use, model, *args, **kwargs
    ):  # `model` is the selected model name; C is the inverse regularization
        super(LogisticregressionModel, self).__init__(*args, **kwargs)
        self.model = LogisticRegression(
            C=args_use["C"], max_iter=args_use["max_iter"])
        # recorded so the learner can be cloned
        self.C = args_use["C"]
        self.max_iter = args_use["max_iter"]
        self.k = {"C": args_use["C"], "max_iter": args_use["max_iter"]}
        self.model_Name = model

    def data_visualization(self, save_dir="render.html", *args, **kwargs):
        """Render decision boundaries, coefficient charts and data tables.

        Returns (html_path,).
        """
        # collect the fitted parameters
        w_array = self.model.coef_
        w_list = w_array.tolist()  # as nested lists for the tables
        b = self.model.intercept_
        c = self.model.C
        max_iter = self.model.max_iter
        class_ = self.model.classes_.tolist()
        class_heard = [f"类别[{i}]" for i in range(len(class_))]
        tab = Tab()
        y = self.y_traindata
        x_data = self.x_traindata
        get, x_means, x_range, data_type = training_visualization(
            x_data, class_, y)
        get_line = training_w(x_data, class_, y, w_list, b, x_means.copy())
        for i in range(len(get)):
            tab.add(get[i].overlap(get_line[i]), f"{i}决策边界散点图")
        for i in range(len(w_list)):
            w = w_list[i]
            w_heard = [f"系数w[{i},{j}]" for j in range(len(w))]
            tab.add(coefficient_scatter_plot(w_heard, w), f"系数w[{i}]散点图")
            tab.add(coefficient_bar_plot(w_heard, w_array[i]), f"系数w[{i}]柱状图")
        columns = class_heard + \
            [f"截距{i}" for i in range(len(b))] + ["C", "最大迭代数"]
        data = class_ + b.tolist() + [c, max_iter]
        # NOTE(review): `c` is reused for Table objects from here on; the
        # scalar C has already been consumed into `data` above.
        c = Table().add(headers=columns, rows=[data])
        tab.add(c, "数据表")
        c = Table().add(
            headers=[f"系数W[{i}]" for i in range(len(w_list[0]))], rows=w_list
        )
        tab.add(c, "系数数据表")
        c = Table().add(
            headers=[f"普适预测第{i}特征" for i in range(len(x_means))],
            rows=[[f"{i}" for i in x_means]],
        )
        tab.add(c, "普适预测数据表")
        des_to_csv(save_dir, "系数表", w_list, [
            f"系数W[{i}]" for i in range(len(w_list[0]))])
        des_to_csv(save_dir, "截距表", [b], [f"截距{i}" for i in range(len(b))])
        des_to_csv(
            save_dir,
            "预测表",
            [[f"{i}" for i in x_means]],
            [f"普适预测第{i}特征" for i in range(len(x_means))],
        )
        save = save_dir + rf"{os.sep}逻辑回归.HTML"
        tab.render(save)  # generate the HTML page
        return save,
  1215. class CategoricalData: # 数据统计助手
  1216. def __init__(self):
  1217. self.x_means = []
  1218. self.x_range = []
  1219. self.data_type = []
  1220. def __call__(self, x1, *args, **kwargs):
  1221. get = self.is_continuous(x1)
  1222. return get
  1223. def is_continuous(self, x1: np.array):
  1224. try:
  1225. x1_con = is_continuous(x1)
  1226. if x1_con:
  1227. self.x_means.append(np.mean(x1))
  1228. self.add_range(x1)
  1229. else:
  1230. assert False
  1231. return x1_con
  1232. except TypeError: # 找出出现次数最多的元素
  1233. new = np.unique(x1) # 去除相同的元素
  1234. count_list = []
  1235. for i in new:
  1236. count_list.append(np.sum(x1 == i))
  1237. index = count_list.index(max(count_list)) # 找出最大值的索引
  1238. self.x_means.append(x1[index])
  1239. self.add_range(x1, False)
  1240. return False
  1241. def add_range(self, x1: np.array, range_=True):
  1242. try:
  1243. assert range_
  1244. min_ = int(x1.min()) - 1
  1245. max_ = int(x1.max()) + 1
  1246. # 不需要复制列表
  1247. self.x_range.append([min_, max_])
  1248. self.data_type.append(1)
  1249. except AssertionError:
  1250. self.x_range.append(list(set(x1.tolist()))) # 去除多余元素
  1251. self.data_type.append(2)
  1252. def get(self):
  1253. return self.x_means, self.x_range, self.data_type
@plugin_class_loading(get_path(r"template/machinelearning"))
class KnnModel(StudyMachinebase):
    """K-nearest-neighbours learner (classifier or regressor) with HTML reports."""

    def __init__(
            self, args_use, model, *args, **kwargs
    ):  # model is the selected model-type name ("Knn_class" or "Knn")
        super(KnnModel, self).__init__(*args, **kwargs)
        all_model = {
            "Knn_class": KNeighborsClassifier,
            "Knn": KNeighborsRegressor}[model]
        self.model = all_model(
            p=args_use["p"],
            n_neighbors=args_use["n_neighbors"])
        # recorded so the model can be cloned later
        self.n_neighbors = args_use["n_neighbors"]
        self.p = args_use["p"]
        self.k = {"n_neighbors": args_use["n_neighbors"], "p": args_use["p"]}
        self.model_Name = model

    def data_visualization(self, save_dir, *args, **kwargs):
        """Render training/test scatter plots, prediction heat maps and a
        data table into ``save_dir``; return a 1-tuple with the HTML path."""
        tab = Tab()
        y = self.y_traindata
        x_data = self.x_traindata
        y_test = self.y_testdata
        x_test = self.x_testdata
        if self.model_Name == "Knn_class":
            class_ = self.model.classes_.tolist()
            class_heard = [f"类别[{i}]" for i in range(len(class_))]
            get, x_means, x_range, data_type = training_visualization(
                x_data, class_, y)
            for i in range(len(get)):
                tab.add(get[i], f"{i}训练数据散点图")
            if y_test is not None:
                get = training_visualization(x_test, class_, y_test)[0]
                for i in range(len(get)):
                    tab.add(get[i], f"{i}测试数据散点图")
            get = decision_boundary(
                x_range, x_means, self.predict, class_, data_type)
            for i in range(len(get)):
                tab.add(get[i], f"{i}预测热力图")
            heard = class_heard + [f"普适预测第{i}特征" for i in range(len(x_means))]
            data = class_ + [f"{i}" for i in x_means]
            c = Table().add(headers=heard, rows=[data])
            tab.add(c, "数据表")
        else:
            get, x_means, x_range, data_type = regress_visualization(x_data, y)
            for i in range(len(get)):
                tab.add(get[i], f"{i}训练数据散点图")
            # NOTE(review): unlike the classification branch there is no
            # `y_test is not None` guard here — confirm test data always
            # exists when the regressor variant is visualized.
            get = regress_visualization(x_test, y_test)[0]
            for i in range(len(get)):
                tab.add(get[i], f"{i}测试数据类型图")
            get = prediction_boundary(
                x_range, x_means, self.predict, data_type)
            for i in range(len(get)):
                tab.add(get[i], f"{i}预测热力图")
            heard = [f"普适预测第{i}特征" for i in range(len(x_means))]
            data = [f"{i}" for i in x_means]
            c = Table().add(headers=heard, rows=[data])
            tab.add(c, "数据表")
        des_to_csv(
            save_dir,
            "预测表",
            [[f"{i}" for i in x_means]],
            [f"普适预测第{i}特征" for i in range(len(x_means))],
        )
        save = save_dir + rf"{os.sep}K.HTML"
        tab.render(save)  # render HTML
        return save,
@plugin_class_loading(get_path(r"template/machinelearning"))
class TreeModel(StudyMachinebase):
    """Decision-tree learner (classifier or regressor) with HTML reports."""

    def __init__(
            self, args_use, model, *args, **kwargs
    ):  # model is the selected model-type name ("Tree_class" or "Tree")
        super(TreeModel, self).__init__(*args, **kwargs)
        all_model = {
            "Tree_class": DecisionTreeClassifier,
            "Tree": DecisionTreeRegressor,
        }[model]
        self.model = all_model(
            criterion=args_use["criterion"],
            splitter=args_use["splitter"],
            max_features=args_use["max_features"],
            max_depth=args_use["max_depth"],
            min_samples_split=args_use["min_samples_split"],
        )
        # recorded so the model can be cloned later
        self.criterion = args_use["criterion"]
        self.splitter = args_use["splitter"]
        self.max_features = args_use["max_features"]
        self.max_depth = args_use["max_depth"]
        self.min_samples_split = args_use["min_samples_split"]
        self.k = {
            "criterion": args_use["criterion"],
            "splitter": args_use["splitter"],
            "max_features": args_use["max_features"],
            "max_depth": args_use["max_depth"],
            "min_samples_split": args_use["min_samples_split"],
        }
        self.model_Name = model

    def data_visualization(self, save_dir, *args, **kwargs):
        """Export the tree as graphviz, render feature-importance, scatter
        and heat-map charts; return a 1-tuple with the HTML path."""
        tab = Tab()
        importance = self.model.feature_importances_.tolist()
        with open(save_dir + fr"{os.sep}Tree_Gra.dot", "w") as f:
            export_graphviz(self.model, out_file=f)
        make_bar("特征重要性", importance, tab)
        des_to_csv(
            save_dir,
            "特征重要性",
            [importance],
            [f"[{i}]特征" for i in range(len(importance))],
        )
        tab.add(see_tree(save_dir + fr"{os.sep}Tree_Gra.dot"), "决策树可视化")
        y = self.y_traindata
        x_data = self.x_traindata
        y_test = self.y_testdata
        x_test = self.x_testdata
        if self.model_Name == "Tree_class":
            class_ = self.model.classes_.tolist()
            class_heard = [f"类别[{i}]" for i in range(len(class_))]
            get, x_means, x_range, data_type = training_visualization(
                x_data, class_, y)
            for i in range(len(get)):
                tab.add(get[i], f"{i}训练数据散点图")
            # NOTE(review): no `y_test is not None` guard here (KnnModel has
            # one) — confirm test data always exists at this point.
            get = training_visualization(x_test, class_, y_test)[0]
            for i in range(len(get)):
                tab.add(get[i], f"{i}测试数据散点图")
            get = decision_boundary(
                x_range, x_means, self.predict, class_, data_type)
            for i in range(len(get)):
                tab.add(get[i], f"{i}预测热力图")
            tab.add(
                make_tab(
                    class_heard
                    + [f"普适预测第{i}特征" for i in range(len(x_means))]
                    + [f"特征{i}重要性" for i in range(len(importance))],
                    [class_ + [f"{i}" for i in x_means] + importance],
                ),
                "数据表",
            )
        else:
            get, x_means, x_range, data_type = regress_visualization(x_data, y)
            for i in range(len(get)):
                tab.add(get[i], f"{i}训练数据散点图")
            get = regress_visualization(x_test, y_test)[0]
            for i in range(len(get)):
                tab.add(get[i], f"{i}测试数据类型图")
            get = prediction_boundary(
                x_range, x_means, self.predict, data_type)
            for i in range(len(get)):
                tab.add(get[i], f"{i}预测热力图")
            tab.add(
                make_tab(
                    [f"普适预测第{i}特征" for i in range(len(x_means))]
                    + [f"特征{i}重要性" for i in range(len(importance))],
                    [[f"{i}" for i in x_means] + importance],
                ),
                "数据表",
            )
        des_to_csv(
            save_dir,
            "预测表",
            [[f"{i}" for i in x_means]],
            [f"普适预测第{i}特征" for i in range(len(x_means))],
        )
        save = save_dir + rf"{os.sep}决策树.HTML"
        tab.render(save)  # render HTML
        return save,
  1419. @plugin_class_loading(get_path(r"template/machinelearning"))
  1420. class ForestModel(StudyMachinebase):
  1421. def __init__(
  1422. self, args_use, model, *args, **kwargs
  1423. ): # model表示当前选用的模型类型,Alpha针对正则化的参数
  1424. super(ForestModel, self).__init__(*args, **kwargs)
  1425. model = {
  1426. "Forest_class": RandomForestClassifier,
  1427. "Forest": RandomForestRegressor,
  1428. }[model]
  1429. self.model = model(
  1430. n_estimators=args_use["n_Tree"],
  1431. criterion=args_use["criterion"],
  1432. max_features=args_use["max_features"],
  1433. max_depth=args_use["max_depth"],
  1434. min_samples_split=args_use["min_samples_split"],
  1435. )
  1436. # 记录这两个是为了克隆
  1437. self.n_estimators = args_use["n_Tree"]
  1438. self.criterion = args_use["criterion"]
  1439. self.max_features = args_use["max_features"]
  1440. self.max_depth = args_use["max_depth"]
  1441. self.min_samples_split = args_use["min_samples_split"]
  1442. self.k = {
  1443. "n_estimators": args_use["n_Tree"],
  1444. "criterion": args_use["criterion"],
  1445. "max_features": args_use["max_features"],
  1446. "max_depth": args_use["max_depth"],
  1447. "min_samples_split": args_use["min_samples_split"],
  1448. }
  1449. self.model_Name = model
  1450. def data_visualization(self, save_dir, *args, **kwargs):
  1451. tab = Tab()
  1452. # 多个决策树可视化
  1453. for i in range(len(self.model.estimators_)):
  1454. with open(save_dir + rf"{os.sep}Tree_Gra[{i}].dot", "w") as f:
  1455. export_graphviz(self.model.estimators_[i], out_file=f)
  1456. tab.add(
  1457. see_tree(
  1458. save_dir +
  1459. rf"{os.sep}Tree_Gra[{i}].dot"),
  1460. f"[{i}]决策树可视化")
  1461. y = self.y_traindata
  1462. x_data = self.x_traindata
  1463. if self.model_Name == "Forest_class":
  1464. class_ = self.model.classes_.tolist()
  1465. class_heard = [f"类别[{i}]" for i in range(len(class_))]
  1466. get, x_means, x_range, data_type = training_visualization(
  1467. x_data, class_, y)
  1468. for i in range(len(get)):
  1469. tab.add(get[i], f"{i}训练数据散点图")
  1470. get = decision_boundary(
  1471. x_range, x_means, self.predict, class_, data_type)
  1472. for i in range(len(get)):
  1473. tab.add(get[i], f"{i}预测热力图")
  1474. tab.add(
  1475. make_tab(
  1476. class_heard + [f"普适预测第{i}特征" for i in range(len(x_means))],
  1477. [class_ + [f"{i}" for i in x_means]],
  1478. ),
  1479. "数据表",
  1480. )
  1481. else:
  1482. get, x_means, x_range, data_type = regress_visualization(x_data, y)
  1483. for i in range(len(get)):
  1484. tab.add(get[i], f"{i}预测类型图")
  1485. get = prediction_boundary(
  1486. x_range, x_means, self.predict, data_type)
  1487. for i in range(len(get)):
  1488. tab.add(get[i], f"{i}预测热力图")
  1489. tab.add(
  1490. make_tab(
  1491. [f"普适预测第{i}特征" for i in range(len(x_means))],
  1492. [[f"{i}" for i in x_means]],
  1493. ),
  1494. "数据表",
  1495. )
  1496. des_to_csv(
  1497. save_dir,
  1498. "预测表",
  1499. [[f"{i}" for i in x_means]],
  1500. [f"普适预测第{i}特征" for i in range(len(x_means))],
  1501. )
  1502. save = save_dir + rf"{os.sep}随机森林.HTML"
  1503. tab.render(save) # 生成HTML
  1504. return save,
  1505. class GradienttreeModel(StudyMachinebase): # 继承Tree_Model主要是继承Des
  1506. def __init__(
  1507. self, args_use, model, *args, **kwargs
  1508. ): # model表示当前选用的模型类型,Alpha针对正则化的参数
  1509. super(
  1510. GradienttreeModel,
  1511. self).__init__(
  1512. *args,
  1513. **kwargs) # 不需要执行Tree_Model的初始化
  1514. model = {
  1515. "GradientTree_class": GradientBoostingClassifier,
  1516. "GradientTree": GradientBoostingRegressor,
  1517. }[model]
  1518. self.model = model(
  1519. n_estimators=args_use["n_Tree"],
  1520. max_features=args_use["max_features"],
  1521. max_depth=args_use["max_depth"],
  1522. min_samples_split=args_use["min_samples_split"],
  1523. )
  1524. # 记录这两个是为了克隆
  1525. self.criterion = args_use["criterion"]
  1526. self.splitter = args_use["splitter"]
  1527. self.max_features = args_use["max_features"]
  1528. self.max_depth = args_use["max_depth"]
  1529. self.min_samples_split = args_use["min_samples_split"]
  1530. self.k = {
  1531. "criterion": args_use["criterion"],
  1532. "splitter": args_use["splitter"],
  1533. "max_features": args_use["max_features"],
  1534. "max_depth": args_use["max_depth"],
  1535. "min_samples_split": args_use["min_samples_split"],
  1536. }
  1537. self.model_Name = model
  1538. def data_visualization(self, save_dir, *args, **kwargs):
  1539. tab = Tab()
  1540. # 多个决策树可视化
  1541. for a in range(len(self.model.estimators_)):
  1542. for i in range(len(self.model.estimators_[a])):
  1543. with open(save_dir + rf"{os.sep}Tree_Gra[{a},{i}].dot", "w") as f:
  1544. export_graphviz(self.model.estimators_[a][i], out_file=f)
  1545. tab.add(
  1546. see_tree(
  1547. save_dir +
  1548. rf"{os.sep}Tree_Gra[{a},{i}].dot"),
  1549. f"[{a},{i}]决策树可视化")
  1550. y = self.y_traindata
  1551. x_data = self.x_traindata
  1552. if self.model_Name == "Tree_class":
  1553. class_ = self.model.classes_.tolist()
  1554. class_heard = [f"类别[{i}]" for i in range(len(class_))]
  1555. get, x_means, x_range, data_type = training_visualization(
  1556. x_data, class_, y)
  1557. for i in range(len(get)):
  1558. tab.add(get[i], f"{i}训练数据散点图")
  1559. get = decision_boundary(
  1560. x_range, x_means, self.predict, class_, data_type)
  1561. for i in range(len(get)):
  1562. tab.add(get[i], f"{i}预测热力图")
  1563. tab.add(
  1564. make_tab(
  1565. class_heard + [f"普适预测第{i}特征" for i in range(len(x_means))],
  1566. [class_ + [f"{i}" for i in x_means]],
  1567. ),
  1568. "数据表",
  1569. )
  1570. else:
  1571. get, x_means, x_range, data_type = regress_visualization(x_data, y)
  1572. for i in range(len(get)):
  1573. tab.add(get[i], f"{i}预测类型图")
  1574. get = prediction_boundary(
  1575. x_range, x_means, self.predict, data_type)
  1576. for i in range(len(get)):
  1577. tab.add(get[i], f"{i}预测热力图")
  1578. tab.add(
  1579. make_tab(
  1580. [f"普适预测第{i}特征" for i in range(len(x_means))],
  1581. [[f"{i}" for i in x_means]],
  1582. ),
  1583. "数据表",
  1584. )
  1585. des_to_csv(
  1586. save_dir,
  1587. "预测表",
  1588. [[f"{i}" for i in x_means]],
  1589. [f"普适预测第{i}特征" for i in range(len(x_means))],
  1590. )
  1591. save = save_dir + rf"{os.sep}梯度提升回归树.HTML"
  1592. tab.render(save) # 生成HTML
  1593. return save,
@plugin_class_loading(get_path(r"template/machinelearning"))
class SvcModel(StudyMachinebase):
    """Support-vector classifier wrapper with HTML reports."""

    def __init__(
            self, args_use, model, *args, **kwargs
    ):  # model is the selected model-type name
        super(SvcModel, self).__init__(*args, **kwargs)
        self.model = SVC(
            C=args_use["C"], gamma=args_use["gamma"], kernel=args_use["kernel"]
        )
        # recorded so the model can be cloned later
        self.C = args_use["C"]
        self.gamma = args_use["gamma"]
        self.kernel = args_use["kernel"]
        self.k = {
            "C": args_use["C"],
            "gamma": args_use["gamma"],
            "kernel": args_use["kernel"],
        }
        self.model_Name = model

    def data_visualization(self, save_dir, *args, **kwargs):
        """Render decision-boundary scatter plots, heat maps, tables and CSV
        exports; return a 1-tuple with the HTML path."""
        tab = Tab()
        try:
            # coef_/intercept_ only exist for a linear kernel
            w_list = self.model.coef_.tolist()
            b = self.model.intercept_.tolist()
        except AttributeError:
            w_list = []  # attribute absent for non-linear kernels
            b = []
        class_ = self.model.classes_.tolist()
        class_heard = [f"类别[{i}]" for i in range(len(class_))]
        y = self.y_traindata
        x_data = self.x_traindata
        get, x_means, x_range, data_type = training_visualization(
            x_data, class_, y)
        if w_list:
            get_line: list = training_w(
                x_data, class_, y, w_list, b, x_means.copy())
        else:
            get_line = []
        for i in range(len(get)):
            if get_line:
                tab.add(get[i].overlap(get_line[i]), f"{i}决策边界散点图")
            else:
                tab.add(get[i], f"{i}决策边界散点图")
        get = decision_boundary(
            x_range,
            x_means,
            self.predict,
            class_,
            data_type)
        for i in range(len(get)):
            tab.add(get[i], f"{i}预测热力图")
        dic = {2: "离散", 1: "连续"}
        tab.add(make_tab(class_heard +
                         [f"普适预测第{i}特征:{dic[data_type[i]]}" for i in range(len(x_means))],
                         [class_ + [f"{i}" for i in x_means]],), "数据表", )
        if w_list:
            des_to_csv(save_dir, "系数表", w_list, [
                f"系数W[{i}]" for i in range(len(w_list[0]))])
        if w_list:
            des_to_csv(save_dir, "截距表", [b], [f"截距{i}" for i in range(len(b))])
        des_to_csv(
            save_dir,
            "预测表",
            [[f"{i}" for i in x_means]],
            [f"普适预测第{i}特征" for i in range(len(x_means))],
        )
        save = save_dir + rf"{os.sep}支持向量机分类.HTML"
        tab.render(save)  # render HTML
        return save,
@plugin_class_loading(get_path(r"template/machinelearning"))
class SvrModel(StudyMachinebase):
    """Support-vector regressor wrapper with HTML reports."""

    def __init__(
            self, args_use, model, *args, **kwargs
    ):  # model is the selected model-type name
        super(SvrModel, self).__init__(*args, **kwargs)
        self.model = SVR(
            C=args_use["C"], gamma=args_use["gamma"], kernel=args_use["kernel"]
        )
        # recorded so the model can be cloned later
        self.C = args_use["C"]
        self.gamma = args_use["gamma"]
        self.kernel = args_use["kernel"]
        self.k = {
            "C": args_use["C"],
            "gamma": args_use["gamma"],
            "kernel": args_use["kernel"],
        }
        self.model_Name = model

    def data_visualization(self, save_dir, *args, **kwargs):
        """Render regression scatter plots, heat maps, tables and CSV
        exports; return a 1-tuple with the HTML path."""
        tab = Tab()
        x_data = self.x_traindata
        y = self.y_traindata
        try:
            # coef_/intercept_ only exist for a linear kernel
            w_list = self.model.coef_.tolist()
            b = self.model.intercept_.tolist()
        except AttributeError:
            w_list = []  # attribute absent for non-linear kernels
            b = []
        get, x_means, x_range, data_type = regress_visualization(x_data, y)
        if w_list:
            get_line = regress_w(x_data, w_list, b, x_means.copy())
        else:
            get_line = []
        for i in range(len(get)):
            if get_line:
                tab.add(get[i].overlap(get_line[i]), f"{i}预测类型图")
            else:
                tab.add(get[i], f"{i}预测类型图")
        get = prediction_boundary(x_range, x_means, self.predict, data_type)
        for i in range(len(get)):
            tab.add(get[i], f"{i}预测热力图")
        if w_list:
            des_to_csv(save_dir, "系数表", w_list, [
                f"系数W[{i}]" for i in range(len(w_list[0]))])
        if w_list:
            des_to_csv(save_dir, "截距表", [b], [f"截距{i}" for i in range(len(b))])
        des_to_csv(
            save_dir,
            "预测表",
            [[f"{i}" for i in x_means]],
            [f"普适预测第{i}特征" for i in range(len(x_means))],
        )
        tab.add(
            make_tab(
                [f"普适预测第{i}特征" for i in range(len(x_means))],
                [[f"{i}" for i in x_means]],
            ),
            "数据表",
        )
        save = save_dir + rf"{os.sep}支持向量机回归.HTML"
        tab.render(save)  # render HTML
        return save,
  1726. class VarianceModel(Unsupervised): # 无监督
  1727. def __init__(
  1728. self, args_use, model, *args, **kwargs
  1729. ): # model表示当前选用的模型类型,Alpha针对正则化的参数
  1730. super(VarianceModel, self).__init__(*args, **kwargs)
  1731. self.model = VarianceThreshold(
  1732. threshold=(args_use["P"] * (1 - args_use["P"])))
  1733. # 记录这两个是为了克隆
  1734. self.threshold = args_use["P"]
  1735. self.k = {"threshold": args_use["P"]}
  1736. self.model_Name = model
  1737. def data_visualization(self, save_dir, *args, **kwargs):
  1738. tab = Tab()
  1739. var = self.model.variances_ # 标准差
  1740. y_data = self.y_testdata
  1741. if isinstance(y_data, np.ndarray):
  1742. get = feature_visualization(self.y_testdata)
  1743. for i in range(len(get)):
  1744. tab.add(get[i], f"[{i}]数据x-x散点图")
  1745. c = (
  1746. Bar()
  1747. .add_xaxis([f"[{i}]特征" for i in range(len(var))])
  1748. .add_yaxis("标准差", var.tolist(), **label_setting)
  1749. .set_global_opts(
  1750. title_opts=opts.TitleOpts(title="系数w柱状图"), **global_setting
  1751. )
  1752. )
  1753. tab.add(c, "数据标准差")
  1754. save = save_dir + rf"{os.sep}方差特征选择.HTML"
  1755. tab.render(save) # 生成HTML
  1756. return save,
class SelectkbestModel(PrepBase):  # supervised
    """Univariate (SelectKBest) feature selector with HTML reports."""

    def __init__(self, args_use, model, *args, **kwargs):
        super(SelectkbestModel, self).__init__(*args, **kwargs)
        self.model = SelectKBest(
            k=args_use["k"],
            score_func=args_use["score_func"])
        # recorded so the model can be cloned later
        self.k_ = args_use["k"]
        self.score_func = args_use["score_func"]
        self.k = {"k": args_use["k"], "score_func": args_use["score_func"]}
        self.model_Name = model

    def data_visualization(self, save_dir, *args, **kwargs):
        """Render before/after feature scatter plots and a per-feature score
        bar chart; return a 1-tuple with the HTML path."""
        tab = Tab()
        score = self.model.scores_.tolist()
        support: np.ndarray = self.model.get_support()
        y_data = self.y_traindata
        x_data = self.x_traindata
        if isinstance(x_data, np.ndarray):
            get = feature_visualization(x_data)
            for i in range(len(get)):
                tab.add(get[i], f"[{i}]训练数据x-x散点图")
        if isinstance(y_data, np.ndarray):
            get = feature_visualization(y_data)
            for i in range(len(get)):
                tab.add(get[i], f"[{i}]保留训练数据x-x散点图")
        y_data = self.y_testdata
        x_data = self.x_testdata
        if isinstance(x_data, np.ndarray):
            get = feature_visualization(x_data)
            for i in range(len(get)):
                tab.add(get[i], f"[{i}]数据x-x散点图")
        if isinstance(y_data, np.ndarray):
            get = feature_visualization(y_data)
            for i in range(len(get)):
                tab.add(get[i], f"[{i}]保留数据x-x散点图")
        # split scores into kept/dropped series; 0 acts as a placeholder so
        # both series stay aligned with the feature axis
        choose = []
        un_choose = []
        for i in range(len(score)):
            if support[i]:
                choose.append(score[i])
                un_choose.append(0)  # placeholder
            else:
                un_choose.append(score[i])
                choose.append(0)
        c = (
            Bar()
            .add_xaxis([f"[{i}]特征" for i in range(len(score))])
            .add_yaxis("选中特征", choose, **label_setting)
            .add_yaxis("抛弃特征", un_choose, **label_setting)
            .set_global_opts(
                title_opts=opts.TitleOpts(title="系数w柱状图"), **global_setting
            )
        )
        tab.add(c, "单变量重要程度")
        save = save_dir + rf"{os.sep}单一变量特征选择.HTML"
        tab.render(save)  # render HTML
        return save,
  1814. class SelectFromModel(PrepBase): # 有监督
  1815. def __init__(
  1816. self, args_use, learner, *args, **kwargs
  1817. ): # model表示当前选用的模型类型,Alpha针对正则化的参数
  1818. super(SelectFromModel, self).__init__(*args, **kwargs)
  1819. self.model = learner.Model
  1820. self.Select_Model = SelectFromModel(
  1821. estimator=learner.Model,
  1822. max_features=args_use["k"],
  1823. prefit=learner.have_Fit)
  1824. self.max_features = args_use["k"]
  1825. self.estimator = learner.Model
  1826. self.k = {
  1827. "max_features": args_use["k"],
  1828. "estimator": learner.Model,
  1829. "have_Fit": learner.have_Fit,
  1830. }
  1831. self.have_fit = learner.have_Fit
  1832. self.model_Name = "SelectFrom_Model"
  1833. self.learner = learner
  1834. def fit_model(self, x_data, y_data, split=0.3, *args, **kwargs):
  1835. y_data = y_data.ravel()
  1836. if not self.have_fit: # 不允许第二次训练
  1837. self.Select_Model.fit(x_data, y_data)
  1838. self.have_fit = True
  1839. return "None", "None"
  1840. def predict(self, x_data, *args, **kwargs):
  1841. try:
  1842. self.x_testdata = x_data.copy()
  1843. x_predict = self.Select_Model.transform(x_data)
  1844. self.y_testdata = x_predict.copy()
  1845. self.have_predict = True
  1846. return x_predict, "模型特征工程"
  1847. except BaseException as e:
  1848. logging.debug(str(e))
  1849. self.have_predict = True
  1850. return np.array([]), "无结果工程"
  1851. def data_visualization(self, save_dir, *args, **kwargs):
  1852. tab = Tab()
  1853. support: np.ndarray = self.Select_Model.get_support()
  1854. y_data = self.y_testdata
  1855. x_data = self.x_testdata
  1856. if isinstance(x_data, np.ndarray):
  1857. get = feature_visualization(x_data)
  1858. for i in range(len(get)):
  1859. tab.add(get[i], f"[{i}]数据x-x散点图")
  1860. if isinstance(y_data, np.ndarray):
  1861. get = feature_visualization(y_data)
  1862. for i in range(len(get)):
  1863. tab.add(get[i], f"[{i}]保留数据x-x散点图")
  1864. def make_bar_(score):
  1865. choose = []
  1866. un_choose = []
  1867. for i in range(len(score)):
  1868. if support[i]:
  1869. choose.append(abs(score[i]))
  1870. un_choose.append(0) # 占位
  1871. else:
  1872. un_choose.append(abs(score[i]))
  1873. choose.append(0)
  1874. c = (
  1875. Bar()
  1876. .add_xaxis([f"[{i}]特征" for i in range(len(score))])
  1877. .add_yaxis("选中特征", choose, **label_setting)
  1878. .add_yaxis("抛弃特征", un_choose, **label_setting)
  1879. .set_global_opts(
  1880. title_opts=opts.TitleOpts(title="系数w柱状图"), **global_setting
  1881. )
  1882. )
  1883. tab.add(c, "单变量重要程度")
  1884. try:
  1885. make_bar_(self.model.coef_)
  1886. except AttributeError:
  1887. try:
  1888. make_bar_(self.model.feature_importances_)
  1889. except BaseException as e:
  1890. logging.warning(str(e))
  1891. save = save_dir + rf"{os.sep}模型特征选择.HTML"
  1892. tab.render(save) # 生成HTML
  1893. return save,
  1894. class StandardizationModel(Unsupervised): # z-score标准化 无监督
  1895. def __init__(self, *args, **kwargs):
  1896. super(StandardizationModel, self).__init__(*args, **kwargs)
  1897. self.model = StandardScaler()
  1898. self.k = {}
  1899. self.model_Name = "StandardScaler"
  1900. def data_visualization(self, save_dir, *args, **kwargs):
  1901. tab = Tab()
  1902. y_data = self.y_testdata
  1903. x_data = self.x_testdata
  1904. var = self.model.var_.tolist()
  1905. means = self.model.mean_.tolist()
  1906. scale_ = self.model.scale_.tolist()
  1907. conversion_control(y_data, x_data, tab)
  1908. make_bar("标准差", var, tab)
  1909. make_bar("方差", means, tab)
  1910. make_bar("Scale", scale_, tab)
  1911. save = save_dir + rf"{os.sep}z-score标准化.HTML"
  1912. tab.render(save) # 生成HTML
  1913. return save,
  1914. class MinmaxscalerModel(Unsupervised): # 离差标准化
  1915. def __init__(self, args_use, *args, **kwargs):
  1916. super(MinmaxscalerModel, self).__init__(*args, **kwargs)
  1917. self.model = MinMaxScaler(feature_range=args_use["feature_range"])
  1918. self.k = {}
  1919. self.model_Name = "MinMaxScaler"
  1920. def data_visualization(self, save_dir, *args, **kwargs):
  1921. tab = Tab()
  1922. y_data = self.y_testdata
  1923. x_data = self.x_testdata
  1924. scale_ = self.model.scale_.tolist()
  1925. max_ = self.model.data_max_.tolist()
  1926. min_ = self.model.data_min_.tolist()
  1927. conversion_control(y_data, x_data, tab)
  1928. make_bar("Scale", scale_, tab)
  1929. tab.add(
  1930. make_tab(
  1931. heard=[f"[{i}]特征最大值" for i in range(len(max_))]
  1932. + [f"[{i}]特征最小值" for i in range(len(min_))],
  1933. row=[max_ + min_],
  1934. ),
  1935. "数据表格",
  1936. )
  1937. save = save_dir + rf"{os.sep}离差标准化.HTML"
  1938. tab.render(save) # 生成HTML
  1939. return save,
  1940. class LogscalerModel(PrepBase): # 对数标准化
  1941. def __init__(self, *args, **kwargs):
  1942. super(LogscalerModel, self).__init__(*args, **kwargs)
  1943. self.model = None
  1944. self.k = {}
  1945. self.model_Name = "LogScaler"
  1946. def fit_model(self, x_data, *args, **kwargs):
  1947. if not self.have_predict: # 不允许第二次训练
  1948. self.max_logx = np.log(x_data.max())
  1949. self.have_fit = True
  1950. return "None", "None"
  1951. def predict(self, x_data, *args, **kwargs):
  1952. try:
  1953. max_logx = self.max_logx
  1954. except AttributeError:
  1955. self.have_fit = False
  1956. self.fit_model(x_data)
  1957. max_logx = self.max_logx
  1958. self.x_testdata = x_data.copy()
  1959. x_predict = np.log(x_data) / max_logx
  1960. self.y_testdata = x_predict.copy()
  1961. self.have_predict = True
  1962. return x_predict, "对数变换"
  1963. def data_visualization(self, save_dir, *args, **kwargs):
  1964. tab = Tab()
  1965. y_data = self.y_testdata
  1966. x_data = self.x_testdata
  1967. conversion_control(y_data, x_data, tab)
  1968. tab.add(make_tab(heard=["最大对数值(自然对数)"],
  1969. row=[[str(self.max_logx)]]), "数据表格")
  1970. save = save_dir + rf"{os.sep}对数标准化.HTML"
  1971. tab.render(save) # 生成HTML
  1972. return save,
  1973. class AtanscalerModel(PrepBase): # atan标准化
  1974. def __init__(self, *args, **kwargs):
  1975. super(AtanscalerModel, self).__init__(*args, **kwargs)
  1976. self.model = None
  1977. self.k = {}
  1978. self.model_Name = "atanScaler"
  1979. def fit_model(self, x_data, *args, **kwargs):
  1980. self.have_fit = True
  1981. return "None", "None"
  1982. def predict(self, x_data, *args, **kwargs):
  1983. self.x_testdata = x_data.copy()
  1984. x_predict = np.arctan(x_data) * (2 / np.pi)
  1985. self.y_testdata = x_predict.copy()
  1986. self.have_predict = True
  1987. return x_predict, "atan变换"
  1988. def data_visualization(self, save_dir, *args, **kwargs):
  1989. tab = Tab()
  1990. y_data = self.y_testdata
  1991. x_data = self.x_testdata
  1992. conversion_control(y_data, x_data, tab)
  1993. save = save_dir + rf"{os.sep}反正切函数标准化.HTML"
  1994. tab.render(save) # 生成HTML
  1995. return save,
  1996. class DecimalscalerModel(PrepBase): # 小数定标准化
  1997. def __init__(self, *args, **kwargs):
  1998. super(DecimalscalerModel, self).__init__(*args, **kwargs)
  1999. self.model = None
  2000. self.k = {}
  2001. self.model_Name = "Decimal_normalization"
  2002. def fit_model(self, x_data, *args, **kwargs):
  2003. if not self.have_predict: # 不允许第二次训练
  2004. self.j = max([judging_digits(x_data.max()),
  2005. judging_digits(x_data.min())])
  2006. self.have_fit = True
  2007. return "None", "None"
  2008. def predict(self, x_data, *args, **kwargs):
  2009. self.x_testdata = x_data.copy()
  2010. try:
  2011. j = self.j
  2012. except AttributeError:
  2013. self.have_fit = False
  2014. self.fit_model(x_data)
  2015. j = self.j
  2016. x_predict = x_data / (10 ** j)
  2017. self.y_testdata = x_predict.copy()
  2018. self.have_predict = True
  2019. return x_predict, "小数定标标准化"
  2020. def data_visualization(self, save_dir, *args, **kwargs):
  2021. tab = Tab()
  2022. y_data = self.y_testdata
  2023. x_data = self.x_testdata
  2024. j = self.j
  2025. conversion_control(y_data, x_data, tab)
  2026. tab.add(make_tab(heard=["小数位数:j"], row=[[j]]), "数据表格")
  2027. save = save_dir + rf"{os.sep}小数定标标准化.HTML"
  2028. tab.render(save) # 生成HTML
  2029. return save,
  2030. class MapzoomModel(PrepBase): # 映射标准化
  2031. def __init__(self, args_use, *args, **kwargs):
  2032. super(MapzoomModel, self).__init__(*args, **kwargs)
  2033. self.model = None
  2034. self.feature_range = args_use["feature_range"]
  2035. self.k = {}
  2036. self.model_Name = "Decimal_normalization"
  2037. def fit_model(self, x_data, *args, **kwargs):
  2038. if not self.have_predict: # 不允许第二次训练
  2039. self.max_ = x_data.max()
  2040. self.min_ = x_data.min()
  2041. self.have_fit = True
  2042. return "None", "None"
  2043. def predict(self, x_data, *args, **kwargs):
  2044. self.x_testdata = x_data.copy()
  2045. try:
  2046. max_ = self.max_
  2047. min_ = self.min_
  2048. except AttributeError:
  2049. self.have_fit = False
  2050. self.fit_model(x_data)
  2051. max_ = self.max_
  2052. min_ = self.min_
  2053. x_predict = (x_data * (self.feature_range[1] - self.feature_range[0])) / (
  2054. max_ - min_
  2055. )
  2056. self.y_testdata = x_predict.copy()
  2057. self.have_predict = True
  2058. return x_predict, "映射标准化"
  2059. def data_visualization(self, save_dir, *args, **kwargs):
  2060. tab = Tab()
  2061. y_data = self.y_testdata
  2062. x_data = self.x_testdata
  2063. max_ = self.max_
  2064. min_ = self.min_
  2065. conversion_control(y_data, x_data, tab)
  2066. tab.add(make_tab(heard=["最大值", "最小值"], row=[[max_, min_]]), "数据表格")
  2067. save = save_dir + rf"{os.sep}映射标准化.HTML"
  2068. tab.render(save) # 生成HTML
  2069. return save,
  2070. class SigmodscalerModel(PrepBase): # sigmod变换
  2071. def __init__(self, *args, **kwargs):
  2072. super(SigmodscalerModel, self).__init__(*args, **kwargs)
  2073. self.model = None
  2074. self.k = {}
  2075. self.model_Name = "sigmodScaler_Model"
  2076. def fit_model(self, x_data, *args, **kwargs):
  2077. self.have_fit = True
  2078. return "None", "None"
  2079. def predict(self, x_data: np.array, *args, **kwargs):
  2080. self.x_testdata = x_data.copy()
  2081. x_predict = 1 / (1 + np.exp(-x_data))
  2082. self.y_testdata = x_predict.copy()
  2083. self.have_predict = True
  2084. return x_predict, "Sigmod变换"
  2085. def data_visualization(self, save_dir, *args, **kwargs):
  2086. tab = Tab()
  2087. y_data = self.y_testdata
  2088. x_data = self.x_testdata
  2089. conversion_control(y_data, x_data, tab)
  2090. save = save_dir + rf"{os.sep}Sigmoid变换.HTML"
  2091. tab.render(save) # 生成HTML
  2092. return save,
  2093. class FuzzyQuantizationModel(PrepBase): # 模糊量化标准化
  2094. def __init__(self, args_use, *args, **kwargs):
  2095. super(FuzzyQuantizationModel, self).__init__(*args, **kwargs)
  2096. self.model = None
  2097. self.feature_range = args_use["feature_range"]
  2098. self.k = {}
  2099. self.model_Name = "Fuzzy_quantization"
  2100. def fit_model(self, x_data, *args, **kwargs):
  2101. if not self.have_predict: # 不允许第二次训练
  2102. self.max_ = x_data.max()
  2103. self.max_ = x_data.min()
  2104. self.have_fit = True
  2105. return "None", "None"
  2106. def predict(self, x_data, *args, **kwargs):
  2107. self.x_testdata = x_data.copy()
  2108. try:
  2109. max_ = self.max_
  2110. min_ = self.max_
  2111. except AttributeError:
  2112. self.have_fit = False
  2113. self.fit_model(x_data)
  2114. max_ = self.max_
  2115. min_ = self.max_
  2116. x_predict = 1 / 2 + (1 / 2) * np.sin(
  2117. np.pi / (max_ - min_) * (x_data - (max_ - min_) / 2)
  2118. )
  2119. self.y_testdata = x_predict.copy()
  2120. self.have_predict = True
  2121. return x_predict, "模糊量化标准化"
  2122. def data_visualization(self, save_dir, *args, **kwargs):
  2123. tab = Tab()
  2124. y_data = self.y_traindata
  2125. x_data = self.x_traindata
  2126. max_ = self.max_
  2127. min_ = self.max_
  2128. conversion_control(y_data, x_data, tab)
  2129. tab.add(make_tab(heard=["最大值", "最小值"], row=[[max_, min_]]), "数据表格")
  2130. save = save_dir + rf"{os.sep}模糊量化标准化.HTML"
  2131. tab.render(save) # 生成HTML
  2132. return save,
  2133. class RegularizationModel(Unsupervised): # 正则化
  2134. def __init__(self, args_use, *args, **kwargs):
  2135. super(RegularizationModel, self).__init__(*args, **kwargs)
  2136. self.model = Normalizer(norm=args_use["norm"])
  2137. self.k = {"norm": args_use["norm"]}
  2138. self.model_Name = "Regularization"
  2139. def data_visualization(self, save_dir, *args, **kwargs):
  2140. tab = Tab()
  2141. y_data = self.y_testdata.copy()
  2142. x_data = self.x_testdata.copy()
  2143. conversion_control(y_data, x_data, tab)
  2144. save = save_dir + rf"{os.sep}正则化.HTML"
  2145. tab.render(save) # 生成HTML
  2146. return save,
  2147. # 离散数据
  2148. class BinarizerModel(Unsupervised): # 二值化
  2149. def __init__(self, args_use, *args, **kwargs):
  2150. super(BinarizerModel, self).__init__(*args, **kwargs)
  2151. self.model = Binarizer(threshold=args_use["threshold"])
  2152. self.k = {}
  2153. self.model_Name = "Binarizer"
  2154. def data_visualization(self, save_dir, *args, **kwargs):
  2155. tab = Tab()
  2156. y_data = self.y_testdata
  2157. x_data = self.x_testdata
  2158. get_y = discrete_feature_visualization(y_data, "转换数据") # 转换
  2159. for i in range(len(get_y)):
  2160. tab.add(get_y[i], f"[{i}]数据x-x离散散点图")
  2161. heard = [f"特征:{i}" for i in range(len(x_data[0]))]
  2162. tab.add(make_tab(heard, x_data.tolist()), f"原数据")
  2163. tab.add(make_tab(heard, y_data.tolist()), f"编码数据")
  2164. tab.add(
  2165. make_tab(
  2166. heard, np.dstack(
  2167. (x_data, y_data)).tolist()), f"合成[原数据,编码]数据")
  2168. save = save_dir + rf"{os.sep}二值离散化.HTML"
  2169. tab.render(save) # 生成HTML
  2170. return save,
  2171. class DiscretizationModel(PrepBase): # n值离散
  2172. def __init__(self, args_use, *args, **kwargs):
  2173. super(DiscretizationModel, self).__init__(*args, **kwargs)
  2174. self.model = None
  2175. range_ = args_use["split_range"]
  2176. if not range_:
  2177. raise Exception
  2178. elif len(range_) == 1:
  2179. range_.append(range_[0])
  2180. self.range = range_
  2181. self.k = {}
  2182. self.model_Name = "Discretization"
  2183. def fit_model(self, *args, **kwargs):
  2184. # t值在模型创建时已经保存
  2185. self.have_fit = True
  2186. return "None", "None"
  2187. def predict(self, x_data, *args, **kwargs):
  2188. self.x_testdata = x_data.copy()
  2189. x_predict = x_data.copy() # 复制
  2190. range_ = self.range
  2191. bool_list = []
  2192. max_ = len(range_) - 1
  2193. o_t = None
  2194. for i in range(len(range_)):
  2195. try:
  2196. t = float(range_[i])
  2197. except ValueError:
  2198. continue
  2199. if o_t is None: # 第一个参数
  2200. bool_list.append(x_predict <= t)
  2201. else:
  2202. bool_list.append((o_t <= x_predict) == (x_predict < t))
  2203. if i == max_:
  2204. bool_list.append(t <= x_predict)
  2205. o_t = t
  2206. for i in range(len(bool_list)):
  2207. x_predict[bool_list[i]] = i
  2208. self.y_testdata = x_predict.copy()
  2209. self.have_predict = True
  2210. return x_predict, f"{len(bool_list)}值离散化"
  2211. def data_visualization(self, save_dir, *args, **kwargs):
  2212. tab = Tab()
  2213. y_data = self.y_testdata
  2214. x_data = self.x_testdata
  2215. get_y = discrete_feature_visualization(y_data, "转换数据") # 转换
  2216. for i in range(len(get_y)):
  2217. tab.add(get_y[i], f"[{i}]数据x-x离散散点图")
  2218. heard = [f"特征:{i}" for i in range(len(x_data[0]))]
  2219. tab.add(make_tab(heard, x_data.tolist()), f"原数据")
  2220. tab.add(make_tab(heard, y_data.tolist()), f"编码数据")
  2221. tab.add(
  2222. make_tab(
  2223. heard, np.dstack(
  2224. (x_data, y_data)).tolist()), f"合成[原数据,编码]数据")
  2225. save = save_dir + rf"{os.sep}多值离散化.HTML"
  2226. tab.render(save) # 生成HTML
  2227. return save,
  2228. class LabelModel(PrepBase): # 数字编码
  2229. def __init__(self, *args, **kwargs):
  2230. super(LabelModel, self).__init__(*args, **kwargs)
  2231. self.model = []
  2232. self.k = {}
  2233. self.model_Name = "LabelEncoder"
  2234. def fit_model(self, x_data, *args, **kwargs):
  2235. if not self.have_predict: # 不允许第二次训练
  2236. self.model = []
  2237. if x_data.ndim == 1:
  2238. x_data = np.array([x_data])
  2239. for i in range(x_data.shape[1]):
  2240. self.model.append(
  2241. LabelEncoder().fit(np.ravel(x_data[:, i]))
  2242. ) # 训练机器(每个特征一个学习器)
  2243. self.have_fit = True
  2244. return "None", "None"
  2245. def predict(self, x_data, *args, **kwargs):
  2246. self.x_testdata = x_data.copy()
  2247. x_predict = x_data.copy()
  2248. if x_data.ndim == 1:
  2249. x_data = np.array([x_data])
  2250. for i in range(x_data.shape[1]):
  2251. x_predict[:, i] = self.model[i].transform(x_data[:, i])
  2252. self.y_testdata = x_predict.copy()
  2253. self.have_predict = True
  2254. return x_predict, "数字编码"
  2255. def data_visualization(self, save_dir, *args, **kwargs):
  2256. tab = Tab()
  2257. x_data = self.x_testdata
  2258. y_data = self.y_testdata
  2259. get_y = discrete_feature_visualization(y_data, "转换数据") # 转换
  2260. for i in range(len(get_y)):
  2261. tab.add(get_y[i], f"[{i}]数据x-x离散散点图")
  2262. heard = [f"特征:{i}" for i in range(len(x_data[0]))]
  2263. tab.add(make_tab(heard, x_data.tolist()), f"原数据")
  2264. tab.add(make_tab(heard, y_data.tolist()), f"编码数据")
  2265. tab.add(
  2266. make_tab(
  2267. heard, np.dstack(
  2268. (x_data, y_data)).tolist()), f"合成[原数据,编码]数据")
  2269. save = save_dir + rf"{os.sep}数字编码.HTML"
  2270. tab.render(save) # 生成HTML
  2271. return save,
  2272. class OneHotEncoderModel(PrepBase): # 独热编码
  2273. def __init__(self, args_use, *args, **kwargs):
  2274. super(OneHotEncoderModel, self).__init__(*args, **kwargs)
  2275. self.model = []
  2276. self.ndim_up = args_use["ndim_up"]
  2277. self.k = {}
  2278. self.model_Name = "OneHotEncoder"
  2279. self.OneHot_Data = None # 三维独热编码
  2280. def fit_model(self, x_data, *args, **kwargs):
  2281. if not self.have_predict: # 不允许第二次训练
  2282. if x_data.ndim == 1:
  2283. x_data = [x_data]
  2284. for i in range(x_data.shape[1]):
  2285. data = np.expand_dims(x_data[:, i], axis=1) # 独热编码需要升维
  2286. self.model.append(OneHotEncoder().fit(data)) # 训练机器
  2287. self.have_fit = True
  2288. return "None", "None"
  2289. def predict(self, x_data, *args, **kwargs):
  2290. self.x_testdata = x_data.copy()
  2291. x_new = []
  2292. for i in range(x_data.shape[1]):
  2293. data = np.expand_dims(x_data[:, i], axis=1) # 独热编码需要升维
  2294. one_hot = self.model[i].transform(data).toarray().tolist()
  2295. x_new.append(one_hot) # 添加到列表中
  2296. # 新列表的行数据是原data列数据的独热码(只需要ndim=2,暂时没想到numpy的做法)
  2297. x_new = np.array(x_new)
  2298. x_predict = []
  2299. for i in range(x_new.shape[1]):
  2300. x_predict.append(x_new[:, i])
  2301. x_predict = np.array(x_predict) # 转换回array
  2302. self.OneHot_Data = x_predict.copy() # 保存未降维数据
  2303. if not self.ndim_up: # 压缩操作
  2304. new_x_predict = []
  2305. for i in x_predict:
  2306. new_list = []
  2307. list_ = i.tolist()
  2308. for a in list_:
  2309. new_list += a
  2310. new = np.array(new_list)
  2311. new_x_predict.append(new)
  2312. self.y_testdata = np.array(new_x_predict)
  2313. return self.y_testdata.copy(), "独热编码"
  2314. self.y_testdata = self.OneHot_Data
  2315. self.have_predict = True
  2316. return x_predict, "独热编码"
  2317. def data_visualization(self, save_dir, *args, **kwargs):
  2318. tab = Tab()
  2319. y_data = self.y_testdata
  2320. x_data = self.x_testdata
  2321. oh_data = self.OneHot_Data
  2322. if not self.ndim_up:
  2323. get_y = discrete_feature_visualization(y_data, "转换数据") # 转换
  2324. for i in range(len(get_y)):
  2325. tab.add(get_y[i], f"[{i}]数据x-x离散散点图")
  2326. heard = [f"特征:{i}" for i in range(len(x_data[0]))]
  2327. tab.add(make_tab(heard, x_data.tolist()), f"原数据")
  2328. tab.add(make_tab(heard, oh_data.tolist()), f"编码数据")
  2329. tab.add(
  2330. make_tab(
  2331. heard, np.dstack(
  2332. (oh_data, x_data)).tolist()), f"合成[原数据,编码]数据")
  2333. tab.add(make_tab([f"编码:{i}" for i in range(
  2334. len(y_data[0]))], y_data.tolist()), f"数据")
  2335. save = save_dir + rf"{os.sep}独热编码.HTML"
  2336. tab.render(save) # 生成HTML
  2337. return save,
  2338. class MissedModel(Unsupervised): # 缺失数据补充
  2339. def __init__(self, args_use, *args, **kwargs):
  2340. super(MissedModel, self).__init__(*args, **kwargs)
  2341. self.model = SimpleImputer(
  2342. missing_values=args_use["miss_value"],
  2343. strategy=args_use["fill_method"],
  2344. fill_value=args_use["fill_value"],
  2345. )
  2346. self.k = {}
  2347. self.model_Name = "Missed"
  2348. def predict(self, x_data, *args, **kwargs):
  2349. self.x_testdata = x_data.copy()
  2350. x_predict = self.model.transform(x_data)
  2351. self.y_testdata = x_predict.copy()
  2352. self.have_predict = True
  2353. return x_predict, "填充缺失"
  2354. def data_visualization(self, save_dir, *args, **kwargs):
  2355. tab = Tab()
  2356. y_data = self.y_testdata
  2357. x_data = self.x_testdata
  2358. statistics = self.model.statistics_.tolist()
  2359. conversion_control(y_data, x_data, tab)
  2360. tab.add(make_tab([f"特征[{i}]" for i in range(
  2361. len(statistics))], [statistics]), "填充值")
  2362. save = save_dir + rf"{os.sep}缺失数据填充.HTML"
  2363. tab.render(save) # 生成HTML
  2364. return save,
  2365. @plugin_class_loading(get_path(r"template/machinelearning"))
  2366. class PcaModel(Unsupervised):
  2367. def __init__(self, args_use, *args, **kwargs):
  2368. super(PcaModel, self).__init__(*args, **kwargs)
  2369. self.model = PCA(
  2370. n_components=args_use["n_components"], whiten=args_use["white_PCA"]
  2371. )
  2372. self.whiten = args_use["white_PCA"]
  2373. self.n_components = args_use["n_components"]
  2374. self.k = {
  2375. "n_components": args_use["n_components"],
  2376. "whiten": args_use["white_PCA"],
  2377. }
  2378. self.model_Name = "PCA"
  2379. def predict(self, x_data, *args, **kwargs):
  2380. self.x_testdata = x_data.copy()
  2381. x_predict = self.model.transform(x_data)
  2382. self.y_testdata = x_predict.copy()
  2383. self.have_predict = True
  2384. return x_predict, "PCA"
  2385. def data_visualization(self, save_dir, *args, **kwargs):
  2386. tab = Tab()
  2387. y_data = self.y_testdata
  2388. importance = self.model.components_.tolist()
  2389. var = self.model.explained_variance_.tolist() # 方量差
  2390. conversion_separate_format(y_data, tab)
  2391. x_data = [f"第{i+1}主成分" for i in range(len(importance))] # 主成分
  2392. y_data = [f"特征[{i}]" for i in range(len(importance[0]))] # 主成分
  2393. value = [
  2394. (f"第{i+1}主成分", f"特征[{j}]", importance[i][j])
  2395. for i in range(len(importance))
  2396. for j in range(len(importance[i]))
  2397. ]
  2398. c = (
  2399. HeatMap()
  2400. .add_xaxis(x_data)
  2401. .add_yaxis(f"", y_data, value, **label_setting) # value的第一个数值是x
  2402. .set_global_opts(
  2403. title_opts=opts.TitleOpts(title="预测热力图"),
  2404. **global_not_legend,
  2405. yaxis_opts=opts.AxisOpts(is_scale=True), # 'category'
  2406. xaxis_opts=opts.AxisOpts(is_scale=True),
  2407. visualmap_opts=opts.VisualMapOpts(
  2408. is_show=True,
  2409. max_=int(self.model.components_.max()) + 1,
  2410. min_=int(self.model.components_.min()),
  2411. pos_right="3%",
  2412. ),
  2413. ) # 显示
  2414. )
  2415. tab.add(c, "成分热力图")
  2416. c = (
  2417. Bar()
  2418. .add_xaxis([f"第[{i}]主成分" for i in range(len(var))])
  2419. .add_yaxis("方量差", var, **label_setting)
  2420. .set_global_opts(
  2421. title_opts=opts.TitleOpts(title="方量差柱状图"), **global_setting
  2422. )
  2423. )
  2424. des_to_csv(save_dir, "成分重要性", importance, [x_data], [y_data])
  2425. des_to_csv(
  2426. save_dir, "方量差", [var], [
  2427. f"第[{i}]主成分" for i in range(
  2428. len(var))])
  2429. tab.add(c, "方量差柱状图")
  2430. save = save_dir + rf"{os.sep}主成分分析.HTML"
  2431. tab.render(save) # 生成HTML
  2432. return save,
  2433. @plugin_class_loading(get_path(r"template/machinelearning"))
  2434. class RpcaModel(Unsupervised):
  2435. def __init__(self, args_use, *args, **kwargs):
  2436. super(RpcaModel, self).__init__(*args, **kwargs)
  2437. self.model = IncrementalPCA(
  2438. n_components=args_use["n_components"], whiten=args_use["white_PCA"]
  2439. )
  2440. self.n_components = args_use["n_components"]
  2441. self.whiten = args_use["white_PCA"]
  2442. self.k = {
  2443. "n_components": args_use["n_components"],
  2444. "whiten": args_use["white_PCA"],
  2445. }
  2446. self.model_Name = "RPCA"
  2447. def predict(self, x_data, *args, **kwargs):
  2448. self.x_testdata = x_data.copy()
  2449. x_predict = self.model.transform(x_data)
  2450. self.y_testdata = x_predict.copy()
  2451. self.have_predict = True
  2452. return x_predict, "RPCA"
  2453. def data_visualization(self, save_dir, *args, **kwargs):
  2454. tab = Tab()
  2455. y_data = self.y_traindata
  2456. importance = self.model.components_.tolist()
  2457. var = self.model.explained_variance_.tolist() # 方量差
  2458. conversion_separate_format(y_data, tab)
  2459. x_data = [f"第{i + 1}主成分" for i in range(len(importance))] # 主成分
  2460. y_data = [f"特征[{i}]" for i in range(len(importance[0]))] # 主成分
  2461. value = [
  2462. (f"第{i + 1}主成分", f"特征[{j}]", importance[i][j])
  2463. for i in range(len(importance))
  2464. for j in range(len(importance[i]))
  2465. ]
  2466. c = (
  2467. HeatMap()
  2468. .add_xaxis(x_data)
  2469. .add_yaxis(f"", y_data, value, **label_setting) # value的第一个数值是x
  2470. .set_global_opts(
  2471. title_opts=opts.TitleOpts(title="预测热力图"),
  2472. **global_not_legend,
  2473. yaxis_opts=opts.AxisOpts(is_scale=True), # 'category'
  2474. xaxis_opts=opts.AxisOpts(is_scale=True),
  2475. visualmap_opts=opts.VisualMapOpts(
  2476. is_show=True,
  2477. max_=int(self.model.components_.max()) + 1,
  2478. min_=int(self.model.components_.min()),
  2479. pos_right="3%",
  2480. ),
  2481. ) # 显示
  2482. )
  2483. tab.add(c, "成分热力图")
  2484. c = (
  2485. Bar()
  2486. .add_xaxis([f"第[{i}]主成分" for i in range(len(var))])
  2487. .add_yaxis("放量差", var, **label_setting)
  2488. .set_global_opts(
  2489. title_opts=opts.TitleOpts(title="方量差柱状图"), **global_setting
  2490. )
  2491. )
  2492. tab.add(c, "方量差柱状图")
  2493. des_to_csv(save_dir, "成分重要性", importance, [x_data], [y_data])
  2494. des_to_csv(
  2495. save_dir, "方量差", [var], [
  2496. f"第[{i}]主成分" for i in range(
  2497. len(var))])
  2498. save = save_dir + rf"{os.sep}RPCA(主成分分析).HTML"
  2499. tab.render(save) # 生成HTML
  2500. return save,
  2501. @plugin_class_loading(get_path(r"template/machinelearning"))
  2502. class KpcaModel(Unsupervised):
  2503. def __init__(self, args_use, *args, **kwargs):
  2504. super(KpcaModel, self).__init__(*args, **kwargs)
  2505. self.model = KernelPCA(
  2506. n_components=args_use["n_components"], kernel=args_use["kernel"]
  2507. )
  2508. self.n_components = args_use["n_components"]
  2509. self.kernel = args_use["kernel"]
  2510. self.k = {
  2511. "n_components": args_use["n_components"],
  2512. "kernel": args_use["kernel"],
  2513. }
  2514. self.model_Name = "KPCA"
  2515. def predict(self, x_data, *args, **kwargs):
  2516. self.x_testdata = x_data.copy()
  2517. x_predict = self.model.transform(x_data)
  2518. self.y_testdata = x_predict.copy()
  2519. self.have_predict = True
  2520. return x_predict, "KPCA"
  2521. def data_visualization(self, save_dir, *args, **kwargs):
  2522. tab = Tab()
  2523. y_data = self.y_testdata
  2524. conversion_separate_format(y_data, tab)
  2525. save = save_dir + rf"{os.sep}KPCA(主成分分析).HTML"
  2526. tab.render(save) # 生成HTML
  2527. return save,
  2528. class LdaModel(PrepBase): # 有监督学习
  2529. def __init__(self, args_use, *args, **kwargs):
  2530. super(LdaModel, self).__init__(*args, **kwargs)
  2531. self.model = Lda(n_components=args_use["n_components"])
  2532. self.n_components = args_use["n_components"]
  2533. self.k = {"n_components": args_use["n_components"]}
  2534. self.model_Name = "LDA"
  2535. def predict(self, x_data, *args, **kwargs):
  2536. self.x_testdata = x_data.copy()
  2537. x_predict = self.model.transform(x_data)
  2538. self.y_testdata = x_predict.copy()
  2539. self.have_predict = True
  2540. return x_predict, "LDA"
  2541. def data_visualization(self, save_dir, *args, **kwargs):
  2542. tab = Tab()
  2543. x_data = self.x_testdata
  2544. y_data = self.y_testdata
  2545. conversion_separate_format(y_data, tab)
  2546. w_list = self.model.coef_.tolist() # 变为表格
  2547. b = self.model.intercept_
  2548. tab = Tab()
  2549. x_means = quick_stats(x_data).get()[0]
  2550. # 回归的y是历史遗留问题 不用分类回归:因为得不到分类数据(predict结果是降维数据不是预测数据)
  2551. get = regress_w(x_data, w_list, b, x_means.copy())
  2552. for i in range(len(get)):
  2553. tab.add(get[i].overlap(get[i]), f"类别:{i}LDA映射曲线")
  2554. save = save_dir + rf"{os.sep}render.HTML"
  2555. tab.render(save) # 生成HTML
  2556. return save,
  2557. @plugin_class_loading(get_path(r"template/machinelearning"))
  2558. class NmfModel(Unsupervised):
  2559. def __init__(self, args_use, *args, **kwargs):
  2560. super(NmfModel, self).__init__(*args, **kwargs)
  2561. self.model = NMF(n_components=args_use["n_components"])
  2562. self.n_components = args_use["n_components"]
  2563. self.k = {"n_components": args_use["n_components"]}
  2564. self.model_Name = "NFM"
  2565. self.h_testdata = None
  2566. # x_traindata保存的是W,h_traindata和y_traindata是后来数据
  2567. def predict(self, x_data, x_name="", add_func=None, *args, **kwargs):
  2568. self.x_testdata = x_data.copy()
  2569. x_predict = self.model.transform(x_data)
  2570. self.y_testdata = x_predict.copy()
  2571. self.h_testdata = self.model.components_
  2572. if add_func is not None and x_name != "":
  2573. add_func(self.h_testdata, f"{x_name}:V->NMF[H]")
  2574. self.have_predict = True
  2575. return x_predict, "V->NMF[W]"
  2576. def data_visualization(self, save_dir, *args, **kwargs):
  2577. tab = Tab()
  2578. y_data = self.y_testdata
  2579. x_data = self.x_testdata
  2580. h_data = self.h_testdata
  2581. conversion_separate_wh(y_data, h_data, tab)
  2582. wh_data = np.matmul(y_data, h_data)
  2583. difference_data = x_data - wh_data
  2584. def make_heat_map(data, name, data_max, data_min):
  2585. x = [f"数据[{i}]" for i in range(len(data))] # 主成分
  2586. y = [f"特征[{i}]" for i in range(len(data[0]))] # 主成分
  2587. value = [
  2588. (f"数据[{i}]", f"特征[{j}]", float(data[i][j]))
  2589. for i in range(len(data))
  2590. for j in range(len(data[i]))
  2591. ]
  2592. c = (
  2593. HeatMap()
  2594. .add_xaxis(x)
  2595. .add_yaxis(f"数据", y, value, **label_setting) # value的第一个数值是x
  2596. .set_global_opts(
  2597. title_opts=opts.TitleOpts(title="原始数据热力图"),
  2598. **global_not_legend,
  2599. yaxis_opts=opts.AxisOpts(
  2600. is_scale=True, type_="category"
  2601. ), # 'category'
  2602. xaxis_opts=opts.AxisOpts(is_scale=True, type_="category"),
  2603. visualmap_opts=opts.VisualMapOpts(
  2604. is_show=True, max_=data_max, min_=data_min, pos_right="3%"
  2605. ),
  2606. ) # 显示
  2607. )
  2608. tab.add(c, name)
  2609. max_ = (max(int(x_data.max()), int(wh_data.max()),
  2610. int(difference_data.max())) + 1)
  2611. min_ = min(int(x_data.min()), int(wh_data.min()),
  2612. int(difference_data.min()))
  2613. make_heat_map(x_data, "原始数据热力图", max_, min_)
  2614. make_heat_map(wh_data, "W * H数据热力图", max_, min_)
  2615. make_heat_map(difference_data, "数据差热力图", max_, min_)
  2616. des_to_csv(save_dir, "权重矩阵", y_data)
  2617. des_to_csv(save_dir, "系数矩阵", h_data)
  2618. des_to_csv(save_dir, "系数*权重矩阵", wh_data)
  2619. save = save_dir + rf"{os.sep}非负矩阵分解.HTML"
  2620. tab.render(save) # 生成HTML
  2621. return save,
  2622. @plugin_class_loading(get_path(r"template/machinelearning"))
  2623. class TsneModel(Unsupervised):
  2624. def __init__(self, args_use, *args, **kwargs):
  2625. super(TsneModel, self).__init__(*args, **kwargs)
  2626. self.model = TSNE(n_components=args_use["n_components"])
  2627. self.n_components = args_use["n_components"]
  2628. self.k = {"n_components": args_use["n_components"]}
  2629. self.model_Name = "t-SNE"
  2630. def fit_model(self, *args, **kwargs):
  2631. self.have_fit = True
  2632. return "None", "None"
  2633. def predict(self, x_data, *args, **kwargs):
  2634. self.x_testdata = x_data.copy()
  2635. x_predict = self.model.fit_transform(x_data)
  2636. self.y_testdata = x_predict.copy()
  2637. self.have_predict = True
  2638. return x_predict, "SNE"
  2639. def data_visualization(self, save_dir, *args, **kwargs):
  2640. tab = Tab()
  2641. y_data = self.y_testdata
  2642. conversion_separate_format(y_data, tab)
  2643. save = save_dir + rf"{os.sep}T-SNE.HTML"
  2644. tab.render(save) # 生成HTML
  2645. return save,
  2646. class MlpModel(StudyMachinebase): # 神经网络(多层感知机),有监督学习
  2647. def __init__(self, args_use, model, *args, **kwargs):
  2648. super(MlpModel, self).__init__(*args, **kwargs)
  2649. all_model = {"MLP": MLPRegressor, "MLP_class": MLPClassifier}[model]
  2650. self.model = all_model(
  2651. hidden_layer_sizes=args_use["hidden_size"],
  2652. activation=args_use["activation"],
  2653. solver=args_use["solver"],
  2654. alpha=args_use["alpha"],
  2655. max_iter=args_use["max_iter"],
  2656. )
  2657. # 记录这两个是为了克隆
  2658. self.hidden_layer_sizes = args_use["hidden_size"]
  2659. self.activation = args_use["activation"]
  2660. self.max_iter = args_use["max_iter"]
  2661. self.solver = args_use["solver"]
  2662. self.alpha = args_use["alpha"]
  2663. self.k = {
  2664. "hidden_layer_sizes": args_use["hidden_size"],
  2665. "activation": args_use["activation"],
  2666. "max_iter": args_use["max_iter"],
  2667. "solver": args_use["solver"],
  2668. "alpha": args_use["alpha"],
  2669. }
  2670. self.model_Name = model
  2671. def data_visualization(self, save_dir, *args, **kwargs):
  2672. tab = Tab()
  2673. x_data = self.x_testdata
  2674. y_data = self.y_testdata
  2675. coefs = self.model.coefs_
  2676. class_ = self.model.classes_
  2677. n_layers_ = self.model.n_layers_
  2678. def make_heat_map(data_, name):
  2679. x = [f"特征(节点)[{i}]" for i in range(len(data_))]
  2680. y = [f"节点[{i}]" for i in range(len(data_[0]))]
  2681. value = [
  2682. (f"特征(节点)[{i}]", f"节点[{j}]", float(data_[i][j]))
  2683. for i in range(len(data_))
  2684. for j in range(len(data_[i]))
  2685. ]
  2686. c = (
  2687. HeatMap()
  2688. .add_xaxis(x)
  2689. .add_yaxis(f"数据", y, value, **label_setting) # value的第一个数值是x
  2690. .set_global_opts(
  2691. title_opts=opts.TitleOpts(title=name),
  2692. **global_not_legend,
  2693. yaxis_opts=opts.AxisOpts(
  2694. is_scale=True, type_="category"
  2695. ), # 'category'
  2696. xaxis_opts=opts.AxisOpts(is_scale=True, type_="category"),
  2697. visualmap_opts=opts.VisualMapOpts(
  2698. is_show=True,
  2699. max_=float(data_.max()),
  2700. min_=float(data_.min()),
  2701. pos_right="3%",
  2702. ),
  2703. ) # 显示
  2704. )
  2705. tab.add(c, name)
  2706. tab.add(make_tab(x, data_.transpose().tolist()), f"{name}:表格")
  2707. des_to_csv(save_dir, f"{name}:表格", data_.transpose().tolist(), x, y)
  2708. get, x_means, x_range, data_type = regress_visualization(
  2709. x_data, y_data)
  2710. for i in range(len(get)):
  2711. tab.add(get[i], f"{i}训练数据散点图")
  2712. get = prediction_boundary(x_range, x_means, self.predict, data_type)
  2713. for i in range(len(get)):
  2714. tab.add(get[i], f"{i}预测热力图")
  2715. heard = ["神经网络层数"]
  2716. data = [n_layers_]
  2717. for i in range(len(coefs)):
  2718. make_heat_map(coefs[i], f"{i}层权重矩阵")
  2719. heard.append(f"第{i}层节点数")
  2720. data.append(len(coefs[i][0]))
  2721. if self.model_Name == "MLP_class":
  2722. heard += [f"[{i}]类型" for i in range(len(class_))]
  2723. data += class_.tolist()
  2724. tab.add(make_tab(heard, [data]), "数据表")
  2725. save = save_dir + rf"{os.sep}多层感知机.HTML"
  2726. tab.render(save) # 生成HTML
  2727. return save,
  2728. @plugin_class_loading(get_path(r"template/machinelearning"))
  2729. class KmeansModel(UnsupervisedModel):
  2730. def __init__(self, args_use, *args, **kwargs):
  2731. super(KmeansModel, self).__init__(*args, **kwargs)
  2732. self.model = KMeans(n_clusters=args_use["n_clusters"])
  2733. self.class_ = []
  2734. self.n_clusters = args_use["n_clusters"]
  2735. self.k = {"n_clusters": args_use["n_clusters"]}
  2736. self.model_Name = "k-means"
  2737. def fit_model(self, x_data, *args, **kwargs):
  2738. return_ = super().fit_model(x_data, *args, **kwargs)
  2739. self.class_ = list(set(self.model.labels_.tolist()))
  2740. self.have_fit = True
  2741. return return_
  2742. def predict(self, x_data, *args, **kwargs):
  2743. self.x_testdata = x_data.copy()
  2744. y_predict = self.model.predict(x_data)
  2745. self.y_testdata = y_predict.copy()
  2746. self.have_predict = True
  2747. return y_predict, "k-means"
  2748. def data_visualization(self, save_dir, *args, **kwargs):
  2749. tab = Tab()
  2750. y = self.y_testdata
  2751. x_data = self.x_testdata
  2752. class_ = self.class_
  2753. center = self.model.cluster_centers_
  2754. class_heard = [f"簇[{i}]" for i in range(len(class_))]
  2755. func = (
  2756. training_visualization_more
  2757. if more_global
  2758. else training_visualization_center
  2759. )
  2760. get, x_means, x_range, data_type = func(x_data, class_, y, center)
  2761. for i in range(len(get)):
  2762. tab.add(get[i], f"{i}数据散点图")
  2763. get = decision_boundary(
  2764. x_range,
  2765. x_means,
  2766. self.predict,
  2767. class_,
  2768. data_type)
  2769. for i in range(len(get)):
  2770. tab.add(get[i], f"{i}预测热力图")
  2771. heard = class_heard + [f"普适预测第{i}特征" for i in range(len(x_means))]
  2772. data = class_ + [f"{i}" for i in x_means]
  2773. c = Table().add(headers=heard, rows=[data])
  2774. tab.add(c, "数据表")
  2775. des_to_csv(
  2776. save_dir,
  2777. "预测表",
  2778. [[f"{i}" for i in x_means]],
  2779. [f"普适预测第{i}特征" for i in range(len(x_means))],
  2780. )
  2781. save = save_dir + rf"{os.sep}k-means聚类.HTML"
  2782. tab.render(save) # 生成HTML
  2783. return save,
  2784. @plugin_class_loading(get_path(r"template/machinelearning"))
  2785. class AgglomerativeModel(UnsupervisedModel):
  2786. def __init__(self, args_use, *args, **kwargs):
  2787. super(AgglomerativeModel, self).__init__(*args, **kwargs)
  2788. self.model = AgglomerativeClustering(
  2789. n_clusters=args_use["n_clusters"]
  2790. ) # 默认为2,不同于k-means
  2791. self.class_ = []
  2792. self.n_clusters = args_use["n_clusters"]
  2793. self.k = {"n_clusters": args_use["n_clusters"]}
  2794. self.model_Name = "Agglomerative"
  2795. def fit_model(self, x_data, *args, **kwargs):
  2796. return_ = super().fit_model(x_data, *args, **kwargs)
  2797. self.class_ = list(set(self.model.labels_.tolist()))
  2798. self.have_fit = True
  2799. return return_
  2800. def predict(self, x_data, *args, **kwargs):
  2801. self.x_testdata = x_data.copy()
  2802. y_predict = self.model.fit_predict(x_data)
  2803. self.y_traindata = y_predict.copy()
  2804. self.have_predict = True
  2805. return y_predict, "Agglomerative"
  2806. def data_visualization(self, save_dir, *args, **kwargs):
  2807. tab = Tab()
  2808. y = self.y_testdata
  2809. x_data = self.x_testdata
  2810. class_ = self.class_
  2811. class_heard = [f"簇[{i}]" for i in range(len(class_))]
  2812. func = (
  2813. training_visualization_more_no_center
  2814. if more_global
  2815. else training_visualization
  2816. )
  2817. get, x_means, x_range, data_type = func(x_data, class_, y)
  2818. for i in range(len(get)):
  2819. tab.add(get[i], f"{i}训练数据散点图")
  2820. get = decision_boundary(
  2821. x_range,
  2822. x_means,
  2823. self.predict,
  2824. class_,
  2825. data_type)
  2826. for i in range(len(get)):
  2827. tab.add(get[i], f"{i}预测热力图")
  2828. linkage_array = ward(self.x_traindata) # self.y_traindata是结果
  2829. dendrogram(linkage_array)
  2830. plt.savefig(save_dir + rf"{os.sep}Cluster_graph.png")
  2831. image = Image()
  2832. image.add(src=save_dir + rf"{os.sep}Cluster_graph.png",).set_global_opts(
  2833. title_opts=opts.ComponentTitleOpts(title="聚类树状图")
  2834. )
  2835. tab.add(image, "聚类树状图")
  2836. heard = class_heard + [f"普适预测第{i}特征" for i in range(len(x_means))]
  2837. data = class_ + [f"{i}" for i in x_means]
  2838. c = Table().add(headers=heard, rows=[data])
  2839. tab.add(c, "数据表")
  2840. des_to_csv(
  2841. save_dir,
  2842. "预测表",
  2843. [[f"{i}" for i in x_means]],
  2844. [f"普适预测第{i}特征" for i in range(len(x_means))],
  2845. )
  2846. save = save_dir + rf"{os.sep}层次聚类.HTML"
  2847. tab.render(save) # 生成HTML
  2848. return save,
@plugin_class_loading(get_path(r"template/machinelearning"))
class DbscanModel(UnsupervisedModel):
    """Density-based clustering wrapper around sklearn's DBSCAN."""

    def __init__(self, args_use, *args, **kwargs):
        super(DbscanModel, self).__init__(*args, **kwargs)
        self.model = DBSCAN(
            eps=args_use["eps"],
            min_samples=args_use["min_samples"])
        # eps is the neighbourhood radius (typ. 0.5); min_samples (typ. 5) is
        # the cluster/noise boundary (minimum number of points per cluster)
        self.eps = args_use["eps"]
        self.min_samples = args_use["min_samples"]
        self.k = {
            "min_samples": args_use["min_samples"],
            "eps": args_use["eps"]}
        self.class_ = []  # distinct cluster labels found by fit_model (noise is -1)
        self.model_Name = "DBSCAN"

    def fit_model(self, x_data, *args, **kwargs):
        """Fit DBSCAN on x_data and record the distinct cluster labels."""
        return_ = super().fit_model(x_data, *args, **kwargs)
        self.class_ = list(set(self.model.labels_.tolist()))
        self.have_fit = True
        return return_

    def predict(self, x_data, *args, **kwargs):
        """Cluster x_data via fit_predict (DBSCAN has no out-of-sample predict)."""
        self.x_testdata = x_data.copy()
        y_predict = self.model.fit_predict(x_data)
        self.y_testdata = y_predict.copy()
        self.have_predict = True
        return y_predict, "DBSCAN"

    def data_visualization(self, save_dir, *args, **kwargs):
        """Render scatter plots and a data table to an HTML tab page; returns (path,)."""
        # no prediction heat map: DBSCAN cannot predict new samples
        tab = Tab()
        y = self.y_testdata.copy()
        x_data = self.x_testdata.copy()
        class_ = self.class_
        class_heard = [f"簇[{i}]" for i in range(len(class_))]
        # more_global selects the all-pairs scatter variant over adjacent pairs
        func = (
            training_visualization_more_no_center
            if more_global
            else training_visualization
        )
        get, x_means, x_range, data_type = func(x_data, class_, y)
        for i in range(len(get)):
            tab.add(get[i], f"{i}训练数据散点图")
        heard = class_heard + [f"普适预测第{i}特征" for i in range(len(x_means))]
        data = class_ + [f"{i}" for i in x_means]
        c = Table().add(headers=heard, rows=[data])
        tab.add(c, "数据表")
        des_to_csv(
            save_dir,
            "预测表",
            [[f"{i}" for i in x_means]],
            [f"普适预测第{i}特征" for i in range(len(x_means))],
        )
        save = save_dir + rf"{os.sep}密度聚类.HTML"
        tab.render(save)  # generate the HTML page
        return save,
  2904. class FastFourier(StudyMachinebase): # 快速傅里叶变换
  2905. def __init__(self, *args, **kwargs):
  2906. super(FastFourier, self).__init__(*args, **kwargs)
  2907. self.model = None
  2908. self.fourier = None # fft复数
  2909. self.frequency = None # 频率range
  2910. self.angular_Frequency = None # 角频率range
  2911. self.phase = None # 相位range
  2912. self.breadth = None # 震幅range
  2913. self.sample_size = None # 样本数
  2914. def fit_model(self, y_data, *args, **kwargs):
  2915. y_data = y_data.ravel() # 扯平为一维数组
  2916. try:
  2917. assert not self.y_traindata is None
  2918. self.y_traindata = np.hstack((y_data, self.x_traindata))
  2919. except (AssertionError, ValueError):
  2920. self.y_traindata = y_data.copy()
  2921. fourier = fft(y_data)
  2922. self.sample_size = len(y_data)
  2923. self.frequency = np.linspace(0, 1, self.sample_size) # 频率N_range
  2924. self.angular_Frequency = self.frequency / (np.pi * 2) # 角频率w
  2925. self.phase = np.angle(fourier)
  2926. self.breadth = np.abs(fourier)
  2927. self.fourier = fourier
  2928. self.have_fit = True
  2929. return "None", "None"
  2930. def predict(self, x_data, *args, **kwargs):
  2931. return np.array([]), ""
  2932. def data_visualization(self, save_dir, *args, **kwargs):
  2933. # DBSCAN没有预测的必要
  2934. tab = Tab()
  2935. y = self.y_traindata.copy()
  2936. n = self.sample_size
  2937. phase = self.phase # 相位range
  2938. breadth = self.breadth # 震幅range
  2939. normalization_breadth = breadth / n
  2940. def line(name, value, s=slice(0, None)) -> Line:
  2941. c = (
  2942. Line()
  2943. .add_xaxis(self.frequency[s].tolist())
  2944. .add_yaxis(
  2945. "",
  2946. value,
  2947. **label_setting,
  2948. symbol="none" if self.sample_size >= 500 else None,
  2949. )
  2950. .set_global_opts(
  2951. title_opts=opts.TitleOpts(title=name),
  2952. **global_not_legend,
  2953. xaxis_opts=opts.AxisOpts(type_="value"),
  2954. yaxis_opts=opts.AxisOpts(type_="value"),
  2955. )
  2956. )
  2957. return c
  2958. tab.add(line("原始数据", y.tolist()), "原始数据")
  2959. tab.add(line("双边振幅谱", breadth.tolist()), "双边振幅谱")
  2960. tab.add(
  2961. line(
  2962. "双边振幅谱(归一化)",
  2963. normalization_breadth.tolist()),
  2964. "双边振幅谱(归一化)")
  2965. tab.add(
  2966. line("单边相位谱", breadth[: int(n / 2)].tolist(), slice(0, int(n / 2))), "单边相位谱"
  2967. )
  2968. tab.add(
  2969. line(
  2970. "单边相位谱(归一化)",
  2971. normalization_breadth[: int(n / 2)].tolist(),
  2972. slice(0, int(n / 2)),
  2973. ),
  2974. "单边相位谱(归一化)",
  2975. )
  2976. tab.add(line("双边相位谱", phase.tolist()), "双边相位谱")
  2977. tab.add(
  2978. line("单边相位谱", phase[: int(n / 2)].tolist(), slice(0, int(n / 2))), "单边相位谱"
  2979. )
  2980. tab.add(make_tab(self.frequency.tolist(), [breadth.tolist()]), "双边振幅谱")
  2981. tab.add(make_tab(self.frequency.tolist(), [phase.tolist()]), "双边相位谱")
  2982. tab.add(
  2983. make_tab(
  2984. self.frequency.tolist(), [
  2985. self.fourier.tolist()]), "快速傅里叶变换")
  2986. save = save_dir + rf"{os.sep}快速傅里叶.HTML"
  2987. tab.render(save) # 生成HTML
  2988. return save,
  2989. class ReverseFastFourier(StudyMachinebase): # 快速傅里叶变换
  2990. def __init__(self, *args, **kwargs):
  2991. super(ReverseFastFourier, self).__init__(*args, **kwargs)
  2992. self.model = None
  2993. self.sample_size = None
  2994. self.y_testdata_real = None
  2995. self.phase = None
  2996. self.breadth = None
  2997. def fit_model(self, y_data, *args, **kwargs):
  2998. return "None", "None"
  2999. def predict(self, x_data, x_name="", add_func=None, *args, **kwargs):
  3000. self.x_testdata = x_data.ravel().astype(np.complex_)
  3001. fourier = ifft(self.x_testdata)
  3002. self.y_testdata = fourier.copy()
  3003. self.y_testdata_real = np.real(fourier)
  3004. self.sample_size = len(self.y_testdata_real)
  3005. self.phase = np.angle(self.x_testdata)
  3006. self.breadth = np.abs(self.x_testdata)
  3007. add_func(self.y_testdata_real.copy(), f"{x_name}:逆向快速傅里叶变换[实数]")
  3008. return fourier, "逆向快速傅里叶变换"
  3009. def data_visualization(self, save_dir, *args, **kwargs):
  3010. # DBSCAN没有预测的必要
  3011. tab = Tab()
  3012. y = self.y_testdata_real.copy()
  3013. y_data = self.y_testdata.copy()
  3014. n = self.sample_size
  3015. range_n: list = np.linspace(0, 1, n).tolist()
  3016. phase = self.phase # 相位range
  3017. breadth = self.breadth # 震幅range
  3018. def line(name, value, s=slice(0, None)) -> Line:
  3019. c = (
  3020. Line() .add_xaxis(
  3021. range_n[s]) .add_yaxis(
  3022. "",
  3023. value,
  3024. **label_setting,
  3025. symbol="none" if n >= 500 else None) .set_global_opts(
  3026. title_opts=opts.TitleOpts(
  3027. title=name),
  3028. **global_not_legend,
  3029. xaxis_opts=opts.AxisOpts(
  3030. type_="value"),
  3031. yaxis_opts=opts.AxisOpts(
  3032. type_="value"),
  3033. ))
  3034. return c
  3035. tab.add(line("逆向傅里叶变换", y.tolist()), "逆向傅里叶变换[实数]")
  3036. tab.add(make_tab(range_n, [y_data.tolist()]), "逆向傅里叶变换数据")
  3037. tab.add(make_tab(range_n, [y.tolist()]), "逆向傅里叶变换数据[实数]")
  3038. tab.add(line("双边振幅谱", breadth.tolist()), "双边振幅谱")
  3039. tab.add(
  3040. line("单边相位谱", breadth[: int(n / 2)].tolist(), slice(0, int(n / 2))), "单边相位谱"
  3041. )
  3042. tab.add(line("双边相位谱", phase.tolist()), "双边相位谱")
  3043. tab.add(
  3044. line("单边相位谱", phase[: int(n / 2)].tolist(), slice(0, int(n / 2))), "单边相位谱"
  3045. )
  3046. save = save_dir + rf"{os.sep}快速傅里叶.HTML"
  3047. tab.render(save) # 生成HTML
  3048. return save,
  3049. class ReverseFastFourierTwonumpy(ReverseFastFourier): # 2快速傅里叶变换
  3050. def fit_model(
  3051. self,
  3052. x_data,
  3053. y_data=None,
  3054. x_name="",
  3055. add_func=None,
  3056. *args,
  3057. **kwargs):
  3058. r = np.multiply(np.cos(x_data), y_data)
  3059. j = np.multiply(np.sin(x_data), y_data) * 1j
  3060. super(ReverseFastFourierTwonumpy, self).predict(
  3061. r + j, x_name=x_name, add_func=add_func, *args, **kwargs
  3062. )
  3063. return "None", "None"
class CurveFitting(StudyMachinebase):  # curve fitting
    """Least-squares fitting of a user-supplied model function via scipy.optimize.curve_fit."""

    def __init__(self, name, str_, model, *args, **kwargs):
        super(CurveFitting, self).__init__(*args, **kwargs)

        def ndim_down(data: np.ndarray):
            # collapse a 2-D prediction to 1-D by summing each sample's outputs
            if data.ndim == 1:
                return data
            new_data = []
            for i in data:
                new_data.append(np.sum(i))
            return np.array(new_data)

        named_domain = {"np": np, "Func": model, "ndimDown": ndim_down}
        # Build a wrapper with the SAME argument names as the user model
        # (curve_fit inspects them) whose output is always 1-D.
        # NOTE(review): the generated source applies @plugin_func_loading, but
        # named_domain does not define that name -- verify it resolves when
        # exec'd, otherwise fitting raises NameError here.
        protection_func = f"""
@plugin_func_loading(get_path(r'template/machinelearning'))
def FUNC({",".join(model.__code__.co_varnames)}):
    answer = Func({",".join(model.__code__.co_varnames)})
    return ndimDown(answer)
"""
        # exec on an internally supplied model function, not external input
        exec(protection_func, named_domain)
        self.func = named_domain["FUNC"]
        self.fit_data = None  # full curve_fit result (popt, pcov)
        self.name = name
        self.func_str = str_

    def fit_model(
            self,
            x_data: np.ndarray,
            y_data: np.ndarray,
            *args,
            **kwargs):
        """Accumulate training data and re-run curve_fit; popt goes to self.model."""
        y_data = y_data.ravel()
        x_data = x_data.astype(np.float64)
        try:
            assert not self.x_traindata is None
            self.x_traindata = np.vstack((x_data, self.x_traindata))
            self.y_traindata = np.vstack((y_data, self.y_traindata))
        except (AssertionError, ValueError):
            # first call (or incompatible shapes): start a fresh training set
            self.x_traindata = x_data.copy()
            self.y_traindata = y_data.copy()
        self.fit_data = optimize.curve_fit(
            self.func, self.x_traindata, self.y_traindata
        )
        self.model = self.fit_data[0].copy()  # optimal parameter vector
        return "None", "None"

    def predict(self, x_data, *args, **kwargs):
        """Evaluate the fitted function; each sample's outputs are summed to a scalar."""
        self.x_testdata = x_data.copy()
        predict = self.func(x_data, *self.model)
        y_predict = []
        for i in predict:
            y_predict.append(np.sum(i))
        y_predict = np.array(y_predict)
        self.y_testdata = y_predict.copy()
        self.have_predict = True
        return y_predict, self.name

    def data_visualization(self, save_dir, *args, **kwargs):
        """Render regression charts, heat maps and fitted parameters; returns (path,)."""
        tab = Tab()
        y = self.y_testdata.copy()
        x_data = self.x_testdata.copy()
        get, x_means, x_range, data_type = regress_visualization(x_data, y)
        for i in range(len(get)):
            tab.add(get[i], f"{i}预测类型图")
        get = prediction_boundary(x_range, x_means, self.predict, data_type)
        for i in range(len(get)):
            tab.add(get[i], f"{i}预测热力图")
        tab.add(
            make_tab(
                [f"普适预测第{i}特征" for i in range(len(x_means))],
                [[f"{i}" for i in x_means]],
            ),
            "普适预测特征数据",
        )
        tab.add(
            make_tab(
                [f"参数[{i}]" for i in range(len(self.model))],
                [[f"{i}" for i in self.model]],
            ),
            "拟合参数",
        )
        save = save_dir + rf"{os.sep}曲线拟合.HTML"
        tab.render(save)  # generate the HTML page
        return save,
  3144. @plugin_class_loading(get_path(r"template/machinelearning"))
  3145. class Tab(tab_First):
  3146. def __init__(self, *args, **kwargs):
  3147. super(Tab, self).__init__(*args, **kwargs)
  3148. self.element = {} # 记录tab组成元素 name:charts
  3149. def add(self, chart, tab_name):
  3150. self.element[tab_name] = chart
  3151. return super(Tab, self).add(chart, tab_name)
  3152. def render(
  3153. self,
  3154. path: str = "render.html",
  3155. template_name: str = "simple_tab.html",
  3156. *args,
  3157. **kwargs,
  3158. ) -> str:
  3159. if all_global:
  3160. render_dir = path_split(path)[0]
  3161. for i in self.element:
  3162. self.element[i].render(render_dir + os.sep + i + ".html")
  3163. return super(Tab, self).render(path, template_name, *args, **kwargs)
  3164. @plugin_class_loading(get_path(r"template/machinelearning"))
  3165. class Table(TableFisrt):
  3166. def __init__(self, *args, **kwargs):
  3167. super(Table, self).__init__(*args, **kwargs)
  3168. self.HEADERS = []
  3169. self.ROWS = [[]]
  3170. def add(self, headers, rows, attributes=None):
  3171. if len(rows) == 1:
  3172. new_headers = ["数据类型", "数据"]
  3173. new_rows = list(zip(headers, rows[0]))
  3174. self.HEADERS = new_headers
  3175. self.ROWS = new_rows
  3176. return super().add(new_headers, new_rows, attributes)
  3177. else:
  3178. self.HEADERS = headers
  3179. self.ROWS = rows
  3180. return super().add(headers, rows, attributes)
  3181. def render(self, path="render.html", *args, **kwargs,) -> str:
  3182. if csv_global:
  3183. save_dir, name = path_split(path)
  3184. name = splitext(name)[0]
  3185. try:
  3186. DataFrame(self.ROWS, columns=self.HEADERS).to_csv(
  3187. save_dir + os.sep + name + ".csv"
  3188. )
  3189. except BaseException as e:
  3190. logging.warning(str(e))
  3191. return super().render(path, *args, **kwargs)
  3192. @plugin_func_loading(get_path(r"template/machinelearning"))
  3193. def make_list(first, end, num=35):
  3194. n = num / (end - first)
  3195. if n == 0:
  3196. n = 1
  3197. return_ = []
  3198. n_first = first * n
  3199. n_end = end * n
  3200. while n_first <= n_end:
  3201. cul = n_first / n
  3202. return_.append(round(cul, 2))
  3203. n_first += 1
  3204. return return_
  3205. @plugin_func_loading(get_path(r"template/machinelearning"))
  3206. def list_filter(original_list, num=70):
  3207. if len(original_list) <= num:
  3208. return original_list
  3209. n = int(num / len(original_list))
  3210. return_ = original_list[::n]
  3211. return return_
@plugin_func_loading(get_path(r"template/machinelearning"))
def prediction_boundary(x_range, x_means, predict_func, data_type):  # regression x-x heat maps
    """Render a prediction heat map for every unordered feature pair.

    x_range: per-feature plotting range; x_means: fixed values for features
    not on the axes; predict_func: callback returning (values, name);
    data_type: per-feature flag (1 = continuous range, else a discrete list).
    Returns a list of HeatMap charts (empty when there is a single feature).
    """
    # a: x-axis feature (index j), b: y-axis feature (index i), rest = x_means
    render_list = []
    if len(x_means) == 1:
        return render_list
    for i in range(len(x_means)):
        for j in range(len(x_means)):
            if j <= i:
                continue  # visit each unordered pair once
            a_range = x_range[j]
            a_type = data_type[j]
            b_range = x_range[i]
            b_type = data_type[i]
            if a_type == 1:
                a_list = make_list(a_range[0], a_range[1], 70)
            else:
                a_list = list_filter(a_range)  # accepts at most 70 values
            if b_type == 1:
                b_list = make_list(b_range[0], b_range[1], 35)
            else:
                b_list = list_filter(b_range)  # accepts at most 70 values
            # cartesian product of the two axes, flattened
            a = np.array([i for i in a_list for _ in b_list]).T
            b = np.array([i for _ in a_list for i in b_list]).T
            data = np.array([x_means for _ in a_list for i in b_list])
            data[:, j] = a
            data[:, i] = b
            y_data = predict_func(data)[0].tolist()
            value = [[float(a[i]), float(b[i]), y_data[i]]
                     for i in range(len(a))]
            c = (
                HeatMap()
                .add_xaxis(np.unique(a))
                # the first element of each value triple is the x coordinate
                .add_yaxis(f"数据", np.unique(b), value, **label_setting)
                .set_global_opts(
                    title_opts=opts.TitleOpts(title="预测热力图"),
                    **global_not_legend,
                    yaxis_opts=opts.AxisOpts(
                        is_scale=True, type_="category"
                    ),  # 'category'
                    xaxis_opts=opts.AxisOpts(is_scale=True, type_="category"),
                    visualmap_opts=opts.VisualMapOpts(
                        is_show=True,
                        max_=int(max(y_data)) + 1,
                        min_=int(min(y_data)),
                        pos_right="3%",
                    ),
                )  # display
            )
            render_list.append(c)
    return render_list
@plugin_func_loading(get_path(r"template/machinelearning"))
def prediction_boundary_more(x_range, x_means, predict_func, data_type):
    """Like prediction_boundary, but only renders adjacent feature pairs (i-1, i).

    Returns a list of HeatMap charts (empty when there is a single feature).
    """
    # a: x-axis feature (index i-1), b: y-axis feature (index i), rest = x_means
    render_list = []
    if len(x_means) == 1:
        return render_list
    for i in range(len(x_means)):
        if i == 0:
            continue  # pairs start at (0, 1)
        a_range = x_range[i - 1]
        a_type = data_type[i - 1]
        b_range = x_range[i]
        b_type = data_type[i]
        if a_type == 1:
            a_list = make_list(a_range[0], a_range[1], 70)
        else:
            a_list = list_filter(a_range)  # accepts at most 70 values
        if b_type == 1:
            b_list = make_list(b_range[0], b_range[1], 35)
        else:
            b_list = list_filter(b_range)  # accepts at most 70 values
        # cartesian product of the two axes, flattened
        a = np.array([i for i in a_list for _ in b_list]).T
        b = np.array([i for _ in a_list for i in b_list]).T
        data = np.array([x_means for _ in a_list for i in b_list])
        data[:, i - 1] = a
        data[:, i] = b
        y_data = predict_func(data)[0].tolist()
        value = [[float(a[i]), float(b[i]), y_data[i]] for i in range(len(a))]
        c = (
            HeatMap()
            .add_xaxis(np.unique(a))
            # the first element of each value triple is the x coordinate
            .add_yaxis(f"数据", np.unique(b), value, **label_setting)
            .set_global_opts(
                title_opts=opts.TitleOpts(title="预测热力图"),
                **global_not_legend,
                yaxis_opts=opts.AxisOpts(
                    is_scale=True, type_="category"),  # 'category'
                xaxis_opts=opts.AxisOpts(is_scale=True, type_="category"),
                visualmap_opts=opts.VisualMapOpts(
                    is_show=True,
                    max_=int(max(y_data)) + 1,
                    min_=int(min(y_data)),
                    pos_right="3%",
                ),
            )  # display
        )
        render_list.append(c)
    return render_list
def decision_boundary(
    x_range, x_means, predict_func, class_list, data_type, no_unknow=False
):  # classification x-x heat maps
    """Render a classification heat map for every adjacent feature pair.

    x_range: per-feature plotting range; x_means: fixed values for features
    not on the axes; predict_func: callback returning (labels, name);
    class_list: known classes; data_type: per-feature flag (1 = continuous
    range, else a discrete list); no_unknow: drop the extra "未知" (unknown)
    legend segment.  Returns a list of HeatMap charts.
    """
    # a: x-axis feature (index i-1), b: y-axis feature (index i), rest = x_means
    # classes are mapped to integer codes; unseen labels map to -1
    class_dict = dict(zip(class_list, [i for i in range(len(class_list))]))
    if not no_unknow:
        map_dict = [{"min": -1.5, "max": -0.5, "label": "未知"}]  # piecewise legend
    else:
        map_dict = []
    for i in class_dict:
        map_dict.append(
            {"min": class_dict[i] - 0.5, "max": class_dict[i] + 0.5, "label": str(i)}
        )
    render_list = []
    if len(x_means) == 1:
        # single feature: one heat map with a dummy "None" x axis
        a_range = x_range[0]
        if data_type[0] == 1:
            a_list = make_list(a_range[0], a_range[1], 70)
        else:
            a_list = a_range
        a = np.array([i for i in a_list]).reshape(-1, 1)
        y_data = predict_func(a)[0].tolist()
        value = [[0, float(a[i]), class_dict.get(y_data[i], -1)]
                 for i in range(len(a))]
        c = (
            HeatMap()
            .add_xaxis(["None"])
            # the first element of each value triple is the x coordinate
            .add_yaxis(f"数据", np.unique(a), value, **label_setting)
            .set_global_opts(
                title_opts=opts.TitleOpts(title="预测热力图"),
                **global_not_legend,
                yaxis_opts=opts.AxisOpts(
                    is_scale=True, type_="category"),  # 'category'
                xaxis_opts=opts.AxisOpts(is_scale=True, type_="category"),
                visualmap_opts=opts.VisualMapOpts(
                    is_show=True,
                    max_=max(class_dict.values()),
                    min_=-1,
                    is_piecewise=True,
                    pieces=map_dict,
                    orient="horizontal",
                    pos_bottom="3%",
                ),
            )
        )
        render_list.append(c)
        return render_list
    # more than one feature: iterate adjacent pairs (i-1, i)
    for i in range(len(x_means)):
        if i == 0:
            continue
        a_range = x_range[i - 1]
        a_type = data_type[i - 1]
        b_range = x_range[i]
        b_type = data_type[i]
        if a_type == 1:
            a_list = make_list(a_range[0], a_range[1], 70)
        else:
            a_list = a_range
        if b_type == 1:
            rb = make_list(b_range[0], b_range[1], 35)
        else:
            rb = b_range
        # cartesian product of the two axes, flattened
        a = np.array([i for i in a_list for _ in rb]).T
        b = np.array([i for _ in a_list for i in rb]).T
        data = np.array([x_means for _ in a_list for i in rb])
        data[:, i - 1] = a
        data[:, i] = b
        y_data = predict_func(data)[0].tolist()
        value = [
            [float(a[i]), float(b[i]), class_dict.get(y_data[i], -1)]
            for i in range(len(a))
        ]
        c = (
            HeatMap()
            .add_xaxis(np.unique(a))
            # the first element of each value triple is the x coordinate
            .add_yaxis(f"数据", np.unique(b), value, **label_setting)
            .set_global_opts(
                title_opts=opts.TitleOpts(title="预测热力图"),
                **global_not_legend,
                yaxis_opts=opts.AxisOpts(
                    is_scale=True, type_="category"),  # 'category'
                xaxis_opts=opts.AxisOpts(is_scale=True, type_="category"),
                visualmap_opts=opts.VisualMapOpts(
                    is_show=True,
                    max_=max(class_dict.values()),
                    min_=-1,
                    is_piecewise=True,
                    pieces=map_dict,
                    orient="horizontal",
                    pos_bottom="3%",
                ),
            )
        )
        render_list.append(c)
    return render_list
def decision_boundary_more(
    x_range, x_means, predict_func, class_list, data_type, no_unknow=False
):
    """Like decision_boundary, but renders every unordered feature pair.

    Delegates to decision_boundary for the single-feature case.
    Returns a list of HeatMap charts.
    """
    # a: x-axis feature (index j), b: y-axis feature (index i), rest = x_means
    # classes are mapped to integer codes; unseen labels map to -1
    class_dict = dict(zip(class_list, [i for i in range(len(class_list))]))
    if not no_unknow:
        map_dict = [{"min": -1.5, "max": -0.5, "label": "未知"}]  # piecewise legend
    else:
        map_dict = []
    for i in class_dict:
        map_dict.append(
            {"min": class_dict[i] - 0.5, "max": class_dict[i] + 0.5, "label": str(i)}
        )
    render_list = []
    if len(x_means) == 1:
        return decision_boundary(
            x_range, x_means, predict_func, class_list, data_type, no_unknow
        )
    # more than one feature: iterate all unordered pairs (i, j > i)
    for i in range(len(x_means)):
        for j in range(len(x_means)):
            if j <= i:
                continue
            a_range = x_range[j]
            a_type = data_type[j]
            b_range = x_range[i]
            b_type = data_type[i]
            if a_type == 1:
                a_range = make_list(a_range[0], a_range[1], 70)
            else:
                a_range = a_range  # no-op: already a discrete value list
            if b_type == 1:
                b_range = make_list(b_range[0], b_range[1], 35)
            else:
                b_range = b_range  # no-op: already a discrete value list
            # cartesian product of the two axes, flattened
            a = np.array([i for i in a_range for _ in b_range]).T
            b = np.array([i for _ in a_range for i in b_range]).T
            data = np.array([x_means for _ in a_range for i in b_range])
            data[:, j] = a
            data[:, i] = b
            y_data = predict_func(data)[0].tolist()
            value = [
                [float(a[i]), float(b[i]), class_dict.get(y_data[i], -1)]
                for i in range(len(a))
            ]
            c = (
                HeatMap()
                .add_xaxis(np.unique(a))
                # the first element of each value triple is the x coordinate
                .add_yaxis(f"数据", np.unique(b), value, **label_setting)
                .set_global_opts(
                    title_opts=opts.TitleOpts(title="预测热力图"),
                    **global_not_legend,
                    yaxis_opts=opts.AxisOpts(
                        is_scale=True, type_="category"
                    ),  # 'category'
                    xaxis_opts=opts.AxisOpts(is_scale=True, type_="category"),
                    visualmap_opts=opts.VisualMapOpts(
                        is_show=True,
                        max_=max(class_dict.values()),
                        min_=-1,
                        is_piecewise=True,
                        pieces=map_dict,
                        orient="horizontal",
                        pos_bottom="3%",
                    ),
                )
            )
            render_list.append(c)
    return render_list
@plugin_func_loading(get_path(r"template/machinelearning"))
def see_tree(tree_file_dir):
    """Parse a Graphviz .dot decision-tree export into a pyecharts Tree chart.

    Node lines (`id [label="..."] ;`) become tree nodes; edge lines
    (`parent -> child ...;`) wire up the children.  The first node that never
    appears as a child is taken as the root.
    """
    node_regex = re.compile(r'^([0-9]+) \[label="(.+)"\] ;$')  # matches node lines
    link_regex = re.compile("^([0-9]+) -> ([0-9]+) (.*);$")  # matches edge lines
    node_dict = {}
    link_list = []
    with open(tree_file_dir, "r") as f:  # apparently must use separate handles for w and r
        for i in f:
            try:
                regex_result = re.findall(node_regex, i)[0]
                if regex_result[0] != "":
                    try:
                        # node id doubles as the node's numeric value
                        v = float(regex_result[0])
                    except ValueError:
                        v = 0
                    node_dict[regex_result[0]] = {
                        "name": regex_result[1].replace("\\n", "\n"),
                        "value": v,
                        "children": [],
                    }
                    continue
            except BaseException as e:
                # non-node line: fall through and try it as an edge
                logging.warning(str(e))
            try:
                regex_result = re.findall(link_regex, i)[0]
                if regex_result[0] != "" and regex_result[1] != "":
                    link_list.append((regex_result[0], regex_result[1]))
            except BaseException as e:
                logging.warning(str(e))
    father_list = []  # node ids that already have a parent
    for i in link_list:
        father = i[0]  # parent node id
        son = i[1]  # child node id
        try:
            node_dict[father]["children"].append(node_dict[son])
            father_list.append(son)
        except BaseException as e:
            logging.warning(str(e))
    # roots: nodes that were never recorded as someone's child
    father = list(set(node_dict.keys()) - set(father_list))
    c = (
        Tree()
        .add("", [node_dict[father[0]]], is_roam=True)
        .set_global_opts(
            title_opts=opts.TitleOpts(title="决策树可视化"),
            toolbox_opts=opts.ToolboxOpts(is_show=True),
        )
    )
    return c
@plugin_func_loading(get_path(r"template/machinelearning"))
def make_tab(heard, row):
    """Build a Table chart from header list *heard* and list-of-rows *row*."""
    return Table().add(headers=heard, rows=row)
  3538. @plugin_func_loading(get_path(r"template/machinelearning"))
  3539. def coefficient_scatter_plot(w_heard, w):
  3540. c = (
  3541. Scatter() .add_xaxis(w_heard) .add_yaxis(
  3542. "", w, **label_setting) .set_global_opts(
  3543. title_opts=opts.TitleOpts(
  3544. title="系数w散点图"), **global_setting))
  3545. return c
  3546. @plugin_func_loading(get_path(r"template/machinelearning"))
  3547. def coefficient_bar_plot(w_heard, w):
  3548. c = (
  3549. Bar() .add_xaxis(w_heard) .add_yaxis(
  3550. "",
  3551. abs(w).tolist(),
  3552. **label_setting) .set_global_opts(
  3553. title_opts=opts.TitleOpts(
  3554. title="系数w柱状图"),
  3555. **global_setting))
  3556. return c
  3557. @plugin_func_loading(get_path(r"template/machinelearning"))
  3558. def is_continuous(data: np.array, f: float = 0.1):
  3559. data = data.tolist()
  3560. l: list = np.unique(data).tolist()
  3561. return len(l) / len(data) >= f or len(data) <= 3
  3562. @plugin_func_loading(get_path(r"template/machinelearning"))
  3563. def quick_stats(x_data):
  3564. statistics_assistant = CategoricalData()
  3565. print(x_data)
  3566. for i in range(len(x_data)):
  3567. x1 = x_data[i] # x坐标
  3568. statistics_assistant(x1)
  3569. return statistics_assistant
@plugin_func_loading(get_path(r"template/machinelearning"))
def training_visualization_more_no_center(x_data, class_list, y_data):
    """Scatter charts for every unordered feature pair, one series per class.

    Returns (render_list, means, x_range, data_type); the last three come
    from quick_stats over the transposed data.
    """
    x_data = x_data.transpose()
    if len(x_data) == 1:
        # single feature: pad with a zero row so a 2-D scatter is possible
        x_data = np.array([x_data[0], np.zeros(len(x_data[0]))])
    statistics_assistant = quick_stats(x_data)
    render_list = []
    for i in range(len(x_data)):
        for a in range(len(x_data)):
            if a <= i:
                continue  # visit each unordered pair once
            x1 = x_data[i]  # y-axis feature
            x1_is_continuous = is_continuous(x1)
            x2 = x_data[a]  # x-axis feature
            x2_is_continuous = is_continuous(x2)
            base_render = None  # chart accumulated over the classes
            for class_num in range(len(class_list)):
                now_class = class_list[class_num]
                plot_x1 = x1[y_data == now_class].tolist()
                plot_x2 = x2[y_data == now_class]
                axis_x2 = np.unique(plot_x2)
                plot_x2 = x2[y_data == now_class].tolist()
                # unlike a plain scatter, x2 serves as the category axis here
                c = (
                    Scatter()
                    .add_xaxis(plot_x2)
                    .add_yaxis(f"{now_class}", plot_x1, **label_setting)
                    .set_global_opts(
                        title_opts=opts.TitleOpts(title=f"[{a}-{i}]训练数据散点图"),
                        **global_setting,
                        yaxis_opts=opts.AxisOpts(
                            type_="value" if x1_is_continuous else "category",
                            is_scale=True,
                        ),
                        xaxis_opts=opts.AxisOpts(
                            type_="value" if x2_is_continuous else "category",
                            is_scale=True,
                        ),
                    )
                )
                c.add_xaxis(axis_x2)
                if base_render is None:
                    base_render = c
                else:
                    base_render = base_render.overlap(c)
            render_list.append(base_render)
    means, x_range, data_type = statistics_assistant.get()
    return render_list, means, x_range, data_type
@plugin_func_loading(get_path(r"template/machinelearning"))
def training_visualization_more(x_data, class_list, y_data, center):
    """Scatter charts for every unordered feature pair with cluster-center markers.

    center is indexed [cluster][feature].  Returns
    (render_list, means, x_range, data_type).
    """
    x_data = x_data.transpose()
    if len(x_data) == 1:
        # single feature: pad with a zero row so a 2-D scatter is possible
        x_data = np.array([x_data[0], np.zeros(len(x_data[0]))])
    statistics_assistant = quick_stats(x_data)
    render_list = []
    for i in range(len(x_data)):
        for a in range(len(x_data)):
            if a <= i:
                continue  # visit each unordered pair once
            x1 = x_data[i]  # y-axis feature
            x1_is_continuous = is_continuous(x1)
            x2 = x_data[a]  # x-axis feature
            x2_is_continuous = is_continuous(x2)
            base_render = None  # chart accumulated over the classes
            for class_num in range(len(class_list)):
                now_class = class_list[class_num]
                plot_x1 = x1[y_data == now_class].tolist()
                plot_x2 = x2[y_data == now_class]
                axis_x2 = np.unique(plot_x2)
                plot_x2 = x2[y_data == now_class].tolist()
                # unlike a plain scatter, x2 serves as the category axis here
                c = (
                    Scatter()
                    .add_xaxis(plot_x2)
                    .add_yaxis(f"{now_class}", plot_x1, **label_setting)
                    .set_global_opts(
                        title_opts=opts.TitleOpts(title=f"[{a}-{i}]训练数据散点图"),
                        **global_setting,
                        yaxis_opts=opts.AxisOpts(
                            type_="value" if x1_is_continuous else "category",
                            is_scale=True,
                        ),
                        xaxis_opts=opts.AxisOpts(
                            type_="value" if x2_is_continuous else "category",
                            is_scale=True,
                        ),
                    )
                )
                c.add_xaxis(axis_x2)
                # add the cluster-center marker
                try:
                    center_x2 = [center[class_num][a]]
                except IndexError:
                    center_x2 = [0]  # fall back when the center lacks this feature
                b = (
                    Scatter()
                    .add_xaxis(center_x2)
                    .add_yaxis(
                        f"[{now_class}]中心",
                        [center[class_num][i]],
                        **label_setting,
                        symbol="triangle",
                    )
                    .set_global_opts(
                        title_opts=opts.TitleOpts(title="簇中心"),
                        **global_setting,
                        yaxis_opts=opts.AxisOpts(
                            type_="value" if x1_is_continuous else "category",
                            is_scale=True,
                        ),
                        xaxis_opts=opts.AxisOpts(
                            type_="value" if x2_is_continuous else "category",
                            is_scale=True,
                        ),
                    )
                )
                c.overlap(b)
                if base_render is None:
                    base_render = c
                else:
                    base_render = base_render.overlap(c)
            render_list.append(base_render)
    means, x_range, data_type = statistics_assistant.get()
    return render_list, means, x_range, data_type
@plugin_func_loading(get_path(r"template/machinelearning"))
def training_visualization_center(x_data, class_data, y_data, center):
    """Scatter charts for adjacent feature pairs with cluster-center markers.

    center is indexed [cluster][feature].  Returns
    (render_list, means, x_range, data_type).
    """
    x_data = x_data.transpose()
    if len(x_data) == 1:
        # single feature: pad with a zero row so a 2-D scatter is possible
        x_data = np.array([x_data[0], np.zeros(len(x_data[0]))])
    statistics_assistant = quick_stats(x_data)
    render_list = []
    for i in range(len(x_data)):
        if i == 0:
            continue  # pairs start at (0, 1)
        x1 = x_data[i]  # y-axis feature
        x1_is_continuous = is_continuous(x1)
        x2 = x_data[i - 1]  # x-axis feature
        x2_is_continuous = is_continuous(x2)
        base_render = None  # chart accumulated over the classes
        for class_num in range(len(class_data)):
            n_class = class_data[class_num]
            x_1 = x1[y_data == n_class].tolist()
            x_2 = x2[y_data == n_class]
            x_2_new = np.unique(x_2)
            x_2 = x2[y_data == n_class].tolist()
            # unlike a plain scatter, x_2 serves as the category axis here
            c = (
                Scatter()
                .add_xaxis(x_2)
                .add_yaxis(
                    f"{n_class}",
                    x_1,
                    **label_setting)
                .set_global_opts(
                    title_opts=opts.TitleOpts(
                        title=f"[{i-1}-{i}]训练数据散点图"),
                    **global_setting,
                    yaxis_opts=opts.AxisOpts(
                        type_="value" if x1_is_continuous else "category",
                        is_scale=True),
                    xaxis_opts=opts.AxisOpts(
                        type_="value" if x2_is_continuous else "category",
                        is_scale=True),
                ))
            c.add_xaxis(x_2_new)
            # add the cluster-center marker
            try:
                center_x_2 = [center[class_num][i - 1]]
            except IndexError:
                center_x_2 = [0]  # fall back when the center lacks this feature
            b = (
                Scatter()
                .add_xaxis(center_x_2)
                .add_yaxis(
                    f"[{n_class}]中心",
                    [
                        center[class_num][i]],
                    **label_setting,
                    symbol="triangle",
                )
                .set_global_opts(
                    title_opts=opts.TitleOpts(
                        title="簇中心"),
                    **global_setting,
                    yaxis_opts=opts.AxisOpts(
                        type_="value" if x1_is_continuous else "category",
                        is_scale=True),
                    xaxis_opts=opts.AxisOpts(
                        type_="value" if x2_is_continuous else "category",
                        is_scale=True),
                ))
            c.overlap(b)
            if base_render is None:
                base_render = c
            else:
                base_render = base_render.overlap(c)
        render_list.append(base_render)
    means, x_range, data_type = statistics_assistant.get()
    return render_list, means, x_range, data_type
  3763. @plugin_func_loading(get_path(r"template/machinelearning"))
  3764. def training_visualization(x_data, class_, y_data): # 根据不同类别绘制x-x分类散点图
  3765. x_data = x_data.transpose()
  3766. if len(x_data) == 1:
  3767. x_data = np.array([x_data[0], np.zeros(len(x_data[0]))])
  3768. statistics_assistant = quick_stats(x_data)
  3769. render_list = []
  3770. for i in range(len(x_data)):
  3771. if i == 0:
  3772. continue
  3773. x1 = x_data[i] # x坐标
  3774. x1_is_continuous = is_continuous(x1)
  3775. x2 = x_data[i - 1] # y坐标
  3776. x2_is_continuous = is_continuous(x2)
  3777. render_list = None # 旧的C
  3778. for now_class in class_:
  3779. plot_x1 = x1[y_data == now_class].tolist()
  3780. plot_x2 = x2[y_data == now_class]
  3781. axis_x2 = np.unique(plot_x2)
  3782. plot_x2 = x2[y_data == now_class].tolist()
  3783. # x与散点图不同,这里是纵坐标
  3784. c = (
  3785. Scatter() .add_xaxis(plot_x2) .add_yaxis(
  3786. f"{now_class}",
  3787. plot_x1,
  3788. **label_setting) .set_global_opts(
  3789. title_opts=opts.TitleOpts(
  3790. title="训练数据散点图"),
  3791. **global_setting,
  3792. yaxis_opts=opts.AxisOpts(
  3793. type_="value" if x1_is_continuous else "category",
  3794. is_scale=True),
  3795. xaxis_opts=opts.AxisOpts(
  3796. type_="value" if x2_is_continuous else "category",
  3797. is_scale=True),
  3798. ))
  3799. c.add_xaxis(axis_x2)
  3800. if render_list is None:
  3801. render_list = c
  3802. else:
  3803. render_list = render_list.overlap(c)
  3804. render_list.append(render_list)
  3805. means, x_range, data_type = statistics_assistant.get()
  3806. return render_list, means, x_range, data_type
  3807. @plugin_func_loading(get_path(r"template/machinelearning"))
  3808. def training_visualization_no_class(x_data): # 根据绘制x-x分类散点图(无类别)
  3809. x_data = x_data.transpose()
  3810. if len(x_data) == 1:
  3811. x_data = np.array([x_data[0], np.zeros(len(x_data[0]))])
  3812. statistics_assistant = quick_stats(x_data)
  3813. render_list = []
  3814. for i in range(len(x_data)):
  3815. if i == 0:
  3816. continue
  3817. x1 = x_data[i] # x坐标
  3818. x1_is_continuous = is_continuous(x1)
  3819. x2 = x_data[i - 1] # y坐标
  3820. x2_is_continuous = is_continuous(x2)
  3821. x2_only = np.unique(x2)
  3822. # x与散点图不同,这里是纵坐标
  3823. c = (
  3824. Scatter() .add_xaxis(x2) .add_yaxis(
  3825. "",
  3826. x1.tolist(),
  3827. **label_setting) .set_global_opts(
  3828. title_opts=opts.TitleOpts(
  3829. title="训练数据散点图"),
  3830. **global_not_legend,
  3831. yaxis_opts=opts.AxisOpts(
  3832. type_="value" if x1_is_continuous else "category",
  3833. is_scale=True),
  3834. xaxis_opts=opts.AxisOpts(
  3835. type_="value" if x2_is_continuous else "category",
  3836. is_scale=True),
  3837. ))
  3838. c.add_xaxis(x2_only)
  3839. render_list.append(c)
  3840. means, x_range, data_type = statistics_assistant.get()
  3841. return render_list, means, x_range, data_type
  3842. def training_w(
  3843. x_data, class_list, y_data, w_list, b_list, x_means: list
  3844. ): # 针对分类问题绘制决策边界
  3845. x_data = x_data.transpose()
  3846. if len(x_data) == 1:
  3847. x_data = np.array([x_data[0], np.zeros(len(x_data[0]))])
  3848. render_list = []
  3849. x_means.append(0)
  3850. x_means = np.array(x_means)
  3851. for i in range(len(x_data)):
  3852. if i == 0:
  3853. continue
  3854. x1_is_continuous = is_continuous(x_data[i])
  3855. x2 = x_data[i - 1] # y坐标
  3856. x2_is_continuous = is_continuous(x2)
  3857. o_c = None # 旧的C
  3858. for class_num in range(len(class_list)):
  3859. n_class = class_list[class_num]
  3860. x2_only = np.unique(x2[y_data == n_class])
  3861. # x与散点图不同,这里是纵坐标
  3862. # 加入这个判断是为了解决sklearn历史遗留问题
  3863. if len(class_list) == 2: # 二分类问题
  3864. if class_num == 0:
  3865. continue
  3866. w = w_list[0]
  3867. b = b_list[0]
  3868. else: # 多分类问题
  3869. w = w_list[class_num]
  3870. b = b_list[class_num]
  3871. if x2_is_continuous:
  3872. x2_only = np.array(make_list(x2_only.min(), x2_only.max(), 5))
  3873. w = np.append(w, 0)
  3874. y_data = (
  3875. -(x2_only * w[i - 1]) / w[i]
  3876. + b
  3877. + (x_means[: i - 1] * w[: i - 1]).sum()
  3878. + (x_means[i + 1:] * w[i + 1:]).sum()
  3879. ) # 假设除了两个特征意外,其余特征均为means列表的数值
  3880. c = (
  3881. Line() .add_xaxis(x2_only) .add_yaxis(
  3882. f"决策边界:{n_class}=>[{i}]",
  3883. y_data.tolist(),
  3884. is_smooth=True,
  3885. **label_setting,
  3886. ) .set_global_opts(
  3887. title_opts=opts.TitleOpts(
  3888. title=f"系数w曲线"),
  3889. **global_setting,
  3890. yaxis_opts=opts.AxisOpts(
  3891. type_="value" if x1_is_continuous else "category",
  3892. is_scale=True),
  3893. xaxis_opts=opts.AxisOpts(
  3894. type_="value" if x2_is_continuous else "category",
  3895. is_scale=True),
  3896. ))
  3897. if o_c is None:
  3898. o_c = c
  3899. else:
  3900. o_c = o_c.overlap(c)
  3901. # 下面不要接任何代码,因为上面会continue
  3902. render_list.append(o_c)
  3903. return render_list
  3904. @plugin_func_loading(get_path(r"template/machinelearning"))
  3905. def regress_w(x_data, w_data: np.array, intercept_b, x_means: list): # 针对回归问题(y-x图)
  3906. x_data = x_data.transpose()
  3907. if len(x_data) == 1:
  3908. x_data = np.array([x_data[0], np.zeros(len(x_data[0]))])
  3909. render_list = []
  3910. x_means.append(0) # 确保mean[i+1]不会超出index
  3911. x_means = np.array(x_means)
  3912. w_data = np.append(w_data, 0)
  3913. for i in range(len(x_data)):
  3914. x1 = x_data[i]
  3915. x1_is_continuous = is_continuous(x1)
  3916. if x1_is_continuous:
  3917. x1 = np.array(make_list(x1.min(), x1.max(), 5))
  3918. x1_only = np.unique(x1)
  3919. # 假设除了两个特征意外,其余特征均为means列表的数值
  3920. y_data = (
  3921. x1_only * w_data[i]
  3922. + intercept_b
  3923. + (x_means[:i] * w_data[:i]).sum()
  3924. + (x_means[i + 1:] * w_data[i + 1:]).sum()
  3925. )
  3926. y_is_continuous = is_continuous(y_data)
  3927. c = (
  3928. Line() .add_xaxis(x1_only) .add_yaxis(
  3929. f"拟合结果=>[{i}]",
  3930. y_data.tolist(),
  3931. is_smooth=True,
  3932. **label_setting) .set_global_opts(
  3933. title_opts=opts.TitleOpts(
  3934. title=f"系数w曲线"),
  3935. **global_setting,
  3936. yaxis_opts=opts.AxisOpts(
  3937. type_="value" if y_is_continuous else None,
  3938. is_scale=True),
  3939. xaxis_opts=opts.AxisOpts(
  3940. type_="value" if x1_is_continuous else None,
  3941. is_scale=True),
  3942. ))
  3943. render_list.append(c)
  3944. return render_list
@plugin_func_loading(get_path(r"template/machinelearning"))
def regress_visualization(x_data, y_data):  # y-x data charts
    """Scatter chart of the regression target against every feature.

    A visual map over the target's integer range is attached when the target
    is numeric; if the target is discrete but a feature is continuous, the
    two axes are swapped so the continuous quantity stays on the value axis.
    Returns ``(render_list, means, x_range, data_type)``.
    """
    x_data = x_data.transpose()
    y_is_continuous = is_continuous(y_data)
    statistics_assistant = quick_stats(x_data)
    render_list = []
    try:
        visualmap_opts = opts.VisualMapOpts(
            is_show=True,
            max_=int(y_data.max()) + 1,
            min_=int(y_data.min()),
            pos_right="3%",
        )
    except ValueError:
        # Non-numeric target: drop the visual map and treat y as discrete.
        visualmap_opts = None
        y_is_continuous = False
    for i in range(len(x_data)):
        x1 = x_data[i]  # horizontal axis
        x1_is_continuous = is_continuous(x1)
        # Keep ndarray dtype precision here; converting to list too early
        # causes plotting glitches (duplicated values).
        if not y_is_continuous and x1_is_continuous:
            # Swap so the continuous series sits on the "value" y axis.
            # NOTE(review): this rebinds y_data/y_is_continuous for the
            # remaining iterations as well — confirm that is intended.
            y_is_continuous, x1_is_continuous = x1_is_continuous, y_is_continuous
            x1, y_data = y_data, x1
        c = (
            Scatter()
            .add_xaxis(x1.tolist())  # this is the horizontal axis
            .add_yaxis("数据", y_data.tolist(), **label_setting)
            .set_global_opts(
                title_opts=opts.TitleOpts(title="预测类型图"),
                **global_setting,
                yaxis_opts=opts.AxisOpts(
                    type_="value" if y_is_continuous else "category", is_scale=True
                ),
                xaxis_opts=opts.AxisOpts(
                    type_="value" if x1_is_continuous else "category", is_scale=True
                ),
                visualmap_opts=visualmap_opts,
            )
        )
        c.add_xaxis(np.unique(x1))  # de-duplicated axis values
        render_list.append(c)
    means, x_range, data_type = statistics_assistant.get()
    return render_list, means, x_range, data_type
  3988. @plugin_func_loading(get_path(r"template/machinelearning"))
  3989. def feature_visualization(x_data, data_name=""): # x-x数据图
  3990. seeting = global_setting if data_name else global_not_legend
  3991. x_data = x_data.transpose()
  3992. only = False
  3993. if len(x_data) == 1:
  3994. x_data = np.array([x_data[0], np.zeros(len(x_data[0]))])
  3995. only = True
  3996. render_list = []
  3997. for i in range(len(x_data)):
  3998. for a in range(len(x_data)):
  3999. if a <= i:
  4000. continue # 重复内容,跳过
  4001. x1 = x_data[i] # x坐标
  4002. x1_is_continuous = is_continuous(x1)
  4003. x2 = x_data[a] # y坐标
  4004. x2_is_continuous = is_continuous(x2)
  4005. x2_only = np.unique(x2)
  4006. if only:
  4007. x2_is_continuous = False
  4008. # x与散点图不同,这里是纵坐标
  4009. c = (
  4010. Scatter() .add_xaxis(x2) .add_yaxis(
  4011. data_name,
  4012. x1,
  4013. **label_setting) .set_global_opts(
  4014. title_opts=opts.TitleOpts(
  4015. title=f"[{i}-{a}]数据散点图"),
  4016. **seeting,
  4017. yaxis_opts=opts.AxisOpts(
  4018. type_="value" if x1_is_continuous else "category",
  4019. is_scale=True),
  4020. xaxis_opts=opts.AxisOpts(
  4021. type_="value" if x2_is_continuous else "category",
  4022. is_scale=True),
  4023. ))
  4024. c.add_xaxis(x2_only)
  4025. render_list.append(c)
  4026. return render_list
  4027. @plugin_func_loading(get_path(r"template/machinelearning"))
  4028. def feature_visualization_format(x_data, data_name=""): # x-x数据图
  4029. seeting = global_setting if data_name else global_not_legend
  4030. x_data = x_data.transpose()
  4031. only = False
  4032. if len(x_data) == 1:
  4033. x_data = np.array([x_data[0], np.zeros(len(x_data[0]))])
  4034. only = True
  4035. render_list = []
  4036. for i in range(len(x_data)):
  4037. for a in range(len(x_data)):
  4038. if a <= i:
  4039. continue # 重复内容,跳过(a读取的是i后面的)
  4040. x1 = x_data[i] # x坐标
  4041. x1_is_continuous = is_continuous(x1)
  4042. x2 = x_data[a] # y坐标
  4043. x2_is_continuous = is_continuous(x2)
  4044. x2_only = np.unique(x2)
  4045. x1_list = x1.astype(np.str).tolist()
  4046. for j in range(len(x1_list)):
  4047. x1_list[j] = [x1_list[j], f"特征{j}"]
  4048. if only:
  4049. x2_is_continuous = False
  4050. # x与散点图不同,这里是纵坐标
  4051. c = (
  4052. Scatter() .add_xaxis(x2) .add_yaxis(
  4053. data_name,
  4054. x1_list,
  4055. **label_setting) .set_global_opts(
  4056. title_opts=opts.TitleOpts(
  4057. title=f"[{i}-{a}]数据散点图"),
  4058. **seeting,
  4059. yaxis_opts=opts.AxisOpts(
  4060. type_="value" if x1_is_continuous else "category",
  4061. is_scale=True),
  4062. xaxis_opts=opts.AxisOpts(
  4063. type_="value" if x2_is_continuous else "category",
  4064. is_scale=True),
  4065. tooltip_opts=opts.TooltipOpts(
  4066. is_show=True,
  4067. axis_pointer_type="cross",
  4068. formatter="{c}"),
  4069. ))
  4070. c.add_xaxis(x2_only)
  4071. render_list.append(c)
  4072. return render_list
  4073. @plugin_func_loading(get_path(r"template/machinelearning"))
  4074. def discrete_feature_visualization(x_data, data_name=""): # 必定离散x-x数据图
  4075. seeting = global_setting if data_name else global_not_legend
  4076. x_data = x_data.transpose()
  4077. if len(x_data) == 1:
  4078. x_data = np.array([x_data[0], np.zeros(len(x_data[0]))])
  4079. render_list = []
  4080. for i in range(len(x_data)):
  4081. for a in range(len(x_data)):
  4082. if a <= i:
  4083. continue # 重复内容,跳过
  4084. x1 = x_data[i] # x坐标
  4085. x2 = x_data[a] # y坐标
  4086. x2_only = np.unique(x2)
  4087. # x与散点图不同,这里是纵坐标
  4088. c = (
  4089. Scatter()
  4090. .add_xaxis(x2)
  4091. .add_yaxis(data_name, x1, **label_setting)
  4092. .set_global_opts(
  4093. title_opts=opts.TitleOpts(title=f"[{i}-{a}]数据散点图"),
  4094. **seeting,
  4095. yaxis_opts=opts.AxisOpts(type_="category", is_scale=True),
  4096. xaxis_opts=opts.AxisOpts(type_="category", is_scale=True),
  4097. )
  4098. )
  4099. c.add_xaxis(x2_only)
  4100. render_list.append(c)
  4101. return render_list
  4102. @plugin_func_loading(get_path(r"template/machinelearning"))
  4103. def conversion_control(y_data, x_data, tab): # 合并两x-x图
  4104. if isinstance(x_data, np.ndarray) and isinstance(y_data, np.ndarray):
  4105. get_x = feature_visualization(x_data, "原数据") # 原来
  4106. get_y = feature_visualization(y_data, "转换数据") # 转换
  4107. for i in range(len(get_x)):
  4108. tab.add(get_x[i].overlap(get_y[i]), f"[{i}]数据x-x散点图")
  4109. return tab
  4110. @plugin_func_loading(get_path(r"template/machinelearning"))
  4111. def conversion_separate(y_data, x_data, tab): # 并列显示两x-x图
  4112. if isinstance(x_data, np.ndarray) and isinstance(y_data, np.ndarray):
  4113. get_x = feature_visualization(x_data, "原数据") # 原来
  4114. get_y = feature_visualization(y_data, "转换数据") # 转换
  4115. for i in range(len(get_x)):
  4116. try:
  4117. tab.add(get_x[i], f"[{i}]数据x-x散点图")
  4118. except IndexError:
  4119. pass
  4120. try:
  4121. tab.add(get_y[i], f"[{i}]变维数据x-x散点图")
  4122. except IndexError:
  4123. pass
  4124. return tab
  4125. @plugin_func_loading(get_path(r"template/machinelearning"))
  4126. def conversion_separate_format(y_data, tab): # 并列显示两x-x图
  4127. if isinstance(y_data, np.ndarray):
  4128. get_y = feature_visualization_format(y_data, "转换数据") # 转换
  4129. for i in range(len(get_y)):
  4130. tab.add(get_y[i], f"[{i}]变维数据x-x散点图")
  4131. return tab
  4132. @plugin_func_loading(get_path(r"template/machinelearning"))
  4133. def conversion_separate_wh(w_array, h_array, tab): # 并列显示两x-x图
  4134. if isinstance(w_array, np.ndarray) and isinstance(w_array, np.ndarray):
  4135. get_x = feature_visualization_format(w_array, "W矩阵数据") # 原来
  4136. get_y = feature_visualization(
  4137. h_array.transpose(), "H矩阵数据"
  4138. ) # 转换(先转T,再转T变回原样,W*H是横对列)
  4139. for i in range(len(get_x)):
  4140. try:
  4141. tab.add(get_x[i], f"[{i}]W矩阵x-x散点图")
  4142. except IndexError:
  4143. pass
  4144. try:
  4145. tab.add(get_y[i], f"[{i}]H.T矩阵x-x散点图")
  4146. except IndexError:
  4147. pass
  4148. return tab
  4149. @plugin_func_loading(get_path(r"template/machinelearning"))
  4150. def make_bar(name, value, tab): # 绘制柱状图
  4151. c = (
  4152. Bar()
  4153. .add_xaxis([f"[{i}]特征" for i in range(len(value))])
  4154. .add_yaxis(name, value, **label_setting)
  4155. .set_global_opts(title_opts=opts.TitleOpts(title="系数w柱状图"), **global_setting)
  4156. )
  4157. tab.add(c, name)
  4158. @plugin_func_loading(get_path(r"template/machinelearning"))
  4159. def judging_digits(num: (int, float)): # 查看小数位数
  4160. a = str(abs(num)).split(".")[0]
  4161. if a == "":
  4162. raise ValueError
  4163. return len(a)
  4164. @plugin_func_loading(get_path(r"template/machinelearning"))
  4165. def num_str(num, accuracy):
  4166. num = str(round(float(num), accuracy))
  4167. if len(num.replace(".", "")) == accuracy:
  4168. return num
  4169. n = num.split(".")
  4170. if len(n) == 0: # 无小数
  4171. return num + "." + "0" * (accuracy - len(num))
  4172. else:
  4173. return num + "0" * (accuracy - len(num) + 1) # len(num)多算了一位小数点
  4174. @plugin_func_loading(get_path(r"template/machinelearning"))
  4175. def des_to_csv(save_dir, name, data, columns=None, row=None):
  4176. save_dir = save_dir + os.sep + name + ".csv"
  4177. print(columns)
  4178. print(row)
  4179. print(data)
  4180. DataFrame(data, columns=columns, index=row).to_csv(
  4181. save_dir,
  4182. header=False if columns is None else True,
  4183. index=False if row is None else True,
  4184. )
  4185. return data
  4186. @plugin_func_loading(get_path(r"template/machinelearning"))
  4187. def pack(output_filename, source_dir):
  4188. with tarfile.open(output_filename, "w:gz") as tar:
  4189. tar.add(source_dir, arcname=basename(source_dir))
  4190. return output_filename
def set_global(
        more=more_global,
        all_=all_global,
        csv=csv_global,
        clf=clf_global,
        tar=tar_global,
        new=new_dir_global,
):
    """Update the module-level export switches.

    NOTE(review): the defaults are captured once at definition time, so
    calling ``set_global()`` with no arguments resets every flag to the
    value the module started with, not to its current value — confirm this
    is the intended "reset" semantics.
    """
    global more_global, all_global, csv_global, clf_global, tar_global, new_dir_global
    more_global = more  # plot with all features
    all_global = all_  # export charts
    csv_global = csv  # export CSV files
    clf_global = clf  # export the trained model
    tar_global = tar  # pack results into a tar archive
    new_dir_global = new  # create a fresh output directory
class MachineLearnerInit(
        LearnerIO, Calculation, LearnerMerge, LearnerSplit, LearnerDimensions, LearnerShape, metaclass=ABCMeta
):
    """Abstract base owning the learner registry.

    Maps user-facing learner-type names to model classes, stores created
    learner instances by name, and parses the free-form parameter script a
    user supplies for each learner.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.learner = {}  # learner name -> learner instance
        self.learn_dict = {  # learner-type name -> model class
            "Line": LineModel,
            "Ridge": LineModel,
            "Lasso": LineModel,
            "LogisticRegression": LogisticregressionModel,
            "Knn_class": KnnModel,
            "Knn": KnnModel,
            "Tree_class": TreeModel,
            "Tree": TreeModel,
            "Forest": ForestModel,
            "Forest_class": ForestModel,
            "GradientTree_class": GradienttreeModel,
            "GradientTree": GradienttreeModel,
            "Variance": VarianceModel,
            "SelectKBest": SelectkbestModel,
            "Z-Score": StandardizationModel,
            "MinMaxScaler": MinmaxscalerModel,
            "LogScaler": LogscalerModel,
            "atanScaler": AtanscalerModel,
            "decimalScaler": DecimalscalerModel,
            "sigmodScaler": SigmodscalerModel,
            "Mapzoom": MapzoomModel,
            "Fuzzy_quantization": FuzzyQuantizationModel,
            "Regularization": RegularizationModel,
            "Binarizer": BinarizerModel,
            "Discretization": DiscretizationModel,
            "Label": LabelModel,
            "OneHotEncoder": OneHotEncoderModel,
            "Missed": MissedModel,
            "PCA": PcaModel,
            "RPCA": RpcaModel,
            "KPCA": KpcaModel,
            "LDA": LdaModel,
            "SVC": SvcModel,
            "SVR": SvrModel,
            "MLP": MlpModel,
            "MLP_class": MlpModel,
            "NMF": NmfModel,
            "t-SNE": TsneModel,
            "k-means": KmeansModel,
            "Agglomerative": AgglomerativeModel,
            "DBSCAN": DbscanModel,
            "ClassBar": ClassBar,
            "FeatureScatter": NearFeatureScatter,
            "FeatureScatterClass": NearFeatureScatterClass,
            "FeatureScatter_all": NearFeatureScatterMore,
            "FeatureScatterClass_all": NearFeatureScatterClassMore,
            "HeatMap": NumpyHeatMap,
            "FeatureY-X": FeatureScatterYX,
            "ClusterTree": ClusterTree,
            "MatrixScatter": MatrixScatter,
            "Correlation": Corr,
            "Statistics": DataAnalysis,
            "Fast_Fourier": FastFourier,
            "Reverse_Fast_Fourier": ReverseFastFourier,
            "[2]Reverse_Fast_Fourier": ReverseFastFourierTwonumpy,
        }
        self.data_type = {}  # learner name -> learner-type string

    @staticmethod
    def learner_parameters(parameters, data_type):  # parse the parameter script
        """Build a kwargs dict for a new learner from a parameter script.

        ``parameters`` is a user-supplied Python snippet; the names it
        assigns become entries of ``original_parameter``.  Every supported
        option is copied into ``target_parameter`` with a default, some
        defaults depending on ``data_type``.

        SECURITY NOTE: ``exec`` runs the snippet with no sandboxing — the
        parameter string must come from a trusted source.
        """
        original_parameter = {}
        target_parameter = {}
        # Collect the user's assignments.
        exec(parameters, original_parameter)
        # Regularization strength: MLP uses a much smaller default alpha.
        if data_type in ("MLP", "MLP_class"):
            target_parameter["alpha"] = float(
                original_parameter.get("alpha", 0.0001)
            )  # MLP regularization
        else:
            target_parameter["alpha"] = float(
                original_parameter.get("alpha", 1.0)
            )  # L1/L2 regularization
        target_parameter["C"] = float(
            original_parameter.get(
                "C", 1.0))  # inverse regularization strength
        if data_type in ("MLP", "MLP_class"):
            target_parameter["max_iter"] = int(
                original_parameter.get("max_iter", 200)
            )  # MLP default iterations
        else:
            target_parameter["max_iter"] = int(
                original_parameter.get("max_iter", 1000)
            )  # linear-model default iterations
        target_parameter["n_neighbors"] = int(
            original_parameter.get("K_knn", 5)
        )  # knn neighbor count (different user-facing name)
        target_parameter["p"] = int(original_parameter.get("p", 2))  # Minkowski power
        target_parameter["nDim_2"] = bool(
            original_parameter.get("nDim_2", True)
        )  # whether to reduce data to 2-D
        # Split criterion: regression trees use mse/mae, classifiers gini/entropy.
        if data_type in ("Tree", "Forest", "GradientTree"):
            target_parameter["criterion"] = (
                "mse" if bool(
                    original_parameter.get(
                        "is_MSE",
                        True)) else "mae")
        else:
            target_parameter["criterion"] = (
                "gini" if bool(
                    original_parameter.get(
                        "is_Gini",
                        True)) else "entropy")
        target_parameter["splitter"] = (
            "random" if bool(
                original_parameter.get(
                    "is_random",
                    False)) else "best")  # tree node split strategy
        target_parameter["max_features"] = original_parameter.get(
            "max_features", None
        )  # max features considered per split
        target_parameter["max_depth"] = original_parameter.get(
            "max_depth", None
        )  # max tree depth
        target_parameter["min_samples_split"] = int(
            original_parameter.get("min_samples_split", 2)
        )  # minimum samples required to split (guards overfitting)
        # NOTE(review): "P" reads the *min_samples_split* key — looks like a
        # copy-paste slip; confirm which option it should read before fixing.
        target_parameter["P"] = float(
            original_parameter.get(
                "min_samples_split", 0.8))
        target_parameter["k"] = original_parameter.get("k", 1)
        target_parameter["score_func"] = {
            "chi2": chi2,
            "f_classif": f_classif,
            "mutual_info_classif": mutual_info_classif,
            "f_regression": f_regression,
            "mutual_info_regression": mutual_info_regression,
        }.get(original_parameter.get("score_func", "f_classif"), f_classif)
        target_parameter["feature_range"] = tuple(
            original_parameter.get("feature_range", (0, 1))
        )
        target_parameter["norm"] = original_parameter.get(
            "norm", "l2")  # regularization norm: l1 or l2
        target_parameter["threshold"] = float(
            original_parameter.get("threshold", 0.0)
        )  # binarization threshold
        target_parameter["split_range"] = list(
            original_parameter.get("split_range", [0])
        )  # discretization split points
        target_parameter["ndim_up"] = bool(
            original_parameter.get("ndim_up", False))
        target_parameter["miss_value"] = original_parameter.get(
            "miss_value", np.nan)
        target_parameter["fill_method"] = original_parameter.get(
            "fill_method", "mean")
        target_parameter["fill_value"] = original_parameter.get(
            "fill_value", None)
        target_parameter["n_components"] = original_parameter.get(
            "n_components", 1)
        target_parameter["kernel"] = original_parameter.get(
            "kernel", "rbf" if data_type in ("SVR", "SVC") else "linear"
        )
        target_parameter["n_Tree"] = original_parameter.get("n_Tree", 100)
        target_parameter["gamma"] = original_parameter.get("gamma", 1)
        target_parameter["hidden_size"] = tuple(
            original_parameter.get("hidden_size", (100,))
        )
        target_parameter["activation"] = str(
            original_parameter.get("activation", "relu")
        )
        target_parameter["solver"] = str(
            original_parameter.get("solver", "adam"))
        if data_type in ("k-means",):
            target_parameter["n_clusters"] = int(
                original_parameter.get("n_clusters", 8)
            )
        else:
            target_parameter["n_clusters"] = int(
                original_parameter.get("n_clusters", 2)
            )
        # NOTE(review): eps and min_samples both read the *n_clusters* key —
        # almost certainly meant "eps" and "min_samples"; confirm before fixing.
        target_parameter["eps"] = float(
            original_parameter.get(
                "n_clusters", 0.5))
        target_parameter["min_samples"] = int(
            original_parameter.get("n_clusters", 5))
        target_parameter["white_PCA"] = bool(
            original_parameter.get("white_PCA", False))
        return target_parameter

    def get_learner(self, name):
        """Return the learner instance registered under ``name``."""
        return self.learner[name]

    def get_learner_type(self, name):
        """Return the learner-type string registered under ``name``."""
        return self.data_type[name]
@plugin_class_loading(get_path(r"template/machinelearning"))
class MachineLearnerAdd(MachineLearnerInit, metaclass=ABCMeta):
    """Mixin with factory methods that register new learners by name.

    Every method builds a unique name of the form ``Le[<index>]...``,
    creates the learner, stores it in ``self.learner`` and records its type
    in ``self.data_type``.
    """

    def add_learner(self, learner_str, parameters=""):
        """Create a learner of type ``learner_str`` from a parameter script."""
        get = self.learn_dict[learner_str]
        name = f"Le[{len(self.learner)}]{learner_str}"
        # Parse the user-supplied parameters.
        args_use = self.learner_parameters(parameters, learner_str)
        # Instantiate and register the learner.
        self.learner[name] = get(model=learner_str, args_use=args_use)
        self.data_type[name] = learner_str

    def add_curve_fitting(self, learner):
        """Register a curve-fitting learner defined by a user script.

        The script may define ``name`` and should define ``f``; a linear
        ``k * x + b`` is used when ``f`` is missing.
        SECURITY NOTE: ``exec`` runs the script unsandboxed.
        """
        named_domain = {}
        exec(learner, named_domain)
        name = f'Le[{len(self.learner)}]{named_domain.get("name", "SELF")}'
        func = named_domain.get("f", lambda x, k, b: k * x + b)
        self.learner[name] = CurveFitting(name, learner, func)
        self.data_type[name] = "Curve_fitting"

    def add_select_from_model(self, learner, parameters=""):
        """Wrap an existing learner in a SelectFromModel feature selector."""
        model = self.get_learner(learner)
        name = f"Le[{len(self.learner)}]SelectFrom_Model:{learner}"
        # Parse the user-supplied parameters.
        args_use = self.learner_parameters(parameters, "SelectFrom_Model")
        # Instantiate and register the wrapper.
        self.learner[name] = SelectFromModel(
            learner=model, args_use=args_use, Dic=self.learn_dict
        )
        self.data_type[name] = "SelectFrom_Model"

    def add_predictive_heat_map(self, learner, parameters=""):
        """Wrap an existing learner in a predictive heat-map visualizer."""
        model = self.get_learner(learner)
        name = f"Le[{len(self.learner)}]Predictive_HeatMap:{learner}"
        args_use = self.learner_parameters(parameters, "Predictive_HeatMap")
        self.learner[name] = PredictiveHeatmap(
            learner=model, args_use=args_use)
        self.data_type[name] = "Predictive_HeatMap"

    def add_predictive_heat_map_more(self, learner, parameters=""):
        """Wrap an existing learner in the multi-feature heat-map visualizer."""
        model = self.get_learner(learner)
        name = f"Le[{len(self.learner)}]Predictive_HeatMap_More:{learner}"
        args_use = self.learner_parameters(
            parameters, "Predictive_HeatMap_More")
        self.learner[name] = PredictiveHeatmapMore(
            learner=model, args_use=args_use)
        self.data_type[name] = "Predictive_HeatMap_More"

    def add_view_data(self, learner, parameters=""):
        """Wrap an existing learner in a data viewer."""
        model = self.get_learner(learner)
        name = f"Le[{len(self.learner)}]View_data:{learner}"
        args_use = self.learner_parameters(parameters, "View_data")
        self.learner[name] = ViewData(learner=model, args_use=args_use)
        self.data_type[name] = "View_data"
@plugin_class_loading(get_path(r"template/machinelearning"))
class MachineLearnerScore(MachineLearnerInit, metaclass=ABCMeta):
    """Mixin for scoring, evaluating and visualizing trained learners."""

    def score(self, name_x, name_y, learner):
        """Return ``model.score`` for the sheets named ``name_x``/``name_y``."""
        model = self.get_learner(learner)
        x = self.get_sheet(name_x)
        y = self.get_sheet(name_y)
        return model.score(x, y)

    def model_evaluation(self, learner, save_dir, name_x, name_y, func=0):
        """Run one of the evaluation reports and export it.

        ``func`` selects 0=classification, 1=regression, 2=clustering.
        Returns ``(report, output_dir)``; the directory is optionally
        created fresh and packed into a ``.tar.gz`` (module-level flags).
        """
        x = self.get_sheet(name_x)
        y = self.get_sheet(name_y)
        if new_dir_global:
            dic = save_dir + f"{os.sep}{learner}分类评分[CoTan]"
            new_dic = dic
            a = 0
            while exists(new_dic):  # bump the suffix until the path is unused
                new_dic = dic + f"[{a}]"
                a += 1
            mkdir(new_dic)
        else:
            new_dic = save_dir
        model = self.get_learner(learner)
        # Pick the scoring routine by index.
        func = [
            model.class_score,
            model.regression_score,
            model.clusters_score][func]
        save = func(new_dic, x, y)[0]
        if tar_global:
            pack(f"{new_dic}.tar.gz", new_dic)  # pack results
        return save, new_dic

    def model_visualization(self, learner, save_dir):
        """Export a learner's visualization (and optionally its pickled model).

        Returns ``(charts, output_dir)``.
        """
        if new_dir_global:
            dic = save_dir + f"{os.sep}{learner}数据[CoTan]"
            new_dic = dic
            a = 0
            while exists(new_dic):  # bump the suffix until the path is unused
                new_dic = dic + f"[{a}]"
                a += 1
            mkdir(new_dic)
        else:
            new_dic = save_dir
        model = self.get_learner(learner)
        # NOTE(review): ``model.model is list`` compares against the *type*
        # object, so the condition is effectively always True — probably
        # meant ``isinstance(model.model, list)``; confirm before changing.
        if (not (model.model is None) or not (
                model.model is list)) and clf_global:
            joblib.dump(model.model, new_dic + f"{os.sep}MODEL.model")  # persist model
        save = model.data_visualization(new_dic)[0]
        if tar_global:
            pack(f"{new_dic}.tar.gz", new_dic)  # pack results
        return save, new_dic
  4495. @plugin_class_loading(get_path(r"template/machinelearning"))
  4496. class LearnerActions(MachineLearnerInit, metaclass=ABCMeta):
  4497. def fit_model(self, x_name, y_name, learner, split=0.3, *args, **kwargs):
  4498. x_data = self.get_sheet(x_name)
  4499. y_data = self.get_sheet(y_name)
  4500. model = self.get_learner(learner)
  4501. return model.fit_model(
  4502. x_data, y_data, split=split, x_name=x_name, add_func=self.add_form
  4503. )
  4504. def predict(self, x_name, learner, **kwargs):
  4505. x_data = self.get_sheet(x_name)
  4506. model = self.get_learner(learner)
  4507. y_data, name = model.predict(
  4508. x_data, x_name=x_name, add_func=self.add_form)
  4509. self.add_form(y_data, f"{x_name}:{name}")
  4510. return y_data