template.py 188 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
7477847794780478147824783478447854786478747884789479047914792479347944795479647974798479948004801480248034804480548064807480848094810481148124813481448154816481748184819482048214822482348244825482648274828482948304831483248334834483548364837483848394840484148424843484448454846484748484849485048514852485348544855485648574858485948604861486248634864486548664867486848694870487148724873487448754876487748784879488048814882488348844885488648874888488948904891489248934894489548964897489848994900490149024903490449054906490749084909491049114912491349144915491649174918491949204921492249234924492549264927492849294930493149324933493449354936493749384939494049414942494349444945494649474948494949504951495249534954495549564957495849594960496149624963496449654966496749684969497049714972497349744975497649774978497949804981498249834984498549864987498849894990499149924993499449954996499749984999500050015002500350045005500650075008500950105011501250135014501550165017501850195020502150225023502450255026502750285029503050315032503350345035503650375038503950405041504250435044504550465047504850495050505150525053505450555056505750585059506050615062506350645065506650675068506950705071507250735074507550765077507850795080508150825083508450855086508750885089509050915092509350945095509650975098509951005101510251035104510551065107510851095110511151125113511451155116511751185119512051215122512351245125512651275128512951305131513251335134513551365137513851395140514151425143514451455146514751485149515051515152
  1. import joblib
  2. import re
  3. import tarfile
  4. from abc import ABCMeta, abstractmethod
  5. from os import getcwd, mkdir
  6. from os.path import split as path_split, splitext, basename, exists
  7. import os
  8. import logging
  9. from sklearn.svm import SVC, SVR # SVC是svm分类,SVR是svm回归
  10. from sklearn.cluster import KMeans, AgglomerativeClustering, DBSCAN
  11. from sklearn.manifold import TSNE
  12. from sklearn.neural_network import MLPClassifier, MLPRegressor
  13. from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as Lda
  14. from sklearn.decomposition import PCA, IncrementalPCA, KernelPCA, NMF
  15. from sklearn.impute import SimpleImputer
  16. from sklearn.preprocessing import *
  17. from sklearn.feature_selection import *
  18. from sklearn.metrics import *
  19. from sklearn.ensemble import (
  20. RandomForestClassifier,
  21. RandomForestRegressor,
  22. GradientBoostingClassifier,
  23. GradientBoostingRegressor,
  24. )
  25. import numpy as np
  26. import matplotlib.pyplot as plt
  27. from pandas import DataFrame, read_csv
  28. from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor, export_graphviz
  29. from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
  30. from sklearn.linear_model import *
  31. from sklearn.model_selection import train_test_split
  32. from scipy.fftpack import fft, ifft # 快速傅里叶变换
  33. from scipy import optimize
  34. from scipy.cluster.hierarchy import dendrogram, ward
  35. from pyecharts.components import Table as TableFisrt # 绘制表格
  36. from pyecharts.options.series_options import JsCode
  37. from pyecharts.charts import Tab as tab_First
  38. from pyecharts.charts import *
  39. from pyecharts import options as opts
  40. from pyecharts.components import Image
  41. from pyecharts.globals import CurrentConfig
  42. from system import plugin_class_loading, get_path, plugin_func_loading, basicConfig
# --- Module-level configuration (runs once at import time) ---
logging.basicConfig(**basicConfig)
# Serve pyecharts JS assets from the local "assets" directory instead of a CDN.
CurrentConfig.ONLINE_HOST = f"{getcwd()}{os.sep}assets{os.sep}"
# Settings
np.set_printoptions(threshold=np.inf)  # never truncate arrays when printing
# Shared pyecharts chart options: toolbox shown, scrollable legend at the bottom.
global_setting = dict(
    toolbox_opts=opts.ToolboxOpts(is_show=True),
    legend_opts=opts.LegendOpts(pos_bottom="3%", type_="scroll"),
)
# Same as global_setting but with the legend hidden.
global_not_legend = dict(
    toolbox_opts=opts.ToolboxOpts(is_show=True),
    legend_opts=opts.LegendOpts(is_show=False),
)
label_setting = dict(label_opts=opts.LabelOpts(is_show=False))
more_global = False  # plot with every feature?
all_global = True  # export charts?
csv_global = True  # export CSV?
clf_global = True  # export the trained model?
tar_global = True  # pack the output into a tar archive?
new_dir_global = True  # create a new output directory?
  62. class LearnBase(metaclass=ABCMeta):
  63. def __init__(self, *args, **kwargs):
  64. self.numpy_dict = {} # name:numpy
  65. self.fucn_add() # 制作Func_Dic
  66. def fucn_add(self):
  67. self.func_dict = {
  68. "abs": lambda x, y: np.abs(x),
  69. "sqrt": lambda x, y: np.sqrt(x),
  70. "pow": lambda x, y: x ** y,
  71. "loge": lambda x, y: np.log(x),
  72. "log10": lambda x, y: np.log10(x),
  73. "ceil": lambda x, y: np.ceil(x),
  74. "floor": lambda x, y: np.floor(x),
  75. "rint": lambda x, y: np.rint(x),
  76. "sin": lambda x, y: np.sin(x),
  77. "cos": lambda x, y: np.cos(x),
  78. "tan": lambda x, y: np.tan(x),
  79. "tanh": lambda x, y: np.tanh(x),
  80. "sinh": lambda x, y: np.sinh(x),
  81. "cosh": lambda x, y: np.cosh(x),
  82. "asin": lambda x, y: np.arcsin(x),
  83. "acos": lambda x, y: np.arccos(x),
  84. "atan": lambda x, y: np.arctan(x),
  85. "atanh": lambda x, y: np.arctanh(x),
  86. "asinh": lambda x, y: np.arcsinh(x),
  87. "acosh": lambda x, y: np.arccosh(x),
  88. "add": lambda x, y: x + y, # 矩阵或元素
  89. "sub": lambda x, y: x - y, # 矩阵或元素
  90. "mul": lambda x, y: np.multiply(x, y), # 元素级别
  91. "matmul": lambda x, y: np.matmul(x, y), # 矩阵
  92. "dot": lambda x, y: np.dot(x, y), # 矩阵
  93. "div": lambda x, y: x / y,
  94. "div_floor": lambda x, y: np.floor_divide(x, y),
  95. "power": lambda x, y: np.power(x, y), # 元素级
  96. }
  97. def get_form(self) -> dict:
  98. return self.numpy_dict.copy()
  99. def get_sheet(self, name) -> np.ndarray:
  100. return self.numpy_dict[name].copy()
  101. @abstractmethod
  102. def add_form(self, data, name):
  103. pass
  104. @plugin_class_loading(get_path(r"template/machinelearning"))
  105. class LearnerIO(LearnBase):
  106. def add_form(self, data: np.array, name):
  107. name = f"{name}[{len(self.numpy_dict)}]"
  108. self.numpy_dict[name] = data
  109. def del_sheet(self, name):
  110. del self.numpy_dict[name]
  111. def read_csv(
  112. self,
  113. file_dir,
  114. name,
  115. encoding="utf-8",
  116. str_must=False,
  117. sep=","):
  118. dtype = np.str if str_must else np.float
  119. dataframe = read_csv(
  120. file_dir,
  121. encoding=encoding,
  122. delimiter=sep,
  123. header=None)
  124. try:
  125. data = dataframe.to_numpy(dtype=dtype)
  126. except ValueError:
  127. data = dataframe.to_numpy(dtype=np.str)
  128. if data.ndim == 1:
  129. data = np.expand_dims(data, axis=1)
  130. self.add_form(data, name)
  131. return data
  132. def add_python(self, python_file, sheet_name):
  133. name = {}
  134. name.update(globals().copy())
  135. name.update(locals().copy())
  136. exec(python_file, name)
  137. exec("get = Creat()", name)
  138. if isinstance(name["get"], np.array):
  139. get = name["get"]
  140. else:
  141. get = np.array(name["get"])
  142. self.add_form(get, sheet_name)
  143. return get
  144. def to_csv(self, save_dir: str, name, sep) -> str:
  145. get: np.ndarray = self.get_sheet(name)
  146. np.savetxt(save_dir, get, delimiter=sep)
  147. return save_dir
  148. def to_html_one(self, name, html_dir=""):
  149. if html_dir == "":
  150. html_dir = f"{name}.html"
  151. get: np.ndarray = self.get_sheet(name)
  152. if get.ndim == 1:
  153. get = np.expand_dims(get, axis=1)
  154. get: list = get.tolist()
  155. for i in range(len(get)):
  156. get[i] = [i + 1] + get[i]
  157. headers = [i for i in range(len(get[0]))]
  158. table = TableFisrt()
  159. table.add(headers, get).set_global_opts(
  160. title_opts=opts.ComponentTitleOpts(
  161. title=f"表格:{name}", subtitle="CoTan~机器学习:查看数据"
  162. )
  163. )
  164. table.render(html_dir)
  165. return html_dir
  166. def to_html(self, name, html_dir="", html_type=0):
  167. if html_dir == "":
  168. html_dir = f"{name}.html"
  169. # 把要画的sheet放到第一个
  170. sheet_dict = self.get_form()
  171. del sheet_dict[name]
  172. sheet_list = [name] + list(sheet_dict.keys())
  173. class TabBase:
  174. def __init__(self, q):
  175. self.tab = q # 一个Tab
  176. def render(self, render_dir):
  177. return self.tab.render(render_dir)
  178. # 生成一个显示页面
  179. if html_type == 0:
  180. class NewTab(TabBase):
  181. def add(self, table_, k, *f):
  182. self.tab.add(table_, k)
  183. tab = NewTab(tab_First(page_title="CoTan:查看表格")) # 一个Tab
  184. elif html_type == 1:
  185. class NewTab(TabBase):
  186. def add(self, table_, *k):
  187. self.tab.add(table_)
  188. tab = NewTab(
  189. Page(
  190. page_title="CoTan:查看表格",
  191. layout=Page.DraggablePageLayout))
  192. else:
  193. class NewTab(TabBase):
  194. def add(self, table_, *k):
  195. self.tab.add(table_)
  196. tab = NewTab(
  197. Page(
  198. page_title="CoTan:查看表格",
  199. layout=Page.SimplePageLayout))
  200. # 迭代添加内容
  201. for name in sheet_list:
  202. get: np.ndarray = self.get_sheet(name)
  203. if get.ndim == 1:
  204. get = np.expand_dims(get, axis=1)
  205. get: list = get.tolist()
  206. for i in range(len(get)):
  207. get[i] = [i + 1] + get[i]
  208. headers = [i for i in range(len(get[0]))]
  209. table = TableFisrt()
  210. table.add(headers, get).set_global_opts(
  211. title_opts=opts.ComponentTitleOpts(
  212. title=f"表格:{name}", subtitle="CoTan~机器学习:查看数据"
  213. )
  214. )
  215. tab.add(table, f"表格:{name}")
  216. tab.render(html_dir)
  217. return html_dir
  218. @plugin_class_loading(get_path(r"template/machinelearning"))
  219. class LearnerMerge(LearnBase, metaclass=ABCMeta):
  220. def merge(self, name, axis=0): # aiis:0-横向合并(hstack),1-纵向合并(vstack),2-深度合并
  221. sheet_list = []
  222. for i in name:
  223. sheet_list.append(self.get_sheet(i))
  224. get = {0: np.hstack, 1: np.vstack, 2: np.dstack}[axis](sheet_list)
  225. self.add_form(np.array(get), f"{name[0]}合成")
  226. @plugin_class_loading(get_path(r"template/machinelearning"))
  227. class LearnerSplit(LearnBase, metaclass=ABCMeta):
  228. def split(self, name, split=2, axis=0): # aiis:0-横向分割(hsplit),1-纵向分割(vsplit)
  229. sheet = self.get_sheet(name)
  230. get = {0: np.hsplit, 1: np.vsplit, 2: np.dsplit}[axis](sheet, split)
  231. for i in get:
  232. self.add_form(i, f"{name[0]}分割")
  233. def two_split(self, name, split, axis): # 二分切割(0-横向,1-纵向)
  234. sheet = self.get_sheet(name)
  235. try:
  236. split = float(eval(split))
  237. if split < 1:
  238. split = int(split * len(sheet) if axis == 1 else len(sheet[0]))
  239. else:
  240. assert False
  241. except (ValueError, AssertionError):
  242. split = int(split)
  243. if axis == 0:
  244. self.add_form(sheet[:, split:], f"{name[0]}分割")
  245. self.add_form(sheet[:, :split], f"{name[0]}分割")
  246. @plugin_class_loading(get_path(r"template/machinelearning"))
  247. class LearnerDimensions(LearnBase, metaclass=ABCMeta):
  248. @staticmethod
  249. def deep(sheet: np.ndarray):
  250. return sheet.ravel()
  251. @staticmethod
  252. def down_ndim(sheet: np.ndarray): # 横向
  253. down_list = []
  254. for i in sheet:
  255. down_list.append(i.ravel())
  256. return np.array(down_list)
  257. @staticmethod
  258. def longitudinal_down_ndim(sheet: np.ndarray): # 纵向
  259. down_list = []
  260. for i in range(len(sheet[0])):
  261. down_list.append(sheet[:, i].ravel())
  262. return np.array(down_list).T
  263. def reval(self, name, axis): # axis:0-横向,1-纵向(带.T),2-深度
  264. sheet = self.get_sheet(name)
  265. self.add_form(
  266. {0: self.down_ndim, 1: self.longitudinal_down_ndim, 2: self.deep}[axis](
  267. sheet
  268. ).copy(),
  269. f"{name}伸展",
  270. )
  271. def del_ndim(self, name): # 删除无用维度
  272. sheet = self.get_sheet(name)
  273. self.add_form(np.squeeze(sheet), f"{name}降维")
  274. @plugin_class_loading(get_path(r"template/machinelearning"))
  275. class LearnerShape(LearnBase, metaclass=ABCMeta):
  276. def transpose(self, name, func: list):
  277. sheet = self.get_sheet(name)
  278. if sheet.ndim <= 2:
  279. self.add_form(sheet.transpose().copy(), f"{name}.T")
  280. else:
  281. self.add_form(np.transpose(sheet, func).copy(), f"{name}.T")
  282. def reshape(self, name, shape: list):
  283. sheet = self.get_sheet(name)
  284. self.add_form(sheet.reshape(shape).copy(), f"{name}.r")
  285. @plugin_class_loading(get_path(r"template/machinelearning"))
  286. class Calculation(LearnBase, metaclass=ABCMeta):
  287. def calculation_matrix(self, data, data_type, func):
  288. if 1 not in data_type:
  289. raise Exception
  290. func = self.func_dict.get(func, lambda x, y: x)
  291. args_data = []
  292. for i in range(len(data)):
  293. if data_type[i] == 0:
  294. args_data.append(data[i])
  295. else:
  296. args_data.append(self.get_sheet(data[i]))
  297. get = func(*args_data)
  298. self.add_form(get, f"{func}({data[0]},{data[1]})")
  299. return get
  300. class Machinebase(metaclass=ABCMeta): # 学习器的基类
  301. def __init__(self, *args, **kwargs):
  302. self.model = None
  303. self.have_fit = False
  304. self.have_predict = False
  305. self.x_traindata = None
  306. self.y_traindata = None
  307. # 有监督学习专有的testData
  308. self.x_testdata = None
  309. self.y_testdata = None
  310. # 记录这两个是为了克隆
  311. @abstractmethod
  312. def fit_model(self, x_data, y_data, split, increment, kwargs):
  313. pass
  314. @abstractmethod
  315. def score(self, x_data, y_data):
  316. pass
  317. @abstractmethod
  318. def class_score(self, save_dir, x_data, y_really):
  319. pass
  320. @staticmethod
  321. def _accuracy(y_predict, y_really): # 准确率
  322. return accuracy_score(y_really, y_predict)
  323. @staticmethod
  324. def _macro(y_predict, y_really, func_num=0):
  325. func = [recall_score, precision_score, f1_score] # 召回率,精确率和f1
  326. class_ = np.unique(y_really).tolist()
  327. result = func[func_num](y_really, y_predict, class_, average=None)
  328. return result, class_
  329. @staticmethod
  330. def _confusion_matrix(y_predict, y_really): # 混淆矩阵
  331. class_ = np.unique(y_really).tolist()
  332. return confusion_matrix(y_really, y_predict), class_
  333. @staticmethod
  334. def _kappa_score(y_predict, y_really):
  335. return cohen_kappa_score(y_really, y_predict)
  336. @abstractmethod
  337. def regression_score(self, save_dir, x_data, y_really):
  338. pass
  339. @abstractmethod
  340. def clusters_score(self, save_dir, x_data, args):
  341. pass
  342. @staticmethod
  343. def _mse(y_predict, y_really): # 均方误差
  344. return mean_squared_error(y_really, y_predict)
  345. @staticmethod
  346. def _mae(y_predict, y_really): # 中值绝对误差
  347. return median_absolute_error(y_really, y_predict)
  348. @staticmethod
  349. def _r2_score(y_predict, y_really): # 中值绝对误差
  350. return r2_score(y_really, y_predict)
  351. def _rmse(self, y_predict, y_really): # 中值绝对误差
  352. return self._mse(y_predict, y_really) ** 0.5
  353. @staticmethod
  354. def _coefficient_clustering(x_data, y_predict):
  355. means_score = silhouette_score(x_data, y_predict)
  356. outline_score = silhouette_samples(x_data, y_predict)
  357. return means_score, outline_score
  358. @abstractmethod
  359. def predict(self, x_data, args, kwargs):
  360. pass
  361. @abstractmethod
  362. def data_visualization(self, save_dir, args, kwargs):
  363. pass
@plugin_class_loading(get_path(r"template/machinelearning"))
class StudyMachinebase(Machinebase):
    """Concrete learner base: fitting, scoring and HTML evaluation reports."""

    def fit_model(self, x_data, y_data, split=0.3, increment=True, **kwargs):
        # Train (incrementally when the model supports it) and return
        # (train_score, test_score) computed on a fresh random split.
        y_data = y_data.ravel()
        try:
            # stack new data onto the stored data when incrementing; on the
            # first call (or a shape mismatch) fall back to a fresh copy
            assert not self.x_traindata is None or not increment
            self.x_traindata = np.vstack((x_data, self.x_traindata))
            self.y_traindata = np.vstack((y_data, self.y_traindata))
        except (AssertionError, ValueError):
            self.x_traindata = x_data.copy()
            self.y_traindata = y_data.copy()
        x_train, x_test, y_train, y_test = train_test_split(
            x_data, y_data, test_size=split
        )
        try:  # incremental training
            assert increment
            self.model.partial_fit(x_data, y_data)
        except (AssertionError, AttributeError):
            # model has no partial_fit (or increment=False): full refit
            self.model.fit(self.x_traindata, self.y_traindata)
        train_score = self.model.score(x_train, y_train)
        test_score = self.model.score(x_test, y_test)
        self.have_fit = True
        return train_score, test_score

    def score(self, x_data, y_data):
        # delegate scoring to the underlying scikit-learn estimator
        score = self.model.score(x_data, y_data)
        return score

    def class_score(self, save_dir, x_data: np.ndarray, y_really: np.ndarray):
        # Classification report: accuracy/kappa gauges, per-class bar charts
        # and a confusion-matrix heat map rendered into one HTML file.
        y_really: np.ndarray = y_really.ravel()
        y_predict: np.ndarray = self.predict(x_data)[0]
        accuracy = self._accuracy(y_predict, y_really)
        recall, class_list = self._macro(y_predict, y_really, 0)
        precision, class_list = self._macro(y_predict, y_really, 1)
        f1, class_list = self._macro(y_predict, y_really, 2)
        confusion_matrix_, class_list = self._confusion_matrix(
            y_predict, y_really)
        kappa = self._kappa_score(y_predict, y_really)
        class_list: list
        tab = Tab()

        def gauge_base(name: str, value_: float) -> Gauge:
            # percentage gauge (0-100)
            c = (
                Gauge()
                .add("", [(name, round(value_ * 100, 2))], min_=0, max_=100)
                .set_global_opts(title_opts=opts.TitleOpts(title=name))
            )
            return c

        tab.add(gauge_base("准确率", accuracy), "准确率")
        tab.add(gauge_base("kappa", kappa), "kappa")

        def bar_base(name, value_) -> Bar:
            # one bar per class
            c = (
                Bar()
                .add_xaxis(class_list)
                .add_yaxis(name, value_, **label_setting)
                .set_global_opts(
                    title_opts=opts.TitleOpts(title=name), **global_setting
                )
            )
            return c

        tab.add(bar_base("精确率", precision.tolist()), "精确率")
        tab.add(bar_base("召回率", recall.tolist()), "召回率")
        tab.add(bar_base("F1", f1.tolist()), "F1")

        def heatmap_base(name, value_, max_, min_, show) -> HeatMap:
            # class-by-class heat map; `show` hides labels for many classes
            c = (
                HeatMap()
                .add_xaxis(class_list)
                .add_yaxis(
                    name,
                    class_list,
                    value_,
                    label_opts=opts.LabelOpts(is_show=show, position="inside"),
                )
                .set_global_opts(
                    title_opts=opts.TitleOpts(title=name),
                    **global_setting,
                    visualmap_opts=opts.VisualMapOpts(
                        max_=max_, min_=min_, pos_right="3%"
                    ),
                )
            )
            return c

        value = [
            [class_list[i], class_list[j], float(confusion_matrix_[i, j])]
            for i in range(len(class_list))
            for j in range(len(class_list))
        ]
        tab.add(
            heatmap_base(
                "混淆矩阵",
                value,
                float(confusion_matrix_.max()),
                float(confusion_matrix_.min()),
                len(class_list) < 7,
            ),
            "混淆矩阵",
        )
        des_to_csv(save_dir, "混淆矩阵", confusion_matrix_, class_list, class_list)
        des_to_csv(
            save_dir, "评分", [
                precision, recall, f1], class_list, [
                "精确率", "召回率", "F1"])
        save = save_dir + rf"{os.sep}分类模型评估.HTML"
        tab.render(save)
        return save,

    def regression_score(
            self,
            save_dir,
            x_data: np.ndarray,
            y_really: np.ndarray):
        # Regression report: MSE/MAE/RMSE/R2 table rendered to HTML.
        y_really = y_really.ravel()
        y_predict = self.predict(x_data)[0]
        tab = Tab()
        mse = self._mse(y_predict, y_really)
        mae = self._mae(y_predict, y_really)
        r2_score_ = self._r2_score(y_predict, y_really)
        rmse = self._rmse(y_predict, y_really)
        tab.add(make_tab(["MSE", "MAE", "RMSE", "r2_Score"], [
            [mse, mae, rmse, r2_score_]]), "评估数据", )
        save = save_dir + rf"{os.sep}回归模型评估.HTML"
        tab.render(save)
        return save,

    def clusters_score(self, save_dir, x_data: np.ndarray, *args):
        # Clustering report: mean silhouette gauge plus per-sample silhouette
        # bar charts, chunked so no single chart gets too wide.
        y_predict = self.predict(x_data)[0]
        tab = Tab()
        coefficient, coefficient_array = self._coefficient_clustering(
            x_data, y_predict)

        def gauge_base(name: str, value: float) -> Gauge:
            # gauge whose scale adapts to the magnitude of the value
            c = (
                Gauge()
                .add(
                    "",
                    [(name, round(value * 100, 2))],
                    min_=0,
                    max_=10 ** (judging_digits(value * 100)),
                )
                .set_global_opts(title_opts=opts.TitleOpts(title=name))
            )
            return c

        def bar_base(name, value, xaxis) -> Bar:
            c = (
                Bar()
                .add_xaxis(xaxis)
                .add_yaxis(name, value, **label_setting)
                .set_global_opts(
                    title_opts=opts.TitleOpts(title=name), **global_setting
                )
            )
            return c

        tab.add(gauge_base("平均轮廓系数", coefficient), "平均轮廓系数")

        def bar_(coefficient_array_, name="数据轮廓系数"):
            # one bar chart for a slice of the per-sample silhouettes
            xaxis = [f"数据{i}" for i in range(len(coefficient_array_))]
            value = coefficient_array_.tolist()
            tab.add(bar_base(name, value, xaxis), name)

        n = 20
        if len(coefficient_array) <= n:
            bar_(coefficient_array)
        elif len(coefficient_array) <= n ** 2:
            # up to 400 samples: windows of 20
            a = 0
            while a <= len(coefficient_array):
                b = a + n
                if b >= len(coefficient_array):
                    b = len(coefficient_array) + 1
                cofe_array = coefficient_array[a:b]
                bar_(cofe_array, f"{a}-{b}数据轮廓系数")
                a += n
            else:
                # larger data: 20 equal chunks, labelled as percentages
                split = np.hsplit(coefficient_array, n)
                a = 0
                for cofe_array in split:
                    bar_(cofe_array, f"{a}%-{a + n}%数据轮廓系数")
                    a += n
        save = save_dir + rf"{os.sep}聚类模型评估.HTML"
        tab.render(save)
        return save,

    def predict(self, x_data, *args, **kwargs):
        # predict and remember the inputs/outputs (kept for cloning and plots)
        self.x_testdata = x_data.copy()
        y_predict = self.model.predict(x_data,)
        self.y_testdata = y_predict.copy()
        self.have_predict = True
        return y_predict, "预测"

    def data_visualization(self, save_dir, *args, **kwargs):
        # no visualization by default
        return save_dir,
class PrepBase(StudyMachinebase):  # trains only once
    """Preprocessor base: transform-style learner that refuses a second fit."""

    def __init__(self, *args, **kwargs):
        super(PrepBase, self).__init__(*args, **kwargs)
        self.model = None

    def fit_model(self, x_data, y_data, increment=True, *args, **kwargs):
        if not self.have_predict:  # no second training after predict()
            y_data = y_data.ravel()
            try:
                # stack onto earlier data; first call falls through to a copy
                assert not self.x_traindata is None or not increment
                self.x_traindata = np.vstack((x_data, self.x_traindata))
                self.y_traindata = np.vstack((y_data, self.y_traindata))
            except (AssertionError, ValueError):
                self.x_traindata = x_data.copy()
                self.y_traindata = y_data.copy()
            try:  # incremental training
                assert increment
                self.model.partial_fit(x_data, y_data)
            except (AssertionError, AttributeError):
                self.model.fit(self.x_traindata, self.y_traindata)
        self.have_fit = True
        return "None", "None"

    def predict(self, x_data, *args, **kwargs):
        # "prediction" here is a feature transform
        self.x_testdata = x_data.copy()
        x_predict = self.model.transform(x_data)
        self.y_testdata = x_predict.copy()
        self.have_predict = True
        return x_predict, "特征工程"

    def score(self, x_data, y_data):
        return "None"  # preprocessors define no score
class Unsupervised(PrepBase):  # unsupervised, trains only once
    """Unsupervised preprocessor: fits on x only, and only before predict()."""

    def fit_model(self, x_data, increment=True, *args, **kwargs):
        if not self.have_predict:  # no second training after predict()
            self.y_traindata = None
            try:
                # stack onto earlier data; first call falls through to a copy
                assert not self.x_traindata is None or not increment
                self.x_traindata = np.vstack((x_data, self.x_traindata))
            except (AssertionError, ValueError):
                self.x_traindata = x_data.copy()
            try:  # incremental training
                assert increment
                self.model.partial_fit(x_data)
            except (AssertionError, AttributeError):
                # y_traindata is None here; sklearn accepts fit(x, None)
                self.model.fit(self.x_traindata, self.y_traindata)
        self.have_fit = True
        return "None", "None"
  589. class UnsupervisedModel(PrepBase): # 无监督
  590. def fit_model(self, x_data, increment=True, *args, **kwargs):
  591. self.y_traindata = None
  592. try:
  593. assert not self.x_traindata is None or not increment
  594. self.x_traindata = np.vstack((x_data, self.x_traindata))
  595. except (AssertionError, ValueError):
  596. self.x_traindata = x_data.copy()
  597. try: # 增量式训练
  598. if not increment:
  599. raise Exception
  600. self.model.partial_fit(x_data)
  601. except (AssertionError, AttributeError):
  602. self.model.fit(self.x_traindata, self.y_traindata)
  603. self.have_fit = True
  604. return "None", "None"
  605. @plugin_class_loading(get_path(r"template/machinelearning"))
  606. class ToPyebase(StudyMachinebase):
  607. def __init__(self, model, *args, **kwargs):
  608. super(ToPyebase, self).__init__(*args, **kwargs)
  609. self.model = None
  610. # 记录这两个是为了克隆
  611. self.k = {}
  612. self.model_Name = model
  613. def fit_model(self, x_data, y_data, *args, **kwargs):
  614. self.x_traindata = x_data.copy()
  615. self.y_traindata = y_data.ravel().copy()
  616. self.have_fit = True
  617. return "None", "None"
  618. def predict(self, x_data, *args, **kwargs):
  619. self.have_predict = True
  620. return np.array([]), "请使用训练"
  621. def score(self, x_data, y_data):
  622. return "None" # 没有score
  623. class DataAnalysis(ToPyebase): # 数据分析
  624. def data_visualization(self, save_dir, *args, **kwargs):
  625. tab = Tab()
  626. data = self.x_traindata
  627. def cumulative_calculation(tab_data, func, name, render_tab):
  628. sum_list = []
  629. for i in range(len(tab_data)): # 按行迭代数据
  630. sum_list.append([])
  631. for a in range(len(tab_data[i])):
  632. s = num_str(func(tab_data[: i + 1, a]), 8)
  633. sum_list[-1].append(s)
  634. des_to_csv(save_dir, f"{name}", sum_list)
  635. render_tab.add(
  636. make_tab([f"[{i}]" for i in range(len(sum_list[0]))], sum_list),
  637. f"{name}",
  638. )
  639. def geometric_mean(x):
  640. return np.power(np.prod(x), 1 / len(x)) # 几何平均数
  641. def square_mean(x):
  642. return np.sqrt(np.sum(np.power(x, 2)) / len(x)) # 平方平均数
  643. def harmonic_mean(x):
  644. return len(x) / np.sum(np.power(x, -1)) # 调和平均数
  645. cumulative_calculation(data, np.sum, "累计求和", tab)
  646. cumulative_calculation(data, np.var, "累计方差", tab)
  647. cumulative_calculation(data, np.std, "累计标准差", tab)
  648. cumulative_calculation(data, np.mean, "累计算术平均值", tab)
  649. cumulative_calculation(data, geometric_mean, "累计几何平均值", tab)
  650. cumulative_calculation(data, square_mean, "累计平方平均值", tab)
  651. cumulative_calculation(data, harmonic_mean, "累计调和平均值", tab)
  652. cumulative_calculation(data, np.median, "累计中位数", tab)
  653. cumulative_calculation(data, np.max, "累计最大值", tab)
  654. cumulative_calculation(data, np.min, "累计最小值", tab)
  655. save = save_dir + rf"{os.sep}数据分析.HTML"
  656. tab.render(save) # 生成HTML
  657. return save,
class Corr(ToPyebase):  # correlation and covariance
    """Render correlation and covariance matrices of the features as heat maps."""

    def data_visualization(self, save_dir, *args, **kwargs):
        tab = Tab()
        data = DataFrame(self.x_traindata)
        corr: np.ndarray = data.corr().to_numpy()  # correlation matrix
        cov: np.ndarray = data.cov().to_numpy()  # covariance matrix

        def heat_map(data_, name: str, max_, min_):
            # square feature-by-feature heat map, added straight onto `tab`
            x = [f"特征[{i}]" for i in range(len(data_))]
            y = [f"特征[{i}]" for i in range(len(data_[0]))]
            value = [
                (f"特征[{i}]", f"特征[{j}]", float(data_[i][j]))
                for i in range(len(data_))
                for j in range(len(data_[i]))
            ]
            c = (
                HeatMap()
                .add_xaxis(x)
                # hide value labels when there are too many features
                .add_yaxis(
                    f"数据",
                    y,
                    value,
                    label_opts=opts.LabelOpts(
                        is_show=True if len(x) <= 10 else False, position="inside"
                    ),
                )
                .set_global_opts(
                    title_opts=opts.TitleOpts(title="矩阵热力图"),
                    **global_not_legend,
                    yaxis_opts=opts.AxisOpts(
                        is_scale=True, type_="category"
                    ),  # 'category'
                    xaxis_opts=opts.AxisOpts(is_scale=True, type_="category"),
                    visualmap_opts=opts.VisualMapOpts(
                        is_show=True, max_=max_, min_=min_, pos_right="3%"
                    ),
                )  # display
            )
            tab.add(c, name)

        heat_map(corr, "相关性热力图", 1, -1)
        heat_map(cov, "协方差热力图", float(cov.max()), float(cov.min()))
        des_to_csv(save_dir, f"相关性矩阵", corr)
        des_to_csv(save_dir, f"协方差矩阵", cov)
        save = save_dir + rf"{os.sep}数据相关性.HTML"
        tab.render(save)  # emit the HTML report
        return save,
class ViewData(ToPyebase):  # pushes a learner's stored data back to the caller
    """Expose another learner's train/test data through predict()'s add_func."""

    def __init__(
        self, args_use, learner, *args, **kwargs
    ):  # learner is the wrapped, already-created model being inspected
        super(ViewData, self).__init__(args_use, learner, *args, **kwargs)
        self.model = learner.Model
        self.Select_Model = None
        self.have_fit = learner.have_Fit
        self.model_Name = "Select_Model"
        self.learner = learner
        self.learner_name = learner.Model_Name

    def fit_model(self, *args, **kwargs):
        # nothing to train
        self.have_fit = True
        return "None", "None"

    def predict(self, x_data, add_func=None, *args, **kwargs):
        # NOTE(review): add_func defaults to None but is called unconditionally
        # when training data exists — callers appear to always pass it; confirm.
        x_traindata = self.learner.x_traindata
        y_traindata = self.learner.y_traindata
        x_name = self.learner_name
        if x_traindata is not None:
            add_func(x_traindata, f"{x_name}:x训练数据")
        try:
            # missing attributes / None values are logged and skipped
            x_testdata = self.x_testdata
            if x_testdata is not None:
                add_func(x_testdata, f"{x_name}:x测试数据")
        except BaseException as e:
            logging.warning(str(e))
        try:
            y_testdata = self.y_testdata.copy()
            if y_testdata is not None:
                add_func(y_testdata, f"{x_name}:y测试数据")
        except BaseException as e:
            logging.warning(str(e))
        self.have_fit = True
        if y_traindata is None:
            return np.array([]), "y训练数据"
        return y_traindata, "y训练数据"

    def data_visualization(self, save_dir, *args, **kwargs):
        # nothing to draw
        return save_dir,
class MatrixScatter(ToPyebase):  # matrix scatter plot
    """Scatter-plot the raw training matrix (ndim<=2) or tensor (ndim==3)."""

    def data_visualization(self, save_dir, *args, **kwargs):
        tab = Tab()
        data = self.x_traindata
        if data.ndim <= 2:  # 1- or 2-dimensional data
            c = (
                Scatter()
                .add_xaxis([f"{i}" for i in range(data.shape[1])])
                .set_global_opts(
                    title_opts=opts.TitleOpts(title=f"矩阵散点图"), **global_not_legend
                )
            )
            if data.ndim == 2:
                for num in range(len(data)):  # one series per row
                    i = data[num]
                    c.add_yaxis(f"{num}", [[f"{num}", x]
                                           for x in i], color="#FFFFFF")
            else:
                c.add_yaxis(f"0", [[0, x] for x in data], color="#FFFFFF")
            # show the value stored in the data point inside the marker
            c.set_series_opts(
                label_opts=opts.LabelOpts(
                    is_show=True,
                    color="#000000",
                    position="inside",
                    formatter=JsCode("function(params){return params.data[2];}"),
                ))
        elif data.ndim == 3:
            c = Scatter3D().set_global_opts(
                title_opts=opts.TitleOpts(title=f"矩阵散点图"), **global_not_legend
            )
            for num in range(len(data)):  # one 3D series per outer slice
                i = data[num]
                for s_num in range(len(i)):
                    s = i[s_num]
                    y_data = [[num, s_num, x, float(s[x])]
                              for x in range(len(s))]
                    c.add(
                        f"{num}",
                        y_data,
                        zaxis3d_opts=opts.Axis3DOpts(
                            type_="category"))
            c.set_series_opts(
                label_opts=opts.LabelOpts(
                    is_show=True,
                    color="#000000",
                    position="inside",
                    formatter=JsCode("function(params){return params.data[3];}"),
                ))
        else:
            c = Scatter()  # >3 dims: nothing sensible to draw
        tab.add(c, "矩阵散点图")
        save = save_dir + rf"{os.sep}矩阵散点图.HTML"
        tab.render(save)  # emit the HTML report
        return save,
class ClusterTree(ToPyebase):  # cluster dendrogram
    """Ward-linkage dendrogram of the training data, embedded as an image."""

    def data_visualization(self, save_dir, *args, **kwargs):
        tab = Tab()
        x_data = self.x_traindata
        linkage_array = ward(x_data)  # self.y_traindata holds the labels
        dendrogram(linkage_array)  # draws onto the current matplotlib figure
        plt.savefig(save_dir + rf"{os.sep}Cluster_graph.png")
        image = Image()
        image.add(src=save_dir + rf"{os.sep}Cluster_graph.png",).set_global_opts(
            title_opts=opts.ComponentTitleOpts(title="聚类树状图")
        )
        tab.add(image, "聚类树状图")
        save = save_dir + rf"{os.sep}聚类树状图.HTML"
        tab.render(save)  # emit the HTML report
        return save,
  811. class ClassBar(ToPyebase): # 类型柱状图
  812. def data_visualization(self, save_dir, *args, **kwargs):
  813. tab = Tab()
  814. x_data: np.ndarray = self.x_traindata.transpose()
  815. y_data: np.ndarray = self.y_traindata
  816. class_: list = np.unique(y_data).tolist() # 类型
  817. class_list = []
  818. for n_class in class_: # 生成class_list(class是1,,也就是二维的,下面会压缩成一维)
  819. class_list.append(y_data == n_class)
  820. for num_i in range(len(x_data)): # 迭代每一个特征
  821. i = x_data[num_i]
  822. i_con = is_continuous(i)
  823. if i_con and len(i) >= 11:
  824. # 存放绘图数据,每一层列表是一个类(leg),第二层是每个x_data
  825. c_list = [[0] * 10 for _ in class_list]
  826. start = i.min()
  827. end = i.max()
  828. n = (end - start) / 10 # 生成10条柱子
  829. x_axis = [] # x轴
  830. iter_num = 0 # 迭代到第n个
  831. while iter_num <= 9: # 把每个特征分为10类进行迭代
  832. # x_axis添加数据
  833. x_axis.append(
  834. f"({iter_num})[{round(start, 2)}-"
  835. f"{round((start + n) if (start + n) <= end or not iter_num == 9 else end, 2)}]")
  836. try:
  837. assert not iter_num == 9 # 执行到第10次时,直接获取剩下的所有
  838. s = (start <= i) == (i < end) # 布尔索引
  839. except AssertionError: # 因为start + n有超出end的风险
  840. s = (start <= i) == (i <= end) # 布尔索引
  841. # n_data = i[s] # 取得现在的特征数据
  842. for num in range(len(class_list)): # 根据类别进行迭代
  843. # 取得布尔数组:y_data == n_class也就是输出值为指定类型的bool矩阵,用于切片
  844. now_class: list = class_list[num]
  845. # 切片成和n_data一样的位置一样的形状(now_class就是一个bool矩阵)
  846. bool_class = now_class[s].ravel()
  847. # 用len计数 c_list = [[class1的数据],[class2的数据],[]]
  848. c_list[num][iter_num] = int(np.sum(bool_class))
  849. iter_num += 1
  850. start += n
  851. else:
  852. iter_np = np.unique(i)
  853. # 存放绘图数据,每一层列表是一个类(leg),第二层是每个x_data
  854. c_list = [[0] * len(iter_np) for _ in class_list]
  855. x_axis = [] # 添加x轴数据
  856. for i_num in range(len(iter_np)): # 迭代每一个i(不重复)
  857. i_data = iter_np[i_num]
  858. # n_data= i[i == i_data]#取得现在特征数据
  859. x_axis.append(f"[{i_data}]")
  860. for num in range(len(class_list)): # 根据类别进行迭代
  861. now_class = class_list[num] # 取得class_list的布尔数组
  862. # 切片成和n_data一样的位置一样的形状(now_class就是一个bool矩阵)
  863. bool_class = now_class[i == i_data]
  864. # 用len计数 c_list = [[class1的数据],[class2的数据],[]]
  865. c_list[num][i_num] = len(np.sum(bool_class).tolist())
  866. c = (
  867. Bar()
  868. .add_xaxis(x_axis)
  869. .set_global_opts(
  870. title_opts=opts.TitleOpts(title="类型-特征统计柱状图"),
  871. **global_setting,
  872. xaxis_opts=opts.AxisOpts(type_="category"),
  873. yaxis_opts=opts.AxisOpts(type_="value"),
  874. )
  875. )
  876. y_axis = []
  877. for i in range(len(c_list)):
  878. y_axis.append(f"{class_[i]}")
  879. c.add_yaxis(f"{class_[i]}", c_list[i], **label_setting)
  880. des_to_csv(
  881. save_dir,
  882. f"类型-[{num_i}]特征统计柱状图",
  883. c_list,
  884. x_axis,
  885. y_axis)
  886. tab.add(c, f"类型-[{num_i}]特征统计柱状图")
  887. # 未完成
  888. save = save_dir + rf"{os.sep}特征统计.HTML"
  889. tab.render(save) # 生成HTML
  890. return save,
class NumpyHeatMap(ToPyebase):  # heat map of a raw NumPy matrix
    """Render the training matrix itself as a heat map plus a value table."""

    def data_visualization(self, save_dir, *args, **kwargs):
        tab = Tab()
        data = self.x_traindata
        x = [f"横[{i}]" for i in range(len(data))]
        y = [f"纵[{i}]" for i in range(len(data[0]))]
        value = [
            (f"横[{i}]", f"纵[{j}]", float(data[i][j]))
            for i in range(len(data))
            for j in range(len(data[i]))
        ]
        c = (
            HeatMap()
            .add_xaxis(x)
            .add_yaxis(f"数据", y, value, **label_setting)  # value[0] is the x key
            .set_global_opts(
                title_opts=opts.TitleOpts(title="矩阵热力图"),
                **global_not_legend,
                yaxis_opts=opts.AxisOpts(
                    is_scale=True, type_="category"),  # 'category'
                xaxis_opts=opts.AxisOpts(is_scale=True, type_="category"),
                visualmap_opts=opts.VisualMapOpts(
                    is_show=True,
                    max_=float(data.max()),
                    min_=float(data.min()),
                    pos_right="3%",
                ),
            )  # display
        )
        tab.add(c, "矩阵热力图")
        tab.add(make_tab(x, data.transpose().tolist()), f"矩阵热力图:表格")
        save = save_dir + rf"{os.sep}矩阵热力图.HTML"
        tab.render(save)  # emit the HTML report
        return save,
  925. class PredictiveHeatmapBase(ToPyebase): # 绘制预测型热力图
  926. def __init__(
  927. self, args_use, learner, *args, **kwargs
  928. ): # model表示当前选用的模型类型,Alpha针对正则化的参数
  929. super(
  930. PredictiveHeatmapBase,
  931. self).__init__(
  932. args_use,
  933. learner,
  934. *
  935. args,
  936. **kwargs)
  937. self.model = learner.Model
  938. self.select_model = None
  939. self.have_fit = learner.have_Fit
  940. self.model_Name = "Select_Model"
  941. self.learner = learner
  942. self.x_traindata = learner.x_traindata.copy()
  943. self.y_traindata = learner.y_traindata.copy()
  944. self.means = []
  945. def fit_model(self, x_data, *args, **kwargs):
  946. try:
  947. self.means = x_data.ravel()
  948. except BaseException as e:
  949. logging.warning(str(e))
  950. self.have_fit = True
  951. return "None", "None"
  952. def data_visualization(
  953. self,
  954. save_dir,
  955. decision_boundary_func=None,
  956. prediction_boundary_func=None,
  957. *args,
  958. **kwargs,
  959. ):
  960. tab = Tab()
  961. y = self.y_traindata
  962. x_data = self.x_traindata
  963. try: # 如果没有class
  964. class_ = self.model.classes_.tolist()
  965. class_heard = [f"类别[{i}]" for i in range(len(class_))]
  966. # 获取数据
  967. get, x_means, x_range, data_type = training_visualization(
  968. x_data, class_, y)
  969. # 可使用自带的means,并且nan表示跳过
  970. for i in range(min([len(x_means), len(self.means)])):
  971. try:
  972. g = self.means[i]
  973. if g == np.nan:
  974. raise Exception
  975. x_means[i] = g
  976. except BaseException as e:
  977. logging.warning(str(e))
  978. get = decision_boundary_func(
  979. x_range, x_means, self.learner.predict, class_, data_type
  980. )
  981. for i in range(len(get)):
  982. tab.add(get[i], f"{i}预测热力图")
  983. heard = class_heard + [f"普适预测第{i}特征" for i in range(len(x_means))]
  984. data = class_ + [f"{i}" for i in x_means]
  985. c = Table().add(headers=heard, rows=[data])
  986. tab.add(c, "数据表")
  987. except AttributeError:
  988. get, x_means, x_range, data_type = regress_visualization(x_data, y)
  989. get = prediction_boundary_func(
  990. x_range, x_means, self.learner.predict, data_type
  991. )
  992. for i in range(len(get)):
  993. tab.add(get[i], f"{i}预测热力图")
  994. heard = [f"普适预测第{i}特征" for i in range(len(x_means))]
  995. data = [f"{i}" for i in x_means]
  996. c = Table().add(headers=heard, rows=[data])
  997. tab.add(c, "数据表")
  998. save = save_dir + rf"{os.sep}预测热力图.HTML"
  999. tab.render(save) # 生成HTML
  1000. return save,
  1001. class PredictiveHeatmap(PredictiveHeatmapBase): # 绘制预测型热力图
  1002. def data_visualization(self, save_dir, *args, **kwargs):
  1003. return super().data_visualization(
  1004. save_dir, decision_boundary, prediction_boundary
  1005. )
  1006. class PredictiveHeatmapMore(PredictiveHeatmapBase): # 绘制预测型热力图_More
  1007. def data_visualization(self, save_dir, *args, **kwargs):
  1008. return super().data_visualization(
  1009. save_dir, decision_boundary_more, prediction_boundary_more
  1010. )
@plugin_class_loading(get_path(r"template/machinelearning"))
class NearFeatureScatterClassMore(ToPyebase):
    """Feature scatter plots colored by class label (no cluster centers)."""

    def data_visualization(self, save_dir, *args, **kwargs):
        tab = Tab()
        x_data = self.x_traindata
        y = self.y_traindata
        class_ = np.unique(y).ravel().tolist()  # distinct labels
        class_heard = [f"簇[{i}]" for i in range(len(class_))]
        get, x_means, x_range, data_type = training_visualization_more_no_center(
            x_data, class_, y)
        for i in range(len(get)):
            tab.add(get[i], f"{i}训练数据散点图")
        heard = class_heard + [f"普适预测第{i}特征" for i in range(len(x_means))]
        data = class_ + [f"{i}" for i in x_means]
        c = Table().add(headers=heard, rows=[data])
        tab.add(c, "数据表")
        save = save_dir + rf"{os.sep}数据特征散点图(分类).HTML"
        tab.render(save)  # emit the HTML report
        return save,
  1030. @plugin_class_loading(get_path(r"template/machinelearning"))
  1031. class NearFeatureScatterMore(ToPyebase):
  1032. def data_visualization(self, save_dir, *args, **kwargs):
  1033. tab = Tab()
  1034. x_data = self.x_traindata
  1035. x_means = quick_stats(x_data).get()[0]
  1036. get_y = feature_visualization(x_data, "数据散点图") # 转换
  1037. for i in range(len(get_y)):
  1038. tab.add(get_y[i], f"[{i}]数据x-x散点图")
  1039. heard = [f"普适预测第{i}特征" for i in range(len(x_means))]
  1040. data = [f"{i}" for i in x_means]
  1041. c = Table().add(headers=heard, rows=[data])
  1042. tab.add(c, "数据表")
  1043. save = save_dir + rf"{os.sep}数据特征散点图.HTML"
  1044. tab.render(save) # 生成HTML
  1045. return save,
  1046. class NearFeatureScatterClass(ToPyebase): # 临近特征散点图:分类数据
  1047. def data_visualization(self, save_dir, *args, **kwargs):
  1048. # 获取数据
  1049. class_ = np.unique(self.y_traindata).ravel().tolist()
  1050. class_heard = [f"类别[{i}]" for i in range(len(class_))]
  1051. tab = Tab()
  1052. y = self.y_traindata
  1053. x_data = self.x_traindata
  1054. get, x_means, x_range, data_type = training_visualization(
  1055. x_data, class_, y)
  1056. for i in range(len(get)):
  1057. tab.add(get[i], f"{i}临近特征散点图")
  1058. heard = class_heard + [f"普适预测第{i}特征" for i in range(len(x_means))]
  1059. data = class_ + [f"{i}" for i in x_means]
  1060. c = Table().add(headers=heard, rows=[data])
  1061. tab.add(c, "数据表")
  1062. save = save_dir + rf"{os.sep}临近数据特征散点图(分类).HTML"
  1063. tab.render(save) # 生成HTML
  1064. return save,
  1065. class NearFeatureScatter(ToPyebase): # 临近特征散点图:连续数据
  1066. def data_visualization(self, save_dir, *args, **kwargs):
  1067. tab = Tab()
  1068. x_data = self.x_traindata.transpose()
  1069. get, x_means, x_range, data_type = training_visualization_no_class(
  1070. x_data)
  1071. for i in range(len(get)):
  1072. tab.add(get[i], f"{i}临近特征散点图")
  1073. columns = [f"普适预测第{i}特征" for i in range(len(x_means))]
  1074. data = [f"{i}" for i in x_means]
  1075. tab.add(make_tab(columns, [data]), "数据表")
  1076. save = save_dir + rf"{os.sep}临近数据特征散点图.HTML"
  1077. tab.render(save) # 生成HTML
  1078. return save,
  1079. class FeatureScatterYX(ToPyebase): # y-x图
  1080. def data_visualization(self, save_dir, *args, **kwargs):
  1081. tab = Tab()
  1082. x_data = self.x_traindata
  1083. y = self.y_traindata
  1084. get, x_means, x_range, data_type = regress_visualization(x_data, y)
  1085. for i in range(len(get)):
  1086. tab.add(get[i], f"{i}特征x-y散点图")
  1087. columns = [f"普适预测第{i}特征" for i in range(len(x_means))]
  1088. data = [f"{i}" for i in x_means]
  1089. tab.add(make_tab(columns, [data]), "数据表")
  1090. save = save_dir + rf"{os.sep}特征y-x图像.HTML"
  1091. tab.render(save) # 生成HTML
  1092. return save,
@plugin_class_loading(get_path(r"template/machinelearning"))
class LineModel(StudyMachinebase):
    """Linear-regression family learner: plain Line, Ridge or Lasso."""

    def __init__(
        self, args_use, model, *args, **kwargs
    ):  # model selects the estimator; alpha is the regularization strength
        super(LineModel, self).__init__(*args, **kwargs)
        all_model = {
            "Line": LinearRegression,
            "Ridge": Ridge,
            "Lasso": Lasso}[model]
        if model == "Line":
            self.model = all_model()
            self.k = {}
        else:
            # regularized variants take alpha and max_iter
            self.model = all_model(
                alpha=args_use["alpha"], max_iter=args_use["max_iter"]
            )
            self.k = {
                "alpha": args_use["alpha"],
                "max_iter": args_use["max_iter"]}
        # recorded so the learner can be cloned
        self.Alpha = args_use["alpha"]
        self.max_iter = args_use["max_iter"]
        self.model_Name = model

    def data_visualization(self, save_dir, *args, **kwargs):
        # Per-feature scatter + fitted line, prediction heat maps, and the
        # coefficient/intercept tables, rendered into one HTML file.
        tab = Tab()
        x_data = self.x_traindata
        y = self.y_traindata
        w_list = self.model.coef_.tolist()
        w_heard = [f"系数w[{i}]" for i in range(len(w_list))]
        b = self.model.intercept_.tolist()
        get, x_means, x_range, data_type = regress_visualization(x_data, y)
        get_line = regress_w(x_data, w_list, b, x_means.copy())
        for i in range(len(get)):
            tab.add(get[i].overlap(get_line[i]), f"{i}预测类型图")
        get = prediction_boundary(x_range, x_means, self.predict, data_type)
        for i in range(len(get)):
            tab.add(get[i], f"{i}预测热力图")
        tab.add(coefficient_scatter_plot(w_heard, w_list), "系数w散点图")
        tab.add(coefficient_bar_plot(w_heard, self.model.coef_), "系数柱状图")
        columns = [
            f"普适预测第{i}特征" for i in range(
                len(x_means))] + w_heard + ["截距b"]
        data = [f"{i}" for i in x_means] + w_list + [b]
        if self.model_Name != "Line":
            # regularized models also report alpha and the iteration cap
            columns += ["阿尔法", "最大迭代次数"]
            data += [self.model.alpha, self.model.max_iter]
        tab.add(make_tab(columns, [data]), "数据表")
        des_to_csv(
            save_dir,
            "系数表",
            [w_list + [b]],
            [f"系数W[{i}]" for i in range(len(w_list))] + ["截距"],
        )
        des_to_csv(
            save_dir,
            "预测表",
            [[f"{i}" for i in x_means]],
            [f"普适预测第{i}特征" for i in range(len(x_means))],
        )
        save = save_dir + rf"{os.sep}线性回归模型.HTML"
        tab.render(save)  # emit the HTML report
        return save,
@plugin_class_loading(get_path(r"template/machinelearning"))
class LogisticregressionModel(StudyMachinebase):
    """Logistic-regression classifier with decision-boundary visualization."""

    def __init__(
        self, args_use, model, *args, **kwargs
    ):  # model is the chosen model key; C is the inverse regularization strength
        super(LogisticregressionModel, self).__init__(*args, **kwargs)
        self.model = LogisticRegression(
            C=args_use["C"], max_iter=args_use["max_iter"])
        # recorded so the learner can be cloned
        self.C = args_use["C"]
        self.max_iter = args_use["max_iter"]
        self.k = {"C": args_use["C"], "max_iter": args_use["max_iter"]}
        self.model_Name = model

    def data_visualization(self, save_dir="render.html", *args, **kwargs):
        # collect the fitted model's parameters
        w_array = self.model.coef_
        w_list = w_array.tolist()  # tabular form
        b = self.model.intercept_
        c = self.model.C
        max_iter = self.model.max_iter
        class_ = self.model.classes_.tolist()
        class_heard = [f"类别[{i}]" for i in range(len(class_))]
        tab = Tab()
        y = self.y_traindata
        x_data = self.x_traindata
        get, x_means, x_range, data_type = training_visualization(
            x_data, class_, y)
        get_line = training_w(x_data, class_, y, w_list, b, x_means.copy())
        for i in range(len(get)):
            tab.add(get[i].overlap(get_line[i]), f"{i}决策边界散点图")
        for i in range(len(w_list)):  # one coefficient chart pair per class row
            w = w_list[i]
            w_heard = [f"系数w[{i},{j}]" for j in range(len(w))]
            tab.add(coefficient_scatter_plot(w_heard, w), f"系数w[{i}]散点图")
            tab.add(coefficient_bar_plot(w_heard, w_array[i]), f"系数w[{i}]柱状图")
        columns = class_heard + \
            [f"截距{i}" for i in range(len(b))] + ["C", "最大迭代数"]
        data = class_ + b.tolist() + [c, max_iter]
        c = Table().add(headers=columns, rows=[data])  # `c` is reused as a chart
        tab.add(c, "数据表")
        c = Table().add(
            headers=[f"系数W[{i}]" for i in range(len(w_list[0]))], rows=w_list
        )
        tab.add(c, "系数数据表")
        c = Table().add(
            headers=[f"普适预测第{i}特征" for i in range(len(x_means))],
            rows=[[f"{i}" for i in x_means]],
        )
        tab.add(c, "普适预测数据表")
        des_to_csv(save_dir, "系数表", w_list, [
            f"系数W[{i}]" for i in range(len(w_list[0]))])
        des_to_csv(save_dir, "截距表", [b], [f"截距{i}" for i in range(len(b))])
        des_to_csv(
            save_dir,
            "预测表",
            [[f"{i}" for i in x_means]],
            [f"普适预测第{i}特征" for i in range(len(x_means))],
        )
        save = save_dir + rf"{os.sep}逻辑回归.HTML"
        tab.render(save)  # emit the HTML report
        return save,
  1217. class CategoricalData: # 数据统计助手
  1218. def __init__(self):
  1219. self.x_means = []
  1220. self.x_range = []
  1221. self.data_type = []
  1222. def __call__(self, x1, *args, **kwargs):
  1223. get = self.is_continuous(x1)
  1224. return get
  1225. def is_continuous(self, x1: np.array):
  1226. try:
  1227. x1_con = is_continuous(x1)
  1228. if x1_con:
  1229. self.x_means.append(np.mean(x1))
  1230. self.add_range(x1)
  1231. else:
  1232. assert False
  1233. return x1_con
  1234. except TypeError: # 找出出现次数最多的元素
  1235. new = np.unique(x1) # 去除相同的元素
  1236. count_list = []
  1237. for i in new:
  1238. count_list.append(np.sum(x1 == i))
  1239. index = count_list.index(max(count_list)) # 找出最大值的索引
  1240. self.x_means.append(x1[index])
  1241. self.add_range(x1, False)
  1242. return False
  1243. def add_range(self, x1: np.array, range_=True):
  1244. try:
  1245. assert range_
  1246. min_ = int(x1.min()) - 1
  1247. max_ = int(x1.max()) + 1
  1248. # 不需要复制列表
  1249. self.x_range.append([min_, max_])
  1250. self.data_type.append(1)
  1251. except AssertionError:
  1252. self.x_range.append(list(set(x1.tolist()))) # 去除多余元素
  1253. self.data_type.append(2)
  1254. def get(self):
  1255. return self.x_means, self.x_range, self.data_type
  1256. @plugin_class_loading(get_path(r"template/machinelearning"))
  1257. class KnnModel(StudyMachinebase):
  1258. def __init__(
  1259. self, args_use, model, *args, **kwargs
  1260. ): # model表示当前选用的模型类型,Alpha针对正则化的参数
  1261. super(KnnModel, self).__init__(*args, **kwargs)
  1262. all_model = {
  1263. "Knn_class": KNeighborsClassifier,
  1264. "Knn": KNeighborsRegressor}[model]
  1265. self.model = all_model(
  1266. p=args_use["p"],
  1267. n_neighbors=args_use["n_neighbors"])
  1268. # 记录这两个是为了克隆
  1269. self.n_neighbors = args_use["n_neighbors"]
  1270. self.p = args_use["p"]
  1271. self.k = {"n_neighbors": args_use["n_neighbors"], "p": args_use["p"]}
  1272. self.model_Name = model
  1273. def data_visualization(self, save_dir, *args, **kwargs):
  1274. tab = Tab()
  1275. y = self.y_traindata
  1276. x_data = self.x_traindata
  1277. y_test = self.y_testdata
  1278. x_test = self.x_testdata
  1279. if self.model_Name == "Knn_class":
  1280. class_ = self.model.classes_.tolist()
  1281. class_heard = [f"类别[{i}]" for i in range(len(class_))]
  1282. get, x_means, x_range, data_type = training_visualization(
  1283. x_data, class_, y)
  1284. for i in range(len(get)):
  1285. tab.add(get[i], f"{i}训练数据散点图")
  1286. if y_test is not None:
  1287. get = training_visualization(x_test, class_, y_test)[0]
  1288. for i in range(len(get)):
  1289. tab.add(get[i], f"{i}测试数据散点图")
  1290. get = decision_boundary(
  1291. x_range, x_means, self.predict, class_, data_type)
  1292. for i in range(len(get)):
  1293. tab.add(get[i], f"{i}预测热力图")
  1294. heard = class_heard + [f"普适预测第{i}特征" for i in range(len(x_means))]
  1295. data = class_ + [f"{i}" for i in x_means]
  1296. c = Table().add(headers=heard, rows=[data])
  1297. tab.add(c, "数据表")
  1298. else:
  1299. get, x_means, x_range, data_type = regress_visualization(x_data, y)
  1300. for i in range(len(get)):
  1301. tab.add(get[i], f"{i}训练数据散点图")
  1302. get = regress_visualization(x_test, y_test)[0]
  1303. for i in range(len(get)):
  1304. tab.add(get[i], f"{i}测试数据类型图")
  1305. get = prediction_boundary(
  1306. x_range, x_means, self.predict, data_type)
  1307. for i in range(len(get)):
  1308. tab.add(get[i], f"{i}预测热力图")
  1309. heard = [f"普适预测第{i}特征" for i in range(len(x_means))]
  1310. data = [f"{i}" for i in x_means]
  1311. c = Table().add(headers=heard, rows=[data])
  1312. tab.add(c, "数据表")
  1313. des_to_csv(
  1314. save_dir,
  1315. "预测表",
  1316. [[f"{i}" for i in x_means]],
  1317. [f"普适预测第{i}特征" for i in range(len(x_means))],
  1318. )
  1319. save = save_dir + rf"{os.sep}K.HTML"
  1320. tab.render(save) # 生成HTML
  1321. return save,
  1322. @plugin_class_loading(get_path(r"template/machinelearning"))
  1323. class TreeModel(StudyMachinebase):
  1324. def __init__(
  1325. self, args_use, model, *args, **kwargs
  1326. ): # model表示当前选用的模型类型,Alpha针对正则化的参数
  1327. super(TreeModel, self).__init__(*args, **kwargs)
  1328. all_model = {
  1329. "Tree_class": DecisionTreeClassifier,
  1330. "Tree": DecisionTreeRegressor,
  1331. }[model]
  1332. self.model = all_model(
  1333. criterion=args_use["criterion"],
  1334. splitter=args_use["splitter"],
  1335. max_features=args_use["max_features"],
  1336. max_depth=args_use["max_depth"],
  1337. min_samples_split=args_use["min_samples_split"],
  1338. )
  1339. # 记录这两个是为了克隆
  1340. self.criterion = args_use["criterion"]
  1341. self.splitter = args_use["splitter"]
  1342. self.max_features = args_use["max_features"]
  1343. self.max_depth = args_use["max_depth"]
  1344. self.min_samples_split = args_use["min_samples_split"]
  1345. self.k = {
  1346. "criterion": args_use["criterion"],
  1347. "splitter": args_use["splitter"],
  1348. "max_features": args_use["max_features"],
  1349. "max_depth": args_use["max_depth"],
  1350. "min_samples_split": args_use["min_samples_split"],
  1351. }
  1352. self.model_Name = model
  1353. def data_visualization(self, save_dir, *args, **kwargs):
  1354. tab = Tab()
  1355. importance = self.model.feature_importances_.tolist()
  1356. with open(save_dir + fr"{os.sep}Tree_Gra.dot", "w") as f:
  1357. export_graphviz(self.model, out_file=f)
  1358. make_bar("特征重要性", importance, tab)
  1359. des_to_csv(
  1360. save_dir,
  1361. "特征重要性",
  1362. [importance],
  1363. [f"[{i}]特征" for i in range(len(importance))],
  1364. )
  1365. tab.add(see_tree(save_dir + fr"{os.sep}Tree_Gra.dot"), "决策树可视化")
  1366. y = self.y_traindata
  1367. x_data = self.x_traindata
  1368. y_test = self.y_testdata
  1369. x_test = self.x_testdata
  1370. if self.model_Name == "Tree_class":
  1371. class_ = self.model.classes_.tolist()
  1372. class_heard = [f"类别[{i}]" for i in range(len(class_))]
  1373. get, x_means, x_range, data_type = training_visualization(
  1374. x_data, class_, y)
  1375. for i in range(len(get)):
  1376. tab.add(get[i], f"{i}训练数据散点图")
  1377. get = training_visualization(x_test, class_, y_test)[0]
  1378. for i in range(len(get)):
  1379. tab.add(get[i], f"{i}测试数据散点图")
  1380. get = decision_boundary(
  1381. x_range, x_means, self.predict, class_, data_type)
  1382. for i in range(len(get)):
  1383. tab.add(get[i], f"{i}预测热力图")
  1384. tab.add(
  1385. make_tab(
  1386. class_heard
  1387. + [f"普适预测第{i}特征" for i in range(len(x_means))]
  1388. + [f"特征{i}重要性" for i in range(len(importance))],
  1389. [class_ + [f"{i}" for i in x_means] + importance],
  1390. ),
  1391. "数据表",
  1392. )
  1393. else:
  1394. get, x_means, x_range, data_type = regress_visualization(x_data, y)
  1395. for i in range(len(get)):
  1396. tab.add(get[i], f"{i}训练数据散点图")
  1397. get = regress_visualization(x_test, y_test)[0]
  1398. for i in range(len(get)):
  1399. tab.add(get[i], f"{i}测试数据类型图")
  1400. get = prediction_boundary(
  1401. x_range, x_means, self.predict, data_type)
  1402. for i in range(len(get)):
  1403. tab.add(get[i], f"{i}预测热力图")
  1404. tab.add(
  1405. make_tab(
  1406. [f"普适预测第{i}特征" for i in range(len(x_means))]
  1407. + [f"特征{i}重要性" for i in range(len(importance))],
  1408. [[f"{i}" for i in x_means] + importance],
  1409. ),
  1410. "数据表",
  1411. )
  1412. des_to_csv(
  1413. save_dir,
  1414. "预测表",
  1415. [[f"{i}" for i in x_means]],
  1416. [f"普适预测第{i}特征" for i in range(len(x_means))],
  1417. )
  1418. save = save_dir + rf"{os.sep}决策树.HTML"
  1419. tab.render(save) # 生成HTML
  1420. return save,
  1421. @plugin_class_loading(get_path(r"template/machinelearning"))
  1422. class ForestModel(StudyMachinebase):
  1423. def __init__(
  1424. self, args_use, model, *args, **kwargs
  1425. ): # model表示当前选用的模型类型,Alpha针对正则化的参数
  1426. super(ForestModel, self).__init__(*args, **kwargs)
  1427. model = {
  1428. "Forest_class": RandomForestClassifier,
  1429. "Forest": RandomForestRegressor,
  1430. }[model]
  1431. self.model = model(
  1432. n_estimators=args_use["n_Tree"],
  1433. criterion=args_use["criterion"],
  1434. max_features=args_use["max_features"],
  1435. max_depth=args_use["max_depth"],
  1436. min_samples_split=args_use["min_samples_split"],
  1437. )
  1438. # 记录这两个是为了克隆
  1439. self.n_estimators = args_use["n_Tree"]
  1440. self.criterion = args_use["criterion"]
  1441. self.max_features = args_use["max_features"]
  1442. self.max_depth = args_use["max_depth"]
  1443. self.min_samples_split = args_use["min_samples_split"]
  1444. self.k = {
  1445. "n_estimators": args_use["n_Tree"],
  1446. "criterion": args_use["criterion"],
  1447. "max_features": args_use["max_features"],
  1448. "max_depth": args_use["max_depth"],
  1449. "min_samples_split": args_use["min_samples_split"],
  1450. }
  1451. self.model_Name = model
  1452. def data_visualization(self, save_dir, *args, **kwargs):
  1453. tab = Tab()
  1454. # 多个决策树可视化
  1455. for i in range(len(self.model.estimators_)):
  1456. with open(save_dir + rf"{os.sep}Tree_Gra[{i}].dot", "w") as f:
  1457. export_graphviz(self.model.estimators_[i], out_file=f)
  1458. tab.add(
  1459. see_tree(
  1460. save_dir +
  1461. rf"{os.sep}Tree_Gra[{i}].dot"),
  1462. f"[{i}]决策树可视化")
  1463. y = self.y_traindata
  1464. x_data = self.x_traindata
  1465. if self.model_Name == "Forest_class":
  1466. class_ = self.model.classes_.tolist()
  1467. class_heard = [f"类别[{i}]" for i in range(len(class_))]
  1468. get, x_means, x_range, data_type = training_visualization(
  1469. x_data, class_, y)
  1470. for i in range(len(get)):
  1471. tab.add(get[i], f"{i}训练数据散点图")
  1472. get = decision_boundary(
  1473. x_range, x_means, self.predict, class_, data_type)
  1474. for i in range(len(get)):
  1475. tab.add(get[i], f"{i}预测热力图")
  1476. tab.add(
  1477. make_tab(
  1478. class_heard + [f"普适预测第{i}特征" for i in range(len(x_means))],
  1479. [class_ + [f"{i}" for i in x_means]],
  1480. ),
  1481. "数据表",
  1482. )
  1483. else:
  1484. get, x_means, x_range, data_type = regress_visualization(x_data, y)
  1485. for i in range(len(get)):
  1486. tab.add(get[i], f"{i}预测类型图")
  1487. get = prediction_boundary(
  1488. x_range, x_means, self.predict, data_type)
  1489. for i in range(len(get)):
  1490. tab.add(get[i], f"{i}预测热力图")
  1491. tab.add(
  1492. make_tab(
  1493. [f"普适预测第{i}特征" for i in range(len(x_means))],
  1494. [[f"{i}" for i in x_means]],
  1495. ),
  1496. "数据表",
  1497. )
  1498. des_to_csv(
  1499. save_dir,
  1500. "预测表",
  1501. [[f"{i}" for i in x_means]],
  1502. [f"普适预测第{i}特征" for i in range(len(x_means))],
  1503. )
  1504. save = save_dir + rf"{os.sep}随机森林.HTML"
  1505. tab.render(save) # 生成HTML
  1506. return save,
  1507. class GradienttreeModel(StudyMachinebase): # 继承Tree_Model主要是继承Des
  1508. def __init__(
  1509. self, args_use, model, *args, **kwargs
  1510. ): # model表示当前选用的模型类型,Alpha针对正则化的参数
  1511. super(
  1512. GradienttreeModel,
  1513. self).__init__(
  1514. *args,
  1515. **kwargs) # 不需要执行Tree_Model的初始化
  1516. model = {
  1517. "GradientTree_class": GradientBoostingClassifier,
  1518. "GradientTree": GradientBoostingRegressor,
  1519. }[model]
  1520. self.model = model(
  1521. n_estimators=args_use["n_Tree"],
  1522. max_features=args_use["max_features"],
  1523. max_depth=args_use["max_depth"],
  1524. min_samples_split=args_use["min_samples_split"],
  1525. )
  1526. # 记录这两个是为了克隆
  1527. self.criterion = args_use["criterion"]
  1528. self.splitter = args_use["splitter"]
  1529. self.max_features = args_use["max_features"]
  1530. self.max_depth = args_use["max_depth"]
  1531. self.min_samples_split = args_use["min_samples_split"]
  1532. self.k = {
  1533. "criterion": args_use["criterion"],
  1534. "splitter": args_use["splitter"],
  1535. "max_features": args_use["max_features"],
  1536. "max_depth": args_use["max_depth"],
  1537. "min_samples_split": args_use["min_samples_split"],
  1538. }
  1539. self.model_Name = model
  1540. def data_visualization(self, save_dir, *args, **kwargs):
  1541. tab = Tab()
  1542. # 多个决策树可视化
  1543. for a in range(len(self.model.estimators_)):
  1544. for i in range(len(self.model.estimators_[a])):
  1545. with open(save_dir + rf"{os.sep}Tree_Gra[{a},{i}].dot", "w") as f:
  1546. export_graphviz(self.model.estimators_[a][i], out_file=f)
  1547. tab.add(
  1548. see_tree(
  1549. save_dir +
  1550. rf"{os.sep}Tree_Gra[{a},{i}].dot"),
  1551. f"[{a},{i}]决策树可视化")
  1552. y = self.y_traindata
  1553. x_data = self.x_traindata
  1554. if self.model_Name == "Tree_class":
  1555. class_ = self.model.classes_.tolist()
  1556. class_heard = [f"类别[{i}]" for i in range(len(class_))]
  1557. get, x_means, x_range, data_type = training_visualization(
  1558. x_data, class_, y)
  1559. for i in range(len(get)):
  1560. tab.add(get[i], f"{i}训练数据散点图")
  1561. get = decision_boundary(
  1562. x_range, x_means, self.predict, class_, data_type)
  1563. for i in range(len(get)):
  1564. tab.add(get[i], f"{i}预测热力图")
  1565. tab.add(
  1566. make_tab(
  1567. class_heard + [f"普适预测第{i}特征" for i in range(len(x_means))],
  1568. [class_ + [f"{i}" for i in x_means]],
  1569. ),
  1570. "数据表",
  1571. )
  1572. else:
  1573. get, x_means, x_range, data_type = regress_visualization(x_data, y)
  1574. for i in range(len(get)):
  1575. tab.add(get[i], f"{i}预测类型图")
  1576. get = prediction_boundary(
  1577. x_range, x_means, self.predict, data_type)
  1578. for i in range(len(get)):
  1579. tab.add(get[i], f"{i}预测热力图")
  1580. tab.add(
  1581. make_tab(
  1582. [f"普适预测第{i}特征" for i in range(len(x_means))],
  1583. [[f"{i}" for i in x_means]],
  1584. ),
  1585. "数据表",
  1586. )
  1587. des_to_csv(
  1588. save_dir,
  1589. "预测表",
  1590. [[f"{i}" for i in x_means]],
  1591. [f"普适预测第{i}特征" for i in range(len(x_means))],
  1592. )
  1593. save = save_dir + rf"{os.sep}梯度提升回归树.HTML"
  1594. tab.render(save) # 生成HTML
  1595. return save,
  1596. @plugin_class_loading(get_path(r"template/machinelearning"))
  1597. class SvcModel(StudyMachinebase):
  1598. def __init__(
  1599. self, args_use, model, *args, **kwargs
  1600. ): # model表示当前选用的模型类型,Alpha针对正则化的参数
  1601. super(SvcModel, self).__init__(*args, **kwargs)
  1602. self.model = SVC(
  1603. C=args_use["C"], gamma=args_use["gamma"], kernel=args_use["kernel"]
  1604. )
  1605. # 记录这两个是为了克隆
  1606. self.C = args_use["C"]
  1607. self.gamma = args_use["gamma"]
  1608. self.kernel = args_use["kernel"]
  1609. self.k = {
  1610. "C": args_use["C"],
  1611. "gamma": args_use["gamma"],
  1612. "kernel": args_use["kernel"],
  1613. }
  1614. self.model_Name = model
  1615. def data_visualization(self, save_dir, *args, **kwargs):
  1616. tab = Tab()
  1617. try:
  1618. w_list = self.model.coef_.tolist() # 未必有这个属性
  1619. b = self.model.intercept_.tolist()
  1620. except AttributeError:
  1621. w_list = [] # 未必有这个属性
  1622. b = []
  1623. class_ = self.model.classes_.tolist()
  1624. class_heard = [f"类别[{i}]" for i in range(len(class_))]
  1625. y = self.y_traindata
  1626. x_data = self.x_traindata
  1627. get, x_means, x_range, data_type = training_visualization(
  1628. x_data, class_, y)
  1629. if w_list:
  1630. get_line: list = training_w(
  1631. x_data, class_, y, w_list, b, x_means.copy())
  1632. else:
  1633. get_line = []
  1634. for i in range(len(get)):
  1635. if get_line:
  1636. tab.add(get[i].overlap(get_line[i]), f"{i}决策边界散点图")
  1637. else:
  1638. tab.add(get[i], f"{i}决策边界散点图")
  1639. get = decision_boundary(
  1640. x_range,
  1641. x_means,
  1642. self.predict,
  1643. class_,
  1644. data_type)
  1645. for i in range(len(get)):
  1646. tab.add(get[i], f"{i}预测热力图")
  1647. dic = {2: "离散", 1: "连续"}
  1648. tab.add(make_tab(class_heard +
  1649. [f"普适预测第{i}特征:{dic[data_type[i]]}" for i in range(len(x_means))],
  1650. [class_ + [f"{i}" for i in x_means]],), "数据表", )
  1651. if w_list:
  1652. des_to_csv(save_dir, "系数表", w_list, [
  1653. f"系数W[{i}]" for i in range(len(w_list[0]))])
  1654. if w_list:
  1655. des_to_csv(save_dir, "截距表", [b], [f"截距{i}" for i in range(len(b))])
  1656. des_to_csv(
  1657. save_dir,
  1658. "预测表",
  1659. [[f"{i}" for i in x_means]],
  1660. [f"普适预测第{i}特征" for i in range(len(x_means))],
  1661. )
  1662. save = save_dir + rf"{os.sep}支持向量机分类.HTML"
  1663. tab.render(save) # 生成HTML
  1664. return save,
  1665. @plugin_class_loading(get_path(r"template/machinelearning"))
  1666. class SvrModel(StudyMachinebase):
  1667. def __init__(
  1668. self, args_use, model, *args, **kwargs
  1669. ): # model表示当前选用的模型类型,Alpha针对正则化的参数
  1670. super(SvrModel, self).__init__(*args, **kwargs)
  1671. self.model = SVR(
  1672. C=args_use["C"], gamma=args_use["gamma"], kernel=args_use["kernel"]
  1673. )
  1674. # 记录这两个是为了克隆
  1675. self.C = args_use["C"]
  1676. self.gamma = args_use["gamma"]
  1677. self.kernel = args_use["kernel"]
  1678. self.k = {
  1679. "C": args_use["C"],
  1680. "gamma": args_use["gamma"],
  1681. "kernel": args_use["kernel"],
  1682. }
  1683. self.model_Name = model
  1684. def data_visualization(self, save_dir, *args, **kwargs):
  1685. tab = Tab()
  1686. x_data = self.x_traindata
  1687. y = self.y_traindata
  1688. try:
  1689. w_list = self.model.coef_.tolist() # 未必有这个属性
  1690. b = self.model.intercept_.tolist()
  1691. except AttributeError:
  1692. w_list = [] # 未必有这个属性
  1693. b = []
  1694. get, x_means, x_range, data_type = regress_visualization(x_data, y)
  1695. if w_list:
  1696. get_line = regress_w(x_data, w_list, b, x_means.copy())
  1697. else:
  1698. get_line = []
  1699. for i in range(len(get)):
  1700. if get_line:
  1701. tab.add(get[i].overlap(get_line[i]), f"{i}预测类型图")
  1702. else:
  1703. tab.add(get[i], f"{i}预测类型图")
  1704. get = prediction_boundary(x_range, x_means, self.predict, data_type)
  1705. for i in range(len(get)):
  1706. tab.add(get[i], f"{i}预测热力图")
  1707. if w_list:
  1708. des_to_csv(save_dir, "系数表", w_list, [
  1709. f"系数W[{i}]" for i in range(len(w_list[0]))])
  1710. if w_list:
  1711. des_to_csv(save_dir, "截距表", [b], [f"截距{i}" for i in range(len(b))])
  1712. des_to_csv(
  1713. save_dir,
  1714. "预测表",
  1715. [[f"{i}" for i in x_means]],
  1716. [f"普适预测第{i}特征" for i in range(len(x_means))],
  1717. )
  1718. tab.add(
  1719. make_tab(
  1720. [f"普适预测第{i}特征" for i in range(len(x_means))],
  1721. [[f"{i}" for i in x_means]],
  1722. ),
  1723. "数据表",
  1724. )
  1725. save = save_dir + rf"{os.sep}支持向量机回归.HTML"
  1726. tab.render(save) # 生成HTML
  1727. return save,
class VarianceModel(Unsupervised):  # unsupervised feature selection
    # Drops features whose variance is below p*(1-p).

    def __init__(
        self, args_use, model, *args, **kwargs
    ):  # model is the name of the selected model type
        super(VarianceModel, self).__init__(*args, **kwargs)
        self.model = VarianceThreshold(
            threshold=(args_use["P"] * (1 - args_use["P"])))
        # Recorded so the learner can be cloned later
        self.threshold = args_use["P"]
        self.k = {"threshold": args_use["P"]}
        self.model_Name = model

    def data_visualization(self, save_dir, *args, **kwargs):
        """Render kept-feature scatter plots and a per-feature variance bar
        chart into ``save_dir``; returns (html_path,).
        """
        tab = Tab()
        # NOTE(review): `variances_` holds per-feature variances, but the
        # chart labels below say 标准差 (standard deviation) - confirm intent.
        var = self.model.variances_
        y_data = self.y_testdata
        if isinstance(y_data, np.ndarray):
            get = feature_visualization(self.y_testdata)
            for i in range(len(get)):
                tab.add(get[i], f"[{i}]数据x-x散点图")
        c = (
            Bar()
            .add_xaxis([f"[{i}]特征" for i in range(len(var))])
            .add_yaxis("标准差", var.tolist(), **label_setting)
            .set_global_opts(
                title_opts=opts.TitleOpts(title="系数w柱状图"), **global_setting
            )
        )
        tab.add(c, "数据标准差")
        save = save_dir + rf"{os.sep}方差特征选择.HTML"
        tab.render(save)  # render the HTML report
        return save,
  1759. class SelectkbestModel(PrepBase): # 有监督
  1760. def __init__(self, args_use, model, *args, **kwargs):
  1761. super(SelectkbestModel, self).__init__(*args, **kwargs)
  1762. self.model = SelectKBest(
  1763. k=args_use["k"],
  1764. score_func=args_use["score_func"])
  1765. # 记录这两个是为了克隆
  1766. self.k_ = args_use["k"]
  1767. self.score_func = args_use["score_func"]
  1768. self.k = {"k": args_use["k"], "score_func": args_use["score_func"]}
  1769. self.model_Name = model
  1770. def data_visualization(self, save_dir, *args, **kwargs):
  1771. tab = Tab()
  1772. score = self.model.scores_.tolist()
  1773. support: np.ndarray = self.model.get_support()
  1774. y_data = self.y_traindata
  1775. x_data = self.x_traindata
  1776. if isinstance(x_data, np.ndarray):
  1777. get = feature_visualization(x_data)
  1778. for i in range(len(get)):
  1779. tab.add(get[i], f"[{i}]训练数据x-x散点图")
  1780. if isinstance(y_data, np.ndarray):
  1781. get = feature_visualization(y_data)
  1782. for i in range(len(get)):
  1783. tab.add(get[i], f"[{i}]保留训练数据x-x散点图")
  1784. y_data = self.y_testdata
  1785. x_data = self.x_testdata
  1786. if isinstance(x_data, np.ndarray):
  1787. get = feature_visualization(x_data)
  1788. for i in range(len(get)):
  1789. tab.add(get[i], f"[{i}]数据x-x散点图")
  1790. if isinstance(y_data, np.ndarray):
  1791. get = feature_visualization(y_data)
  1792. for i in range(len(get)):
  1793. tab.add(get[i], f"[{i}]保留数据x-x散点图")
  1794. choose = []
  1795. un_choose = []
  1796. for i in range(len(score)):
  1797. if support[i]:
  1798. choose.append(score[i])
  1799. un_choose.append(0) # 占位
  1800. else:
  1801. un_choose.append(score[i])
  1802. choose.append(0)
  1803. c = (
  1804. Bar()
  1805. .add_xaxis([f"[{i}]特征" for i in range(len(score))])
  1806. .add_yaxis("选中特征", choose, **label_setting)
  1807. .add_yaxis("抛弃特征", un_choose, **label_setting)
  1808. .set_global_opts(
  1809. title_opts=opts.TitleOpts(title="系数w柱状图"), **global_setting
  1810. )
  1811. )
  1812. tab.add(c, "单变量重要程度")
  1813. save = save_dir + rf"{os.sep}单一变量特征选择.HTML"
  1814. tab.render(save) # 生成HTML
  1815. return save,
  1816. class SelectFromModel(PrepBase): # 有监督
  1817. def __init__(
  1818. self, args_use, learner, *args, **kwargs
  1819. ): # model表示当前选用的模型类型,Alpha针对正则化的参数
  1820. super(SelectFromModel, self).__init__(*args, **kwargs)
  1821. self.model = learner.Model
  1822. self.Select_Model = SelectFromModel(
  1823. estimator=learner.Model,
  1824. max_features=args_use["k"],
  1825. prefit=learner.have_Fit)
  1826. self.max_features = args_use["k"]
  1827. self.estimator = learner.Model
  1828. self.k = {
  1829. "max_features": args_use["k"],
  1830. "estimator": learner.Model,
  1831. "have_Fit": learner.have_Fit,
  1832. }
  1833. self.have_fit = learner.have_Fit
  1834. self.model_Name = "SelectFrom_Model"
  1835. self.learner = learner
  1836. def fit_model(self, x_data, y_data, split=0.3, *args, **kwargs):
  1837. y_data = y_data.ravel()
  1838. if not self.have_fit: # 不允许第二次训练
  1839. self.Select_Model.fit(x_data, y_data)
  1840. self.have_fit = True
  1841. return "None", "None"
  1842. def predict(self, x_data, *args, **kwargs):
  1843. try:
  1844. self.x_testdata = x_data.copy()
  1845. x_predict = self.Select_Model.transform(x_data)
  1846. self.y_testdata = x_predict.copy()
  1847. self.have_predict = True
  1848. return x_predict, "模型特征工程"
  1849. except BaseException as e:
  1850. logging.debug(str(e))
  1851. self.have_predict = True
  1852. return np.array([]), "无结果工程"
  1853. def data_visualization(self, save_dir, *args, **kwargs):
  1854. tab = Tab()
  1855. support: np.ndarray = self.Select_Model.get_support()
  1856. y_data = self.y_testdata
  1857. x_data = self.x_testdata
  1858. if isinstance(x_data, np.ndarray):
  1859. get = feature_visualization(x_data)
  1860. for i in range(len(get)):
  1861. tab.add(get[i], f"[{i}]数据x-x散点图")
  1862. if isinstance(y_data, np.ndarray):
  1863. get = feature_visualization(y_data)
  1864. for i in range(len(get)):
  1865. tab.add(get[i], f"[{i}]保留数据x-x散点图")
  1866. def make_bar_(score):
  1867. choose = []
  1868. un_choose = []
  1869. for i in range(len(score)):
  1870. if support[i]:
  1871. choose.append(abs(score[i]))
  1872. un_choose.append(0) # 占位
  1873. else:
  1874. un_choose.append(abs(score[i]))
  1875. choose.append(0)
  1876. c = (
  1877. Bar()
  1878. .add_xaxis([f"[{i}]特征" for i in range(len(score))])
  1879. .add_yaxis("选中特征", choose, **label_setting)
  1880. .add_yaxis("抛弃特征", un_choose, **label_setting)
  1881. .set_global_opts(
  1882. title_opts=opts.TitleOpts(title="系数w柱状图"), **global_setting
  1883. )
  1884. )
  1885. tab.add(c, "单变量重要程度")
  1886. try:
  1887. make_bar_(self.model.coef_)
  1888. except AttributeError:
  1889. try:
  1890. make_bar_(self.model.feature_importances_)
  1891. except BaseException as e:
  1892. logging.warning(str(e))
  1893. save = save_dir + rf"{os.sep}模型特征选择.HTML"
  1894. tab.render(save) # 生成HTML
  1895. return save,
  1896. class StandardizationModel(Unsupervised): # z-score标准化 无监督
  1897. def __init__(self, *args, **kwargs):
  1898. super(StandardizationModel, self).__init__(*args, **kwargs)
  1899. self.model = StandardScaler()
  1900. self.k = {}
  1901. self.model_Name = "StandardScaler"
  1902. def data_visualization(self, save_dir, *args, **kwargs):
  1903. tab = Tab()
  1904. y_data = self.y_testdata
  1905. x_data = self.x_testdata
  1906. var = self.model.var_.tolist()
  1907. means = self.model.mean_.tolist()
  1908. scale_ = self.model.scale_.tolist()
  1909. conversion_control(y_data, x_data, tab)
  1910. make_bar("标准差", var, tab)
  1911. make_bar("方差", means, tab)
  1912. make_bar("Scale", scale_, tab)
  1913. save = save_dir + rf"{os.sep}z-score标准化.HTML"
  1914. tab.render(save) # 生成HTML
  1915. return save,
  1916. class MinmaxscalerModel(Unsupervised): # 离差标准化
  1917. def __init__(self, args_use, *args, **kwargs):
  1918. super(MinmaxscalerModel, self).__init__(*args, **kwargs)
  1919. self.model = MinMaxScaler(feature_range=args_use["feature_range"])
  1920. self.k = {}
  1921. self.model_Name = "MinMaxScaler"
  1922. def data_visualization(self, save_dir, *args, **kwargs):
  1923. tab = Tab()
  1924. y_data = self.y_testdata
  1925. x_data = self.x_testdata
  1926. scale_ = self.model.scale_.tolist()
  1927. max_ = self.model.data_max_.tolist()
  1928. min_ = self.model.data_min_.tolist()
  1929. conversion_control(y_data, x_data, tab)
  1930. make_bar("Scale", scale_, tab)
  1931. tab.add(
  1932. make_tab(
  1933. heard=[f"[{i}]特征最大值" for i in range(len(max_))]
  1934. + [f"[{i}]特征最小值" for i in range(len(min_))],
  1935. row=[max_ + min_],
  1936. ),
  1937. "数据表格",
  1938. )
  1939. save = save_dir + rf"{os.sep}离差标准化.HTML"
  1940. tab.render(save) # 生成HTML
  1941. return save,
  1942. class LogscalerModel(PrepBase): # 对数标准化
  1943. def __init__(self, *args, **kwargs):
  1944. super(LogscalerModel, self).__init__(*args, **kwargs)
  1945. self.model = None
  1946. self.k = {}
  1947. self.model_Name = "LogScaler"
  1948. def fit_model(self, x_data, *args, **kwargs):
  1949. if not self.have_predict: # 不允许第二次训练
  1950. self.max_logx = np.log(x_data.max())
  1951. self.have_fit = True
  1952. return "None", "None"
  1953. def predict(self, x_data, *args, **kwargs):
  1954. try:
  1955. max_logx = self.max_logx
  1956. except AttributeError:
  1957. self.have_fit = False
  1958. self.fit_model(x_data)
  1959. max_logx = self.max_logx
  1960. self.x_testdata = x_data.copy()
  1961. x_predict = np.log(x_data) / max_logx
  1962. self.y_testdata = x_predict.copy()
  1963. self.have_predict = True
  1964. return x_predict, "对数变换"
  1965. def data_visualization(self, save_dir, *args, **kwargs):
  1966. tab = Tab()
  1967. y_data = self.y_testdata
  1968. x_data = self.x_testdata
  1969. conversion_control(y_data, x_data, tab)
  1970. tab.add(make_tab(heard=["最大对数值(自然对数)"],
  1971. row=[[str(self.max_logx)]]), "数据表格")
  1972. save = save_dir + rf"{os.sep}对数标准化.HTML"
  1973. tab.render(save) # 生成HTML
  1974. return save,
  1975. class AtanscalerModel(PrepBase): # atan标准化
  1976. def __init__(self, *args, **kwargs):
  1977. super(AtanscalerModel, self).__init__(*args, **kwargs)
  1978. self.model = None
  1979. self.k = {}
  1980. self.model_Name = "atanScaler"
  1981. def fit_model(self, x_data, *args, **kwargs):
  1982. self.have_fit = True
  1983. return "None", "None"
  1984. def predict(self, x_data, *args, **kwargs):
  1985. self.x_testdata = x_data.copy()
  1986. x_predict = np.arctan(x_data) * (2 / np.pi)
  1987. self.y_testdata = x_predict.copy()
  1988. self.have_predict = True
  1989. return x_predict, "atan变换"
  1990. def data_visualization(self, save_dir, *args, **kwargs):
  1991. tab = Tab()
  1992. y_data = self.y_testdata
  1993. x_data = self.x_testdata
  1994. conversion_control(y_data, x_data, tab)
  1995. save = save_dir + rf"{os.sep}反正切函数标准化.HTML"
  1996. tab.render(save) # 生成HTML
  1997. return save,
  1998. class DecimalscalerModel(PrepBase): # 小数定标准化
  1999. def __init__(self, *args, **kwargs):
  2000. super(DecimalscalerModel, self).__init__(*args, **kwargs)
  2001. self.model = None
  2002. self.k = {}
  2003. self.model_Name = "Decimal_normalization"
  2004. def fit_model(self, x_data, *args, **kwargs):
  2005. if not self.have_predict: # 不允许第二次训练
  2006. self.j = max([judging_digits(x_data.max()),
  2007. judging_digits(x_data.min())])
  2008. self.have_fit = True
  2009. return "None", "None"
  2010. def predict(self, x_data, *args, **kwargs):
  2011. self.x_testdata = x_data.copy()
  2012. try:
  2013. j = self.j
  2014. except AttributeError:
  2015. self.have_fit = False
  2016. self.fit_model(x_data)
  2017. j = self.j
  2018. x_predict = x_data / (10 ** j)
  2019. self.y_testdata = x_predict.copy()
  2020. self.have_predict = True
  2021. return x_predict, "小数定标标准化"
  2022. def data_visualization(self, save_dir, *args, **kwargs):
  2023. tab = Tab()
  2024. y_data = self.y_testdata
  2025. x_data = self.x_testdata
  2026. j = self.j
  2027. conversion_control(y_data, x_data, tab)
  2028. tab.add(make_tab(heard=["小数位数:j"], row=[[j]]), "数据表格")
  2029. save = save_dir + rf"{os.sep}小数定标标准化.HTML"
  2030. tab.render(save) # 生成HTML
  2031. return save,
  2032. class MapzoomModel(PrepBase): # 映射标准化
  2033. def __init__(self, args_use, *args, **kwargs):
  2034. super(MapzoomModel, self).__init__(*args, **kwargs)
  2035. self.model = None
  2036. self.feature_range = args_use["feature_range"]
  2037. self.k = {}
  2038. self.model_Name = "Decimal_normalization"
  2039. def fit_model(self, x_data, *args, **kwargs):
  2040. if not self.have_predict: # 不允许第二次训练
  2041. self.max_ = x_data.max()
  2042. self.min_ = x_data.min()
  2043. self.have_fit = True
  2044. return "None", "None"
  2045. def predict(self, x_data, *args, **kwargs):
  2046. self.x_testdata = x_data.copy()
  2047. try:
  2048. max_ = self.max_
  2049. min_ = self.min_
  2050. except AttributeError:
  2051. self.have_fit = False
  2052. self.fit_model(x_data)
  2053. max_ = self.max_
  2054. min_ = self.min_
  2055. x_predict = (x_data * (self.feature_range[1] - self.feature_range[0])) / (
  2056. max_ - min_
  2057. )
  2058. self.y_testdata = x_predict.copy()
  2059. self.have_predict = True
  2060. return x_predict, "映射标准化"
  2061. def data_visualization(self, save_dir, *args, **kwargs):
  2062. tab = Tab()
  2063. y_data = self.y_testdata
  2064. x_data = self.x_testdata
  2065. max_ = self.max_
  2066. min_ = self.min_
  2067. conversion_control(y_data, x_data, tab)
  2068. tab.add(make_tab(heard=["最大值", "最小值"], row=[[max_, min_]]), "数据表格")
  2069. save = save_dir + rf"{os.sep}映射标准化.HTML"
  2070. tab.render(save) # 生成HTML
  2071. return save,
  2072. class SigmodscalerModel(PrepBase): # sigmod变换
  2073. def __init__(self, *args, **kwargs):
  2074. super(SigmodscalerModel, self).__init__(*args, **kwargs)
  2075. self.model = None
  2076. self.k = {}
  2077. self.model_Name = "sigmodScaler_Model"
  2078. def fit_model(self, x_data, *args, **kwargs):
  2079. self.have_fit = True
  2080. return "None", "None"
  2081. def predict(self, x_data: np.array, *args, **kwargs):
  2082. self.x_testdata = x_data.copy()
  2083. x_predict = 1 / (1 + np.exp(-x_data))
  2084. self.y_testdata = x_predict.copy()
  2085. self.have_predict = True
  2086. return x_predict, "Sigmod变换"
  2087. def data_visualization(self, save_dir, *args, **kwargs):
  2088. tab = Tab()
  2089. y_data = self.y_testdata
  2090. x_data = self.x_testdata
  2091. conversion_control(y_data, x_data, tab)
  2092. save = save_dir + rf"{os.sep}Sigmoid变换.HTML"
  2093. tab.render(save) # 生成HTML
  2094. return save,
  2095. class FuzzyQuantizationModel(PrepBase): # 模糊量化标准化
  2096. def __init__(self, args_use, *args, **kwargs):
  2097. super(FuzzyQuantizationModel, self).__init__(*args, **kwargs)
  2098. self.model = None
  2099. self.feature_range = args_use["feature_range"]
  2100. self.k = {}
  2101. self.model_Name = "Fuzzy_quantization"
  2102. def fit_model(self, x_data, *args, **kwargs):
  2103. if not self.have_predict: # 不允许第二次训练
  2104. self.max_ = x_data.max()
  2105. self.max_ = x_data.min()
  2106. self.have_fit = True
  2107. return "None", "None"
  2108. def predict(self, x_data, *args, **kwargs):
  2109. self.x_testdata = x_data.copy()
  2110. try:
  2111. max_ = self.max_
  2112. min_ = self.max_
  2113. except AttributeError:
  2114. self.have_fit = False
  2115. self.fit_model(x_data)
  2116. max_ = self.max_
  2117. min_ = self.max_
  2118. x_predict = 1 / 2 + (1 / 2) * np.sin(
  2119. np.pi / (max_ - min_) * (x_data - (max_ - min_) / 2)
  2120. )
  2121. self.y_testdata = x_predict.copy()
  2122. self.have_predict = True
  2123. return x_predict, "模糊量化标准化"
  2124. def data_visualization(self, save_dir, *args, **kwargs):
  2125. tab = Tab()
  2126. y_data = self.y_traindata
  2127. x_data = self.x_traindata
  2128. max_ = self.max_
  2129. min_ = self.max_
  2130. conversion_control(y_data, x_data, tab)
  2131. tab.add(make_tab(heard=["最大值", "最小值"], row=[[max_, min_]]), "数据表格")
  2132. save = save_dir + rf"{os.sep}模糊量化标准化.HTML"
  2133. tab.render(save) # 生成HTML
  2134. return save,
  2135. class RegularizationModel(Unsupervised): # 正则化
  2136. def __init__(self, args_use, *args, **kwargs):
  2137. super(RegularizationModel, self).__init__(*args, **kwargs)
  2138. self.model = Normalizer(norm=args_use["norm"])
  2139. self.k = {"norm": args_use["norm"]}
  2140. self.model_Name = "Regularization"
  2141. def data_visualization(self, save_dir, *args, **kwargs):
  2142. tab = Tab()
  2143. y_data = self.y_testdata.copy()
  2144. x_data = self.x_testdata.copy()
  2145. conversion_control(y_data, x_data, tab)
  2146. save = save_dir + rf"{os.sep}正则化.HTML"
  2147. tab.render(save) # 生成HTML
  2148. return save,
# Discrete-data preprocessing (binarization / discretization / encoding)
  2150. class BinarizerModel(Unsupervised): # 二值化
  2151. def __init__(self, args_use, *args, **kwargs):
  2152. super(BinarizerModel, self).__init__(*args, **kwargs)
  2153. self.model = Binarizer(threshold=args_use["threshold"])
  2154. self.k = {}
  2155. self.model_Name = "Binarizer"
  2156. def data_visualization(self, save_dir, *args, **kwargs):
  2157. tab = Tab()
  2158. y_data = self.y_testdata
  2159. x_data = self.x_testdata
  2160. get_y = discrete_feature_visualization(y_data, "转换数据") # 转换
  2161. for i in range(len(get_y)):
  2162. tab.add(get_y[i], f"[{i}]数据x-x离散散点图")
  2163. heard = [f"特征:{i}" for i in range(len(x_data[0]))]
  2164. tab.add(make_tab(heard, x_data.tolist()), f"原数据")
  2165. tab.add(make_tab(heard, y_data.tolist()), f"编码数据")
  2166. tab.add(
  2167. make_tab(
  2168. heard, np.dstack(
  2169. (x_data, y_data)).tolist()), f"合成[原数据,编码]数据")
  2170. save = save_dir + rf"{os.sep}二值离散化.HTML"
  2171. tab.render(save) # 生成HTML
  2172. return save,
  2173. class DiscretizationModel(PrepBase): # n值离散
  2174. def __init__(self, args_use, *args, **kwargs):
  2175. super(DiscretizationModel, self).__init__(*args, **kwargs)
  2176. self.model = None
  2177. range_ = args_use["split_range"]
  2178. if not range_:
  2179. raise Exception
  2180. elif len(range_) == 1:
  2181. range_.append(range_[0])
  2182. self.range = range_
  2183. self.k = {}
  2184. self.model_Name = "Discretization"
  2185. def fit_model(self, *args, **kwargs):
  2186. # t值在模型创建时已经保存
  2187. self.have_fit = True
  2188. return "None", "None"
  2189. def predict(self, x_data, *args, **kwargs):
  2190. self.x_testdata = x_data.copy()
  2191. x_predict = x_data.copy() # 复制
  2192. range_ = self.range
  2193. bool_list = []
  2194. max_ = len(range_) - 1
  2195. o_t = None
  2196. for i in range(len(range_)):
  2197. try:
  2198. t = float(range_[i])
  2199. except ValueError:
  2200. continue
  2201. if o_t is None: # 第一个参数
  2202. bool_list.append(x_predict <= t)
  2203. else:
  2204. bool_list.append((o_t <= x_predict) == (x_predict < t))
  2205. if i == max_:
  2206. bool_list.append(t <= x_predict)
  2207. o_t = t
  2208. for i in range(len(bool_list)):
  2209. x_predict[bool_list[i]] = i
  2210. self.y_testdata = x_predict.copy()
  2211. self.have_predict = True
  2212. return x_predict, f"{len(bool_list)}值离散化"
  2213. def data_visualization(self, save_dir, *args, **kwargs):
  2214. tab = Tab()
  2215. y_data = self.y_testdata
  2216. x_data = self.x_testdata
  2217. get_y = discrete_feature_visualization(y_data, "转换数据") # 转换
  2218. for i in range(len(get_y)):
  2219. tab.add(get_y[i], f"[{i}]数据x-x离散散点图")
  2220. heard = [f"特征:{i}" for i in range(len(x_data[0]))]
  2221. tab.add(make_tab(heard, x_data.tolist()), f"原数据")
  2222. tab.add(make_tab(heard, y_data.tolist()), f"编码数据")
  2223. tab.add(
  2224. make_tab(
  2225. heard, np.dstack(
  2226. (x_data, y_data)).tolist()), f"合成[原数据,编码]数据")
  2227. save = save_dir + rf"{os.sep}多值离散化.HTML"
  2228. tab.render(save) # 生成HTML
  2229. return save,
  2230. class LabelModel(PrepBase): # 数字编码
  2231. def __init__(self, *args, **kwargs):
  2232. super(LabelModel, self).__init__(*args, **kwargs)
  2233. self.model = []
  2234. self.k = {}
  2235. self.model_Name = "LabelEncoder"
  2236. def fit_model(self, x_data, *args, **kwargs):
  2237. if not self.have_predict: # 不允许第二次训练
  2238. self.model = []
  2239. if x_data.ndim == 1:
  2240. x_data = np.array([x_data])
  2241. for i in range(x_data.shape[1]):
  2242. self.model.append(
  2243. LabelEncoder().fit(np.ravel(x_data[:, i]))
  2244. ) # 训练机器(每个特征一个学习器)
  2245. self.have_fit = True
  2246. return "None", "None"
  2247. def predict(self, x_data, *args, **kwargs):
  2248. self.x_testdata = x_data.copy()
  2249. x_predict = x_data.copy()
  2250. if x_data.ndim == 1:
  2251. x_data = np.array([x_data])
  2252. for i in range(x_data.shape[1]):
  2253. x_predict[:, i] = self.model[i].transform(x_data[:, i])
  2254. self.y_testdata = x_predict.copy()
  2255. self.have_predict = True
  2256. return x_predict, "数字编码"
  2257. def data_visualization(self, save_dir, *args, **kwargs):
  2258. tab = Tab()
  2259. x_data = self.x_testdata
  2260. y_data = self.y_testdata
  2261. get_y = discrete_feature_visualization(y_data, "转换数据") # 转换
  2262. for i in range(len(get_y)):
  2263. tab.add(get_y[i], f"[{i}]数据x-x离散散点图")
  2264. heard = [f"特征:{i}" for i in range(len(x_data[0]))]
  2265. tab.add(make_tab(heard, x_data.tolist()), f"原数据")
  2266. tab.add(make_tab(heard, y_data.tolist()), f"编码数据")
  2267. tab.add(
  2268. make_tab(
  2269. heard, np.dstack(
  2270. (x_data, y_data)).tolist()), f"合成[原数据,编码]数据")
  2271. save = save_dir + rf"{os.sep}数字编码.HTML"
  2272. tab.render(save) # 生成HTML
  2273. return save,
  2274. class OneHotEncoderModel(PrepBase): # 独热编码
  2275. def __init__(self, args_use, *args, **kwargs):
  2276. super(OneHotEncoderModel, self).__init__(*args, **kwargs)
  2277. self.model = []
  2278. self.ndim_up = args_use["ndim_up"]
  2279. self.k = {}
  2280. self.model_Name = "OneHotEncoder"
  2281. self.OneHot_Data = None # 三维独热编码
  2282. def fit_model(self, x_data, *args, **kwargs):
  2283. if not self.have_predict: # 不允许第二次训练
  2284. if x_data.ndim == 1:
  2285. x_data = [x_data]
  2286. for i in range(x_data.shape[1]):
  2287. data = np.expand_dims(x_data[:, i], axis=1) # 独热编码需要升维
  2288. self.model.append(OneHotEncoder().fit(data)) # 训练机器
  2289. self.have_fit = True
  2290. return "None", "None"
  2291. def predict(self, x_data, *args, **kwargs):
  2292. self.x_testdata = x_data.copy()
  2293. x_new = []
  2294. for i in range(x_data.shape[1]):
  2295. data = np.expand_dims(x_data[:, i], axis=1) # 独热编码需要升维
  2296. one_hot = self.model[i].transform(data).toarray().tolist()
  2297. x_new.append(one_hot) # 添加到列表中
  2298. # 新列表的行数据是原data列数据的独热码(只需要ndim=2,暂时没想到numpy的做法)
  2299. x_new = np.array(x_new)
  2300. x_predict = []
  2301. for i in range(x_new.shape[1]):
  2302. x_predict.append(x_new[:, i])
  2303. x_predict = np.array(x_predict) # 转换回array
  2304. self.OneHot_Data = x_predict.copy() # 保存未降维数据
  2305. if not self.ndim_up: # 压缩操作
  2306. new_x_predict = []
  2307. for i in x_predict:
  2308. new_list = []
  2309. list_ = i.tolist()
  2310. for a in list_:
  2311. new_list += a
  2312. new = np.array(new_list)
  2313. new_x_predict.append(new)
  2314. self.y_testdata = np.array(new_x_predict)
  2315. return self.y_testdata.copy(), "独热编码"
  2316. self.y_testdata = self.OneHot_Data
  2317. self.have_predict = True
  2318. return x_predict, "独热编码"
  2319. def data_visualization(self, save_dir, *args, **kwargs):
  2320. tab = Tab()
  2321. y_data = self.y_testdata
  2322. x_data = self.x_testdata
  2323. oh_data = self.OneHot_Data
  2324. if not self.ndim_up:
  2325. get_y = discrete_feature_visualization(y_data, "转换数据") # 转换
  2326. for i in range(len(get_y)):
  2327. tab.add(get_y[i], f"[{i}]数据x-x离散散点图")
  2328. heard = [f"特征:{i}" for i in range(len(x_data[0]))]
  2329. tab.add(make_tab(heard, x_data.tolist()), f"原数据")
  2330. tab.add(make_tab(heard, oh_data.tolist()), f"编码数据")
  2331. tab.add(
  2332. make_tab(
  2333. heard, np.dstack(
  2334. (oh_data, x_data)).tolist()), f"合成[原数据,编码]数据")
  2335. tab.add(make_tab([f"编码:{i}" for i in range(
  2336. len(y_data[0]))], y_data.tolist()), f"数据")
  2337. save = save_dir + rf"{os.sep}独热编码.HTML"
  2338. tab.render(save) # 生成HTML
  2339. return save,
  2340. class MissedModel(Unsupervised): # 缺失数据补充
  2341. def __init__(self, args_use, *args, **kwargs):
  2342. super(MissedModel, self).__init__(*args, **kwargs)
  2343. self.model = SimpleImputer(
  2344. missing_values=args_use["miss_value"],
  2345. strategy=args_use["fill_method"],
  2346. fill_value=args_use["fill_value"],
  2347. )
  2348. self.k = {}
  2349. self.model_Name = "Missed"
  2350. def predict(self, x_data, *args, **kwargs):
  2351. self.x_testdata = x_data.copy()
  2352. x_predict = self.model.transform(x_data)
  2353. self.y_testdata = x_predict.copy()
  2354. self.have_predict = True
  2355. return x_predict, "填充缺失"
  2356. def data_visualization(self, save_dir, *args, **kwargs):
  2357. tab = Tab()
  2358. y_data = self.y_testdata
  2359. x_data = self.x_testdata
  2360. statistics = self.model.statistics_.tolist()
  2361. conversion_control(y_data, x_data, tab)
  2362. tab.add(make_tab([f"特征[{i}]" for i in range(
  2363. len(statistics))], [statistics]), "填充值")
  2364. save = save_dir + rf"{os.sep}缺失数据填充.HTML"
  2365. tab.render(save) # 生成HTML
  2366. return save,
@plugin_class_loading(get_path(r"template/machinelearning"))
class PcaModel(Unsupervised):
    # Principal-component analysis wrapper around sklearn's PCA.

    def __init__(self, args_use, *args, **kwargs):
        super(PcaModel, self).__init__(*args, **kwargs)
        self.model = PCA(
            n_components=args_use["n_components"], whiten=args_use["white_PCA"]
        )
        self.whiten = args_use["white_PCA"]
        self.n_components = args_use["n_components"]
        self.k = {
            "n_components": args_use["n_components"],
            "whiten": args_use["white_PCA"],
        }
        self.model_Name = "PCA"

    def predict(self, x_data, *args, **kwargs):
        """Project x_data onto the fitted principal components."""
        self.x_testdata = x_data.copy()
        x_predict = self.model.transform(x_data)
        self.y_testdata = x_predict.copy()
        self.have_predict = True
        return x_predict, "PCA"

    def data_visualization(self, save_dir, *args, **kwargs):
        """Render a component heat map and an explained-variance bar chart,
        dump both matrices as CSV, and write the tabbed HTML report."""
        tab = Tab()
        y_data = self.y_testdata
        importance = self.model.components_.tolist()
        var = self.model.explained_variance_.tolist()  # explained variance
        conversion_separate_format(y_data, tab)
        # Axis labels: principal components on x, original features on y.
        x_data = [f"第{i+1}主成分" for i in range(len(importance))]
        y_data = [f"特征[{i}]" for i in range(len(importance[0]))]
        value = [
            (f"第{i+1}主成分", f"特征[{j}]", importance[i][j])
            for i in range(len(importance))
            for j in range(len(importance[i]))
        ]
        c = (
            HeatMap()
            .add_xaxis(x_data)
            .add_yaxis(f"", y_data, value, **label_setting)  # value[0] is the x key
            .set_global_opts(
                title_opts=opts.TitleOpts(title="预测热力图"),
                **global_not_legend,
                yaxis_opts=opts.AxisOpts(is_scale=True),  # 'category'
                xaxis_opts=opts.AxisOpts(is_scale=True),
                visualmap_opts=opts.VisualMapOpts(
                    is_show=True,
                    max_=int(self.model.components_.max()) + 1,
                    min_=int(self.model.components_.min()),
                    pos_right="3%",
                ),
            )  # display options
        )
        tab.add(c, "成分热力图")
        c = (
            Bar()
            .add_xaxis([f"第[{i}]主成分" for i in range(len(var))])
            .add_yaxis("方量差", var, **label_setting)
            .set_global_opts(
                title_opts=opts.TitleOpts(title="方量差柱状图"), **global_setting
            )
        )
        des_to_csv(save_dir, "成分重要性", importance, [x_data], [y_data])
        des_to_csv(
            save_dir, "方量差", [var], [
                f"第[{i}]主成分" for i in range(
                    len(var))])
        tab.add(c, "方量差柱状图")
        save = save_dir + rf"{os.sep}主成分分析.HTML"
        tab.render(save)  # emit the HTML report
        return save,
  2435. @plugin_class_loading(get_path(r"template/machinelearning"))
  2436. class RpcaModel(Unsupervised):
  2437. def __init__(self, args_use, *args, **kwargs):
  2438. super(RpcaModel, self).__init__(*args, **kwargs)
  2439. self.model = IncrementalPCA(
  2440. n_components=args_use["n_components"], whiten=args_use["white_PCA"]
  2441. )
  2442. self.n_components = args_use["n_components"]
  2443. self.whiten = args_use["white_PCA"]
  2444. self.k = {
  2445. "n_components": args_use["n_components"],
  2446. "whiten": args_use["white_PCA"],
  2447. }
  2448. self.model_Name = "RPCA"
  2449. def predict(self, x_data, *args, **kwargs):
  2450. self.x_testdata = x_data.copy()
  2451. x_predict = self.model.transform(x_data)
  2452. self.y_testdata = x_predict.copy()
  2453. self.have_predict = True
  2454. return x_predict, "RPCA"
  2455. def data_visualization(self, save_dir, *args, **kwargs):
  2456. tab = Tab()
  2457. y_data = self.y_traindata
  2458. importance = self.model.components_.tolist()
  2459. var = self.model.explained_variance_.tolist() # 方量差
  2460. conversion_separate_format(y_data, tab)
  2461. x_data = [f"第{i + 1}主成分" for i in range(len(importance))] # 主成分
  2462. y_data = [f"特征[{i}]" for i in range(len(importance[0]))] # 主成分
  2463. value = [
  2464. (f"第{i + 1}主成分", f"特征[{j}]", importance[i][j])
  2465. for i in range(len(importance))
  2466. for j in range(len(importance[i]))
  2467. ]
  2468. c = (
  2469. HeatMap()
  2470. .add_xaxis(x_data)
  2471. .add_yaxis(f"", y_data, value, **label_setting) # value的第一个数值是x
  2472. .set_global_opts(
  2473. title_opts=opts.TitleOpts(title="预测热力图"),
  2474. **global_not_legend,
  2475. yaxis_opts=opts.AxisOpts(is_scale=True), # 'category'
  2476. xaxis_opts=opts.AxisOpts(is_scale=True),
  2477. visualmap_opts=opts.VisualMapOpts(
  2478. is_show=True,
  2479. max_=int(self.model.components_.max()) + 1,
  2480. min_=int(self.model.components_.min()),
  2481. pos_right="3%",
  2482. ),
  2483. ) # 显示
  2484. )
  2485. tab.add(c, "成分热力图")
  2486. c = (
  2487. Bar()
  2488. .add_xaxis([f"第[{i}]主成分" for i in range(len(var))])
  2489. .add_yaxis("放量差", var, **label_setting)
  2490. .set_global_opts(
  2491. title_opts=opts.TitleOpts(title="方量差柱状图"), **global_setting
  2492. )
  2493. )
  2494. tab.add(c, "方量差柱状图")
  2495. des_to_csv(save_dir, "成分重要性", importance, [x_data], [y_data])
  2496. des_to_csv(
  2497. save_dir, "方量差", [var], [
  2498. f"第[{i}]主成分" for i in range(
  2499. len(var))])
  2500. save = save_dir + rf"{os.sep}RPCA(主成分分析).HTML"
  2501. tab.render(save) # 生成HTML
  2502. return save,
  2503. @plugin_class_loading(get_path(r"template/machinelearning"))
  2504. class KpcaModel(Unsupervised):
  2505. def __init__(self, args_use, *args, **kwargs):
  2506. super(KpcaModel, self).__init__(*args, **kwargs)
  2507. self.model = KernelPCA(
  2508. n_components=args_use["n_components"], kernel=args_use["kernel"]
  2509. )
  2510. self.n_components = args_use["n_components"]
  2511. self.kernel = args_use["kernel"]
  2512. self.k = {
  2513. "n_components": args_use["n_components"],
  2514. "kernel": args_use["kernel"],
  2515. }
  2516. self.model_Name = "KPCA"
  2517. def predict(self, x_data, *args, **kwargs):
  2518. self.x_testdata = x_data.copy()
  2519. x_predict = self.model.transform(x_data)
  2520. self.y_testdata = x_predict.copy()
  2521. self.have_predict = True
  2522. return x_predict, "KPCA"
  2523. def data_visualization(self, save_dir, *args, **kwargs):
  2524. tab = Tab()
  2525. y_data = self.y_testdata
  2526. conversion_separate_format(y_data, tab)
  2527. save = save_dir + rf"{os.sep}KPCA(主成分分析).HTML"
  2528. tab.render(save) # 生成HTML
  2529. return save,
  2530. class LdaModel(PrepBase): # 有监督学习
  2531. def __init__(self, args_use, *args, **kwargs):
  2532. super(LdaModel, self).__init__(*args, **kwargs)
  2533. self.model = Lda(n_components=args_use["n_components"])
  2534. self.n_components = args_use["n_components"]
  2535. self.k = {"n_components": args_use["n_components"]}
  2536. self.model_Name = "LDA"
  2537. def predict(self, x_data, *args, **kwargs):
  2538. self.x_testdata = x_data.copy()
  2539. x_predict = self.model.transform(x_data)
  2540. self.y_testdata = x_predict.copy()
  2541. self.have_predict = True
  2542. return x_predict, "LDA"
  2543. def data_visualization(self, save_dir, *args, **kwargs):
  2544. tab = Tab()
  2545. x_data = self.x_testdata
  2546. y_data = self.y_testdata
  2547. conversion_separate_format(y_data, tab)
  2548. w_list = self.model.coef_.tolist() # 变为表格
  2549. b = self.model.intercept_
  2550. tab = Tab()
  2551. x_means = quick_stats(x_data).get()[0]
  2552. # 回归的y是历史遗留问题 不用分类回归:因为得不到分类数据(predict结果是降维数据不是预测数据)
  2553. get = regress_w(x_data, w_list, b, x_means.copy())
  2554. for i in range(len(get)):
  2555. tab.add(get[i].overlap(get[i]), f"类别:{i}LDA映射曲线")
  2556. save = save_dir + rf"{os.sep}render.HTML"
  2557. tab.render(save) # 生成HTML
  2558. return save,
@plugin_class_loading(get_path(r"template/machinelearning"))
class NmfModel(Unsupervised):
    # Non-negative matrix factorization: V ~= W @ H.

    def __init__(self, args_use, *args, **kwargs):
        super(NmfModel, self).__init__(*args, **kwargs)
        self.model = NMF(n_components=args_use["n_components"])
        self.n_components = args_use["n_components"]
        self.k = {"n_components": args_use["n_components"]}
        self.model_Name = "NFM"
        self.h_testdata = None
    # x_traindata stores W; h_traindata and y_traindata are filled in later

    def predict(self, x_data, x_name="", add_func=None, *args, **kwargs):
        """Transform V into W; publish H through add_func when a name is given."""
        self.x_testdata = x_data.copy()
        x_predict = self.model.transform(x_data)
        self.y_testdata = x_predict.copy()
        self.h_testdata = self.model.components_
        if add_func is not None and x_name != "":
            add_func(self.h_testdata, f"{x_name}:V->NMF[H]")
        self.have_predict = True
        return x_predict, "V->NMF[W]"

    def data_visualization(self, save_dir, *args, **kwargs):
        """Heat maps of V, W*H and their difference on a shared colour
        scale; the three matrices are also dumped to CSV."""
        tab = Tab()
        y_data = self.y_testdata
        x_data = self.x_testdata
        h_data = self.h_testdata
        conversion_separate_wh(y_data, h_data, tab)
        wh_data = np.matmul(y_data, h_data)
        difference_data = x_data - wh_data

        def make_heat_map(data, name, data_max, data_min):
            # Build one pyecharts heat map of `data` and add it to the tab.
            x = [f"数据[{i}]" for i in range(len(data))]  # samples
            y = [f"特征[{i}]" for i in range(len(data[0]))]  # features
            value = [
                (f"数据[{i}]", f"特征[{j}]", float(data[i][j]))
                for i in range(len(data))
                for j in range(len(data[i]))
            ]
            c = (
                HeatMap()
                .add_xaxis(x)
                .add_yaxis(f"数据", y, value, **label_setting)  # value[0] is the x key
                .set_global_opts(
                    title_opts=opts.TitleOpts(title="原始数据热力图"),
                    **global_not_legend,
                    yaxis_opts=opts.AxisOpts(
                        is_scale=True, type_="category"
                    ),  # 'category'
                    xaxis_opts=opts.AxisOpts(is_scale=True, type_="category"),
                    visualmap_opts=opts.VisualMapOpts(
                        is_show=True, max_=data_max, min_=data_min, pos_right="3%"
                    ),
                )  # display options
            )
            tab.add(c, name)
        # Shared colour scale across the three heat maps.
        max_ = (max(int(x_data.max()), int(wh_data.max()),
                    int(difference_data.max())) + 1)
        min_ = min(int(x_data.min()), int(wh_data.min()),
                   int(difference_data.min()))
        make_heat_map(x_data, "原始数据热力图", max_, min_)
        make_heat_map(wh_data, "W * H数据热力图", max_, min_)
        make_heat_map(difference_data, "数据差热力图", max_, min_)
        des_to_csv(save_dir, "权重矩阵", y_data)
        des_to_csv(save_dir, "系数矩阵", h_data)
        des_to_csv(save_dir, "系数*权重矩阵", wh_data)
        save = save_dir + rf"{os.sep}非负矩阵分解.HTML"
        tab.render(save)  # emit the HTML report
        return save,
  2624. @plugin_class_loading(get_path(r"template/machinelearning"))
  2625. class TsneModel(Unsupervised):
  2626. def __init__(self, args_use, *args, **kwargs):
  2627. super(TsneModel, self).__init__(*args, **kwargs)
  2628. self.model = TSNE(n_components=args_use["n_components"])
  2629. self.n_components = args_use["n_components"]
  2630. self.k = {"n_components": args_use["n_components"]}
  2631. self.model_Name = "t-SNE"
  2632. def fit_model(self, *args, **kwargs):
  2633. self.have_fit = True
  2634. return "None", "None"
  2635. def predict(self, x_data, *args, **kwargs):
  2636. self.x_testdata = x_data.copy()
  2637. x_predict = self.model.fit_transform(x_data)
  2638. self.y_testdata = x_predict.copy()
  2639. self.have_predict = True
  2640. return x_predict, "SNE"
  2641. def data_visualization(self, save_dir, *args, **kwargs):
  2642. tab = Tab()
  2643. y_data = self.y_testdata
  2644. conversion_separate_format(y_data, tab)
  2645. save = save_dir + rf"{os.sep}T-SNE.HTML"
  2646. tab.render(save) # 生成HTML
  2647. return save,
  2648. class MlpModel(StudyMachinebase): # 神经网络(多层感知机),有监督学习
  2649. def __init__(self, args_use, model, *args, **kwargs):
  2650. super(MlpModel, self).__init__(*args, **kwargs)
  2651. all_model = {"MLP": MLPRegressor, "MLP_class": MLPClassifier}[model]
  2652. self.model = all_model(
  2653. hidden_layer_sizes=args_use["hidden_size"],
  2654. activation=args_use["activation"],
  2655. solver=args_use["solver"],
  2656. alpha=args_use["alpha"],
  2657. max_iter=args_use["max_iter"],
  2658. )
  2659. # 记录这两个是为了克隆
  2660. self.hidden_layer_sizes = args_use["hidden_size"]
  2661. self.activation = args_use["activation"]
  2662. self.max_iter = args_use["max_iter"]
  2663. self.solver = args_use["solver"]
  2664. self.alpha = args_use["alpha"]
  2665. self.k = {
  2666. "hidden_layer_sizes": args_use["hidden_size"],
  2667. "activation": args_use["activation"],
  2668. "max_iter": args_use["max_iter"],
  2669. "solver": args_use["solver"],
  2670. "alpha": args_use["alpha"],
  2671. }
  2672. self.model_Name = model
  2673. def data_visualization(self, save_dir, *args, **kwargs):
  2674. tab = Tab()
  2675. x_data = self.x_testdata
  2676. y_data = self.y_testdata
  2677. coefs = self.model.coefs_
  2678. class_ = self.model.classes_
  2679. n_layers_ = self.model.n_layers_
  2680. def make_heat_map(data_, name):
  2681. x = [f"特征(节点)[{i}]" for i in range(len(data_))]
  2682. y = [f"节点[{i}]" for i in range(len(data_[0]))]
  2683. value = [
  2684. (f"特征(节点)[{i}]", f"节点[{j}]", float(data_[i][j]))
  2685. for i in range(len(data_))
  2686. for j in range(len(data_[i]))
  2687. ]
  2688. c = (
  2689. HeatMap()
  2690. .add_xaxis(x)
  2691. .add_yaxis(f"数据", y, value, **label_setting) # value的第一个数值是x
  2692. .set_global_opts(
  2693. title_opts=opts.TitleOpts(title=name),
  2694. **global_not_legend,
  2695. yaxis_opts=opts.AxisOpts(
  2696. is_scale=True, type_="category"
  2697. ), # 'category'
  2698. xaxis_opts=opts.AxisOpts(is_scale=True, type_="category"),
  2699. visualmap_opts=opts.VisualMapOpts(
  2700. is_show=True,
  2701. max_=float(data_.max()),
  2702. min_=float(data_.min()),
  2703. pos_right="3%",
  2704. ),
  2705. ) # 显示
  2706. )
  2707. tab.add(c, name)
  2708. tab.add(make_tab(x, data_.transpose().tolist()), f"{name}:表格")
  2709. des_to_csv(save_dir, f"{name}:表格", data_.transpose().tolist(), x, y)
  2710. get, x_means, x_range, data_type = regress_visualization(
  2711. x_data, y_data)
  2712. for i in range(len(get)):
  2713. tab.add(get[i], f"{i}训练数据散点图")
  2714. get = prediction_boundary(x_range, x_means, self.predict, data_type)
  2715. for i in range(len(get)):
  2716. tab.add(get[i], f"{i}预测热力图")
  2717. heard = ["神经网络层数"]
  2718. data = [n_layers_]
  2719. for i in range(len(coefs)):
  2720. make_heat_map(coefs[i], f"{i}层权重矩阵")
  2721. heard.append(f"第{i}层节点数")
  2722. data.append(len(coefs[i][0]))
  2723. if self.model_Name == "MLP_class":
  2724. heard += [f"[{i}]类型" for i in range(len(class_))]
  2725. data += class_.tolist()
  2726. tab.add(make_tab(heard, [data]), "数据表")
  2727. save = save_dir + rf"{os.sep}多层感知机.HTML"
  2728. tab.render(save) # 生成HTML
  2729. return save,
  2730. @plugin_class_loading(get_path(r"template/machinelearning"))
  2731. class KmeansModel(UnsupervisedModel):
  2732. def __init__(self, args_use, *args, **kwargs):
  2733. super(KmeansModel, self).__init__(*args, **kwargs)
  2734. self.model = KMeans(n_clusters=args_use["n_clusters"])
  2735. self.class_ = []
  2736. self.n_clusters = args_use["n_clusters"]
  2737. self.k = {"n_clusters": args_use["n_clusters"]}
  2738. self.model_Name = "k-means"
  2739. def fit_model(self, x_data, *args, **kwargs):
  2740. return_ = super().fit_model(x_data, *args, **kwargs)
  2741. self.class_ = list(set(self.model.labels_.tolist()))
  2742. self.have_fit = True
  2743. return return_
  2744. def predict(self, x_data, *args, **kwargs):
  2745. self.x_testdata = x_data.copy()
  2746. y_predict = self.model.predict(x_data)
  2747. self.y_testdata = y_predict.copy()
  2748. self.have_predict = True
  2749. return y_predict, "k-means"
  2750. def data_visualization(self, save_dir, *args, **kwargs):
  2751. tab = Tab()
  2752. y = self.y_testdata
  2753. x_data = self.x_testdata
  2754. class_ = self.class_
  2755. center = self.model.cluster_centers_
  2756. class_heard = [f"簇[{i}]" for i in range(len(class_))]
  2757. func = (
  2758. training_visualization_more
  2759. if more_global
  2760. else training_visualization_center
  2761. )
  2762. get, x_means, x_range, data_type = func(x_data, class_, y, center)
  2763. for i in range(len(get)):
  2764. tab.add(get[i], f"{i}数据散点图")
  2765. get = decision_boundary(
  2766. x_range,
  2767. x_means,
  2768. self.predict,
  2769. class_,
  2770. data_type)
  2771. for i in range(len(get)):
  2772. tab.add(get[i], f"{i}预测热力图")
  2773. heard = class_heard + [f"普适预测第{i}特征" for i in range(len(x_means))]
  2774. data = class_ + [f"{i}" for i in x_means]
  2775. c = Table().add(headers=heard, rows=[data])
  2776. tab.add(c, "数据表")
  2777. des_to_csv(
  2778. save_dir,
  2779. "预测表",
  2780. [[f"{i}" for i in x_means]],
  2781. [f"普适预测第{i}特征" for i in range(len(x_means))],
  2782. )
  2783. save = save_dir + rf"{os.sep}k-means聚类.HTML"
  2784. tab.render(save) # 生成HTML
  2785. return save,
  2786. @plugin_class_loading(get_path(r"template/machinelearning"))
  2787. class AgglomerativeModel(UnsupervisedModel):
  2788. def __init__(self, args_use, *args, **kwargs):
  2789. super(AgglomerativeModel, self).__init__(*args, **kwargs)
  2790. self.model = AgglomerativeClustering(
  2791. n_clusters=args_use["n_clusters"]
  2792. ) # 默认为2,不同于k-means
  2793. self.class_ = []
  2794. self.n_clusters = args_use["n_clusters"]
  2795. self.k = {"n_clusters": args_use["n_clusters"]}
  2796. self.model_Name = "Agglomerative"
  2797. def fit_model(self, x_data, *args, **kwargs):
  2798. return_ = super().fit_model(x_data, *args, **kwargs)
  2799. self.class_ = list(set(self.model.labels_.tolist()))
  2800. self.have_fit = True
  2801. return return_
  2802. def predict(self, x_data, *args, **kwargs):
  2803. self.x_testdata = x_data.copy()
  2804. y_predict = self.model.fit_predict(x_data)
  2805. self.y_traindata = y_predict.copy()
  2806. self.have_predict = True
  2807. return y_predict, "Agglomerative"
  2808. def data_visualization(self, save_dir, *args, **kwargs):
  2809. tab = Tab()
  2810. y = self.y_testdata
  2811. x_data = self.x_testdata
  2812. class_ = self.class_
  2813. class_heard = [f"簇[{i}]" for i in range(len(class_))]
  2814. func = (
  2815. training_visualization_more_no_center
  2816. if more_global
  2817. else training_visualization
  2818. )
  2819. get, x_means, x_range, data_type = func(x_data, class_, y)
  2820. for i in range(len(get)):
  2821. tab.add(get[i], f"{i}训练数据散点图")
  2822. get = decision_boundary(
  2823. x_range,
  2824. x_means,
  2825. self.predict,
  2826. class_,
  2827. data_type)
  2828. for i in range(len(get)):
  2829. tab.add(get[i], f"{i}预测热力图")
  2830. linkage_array = ward(self.x_traindata) # self.y_traindata是结果
  2831. dendrogram(linkage_array)
  2832. plt.savefig(save_dir + rf"{os.sep}Cluster_graph.png")
  2833. image = Image()
  2834. image.add(src=save_dir + rf"{os.sep}Cluster_graph.png",).set_global_opts(
  2835. title_opts=opts.ComponentTitleOpts(title="聚类树状图")
  2836. )
  2837. tab.add(image, "聚类树状图")
  2838. heard = class_heard + [f"普适预测第{i}特征" for i in range(len(x_means))]
  2839. data = class_ + [f"{i}" for i in x_means]
  2840. c = Table().add(headers=heard, rows=[data])
  2841. tab.add(c, "数据表")
  2842. des_to_csv(
  2843. save_dir,
  2844. "预测表",
  2845. [[f"{i}" for i in x_means]],
  2846. [f"普适预测第{i}特征" for i in range(len(x_means))],
  2847. )
  2848. save = save_dir + rf"{os.sep}层次聚类.HTML"
  2849. tab.render(save) # 生成HTML
  2850. return save,
  2851. @plugin_class_loading(get_path(r"template/machinelearning"))
  2852. class DbscanModel(UnsupervisedModel):
  2853. def __init__(self, args_use, *args, **kwargs):
  2854. super(DbscanModel, self).__init__(*args, **kwargs)
  2855. self.model = DBSCAN(
  2856. eps=args_use["eps"],
  2857. min_samples=args_use["min_samples"])
  2858. # eps是距离(0.5),min_samples(5)是簇与噪音分界线(每个簇最小元素数)
  2859. # min_samples
  2860. self.eps = args_use["eps"]
  2861. self.min_samples = args_use["min_samples"]
  2862. self.k = {
  2863. "min_samples": args_use["min_samples"],
  2864. "eps": args_use["eps"]}
  2865. self.class_ = []
  2866. self.model_Name = "DBSCAN"
  2867. def fit_model(self, x_data, *args, **kwargs):
  2868. return_ = super().fit_model(x_data, *args, **kwargs)
  2869. self.class_ = list(set(self.model.labels_.tolist()))
  2870. self.have_fit = True
  2871. return return_
  2872. def predict(self, x_data, *args, **kwargs):
  2873. self.x_testdata = x_data.copy()
  2874. y_predict = self.model.fit_predict(x_data)
  2875. self.y_testdata = y_predict.copy()
  2876. self.have_predict = True
  2877. return y_predict, "DBSCAN"
  2878. def data_visualization(self, save_dir, *args, **kwargs):
  2879. # DBSCAN没有预测的必要
  2880. tab = Tab()
  2881. y = self.y_testdata.copy()
  2882. x_data = self.x_testdata.copy()
  2883. class_ = self.class_
  2884. class_heard = [f"簇[{i}]" for i in range(len(class_))]
  2885. func = (
  2886. training_visualization_more_no_center
  2887. if more_global
  2888. else training_visualization
  2889. )
  2890. get, x_means, x_range, data_type = func(x_data, class_, y)
  2891. for i in range(len(get)):
  2892. tab.add(get[i], f"{i}训练数据散点图")
  2893. heard = class_heard + [f"普适预测第{i}特征" for i in range(len(x_means))]
  2894. data = class_ + [f"{i}" for i in x_means]
  2895. c = Table().add(headers=heard, rows=[data])
  2896. tab.add(c, "数据表")
  2897. des_to_csv(
  2898. save_dir,
  2899. "预测表",
  2900. [[f"{i}" for i in x_means]],
  2901. [f"普适预测第{i}特征" for i in range(len(x_means))],
  2902. )
  2903. save = save_dir + rf"{os.sep}密度聚类.HTML"
  2904. tab.render(save) # 生成HTML
  2905. return save,
  2906. class FastFourier(StudyMachinebase): # 快速傅里叶变换
  2907. def __init__(self, *args, **kwargs):
  2908. super(FastFourier, self).__init__(*args, **kwargs)
  2909. self.model = None
  2910. self.fourier = None # fft复数
  2911. self.frequency = None # 频率range
  2912. self.angular_Frequency = None # 角频率range
  2913. self.phase = None # 相位range
  2914. self.breadth = None # 震幅range
  2915. self.sample_size = None # 样本数
  2916. def fit_model(self, y_data, *args, **kwargs):
  2917. y_data = y_data.ravel() # 扯平为一维数组
  2918. try:
  2919. assert not self.y_traindata is None
  2920. self.y_traindata = np.hstack((y_data, self.x_traindata))
  2921. except (AssertionError, ValueError):
  2922. self.y_traindata = y_data.copy()
  2923. fourier = fft(y_data)
  2924. self.sample_size = len(y_data)
  2925. self.frequency = np.linspace(0, 1, self.sample_size) # 频率N_range
  2926. self.angular_Frequency = self.frequency / (np.pi * 2) # 角频率w
  2927. self.phase = np.angle(fourier)
  2928. self.breadth = np.abs(fourier)
  2929. self.fourier = fourier
  2930. self.have_fit = True
  2931. return "None", "None"
  2932. def predict(self, x_data, *args, **kwargs):
  2933. return np.array([]), ""
  2934. def data_visualization(self, save_dir, *args, **kwargs):
  2935. # DBSCAN没有预测的必要
  2936. tab = Tab()
  2937. y = self.y_traindata.copy()
  2938. n = self.sample_size
  2939. phase = self.phase # 相位range
  2940. breadth = self.breadth # 震幅range
  2941. normalization_breadth = breadth / n
  2942. def line(name, value, s=slice(0, None)) -> Line:
  2943. c = (
  2944. Line()
  2945. .add_xaxis(self.frequency[s].tolist())
  2946. .add_yaxis(
  2947. "",
  2948. value,
  2949. **label_setting,
  2950. symbol="none" if self.sample_size >= 500 else None,
  2951. )
  2952. .set_global_opts(
  2953. title_opts=opts.TitleOpts(title=name),
  2954. **global_not_legend,
  2955. xaxis_opts=opts.AxisOpts(type_="value"),
  2956. yaxis_opts=opts.AxisOpts(type_="value"),
  2957. )
  2958. )
  2959. return c
  2960. tab.add(line("原始数据", y.tolist()), "原始数据")
  2961. tab.add(line("双边振幅谱", breadth.tolist()), "双边振幅谱")
  2962. tab.add(
  2963. line(
  2964. "双边振幅谱(归一化)",
  2965. normalization_breadth.tolist()),
  2966. "双边振幅谱(归一化)")
  2967. tab.add(
  2968. line("单边相位谱", breadth[: int(n / 2)].tolist(), slice(0, int(n / 2))), "单边相位谱"
  2969. )
  2970. tab.add(
  2971. line(
  2972. "单边相位谱(归一化)",
  2973. normalization_breadth[: int(n / 2)].tolist(),
  2974. slice(0, int(n / 2)),
  2975. ),
  2976. "单边相位谱(归一化)",
  2977. )
  2978. tab.add(line("双边相位谱", phase.tolist()), "双边相位谱")
  2979. tab.add(
  2980. line("单边相位谱", phase[: int(n / 2)].tolist(), slice(0, int(n / 2))), "单边相位谱"
  2981. )
  2982. tab.add(make_tab(self.frequency.tolist(), [breadth.tolist()]), "双边振幅谱")
  2983. tab.add(make_tab(self.frequency.tolist(), [phase.tolist()]), "双边相位谱")
  2984. tab.add(
  2985. make_tab(
  2986. self.frequency.tolist(), [
  2987. self.fourier.tolist()]), "快速傅里叶变换")
  2988. save = save_dir + rf"{os.sep}快速傅里叶.HTML"
  2989. tab.render(save) # 生成HTML
  2990. return save,
  2991. class ReverseFastFourier(StudyMachinebase): # 快速傅里叶变换
  2992. def __init__(self, *args, **kwargs):
  2993. super(ReverseFastFourier, self).__init__(*args, **kwargs)
  2994. self.model = None
  2995. self.sample_size = None
  2996. self.y_testdata_real = None
  2997. self.phase = None
  2998. self.breadth = None
  2999. def fit_model(self, y_data, *args, **kwargs):
  3000. return "None", "None"
  3001. def predict(self, x_data, x_name="", add_func=None, *args, **kwargs):
  3002. self.x_testdata = x_data.ravel().astype(np.complex_)
  3003. fourier = ifft(self.x_testdata)
  3004. self.y_testdata = fourier.copy()
  3005. self.y_testdata_real = np.real(fourier)
  3006. self.sample_size = len(self.y_testdata_real)
  3007. self.phase = np.angle(self.x_testdata)
  3008. self.breadth = np.abs(self.x_testdata)
  3009. add_func(self.y_testdata_real.copy(), f"{x_name}:逆向快速傅里叶变换[实数]")
  3010. return fourier, "逆向快速傅里叶变换"
  3011. def data_visualization(self, save_dir, *args, **kwargs):
  3012. # DBSCAN没有预测的必要
  3013. tab = Tab()
  3014. y = self.y_testdata_real.copy()
  3015. y_data = self.y_testdata.copy()
  3016. n = self.sample_size
  3017. range_n: list = np.linspace(0, 1, n).tolist()
  3018. phase = self.phase # 相位range
  3019. breadth = self.breadth # 震幅range
  3020. def line(name, value, s=slice(0, None)) -> Line:
  3021. c = (
  3022. Line() .add_xaxis(
  3023. range_n[s]) .add_yaxis(
  3024. "",
  3025. value,
  3026. **label_setting,
  3027. symbol="none" if n >= 500 else None) .set_global_opts(
  3028. title_opts=opts.TitleOpts(
  3029. title=name),
  3030. **global_not_legend,
  3031. xaxis_opts=opts.AxisOpts(
  3032. type_="value"),
  3033. yaxis_opts=opts.AxisOpts(
  3034. type_="value"),
  3035. ))
  3036. return c
  3037. tab.add(line("逆向傅里叶变换", y.tolist()), "逆向傅里叶变换[实数]")
  3038. tab.add(make_tab(range_n, [y_data.tolist()]), "逆向傅里叶变换数据")
  3039. tab.add(make_tab(range_n, [y.tolist()]), "逆向傅里叶变换数据[实数]")
  3040. tab.add(line("双边振幅谱", breadth.tolist()), "双边振幅谱")
  3041. tab.add(
  3042. line("单边相位谱", breadth[: int(n / 2)].tolist(), slice(0, int(n / 2))), "单边相位谱"
  3043. )
  3044. tab.add(line("双边相位谱", phase.tolist()), "双边相位谱")
  3045. tab.add(
  3046. line("单边相位谱", phase[: int(n / 2)].tolist(), slice(0, int(n / 2))), "单边相位谱"
  3047. )
  3048. save = save_dir + rf"{os.sep}快速傅里叶.HTML"
  3049. tab.render(save) # 生成HTML
  3050. return save,
  3051. class ReverseFastFourierTwonumpy(ReverseFastFourier): # 2快速傅里叶变换
  3052. def fit_model(
  3053. self,
  3054. x_data,
  3055. y_data=None,
  3056. x_name="",
  3057. add_func=None,
  3058. *args,
  3059. **kwargs):
  3060. r = np.multiply(np.cos(x_data), y_data)
  3061. j = np.multiply(np.sin(x_data), y_data) * 1j
  3062. super(ReverseFastFourierTwonumpy, self).predict(
  3063. r + j, x_name=x_name, add_func=add_func, *args, **kwargs
  3064. )
  3065. return "None", "None"
  3066. class CurveFitting(StudyMachinebase): # 曲线拟合
  3067. def __init__(self, name, str_, model, *args, **kwargs):
  3068. super(CurveFitting, self).__init__(*args, **kwargs)
  3069. def ndim_down(data: np.ndarray):
  3070. if data.ndim == 1:
  3071. return data
  3072. new_data = []
  3073. for i in data:
  3074. new_data.append(np.sum(i))
  3075. return np.array(new_data)
  3076. named_domain = {"np": np, "Func": model, "ndimDown": ndim_down}
  3077. protection_func = f"""
  3078. @plugin_func_loading(get_path(r'template/machinelearning'))
  3079. def FUNC({",".join(model.__code__.co_varnames)}):
  3080. answer = Func({",".join(model.__code__.co_varnames)})
  3081. return ndimDown(answer)
  3082. """
  3083. exec(protection_func, named_domain)
  3084. self.func = named_domain["FUNC"]
  3085. self.fit_data = None
  3086. self.name = name
  3087. self.func_str = str_
  3088. def fit_model(
  3089. self,
  3090. x_data: np.ndarray,
  3091. y_data: np.ndarray,
  3092. *args,
  3093. **kwargs):
  3094. y_data = y_data.ravel()
  3095. x_data = x_data.astype(np.float64)
  3096. try:
  3097. assert not self.x_traindata is None
  3098. self.x_traindata = np.vstack((x_data, self.x_traindata))
  3099. self.y_traindata = np.vstack((y_data, self.y_traindata))
  3100. except (AssertionError, ValueError):
  3101. self.x_traindata = x_data.copy()
  3102. self.y_traindata = y_data.copy()
  3103. self.fit_data = optimize.curve_fit(
  3104. self.func, self.x_traindata, self.y_traindata
  3105. )
  3106. self.model = self.fit_data[0].copy()
  3107. return "None", "None"
  3108. def predict(self, x_data, *args, **kwargs):
  3109. self.x_testdata = x_data.copy()
  3110. predict = self.func(x_data, *self.model)
  3111. y_predict = []
  3112. for i in predict:
  3113. y_predict.append(np.sum(i))
  3114. y_predict = np.array(y_predict)
  3115. self.y_testdata = y_predict.copy()
  3116. self.have_predict = True
  3117. return y_predict, self.name
  3118. def data_visualization(self, save_dir, *args, **kwargs):
  3119. # DBSCAN没有预测的必要
  3120. tab = Tab()
  3121. y = self.y_testdata.copy()
  3122. x_data = self.x_testdata.copy()
  3123. get, x_means, x_range, data_type = regress_visualization(x_data, y)
  3124. for i in range(len(get)):
  3125. tab.add(get[i], f"{i}预测类型图")
  3126. get = prediction_boundary(x_range, x_means, self.predict, data_type)
  3127. for i in range(len(get)):
  3128. tab.add(get[i], f"{i}预测热力图")
  3129. tab.add(
  3130. make_tab(
  3131. [f"普适预测第{i}特征" for i in range(len(x_means))],
  3132. [[f"{i}" for i in x_means]],
  3133. ),
  3134. "普适预测特征数据",
  3135. )
  3136. tab.add(
  3137. make_tab(
  3138. [f"参数[{i}]" for i in range(len(self.model))],
  3139. [[f"{i}" for i in self.model]],
  3140. ),
  3141. "拟合参数",
  3142. )
  3143. save = save_dir + rf"{os.sep}曲线拟合.HTML"
  3144. tab.render(save) # 生成HTML
  3145. return save,
  3146. @plugin_class_loading(get_path(r"template/machinelearning"))
  3147. class Tab(tab_First):
  3148. def __init__(self, *args, **kwargs):
  3149. super(Tab, self).__init__(*args, **kwargs)
  3150. self.element = {} # 记录tab组成元素 name:charts
  3151. def add(self, chart, tab_name):
  3152. self.element[tab_name] = chart
  3153. return super(Tab, self).add(chart, tab_name)
  3154. def render(
  3155. self,
  3156. path: str = "render.html",
  3157. template_name: str = "simple_tab.html",
  3158. *args,
  3159. **kwargs,
  3160. ) -> str:
  3161. if all_global:
  3162. render_dir = path_split(path)[0]
  3163. for i in self.element:
  3164. self.element[i].render(render_dir + os.sep + i + ".html")
  3165. return super(Tab, self).render(path, template_name, *args, **kwargs)
  3166. @plugin_class_loading(get_path(r"template/machinelearning"))
  3167. class Table(TableFisrt):
  3168. def __init__(self, *args, **kwargs):
  3169. super(Table, self).__init__(*args, **kwargs)
  3170. self.HEADERS = []
  3171. self.ROWS = [[]]
  3172. def add(self, headers, rows, attributes=None):
  3173. if len(rows) == 1:
  3174. new_headers = ["数据类型", "数据"]
  3175. new_rows = list(zip(headers, rows[0]))
  3176. self.HEADERS = new_headers
  3177. self.ROWS = new_rows
  3178. return super().add(new_headers, new_rows, attributes)
  3179. else:
  3180. self.HEADERS = headers
  3181. self.ROWS = rows
  3182. return super().add(headers, rows, attributes)
  3183. def render(self, path="render.html", *args, **kwargs,) -> str:
  3184. if csv_global:
  3185. save_dir, name = path_split(path)
  3186. name = splitext(name)[0]
  3187. try:
  3188. DataFrame(self.ROWS, columns=self.HEADERS).to_csv(
  3189. save_dir + os.sep + name + ".csv"
  3190. )
  3191. except BaseException as e:
  3192. logging.warning(str(e))
  3193. return super().render(path, *args, **kwargs)
  3194. @plugin_func_loading(get_path(r"template/machinelearning"))
  3195. def make_list(first, end, num=35):
  3196. n = num / (end - first)
  3197. if n == 0:
  3198. n = 1
  3199. return_ = []
  3200. n_first = first * n
  3201. n_end = end * n
  3202. while n_first <= n_end:
  3203. cul = n_first / n
  3204. return_.append(round(cul, 2))
  3205. n_first += 1
  3206. return return_
  3207. @plugin_func_loading(get_path(r"template/machinelearning"))
  3208. def list_filter(original_list, num=70):
  3209. if len(original_list) <= num:
  3210. return original_list
  3211. n = int(num / len(original_list))
  3212. return_ = original_list[::n]
  3213. return return_
@plugin_func_loading(get_path(r"template/machinelearning"))
def prediction_boundary(x_range, x_means, predict_func, data_type):  # regression x-x heat maps
    """Heat maps of predict_func over every feature pair (j > i).

    x_range holds each feature's plot range, x_means the fixed values used
    for all other features, predict_func is the prediction callback (its
    first return value is used), data_type marks continuous features with 1.
    Returns a list of HeatMap charts (empty for a single feature).
    """
    # a - feature j (x axis grid), b - feature i (y axis grid)
    render_list = []
    if len(x_means) == 1:
        return render_list  # one feature: no feature pair to plot
    for i in range(len(x_means)):
        for j in range(len(x_means)):
            if j <= i:
                continue
            a_range = x_range[j]
            a_type = data_type[j]
            b_range = x_range[i]
            b_type = data_type[i]
            if a_type == 1:
                a_list = make_list(a_range[0], a_range[1], 70)
            else:
                a_list = list_filter(a_range)  # accepts at most 70 values
            if b_type == 1:
                b_list = make_list(b_range[0], b_range[1], 35)
            else:
                b_list = list_filter(b_range)  # accepts at most 70 values
            # Cartesian product of the two grids (comprehension-local i/j
            # do not leak and leave the outer loop indices untouched).
            a = np.array([i for i in a_list for _ in b_list]).T
            b = np.array([i for _ in a_list for i in b_list]).T
            data = np.array([x_means for _ in a_list for i in b_list])
            data[:, j] = a  # substitute the grid values for the two features
            data[:, i] = b
            y_data = predict_func(data)[0].tolist()
            value = [[float(a[i]), float(b[i]), y_data[i]]
                     for i in range(len(a))]
            c = (
                HeatMap()
                .add_xaxis(np.unique(a))
                # the first element of each value triple is the x coordinate
                .add_yaxis(f"数据", np.unique(b), value, **label_setting)
                .set_global_opts(
                    title_opts=opts.TitleOpts(title="预测热力图"),
                    **global_not_legend,
                    yaxis_opts=opts.AxisOpts(
                        is_scale=True, type_="category"
                    ),  # 'category'
                    xaxis_opts=opts.AxisOpts(is_scale=True, type_="category"),
                    visualmap_opts=opts.VisualMapOpts(
                        is_show=True,
                        max_=int(max(y_data)) + 1,
                        min_=int(min(y_data)),
                        pos_right="3%",
                    ),
                )  # display options
            )
            render_list.append(c)
    return render_list
@plugin_func_loading(get_path(r"template/machinelearning"))
def prediction_boundary_more(x_range, x_means, predict_func, data_type):
    """Regression heat maps for adjacent feature pairs (i-1, i) only.

    Same contract as prediction_boundary, but only neighbouring features
    are paired, producing len(x_means) - 1 charts.
    """
    # a - feature i-1 (x axis grid), b - feature i (y axis grid)
    render_list = []
    if len(x_means) == 1:
        return render_list  # one feature: no feature pair to plot
    for i in range(len(x_means)):
        if i == 0:
            continue
        a_range = x_range[i - 1]
        a_type = data_type[i - 1]
        b_range = x_range[i]
        b_type = data_type[i]
        if a_type == 1:
            a_list = make_list(a_range[0], a_range[1], 70)
        else:
            a_list = list_filter(a_range)  # accepts at most 70 values
        if b_type == 1:
            b_list = make_list(b_range[0], b_range[1], 35)
        else:
            b_list = list_filter(b_range)  # accepts at most 70 values
        # Cartesian product of the two grids.
        a = np.array([i for i in a_list for _ in b_list]).T
        b = np.array([i for _ in a_list for i in b_list]).T
        data = np.array([x_means for _ in a_list for i in b_list])
        data[:, i - 1] = a  # substitute the grid values for the two features
        data[:, i] = b
        y_data = predict_func(data)[0].tolist()
        value = [[float(a[i]), float(b[i]), y_data[i]] for i in range(len(a))]
        c = (
            HeatMap()
            .add_xaxis(np.unique(a))
            # the first element of each value triple is the x coordinate
            .add_yaxis(f"数据", np.unique(b), value, **label_setting)
            .set_global_opts(
                title_opts=opts.TitleOpts(title="预测热力图"),
                **global_not_legend,
                yaxis_opts=opts.AxisOpts(
                    is_scale=True, type_="category"),  # 'category'
                xaxis_opts=opts.AxisOpts(is_scale=True, type_="category"),
                visualmap_opts=opts.VisualMapOpts(
                    is_show=True,
                    max_=int(max(y_data)) + 1,
                    min_=int(min(y_data)),
                    pos_right="3%",
                ),
            )  # display options
        )
        render_list.append(c)
    return render_list
def decision_boundary(
    x_range, x_means, predict_func, class_list, data_type, no_unknow=False
):  # classification x-x heat maps
    """Heat maps of the predicted class over adjacent feature-pair grids.

    x_range: per-feature plot range; x_means: fixed values for the other
    features; predict_func: prediction callback whose first return value is
    the label array; class_list: known class labels; data_type: 1 marks a
    continuous feature; no_unknow: drop the "unknown" (-1) legend segment.
    Returns a list of HeatMap charts.
    """
    # By convention i-1 / a / x_1 is the x axis.
    class_dict = dict(zip(class_list, [i for i in range(len(class_list))]))
    if not no_unknow:
        map_dict = [{"min": -1.5, "max": -0.5, "label": "未知"}]  # piecewise legend
    else:
        map_dict = []
    for i in class_dict:
        map_dict.append(
            {"min": class_dict[i] - 0.5, "max": class_dict[i] + 0.5, "label": str(i)}
        )
    render_list = []
    if len(x_means) == 1:
        # Single feature: plot it against a dummy "None" x axis.
        a_range = x_range[0]
        if data_type[0] == 1:
            a_list = make_list(a_range[0], a_range[1], 70)
        else:
            a_list = a_range
        a = np.array([i for i in a_list]).reshape(-1, 1)
        y_data = predict_func(a)[0].tolist()
        # Unrecognized labels map to -1 ("unknown").
        value = [[0, float(a[i]), class_dict.get(y_data[i], -1)]
                 for i in range(len(a))]
        c = (
            HeatMap()
            .add_xaxis(["None"])
            # the first element of each value triple is the x coordinate
            .add_yaxis(f"数据", np.unique(a), value, **label_setting)
            .set_global_opts(
                title_opts=opts.TitleOpts(title="预测热力图"),
                **global_not_legend,
                yaxis_opts=opts.AxisOpts(
                    is_scale=True, type_="category"),  # 'category'
                xaxis_opts=opts.AxisOpts(is_scale=True, type_="category"),
                visualmap_opts=opts.VisualMapOpts(
                    is_show=True,
                    max_=max(class_dict.values()),
                    min_=-1,
                    is_piecewise=True,
                    pieces=map_dict,
                    orient="horizontal",
                    pos_bottom="3%",
                ),
            )
        )
        render_list.append(c)
        return render_list
    # More than one feature: one heat map per adjacent pair.
    for i in range(len(x_means)):
        if i == 0:
            continue
        a_range = x_range[i - 1]
        a_type = data_type[i - 1]
        b_range = x_range[i]
        b_type = data_type[i]
        if a_type == 1:
            a_list = make_list(a_range[0], a_range[1], 70)
        else:
            a_list = a_range
        if b_type == 1:
            rb = make_list(b_range[0], b_range[1], 35)
        else:
            rb = b_range
        # Cartesian product of the two grids.
        a = np.array([i for i in a_list for _ in rb]).T
        b = np.array([i for _ in a_list for i in rb]).T
        data = np.array([x_means for _ in a_list for i in rb])
        data[:, i - 1] = a  # substitute the grid values for the two features
        data[:, i] = b
        y_data = predict_func(data)[0].tolist()
        value = [
            [float(a[i]), float(b[i]), class_dict.get(y_data[i], -1)]
            for i in range(len(a))
        ]
        c = (
            HeatMap()
            .add_xaxis(np.unique(a))
            # the first element of each value triple is the x coordinate
            .add_yaxis(f"数据", np.unique(b), value, **label_setting)
            .set_global_opts(
                title_opts=opts.TitleOpts(title="预测热力图"),
                **global_not_legend,
                yaxis_opts=opts.AxisOpts(
                    is_scale=True, type_="category"),  # 'category'
                xaxis_opts=opts.AxisOpts(is_scale=True, type_="category"),
                visualmap_opts=opts.VisualMapOpts(
                    is_show=True,
                    max_=max(class_dict.values()),
                    min_=-1,
                    is_piecewise=True,
                    pieces=map_dict,
                    orient="horizontal",
                    pos_bottom="3%",
                ),
            )
        )
        render_list.append(c)
    return render_list
def decision_boundary_more(
    x_range, x_means, predict_func, class_list, data_type, no_unknow=False
):
    """Classification heat maps over every feature pair (j > i).

    Same contract as decision_boundary, but all pairs are plotted instead
    of adjacent ones; the single-feature case is delegated to it.
    """
    # a - feature j (x axis), b - feature i (y axis)
    class_dict = dict(zip(class_list, [i for i in range(len(class_list))]))
    if not no_unknow:
        map_dict = [{"min": -1.5, "max": -0.5, "label": "未知"}]  # piecewise legend
    else:
        map_dict = []
    for i in class_dict:
        map_dict.append(
            {"min": class_dict[i] - 0.5, "max": class_dict[i] + 0.5, "label": str(i)}
        )
    render_list = []
    if len(x_means) == 1:
        return decision_boundary(
            x_range, x_means, predict_func, class_list, data_type, no_unknow
        )
    # More than one feature: one heat map per (i, j) pair.
    for i in range(len(x_means)):
        for j in range(len(x_means)):
            if j <= i:
                continue
            a_range = x_range[j]
            a_type = data_type[j]
            b_range = x_range[i]
            b_type = data_type[i]
            if a_type == 1:
                a_range = make_list(a_range[0], a_range[1], 70)
            else:
                a_range = a_range  # kept as-is (no-op branch)
            if b_type == 1:
                b_range = make_list(b_range[0], b_range[1], 35)
            else:
                b_range = b_range  # kept as-is (no-op branch)
            # Cartesian product of the two grids.
            a = np.array([i for i in a_range for _ in b_range]).T
            b = np.array([i for _ in a_range for i in b_range]).T
            data = np.array([x_means for _ in a_range for i in b_range])
            data[:, j] = a  # substitute the grid values for the two features
            data[:, i] = b
            y_data = predict_func(data)[0].tolist()
            # Unrecognized labels map to -1 ("unknown").
            value = [
                [float(a[i]), float(b[i]), class_dict.get(y_data[i], -1)]
                for i in range(len(a))
            ]
            c = (
                HeatMap()
                .add_xaxis(np.unique(a))
                # the first element of each value triple is the x coordinate
                .add_yaxis(f"数据", np.unique(b), value, **label_setting)
                .set_global_opts(
                    title_opts=opts.TitleOpts(title="预测热力图"),
                    **global_not_legend,
                    yaxis_opts=opts.AxisOpts(
                        is_scale=True, type_="category"
                    ),  # 'category'
                    xaxis_opts=opts.AxisOpts(is_scale=True, type_="category"),
                    visualmap_opts=opts.VisualMapOpts(
                        is_show=True,
                        max_=max(class_dict.values()),
                        min_=-1,
                        is_piecewise=True,
                        pieces=map_dict,
                        orient="horizontal",
                        pos_bottom="3%",
                    ),
                )
            )
            render_list.append(c)
    return render_list
@plugin_func_loading(get_path(r"template/machinelearning"))
def see_tree(tree_file_dir):
    """Parse a Graphviz .dot decision-tree export into a pyecharts Tree chart.

    Nodes are matched line by line, edges are collected, children are wired
    to their parents, and the root (any node without a parent) becomes the
    chart's top node.
    """
    node_regex = re.compile(r'^([0-9]+) \[label="(.+)"\] ;$')  # node line pattern
    link_regex = re.compile("^([0-9]+) -> ([0-9]+) (.*);$")  # edge line pattern
    node_dict = {}
    link_list = []
    with open(tree_file_dir, "r") as f:  # apparently w and r must be separate handles
        for i in f:
            try:
                # findall(...)[0] raises IndexError on non-node lines; that is
                # caught below and the line falls through to the edge pattern.
                regex_result = re.findall(node_regex, i)[0]
                if regex_result[0] != "":
                    try:
                        # NOTE(review): this parses the node *id* as the chart
                        # value; the label text is regex_result[1] - confirm intent.
                        v = float(regex_result[0])
                    except ValueError:
                        v = 0
                    node_dict[regex_result[0]] = {
                        "name": regex_result[1].replace("\\n", "\n"),
                        "value": v,
                        "children": [],
                    }
                    continue
            except BaseException as e:
                logging.warning(str(e))
            try:
                regex_result = re.findall(link_regex, i)[0]
                if regex_result[0] != "" and regex_result[1] != "":
                    link_list.append((regex_result[0], regex_result[1]))
            except BaseException as e:
                logging.warning(str(e))
    father_list = []  # nodes that already have a parent
    for i in link_list:
        father = i[0]  # parent node id
        son = i[1]  # child node id
        try:
            node_dict[father]["children"].append(node_dict[son])
            father_list.append(son)
        except BaseException as e:
            logging.warning(str(e))
    # Any node that never appeared as a child is a root candidate.
    father = list(set(node_dict.keys()) - set(father_list))
    c = (
        Tree()
        .add("", [node_dict[father[0]]], is_roam=True)
        .set_global_opts(
            title_opts=opts.TitleOpts(title="决策树可视化"),
            toolbox_opts=opts.ToolboxOpts(is_show=True),
        )
    )
    return c
  3537. @plugin_func_loading(get_path(r"template/machinelearning"))
  3538. def make_tab(heard, row):
  3539. return Table().add(headers=heard, rows=row)
  3540. @plugin_func_loading(get_path(r"template/machinelearning"))
  3541. def coefficient_scatter_plot(w_heard, w):
  3542. c = (
  3543. Scatter() .add_xaxis(w_heard) .add_yaxis(
  3544. "", w, **label_setting) .set_global_opts(
  3545. title_opts=opts.TitleOpts(
  3546. title="系数w散点图"), **global_setting))
  3547. return c
  3548. @plugin_func_loading(get_path(r"template/machinelearning"))
  3549. def coefficient_bar_plot(w_heard, w):
  3550. c = (
  3551. Bar() .add_xaxis(w_heard) .add_yaxis(
  3552. "",
  3553. abs(w).tolist(),
  3554. **label_setting) .set_global_opts(
  3555. title_opts=opts.TitleOpts(
  3556. title="系数w柱状图"),
  3557. **global_setting))
  3558. return c
  3559. @plugin_func_loading(get_path(r"template/machinelearning"))
  3560. def is_continuous(data: np.array, f: float = 0.1):
  3561. data = data.tolist()
  3562. l: list = np.unique(data).tolist()
  3563. return len(l) / len(data) >= f or len(data) <= 3
  3564. @plugin_func_loading(get_path(r"template/machinelearning"))
  3565. def quick_stats(x_data):
  3566. statistics_assistant = CategoricalData()
  3567. print(x_data)
  3568. for i in range(len(x_data)):
  3569. x1 = x_data[i] # x坐标
  3570. statistics_assistant(x1)
  3571. return statistics_assistant
@plugin_func_loading(get_path(r"template/machinelearning"))
def training_visualization_more_no_center(x_data, class_list, y_data):
    """Per-class scatter charts for every feature pair (no cluster centers).

    Returns (charts, means, x_range, data_type) where the statistics come
    from quick_stats over the transposed feature matrix.
    """
    x_data = x_data.transpose()  # one row per feature
    if len(x_data) == 1:
        # Pad a zero feature so a 2-D scatter is still possible.
        x_data = np.array([x_data[0], np.zeros(len(x_data[0]))])
    statistics_assistant = quick_stats(x_data)
    render_list = []
    for i in range(len(x_data)):
        for a in range(len(x_data)):
            if a <= i:
                continue
            x1 = x_data[i]  # x coordinate (plotted on the chart's y axis)
            x1_is_continuous = is_continuous(x1)
            x2 = x_data[a]  # y coordinate
            x2_is_continuous = is_continuous(x2)
            base_render = None  # previous chart, overlapped per class
            for class_num in range(len(class_list)):
                now_class = class_list[class_num]
                plot_x1 = x1[y_data == now_class].tolist()
                plot_x2 = x2[y_data == now_class]
                axis_x2 = np.unique(plot_x2)
                plot_x2 = x2[y_data == now_class].tolist()
                # Unlike a plain scatter, x2 feeds the horizontal axis here.
                c = (
                    Scatter()
                    .add_xaxis(plot_x2)
                    .add_yaxis(f"{now_class}", plot_x1, **label_setting)
                    .set_global_opts(
                        title_opts=opts.TitleOpts(title=f"[{a}-{i}]训练数据散点图"),
                        **global_setting,
                        yaxis_opts=opts.AxisOpts(
                            type_="value" if x1_is_continuous else "category",
                            is_scale=True,
                        ),
                        xaxis_opts=opts.AxisOpts(
                            type_="value" if x2_is_continuous else "category",
                            is_scale=True,
                        ),
                    )
                )
                c.add_xaxis(axis_x2)  # replace the axis data with unique values
                if base_render is None:
                    base_render = c
                else:
                    base_render = base_render.overlap(c)
            render_list.append(base_render)
    means, x_range, data_type = statistics_assistant.get()
    return render_list, means, x_range, data_type
@plugin_func_loading(get_path(r"template/machinelearning"))
def training_visualization_more(x_data, class_list, y_data, center):
    """Per-class scatter charts for every feature pair, with each class's
    cluster center overlaid as a triangle marker.

    Returns (charts, means, x_range, data_type).
    """
    x_data = x_data.transpose()  # one row per feature
    if len(x_data) == 1:
        # Pad a zero feature so a 2-D scatter is still possible.
        x_data = np.array([x_data[0], np.zeros(len(x_data[0]))])
    statistics_assistant = quick_stats(x_data)
    render_list = []
    for i in range(len(x_data)):
        for a in range(len(x_data)):
            if a <= i:
                continue
            x1 = x_data[i]  # x coordinate (plotted on the chart's y axis)
            x1_is_continuous = is_continuous(x1)
            x2 = x_data[a]  # y coordinate
            x2_is_continuous = is_continuous(x2)
            base_render = None  # previous chart, overlapped per class
            for class_num in range(len(class_list)):
                now_class = class_list[class_num]
                plot_x1 = x1[y_data == now_class].tolist()
                plot_x2 = x2[y_data == now_class]
                axis_x2 = np.unique(plot_x2)
                plot_x2 = x2[y_data == now_class].tolist()
                # Unlike a plain scatter, x2 feeds the horizontal axis here.
                c = (
                    Scatter()
                    .add_xaxis(plot_x2)
                    .add_yaxis(f"{now_class}", plot_x1, **label_setting)
                    .set_global_opts(
                        title_opts=opts.TitleOpts(title=f"[{a}-{i}]训练数据散点图"),
                        **global_setting,
                        yaxis_opts=opts.AxisOpts(
                            type_="value" if x1_is_continuous else "category",
                            is_scale=True,
                        ),
                        xaxis_opts=opts.AxisOpts(
                            type_="value" if x2_is_continuous else "category",
                            is_scale=True,
                        ),
                    )
                )
                c.add_xaxis(axis_x2)  # replace the axis data with unique values
                # Overlay the cluster center (0 if the center lacks this axis).
                try:
                    center_x2 = [center[class_num][a]]
                except IndexError:
                    center_x2 = [0]
                b = (
                    Scatter()
                    .add_xaxis(center_x2)
                    .add_yaxis(
                        f"[{now_class}]中心",
                        [center[class_num][i]],
                        **label_setting,
                        symbol="triangle",
                    )
                    .set_global_opts(
                        title_opts=opts.TitleOpts(title="簇中心"),
                        **global_setting,
                        yaxis_opts=opts.AxisOpts(
                            type_="value" if x1_is_continuous else "category",
                            is_scale=True,
                        ),
                        xaxis_opts=opts.AxisOpts(
                            type_="value" if x2_is_continuous else "category",
                            is_scale=True,
                        ),
                    )
                )
                c.overlap(b)
                if base_render is None:
                    base_render = c
                else:
                    base_render = base_render.overlap(c)
            render_list.append(base_render)
    means, x_range, data_type = statistics_assistant.get()
    return render_list, means, x_range, data_type
@plugin_func_loading(get_path(r"template/machinelearning"))
def training_visualization_center(x_data, class_data, y_data, center):
    """Per-class scatter charts for adjacent feature pairs, with each
    class's cluster center overlaid as a triangle marker.

    Returns (charts, means, x_range, data_type).
    """
    x_data = x_data.transpose()  # one row per feature
    if len(x_data) == 1:
        # Pad a zero feature so a 2-D scatter is still possible.
        x_data = np.array([x_data[0], np.zeros(len(x_data[0]))])
    statistics_assistant = quick_stats(x_data)
    render_list = []
    for i in range(len(x_data)):
        if i == 0:
            continue
        x1 = x_data[i]  # x coordinate (plotted on the chart's y axis)
        x1_is_continuous = is_continuous(x1)
        x2 = x_data[i - 1]  # y coordinate
        x2_is_continuous = is_continuous(x2)
        base_render = None  # previous chart, overlapped per class
        for class_num in range(len(class_data)):
            n_class = class_data[class_num]
            x_1 = x1[y_data == n_class].tolist()
            x_2 = x2[y_data == n_class]
            x_2_new = np.unique(x_2)
            x_2 = x2[y_data == n_class].tolist()
            # Unlike a plain scatter, x_2 feeds the horizontal axis here.
            c = (
                Scatter().add_xaxis(x_2).add_yaxis(
                    f"{n_class}",
                    x_1,
                    **label_setting).set_global_opts(
                    title_opts=opts.TitleOpts(
                        title=f"[{i-1}-{i}]训练数据散点图"),
                    **global_setting,
                    yaxis_opts=opts.AxisOpts(
                        type_="value" if x1_is_continuous else "category",
                        is_scale=True),
                    xaxis_opts=opts.AxisOpts(
                        type_="value" if x2_is_continuous else "category",
                        is_scale=True),
                ))
            c.add_xaxis(x_2_new)  # replace the axis data with unique values
            # Overlay the cluster center (0 if the center lacks this axis).
            try:
                center_x_2 = [center[class_num][i - 1]]
            except IndexError:
                center_x_2 = [0]
            b = (
                Scatter().add_xaxis(center_x_2).add_yaxis(
                    f"[{n_class}]中心",
                    [
                        center[class_num][i]],
                    **label_setting,
                    symbol="triangle",
                ).set_global_opts(
                    title_opts=opts.TitleOpts(
                        title="簇中心"),
                    **global_setting,
                    yaxis_opts=opts.AxisOpts(
                        type_="value" if x1_is_continuous else "category",
                        is_scale=True),
                    xaxis_opts=opts.AxisOpts(
                        type_="value" if x2_is_continuous else "category",
                        is_scale=True),
                ))
            c.overlap(b)
            if base_render is None:
                base_render = c
            else:
                base_render = base_render.overlap(c)
        render_list.append(base_render)
    means, x_range, data_type = statistics_assistant.get()
    return render_list, means, x_range, data_type
  3765. @plugin_func_loading(get_path(r"template/machinelearning"))
  3766. def training_visualization(x_data, class_, y_data): # 根据不同类别绘制x-x分类散点图
  3767. x_data = x_data.transpose()
  3768. if len(x_data) == 1:
  3769. x_data = np.array([x_data[0], np.zeros(len(x_data[0]))])
  3770. statistics_assistant = quick_stats(x_data)
  3771. render_list = []
  3772. for i in range(len(x_data)):
  3773. if i == 0:
  3774. continue
  3775. x1 = x_data[i] # x坐标
  3776. x1_is_continuous = is_continuous(x1)
  3777. x2 = x_data[i - 1] # y坐标
  3778. x2_is_continuous = is_continuous(x2)
  3779. render_list = None # 旧的C
  3780. for now_class in class_:
  3781. plot_x1 = x1[y_data == now_class].tolist()
  3782. plot_x2 = x2[y_data == now_class]
  3783. axis_x2 = np.unique(plot_x2)
  3784. plot_x2 = x2[y_data == now_class].tolist()
  3785. # x与散点图不同,这里是纵坐标
  3786. c = (
  3787. Scatter() .add_xaxis(plot_x2) .add_yaxis(
  3788. f"{now_class}",
  3789. plot_x1,
  3790. **label_setting) .set_global_opts(
  3791. title_opts=opts.TitleOpts(
  3792. title="训练数据散点图"),
  3793. **global_setting,
  3794. yaxis_opts=opts.AxisOpts(
  3795. type_="value" if x1_is_continuous else "category",
  3796. is_scale=True),
  3797. xaxis_opts=opts.AxisOpts(
  3798. type_="value" if x2_is_continuous else "category",
  3799. is_scale=True),
  3800. ))
  3801. c.add_xaxis(axis_x2)
  3802. if render_list is None:
  3803. render_list = c
  3804. else:
  3805. render_list = render_list.overlap(c)
  3806. render_list.append(render_list)
  3807. means, x_range, data_type = statistics_assistant.get()
  3808. return render_list, means, x_range, data_type
  3809. @plugin_func_loading(get_path(r"template/machinelearning"))
  3810. def training_visualization_no_class(x_data): # 根据绘制x-x分类散点图(无类别)
  3811. x_data = x_data.transpose()
  3812. if len(x_data) == 1:
  3813. x_data = np.array([x_data[0], np.zeros(len(x_data[0]))])
  3814. statistics_assistant = quick_stats(x_data)
  3815. render_list = []
  3816. for i in range(len(x_data)):
  3817. if i == 0:
  3818. continue
  3819. x1 = x_data[i] # x坐标
  3820. x1_is_continuous = is_continuous(x1)
  3821. x2 = x_data[i - 1] # y坐标
  3822. x2_is_continuous = is_continuous(x2)
  3823. x2_only = np.unique(x2)
  3824. # x与散点图不同,这里是纵坐标
  3825. c = (
  3826. Scatter() .add_xaxis(x2) .add_yaxis(
  3827. "",
  3828. x1.tolist(),
  3829. **label_setting) .set_global_opts(
  3830. title_opts=opts.TitleOpts(
  3831. title="训练数据散点图"),
  3832. **global_not_legend,
  3833. yaxis_opts=opts.AxisOpts(
  3834. type_="value" if x1_is_continuous else "category",
  3835. is_scale=True),
  3836. xaxis_opts=opts.AxisOpts(
  3837. type_="value" if x2_is_continuous else "category",
  3838. is_scale=True),
  3839. ))
  3840. c.add_xaxis(x2_only)
  3841. render_list.append(c)
  3842. means, x_range, data_type = statistics_assistant.get()
  3843. return render_list, means, x_range, data_type
  3844. def training_w(
  3845. x_data, class_list, y_data, w_list, b_list, x_means: list
  3846. ): # 针对分类问题绘制决策边界
  3847. x_data = x_data.transpose()
  3848. if len(x_data) == 1:
  3849. x_data = np.array([x_data[0], np.zeros(len(x_data[0]))])
  3850. render_list = []
  3851. x_means.append(0)
  3852. x_means = np.array(x_means)
  3853. for i in range(len(x_data)):
  3854. if i == 0:
  3855. continue
  3856. x1_is_continuous = is_continuous(x_data[i])
  3857. x2 = x_data[i - 1] # y坐标
  3858. x2_is_continuous = is_continuous(x2)
  3859. o_c = None # 旧的C
  3860. for class_num in range(len(class_list)):
  3861. n_class = class_list[class_num]
  3862. x2_only = np.unique(x2[y_data == n_class])
  3863. # x与散点图不同,这里是纵坐标
  3864. # 加入这个判断是为了解决sklearn历史遗留问题
  3865. if len(class_list) == 2: # 二分类问题
  3866. if class_num == 0:
  3867. continue
  3868. w = w_list[0]
  3869. b = b_list[0]
  3870. else: # 多分类问题
  3871. w = w_list[class_num]
  3872. b = b_list[class_num]
  3873. if x2_is_continuous:
  3874. x2_only = np.array(make_list(x2_only.min(), x2_only.max(), 5))
  3875. w = np.append(w, 0)
  3876. y_data = (
  3877. -(x2_only * w[i - 1]) / w[i]
  3878. + b
  3879. + (x_means[: i - 1] * w[: i - 1]).sum()
  3880. + (x_means[i + 1:] * w[i + 1:]).sum()
  3881. ) # 假设除了两个特征意外,其余特征均为means列表的数值
  3882. c = (
  3883. Line() .add_xaxis(x2_only) .add_yaxis(
  3884. f"决策边界:{n_class}=>[{i}]",
  3885. y_data.tolist(),
  3886. is_smooth=True,
  3887. **label_setting,
  3888. ) .set_global_opts(
  3889. title_opts=opts.TitleOpts(
  3890. title=f"系数w曲线"),
  3891. **global_setting,
  3892. yaxis_opts=opts.AxisOpts(
  3893. type_="value" if x1_is_continuous else "category",
  3894. is_scale=True),
  3895. xaxis_opts=opts.AxisOpts(
  3896. type_="value" if x2_is_continuous else "category",
  3897. is_scale=True),
  3898. ))
  3899. if o_c is None:
  3900. o_c = c
  3901. else:
  3902. o_c = o_c.overlap(c)
  3903. # 下面不要接任何代码,因为上面会continue
  3904. render_list.append(o_c)
  3905. return render_list
  3906. @plugin_func_loading(get_path(r"template/machinelearning"))
  3907. def regress_w(x_data, w_data: np.array, intercept_b, x_means: list): # 针对回归问题(y-x图)
  3908. x_data = x_data.transpose()
  3909. if len(x_data) == 1:
  3910. x_data = np.array([x_data[0], np.zeros(len(x_data[0]))])
  3911. render_list = []
  3912. x_means.append(0) # 确保mean[i+1]不会超出index
  3913. x_means = np.array(x_means)
  3914. w_data = np.append(w_data, 0)
  3915. for i in range(len(x_data)):
  3916. x1 = x_data[i]
  3917. x1_is_continuous = is_continuous(x1)
  3918. if x1_is_continuous:
  3919. x1 = np.array(make_list(x1.min(), x1.max(), 5))
  3920. x1_only = np.unique(x1)
  3921. # 假设除了两个特征意外,其余特征均为means列表的数值
  3922. y_data = (
  3923. x1_only * w_data[i]
  3924. + intercept_b
  3925. + (x_means[:i] * w_data[:i]).sum()
  3926. + (x_means[i + 1:] * w_data[i + 1:]).sum()
  3927. )
  3928. y_is_continuous = is_continuous(y_data)
  3929. c = (
  3930. Line() .add_xaxis(x1_only) .add_yaxis(
  3931. f"拟合结果=>[{i}]",
  3932. y_data.tolist(),
  3933. is_smooth=True,
  3934. **label_setting) .set_global_opts(
  3935. title_opts=opts.TitleOpts(
  3936. title=f"系数w曲线"),
  3937. **global_setting,
  3938. yaxis_opts=opts.AxisOpts(
  3939. type_="value" if y_is_continuous else None,
  3940. is_scale=True),
  3941. xaxis_opts=opts.AxisOpts(
  3942. type_="value" if x1_is_continuous else None,
  3943. is_scale=True),
  3944. ))
  3945. render_list.append(c)
  3946. return render_list
  3947. @plugin_func_loading(get_path(r"template/machinelearning"))
  3948. def regress_visualization(x_data, y_data): # y-x数据图
  3949. x_data = x_data.transpose()
  3950. y_is_continuous = is_continuous(y_data)
  3951. statistics_assistant = quick_stats(x_data)
  3952. render_list = []
  3953. try:
  3954. visualmap_opts = opts.VisualMapOpts(
  3955. is_show=True,
  3956. max_=int(y_data.max()) + 1,
  3957. min_=int(y_data.min()),
  3958. pos_right="3%",
  3959. )
  3960. except ValueError:
  3961. visualmap_opts = None
  3962. y_is_continuous = False
  3963. for i in range(len(x_data)):
  3964. x1 = x_data[i] # x坐标
  3965. x1_is_continuous = is_continuous(x1)
  3966. # 不转换成list因为保持dtype的精度,否则绘图会出现各种问题(数值重复)
  3967. if not y_is_continuous and x1_is_continuous:
  3968. y_is_continuous, x1_is_continuous = x1_is_continuous, y_is_continuous
  3969. x1, y_data = y_data, x1
  3970. c = (
  3971. Scatter()
  3972. .add_xaxis(x1.tolist()) # 研究表明,这个是横轴
  3973. .add_yaxis("数据", y_data.tolist(), **label_setting)
  3974. .set_global_opts(
  3975. title_opts=opts.TitleOpts(title="预测类型图"),
  3976. **global_setting,
  3977. yaxis_opts=opts.AxisOpts(
  3978. type_="value" if y_is_continuous else "category", is_scale=True
  3979. ),
  3980. xaxis_opts=opts.AxisOpts(
  3981. type_="value" if x1_is_continuous else "category", is_scale=True
  3982. ),
  3983. visualmap_opts=visualmap_opts,
  3984. )
  3985. )
  3986. c.add_xaxis(np.unique(x1))
  3987. render_list.append(c)
  3988. means, x_range, data_type = statistics_assistant.get()
  3989. return render_list, means, x_range, data_type
  3990. @plugin_func_loading(get_path(r"template/machinelearning"))
  3991. def feature_visualization(x_data, data_name=""): # x-x数据图
  3992. seeting = global_setting if data_name else global_not_legend
  3993. x_data = x_data.transpose()
  3994. only = False
  3995. if len(x_data) == 1:
  3996. x_data = np.array([x_data[0], np.zeros(len(x_data[0]))])
  3997. only = True
  3998. render_list = []
  3999. for i in range(len(x_data)):
  4000. for a in range(len(x_data)):
  4001. if a <= i:
  4002. continue # 重复内容,跳过
  4003. x1 = x_data[i] # x坐标
  4004. x1_is_continuous = is_continuous(x1)
  4005. x2 = x_data[a] # y坐标
  4006. x2_is_continuous = is_continuous(x2)
  4007. x2_only = np.unique(x2)
  4008. if only:
  4009. x2_is_continuous = False
  4010. # x与散点图不同,这里是纵坐标
  4011. c = (
  4012. Scatter() .add_xaxis(x2) .add_yaxis(
  4013. data_name,
  4014. x1,
  4015. **label_setting) .set_global_opts(
  4016. title_opts=opts.TitleOpts(
  4017. title=f"[{i}-{a}]数据散点图"),
  4018. **seeting,
  4019. yaxis_opts=opts.AxisOpts(
  4020. type_="value" if x1_is_continuous else "category",
  4021. is_scale=True),
  4022. xaxis_opts=opts.AxisOpts(
  4023. type_="value" if x2_is_continuous else "category",
  4024. is_scale=True),
  4025. ))
  4026. c.add_xaxis(x2_only)
  4027. render_list.append(c)
  4028. return render_list
  4029. @plugin_func_loading(get_path(r"template/machinelearning"))
  4030. def feature_visualization_format(x_data, data_name=""): # x-x数据图
  4031. seeting = global_setting if data_name else global_not_legend
  4032. x_data = x_data.transpose()
  4033. only = False
  4034. if len(x_data) == 1:
  4035. x_data = np.array([x_data[0], np.zeros(len(x_data[0]))])
  4036. only = True
  4037. render_list = []
  4038. for i in range(len(x_data)):
  4039. for a in range(len(x_data)):
  4040. if a <= i:
  4041. continue # 重复内容,跳过(a读取的是i后面的)
  4042. x1 = x_data[i] # x坐标
  4043. x1_is_continuous = is_continuous(x1)
  4044. x2 = x_data[a] # y坐标
  4045. x2_is_continuous = is_continuous(x2)
  4046. x2_only = np.unique(x2)
  4047. x1_list = x1.astype(np.str).tolist()
  4048. for j in range(len(x1_list)):
  4049. x1_list[j] = [x1_list[j], f"特征{j}"]
  4050. if only:
  4051. x2_is_continuous = False
  4052. # x与散点图不同,这里是纵坐标
  4053. c = (
  4054. Scatter() .add_xaxis(x2) .add_yaxis(
  4055. data_name,
  4056. x1_list,
  4057. **label_setting) .set_global_opts(
  4058. title_opts=opts.TitleOpts(
  4059. title=f"[{i}-{a}]数据散点图"),
  4060. **seeting,
  4061. yaxis_opts=opts.AxisOpts(
  4062. type_="value" if x1_is_continuous else "category",
  4063. is_scale=True),
  4064. xaxis_opts=opts.AxisOpts(
  4065. type_="value" if x2_is_continuous else "category",
  4066. is_scale=True),
  4067. tooltip_opts=opts.TooltipOpts(
  4068. is_show=True,
  4069. axis_pointer_type="cross",
  4070. formatter="{c}"),
  4071. ))
  4072. c.add_xaxis(x2_only)
  4073. render_list.append(c)
  4074. return render_list
@plugin_func_loading(get_path(r"template/machinelearning"))
def discrete_feature_visualization(x_data, data_name=""):  # always-discrete x-x charts
    """Pairwise scatter charts with both axes forced to category type."""
    seeting = global_setting if data_name else global_not_legend
    x_data = x_data.transpose()
    if len(x_data) == 1:
        # single feature: pad with a zero row so a 2-D scatter is still possible
        x_data = np.array([x_data[0], np.zeros(len(x_data[0]))])
    render_list = []
    for i in range(len(x_data)):
        for a in range(len(x_data)):
            if a <= i:
                continue  # each unordered pair once
            x1 = x_data[i]  # y-axis values
            x2 = x_data[a]  # x-axis values
            x2_only = np.unique(x2)
            # note: x1 goes on the vertical axis, unlike a plain scatter
            c = (
                Scatter()
                .add_xaxis(x2)
                .add_yaxis(data_name, x1, **label_setting)
                .set_global_opts(
                    title_opts=opts.TitleOpts(title=f"[{i}-{a}]数据散点图"),
                    **seeting,
                    yaxis_opts=opts.AxisOpts(type_="category", is_scale=True),
                    xaxis_opts=opts.AxisOpts(type_="category", is_scale=True),
                )
            )
            c.add_xaxis(x2_only)
            render_list.append(c)
    return render_list
  4104. @plugin_func_loading(get_path(r"template/machinelearning"))
  4105. def conversion_control(y_data, x_data, tab): # 合并两x-x图
  4106. if isinstance(x_data, np.ndarray) and isinstance(y_data, np.ndarray):
  4107. get_x = feature_visualization(x_data, "原数据") # 原来
  4108. get_y = feature_visualization(y_data, "转换数据") # 转换
  4109. for i in range(len(get_x)):
  4110. tab.add(get_x[i].overlap(get_y[i]), f"[{i}]数据x-x散点图")
  4111. return tab
  4112. @plugin_func_loading(get_path(r"template/machinelearning"))
  4113. def conversion_separate(y_data, x_data, tab): # 并列显示两x-x图
  4114. if isinstance(x_data, np.ndarray) and isinstance(y_data, np.ndarray):
  4115. get_x = feature_visualization(x_data, "原数据") # 原来
  4116. get_y = feature_visualization(y_data, "转换数据") # 转换
  4117. for i in range(len(get_x)):
  4118. try:
  4119. tab.add(get_x[i], f"[{i}]数据x-x散点图")
  4120. except IndexError:
  4121. pass
  4122. try:
  4123. tab.add(get_y[i], f"[{i}]变维数据x-x散点图")
  4124. except IndexError:
  4125. pass
  4126. return tab
  4127. @plugin_func_loading(get_path(r"template/machinelearning"))
  4128. def conversion_separate_format(y_data, tab): # 并列显示两x-x图
  4129. if isinstance(y_data, np.ndarray):
  4130. get_y = feature_visualization_format(y_data, "转换数据") # 转换
  4131. for i in range(len(get_y)):
  4132. tab.add(get_y[i], f"[{i}]变维数据x-x散点图")
  4133. return tab
  4134. @plugin_func_loading(get_path(r"template/machinelearning"))
  4135. def conversion_separate_wh(w_array, h_array, tab): # 并列显示两x-x图
  4136. if isinstance(w_array, np.ndarray) and isinstance(w_array, np.ndarray):
  4137. get_x = feature_visualization_format(w_array, "W矩阵数据") # 原来
  4138. get_y = feature_visualization(
  4139. h_array.transpose(), "H矩阵数据"
  4140. ) # 转换(先转T,再转T变回原样,W*H是横对列)
  4141. for i in range(len(get_x)):
  4142. try:
  4143. tab.add(get_x[i], f"[{i}]W矩阵x-x散点图")
  4144. except IndexError:
  4145. pass
  4146. try:
  4147. tab.add(get_y[i], f"[{i}]H.T矩阵x-x散点图")
  4148. except IndexError:
  4149. pass
  4150. return tab
@plugin_func_loading(get_path(r"template/machinelearning"))
def make_bar(name, value, tab):  # draw a bar chart
    """Add a bar chart of ``value`` (one bar per feature index) to ``tab`` under ``name``."""
    c = (
        Bar()
        .add_xaxis([f"[{i}]特征" for i in range(len(value))])
        .add_yaxis(name, value, **label_setting)
        .set_global_opts(title_opts=opts.TitleOpts(title="系数w柱状图"), **global_setting)
    )
    tab.add(c, name)
  4160. @plugin_func_loading(get_path(r"template/machinelearning"))
  4161. def judging_digits(num: (int, float)): # 查看小数位数
  4162. a = str(abs(num)).split(".")[0]
  4163. if a == "":
  4164. raise ValueError
  4165. return len(a)
  4166. @plugin_func_loading(get_path(r"template/machinelearning"))
  4167. def num_str(num, accuracy):
  4168. num = str(round(float(num), accuracy))
  4169. if len(num.replace(".", "")) == accuracy:
  4170. return num
  4171. n = num.split(".")
  4172. if len(n) == 0: # 无小数
  4173. return num + "." + "0" * (accuracy - len(num))
  4174. else:
  4175. return num + "0" * (accuracy - len(num) + 1) # len(num)多算了一位小数点
  4176. @plugin_func_loading(get_path(r"template/machinelearning"))
  4177. def des_to_csv(save_dir, name, data, columns=None, row=None):
  4178. save_dir = save_dir + os.sep + name + ".csv"
  4179. print(columns)
  4180. print(row)
  4181. print(data)
  4182. DataFrame(data, columns=columns, index=row).to_csv(
  4183. save_dir,
  4184. header=False if columns is None else True,
  4185. index=False if row is None else True,
  4186. )
  4187. return data
  4188. @plugin_func_loading(get_path(r"template/machinelearning"))
  4189. def pack(output_filename, source_dir):
  4190. with tarfile.open(output_filename, "w:gz") as tar:
  4191. tar.add(source_dir, arcname=basename(source_dir))
  4192. return output_filename
def set_global(
        more=more_global,
        all_=all_global,
        csv=csv_global,
        clf=clf_global,
        tar=tar_global,
        new=new_dir_global,
):
    """Update the module-level export/plot switches.

    NOTE(review): the defaults are captured at definition time, so calling
    with no arguments resets every switch to the values the globals held
    when this module was loaded — confirm that is intended.
    """
    global more_global, all_global, csv_global, clf_global, tar_global, new_dir_global
    more_global = more  # whether to plot with all features
    all_global = all_  # whether to export charts
    csv_global = csv  # whether to export CSV
    clf_global = clf  # whether to export the model
    tar_global = tar  # whether to pack a tar archive
    new_dir_global = new  # whether to create a new directory
  4208. class MachineLearnerInit(
  4209. LearnerIO, Calculation, LearnerMerge, LearnerSplit, LearnerDimensions, LearnerShape, metaclass=ABCMeta
  4210. ):
  4211. def __init__(self, *args, **kwargs):
  4212. super().__init__(*args, **kwargs)
  4213. self.learner = {} # 记录机器
  4214. self.learn_dict = {
  4215. "Line": LineModel,
  4216. "Ridge": LineModel,
  4217. "Lasso": LineModel,
  4218. "LogisticRegression": LogisticregressionModel,
  4219. "Knn_class": KnnModel,
  4220. "Knn": KnnModel,
  4221. "Tree_class": TreeModel,
  4222. "Tree": TreeModel,
  4223. "Forest": ForestModel,
  4224. "Forest_class": ForestModel,
  4225. "GradientTree_class": GradienttreeModel,
  4226. "GradientTree": GradienttreeModel,
  4227. "Variance": VarianceModel,
  4228. "SelectKBest": SelectkbestModel,
  4229. "Z-Score": StandardizationModel,
  4230. "MinMaxScaler": MinmaxscalerModel,
  4231. "LogScaler": LogscalerModel,
  4232. "atanScaler": AtanscalerModel,
  4233. "decimalScaler": DecimalscalerModel,
  4234. "sigmodScaler": SigmodscalerModel,
  4235. "Mapzoom": MapzoomModel,
  4236. "Fuzzy_quantization": FuzzyQuantizationModel,
  4237. "Regularization": RegularizationModel,
  4238. "Binarizer": BinarizerModel,
  4239. "Discretization": DiscretizationModel,
  4240. "Label": LabelModel,
  4241. "OneHotEncoder": OneHotEncoderModel,
  4242. "Missed": MissedModel,
  4243. "PCA": PcaModel,
  4244. "RPCA": RpcaModel,
  4245. "KPCA": KpcaModel,
  4246. "LDA": LdaModel,
  4247. "SVC": SvcModel,
  4248. "SVR": SvrModel,
  4249. "MLP": MlpModel,
  4250. "MLP_class": MlpModel,
  4251. "NMF": NmfModel,
  4252. "t-SNE": TsneModel,
  4253. "k-means": KmeansModel,
  4254. "Agglomerative": AgglomerativeModel,
  4255. "DBSCAN": DbscanModel,
  4256. "ClassBar": ClassBar,
  4257. "FeatureScatter": NearFeatureScatter,
  4258. "FeatureScatterClass": NearFeatureScatterClass,
  4259. "FeatureScatter_all": NearFeatureScatterMore,
  4260. "FeatureScatterClass_all": NearFeatureScatterClassMore,
  4261. "HeatMap": NumpyHeatMap,
  4262. "FeatureY-X": FeatureScatterYX,
  4263. "ClusterTree": ClusterTree,
  4264. "MatrixScatter": MatrixScatter,
  4265. "Correlation": Corr,
  4266. "Statistics": DataAnalysis,
  4267. "Fast_Fourier": FastFourier,
  4268. "Reverse_Fast_Fourier": ReverseFastFourier,
  4269. "[2]Reverse_Fast_Fourier": ReverseFastFourierTwonumpy,
  4270. }
  4271. self.data_type = {} # 记录机器的类型
  4272. @staticmethod
  4273. def learner_parameters(parameters, data_type): # 解析参数
  4274. original_parameter = {}
  4275. target_parameter = {}
  4276. # 输入数据
  4277. exec(parameters, original_parameter)
  4278. # 处理数据
  4279. if data_type in ("MLP", "MLP_class"):
  4280. target_parameter["alpha"] = float(
  4281. original_parameter.get("alpha", 0.0001)
  4282. ) # MLP正则化用
  4283. else:
  4284. target_parameter["alpha"] = float(
  4285. original_parameter.get("alpha", 1.0)
  4286. ) # L1和L2正则化用
  4287. target_parameter["C"] = float(
  4288. original_parameter.get(
  4289. "C", 1.0)) # L1和L2正则化用
  4290. if data_type in ("MLP", "MLP_class"):
  4291. target_parameter["max_iter"] = int(
  4292. original_parameter.get("max_iter", 200)
  4293. ) # L1和L2正则化用
  4294. else:
  4295. target_parameter["max_iter"] = int(
  4296. original_parameter.get("max_iter", 1000)
  4297. ) # L1和L2正则化用
  4298. target_parameter["n_neighbors"] = int(
  4299. original_parameter.get("K_knn", 5)
  4300. ) # knn邻居数 (命名不同)
  4301. target_parameter["p"] = int(original_parameter.get("p", 2)) # 距离计算方式
  4302. target_parameter["nDim_2"] = bool(
  4303. original_parameter.get("nDim_2", True)
  4304. ) # 数据是否降维
  4305. if data_type in ("Tree", "Forest", "GradientTree"):
  4306. target_parameter["criterion"] = (
  4307. "mse" if bool(
  4308. original_parameter.get(
  4309. "is_MSE",
  4310. True)) else "mae") # 是否使用基尼不纯度
  4311. else:
  4312. target_parameter["criterion"] = (
  4313. "gini" if bool(
  4314. original_parameter.get(
  4315. "is_Gini",
  4316. True)) else "entropy") # 是否使用基尼不纯度
  4317. target_parameter["splitter"] = (
  4318. "random" if bool(
  4319. original_parameter.get(
  4320. "is_random",
  4321. False)) else "best") # 决策树节点是否随机选用最优
  4322. target_parameter["max_features"] = original_parameter.get(
  4323. "max_features", None
  4324. ) # 选用最多特征数
  4325. target_parameter["max_depth"] = original_parameter.get(
  4326. "max_depth", None
  4327. ) # 最大深度
  4328. target_parameter["min_samples_split"] = int(
  4329. original_parameter.get("min_samples_split", 2)
  4330. ) # 是否继续划分(容易造成过拟合)
  4331. target_parameter["P"] = float(
  4332. original_parameter.get(
  4333. "min_samples_split", 0.8))
  4334. target_parameter["k"] = original_parameter.get("k", 1)
  4335. target_parameter["score_func"] = {
  4336. "chi2": chi2,
  4337. "f_classif": f_classif,
  4338. "mutual_info_classif": mutual_info_classif,
  4339. "f_regression": f_regression,
  4340. "mutual_info_regression": mutual_info_regression,
  4341. }.get(original_parameter.get("score_func", "f_classif"), f_classif)
  4342. target_parameter["feature_range"] = tuple(
  4343. original_parameter.get("feature_range", (0, 1))
  4344. )
  4345. target_parameter["norm"] = original_parameter.get(
  4346. "norm", "l2") # 正则化的方式L1或者L2
  4347. target_parameter["threshold"] = float(
  4348. original_parameter.get("threshold", 0.0)
  4349. ) # 二值化特征
  4350. target_parameter["split_range"] = list(
  4351. original_parameter.get("split_range", [0])
  4352. ) # 二值化特征
  4353. target_parameter["ndim_up"] = bool(
  4354. original_parameter.get("ndim_up", False))
  4355. target_parameter["miss_value"] = original_parameter.get(
  4356. "miss_value", np.nan)
  4357. target_parameter["fill_method"] = original_parameter.get(
  4358. "fill_method", "mean")
  4359. target_parameter["fill_value"] = original_parameter.get(
  4360. "fill_value", None)
  4361. target_parameter["n_components"] = original_parameter.get(
  4362. "n_components", 1)
  4363. target_parameter["kernel"] = original_parameter.get(
  4364. "kernel", "rbf" if data_type in ("SVR", "SVC") else "linear"
  4365. )
  4366. target_parameter["n_Tree"] = original_parameter.get("n_Tree", 100)
  4367. target_parameter["gamma"] = original_parameter.get("gamma", 1)
  4368. target_parameter["hidden_size"] = tuple(
  4369. original_parameter.get("hidden_size", (100,))
  4370. )
  4371. target_parameter["activation"] = str(
  4372. original_parameter.get("activation", "relu")
  4373. )
  4374. target_parameter["solver"] = str(
  4375. original_parameter.get("solver", "adam"))
  4376. if data_type in ("k-means",):
  4377. target_parameter["n_clusters"] = int(
  4378. original_parameter.get("n_clusters", 8)
  4379. )
  4380. else:
  4381. target_parameter["n_clusters"] = int(
  4382. original_parameter.get("n_clusters", 2)
  4383. )
  4384. target_parameter["eps"] = float(
  4385. original_parameter.get(
  4386. "n_clusters", 0.5))
  4387. target_parameter["min_samples"] = int(
  4388. original_parameter.get("n_clusters", 5))
  4389. target_parameter["white_PCA"] = bool(
  4390. original_parameter.get("white_PCA", False))
  4391. return target_parameter
  4392. def get_learner(self, name):
  4393. return self.learner[name]
  4394. def get_learner_type(self, name):
  4395. return self.data_type[name]
@plugin_class_loading(get_path(r"template/machinelearning"))
class MachineLearnerAdd(MachineLearnerInit, metaclass=ABCMeta):
    """Mixin that registers new learners into ``self.learner``/``self.data_type``."""

    def add_learner(self, learner_str, parameters=""):
        """Create a learner of type ``learner_str`` with parsed ``parameters``."""
        get = self.learn_dict[learner_str]
        name = f"Le[{len(self.learner)}]{learner_str}"
        # parameter parsing
        args_use = self.learner_parameters(parameters, learner_str)
        # build the learner
        self.learner[name] = get(model=learner_str, args_use=args_use)
        self.data_type[name] = learner_str

    def add_learner_from_python(self, learner, name):
        """Register an already-constructed learner object under a generated name."""
        name = f"Le[{len(self.learner)}]{name}"
        # register the learner as-is
        self.learner[name] = learner
        self.data_type[name] = 'from_python'

    def add_curve_fitting(self, learner):
        """Register a curve-fitting learner defined by user source code.

        NOTE(review): ``exec`` runs the user-supplied string — trusted input only.
        The string may define ``name`` and the fit function ``f``; a linear
        ``k * x + b`` is the fallback.
        """
        named_domain = {}
        exec(learner, named_domain)
        name = f'Le[{len(self.learner)}]{named_domain.get("name", "SELF")}'
        func = named_domain.get("f", lambda x, k, b: k * x + b)
        self.learner[name] = CurveFitting(name, learner, func)
        self.data_type[name] = "Curve_fitting"

    def add_select_from_model(self, learner, parameters=""):
        """Wrap an existing learner in a SelectFromModel feature selector."""
        model = self.get_learner(learner)
        name = f"Le[{len(self.learner)}]SelectFrom_Model:{learner}"
        # parameter parsing
        args_use = self.learner_parameters(parameters, "SelectFrom_Model")
        # build the learner
        self.learner[name] = SelectFromModel(
            learner=model, args_use=args_use, Dic=self.learn_dict
        )
        self.data_type[name] = "SelectFrom_Model"

    def add_predictive_heat_map(self, learner, parameters=""):
        """Wrap an existing learner in a predictive heat-map visualizer."""
        model = self.get_learner(learner)
        name = f"Le[{len(self.learner)}]Predictive_HeatMap:{learner}"
        # build the learner
        args_use = self.learner_parameters(parameters, "Predictive_HeatMap")
        self.learner[name] = PredictiveHeatmap(
            learner=model, args_use=args_use)
        self.data_type[name] = "Predictive_HeatMap"

    def add_predictive_heat_map_more(self, learner, parameters=""):
        """Wrap an existing learner in the multi-feature heat-map visualizer."""
        model = self.get_learner(learner)
        name = f"Le[{len(self.learner)}]Predictive_HeatMap_More:{learner}"
        # build the learner
        args_use = self.learner_parameters(
            parameters, "Predictive_HeatMap_More")
        self.learner[name] = PredictiveHeatmapMore(
            learner=model, args_use=args_use)
        self.data_type[name] = "Predictive_HeatMap_More"

    def add_view_data(self, learner, parameters=""):
        """Wrap an existing learner in a data-viewing helper."""
        model = self.get_learner(learner)
        name = f"Le[{len(self.learner)}]View_data:{learner}"
        # build the learner
        args_use = self.learner_parameters(parameters, "View_data")
        self.learner[name] = ViewData(learner=model, args_use=args_use)
        self.data_type[name] = "View_data"
  4452. @plugin_class_loading(get_path(r"template/machinelearning"))
  4453. class MachineLearnerScore(MachineLearnerInit, metaclass=ABCMeta):
  4454. def score(self, name_x, name_y, learner): # Score_Only表示仅评分 Fit_Simp 是普遍类操作
  4455. model = self.get_learner(learner)
  4456. x = self.get_sheet(name_x)
  4457. y = self.get_sheet(name_y)
  4458. return model.score(x, y)
  4459. def model_evaluation(self, learner, save_dir, name_x, name_y, func=0): # 显示参数
  4460. x = self.get_sheet(name_x)
  4461. y = self.get_sheet(name_y)
  4462. if new_dir_global:
  4463. dic = save_dir + f"{os.sep}{learner}分类评分[CoTan]"
  4464. new_dic = dic
  4465. a = 0
  4466. while exists(new_dic): # 直到他不存在 —— False
  4467. new_dic = dic + f"[{a}]"
  4468. a += 1
  4469. mkdir(new_dic)
  4470. else:
  4471. new_dic = save_dir
  4472. model = self.get_learner(learner)
  4473. # 打包
  4474. func = [
  4475. model.class_score,
  4476. model.regression_score,
  4477. model.clusters_score][func]
  4478. save = func(new_dic, x, y)[0]
  4479. if tar_global:
  4480. pack(f"{new_dic}.tar.gz", new_dic)
  4481. return save, new_dic
  4482. def model_visualization(self, learner, save_dir): # 显示参数
  4483. if new_dir_global:
  4484. dic = save_dir + f"{os.sep}{learner}数据[CoTan]"
  4485. new_dic = dic
  4486. a = 0
  4487. while exists(new_dic): # 直到他不存在 —— False
  4488. new_dic = dic + f"[{a}]"
  4489. a += 1
  4490. mkdir(new_dic)
  4491. else:
  4492. new_dic = save_dir
  4493. model = self.get_learner(learner)
  4494. if (not (model.model is None) or not (
  4495. model.model is list)) and clf_global:
  4496. joblib.dump(model.model, new_dic + f"{os.sep}MODEL.model") # 保存模型
  4497. # 打包
  4498. save = model.data_visualization(new_dic)[0]
  4499. if tar_global:
  4500. pack(f"{new_dic}.tar.gz", new_dic)
  4501. return save, new_dic
  4502. @plugin_class_loading(get_path(r"template/machinelearning"))
  4503. class LearnerActions(MachineLearnerInit, metaclass=ABCMeta):
    def fit_model(self, x_name, y_name, learner, split=0.3, *args, **kwargs):
        """Fit ``learner`` on sheets ``x_name``/``y_name``.

        split: fraction passed through to the model's fit (presumably the
        test split — TODO confirm against the model implementations).
        Returns whatever ``model.fit_model`` returns.
        """
        x_data = self.get_sheet(x_name)
        y_data = self.get_sheet(y_name)
        model = self.get_learner(learner)
        return model.fit_model(
            x_data, y_data, split=split, x_name=x_name, add_func=self.add_form
        )
    def predict(self, x_name, learner, **kwargs):
        """Predict with ``learner`` on sheet ``x_name``.

        The prediction is also registered as a new form named
        ``"<x_name>:<name>"``; returns the predicted data.
        """
        x_data = self.get_sheet(x_name)
        model = self.get_learner(learner)
        y_data, name = model.predict(
            x_data, x_name=x_name, add_func=self.add_form)
        self.add_form(y_data, f"{x_name}:{name}")
        return y_data