template.py 188 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
7477847794780478147824783478447854786478747884789479047914792479347944795479647974798479948004801480248034804480548064807480848094810481148124813481448154816481748184819482048214822482348244825482648274828482948304831483248334834483548364837483848394840484148424843484448454846484748484849485048514852485348544855485648574858485948604861486248634864486548664867486848694870487148724873487448754876487748784879488048814882488348844885488648874888488948904891489248934894489548964897489848994900490149024903490449054906490749084909491049114912491349144915491649174918491949204921492249234924492549264927492849294930493149324933493449354936493749384939494049414942494349444945494649474948494949504951495249534954495549564957495849594960496149624963496449654966496749684969497049714972497349744975497649774978497949804981498249834984498549864987498849894990499149924993499449954996499749984999500050015002500350045005500650075008500950105011501250135014501550165017501850195020502150225023502450255026502750285029503050315032503350345035503650375038503950405041504250435044504550465047504850495050505150525053505450555056505750585059506050615062506350645065506650675068506950705071507250735074507550765077507850795080508150825083508450855086508750885089509050915092509350945095509650975098509951005101510251035104510551065107510851095110511151125113511451155116511751185119512051215122512351245125512651275128512951305131513251335134513551365137513851395140514151425143514451455146514751485149515051515152515351545155515651575158
  1. import joblib
  2. import re
  3. import tarfile
  4. from abc import ABCMeta, abstractmethod
  5. from os import getcwd, mkdir
  6. from os.path import split as path_split, splitext, basename, exists
  7. import os
  8. from sklearn.svm import SVC, SVR # SVC是svm分类,SVR是svm回归
  9. from sklearn.cluster import KMeans, AgglomerativeClustering, DBSCAN
  10. from sklearn.manifold import TSNE
  11. from sklearn.neural_network import MLPClassifier, MLPRegressor
  12. from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as Lda
  13. from sklearn.decomposition import PCA, IncrementalPCA, KernelPCA, NMF
  14. from sklearn.impute import SimpleImputer
  15. from sklearn.preprocessing import *
  16. from sklearn.feature_selection import *
  17. from sklearn.metrics import *
  18. from sklearn.ensemble import (
  19. RandomForestClassifier,
  20. RandomForestRegressor,
  21. GradientBoostingClassifier,
  22. GradientBoostingRegressor,
  23. )
  24. import numpy as np
  25. import matplotlib.pyplot as plt
  26. from pandas import DataFrame, read_csv
  27. from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor, export_graphviz
  28. from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
  29. from sklearn.linear_model import *
  30. from sklearn.model_selection import train_test_split
  31. from scipy.fftpack import fft, ifft # 快速傅里叶变换
  32. from scipy import optimize
  33. from scipy.cluster.hierarchy import dendrogram, ward
  34. from pyecharts.components import Table as TableFisrt # 绘制表格
  35. from pyecharts.options.series_options import JsCode
  36. from pyecharts.charts import Tab as tab_First
  37. from pyecharts.charts import *
  38. from pyecharts import options as opts
  39. from pyecharts.components import Image
  40. from pyecharts.globals import CurrentConfig
  41. from system import plugin_class_loading, get_path, plugin_func_loading
  42. CurrentConfig.ONLINE_HOST = f"{getcwd()}{os.sep}assets{os.sep}"
  43. # 设置
  44. np.set_printoptions(threshold=np.inf)
  45. global_setting = dict(
  46. toolbox_opts=opts.ToolboxOpts(is_show=True),
  47. legend_opts=opts.LegendOpts(pos_bottom="3%", type_="scroll"),
  48. )
  49. global_not_legend = dict(
  50. toolbox_opts=opts.ToolboxOpts(is_show=True),
  51. legend_opts=opts.LegendOpts(is_show=False),
  52. )
  53. label_setting = dict(label_opts=opts.LabelOpts(is_show=False))
  54. more_global = False # 是否使用全部特征绘图
  55. all_global = True # 是否导出charts
  56. csv_global = True # 是否导出CSV
  57. clf_global = True # 是否导出模型
  58. tar_global = True # 是否打包tar
  59. new_dir_global = True # 是否新建目录
  60. class LearnBase(metaclass=ABCMeta):
  61. def __init__(self, *args, **kwargs):
  62. self.numpy_dict = {} # name:numpy
  63. self.fucn_add() # 制作Func_Dic
  64. def fucn_add(self):
  65. self.func_dict = {
  66. "abs": lambda x, y: np.abs(x),
  67. "sqrt": lambda x, y: np.sqrt(x),
  68. "pow": lambda x, y: x ** y,
  69. "loge": lambda x, y: np.log(x),
  70. "log10": lambda x, y: np.log10(x),
  71. "ceil": lambda x, y: np.ceil(x),
  72. "floor": lambda x, y: np.floor(x),
  73. "rint": lambda x, y: np.rint(x),
  74. "sin": lambda x, y: np.sin(x),
  75. "cos": lambda x, y: np.cos(x),
  76. "tan": lambda x, y: np.tan(x),
  77. "tanh": lambda x, y: np.tanh(x),
  78. "sinh": lambda x, y: np.sinh(x),
  79. "cosh": lambda x, y: np.cosh(x),
  80. "asin": lambda x, y: np.arcsin(x),
  81. "acos": lambda x, y: np.arccos(x),
  82. "atan": lambda x, y: np.arctan(x),
  83. "atanh": lambda x, y: np.arctanh(x),
  84. "asinh": lambda x, y: np.arcsinh(x),
  85. "acosh": lambda x, y: np.arccosh(x),
  86. "add": lambda x, y: x + y, # 矩阵或元素
  87. "sub": lambda x, y: x - y, # 矩阵或元素
  88. "mul": lambda x, y: np.multiply(x, y), # 元素级别
  89. "matmul": lambda x, y: np.matmul(x, y), # 矩阵
  90. "dot": lambda x, y: np.dot(x, y), # 矩阵
  91. "div": lambda x, y: x / y,
  92. "div_floor": lambda x, y: np.floor_divide(x, y),
  93. "power": lambda x, y: np.power(x, y), # 元素级
  94. }
  95. def get_form(self) -> dict:
  96. return self.numpy_dict.copy()
  97. def get_sheet(self, name) -> np.ndarray:
  98. return self.numpy_dict[name].copy()
  99. @abstractmethod
  100. def add_form(self, data, name):
  101. pass
  102. @plugin_class_loading(get_path(r"template/machinelearning"))
  103. class LearnerIO(LearnBase):
  104. def add_form(self, data: np.array, name):
  105. name = f"{name}[{len(self.numpy_dict)}]"
  106. self.numpy_dict[name] = data
  107. def read_csv(
  108. self,
  109. file_dir,
  110. name,
  111. encoding="utf-8",
  112. str_must=False,
  113. sep=","):
  114. dtype = np.str if str_must else np.float
  115. dataframe = read_csv(
  116. file_dir,
  117. encoding=encoding,
  118. delimiter=sep,
  119. header=None)
  120. try:
  121. data = dataframe.to_numpy(dtype=dtype)
  122. except ValueError:
  123. data = dataframe.to_numpy(dtype=np.str)
  124. if data.ndim == 1:
  125. data = np.expand_dims(data, axis=1)
  126. self.add_form(data, name)
  127. return data
  128. def add_python(self, python_file, sheet_name):
  129. name = {}
  130. name.update(globals().copy())
  131. name.update(locals().copy())
  132. exec(python_file, name)
  133. exec("get = Creat()", name)
  134. if isinstance(name["get"], np.array):
  135. get = name["get"]
  136. else:
  137. try:
  138. get = np.array(name["get"])
  139. except BaseException:
  140. get = np.array([name["get"]])
  141. self.add_form(get, sheet_name)
  142. return get
  143. def to_csv(self, save_dir: str, name, sep) -> str:
  144. get: np.ndarray = self.get_sheet(name)
  145. np.savetxt(save_dir, get, delimiter=sep)
  146. return save_dir
  147. def to_html_one(self, name, html_dir=""):
  148. if html_dir == "":
  149. html_dir = f"{name}.html"
  150. get: np.ndarray = self.get_sheet(name)
  151. if get.ndim == 1:
  152. get = np.expand_dims(get, axis=1)
  153. get: list = get.tolist()
  154. for i in range(len(get)):
  155. get[i] = [i + 1] + get[i]
  156. headers = [i for i in range(len(get[0]))]
  157. table = TableFisrt()
  158. table.add(headers, get).set_global_opts(
  159. title_opts=opts.ComponentTitleOpts(
  160. title=f"表格:{name}", subtitle="CoTan~机器学习:查看数据"
  161. )
  162. )
  163. table.render(html_dir)
  164. return html_dir
  165. def to_html(self, name, html_dir="", html_type=0):
  166. if html_dir == "":
  167. html_dir = f"{name}.html"
  168. # 把要画的sheet放到第一个
  169. sheet_dict = self.get_form()
  170. del sheet_dict[name]
  171. sheet_list = [name] + list(sheet_dict.keys())
  172. class TabBase:
  173. def __init__(self, q):
  174. self.tab = q # 一个Tab
  175. def render(self, render_dir):
  176. return self.tab.render(render_dir)
  177. # 生成一个显示页面
  178. if html_type == 0:
  179. class NewTab(TabBase):
  180. def add(self, table, k, *f):
  181. self.tab.add(table, k)
  182. tab = NewTab(tab_First(page_title="CoTan:查看表格")) # 一个Tab
  183. elif html_type == 1:
  184. class NewTab(TabBase):
  185. def add(self, table, *k):
  186. self.tab.add(table)
  187. tab = NewTab(
  188. Page(
  189. page_title="CoTan:查看表格",
  190. layout=Page.DraggablePageLayout))
  191. else:
  192. class NewTab(TabBase):
  193. def add(self, table, *k):
  194. self.tab.add(table)
  195. tab = NewTab(
  196. Page(
  197. page_title="CoTan:查看表格",
  198. layout=Page.SimplePageLayout))
  199. # 迭代添加内容
  200. for name in sheet_list:
  201. get: np.ndarray = self.get_sheet(name)
  202. if get.ndim == 1:
  203. get = np.expand_dims(get, axis=1)
  204. get: list = get.tolist()
  205. for i in range(len(get)):
  206. get[i] = [i + 1] + get[i]
  207. headers = [i for i in range(len(get[0]))]
  208. table = TableFisrt()
  209. table.add(headers, get).set_global_opts(
  210. title_opts=opts.ComponentTitleOpts(
  211. title=f"表格:{name}", subtitle="CoTan~机器学习:查看数据"
  212. )
  213. )
  214. tab.add(table, f"表格:{name}")
  215. tab.render(html_dir)
  216. return html_dir
  217. @plugin_class_loading(get_path(r"template/machinelearning"))
  218. class LearnerMerge(LearnBase, metaclass=ABCMeta):
  219. def merge(self, name, axis=0): # aiis:0-横向合并(hstack),1-纵向合并(vstack),2-深度合并
  220. sheet_list = []
  221. for i in name:
  222. sheet_list.append(self.get_sheet(i))
  223. get = {0: np.hstack, 1: np.vstack, 2: np.dstack}[axis](sheet_list)
  224. self.add_form(np.array(get), f"{name[0]}合成")
  225. @plugin_class_loading(get_path(r"template/machinelearning"))
  226. class LearnerSplit(LearnBase, metaclass=ABCMeta):
  227. def split(self, name, split=2, axis=0): # aiis:0-横向分割(hsplit),1-纵向分割(vsplit)
  228. sheet = self.get_sheet(name)
  229. get = {0: np.hsplit, 1: np.vsplit, 2: np.dsplit}[axis](sheet, split)
  230. for i in get:
  231. self.add_form(i, f"{name[0]}分割")
  232. def two_split(self, name, split, axis): # 二分切割(0-横向,1-纵向)
  233. sheet = self.get_sheet(name)
  234. try:
  235. split = float(eval(split))
  236. if split < 1:
  237. split = int(split * len(sheet) if axis == 1 else len(sheet[0]))
  238. else:
  239. raise Exception
  240. except BaseException:
  241. split = int(split)
  242. if axis == 0:
  243. self.add_form(sheet[:, split:], f"{name[0]}分割")
  244. self.add_form(sheet[:, :split], f"{name[0]}分割")
  245. @plugin_class_loading(get_path(r"template/machinelearning"))
  246. class LearnerDimensions(LearnBase, metaclass=ABCMeta):
  247. @staticmethod
  248. def deep(sheet: np.ndarray):
  249. return sheet.ravel()
  250. @staticmethod
  251. def down_ndim(sheet: np.ndarray): # 横向
  252. down_list = []
  253. for i in sheet:
  254. down_list.append(i.ravel())
  255. return np.array(down_list)
  256. @staticmethod
  257. def longitudinal_down_ndim(sheet: np.ndarray): # 纵向
  258. down_list = []
  259. for i in range(len(sheet[0])):
  260. down_list.append(sheet[:, i].ravel())
  261. return np.array(down_list).T
  262. def reval(self, name, axis): # axis:0-横向,1-纵向(带.T),2-深度
  263. sheet = self.get_sheet(name)
  264. self.add_form(
  265. {0: self.down_ndim, 1: self.longitudinal_down_ndim, 2: self.deep}[axis](
  266. sheet
  267. ).copy(),
  268. f"{name}伸展",
  269. )
  270. def del_ndim(self, name): # 删除无用维度
  271. sheet = self.get_sheet(name)
  272. self.add_form(np.squeeze(sheet), f"{name}降维")
  273. @plugin_class_loading(get_path(r"template/machinelearning"))
  274. class LearnerShape(LearnBase, metaclass=ABCMeta):
  275. def transpose(self, name, func: list):
  276. sheet = self.get_sheet(name)
  277. if sheet.ndim <= 2:
  278. self.add_form(sheet.transpose().copy(), f"{name}.T")
  279. else:
  280. self.add_form(np.transpose(sheet, func).copy(), f"{name}.T")
  281. def reshape(self, name, shape: list):
  282. sheet = self.get_sheet(name)
  283. self.add_form(sheet.reshape(shape).copy(), f"{name}.r")
  284. @plugin_class_loading(get_path(r"template/machinelearning"))
  285. class Calculation(LearnBase, metaclass=ABCMeta):
  286. def calculation_matrix(self, data, data_type, func):
  287. if 1 not in data_type:
  288. raise Exception
  289. func = self.func_dict.get(func, lambda x, y: x)
  290. args_data = []
  291. for i in range(len(data)):
  292. if data_type[i] == 0:
  293. args_data.append(data[i])
  294. else:
  295. args_data.append(self.get_sheet(data[i]))
  296. get = func(*args_data)
  297. self.add_form(get, f"{func}({data[0]},{data[1]})")
  298. return get
  299. class Machinebase(metaclass=ABCMeta): # 学习器的基类
  300. def __init__(self, *args, **kwargs):
  301. self.model = None
  302. self.have_fit = False
  303. self.have_predict = False
  304. self.x_traindata = None
  305. self.y_traindata = None
  306. # 有监督学习专有的testData
  307. self.x_testdata = None
  308. self.y_testdata = None
  309. # 记录这两个是为了克隆
  310. @abstractmethod
  311. def fit_model(self, x_data, y_data, split, increment, kwargs):
  312. pass
  313. @abstractmethod
  314. def score(self, x_data, y_data):
  315. pass
  316. @abstractmethod
  317. def class_score(self, save_dir, x_data, y_really):
  318. pass
  319. @staticmethod
  320. def _accuracy(y_predict, y_really): # 准确率
  321. return accuracy_score(y_really, y_predict)
  322. @staticmethod
  323. def _macro(y_predict, y_really):
  324. func = [recall_score, precision_score, f1_score] # 召回率,精确率和f1
  325. class_ = np.unique(y_really).tolist()
  326. result = func[func](y_really, y_predict, class_, average=None)
  327. return result, class_
  328. @staticmethod
  329. def _confusion_matrix(y_predict, y_really): # 混淆矩阵
  330. class_ = np.unique(y_really).tolist()
  331. return confusion_matrix(y_really, y_predict), class_
  332. @staticmethod
  333. def _kappa_score(y_predict, y_really):
  334. return cohen_kappa_score(y_really, y_predict)
  335. @abstractmethod
  336. def regression_score(self, save_dir, x_data, y_really):
  337. pass
  338. @abstractmethod
  339. def clusters_score(self, save_dir, x_data, args):
  340. pass
  341. @staticmethod
  342. def _mse(y_predict, y_really): # 均方误差
  343. return mean_squared_error(y_really, y_predict)
  344. @staticmethod
  345. def _mae(y_predict, y_really): # 中值绝对误差
  346. return median_absolute_error(y_really, y_predict)
  347. @staticmethod
  348. def _r2_score(y_predict, y_really): # 中值绝对误差
  349. return r2_score(y_really, y_predict)
  350. def _rmse(self, y_predict, y_really): # 中值绝对误差
  351. return self._mse(y_predict, y_really) ** 0.5
  352. @staticmethod
  353. def _coefficient_clustering(x_data, y_predict):
  354. means_score = silhouette_score(x_data, y_predict)
  355. outline_score = silhouette_samples(x_data, y_predict)
  356. return means_score, outline_score
  357. @abstractmethod
  358. def predict(self, x_data, args, kwargs):
  359. pass
  360. @abstractmethod
  361. def data_visualization(self, save_dir, args, kwargs):
  362. pass
  363. @plugin_class_loading(get_path(r"template/machinelearning"))
  364. class StudyMachinebase(Machinebase):
  365. def fit_model(self, x_data, y_data, split=0.3, increment=True, **kwargs):
  366. y_data = y_data.ravel()
  367. try:
  368. if self.x_traindata is None or not increment:
  369. raise Exception
  370. self.x_traindata = np.vstack(x_data, self.x_traindata)
  371. self.y_traindata = np.vstack(y_data, self.y_traindata)
  372. except BaseException:
  373. self.x_traindata = x_data.copy()
  374. self.y_traindata = y_data.copy()
  375. x_train, x_test, y_train, y_test = train_test_split(
  376. x_data, y_data, test_size=split
  377. )
  378. try: # 增量式训练
  379. if not increment:
  380. raise Exception
  381. self.model.partial_fit(x_data, y_data)
  382. except BaseException:
  383. self.model.fit(self.x_traindata, self.y_traindata)
  384. train_score = self.model.score(x_train, y_train)
  385. test_score = self.model.score(x_test, y_test)
  386. self.have_fit = True
  387. return train_score, test_score
  388. def score(self, x_data, y_data):
  389. score = self.model.score(x_data, y_data)
  390. return score
  391. def class_score(self, save_dir, x_data: np.ndarray, y_really: np.ndarray):
  392. y_really: np.ndarray = y_really.ravel()
  393. y_predict: np.ndarray = self.predict(x_data)[0]
  394. accuracy = self._accuracy(y_predict, y_really)
  395. recall, class_list = self._macro(y_predict, y_really)
  396. precision, class_list = self._macro(y_predict, y_really)
  397. f1, class_list = self._macro(y_predict, y_really)
  398. confusion_matrix, class_list = self._confusion_matrix(
  399. y_predict, y_really)
  400. kappa = self._kappa_score(y_predict, y_really)
  401. class_list: list
  402. tab = Tab()
  403. def gauge_base(name: str, value: float) -> Gauge:
  404. c = (
  405. Gauge()
  406. .add("", [(name, round(value * 100, 2))], min_=0, max_=100)
  407. .set_global_opts(title_opts=opts.TitleOpts(title=name))
  408. )
  409. return c
  410. tab.add(gauge_base("准确率", accuracy), "准确率")
  411. tab.add(gauge_base("kappa", kappa), "kappa")
  412. def bar_base(name, value) -> Bar:
  413. c = (
  414. Bar()
  415. .add_xaxis(class_list)
  416. .add_yaxis(name, value, **label_setting)
  417. .set_global_opts(
  418. title_opts=opts.TitleOpts(title=name), **global_setting
  419. )
  420. )
  421. return c
  422. tab.add(bar_base("精确率", precision.tolist()), "精确率")
  423. tab.add(bar_base("召回率", recall.tolist()), "召回率")
  424. tab.add(bar_base("F1", f1.tolist()), "F1")
  425. def heatmap_base(name, value, max_, min_, show) -> HeatMap:
  426. c = (
  427. HeatMap()
  428. .add_xaxis(class_list)
  429. .add_yaxis(
  430. name,
  431. class_list,
  432. value,
  433. label_opts=opts.LabelOpts(is_show=show, position="inside"),
  434. )
  435. .set_global_opts(
  436. title_opts=opts.TitleOpts(title=name),
  437. **global_setting,
  438. visualmap_opts=opts.VisualMapOpts(
  439. max_=max_, min_=min_, pos_right="3%"
  440. ),
  441. )
  442. )
  443. return c
  444. value = [
  445. [class_list[i], class_list[j], float(confusion_matrix[i, j])]
  446. for i in range(len(class_list))
  447. for j in range(len(class_list))
  448. ]
  449. tab.add(
  450. heatmap_base(
  451. "混淆矩阵",
  452. value,
  453. float(confusion_matrix.max()),
  454. float(confusion_matrix.min()),
  455. len(class_list) < 7,
  456. ),
  457. "混淆矩阵",
  458. )
  459. des_to_csv(save_dir, "混淆矩阵", confusion_matrix, class_list, class_list)
  460. des_to_csv(
  461. save_dir, "评分", [
  462. precision, recall, f1], class_list, [
  463. "精确率", "召回率", "F1"])
  464. save = save_dir + rf"{os.sep}分类模型评估.HTML"
  465. tab.render(save)
  466. return save,
  467. def regression_score(
  468. self,
  469. save_dir,
  470. x_data: np.ndarray,
  471. y_really: np.ndarray):
  472. y_really = y_really.ravel()
  473. y_predict = self.predict(x_data)[0]
  474. tab = Tab()
  475. mse = self._mse(y_predict, y_really)
  476. mae = self._mae(y_predict, y_really)
  477. r2_score = self._r2_score(y_predict, y_really)
  478. rmse = self._rmse(y_predict, y_really)
  479. tab.add(make_tab(["MSE", "MAE", "RMSE", "r2_Score"], [
  480. [mse, mae, rmse, r2_score]]), "评估数据", )
  481. save = save_dir + rf"{os.sep}回归模型评估.HTML"
  482. tab.render(save)
  483. return save,
  484. def clusters_score(self, save_dir, x_data: np.ndarray, *args):
  485. y_predict = self.predict(x_data)[0]
  486. tab = Tab()
  487. coefficient, coefficient_array = self._coefficient_clustering(
  488. x_data, y_predict)
  489. def gauge_base(name: str, value: float) -> Gauge:
  490. c = (
  491. Gauge()
  492. .add(
  493. "",
  494. [(name, round(value * 100, 2))],
  495. min_=0,
  496. max_=10 ** (judging_digits(value * 100)),
  497. )
  498. .set_global_opts(title_opts=opts.TitleOpts(title=name))
  499. )
  500. return c
  501. def bar_base(name, value, xaxis) -> Bar:
  502. c = (
  503. Bar()
  504. .add_xaxis(xaxis)
  505. .add_yaxis(name, value, **label_setting)
  506. .set_global_opts(
  507. title_opts=opts.TitleOpts(title=name), **global_setting
  508. )
  509. )
  510. return c
  511. tab.add(gauge_base("平均轮廓系数", coefficient), "平均轮廓系数")
  512. def bar_(coefficient_array, name="数据轮廓系数"):
  513. xaxis = [f"数据{i}" for i in range(len(coefficient_array))]
  514. value = coefficient_array.tolist()
  515. tab.add(bar_base(name, value, xaxis), name)
  516. n = 20
  517. if len(coefficient_array) <= n:
  518. bar_(coefficient_array)
  519. elif len(coefficient_array) <= n ** 2:
  520. a = 0
  521. while a <= len(coefficient_array):
  522. b = a + n
  523. if b >= len(coefficient_array):
  524. b = len(coefficient_array) + 1
  525. cofe_array = coefficient_array[a:b]
  526. bar_(cofe_array, f"{a}-{b}数据轮廓系数")
  527. a += n
  528. else:
  529. split = np.hsplit(coefficient_array, n)
  530. a = 0
  531. for cofe_array in split:
  532. bar_(cofe_array, f"{a}%-{a + n}%数据轮廓系数")
  533. a += n
  534. save = save_dir + rf"{os.sep}聚类模型评估.HTML"
  535. tab.render(save)
  536. return save,
  537. def predict(self, x_data, *args, **kwargs):
  538. self.x_testdata = x_data.copy()
  539. y_predict = self.model.predict(x_data,)
  540. self.y_testdata = y_predict.copy()
  541. self.have_predict = True
  542. return y_predict, "预测"
  543. def data_visualization(self, save_dir, *args, **kwargs):
  544. return save_dir,
  545. class PrepBase(StudyMachinebase): # 不允许第二次训练
  546. def __init__(self, *args, **kwargs):
  547. super(PrepBase, self).__init__(*args, **kwargs)
  548. self.model = None
  549. def fit_model(self, x_data, y_data, increment=True, *args, **kwargs):
  550. if not self.have_predict: # 不允许第二次训练
  551. y_data = y_data.ravel()
  552. try:
  553. if self.x_traindata is None or not increment:
  554. raise Exception
  555. self.x_traindata = np.vstack(x_data, self.x_traindata)
  556. self.y_traindata = np.vstack(y_data, self.y_traindata)
  557. except BaseException:
  558. self.x_traindata = x_data.copy()
  559. self.y_traindata = y_data.copy()
  560. try: # 增量式训练
  561. if not increment:
  562. raise Exception
  563. self.model.partial_fit(x_data, y_data)
  564. except BaseException:
  565. self.model.fit(self.x_traindata, self.y_traindata)
  566. self.have_fit = True
  567. return "None", "None"
  568. def predict(self, x_data, *args, **kwargs):
  569. self.x_testdata = x_data.copy()
  570. x_predict = self.model.transform(x_data)
  571. self.y_testdata = x_predict.copy()
  572. self.have_predict = True
  573. return x_predict, "特征工程"
  574. def score(self, x_data, y_data):
  575. return "None" # 没有score
  576. class Unsupervised(PrepBase): # 无监督,不允许第二次训练
  577. def fit_model(self, x_data, increment=True, *args, **kwargs):
  578. if not self.have_predict: # 不允许第二次训练
  579. self.y_traindata = None
  580. try:
  581. if self.x_traindata is None or not increment:
  582. raise Exception
  583. self.x_traindata = np.vstack(x_data, self.x_traindata)
  584. except BaseException:
  585. self.x_traindata = x_data.copy()
  586. try: # 增量式训练
  587. if not increment:
  588. raise Exception
  589. self.model.partial_fit(x_data)
  590. except BaseException:
  591. self.model.fit(self.x_traindata, self.y_traindata)
  592. self.have_fit = True
  593. return "None", "None"
  594. class UnsupervisedModel(PrepBase): # 无监督
  595. def fit_model(self, x_data, increment=True, *args, **kwargs):
  596. self.y_traindata = None
  597. try:
  598. if self.x_traindata is None or not increment:
  599. raise Exception
  600. self.x_traindata = np.vstack(x_data, self.x_traindata)
  601. except BaseException:
  602. self.x_traindata = x_data.copy()
  603. try: # 增量式训练
  604. if not increment:
  605. raise Exception
  606. self.model.partial_fit(x_data)
  607. except BaseException:
  608. self.model.fit(self.x_traindata, self.y_traindata)
  609. self.have_fit = True
  610. return "None", "None"
  611. @plugin_class_loading(get_path(r"template/machinelearning"))
  612. class ToPyebase(StudyMachinebase):
  613. def __init__(self, model, *args, **kwargs):
  614. super(ToPyebase, self).__init__(*args, **kwargs)
  615. self.model = None
  616. # 记录这两个是为了克隆
  617. self.k = {}
  618. self.model_Name = model
  619. def fit_model(self, x_data, y_data, *args, **kwargs):
  620. self.x_traindata = x_data.copy()
  621. self.y_traindata = y_data.ravel().copy()
  622. self.have_fit = True
  623. return "None", "None"
  624. def predict(self, x_data, *args, **kwargs):
  625. self.have_predict = True
  626. return np.array([]), "请使用训练"
  627. def score(self, x_data, y_data):
  628. return "None" # 没有score
class DataAnalysis(ToPyebase):  # data analysis
    def data_visualization(self, save_dir, *args, **kwargs):
        """Render cumulative statistics of the training data to HTML.

        For each statistic, row i holds the statistic of the first i+1 rows
        of every column.  Returns (path to the rendered HTML,).
        """
        tab = Tab()
        data = self.x_traindata

        def cumulative_calculation(tab_data, func, name, render_tab):
            # apply func to tab_data[:i+1, a] for every row i and column a
            sum_list = []
            for i in range(len(tab_data)):  # iterate the rows
                sum_list.append([])
                for a in range(len(tab_data[i])):
                    s = num_str(func(tab_data[: i + 1, a]), 8)
                    sum_list[-1].append(s)
            des_to_csv(save_dir, f"{name}", sum_list)
            render_tab.add(
                make_tab([f"[{i}]" for i in range(len(sum_list[0]))], sum_list),
                f"{name}",
            )

        def geometric_mean(x):
            return np.power(np.prod(x), 1 / len(x))  # geometric mean

        def square_mean(x):
            return np.sqrt(np.sum(np.power(x, 2)) / len(x))  # quadratic mean

        def harmonic_mean(x):
            return len(x) / np.sum(np.power(x, -1))  # harmonic mean

        cumulative_calculation(data, np.sum, "累计求和", tab)
        cumulative_calculation(data, np.var, "累计方差", tab)
        cumulative_calculation(data, np.std, "累计标准差", tab)
        cumulative_calculation(data, np.mean, "累计算术平均值", tab)
        cumulative_calculation(data, geometric_mean, "累计几何平均值", tab)
        cumulative_calculation(data, square_mean, "累计平方平均值", tab)
        cumulative_calculation(data, harmonic_mean, "累计调和平均值", tab)
        cumulative_calculation(data, np.median, "累计中位数", tab)
        cumulative_calculation(data, np.max, "累计最大值", tab)
        cumulative_calculation(data, np.min, "累计最小值", tab)
        save = save_dir + rf"{os.sep}数据分析.HTML"
        tab.render(save)  # generate the HTML file
        return save,
class Corr(ToPyebase):  # correlation and covariance
    def data_visualization(self, save_dir, *args, **kwargs):
        """Render correlation and covariance heat maps of the training data to HTML."""
        tab = Tab()
        data = DataFrame(self.x_traindata)
        corr: np.ndarray = data.corr().to_numpy()  # correlation matrix
        cov: np.ndarray = data.cov().to_numpy()  # covariance matrix

        def heat_map(data, name: str, max_, min_):
            # one heat-map cell per (feature, feature) pair
            x = [f"特征[{i}]" for i in range(len(data))]
            y = [f"特征[{i}]" for i in range(len(data[0]))]
            value = [
                (f"特征[{i}]", f"特征[{j}]", float(data[i][j]))
                for i in range(len(data))
                for j in range(len(data[i]))
            ]
            c = (
                HeatMap()
                .add_xaxis(x)
                # hide the cell labels when there are too many features
                .add_yaxis(
                    f"数据",
                    y,
                    value,
                    label_opts=opts.LabelOpts(
                        is_show=True if len(x) <= 10 else False, position="inside"
                    ),
                )
                .set_global_opts(
                    title_opts=opts.TitleOpts(title="矩阵热力图"),
                    **global_not_legend,
                    yaxis_opts=opts.AxisOpts(
                        is_scale=True, type_="category"
                    ),  # 'category'
                    xaxis_opts=opts.AxisOpts(is_scale=True, type_="category"),
                    visualmap_opts=opts.VisualMapOpts(
                        is_show=True, max_=max_, min_=min_, pos_right="3%"
                    ),
                )  # display
            )
            tab.add(c, name)

        heat_map(corr, "相关性热力图", 1, -1)
        heat_map(cov, "协方差热力图", float(cov.max()), float(cov.min()))
        des_to_csv(save_dir, f"相关性矩阵", corr)
        des_to_csv(save_dir, f"协方差矩阵", cov)
        save = save_dir + rf"{os.sep}数据相关性.HTML"
        tab.render(save)  # generate the HTML file
        return save,
  710. class ViewData(ToPyebase): # 绘制预测型热力图
  711. def __init__(
  712. self, args_use, learner, *args, **kwargs
  713. ): # model表示当前选用的模型类型,Alpha针对正则化的参数
  714. super(ViewData, self).__init__(args_use, learner, *args, **kwargs)
  715. self.model = learner.Model
  716. self.Select_Model = None
  717. self.have_fit = learner.have_Fit
  718. self.model_Name = "Select_Model"
  719. self.learner = learner
  720. self.learner_name = learner.Model_Name
  721. def fit_model(self, *args, **kwargs):
  722. self.have_fit = True
  723. return "None", "None"
  724. def predict(self, x_data, add_func=None, *args, **kwargs):
  725. x_traindata = self.learner.x_traindata
  726. y_traindata = self.learner.y_traindata
  727. x_name = self.learner_name
  728. if x_traindata is not None:
  729. add_func(x_traindata, f"{x_name}:x训练数据")
  730. try:
  731. x_testdata = self.x_testdata
  732. if x_testdata is not None:
  733. add_func(x_testdata, f"{x_name}:x测试数据")
  734. except BaseException:
  735. pass
  736. try:
  737. y_testdata = self.y_testdata.copy()
  738. if y_testdata is not None:
  739. add_func(y_testdata, f"{x_name}:y测试数据")
  740. except BaseException:
  741. pass
  742. self.have_fit = True
  743. if y_traindata is None:
  744. return np.array([]), "y训练数据"
  745. return y_traindata, "y训练数据"
  746. def data_visualization(self, save_dir, *args, **kwargs):
  747. return save_dir,
  748. class MatrixScatter(ToPyebase): # 矩阵散点图
  749. def data_visualization(self, save_dir, *args, **kwargs):
  750. tab = Tab()
  751. data = self.x_traindata
  752. if data.ndim <= 2: # 维度为2
  753. c = (
  754. Scatter()
  755. .add_xaxis([f"{i}" for i in range(data.shape[1])])
  756. .set_global_opts(
  757. title_opts=opts.TitleOpts(title=f"矩阵散点图"), **global_not_legend
  758. )
  759. )
  760. if data.ndim == 2:
  761. for num in range(len(data)):
  762. i = data[num]
  763. c.add_yaxis(f"{num}", [[f"{num}", x]
  764. for x in i], color="#FFFFFF")
  765. else:
  766. c.add_yaxis(f"0", [[0, x] for x in data], color="#FFFFFF")
  767. c.set_series_opts(
  768. label_opts=opts.LabelOpts(
  769. is_show=True,
  770. color="#000000",
  771. position="inside",
  772. formatter=JsCode("function(params){return params.data[2];}"),
  773. ))
  774. elif data.ndim == 3:
  775. c = Scatter3D().set_global_opts(
  776. title_opts=opts.TitleOpts(title=f"矩阵散点图"), **global_not_legend
  777. )
  778. for num in range(len(data)):
  779. i = data[num]
  780. for s_num in range(len(i)):
  781. s = i[s_num]
  782. y_data = [[num, s_num, x, float(s[x])]
  783. for x in range(len(s))]
  784. c.add(
  785. f"{num}",
  786. y_data,
  787. zaxis3d_opts=opts.Axis3DOpts(
  788. type_="category"))
  789. c.set_series_opts(
  790. label_opts=opts.LabelOpts(
  791. is_show=True,
  792. color="#000000",
  793. position="inside",
  794. formatter=JsCode("function(params){return params.data[3];}"),
  795. ))
  796. else:
  797. c = Scatter()
  798. tab.add(c, "矩阵散点图")
  799. save = save_dir + rf"{os.sep}矩阵散点图.HTML"
  800. tab.render(save) # 生成HTML
  801. return save,
class ClusterTree(ToPyebase):  # cluster dendrogram
    def data_visualization(self, save_dir, *args, **kwargs):
        """Render a ward-linkage dendrogram of the training data to HTML (via a PNG)."""
        tab = Tab()
        x_data = self.x_traindata
        linkage_array = ward(x_data)  # self.y_traindata is the result
        dendrogram(linkage_array)
        plt.savefig(save_dir + rf"{os.sep}Cluster_graph.png")
        image = Image()
        image.add(src=save_dir + rf"{os.sep}Cluster_graph.png",).set_global_opts(
            title_opts=opts.ComponentTitleOpts(title="聚类树状图")
        )
        tab.add(image, "聚类树状图")
        save = save_dir + rf"{os.sep}聚类树状图.HTML"
        tab.render(save)  # generate the HTML file
        return save,
  817. class ClassBar(ToPyebase): # 类型柱状图
  818. def data_visualization(self, save_dir, *args, **kwargs):
  819. tab = Tab()
  820. x_data: np.ndarray = self.x_traindata.transpose()
  821. y_data: np.ndarray = self.y_traindata
  822. class_: list = np.unique(y_data).tolist() # 类型
  823. class_list = []
  824. for n_class in class_: # 生成class_list(class是1,,也就是二维的,下面会压缩成一维)
  825. class_list.append(y_data == n_class)
  826. for num_i in range(len(x_data)): # 迭代每一个特征
  827. i = x_data[num_i]
  828. i_con = is_continuous(i)
  829. if i_con and len(i) >= 11:
  830. # 存放绘图数据,每一层列表是一个类(leg),第二层是每个x_data
  831. c_list = [[0] * 10 for _ in class_list]
  832. start = i.min()
  833. end = i.max()
  834. n = (end - start) / 10 # 生成10条柱子
  835. x_axis = [] # x轴
  836. iter_num = 0 # 迭代到第n个
  837. while iter_num <= 9: # 把每个特征分为10类进行迭代
  838. # x_axis添加数据
  839. x_axis.append(
  840. f"({iter_num})[{round(start, 2)}-"
  841. f"{round((start + n) if (start + n) <= end or not iter_num == 9 else end, 2)}]")
  842. try:
  843. if iter_num == 9:
  844. raise Exception # 执行到第10次时,直接获取剩下的所有
  845. s = (start <= i) == (i < end) # 布尔索引
  846. except BaseException: # 因为start + n有超出end的风险
  847. s = (start <= i) == (i <= end) # 布尔索引
  848. # n_data = i[s] # 取得现在的特征数据
  849. for num in range(len(class_list)): # 根据类别进行迭代
  850. # 取得布尔数组:y_data == n_class也就是输出值为指定类型的bool矩阵,用于切片
  851. now_class: list = class_list[num]
  852. # 切片成和n_data一样的位置一样的形状(now_class就是一个bool矩阵)
  853. bool_class = now_class[s].ravel()
  854. # 用len计数 c_list = [[class1的数据],[class2的数据],[]]
  855. c_list[num][iter_num] = int(np.sum(bool_class))
  856. iter_num += 1
  857. start += n
  858. else:
  859. iter_np = np.unique(i)
  860. # 存放绘图数据,每一层列表是一个类(leg),第二层是每个x_data
  861. c_list = [[0] * len(iter_np) for _ in class_list]
  862. x_axis = [] # 添加x轴数据
  863. for i_num in range(len(iter_np)): # 迭代每一个i(不重复)
  864. i_data = iter_np[i_num]
  865. # n_data= i[i == i_data]#取得现在特征数据
  866. x_axis.append(f"[{i_data}]")
  867. for num in range(len(class_list)): # 根据类别进行迭代
  868. now_class = class_list[num] # 取得class_list的布尔数组
  869. # 切片成和n_data一样的位置一样的形状(now_class就是一个bool矩阵)
  870. bool_class = now_class[i == i_data]
  871. # 用len计数 c_list = [[class1的数据],[class2的数据],[]]
  872. c_list[num][i_num] = len(np.sum(bool_class).tolist())
  873. c = (
  874. Bar()
  875. .add_xaxis(x_axis)
  876. .set_global_opts(
  877. title_opts=opts.TitleOpts(title="类型-特征统计柱状图"),
  878. **global_setting,
  879. xaxis_opts=opts.AxisOpts(type_="category"),
  880. yaxis_opts=opts.AxisOpts(type_="value"),
  881. )
  882. )
  883. y_axis = []
  884. for i in range(len(c_list)):
  885. y_axis.append(f"{class_[i]}")
  886. c.add_yaxis(f"{class_[i]}", c_list[i], **label_setting)
  887. des_to_csv(
  888. save_dir,
  889. f"类型-[{num_i}]特征统计柱状图",
  890. c_list,
  891. x_axis,
  892. y_axis)
  893. tab.add(c, f"类型-[{num_i}]特征统计柱状图")
  894. # 未完成
  895. save = save_dir + rf"{os.sep}特征统计.HTML"
  896. tab.render(save) # 生成HTML
  897. return save,
class NumpyHeatMap(ToPyebase):  # heat map of a raw numpy matrix
    def data_visualization(self, save_dir, *args, **kwargs):
        """Render the training matrix as a heat map plus its table form."""
        tab = Tab()
        data = self.x_traindata
        x = [f"横[{i}]" for i in range(len(data))]
        y = [f"纵[{i}]" for i in range(len(data[0]))]
        value = [
            (f"横[{i}]", f"纵[{j}]", float(data[i][j]))
            for i in range(len(data))
            for j in range(len(data[i]))
        ]
        c = (
            HeatMap()
            .add_xaxis(x)
            .add_yaxis(f"数据", y, value, **label_setting)  # the first slot of value is x
            .set_global_opts(
                title_opts=opts.TitleOpts(title="矩阵热力图"),
                **global_not_legend,
                yaxis_opts=opts.AxisOpts(
                    is_scale=True, type_="category"),  # 'category'
                xaxis_opts=opts.AxisOpts(is_scale=True, type_="category"),
                visualmap_opts=opts.VisualMapOpts(
                    is_show=True,
                    max_=float(data.max()),
                    min_=float(data.min()),
                    pos_right="3%",
                ),
            )  # display
        )
        tab.add(c, "矩阵热力图")
        tab.add(make_tab(x, data.transpose().tolist()), f"矩阵热力图:表格")
        save = save_dir + rf"{os.sep}矩阵热力图.HTML"
        tab.render(save)  # generate the HTML file
        return save,
  932. class PredictiveHeatmapBase(ToPyebase): # 绘制预测型热力图
  933. def __init__(
  934. self, args_use, learner, *args, **kwargs
  935. ): # model表示当前选用的模型类型,Alpha针对正则化的参数
  936. super(
  937. PredictiveHeatmapBase,
  938. self).__init__(
  939. args_use,
  940. learner,
  941. *
  942. args,
  943. **kwargs)
  944. self.model = learner.Model
  945. self.select_model = None
  946. self.have_fit = learner.have_Fit
  947. self.model_Name = "Select_Model"
  948. self.learner = learner
  949. self.x_traindata = learner.x_traindata.copy()
  950. self.y_traindata = learner.y_traindata.copy()
  951. self.means = []
  952. def fit_model(self, x_data, *args, **kwargs):
  953. try:
  954. self.means = x_data.ravel()
  955. except BaseException:
  956. pass
  957. self.have_fit = True
  958. return "None", "None"
  959. def data_visualization(
  960. self,
  961. save_dir,
  962. decision_boundary_func=None,
  963. prediction_boundary_func=None,
  964. *args,
  965. **kwargs,
  966. ):
  967. tab = Tab()
  968. y = self.y_traindata
  969. x_data = self.x_traindata
  970. try: # 如果没有class
  971. class_ = self.model.classes_.tolist()
  972. class_heard = [f"类别[{i}]" for i in range(len(class_))]
  973. # 获取数据
  974. get, x_means, x_range, data_type = training_visualization(
  975. x_data, class_, y)
  976. # 可使用自带的means,并且nan表示跳过
  977. for i in range(min([len(x_means), len(self.means)])):
  978. try:
  979. g = self.means[i]
  980. if g == np.nan:
  981. raise Exception
  982. x_means[i] = g
  983. except BaseException:
  984. pass
  985. get = decision_boundary_func(
  986. x_range, x_means, self.learner.predict, class_, data_type
  987. )
  988. for i in range(len(get)):
  989. tab.add(get[i], f"{i}预测热力图")
  990. heard = class_heard + [f"普适预测第{i}特征" for i in range(len(x_means))]
  991. data = class_ + [f"{i}" for i in x_means]
  992. c = Table().add(headers=heard, rows=[data])
  993. tab.add(c, "数据表")
  994. except BaseException:
  995. get, x_means, x_range, data_type = regress_visualization(x_data, y)
  996. get = prediction_boundary_func(
  997. x_range, x_means, self.learner.predict, data_type
  998. )
  999. for i in range(len(get)):
  1000. tab.add(get[i], f"{i}预测热力图")
  1001. heard = [f"普适预测第{i}特征" for i in range(len(x_means))]
  1002. data = [f"{i}" for i in x_means]
  1003. c = Table().add(headers=heard, rows=[data])
  1004. tab.add(c, "数据表")
  1005. save = save_dir + rf"{os.sep}预测热力图.HTML"
  1006. tab.render(save) # 生成HTML
  1007. return save,
  1008. class PredictiveHeatmap(PredictiveHeatmapBase): # 绘制预测型热力图
  1009. def data_visualization(self, save_dir, *args, **kwargs):
  1010. return super().data_visualization(
  1011. save_dir, decision_boundary, prediction_boundary
  1012. )
  1013. class PredictiveHeatmapMore(PredictiveHeatmapBase): # 绘制预测型热力图_More
  1014. def data_visualization(self, save_dir, *args, **kwargs):
  1015. return super().data_visualization(
  1016. save_dir, decision_boundary_more, prediction_boundary_more
  1017. )
  1018. @plugin_class_loading(get_path(r"template/machinelearning"))
  1019. class NearFeatureScatterClassMore(ToPyebase):
  1020. def data_visualization(self, save_dir, *args, **kwargs):
  1021. tab = Tab()
  1022. x_data = self.x_traindata
  1023. y = self.y_traindata
  1024. class_ = np.unique(y).ravel().tolist()
  1025. class_heard = [f"簇[{i}]" for i in range(len(class_))]
  1026. get, x_means, x_range, data_type = training_visualization_more_no_center(
  1027. x_data, class_, y)
  1028. for i in range(len(get)):
  1029. tab.add(get[i], f"{i}训练数据散点图")
  1030. heard = class_heard + [f"普适预测第{i}特征" for i in range(len(x_means))]
  1031. data = class_ + [f"{i}" for i in x_means]
  1032. c = Table().add(headers=heard, rows=[data])
  1033. tab.add(c, "数据表")
  1034. save = save_dir + rf"{os.sep}数据特征散点图(分类).HTML"
  1035. tab.render(save) # 生成HTML
  1036. return save,
  1037. @plugin_class_loading(get_path(r"template/machinelearning"))
  1038. class NearFeatureScatterMore(ToPyebase):
  1039. def data_visualization(self, save_dir, *args, **kwargs):
  1040. tab = Tab()
  1041. x_data = self.x_traindata
  1042. x_means = quick_stats(x_data).get()[0]
  1043. get_y = feature_visualization(x_data, "数据散点图") # 转换
  1044. for i in range(len(get_y)):
  1045. tab.add(get_y[i], f"[{i}]数据x-x散点图")
  1046. heard = [f"普适预测第{i}特征" for i in range(len(x_means))]
  1047. data = [f"{i}" for i in x_means]
  1048. c = Table().add(headers=heard, rows=[data])
  1049. tab.add(c, "数据表")
  1050. save = save_dir + rf"{os.sep}数据特征散点图.HTML"
  1051. tab.render(save) # 生成HTML
  1052. return save,
  1053. class NearFeatureScatterClass(ToPyebase): # 临近特征散点图:分类数据
  1054. def data_visualization(self, save_dir, *args, **kwargs):
  1055. # 获取数据
  1056. class_ = np.unique(self.y_traindata).ravel().tolist()
  1057. class_heard = [f"类别[{i}]" for i in range(len(class_))]
  1058. tab = Tab()
  1059. y = self.y_traindata
  1060. x_data = self.x_traindata
  1061. get, x_means, x_range, data_type = training_visualization(
  1062. x_data, class_, y)
  1063. for i in range(len(get)):
  1064. tab.add(get[i], f"{i}临近特征散点图")
  1065. heard = class_heard + [f"普适预测第{i}特征" for i in range(len(x_means))]
  1066. data = class_ + [f"{i}" for i in x_means]
  1067. c = Table().add(headers=heard, rows=[data])
  1068. tab.add(c, "数据表")
  1069. save = save_dir + rf"{os.sep}临近数据特征散点图(分类).HTML"
  1070. tab.render(save) # 生成HTML
  1071. return save,
  1072. class NearFeatureScatter(ToPyebase): # 临近特征散点图:连续数据
  1073. def data_visualization(self, save_dir, *args, **kwargs):
  1074. tab = Tab()
  1075. x_data = self.x_traindata.transpose()
  1076. get, x_means, x_range, data_type = training_visualization_no_class(
  1077. x_data)
  1078. for i in range(len(get)):
  1079. tab.add(get[i], f"{i}临近特征散点图")
  1080. columns = [f"普适预测第{i}特征" for i in range(len(x_means))]
  1081. data = [f"{i}" for i in x_means]
  1082. tab.add(make_tab(columns, [data]), "数据表")
  1083. save = save_dir + rf"{os.sep}临近数据特征散点图.HTML"
  1084. tab.render(save) # 生成HTML
  1085. return save,
  1086. class FeatureScatterYX(ToPyebase): # y-x图
  1087. def data_visualization(self, save_dir, *args, **kwargs):
  1088. tab = Tab()
  1089. x_data = self.x_traindata
  1090. y = self.y_traindata
  1091. get, x_means, x_range, data_type = regress_visualization(x_data, y)
  1092. for i in range(len(get)):
  1093. tab.add(get[i], f"{i}特征x-y散点图")
  1094. columns = [f"普适预测第{i}特征" for i in range(len(x_means))]
  1095. data = [f"{i}" for i in x_means]
  1096. tab.add(make_tab(columns, [data]), "数据表")
  1097. save = save_dir + rf"{os.sep}特征y-x图像.HTML"
  1098. tab.render(save) # 生成HTML
  1099. return save,
@plugin_class_loading(get_path(r"template/machinelearning"))
class LineModel(StudyMachinebase):
    def __init__(
        self, args_use, model, *args, **kwargs
    ):  # `model` selects the estimator; alpha is the regularization strength
        super(LineModel, self).__init__(*args, **kwargs)
        all_model = {
            "Line": LinearRegression,
            "Ridge": Ridge,
            "Lasso": Lasso}[model]
        if model == "Line":
            # plain least squares takes no hyper-parameters
            self.model = all_model()
            self.k = {}
        else:
            self.model = all_model(
                alpha=args_use["alpha"], max_iter=args_use["max_iter"]
            )
            self.k = {
                "alpha": args_use["alpha"],
                "max_iter": args_use["max_iter"]}
        # recorded so the learner can be cloned
        self.Alpha = args_use["alpha"]
        self.max_iter = args_use["max_iter"]
        self.model_Name = model

    def data_visualization(self, save_dir, *args, **kwargs):
        """Render coefficients, regression lines, prediction heat maps and CSV exports."""
        tab = Tab()
        x_data = self.x_traindata
        y = self.y_traindata
        w_list = self.model.coef_.tolist()
        w_heard = [f"系数w[{i}]" for i in range(len(w_list))]
        b = self.model.intercept_.tolist()
        get, x_means, x_range, data_type = regress_visualization(x_data, y)
        get_line = regress_w(x_data, w_list, b, x_means.copy())
        for i in range(len(get)):
            tab.add(get[i].overlap(get_line[i]), f"{i}预测类型图")
        get = prediction_boundary(x_range, x_means, self.predict, data_type)
        for i in range(len(get)):
            tab.add(get[i], f"{i}预测热力图")
        tab.add(coefficient_scatter_plot(w_heard, w_list), "系数w散点图")
        tab.add(coefficient_bar_plot(w_heard, self.model.coef_), "系数柱状图")
        columns = [
            f"普适预测第{i}特征" for i in range(
                len(x_means))] + w_heard + ["截距b"]
        data = [f"{i}" for i in x_means] + w_list + [b]
        if self.model_Name != "Line":
            # regularized models also report their hyper-parameters
            columns += ["阿尔法", "最大迭代次数"]
            data += [self.model.alpha, self.model.max_iter]
        tab.add(make_tab(columns, [data]), "数据表")
        des_to_csv(
            save_dir,
            "系数表",
            [w_list + [b]],
            [f"系数W[{i}]" for i in range(len(w_list))] + ["截距"],
        )
        des_to_csv(
            save_dir,
            "预测表",
            [[f"{i}" for i in x_means]],
            [f"普适预测第{i}特征" for i in range(len(x_means))],
        )
        save = save_dir + rf"{os.sep}线性回归模型.HTML"
        tab.render(save)  # generate the HTML file
        return save,
@plugin_class_loading(get_path(r"template/machinelearning"))
class LogisticregressionModel(StudyMachinebase):
    def __init__(
        self, args_use, model, *args, **kwargs
    ):  # `model` selects the estimator; C is the inverse regularization strength
        super(LogisticregressionModel, self).__init__(*args, **kwargs)
        self.model = LogisticRegression(
            C=args_use["C"], max_iter=args_use["max_iter"])
        # recorded so the learner can be cloned
        self.C = args_use["C"]
        self.max_iter = args_use["max_iter"]
        self.k = {"C": args_use["C"], "max_iter": args_use["max_iter"]}
        self.model_Name = model

    def data_visualization(self, save_dir="render.html", *args, **kwargs):
        """Render decision boundaries, coefficient plots and CSV exports."""
        # gather the fitted parameters
        w_array = self.model.coef_
        w_list = w_array.tolist()  # table form
        b = self.model.intercept_
        c = self.model.C
        max_iter = self.model.max_iter
        class_ = self.model.classes_.tolist()
        class_heard = [f"类别[{i}]" for i in range(len(class_))]
        tab = Tab()
        y = self.y_traindata
        x_data = self.x_traindata
        get, x_means, x_range, data_type = training_visualization(
            x_data, class_, y)
        get_line = training_w(x_data, class_, y, w_list, b, x_means.copy())
        for i in range(len(get)):
            tab.add(get[i].overlap(get_line[i]), f"{i}决策边界散点图")
        for i in range(len(w_list)):
            w = w_list[i]
            w_heard = [f"系数w[{i},{j}]" for j in range(len(w))]
            tab.add(coefficient_scatter_plot(w_heard, w), f"系数w[{i}]散点图")
            tab.add(coefficient_bar_plot(w_heard, w_array[i]), f"系数w[{i}]柱状图")
        columns = class_heard + \
            [f"截距{i}" for i in range(len(b))] + ["C", "最大迭代数"]
        data = class_ + b.tolist() + [c, max_iter]
        c = Table().add(headers=columns, rows=[data])
        tab.add(c, "数据表")
        c = Table().add(
            headers=[f"系数W[{i}]" for i in range(len(w_list[0]))], rows=w_list
        )
        tab.add(c, "系数数据表")
        c = Table().add(
            headers=[f"普适预测第{i}特征" for i in range(len(x_means))],
            rows=[[f"{i}" for i in x_means]],
        )
        tab.add(c, "普适预测数据表")
        des_to_csv(save_dir, "系数表", w_list, [
            f"系数W[{i}]" for i in range(len(w_list[0]))])
        des_to_csv(save_dir, "截距表", [b], [f"截距{i}" for i in range(len(b))])
        des_to_csv(
            save_dir,
            "预测表",
            [[f"{i}" for i in x_means]],
            [f"普适预测第{i}特征" for i in range(len(x_means))],
        )
        save = save_dir + rf"{os.sep}逻辑回归.HTML"
        tab.render(save)  # generate the HTML file
        return save,
  1224. class CategoricalData: # 数据统计助手
  1225. def __init__(self):
  1226. self.x_means = []
  1227. self.x_range = []
  1228. self.data_type = []
  1229. def __call__(self, x1, *args, **kwargs):
  1230. get = self.is_continuous(x1)
  1231. return get
  1232. def is_continuous(self, x1: np.array):
  1233. try:
  1234. x1_con = is_continuous(x1)
  1235. if x1_con:
  1236. self.x_means.append(np.mean(x1))
  1237. self.add_range(x1)
  1238. else:
  1239. raise Exception
  1240. return x1_con
  1241. except BaseException: # 找出出现次数最多的元素
  1242. new = np.unique(x1) # 去除相同的元素
  1243. count_list = []
  1244. for i in new:
  1245. count_list.append(np.sum(x1 == i))
  1246. index = count_list.index(max(count_list)) # 找出最大值的索引
  1247. self.x_means.append(x1[index])
  1248. self.add_range(x1, False)
  1249. return False
  1250. def add_range(self, x1: np.array, range_=True):
  1251. try:
  1252. if not range_:
  1253. raise Exception
  1254. min_ = int(x1.min()) - 1
  1255. max_ = int(x1.max()) + 1
  1256. # 不需要复制列表
  1257. self.x_range.append([min_, max_])
  1258. self.data_type.append(1)
  1259. except BaseException:
  1260. self.x_range.append(list(set(x1.tolist()))) # 去除多余元素
  1261. self.data_type.append(2)
  1262. def get(self):
  1263. return self.x_means, self.x_range, self.data_type
@plugin_class_loading(get_path(r"template/machinelearning"))
class KnnModel(StudyMachinebase):
    def __init__(
        self, args_use, model, *args, **kwargs
    ):  # model selects KNN classification or regression
        super(KnnModel, self).__init__(*args, **kwargs)
        # pick the estimator class from the model name
        all_model = {
            "Knn_class": KNeighborsClassifier,
            "Knn": KNeighborsRegressor}[model]
        self.model = all_model(
            p=args_use["p"],
            n_neighbors=args_use["n_neighbors"])
        # recorded so the model can be cloned later
        self.n_neighbors = args_use["n_neighbors"]
        self.p = args_use["p"]
        self.k = {"n_neighbors": args_use["n_neighbors"], "p": args_use["p"]}
        self.model_Name = model

    def data_visualization(self, save_dir, *args, **kwargs):
        """Plot training/test scatter plots, prediction heat maps and a data table.

        Returns a 1-tuple containing the path of the generated HTML file.
        """
        tab = Tab()
        y = self.y_traindata
        x_data = self.x_traindata
        y_test = self.y_testdata
        x_test = self.x_testdata
        if self.model_Name == "Knn_class":
            class_ = self.model.classes_.tolist()
            class_heard = [f"类别[{i}]" for i in range(len(class_))]
            get, x_means, x_range, data_type = training_visualization(
                x_data, class_, y)
            for i in range(len(get)):
                tab.add(get[i], f"{i}训练数据散点图")
            if y_test is not None:
                get = training_visualization(x_test, class_, y_test)[0]
                for i in range(len(get)):
                    tab.add(get[i], f"{i}测试数据散点图")
            get = decision_boundary(
                x_range, x_means, self.predict, class_, data_type)
            for i in range(len(get)):
                tab.add(get[i], f"{i}预测热力图")
            heard = class_heard + [f"普适预测第{i}特征" for i in range(len(x_means))]
            data = class_ + [f"{i}" for i in x_means]
            c = Table().add(headers=heard, rows=[data])
            tab.add(c, "数据表")
        else:
            get, x_means, x_range, data_type = regress_visualization(x_data, y)
            for i in range(len(get)):
                tab.add(get[i], f"{i}训练数据散点图")
            # NOTE(review): unlike the classification branch above, the test
            # data is used here without a None check — confirm callers always
            # provide it for the regression variant
            get = regress_visualization(x_test, y_test)[0]
            for i in range(len(get)):
                tab.add(get[i], f"{i}测试数据类型图")
            get = prediction_boundary(
                x_range, x_means, self.predict, data_type)
            for i in range(len(get)):
                tab.add(get[i], f"{i}预测热力图")
            heard = [f"普适预测第{i}特征" for i in range(len(x_means))]
            data = [f"{i}" for i in x_means]
            c = Table().add(headers=heard, rows=[data])
            tab.add(c, "数据表")
        des_to_csv(
            save_dir,
            "预测表",
            [[f"{i}" for i in x_means]],
            [f"普适预测第{i}特征" for i in range(len(x_means))],
        )
        save = save_dir + rf"{os.sep}K.HTML"
        tab.render(save)  # write the HTML report
        return save,
@plugin_class_loading(get_path(r"template/machinelearning"))
class TreeModel(StudyMachinebase):
    def __init__(
        self, args_use, model, *args, **kwargs
    ):  # model selects decision-tree classification or regression
        super(TreeModel, self).__init__(*args, **kwargs)
        all_model = {
            "Tree_class": DecisionTreeClassifier,
            "Tree": DecisionTreeRegressor,
        }[model]
        self.model = all_model(
            criterion=args_use["criterion"],
            splitter=args_use["splitter"],
            max_features=args_use["max_features"],
            max_depth=args_use["max_depth"],
            min_samples_split=args_use["min_samples_split"],
        )
        # recorded so the model can be cloned later
        self.criterion = args_use["criterion"]
        self.splitter = args_use["splitter"]
        self.max_features = args_use["max_features"]
        self.max_depth = args_use["max_depth"]
        self.min_samples_split = args_use["min_samples_split"]
        self.k = {
            "criterion": args_use["criterion"],
            "splitter": args_use["splitter"],
            "max_features": args_use["max_features"],
            "max_depth": args_use["max_depth"],
            "min_samples_split": args_use["min_samples_split"],
        }
        self.model_Name = model

    def data_visualization(self, save_dir, *args, **kwargs):
        """Export the tree as graphviz dot, plot importances and boundaries.

        Returns a 1-tuple containing the path of the generated HTML file.
        """
        tab = Tab()
        importance = self.model.feature_importances_.tolist()
        # dump the fitted tree in graphviz dot format for the embedded viewer
        with open(save_dir + fr"{os.sep}Tree_Gra.dot", "w") as f:
            export_graphviz(self.model, out_file=f)
        make_bar("特征重要性", importance, tab)
        des_to_csv(
            save_dir,
            "特征重要性",
            [importance],
            [f"[{i}]特征" for i in range(len(importance))],
        )
        tab.add(see_tree(save_dir + fr"{os.sep}Tree_Gra.dot"), "决策树可视化")
        y = self.y_traindata
        x_data = self.x_traindata
        y_test = self.y_testdata
        x_test = self.x_testdata
        if self.model_Name == "Tree_class":
            class_ = self.model.classes_.tolist()
            class_heard = [f"类别[{i}]" for i in range(len(class_))]
            get, x_means, x_range, data_type = training_visualization(
                x_data, class_, y)
            for i in range(len(get)):
                tab.add(get[i], f"{i}训练数据散点图")
            # NOTE(review): test data is used without a None check here —
            # confirm callers always provide it
            get = training_visualization(x_test, class_, y_test)[0]
            for i in range(len(get)):
                tab.add(get[i], f"{i}测试数据散点图")
            get = decision_boundary(
                x_range, x_means, self.predict, class_, data_type)
            for i in range(len(get)):
                tab.add(get[i], f"{i}预测热力图")
            tab.add(
                make_tab(
                    class_heard
                    + [f"普适预测第{i}特征" for i in range(len(x_means))]
                    + [f"特征{i}重要性" for i in range(len(importance))],
                    [class_ + [f"{i}" for i in x_means] + importance],
                ),
                "数据表",
            )
        else:
            get, x_means, x_range, data_type = regress_visualization(x_data, y)
            for i in range(len(get)):
                tab.add(get[i], f"{i}训练数据散点图")
            get = regress_visualization(x_test, y_test)[0]
            for i in range(len(get)):
                tab.add(get[i], f"{i}测试数据类型图")
            get = prediction_boundary(
                x_range, x_means, self.predict, data_type)
            for i in range(len(get)):
                tab.add(get[i], f"{i}预测热力图")
            tab.add(
                make_tab(
                    [f"普适预测第{i}特征" for i in range(len(x_means))]
                    + [f"特征{i}重要性" for i in range(len(importance))],
                    [[f"{i}" for i in x_means] + importance],
                ),
                "数据表",
            )
        des_to_csv(
            save_dir,
            "预测表",
            [[f"{i}" for i in x_means]],
            [f"普适预测第{i}特征" for i in range(len(x_means))],
        )
        save = save_dir + rf"{os.sep}决策树.HTML"
        tab.render(save)  # write the HTML report
        return save,
  1429. @plugin_class_loading(get_path(r"template/machinelearning"))
  1430. class ForestModel(StudyMachinebase):
  1431. def __init__(
  1432. self, args_use, model, *args, **kwargs
  1433. ): # model表示当前选用的模型类型,Alpha针对正则化的参数
  1434. super(ForestModel, self).__init__(*args, **kwargs)
  1435. model = {
  1436. "Forest_class": RandomForestClassifier,
  1437. "Forest": RandomForestRegressor,
  1438. }[model]
  1439. self.model = model(
  1440. n_estimators=args_use["n_Tree"],
  1441. criterion=args_use["criterion"],
  1442. max_features=args_use["max_features"],
  1443. max_depth=args_use["max_depth"],
  1444. min_samples_split=args_use["min_samples_split"],
  1445. )
  1446. # 记录这两个是为了克隆
  1447. self.n_estimators = args_use["n_Tree"]
  1448. self.criterion = args_use["criterion"]
  1449. self.max_features = args_use["max_features"]
  1450. self.max_depth = args_use["max_depth"]
  1451. self.min_samples_split = args_use["min_samples_split"]
  1452. self.k = {
  1453. "n_estimators": args_use["n_Tree"],
  1454. "criterion": args_use["criterion"],
  1455. "max_features": args_use["max_features"],
  1456. "max_depth": args_use["max_depth"],
  1457. "min_samples_split": args_use["min_samples_split"],
  1458. }
  1459. self.model_Name = model
  1460. def data_visualization(self, save_dir, *args, **kwargs):
  1461. tab = Tab()
  1462. # 多个决策树可视化
  1463. for i in range(len(self.model.estimators_)):
  1464. with open(save_dir + rf"{os.sep}Tree_Gra[{i}].dot", "w") as f:
  1465. export_graphviz(self.model.estimators_[i], out_file=f)
  1466. tab.add(
  1467. see_tree(
  1468. save_dir +
  1469. rf"{os.sep}Tree_Gra[{i}].dot"),
  1470. f"[{i}]决策树可视化")
  1471. y = self.y_traindata
  1472. x_data = self.x_traindata
  1473. if self.model_Name == "Forest_class":
  1474. class_ = self.model.classes_.tolist()
  1475. class_heard = [f"类别[{i}]" for i in range(len(class_))]
  1476. get, x_means, x_range, data_type = training_visualization(
  1477. x_data, class_, y)
  1478. for i in range(len(get)):
  1479. tab.add(get[i], f"{i}训练数据散点图")
  1480. get = decision_boundary(
  1481. x_range, x_means, self.predict, class_, data_type)
  1482. for i in range(len(get)):
  1483. tab.add(get[i], f"{i}预测热力图")
  1484. tab.add(
  1485. make_tab(
  1486. class_heard + [f"普适预测第{i}特征" for i in range(len(x_means))],
  1487. [class_ + [f"{i}" for i in x_means]],
  1488. ),
  1489. "数据表",
  1490. )
  1491. else:
  1492. get, x_means, x_range, data_type = regress_visualization(x_data, y)
  1493. for i in range(len(get)):
  1494. tab.add(get[i], f"{i}预测类型图")
  1495. get = prediction_boundary(
  1496. x_range, x_means, self.predict, data_type)
  1497. for i in range(len(get)):
  1498. tab.add(get[i], f"{i}预测热力图")
  1499. tab.add(
  1500. make_tab(
  1501. [f"普适预测第{i}特征" for i in range(len(x_means))],
  1502. [[f"{i}" for i in x_means]],
  1503. ),
  1504. "数据表",
  1505. )
  1506. des_to_csv(
  1507. save_dir,
  1508. "预测表",
  1509. [[f"{i}" for i in x_means]],
  1510. [f"普适预测第{i}特征" for i in range(len(x_means))],
  1511. )
  1512. save = save_dir + rf"{os.sep}随机森林.HTML"
  1513. tab.render(save) # 生成HTML
  1514. return save,
  1515. class GradienttreeModel(StudyMachinebase): # 继承Tree_Model主要是继承Des
  1516. def __init__(
  1517. self, args_use, model, *args, **kwargs
  1518. ): # model表示当前选用的模型类型,Alpha针对正则化的参数
  1519. super(
  1520. GradienttreeModel,
  1521. self).__init__(
  1522. *args,
  1523. **kwargs) # 不需要执行Tree_Model的初始化
  1524. model = {
  1525. "GradientTree_class": GradientBoostingClassifier,
  1526. "GradientTree": GradientBoostingRegressor,
  1527. }[model]
  1528. self.model = model(
  1529. n_estimators=args_use["n_Tree"],
  1530. max_features=args_use["max_features"],
  1531. max_depth=args_use["max_depth"],
  1532. min_samples_split=args_use["min_samples_split"],
  1533. )
  1534. # 记录这两个是为了克隆
  1535. self.criterion = args_use["criterion"]
  1536. self.splitter = args_use["splitter"]
  1537. self.max_features = args_use["max_features"]
  1538. self.max_depth = args_use["max_depth"]
  1539. self.min_samples_split = args_use["min_samples_split"]
  1540. self.k = {
  1541. "criterion": args_use["criterion"],
  1542. "splitter": args_use["splitter"],
  1543. "max_features": args_use["max_features"],
  1544. "max_depth": args_use["max_depth"],
  1545. "min_samples_split": args_use["min_samples_split"],
  1546. }
  1547. self.model_Name = model
  1548. def data_visualization(self, save_dir, *args, **kwargs):
  1549. tab = Tab()
  1550. # 多个决策树可视化
  1551. for a in range(len(self.model.estimators_)):
  1552. for i in range(len(self.model.estimators_[a])):
  1553. with open(save_dir + rf"{os.sep}Tree_Gra[{a},{i}].dot", "w") as f:
  1554. export_graphviz(self.model.estimators_[a][i], out_file=f)
  1555. tab.add(
  1556. see_tree(
  1557. save_dir +
  1558. rf"{os.sep}Tree_Gra[{a},{i}].dot"),
  1559. f"[{a},{i}]决策树可视化")
  1560. y = self.y_traindata
  1561. x_data = self.x_traindata
  1562. if self.model_Name == "Tree_class":
  1563. class_ = self.model.classes_.tolist()
  1564. class_heard = [f"类别[{i}]" for i in range(len(class_))]
  1565. get, x_means, x_range, data_type = training_visualization(
  1566. x_data, class_, y)
  1567. for i in range(len(get)):
  1568. tab.add(get[i], f"{i}训练数据散点图")
  1569. get = decision_boundary(
  1570. x_range, x_means, self.predict, class_, data_type)
  1571. for i in range(len(get)):
  1572. tab.add(get[i], f"{i}预测热力图")
  1573. tab.add(
  1574. make_tab(
  1575. class_heard + [f"普适预测第{i}特征" for i in range(len(x_means))],
  1576. [class_ + [f"{i}" for i in x_means]],
  1577. ),
  1578. "数据表",
  1579. )
  1580. else:
  1581. get, x_means, x_range, data_type = regress_visualization(x_data, y)
  1582. for i in range(len(get)):
  1583. tab.add(get[i], f"{i}预测类型图")
  1584. get = prediction_boundary(
  1585. x_range, x_means, self.predict, data_type)
  1586. for i in range(len(get)):
  1587. tab.add(get[i], f"{i}预测热力图")
  1588. tab.add(
  1589. make_tab(
  1590. [f"普适预测第{i}特征" for i in range(len(x_means))],
  1591. [[f"{i}" for i in x_means]],
  1592. ),
  1593. "数据表",
  1594. )
  1595. des_to_csv(
  1596. save_dir,
  1597. "预测表",
  1598. [[f"{i}" for i in x_means]],
  1599. [f"普适预测第{i}特征" for i in range(len(x_means))],
  1600. )
  1601. save = save_dir + rf"{os.sep}梯度提升回归树.HTML"
  1602. tab.render(save) # 生成HTML
  1603. return save,
@plugin_class_loading(get_path(r"template/machinelearning"))
class SvcModel(StudyMachinebase):
    def __init__(
        self, args_use, model, *args, **kwargs
    ):  # model is the selected model-type name; C/gamma/kernel tune the SVM
        super(SvcModel, self).__init__(*args, **kwargs)
        self.model = SVC(
            C=args_use["C"], gamma=args_use["gamma"], kernel=args_use["kernel"]
        )
        # recorded so the model can be cloned later
        self.C = args_use["C"]
        self.gamma = args_use["gamma"]
        self.kernel = args_use["kernel"]
        self.k = {
            "C": args_use["C"],
            "gamma": args_use["gamma"],
            "kernel": args_use["kernel"],
        }
        self.model_Name = model

    def data_visualization(self, save_dir, *args, **kwargs):
        """Plot decision boundaries, heat maps and coefficient tables.

        Returns a 1-tuple containing the path of the generated HTML file.
        """
        tab = Tab()
        try:
            w_list = self.model.coef_.tolist()  # this attribute may not exist
            b = self.model.intercept_.tolist()
        except BaseException:
            w_list = []  # no coefficients available: skip the line overlays
            b = []
        class_ = self.model.classes_.tolist()
        class_heard = [f"类别[{i}]" for i in range(len(class_))]
        y = self.y_traindata
        x_data = self.x_traindata
        get, x_means, x_range, data_type = training_visualization(
            x_data, class_, y)
        if w_list:
            get_line: list = training_w(
                x_data, class_, y, w_list, b, x_means.copy())
        else:
            get_line = []
        for i in range(len(get)):
            if get_line:
                tab.add(get[i].overlap(get_line[i]), f"{i}决策边界散点图")
            else:
                tab.add(get[i], f"{i}决策边界散点图")
        get = decision_boundary(
            x_range,
            x_means,
            self.predict,
            class_,
            data_type)
        for i in range(len(get)):
            tab.add(get[i], f"{i}预测热力图")
        dic = {2: "离散", 1: "连续"}
        tab.add(make_tab(class_heard +
                         [f"普适预测第{i}特征:{dic[data_type[i]]}" for i in range(len(x_means))],
                         [class_ + [f"{i}" for i in x_means]],), "数据表", )
        if w_list:
            des_to_csv(save_dir, "系数表", w_list, [
                f"系数W[{i}]" for i in range(len(w_list[0]))])
        if w_list:
            des_to_csv(save_dir, "截距表", [b], [f"截距{i}" for i in range(len(b))])
        des_to_csv(
            save_dir,
            "预测表",
            [[f"{i}" for i in x_means]],
            [f"普适预测第{i}特征" for i in range(len(x_means))],
        )
        save = save_dir + rf"{os.sep}支持向量机分类.HTML"
        tab.render(save)  # write the HTML report
        return save,
@plugin_class_loading(get_path(r"template/machinelearning"))
class SvrModel(StudyMachinebase):
    def __init__(
        self, args_use, model, *args, **kwargs
    ):  # model is the selected model-type name; C/gamma/kernel tune the SVM
        super(SvrModel, self).__init__(*args, **kwargs)
        self.model = SVR(
            C=args_use["C"], gamma=args_use["gamma"], kernel=args_use["kernel"]
        )
        # recorded so the model can be cloned later
        self.C = args_use["C"]
        self.gamma = args_use["gamma"]
        self.kernel = args_use["kernel"]
        self.k = {
            "C": args_use["C"],
            "gamma": args_use["gamma"],
            "kernel": args_use["kernel"],
        }
        self.model_Name = model

    def data_visualization(self, save_dir, *args, **kwargs):
        """Plot the regression fit, heat maps and coefficient tables.

        Returns a 1-tuple containing the path of the generated HTML file.
        """
        tab = Tab()
        x_data = self.x_traindata
        y = self.y_traindata
        try:
            w_list = self.model.coef_.tolist()  # this attribute may not exist
            b = self.model.intercept_.tolist()
        except BaseException:
            w_list = []  # no coefficients available: skip the line overlays
            b = []
        get, x_means, x_range, data_type = regress_visualization(x_data, y)
        if w_list:
            get_line = regress_w(x_data, w_list, b, x_means.copy())
        else:
            get_line = []
        for i in range(len(get)):
            if get_line:
                tab.add(get[i].overlap(get_line[i]), f"{i}预测类型图")
            else:
                tab.add(get[i], f"{i}预测类型图")
        get = prediction_boundary(x_range, x_means, self.predict, data_type)
        for i in range(len(get)):
            tab.add(get[i], f"{i}预测热力图")
        if w_list:
            des_to_csv(save_dir, "系数表", w_list, [
                f"系数W[{i}]" for i in range(len(w_list[0]))])
        if w_list:
            des_to_csv(save_dir, "截距表", [b], [f"截距{i}" for i in range(len(b))])
        des_to_csv(
            save_dir,
            "预测表",
            [[f"{i}" for i in x_means]],
            [f"普适预测第{i}特征" for i in range(len(x_means))],
        )
        tab.add(
            make_tab(
                [f"普适预测第{i}特征" for i in range(len(x_means))],
                [[f"{i}" for i in x_means]],
            ),
            "数据表",
        )
        save = save_dir + rf"{os.sep}支持向量机回归.HTML"
        tab.render(save)  # write the HTML report
        return save,
class VarianceModel(Unsupervised):  # unsupervised feature selection by variance
    def __init__(
        self, args_use, model, *args, **kwargs
    ):  # model is the selected model-type name; P parameterises the threshold
        super(VarianceModel, self).__init__(*args, **kwargs)
        # threshold p*(1-p): drop features whose variance does not exceed that
        # of a Bernoulli variable with probability P
        self.model = VarianceThreshold(
            threshold=(args_use["P"] * (1 - args_use["P"])))
        # recorded so the model can be cloned later
        self.threshold = args_use["P"]
        self.k = {"threshold": args_use["P"]}
        self.model_Name = model

    def data_visualization(self, save_dir, *args, **kwargs):
        """Plot per-feature spread and surviving-feature scatter plots.

        Returns a 1-tuple containing the path of the generated HTML file.
        """
        tab = Tab()
        # NOTE(review): sklearn documents variances_ as per-feature VARIANCE,
        # but the chart below labels it 标准差 (std) — confirm which is intended
        var = self.model.variances_
        y_data = self.y_testdata
        if isinstance(y_data, np.ndarray):
            get = feature_visualization(self.y_testdata)
            for i in range(len(get)):
                tab.add(get[i], f"[{i}]数据x-x散点图")
        c = (
            Bar()
            .add_xaxis([f"[{i}]特征" for i in range(len(var))])
            .add_yaxis("标准差", var.tolist(), **label_setting)
            .set_global_opts(
                title_opts=opts.TitleOpts(title="系数w柱状图"), **global_setting
            )
        )
        tab.add(c, "数据标准差")
        save = save_dir + rf"{os.sep}方差特征选择.HTML"
        tab.render(save)  # write the HTML report
        return save,
class SelectkbestModel(PrepBase):  # supervised univariate feature selection
    def __init__(self, args_use, model, *args, **kwargs):
        super(SelectkbestModel, self).__init__(*args, **kwargs)
        self.model = SelectKBest(
            k=args_use["k"],
            score_func=args_use["score_func"])
        # recorded so the model can be cloned later (k_ avoids clashing with self.k)
        self.k_ = args_use["k"]
        self.score_func = args_use["score_func"]
        self.k = {"k": args_use["k"], "score_func": args_use["score_func"]}
        self.model_Name = model

    def data_visualization(self, save_dir, *args, **kwargs):
        """Plot kept-vs-dropped feature scores and the data scatter plots.

        Returns a 1-tuple containing the path of the generated HTML file.
        """
        tab = Tab()
        score = self.model.scores_.tolist()
        support: np.ndarray = self.model.get_support()
        y_data = self.y_traindata
        x_data = self.x_traindata
        if isinstance(x_data, np.ndarray):
            get = feature_visualization(x_data)
            for i in range(len(get)):
                tab.add(get[i], f"[{i}]训练数据x-x散点图")
        if isinstance(y_data, np.ndarray):
            get = feature_visualization(y_data)
            for i in range(len(get)):
                tab.add(get[i], f"[{i}]保留训练数据x-x散点图")
        y_data = self.y_testdata
        x_data = self.x_testdata
        if isinstance(x_data, np.ndarray):
            get = feature_visualization(x_data)
            for i in range(len(get)):
                tab.add(get[i], f"[{i}]数据x-x散点图")
        if isinstance(y_data, np.ndarray):
            get = feature_visualization(y_data)
            for i in range(len(get)):
                tab.add(get[i], f"[{i}]保留数据x-x散点图")
        # split scores into kept vs dropped series, 0 as placeholder in the other
        choose = []
        un_choose = []
        for i in range(len(score)):
            if support[i]:
                choose.append(score[i])
                un_choose.append(0)  # placeholder
            else:
                un_choose.append(score[i])
                choose.append(0)
        c = (
            Bar()
            .add_xaxis([f"[{i}]特征" for i in range(len(score))])
            .add_yaxis("选中特征", choose, **label_setting)
            .add_yaxis("抛弃特征", un_choose, **label_setting)
            .set_global_opts(
                title_opts=opts.TitleOpts(title="系数w柱状图"), **global_setting
            )
        )
        tab.add(c, "单变量重要程度")
        save = save_dir + rf"{os.sep}单一变量特征选择.HTML"
        tab.render(save)  # write the HTML report
        return save,
  1824. class SelectFromModel(PrepBase): # 有监督
  1825. def __init__(
  1826. self, args_use, learner, *args, **kwargs
  1827. ): # model表示当前选用的模型类型,Alpha针对正则化的参数
  1828. super(SelectFromModel, self).__init__(*args, **kwargs)
  1829. self.model = learner.Model
  1830. self.Select_Model = SelectFromModel(
  1831. estimator=learner.Model,
  1832. max_features=args_use["k"],
  1833. prefit=learner.have_Fit)
  1834. self.max_features = args_use["k"]
  1835. self.estimator = learner.Model
  1836. self.k = {
  1837. "max_features": args_use["k"],
  1838. "estimator": learner.Model,
  1839. "have_Fit": learner.have_Fit,
  1840. }
  1841. self.have_fit = learner.have_Fit
  1842. self.model_Name = "SelectFrom_Model"
  1843. self.learner = learner
  1844. def fit_model(self, x_data, y_data, split=0.3, *args, **kwargs):
  1845. y_data = y_data.ravel()
  1846. if not self.have_fit: # 不允许第二次训练
  1847. self.Select_Model.fit(x_data, y_data)
  1848. self.have_fit = True
  1849. return "None", "None"
  1850. def predict(self, x_data, *args, **kwargs):
  1851. try:
  1852. self.x_testdata = x_data.copy()
  1853. x_predict = self.Select_Model.transform(x_data)
  1854. self.y_testdata = x_predict.copy()
  1855. self.have_predict = True
  1856. return x_predict, "模型特征工程"
  1857. except BaseException:
  1858. self.have_predict = True
  1859. return np.array([]), "无结果工程"
  1860. def data_visualization(self, save_dir, *args, **kwargs):
  1861. tab = Tab()
  1862. support: np.ndarray = self.Select_Model.get_support()
  1863. y_data = self.y_testdata
  1864. x_data = self.x_testdata
  1865. if isinstance(x_data, np.ndarray):
  1866. get = feature_visualization(x_data)
  1867. for i in range(len(get)):
  1868. tab.add(get[i], f"[{i}]数据x-x散点图")
  1869. if isinstance(y_data, np.ndarray):
  1870. get = feature_visualization(y_data)
  1871. for i in range(len(get)):
  1872. tab.add(get[i], f"[{i}]保留数据x-x散点图")
  1873. def make_bar(score):
  1874. choose = []
  1875. un_choose = []
  1876. for i in range(len(score)):
  1877. if support[i]:
  1878. choose.append(abs(score[i]))
  1879. un_choose.append(0) # 占位
  1880. else:
  1881. un_choose.append(abs(score[i]))
  1882. choose.append(0)
  1883. c = (
  1884. Bar()
  1885. .add_xaxis([f"[{i}]特征" for i in range(len(score))])
  1886. .add_yaxis("选中特征", choose, **label_setting)
  1887. .add_yaxis("抛弃特征", un_choose, **label_setting)
  1888. .set_global_opts(
  1889. title_opts=opts.TitleOpts(title="系数w柱状图"), **global_setting
  1890. )
  1891. )
  1892. tab.add(c, "单变量重要程度")
  1893. try:
  1894. make_bar(self.model.coef_)
  1895. except BaseException:
  1896. try:
  1897. make_bar(self.model.feature_importances_)
  1898. except BaseException:
  1899. pass
  1900. save = save_dir + rf"{os.sep}模型特征选择.HTML"
  1901. tab.render(save) # 生成HTML
  1902. return save,
  1903. class StandardizationModel(Unsupervised): # z-score标准化 无监督
  1904. def __init__(self, *args, **kwargs):
  1905. super(StandardizationModel, self).__init__(*args, **kwargs)
  1906. self.model = StandardScaler()
  1907. self.k = {}
  1908. self.model_Name = "StandardScaler"
  1909. def data_visualization(self, save_dir, *args, **kwargs):
  1910. tab = Tab()
  1911. y_data = self.y_testdata
  1912. x_data = self.x_testdata
  1913. var = self.model.var_.tolist()
  1914. means = self.model.mean_.tolist()
  1915. scale = self.model.scale_.tolist()
  1916. conversion_control(y_data, x_data, tab)
  1917. make_bar("标准差", var, tab)
  1918. make_bar("方差", means, tab)
  1919. make_bar("Scale", scale, tab)
  1920. save = save_dir + rf"{os.sep}z-score标准化.HTML"
  1921. tab.render(save) # 生成HTML
  1922. return save,
class MinmaxscalerModel(Unsupervised):  # min-max (range) normalisation
    def __init__(self, args_use, *args, **kwargs):
        super(MinmaxscalerModel, self).__init__(*args, **kwargs)
        self.model = MinMaxScaler(feature_range=args_use["feature_range"])
        # NOTE(review): feature_range is not recorded in self.k, so a clone
        # built from k would fall back to the default range — confirm intent
        self.k = {}
        self.model_Name = "MinMaxScaler"

    def data_visualization(self, save_dir, *args, **kwargs):
        """Show converted data, the scale factors and per-feature min/max.

        Returns a 1-tuple containing the path of the generated HTML file.
        """
        tab = Tab()
        y_data = self.y_testdata
        x_data = self.x_testdata
        scale = self.model.scale_.tolist()
        max_ = self.model.data_max_.tolist()
        min_ = self.model.data_min_.tolist()
        conversion_control(y_data, x_data, tab)
        make_bar("Scale", scale, tab)
        tab.add(
            make_tab(
                heard=[f"[{i}]特征最大值" for i in range(len(max_))]
                + [f"[{i}]特征最小值" for i in range(len(min_))],
                row=[max_ + min_],
            ),
            "数据表格",
        )
        save = save_dir + rf"{os.sep}离差标准化.HTML"
        tab.render(save)  # write the HTML report
        return save,
  1949. class LogscalerModel(PrepBase): # 对数标准化
  1950. def __init__(self, *args, **kwargs):
  1951. super(LogscalerModel, self).__init__(*args, **kwargs)
  1952. self.model = None
  1953. self.k = {}
  1954. self.model_Name = "LogScaler"
  1955. def fit_model(self, x_data, *args, **kwargs):
  1956. if not self.have_predict: # 不允许第二次训练
  1957. self.max_logx = np.log(x_data.max())
  1958. self.have_fit = True
  1959. return "None", "None"
  1960. def predict(self, x_data, *args, **kwargs):
  1961. try:
  1962. max_logx = self.max_logx
  1963. except BaseException:
  1964. self.have_fit = False
  1965. self.fit_model(x_data)
  1966. max_logx = self.max_logx
  1967. self.x_testdata = x_data.copy()
  1968. x_predict = np.log(x_data) / max_logx
  1969. self.y_testdata = x_predict.copy()
  1970. self.have_predict = True
  1971. return x_predict, "对数变换"
  1972. def data_visualization(self, save_dir, *args, **kwargs):
  1973. tab = Tab()
  1974. y_data = self.y_testdata
  1975. x_data = self.x_testdata
  1976. conversion_control(y_data, x_data, tab)
  1977. tab.add(make_tab(heard=["最大对数值(自然对数)"],
  1978. row=[[str(self.max_logx)]]), "数据表格")
  1979. save = save_dir + rf"{os.sep}对数标准化.HTML"
  1980. tab.render(save) # 生成HTML
  1981. return save,
  1982. class AtanscalerModel(PrepBase): # atan标准化
  1983. def __init__(self, *args, **kwargs):
  1984. super(AtanscalerModel, self).__init__(*args, **kwargs)
  1985. self.model = None
  1986. self.k = {}
  1987. self.model_Name = "atanScaler"
  1988. def fit_model(self, x_data, *args, **kwargs):
  1989. self.have_fit = True
  1990. return "None", "None"
  1991. def predict(self, x_data, *args, **kwargs):
  1992. self.x_testdata = x_data.copy()
  1993. x_predict = np.arctan(x_data) * (2 / np.pi)
  1994. self.y_testdata = x_predict.copy()
  1995. self.have_predict = True
  1996. return x_predict, "atan变换"
  1997. def data_visualization(self, save_dir, *args, **kwargs):
  1998. tab = Tab()
  1999. y_data = self.y_testdata
  2000. x_data = self.x_testdata
  2001. conversion_control(y_data, x_data, tab)
  2002. save = save_dir + rf"{os.sep}反正切函数标准化.HTML"
  2003. tab.render(save) # 生成HTML
  2004. return save,
class DecimalscalerModel(PrepBase):  # decimal-scaling normalisation
    def __init__(self, *args, **kwargs):
        super(DecimalscalerModel, self).__init__(*args, **kwargs)
        self.model = None
        self.k = {}
        self.model_Name = "Decimal_normalization"

    def fit_model(self, x_data, *args, **kwargs):
        """Learn j, the digit count used as the power-of-ten divisor."""
        if not self.have_predict:  # a second training is not allowed
            # NOTE(review): guard tests have_predict, not have_fit — this
            # matches the other scalers in this module, but confirm intent
            self.j = max([judging_digits(x_data.max()),
                          judging_digits(x_data.min())])
        self.have_fit = True
        return "None", "None"

    def predict(self, x_data, *args, **kwargs):
        """Scale x_data by 10**j, fitting lazily when needed."""
        self.x_testdata = x_data.copy()
        try:
            j = self.j
        except BaseException:
            # not fitted yet: fit on the data being predicted
            self.have_fit = False
            self.fit_model(x_data)
            j = self.j
        x_predict = x_data / (10 ** j)
        self.y_testdata = x_predict.copy()
        self.have_predict = True
        return x_predict, "小数定标标准化"

    def data_visualization(self, save_dir, *args, **kwargs):
        """Show converted data plus the digit count j.

        Returns a 1-tuple containing the path of the generated HTML file.
        """
        tab = Tab()
        y_data = self.y_testdata
        x_data = self.x_testdata
        j = self.j
        conversion_control(y_data, x_data, tab)
        tab.add(make_tab(heard=["小数位数:j"], row=[[j]]), "数据表格")
        save = save_dir + rf"{os.sep}小数定标标准化.HTML"
        tab.render(save)  # write the HTML report
        return save,
class MapzoomModel(PrepBase):  # mapping (zoom) normalisation
    def __init__(self, args_use, *args, **kwargs):
        super(MapzoomModel, self).__init__(*args, **kwargs)
        self.model = None
        self.feature_range = args_use["feature_range"]
        self.k = {}
        # NOTE(review): this looks copy-pasted from DecimalscalerModel — a
        # MapZoom-specific name was probably intended; other code may dispatch
        # on this string, so confirm before changing it
        self.model_Name = "Decimal_normalization"

    def fit_model(self, x_data, *args, **kwargs):
        """Record the data extremes used for scaling."""
        if not self.have_predict:  # a second training is not allowed
            self.max_ = x_data.max()
            self.min_ = x_data.min()
        self.have_fit = True
        return "None", "None"

    def predict(self, x_data, *args, **kwargs):
        """Scale x_data by target-span / data-span, fitting lazily when needed."""
        self.x_testdata = x_data.copy()
        try:
            max_ = self.max_
            min_ = self.min_
        except BaseException:
            # not fitted yet: fit on the data being predicted
            self.have_fit = False
            self.fit_model(x_data)
            max_ = self.max_
            min_ = self.min_
        # NOTE(review): this only rescales by the span ratio; it neither
        # subtracts min_ nor adds feature_range[0], so results are not mapped
        # into feature_range — verify this is the intended formula
        x_predict = (x_data * (self.feature_range[1] - self.feature_range[0])) / (
            max_ - min_
        )
        self.y_testdata = x_predict.copy()
        self.have_predict = True
        return x_predict, "映射标准化"

    def data_visualization(self, save_dir, *args, **kwargs):
        """Show converted data plus the recorded extremes.

        Returns a 1-tuple containing the path of the generated HTML file.
        """
        tab = Tab()
        y_data = self.y_testdata
        x_data = self.x_testdata
        max_ = self.max_
        min_ = self.min_
        conversion_control(y_data, x_data, tab)
        tab.add(make_tab(heard=["最大值", "最小值"], row=[[max_, min_]]), "数据表格")
        save = save_dir + rf"{os.sep}映射标准化.HTML"
        tab.render(save)  # write the HTML report
        return save,
  2079. class SigmodscalerModel(PrepBase): # sigmod变换
  2080. def __init__(self, *args, **kwargs):
  2081. super(SigmodscalerModel, self).__init__(*args, **kwargs)
  2082. self.model = None
  2083. self.k = {}
  2084. self.model_Name = "sigmodScaler_Model"
  2085. def fit_model(self, x_data, *args, **kwargs):
  2086. self.have_fit = True
  2087. return "None", "None"
  2088. def predict(self, x_data: np.array, *args, **kwargs):
  2089. self.x_testdata = x_data.copy()
  2090. x_predict = 1 / (1 + np.exp(-x_data))
  2091. self.y_testdata = x_predict.copy()
  2092. self.have_predict = True
  2093. return x_predict, "Sigmod变换"
  2094. def data_visualization(self, save_dir, *args, **kwargs):
  2095. tab = Tab()
  2096. y_data = self.y_testdata
  2097. x_data = self.x_testdata
  2098. conversion_control(y_data, x_data, tab)
  2099. save = save_dir + rf"{os.sep}Sigmoid变换.HTML"
  2100. tab.render(save) # 生成HTML
  2101. return save,
  2102. class FuzzyQuantizationModel(PrepBase): # 模糊量化标准化
  2103. def __init__(self, args_use, *args, **kwargs):
  2104. super(FuzzyQuantizationModel, self).__init__(*args, **kwargs)
  2105. self.model = None
  2106. self.feature_range = args_use["feature_range"]
  2107. self.k = {}
  2108. self.model_Name = "Fuzzy_quantization"
  2109. def fit_model(self, x_data, *args, **kwargs):
  2110. if not self.have_predict: # 不允许第二次训练
  2111. self.max_ = x_data.max()
  2112. self.max_ = x_data.min()
  2113. self.have_fit = True
  2114. return "None", "None"
  2115. def predict(self, x_data, *args, **kwargs):
  2116. self.x_testdata = x_data.copy()
  2117. try:
  2118. max_ = self.max_
  2119. min_ = self.max_
  2120. except BaseException:
  2121. self.have_fit = False
  2122. self.fit_model(x_data)
  2123. max_ = self.max_
  2124. min_ = self.max_
  2125. x_predict = 1 / 2 + (1 / 2) * np.sin(
  2126. np.pi / (max_ - min_) * (x_data - (max_ - min_) / 2)
  2127. )
  2128. self.y_testdata = x_predict.copy()
  2129. self.have_predict = True
  2130. return x_predict, "模糊量化标准化"
  2131. def data_visualization(self, save_dir, *args, **kwargs):
  2132. tab = Tab()
  2133. y_data = self.y_traindata
  2134. x_data = self.x_traindata
  2135. max_ = self.max_
  2136. min_ = self.max_
  2137. conversion_control(y_data, x_data, tab)
  2138. tab.add(make_tab(heard=["最大值", "最小值"], row=[[max_, min_]]), "数据表格")
  2139. save = save_dir + rf"{os.sep}模糊量化标准化.HTML"
  2140. tab.render(save) # 生成HTML
  2141. return save,
  2142. class RegularizationModel(Unsupervised): # 正则化
  2143. def __init__(self, args_use, *args, **kwargs):
  2144. super(RegularizationModel, self).__init__(*args, **kwargs)
  2145. self.model = Normalizer(norm=args_use["norm"])
  2146. self.k = {"norm": args_use["norm"]}
  2147. self.model_Name = "Regularization"
  2148. def data_visualization(self, save_dir, *args, **kwargs):
  2149. tab = Tab()
  2150. y_data = self.y_testdata.copy()
  2151. x_data = self.x_testdata.copy()
  2152. conversion_control(y_data, x_data, tab)
  2153. save = save_dir + rf"{os.sep}正则化.HTML"
  2154. tab.render(save) # 生成HTML
  2155. return save,
# ---- Discrete-data models (离散数据) ----
  2157. class BinarizerModel(Unsupervised): # 二值化
  2158. def __init__(self, args_use, *args, **kwargs):
  2159. super(BinarizerModel, self).__init__(*args, **kwargs)
  2160. self.model = Binarizer(threshold=args_use["threshold"])
  2161. self.k = {}
  2162. self.model_Name = "Binarizer"
  2163. def data_visualization(self, save_dir, *args, **kwargs):
  2164. tab = Tab()
  2165. y_data = self.y_testdata
  2166. x_data = self.x_testdata
  2167. get_y = discrete_feature_visualization(y_data, "转换数据") # 转换
  2168. for i in range(len(get_y)):
  2169. tab.add(get_y[i], f"[{i}]数据x-x离散散点图")
  2170. heard = [f"特征:{i}" for i in range(len(x_data[0]))]
  2171. tab.add(make_tab(heard, x_data.tolist()), f"原数据")
  2172. tab.add(make_tab(heard, y_data.tolist()), f"编码数据")
  2173. tab.add(
  2174. make_tab(
  2175. heard, np.dstack(
  2176. (x_data, y_data)).tolist()), f"合成[原数据,编码]数据")
  2177. save = save_dir + rf"{os.sep}二值离散化.HTML"
  2178. tab.render(save) # 生成HTML
  2179. return save,
class DiscretizationModel(PrepBase):  # n-value discretization
    """Discretize continuous values into bins bounded by user-supplied split points."""

    def __init__(self, args_use, *args, **kwargs):
        super(DiscretizationModel, self).__init__(*args, **kwargs)
        self.model = None
        range_ = args_use["split_range"]
        if not range_:
            raise Exception
        elif len(range_) == 1:
            # duplicate a single split point so there are always >= 2 entries
            range_.append(range_[0])
        self.range = range_
        self.k = {}
        self.model_Name = "Discretization"

    def fit_model(self, *args, **kwargs):
        # the split points were already stored at construction time
        self.have_fit = True
        return "None", "None"

    def predict(self, x_data, *args, **kwargs):
        """Replace each value with the index of the bin it falls into."""
        self.x_testdata = x_data.copy()
        x_predict = x_data.copy()  # copy
        range_ = self.range
        bool_list = []
        max_ = len(range_) - 1
        o_t = None  # previous split point
        for i in range(len(range_)):
            try:
                t = float(range_[i])
            except BaseException:
                # skip entries that cannot be parsed as numbers
                continue
            if o_t is None:  # first split point: everything at or below it
                bool_list.append(x_predict <= t)
            else:
                # element-wise == acts as logical AND here: with o_t < t the
                # two comparisons can never both be False for the same element
                bool_list.append((o_t <= x_predict) == (x_predict < t))
            if i == max_:
                # last split point: everything at or above it
                bool_list.append(t <= x_predict)
            o_t = t
        # the masks were all computed above, so in-place writes are safe here
        for i in range(len(bool_list)):
            x_predict[bool_list[i]] = i
        self.y_testdata = x_predict.copy()
        self.have_predict = True
        return x_predict, f"{len(bool_list)}值离散化"

    def data_visualization(self, save_dir, *args, **kwargs):
        """Render discrete scatter plots and original/encoded tables to HTML."""
        tab = Tab()
        y_data = self.y_testdata
        x_data = self.x_testdata
        get_y = discrete_feature_visualization(y_data, "转换数据")  # converted data
        for i in range(len(get_y)):
            tab.add(get_y[i], f"[{i}]数据x-x离散散点图")
        heard = [f"特征:{i}" for i in range(len(x_data[0]))]
        tab.add(make_tab(heard, x_data.tolist()), f"原数据")
        tab.add(make_tab(heard, y_data.tolist()), f"编码数据")
        tab.add(
            make_tab(
                heard, np.dstack(
                    (x_data, y_data)).tolist()), f"合成[原数据,编码]数据")
        save = save_dir + rf"{os.sep}多值离散化.HTML"
        tab.render(save)  # emit the HTML report
        return save,
  2237. class LabelModel(PrepBase): # 数字编码
  2238. def __init__(self, *args, **kwargs):
  2239. super(LabelModel, self).__init__(*args, **kwargs)
  2240. self.model = []
  2241. self.k = {}
  2242. self.model_Name = "LabelEncoder"
  2243. def fit_model(self, x_data, *args, **kwargs):
  2244. if not self.have_predict: # 不允许第二次训练
  2245. self.model = []
  2246. if x_data.ndim == 1:
  2247. x_data = np.array([x_data])
  2248. for i in range(x_data.shape[1]):
  2249. self.model.append(
  2250. LabelEncoder().fit(np.ravel(x_data[:, i]))
  2251. ) # 训练机器(每个特征一个学习器)
  2252. self.have_fit = True
  2253. return "None", "None"
  2254. def predict(self, x_data, *args, **kwargs):
  2255. self.x_testdata = x_data.copy()
  2256. x_predict = x_data.copy()
  2257. if x_data.ndim == 1:
  2258. x_data = np.array([x_data])
  2259. for i in range(x_data.shape[1]):
  2260. x_predict[:, i] = self.model[i].transform(x_data[:, i])
  2261. self.y_testdata = x_predict.copy()
  2262. self.have_predict = True
  2263. return x_predict, "数字编码"
  2264. def data_visualization(self, save_dir, *args, **kwargs):
  2265. tab = Tab()
  2266. x_data = self.x_testdata
  2267. y_data = self.y_testdata
  2268. get_y = discrete_feature_visualization(y_data, "转换数据") # 转换
  2269. for i in range(len(get_y)):
  2270. tab.add(get_y[i], f"[{i}]数据x-x离散散点图")
  2271. heard = [f"特征:{i}" for i in range(len(x_data[0]))]
  2272. tab.add(make_tab(heard, x_data.tolist()), f"原数据")
  2273. tab.add(make_tab(heard, y_data.tolist()), f"编码数据")
  2274. tab.add(
  2275. make_tab(
  2276. heard, np.dstack(
  2277. (x_data, y_data)).tolist()), f"合成[原数据,编码]数据")
  2278. save = save_dir + rf"{os.sep}数字编码.HTML"
  2279. tab.render(save) # 生成HTML
  2280. return save,
  2281. class OneHotEncoderModel(PrepBase): # 独热编码
  2282. def __init__(self, args_use, *args, **kwargs):
  2283. super(OneHotEncoderModel, self).__init__(*args, **kwargs)
  2284. self.model = []
  2285. self.ndim_up = args_use["ndim_up"]
  2286. self.k = {}
  2287. self.model_Name = "OneHotEncoder"
  2288. self.OneHot_Data = None # 三维独热编码
  2289. def fit_model(self, x_data, *args, **kwargs):
  2290. if not self.have_predict: # 不允许第二次训练
  2291. if x_data.ndim == 1:
  2292. x_data = [x_data]
  2293. for i in range(x_data.shape[1]):
  2294. data = np.expand_dims(x_data[:, i], axis=1) # 独热编码需要升维
  2295. self.model.append(OneHotEncoder().fit(data)) # 训练机器
  2296. self.have_fit = True
  2297. return "None", "None"
  2298. def predict(self, x_data, *args, **kwargs):
  2299. self.x_testdata = x_data.copy()
  2300. x_new = []
  2301. for i in range(x_data.shape[1]):
  2302. data = np.expand_dims(x_data[:, i], axis=1) # 独热编码需要升维
  2303. one_hot = self.model[i].transform(data).toarray().tolist()
  2304. x_new.append(one_hot) # 添加到列表中
  2305. # 新列表的行数据是原data列数据的独热码(只需要ndim=2,暂时没想到numpy的做法)
  2306. x_new = np.array(x_new)
  2307. x_predict = []
  2308. for i in range(x_new.shape[1]):
  2309. x_predict.append(x_new[:, i])
  2310. x_predict = np.array(x_predict) # 转换回array
  2311. self.OneHot_Data = x_predict.copy() # 保存未降维数据
  2312. if not self.ndim_up: # 压缩操作
  2313. new_x_predict = []
  2314. for i in x_predict:
  2315. new_list = []
  2316. list_ = i.tolist()
  2317. for a in list_:
  2318. new_list += a
  2319. new = np.array(new_list)
  2320. new_x_predict.append(new)
  2321. self.y_testdata = np.array(new_x_predict)
  2322. return self.y_testdata.copy(), "独热编码"
  2323. self.y_testdata = self.OneHot_Data
  2324. self.have_predict = True
  2325. return x_predict, "独热编码"
  2326. def data_visualization(self, save_dir, *args, **kwargs):
  2327. tab = Tab()
  2328. y_data = self.y_testdata
  2329. x_data = self.x_testdata
  2330. oh_data = self.OneHot_Data
  2331. if not self.ndim_up:
  2332. get_y = discrete_feature_visualization(y_data, "转换数据") # 转换
  2333. for i in range(len(get_y)):
  2334. tab.add(get_y[i], f"[{i}]数据x-x离散散点图")
  2335. heard = [f"特征:{i}" for i in range(len(x_data[0]))]
  2336. tab.add(make_tab(heard, x_data.tolist()), f"原数据")
  2337. tab.add(make_tab(heard, oh_data.tolist()), f"编码数据")
  2338. tab.add(
  2339. make_tab(
  2340. heard, np.dstack(
  2341. (oh_data, x_data)).tolist()), f"合成[原数据,编码]数据")
  2342. tab.add(make_tab([f"编码:{i}" for i in range(
  2343. len(y_data[0]))], y_data.tolist()), f"数据")
  2344. save = save_dir + rf"{os.sep}独热编码.HTML"
  2345. tab.render(save) # 生成HTML
  2346. return save,
  2347. class MissedModel(Unsupervised): # 缺失数据补充
  2348. def __init__(self, args_use, *args, **kwargs):
  2349. super(MissedModel, self).__init__(*args, **kwargs)
  2350. self.model = SimpleImputer(
  2351. missing_values=args_use["miss_value"],
  2352. strategy=args_use["fill_method"],
  2353. fill_value=args_use["fill_value"],
  2354. )
  2355. self.k = {}
  2356. self.model_Name = "Missed"
  2357. def predict(self, x_data, *args, **kwargs):
  2358. self.x_testdata = x_data.copy()
  2359. x_predict = self.model.transform(x_data)
  2360. self.y_testdata = x_predict.copy()
  2361. self.have_predict = True
  2362. return x_predict, "填充缺失"
  2363. def data_visualization(self, save_dir, *args, **kwargs):
  2364. tab = Tab()
  2365. y_data = self.y_testdata
  2366. x_data = self.x_testdata
  2367. statistics = self.model.statistics_.tolist()
  2368. conversion_control(y_data, x_data, tab)
  2369. tab.add(make_tab([f"特征[{i}]" for i in range(
  2370. len(statistics))], [statistics]), "填充值")
  2371. save = save_dir + rf"{os.sep}缺失数据填充.HTML"
  2372. tab.render(save) # 生成HTML
  2373. return save,
@plugin_class_loading(get_path(r"template/machinelearning"))
class PcaModel(Unsupervised):
    """Principal component analysis wrapper around sklearn's PCA."""

    def __init__(self, args_use, *args, **kwargs):
        super(PcaModel, self).__init__(*args, **kwargs)
        self.model = PCA(
            n_components=args_use["n_components"], whiten=args_use["white_PCA"]
        )
        self.whiten = args_use["white_PCA"]
        self.n_components = args_use["n_components"]
        self.k = {
            "n_components": args_use["n_components"],
            "whiten": args_use["white_PCA"],
        }
        self.model_Name = "PCA"

    def predict(self, x_data, *args, **kwargs):
        """Project x_data onto the fitted principal components."""
        self.x_testdata = x_data.copy()
        x_predict = self.model.transform(x_data)
        self.y_testdata = x_predict.copy()
        self.have_predict = True
        return x_predict, "PCA"

    def data_visualization(self, save_dir, *args, **kwargs):
        """Render the component heat map and explained-variance bar chart to HTML."""
        tab = Tab()
        y_data = self.y_testdata
        importance = self.model.components_.tolist()
        var = self.model.explained_variance_.tolist()  # explained variance
        conversion_separate_format(y_data, tab)
        x_data = [f"第{i+1}主成分" for i in range(len(importance))]  # principal components
        y_data = [f"特征[{i}]" for i in range(len(importance[0]))]  # features
        value = [
            (f"第{i+1}主成分", f"特征[{j}]", importance[i][j])
            for i in range(len(importance))
            for j in range(len(importance[i]))
        ]
        c = (
            HeatMap()
            .add_xaxis(x_data)
            .add_yaxis(f"", y_data, value, **label_setting)  # first element of value is x
            .set_global_opts(
                title_opts=opts.TitleOpts(title="预测热力图"),
                **global_not_legend,
                yaxis_opts=opts.AxisOpts(is_scale=True),  # 'category'
                xaxis_opts=opts.AxisOpts(is_scale=True),
                visualmap_opts=opts.VisualMapOpts(
                    is_show=True,
                    max_=int(self.model.components_.max()) + 1,
                    min_=int(self.model.components_.min()),
                    pos_right="3%",
                ),
            )  # display
        )
        tab.add(c, "成分热力图")
        c = (
            Bar()
            .add_xaxis([f"第[{i}]主成分" for i in range(len(var))])
            .add_yaxis("方量差", var, **label_setting)
            .set_global_opts(
                title_opts=opts.TitleOpts(title="方量差柱状图"), **global_setting
            )
        )
        des_to_csv(save_dir, "成分重要性", importance, [x_data], [y_data])
        des_to_csv(
            save_dir, "方量差", [var], [
                f"第[{i}]主成分" for i in range(
                    len(var))])
        tab.add(c, "方量差柱状图")
        save = save_dir + rf"{os.sep}主成分分析.HTML"
        tab.render(save)  # emit the HTML report
        return save,
  2442. @plugin_class_loading(get_path(r"template/machinelearning"))
  2443. class RpcaModel(Unsupervised):
  2444. def __init__(self, args_use, *args, **kwargs):
  2445. super(RpcaModel, self).__init__(*args, **kwargs)
  2446. self.model = IncrementalPCA(
  2447. n_components=args_use["n_components"], whiten=args_use["white_PCA"]
  2448. )
  2449. self.n_components = args_use["n_components"]
  2450. self.whiten = args_use["white_PCA"]
  2451. self.k = {
  2452. "n_components": args_use["n_components"],
  2453. "whiten": args_use["white_PCA"],
  2454. }
  2455. self.model_Name = "RPCA"
  2456. def predict(self, x_data, *args, **kwargs):
  2457. self.x_testdata = x_data.copy()
  2458. x_predict = self.model.transform(x_data)
  2459. self.y_testdata = x_predict.copy()
  2460. self.have_predict = True
  2461. return x_predict, "RPCA"
  2462. def data_visualization(self, save_dir, *args, **kwargs):
  2463. tab = Tab()
  2464. y_data = self.y_traindata
  2465. importance = self.model.components_.tolist()
  2466. var = self.model.explained_variance_.tolist() # 方量差
  2467. conversion_separate_format(y_data, tab)
  2468. x_data = [f"第{i + 1}主成分" for i in range(len(importance))] # 主成分
  2469. y_data = [f"特征[{i}]" for i in range(len(importance[0]))] # 主成分
  2470. value = [
  2471. (f"第{i + 1}主成分", f"特征[{j}]", importance[i][j])
  2472. for i in range(len(importance))
  2473. for j in range(len(importance[i]))
  2474. ]
  2475. c = (
  2476. HeatMap()
  2477. .add_xaxis(x_data)
  2478. .add_yaxis(f"", y_data, value, **label_setting) # value的第一个数值是x
  2479. .set_global_opts(
  2480. title_opts=opts.TitleOpts(title="预测热力图"),
  2481. **global_not_legend,
  2482. yaxis_opts=opts.AxisOpts(is_scale=True), # 'category'
  2483. xaxis_opts=opts.AxisOpts(is_scale=True),
  2484. visualmap_opts=opts.VisualMapOpts(
  2485. is_show=True,
  2486. max_=int(self.model.components_.max()) + 1,
  2487. min_=int(self.model.components_.min()),
  2488. pos_right="3%",
  2489. ),
  2490. ) # 显示
  2491. )
  2492. tab.add(c, "成分热力图")
  2493. c = (
  2494. Bar()
  2495. .add_xaxis([f"第[{i}]主成分" for i in range(len(var))])
  2496. .add_yaxis("放量差", var, **label_setting)
  2497. .set_global_opts(
  2498. title_opts=opts.TitleOpts(title="方量差柱状图"), **global_setting
  2499. )
  2500. )
  2501. tab.add(c, "方量差柱状图")
  2502. des_to_csv(save_dir, "成分重要性", importance, [x_data], [y_data])
  2503. des_to_csv(
  2504. save_dir, "方量差", [var], [
  2505. f"第[{i}]主成分" for i in range(
  2506. len(var))])
  2507. save = save_dir + rf"{os.sep}RPCA(主成分分析).HTML"
  2508. tab.render(save) # 生成HTML
  2509. return save,
  2510. @plugin_class_loading(get_path(r"template/machinelearning"))
  2511. class KpcaModel(Unsupervised):
  2512. def __init__(self, args_use, *args, **kwargs):
  2513. super(KpcaModel, self).__init__(*args, **kwargs)
  2514. self.model = KernelPCA(
  2515. n_components=args_use["n_components"], kernel=args_use["kernel"]
  2516. )
  2517. self.n_components = args_use["n_components"]
  2518. self.kernel = args_use["kernel"]
  2519. self.k = {
  2520. "n_components": args_use["n_components"],
  2521. "kernel": args_use["kernel"],
  2522. }
  2523. self.model_Name = "KPCA"
  2524. def predict(self, x_data, *args, **kwargs):
  2525. self.x_testdata = x_data.copy()
  2526. x_predict = self.model.transform(x_data)
  2527. self.y_testdata = x_predict.copy()
  2528. self.have_predict = True
  2529. return x_predict, "KPCA"
  2530. def data_visualization(self, save_dir, *args, **kwargs):
  2531. tab = Tab()
  2532. y_data = self.y_testdata
  2533. conversion_separate_format(y_data, tab)
  2534. save = save_dir + rf"{os.sep}KPCA(主成分分析).HTML"
  2535. tab.render(save) # 生成HTML
  2536. return save,
  2537. class LdaModel(PrepBase): # 有监督学习
  2538. def __init__(self, args_use, *args, **kwargs):
  2539. super(LdaModel, self).__init__(*args, **kwargs)
  2540. self.model = Lda(n_components=args_use["n_components"])
  2541. self.n_components = args_use["n_components"]
  2542. self.k = {"n_components": args_use["n_components"]}
  2543. self.model_Name = "LDA"
  2544. def predict(self, x_data, *args, **kwargs):
  2545. self.x_testdata = x_data.copy()
  2546. x_predict = self.model.transform(x_data)
  2547. self.y_testdata = x_predict.copy()
  2548. self.have_predict = True
  2549. return x_predict, "LDA"
  2550. def data_visualization(self, save_dir, *args, **kwargs):
  2551. tab = Tab()
  2552. x_data = self.x_testdata
  2553. y_data = self.y_testdata
  2554. conversion_separate_format(y_data, tab)
  2555. w_list = self.model.coef_.tolist() # 变为表格
  2556. b = self.model.intercept_
  2557. tab = Tab()
  2558. x_means = quick_stats(x_data).get()[0]
  2559. # 回归的y是历史遗留问题 不用分类回归:因为得不到分类数据(predict结果是降维数据不是预测数据)
  2560. get = regress_w(x_data, w_list, b, x_means.copy())
  2561. for i in range(len(get)):
  2562. tab.add(get[i].overlap(get[i]), f"类别:{i}LDA映射曲线")
  2563. save = save_dir + rf"{os.sep}render.HTML"
  2564. tab.render(save) # 生成HTML
  2565. return save,
@plugin_class_loading(get_path(r"template/machinelearning"))
class NmfModel(Unsupervised):
    """Non-negative matrix factorization (V ~ W @ H) via sklearn's NMF."""

    def __init__(self, args_use, *args, **kwargs):
        super(NmfModel, self).__init__(*args, **kwargs)
        self.model = NMF(n_components=args_use["n_components"])
        self.n_components = args_use["n_components"]
        self.k = {"n_components": args_use["n_components"]}
        self.model_Name = "NFM"
        self.h_testdata = None
        # x_traindata stores W; h_traindata and y_traindata hold later data

    def predict(self, x_data, x_name="", add_func=None, *args, **kwargs):
        """Return the W factor of x_data; optionally publish H through add_func."""
        self.x_testdata = x_data.copy()
        x_predict = self.model.transform(x_data)
        self.y_testdata = x_predict.copy()
        self.h_testdata = self.model.components_
        if add_func is not None and x_name != "":
            add_func(self.h_testdata, f"{x_name}:V->NMF[H]")
        self.have_predict = True
        return x_predict, "V->NMF[W]"

    def data_visualization(self, save_dir, *args, **kwargs):
        """Render heat maps of V, W*H and their difference, plus CSV dumps."""
        tab = Tab()
        y_data = self.y_testdata
        x_data = self.x_testdata
        h_data = self.h_testdata
        conversion_separate_wh(y_data, h_data, tab)
        wh_data = np.matmul(y_data, h_data)
        difference_data = x_data - wh_data

        def make_heat_map(data, name, max_, min_):
            # shared renderer for the three heat maps below
            x = [f"数据[{i}]" for i in range(len(data))]  # samples
            y = [f"特征[{i}]" for i in range(len(data[0]))]  # features
            value = [
                (f"数据[{i}]", f"特征[{j}]", float(data[i][j]))
                for i in range(len(data))
                for j in range(len(data[i]))
            ]
            c = (
                HeatMap()
                .add_xaxis(x)
                .add_yaxis(f"数据", y, value, **label_setting)  # first element of value is x
                .set_global_opts(
                    title_opts=opts.TitleOpts(title="原始数据热力图"),
                    **global_not_legend,
                    yaxis_opts=opts.AxisOpts(
                        is_scale=True, type_="category"
                    ),  # 'category'
                    xaxis_opts=opts.AxisOpts(is_scale=True, type_="category"),
                    visualmap_opts=opts.VisualMapOpts(
                        is_show=True, max_=max_, min_=min_, pos_right="3%"
                    ),
                )  # display
            )
            tab.add(c, name)

        # shared color scale covering all three matrices
        max_ = (max(int(x_data.max()), int(wh_data.max()),
                    int(difference_data.max())) + 1)
        min_ = min(int(x_data.min()), int(wh_data.min()),
                   int(difference_data.min()))
        make_heat_map(x_data, "原始数据热力图", max_, min_)
        make_heat_map(wh_data, "W * H数据热力图", max_, min_)
        make_heat_map(difference_data, "数据差热力图", max_, min_)
        des_to_csv(save_dir, "权重矩阵", y_data)
        des_to_csv(save_dir, "系数矩阵", h_data)
        des_to_csv(save_dir, "系数*权重矩阵", wh_data)
        save = save_dir + rf"{os.sep}非负矩阵分解.HTML"
        tab.render(save)  # emit the HTML report
        return save,
  2631. @plugin_class_loading(get_path(r"template/machinelearning"))
  2632. class TsneModel(Unsupervised):
  2633. def __init__(self, args_use, *args, **kwargs):
  2634. super(TsneModel, self).__init__(*args, **kwargs)
  2635. self.model = TSNE(n_components=args_use["n_components"])
  2636. self.n_components = args_use["n_components"]
  2637. self.k = {"n_components": args_use["n_components"]}
  2638. self.model_Name = "t-SNE"
  2639. def fit_model(self, *args, **kwargs):
  2640. self.have_fit = True
  2641. return "None", "None"
  2642. def predict(self, x_data, *args, **kwargs):
  2643. self.x_testdata = x_data.copy()
  2644. x_predict = self.model.fit_transform(x_data)
  2645. self.y_testdata = x_predict.copy()
  2646. self.have_predict = True
  2647. return x_predict, "SNE"
  2648. def data_visualization(self, save_dir, *args, **kwargs):
  2649. tab = Tab()
  2650. y_data = self.y_testdata
  2651. conversion_separate_format(y_data, tab)
  2652. save = save_dir + rf"{os.sep}T-SNE.HTML"
  2653. tab.render(save) # 生成HTML
  2654. return save,
class MlpModel(StudyMachinebase):  # neural network (multi-layer perceptron), supervised
    """Wrap sklearn's MLPRegressor or MLPClassifier depending on the model key."""

    def __init__(self, args_use, model, *args, **kwargs):
        super(MlpModel, self).__init__(*args, **kwargs)
        all_model = {"MLP": MLPRegressor, "MLP_class": MLPClassifier}[model]
        self.model = all_model(
            hidden_layer_sizes=args_use["hidden_size"],
            activation=args_use["activation"],
            solver=args_use["solver"],
            alpha=args_use["alpha"],
            max_iter=args_use["max_iter"],
        )
        # these two groups are recorded so the model can be cloned
        self.hidden_layer_sizes = args_use["hidden_size"]
        self.activation = args_use["activation"]
        self.max_iter = args_use["max_iter"]
        self.solver = args_use["solver"]
        self.alpha = args_use["alpha"]
        self.k = {
            "hidden_layer_sizes": args_use["hidden_size"],
            "activation": args_use["activation"],
            "max_iter": args_use["max_iter"],
            "solver": args_use["solver"],
            "alpha": args_use["alpha"],
        }
        self.model_Name = model

    def data_visualization(self, save_dir, *args, **kwargs):
        """Render layer-weight heat maps, scatter plots and a summary table."""
        tab = Tab()
        x_data = self.x_testdata
        y_data = self.y_testdata
        coefs = self.model.coefs_
        # NOTE(review): MLPRegressor exposes no classes_ attribute - this line
        # presumably assumes the classifier variant; confirm for "MLP" models.
        class_ = self.model.classes_
        n_layers_ = self.model.n_layers_

        def make_heat_map(data, name):
            # heat map + table + CSV dump for one layer's weight matrix
            x = [f"特征(节点)[{i}]" for i in range(len(data))]
            y = [f"节点[{i}]" for i in range(len(data[0]))]
            value = [
                (f"特征(节点)[{i}]", f"节点[{j}]", float(data[i][j]))
                for i in range(len(data))
                for j in range(len(data[i]))
            ]
            c = (
                HeatMap()
                .add_xaxis(x)
                .add_yaxis(f"数据", y, value, **label_setting)  # first element of value is x
                .set_global_opts(
                    title_opts=opts.TitleOpts(title=name),
                    **global_not_legend,
                    yaxis_opts=opts.AxisOpts(
                        is_scale=True, type_="category"
                    ),  # 'category'
                    xaxis_opts=opts.AxisOpts(is_scale=True, type_="category"),
                    visualmap_opts=opts.VisualMapOpts(
                        is_show=True,
                        max_=float(data.max()),
                        min_=float(data.min()),
                        pos_right="3%",
                    ),
                )  # display
            )
            tab.add(c, name)
            tab.add(make_tab(x, data.transpose().tolist()), f"{name}:表格")
            des_to_csv(save_dir, f"{name}:表格", data.transpose().tolist(), x, y)

        get, x_means, x_range, data_type = regress_visualization(
            x_data, y_data)
        for i in range(len(get)):
            tab.add(get[i], f"{i}训练数据散点图")
        get = prediction_boundary(x_range, x_means, self.predict, data_type)
        for i in range(len(get)):
            tab.add(get[i], f"{i}预测热力图")
        heard = ["神经网络层数"]
        data = [n_layers_]
        for i in range(len(coefs)):
            make_heat_map(coefs[i], f"{i}层权重矩阵")
            heard.append(f"第{i}层节点数")
            data.append(len(coefs[i][0]))
        if self.model_Name == "MLP_class":
            heard += [f"[{i}]类型" for i in range(len(class_))]
            data += class_.tolist()
        tab.add(make_tab(heard, [data]), "数据表")
        save = save_dir + rf"{os.sep}多层感知机.HTML"
        tab.render(save)  # emit the HTML report
        return save,
@plugin_class_loading(get_path(r"template/machinelearning"))
class KmeansModel(UnsupervisedModel):
    """k-means clustering wrapper with scatter/decision-boundary visualization."""

    def __init__(self, args_use, *args, **kwargs):
        super(KmeansModel, self).__init__(*args, **kwargs)
        self.model = KMeans(n_clusters=args_use["n_clusters"])
        self.class_ = []  # distinct cluster labels, filled in by fit_model
        self.n_clusters = args_use["n_clusters"]
        self.k = {"n_clusters": args_use["n_clusters"]}
        self.model_Name = "k-means"

    def fit_model(self, x_data, *args, **kwargs):
        """Fit via the base class, then record the distinct cluster labels."""
        re = super().fit_model(x_data, *args, **kwargs)
        self.class_ = list(set(self.model.labels_.tolist()))
        self.have_fit = True
        return re

    def predict(self, x_data, *args, **kwargs):
        """Assign each sample in x_data to its nearest fitted centroid."""
        self.x_testdata = x_data.copy()
        y_predict = self.model.predict(x_data)
        self.y_testdata = y_predict.copy()
        self.have_predict = True
        return y_predict, "k-means"

    def data_visualization(self, save_dir, *args, **kwargs):
        """Render cluster scatter plots, decision boundaries and a summary table."""
        tab = Tab()
        y = self.y_testdata
        x_data = self.x_testdata
        class_ = self.class_
        center = self.model.cluster_centers_
        class_heard = [f"簇[{i}]" for i in range(len(class_))]
        # more_global selects the richer multi-feature renderer
        func = (
            training_visualization_more
            if more_global
            else training_visualization_center
        )
        get, x_means, x_range, data_type = func(x_data, class_, y, center)
        for i in range(len(get)):
            tab.add(get[i], f"{i}数据散点图")
        get = decision_boundary(
            x_range,
            x_means,
            self.predict,
            class_,
            data_type)
        for i in range(len(get)):
            tab.add(get[i], f"{i}预测热力图")
        heard = class_heard + [f"普适预测第{i}特征" for i in range(len(x_means))]
        data = class_ + [f"{i}" for i in x_means]
        c = Table().add(headers=heard, rows=[data])
        tab.add(c, "数据表")
        des_to_csv(
            save_dir,
            "预测表",
            [[f"{i}" for i in x_means]],
            [f"普适预测第{i}特征" for i in range(len(x_means))],
        )
        save = save_dir + rf"{os.sep}k-means聚类.HTML"
        tab.render(save)  # emit the HTML report
        return save,
  2793. @plugin_class_loading(get_path(r"template/machinelearning"))
  2794. class AgglomerativeModel(UnsupervisedModel):
  2795. def __init__(self, args_use, *args, **kwargs):
  2796. super(AgglomerativeModel, self).__init__(*args, **kwargs)
  2797. self.model = AgglomerativeClustering(
  2798. n_clusters=args_use["n_clusters"]
  2799. ) # 默认为2,不同于k-means
  2800. self.class_ = []
  2801. self.n_clusters = args_use["n_clusters"]
  2802. self.k = {"n_clusters": args_use["n_clusters"]}
  2803. self.model_Name = "Agglomerative"
  2804. def fit_model(self, x_data, *args, **kwargs):
  2805. re = super().fit_model(x_data, *args, **kwargs)
  2806. self.class_ = list(set(self.model.labels_.tolist()))
  2807. self.have_fit = True
  2808. return re
  2809. def predict(self, x_data, *args, **kwargs):
  2810. self.x_testdata = x_data.copy()
  2811. y_predict = self.model.fit_predict(x_data)
  2812. self.y_traindata = y_predict.copy()
  2813. self.have_predict = True
  2814. return y_predict, "Agglomerative"
  2815. def data_visualization(self, save_dir, *args, **kwargs):
  2816. tab = Tab()
  2817. y = self.y_testdata
  2818. x_data = self.x_testdata
  2819. class_ = self.class_
  2820. class_heard = [f"簇[{i}]" for i in range(len(class_))]
  2821. func = (
  2822. training_visualization_more_no_center
  2823. if more_global
  2824. else training_visualization
  2825. )
  2826. get, x_means, x_range, data_type = func(x_data, class_, y)
  2827. for i in range(len(get)):
  2828. tab.add(get[i], f"{i}训练数据散点图")
  2829. get = decision_boundary(
  2830. x_range,
  2831. x_means,
  2832. self.predict,
  2833. class_,
  2834. data_type)
  2835. for i in range(len(get)):
  2836. tab.add(get[i], f"{i}预测热力图")
  2837. linkage_array = ward(self.x_traindata) # self.y_traindata是结果
  2838. dendrogram(linkage_array)
  2839. plt.savefig(save_dir + rf"{os.sep}Cluster_graph.png")
  2840. image = Image()
  2841. image.add(src=save_dir + rf"{os.sep}Cluster_graph.png",).set_global_opts(
  2842. title_opts=opts.ComponentTitleOpts(title="聚类树状图")
  2843. )
  2844. tab.add(image, "聚类树状图")
  2845. heard = class_heard + [f"普适预测第{i}特征" for i in range(len(x_means))]
  2846. data = class_ + [f"{i}" for i in x_means]
  2847. c = Table().add(headers=heard, rows=[data])
  2848. tab.add(c, "数据表")
  2849. des_to_csv(
  2850. save_dir,
  2851. "预测表",
  2852. [[f"{i}" for i in x_means]],
  2853. [f"普适预测第{i}特征" for i in range(len(x_means))],
  2854. )
  2855. save = save_dir + rf"{os.sep}层次聚类.HTML"
  2856. tab.render(save) # 生成HTML
  2857. return save,
@plugin_class_loading(get_path(r"template/machinelearning"))
class DbscanModel(UnsupervisedModel):
    """Density-based clustering (DBSCAN) wrapper."""

    def __init__(self, args_use, *args, **kwargs):
        super(DbscanModel, self).__init__(*args, **kwargs)
        self.model = DBSCAN(
            eps=args_use["eps"],
            min_samples=args_use["min_samples"])
        # eps is the neighborhood distance (0.5); min_samples (5) separates
        # clusters from noise (minimum number of elements per cluster)
        # min_samples
        self.eps = args_use["eps"]
        self.min_samples = args_use["min_samples"]
        self.k = {
            "min_samples": args_use["min_samples"],
            "eps": args_use["eps"]}
        self.class_ = []  # distinct cluster labels, filled in by fit_model
        self.model_Name = "DBSCAN"

    def fit_model(self, x_data, *args, **kwargs):
        """Fit via the base class, then record the distinct cluster labels."""
        re = super().fit_model(x_data, *args, **kwargs)
        self.class_ = list(set(self.model.labels_.tolist()))
        self.have_fit = True
        return re

    def predict(self, x_data, *args, **kwargs):
        """Re-cluster x_data (DBSCAN has no out-of-sample predict)."""
        self.x_testdata = x_data.copy()
        y_predict = self.model.fit_predict(x_data)
        self.y_testdata = y_predict.copy()
        self.have_predict = True
        return y_predict, "DBSCAN"

    def data_visualization(self, save_dir, *args, **kwargs):
        """Render cluster scatter plots and a summary table to HTML."""
        # DBSCAN makes prediction boundary plots unnecessary
        tab = Tab()
        y = self.y_testdata.copy()
        x_data = self.x_testdata.copy()
        class_ = self.class_
        class_heard = [f"簇[{i}]" for i in range(len(class_))]
        # more_global selects the richer multi-feature renderer
        func = (
            training_visualization_more_no_center
            if more_global
            else training_visualization
        )
        get, x_means, x_range, data_type = func(x_data, class_, y)
        for i in range(len(get)):
            tab.add(get[i], f"{i}训练数据散点图")
        heard = class_heard + [f"普适预测第{i}特征" for i in range(len(x_means))]
        data = class_ + [f"{i}" for i in x_means]
        c = Table().add(headers=heard, rows=[data])
        tab.add(c, "数据表")
        des_to_csv(
            save_dir,
            "预测表",
            [[f"{i}" for i in x_means]],
            [f"普适预测第{i}特征" for i in range(len(x_means))],
        )
        save = save_dir + rf"{os.sep}密度聚类.HTML"
        tab.render(save)  # emit the HTML report
        return save,
  2913. class FastFourier(StudyMachinebase): # 快速傅里叶变换
  2914. def __init__(self, *args, **kwargs):
  2915. super(FastFourier, self).__init__(*args, **kwargs)
  2916. self.model = None
  2917. self.fourier = None # fft复数
  2918. self.frequency = None # 频率range
  2919. self.angular_Frequency = None # 角频率range
  2920. self.phase = None # 相位range
  2921. self.breadth = None # 震幅range
  2922. self.sample_size = None # 样本数
  2923. def fit_model(self, y_data, *args, **kwargs):
  2924. y_data = y_data.ravel() # 扯平为一维数组
  2925. try:
  2926. if self.y_traindata is None:
  2927. raise Exception
  2928. self.y_traindata = np.hstack(y_data, self.x_traindata)
  2929. except BaseException:
  2930. self.y_traindata = y_data.copy()
  2931. fourier = fft(y_data)
  2932. self.sample_size = len(y_data)
  2933. self.frequency = np.linspace(0, 1, self.sample_size) # 频率N_range
  2934. self.angular_Frequency = self.frequency / (np.pi * 2) # 角频率w
  2935. self.phase = np.angle(fourier)
  2936. self.breadth = np.abs(fourier)
  2937. self.fourier = fourier
  2938. self.have_fit = True
  2939. return "None", "None"
  2940. def predict(self, x_data, *args, **kwargs):
  2941. return np.array([]), ""
  2942. def data_visualization(self, save_dir, *args, **kwargs):
  2943. # DBSCAN没有预测的必要
  2944. tab = Tab()
  2945. y = self.y_traindata.copy()
  2946. n = self.sample_size
  2947. phase = self.phase # 相位range
  2948. breadth = self.breadth # 震幅range
  2949. normalization_breadth = breadth / n
  2950. def line(name, value, s=slice(0, None)) -> Line:
  2951. c = (
  2952. Line()
  2953. .add_xaxis(self.frequency[s].tolist())
  2954. .add_yaxis(
  2955. "",
  2956. value,
  2957. **label_setting,
  2958. symbol="none" if self.sample_size >= 500 else None,
  2959. )
  2960. .set_global_opts(
  2961. title_opts=opts.TitleOpts(title=name),
  2962. **global_not_legend,
  2963. xaxis_opts=opts.AxisOpts(type_="value"),
  2964. yaxis_opts=opts.AxisOpts(type_="value"),
  2965. )
  2966. )
  2967. return c
  2968. tab.add(line("原始数据", y.tolist()), "原始数据")
  2969. tab.add(line("双边振幅谱", breadth.tolist()), "双边振幅谱")
  2970. tab.add(
  2971. line(
  2972. "双边振幅谱(归一化)",
  2973. normalization_breadth.tolist()),
  2974. "双边振幅谱(归一化)")
  2975. tab.add(
  2976. line("单边相位谱", breadth[: int(n / 2)].tolist(), slice(0, int(n / 2))), "单边相位谱"
  2977. )
  2978. tab.add(
  2979. line(
  2980. "单边相位谱(归一化)",
  2981. normalization_breadth[: int(n / 2)].tolist(),
  2982. slice(0, int(n / 2)),
  2983. ),
  2984. "单边相位谱(归一化)",
  2985. )
  2986. tab.add(line("双边相位谱", phase.tolist()), "双边相位谱")
  2987. tab.add(
  2988. line("单边相位谱", phase[: int(n / 2)].tolist(), slice(0, int(n / 2))), "单边相位谱"
  2989. )
  2990. tab.add(make_tab(self.frequency.tolist(), [breadth.tolist()]), "双边振幅谱")
  2991. tab.add(make_tab(self.frequency.tolist(), [phase.tolist()]), "双边相位谱")
  2992. tab.add(
  2993. make_tab(
  2994. self.frequency.tolist(), [
  2995. self.fourier.tolist()]), "快速傅里叶变换")
  2996. save = save_dir + rf"{os.sep}快速傅里叶.HTML"
  2997. tab.render(save) # 生成HTML
  2998. return save,
  2999. class ReverseFastFourier(StudyMachinebase): # 快速傅里叶变换
  3000. def __init__(self, *args, **kwargs):
  3001. super(ReverseFastFourier, self).__init__(*args, **kwargs)
  3002. self.model = None
  3003. self.sample_size = None
  3004. self.y_testdata_real = None
  3005. self.phase = None
  3006. self.breadth = None
  3007. def fit_model(self, y_data, *args, **kwargs):
  3008. return "None", "None"
  3009. def predict(self, x_data, x_name="", add_func=None, *args, **kwargs):
  3010. self.x_testdata = x_data.ravel().astype(np.complex_)
  3011. fourier = ifft(self.x_testdata)
  3012. self.y_testdata = fourier.copy()
  3013. self.y_testdata_real = np.real(fourier)
  3014. self.sample_size = len(self.y_testdata_real)
  3015. self.phase = np.angle(self.x_testdata)
  3016. self.breadth = np.abs(self.x_testdata)
  3017. add_func(self.y_testdata_real.copy(), f"{x_name}:逆向快速傅里叶变换[实数]")
  3018. return fourier, "逆向快速傅里叶变换"
  3019. def data_visualization(self, save_dir, *args, **kwargs):
  3020. # DBSCAN没有预测的必要
  3021. tab = Tab()
  3022. y = self.y_testdata_real.copy()
  3023. y_data = self.y_testdata.copy()
  3024. n = self.sample_size
  3025. range_n: list = np.linspace(0, 1, n).tolist()
  3026. phase = self.phase # 相位range
  3027. breadth = self.breadth # 震幅range
  3028. def line(name, value, s=slice(0, None)) -> Line:
  3029. c = (
  3030. Line() .add_xaxis(
  3031. range_n[s]) .add_yaxis(
  3032. "",
  3033. value,
  3034. **label_setting,
  3035. symbol="none" if n >= 500 else None) .set_global_opts(
  3036. title_opts=opts.TitleOpts(
  3037. title=name),
  3038. **global_not_legend,
  3039. xaxis_opts=opts.AxisOpts(
  3040. type_="value"),
  3041. yaxis_opts=opts.AxisOpts(
  3042. type_="value"),
  3043. ))
  3044. return c
  3045. tab.add(line("逆向傅里叶变换", y.tolist()), "逆向傅里叶变换[实数]")
  3046. tab.add(make_tab(range_n, [y_data.tolist()]), "逆向傅里叶变换数据")
  3047. tab.add(make_tab(range_n, [y.tolist()]), "逆向傅里叶变换数据[实数]")
  3048. tab.add(line("双边振幅谱", breadth.tolist()), "双边振幅谱")
  3049. tab.add(
  3050. line("单边相位谱", breadth[: int(n / 2)].tolist(), slice(0, int(n / 2))), "单边相位谱"
  3051. )
  3052. tab.add(line("双边相位谱", phase.tolist()), "双边相位谱")
  3053. tab.add(
  3054. line("单边相位谱", phase[: int(n / 2)].tolist(), slice(0, int(n / 2))), "单边相位谱"
  3055. )
  3056. save = save_dir + rf"{os.sep}快速傅里叶.HTML"
  3057. tab.render(save) # 生成HTML
  3058. return save,
  3059. class ReverseFastFourierTwonumpy(ReverseFastFourier): # 2快速傅里叶变换
  3060. def fit_model(
  3061. self,
  3062. x_data,
  3063. y_data=None,
  3064. x_name="",
  3065. add_func=None,
  3066. *args,
  3067. **kwargs):
  3068. r = np.multiply(np.cos(x_data), y_data)
  3069. j = np.multiply(np.sin(x_data), y_data) * 1j
  3070. super(ReverseFastFourierTwonumpy, self).predict(
  3071. r + j, x_name=x_name, add_func=add_func, *args, **kwargs
  3072. )
  3073. return "None", "None"
  3074. class CurveFitting(StudyMachinebase): # 曲线拟合
  3075. def __init__(self, name, str_, model, *args, **kwargs):
  3076. super(CurveFitting, self).__init__(*args, **kwargs)
  3077. def ndim_down(data: np.ndarray):
  3078. if data.ndim == 1:
  3079. return data
  3080. new_data = []
  3081. for i in data:
  3082. new_data.append(np.sum(i))
  3083. return np.array(new_data)
  3084. named_domain = {"np": np, "Func": model, "ndimDown": ndim_down}
  3085. protection_func = f"""
  3086. @plugin_func_loading(get_path(r'template/machinelearning'))
  3087. def FUNC({",".join(model.__code__.co_varnames)}):
  3088. answer = Func({",".join(model.__code__.co_varnames)})
  3089. return ndimDown(answer)
  3090. """
  3091. exec(protection_func, named_domain)
  3092. self.func = named_domain["FUNC"]
  3093. self.fit_data = None
  3094. self.name = name
  3095. self.func_str = str_
  3096. def fit_model(
  3097. self,
  3098. x_data: np.ndarray,
  3099. y_data: np.ndarray,
  3100. *args,
  3101. **kwargs):
  3102. y_data = y_data.ravel()
  3103. x_data = x_data.astype(np.float64)
  3104. try:
  3105. if self.x_traindata is None:
  3106. raise Exception
  3107. self.x_traindata = np.vstack(x_data, self.x_traindata)
  3108. self.y_traindata = np.vstack(y_data, self.y_traindata)
  3109. except BaseException:
  3110. self.x_traindata = x_data.copy()
  3111. self.y_traindata = y_data.copy()
  3112. self.fit_data = optimize.curve_fit(
  3113. self.func, self.x_traindata, self.y_traindata
  3114. )
  3115. self.model = self.fit_data[0].copy()
  3116. return "None", "None"
  3117. def predict(self, x_data, *args, **kwargs):
  3118. self.x_testdata = x_data.copy()
  3119. predict = self.func(x_data, *self.model)
  3120. y_predict = []
  3121. for i in predict:
  3122. y_predict.append(np.sum(i))
  3123. y_predict = np.array(y_predict)
  3124. self.y_testdata = y_predict.copy()
  3125. self.have_predict = True
  3126. return y_predict, self.name
  3127. def data_visualization(self, save_dir, *args, **kwargs):
  3128. # DBSCAN没有预测的必要
  3129. tab = Tab()
  3130. y = self.y_testdata.copy()
  3131. x_data = self.x_testdata.copy()
  3132. get, x_means, x_range, data_type = regress_visualization(x_data, y)
  3133. for i in range(len(get)):
  3134. tab.add(get[i], f"{i}预测类型图")
  3135. get = prediction_boundary(x_range, x_means, self.predict, data_type)
  3136. for i in range(len(get)):
  3137. tab.add(get[i], f"{i}预测热力图")
  3138. tab.add(
  3139. make_tab(
  3140. [f"普适预测第{i}特征" for i in range(len(x_means))],
  3141. [[f"{i}" for i in x_means]],
  3142. ),
  3143. "普适预测特征数据",
  3144. )
  3145. tab.add(
  3146. make_tab(
  3147. [f"参数[{i}]" for i in range(len(self.model))],
  3148. [[f"{i}" for i in self.model]],
  3149. ),
  3150. "拟合参数",
  3151. )
  3152. save = save_dir + rf"{os.sep}曲线拟合.HTML"
  3153. tab.render(save) # 生成HTML
  3154. return save,
  3155. @plugin_class_loading(get_path(r"template/machinelearning"))
  3156. class Tab(tab_First):
  3157. def __init__(self, *args, **kwargs):
  3158. super(Tab, self).__init__(*args, **kwargs)
  3159. self.element = {} # 记录tab组成元素 name:charts
  3160. def add(self, chart, tab_name):
  3161. self.element[tab_name] = chart
  3162. return super(Tab, self).add(chart, tab_name)
  3163. def render(
  3164. self,
  3165. path: str = "render.html",
  3166. template_name: str = "simple_tab.html",
  3167. *args,
  3168. **kwargs,
  3169. ) -> str:
  3170. if all_global:
  3171. render_dir = path_split(path)[0]
  3172. for i in self.element:
  3173. self.element[i].render(render_dir + os.sep + i + ".html")
  3174. return super(Tab, self).render(path, template_name, *args, **kwargs)
  3175. @plugin_class_loading(get_path(r"template/machinelearning"))
  3176. class Table(TableFisrt):
  3177. def __init__(self, *args, **kwargs):
  3178. super(Table, self).__init__(*args, **kwargs)
  3179. self.HEADERS = []
  3180. self.ROWS = [[]]
  3181. def add(self, headers, rows, attributes=None):
  3182. if len(rows) == 1:
  3183. new_headers = ["数据类型", "数据"]
  3184. new_rows = list(zip(headers, rows[0]))
  3185. self.HEADERS = new_headers
  3186. self.ROWS = new_rows
  3187. return super().add(new_headers, new_rows, attributes)
  3188. else:
  3189. self.HEADERS = headers
  3190. self.ROWS = rows
  3191. return super().add(headers, rows, attributes)
  3192. def render(self, path="render.html", *args, **kwargs,) -> str:
  3193. if csv_global:
  3194. save_dir, name = path_split(path)
  3195. name = splitext(name)[0]
  3196. try:
  3197. DataFrame(self.ROWS, columns=self.HEADERS).to_csv(
  3198. save_dir + os.sep + name + ".csv"
  3199. )
  3200. except BaseException:
  3201. pass
  3202. return super().render(path, *args, **kwargs)
  3203. @plugin_func_loading(get_path(r"template/machinelearning"))
  3204. def make_list(first, end, num=35):
  3205. n = num / (end - first)
  3206. if n == 0:
  3207. n = 1
  3208. re = []
  3209. n_first = first * n
  3210. n_end = end * n
  3211. while n_first <= n_end:
  3212. cul = n_first / n
  3213. re.append(round(cul, 2))
  3214. n_first += 1
  3215. return re
  3216. @plugin_func_loading(get_path(r"template/machinelearning"))
  3217. def list_filter(original_list, num=70):
  3218. if len(original_list) <= num:
  3219. return original_list
  3220. n = int(num / len(original_list))
  3221. re = original_list[::n]
  3222. return re
@plugin_func_loading(get_path(r"template/machinelearning"))
def prediction_boundary(x_range, x_means, predict_func, data_type):  # regression x-x heatmap
    """Build regression heatmaps over every pair of features.

    x_range holds per-feature plotting ranges, x_means the fill-in values
    for the features not on the axes, predict_func is the model's predict
    callback, data_type marks each feature as continuous (1) or discrete.
    Returns a list of pyecharts HeatMap charts (empty for 1 feature).
    """
    # a: feature on the x axis, b: the paired feature, other features fixed at x_means
    render_list = []
    if len(x_means) == 1:
        return render_list
    for i in range(len(x_means)):
        for j in range(len(x_means)):
            if j <= i:
                continue  # each unordered feature pair once
            a_range = x_range[j]
            a_type = data_type[j]
            b_range = x_range[i]
            b_type = data_type[i]
            if a_type == 1:
                a_list = make_list(a_range[0], a_range[1], 70)
            else:
                a_list = list_filter(a_range)  # accepts at most ~70 values
            if b_type == 1:
                b_list = make_list(b_range[0], b_range[1], 35)
            else:
                b_list = list_filter(b_range)  # accepts at most ~70 values
            # Cartesian grid of the two feature axes (NOTE: the comprehension
            # variable i shadows the outer loop index inside these expressions).
            a = np.array([i for i in a_list for _ in b_list]).T
            b = np.array([i for _ in a_list for i in b_list]).T
            data = np.array([x_means for _ in a_list for i in b_list])
            data[:, j] = a
            data[:, i] = b
            y_data = predict_func(data)[0].tolist()
            value = [[float(a[i]), float(b[i]), y_data[i]]
                     for i in range(len(a))]
            c = (
                HeatMap()
                .add_xaxis(np.unique(a))
                # the first number in each value triple is the x coordinate
                .add_yaxis(f"数据", np.unique(b), value, **label_setting)
                .set_global_opts(
                    title_opts=opts.TitleOpts(title="预测热力图"),
                    **global_not_legend,
                    yaxis_opts=opts.AxisOpts(
                        is_scale=True, type_="category"
                    ),  # 'category'
                    xaxis_opts=opts.AxisOpts(is_scale=True, type_="category"),
                    visualmap_opts=opts.VisualMapOpts(
                        is_show=True,
                        max_=int(max(y_data)) + 1,
                        min_=int(min(y_data)),
                        pos_right="3%",
                    ),
                )  # display
            )
            render_list.append(c)
    return render_list
@plugin_func_loading(get_path(r"template/machinelearning"))
def prediction_boundary_more(x_range, x_means, predict_func, data_type):
    """Build regression heatmaps for adjacent feature pairs (i-1, i) only.

    Same contract as prediction_boundary, but instead of every pair it only
    plots consecutive features, so it produces fewer charts for wide data.
    Returns a list of pyecharts HeatMap charts (empty for 1 feature).
    """
    # a: feature on the x axis, b: the paired feature, other features fixed at x_means
    render_list = []
    if len(x_means) == 1:
        return render_list
    for i in range(len(x_means)):
        if i == 0:
            continue  # pair each feature with its predecessor
        a_range = x_range[i - 1]
        a_type = data_type[i - 1]
        b_range = x_range[i]
        b_type = data_type[i]
        if a_type == 1:
            a_list = make_list(a_range[0], a_range[1], 70)
        else:
            a_list = list_filter(a_range)  # accepts at most ~70 values
        if b_type == 1:
            b_list = make_list(b_range[0], b_range[1], 35)
        else:
            b_list = list_filter(b_range)  # accepts at most ~70 values
        # Cartesian grid of the two feature axes (NOTE: the comprehension
        # variable i shadows the outer loop index inside these expressions).
        a = np.array([i for i in a_list for _ in b_list]).T
        b = np.array([i for _ in a_list for i in b_list]).T
        data = np.array([x_means for _ in a_list for i in b_list])
        data[:, i - 1] = a
        data[:, i] = b
        y_data = predict_func(data)[0].tolist()
        value = [[float(a[i]), float(b[i]), y_data[i]] for i in range(len(a))]
        c = (
            HeatMap()
            .add_xaxis(np.unique(a))
            # the first number in each value triple is the x coordinate
            .add_yaxis(f"数据", np.unique(b), value, **label_setting)
            .set_global_opts(
                title_opts=opts.TitleOpts(title="预测热力图"),
                **global_not_legend,
                yaxis_opts=opts.AxisOpts(
                    is_scale=True, type_="category"),  # 'category'
                xaxis_opts=opts.AxisOpts(is_scale=True, type_="category"),
                visualmap_opts=opts.VisualMapOpts(
                    is_show=True,
                    max_=int(max(y_data)) + 1,
                    min_=int(min(y_data)),
                    pos_right="3%",
                ),
            )  # display
        )
        render_list.append(c)
    return render_list
def decision_boundary(
    x_range, x_means, predict_func, class_list, data_type, no_unknow=False
):  # classification x-x heatmap
    """Build classification heatmaps over adjacent feature pairs.

    x_range holds per-feature plotting ranges, x_means the fill-in values
    for off-axis features, predict_func the model's predict callback,
    class_list the known labels, data_type marks features continuous (1)
    or discrete.  When no_unknow is False an extra "未知" (unknown) band is
    added to the legend for predictions outside class_list.
    Returns a list of pyecharts HeatMap charts.
    """
    # a: x-axis feature, b: paired feature; by convention i-1 is the x axis
    class_dict = dict(zip(class_list, [i for i in range(len(class_list))]))
    if not no_unknow:
        map_dict = [{"min": -1.5, "max": -0.5, "label": "未知"}]  # piecewise legend
    else:
        map_dict = []
    for i in class_dict:
        map_dict.append(
            {"min": class_dict[i] - 0.5, "max": class_dict[i] + 0.5, "label": str(i)}
        )
    render_list = []
    if len(x_means) == 1:
        # Single feature: plot it against a dummy "None" x axis.
        a_range = x_range[0]
        if data_type[0] == 1:
            a_list = make_list(a_range[0], a_range[1], 70)
        else:
            a_list = a_range
        a = np.array([i for i in a_list]).reshape(-1, 1)
        y_data = predict_func(a)[0].tolist()
        # Unknown labels map to -1, matching the "未知" legend band.
        value = [[0, float(a[i]), class_dict.get(y_data[i], -1)]
                 for i in range(len(a))]
        c = (
            HeatMap()
            .add_xaxis(["None"])
            # the first number in each value triple is the x coordinate
            .add_yaxis(f"数据", np.unique(a), value, **label_setting)
            .set_global_opts(
                title_opts=opts.TitleOpts(title="预测热力图"),
                **global_not_legend,
                yaxis_opts=opts.AxisOpts(
                    is_scale=True, type_="category"),  # 'category'
                xaxis_opts=opts.AxisOpts(is_scale=True, type_="category"),
                visualmap_opts=opts.VisualMapOpts(
                    is_show=True,
                    max_=max(class_dict.values()),
                    min_=-1,
                    is_piecewise=True,
                    pieces=map_dict,
                    orient="horizontal",
                    pos_bottom="3%",
                ),
            )
        )
        render_list.append(c)
        return render_list
    # More than one feature: pair each feature with its predecessor.
    for i in range(len(x_means)):
        if i == 0:
            continue
        a_range = x_range[i - 1]
        a_type = data_type[i - 1]
        b_range = x_range[i]
        b_type = data_type[i]
        if a_type == 1:
            a_list = make_list(a_range[0], a_range[1], 70)
        else:
            a_list = a_range
        if b_type == 1:
            rb = make_list(b_range[0], b_range[1], 35)
        else:
            rb = b_range
        # Cartesian grid of the two feature axes (NOTE: the comprehension
        # variable i shadows the outer loop index inside these expressions).
        a = np.array([i for i in a_list for _ in rb]).T
        b = np.array([i for _ in a_list for i in rb]).T
        data = np.array([x_means for _ in a_list for i in rb])
        data[:, i - 1] = a
        data[:, i] = b
        y_data = predict_func(data)[0].tolist()
        value = [
            [float(a[i]), float(b[i]), class_dict.get(y_data[i], -1)]
            for i in range(len(a))
        ]
        c = (
            HeatMap()
            .add_xaxis(np.unique(a))
            # the first number in each value triple is the x coordinate
            .add_yaxis(f"数据", np.unique(b), value, **label_setting)
            .set_global_opts(
                title_opts=opts.TitleOpts(title="预测热力图"),
                **global_not_legend,
                yaxis_opts=opts.AxisOpts(
                    is_scale=True, type_="category"),  # 'category'
                xaxis_opts=opts.AxisOpts(is_scale=True, type_="category"),
                visualmap_opts=opts.VisualMapOpts(
                    is_show=True,
                    max_=max(class_dict.values()),
                    min_=-1,
                    is_piecewise=True,
                    pieces=map_dict,
                    orient="horizontal",
                    pos_bottom="3%",
                ),
            )
        )
        render_list.append(c)
    return render_list
def decision_boundary_more(
    x_range, x_means, predict_func, class_list, data_type, no_unknow=False
):
    """Build classification heatmaps over every pair of features.

    Same contract as decision_boundary, but plots every unordered feature
    pair instead of only adjacent pairs; the single-feature case is
    delegated to decision_boundary.  Returns a list of HeatMap charts.
    """
    # a: x-axis feature, b: paired feature; by convention i-1 is the x axis
    class_dict = dict(zip(class_list, [i for i in range(len(class_list))]))
    if not no_unknow:
        map_dict = [{"min": -1.5, "max": -0.5, "label": "未知"}]  # piecewise legend
    else:
        map_dict = []
    for i in class_dict:
        map_dict.append(
            {"min": class_dict[i] - 0.5, "max": class_dict[i] + 0.5, "label": str(i)}
        )
    render_list = []
    if len(x_means) == 1:
        # One feature only: reuse the simpler single-axis renderer.
        return decision_boundary(
            x_range, x_means, predict_func, class_list, data_type, no_unknow
        )
    # More than one feature: every unordered pair once.
    for i in range(len(x_means)):
        for j in range(len(x_means)):
            if j <= i:
                continue
            a_range = x_range[j]
            a_type = data_type[j]
            b_range = x_range[i]
            b_type = data_type[i]
            if a_type == 1:
                a_range = make_list(a_range[0], a_range[1], 70)
            else:
                a_range = a_range
            if b_type == 1:
                b_range = make_list(b_range[0], b_range[1], 35)
            else:
                b_range = b_range
            # Cartesian grid of the two feature axes (NOTE: the comprehension
            # variable i shadows the outer loop index inside these expressions).
            a = np.array([i for i in a_range for _ in b_range]).T
            b = np.array([i for _ in a_range for i in b_range]).T
            data = np.array([x_means for _ in a_range for i in b_range])
            data[:, j] = a
            data[:, i] = b
            y_data = predict_func(data)[0].tolist()
            # Unknown labels map to -1, matching the "未知" legend band.
            value = [
                [float(a[i]), float(b[i]), class_dict.get(y_data[i], -1)]
                for i in range(len(a))
            ]
            c = (
                HeatMap()
                .add_xaxis(np.unique(a))
                # the first number in each value triple is the x coordinate
                .add_yaxis(f"数据", np.unique(b), value, **label_setting)
                .set_global_opts(
                    title_opts=opts.TitleOpts(title="预测热力图"),
                    **global_not_legend,
                    yaxis_opts=opts.AxisOpts(
                        is_scale=True, type_="category"
                    ),  # 'category'
                    xaxis_opts=opts.AxisOpts(is_scale=True, type_="category"),
                    visualmap_opts=opts.VisualMapOpts(
                        is_show=True,
                        max_=max(class_dict.values()),
                        min_=-1,
                        is_piecewise=True,
                        pieces=map_dict,
                        orient="horizontal",
                        pos_bottom="3%",
                    ),
                )
            )
            render_list.append(c)
    return render_list
  3498. @plugin_func_loading(get_path(r"template/machinelearning"))
  3499. def see_tree(tree_file_dir):
  3500. node_regex = re.compile(r'^([0-9]+) \[label="(.+)"\] ;$') # 匹配节点正则表达式
  3501. link_regex = re.compile("^([0-9]+) -> ([0-9]+) (.*);$") # 匹配节点正则表达式
  3502. node_dict = {}
  3503. link_list = []
  3504. with open(tree_file_dir, "r") as f: # 貌似必须分开w和r
  3505. for i in f:
  3506. try:
  3507. regex_result = re.findall(node_regex, i)[0]
  3508. if regex_result[0] != "":
  3509. try:
  3510. v = float(regex_result[0])
  3511. except BaseException:
  3512. v = 0
  3513. node_dict[regex_result[0]] = {
  3514. "name": regex_result[1].replace("\\n", "\n"),
  3515. "value": v,
  3516. "children": [],
  3517. }
  3518. continue
  3519. except BaseException:
  3520. pass
  3521. try:
  3522. regex_result = re.findall(link_regex, i)[0]
  3523. if regex_result[0] != "" and regex_result[1] != "":
  3524. link_list.append((regex_result[0], regex_result[1]))
  3525. except BaseException:
  3526. pass
  3527. father_list = [] # 已经有父亲的list
  3528. for i in link_list:
  3529. father = i[0] # 父节点
  3530. son = i[1] # 子节点
  3531. try:
  3532. node_dict[father]["children"].append(node_dict[son])
  3533. father_list.append(son)
  3534. except BaseException:
  3535. pass
  3536. father = list(set(node_dict.keys()) - set(father_list))
  3537. c = (
  3538. Tree()
  3539. .add("", [node_dict[father[0]]], is_roam=True)
  3540. .set_global_opts(
  3541. title_opts=opts.TitleOpts(title="决策树可视化"),
  3542. toolbox_opts=opts.ToolboxOpts(is_show=True),
  3543. )
  3544. )
  3545. return c
  3546. @plugin_func_loading(get_path(r"template/machinelearning"))
  3547. def make_tab(heard, row):
  3548. return Table().add(headers=heard, rows=row)
  3549. @plugin_func_loading(get_path(r"template/machinelearning"))
  3550. def coefficient_scatter_plot(w_heard, w):
  3551. c = (
  3552. Scatter() .add_xaxis(w_heard) .add_yaxis(
  3553. "", w, **label_setting) .set_global_opts(
  3554. title_opts=opts.TitleOpts(
  3555. title="系数w散点图"), **global_setting))
  3556. return c
  3557. @plugin_func_loading(get_path(r"template/machinelearning"))
  3558. def coefficient_bar_plot(w_heard, w):
  3559. c = (
  3560. Bar() .add_xaxis(w_heard) .add_yaxis(
  3561. "",
  3562. abs(w).tolist(),
  3563. **label_setting) .set_global_opts(
  3564. title_opts=opts.TitleOpts(
  3565. title="系数w柱状图"),
  3566. **global_setting))
  3567. return c
  3568. @plugin_func_loading(get_path(r"template/machinelearning"))
  3569. def is_continuous(data: np.array, f: float = 0.1):
  3570. data = data.tolist()
  3571. l: list = np.unique(data).tolist()
  3572. try:
  3573. re = len(l) / len(data) >= f or len(data) <= 3
  3574. return re
  3575. except BaseException:
  3576. return False
  3577. @plugin_func_loading(get_path(r"template/machinelearning"))
  3578. def quick_stats(x_data):
  3579. statistics_assistant = CategoricalData()
  3580. print(x_data)
  3581. for i in range(len(x_data)):
  3582. x1 = x_data[i] # x坐标
  3583. statistics_assistant(x1)
  3584. return statistics_assistant
@plugin_func_loading(get_path(r"template/machinelearning"))
def training_visualization_more_no_center(x_data, class_list, y_data):
    """Draw class-coloured scatter charts for every feature pair (no centroids).

    x_data is (samples, features) and is transposed to iterate features;
    y_data holds the per-sample class labels.  Returns
    (render_list, means, x_range, data_type) where the last three come from
    the quick_stats collector.
    """
    x_data = x_data.transpose()  # iterate features, not samples
    if len(x_data) == 1:
        # pad a zero feature so a 2-D scatter is still possible
        x_data = np.array([x_data[0], np.zeros(len(x_data[0]))])
    statistics_assistant = quick_stats(x_data)
    render_list = []
    for i in range(len(x_data)):
        for a in range(len(x_data)):
            if a <= i:
                continue  # each unordered feature pair once
            x1 = x_data[i]  # vertical-axis feature
            x1_is_continuous = is_continuous(x1)
            x2 = x_data[a]  # horizontal-axis feature
            x2_is_continuous = is_continuous(x2)
            base_render = None  # chart accumulated so far (the old C)
            for class_num in range(len(class_list)):
                now_class = class_list[class_num]
                plot_x1 = x1[y_data == now_class].tolist()
                plot_x2 = x2[y_data == now_class]
                axis_x2 = np.unique(plot_x2)
                plot_x2 = x2[y_data == now_class].tolist()
                # unlike a plain scatter, x here is the vertical coordinate
                c = (
                    Scatter()
                    .add_xaxis(plot_x2)
                    .add_yaxis(f"{now_class}", plot_x1, **label_setting)
                    .set_global_opts(
                        title_opts=opts.TitleOpts(title=f"[{a}-{i}]训练数据散点图"),
                        **global_setting,
                        yaxis_opts=opts.AxisOpts(
                            type_="value" if x1_is_continuous else "category",
                            is_scale=True,
                        ),
                        xaxis_opts=opts.AxisOpts(
                            type_="value" if x2_is_continuous else "category",
                            is_scale=True,
                        ),
                    )
                )
                c.add_xaxis(axis_x2)
                if base_render is None:
                    base_render = c
                else:
                    base_render = base_render.overlap(c)
            render_list.append(base_render)
    means, x_range, data_type = statistics_assistant.get()
    return render_list, means, x_range, data_type
@plugin_func_loading(get_path(r"template/machinelearning"))
def training_visualization_more(x_data, class_list, y_data, center):
    """Draw class-coloured scatter charts for every feature pair, plus centroids.

    Same as training_visualization_more_no_center, but ``center`` supplies a
    per-class centroid coordinate which is overlaid as a triangle marker on
    each chart.  Returns (render_list, means, x_range, data_type).
    """
    x_data = x_data.transpose()  # iterate features, not samples
    if len(x_data) == 1:
        # pad a zero feature so a 2-D scatter is still possible
        x_data = np.array([x_data[0], np.zeros(len(x_data[0]))])
    statistics_assistant = quick_stats(x_data)
    render_list = []
    for i in range(len(x_data)):
        for a in range(len(x_data)):
            if a <= i:
                continue  # each unordered feature pair once
            x1 = x_data[i]  # vertical-axis feature
            x1_is_continuous = is_continuous(x1)
            x2 = x_data[a]  # horizontal-axis feature
            x2_is_continuous = is_continuous(x2)
            base_render = None  # chart accumulated so far (the old C)
            for class_num in range(len(class_list)):
                now_class = class_list[class_num]
                plot_x1 = x1[y_data == now_class].tolist()
                plot_x2 = x2[y_data == now_class]
                axis_x2 = np.unique(plot_x2)
                plot_x2 = x2[y_data == now_class].tolist()
                # unlike a plain scatter, x here is the vertical coordinate
                c = (
                    Scatter()
                    .add_xaxis(plot_x2)
                    .add_yaxis(f"{now_class}", plot_x1, **label_setting)
                    .set_global_opts(
                        title_opts=opts.TitleOpts(title=f"[{a}-{i}]训练数据散点图"),
                        **global_setting,
                        yaxis_opts=opts.AxisOpts(
                            type_="value" if x1_is_continuous else "category",
                            is_scale=True,
                        ),
                        xaxis_opts=opts.AxisOpts(
                            type_="value" if x2_is_continuous else "category",
                            is_scale=True,
                        ),
                    )
                )
                c.add_xaxis(axis_x2)
                # overlay the cluster centroid; fall back to 0 when the
                # centroid lacks this feature index
                try:
                    center_x2 = [center[class_num][a]]
                except BaseException:
                    center_x2 = [0]
                b = (
                    Scatter()
                    .add_xaxis(center_x2)
                    .add_yaxis(
                        f"[{now_class}]中心",
                        [center[class_num][i]],
                        **label_setting,
                        symbol="triangle",
                    )
                    .set_global_opts(
                        title_opts=opts.TitleOpts(title="簇中心"),
                        **global_setting,
                        yaxis_opts=opts.AxisOpts(
                            type_="value" if x1_is_continuous else "category",
                            is_scale=True,
                        ),
                        xaxis_opts=opts.AxisOpts(
                            type_="value" if x2_is_continuous else "category",
                            is_scale=True,
                        ),
                    )
                )
                c.overlap(b)
                if base_render is None:
                    base_render = c
                else:
                    base_render = base_render.overlap(c)
            render_list.append(base_render)
    means, x_range, data_type = statistics_assistant.get()
    return render_list, means, x_range, data_type
@plugin_func_loading(get_path(r"template/machinelearning"))
def training_visualization_center(x_data, class_data, y_data, center):
    """Draw class-coloured scatter charts for adjacent feature pairs, with centroids.

    Like training_visualization_more but only pairs each feature with its
    predecessor (i-1, i).  ``center`` supplies per-class centroid coordinates
    overlaid as triangle markers.  Returns (render_list, means, x_range,
    data_type).
    """
    x_data = x_data.transpose()  # iterate features, not samples
    if len(x_data) == 1:
        # pad a zero feature so a 2-D scatter is still possible
        x_data = np.array([x_data[0], np.zeros(len(x_data[0]))])
    statistics_assistant = quick_stats(x_data)
    render_list = []
    for i in range(len(x_data)):
        if i == 0:
            continue  # pair each feature with its predecessor
        x1 = x_data[i]  # vertical-axis feature
        x1_is_continuous = is_continuous(x1)
        x2 = x_data[i - 1]  # horizontal-axis feature
        x2_is_continuous = is_continuous(x2)
        base_render = None  # chart accumulated so far (the old C)
        for class_num in range(len(class_data)):
            n_class = class_data[class_num]
            x_1 = x1[y_data == n_class].tolist()
            x_2 = x2[y_data == n_class]
            x_2_new = np.unique(x_2)
            x_2 = x2[y_data == n_class].tolist()
            # unlike a plain scatter, x here is the vertical coordinate
            c = (
                Scatter() .add_xaxis(x_2) .add_yaxis(
                    f"{n_class}",
                    x_1,
                    **label_setting) .set_global_opts(
                    title_opts=opts.TitleOpts(
                        title=f"[{i-1}-{i}]训练数据散点图"),
                    **global_setting,
                    yaxis_opts=opts.AxisOpts(
                        type_="value" if x1_is_continuous else "category",
                        is_scale=True),
                    xaxis_opts=opts.AxisOpts(
                        type_="value" if x2_is_continuous else "category",
                        is_scale=True),
                ))
            c.add_xaxis(x_2_new)
            # overlay the cluster centroid; fall back to 0 when the
            # centroid lacks this feature index
            try:
                center_x_2 = [center[class_num][i - 1]]
            except BaseException:
                center_x_2 = [0]
            b = (
                Scatter() .add_xaxis(center_x_2) .add_yaxis(
                    f"[{n_class}]中心",
                    [
                        center[class_num][i]],
                    **label_setting,
                    symbol="triangle",
                ) .set_global_opts(
                    title_opts=opts.TitleOpts(
                        title="簇中心"),
                    **global_setting,
                    yaxis_opts=opts.AxisOpts(
                        type_="value" if x1_is_continuous else "category",
                        is_scale=True),
                    xaxis_opts=opts.AxisOpts(
                        type_="value" if x2_is_continuous else "category",
                        is_scale=True),
                ))
            c.overlap(b)
            if base_render is None:
                base_render = c
            else:
                base_render = base_render.overlap(c)
        render_list.append(base_render)
    means, x_range, data_type = statistics_assistant.get()
    return render_list, means, x_range, data_type
  3778. @plugin_func_loading(get_path(r"template/machinelearning"))
  3779. def training_visualization(x_data, class_, y_data): # 根据不同类别绘制x-x分类散点图
  3780. x_data = x_data.transpose()
  3781. if len(x_data) == 1:
  3782. x_data = np.array([x_data[0], np.zeros(len(x_data[0]))])
  3783. statistics_assistant = quick_stats(x_data)
  3784. render_list = []
  3785. for i in range(len(x_data)):
  3786. if i == 0:
  3787. continue
  3788. x1 = x_data[i] # x坐标
  3789. x1_is_continuous = is_continuous(x1)
  3790. x2 = x_data[i - 1] # y坐标
  3791. x2_is_continuous = is_continuous(x2)
  3792. render_list = None # 旧的C
  3793. for now_class in class_:
  3794. plot_x1 = x1[y_data == now_class].tolist()
  3795. plot_x2 = x2[y_data == now_class]
  3796. axis_x2 = np.unique(plot_x2)
  3797. plot_x2 = x2[y_data == now_class].tolist()
  3798. # x与散点图不同,这里是纵坐标
  3799. c = (
  3800. Scatter() .add_xaxis(plot_x2) .add_yaxis(
  3801. f"{now_class}",
  3802. plot_x1,
  3803. **label_setting) .set_global_opts(
  3804. title_opts=opts.TitleOpts(
  3805. title="训练数据散点图"),
  3806. **global_setting,
  3807. yaxis_opts=opts.AxisOpts(
  3808. type_="value" if x1_is_continuous else "category",
  3809. is_scale=True),
  3810. xaxis_opts=opts.AxisOpts(
  3811. type_="value" if x2_is_continuous else "category",
  3812. is_scale=True),
  3813. ))
  3814. c.add_xaxis(axis_x2)
  3815. if render_list is None:
  3816. render_list = c
  3817. else:
  3818. render_list = render_list.overlap(c)
  3819. render_list.append(render_list)
  3820. means, x_range, data_type = statistics_assistant.get()
  3821. return render_list, means, x_range, data_type
  3822. @plugin_func_loading(get_path(r"template/machinelearning"))
  3823. def training_visualization_no_class(x_data): # 根据绘制x-x分类散点图(无类别)
  3824. x_data = x_data.transpose()
  3825. if len(x_data) == 1:
  3826. x_data = np.array([x_data[0], np.zeros(len(x_data[0]))])
  3827. statistics_assistant = quick_stats(x_data)
  3828. render_list = []
  3829. for i in range(len(x_data)):
  3830. if i == 0:
  3831. continue
  3832. x1 = x_data[i] # x坐标
  3833. x1_is_continuous = is_continuous(x1)
  3834. x2 = x_data[i - 1] # y坐标
  3835. x2_is_continuous = is_continuous(x2)
  3836. x2_only = np.unique(x2)
  3837. # x与散点图不同,这里是纵坐标
  3838. c = (
  3839. Scatter() .add_xaxis(x2) .add_yaxis(
  3840. "",
  3841. x1.tolist(),
  3842. **label_setting) .set_global_opts(
  3843. title_opts=opts.TitleOpts(
  3844. title="训练数据散点图"),
  3845. **global_not_legend,
  3846. yaxis_opts=opts.AxisOpts(
  3847. type_="value" if x1_is_continuous else "category",
  3848. is_scale=True),
  3849. xaxis_opts=opts.AxisOpts(
  3850. type_="value" if x2_is_continuous else "category",
  3851. is_scale=True),
  3852. ))
  3853. c.add_xaxis(x2_only)
  3854. render_list.append(c)
  3855. means, x_range, data_type = statistics_assistant.get()
  3856. return render_list, means, x_range, data_type
  3857. def training_w(
  3858. x_data, class_list, y_data, w_list, b_list, x_means: list
  3859. ): # 针对分类问题绘制决策边界
  3860. x_data = x_data.transpose()
  3861. if len(x_data) == 1:
  3862. x_data = np.array([x_data[0], np.zeros(len(x_data[0]))])
  3863. render_list = []
  3864. x_means.append(0)
  3865. x_means = np.array(x_means)
  3866. for i in range(len(x_data)):
  3867. if i == 0:
  3868. continue
  3869. x1_is_continuous = is_continuous(x_data[i])
  3870. x2 = x_data[i - 1] # y坐标
  3871. x2_is_continuous = is_continuous(x2)
  3872. o_c = None # 旧的C
  3873. for class_num in range(len(class_list)):
  3874. n_class = class_list[class_num]
  3875. x2_only = np.unique(x2[y_data == n_class])
  3876. # x与散点图不同,这里是纵坐标
  3877. # 加入这个判断是为了解决sklearn历史遗留问题
  3878. if len(class_list) == 2: # 二分类问题
  3879. if class_num == 0:
  3880. continue
  3881. w = w_list[0]
  3882. b = b_list[0]
  3883. else: # 多分类问题
  3884. w = w_list[class_num]
  3885. b = b_list[class_num]
  3886. if x2_is_continuous:
  3887. x2_only = np.array(make_list(x2_only.min(), x2_only.max(), 5))
  3888. w = np.append(w, 0)
  3889. y_data = (
  3890. -(x2_only * w[i - 1]) / w[i]
  3891. + b
  3892. + (x_means[: i - 1] * w[: i - 1]).sum()
  3893. + (x_means[i + 1:] * w[i + 1:]).sum()
  3894. ) # 假设除了两个特征意外,其余特征均为means列表的数值
  3895. c = (
  3896. Line() .add_xaxis(x2_only) .add_yaxis(
  3897. f"决策边界:{n_class}=>[{i}]",
  3898. y_data.tolist(),
  3899. is_smooth=True,
  3900. **label_setting,
  3901. ) .set_global_opts(
  3902. title_opts=opts.TitleOpts(
  3903. title=f"系数w曲线"),
  3904. **global_setting,
  3905. yaxis_opts=opts.AxisOpts(
  3906. type_="value" if x1_is_continuous else "category",
  3907. is_scale=True),
  3908. xaxis_opts=opts.AxisOpts(
  3909. type_="value" if x2_is_continuous else "category",
  3910. is_scale=True),
  3911. ))
  3912. if o_c is None:
  3913. o_c = c
  3914. else:
  3915. o_c = o_c.overlap(c)
  3916. # 下面不要接任何代码,因为上面会continue
  3917. render_list.append(o_c)
  3918. return render_list
  3919. @plugin_func_loading(get_path(r"template/machinelearning"))
  3920. def regress_w(x_data, w_data: np.array, intercept_b, x_means: list): # 针对回归问题(y-x图)
  3921. x_data = x_data.transpose()
  3922. if len(x_data) == 1:
  3923. x_data = np.array([x_data[0], np.zeros(len(x_data[0]))])
  3924. render_list = []
  3925. x_means.append(0) # 确保mean[i+1]不会超出index
  3926. x_means = np.array(x_means)
  3927. w_data = np.append(w_data, 0)
  3928. for i in range(len(x_data)):
  3929. x1 = x_data[i]
  3930. x1_is_continuous = is_continuous(x1)
  3931. if x1_is_continuous:
  3932. x1 = np.array(make_list(x1.min(), x1.max(), 5))
  3933. x1_only = np.unique(x1)
  3934. # 假设除了两个特征意外,其余特征均为means列表的数值
  3935. y_data = (
  3936. x1_only * w_data[i]
  3937. + intercept_b
  3938. + (x_means[:i] * w_data[:i]).sum()
  3939. + (x_means[i + 1:] * w_data[i + 1:]).sum()
  3940. )
  3941. y_is_continuous = is_continuous(y_data)
  3942. c = (
  3943. Line() .add_xaxis(x1_only) .add_yaxis(
  3944. f"拟合结果=>[{i}]",
  3945. y_data.tolist(),
  3946. is_smooth=True,
  3947. **label_setting) .set_global_opts(
  3948. title_opts=opts.TitleOpts(
  3949. title=f"系数w曲线"),
  3950. **global_setting,
  3951. yaxis_opts=opts.AxisOpts(
  3952. type_="value" if y_is_continuous else None,
  3953. is_scale=True),
  3954. xaxis_opts=opts.AxisOpts(
  3955. type_="value" if x1_is_continuous else None,
  3956. is_scale=True),
  3957. ))
  3958. render_list.append(c)
  3959. return render_list
  3960. @plugin_func_loading(get_path(r"template/machinelearning"))
  3961. def regress_visualization(x_data, y_data): # y-x数据图
  3962. x_data = x_data.transpose()
  3963. y_is_continuous = is_continuous(y_data)
  3964. statistics_assistant = quick_stats(x_data)
  3965. render_list = []
  3966. try:
  3967. visualmap_opts = opts.VisualMapOpts(
  3968. is_show=True,
  3969. max_=int(y_data.max()) + 1,
  3970. min_=int(y_data.min()),
  3971. pos_right="3%",
  3972. )
  3973. except BaseException:
  3974. visualmap_opts = None
  3975. y_is_continuous = False
  3976. for i in range(len(x_data)):
  3977. x1 = x_data[i] # x坐标
  3978. x1_is_continuous = is_continuous(x1)
  3979. # 不转换成list因为保持dtype的精度,否则绘图会出现各种问题(数值重复)
  3980. if not y_is_continuous and x1_is_continuous:
  3981. y_is_continuous, x1_is_continuous = x1_is_continuous, y_is_continuous
  3982. x1, y_data = y_data, x1
  3983. c = (
  3984. Scatter()
  3985. .add_xaxis(x1.tolist()) # 研究表明,这个是横轴
  3986. .add_yaxis("数据", y_data.tolist(), **label_setting)
  3987. .set_global_opts(
  3988. title_opts=opts.TitleOpts(title="预测类型图"),
  3989. **global_setting,
  3990. yaxis_opts=opts.AxisOpts(
  3991. type_="value" if y_is_continuous else "category", is_scale=True
  3992. ),
  3993. xaxis_opts=opts.AxisOpts(
  3994. type_="value" if x1_is_continuous else "category", is_scale=True
  3995. ),
  3996. visualmap_opts=visualmap_opts,
  3997. )
  3998. )
  3999. c.add_xaxis(np.unique(x1))
  4000. render_list.append(c)
  4001. means, x_range, data_type = statistics_assistant.get()
  4002. return render_list, means, x_range, data_type
  4003. @plugin_func_loading(get_path(r"template/machinelearning"))
  4004. def feature_visualization(x_data, data_name=""): # x-x数据图
  4005. seeting = global_setting if data_name else global_not_legend
  4006. x_data = x_data.transpose()
  4007. only = False
  4008. if len(x_data) == 1:
  4009. x_data = np.array([x_data[0], np.zeros(len(x_data[0]))])
  4010. only = True
  4011. render_list = []
  4012. for i in range(len(x_data)):
  4013. for a in range(len(x_data)):
  4014. if a <= i:
  4015. continue # 重复内容,跳过
  4016. x1 = x_data[i] # x坐标
  4017. x1_is_continuous = is_continuous(x1)
  4018. x2 = x_data[a] # y坐标
  4019. x2_is_continuous = is_continuous(x2)
  4020. x2_only = np.unique(x2)
  4021. if only:
  4022. x2_is_continuous = False
  4023. # x与散点图不同,这里是纵坐标
  4024. c = (
  4025. Scatter() .add_xaxis(x2) .add_yaxis(
  4026. data_name,
  4027. x1,
  4028. **label_setting) .set_global_opts(
  4029. title_opts=opts.TitleOpts(
  4030. title=f"[{i}-{a}]数据散点图"),
  4031. **seeting,
  4032. yaxis_opts=opts.AxisOpts(
  4033. type_="value" if x1_is_continuous else "category",
  4034. is_scale=True),
  4035. xaxis_opts=opts.AxisOpts(
  4036. type_="value" if x2_is_continuous else "category",
  4037. is_scale=True),
  4038. ))
  4039. c.add_xaxis(x2_only)
  4040. render_list.append(c)
  4041. return render_list
  4042. @plugin_func_loading(get_path(r"template/machinelearning"))
  4043. def feature_visualization_format(x_data, data_name=""): # x-x数据图
  4044. seeting = global_setting if data_name else global_not_legend
  4045. x_data = x_data.transpose()
  4046. only = False
  4047. if len(x_data) == 1:
  4048. x_data = np.array([x_data[0], np.zeros(len(x_data[0]))])
  4049. only = True
  4050. render_list = []
  4051. for i in range(len(x_data)):
  4052. for a in range(len(x_data)):
  4053. if a <= i:
  4054. continue # 重复内容,跳过(a读取的是i后面的)
  4055. x1 = x_data[i] # x坐标
  4056. x1_is_continuous = is_continuous(x1)
  4057. x2 = x_data[a] # y坐标
  4058. x2_is_continuous = is_continuous(x2)
  4059. x2_only = np.unique(x2)
  4060. x1_list = x1.astype(np.str).tolist()
  4061. for j in range(len(x1_list)):
  4062. x1_list[j] = [x1_list[j], f"特征{j}"]
  4063. if only:
  4064. x2_is_continuous = False
  4065. # x与散点图不同,这里是纵坐标
  4066. c = (
  4067. Scatter() .add_xaxis(x2) .add_yaxis(
  4068. data_name,
  4069. x1_list,
  4070. **label_setting) .set_global_opts(
  4071. title_opts=opts.TitleOpts(
  4072. title=f"[{i}-{a}]数据散点图"),
  4073. **seeting,
  4074. yaxis_opts=opts.AxisOpts(
  4075. type_="value" if x1_is_continuous else "category",
  4076. is_scale=True),
  4077. xaxis_opts=opts.AxisOpts(
  4078. type_="value" if x2_is_continuous else "category",
  4079. is_scale=True),
  4080. tooltip_opts=opts.TooltipOpts(
  4081. is_show=True,
  4082. axis_pointer_type="cross",
  4083. formatter="{c}"),
  4084. ))
  4085. c.add_xaxis(x2_only)
  4086. render_list.append(c)
  4087. return render_list
  4088. @plugin_func_loading(get_path(r"template/machinelearning"))
  4089. def discrete_feature_visualization(x_data, data_name=""): # 必定离散x-x数据图
  4090. seeting = global_setting if data_name else global_not_legend
  4091. x_data = x_data.transpose()
  4092. if len(x_data) == 1:
  4093. x_data = np.array([x_data[0], np.zeros(len(x_data[0]))])
  4094. render_list = []
  4095. for i in range(len(x_data)):
  4096. for a in range(len(x_data)):
  4097. if a <= i:
  4098. continue # 重复内容,跳过
  4099. x1 = x_data[i] # x坐标
  4100. x2 = x_data[a] # y坐标
  4101. x2_only = np.unique(x2)
  4102. # x与散点图不同,这里是纵坐标
  4103. c = (
  4104. Scatter()
  4105. .add_xaxis(x2)
  4106. .add_yaxis(data_name, x1, **label_setting)
  4107. .set_global_opts(
  4108. title_opts=opts.TitleOpts(title=f"[{i}-{a}]数据散点图"),
  4109. **seeting,
  4110. yaxis_opts=opts.AxisOpts(type_="category", is_scale=True),
  4111. xaxis_opts=opts.AxisOpts(type_="category", is_scale=True),
  4112. )
  4113. )
  4114. c.add_xaxis(x2_only)
  4115. render_list.append(c)
  4116. return render_list
  4117. @plugin_func_loading(get_path(r"template/machinelearning"))
  4118. def conversion_control(y_data, x_data, tab): # 合并两x-x图
  4119. if isinstance(x_data, np.ndarray) and isinstance(y_data, np.ndarray):
  4120. get_x = feature_visualization(x_data, "原数据") # 原来
  4121. get_y = feature_visualization(y_data, "转换数据") # 转换
  4122. for i in range(len(get_x)):
  4123. tab.add(get_x[i].overlap(get_y[i]), f"[{i}]数据x-x散点图")
  4124. return tab
  4125. @plugin_func_loading(get_path(r"template/machinelearning"))
  4126. def conversion_separate(y_data, x_data, tab): # 并列显示两x-x图
  4127. if isinstance(x_data, np.ndarray) and isinstance(y_data, np.ndarray):
  4128. get_x = feature_visualization(x_data, "原数据") # 原来
  4129. get_y = feature_visualization(y_data, "转换数据") # 转换
  4130. for i in range(len(get_x)):
  4131. try:
  4132. tab.add(get_x[i], f"[{i}]数据x-x散点图")
  4133. except IndexError:
  4134. pass
  4135. try:
  4136. tab.add(get_y[i], f"[{i}]变维数据x-x散点图")
  4137. except IndexError:
  4138. pass
  4139. return tab
  4140. @plugin_func_loading(get_path(r"template/machinelearning"))
  4141. def conversion_separate_format(y_data, tab): # 并列显示两x-x图
  4142. if isinstance(y_data, np.ndarray):
  4143. get_y = feature_visualization_format(y_data, "转换数据") # 转换
  4144. for i in range(len(get_y)):
  4145. tab.add(get_y[i], f"[{i}]变维数据x-x散点图")
  4146. return tab
  4147. @plugin_func_loading(get_path(r"template/machinelearning"))
  4148. def conversion_separate_wh(w_array, h_array, tab): # 并列显示两x-x图
  4149. if isinstance(w_array, np.ndarray) and isinstance(w_array, np.ndarray):
  4150. get_x = feature_visualization_format(w_array, "W矩阵数据") # 原来
  4151. get_y = feature_visualization(
  4152. h_array.transpose(), "H矩阵数据"
  4153. ) # 转换(先转T,再转T变回原样,W*H是横对列)
  4154. for i in range(len(get_x)):
  4155. try:
  4156. tab.add(get_x[i], f"[{i}]W矩阵x-x散点图")
  4157. except IndexError:
  4158. pass
  4159. try:
  4160. tab.add(get_y[i], f"[{i}]H.T矩阵x-x散点图")
  4161. except IndexError:
  4162. pass
  4163. return tab
  4164. @plugin_func_loading(get_path(r"template/machinelearning"))
  4165. def make_bar(name, value, tab): # 绘制柱状图
  4166. c = (
  4167. Bar()
  4168. .add_xaxis([f"[{i}]特征" for i in range(len(value))])
  4169. .add_yaxis(name, value, **label_setting)
  4170. .set_global_opts(title_opts=opts.TitleOpts(title="系数w柱状图"), **global_setting)
  4171. )
  4172. tab.add(c, name)
  4173. @plugin_func_loading(get_path(r"template/machinelearning"))
  4174. def judging_digits(num: (int, float)): # 查看小数位数
  4175. a = str(abs(num)).split(".")[0]
  4176. if a == "":
  4177. raise ValueError
  4178. return len(a)
@plugin_func_loading(get_path(r"template/machinelearning"))
def num_str(num, accuracy):
    """Round ``num`` and zero-pad the string so its digit count (integer and
    fractional digits combined, decimal point excluded) equals ``accuracy``.
    """
    num = str(round(float(num), accuracy))
    if len(num.replace(".", "")) == accuracy:
        return num  # already exactly ``accuracy`` digits
    n = num.split(".")
    if len(n) == 0:  # no fractional part
        # NOTE(review): ``str.split`` always returns at least one element, so
        # this branch looks unreachable — presumably ``len(n) == 1`` was
        # intended; confirm before changing.
        return num + "." + "0" * (accuracy - len(num))
    else:
        # len(num) counts the decimal point too, hence the +1 compensation.
        return num + "0" * (accuracy - len(num) + 1)
  4189. @plugin_func_loading(get_path(r"template/machinelearning"))
  4190. def des_to_csv(save_dir, name, data, columns=None, row=None):
  4191. save_dir = save_dir + os.sep + name + ".csv"
  4192. print(columns)
  4193. print(row)
  4194. print(data)
  4195. DataFrame(data, columns=columns, index=row).to_csv(
  4196. save_dir,
  4197. header=False if columns is None else True,
  4198. index=False if row is None else True,
  4199. )
  4200. return data
  4201. @plugin_func_loading(get_path(r"template/machinelearning"))
  4202. def pack(output_filename, source_dir):
  4203. with tarfile.open(output_filename, "w:gz") as tar:
  4204. tar.add(source_dir, arcname=basename(source_dir))
  4205. return output_filename
def set_global(
    more=more_global,
    all=all_global,
    csv=csv_global,
    clf=clf_global,
    tar=tar_global,
    new=new_dir_global,
):
    """Overwrite the module-level export switches.

    NOTE(review): the defaults are captured when this function is *defined*,
    so ``set_global()`` with no arguments resets every switch to its
    import-time value, not its current value — confirm this is intended.
    ``all`` also shadows the builtin of the same name.
    """
    global more_global, all_global, csv_global, clf_global, tar_global, new_dir_global
    more_global = more  # whether to plot using all features
    all_global = all  # whether to export charts
    csv_global = csv  # whether to export CSV files
    clf_global = clf  # whether to export the trained model
    tar_global = tar  # whether to pack results into a tar archive
    new_dir_global = new  # whether to create a fresh output directory
  4221. class MachineLearnerInit(
  4222. LearnerIO, Calculation, LearnerMerge, LearnerSplit, LearnerDimensions, LearnerShape, metaclass=ABCMeta
  4223. ):
  4224. def __init__(self, *args, **kwargs):
  4225. super().__init__(*args, **kwargs)
  4226. self.learner = {} # 记录机器
  4227. self.learn_dict = {
  4228. "Line": LineModel,
  4229. "Ridge": LineModel,
  4230. "Lasso": LineModel,
  4231. "LogisticRegression": LogisticregressionModel,
  4232. "Knn_class": KnnModel,
  4233. "Knn": KnnModel,
  4234. "Tree_class": TreeModel,
  4235. "Tree": TreeModel,
  4236. "Forest": ForestModel,
  4237. "Forest_class": ForestModel,
  4238. "GradientTree_class": GradienttreeModel,
  4239. "GradientTree": GradienttreeModel,
  4240. "Variance": VarianceModel,
  4241. "SelectKBest": SelectkbestModel,
  4242. "Z-Score": StandardizationModel,
  4243. "MinMaxScaler": MinmaxscalerModel,
  4244. "LogScaler": LogscalerModel,
  4245. "atanScaler": AtanscalerModel,
  4246. "decimalScaler": DecimalscalerModel,
  4247. "sigmodScaler": SigmodscalerModel,
  4248. "Mapzoom": MapzoomModel,
  4249. "Fuzzy_quantization": FuzzyQuantizationModel,
  4250. "Regularization": RegularizationModel,
  4251. "Binarizer": BinarizerModel,
  4252. "Discretization": DiscretizationModel,
  4253. "Label": LabelModel,
  4254. "OneHotEncoder": OneHotEncoderModel,
  4255. "Missed": MissedModel,
  4256. "PCA": PcaModel,
  4257. "RPCA": RpcaModel,
  4258. "KPCA": KpcaModel,
  4259. "LDA": LdaModel,
  4260. "SVC": SvcModel,
  4261. "SVR": SvrModel,
  4262. "MLP": MlpModel,
  4263. "MLP_class": MlpModel,
  4264. "NMF": NmfModel,
  4265. "t-SNE": TsneModel,
  4266. "k-means": KmeansModel,
  4267. "Agglomerative": AgglomerativeModel,
  4268. "DBSCAN": DbscanModel,
  4269. "ClassBar": ClassBar,
  4270. "FeatureScatter": NearFeatureScatter,
  4271. "FeatureScatterClass": NearFeatureScatterClass,
  4272. "FeatureScatter_all": NearFeatureScatterMore,
  4273. "FeatureScatterClass_all": NearFeatureScatterClassMore,
  4274. "HeatMap": NumpyHeatMap,
  4275. "FeatureY-X": FeatureScatterYX,
  4276. "ClusterTree": ClusterTree,
  4277. "MatrixScatter": MatrixScatter,
  4278. "Correlation": Corr,
  4279. "Statistics": DataAnalysis,
  4280. "Fast_Fourier": FastFourier,
  4281. "Reverse_Fast_Fourier": ReverseFastFourier,
  4282. "[2]Reverse_Fast_Fourier": ReverseFastFourierTwonumpy,
  4283. }
  4284. self.data_type = {} # 记录机器的类型
  4285. @staticmethod
  4286. def learner_parameters(parameters, data_type): # 解析参数
  4287. original_parameter = {}
  4288. target_parameter = {}
  4289. # 输入数据
  4290. exec(parameters, original_parameter)
  4291. # 处理数据
  4292. if data_type in ("MLP", "MLP_class"):
  4293. target_parameter["alpha"] = float(
  4294. original_parameter.get("alpha", 0.0001)
  4295. ) # MLP正则化用
  4296. else:
  4297. target_parameter["alpha"] = float(
  4298. original_parameter.get("alpha", 1.0)
  4299. ) # L1和L2正则化用
  4300. target_parameter["C"] = float(
  4301. original_parameter.get(
  4302. "C", 1.0)) # L1和L2正则化用
  4303. if data_type in ("MLP", "MLP_class"):
  4304. target_parameter["max_iter"] = int(
  4305. original_parameter.get("max_iter", 200)
  4306. ) # L1和L2正则化用
  4307. else:
  4308. target_parameter["max_iter"] = int(
  4309. original_parameter.get("max_iter", 1000)
  4310. ) # L1和L2正则化用
  4311. target_parameter["n_neighbors"] = int(
  4312. original_parameter.get("K_knn", 5)
  4313. ) # knn邻居数 (命名不同)
  4314. target_parameter["p"] = int(original_parameter.get("p", 2)) # 距离计算方式
  4315. target_parameter["nDim_2"] = bool(
  4316. original_parameter.get("nDim_2", True)
  4317. ) # 数据是否降维
  4318. if data_type in ("Tree", "Forest", "GradientTree"):
  4319. target_parameter["criterion"] = (
  4320. "mse" if bool(
  4321. original_parameter.get(
  4322. "is_MSE",
  4323. True)) else "mae") # 是否使用基尼不纯度
  4324. else:
  4325. target_parameter["criterion"] = (
  4326. "gini" if bool(
  4327. original_parameter.get(
  4328. "is_Gini",
  4329. True)) else "entropy") # 是否使用基尼不纯度
  4330. target_parameter["splitter"] = (
  4331. "random" if bool(
  4332. original_parameter.get(
  4333. "is_random",
  4334. False)) else "best") # 决策树节点是否随机选用最优
  4335. target_parameter["max_features"] = original_parameter.get(
  4336. "max_features", None
  4337. ) # 选用最多特征数
  4338. target_parameter["max_depth"] = original_parameter.get(
  4339. "max_depth", None
  4340. ) # 最大深度
  4341. target_parameter["min_samples_split"] = int(
  4342. original_parameter.get("min_samples_split", 2)
  4343. ) # 是否继续划分(容易造成过拟合)
  4344. target_parameter["P"] = float(
  4345. original_parameter.get(
  4346. "min_samples_split", 0.8))
  4347. target_parameter["k"] = original_parameter.get("k", 1)
  4348. target_parameter["score_func"] = {
  4349. "chi2": chi2,
  4350. "f_classif": f_classif,
  4351. "mutual_info_classif": mutual_info_classif,
  4352. "f_regression": f_regression,
  4353. "mutual_info_regression": mutual_info_regression,
  4354. }.get(original_parameter.get("score_func", "f_classif"), f_classif)
  4355. target_parameter["feature_range"] = tuple(
  4356. original_parameter.get("feature_range", (0, 1))
  4357. )
  4358. target_parameter["norm"] = original_parameter.get(
  4359. "norm", "l2") # 正则化的方式L1或者L2
  4360. target_parameter["threshold"] = float(
  4361. original_parameter.get("threshold", 0.0)
  4362. ) # 二值化特征
  4363. target_parameter["split_range"] = list(
  4364. original_parameter.get("split_range", [0])
  4365. ) # 二值化特征
  4366. target_parameter["ndim_up"] = bool(
  4367. original_parameter.get("ndim_up", False))
  4368. target_parameter["miss_value"] = original_parameter.get(
  4369. "miss_value", np.nan)
  4370. target_parameter["fill_method"] = original_parameter.get(
  4371. "fill_method", "mean")
  4372. target_parameter["fill_value"] = original_parameter.get(
  4373. "fill_value", None)
  4374. target_parameter["n_components"] = original_parameter.get(
  4375. "n_components", 1)
  4376. target_parameter["kernel"] = original_parameter.get(
  4377. "kernel", "rbf" if data_type in ("SVR", "SVC") else "linear"
  4378. )
  4379. target_parameter["n_Tree"] = original_parameter.get("n_Tree", 100)
  4380. target_parameter["gamma"] = original_parameter.get("gamma", 1)
  4381. target_parameter["hidden_size"] = tuple(
  4382. original_parameter.get("hidden_size", (100,))
  4383. )
  4384. target_parameter["activation"] = str(
  4385. original_parameter.get("activation", "relu")
  4386. )
  4387. target_parameter["solver"] = str(
  4388. original_parameter.get("solver", "adam"))
  4389. if data_type in ("k-means",):
  4390. target_parameter["n_clusters"] = int(
  4391. original_parameter.get("n_clusters", 8)
  4392. )
  4393. else:
  4394. target_parameter["n_clusters"] = int(
  4395. original_parameter.get("n_clusters", 2)
  4396. )
  4397. target_parameter["eps"] = float(
  4398. original_parameter.get(
  4399. "n_clusters", 0.5))
  4400. target_parameter["min_samples"] = int(
  4401. original_parameter.get("n_clusters", 5))
  4402. target_parameter["white_PCA"] = bool(
  4403. original_parameter.get("white_PCA", False))
  4404. return target_parameter
  4405. def get_learner(self, name):
  4406. return self.learner[name]
  4407. def get_learner_type(self, name):
  4408. return self.data_type[name]
  4409. @plugin_class_loading(get_path(r"template/machinelearning"))
  4410. class MachineLearnerAdd(MachineLearnerInit, metaclass=ABCMeta):
  4411. def add_learner(self, learner_str, parameters=""):
  4412. get = self.learn_dict[learner_str]
  4413. name = f"Le[{len(self.learner)}]{learner_str}"
  4414. # 参数调节
  4415. args_use = self.learner_parameters(parameters, learner_str)
  4416. # 生成学习器
  4417. self.learner[name] = get(model=learner_str, args_use=args_use)
  4418. self.data_type[name] = learner_str
  4419. def add_curve_fitting(self, learner):
  4420. named_domain = {}
  4421. exec(learner, named_domain)
  4422. name = f'Le[{len(self.learner)}]{named_domain.get("name", "SELF")}'
  4423. func = named_domain.get("f", lambda x, k, b: k * x + b)
  4424. self.learner[name] = CurveFitting(name, learner, func)
  4425. self.data_type[name] = "Curve_fitting"
  4426. def add_select_from_model(self, learner, parameters=""):
  4427. model = self.get_learner(learner)
  4428. name = f"Le[{len(self.learner)}]SelectFrom_Model:{learner}"
  4429. # 参数调节
  4430. args_use = self.learner_parameters(parameters, "SelectFrom_Model")
  4431. # 生成学习器
  4432. self.learner[name] = SelectFromModel(
  4433. learner=model, args_use=args_use, Dic=self.learn_dict
  4434. )
  4435. self.data_type[name] = "SelectFrom_Model"
  4436. def add_predictive_heat_map(self, learner, parameters=""):
  4437. model = self.get_learner(learner)
  4438. name = f"Le[{len(self.learner)}]Predictive_HeatMap:{learner}"
  4439. # 生成学习器
  4440. args_use = self.learner_parameters(parameters, "Predictive_HeatMap")
  4441. self.learner[name] = PredictiveHeatmap(
  4442. learner=model, args_use=args_use)
  4443. self.data_type[name] = "Predictive_HeatMap"
  4444. def add_predictive_heat_map_more(self, learner, parameters=""):
  4445. model = self.get_learner(learner)
  4446. name = f"Le[{len(self.learner)}]Predictive_HeatMap_More:{learner}"
  4447. # 生成学习器
  4448. args_use = self.learner_parameters(
  4449. parameters, "Predictive_HeatMap_More")
  4450. self.learner[name] = PredictiveHeatmapMore(
  4451. learner=model, args_use=args_use)
  4452. self.data_type[name] = "Predictive_HeatMap_More"
  4453. def add_view_data(self, learner, parameters=""):
  4454. model = self.get_learner(learner)
  4455. name = f"Le[{len(self.learner)}]View_data:{learner}"
  4456. # 生成学习器
  4457. args_use = self.learner_parameters(parameters, "View_data")
  4458. self.learner[name] = ViewData(learner=model, args_use=args_use)
  4459. self.data_type[name] = "View_data"
  4460. @plugin_class_loading(get_path(r"template/machinelearning"))
  4461. class MachineLearnerScore(MachineLearnerInit, metaclass=ABCMeta):
  4462. def score(self, name_x, name_y, learner): # Score_Only表示仅评分 Fit_Simp 是普遍类操作
  4463. model = self.get_learner(learner)
  4464. x = self.get_sheet(name_x)
  4465. y = self.get_sheet(name_y)
  4466. return model.score(x, y)
  4467. def model_evaluation(self, learner, save_dir, name_x, name_y, func=0): # 显示参数
  4468. x = self.get_sheet(name_x)
  4469. y = self.get_sheet(name_y)
  4470. if new_dir_global:
  4471. dic = save_dir + f"{os.sep}{learner}分类评分[CoTan]"
  4472. new_dic = dic
  4473. a = 0
  4474. while exists(new_dic): # 直到他不存在 —— False
  4475. new_dic = dic + f"[{a}]"
  4476. a += 1
  4477. mkdir(new_dic)
  4478. else:
  4479. new_dic = save_dir
  4480. model = self.get_learner(learner)
  4481. # 打包
  4482. func = [
  4483. model.class_score,
  4484. model.regression_score,
  4485. model.clusters_score][func]
  4486. save = func(new_dic, x, y)[0]
  4487. if tar_global:
  4488. pack(f"{new_dic}.tar.gz", new_dic)
  4489. return save, new_dic
  4490. def model_visualization(self, learner, save_dir): # 显示参数
  4491. if new_dir_global:
  4492. dic = save_dir + f"{os.sep}{learner}数据[CoTan]"
  4493. new_dic = dic
  4494. a = 0
  4495. while exists(new_dic): # 直到他不存在 —— False
  4496. new_dic = dic + f"[{a}]"
  4497. a += 1
  4498. mkdir(new_dic)
  4499. else:
  4500. new_dic = save_dir
  4501. model = self.get_learner(learner)
  4502. if (not (model.model is None) or not (
  4503. model.model is list)) and clf_global:
  4504. joblib.dump(model.model, new_dic + f"{os.sep}MODEL.model") # 保存模型
  4505. # 打包
  4506. save = model.data_visualization(new_dic)[0]
  4507. if tar_global:
  4508. pack(f"{new_dic}.tar.gz", new_dic)
  4509. return save, new_dic
@plugin_class_loading(get_path(r"template/machinelearning"))
class LearnerActions(MachineLearnerInit, metaclass=ABCMeta):
    """Train registered learners and run predictions on stored sheets."""

    def fit_model(self, x_name, y_name, learner, split=0.3, *args, **kwargs):
        """Fit ``learner`` on the sheets named ``x_name``/``y_name``.

        ``split`` is forwarded as the train/test split ratio; results are
        reported back through ``self.add_form``.
        """
        x_data = self.get_sheet(x_name)
        y_data = self.get_sheet(y_name)
        model = self.get_learner(learner)
        return model.fit_model(
            x_data, y_data, split=split, x_name=x_name, add_func=self.add_form
        )

    def predict(self, x_name, learner, **kwargs):
        """Predict on sheet ``x_name`` and store the result as a new sheet."""
        x_data = self.get_sheet(x_name)
        model = self.get_learner(learner)
        y_data, name = model.predict(
            x_data, x_name=x_name, add_func=self.add_form)
        # Register the prediction as a new sheet named "<x_name>:<name>".
        self.add_form(y_data, f"{x_name}:{name}")
        return y_data