Crawler_controller.py 43 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995
  1. from selenium import webdriver
  2. import threading
  3. import time
  4. from os.path import exists
  5. from os import mkdir
  6. import hashlib
  7. from time import sleep
  8. import bs4
  9. import re as regular
  10. import Information_storage
  11. import requests
  12. from selenium.webdriver.common.action_chains import ActionChains
  13. from selenium.webdriver.common.keys import Keys
  # Map the key names accepted by this module to Selenium key codes.
  keys_name_dict = {
      'ctrl': Keys.CONTROL,
      'shift': Keys.SHIFT,
      'tab': Keys.TAB,
      'left_ctrl': Keys.LEFT_CONTROL,
      'left_shift': Keys.LEFT_SHIFT,
      'left_alt': Keys.LEFT_ALT,
      'ALT': Keys.ALT,
      'enter': Keys.ENTER,
      'return': Keys.RETURN,
      'backspace': Keys.BACKSPACE,
      'del': Keys.DELETE,
      'pgup': Keys.PAGE_UP,
      'pgdn': Keys.PAGE_DOWN,
      'home': Keys.HOME,
      'end': Keys.END,
      # NOTE(review): 'esc' is bound to Keys.CANCEL rather than Keys.ESCAPE - confirm this is intended.
      'esc': Keys.CANCEL,
      'insert': Keys.INSERT,
      'meta': Keys.META,
      'up': Keys.UP,
      'down': Keys.DOWN,
      'right': Keys.RIGHT,
      'left': Keys.LEFT,
  }
  # Register the function keys F1 - F12 under the names 'f1' .. 'f12'.
  # getattr performs the same attribute lookup the original did through eval().
  for i in range(1, 13):
      keys_name_dict[f'f{i}'] = getattr(Keys, f'F{i}')
  # Module-wide storage object; parser steps call add_DataBase() on it.
  data_base = Information_storage.DataBase_Home()
  class PAGE:
      """Base record for one page request: target url, user agent, mode label and timeout."""

      def __init__(self, time_out):
          # time_out is stored unchanged; url and UA start out empty.
          self.time_out = time_out
          self.func = 'PAGE'
          self.UA = ''
          self.url = ''

      def __str__(self):
          # Rendered as "[<timeout>s]<mode>-<url>:UA><user agent>".
          return f'[{self.time_out}s]{self.func}-{self.url}:UA>{self.UA}'
  class REQUESTS_Base(PAGE):
      """Shared state for pages that are fetched through ``requests`` (GET and POST)."""

      def init(self, UA, url, cookies):
          """Store the request target and build the HTTP headers.

          UA: value for the User-Agent header; '' selects a built-in desktop browser string.
          url: address to request.
          cookies: stored on the instance as ``self.cookies``.
          """
          if UA == '':
              # The header value must be the bare agent string. The previous default
              # carried a Chrome command-line prefix ('--user-agent ="..."'), which is
              # not a valid User-Agent header value.
              UA = ('Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                    'AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.132 Safari/537.36 Edg/80.0.361.66')
          self.UA = UA
          self.headers = {
              'Accept': 'text/html, application/xhtml+xml, image/jxr, */*',
              # Header names cannot contain spaces; this was 'Accept - Encoding'.
              'Accept-Encoding': 'gzip, deflate',
              'Accept-Language': 'zh-Hans-CN, zh-Hans; q=0.5',
              'Connection': 'Keep-Alive',
              'User-Agent': UA,
          }
          self.url = url
          self.cookies = cookies
          self.new = True
  class URL_POST(REQUESTS_Base):
      """Page fetched with a ``requests`` POST call (mode label 'post')."""

      def __init__(self, url, data, time_out, UA='', cookies=None, **kwargs):
          # Keyword arguments beyond the named ones are accepted and not used.
          super().__init__(time_out)
          self.requests = requests.post
          self.data = data
          self.func = 'post'
          self.init(UA, url, cookies)

      def __str__(self):
          # Base description followed by the POST payload.
          base_text = super().__str__()
          return base_text + f';data>{self.data}'
  class URL_GET(REQUESTS_Base):
      """Page fetched with a plain ``requests`` GET call (mode label 'simplify_get')."""

      def __init__(self, url, time_out, UA='', cookies=None, **kwargs):
          # Keyword arguments beyond the named ones are accepted and not used.
          super().__init__(time_out)
          self.requests = requests.get
          self.func = 'simplify_get'
          self.init(UA, url, cookies)
  class URL_PAGE(PAGE):
      """Page opened in a Selenium-driven Chrome browser; builds the ChromeOptions to launch it with."""

      def __init__(self, url, time_out, first_run=False, head=False, no_plugins=True, no_js=False, no_java=False,
                   no_img=False, UA='', cookies=None, new=False, down_load_dir='', **kwargs):
          """Record the target and translate the boolean switches into Chrome options.

          cookies: stored as ``self.cookies`` (where this page's cookies are kept).
          new: stored as ``self.new`` (new page vs. new browser).
          down_load_dir: value for Chrome's 'download.default_directory' preference.
          Keyword arguments beyond the named ones are accepted and not used.
          """
          super(URL_PAGE, self).__init__(time_out)
          self.url = url
          self.func = 'get'
          self.options = webdriver.ChromeOptions()
          self.cookies = cookies
          self.new = new
          self.down_load_dir = down_load_dir
          self.init(first_run, head, no_plugins, no_js, no_java, no_img, UA)

      def init(self, first_run, head, no_plugins, no_js, no_java, no_img, UA):
          """Fill ``self.options`` from the switches and store the user agent string."""
          self.options.add_argument('disable-infobars')
          prefs = {'profile.default_content_settings.popups': 0,
                   'download.default_directory': self.down_load_dir}
          self.options.add_experimental_option('prefs', prefs)  # download settings
          if first_run:
              self.options.add_argument('-first run')
          if head:
              # Despite the parameter name, a true value selects headless mode.
              # (A leftover debug print('FFF') was removed here.)
              self.options.add_argument('--headless')
              self.options.add_argument('--disable-gpu')
          if no_plugins:
              self.options.add_argument('--disable-plugins')
          if no_js:
              self.options.add_argument('--disable-javascript')
          if no_java:
              self.options.add_argument('--disable-java')
          if no_img:
              self.options.add_argument('blink-settings=imagesEnabled=false')
          if UA == '':
              UA = ('user-agent ="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
                    'Chrome/80.0.3987.132 Safari/537.36"')
          # NOTE(review): the user agent is only stored; no option passes it to Chrome
          # (the add_argument call for it was commented out in the original).
          self.UA = UA

      def __str__(self):
          # Unlike the base class, the timeout is not part of this description.
          return f'{self.func}-{self.url}:UA>{self.UA}'
  class url:
      """URL manager: queue of pending pages, history of every url queued, and url filters."""

      num = 0  # number of url managers created so far

      def __init__(self, dic=f'', dic_run=f''):
          """Open the two append-mode log files.

          dic: directory for the history file; also kept as ``self.save_dir``.
          dic_run: directory for the file that records urls as they are taken from the queue.
          """
          url.num += 1
          self.save_dir = dic
          dic += f'/url[{url.num}].cot_url'
          dic_run += f'/url_run[{url.num}].cot_url'
          self.dir = dic  # path of the history file (not a directory)
          self.dir_run = dic_run
          self.file = open(dic, 'a')  # receives every url added to the history
          self.file_run = open(dic_run, 'a')  # receives every url taken or deleted
          self.url_list = []  # pages waiting to be fetched
          self.url_history = []  # keys of everything queued so far
          self.filter = {}  # name -> predicate(url)

      def close(self):
          """Close both log files."""
          self.file.close()
          self.file_run.close()

      def filter_func(self, url, **kwargs):
          """Return True when every registered predicate accepts ``url`` (extra keywords are ignored)."""
          return all(predicate(url) for predicate in self.filter.values())

      def Add_func(self, func, name):
          """Register (or replace) the filter predicate stored under ``name``."""
          self.filter[name] = func

      def Del_func(self, index):
          """Remove the filter at position ``index`` in registration order."""
          del self.filter[list(self.filter)[index]]

      def return_func(self):
          """Return the names of the registered filters."""
          return list(self.filter)

      def add_url(self, url, func, data=None, **kwargs):
          """Queue ``url`` unless it is already in the history or rejected by a filter.

          func: 'get' (or '') for a browser page, 'simplify_get' for requests GET,
              anything else for requests POST with ``data``.
          kwargs: forwarded to the page object that is created.
          Returns True when the url was queued, False otherwise.
          """
          if func == '':
              func = 'get'
          # Browser pages are identified by url alone; other modes also include the payload.
          history_key = url if func == 'get' else url + str(data)
          if history_key in self.url_history or not self.filter_func(url, func=func):
              return False
          if func == 'get':
              # The download directory is the directory given to __init__; self.dir
              # (passed here before) is the path of the history *file*.
              page = URL_PAGE(url=url, **kwargs, down_load_dir=self.save_dir)
          elif func == 'simplify_get':
              page = URL_GET(url=url, **kwargs, down_load_dir=self.save_dir)
          else:
              page = URL_POST(url=url, data=data, **kwargs)
          self.url_list.append(page)
          self.url_history.append(history_key)
          self.__out_url(history_key)
          return True

      def del_url(self, index):
          """Drop the pending page at ``index`` and note the deletion in the run log."""
          self.__out_url_run(f'DELETE {self.url_list[index]}')
          del self.url_list[index]

      def get_url(self) -> "(URL_PAGE, URL_POST)":
          """Remove and return the first pending page, logging its url; IndexError when empty."""
          url_page = self.url_list[0]
          self.__out_url_run(url_page.url)
          del self.url_list[0]
          return url_page

      def __out_url(self, url):
          # Append to the history file and flush so the file is current after each write.
          self.file.write(f'{url}\n')
          self.file.flush()

      def __out_url_run(self, url):
          # Append to the run log and flush so the file is current after each write.
          self.file_run.write(f'{url}\n')
          self.file_run.flush()

      def finish(self):
          """Return True when no page is waiting."""
          return len(self.url_list) == 0

      def return_url(self):
          """Return a copy of the pending pages."""
          return self.url_list.copy()

      def return_url_history(self):
          """Return a copy of the history keys."""
          return self.url_history.copy()
  class Page_Downloader:
      """Load queued pages with a Selenium Chrome browser or with ``requests`` and track their cookies."""

      num = 0  # number of downloaders created so far

      def __init__(self, url: url, dic=''):
          """url: url manager that supplies the pages; dic: directory passed to the log and the parser."""
          self.url = url
          self.dir = dic
          self.log = Information_storage.log(dic)
          Page_Downloader.num += 1
          self.page_source_dict = {}  # saved page information
          self.cookie_Thread = None  # thread that polls the browser cookies
          self.browser = None  # Chrome driver, or the requests response of the last fetch
          self.cookie_dict = {}  # url -> cookies recorded for it
          self.cookie_dict_list = {}  # Selenium cookie lists, looked up by a page's ``cookies`` value
          self.lase_func = ''  # mode of the previous fetch
          self.break_ = False  # True while the cookie poller should keep running

      def close(self):
          """Close the log."""
          self.log.close()

      def stop(self):
          """Stop cookie polling and quit the browser; failures (e.g. no browser) are ignored."""
          try:
              self.break_ = False
              self.browser.quit()
              self.lase_func = ''
          except Exception:
              pass

      def strat_urlGet(self, *args, func_cookie):
          """Load the next queued url and return the driver (browser mode) or response (requests mode).

          func_cookie: callback that receives the current cookies as a list.
          A parser must have been attached with set_Page_Parser() first.
          """
          self.break_ = False  # ask a running cookie poller to stop
          self.page_source_dict = {}
          self.nowurl = self.url.get_url()
          url = self.nowurl.url
          if self.nowurl.func == 'get':
              self._load_with_browser(url, func_cookie)
          else:
              self._load_with_requests(url, func_cookie)
          self.lase_func = self.nowurl.func
          self.Parser.browser = self.browser
          self.Parser.init(url)
          return self.browser

      def _open_in_browser(self, url):
          # Apply the page's timeout to loading and to async scripts, then navigate.
          self.browser.set_page_load_timeout(self.nowurl.time_out)
          self.browser.set_script_timeout(self.nowurl.time_out)
          self.browser.get(url)

      def _load_with_browser(self, url, func_cookie):
          # Selenium mode: reuse or (re)start Chrome, load the url, restore cookies, start polling.
          page = self.nowurl
          if page.new == True and self.lase_func == 'get':  # explicit restart requested
              self.browser.quit()
              self.browser = webdriver.Chrome(chrome_options=page.options)
          try:
              self._open_in_browser(url)
          except Exception:
              # Reached on the first page (no driver yet), after a requests-mode fetch, or
              # when loading failed: start a fresh Chrome and try once more. The old driver
              # is closed first so a failed load does not leave a Chrome process behind.
              try:
                  self.browser.quit()
              except Exception:
                  pass
              self.browser = webdriver.Chrome(chrome_options=page.options)
              self._open_in_browser(url)
          if page.new == True:
              # Best effort: replace the browser cookies with the list saved under this
              # page's cookies key; a missing key or a rejected cookie ends the attempt.
              try:
                  saved = self.cookie_dict_list[page.cookies]
                  self.Tra_cookies()
                  for cookie in saved:
                      self.Add_cookies(cookie)
              except Exception:
                  pass
          self.start_cookies(func_cookie, url)

      def _load_with_requests(self, url, func_cookie):
          # requests mode: close a leftover browser, send the request, record the response cookies.
          if self.lase_func == 'get':
              try:
                  self.browser.quit()
              except Exception:
                  pass
          try:
              request_kwargs = {'cookies': self.cookie_dict[self.nowurl.cookies]}
              func_cookie([request_kwargs['cookies']])
          except Exception:
              # No cookies stored under that key (or the callback failed): send none.
              request_kwargs = {}
              func_cookie([])
          if self.nowurl.func == 'post':
              request_kwargs['data'] = self.nowurl.data
          self.browser = self.nowurl.requests(url, headers=self.nowurl.headers, **request_kwargs,
                                              timeout=self.nowurl.time_out)
          self.cookie_dict[url] = requests.utils.dict_from_cookiejar(self.browser.cookies)
          func_cookie([self.cookie_dict[url]])

      def start_cookies(self, func_cookie, url):
          """Start a thread that reports the browser cookies about twice a second.

          NOTE(review): a poller from a previous page is only signalled through
          ``break_`` and never joined, so it may still be running here - confirm.
          """
          self.break_ = True

          def update_cookie():
              while self.break_:
                  try:
                      cookies = self.browser.get_cookies()
                      func_cookie(cookies)  # hand the cookies to the GUI
                      self.cookie_dict[url] = cookies
                  except Exception:
                      pass
                  # Sleep on failure as well; with the sleep inside the try a failing
                  # browser made this loop spin without pause.
                  time.sleep(.5)

          self.cookie_Thread = threading.Thread(target=update_cookie)
          self.cookie_Thread.start()

      def Del_cookies(self, name):
          """Delete the browser cookie called ``name``."""
          self.browser.delete_cookie(name)

      def Tra_cookies(self):
          """Delete all browser cookies."""
          self.browser.delete_all_cookies()

      def Add_cookies(self, cookies: dict):
          """Add one cookie (a dict) to the browser."""
          self.browser.add_cookie(cookies)

      def update_cookies(self, name, cookies: dict):
          """Replace the cookie ``name`` with its current fields overlaid by ``cookies``.

          Raises Exception when the browser has no cookie with that name.
          """
          browser = self.browser
          for current in browser.get_cookies():
              if current.get('name', None) == name:
                  browser.delete_cookie(name)  # remove the old version first
                  current.update(cookies)
                  browser.add_cookie(current)
                  return
          raise Exception(f'no cookie named {name!r}')

      def set_Page_Parser(self, Parser):
          """Attach ``Parser`` and hand it the browser, url manager, directory and log."""
          self.Parser = Parser
          self.Parser.browser = self.browser
          self.Parser.url = self.url
          self.Parser.dir = self.dir
          self.Parser.log = self.log
  266. class Page_Parser:
  267. def __init__(self,Downloader:Page_Downloader):
  268. self.Downloader = Downloader
  269. self.Downloader.set_Page_Parser(self)
  270. self.func_list = []
  271. self.func_dict = {}
  272. self.n = 0
  273. self.init()
  274. def init(self,url=''):
  275. self.element_dict = {}#记录属性的名字
  276. self.now_url = url
  def add_base(self,func): # decorator
      """Wrap a step so that it reports success as a bool instead of raising.

      The wrapper forwards browser/num/name (and any extras) to ``func`` and returns
      True when it finishes, False when it raises an Exception. KeyboardInterrupt and
      SystemExit are no longer swallowed.
      """
      def wrap(browser=None,num=None,name=None, *args, **kwargs) -> bool:
          try:
              func(browser=browser,num=num, name=name, *args, **kwargs)
          except Exception:
              return False
          return True
      return wrap
  285. def add_func(self,name,func):
  286. self.func_list.append(f'{name}[{self.n}]')
  287. self.func_dict[f'{name}[{self.n}]'] = func
  288. self.n += 1
  289. def tra_func(self):
  290. self.func_list = []
  291. self.func_dict = {}
  292. self.n = 0
  293. def del_func(self,index,end=False):
  294. if end:index = len(self.func_list) - index - 1
  295. del self.func_dict[self.func_list[index]]
  296. self.func_list[index] = 'Func_have_been_del'
  297. self.func_dict['Func_have_been_del'] = lambda *args,**kwargs:None
  298. def return_func(self,only=True):
  299. if only:
  300. return self.func_list.copy()
  301. else:
  302. return [f'var[{index}]@ {i}' for index,i in enumerate(self.func_list.copy())]
  303. def find_ID(self,id,not_all=False,**kwargs):
  304. @self.add_base
  305. def find(browser, num, name, *args, **kwargs):
  306. nonlocal self,id
  307. if browser == None:browser = self.browser
  308. if not_all:self.element_dict[f'{name}[{num}]'] = [browser.find_element_by_id(id)]#返回必须是list
  309. else:self.element_dict[f'{name}[{num}]'] = browser.find_elements_by_id(id)
  310. self.add_func(f'find_ID:{id}',find)#添加func
  311. def find_class(self,class_name,not_all=False,**kwargs):
  312. @self.add_base
  313. def find(browser, num, name, *args, **kwargs):
  314. nonlocal self,class_name
  315. if browser == None:browser = self.browser
  316. if not_all:self.element_dict[f'{name}[{num}]'] = [browser.find_element_by_class_name(class_name)]#返回必须是list
  317. else:self.element_dict[f'{name}[{num}]'] = browser.find_elements_by_class_name(class_name)#返回必须是list
  318. self.add_func(f'find_class:{class_name}',find)#添加func
  319. def find_name(self,name_,not_all=False,**kwargs):
  320. @self.add_base
  321. def find(browser, num, name, *args, **kwargs):
  322. nonlocal self,name_
  323. if browser == None:browser = self.browser
  324. if not_all:self.element_dict[f'{name}[{num}]'] = [browser.find_element_by_name(name_)]#返回必须是list
  325. else:self.element_dict[f'{name}[{num}]'] = browser.find_elements_by_name(name_)#返回必须是list
  326. self.add_func(f'find_name:{name_}',find)#添加func
  327. def find_xpath(self,xpath,not_all=False,**kwargs):
  328. @self.add_base
  329. def find(browser, num, name, *args, **kwargs):
  330. nonlocal self,xpath
  331. if browser == None:browser = self.browser
  332. if not_all:self.element_dict[f'{name}[{num}]'] = [browser.find_element_by_xpath(xpath)]#返回必须是list
  333. else:self.element_dict[f'{name}[{num}]'] = browser.find_elements_by_xpath(xpath)#返回必须是list
  334. self.add_func(f'find_xpath:{xpath}',find)#添加func
  335. def find_css(self,css_selector,not_all=False,**kwargs):
  336. @self.add_base
  337. def find(browser, num, name, *args, **kwargs):
  338. nonlocal self,css_selector
  339. if browser == None:browser = self.browser
  340. if not_all:self.element_dict[f'{name}[{num}]'] = [browser.find_element_by_css_selector(css_selector)]#返回必须是list
  341. else:self.element_dict[f'{name}[{num}]'] = browser.find_elements_by_css_selector(css_selector)#返回必须是list
  342. self.add_func(f'find_css:{css_selector}',find)#添加func
  343. def find_tag_name(self,tag_name,not_all=False,**kwargs):
  344. @self.add_base
  345. def find(browser, num, name, *args, **kwargs):
  346. nonlocal self,tag_name
  347. if browser == None:browser = self.browser
  348. if not_all:self.element_dict[f'{name}[{num}]'] = [browser.find_element_by_tag_name(tag_name)]#返回必须是list
  349. else:self.element_dict[f'{name}[{num}]'] = browser.find_elements_by_tag_name(tag_name)#返回必须是list
  350. self.add_func(f'find_tagName:{tag_name}',find)#添加func\
  351. def find_link_text(self,link_text,not_all=False,**kwargs):#匹配link
  352. @self.add_base
  353. def find(browser, num, name, *args, **kwargs):
  354. nonlocal self,link_text
  355. if browser == None:browser = self.browser
  356. if not_all:self.element_dict[f'{name}[{num}]'] = [browser.find_element_by_link_text(link_text)]#返回必须是list
  357. else:self.element_dict[f'{name}[{num}]'] = browser.find_elements_by_link_text(link_text)#返回必须是list
  358. self.add_func(f'find_link_text:{link_text}',find)#添加func
  359. def find_partial_link_text(self,partial_link_text,not_all=False,**kwargs):#模糊匹配
  360. @self.add_base
  361. def find(browser, num, name, *args, **kwargs):
  362. nonlocal self,partial_link_text
  363. if browser == None:browser = self.browser
  364. if not_all:self.element_dict[f'{name}[{num}]'] = [browser.find_element_by_partial_link_text(partial_link_text)]#返回必须是list
  365. else:self.element_dict[f'{name}[{num}]'] = [browser.find_element_by_partial_link_text(partial_link_text)]#返回必须是list
  366. self.add_func(f'find_partial_link_text:{partial_link_text}',find)#添加func
  367. def find_switch_to_alert(self,*args,**kwargs):#定位弹出框
  368. @self.add_base
  369. def find(browser, num, name, *args, **kwargs):
  370. nonlocal self
  371. if browser == None:browser = self.browser
  372. self.element_dict[f'{name}[{num}]'] = [browser.switch_to.alert()]
  373. self.add_func(f'find_alert',find)#添加func
  374. def find_switch_to_active_element(self,*args,**kwargs):#定位焦点元素
  375. @self.add_base
  376. def find(browser, num, name, *args, **kwargs):
  377. nonlocal self
  378. if browser == None:browser = self.browser
  379. self.element_dict[f'{name}[{num}]'] = [browser.switch_to.active_element()]
  380. self.add_func(f'active_element',find)#添加func
  381. def find_switch_to_frame(self,reference,is_id=False,*args,**kwargs):#定位Frame
  382. @self.add_base
  383. def find(browser, num, name, *args, **kwargs):
  384. nonlocal self,reference,is_id
  385. if browser == None:browser = self.browser
  386. if reference == None:
  387. self.element_dict[f'{name}[{num}]'] = [browser.default_content()]# 回到主文档
  388. elif reference == '':
  389. self.element_dict[f'{name}[{num}]'] = [browser.parent_frame()]# 回到父文档
  390. else:
  391. if is_id:reference = int(reference)
  392. self.element_dict[f'{name}[{num}]'] = [browser.switch_to.frame(str(reference))]# 定位进入文档
  393. func_name = {None:'主文档','':'父文档'}.get(reference,reference)
  394. self.add_func(f'find_frame:{func_name}',find)#添加func
  395. def send_keys(self,text,element_value,index=0,**kwargs):#输入文字
  396. @self.add_base
  397. def action(*args, **kwargs):
  398. nonlocal self
  399. self.element_dict[element_value][index].send_keys(text)
  400. self.add_func(f'sent_text:{text}>{element_value}[{index}]', action) # 添加func
  401. def User_Passwd(self,User,Passwd,element_value,index=0,**kwargs):#输入验证(User&Password)
  402. @self.add_base
  403. def action(*args, **kwargs):
  404. nonlocal self
  405. self.element_dict[element_value][index].authenticate(User,Passwd)
  406. self.add_func(f'User:Passwd:{User};{Passwd}>{element_value}[{index}]', action) # 添加func
  407. def clear(self,element_value,index=0,**kwargs):#清空文本
  408. @self.add_base
  409. def action(*args, **kwargs):
  410. nonlocal self
  411. self.element_dict[element_value][index].clear()
  412. self.add_func(f'clear_text>{element_value}[{index}]', action) # 添加func
  413. def click(self,element_value,index=0,**kwargs):#点击按钮
  414. @self.add_base
  415. def action(*args, **kwargs):
  416. nonlocal self
  417. self.element_dict[element_value][index].click()
  418. self.add_func(f'click>{element_value}[{index}]', action) # 添加func
  419. def accept(self,element_value,index=0,**kwargs):#点击确定(弹出框)
  420. @self.add_base
  421. def action(*args, **kwargs):
  422. nonlocal self
  423. self.element_dict[element_value][index].accept()
  424. self.add_func(f'accept>{element_value}[{index}]', action) # 添加func
  425. def dismiss(self,element_value,index=0,**kwargs):#点击取消(弹出框)
  426. @self.add_base
  427. def action(*args, **kwargs):
  428. nonlocal self
  429. self.element_dict[element_value][index].dismiss()
  430. self.add_func(f'dismiss>{element_value}[{index}]', action) # 添加func
  431. def submit(self,element_value,index=0,**kwargs):#提交表单
  432. @self.add_base
  433. def action(*args, **kwargs):
  434. nonlocal self
  435. self.element_dict[element_value][index].submit()
  436. self.add_func(f'submit>{element_value}[{index}]', action) # 添加func
  437. def deselect_by_index(self,element_value,deselect,index=0,**kwargs):#根据index取消选择
  438. @self.add_base
  439. def action(*args, **kwargs):
  440. nonlocal self
  441. self.element_dict[element_value][index].deselect_by_index(int(deselect))
  442. self.add_func(f'deselect_by_index:{deselect}>{element_value}[{index}]', action) # 添加func
  443. def deselect_by_text(self,element_value,deselect,index=0,**kwargs):#根据text取消选择
  444. @self.add_base
  445. def action(*args, **kwargs):
  446. nonlocal self
  447. self.element_dict[element_value][index].deselect_by_visible_text(deselect)
  448. self.add_func(f'deselect_by_text:{deselect}>{element_value}[{index}]', action) # 添加func
  449. def deselect_by_value(self,element_value,deselect,index=0,**kwargs):#根据value取消选择
  450. @self.add_base
  451. def action(*args, **kwargs):
  452. nonlocal self
  453. self.element_dict[element_value][index].deselect_by_value(deselect)
  454. self.add_func(f'deselect_by_value:{deselect}>{element_value}[{index}]', action) # 添加func
  455. def select_by_index(self,element_value,deselect,index=0,**kwargs):#根据index选择
  456. @self.add_base
  457. def action(*args, **kwargs):
  458. nonlocal self
  459. self.element_dict[element_value][index].select_by_index(int(deselect))
  460. self.add_func(f'select_by_index:{deselect}>{element_value}[{index}]', action) # 添加func
  461. def select_by_text(self,element_value,deselect,index=0,**kwargs):#根据text选择
  462. @self.add_base
  463. def action(*args, **kwargs):
  464. nonlocal self
  465. self.element_dict[element_value][index].select_by_visible_text(deselect)
  466. self.add_func(f'select_by_text:{deselect}>{element_value}[{index}]', action) # 添加func
  467. def select_by_value(self,element_value,deselect,index=0,**kwargs):#根据value选择
  468. @self.add_base
  469. def action(*args, **kwargs):
  470. nonlocal self
  471. self.element_dict[element_value][index].select_by_value(deselect)
  472. self.add_func(f'select_by_value:{deselect}>{element_value}[{index}]', action) # 添加func
  473. def back(self,**kwargs):# 返回
  474. @self.add_base
  475. def action(*args, **kwargs):
  476. nonlocal self
  477. self.browser.back()
  478. self.add_func(f'BACK', action)
  479. def forward(self,**kwargs):# 前进
  480. @self.add_base
  481. def action(*args, **kwargs):
  482. nonlocal self
  483. self.browser.forward()
  484. self.add_func(f'FORWARD', action)
  485. def refresh(self,**kwargs):# 刷新
  486. @self.add_base
  487. def action(*args, **kwargs):
  488. nonlocal self
  489. self.browser.refresh()
  490. self.add_func(f'REFRESH', action)
  491. def wait_sleep(self,time:int=2,**kwargs):#暴力等待
  492. @self.add_base
  493. def action(*args, **kwargs):
  494. nonlocal self
  495. sleep(time)
  496. self.add_func(f'WAIT:{time}s', action)
  497. def set_wait(self,time:int=2,**kwargs):#隐式等待
  498. @self.add_base
  499. def action(*args, **kwargs):
  500. nonlocal self
  501. sleep(time)
  502. self.add_func(f'Loading_wait:{time}s', action)
  503. def run_JS(self,JS,**kwargs):
  504. @self.add_base
  505. def action(num,name,*args, **kwargs):
  506. nonlocal self
  507. get = self.browser.execute_script(JS)
  508. if hasattr(get,'__getitem__'):#可切片
  509. self.element_dict[f'{name}[{num}]'] = get # 返回必须是list
  510. else:
  511. self.element_dict[f'{name}[{num}]'] = [get]
  512. self.add_func(f'run_js:{JS}', action)
  513. def to_text(self,**kwargs):#获取网页源码
  514. @self.add_base
  515. def action(num,name,*args, **kwargs):
  516. nonlocal self
  517. try:
  518. self.element_dict[f'{name}[{num}]'] = [self.browser.page_source,self.now_url]
  519. except:
  520. self.element_dict[f'{name}[{num}]'] = [self.browser.text, self.now_url]#request
  521. self.add_func(f'get_page_source', action)
  522. def out_html(self,element_value,**kwargs):#输出网页源码
  523. @self.add_base
  524. def action(*args, **kwargs):
  525. nonlocal self
  526. md5 = hashlib.md5() # 应用MD5算法
  527. md5.update(f'{time.time()}_{self.now_url}'.encode('utf-8'))
  528. name = md5.hexdigest()
  529. save_dir = self.dir + '/' + name + '.cotan_source'
  530. print(save_dir)
  531. with open(save_dir,'w') as f:
  532. f.write(self.element_dict[element_value][0])
  533. with open(save_dir + '.CoTanURL','w') as f:
  534. f.write(self.element_dict[element_value][1])
  535. self.add_func(f'write_html<{element_value}', action)
  536. def del_all_cookies(self,**kwargs):#删除所有曲奇
  537. @self.add_base
  538. def action(*args, **kwargs):
  539. nonlocal self
  540. self.browser.delete_all_cookies()
  541. self.add_func(f'del_all_cookies', action)
  542. def del_cookies(self,cookies_name,**kwargs):#删除指定曲奇
  543. @self.add_base
  544. def action(*args, **kwargs):
  545. nonlocal self
  546. self.browser.delete_cookie(cookies_name)
  547. self.add_func(f'del_cookies:{cookies_name}', action)
  548. def add_cookies(self,cookies,**kwargs):#添加指定曲奇
  549. @self.add_base
  550. def action(*args, **kwargs):
  551. nonlocal self
  552. self.browser.add_cookie(cookies)
  553. self.add_func(f'add_cookies:{cookies}', action)
  554. def update_cookies(self,cookies_name,cookies,**kwargs):#更新曲奇
  555. @self.add_base
  556. def action(*args, **kwargs):
  557. nonlocal self
  558. now_cookies = self.browser.get_cookie(cookies_name)
  559. self.browser.delete_cookie(cookies_name)
  560. now_cookies.update(cookies)
  561. self.browser.add_cookie(now_cookies)
  562. self.add_func(f'add_cookies:{cookies}', action)
  563. def get_cookies(self,cookies_name,**kwargs):#获取指定曲奇
  564. @self.add_base
  565. def action(num,name,*args, **kwargs):
  566. nonlocal self
  567. self.element_dict[f'{name}[{num}]'] = [self.browser.get_cookie(cookies_name)]
  568. self.add_func(f'get_cookies:{cookies_name}', action)
  569. def get_all_cookies(self,**kwargs):#获取所有曲奇
  570. @self.add_base
  571. def action(num,name,*args, **kwargs):
  572. nonlocal self
  573. self.element_dict[f'{name}[{num}]'] = self.browser.get_cookie()
  574. self.add_func(f'get_all_cookies', action)
  575. def make_bs(self, element_value, **kwargs): # 解析成bs4对象
  576. @self.add_base
  577. def action(num,name,*args, **kwargs):
  578. nonlocal self
  579. self.element_dict[f'{name}[{num}]'] = [bs4.BeautifulSoup(self.element_dict[element_value][0], "html.parser")]
  580. self.add_func(f'Parsing:{element_value}', action) # 添加func
  581. def listSlicing(self,index:(slice,int),element_value):
  582. if type(index) is int:
  583. return [self.element_dict[element_value][index]]
  584. else:
  585. return self.element_dict[element_value][index]
  586. def to_Database(self,element_value,index,data:(str,list),dataBase_name:str,**kwargs):#传入data Base
  587. @self.add_base
  588. def action(*args, **kwargs):
  589. global data_base
  590. nonlocal self
  591. iter_list = self.listSlicing(index, element_value)
  592. for bs in iter_list:
  593. new = []
  594. for i in data:
  595. if i == '$name&':new.append(bs.name)
  596. elif i == '$self&':new.append(str(bs).replace('\n',''))
  597. elif i == '$string$':new.append(str(bs.string).replace('\n',''))
  598. else:
  599. new.append(bs.attrs.get(i,''))
  600. data_base.add_DataBase(dataBase_name,new)
  601. self.add_func(f'DataBase:{data}<{element_value}[{index}]>{dataBase_name}', action) # 添加func
  602. def to_Database_by_re(self,element_value,index,data:str,dataBase_name:str,**kwargs):#通过正则,传入dataBase
  603. data = regular.compile(data)
  604. @self.add_base
  605. def action(*args, **kwargs):
  606. global data_base
  607. nonlocal self
  608. iter_list = self.listSlicing(index, element_value)
  609. for bs in iter_list:
  610. new = regular.findall(data,str(bs))
  611. data_base.add_DataBase(dataBase_name,new)
  612. self.add_func(f'DataBase:{data}<{element_value}[{index}]>{dataBase_name}', action) # 添加func
  613. def findAll(self, element_value,tag:(str,list),attribute:dict,limit,recursive,index:(slice,int),**kwargs):#根据标签定位
  614. if type(tag) is str:
  615. tag = str(tag).split(',')
  616. try:
  617. limit = int(limit)
  618. except:
  619. limit = None
  620. @self.add_base
  621. def action(num,name,*args, **kwargs):
  622. nonlocal self
  623. iter_list = self.listSlicing(index,element_value)
  624. paser_list = []
  625. for bs in iter_list:
  626. try:
  627. re = bs.find_all(tag,attribute,limit=limit,recursive=recursive)
  628. except:
  629. try:
  630. if str(bs.name) not in tag:raise Exception
  631. for agrs_name in attribute:
  632. text = attribute[agrs_name]
  633. if type(text) is str:
  634. if bs.attrs[agrs_name] != text:raise Exception
  635. else:#正则匹配
  636. if not regular.match(text,bs.attrs[agrs_name]): raise Exception
  637. re = [bs]
  638. except:
  639. re = []
  640. paser_list += re
  641. self.element_dict[f'{name}[{num}]'] = paser_list
  642. self.add_func(f'findAll:{element_value}[{index}]', action) # 添加func
  643. def findAll_by_text(self, element_value,text:(regular.compile,str),limit,recursive,index:(slice,int),**kwargs):#根据text定位
  644. try:
  645. limit = int(limit)
  646. except:
  647. limit = None
  648. @self.add_base
  649. def action(num,name,*args, **kwargs):
  650. nonlocal self
  651. iter_list = self.listSlicing(index,element_value)
  652. paser_list = []
  653. for bs in iter_list:
  654. try:
  655. re = bs.find_all(text=text,limit=limit,recursive=recursive)
  656. except:
  657. try:
  658. if type(text) is str:
  659. if str(bs.string) != text:raise Exception
  660. else:
  661. if not regular.match(text,str(bs.string)):raise Exception
  662. re = [bs]
  663. except:
  664. re = []
  665. paser_list += re
  666. self.element_dict[f'{name}[{num}]'] = paser_list
  667. self.add_func(f'findAll_by_text:{element_value}[{index}]', action) # 添加func
  def __get_other_base(self,element_value,index:(slice,int),who='children',**kwargs):
      """Shared implementation for the child / descendant / sibling getters.

      Queues an action that, for every element returned by
      ``self.listSlicing(index, element_value)``, gathers related nodes and
      stores the de-duplicated result in
      ``self.element_dict['{name}[{num}]']``.

      Args:
          element_value: key of the stored element list to start from.
          index: slice or integer forwarded to ``self.listSlicing``.
          who: which relation to collect. ``'children'``, ``'offspring'``
              (``descendants``), ``'down'`` (``next_siblings``), ``'up'``
              (``previous_siblings``) or ``'brothers'`` (previous siblings
              followed by next siblings). Unknown values fall back to
              ``children``.
      """
      # Relation name -> attribute read from each element.
      relation_attr = {'children': 'children', 'offspring': 'descendants',
                       'down': 'next_siblings', 'up': 'previous_siblings'}

      @self.add_base
      def action(num,name,*args, **kwargs):
          paser_list = []
          for bs in self.listSlicing(index, element_value):
              if who != 'brothers':
                  # Only the requested attribute is read, so an element that
                  # lacks one of the other relations no longer fails here.
                  paser_list += getattr(bs, relation_attr.get(who, 'children'))
              else:
                  paser_list += bs.previous_siblings
                  paser_list += bs.next_siblings
          # dict.fromkeys removes duplicates with the same hash/equality rules
          # as set(), but keeps first-seen order. Later steps address these
          # results by position, so the order must be reproducible.
          self.element_dict[f'{name}[{num}]'] = list(dict.fromkeys(paser_list))

      self.add_func(f'get_{who}:{element_value}[{index}]', action)  # register the action
  def get_children(self,element_value,index:(slice,int),**kwargs):
      """Queue an action that collects the ``children`` of the selected elements."""
      return self.__get_other_base(element_value, index, who='children')
  def get_offspring(self,element_value,index:(slice,int),**kwargs):
      """Queue an action that collects the ``descendants`` of the selected elements."""
      return self.__get_other_base(element_value, index, who='offspring')
  def get_up(self,element_value,index:(slice,int),**kwargs):
      """Queue an action that collects the ``previous_siblings`` of the selected elements."""
      return self.__get_other_base(element_value, index, who='up')
  def get_down(self,element_value,index:(slice,int),**kwargs):
      """Queue an action that collects the ``next_siblings`` of the selected elements."""
      return self.__get_other_base(element_value, index, who='down')
  def get_brothers(self,element_value,index:(slice,int),**kwargs):
      """Queue an action that collects previous and next siblings of the selected elements."""
      return self.__get_other_base(element_value, index, who='brothers')
  def get_by_path(self,element_value,index:(slice,int),path,**kwargs):
      """Queue an action that evaluates a navigation expression on each element.

      When the action runs, ``str(path)`` is evaluated once per element
      returned by ``self.listSlicing(index, element_value)``, with the
      element bound to the name ``self`` inside the expression. Every
      non-``None`` result is appended to the list stored in
      ``self.element_dict['{name}[{num}]']``. Elements for which the
      expression raises or yields ``None`` are skipped.

      Args:
          element_value: key of the stored element list to start from.
          index: slice or integer forwarded to ``self.listSlicing``.
          path: expression text, e.g. an attribute chain rooted at ``self``.
      """
      @self.add_base
      def action(num,name,*args, **kwargs):
          expression = str(path)
          paser_list = []
          for bs in self.listSlicing(index, element_value):
              try:
                  # SECURITY: eval() runs arbitrary Python. ``path`` must only
                  # ever come from a trusted operator, never from crawled or
                  # remote content.
                  found = eval(expression, {'self': bs})
              except Exception:
                  continue  # best effort: skip elements the expression fails on
              if found is not None:
                  paser_list.append(found)
          self.element_dict[f'{name}[{num}]'] = paser_list

      self.add_func(f'get>{path}:{element_value}[{index}]', action)  # register the action
  def Webpage_snapshot(self,**kwargs):
      """Queue an action that saves a screenshot of the current page.

      The file name is the MD5 hex digest of ``'<time.time()>_<now_url>'``.
      Two files are written under ``self.dir``: ``<digest>.png.CoTanURL``
      containing ``self.now_url`` and ``<digest>.png`` produced by
      ``self.browser.save_screenshot``. The action then sleeps one second.
      """
      @self.add_base
      def action(*args, **kwargs):
          stamp = f'{time.time()}_{self.now_url}'
          name = hashlib.md5(stamp.encode('utf-8')).hexdigest()  # used as a file name only
          image_path = self.dir + '/' + name + '.png'
          # Sidecar file recording which URL the screenshot belongs to.
          with open(image_path + '.CoTanURL','w') as f:
              f.write(self.now_url)
          self.browser.save_screenshot(image_path)
          sleep(1)

      self.add_func('Webpage_snapshot', action)  # register the action
  def add_url(self, element_value, index: (slice, int), url_name,update_func,url_args:dict, **kwargs):
      """Queue an action that feeds values taken from elements into ``url.add_url``.

      For every element returned by ``self.listSlicing(index, element_value)``
      one value is derived according to ``url_name`` and passed to
      ``url.add_url(value, **url_args)``. Failures on individual elements are
      ignored. ``update_func()`` is called once after all elements have been
      processed.

      Args:
          element_value: key of the stored element list to read from.
          index: slice or integer forwarded to ``self.listSlicing``.
          url_name: ``'$name&'`` uses ``element.name``; ``'$self&'`` uses
              ``str(element)`` with newlines removed; ``'$string$'`` uses
              ``str(element.string)`` with newlines removed; any other value
              is looked up in ``element.attrs`` (empty string if absent).
          update_func: zero-argument callback invoked after the loop.
          url_args: keyword arguments forwarded to ``url.add_url``.
      """
      def pick(bs):
          # Derive the value to submit from one element.
          if url_name == '$name&':
              return bs.name
          if url_name == '$self&':
              return str(bs).replace('\n', '')
          if url_name == '$string$':
              return str(bs.string).replace('\n', '')
          return bs.attrs.get(url_name, '')

      @self.add_base
      def action(*args, **kwargs):
          for bs in self.listSlicing(index, element_value):
              try:
                  url.add_url(pick(bs), **url_args)
              except Exception:
                  # Best effort: one bad element must not stop the others.
                  # KeyboardInterrupt / SystemExit are deliberately not caught.
                  continue
          update_func()  # refresh the tkinter view

      self.add_func(f'add_URL<{element_value}[{index}]:{url_name}', action)  # register the action
  def to_json(self,**kwargs):
      """Queue an action that stores ``self.browser.json()`` as a one-item list.

      The result is written to ``self.element_dict['{name}[{num}]']``.
      """
      @self.add_base
      def action(num, name, *args, **kwargs):
          slot = f'{name}[{num}]'
          parsed = self.browser.json()  # response parsed as JSON
          self.element_dict[slot] = [parsed]

      self.add_func('to_json', action)  # register the action
  def make_ActionChains(self,**kwargs):
      """Queue an action that creates an action chain bound to ``self.browser``.

      The new ``ActionChains`` object is stored as a one-item list in
      ``self.element_dict['{name}[{num}]']``.
      """
      @self.add_base
      def action(num,name,*args, **kwargs):
          slot = f'{name}[{num}]'
          chain = ActionChains(self.browser)
          self.element_dict[slot] = [chain]

      self.add_func('make_ActionChains', action)  # register the action
  def ActionChains_click(self,Chains,element_value,index,**kwargs):
      """Queue an action that adds a left click to a stored action chain.

      The chain is ``self.element_dict[Chains][0]``; the click target is
      ``self.element_dict[element_value][index]``.
      """
      @self.add_base
      def action(*args, **kwargs):
          chain = self.element_dict[Chains][0]
          target = self.element_dict[element_value][index]
          chain.click(target)

      self.add_func(f'[{Chains}]click>[{element_value}][{index}]', action)  # register the action
  def ActionChains_double_click(self,Chains,element_value,index,**kwargs):
      """Queue an action that adds a left double click to a stored action chain.

      The chain is ``self.element_dict[Chains][0]``; the target is
      ``self.element_dict[element_value][index]``.
      """
      @self.add_base
      def action(*args, **kwargs):
          chain = self.element_dict[Chains][0]
          target = self.element_dict[element_value][index]
          chain.double_click(target)

      self.add_func(f'[{Chains}]double_click>[{element_value}][{index}]', action)  # register the action
  def ActionChains_click_right(self,Chains,element_value,index,**kwargs):
      """Queue an action that adds a right (context) click to a stored action chain.

      The chain is ``self.element_dict[Chains][0]``; the target is
      ``self.element_dict[element_value][index]``.
      """
      @self.add_base
      def action(*args, **kwargs):
          chain = self.element_dict[Chains][0]
          target = self.element_dict[element_value][index]
          chain.context_click(target)

      self.add_func(f'[{Chains}]right_click>[{element_value}][{index}]', action)  # register the action
  def ActionChains_click_and_hold(self,Chains,element_value,index,**kwargs):
      """Queue an action that adds "press and hold the left button" to a stored chain.

      The chain is ``self.element_dict[Chains][0]``; the target is
      ``self.element_dict[element_value][index]``.
      """
      @self.add_base
      def action(*args, **kwargs):
          chain = self.element_dict[Chains][0]
          target = self.element_dict[element_value][index]
          chain.click_and_hold(target)

      self.add_func(f'[{Chains}]click_and_hold>[{element_value}][{index}]', action)  # register the action
  def ActionChains_release(self,Chains,element_value,index,**kwargs):
      """Queue an action that adds "release the left button" to a stored chain.

      The chain is ``self.element_dict[Chains][0]``; the element passed to
      ``release`` is ``self.element_dict[element_value][index]``.
      """
      @self.add_base
      def action(*args, **kwargs):
          chain = self.element_dict[Chains][0]
          target = self.element_dict[element_value][index]
          chain.release(target)

      self.add_func(f'[{Chains}]release>[{element_value}][{index}]', action)  # register the action
  def ActionChains_drag_and_drop(self,Chains,element_value,index,element_value2,index2,**kwargs):
      """Queue an action that adds a drag-and-drop to a stored action chain.

      The chain is ``self.element_dict[Chains][0]``. The dragged element is
      ``self.element_dict[element_value][index]`` and the drop target is
      ``self.element_dict[element_value2][index2]``. The registered label
      names only the dragged element.
      """
      @self.add_base
      def action(*args, **kwargs):
          chain = self.element_dict[Chains][0]
          source = self.element_dict[element_value][index]
          destination = self.element_dict[element_value2][index2]
          chain.drag_and_drop(source, destination)

      self.add_func(f'[{Chains}]drag_and_drop>[{element_value}][{index}]', action)  # register the action
  def ActionChains_move(self,Chains,element_value,index,**kwargs):
      """Queue an action that adds "move the mouse to an element" to a stored chain.

      The chain is ``self.element_dict[Chains][0]``; the destination is
      ``self.element_dict[element_value][index]``.

      The action is registered under its own ``move_to_element`` label.
      It was previously registered under the ``drag_and_drop`` label, which
      made it indistinguishable from (and liable to clash with) a real
      drag-and-drop step on the same chain and element.
      """
      @self.add_base
      def action(*args, **kwargs):
          chain = self.element_dict[Chains][0]
          target = self.element_dict[element_value][index]
          chain.move_to_element(target)

      self.add_func(f'[{Chains}]move_to_element>[{element_value}][{index}]', action)  # register the action
  def Special_keys(self,key:str,is_special_keys):
      """Resolve a key name into ``(value_to_send, label_text)``.

      When ``is_special_keys`` is falsy, ``key`` is returned unchanged in both
      positions. Otherwise the value is looked up in ``keys_name_dict`` by the
      lower-cased name (falling back to ``key`` itself) and the label is the
      upper-cased name wrapped in square brackets.
      """
      if not is_special_keys:
          return key, key
      return keys_name_dict.get(key.lower(), key), f'[{key.upper()}]'
  def ActionChains_key_down(self,Chains,key,element_value,index,is_special_keys,**kwargs):
      """Queue an action that adds a key press (without release) to a stored chain.

      ``key`` is first resolved through ``self.Special_keys``. The chain is
      ``self.element_dict[Chains][0]`` and the element receiving the key is
      ``self.element_dict[element_value][index]``.
      """
      new_key, shown = self.Special_keys(key,is_special_keys)

      @self.add_base
      def action(*args, **kwargs):
          chain = self.element_dict[Chains][0]
          target = self.element_dict[element_value][index]
          chain.key_down(new_key, target)

      self.add_func(f'[{Chains}]key_down>{shown}:[{element_value}][{index}]', action)  # register the action
  def ActionChains_key_up(self,Chains,key,element_value,index,is_special_keys,**kwargs):
      """Queue an action that adds a key release to a stored chain.

      ``key`` is first resolved through ``self.Special_keys``. The chain is
      ``self.element_dict[Chains][0]`` and the element receiving the key is
      ``self.element_dict[element_value][index]``.
      """
      new_key, shown = self.Special_keys(key, is_special_keys)

      @self.add_base
      def action(*args, **kwargs):
          chain = self.element_dict[Chains][0]
          target = self.element_dict[element_value][index]
          chain.key_up(new_key, target)

      self.add_func(f'[{Chains}]key_up>{shown}:[{element_value}][{index}]', action)  # register the action
  def ActionChains_send_keys_to_element(self,Chains,key,element_value,index,is_special_keys,**kwargs):
      """Queue an action that adds "send keys to a specific element" to a stored chain.

      ``key`` is first resolved through ``self.Special_keys``. The chain is
      ``self.element_dict[Chains][0]`` and the receiving element is
      ``self.element_dict[element_value][index]``.
      """
      new_key, shown = self.Special_keys(key, is_special_keys)

      @self.add_base
      def action(*args, **kwargs):
          chain = self.element_dict[Chains][0]
          target = self.element_dict[element_value][index]
          chain.send_keys_to_element(target, new_key)

      self.add_func(f'[{Chains}]sent>{shown}:[{element_value}][{index}]', action)  # register the action
  def ActionChains_send_keys(self,Chains,key,is_special_keys,**kwargs):
      """Queue an action that adds "send keys" (no explicit target element) to a stored chain.

      ``key`` is first resolved through ``self.Special_keys``; the chain is
      ``self.element_dict[Chains][0]``.
      """
      new_key, shown = self.Special_keys(key, is_special_keys)

      @self.add_base
      def action(*args, **kwargs):
          chain = self.element_dict[Chains][0]
          chain.send_keys(new_key)

      self.add_func(f'[{Chains}].sent>{shown}', action)  # register the action
  def ActionChains_run(self,Chains,run_time=1,**kwargs):
      """Queue an action that performs a stored action chain, then waits.

      Calls ``perform()`` on ``self.element_dict[Chains][0]`` and then sleeps
      for ``run_time`` seconds.
      """
      @self.add_base
      def action(*args, **kwargs):
          chain = self.element_dict[Chains][0]
          chain.perform()
          sleep(run_time)

      self.add_func(f'[{Chains}].run<{run_time}s', action)  # register the action
  def get_all_windows(self,*args,**kwargs):
      """Queue an action that stores every window handle of the browser.

      When run, the action reads ``window_handles`` from the ``browser`` it is
      given (or from ``self.browser`` when that argument is ``None``) and
      writes it to ``self.element_dict['{name}[{num}]']``.
      """
      @self.add_base
      def find(browser, num, name, *args, **kwargs):
          # Identity test: "no browser supplied" must not depend on how the
          # browser object implements equality.
          if browser is None:
              browser = self.browser
          self.element_dict[f'{name}[{num}]'] = browser.window_handles

      self.add_func('get_all_windows',find)  # register the action
  def get_now_windows(self,*args,**kwargs):
      """Queue an action that stores the handle of the current window.

      When run, the action reads ``current_window_handle`` from the ``browser``
      it is given (or from ``self.browser`` when that argument is ``None``)
      and writes it, as a one-item list, to
      ``self.element_dict['{name}[{num}]']``. The action is registered under
      the label ``get_now_window``.
      """
      @self.add_base
      def find(browser, num, name, *args, **kwargs):
          # Identity test: "no browser supplied" must not depend on how the
          # browser object implements equality.
          if browser is None:
              browser = self.browser
          self.element_dict[f'{name}[{num}]'] = [browser.current_window_handle]

      self.add_func('get_now_window',find)  # register the action
  def switch_to_windwos(self,element_value,index=0,**kwargs):
      """Queue an action that switches the browser to a stored window handle.

      The handle is ``self.element_dict[element_value][index]`` and is passed
      to ``self.browser.switch_to.window``.
      """
      @self.add_base
      def action(*args, **kwargs):
          handle = self.element_dict[element_value][index]
          self.browser.switch_to.window(handle)

      self.add_func(f'switch_to_window>{element_value}[{index}]', action)  # register the action
  def Element_interaction(self,update_func=lambda *args:None):
      """Run every queued action in order, reporting progress around each step.

      Before the first action, before every action, and once after the last
      one, a status line is written to ``self.log`` and
      ``update_func(step_name, status_text, value_box)`` is called, where:

      * ``step_name`` is ``'start'``, the label of the action about to run,
        or ``'Finish'``;
      * ``status_text`` describes the result of the previously run action:
        ``'Success to run'`` (truthy), ``'No status'`` (``None``, i.e. nothing
        has run yet or the action returned ``None``) or ``'Wrong to run'``;
      * ``value_box`` is a list with one ``'<key>[<len>] = <value>'`` string
        per entry of ``self.element_dict``.

      Each action is looked up in ``self.func_dict`` by its label and called
      with ``num`` set to its position in ``self.func_list`` (as a string)
      and ``name='var'``.

      Args:
          update_func: progress callback; defaults to a no-op.
      """
      func_list = self.func_list
      status = None  # result of the most recently executed action
      self.log.write(f'{"*"*5}url:{self.now_url}{"*"*5}')

      def update(func_name):
          # Translate the previous action's result into a status text.
          if status:
              success_code = 'Success to run'
          elif status is None:
              success_code = 'No status'
          else:
              success_code = 'Wrong to run'
          self.log.write(f'last:[{success_code}];now:[{func_name}];url:{self.now_url} [END]')
          value_box = []
          for key, value in self.element_dict.items():
              try:
                  # Show the size of the stored value. Measuring the key
                  # string instead could never fail and reported a
                  # meaningless number.
                  value_box.append(f'{key}[{len(value)}] = {value}')
              except TypeError:
                  value_box.append(f'{key} = {value}')  # value has no length
          update_func(func_name, success_code, value_box)  # push to the information display

      update('start')
      for func_num, func_name in enumerate(func_list):
          update(func_name)
          status = self.func_dict[func_name](num=f'{func_num}',name='var')
      update('Finish')