controller.py 2.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960
  1. from crawler.template import UrlAdd, UrlReturn, PageDownloaderRequests, PageDownloaderSelenium, PageDownloaderCookies, \
  2. PageParserAutomation, PageParserBrowser, PageParserData, PageParserChains, UrlFile
  3. class Url(UrlAdd, UrlReturn, UrlFile): # url管理器
  4. def return_url(self):
  5. return self.url_list.copy()
  6. def return_url_history(self):
  7. return self.url_history.copy()
  8. class PageDownloader(PageDownloaderRequests, PageDownloaderSelenium, PageDownloaderCookies):
  9. def requests_mode(self, func_cookie, url):
  10. if self.last_mode == "get":
  11. self.selenium_quit()
  12. return super(PageDownloader, self).requests_mode(func_cookie, url)
  13. def set_page_parser(self, parser):
  14. super(PageDownloader, self).set_page_parser(parser)
  15. self.parser.browser = self.browser
  16. self.parser.url = self.url
  17. self.parser.dir = self.dir
  18. self.parser.log = self.log
  19. class PageParser(PageParserAutomation, PageParserBrowser, PageParserData, PageParserChains):
  20. def element_interaction(self, update_func=lambda *args: None): # 元素交互
  21. func_list = self.func_list
  22. status = None
  23. self.log.write(f'{"*"*5}url:{self.url_text}{"*" * 5}')
  24. def update_log(func_name):
  25. nonlocal status, self
  26. if status:
  27. success_code = "Success to run"
  28. elif status is None:
  29. success_code = "No status"
  30. else:
  31. success_code = f"Wrong to run: {e} "
  32. self.log.write(
  33. f"last:[{success_code}];now:[{func_name}];url:{self.url_text} [END]"
  34. )
  35. value_box = []
  36. for i in self.element_dict:
  37. try:
  38. value_box.append(f"{i}[{len(i)}] = {self.element_dict[i]}")
  39. except TypeError:
  40. value_box.append(f"{i} = {self.element_dict[i]}")
  41. update_func(func_name, success_code, value_box) # 信息更新系统
  42. update_log("开始解析")
  43. for func_num in range(len(func_list)):
  44. func_name = func_list[func_num]
  45. update_log(func_name)
  46. status, e = self.func_dict[func_name](num=f"{func_num}", name="var")
  47. update_log("运行完成")