123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960 |
- from crawler.template import UrlAdd, UrlReturn, PageDownloaderRequests, PageDownloaderSelenium, PageDownloaderCookies, \
- PageParserAutomation, PageParserBrowser, PageParserData, PageParserChains, UrlFile
class Url(UrlAdd, UrlReturn, UrlFile):
    """URL manager built from the add / return / file mixins."""

    def return_url(self):
        """Return a shallow copy of ``self.url_list``."""
        current = self.url_list
        return current.copy()

    def return_url_history(self):
        """Return a shallow copy of ``self.url_history``."""
        history = self.url_history
        return history.copy()
class PageDownloader(PageDownloaderRequests, PageDownloaderSelenium, PageDownloaderCookies):
    """Page downloader built from the requests / Selenium / cookies mixins."""

    def requests_mode(self, func_cookie, url):
        """Delegate to the parent ``requests_mode`` after an optional Selenium quit.

        When ``self.last_mode`` equals ``"get"``, ``self.selenium_quit()`` is
        called before delegating. The parent's return value is passed through.
        """
        # NOTE(review): the return is assumed to be unconditional (outside the
        # ``if``) -- confirm against the properly indented original.
        previous_was_get = self.last_mode == "get"
        if previous_was_get:
            self.selenium_quit()
        return super().requests_mode(func_cookie, url)

    def set_page_parser(self, parser):
        """Register *parser* via the parent class, then share state with it.

        After the parent call, ``self.parser`` receives this downloader's
        ``browser``, ``url``, ``dir`` and ``log`` attributes, in that order.
        """
        super().set_page_parser(parser)
        for attr_name in ("browser", "url", "dir", "log"):
            shared_value = getattr(self, attr_name)
            setattr(self.parser, attr_name, shared_value)
class PageParser(PageParserAutomation, PageParserBrowser, PageParserData, PageParserChains):
    """Page parser built from the automation / browser / data / chains mixins."""

    def element_interaction(self, update_func=lambda *args: None):  # element interaction
        """Run every step named in ``self.func_list`` in order, reporting progress.

        Before each step (and once more after the last one) a line is written
        to ``self.log`` and ``update_func`` is invoked. Both describe the
        outcome of the *previous* step together with the name of the step
        about to run.

        Args:
            update_func: Callback invoked as
                ``update_func(step_name, status_text, value_box)`` where
                ``value_box`` is a list of strings, one per entry of
                ``self.element_dict``. Defaults to a no-op.

        Each step is looked up in ``self.func_dict`` and called with
        ``num=<index as str>`` and ``name="var"``; it must return a
        ``(status, error)`` pair.
        """
        status = None  # outcome of the previous step; None until a step has run
        error = None  # error detail returned alongside a falsy status
        self.log.write(f'{"*"*5}url:{self.url_text}{"*" * 5}')

        def update_log(func_name):
            """Log the previous step's outcome and notify ``update_func``."""
            if status:
                success_code = "Success to run"
            elif status is None:
                success_code = "No status"
            else:
                success_code = f"Wrong to run: {error} "
            self.log.write(
                f"last:[{success_code}];now:[{func_name}];url:{self.url_text} [END]"
            )

            value_box = []
            for key in self.element_dict:
                value = self.element_dict[key]
                # Report the size of the stored value, not of its key; values
                # without a length fall back to the plain "key = value" form.
                try:
                    size = len(value)
                except TypeError:
                    value_box.append(f"{key} = {value}")
                else:
                    value_box.append(f"{key}[{size}] = {value}")
            update_func(func_name, success_code, value_box)  # push info to the caller

        update_log("开始解析")
        for func_num, func_name in enumerate(self.func_list):
            update_log(func_name)
            status, error = self.func_dict[func_name](num=f"{func_num}", name="var")
        update_log("运行完成")
|