main.py 7.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240
  1. from abc import ABCMeta, abstractmethod
  2. import requests
  3. from bs4 import BeautifulSoup
  4. import time
  5. class Search(metaclass=ABCMeta):
  6. def __init__(self):
  7. self.url = ""
  8. self.args = ""
  9. self.bd_session = requests.Session()
  10. self.report = None
  11. self.bs4: BeautifulSoup = None
  12. self.word_list = []
  13. self.page_num = 0
  14. self.referer = ""
  15. self.headers = {
  16. 'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3',
  17. 'accept-language': 'zh-CN,zh;q=0.9',
  18. 'cache-control': 'max-age=0',
  19. 'sec-fetch-dest': 'document',
  20. 'sec-fetch-mode': 'navigate',
  21. 'sec-fetch-site': 'none',
  22. 'sec-fetch-user': '?1',
  23. 'connection': 'close',
  24. 'upgrade-insecure-requests': '1',
  25. 'accept-encoding': 'gzip, deflate',
  26. "content-type": "application/x-www-form-urlencoded",
  27. "Upgrade-Insecure-Requests": "1",
  28. 'user-agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36 QIHU 360SE',
  29. }
  30. @abstractmethod
  31. def get_report(self, args_list, start):
  32. pass
  33. @abstractmethod
  34. def bs_paser(self):
  35. pass
  36. @abstractmethod
  37. def find_word(self):
  38. pass
  39. @abstractmethod
  40. def __iter__(self):
  41. pass
  42. @abstractmethod
  43. def __next__(self):
  44. pass
  45. @abstractmethod
  46. def output_word(self):
  47. pass
  48. @abstractmethod
  49. def return_page(self):
  50. pass
  51. class BingWeb(Search):
  52. def __init__(self):
  53. super().__init__()
  54. self.url = "https://cn.bing.com"
  55. self.headers["Origin"] = "https://cn.bing.com"
  56. self.headers['host'] = 'cn.bing.com'
  57. def get_report(self, args_list=None, start=True):
  58. if args_list:
  59. self.args = "?" + "q=" + args_list
  60. if start:
  61. self.page_num = 0
  62. if self.referer:
  63. self.headers["referer"] = "self.referer"
  64. self.referer = self.url + self.args
  65. self.report = self.bd_session.get(self.referer, headers=self.headers).text
  66. self.bs_paser()
  67. return self
  68. def bs_paser(self) -> None:
  69. assert self.report, "Don't get report"
  70. self.bs4 = BeautifulSoup(self.report, 'html.parser')
  71. def find_word(self) -> None:
  72. self.word_list = []
  73. word = self.bs4.find_all("li", class_="b_algo")
  74. for w in word:
  75. title = w.find("div", class_="b_title")
  76. try: # 错误捕捉
  77. if title: # 找到了title(官网模式)
  78. self.word_list.append((title.h2.a.text, title.h2.a.get("href")))
  79. else: # 普通词条模式
  80. self.word_list.append((w.h2.a.text, w.h2.a.get("href")))
  81. except AttributeError:
  82. pass
  83. def __iter__(self):
  84. self.page_num = -1
  85. return self
  86. def __next__(self) -> bool:
  87. if self.page_num == -1: # 默认的第一次get
  88. self.page_num += 1
  89. return True
  90. self.page_num += 1
  91. title = self.bs4.find("a", title=f"下一页")
  92. if title:
  93. self.args = title.get("href")
  94. self.report = self.get_report(None, False)
  95. else:
  96. raise StopIteration
  97. return True
  98. def output_word(self):
  99. return self.word_list
  100. def return_page(self):
  101. return self.page_num
  102. class Seacher: # 搜索者
  103. def __init__(self, word: str):
  104. self.web = {"bing": BingWeb()}
  105. self.word = word
  106. self.first = True
  107. def find(self):
  108. for web_name in self.web:
  109. web = self.web[web_name]
  110. web.get_report(self.word).__iter__() # 做好迭代的准备
  111. return self
  112. def __iter__(self):
  113. self.first = True
  114. return self
  115. def __next__(self):
  116. if not self.first:
  117. time.sleep(1)
  118. # 使用了menu之后不需要is_next了
  119. # if not self.is_next():
  120. # raise StopIteration
  121. else:
  122. self.first = False
  123. return_str = ""
  124. for web_name in self.web:
  125. web = self.web[web_name]
  126. try:
  127. web.__next__()
  128. except StopIteration:
  129. pass
  130. else:
  131. web.find_word()
  132. get: list = web.output_word()
  133. return_str += "\n" + "* " * 20 + f"\n{web.return_page()}: [{web_name}] for {self.word} >>>\n"
  134. for i in get:
  135. return_str += f"{i[0]}\n -> {i[1]}\n"
  136. return_str += "* " * 20 + "\n"
  137. return return_str
  138. def out_again(self): # 再输出一次
  139. return_str = ""
  140. for web_name in self.web:
  141. web = self.web[web_name]
  142. get: list = web.output_word()
  143. return_str += "\n" + "* " * 20 + f"\n{web.return_page()}: [{web_name}] for {self.word} >>>\n"
  144. for i in get:
  145. return_str += f"{i[0]}\n{' ' * 8}-> {i[1]}\n"
  146. return_str += "* " * 20 + "\n"
  147. return return_str
  148. @staticmethod
  149. def is_next():
  150. return input("next? [Y/n]") != "n"
  151. class Menu:
  152. def __init__(self):
  153. self.searcher_dict = {}
  154. print("Welcome To SSearch!")
  155. def menu(self) -> None:
  156. while True:
  157. try:
  158. if not self.__menu():
  159. break
  160. except KeyboardInterrupt:
  161. print("Please Enter 'q' to quiz")
  162. except BaseException as e:
  163. print(f"There are some Error:\n{e}\n\n")
  164. def __menu(self):
  165. command = input("[SSearch] > ") # 输入一条指令
  166. if(command == "q"):
  167. return False
  168. try:
  169. exec(f"self.func_{command}()")
  170. except AttributeError:
  171. print("Not Support Command. [help]")
  172. return True
  173. def func_make(self):
  174. word = input("输入关键词:")
  175. name = input(f"输入名字[默认={word}]:")
  176. if not name:
  177. name = word
  178. self.searcher_dict[name] = Seacher(word) # 制造一个搜索器
  179. self.searcher_dict[name].find().__iter__() # 迭代准备
  180. def func_again(self):
  181. name = input(f"输入名字:")
  182. seacher_iter = self.searcher_dict.get(name, None)
  183. if not seacher_iter:
  184. print("没有找到对应搜索器或搜索器已经搜索结束")
  185. else:
  186. print(seacher_iter.out_again())
  187. def func_next(self):
  188. name = input("输入名字:")
  189. seacher_iter = self.searcher_dict.get(name, None)
  190. if not seacher_iter:
  191. print("没有找到对应搜索器或搜索器已经搜索结束")
  192. else:
  193. try:
  194. print(seacher_iter.__next__())
  195. except StopIteration:
  196. print("搜索结束")
  197. except AttributeError as e:
  198. print(f"There are some Error:\n{e}\n\n")
  199. menu = Menu()
  200. menu.menu()