# Web_Crawler.py
import os
import re
import threading
import time
import tkinter
from tkinter.filedialog import askdirectory

import Crawler_controller
  8. def Main():
  9. global top,Git,PATH,bg,bbg,fg,cookies_list
  10. PATH = os.getcwd()
  11. top = tkinter.Tk()
  12. cookies_list = []
  13. bg = '#FFFAFA' # 主颜色
  14. bbg = '#FFFAFA' # 按钮颜色
  15. fg = '#000000' # 文字颜色
  16. top["bg"] = bg
  17. FONT = ('黑体', 11) # 设置字体
  18. top.title('CoTan自动化网页')
  19. top.resizable(width=False, height=False)
  20. top.geometry('+10+10') # 设置所在位置
  21. width_B = 13 # 标准宽度
  22. height_B = 2
  23. a_y = 0
  24. a_x = 0
  25. tkinter.Button(top, bg=bbg, fg=fg, text='添加url对象',command=add_url , font=FONT, width=width_B,
  26. height=height_B).grid(column=a_x, row=a_y, sticky=tkinter.E + tkinter.W)
  27. tkinter.Button(top, bg=bbg, fg=fg, text='删除url对象',command=del_url , font=FONT, width=width_B,
  28. height=height_B).grid(column=a_x+1, row=a_y, sticky=tkinter.E + tkinter.W)
  29. tkinter.Button(top, bg=bbg, fg=fg, text='应用过滤机制', font=FONT, width=width_B,
  30. height=height_B).grid(column=a_x+2, row=a_y, sticky=tkinter.E + tkinter.W)
  31. global URL_BOX,URL_Input,Func_BOX
  32. a_y += 1
  33. tkinter.Label(top, text='添加url:', bg=bg, fg=fg, font=FONT, width=width_B, height=height_B).grid(column=a_x,row=a_y)
  34. URL_Input = tkinter.Entry(top, width=width_B * 2)
  35. URL_Input.grid(column=a_x + 1, row=a_y, columnspan=2, sticky=tkinter.E + tkinter.W)
  36. a_y += 1
  37. URL_BOX = tkinter.Listbox(top, width=width_B * 3, height=height_B * 5)
  38. URL_BOX.grid(column=a_x, row=a_y, columnspan=3, rowspan=5, sticky=tkinter.E + tkinter.W + tkinter.S + tkinter.N)
  39. a_y += 5
  40. tkinter.Button(top, bg=bbg, fg=fg, text='HTTPS过滤器',command=add_filter_func_HTTPS, font=FONT, width=width_B,height=height_B).grid(
  41. column=a_x, row=a_y, sticky=tkinter.E + tkinter.W)
  42. tkinter.Button(top, bg=bbg, fg=fg, text='WWW过滤器',command=add_filter_func_WWW, font=FONT, width=width_B,height=height_B).grid(
  43. column=a_x+1, row=a_y, sticky=tkinter.E + tkinter.W)
  44. tkinter.Button(top, bg=bbg, fg=fg, text='删除过滤器',command=del_func, font=FONT, width=width_B,height=height_B).grid(
  45. column=a_x+2, row=a_y, sticky=tkinter.E + tkinter.W)
  46. a_y += 1
  47. tkinter.Button(top, bg=bbg, fg=fg, text='自定义过滤器',command=add_filter_func_HTTPS, font=FONT, width=width_B,height=height_B).grid(
  48. column=a_x, row=a_y,columnspan=2, sticky=tkinter.E + tkinter.W)
  49. tkinter.Button(top, bg=bbg, fg=fg, text='清空过滤器', font=FONT, width=width_B,height=height_B).grid(
  50. column=a_x+2, row=a_y, sticky=tkinter.E + tkinter.W)
  51. global Func_BOX,cookies_fixed
  52. a_y += 1
  53. Func_BOX = tkinter.Listbox(top, width=width_B * 3, height=height_B * 4)
  54. Func_BOX.grid(column=a_x, row=a_y, columnspan=3, rowspan=4, sticky=tkinter.E + tkinter.W + tkinter.S + tkinter.N)
  55. global wait_Func_BOX,Wait_Input,cookies_BOX
  56. a_y += 4
  57. tkinter.Button(top, bg=bbg, fg=fg, text='单点爬虫运行',command=startDownloader, font=FONT, width=width_B,height=height_B).grid(
  58. column=a_x, row=a_y, sticky=tkinter.E + tkinter.W)
  59. tkinter.Button(top, bg=bbg, fg=fg, text='爬虫运行',command=startDownloader, font=FONT, width=width_B,height=height_B).grid(
  60. column=a_x+1, row=a_y, sticky=tkinter.E + tkinter.W)
  61. tkinter.Button(top, bg=bbg, fg=fg, text='单点爬虫停止',command=startDownloader, font=FONT, width=width_B,height=height_B).grid(
  62. column=a_x+2, row=a_y, sticky=tkinter.E + tkinter.W)
  63. a_y += 1
  64. cookies_fixed = tkinter.Variable()
  65. tkinter.Label(top, text='【曲奇监视】', bg=bg, fg=fg, font=FONT, width=width_B, height=height_B).grid(
  66. column=a_x+1,row=a_y,sticky=tkinter.E + tkinter.W + tkinter.W + tkinter.S + tkinter.N) # 设置说明
  67. tkinter.Checkbutton(top, bg=bg, fg=fg, activebackground=bg, activeforeground=fg, selectcolor=bg, text='固定曲奇',
  68. variable=cookies_fixed).grid(column=a_x + 2, row=a_y, sticky=tkinter.W)
  69. cookies_fixed.set('0')
  70. a_y += 1
  71. cookies_BOX = tkinter.Listbox(top, width=width_B * 3, height=height_B * 8)
  72. cookies_BOX.grid(column=a_x, row=a_y, columnspan=3, rowspan=8, sticky=tkinter.E + tkinter.W + tkinter.S + tkinter.N)
  73. a_y += 8
  74. tkinter.Button(top, bg=bbg, fg=fg, text='清空曲奇',command=Tra_cookies, font=FONT, width=width_B,height=height_B).grid(
  75. column=a_x, row=a_y, sticky=tkinter.E + tkinter.W)
  76. tkinter.Button(top, bg=bbg, fg=fg, text='更新曲奇',command=Update_cookies, font=FONT, width=width_B,height=height_B).grid(
  77. column=a_x+1, row=a_y, sticky=tkinter.E + tkinter.W)
  78. tkinter.Button(top, bg=bbg, fg=fg, text='删除曲奇',command=Del_cookies, font=FONT, width=width_B,height=height_B).grid(
  79. column=a_x+2, row=a_y, sticky=tkinter.E + tkinter.W)
  80. global cookies_Input,PAGE_BOX
  81. a_y += 1
  82. cookies_Input = tkinter.Entry(top, width=width_B * 3)
  83. cookies_Input.grid(column=a_x, row=a_y, columnspan=3, sticky=tkinter.E + tkinter.W)
  84. tkinter.Button(top, bg=bbg, fg=fg, text='添加曲奇',command=Add_cookies, font=FONT, width=width_B,height=height_B).grid(
  85. column=a_x+2, row=a_y, sticky=tkinter.E + tkinter.W)
  86. a_x += 3
  87. tkinter.Label(top, text='', bg=bg, fg=fg, font=FONT, width=1).grid(column=a_x, row=a_y) # 设置说明
  88. a_x += 1
  89. a_y = 0
  90. tkinter.Button(top, bg=bbg, fg=fg, text='根据id搜查',command=lambda :Page_Parser_addFindFunc('id'), font=FONT, width=width_B,height=height_B).grid(
  91. column=a_x, row=a_y, sticky=tkinter.E + tkinter.W)
  92. tkinter.Button(top, bg=bbg, fg=fg, text='根据name搜查',command=lambda :Page_Parser_addFindFunc('name'), font=FONT, width=width_B,height=height_B).grid(
  93. column=a_x+1, row=a_y, sticky=tkinter.E + tkinter.W)
  94. tkinter.Button(top, bg=bbg, fg=fg, text='根据class搜查',command=lambda :Page_Parser_addFindFunc('class'), font=FONT, width=width_B,height=height_B).grid(
  95. column=a_x+2, row=a_y, sticky=tkinter.E + tkinter.W)
  96. a_y += 1
  97. tkinter.Button(top, bg=bbg, fg=fg, text='根据xpath搜查',command=lambda :Page_Parser_addFindFunc('xpath'), font=FONT, width=width_B,height=height_B).grid(
  98. column=a_x, row=a_y, sticky=tkinter.E + tkinter.W)
  99. tkinter.Button(top, bg=bbg, fg=fg, text='根据css搜查',command=lambda :Page_Parser_addFindFunc('css'), font=FONT, width=width_B,height=height_B).grid(
  100. column=a_x+1, row=a_y, sticky=tkinter.E + tkinter.W)
  101. tkinter.Button(top, bg=bbg, fg=fg, text='根据元素名搜查',command=lambda :Page_Parser_addFindFunc('tag'), font=FONT, width=width_B,height=height_B).grid(
  102. column=a_x+2, row=a_y, sticky=tkinter.E + tkinter.W)
  103. global search_all,search_Input,Parser_Func_BOX
  104. a_y += 1
  105. search_all = tkinter.Variable()
  106. tkinter.Button(top, bg=bbg, fg=fg, text='根据link搜查',command=lambda :Page_Parser_addFindFunc('link'), font=FONT, width=width_B,height=height_B).grid(
  107. column=a_x, row=a_y, sticky=tkinter.E + tkinter.W)
  108. tkinter.Button(top, bg=bbg, fg=fg, text='link模糊搜查',command=lambda :Page_Parser_addFindFunc('partial_link'), font=FONT, width=width_B,height=height_B).grid(
  109. column=a_x+1, row=a_y, sticky=tkinter.E + tkinter.W)
  110. tkinter.Checkbutton(top, bg=bg, fg=fg, activebackground=bg, activeforeground=fg, selectcolor=bg, text='匹配全部',
  111. variable=search_all).grid(column=a_x + 2, row=a_y, sticky=tkinter.W)
  112. search_all.set('0')
  113. a_y += 1
  114. tkinter.Label(top, text='搜查参数:', bg=bg, fg=fg, font=FONT, width=width_B, height=height_B).grid(column=a_x,row=a_y)
  115. search_Input = tkinter.Entry(top, width=width_B * 2)
  116. search_Input.grid(column=a_x + 1, row=a_y, columnspan=2, sticky=tkinter.E + tkinter.W)
  117. a_y += 1
  118. Parser_Func_BOX = tkinter.Listbox(top, width=width_B * 3, height=height_B * 4)
  119. Parser_Func_BOX.grid(column=a_x, row=a_y, columnspan=3, rowspan=4, sticky=tkinter.E + tkinter.W + tkinter.S + tkinter.N)
  120. global Var_Input,VarIndex_Input,Send_Input,UserPW_Input,SELE_Input,JS_Input,Time_Input
  121. a_y += 4
  122. tkinter.Label(top, text='操作元素:', bg=bg, fg=fg, font=FONT, width=width_B, height=height_B).grid(column=a_x,row=a_y)
  123. Var_Input = tkinter.Entry(top, width=width_B * 2)
  124. Var_Input.grid(column=a_x + 1, row=a_y, columnspan=2, sticky=tkinter.E + tkinter.W)
  125. a_y += 1
  126. tkinter.Label(top, text='操作索引:', bg=bg, fg=fg, font=FONT, width=width_B, height=height_B).grid(column=a_x,row=a_y)
  127. VarIndex_Input = tkinter.Entry(top, width=width_B * 2)
  128. VarIndex_Input.grid(column=a_x + 1, row=a_y, columnspan=2, sticky=tkinter.E + tkinter.W)
  129. a_y += 1
  130. tkinter.Label(top, text='发送信息:', bg=bg, fg=fg, font=FONT, width=width_B, height=height_B).grid(column=a_x,row=a_y)
  131. Send_Input = tkinter.Entry(top, width=width_B * 2)
  132. Send_Input.grid(column=a_x + 1, row=a_y, columnspan=2, sticky=tkinter.E + tkinter.W)
  133. a_y += 1
  134. tkinter.Label(top, text='认证用户名:', bg=bg, fg=fg, font=FONT, width=width_B, height=height_B).grid(column=a_x,row=a_y)
  135. UserName_Input = tkinter.Entry(top, width=width_B * 2)
  136. UserName_Input.grid(column=a_x + 1, row=a_y, columnspan=2, sticky=tkinter.E + tkinter.W)
  137. a_y += 1
  138. tkinter.Label(top, text='认证密码:', bg=bg, fg=fg, font=FONT, width=width_B, height=height_B).grid(column=a_x,row=a_y)
  139. UserPW_Input = tkinter.Entry(top, width=width_B * 2)
  140. UserPW_Input.grid(column=a_x + 1, row=a_y, columnspan=2, sticky=tkinter.E + tkinter.W)
  141. a_y += 1
  142. tkinter.Label(top, text='选择参数:', bg=bg, fg=fg, font=FONT, width=width_B, height=height_B).grid(column=a_x,row=a_y)
  143. SELE_Input = tkinter.Entry(top, width=width_B * 2)
  144. SELE_Input.grid(column=a_x + 1, row=a_y, columnspan=2, sticky=tkinter.E + tkinter.W)
  145. a_y += 1
  146. tkinter.Label(top, text='等待时间:', bg=bg, fg=fg, font=FONT, width=width_B, height=height_B).grid(column=a_x,row=a_y)
  147. Time_Input = tkinter.Entry(top, width=width_B * 2)
  148. Time_Input.grid(column=a_x + 1, row=a_y, columnspan=2, sticky=tkinter.E + tkinter.W)
  149. a_y += 1
  150. tkinter.Label(top, text='JavaScript:', bg=bg, fg=fg, font=FONT, width=width_B, height=height_B).grid(column=a_x,row=a_y)
  151. JS_Input = tkinter.Entry(top, width=width_B * 2)
  152. JS_Input.grid(column=a_x + 1, row=a_y, columnspan=2, sticky=tkinter.E + tkinter.W)
  153. a_y += 1
  154. tkinter.Button(top, bg=bbg, fg=fg, text='发送字符',command=lambda :Page_Parser_addActionFunc('send_keys'), font=FONT, width=width_B,height=height_B).grid(
  155. column=a_x, row=a_y, sticky=tkinter.E + tkinter.W)
  156. tkinter.Button(top, bg=bbg, fg=fg, text='清空字符',command=lambda :Page_Parser_addActionFunc('clear'), font=FONT, width=width_B,height=height_B).grid(
  157. column=a_x+1, row=a_y, sticky=tkinter.E + tkinter.W)
  158. tkinter.Button(top, bg=bbg, fg=fg, text='提交表单',command=lambda :Page_Parser_addActionFunc('submit'), font=FONT, width=width_B,height=height_B).grid(
  159. column=a_x+2, row=a_y, sticky=tkinter.E + tkinter.W)
  160. a_y += 1
  161. tkinter.Button(top, bg=bbg, fg=fg, text='点击按钮',command=lambda :Page_Parser_addActionFunc('click'), font=FONT, width=width_B,height=height_B).grid(
  162. column=a_x,columnspan=3, row=a_y, sticky=tkinter.E + tkinter.W)
  163. a_y += 1
  164. tkinter.Button(top, bg=bbg, fg=fg, text='切换Frame(id)',command=Page_Parser_addFrameFunc_id, font=FONT, width=width_B,height=height_B).grid(
  165. column=a_x, row=a_y, sticky=tkinter.E + tkinter.W)
  166. tkinter.Button(top, bg=bbg, fg=fg, text='切换Frame',command=lambda :Page_Parser_addFindFunc('frame'), font=FONT, width=width_B,height=height_B).grid(
  167. column=a_x+1, row=a_y, sticky=tkinter.E + tkinter.W)
  168. tkinter.Button(top, bg=bbg, fg=fg, text='定位焦点元素',command=lambda :Page_Parser_addFindFunc('active_element'), font=FONT, width=width_B,height=height_B).grid(
  169. column=a_x+2, row=a_y, sticky=tkinter.E + tkinter.W)
  170. a_y += 1
  171. tkinter.Button(top, bg=bbg, fg=fg, text='捕获弹窗',command=lambda :Page_Parser_addFindFunc('alert'), font=FONT, width=width_B,height=height_B).grid(
  172. column=a_x, row=a_y, sticky=tkinter.E + tkinter.W)
  173. tkinter.Button(top, bg=bbg, fg=fg, text='回到主Frame',command=lambda :Page_Parser_addFrameFunc_FP(False), font=FONT, width=width_B,height=height_B).grid(
  174. column=a_x+1, row=a_y, sticky=tkinter.E + tkinter.W)
  175. tkinter.Button(top, bg=bbg, fg=fg, text='回到父Frame',command=lambda :Page_Parser_addFrameFunc_FP(True), font=FONT, width=width_B,height=height_B).grid(
  176. column=a_x+2, row=a_y, sticky=tkinter.E + tkinter.W)
  177. a_y += 1
  178. tkinter.Button(top, bg=bbg, fg=fg, text='弹出框认证',command=lambda :Page_Parser_addActionFunc('User_Passwd'), font=FONT, width=width_B,height=height_B).grid(
  179. column=a_x, row=a_y, sticky=tkinter.E + tkinter.W)
  180. tkinter.Button(top, bg=bbg, fg=fg, text='弹出框确定',command=lambda :Page_Parser_addActionFunc('accept'), font=FONT, width=width_B,height=height_B).grid(
  181. column=a_x+1, row=a_y, sticky=tkinter.E + tkinter.W)
  182. tkinter.Button(top, bg=bbg, fg=fg, text='弹出框取消',command=lambda :Page_Parser_addActionFunc('dismiss'), font=FONT, width=width_B,height=height_B).grid(
  183. column=a_x+2, row=a_y, sticky=tkinter.E + tkinter.W)
  184. a_y += 1
  185. tkinter.Button(top, bg=bbg, fg=fg, text='取消选择index',command=lambda :Page_Parser_addActionFunc('deselect_by_index'), font=FONT, width=width_B,height=height_B).grid(
  186. column=a_x, row=a_y, sticky=tkinter.E + tkinter.W)
  187. tkinter.Button(top, bg=bbg, fg=fg, text='取消选择text',command=lambda :Page_Parser_addActionFunc('deselect_by_text'), font=FONT, width=width_B,height=height_B).grid(
  188. column=a_x+1, row=a_y, sticky=tkinter.E + tkinter.W)
  189. tkinter.Button(top, bg=bbg, fg=fg, text='取消选择value',command=lambda :Page_Parser_addActionFunc('deselect_by_value'), font=FONT, width=width_B,height=height_B).grid(
  190. column=a_x+2, row=a_y, sticky=tkinter.E + tkinter.W)
  191. a_y += 1
  192. tkinter.Button(top, bg=bbg, fg=fg, text='选择index',command=lambda :Page_Parser_addActionFunc('select_by_index'), font=FONT, width=width_B,height=height_B).grid(
  193. column=a_x, row=a_y, sticky=tkinter.E + tkinter.W)
  194. tkinter.Button(top, bg=bbg, fg=fg, text='选择text',command=lambda :Page_Parser_addActionFunc('select_by_text'), font=FONT, width=width_B,height=height_B).grid(
  195. column=a_x+1, row=a_y, sticky=tkinter.E + tkinter.W)
  196. tkinter.Button(top, bg=bbg, fg=fg, text='选择value',command=lambda :Page_Parser_addActionFunc('select_by_value'), font=FONT, width=width_B,height=height_B).grid(
  197. column=a_x+2, row=a_y, sticky=tkinter.E + tkinter.W)
  198. a_y += 1
  199. tkinter.Button(top, bg=bbg, fg=fg, text='页面后退',command=lambda :Page_Parser_addActionFunc('back'), font=FONT, width=width_B,height=height_B).grid(
  200. column=a_x, row=a_y, sticky=tkinter.E + tkinter.W)
  201. tkinter.Button(top, bg=bbg, fg=fg, text='页面刷新',command=lambda :Page_Parser_addActionFunc('refresh'), font=FONT, width=width_B,height=height_B).grid(
  202. column=a_x+1, row=a_y, sticky=tkinter.E + tkinter.W)
  203. tkinter.Button(top, bg=bbg, fg=fg, text='页面前进',command=lambda :Page_Parser_addActionFunc('forward'), font=FONT, width=width_B,height=height_B).grid(
  204. column=a_x+2, row=a_y, sticky=tkinter.E + tkinter.W)
  205. a_y += 1
  206. tkinter.Button(top, bg=bbg, fg=fg, text='暴力等待',command=lambda :Page_Parser_addActionFunc('wait_sleep'), font=FONT, width=width_B,height=height_B).grid(
  207. column=a_x, row=a_y, sticky=tkinter.E + tkinter.W)
  208. tkinter.Button(top, bg=bbg, fg=fg, text='元素检查等待',command=lambda :Page_Parser_addActionFunc('set_wait'), font=FONT, width=width_B,height=height_B).grid(
  209. column=a_x+1, row=a_y, sticky=tkinter.E + tkinter.W)
  210. tkinter.Button(top, bg=bbg, fg=fg, text='运行js',command=lambda :Page_Parser_addActionFunc('run_JS'), font=FONT, width=width_B,height=height_B).grid(
  211. column=a_x+2, row=a_y, sticky=tkinter.E + tkinter.W)
  212. a_x += 3
  213. tkinter.Label(top, text='', bg=bg, fg=fg, font=FONT, width=1).grid(column=a_x, row=a_y) # 设置说明
  214. a_x += 1
  215. a_y = 0
  216. global Func_Output,Status_Output,FuncValue_BOX
  217. tkinter.Label(top, text='正在执行:', bg=bg, fg=fg, font=FONT, width=width_B, height=height_B).grid(column=a_x,row=a_y)
  218. Func_Output = tkinter.Entry(top, width=width_B * 2, state=tkinter.DISABLED)
  219. Func_Output.grid(column=a_x + 1, row=a_y, columnspan=2, sticky=tkinter.E + tkinter.W)
  220. a_y += 1
  221. tkinter.Label(top, text='上一次状态:', bg=bg, fg=fg, font=FONT, width=width_B, height=height_B).grid(column=a_x,row=a_y)
  222. Status_Output = tkinter.Entry(top, width=width_B * 2, state=tkinter.DISABLED)
  223. Status_Output.grid(column=a_x + 1, row=a_y, columnspan=2, sticky=tkinter.E + tkinter.W)
  224. a_y += 1
  225. FuncValue_BOX = tkinter.Listbox(top, width=width_B * 3, height=height_B * 5)
  226. FuncValue_BOX.grid(column=a_x, row=a_y, columnspan=3, rowspan=5, sticky=tkinter.E + tkinter.W + tkinter.S + tkinter.N)
  227. a_y += 5
  228. tkinter.Button(top, bg=bbg, fg=fg, text='NONE',command=lambda :Page_Parser_addActionFunc('wait_sleep'), font=FONT, width=width_B,height=height_B).grid(
  229. column=a_x, row=a_y, sticky=tkinter.E + tkinter.W)
  230. tkinter.Button(top, bg=bbg, fg=fg, text='NONE',command=lambda :Page_Parser_addActionFunc('set_wait'), font=FONT, width=width_B,height=height_B).grid(
  231. column=a_x+1, row=a_y, sticky=tkinter.E + tkinter.W)
  232. tkinter.Button(top, bg=bbg, fg=fg, text='NONE',command=lambda :Page_Parser_addActionFunc('run_JS'), font=FONT, width=width_B,height=height_B).grid(
  233. column=a_x+2, row=a_y, sticky=tkinter.E + tkinter.W)
  234. top.update()#要预先update一下,否则会卡住
  235. global url,loader,Page_Parser
  236. save_dir = askdirectory(title='选择项目位置')#项目位置
  237. url = Crawler_controller.url(save_dir,save_dir)
  238. loader = Crawler_controller.Page_Downloader(url,save_dir)
  239. Page_Parser = Crawler_controller.Page_Parser(loader)
  240. top.mainloop()
  241. def update_Status(now_func,status,Value_BOX):
  242. global Func_Output,Status_Output,FuncValue_BOX
  243. Func_Output.set(now_func)
  244. Status_Output.set(status)
  245. FuncValue_BOX.delete(0,tkinter.END)
  246. FuncValue_BOX.insert(0,*Value_BOX)
  247. def Func_Args():#方法args统一转换(不支持Frame)
  248. global Var_Input, VarIndex_Input, Send_Input, UserPW_Input, SELE_Input, JS_Input, Time_Input
  249. try:
  250. time = int(Time_Input.get())
  251. except:
  252. time = 2
  253. try:
  254. index = int(VarIndex_Input.get())
  255. except:
  256. index = 0
  257. return dict(
  258. element_value = Var_Input.get(),
  259. index = index,
  260. text = Send_Input.get(),
  261. User = UserPW_Input.get(),
  262. Passwd = UserPW_Input.get(),
  263. deselect = SELE_Input.get(),
  264. JS = JS_Input.get(),
  265. time=time
  266. )
  267. def Page_Parser_addActionFunc(func):
  268. global Page_Parser
  269. args = Func_Args()
  270. FUNC = {'send_keys':Page_Parser.send_keys,'clear':Page_Parser.clear,'click':Page_Parser.click,'User_Passwd':Page_Parser.User_Passwd,
  271. 'accept':Page_Parser.accept,'dismiss':Page_Parser.dismiss,'submit':Page_Parser.submit,'deselect_by_index':Page_Parser.deselect_by_index,
  272. 'deselect_by_value':Page_Parser.deselect_by_value,'deselect_by_text':Page_Parser.deselect_by_text,'select_by_index':Page_Parser.select_by_index,
  273. 'select_by_value':Page_Parser.select_by_value,'select_by_text':Page_Parser.select_by_text,'back':Page_Parser.back,'forward':Page_Parser.forward,
  274. 'refresh':Page_Parser.refresh,'wait_sleep':Page_Parser.wait_sleep,'set_wait':Page_Parser.set_wait,'run_JS':Page_Parser.run_JS}.get(
  275. func,Page_Parser.send_keys
  276. )
  277. FUNC(**args)
  278. Update_Parser_Func_BOX()
  279. def Page_Parser_addFrameFunc_FP(F=True):
  280. global Page_Parser, search_Input
  281. search = None if F else ''
  282. Page_Parser.find_switch_to_frame(search,True)
  283. Update_Parser_Func_BOX()
  284. def Page_Parser_addFrameFunc_id():
  285. global Page_Parser, search_Input
  286. search = search_Input.get()
  287. Page_Parser.find_switch_to_frame(search,True)
  288. Update_Parser_Func_BOX()
  289. def Page_Parser_addFindFunc(func):
  290. global search_all, search_Input,Page_Parser
  291. not_all = not(bool(search_all.get()))
  292. search = search_Input.get()
  293. FUNC = {'id':Page_Parser.find_ID,'name':Page_Parser.find_name,'class':Page_Parser.find_class,
  294. 'xpath':Page_Parser.find_xpath,'css':Page_Parser.find_css,'tag':Page_Parser.find_tag_name,
  295. 'link':Page_Parser.find_link_text,'partial_link':Page_Parser.find_partial_link_text,
  296. 'alert':Page_Parser.find_switch_to_alert,'active_element':Page_Parser.find_switch_to_active_element,
  297. 'frame':Page_Parser.find_switch_to_frame}.get(func,Page_Parser.find_ID)
  298. FUNC(search,not_all=not_all)
  299. Update_Parser_Func_BOX()
  300. def Update_Parser_Func_BOX():
  301. global Parser_Func_BOX,Page_Parser
  302. Parser_Func_BOX.delete(0,tkinter.END)
  303. Parser_Func_BOX.insert(tkinter.END, *Page_Parser.return_func())
  304. def Update_cookies():
  305. global cookies_BOX,cookies_list,cookies_Input
  306. cookies = eval(cookies_Input.get(),{})
  307. if cookies_fixed.get() == '0':return False
  308. try:
  309. name = cookies_list[cookies_BOX.curselection()[0]].get('name')
  310. loader.update_cookies(name,cookies)
  311. cookies_fixed.set('0')
  312. except:
  313. pass
  314. def Add_cookies():
  315. global cookies_BOX,cookies_list,cookies_Input
  316. cookies = eval(cookies_Input.get(),{})
  317. if cookies_fixed.get() == '0':return False
  318. try:
  319. loader.Add_cookies(cookies)
  320. cookies_fixed.set('0')
  321. except:
  322. raise
  323. def Tra_cookies():
  324. global cookies_BOX,cookies_list
  325. if cookies_fixed.get() == '0':return False
  326. try:
  327. loader.Tra_cookies()
  328. cookies_fixed.set('0')
  329. except:
  330. pass
  331. def Del_cookies():
  332. global cookies_BOX,cookies_list
  333. if cookies_fixed.get() == '0':return False
  334. try:
  335. name = cookies_list[cookies_BOX.curselection()[0]].get('name')
  336. print(name)
  337. loader.Del_cookies(name)
  338. cookies_fixed.set('0')
  339. except:
  340. pass
  341. def cookies_BOX_Update(cookies):
  342. global cookies_BOX,cookies_list
  343. if cookies_fixed.get() == '0':
  344. cookies_list = cookies
  345. cookies_BOX.delete(0,tkinter.END)
  346. cookies_BOX.insert(0,*cookies)
  347. def startDownloader():
  348. def startLoader():
  349. global loader,Page_Parser
  350. loader.strat_urlGet(func_cookie=cookies_BOX_Update)
  351. Page_Parser.Element_interaction(update_Status)
  352. new = threading.Thread(target=startLoader)
  353. new.start()
  354. update_URLBOX()
  355. def add_filter_func_HTTPS():
  356. global url
  357. url.Add_func(lambda url:re.match(re.compile('^https://'),url),'HTTPS过滤')
  358. update_Func_BOX()
  359. def add_filter_func_WWW():
  360. global url
  361. url.Add_func(lambda url:re.match(re.compile('.*www\.'),url),'www过滤')
  362. update_Func_BOX()
  363. def del_func():
  364. global URL_BOX
  365. index = Func_BOX.curselection()[0]
  366. url.Del_func(index)
  367. update_Func_BOX()
  368. def update_Func_BOX():
  369. global url,Func_BOX
  370. Func_BOX.delete(0,tkinter.END)
  371. Func_BOX.insert(tkinter.END,*url.return_func())
  372. def del_url():
  373. global URL_BOX
  374. index = URL_BOX.curselection()[0]
  375. url.del_url(index)
  376. update_URLBOX()
  377. def add_url():
  378. global URL_Input,url
  379. new_url = URL_Input.get()
  380. if new_url == '':return
  381. url.add_url(new_url)
  382. update_URLBOX()
  383. def update_URLBOX():
  384. global url,URL_BOX
  385. URL_BOX.delete(0,tkinter.END)
  386. URL_BOX.insert(tkinter.END,*url.return_url())
# Start the GUI only when this file is run as a script, not when it is imported.
if __name__ == "__main__":
    Main()