__init__.py 4.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112
  1. """
  2. """
  3. import logging
  4. from huitun.captcha_ident import CaptchaIdent
  5. from playwright.sync_api import sync_playwright, Page, Playwright
  6. import api
  7. from browser import BaseBrowser
  8. from util.lock_util import LockManager
  9. from util.playwright_util import is_element_present
# Landing page of the Huitun site; HuiTunBrowser.login() navigates here first.
HUITUN_URL = 'https://xhs.huitun.com/'
# Module-level lock manager shared by every HuiTunBrowser instance;
# search_note() acquires/releases a lock keyed by the account phone number.
lock_manager = LockManager()
  12. class HuiTunBrowser(BaseBrowser):
  13. def __init__(self, phone: str, playwright=None):
  14. super().__init__(phone, playwright)
  15. self.password = None
  16. def __get_name__(self):
  17. return 'huitun'
  18. def login(self, password: str):
  19. """
  20. 登录抖音,一个登录之后,全部的页面都有了登录状态
  21. :return: 2- 需要验证码 1-登录成功
  22. """
  23. self.__init_browser__()
  24. self.page.goto(HUITUN_URL)
  25. self.password = password
  26. self.login_if_need()
  27. self.page.wait_for_timeout(30_000)
  28. self.close()
  29. def login_if_need(self):
  30. """
  31. 登录灰豚
  32. """
  33. login_info_expired = self.page.query_selector('.ant-btn-primary:has-text("知道了")')
  34. if login_info_expired is not None:
  35. login_info_expired.click()
  36. if is_element_present(self.page, '.ant-modal-body'):
  37. if not is_element_present(self.page, 'text=密码登录'):
  38. pwd_login = self.page.query_selector('.b9dOaTo9gfF3wLAi7jlXTg\=\=')
  39. if pwd_login is not None:
  40. pwd_login.click()
  41. self.page.get_by_placeholder('请输入手机号').type(self.phone)
  42. self.page.get_by_placeholder('6-15位数字与字母组合').type(self.password)
  43. self.page.get_by_text('登 录', exact=True).click()
  44. # 验证码登录
  45. captcha_frame = self.page.frames[1]
  46. if captcha_frame is not None:
  47. captcha_tool = CaptchaIdent(self.page)
  48. captcha_tool.start()
  49. def search_note(self, tag_name: str, size: int):
  50. lock_manager.acquire_lock(self.phone)
  51. try:
  52. self.__init_browser__()
  53. self.login_if_need()
  54. self.list_result = []
  55. self.has_more = True
  56. api.assert_not_none(tag_name, "标签不能为空")
  57. self.page.goto('https://xhs.huitun.com/#/note/note_search')
  58. # 展开全部标签
  59. self.page.query_selector('.zgInWFcVVDjRN6BUMm3N0g\=\=').click()
  60. last_tag = self.page.query_selector('.fyBvQcyA81sogVJY0YVnhg\=\=')
  61. if last_tag is not None:
  62. last_tag.click()
  63. tag_ele = self.page.query_selector(f'.IRk6XOEYweiS9APLHrOp-w\=\=:has-text("{tag_name}")')
  64. if tag_ele is not None:
  65. tag_ele.click()
  66. self.page.get_by_text('图文笔记', exact=True).click()
  67. self.page.wait_for_timeout(500)
  68. self.page.on('response', self.search_note_handler)
  69. self.page.get_by_text('近3天', exact=True).click()
  70. # 限定一个上限
  71. page_num = int(2 * size / 10)
  72. for i in range(page_num):
  73. if size is not None and len(self.list_result) >= size:
  74. break
  75. logging.info('继续搜索灰豚')
  76. self.page.evaluate("window.scrollTo(0, document.body.scrollHeight)")
  77. self.page.wait_for_timeout(2000)
  78. logging.info('搜索灰豚结果数:%s', len(self.list_result))
  79. if not self.has_more:
  80. break
  81. return self.list_result
  82. finally:
  83. lock_manager.release_lock(self.phone)
  84. self.close()
  85. def search_note_handler(self, response):
  86. """
  87. 处理用户主页搜索图文请求响应
  88. :param response:
  89. :return:
  90. """
  91. if response is not None and '/note/search' in response.url:
  92. response_body = response.json()
  93. if response_body.get('status') == 0:
  94. note_list = response_body.get('extData').get('list')
  95. self.has_more = len(note_list) > 0
  96. if len(self.list_result) == 0:
  97. self.list_result = note_list
  98. else:
  99. self.list_result.extend(note_list)
  100. else:
  101. self.has_more = False