__init__.py 4.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114
  1. """
  2. """
  3. import logging
  4. from huitun.captcha_ident import CaptchaIdent
  5. from playwright.sync_api import sync_playwright, Page, Playwright
  6. import api
  7. from browser import BaseBrowser
  8. from util.lock_util import LockManager
  9. from util.playwright_util import is_element_present
  10. HUITUN_URL = 'https://xhs.huitun.com/'
  11. lock_manager = LockManager()
  12. class HuiTunBrowser(BaseBrowser):
  13. def __init__(self, phone: str, playwright=None):
  14. super().__init__(phone, playwright)
  15. self.password = None
  16. def __get_name__(self):
  17. return 'huitun'
  18. def login(self, password: str):
  19. """
  20. 登录抖音,一个登录之后,全部的页面都有了登录状态
  21. :return: 2- 需要验证码 1-登录成功
  22. """
  23. self.__init_browser__()
  24. self.page.goto(HUITUN_URL)
  25. self.password = password
  26. self.login_if_need()
  27. self.page.wait_for_timeout(30_000)
  28. self.close()
  29. def login_if_need(self):
  30. """
  31. 登录灰豚
  32. """
  33. login_info_expired = self.page.query_selector('.ant-btn-primary:has-text("知道了")')
  34. if login_info_expired is not None:
  35. login_info_expired.click()
  36. if is_element_present(self.page, '.ant-modal-body'):
  37. logging.info('灰豚需要重新登录')
  38. if not is_element_present(self.page, 'text=密码登录'):
  39. pwd_login = self.page.query_selector('.b9dOaTo9gfF3wLAi7jlXTg\=\=')
  40. if pwd_login is not None:
  41. pwd_login.click()
  42. self.page.get_by_placeholder('请输入手机号').type(self.phone)
  43. self.page.get_by_placeholder('6-15位数字与字母组合').type(self.password)
  44. self.page.get_by_text('登 录', exact=True).click()
  45. # 验证码登录
  46. captcha_frame = self.page.frames[1]
  47. if captcha_frame is not None:
  48. captcha_tool = CaptchaIdent(self.page)
  49. captcha_tool.start()
  50. def search_note(self, tag_name: str, size: int):
  51. lock_manager.acquire_lock(self.phone)
  52. try:
  53. self.__init_browser__()
  54. self.list_result = []
  55. self.has_more = True
  56. api.assert_not_none(tag_name, "标签不能为空")
  57. self.page.goto('https://xhs.huitun.com/#/note/note_search')
  58. self.page.wait_for_timeout(2000)
  59. self.login_if_need()
  60. # 展开全部标签
  61. self.page.query_selector('.zgInWFcVVDjRN6BUMm3N0g\=\=').click()
  62. last_tag = self.page.query_selector('.fyBvQcyA81sogVJY0YVnhg\=\=')
  63. if last_tag is not None:
  64. last_tag.click()
  65. tag_ele = self.page.query_selector(f'.IRk6XOEYweiS9APLHrOp-w\=\=:has-text("{tag_name}")')
  66. if tag_ele is not None:
  67. tag_ele.click()
  68. self.page.get_by_text('图文笔记', exact=True).click()
  69. self.page.wait_for_timeout(500)
  70. self.page.on('response', self.search_note_handler)
  71. self.page.get_by_text('近3天', exact=True).click()
  72. # 限定一个上限
  73. page_num = int(2 * size / 10)
  74. for i in range(page_num):
  75. if size is not None and len(self.list_result) >= size:
  76. break
  77. logging.info('继续搜索灰豚')
  78. self.page.evaluate("window.scrollTo(0, document.body.scrollHeight)")
  79. self.page.wait_for_timeout(2000)
  80. logging.info('搜索灰豚结果数:%s', len(self.list_result))
  81. if not self.has_more:
  82. break
  83. return self.list_result
  84. finally:
  85. lock_manager.release_lock(self.phone)
  86. self.close()
  87. def search_note_handler(self, response):
  88. """
  89. 处理用户主页搜索图文请求响应
  90. :param response:
  91. :return:
  92. """
  93. if response is not None and '/note/search' in response.url:
  94. response_body = response.json()
  95. if response_body.get('status') == 0:
  96. note_list = response_body.get('extData').get('list')
  97. self.has_more = len(note_list) > 0
  98. if len(self.list_result) == 0:
  99. self.list_result = note_list
  100. else:
  101. self.list_result.extend(note_list)
  102. else:
  103. self.has_more = False