__init__.py 3.3 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586
  1. """
  2. """
  3. import logging
  4. from playwright.sync_api import sync_playwright, Page, Playwright
  5. import api
  6. from browser import BaseBrowser
  7. from util.lock_util import LockManager
  8. from util.playwright_util import is_element_present
# Entry URL of the Huitun site; login() navigates here first.
HUITUN_URL = 'https://xhs.huitun.com/'
# Module-level LockManager instance; search_note() acquires and releases a
# lock keyed by the account phone number around each search.
lock_manager = LockManager()
  11. class HuiTunBrowser(BaseBrowser):
  12. def __get_name__(self):
  13. return 'huitun'
  14. def login(self, password: str):
  15. """
  16. 登录抖音,一个登录之后,全部的页面都有了登录状态
  17. :return: 2- 需要验证码 1-登录成功
  18. """
  19. self.__init_browser__()
  20. self.page.goto(HUITUN_URL)
  21. if is_element_present(self.page, '.ant-modal-body'):
  22. if not is_element_present(self.page, 'text=密码登录'):
  23. pwd_login = self.page.query_selector('.b9dOaTo9gfF3wLAi7jlXTg\=\=')
  24. if pwd_login is not None:
  25. pwd_login.click()
  26. self.page.get_by_placeholder('请输入手机号').type(self.phone)
  27. self.page.get_by_placeholder('6-15位数字与字母组合').type(password)
  28. self.page.get_by_text('登 录', exact=True).click()
  29. self.page.wait_for_timeout(30_000)
  30. self.close()
  31. def search_note(self, tag_name: str, size: int):
  32. lock_manager.acquire_lock(self.phone)
  33. try:
  34. self.__init_browser__()
  35. self.list_result = []
  36. self.has_more = True
  37. api.assert_not_none(tag_name, "标签不能为空")
  38. self.page.goto('https://xhs.huitun.com/#/note/note_search')
  39. # 展开全部标签
  40. self.page.query_selector('.RaWdmGo9iaS1-bQ6mK5K4w\=\=').click()
  41. self.page.query_selector(f'.IRk6XOEYweiS9APLHrOp-w\=\=:has-text("{tag_name}")').click()
  42. self.page.get_by_text('图文笔记', exact=True).click()
  43. self.page.wait_for_timeout(500)
  44. self.page.on('response', self.search_note_handler)
  45. self.page.get_by_text('近3天', exact=True).click()
  46. # 限定一个上限
  47. page_num = int(2 * size / 10)
  48. for i in range(page_num):
  49. if size is not None and len(self.list_result) >= size:
  50. break
  51. logging.info('继续搜索灰豚')
  52. self.page.evaluate("window.scrollTo(0, document.body.scrollHeight)")
  53. self.page.wait_for_timeout(2000)
  54. logging.info('搜索灰豚结果数:%s', len(self.list_result))
  55. if not self.has_more:
  56. break
  57. return self.list_result
  58. finally:
  59. lock_manager.release_lock(self.phone)
  60. self.close()
  61. def search_note_handler(self, response):
  62. """
  63. 处理用户主页搜索图文请求响应
  64. :param response:
  65. :return:
  66. """
  67. if response is not None and '/note/search' in response.url:
  68. response_body = response.json()
  69. if response_body.get('status') == 0:
  70. note_list = response_body.get('extData').get('list')
  71. self.has_more = len(note_list) > 0
  72. if len(self.list_result) == 0:
  73. self.list_result = note_list
  74. else:
  75. self.list_result.extend(note_list)
  76. else:
  77. self.has_more = False