123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114 |
- """
- """
- import logging
- from huitun.captcha_ident import CaptchaIdent
- from playwright.sync_api import sync_playwright, Page, Playwright
- import api
- from browser import BaseBrowser
- from util.lock_util import LockManager
- from util.playwright_util import is_element_present
- HUITUN_URL = 'https://xhs.huitun.com/'  # Huitun landing page; HuiTunBrowser.login() navigates here first.
- lock_manager = LockManager()  # Module-level lock registry; search_note() acquires/releases it keyed by phone.
class HuiTunBrowser(BaseBrowser):
    """Browser automation for the Huitun site (xhs.huitun.com).

    Provides password login (delegating any captcha to ``CaptchaIdent``) and
    a tag-filtered note search whose results are collected from the page's
    network responses rather than scraped from the DOM.
    """

    def __init__(self, phone: str, playwright=None):
        """Create a browser bound to the account identified by *phone*.

        :param phone: account phone number; typed into the login form and
            used as the key for the module-level lock in ``search_note``.
        :param playwright: forwarded unchanged to ``BaseBrowser`` —
            presumably an existing Playwright instance to reuse (TODO confirm).
        """
        super().__init__(phone, playwright)
        self.password = None
        # Search state: reset at the start of each search_note() call and
        # updated by search_note_handler() as responses arrive.
        self.list_result = []
        self.has_more = True

    def __get_name__(self) -> str:
        """Return the short identifier of this browser: ``'huitun'``."""
        return 'huitun'

    def login(self, password: str) -> None:
        """Log in to Huitun with the account password.

        Opens the Huitun landing page, runs the login flow if the site asks
        for it, keeps the page open for 30 seconds and then closes the
        browser. Nothing is returned.

        :param password: account password typed into the login form.
        """
        self.password = password
        try:
            self.__init_browser__()
            self.page.goto(HUITUN_URL)
            self.login_if_need()
            # Fixed 30 s wait after submitting the form — presumably to let
            # the captcha solver / session persistence finish (TODO confirm).
            self.page.wait_for_timeout(30_000)
        finally:
            # Close the browser even when navigation or the login flow raises.
            self.close()

    def login_if_need(self) -> None:
        """Run the Huitun password-login flow when a login modal is shown.

        Dismisses the "login expired" notice if present, switches the modal
        to password login when necessary, submits phone and password, and
        starts ``CaptchaIdent`` if the page has more than one frame.
        """
        # NOTE(review): self.password is only set by login(); when this is
        # reached via search_note() on an instance that never called login(),
        # it is still None — confirm callers always log in first.
        expired_notice = self.page.query_selector('.ant-btn-primary:has-text("知道了")')
        if expired_notice is not None:
            expired_notice.click()

        if not is_element_present(self.page, '.ant-modal-body'):
            return

        logging.info('灰豚需要重新登录')
        if not is_element_present(self.page, 'text=密码登录'):
            # Raw string: the backslashes are CSS escapes for '=' and must
            # reach Playwright verbatim.
            pwd_login = self.page.query_selector(r'.b9dOaTo9gfF3wLAi7jlXTg\=\=')
            if pwd_login is not None:
                pwd_login.click()

        self.page.get_by_placeholder('请输入手机号').type(self.phone)
        self.page.get_by_placeholder('6-15位数字与字母组合').type(self.password)
        self.page.get_by_text('登 录', exact=True).click()

        # Captcha step: page.frames always contains the main frame at index
        # 0, so an additional frame is what signals an embedded captcha.
        # Checking the length avoids the IndexError that frames[1] raises
        # when no such frame exists.
        if len(self.page.frames) > 1:
            captcha_tool = CaptchaIdent(self.page)
            captcha_tool.start()

    def search_note(self, tag_name: str, size: int) -> list:
        """Search Huitun notes for a tag and return the collected entries.

        Holds the module-level lock for ``self.phone`` for the whole browser
        session. Filters by tag, "图文笔记" and "近3天", then scrolls the page
        until at least *size* entries were collected, the server reports no
        more data, or the page cap is reached.

        :param tag_name: tag to filter by; validated with
            ``api.assert_not_none``.
        :param size: desired number of notes. The returned list is not
            truncated, so it may hold more entries than *size*.
        :return: the accumulated entries from each matching response's
            ``extData.list``.
        """
        # Fail fast, before taking the lock or launching a browser.
        api.assert_not_none(tag_name, "标签不能为空")

        lock_manager.acquire_lock(self.phone)
        try:
            self.__init_browser__()
            self.list_result = []
            self.has_more = True

            self.page.goto('https://xhs.huitun.com/#/note/note_search')
            self.page.wait_for_timeout(2000)
            self.login_if_need()

            # Expand the full tag list (guarded like the lookups below).
            expand_all = self.page.query_selector(r'.zgInWFcVVDjRN6BUMm3N0g\=\=')
            if expand_all is not None:
                expand_all.click()
            last_tag = self.page.query_selector(r'.fyBvQcyA81sogVJY0YVnhg\=\=')
            if last_tag is not None:
                last_tag.click()
            tag_ele = self.page.query_selector(
                rf'.IRk6XOEYweiS9APLHrOp-w\=\=:has-text("{tag_name}")'
            )
            if tag_ele is not None:
                tag_ele.click()
            else:
                logging.warning(
                    'Huitun tag not found, searching without tag filter: %s',
                    tag_name,
                )

            self.page.get_by_text('图文笔记', exact=True).click()
            self.page.wait_for_timeout(500)
            # Subscribe only now so that the responses triggered by the
            # earlier filter clicks are not collected.
            self.page.on('response', self.search_note_handler)
            self.page.get_by_text('近3天', exact=True).click()

            # Cap the number of scroll rounds at 2 * size / 10, but always
            # allow one round: the former int(2 * size / 10) was 0 for
            # size < 5, which returned before any response could arrive.
            page_num = max(1, 2 * size // 10)
            for _ in range(page_num):
                if len(self.list_result) >= size:
                    break
                logging.info('继续搜索灰豚')
                self.page.evaluate("window.scrollTo(0, document.body.scrollHeight)")
                self.page.wait_for_timeout(2000)
                logging.info('搜索灰豚结果数:%s', len(self.list_result))
                if not self.has_more:
                    break
            return self.list_result
        finally:
            # Close the browser before releasing the lock so the next holder
            # for this phone does not overlap with a browser that is still
            # shutting down; the nested finally guarantees the release.
            try:
                self.close()
            finally:
                lock_manager.release_lock(self.phone)

    def search_note_handler(self, response) -> None:
        """Collect note-search results from an intercepted response.

        Responses whose URL does not contain ``/note/search`` are ignored.
        A payload with ``status == 0`` contributes its ``extData.list`` to
        ``self.list_result``; any other status, or an empty or missing list,
        sets ``self.has_more`` to ``False``.

        :param response: response object passed to the ``'response'`` page
            event handler.
        """
        if response is None or '/note/search' not in response.url:
            return

        response_body = response.json()
        if response_body.get('status') != 0:
            self.has_more = False
            return

        # Tolerate a missing or null 'extData' / 'list' instead of raising
        # inside the event callback.
        note_list = (response_body.get('extData') or {}).get('list') or []
        self.has_more = len(note_list) > 0
        # Always extend our own list rather than adopting the payload's.
        self.list_result.extend(note_list)
|