"""Xiaohongshu browser automation built on the project's ``BaseBrowser``."""
import logging
from time import sleep
from typing import Optional

from playwright.sync_api import sync_playwright, Playwright

from browser import BaseBrowser
from tools import utils
from util.lock_util import LockManager
from util.playwright_util import is_element_present
from .client import XiaoHongShuClient
from .rotate_ident import RotateIdent

lock_manager = LockManager()

XHS_URL = 'https://www.xiaohongshu.com'


class XhsBrowser(BaseBrowser):
    """Browser session for Xiaohongshu, paired with an API client.

    ``self.page`` and ``self.browser`` are used below but are not assigned in
    this class; presumably ``BaseBrowser.__init_browser__`` provides them
    (TODO confirm against BaseBrowser).
    """

    def __init__(self, phone: str, playwright=None):
        """Forward ``phone`` and the optional ``playwright`` handle to the base class."""
        super().__init__(phone, playwright)

    def __get_name__(self):
        """Return the short identifier of this browser type."""
        return 'xhs'

    def __init_browser__(self):
        """Run the base initialisation, build the API client and open the home page."""
        super().__init_browser__()
        # NOTE(review): the client snapshots cookies before the page navigates
        # to XHS_URL; the statement order is kept exactly as it was.
        self.xhs_client = self.create_xhs_client(None)
        self.rotate_ident = RotateIdent(self.page)
        self.page.goto(XHS_URL)
        self.rotate_ident.handle_rotate()

    def create_xhs_client(self, httpx_proxy: Optional[str]) -> XiaoHongShuClient:
        """Create the Xiaohongshu API client from the current browser cookies.

        :param httpx_proxy: proxy value handed to the client as ``proxies``;
            ``None`` is what ``__init_browser__`` passes.
        :return: a ``XiaoHongShuClient`` bound to ``self.page``.
        """
        utils.logger.info("[XiaoHongShuCrawler.create_xhs_client] Begin create xiaohongshu API client ...")
        cookie_str, cookie_dict = utils.convert_cookies(self.browser.cookies())
        request_headers = {
            "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36",
            "Cookie": cookie_str,
            # Same value as the literals used before; kept in one place.
            "Origin": XHS_URL,
            "Referer": XHS_URL,
            "Content-Type": "application/json;charset=UTF-8",
        }
        return XiaoHongShuClient(
            proxies=httpx_proxy,
            headers=request_headers,
            playwright_page=self.page,
            cookie_dict=cookie_dict,
        )

    def login(self):
        """Open the site and leave the page idle for 60 seconds for a manual login."""
        # NOTE(review): ``playwright`` is never used inside this block; whether
        # ``__init_browser__`` depends on the context being open cannot be
        # determined from this file - confirm against BaseBrowser.
        with sync_playwright() as playwright:
            self.__init_browser__()
            # Manual login for now: wait while the user signs in by hand.
            self.page.wait_for_timeout(60_000)

    def polish_huitun_note(self, huitun_notes: list):
        """Fill in author and image data on Huitun notes.

        Each note is read with ``.get('noteId')`` and receives two keys:
        ``authorInfo`` (the ``user`` entry of the fetched note) and
        ``imageList`` (the ``url_default`` of every entry in ``image_list``,
        or an empty list when that entry is missing).

        :param huitun_notes: list of note dicts; modified in place.
        :return: the same ``huitun_notes`` object on every path, so callers
            can rely on either the return value or the in-place update.
        """
        # Nothing to enrich: skip launching the browser altogether.
        if not huitun_notes:
            return huitun_notes

        self.__init_browser__()
        # Without a positive pong() the notes are handed back untouched.
        if not self.xhs_client.pong():
            return huitun_notes

        for huitun_note in huitun_notes:
            try:
                note_id = huitun_note.get('noteId')
                note_info = self.xhs_client.get_note_by_id(note_id=note_id)
                huitun_note['authorInfo'] = note_info.get('user')
                # A missing/None image_list must not leave the note half-filled.
                images = note_info.get('image_list') or []
                huitun_note['imageList'] = [img.get('url_default') for img in images]
                # Pause between requests, as the original did after each success.
                sleep(1)
            except Exception as e:
                # Best effort: one failing note must not abort the whole batch.
                utils.logger.error(f"爬取小红书异常 {e}")
        return huitun_notes