Browse Source

fix: 兼容灰豚新页面,过滤没有用户数据的文章

wuwenyi 5 months ago
parent
commit
bf718ad4f5
4 changed files with 12 additions and 8 deletions
  1. 1 1
      api/search.py
  2. 1 1
      huitun/__init__.py
  3. 8 5
      util/lock_util.py
  4. 2 1
      xhs/__init__.py

+ 1 - 1
api/search.py

@@ -23,7 +23,7 @@ def search_note():
     browser = HuiTunBrowser(lock_util.get_idle_phone('huitun'), playwright)
     result = browser.search_note(request_body.get('tagName'), request_body.get('searchLimit'))
     xhs_browser = XhsBrowser(lock_util.get_idle_phone('xhs'), playwright)
-    xhs_browser.polish_huitun_note(result)
+    result = xhs_browser.polish_huitun_note(result)
     playwright.stop()
     return api.success(result)
 

+ 1 - 1
huitun/__init__.py

@@ -45,7 +45,7 @@ class HuiTunBrowser(BaseBrowser):
             api.assert_not_none(tag_name, "标签不能为空")
             self.page.goto('https://xhs.huitun.com/#/note/note_search')
             # 展开全部标签
-            self.page.query_selector('.RaWdmGo9iaS1-bQ6mK5K4w\=\=').click()
+            self.page.query_selector('zjuiSIkqQPGkA3plGE0SGQ\=\=').click()
             self.page.query_selector(f'.IRk6XOEYweiS9APLHrOp-w\=\=:has-text("{tag_name}")').click()
             self.page.get_by_text('图文笔记', exact=True).click()
             self.page.wait_for_timeout(500)

+ 8 - 5
util/lock_util.py

@@ -3,6 +3,7 @@
 """
 import logging
 import os
+import random
 import threading
 import time
 
@@ -39,7 +40,7 @@ class LockManager:
             return False
 
 
-def add_phone(lock_key: str, phones: set):
+def add_phone(lock_key: str, phones: list):
     directory = f"./.data/{lock_key}"
     if not os.path.exists(directory):
         os.makedirs(directory)
@@ -49,7 +50,7 @@ def add_phone(lock_key: str, phones: set):
         # 检查是否是文件夹
         if os.path.isdir(full_path):
             # 如果是文件夹,将文件夹名称添加到集合中
-            phones.add(entry)
+            phones.append(entry)
     print(f"已存在的{lock_key}账号:", phones)
 
 
@@ -59,8 +60,8 @@ lock_manager_dict = {
 }
 
 lock_phone_dict = {
-    "huitun": set(),
-    "xhs": set()
+    "huitun": list(),
+    "xhs": list()
 }
 
 for key in lock_phone_dict.keys():
@@ -71,7 +72,9 @@ def get_idle_phone(key: str):
     lock_manager = lock_manager_dict[key]
     api.assert_not_none(lock_manager, "lock_manager is None")
     while True:
-        for phone in lock_phone_dict[key]:
+        phone_list = lock_phone_dict[key]
+        for phone in phone_list:
             if not lock_manager.is_locked(phone):
+                random.shuffle(phone_list)
                 return phone
         time.sleep(1)

+ 2 - 1
xhs/__init__.py

@@ -63,7 +63,7 @@ class XhsBrowser(BaseBrowser):
     def login(self):
         self.__init_browser__()
         # 暂时采用手动登录
-        self.page.goto(XHS_URL, wait_until='domcontentloaded')
+        self.page.goto(XHS_URL)
         self.page.wait_for_timeout(60_000)
         self.playwright.stop()
 
@@ -85,6 +85,7 @@ class XhsBrowser(BaseBrowser):
                 self.page.wait_for_timeout(4000)
             except Exception as e:
                 utils.logger.error(f"爬取小红书异常 {e}")
+        return list(filter(lambda note: note.get('authorInfo') is not None, huitun_notes))
 
     def get_note(self, note_id: str):
         # note = self.xhs_client.get_note_by_id(note_id=note_id)