Browse Source

perf: 优化爬取

wuwenyi 8 months ago
parent
commit
ba99fe72b0
2 changed files with 4 additions and 2 deletions
  1. 1 1
      xhs/__init__.py
  2. 3 1
      xhs/client.py

+ 1 - 1
xhs/__init__.py

@@ -72,6 +72,6 @@ class XhsBrowser(BaseBrowser):
                 note_info = self.xhs_client.get_note_by_id(note_id=note_id)
                 huitun_note['authorInfo'] = note_info.get('user')
                 huitun_note['imageList'] = [img.get('url_default') for img in note_info.get('image_list')]
-                sleep(1)
+                sleep(2)
             except Exception as e:
                 utils.logger.error(f"爬取小红书异常 {e}")

+ 3 - 1
xhs/client.py

@@ -89,6 +89,8 @@ class XiaoHongShuClient:
         if return_response:
             return response.text
 
+        if response.status_code == 461:
+            self.update_xsec_token()
         data: Dict = response.json()
         if data["success"]:
             return data.get("data", data.get("success", {}))
@@ -134,7 +136,7 @@ class XiaoHongShuClient:
         更新token
         :return:
         """
-        res = self.get_note_by_keyword('')
+        res = self.get_note_by_keyword('小红书')
         self.xsec_token = res.get('items')[0].get('xsec_token')
 
     async def get_note_media(self, url: str) -> Union[bytes, None]: