|
@@ -1,10 +1,13 @@
|
|
|
import asyncio
|
|
|
import json
|
|
|
+import logging
|
|
|
import re
|
|
|
+import time
|
|
|
from typing import Any, Callable, Dict, List, Optional, Union
|
|
|
from urllib.parse import urlencode
|
|
|
|
|
|
import httpx
|
|
|
+import requests
|
|
|
from playwright.async_api import BrowserContext, Page
|
|
|
|
|
|
|
|
@@ -12,7 +15,8 @@ from tools import utils
|
|
|
|
|
|
|
|
|
from .field import SearchNoteType, SearchSortType
|
|
|
-from .help import get_search_id, sign
|
|
|
+from .help import get_search_id, sign, Des, GenerateCurve
|
|
|
+from .rotate_ident import invoke_ident_api
|
|
|
|
|
|
|
|
|
class XiaoHongShuClient:
|
|
@@ -23,6 +27,7 @@ class XiaoHongShuClient:
|
|
|
*,
|
|
|
headers: Dict[str, str],
|
|
|
playwright_page: Page,
|
|
|
+ rotate_ident,
|
|
|
cookie_dict: Dict[str, str],
|
|
|
):
|
|
|
self.xsec_token = None
|
|
@@ -37,6 +42,8 @@ class XiaoHongShuClient:
|
|
|
self.NOTE_ABNORMAL_CODE = -510001
|
|
|
self.playwright_page = playwright_page
|
|
|
self.cookie_dict = cookie_dict
|
|
|
+ self.des = Des()
|
|
|
+ self.rotate_ident = rotate_ident
|
|
|
|
|
|
def _pre_headers(self, url: str, data=None) -> Dict:
|
|
|
"""
|
|
@@ -66,12 +73,13 @@ class XiaoHongShuClient:
|
|
|
self.headers.update(headers)
|
|
|
return self.headers
|
|
|
|
|
|
- def request(self, method, url, **kwargs) -> Union[str, Any]:
|
|
|
+ def request(self, method, url, need_check=True, **kwargs) -> Union[str, Any]:
|
|
|
"""
|
|
|
封装httpx的公共请求方法,对请求响应做一些处理
|
|
|
Args:
|
|
|
method: 请求方法
|
|
|
url: 请求的URL
|
|
|
+ need_check: need check 461
|
|
|
**kwargs: 其他请求参数,例如请求头、请求体等
|
|
|
|
|
|
Returns:
|
|
@@ -89,8 +97,8 @@ class XiaoHongShuClient:
|
|
|
if return_response:
|
|
|
return response.text
|
|
|
|
|
|
- if response.status_code == 461:
|
|
|
- self.update_xsec_token()
|
|
|
+ if response.status_code == 461 and need_check:
|
|
|
+ self.verify()
|
|
|
data: Dict = response.json()
|
|
|
if data["success"]:
|
|
|
return data.get("data", data.get("success", {}))
|
|
@@ -116,7 +124,7 @@ class XiaoHongShuClient:
|
|
|
headers = self._pre_headers(final_uri)
|
|
|
return await self.request(method="GET", url=f"{self._host}{final_uri}", headers=headers)
|
|
|
|
|
|
- def post(self, uri: str, data: dict) -> Dict:
|
|
|
+ def post(self, uri: str, data: dict, need_check=True) -> Dict:
|
|
|
"""
|
|
|
POST请求,对请求头签名
|
|
|
Args:
|
|
@@ -128,7 +136,7 @@ class XiaoHongShuClient:
|
|
|
"""
|
|
|
headers = self._pre_headers(uri, data)
|
|
|
json_str = json.dumps(data, separators=(',', ':'), ensure_ascii=False)
|
|
|
- return self.request(method="POST", url=f"{self._host}{uri}",
|
|
|
+ return self.request(method="POST", url=f"{self._host}{uri}", need_check=need_check,
|
|
|
data=json_str, headers=headers)
|
|
|
|
|
|
def update_xsec_token(self):
|
|
@@ -437,3 +445,48 @@ class XiaoHongShuClient:
|
|
|
await asyncio.sleep(crawl_interval)
|
|
|
result.extend(notes)
|
|
|
return result
|
|
|
+
|
|
|
+ def verify(self):
|
|
|
+ image = self.get_image()
|
|
|
+ self.check(image)
|
|
|
+
|
|
|
+ def get_image(self):
|
|
|
+ json_data = {
|
|
|
+ 'secretId': '000',
|
|
|
+ 'verifyType': '102',
|
|
|
+ 'verifyUuid': '',
|
|
|
+ 'verifyBiz': '461',
|
|
|
+ 'sourceSite': '',
|
|
|
+ }
|
|
|
+ response = self.post('/api/redcaptcha/v2/captcha/register',
|
|
|
+ need_check=False, data=json_data)
|
|
|
+ logging.info(f"get image:{response}")
|
|
|
+ captchaInfo = response["captchaInfo"]
|
|
|
+ self.rid = response["rid"]
|
|
|
+ image_Info = self.des.decrypt("captchaInfo", captchaInfo)
|
|
|
+ captchaUrl = json.loads(image_Info)["captchaUrl"]
|
|
|
+ logging.info('captchaUrl:' + captchaUrl)
|
|
|
+ return captchaUrl
|
|
|
+
|
|
|
+ def check(self, img_url:str):
|
|
|
+ img = self.rotate_ident.do_download_img(img_url)
|
|
|
+ response = invoke_ident_api(img)
|
|
|
+ angle = int(str(response['data']['res_str']).replace('顺时针旋转', '').replace('度', ''))
|
|
|
+ rate = angle / 360
|
|
|
+ distance = int(rate * 227)
|
|
|
+ gen_track = GenerateCurve([0, 0], [distance, 2], [], int(rate * 150)).main()
|
|
|
+ track = self.des.encrypt("track", json.dumps(gen_track, separators=(",", ":")))
|
|
|
+ mouseEnd = self.des.encrypt("mouseEnd", str(distance))
|
|
|
+ time_ = self.des.encrypt("time", str(gen_track[-1][-1] + 199))
|
|
|
+
|
|
|
+ json_data = {
|
|
|
+ 'rid': self.rid,
|
|
|
+ 'verifyType': 102,
|
|
|
+ 'verifyBiz': '461',
|
|
|
+ 'verifyUuid': '',
|
|
|
+ 'sourceSite': '',
|
|
|
+ 'captchaInfo': '{"mouseEnd":"' + mouseEnd + '","time":"' + time_ + '","track":"' + track + '","width":"w1qZrykOUIU="}'
|
|
|
+ }
|
|
|
+ response = self.post('/api/redcaptcha/v2/captcha/check', need_check=False,
|
|
|
+ data=json_data)
|
|
|
+ logging.info(f'check:{response}')
|