|
@@ -1,10 +1,13 @@
|
|
import asyncio
|
|
import asyncio
|
|
import json
|
|
import json
|
|
|
|
+import logging
|
|
import re
|
|
import re
|
|
|
|
+import time
|
|
from typing import Any, Callable, Dict, List, Optional, Union
|
|
from typing import Any, Callable, Dict, List, Optional, Union
|
|
from urllib.parse import urlencode
|
|
from urllib.parse import urlencode
|
|
|
|
|
|
import httpx
|
|
import httpx
|
|
|
|
+import requests
|
|
from playwright.async_api import BrowserContext, Page
|
|
from playwright.async_api import BrowserContext, Page
|
|
|
|
|
|
# import config
|
|
# import config
|
|
@@ -12,7 +15,8 @@ from tools import utils
|
|
|
|
|
|
# from .exception import DataFetchError, IPBlockError
|
|
# from .exception import DataFetchError, IPBlockError
|
|
from .field import SearchNoteType, SearchSortType
|
|
from .field import SearchNoteType, SearchSortType
|
|
-from .help import get_search_id, sign
|
|
|
|
|
|
+from .help import get_search_id, sign, Des, GenerateCurve
|
|
|
|
+from .rotate_ident import invoke_ident_api
|
|
|
|
|
|
|
|
|
|
class XiaoHongShuClient:
|
|
class XiaoHongShuClient:
|
|
@@ -23,6 +27,7 @@ class XiaoHongShuClient:
|
|
*,
|
|
*,
|
|
headers: Dict[str, str],
|
|
headers: Dict[str, str],
|
|
playwright_page: Page,
|
|
playwright_page: Page,
|
|
|
|
+ rotate_ident,
|
|
cookie_dict: Dict[str, str],
|
|
cookie_dict: Dict[str, str],
|
|
):
|
|
):
|
|
self.xsec_token = None
|
|
self.xsec_token = None
|
|
@@ -37,6 +42,8 @@ class XiaoHongShuClient:
|
|
self.NOTE_ABNORMAL_CODE = -510001
|
|
self.NOTE_ABNORMAL_CODE = -510001
|
|
self.playwright_page = playwright_page
|
|
self.playwright_page = playwright_page
|
|
self.cookie_dict = cookie_dict
|
|
self.cookie_dict = cookie_dict
|
|
|
|
+ self.des = Des()
|
|
|
|
+ self.rotate_ident = rotate_ident
|
|
|
|
|
|
def _pre_headers(self, url: str, data=None) -> Dict:
|
|
def _pre_headers(self, url: str, data=None) -> Dict:
|
|
"""
|
|
"""
|
|
@@ -66,12 +73,13 @@ class XiaoHongShuClient:
|
|
self.headers.update(headers)
|
|
self.headers.update(headers)
|
|
return self.headers
|
|
return self.headers
|
|
|
|
|
|
- def request(self, method, url, **kwargs) -> Union[str, Any]:
|
|
|
|
|
|
+ def request(self, method, url, need_check=True, **kwargs) -> Union[str, Any]:
|
|
"""
|
|
"""
|
|
封装httpx的公共请求方法,对请求响应做一些处理
|
|
封装httpx的公共请求方法,对请求响应做一些处理
|
|
Args:
|
|
Args:
|
|
method: 请求方法
|
|
method: 请求方法
|
|
url: 请求的URL
|
|
url: 请求的URL
|
|
|
|
+ need_check: whether a 461 response status should trigger captcha verification via self.verify()
|
|
**kwargs: 其他请求参数,例如请求头、请求体等
|
|
**kwargs: 其他请求参数,例如请求头、请求体等
|
|
|
|
|
|
Returns:
|
|
Returns:
|
|
@@ -89,8 +97,8 @@ class XiaoHongShuClient:
|
|
if return_response:
|
|
if return_response:
|
|
return response.text
|
|
return response.text
|
|
|
|
|
|
- if response.status_code == 461:
|
|
|
|
- self.update_xsec_token()
|
|
|
|
|
|
+ if response.status_code == 461 and need_check:
|
|
|
|
+ self.verify()
|
|
data: Dict = response.json()
|
|
data: Dict = response.json()
|
|
if data["success"]:
|
|
if data["success"]:
|
|
return data.get("data", data.get("success", {}))
|
|
return data.get("data", data.get("success", {}))
|
|
@@ -116,7 +124,7 @@ class XiaoHongShuClient:
|
|
headers = self._pre_headers(final_uri)
|
|
headers = self._pre_headers(final_uri)
|
|
return await self.request(method="GET", url=f"{self._host}{final_uri}", headers=headers)
|
|
return await self.request(method="GET", url=f"{self._host}{final_uri}", headers=headers)
|
|
|
|
|
|
- def post(self, uri: str, data: dict) -> Dict:
|
|
|
|
|
|
+ def post(self, uri: str, data: dict, need_check=True) -> Dict:
|
|
"""
|
|
"""
|
|
POST请求,对请求头签名
|
|
POST请求,对请求头签名
|
|
Args:
|
|
Args:
|
|
@@ -128,7 +136,7 @@ class XiaoHongShuClient:
|
|
"""
|
|
"""
|
|
headers = self._pre_headers(uri, data)
|
|
headers = self._pre_headers(uri, data)
|
|
json_str = json.dumps(data, separators=(',', ':'), ensure_ascii=False)
|
|
json_str = json.dumps(data, separators=(',', ':'), ensure_ascii=False)
|
|
- return self.request(method="POST", url=f"{self._host}{uri}",
|
|
|
|
|
|
+ return self.request(method="POST", url=f"{self._host}{uri}", need_check=need_check,
|
|
data=json_str, headers=headers)
|
|
data=json_str, headers=headers)
|
|
|
|
|
|
def update_xsec_token(self):
|
|
def update_xsec_token(self):
|
|
@@ -437,3 +445,48 @@ class XiaoHongShuClient:
|
|
await asyncio.sleep(crawl_interval)
|
|
await asyncio.sleep(crawl_interval)
|
|
result.extend(notes)
|
|
result.extend(notes)
|
|
return result
|
|
return result
|
|
|
|
+
|
|
|
|
+ def verify(self):
|
|
|
|
+ image = self.get_image()
|
|
|
|
+ self.check(image)
|
|
|
|
+
|
|
|
|
+ def get_image(self):
|
|
|
|
+ json_data = {
|
|
|
|
+ 'secretId': '000',
|
|
|
|
+ 'verifyType': '102',
|
|
|
|
+ 'verifyUuid': '',
|
|
|
|
+ 'verifyBiz': '461',
|
|
|
|
+ 'sourceSite': '',
|
|
|
|
+ }
|
|
|
|
+ response = self.post('/api/redcaptcha/v2/captcha/register',
|
|
|
|
+ need_check=False, data=json_data)
|
|
|
|
+ logging.info(f"get image:{response}")
|
|
|
|
+ captchaInfo = response["captchaInfo"]
|
|
|
|
+ self.rid = response["rid"]
|
|
|
|
+ image_Info = self.des.decrypt("captchaInfo", captchaInfo)
|
|
|
|
+ captchaUrl = json.loads(image_Info)["captchaUrl"]
|
|
|
|
+ logging.info('captchaUrl:' + captchaUrl)
|
|
|
|
+ return captchaUrl
|
|
|
|
+
|
|
|
|
+ def check(self, img_url:str):
|
|
|
|
+ img = self.rotate_ident.do_download_img(img_url)
|
|
|
|
+ response = invoke_ident_api(img)
|
|
|
|
+ angle = int(str(response['data']['res_str']).replace('顺时针旋转', '').replace('度', ''))
|
|
|
|
+ rate = angle / 360
|
|
|
|
+ distance = int(rate * 227)
|
|
|
|
+ gen_track = GenerateCurve([0, 0], [distance, 2], [], int(rate * 150)).main()
|
|
|
|
+ track = self.des.encrypt("track", json.dumps(gen_track, separators=(",", ":")))
|
|
|
|
+ mouseEnd = self.des.encrypt("mouseEnd", str(distance))
|
|
|
|
+ time_ = self.des.encrypt("time", str(gen_track[-1][-1] + 199))
|
|
|
|
+ # track = 'P/h0WtKGfU29TgYTjGjG0SIRuELz+YlGj5wZhyl7cM+TXhklOaVpyVwuJrxtRQt8Y0t70fBllPiJYUJZq9XVkPO75tSfvK/mpSkEUXImjF+CnyPZBsAyiUNDFOYPMxF21DU7qp5ZoZMIcAD+Wm5M18s1ctGnk1jK1RugqdwJB412H2H18XTdygGmddBt3KreAoWark9jiba6IjEn5ZLssMlBCn9fRZPwdWNqjre4dGlscChV8wuwXAxz4hIYRI+VgPnX1hossQeX0TBfk0M4f8hFRxZm35d47lwfaqRtYbPsUM5/G8471ViSwStrg+WpckZNBfWs/1cg/wBBJKvss+su/oIF7+NpxMaqryLQW7MSz/F5ejfR8FQCm4/Sp+6tmFf65sEuXAmaHkGYNp5CoabW1AGBW4t4gfB7QLI5PIYZMRRvfsGdGJ1khSEqgiYIBVo3645clzayHwEhpaV4sDu/p6HryP2FcAIw2FL80Q2mWdSiP9ZBSufS+4eAcvz5aIWYKcFRW5wy1QfABc86r9XIlS8kGyQJcE3UPer5JigDqitX19C1FDniYkGaDxanIi1ob3EC2C7BF6pDBUnbaSOhL/8DtzbyfPMI5yAVMcD6ZWPpzDFXbvupYGsjZQP40dwPYdZhtwbTX8ED4FZ5gW7bvHa9AJmADaieOhR6WUKb+MF5NqcNxsPJKc4rJCJwS0DX3DBW9gjL3Zi+tjqvp0RF+ge0On2tLedMgtWi+wFa671XlOakCpD7yYX+CktvhPrTz/Jp8vxVuRaKIX32aPLU3S7xCLJtSiQ2f7xm/efxrUU0h7IjHVlIQLz489UJ26IyKZnzVntmte1UoU9tjZ0WzVSyEEx0l/F0LnN9ZJvbPdMKFWWekZrdHns2/vytsecoqJUibkdpE4h0Xet3NmOkoNzsApY7BDCx1BRnYd5YqWCsh5J594qxTZXca9RcySZlND1s+XCOKskO0ob2EHp+VkLzaHI5gCJGGcJUCSLlc+xxePCIbrjCd4uK2tuCUgLiW9GsZd8MBcQHjJkiA0CUr8CBjHN0asxelR5WZjOcN/cYK2DsoLtHiYjSa2ivbDr5/czi2+x6e8om9MXBDQ/+YN7NWe6e/gznqnkVLfdR5Jpf/vBBxtx08ctETmYtT80BXyv2QZFrSjSxOIvEiluML8mVqvAKSTYh8y4vsYMHEcT4jF0plsn+LOGvzSjhSe9TCvgQ/fyIJ/Z0cJwh0OUkXP5lYqHvvZLgrUKEp6B6riSP4PbVUieUB3PGaXG9e2lV8TR9afre/L8nQolNLc+p08nb8xbqNOIRyrGhSCh68wNpkbfBUUpwy3YkLkkEBHqPYSE/cp4FFAVbSg4/AA=='
|
|
|
|
+ json_data = {
|
|
|
|
+ 'rid': self.rid,
|
|
|
|
+ 'verifyType': 102,
|
|
|
|
+ 'verifyBiz': '461',
|
|
|
|
+ 'verifyUuid': '',
|
|
|
|
+ 'sourceSite': '',
|
|
|
|
+ 'captchaInfo': '{"mouseEnd":"' + mouseEnd + '","time":"' + time_ + '","track":"' + track + '","width":"w1qZrykOUIU="}'
|
|
|
|
+ }
|
|
|
|
+ response = self.post('/api/redcaptcha/v2/captcha/check', need_check=False,
|
|
|
|
+ data=json_data)
|
|
|
|
+ logging.info(f'check:{response}')
|