123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135 |
- # -*- coding: utf-8 -*-
- # @Author : relakkes@gmail.com
- # @Time : 2023/12/2 12:53
- # @Desc : 爬虫相关的工具函数
- import base64
- import random
- import re
- from io import BytesIO
- from typing import Dict, List, Optional, Tuple
- import httpx
- from PIL import Image, ImageDraw
- from playwright.async_api import Cookie, Page
- from . import utils
async def find_login_qrcode(page: Page, selector: str) -> str:
    """Find the login QR-code image under ``selector`` and return it as a string.

    Waits for ``selector`` on ``page`` and reads the matched element's ``src``
    property. If that value contains an http(s) URL, the image is downloaded
    and returned base64-encoded; otherwise the ``src`` value is returned
    unchanged (presumably already an inline/base64 payload -- confirm against
    callers).

    Args:
        page: Playwright page on which the login QR code is rendered.
        selector: Selector of the element whose ``src`` holds the QR code.

    Returns:
        The QR-code image string, or ``""`` if the element could not be read
        or the image could not be downloaded.
    """
    try:
        element = await page.wait_for_selector(
            selector=selector,
        )
        login_qrcode_img = str(await element.get_property("src"))  # type: ignore
        if "http://" in login_qrcode_img or "https://" in login_qrcode_img:
            async with httpx.AsyncClient(follow_redirects=True) as client:
                utils.logger.info(f"[find_login_qrcode] get qrcode by url:{login_qrcode_img}")
                resp = await client.get(login_qrcode_img, headers={"User-Agent": get_user_agent()})
                if resp.status_code == 200:
                    return base64.b64encode(resp.content).decode('utf-8')
                # Raised inside the try on purpose: the handler below turns it
                # into the "" return value after logging it.
                raise Exception(f"fetch login image url failed, response message:{resp.text}")
        return login_qrcode_img
    except Exception as e:
        # Best-effort lookup: callers get "" on any failure, but the cause is
        # reported through the project logger rather than stdout.
        utils.logger.error(f"[find_login_qrcode] get login qrcode failed: {e}")
        return ""
def show_qrcode(qr_code) -> None:  # type: ignore
    """Decode a base64-encoded QR-code image and display it with a border.

    If ``qr_code`` contains a comma, only the segment between the first and
    second comma is decoded (this strips a leading ``...,`` prefix).
    """
    segments = qr_code.split(",")
    encoded = segments[1] if len(segments) > 1 else qr_code
    qr_image = Image.open(BytesIO(base64.b64decode(encoded)))

    # Paste the QR code onto a white canvas with a 10px margin on every side
    # and outline the canvas in black; the frame improves scanning accuracy.
    margin = 10
    width, height = qr_image.size
    canvas_width = width + 2 * margin
    canvas_height = height + 2 * margin
    canvas = Image.new('RGB', (canvas_width, canvas_height), color=(255, 255, 255))
    canvas.paste(qr_image, (margin, margin))
    ImageDraw.Draw(canvas).rectangle(
        (0, 0, canvas_width - 1, canvas_height - 1),
        outline=(0, 0, 0),
        width=1,
    )
    canvas.show()
def get_user_agent() -> str:
    """Return a randomly selected desktop Chrome User-Agent string."""
    desktop_agents = (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.5112.79 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36",
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.5060.53 Safari/537.36",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.84 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.5112.79 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.5060.53 Safari/537.36",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.4844.84 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.5112.79 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36",
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.5060.53 Safari/537.36",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.4844.84 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.5112.79 Safari/537.36",
    )
    return random.choice(desktop_agents)
def get_mobile_user_agent() -> str:
    """Return a randomly selected mobile (iOS / Android) User-Agent string.

    Every entry in the pool identifies a phone or tablet browser. A desktop
    Windows Opera User-Agent that used to sit in this pool was removed,
    because returning it would present the client as a desktop browser.
    """
    mobile_agents = (
        "Mozilla/5.0 (iPhone; CPU iPhone OS 16_5 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.5 Mobile/15E148 Safari/604.1",
        "Mozilla/5.0 (iPad; CPU OS 16_5 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.5 Mobile/15E148 Safari/604.1",
        "Mozilla/5.0 (iPhone; CPU iPhone OS 16_5 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) CriOS/114.0.5735.99 Mobile/15E148 Safari/604.1",
        "Mozilla/5.0 (iPad; CPU OS 16_5 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) CriOS/114.0.5735.124 Mobile/15E148 Safari/604.1",
        "Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Mobile Safari/537.36",
        "Mozilla/5.0 (Linux; Android 13; SAMSUNG SM-S918B) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/21.0 Chrome/110.0.5481.154 Mobile Safari/537.36",
        "Mozilla/5.0 (Linux; Android 10; JNY-LX1; HMSCore 6.11.0.302) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.88 HuaweiBrowser/13.0.5.303 Mobile Safari/537.36",
    )
    return random.choice(mobile_agents)
def convert_cookies(cookies: Optional[List[Cookie]]) -> Tuple[str, Dict]:
    """Convert a list of cookie mappings into a header string and a dict.

    Returns a tuple ``(cookie_string, cookie_dict)``: the string joins every
    ``name=value`` pair with ``;`` in input order, and the dict maps each
    name to its value (a later duplicate name overwrites an earlier one).
    An empty or ``None`` input yields ``("", {})``.
    """
    if not cookies:
        return "", {}
    pairs = [(item.get('name'), item.get('value')) for item in cookies]
    header = ";".join(f"{name}={value}" for name, value in pairs)
    return header, dict(pairs)
def convert_str_cookie_to_dict(cookie_str: str) -> Dict:
    """Parse a ``name=value; name2=value2`` cookie string into a dict.

    Each ``;``-separated segment is stripped and split on its FIRST ``=``
    only, so values that themselves contain ``=`` (for example base64
    padding such as ``token=abc==``) are kept intact instead of being
    dropped. Empty segments and segments without any ``=`` are skipped.

    Args:
        cookie_str: Raw cookie string; may be empty.

    Returns:
        Mapping of cookie name to cookie value. A later duplicate name
        overwrites an earlier one.
    """
    cookie_dict: Dict[str, str] = dict()
    if not cookie_str:
        return cookie_dict
    for segment in cookie_str.split(";"):
        segment = segment.strip()
        if not segment:
            continue
        name, separator, value = segment.partition("=")
        if not separator:
            # No "=" at all: not a name/value pair.
            continue
        cookie_dict[name] = value
    return cookie_dict
def match_interact_info_count(count_str: str) -> int:
    """Return the first run of decimal digits in ``count_str`` as an int.

    Yields 0 when ``count_str`` is empty/falsy or contains no digits.
    """
    found = re.search(r'\d+', count_str) if count_str else None
    return int(found.group()) if found else 0
|