1 gadu atpakaļ · cc6b93534f
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,73 @@
 
				+# ---> Python
			
 
				+# Byte-compiled / optimized / DLL files
			
 
				+__pycache__/
			
 
				+*.py[cod]
			
 
				+*$py.class
			
 
				+
			
 
				+# C extensions
			
 
				+*.so
			
 
				+
			
 
				+# Distribution / packaging
			
 
				+.Python
			
 
				+env/
			
 
				+build/
			
 
				+develop-eggs/
			
 
				+dist/
			
 
				+downloads/
			
 
				+eggs/
			
 
				+.eggs/
			
 
				+lib/
			
 
				+lib64/
			
 
				+parts/
			
 
				+sdist/
			
 
				+var/
			
 
				+*.egg-info/
			
 
				+.installed.cfg
			
 
				+*.egg
			
 
				+
			
 
				+# PyInstaller
			
 
				+#  Usually these files are written by a python script from a template
			
 
				+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
			
 
				+*.manifest
			
 
				+*.spec
			
 
				+
			
 
				+# Installer logs
			
 
				+pip-log.txt
			
 
				+pip-delete-this-directory.txt
			
 
				+
			
 
				+# Unit test / coverage reports
			
 
				+htmlcov/
			
 
				+.tox/
			
 
				+.coverage
			
 
				+.coverage.*
			
 
				+.cache
			
 
				+nosetests.xml
			
 
				+coverage.xml
			
 
				+*,cover
			
 
				+
			
 
				+# Translations
			
 
				+*.mo
			
 
				+*.pot
			
 
				+
			
 
				+# Django stuff:
			
 
				+*.log
			
 
				+
			
 
				+# Sphinx documentation
			
 
				+docs/_build/
			
 
				+
			
 
				+# PyBuilder
			
 
				+target/
			
 
				+Lib/
			
 
				+Scripts/
			
 
				+logs/
			
 
				+**/__pycache__/
			
 
				+
			
 
				+### IntelliJ IDEA ###
			
 
				+.idea/
			
 
				+*.iws
			
 
				+*.iml
			
 
				+*.ipr
			
 
				+
			
 
				+# 浏览器上下文数据
			
 
				+.data
			
 
				+
			
--- a/README.md
+++ b/README.md
@@ -0,0 +1,3 @@
 
				+# py-huitun-robot
			
 
				+
			
 
				+灰豚数据rpa
			
--- a/api/__init__.py
+++ b/api/__init__.py
@@ -0,0 +1,89 @@
 
				+"""
			
 
				+通用api
			
 
				+"""
			
 
				+import os
			
 
				+import json
			
 
				+import logging
			
 
				+import time
			
 
				+
			
 
				+SUCCESS_RESPONSE = json.dumps({
			
 
				+    "code": 1,
			
 
				+    "msg": "请求成功",
			
 
				+    "success": True,
			
 
				+}, ensure_ascii=False)
			
 
				+
			
 
				+phones = set()
			
 
				+directory = "./.data/huitun"
			
 
				+if not os.path.exists(directory):
			
 
				+    os.makedirs(directory)
			
 
				+for entry in os.listdir(directory):
			
 
				+    # 构建完整的路径
			
 
				+    full_path = os.path.join(directory, entry)
			
 
				+    # 检查是否是文件夹
			
 
				+    if os.path.isdir(full_path):
			
 
				+        # 如果是文件夹，将文件夹名称添加到集合中
			
 
				+        phones.add(entry)
			
 
				+
			
 
				+print("已存在的账号：", phones)
			
 
				+
			
 
				+
			
 
				+def contain_browser(phone):
			
 
				+    return phone in phones
			
 
				+
			
 
				+
			
 
				+def get_idle_phone():
			
 
				+    from huitun import lock_manager
			
 
				+    while True:
			
 
				+        for phone in phones:
			
 
				+            if not lock_manager.is_locked(phone):
			
 
				+                return phone
			
 
				+        time.sleep(1)
			
 
				+
			
 
				+
			
 
				+def add_phone(phone):
			
 
				+    phones.add(phone)
			
 
				+
			
 
				+
			
 
				+class BusinessException(Exception):
			
 
				+    """
			
 
				+    自定义业务异常
			
 
				+    """
			
 
				+
			
 
				+    def __init__(self, msg):
			
 
				+        super().__init__(self)
			
 
				+        self.msg = msg
			
 
				+
			
 
				+
			
 
				+def raiseError(msg):
			
 
				+    """ """
			
 
				+    raise BusinessException(msg)
			
 
				+
			
 
				+
			
 
				+def fail_response(msg: str):
			
 
				+    """
			
 
				+    请求失败
			
 
				+    """
			
 
				+    return json.dumps({
			
 
				+        "code": 0,
			
 
				+        "msg": msg,
			
 
				+        "success": False,
			
 
				+    }, ensure_ascii=False)
			
 
				+
			
 
				+
			
 
				+def assert_not_none(data, msg):
			
 
				+    """
			
 
				+    断言方法
			
 
				+    """
			
 
				+    if data is None:
			
 
				+        raise BusinessException(msg)
			
 
				+
			
 
				+
			
 
				+def success(data=None):
			
 
				+    if data is None:
			
 
				+        return SUCCESS_RESPONSE
			
 
				+    return json.dumps({
			
 
				+        "code": 1,
			
 
				+        "msg": "请求成功",
			
 
				+        "data": data,
			
 
				+        "success": True,
			
 
				+    }, ensure_ascii=False)
			
--- a/api/login.py
+++ b/api/login.py
@@ -0,0 +1,23 @@
 
				+"""
			
 
				+登录接口
			
 
				+"""
			
 
				+
			
 
				+from flask import Blueprint
			
 
				+from flask import request
			
 
				+
			
 
				+import api
			
 
				+import huitun
			
 
				+
			
 
				+login_opt = Blueprint('login', __name__)
			
 
				+
			
 
				+@login_opt.route('/login', methods=["POST"])
			
 
				+def login():
			
 
				+    """
			
 
				+    登录接口
			
 
				+    :return: 1-登录成功 2-需要验证码
			
 
				+    """
			
 
				+    request_body = request.json
			
 
				+    phone = request_body.get('phone')
			
 
				+    browser = huitun.HuiTunBrowser(phone)
			
 
				+    login_result = browser.login(request_body.get('password'))
			
 
				+    return api.success(login_result)
			
--- a/api/search.py
+++ b/api/search.py
@@ -0,0 +1,21 @@
 
				+"""
			
 
				+搜索API
			
 
				+"""
			
 
				+from flask import Blueprint
			
 
				+from flask import request
			
 
				+
			
 
				+import api
			
 
				+from huitun import HuiTunBrowser
			
 
				+
			
 
				+search_opt = Blueprint('search', __name__)
			
 
				+
			
 
				+@search_opt.route('/note', methods=["POST"])
			
 
				+def search_note():
			
 
				+    """
			
 
				+    根据关键字搜索抖音笔记
			
 
				+    :return:
			
 
				+    """
			
 
				+    request_body = request.json
			
 
				+    browser = HuiTunBrowser(api.get_idle_phone())
			
 
				+    result = browser.search_note(request_body.get('tagName'), request_body.get('searchLimit'))
			
 
				+    return api.success(result)
			
--- a/app.py
+++ b/app.py
@@ -0,0 +1,72 @@
 
				+import logging
			
 
				+import os
			
 
				+import traceback
			
 
				+
			
 
				+from flask import Flask, request
			
 
				+
			
 
				+from api import *
			
 
				+from api import login, search
			
 
				+
			
 
				+logs_folder = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'logs')
			
 
				+os.makedirs(logs_folder, exist_ok=True)
			
 
				+current_folder = os.path.dirname(os.path.abspath(__file__))
			
 
				+file_handler = logging.FileHandler(filename=f"{current_folder}/logs/app.log", encoding="utf-8")
			
 
				+
			
 
				+# 配置日志格式
			
 
				+formatter = logging.Formatter("%(asctime)s %(levelname)s[%(funcName)s:%(lineno)s]:%(message)s", "%Y-%m-%d %H:%M:%S")
			
 
				+file_handler.setFormatter(formatter)
			
 
				+console_handler = logging.StreamHandler()
			
 
				+
			
 
				+# 配置日志记录器
			
 
				+logger = logging.getLogger()
			
 
				+logger.setLevel(logging.INFO)
			
 
				+logger.addHandler(file_handler)
			
 
				+# 开发的时候打开这行注释，日志会打印在控制台上面
			
 
				+logger.addHandler(console_handler)
			
 
				+
			
 
				+app = Flask(__name__)
			
 
				+
			
 
				+app.register_blueprint(login.login_opt, url_prefix="/login")
			
 
				+app.register_blueprint(search.search_opt, url_prefix="/search")
			
 
				+
			
 
				+
			
 
				+@app.errorhandler(Exception)
			
 
				+def handle_exception(error: Exception):
			
 
				+    """
			
 
				+    全局异常处理
			
 
				+    """
			
 
				+    status_code = 500
			
 
				+    if isinstance(error, BusinessException):
			
 
				+        status_code = 200
			
 
				+        response = fail_response(error.msg)
			
 
				+    else:
			
 
				+        logging.error(error)
			
 
				+        traceback.print_exc()
			
 
				+        response = fail_response(str(error))
			
 
				+    return response, status_code
			
 
				+
			
 
				+
			
 
				+@app.before_request
			
 
				+def log_request():
			
 
				+    """
			
 
				+    打印请求
			
 
				+    """
			
 
				+    logging.info('Request: %s %s', request.method, request.url)
			
 
				+    logging.info('Request Body: %s', request.get_data(as_text=True))
			
 
				+
			
 
				+
			
 
				+@app.after_request
			
 
				+def log_response(response):
			
 
				+    """
			
 
				+    打印返回
			
 
				+    """
			
 
				+    data = response.get_data(as_text=True)
			
 
				+    if len(data) > 1000:
			
 
				+        logging.info('Response Body: %s', data[:1000] + "...")
			
 
				+    else:
			
 
				+        logging.info('Response Body: %s', data)
			
 
				+    return response
			
 
				+
			
 
				+
			
 
				+if __name__ == '__main__':
			
 
				+    app.run(host="0.0.0.0", port=8978, threaded=True)
			
--- a/huitun/__init__.py
+++ b/huitun/__init__.py
@@ -0,0 +1,143 @@
 
				+"""
			
 
				+
			
 
				+"""
			
 
				+import logging
			
 
				+import threading
			
 
				+
			
 
				+from playwright.sync_api import sync_playwright, Page, Playwright
			
 
				+
			
 
				+import api
			
 
				+
			
 
				+HUITUN_URL = 'https://xhs.huitun.com/'
			
 
				+
			
 
				+
			
 
				+def is_element_present(page, selector):
			
 
				+    try:
			
 
				+        page.wait_for_selector(selector, timeout=2000)
			
 
				+        return True
			
 
				+    except Exception:
			
 
				+        return False
			
 
				+
			
 
				+
			
 
				+class LockManager():
			
 
				+    """
			
 
				+    全局锁管理，每个手机号只能打开一个上下文相同的浏览器
			
 
				+    """
			
 
				+
			
 
				+    def __init__(self):
			
 
				+        self.locks = {}
			
 
				+
			
 
				+    def acquire_lock(self, key):
			
 
				+        if key not in self.locks:
			
 
				+            self.locks[key] = threading.Lock()
			
 
				+        acquire = self.locks[key].acquire(timeout=300)
			
 
				+        if acquire:
			
 
				+            logging.info(f"{key} 获取锁成功")
			
 
				+
			
 
				+    def release_lock(self, key):
			
 
				+        if key in self.locks:
			
 
				+            self.locks[key].release()
			
 
				+            logging.info(f"{key} 释放锁成功")
			
 
				+
			
 
				+    def is_locked(self, key):
			
 
				+        """
			
 
				+        检查给定的键是否处于锁定状态
			
 
				+        """
			
 
				+        if key in self.locks:
			
 
				+            return self.locks[key].locked()
			
 
				+        else:
			
 
				+            return False
			
 
				+
			
 
				+
			
 
				+lock_manager = LockManager()
			
 
				+
			
 
				+
			
 
				+class HuiTunBrowser:
			
 
				+    def __init__(self, phone: str):
			
 
				+        api.assert_not_none(phone, "手机号不能为空")
			
 
				+        self.phone = phone
			
 
				+        self.browser = None
			
 
				+        self.page = None
			
 
				+        self.result = None
			
 
				+        self.list_result = []
			
 
				+        self.has_more = False
			
 
				+
			
 
				+    def __init_browser__(self, playwright: Playwright):
			
 
				+        self.browser = playwright.chromium.launch_persistent_context(
			
 
				+            user_data_dir=f'./.data/huitun/{self.phone}',
			
 
				+            headless=False,
			
 
				+            slow_mo=1000,
			
 
				+            channel="chrome",
			
 
				+            ignore_https_errors=True,
			
 
				+            args=[
			
 
				+                '--disable-blink-features=AutomationControlled',
			
 
				+                '--incognito',
			
 
				+                '--ignore-certificate-errors-spki-list',
			
 
				+                '--disable-web-security',  # 禁用 Web 安全性，类似于 ChromeOptions 中的 --ignore-certificate-errors-spki-list
			
 
				+                '--no-sandbox',  # 禁用沙盒模式
			
 
				+                '--disable-dev-shm-usage',  # 禁用/dev/shm使用
			
 
				+                '--disable-features=site-per-process',  # 禁用每个站点的进程，类似于 ChromeOptions 中的 --no-sandbox
			
 
				+                '--ignore-certificate-errors',  # 忽略证书错误
			
 
				+                '--disable-features=AutomationControlled'  # 禁用与自动化相关的特性
			
 
				+            ])
			
 
				+        self.browser.add_init_script(path="./stealth.min.js")
			
 
				+        self.page = self.browser.new_page()
			
 
				+
			
 
				+    def close(self):
			
 
				+        if self.browser is not None:
			
 
				+            self.browser.close()
			
 
				+        if self.page is not None:
			
 
				+            self.page.close()
			
 
				+
			
 
				+    def login(self, password: str):
			
 
				+        """
			
 
				+        登录抖音，一个登录之后，全部的页面都有了登录状态
			
 
				+        :return: 2- 需要验证码 1-登录成功
			
 
				+        """
			
 
				+        with sync_playwright() as playwright:
			
 
				+            self.__init_browser__(playwright)
			
 
				+            self.page.goto(HUITUN_URL)
			
 
				+            if is_element_present(self.page, '.ant-modal-body'):
			
 
				+                if not is_element_present(self.page, 'text=密码登录'):
			
 
				+                    pwd_login = self.page.query_selector('.b9dOaTo9gfF3wLAi7jlXTg\=\=')
			
 
				+                    if pwd_login is not None:
			
 
				+                        pwd_login.click()
			
 
				+                self.page.get_by_placeholder('请输入手机号').type(self.phone)
			
 
				+                self.page.get_by_placeholder('6-15位数字与字母组合').type(password)
			
 
				+                self.page.get_by_text('登 录', exact=True).click()
			
 
				+                self.page.wait_for_timeout(30_000)
			
 
				+
			
 
				+    def search_note(self, tag_name: str, size: int):
			
 
				+        lock_manager.acquire_lock(self.phone)
			
 
				+        try:
			
 
				+            with sync_playwright() as playwright:
			
 
				+                self.__init_browser__(playwright)
			
 
				+                self.list_result = []
			
 
				+                api.assert_not_none(tag_name, "标签不能为空")
			
 
				+                self.page.on('response', self.search_note_handler)
			
 
				+                self.page.goto('https://xhs.huitun.com/#/note/note_search')
			
 
				+                self.page.wait_for_timeout(3000)
			
 
				+                while size is None or len(self.list_result) < size:
			
 
				+                    logging.info('继续搜索用户主页')
			
 
				+                    self.page.evaluate("window.scrollTo(0, document.body.scrollHeight)")
			
 
				+                    self.page.wait_for_timeout(2000)
			
 
				+                    logging.info('搜索用户主页图文结果数：%s', len(self.list_result))
			
 
				+                self.close()
			
 
				+                return self.list_result
			
 
				+        finally:
			
 
				+            lock_manager.release_lock(self.phone)
			
 
				+
			
 
				+    def search_note_handler(self, response):
			
 
				+        """
			
 
				+        处理用户主页搜索图文请求响应
			
 
				+        :param response:
			
 
				+        :return:
			
 
				+        """
			
 
				+        if response is not None and '/note/search' in response.url:
			
 
				+            response_body = response.json()
			
 
				+            if response_body.get('status') == 0:
			
 
				+                note_list = response_body.get('extData').get('list')
			
 
				+                if len(self.list_result) == 0:
			
 
				+                    self.list_result = note_list
			
 
				+                else:
			
 
				+                    self.list_result.extend(note_list)
			
--- a/requirements.txt
+++ b/requirements.txt
@@ -0,0 +1 @@
 
				+playwright==1.43.0
			
--- a/stealth.min.js
+++ b/stealth.min.js