# captcha_ident.py (7.0 KB)

import random
import re

import cv2 as cv
import numpy as np
import requests
from PIL import Image
from playwright.sync_api import sync_playwright, Page

from util.playwright_util import is_element_present


  9. class CaptchaIdent:
  10. """
  11. 处理灰豚登录的滑块验证码
  12. """
  13. def __init__(self, page):
  14. self.page = page
  15. self.frame = self.page.frames[1]
  16. def start(self):
  17. for i in range(5):
  18. self.page.wait_for_timeout(1000)
  19. self.get_slide_bg_img()
  20. start_x = self.get_slide_block_img_and_start_x()
  21. distance1, distance2 = self.get_slide_distance(start_x)
  22. slide_result = self.move_to_notch(distance1, distance2)
  23. if not slide_result:
  24. self.refresh_captcha()
  25. else:
  26. return
  27. raise Exception("滑块验证失败")
  28. def get_slide_bg_img(self):
  29. """截取滑动验证码背景图片"""
  30. if self.frame is not None:
  31. bg_ele = self.frame.query_selector('.tc-bg-img')
  32. bg_style = bg_ele.evaluate(
  33. "element => window.getComputedStyle(element).getPropertyValue('background-image')")
  34. bg_img = re.search(r'url\("([^"]+)"\)', bg_style).group(1)
  35. r = requests.get(bg_img)
  36. with open("./slide_bg.png", "wb") as f:
  37. f.write(r.content)
  38. def get_slide_block_img_and_start_x(self):
  39. """获取滑块图片以及初始x坐标"""
  40. print("正在获取滑块图片")
  41. # 首先保存整个登录背景截图
  42. self.page.wait_for_timeout(2000)
  43. slideBg = self.frame.query_selector('#slideBg')
  44. slideBg.screenshot(path="slide_bg.png")
  45. # 获取滑动验证码所在的iframe
  46. captcha_frame = self.frame
  47. # 获取滑块图片
  48. # .tc-fg-item对应的有三个元素,一个是目标滑块,一个是滑轨,还有一个是滑轨上的按钮
  49. for i in range(3):
  50. slide_block_ele = captcha_frame.locator(".tc-fg-item").nth(i)
  51. slide_block_style = slide_block_ele.get_attribute("style")
  52. # 滑轨按钮元素的style值中不包含url字符串
  53. if "url" not in slide_block_style:
  54. continue
  55. # 从元素的style值中分析得出只有目标滑块的top值小于150
  56. top_value = re.search(r'top: (.+)px;', slide_block_style).groups()[0]
  57. if float(top_value) > 150:
  58. continue
  59. # 获取x坐标
  60. slide_block_x = float(re.search(r'left: (.+)px; top: ', slide_block_style).groups()[0])
  61. slide_block_y = float(top_value)
  62. # 通过滑块位置,从背景图中截取滑块图片 # cropped_image = image.crop((left, top, right, bottom))
  63. slide_block_rect = slide_block_ele.bounding_box()
  64. bg = Image.open("slide_bg.png")
  65. # offset = slide_block_rect["width"] // 5 # 从背景图上截取会混入滑块周围的一些像素点,所以加一个偏移值,截取到滑块内部的图片。
  66. slide_block_img = bg.crop((slide_block_x + 4, slide_block_y + 4,
  67. slide_block_x + slide_block_rect["width"] - 4,
  68. slide_block_y + slide_block_rect["height"] - 4))
  69. slide_block_img.save("slide_block.png")
  70. return slide_block_x + 4
  71. def get_slide_distance(self, start_x):
  72. """获取滑动距离"""
  73. print("正在获取滑动距离")
  74. # 通过opencv比较图片,获取缺口位置
  75. slide_bg_img = cv.imread("./slide_bg.png")
  76. slide_bg_img = self.set_contrast_brightness(slide_bg_img, 0.4, 0)
  77. slide_block_img = cv.imread("./slide_block.png")
  78. slide_block_img = self.set_contrast_brightness(slide_block_img, 0.4, 0)
  79. cv.imwrite("./slide_block_handled.png", slide_block_img)
  80. result = cv.matchTemplate(slide_block_img, slide_bg_img, cv.TM_CCOEFF_NORMED)
  81. minVal, maxVal, minLoc, maxLoc = cv.minMaxLoc(result)
  82. # 缺口的x坐标
  83. notch_x1 = minLoc[0]
  84. notch_x2 = maxLoc[0]
  85. # 距离
  86. distance1 = notch_x1 - start_x
  87. distance2 = notch_x2 - start_x
  88. return distance1, distance2
  89. @staticmethod
  90. def set_contrast_brightness(frame, contrast_value, brightness_value):
  91. if not contrast_value:
  92. contrast_value = 0.0
  93. if not brightness_value:
  94. brightness_value = 0
  95. blank = np.zeros(frame.shape, frame.dtype)
  96. frame = cv.addWeighted(frame, contrast_value, blank, 1 - contrast_value, brightness_value)
  97. return frame
  98. @staticmethod
  99. def get_tracks(distance):
  100. """获取移动轨迹"""
  101. tracks = [] # 移动轨迹
  102. current = 0 # 当前位移
  103. mid = distance * 3/4 # 减速阈值
  104. t = 0.5 # 计算间隔
  105. v = 1 # 初始速度
  106. while current < distance:
  107. if current < mid:
  108. a = random.randint(5, 10) # 加速度为正5
  109. else:
  110. a = random.randint(-5, -3) # 加速度为负3
  111. v0 = v # 初速度 v0
  112. v = v0 + a * t # 当前速度
  113. move = v0 * t + 1 / 2 * a * t * t # 移动距离
  114. current += move
  115. tracks.append(round(current))
  116. return tracks
  117. def move_to_notch(self, distance1, distance2):
  118. """移动滑轨按钮到缺口处"""
  119. # 获取滑动验证码所在的iframe
  120. captcha_iframe = self.frame
  121. for i in range(2):
  122. # 获取按钮位置,将鼠标移到上方并按下
  123. slider_btn_rect = captcha_iframe.get_by_alt_text("slider").bounding_box()
  124. self.page.mouse.move(slider_btn_rect['x'], slider_btn_rect['y'])
  125. self.page.mouse.down()
  126. distance = [distance1, distance2][i]
  127. if distance <= 0: # 距离不可能小于等于0
  128. continue
  129. print(f"正在进行第{i + 1}次滑动,滑动距离{distance}")
  130. tracks = self.get_tracks(distance)
  131. for x in tracks:
  132. self.page.mouse.move(slider_btn_rect['x'] + x, random.randint(-5, 5) + slider_btn_rect['y'])
  133. self.page.mouse.move(slider_btn_rect['x'] + tracks[-1] + 5, random.randint(-5, 5) + slider_btn_rect['y'])
  134. self.page.mouse.move(slider_btn_rect['x'] + tracks[-1] - 5, random.randint(-5, 5) + slider_btn_rect['y'])
  135. self.page.mouse.up()
  136. # 滑动结束后等待一段时间
  137. self.page.wait_for_timeout(2000)
  138. # 寻找按钮是否还存在,不存在的话表明已通过滑动验证码,存在的话尝试下一个距离
  139. if not is_element_present(self.page, '.ant-modal-body'):
  140. print("滑动验证通过")
  141. return True
  142. return False
  143. def refresh_captcha(self):
  144. """刷新验证码"""
  145. # 获取滑动验证码所在的iframe
  146. print("刷新验证码")
  147. self.frame = self.page.frames[1]
  148. self.page.wait_for_timeout(2000)