168 lines
4.3 KiB
Python
168 lines
4.3 KiB
Python
|
|
# -*- coding: utf-8 -*-
|
||
|
|
"""
Capture the complete WeChat contact list by scrolling from the top of the
list to the bottom and taking a screenshot of each page.
"""
|
||
|
|
import uiautomation as auto
|
||
|
|
import time
|
||
|
|
import os
|
||
|
|
from PIL import Image
|
||
|
|
|
||
|
|
|
||
|
|
def find_wechat_window():
    """Locate the WeChat main window.

    Builds a window control matched by the name '微信' with a search depth
    of 1, then waits for it via ``Exists(3, 1)`` (presumably up to 3 seconds,
    polling every second; confirm against the uiautomation docs).

    Returns:
        The window control when it exists, otherwise ``None``.
    """
    candidate = auto.WindowControl(searchDepth=1, Name='微信')
    found = candidate.Exists(3, 1)
    return candidate if found else None
|
||
|
|
|
||
|
|
|
||
|
|
def capture_contact_region(wechat_window, index, save_dir):
    """Capture the contact-list strip of the WeChat window to a PNG file.

    The captured rectangle is given relative to the window: it starts 70 px
    from the left and 130 px from the top, is 280 px wide, and its height is
    the window height minus 160 px.

    Args:
        wechat_window: Window control exposing ``BoundingRectangle`` and
            ``ToBitmap``.
        index: Sequence number embedded in the file name, zero-padded to
            three digits (for example ``region_007.png``).
        save_dir: Directory in which the screenshot is written.

    Returns:
        The path of the saved screenshot, or ``None`` when the capture or
        the save failed.
    """
    rect = wechat_window.BoundingRectangle

    x_offset = 70
    y_offset = 130
    width = 280
    height = rect.height() - 160

    screenshot_path = os.path.join(save_dir, f"region_{index:03d}.png")

    try:
        bitmap = wechat_window.ToBitmap(
            x=x_offset, y=y_offset, width=width, height=height
        )
        # ToFile reports failure through its return value rather than by
        # raising, so a path must only be handed back when the save worked.
        # The explicit `is False` keeps a None return treated as success.
        if bitmap is None or bitmap.ToFile(screenshot_path) is False:
            print(f"截图失败: 无法保存 {screenshot_path}")
            return None
        return screenshot_path
    except Exception as e:
        print(f"截图失败: {e}")
        return None
|
||
|
|
|
||
|
|
|
||
|
|
def get_image_hash(image_path):
    """Compute an average hash of an image.

    The image is resized to 16x16, converted to grayscale ('L' mode), and
    every pixel is encoded as '1' when it is brighter than the mean pixel
    value and '0' otherwise.

    Args:
        image_path: Path of the image file to hash.

    Returns:
        A string of '0'/'1' characters (one per pixel of the 16x16
        thumbnail), or ``None`` when the path is empty or the image cannot
        be read.
    """
    if not image_path:
        return None

    try:
        # The context manager releases the file handle as soon as the pixels
        # have been extracted instead of waiting for garbage collection.
        with Image.open(image_path) as img:
            thumb = img.resize((16, 16), Image.Resampling.LANCZOS)
            gray = thumb.convert('L')
            # NOTE(review): get_flattened_data() appears to exist only in
            # recent Pillow releases; fall back to getdata() so that an
            # older install does not make every hash fail.
            read_pixels = getattr(gray, 'get_flattened_data', None) or gray.getdata
            pixels = list(read_pixels())
    except Exception as e:
        # Best effort: the caller treats None as "no hash available", but
        # the reason is reported so the failure is not silent.
        print(f"计算图片哈希失败: {e}")
        return None

    if not pixels:
        return None
    avg = sum(pixels) / len(pixels)
    return ''.join('1' if p > avg else '0' for p in pixels)
|
||
|
|
|
||
|
|
|
||
|
|
def images_similarity(hash1, hash2):
    """Return the fraction of matching positions between two hash strings.

    Args:
        hash1: First hash string (or ``None``).
        hash2: Second hash string (or ``None``).

    Returns:
        ``0`` when either hash is empty or ``None``; otherwise a float in
        ``[0, 1]`` where ``1.0`` means the hashes are identical. When the
        hashes differ in length, the positions present in only one of them
        count as mismatches.
    """
    if not hash1 or not hash2:
        return 0

    shorter, longer = sorted((len(hash1), len(hash2)))
    mismatches = sum(c1 != c2 for c1, c2 in zip(hash1, hash2))
    # zip() stops at the shorter hash; the surplus positions of the longer
    # one have no counterpart and must not be scored as matches.
    mismatches += longer - shorter
    return 1 - mismatches / longer
|
||
|
|
|
||
|
|
|
||
|
|
def scroll_to_top(wechat_window):
    """Bring the list inside the WeChat window back to its top.

    Clicks a point 200 px right of and 400 px below the window's top-left
    corner, then sends Home 20 times followed by PageUp 5 times, pausing
    after every key press.

    Args:
        wechat_window: Window control exposing ``BoundingRectangle``.
    """
    print("滚动到顶部...")
    bounds = wechat_window.BoundingRectangle
    target = (bounds.left + 200, bounds.top + 400)

    # Click first so that the key presses below go to the list.
    auto.SetCursorPos(*target)
    auto.Click(*target)
    time.sleep(0.3)

    # (key, number of presses, pause after each press): Home is repeated to
    # make sure the top is reached, and a few PageUp presses follow as an
    # extra measure.
    key_plan = (
        ('{Home}', 20, 0.2),
        ('{PageUp}', 5, 0.3),
    )
    for key, presses, pause in key_plan:
        for _ in range(presses):
            auto.SendKeys(key)
            time.sleep(pause)

    print("已滚动到顶部")
    time.sleep(1)
|
||
|
|
|
||
|
|
|
||
|
|
def scroll_down(wechat_window):
    """Scroll the list inside the WeChat window down by one step.

    Clicks a point 200 px right of and 400 px below the window's top-left
    corner and then turns the mouse wheel down three times.

    Args:
        wechat_window: Window control exposing ``BoundingRectangle``.

    Returns:
        ``True`` when the scroll was issued, ``False`` when an error
        occurred while doing so.
    """
    try:
        rect = wechat_window.BoundingRectangle
        center_x = rect.left + 200
        center_y = rect.top + 400

        auto.SetCursorPos(center_x, center_y)
        auto.Click(center_x, center_y)
        time.sleep(0.2)
        auto.WheelDown(wheelTimes=3)
        time.sleep(0.4)
        return True
    except Exception as e:
        # Only ordinary errors are absorbed; KeyboardInterrupt and
        # SystemExit propagate so Ctrl+C can still stop the capture loop.
        print(f"滚动失败: {e}")
        return False
|
||
|
|
|
||
|
|
|
||
|
|
def _clear_old_screenshots(save_dir):
    """Delete every file ending in ``.png`` directly inside *save_dir*."""
    for name in os.listdir(save_dir):
        if name.endswith('.png'):
            os.remove(os.path.join(save_dir, name))


def main(save_dir=r"D:\夏骥\微信研究\scroll_full", max_screenshots=150):
    """Screenshot the WeChat contact list page by page until the bottom.

    Steps:
        1. Find the WeChat window; stop if it is not available.
        2. Create the screenshot directory and remove old ``.png`` files.
        3. Scroll the list to the top.
        4. Repeatedly capture the contact region and scroll down. The run
           ends when two consecutive comparisons between successive
           screenshots exceed 0.95 similarity, or when ``max_screenshots``
           iterations have been performed.

    Args:
        save_dir: Directory that receives the screenshots.
        max_screenshots: Upper bound on capture iterations.
    """
    print("=" * 60)
    print("完整抓取微信通讯录")
    print("=" * 60)

    # Find the window before touching the output directory, so that a run
    # which cannot start does not wipe the previous run's screenshots.
    print("\n查找微信窗口...")
    wechat_window = find_wechat_window()
    if not wechat_window:
        print("未找到微信窗口!")
        return

    print(f"找到微信窗口: {wechat_window.Name}")

    # Prepare a clean screenshot directory.
    os.makedirs(save_dir, exist_ok=True)
    _clear_old_screenshots(save_dir)

    # Give the user a moment to open the contacts view, then go to the top.
    print("\n请确保微信通讯录界面已打开...")
    time.sleep(2)
    scroll_to_top(wechat_window)

    print("\n开始滚动截图...")
    screenshots = []
    last_hash = None
    no_change = 0

    for i in range(max_screenshots):
        path = capture_contact_region(wechat_window, i, save_dir)
        if path:
            screenshots.append(path)
            print(f"  截图 {i+1}")

            # Bottom detection: the view no longer changes after scrolling.
            current_hash = get_image_hash(path)
            if last_hash:
                sim = images_similarity(last_hash, current_hash)
                if sim > 0.95:
                    no_change += 1
                    if no_change >= 2:
                        print(f"\n检测到到底,共截图 {len(screenshots)} 张")
                        break
                else:
                    no_change = 0
            last_hash = current_hash

        scroll_down(wechat_window)

    print(f"\n完成!共截图 {len(screenshots)} 张")
    print(f"截图保存目录: {save_dir}")

    # Point the user at the follow-up OCR step.
    print("\n接下来请运行 batch_ocr_full.py 进行OCR识别")
|
||
|
|
|
||
|
|
|
||
|
|
# Run the capture only when this file is executed as a script.
if __name__ == "__main__":
    main()
|