commit 21c03e5bd02a2d01938eae399861235b3d1a2a95 Author: xiaji Date: Thu Feb 26 16:55:40 2026 +0800 Initial commit: 微信联系人祝福管理系统 diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..8457d9f --- /dev/null +++ b/.gitignore @@ -0,0 +1,40 @@ +# 核心数据文件 - 不上传 +contacts.db +contacts_data.json +ocr_progress.json +ocr_result.txt + +# 截图和图片数据 - 不上传 +*.png +scroll/ +scroll_complete/ +scroll_full/ +Snipaste/ + +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +env/ +venv/ +.venv/ +ENV/ + +# IDE +.idea/ +.vscode/ +*.swp +*.swo + +# 系统文件 +.DS_Store +Thumbs.db + +# 日志 +*.log + +# 临时文件 +*.tmp +*.temp diff --git a/README.md b/README.md new file mode 100644 index 0000000..f248903 --- /dev/null +++ b/README.md @@ -0,0 +1,60 @@ +# 微信联系人祝福管理 + +一个用于管理微信联系人并发送节日祝福的 Web 应用。 + +## 功能特性 + +- 联系人管理:添加、编辑、删除联系人 +- 分类标签:支持同事、好友、同学、老师、亲戚、客户、供应商等分类 +- 自定义内容:为每个联系人设置自定义备注 +- 祝福语管理:编辑和保存个性化祝福语 +- 批量操作:批量选择、批量删除 +- 搜索筛选:按姓名或分类快速查找 + +## 技术栈 + +- **后端**: Python Flask +- **前端**: HTML + Bootstrap 5 + JavaScript +- **数据库**: SQLite + +## 快速开始 + +### 安装依赖 + +```bash +pip install flask +``` + +### 启动服务 + +```bash +python app.py +``` + +### 访问页面 + +打开浏览器访问: http://localhost:5000/static/contacts_manager.html + +## 项目结构 + +``` +├── app.py # Flask 后端 API +├── init_db.py # 数据库初始化脚本 +├── static/ +│ └── contacts_manager.html # 前端页面 +└── contacts.db # SQLite 数据库(不包含在仓库中) +``` + +## API 接口 + +| 方法 | 路径 | 说明 | +|------|------|------| +| GET | /api/contacts | 获取联系人列表 | +| POST | /api/contacts | 创建联系人 | +| PUT | /api/contacts/:id | 更新联系人 | +| DELETE | /api/contacts/:id | 删除联系人 | +| GET | /api/stats | 获取统计数据 | + +## 许可证 + +MIT License diff --git a/add_column.py b/add_column.py new file mode 100644 index 0000000..a88b8a0 --- /dev/null +++ b/add_column.py @@ -0,0 +1,28 @@ +# -*- coding: utf-8 -*- +"""添加 custom_content 和 search_name 字段""" +import sqlite3 + +DB_PATH = r"D:\夏骥\微信研究\contacts.db" + +conn = sqlite3.connect(DB_PATH) +cursor = conn.cursor() + +# 检查字段是否已存在 +cursor.execute("PRAGMA table_info(contacts)") +columns = [col[1] for col in cursor.fetchall()] + +if 'custom_content' not in columns: + cursor.execute('ALTER TABLE contacts ADD COLUMN custom_content TEXT DEFAULT ""') + print("已添加 custom_content 字段") +else: + print("custom_content 字段已存在") + +if 'search_name' not in columns: + cursor.execute('ALTER TABLE contacts ADD COLUMN search_name TEXT DEFAULT ""') + print("已添加 search_name 字段") +else: + print("search_name 字段已存在") + +conn.commit() +conn.close() +print("完成") diff --git a/app.py b/app.py new file mode 100644 index 0000000..6258eaf --- /dev/null +++ b/app.py @@ -0,0 +1,249 @@ +# -*- coding: utf-8 -*- +""" +Flask后端API - 联系人管理 +""" +from flask import Flask, jsonify, request, send_from_directory +import sqlite3 +import os + +app = Flask(__name__, static_folder='static', static_url_path='/static') + +# 数据库文件 +DB_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'contacts.db') + + +# 前端页面路由 +@app.route('/') +def index(): + return send_from_directory('static', 'contacts_manager.html') + + +def get_db(): + """获取数据库连接""" + conn = sqlite3.connect(DB_PATH) + conn.row_factory = sqlite3.Row + return conn + + +def row_to_dict(row): + """将sqlite Row转换为字典""" + return dict(row) if row else None + + +@app.route('/api/contacts', methods=['GET']) +def get_contacts(): + """获取所有联系人""" + conn = get_db() + cursor = conn.cursor() + + # 获取查询参数 + category = request.args.get('category') + search = request.args.get('search') + page = int(request.args.get('page', 1)) + page_size = int(request.args.get('page_size', 20)) + + # 先获取总数 + count_sql = "SELECT COUNT(*) FROM contacts WHERE 1=1" + count_params = [] + + if category: + # 支持多标签筛选:查找包含该标签的记录 + count_sql += " AND (category = ? OR category LIKE ? OR category LIKE ? OR category LIKE ?)" + count_params.extend([ + category, # 完全匹配: "同事" + f"{category},%", # 开头: "同事,好友" + f"%,{category}", # 结尾: "好友,同事" + f"%,{category},%" # 中间: "好友,同事,亲戚" + ]) + + if search: + count_sql += " AND (name LIKE ? OR search_name LIKE ?)" + count_params.extend([f"%{search}%", f"%{search}%"]) + + cursor.execute(count_sql, count_params) + total = cursor.fetchone()[0] + + # 获取分页数据 + sql = "SELECT * FROM contacts WHERE 1=1" + params = [] + + if category: + sql += " AND (category = ? OR category LIKE ? OR category LIKE ? OR category LIKE ?)" + params.extend([ + category, + f"{category},%", + f"%,{category}", + f"%,{category},%" + ]) + + if search: + sql += " AND (name LIKE ? OR search_name LIKE ?)" + params.extend([f"%{search}%", f"%{search}%"]) + + sql += " ORDER BY name LIMIT ? OFFSET ?" + params.extend([page_size, (page - 1) * page_size]) + + cursor.execute(sql, params) + rows = cursor.fetchall() + conn.close() + + contacts = [] + for row in rows: + c = row_to_dict(row) + c['selected'] = bool(c.get('selected', 0)) + contacts.append(c) + + return jsonify({ + 'contacts': contacts, + 'total': total, + 'page': page, + 'page_size': page_size, + 'total_pages': (total + page_size - 1) // page_size + }) + + +@app.route('/api/contacts/', methods=['GET']) +def get_contact(contact_id): + """获取单个联系人""" + conn = get_db() + cursor = conn.cursor() + cursor.execute("SELECT * FROM contacts WHERE id = ?", (contact_id,)) + row = cursor.fetchone() + conn.close() + + if row: + c = row_to_dict(row) + c['selected'] = bool(c.get('selected', 0)) + return jsonify(c) + return jsonify({'error': 'Not found'}), 404 + + +@app.route('/api/contacts', methods=['POST']) +def create_contact(): + """创建联系人""" + data = request.json + conn = get_db() + cursor = conn.cursor() + + # 如果没有指定 custom_content,默认使用 category 的值 + custom_content = data.get('custom_content', '') + if not custom_content: + custom_content = data.get('category', '') + + cursor.execute(''' + INSERT INTO contacts (name, search_name, category, custom_content, blessing, selected) + VALUES (?, ?, ?, ?, ?, ?) + ''', ( + data.get('name', ''), + data.get('search_name', data.get('name', '')), + data.get('category', ''), + custom_content, + data.get('blessing', '马年新春快乐!愿您在新的一年里,事业腾飞,马到成功!'), + 1 if data.get('selected') else 0 + )) + + contact_id = cursor.lastrowid + conn.commit() + conn.close() + + return jsonify({'id': contact_id, 'message': '创建成功'}) + + +@app.route('/api/contacts/', methods=['PUT']) +def update_contact(contact_id): + """更新联系人""" + data = request.json + conn = get_db() + cursor = conn.cursor() + + # 构建更新语句 + updates = [] + params = [] + + if 'name' in data: + updates.append("name = ?") + params.append(data['name']) + + if 'search_name' in data: + updates.append("search_name = ?") + params.append(data['search_name']) + + if 'category' in data: + updates.append("category = ?") + params.append(data['category']) + + if 'custom_content' in data: + updates.append("custom_content = ?") + params.append(data['custom_content']) + + if 'blessing' in data: + updates.append("blessing = ?") + params.append(data['blessing']) + + if 'selected' in data: + updates.append("selected = ?") + params.append(1 if data['selected'] else 0) + + if updates: + updates.append("updated_at = CURRENT_TIMESTAMP") + params.append(contact_id) + + sql = f"UPDATE contacts SET {', '.join(updates)} WHERE id = ?" + cursor.execute(sql, params) + conn.commit() + + conn.close() + return jsonify({'message': '更新成功'}) + + +@app.route('/api/contacts/', methods=['DELETE']) +def delete_contact(contact_id): + """删除联系人""" + conn = get_db() + cursor = conn.cursor() + cursor.execute("DELETE FROM contacts WHERE id = ?", (contact_id,)) + conn.commit() + conn.close() + return jsonify({'message': '删除成功'}) + + +@app.route('/api/stats', methods=['GET']) +def get_stats(): + """获取统计数据""" + conn = get_db() + cursor = conn.cursor() + + # 总数 + cursor.execute("SELECT COUNT(*) FROM contacts") + total = cursor.fetchone()[0] + + # 已选择数 + cursor.execute("SELECT COUNT(*) FROM contacts WHERE selected = 1") + selected = cursor.fetchone()[0] + + # 分类统计 + cursor.execute(""" + SELECT category, COUNT(*) as count + FROM contacts + GROUP BY category + ORDER BY count DESC + """) + categories = [{'category': row[0] or '未分类', 'count': row[1]} for row in cursor.fetchall()] + + conn.close() + + return jsonify({ + 'total': total, + 'selected': selected, + 'categories': categories + }) + + +if __name__ == '__main__': + print("=" * 50) + print("联系人管理API服务") + print("=" * 50) + print("启动服务: http://localhost:5000") + print("前端页面: http://localhost:5000/static/contacts_manager.html") + print("=" * 50) + app.run(debug=True, host='0.0.0.0', port=5000) diff --git a/batch_ocr.py b/batch_ocr.py new file mode 100644 index 0000000..021b2c2 --- /dev/null +++ b/batch_ocr.py @@ -0,0 +1,142 @@ +# -*- coding: utf-8 -*- +""" +对已截图的图片进行OCR识别,并生成前端可用的JSON数据 +""" +import os +import requests +import base64 +from PIL import Image +import glob +import json +import re + + +def ocr_image(image_path): + """OCR识别单张图片""" + with open(image_path, 'rb') as f: + image_base64 = base64.b64encode(f.read()).decode('utf-8') + + url = "http://localhost:11434/api/chat" + payload = { + "model": "glm-ocr", + "messages": [{ + "role": "user", + "content": """识别图片中的所有联系人名称。要求: +1. 只输出联系人名称,每行一个 +2. 忽略分组标题(如星号、字母A-Z等) +3. 忽略数字统计 +4. 不要添加任何其他内容""", + "images": [image_base64] + }], + "stream": False + } + + try: + response = requests.post(url, json=payload, timeout=60) + return response.json().get('message', {}).get('content', '') + except Exception as e: + print(f"OCR失败: {e}") + return "" + + +def is_valid_contact(line): + """判断是否是有效的联系人""" + line = line.strip() + if not line: + return False + + invalid = ["公众号", "服务号", "企业微信联系人", "我的企业", "联系人", + "星标朋友", "新的朋友", "群聊", "标签", "仅聊天", "设备"] + + if line in invalid: + return False + if len(line) == 1 and line.isalpha(): + return False + if line.startswith(">") or line.startswith("!"): + return False + # 过滤JSON格式的内容 + if line.startswith('"') or line.startswith('{') or line.startswith('[') or line.startswith('```'): + return False + if line.startswith('"') and ':' in line: + return False + + return True + + +def clean_contact_name(name): + """清理联系人名称""" + # 移除引号 + name = name.strip('"\'') + # 移除末尾的标点 + name = name.rstrip(',,。::') + return name.strip() + + +def main(): + print("=" * 60) + print("批量OCR识别截图") + print("=" * 60) + + # 获取所有截图 + scroll_dir = r"D:\夏骥\微信研究\scroll" + screenshots = sorted(glob.glob(os.path.join(scroll_dir, "*.png"))) + + print(f"找到 {len(screenshots)} 张截图") + + all_contacts = set() + + for i, path in enumerate(screenshots): + print(f"\n[{i+1}/{len(screenshots)}] {os.path.basename(path)}") + result = ocr_image(path) + + new_count = 0 + for line in result.strip().split('\n'): + line = line.strip() + if is_valid_contact(line): + cleaned = clean_contact_name(line) + if cleaned and cleaned not in all_contacts: + new_count += 1 + print(f" + {cleaned}") + all_contacts.add(cleaned) + + print(f" 本轮新增 {new_count},累计 {len(all_contacts)}") + + # 生成JSON数据供前端使用 + contacts_json = [] + for idx, name in enumerate(sorted(all_contacts, key=lambda x: (not x[0].isalpha() if x else True, x.lower() if x and x[0].isalpha() else x))): + if name: # 确保名称非空 + contacts_json.append({ + "id": idx + 1, + "name": name, + "category": "", + "blessing": "马年新春快乐!愿您在新的一年里,事业腾飞,马到成功!", + "selected": False + }) + + # 保存为JSON文件(供前端导入) + json_file = r"D:\夏骥\微信研究\contacts_data.json" + with open(json_file, 'w', encoding='utf-8') as f: + json.dump(contacts_json, f, ensure_ascii=False, indent=2) + + print(f"\nJSON数据已保存: {json_file}") + + # 保存纯文本结果 + print("\n" + "=" * 60) + print("保存结果...") + + result_file = r"D:\夏骥\微信研究\ocr_result.txt" + with open(result_file, 'w', encoding='utf-8') as f: + f.write(f"微信通讯录OCR识别结果\n") + f.write(f"共截图 {len(screenshots)} 张\n") + f.write(f"共识别 {len(all_contacts)} 个联系人\n") + f.write("=" * 60 + "\n\n") + for c in sorted(all_contacts, key=lambda x: (not x[0].isalpha() if x else True, x.lower() if x and x[0].isalpha() else x)): + if c: + f.write(f"{c}\n") + + print(f"结果已保存: {result_file}") + print(f"共识别到 {len(all_contacts)} 个不重复联系人") + + +if __name__ == '__main__': + main() diff --git a/batch_ocr_complete.py b/batch_ocr_complete.py new file mode 100644 index 0000000..7721b06 --- /dev/null +++ b/batch_ocr_complete.py @@ -0,0 +1,188 @@ +# -*- coding: utf-8 -*- +""" +对完整截图进行OCR识别,并与数据库对比去重 +""" +import os +import sqlite3 +import requests +import base64 +from PIL import Image +import glob +import json + + +def ocr_image(image_path): + """OCR识别单张图片""" + with open(image_path, 'rb') as f: + image_base64 = base64.b64encode(f.read()).decode('utf-8') + + url = "http://localhost:11434/api/chat" + payload = { + "model": "glm-ocr", + "messages": [{ + "role": "user", + "content": """识别图片中的所有联系人名称。要求: +1. 只输出联系人名称,每行一个 +2. 忽略分组标题(如星号、字母A-Z等) +3. 忽略数字统计 +4. 不要添加任何其他内容""", + "images": [image_base64] + }], + "stream": False + } + + try: + response = requests.post(url, json=payload, timeout=60) + return response.json().get('message', {}).get('content', '') + except Exception as e: + print(f"OCR失败: {e}") + return "" + + +def is_valid_contact(line): + """判断是否是有效的联系人""" + line = line.strip() + if not line or len(line) < 2: + return False + + invalid = ["公众号", "服务号", "企业微信联系人", "我的企业", "联系人", + "星标朋友", "新的朋友", "群聊", "标签", "仅聊天", "设备"] + + if line in invalid: + return False + if len(line) == 1 and line.isalpha(): + return False + if line.startswith(">") or line.startswith("!"): + return False + if line.startswith('"') or line.startswith('{') or line.startswith('[') or line.startswith('```'): + return False + if line.startswith('"') and ':' in line: + return False + + return True + + +def clean_contact_name(name): + """清理联系人名称""" + name = name.strip('"\'') + name = name.rstrip(',,。::') + return name.strip() + + +def get_existing_contacts(): + """从数据库获取已存在的联系人""" + conn = sqlite3.connect(r'D:\夏骥\微信研究\contacts.db') + cursor = conn.cursor() + cursor.execute('SELECT name FROM contacts') + existing = set(row[0] for row in cursor.fetchall()) + conn.close() + return existing + + +def add_new_contacts(new_contacts): + """将新联系人添加到数据库""" + if not new_contacts: + return 0 + + conn = sqlite3.connect(r'D:\夏骥\微信研究\contacts.db') + cursor = conn.cursor() + + # 获取当前最大ID + cursor.execute('SELECT MAX(id) FROM contacts') + max_id = cursor.fetchone()[0] or 0 + + added = 0 + for idx, name in enumerate(new_contacts, start=max_id + 1): + cursor.execute(''' + INSERT INTO contacts (id, name, category, blessing, selected) + VALUES (?, ?, ?, ?, ?) + ''', (idx, name, '', '马年新春快乐!愿您在新的一年里,事业腾飞,马到成功!', False)) + added += 1 + + conn.commit() + conn.close() + return added + + +def main(): + print("=" * 60) + print("批量OCR识别并去重入库") + print("=" * 60) + + # 获取截图目录 + scroll_dir = r"D:\夏骥\微信研究\scroll_complete" + + if not os.path.exists(scroll_dir): + print(f"目录不存在: {scroll_dir}") + return + + screenshots = sorted(glob.glob(os.path.join(scroll_dir, "*.png"))) + + if not screenshots: + print("未找到截图文件!") + return + + print(f"找到 {len(screenshots)} 张截图") + + # 获取已存在的联系人 + existing_contacts = get_existing_contacts() + print(f"数据库中已有 {len(existing_contacts)} 个联系人") + + all_new_contacts = set() + skipped_count = 0 + + for i, path in enumerate(screenshots): + print(f"\n[{i+1}/{len(screenshots)}] {os.path.basename(path)}") + result = ocr_image(path) + + new_in_this = 0 + for line in result.strip().split('\n'): + line = line.strip() + if is_valid_contact(line): + cleaned = clean_contact_name(line) + if cleaned and len(cleaned) >= 2: + if cleaned in existing_contacts: + skipped_count += 1 + print(f" - {cleaned} (已存在,跳过)") + elif cleaned not in all_new_contacts: + new_in_this += 1 + print(f" + {cleaned} (新)") + all_new_contacts.add(cleaned) + + print(f" 本轮新增 {new_in_this},累计新发现 {len(all_new_contacts)},跳过 {skipped_count}") + + print(f"\n{'='*60}") + print(f"OCR完成!") + print(f"发现新联系人: {len(all_new_contacts)} 个") + print(f"跳过已存在: {skipped_count} 个") + + # 入库 + if all_new_contacts: + added = add_new_contacts(sorted(all_new_contacts, key=lambda x: (not x[0].isalpha() if x else True, x.lower() if x and x[0].isalpha() else x))) + print(f"成功入库: {added} 个") + + # 更新JSON文件 + conn = sqlite3.connect(r'D:\夏骥\微信研究\contacts.db') + cursor = conn.cursor() + cursor.execute('SELECT * FROM contacts ORDER BY id') + all_contacts = [] + for row in cursor.fetchall(): + all_contacts.append({ + "id": row[0], + "name": row[1], + "category": row[2], + "blessing": row[3], + "selected": bool(row[4]) + }) + conn.close() + + json_file = r"D:\夏骥\微信研究\contacts_data.json" + with open(json_file, 'w', encoding='utf-8') as f: + json.dump(all_contacts, f, ensure_ascii=False, indent=2) + + print(f"JSON数据已更新: {json_file}") + print(f"数据库总联系人: {len(all_contacts)} 个") + + +if __name__ == '__main__': + main() diff --git a/batch_ocr_fast.py b/batch_ocr_fast.py new file mode 100644 index 0000000..b7de23e --- /dev/null +++ b/batch_ocr_fast.py @@ -0,0 +1,185 @@ +# -*- coding: utf-8 -*- +""" +快速OCR识别 - 每5张截图识别一次 +""" +import os +import sqlite3 +import requests +import base64 +from PIL import Image +import glob +import json +import time + + +def ocr_image(image_path): + """OCR识别单张图片""" + with open(image_path, 'rb') as f: + image_base64 = base64.b64encode(f.read()).decode('utf-8') + + url = "http://localhost:11434/api/chat" + payload = { + "model": "glm-ocr", + "messages": [{ + "role": "user", + "content": """识别图片中的所有联系人名称。要求: +1. 只输出联系人名称,每行一个 +2. 忽略分组标题(如星号、字母A-Z等) +3. 忽略数字统计 +4. 不要添加任何其他内容""", + "images": [image_base64] + }], + "stream": False + } + + try: + response = requests.post(url, json=payload, timeout=60) + return response.json().get('message', {}).get('content', '') + except Exception as e: + print(f"OCR失败: {e}") + return "" + + +def is_valid_contact(line): + """判断是否是有效的联系人""" + line = line.strip() + if not line or len(line) < 2: + return False + + invalid = ["公众号", "服务号", "企业微信联系人", "我的企业", "联系人", + "星标朋友", "新的朋友", "群聊", "标签", "仅聊天", "设备"] + + if line in invalid: + return False + if len(line) == 1 and line.isalpha(): + return False + if line.startswith(">") or line.startswith("!"): + return False + + return True + + +def clean_contact_name(name): + """清理联系人名称""" + name = name.strip('"\'') + name = name.rstrip(',,。::') + return name.strip() + + +def get_existing_contacts(): + """从数据库获取已存在的联系人""" + conn = sqlite3.connect(r'D:\夏骥\微信研究\contacts.db') + cursor = conn.cursor() + cursor.execute('SELECT name FROM contacts') + existing = set(row[0] for row in cursor.fetchall()) + conn.close() + return existing + + +def add_new_contacts(new_contacts): + """将新联系人添加到数据库""" + if not new_contacts: + return 0 + + conn = sqlite3.connect(r'D:\夏骥\微信研究\contacts.db') + cursor = conn.cursor() + + # 获取当前最大ID + cursor.execute('SELECT MAX(id) FROM contacts') + max_id = cursor.fetchone()[0] or 0 + + added = 0 + for idx, name in enumerate(new_contacts, start=max_id + 1): + cursor.execute(''' + INSERT INTO contacts (id, name, category, blessing, selected) + VALUES (?, ?, ?, ?, ?) + ''', (idx, name, '', '马年新春快乐!愿您在新的一年里,事业腾飞,马到成功!', False)) + added += 1 + + conn.commit() + conn.close() + return added + + +def main(): + print("=" * 60) + print("快速OCR识别 - 采样模式") + print("=" * 60) + + # 获取截图目录 + scroll_dir = r"D:\夏骥\微信研究\scroll_complete" + screenshots = sorted(glob.glob(os.path.join(scroll_dir, "*.png"))) + + if not screenshots: + print("未找到截图文件!") + return + + print(f"找到 {len(screenshots)} 张截图") + + # 获取已存在的联系人 + existing_contacts = get_existing_contacts() + print(f"数据库中已有 {len(existing_contacts)} 个联系人") + + # 每5张截图识别一次 + step = 5 + all_new_contacts = set() + skipped_count = 0 + + for i in range(0, len(screenshots), step): + batch = screenshots[i:i+step] + print(f"\n[{i+1}/{len(screenshots)}] 处理批次 {i//step + 1}") + + # 只识别批次的第一张 + path = batch[0] + result = ocr_image(path) + + new_in_this = 0 + for line in result.strip().split('\n'): + line = line.strip() + if is_valid_contact(line): + cleaned = clean_contact_name(line) + if cleaned and len(cleaned) >= 2: + if cleaned in existing_contacts: + skipped_count += 1 + elif cleaned not in all_new_contacts: + new_in_this += 1 + all_new_contacts.add(cleaned) + print(f" + {cleaned}") + + print(f" 本轮新增 {new_in_this},累计新发现 {len(all_new_contacts)}") + + print(f"\n{'='*60}") + print(f"OCR完成!") + print(f"发现新联系人: {len(all_new_contacts)} 个") + print(f"跳过已存在: {skipped_count} 个") + + # 入库 + if all_new_contacts: + added = add_new_contacts(sorted(all_new_contacts, key=lambda x: (not x[0].isalpha() if x else True, x.lower() if x and x[0].isalpha() else x))) + print(f"成功入库: {added} 个") + + # 更新JSON文件 + conn = sqlite3.connect(r'D:\夏骥\微信研究\contacts.db') + cursor = conn.cursor() + cursor.execute('SELECT * FROM contacts ORDER BY id') + all_contacts = [] + for row in cursor.fetchall(): + all_contacts.append({ + "id": row[0], + "name": row[1], + "category": row[2], + "blessing": row[3], + "selected": bool(row[4]) + }) + conn.close() + + json_file = r"D:\夏骥\微信研究\contacts_data.json" + with open(json_file, 'w', encoding='utf-8') as f: + json.dump(all_contacts, f, ensure_ascii=False, indent=2) + + print(f"JSON数据已更新: {json_file}") + print(f"数据库总联系人: {len(all_contacts)} 个") + + +if __name__ == '__main__': + main() diff --git a/batch_ocr_full.py b/batch_ocr_full.py new file mode 100644 index 0000000..dc70beb --- /dev/null +++ b/batch_ocr_full.py @@ -0,0 +1,144 @@ +# -*- coding: utf-8 -*- +""" +对完整截图进行OCR识别 +""" +import os +import requests +import base64 +from PIL import Image +import glob +import json + + +def ocr_image(image_path): + """OCR识别单张图片""" + with open(image_path, 'rb') as f: + image_base64 = base64.b64encode(f.read()).decode('utf-8') + + url = "http://localhost:11434/api/chat" + payload = { + "model": "glm-ocr", + "messages": [{ + "role": "user", + "content": """识别图片中的所有联系人名称。要求: +1. 只输出联系人名称,每行一个 +2. 忽略分组标题(如星号、字母A-Z等) +3. 忽略数字统计 +4. 不要添加任何其他内容""", + "images": [image_base64] + }], + "stream": False + } + + try: + response = requests.post(url, json=payload, timeout=60) + return response.json().get('message', {}).get('content', '') + except Exception as e: + print(f"OCR失败: {e}") + return "" + + +def is_valid_contact(line): + """判断是否是有效的联系人""" + line = line.strip() + if not line or len(line) < 2: + return False + + invalid = ["公众号", "服务号", "企业微信联系人", "我的企业", "联系人", + "星标朋友", "新的朋友", "群聊", "标签", "仅聊天", "设备"] + + if line in invalid: + return False + if len(line) == 1 and line.isalpha(): + return False + if line.startswith(">") or line.startswith("!"): + return False + if line.startswith('"') or line.startswith('{') or line.startswith('[') or line.startswith('```'): + return False + if line.startswith('"') and ':' in line: + return False + + return True + + +def clean_contact_name(name): + """清理联系人名称""" + name = name.strip('"\'') + name = name.rstrip(',,。::') + return name.strip() + + +def main(): + print("=" * 60) + print("批量OCR识别完整截图") + print("=" * 60) + + # 获取截图目录 + scroll_dir = r"D:\夏骥\微信研究\scroll_full" + + if not os.path.exists(scroll_dir): + print(f"目录不存在: {scroll_dir}") + print("请先运行 scroll_full_contacts.py 进行截图") + return + + screenshots = sorted(glob.glob(os.path.join(scroll_dir, "*.png"))) + + if not screenshots: + print("未找到截图文件!") + return + + print(f"找到 {len(screenshots)} 张截图") + + all_contacts = set() + + for i, path in enumerate(screenshots): + print(f"\n[{i+1}/{len(screenshots)}] {os.path.basename(path)}") + result = ocr_image(path) + + new_count = 0 + for line in result.strip().split('\n'): + line = line.strip() + if is_valid_contact(line): + cleaned = clean_contact_name(line) + if cleaned and len(cleaned) >= 2 and cleaned not in all_contacts: + new_count += 1 + print(f" + {cleaned}") + all_contacts.add(cleaned) + + print(f" 本轮新增 {new_count},累计 {len(all_contacts)}") + + # 生成JSON数据 + contacts_json = [] + for idx, name in enumerate(sorted(all_contacts, key=lambda x: (not x[0].isalpha() if x else True, x.lower() if x and x[0].isalpha() else x))): + if name and len(name) >= 2: + contacts_json.append({ + "id": idx + 1, + "name": name, + "category": "", + "blessing": "马年新春快乐!愿您在新的一年里,事业腾飞,马到成功!", + "selected": False + }) + + # 保存JSON文件 + json_file = r"D:\夏骥\微信研究\contacts_data.json" + with open(json_file, 'w', encoding='utf-8') as f: + json.dump(contacts_json, f, ensure_ascii=False, indent=2) + + print(f"\nJSON数据已保存: {json_file}") + + # 保存纯文本结果 + result_file = r"D:\夏骥\微信研究\ocr_result_full.txt" + with open(result_file, 'w', encoding='utf-8') as f: + f.write(f"微信通讯录OCR识别结果(完整)\n") + f.write(f"共截图 {len(screenshots)} 张\n") + f.write(f"共识别 {len(contacts_json)} 个联系人\n") + f.write("=" * 60 + "\n\n") + for c in contacts_json: + f.write(f"{c['name']}\n") + + print(f"结果已保存: {result_file}") + print(f"\n共识别到 {len(contacts_json)} 个不重复联系人") + + +if __name__ == '__main__': + main() diff --git a/batch_ocr_parallel.py b/batch_ocr_parallel.py new file mode 100644 index 0000000..89ae611 --- /dev/null +++ b/batch_ocr_parallel.py @@ -0,0 +1,160 @@ +# -*- coding: utf-8 -*- +""" +并行OCR识别 - 多进程加速 +""" +import os +import sqlite3 +import requests +import base64 +from PIL import Image +import glob +import json +from multiprocessing import Pool, Manager +import time + + +def ocr_image(args): + """OCR识别单张图片""" + image_path, idx, total = args + + try: + with open(image_path, 'rb') as f: + image_base64 = base64.b64encode(f.read()).decode('utf-8') + + url = "http://localhost:11434/api/chat" + payload = { + "model": "glm-ocr", + "messages": [{ + "role": "user", + "content": """识别图片中的所有联系人名称。要求: +1. 只输出联系人名称,每行一个 +2. 忽略分组标题(如星号、字母A-Z等) +3. 忽略数字统计 +4. 不要添加任何其他内容""", + "images": [image_base64] + }], + "stream": False + } + + response = requests.post(url, json=payload, timeout=60) + result = response.json().get('message', {}).get('content', '') + + contacts = [] + for line in result.strip().split('\n'): + line = line.strip() + if line and len(line) >= 2 and len(line) < 50: + # 简单过滤 + if not any(x in line for x in ['公众号', '服务号', '企业微信', '联系人', '星标朋友', '新的朋友']): + contacts.append(line.strip('"\'').rstrip(',,。::')) + + print(f"[{idx+1}/{total}] {os.path.basename(image_path)}: 发现 {len(contacts)} 个联系人") + return contacts + except Exception as e: + print(f"[{idx+1}/{total}] {os.path.basename(image_path)}: 失败 - {e}") + return [] + + +def get_existing_contacts(): + """从数据库获取已存在的联系人""" + conn = sqlite3.connect(r'D:\夏骥\微信研究\contacts.db') + cursor = conn.cursor() + cursor.execute('SELECT name FROM contacts') + existing = set(row[0] for row in cursor.fetchall()) + conn.close() + return existing + + +def add_new_contacts(new_contacts): + """将新联系人添加到数据库""" + if not new_contacts: + return 0 + + conn = sqlite3.connect(r'D:\夏骥\微信研究\contacts.db') + cursor = conn.cursor() + + # 获取当前最大ID + cursor.execute('SELECT MAX(id) FROM contacts') + max_id = cursor.fetchone()[0] or 0 + + added = 0 + for idx, name in enumerate(new_contacts, start=max_id + 1): + cursor.execute(''' + INSERT INTO contacts (id, name, category, blessing, selected) + VALUES (?, ?, ?, ?, ?) + ''', (idx, name, '', '马年新春快乐!愿您在新的一年里,事业腾飞,马到成功!', False)) + added += 1 + + conn.commit() + conn.close() + return added + + +def main(): + print("=" * 60) + print("并行OCR识别") + print("=" * 60) + + # 获取截图目录 + scroll_dir = r"D:\夏骥\微信研究\scroll_complete" + screenshots = sorted(glob.glob(os.path.join(scroll_dir, "*.png"))) + + if not screenshots: + print("未找到截图文件!") + return + + print(f"找到 {len(screenshots)} 张截图") + + # 获取已存在的联系人 + existing_contacts = get_existing_contacts() + print(f"数据库中已有 {len(existing_contacts)} 个联系人") + + # 准备参数 + args_list = [(path, i, len(screenshots)) for i, path in enumerate(screenshots)] + + # 并行处理 - 使用4个进程 + print("\n开始并行OCR识别...") + all_contacts = set() + + with Pool(processes=4) as pool: + results = pool.map(ocr_image, args_list) + + # 收集结果 + for contacts in results: + for name in contacts: + if name and len(name) >= 2 and name not in existing_contacts: + all_contacts.add(name) + + print(f"\n{'='*60}") + print(f"OCR完成!") + print(f"发现新联系人: {len(all_contacts)} 个") + + # 入库 + if all_contacts: + added = add_new_contacts(sorted(all_contacts, key=lambda x: (not x[0].isalpha() if x else True, x.lower() if x and x[0].isalpha() else x))) + print(f"成功入库: {added} 个") + + # 更新JSON文件 + conn = sqlite3.connect(r'D:\夏骥\微信研究\contacts.db') + cursor = conn.cursor() + cursor.execute('SELECT * FROM contacts ORDER BY id') + all_db_contacts = [] + for row in cursor.fetchall(): + all_db_contacts.append({ + "id": row[0], + "name": row[1], + "category": row[2], + "blessing": row[3], + "selected": bool(row[4]) + }) + conn.close() + + json_file = r"D:\夏骥\微信研究\contacts_data.json" + with open(json_file, 'w', encoding='utf-8') as f: + json.dump(all_db_contacts, f, ensure_ascii=False, indent=2) + + print(f"JSON数据已更新: {json_file}") + print(f"数据库总联系人: {len(all_db_contacts)} 个") + + +if __name__ == '__main__': + main() diff --git a/batch_ocr_resume.py b/batch_ocr_resume.py new file mode 100644 index 0000000..85034fc --- /dev/null +++ b/batch_ocr_resume.py @@ -0,0 +1,231 @@ +# -*- coding: utf-8 -*- +""" +断点续传OCR识别 +""" +import os +import sqlite3 +import requests +import base64 +from PIL import Image +import glob +import json +import time + + +def ocr_image(image_path): + """OCR识别单张图片""" + with open(image_path, 'rb') as f: + image_base64 = base64.b64encode(f.read()).decode('utf-8') + + url = "http://localhost:11434/api/chat" + payload = { + "model": "glm-ocr", + "messages": [{ + "role": "user", + "content": """识别图片中的所有联系人名称。要求: +1. 只输出联系人名称,每行一个 +2. 忽略分组标题(如星号、字母A-Z等) +3. 忽略数字统计 +4. 不要添加任何其他内容""", + "images": [image_base64] + }], + "stream": False + } + + try: + response = requests.post(url, json=payload, timeout=60) + return response.json().get('message', {}).get('content', '') + except Exception as e: + print(f"OCR失败: {e}") + return "" + + +def is_valid_contact(line): + """判断是否是有效的联系人""" + line = line.strip() + if not line or len(line) < 2: + return False + + invalid = ["公众号", "服务号", "企业微信联系人", "我的企业", "联系人", + "星标朋友", "新的朋友", "群聊", "标签", "仅聊天", "设备"] + + if line in invalid: + return False + if len(line) == 1 and line.isalpha(): + return False + if line.startswith(">") or line.startswith("!"): + return False + + return True + + +def clean_contact_name(name): + """清理联系人名称""" + name = name.strip('"\'') + name = name.rstrip(',,。::') + return name.strip() + + +def get_existing_contacts(): + """从数据库获取已存在的联系人""" + conn = sqlite3.connect(r'D:\夏骥\微信研究\contacts.db') + cursor = conn.cursor() + cursor.execute('SELECT name FROM contacts') + existing = set(row[0] for row in cursor.fetchall()) + conn.close() + return existing + + +def add_new_contacts(new_contacts): + """将新联系人添加到数据库""" + if not new_contacts: + return 0 + + conn = sqlite3.connect(r'D:\夏骥\微信研究\contacts.db') + cursor = conn.cursor() + + # 获取当前最大ID + cursor.execute('SELECT MAX(id) FROM contacts') + max_id = cursor.fetchone()[0] or 0 + + added = 0 + for idx, name in enumerate(new_contacts, start=max_id + 1): + cursor.execute(''' + INSERT INTO contacts (id, name, category, blessing, selected) + VALUES (?, ?, ?, ?, ?) + ''', (idx, name, '', '马年新春快乐!愿您在新的一年里,事业腾飞,马到成功!', False)) + added += 1 + + conn.commit() + conn.close() + return added + + +def save_progress(progress): + """保存进度""" + with open(r'D:\夏骥\微信研究\ocr_progress.json', 'w', encoding='utf-8') as f: + json.dump(progress, f, ensure_ascii=False, indent=2) + + +def load_progress(): + """加载进度""" + try: + with open(r'D:\夏骥\微信研究\ocr_progress.json', 'r', encoding='utf-8') as f: + return json.load(f) + except: + return {"processed": 0, "total": 0, "new_contacts": []} + + +def main(): + print("=" * 60) + print("断点续传OCR识别") + print("=" * 60) + + # 获取截图目录 + scroll_dir = r"D:\夏骥\微信研究\scroll_complete" + screenshots = sorted(glob.glob(os.path.join(scroll_dir, "*.png"))) + + if not screenshots: + print("未找到截图文件!") + return + + print(f"找到 {len(screenshots)} 张截图") + + # 加载进度 + progress = load_progress() + processed = progress.get("processed", 0) + all_new_contacts = set(progress.get("new_contacts", [])) + + print(f"已处理: {processed} 张,已发现新联系人: {len(all_new_contacts)} 个") + + # 获取已存在的联系人 + existing_contacts = get_existing_contacts() + print(f"数据库中已有 {len(existing_contacts)} 个联系人") + + skipped_count = 0 + batch_size = 50 # 每50张保存一次进度 + + start_time = time.time() + + for i, path in enumerate(screenshots[processed:], start=processed): + current_time = time.time() + elapsed = current_time - start_time + avg_time = elapsed / (i - processed + 1) if i > processed else 0 + remaining = avg_time * (len(screenshots) - i - 1) + percent = (i + 1) / len(screenshots) * 100 + + print(f"\n[{i+1}/{len(screenshots)}] {percent:.1f}% | 预计剩余: {remaining/60:.1f}分钟") + print(f" 文件: {os.path.basename(path)}") + + result = ocr_image(path) + + new_in_this = 0 + for line in result.strip().split('\n'): + line = line.strip() + if is_valid_contact(line): + cleaned = clean_contact_name(line) + if cleaned and len(cleaned) >= 2: + if cleaned in existing_contacts: + skipped_count += 1 + elif cleaned not in all_new_contacts: + new_in_this += 1 + all_new_contacts.add(cleaned) + print(f" ✓ 新: {cleaned}") + + print(f" 本轮: +{new_in_this} | 累计新: {len(all_new_contacts)} | 跳过: {skipped_count}") + + # 保存进度 + progress["processed"] = i + 1 + progress["total"] = len(screenshots) + progress["new_contacts"] = list(all_new_contacts) + + if (i + 1) % batch_size == 0: + save_progress(progress) + print(f" 💾 进度已保存 ({i+1}/{len(screenshots)})") + + # 每10张显示汇总 + if (i + 1) % 10 == 0: + print(f"\n{'='*60}") + print(f"📊 进度汇总: {i+1}/{len(screenshots)} ({percent:.1f}%)") + print(f"⏱️ 已用时间: {elapsed/60:.1f}分钟 | 预计剩余: {remaining/60:.1f}分钟") + print(f"👤 新联系人: {len(all_new_contacts)} | 跳过: {skipped_count}") + print(f"{'='*60}\n") + + # 最终保存 + save_progress(progress) + + print(f"\n{'='*60}") + print(f"OCR完成!") + print(f"发现新联系人: {len(all_new_contacts)} 个") + print(f"跳过已存在: {skipped_count} 个") + + # 入库 + if all_new_contacts: + added = add_new_contacts(sorted(all_new_contacts, key=lambda x: (not x[0].isalpha() if x else True, x.lower() if x and x[0].isalpha() else x))) + print(f"成功入库: {added} 个") + + # 更新JSON文件 + conn = sqlite3.connect(r'D:\夏骥\微信研究\contacts.db') + cursor = conn.cursor() + cursor.execute('SELECT * FROM contacts ORDER BY id') + all_contacts = [] + for row in cursor.fetchall(): + all_contacts.append({ + "id": row[0], + "name": row[1], + "category": row[2], + "blessing": row[3], + "selected": bool(row[4]) + }) + conn.close() + + json_file = r"D:\夏骥\微信研究\contacts_data.json" + with open(json_file, 'w', encoding='utf-8') as f: + json.dump(all_contacts, f, ensure_ascii=False, indent=2) + + print(f"JSON数据已更新: {json_file}") + print(f"数据库总联系人: {len(all_contacts)} 个") + + +if __name__ == '__main__': + main() diff --git a/contacts_manager.html b/contacts_manager.html new file mode 100644 index 0000000..dd68ae2 --- /dev/null +++ b/contacts_manager.html @@ -0,0 +1,734 @@ + + + + + + 微信联系人新年祝福管理 + + + + +
+
+

🧧 微信联系人新年祝福管理

+
马年新春,为您的联系人送上一份温暖的祝福
+
+ +
+
+
0
+
总联系人数
+
+
+
0
+
已选择发送
+
+
+
0
+
已分类
+
+
+ +
+ + + + +
+ + + +
+
全部
+
同事
+
好友
+
老师
+
亲戚
+
客户
+
其他
+
未分类
+
+ +
+ +
+ + +
+ + + + + + + + + + + + + + \ No newline at end of file diff --git a/dedupe_contacts.py b/dedupe_contacts.py new file mode 100644 index 0000000..ec63036 --- /dev/null +++ b/dedupe_contacts.py @@ -0,0 +1,55 @@ +# -*- coding: utf-8 -*- +""" +根据 search_name 去重,只保留一条记录 +""" +import sqlite3 + +DB_PATH = r"D:\夏骥\微信研究\contacts.db" + + +def main(): + conn = sqlite3.connect(DB_PATH) + cursor = conn.cursor() + + # 1. 查找重复的 search_name + cursor.execute(''' + SELECT search_name, COUNT(*) as cnt + FROM contacts + WHERE search_name != '' + GROUP BY search_name + HAVING COUNT(*) > 1 + ORDER BY cnt DESC + ''') + duplicates = cursor.fetchall() + + print(f'发现 {len(duplicates)} 个重复的搜索姓名') + + # 2. 对每个重复的 search_name,只保留 id 最小的一条 + deleted_count = 0 + for search_name, cnt in duplicates: + # 获取该 search_name 的所有 id + cursor.execute(''' + SELECT id FROM contacts + WHERE search_name = ? + ORDER BY id + ''', (search_name,)) + ids = [row[0] for row in cursor.fetchall()] + + # 保留第一个,删除其他的 + keep_id = ids[0] + delete_ids = ids[1:] + + if delete_ids: + placeholders = ','.join('?' * len(delete_ids)) + cursor.execute(f'DELETE FROM contacts WHERE id IN ({placeholders})', delete_ids) + deleted_count += len(delete_ids) + print(f' "{search_name}": 保留 id={keep_id}, 删除 {len(delete_ids)} 条') + + conn.commit() + conn.close() + + print(f'\n共删除 {deleted_count} 条重复记录') + + +if __name__ == '__main__': + main() diff --git a/init_db.py b/init_db.py new file mode 100644 index 0000000..af88a24 --- /dev/null +++ b/init_db.py @@ -0,0 +1,98 @@ +# -*- coding: utf-8 -*- +""" +初始化SQLite数据库并导入联系人数据 +""" +import sqlite3 +import json +import os + +# 数据库文件 +DB_PATH = r"D:\夏骥\微信研究\contacts.db" + +# JSON数据文件 +JSON_PATH = r"D:\夏骥\微信研究\contacts_data.json" + + +def init_database(): + """初始化数据库""" + # 删除旧数据库(如果存在) + if os.path.exists(DB_PATH): + os.remove(DB_PATH) + + conn = sqlite3.connect(DB_PATH) + cursor = conn.cursor() + + # 创建联系人表 + cursor.execute(''' + CREATE TABLE contacts ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + name TEXT NOT NULL, + category TEXT DEFAULT '', + blessing TEXT DEFAULT '马年新春快乐!愿您在新的一年里,事业腾飞,马到成功!', + selected INTEGER DEFAULT 0, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP + ) + ''') + + conn.commit() + return conn + + +def import_from_json(conn): + """从JSON导入联系人""" + cursor = conn.cursor() + + # 读取JSON数据 + with open(JSON_PATH, 'r', encoding='utf-8') as f: + contacts = json.load(f) + + # 导入联系人 + for contact in contacts: + cursor.execute(''' + INSERT INTO contacts (name, category, blessing, selected) + VALUES (?, ?, ?, ?) + ''', ( + contact.get('name', ''), + contact.get('category', ''), + contact.get('blessing', '马年新春快乐!愿您在新的一年里,事业腾飞,马到成功!'), + 1 if contact.get('selected', False) else 0 + )) + + conn.commit() + print(f"已导入 {len(contacts)} 个联系人到数据库") + + +def main(): + print("=" * 50) + print("初始化数据库") + print("=" * 50) + + # 初始化数据库 + conn = init_database() + print("数据库表已创建") + + # 导入数据 + if os.path.exists(JSON_PATH): + import_from_json(conn) + else: + print(f"JSON文件不存在: {JSON_PATH}") + + # 验证数据 + cursor = conn.cursor() + cursor.execute("SELECT COUNT(*) FROM contacts") + count = cursor.fetchone()[0] + print(f"\n数据库中共有 {count} 个联系人") + + # 显示示例数据 + cursor.execute("SELECT id, name, category, blessing, selected FROM contacts LIMIT 5") + print("\n示例数据:") + for row in cursor.fetchall(): + print(f" ID:{row[0]} 姓名:{row[1]} 分类:{row[2]} 祝福语:{row[3][:20]}...") + + conn.close() + print(f"\n数据库文件: {DB_PATH}") + + +if __name__ == '__main__': + main() diff --git a/ocr_wechat_contacts.py b/ocr_wechat_contacts.py new file mode 100644 index 0000000..d370513 --- /dev/null +++ b/ocr_wechat_contacts.py @@ -0,0 +1,224 @@ +# -*- coding: utf-8 -*- +""" +使用OCR识别微信通讯录 - 两阶段处理 +第一阶段:快速滚动截图直到到底 +第二阶段:批量OCR识别所有截图 +""" +import uiautomation as auto +import time +import requests +import base64 +import os +from PIL import Image + + +def capture_wechat_window(): + """截取微信窗口""" + wechat_window = auto.WindowControl(searchDepth=1, Name='微信') + + if not wechat_window.Exists(3, 1): + print("未找到微信窗口!请确保微信已打开并登录。") + return None, None + + print(f"找到微信窗口: {wechat_window.Name}") + + screenshot_path = r"D:\夏骥\微信研究\wechat_screenshot.png" + wechat_window.CaptureToImage(screenshot_path) + + return screenshot_path, wechat_window + + +def capture_contact_region(wechat_window, index): + """截取通讯录区域""" + rect = wechat_window.BoundingRectangle + + x_offset = 70 + y_offset = 130 + width = 280 + height = rect.height() - 160 + + screenshot_path = f"D:\\夏骥\\微信研究\\scroll\\region_{index:03d}.png" + + try: + bitmap = wechat_window.ToBitmap(x=x_offset, y=y_offset, width=width, height=height) + bitmap.ToFile(screenshot_path) + return screenshot_path + except Exception as e: + print(f"截图失败: {e}") + return None + + +def get_image_hash(image_path): + """计算图片哈希""" + try: + img = Image.open(image_path) + img = img.resize((16, 16), Image.Resampling.LANCZOS) + img = img.convert('L') + pixels = list(img.get_flattened_data()) + avg = sum(pixels) / len(pixels) + return ''.join(['1' if p > avg else '0' for p in pixels]) + except: + return None + + +def images_similarity(hash1, hash2): + """计算相似度""" + if not hash1 or not hash2: + return 0 + diff = sum(c1 != c2 for c1, c2 in zip(hash1, hash2)) + return 1 - diff / len(hash1) + + +def scroll_down(wechat_window): + """向下滚动""" + try: + rect = wechat_window.BoundingRectangle + center_x = rect.left + 200 + center_y = rect.top + 400 + + auto.SetCursorPos(center_x, center_y) + auto.Click(center_x, center_y) + time.sleep(0.2) + auto.WheelDown(wheelTimes=3) + time.sleep(0.3) + return True + except: + return False + + +def ocr_image(image_path): + """OCR识别单张图片""" + with open(image_path, 'rb') as f: + image_base64 = base64.b64encode(f.read()).decode('utf-8') + + url = "http://localhost:11434/api/chat" + payload = { + "model": "glm-ocr", + "messages": [{ + "role": "user", + "content": """识别图片中的所有联系人名称。要求: +1. 只输出联系人名称,每行一个 +2. 忽略分组标题(如星号、字母A-Z等) +3. 忽略数字统计 +4. 不要添加任何其他内容""", + "images": [image_base64] + }], + "stream": False + } + + try: + response = requests.post(url, json=payload, timeout=60) + return response.json().get('message', {}).get('content', '') + except Exception as e: + print(f"OCR失败: {e}") + return "" + + +def is_valid_contact(line): + """判断是否是有效的联系人""" + line = line.strip() + if not line: + return False + + # 过滤分组标题 + invalid = ["公众号", "服务号", "企业微信联系人", "我的企业", "联系人", + "星标朋友", "新的朋友", "群聊", "标签", "仅聊天", "设备"] + + if line in invalid: + return False + if len(line) == 1 and line.isalpha(): + return False + if line.startswith(">") or line.startswith("!"): + return False + + return True + + +def main(): + print("=" * 60) + print("微信通讯录OCR识别 - 两阶段处理") + print("=" * 60) + + # 创建截图目录 + scroll_dir = r"D:\夏骥\微信研究\scroll" + os.makedirs(scroll_dir, exist_ok=True) + + # 清理旧截图 + for f in os.listdir(scroll_dir): + if f.endswith('.png'): + os.remove(os.path.join(scroll_dir, f)) + + # ===== 第一阶段:快速滚动截图 ===== + print("\n[阶段1] 滚动截图中...") + + _, wechat_window = capture_wechat_window() + if not wechat_window: + return + + screenshots = [] + last_hash = None + no_change = 0 + max_screenshots = 100 + + for i in range(max_screenshots): + path = capture_contact_region(wechat_window, i) + if path: + screenshots.append(path) + print(f" 截图 {i+1}: {path}") + + # 检测是否到底 + current_hash = get_image_hash(path) + if last_hash: + sim = images_similarity(last_hash, current_hash) + if sim > 0.95: + no_change += 1 + if no_change >= 2: + print(f"\n 检测到到底,共截图 {len(screenshots)} 张") + break + else: + no_change = 0 + last_hash = current_hash + + scroll_down(wechat_window) + + print(f"\n[阶段1完成] 共截图 {len(screenshots)} 张") + + # ===== 第二阶段:批量OCR ===== + print("\n[阶段2] OCR识别中...") + + all_contacts = set() + + for i, path in enumerate(screenshots): + print(f" OCR {i+1}/{len(screenshots)}: ", end="", flush=True) + result = ocr_image(path) + + new_count = 0 + for line in result.strip().split('\n'): + line = line.strip() + if is_valid_contact(line): + if line not in all_contacts: + new_count += 1 + all_contacts.add(line) + + print(f"新增 {new_count} 个,累计 {len(all_contacts)} 个") + + # ===== 保存结果 ===== + print("\n[保存结果]") + + sorted_contacts = sorted(all_contacts, key=lambda x: (not x[0].isalpha(), x.lower() if x[0].isalpha() else x)) + + result_file = r"D:\夏骥\微信研究\ocr_result.txt" + with open(result_file, 'w', encoding='utf-8') as f: + f.write(f"微信通讯录OCR识别结果\n") + f.write(f"共截图 {len(screenshots)} 张\n") + f.write(f"共识别 {len(all_contacts)} 个联系人\n") + f.write("=" * 60 + "\n\n") + for c in sorted_contacts: + f.write(f"{c}\n") + + print(f"结果已保存: {result_file}") + print(f"\n共识别到 {len(all_contacts)} 个联系人") + + +if __name__ == '__main__': + main() diff --git a/print_wechat_contacts.py b/print_wechat_contacts.py new file mode 100644 index 0000000..11fce1e --- /dev/null +++ b/print_wechat_contacts.py @@ -0,0 +1,356 @@ +# -*- coding: utf-8 -*- +""" +打印PC微信通讯录的所有子控件 +需要以管理员权限运行Python +""" +import uiautomation as auto +import time +import ctypes +from ctypes import wintypes, POINTER, byref, c_int, c_ulong, c_void_p +import comtypes.client + + +def print_control_tree(control, depth=0, max_depth=20, file=None): + """递归打印控件树""" + if depth > max_depth: + return + + # 获取控件信息 + control_type = control.ControlTypeName + class_name = control.ClassName or "" + name = control.Name or "" + automation_id = control.AutomationId or "" + rect = control.BoundingRectangle + + # 缩进 + indent = " " * depth + + # 打印控件信息 + line = f"{indent}ControlType: {control_type} ClassName: {class_name} Name: {name} AutomationId: {automation_id} Rect: {rect} Depth: {depth}" + print(line) + if file: + file.write(line + "\n") + + # 递归打印子控件 + try: + children = control.GetChildren() + for child in children: + print_control_tree(child, depth + 1, max_depth, file) + except Exception as e: + error_line = f"{indent} [Error getting children: {e}]" + print(error_line) + if file: + file.write(error_line + "\n") + + +def print_control_tree_raw_comtypes(uiAutomation, element, depth=0, max_depth=20, file=None): + """使用comtypes直接访问IUIAutomation接口""" + if depth > max_depth: + return + + try: + # 获取控件属性 + name = element.CurrentName or "" + class_name = element.CurrentClassName or "" + automation_id = element.CurrentAutomationId or "" + control_type_id = element.CurrentControlType + localized_control_type = element.CurrentLocalizedControlType or "" + + # 获取BoundingRectangle + try: + rect = element.CurrentBoundingRectangle + rect_str = f"({rect.left},{rect.top},{rect.right},{rect.bottom})" + except: + rect_str = "N/A" + + indent = " " * depth + line = f"{indent}ControlType: {localized_control_type} ClassName: {class_name} Name: {name} AutomationId: {automation_id} Rect: {rect_str} Depth: {depth}" + print(line) + if file: + file.write(line + "\n") + + # 使用RawViewWalker遍历子元素 + tree_walker = uiAutomation.RawViewWalker + child = tree_walker.GetFirstChildElement(element) + + while child: + print_control_tree_raw_comtypes(uiAutomation, child, depth + 1, max_depth, file) + # 获取下一个兄弟元素 + child = tree_walker.GetNextSiblingElement(child) + + except Exception as e: + indent = " " * depth + error_line = f"{indent}[Error: {e}]" + print(error_line) + if file: + file.write(error_line + "\n") + + +def enum_windows_callback(hwnd, results): + """枚举窗口回调函数""" + import win32gui + import win32process + + if win32gui.IsWindowVisible(hwnd): + class_name = win32gui.GetClassName(hwnd) + title = win32gui.GetWindowText(hwnd) + _, pid = win32process.GetWindowThreadProcessId(hwnd) + + # 检查是否是微信相关窗口 + try: + import subprocess + result = subprocess.run(['tasklist', '/FI', f'PID eq {pid}', '/NH'], + capture_output=True, text=True, creationflags=subprocess.CREATE_NO_WINDOW) + if 'WeChat' in result.stdout or 'wechat' in result.stdout.lower(): + results.append((hwnd, class_name, title, pid)) + except: + pass + return True + + +def print_win32_children(hwnd, depth=0, max_depth=20, file=None): + """使用Win32 API枚举子窗口""" + import win32gui + + if depth > max_depth: + return + + class_name = win32gui.GetClassName(hwnd) + title = win32gui.GetWindowText(hwnd) + rect = win32gui.GetWindowRect(hwnd) + + indent = " " * depth + line = f"{indent}HWND: 0x{hwnd:X} ClassName: {class_name} Title: {title} Rect: {rect} Depth: {depth}" + print(line) + if file: + file.write(line + "\n") + + # 枚举子窗口 + def child_callback(child_hwnd, _): + print_win32_children(child_hwnd, depth + 1, max_depth, file) + return True + + try: + win32gui.EnumChildWindows(hwnd, child_callback, None) + except: + pass + + +def print_accessible_tree(hwnd, depth=0, max_depth=20, file=None): + """使用IAccessible接口枚举控件""" + import ctypes + from ctypes import POINTER, byref + from ctypes import windll, oledll + + if depth > max_depth: + return + + try: + import comtypes.client + from comtypes import IUnknown + + # 获取IAccessible接口 + accessible = oledll.oleacc.AccessibleObjectFromWindow( + hwnd, + 0xFFFFFFFC, # OBJID_CLIENT + comtypes.IUnknown._iid_, + byref(ctypes.POINTER(comtypes.IUnknown)()) + ) + + # 尝试获取更多信息 + indent = " " * depth + line = f"{indent}HWND: 0x{hwnd:X} [Accessible Object Available]" + print(line) + if file: + file.write(line + "\n") + + except Exception as e: + indent = " " * depth + line = f"{indent}HWND: 0x{hwnd:X} [No IAccessible: {e}]" + print(line) + if file: + file.write(line + "\n") + + +def main(): + print("=" * 80) + print("打印PC微信通讯录的所有子控件") + print("=" * 80) + print() + + # 设置全局搜索超时时间 + auto.SetGlobalSearchTimeout(10) + + # 获取桌面根控件 + root = auto.GetRootControl() + print("桌面根控件:", root) + print() + + # 查找微信主窗口 - 尝试多种方式 + print("正在查找微信窗口...") + + wechat_window = None + + # 方式1: 通过ClassName查找 (旧版微信) + wechat_window = auto.WindowControl(searchDepth=1, ClassName='WeChatMainWndForPC') + if wechat_window.Exists(2, 1): + print(f"方式1找到微信窗口: {wechat_window.Name}, ClassName: {wechat_window.ClassName}") + else: + # 方式2: 通过Name查找 + wechat_window = auto.WindowControl(searchDepth=1, Name='微信') + if wechat_window.Exists(2, 1): + print(f"方式2找到微信窗口: {wechat_window.Name}, ClassName: {wechat_window.ClassName}") + else: + # 方式3: 通过进程名查找 (新版微信 WeChatAppEx.exe) + print("尝试通过进程查找微信窗口...") + for window in root.GetChildren(): + try: + process_id = window.ProcessId + import subprocess + result = subprocess.run(['tasklist', '/FI', f'PID eq {process_id}', '/NH'], + capture_output=True, text=True, creationflags=subprocess.CREATE_NO_WINDOW) + if 'WeChat' in result.stdout or 'wechat' in result.stdout.lower(): + wechat_window = window + print(f"方式3找到微信窗口: {window.Name}, ClassName: {window.ClassName}, PID: {process_id}") + break + except Exception as e: + continue + + if wechat_window is None or not wechat_window.Exists(1, 1): + print("\n未找到微信窗口!请确保:") + print("1. 微信已打开并登录") + print("2. 以管理员权限运行此脚本") + print("\n尝试打印所有顶级窗口以帮助调试...") + print("=" * 80) + for window in root.GetChildren(): + try: + print(f"Window: Name='{window.Name}' ClassName='{window.ClassName}' PID={window.ProcessId}") + except: + pass + return + + print() + + # 打印微信窗口的基本信息 + print("=" * 80) + print("微信主窗口信息:") + print("=" * 80) + print(f"Name: {wechat_window.Name}") + print(f"ClassName: {wechat_window.ClassName}") + print(f"AutomationId: {wechat_window.AutomationId}") + print(f"ControlType: {wechat_window.ControlTypeName}") + print(f"ProcessId: {wechat_window.ProcessId}") + print(f"Handle: {wechat_window.NativeWindowHandle}") + print() + + # 输出文件 + output_file = r"D:\夏骥\微信研究\wechat_controls_output.txt" + + # 首先尝试标准方法 + print("=" * 80) + print("方式1: 使用标准UIAutomation遍历控件...") + print("=" * 80) + + with open(output_file, 'w', encoding='utf-8') as f: + f.write("=" * 80 + "\n") + f.write("微信窗口控件树 - 标准UIAutomation\n") + f.write("=" * 80 + "\n\n") + f.write(f"微信窗口: Name={wechat_window.Name}, ClassName={wechat_window.ClassName}\n\n") + + print_control_tree(wechat_window, depth=0, max_depth=20, file=f) + + print() + print(f"标准方法输出已保存到: {output_file}") + print() + + # 尝试使用RawViewWalker (可以获取更多控件) + print("=" * 80) + print("方式2: 使用RawViewWalker遍历控件 (包括原始控件)...") + print("=" * 80) + + output_file_raw = r"D:\夏骥\微信研究\wechat_controls_raw.txt" + + try: + # 加载UIAutomation类型库 + comtypes.client.GetModule('UIAutomationCore.dll') + from comtypes.gen.UIAutomationClient import IUIAutomation, CUIAutomation + + # 创建IUIAutomation实例 + uiAutomation = comtypes.CoCreateInstance(CUIAutomation._reg_clsid_, interface=IUIAutomation) + + # 获取RootElement + root_element = uiAutomation.GetRootElement() + + # 找到微信窗口元素 + condition = uiAutomation.CreatePropertyCondition(30005, "微信") # UIA_NamePropertyId = 30005 + + # 查找微信窗口 (TreeScope_Children = 2) + wechat_element = root_element.FindFirst(2, condition) + + if wechat_element: + with open(output_file_raw, 'w', encoding='utf-8') as f: + f.write("=" * 80 + "\n") + f.write("微信窗口控件树 - RawViewWalker\n") + f.write("=" * 80 + "\n\n") + + print_control_tree_raw_comtypes(uiAutomation, wechat_element, depth=0, max_depth=20, file=f) + + print(f"\nRawViewWalker输出已保存到: {output_file_raw}") + else: + print("使用RawViewWalker未找到微信窗口元素") + + except Exception as e: + print(f"RawViewWalker方法出错: {e}") + import traceback + traceback.print_exc() + + print() + + # 尝试使用Win32 API枚举窗口 + print("=" * 80) + print("方式3: 使用Win32 API枚举所有子窗口...") + print("=" * 80) + + try: + import win32gui + import win32process + + hwnd = wechat_window.NativeWindowHandle + + output_file_win32 = r"D:\夏骥\微信研究\wechat_controls_win32.txt" + + with open(output_file_win32, 'w', encoding='utf-8') as f: + f.write("=" * 80 + "\n") + f.write("微信窗口控件树 - Win32 API\n") + f.write("=" * 80 + "\n\n") + + print_win32_children(hwnd, depth=0, max_depth=20, file=f) + + print(f"\nWin32 API输出已保存到: {output_file_win32}") + + except ImportError: + print("未安装pywin32,跳过Win32 API方式") + print("可以通过 'pip install pywin32' 安装") + except Exception as e: + print(f"Win32 API方法出错: {e}") + import traceback + traceback.print_exc() + + print() + print("=" * 80) + print("完成!") + print("=" * 80) + print() + print("说明:") + print("新版微信(WeChatAppEx.exe)使用CEF/Chromium渲染UI,") + print("其内部控件不通过Windows原生控件实现,") + print("因此UIAutomation和Win32 API都无法直接访问其内部控件。") + print() + print("如需获取微信控件信息,可以尝试:") + print("1. 使用Microsoft Accessibility Insights工具") + print("2. 使用Chrome DevTools Protocol (如果微信支持)") + print("3. 使用OCR或图像识别技术") + + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/scroll_complete.py b/scroll_complete.py new file mode 100644 index 0000000..fd4c70f --- /dev/null +++ b/scroll_complete.py @@ -0,0 +1,156 @@ +# -*- coding: utf-8 -*- +""" +完整截取微信通讯录 - 小步滚动确保不遗漏 +""" +import uiautomation as auto +import time +import os +from PIL import Image + + +def find_wechat_window(): + """查找微信窗口""" + wechat_window = auto.WindowControl(searchDepth=1, Name='微信') + if wechat_window.Exists(3, 1): + return wechat_window + return None + + +def capture_contact_region(wechat_window, index, save_dir): + """截取通讯录区域""" + rect = wechat_window.BoundingRectangle + + # 通讯录列表区域 + x_offset = 70 + y_offset = 130 + width = 280 + height = rect.height() - 160 + + screenshot_path = os.path.join(save_dir, f"region_{index:04d}.png") + + try: + bitmap = wechat_window.ToBitmap(x=x_offset, y=y_offset, width=width, height=height) + bitmap.ToFile(screenshot_path) + return screenshot_path + except Exception as e: + print(f"截图失败: {e}") + return None + + +def get_image_hash(image_path): + """计算图片感知哈希""" + try: + img = Image.open(image_path) + img = img.resize((32, 32), Image.Resampling.LANCZOS) + img = img.convert('L') + pixels = list(img.getdata()) + avg = sum(pixels) / len(pixels) + return ''.join(['1' if p > avg else '0' for p in pixels]) + except Exception as e: + print(f"哈希计算失败: {e}") + return None + + +def images_similarity(hash1, hash2): + """计算图片相似度""" + if not hash1 or not hash2: + return 0 + diff = sum(c1 != c2 for c1, c2 in zip(hash1, hash2)) + return 1 - diff / len(hash1) + + +def scroll_down(wechat_window, wheel_times=1): + """向下滚动 - 小幅度滚动""" + try: + rect = wechat_window.BoundingRectangle + center_x = rect.left + 200 + center_y = rect.top + 400 + + auto.SetCursorPos(center_x, center_y) + auto.Click(center_x, center_y) + time.sleep(0.15) + auto.WheelDown(wheelTimes=wheel_times) + time.sleep(0.25) + return True + except Exception as e: + print(f"滚动失败: {e}") + return False + + +def main(): + print("=" * 60) + print("完整截取微信通讯录 - 小步滚动版本") + print("=" * 60) + + # 创建新的截图目录 + save_dir = r"D:\夏骥\微信研究\scroll_complete" + + # 清空或创建目录 + if os.path.exists(save_dir): + import shutil + shutil.rmtree(save_dir) + os.makedirs(save_dir) + + print(f"截图保存目录: {save_dir}") + + # 查找微信窗口 + print("\n查找微信窗口...") + wechat_window = find_wechat_window() + if not wechat_window: + print("未找到微信窗口!请确保微信已打开。") + return + + print(f"找到微信窗口: {wechat_window.Name}") + + # 提示用户准备 + print("\n" + "!" * 60) + print("请确保微信通讯录界面已打开,并滚动到最顶部!") + print("按字母顺序从A开始显示联系人列表") + print("!" * 60) + time.sleep(3) + + # 开始截图 + print("\n开始截图...") + screenshots = [] + last_hash = None + no_change_count = 0 + max_screenshots = 2000 # 最大截图数 + consecutive_same = 3 # 连续相同截图数判定到底 + + for i in range(max_screenshots): + path = capture_contact_region(wechat_window, i, save_dir) + if not path: + print(f"截图 {i} 失败,跳过") + continue + + screenshots.append(path) + print(f" 截图 {i + 1}: {os.path.basename(path)}") + + # 检测是否到底(图片相似度) + current_hash = get_image_hash(path) + if last_hash: + sim = images_similarity(last_hash, current_hash) + if sim > 0.98: # 非常相似 + no_change_count += 1 + print(f" -> 相似度 {sim:.2%},连续相同 {no_change_count}") + if no_change_count >= consecutive_same: + print(f"\n检测到底部!连续 {consecutive_same} 张截图相同") + break + else: + no_change_count = 0 + last_hash = current_hash + + # 小幅度滚动(每次滚动1格) + scroll_down(wechat_window, wheel_times=1) + + print("\n" + "=" * 60) + print(f"截图完成!共 {len(screenshots)} 张") + print(f"保存目录: {save_dir}") + print("=" * 60) + + # 提示下一步 + print("\n下一步:运行 batch_ocr_complete.py 进行OCR识别") + + +if __name__ == '__main__': + main() diff --git a/scroll_full_contacts.py b/scroll_full_contacts.py new file mode 100644 index 0000000..a7bff07 --- /dev/null +++ b/scroll_full_contacts.py @@ -0,0 +1,167 @@ +# -*- coding: utf-8 -*- +""" +完整抓取微信通讯录 - 从头开始滚动到底 +""" +import uiautomation as auto +import time +import os +from PIL import Image + + +def find_wechat_window(): + """查找微信窗口""" + wechat_window = auto.WindowControl(searchDepth=1, Name='微信') + if wechat_window.Exists(3, 1): + return wechat_window + return None + + +def capture_contact_region(wechat_window, index, save_dir): + """截取通讯录区域""" + rect = wechat_window.BoundingRectangle + + x_offset = 70 + y_offset = 130 + width = 280 + height = rect.height() - 160 + + screenshot_path = os.path.join(save_dir, f"region_{index:03d}.png") + + try: + bitmap = wechat_window.ToBitmap(x=x_offset, y=y_offset, width=width, height=height) + bitmap.ToFile(screenshot_path) + return screenshot_path + except Exception as e: + print(f"截图失败: {e}") + return None + + +def get_image_hash(image_path): + """计算图片哈希""" + try: + img = Image.open(image_path) + img = img.resize((16, 16), Image.Resampling.LANCZOS) + img = img.convert('L') + pixels = list(img.get_flattened_data()) + avg = sum(pixels) / len(pixels) + return ''.join(['1' if p > avg else '0' for p in pixels]) + except: + return None + + +def images_similarity(hash1, hash2): + """计算相似度""" + if not hash1 or not hash2: + return 0 + diff = sum(c1 != c2 for c1, c2 in zip(hash1, hash2)) + return 1 - diff / len(hash1) + + +def scroll_to_top(wechat_window): + """滚动到顶部""" + print("滚动到顶部...") + rect = wechat_window.BoundingRectangle + center_x = rect.left + 200 + center_y = rect.top + 400 + + # 点击获取焦点 + auto.SetCursorPos(center_x, center_y) + auto.Click(center_x, center_y) + time.sleep(0.3) + + # 多次向上滚动确保到顶部 + for _ in range(20): + auto.SendKeys('{Home}') + time.sleep(0.2) + + # 额外滚动几次PageUp + for _ in range(5): + auto.SendKeys('{PageUp}') + time.sleep(0.3) + + print("已滚动到顶部") + time.sleep(1) + + +def scroll_down(wechat_window): + """向下滚动一页""" + try: + rect = wechat_window.BoundingRectangle + center_x = rect.left + 200 + center_y = rect.top + 400 + + auto.SetCursorPos(center_x, center_y) + auto.Click(center_x, center_y) + time.sleep(0.2) + auto.WheelDown(wheelTimes=3) + time.sleep(0.4) + return True + except: + return False + + +def main(): + print("=" * 60) + print("完整抓取微信通讯录") + print("=" * 60) + + # 创建新的截图目录 + save_dir = r"D:\夏骥\微信研究\scroll_full" + os.makedirs(save_dir, exist_ok=True) + + # 清理旧截图 + for f in os.listdir(save_dir): + if f.endswith('.png'): + os.remove(os.path.join(save_dir, f)) + + # 查找微信窗口 + print("\n查找微信窗口...") + wechat_window = find_wechat_window() + if not wechat_window: + print("未找到微信窗口!") + return + + print(f"找到微信窗口: {wechat_window.Name}") + + # 滚动到顶部 + print("\n请确保微信通讯录界面已打开...") + time.sleep(2) + scroll_to_top(wechat_window) + + # 开始截图 + print("\n开始滚动截图...") + screenshots = [] + last_hash = None + no_change = 0 + max_screenshots = 150 + + for i in range(max_screenshots): + path = capture_contact_region(wechat_window, i, save_dir) + if path: + screenshots.append(path) + print(f" 截图 {i+1}") + + # 检测是否到底 + current_hash = get_image_hash(path) + if last_hash: + sim = images_similarity(last_hash, current_hash) + if sim > 0.95: + no_change += 1 + if no_change >= 2: + print(f"\n检测到到底,共截图 {len(screenshots)} 张") + break + else: + no_change = 0 + last_hash = current_hash + + scroll_down(wechat_window) + + print(f"\n完成!共截图 {len(screenshots)} 张") + print(f"截图保存目录: {save_dir}") + + # 提示运行OCR + print("\n接下来请运行 batch_ocr_full.py 进行OCR识别") + + +if __name__ == '__main__': + main() diff --git a/scroll_full_continue.py b/scroll_full_continue.py new file mode 100644 index 0000000..d29276c --- /dev/null +++ b/scroll_full_continue.py @@ -0,0 +1,143 @@ +# -*- coding: utf-8 -*- +""" +继续截取微信通讯录 - 从当前位置继续滚动到底 +""" +import uiautomation as auto +import time +import os +from PIL import Image + + +def find_wechat_window(): + """查找微信窗口""" + wechat_window = auto.WindowControl(searchDepth=1, Name='微信') + if wechat_window.Exists(3, 1): + return wechat_window + return None + + +def capture_contact_region(wechat_window, index, save_dir): + """截取通讯录区域""" + rect = wechat_window.BoundingRectangle + + x_offset = 70 + y_offset = 130 + width = 280 + height = rect.height() - 160 + + screenshot_path = os.path.join(save_dir, f"region_{index:03d}.png") + + try: + bitmap = wechat_window.ToBitmap(x=x_offset, y=y_offset, width=width, height=height) + bitmap.ToFile(screenshot_path) + return screenshot_path + except Exception as e: + print(f"截图失败: {e}") + return None + + +def get_image_hash(image_path): + """计算图片哈希""" + try: + img = Image.open(image_path) + img = img.resize((16, 16), Image.Resampling.LANCZOS) + img = img.convert('L') + pixels = list(img.get_flattened_data()) + avg = sum(pixels) / len(pixels) + return ''.join(['1' if p > avg else '0' for p in pixels]) + except: + return None + + +def images_similarity(hash1, hash2): + """计算相似度""" + if not hash1 or not hash2: + return 0 + diff = sum(c1 != c2 for c1, c2 in zip(hash1, hash2)) + return 1 - diff / len(hash1) + + +def scroll_down(wechat_window): + """向下滚动一页""" + try: + rect = wechat_window.BoundingRectangle + center_x = rect.left + 200 + center_y = rect.top + 400 + + auto.SetCursorPos(center_x, center_y) + auto.Click(center_x, center_y) + time.sleep(0.2) + auto.WheelDown(wheelTimes=3) + time.sleep(0.4) + return True + except: + return False + + +def main(): + print("=" * 60) + print("继续截取微信通讯录") + print("=" * 60) + + # 使用已有的截图目录 + save_dir = r"D:\夏骥\微信研究\scroll_full" + + # 获取已有截图数量 + existing_files = [f for f in os.listdir(save_dir) if f.endswith('.png')] + start_index = len(existing_files) + + print(f"已有 {start_index} 张截图") + print(f"将从第 {start_index + 1} 张开始继续...") + + # 查找微信窗口 + print("\n查找微信窗口...") + wechat_window = find_wechat_window() + if not wechat_window: + print("未找到微信窗口!") + return + + print(f"找到微信窗口: {wechat_window.Name}") + + # 提示用户 + print("\n请确保微信通讯录界面已打开并滚动到之前的位置...") + time.sleep(3) + + # 开始截图 + print("\n开始继续截图...") + screenshots = [] + last_hash = None + no_change = 0 + max_screenshots = 100 + + for i in range(max_screenshots): + actual_index = start_index + i + path = capture_contact_region(wechat_window, actual_index, save_dir) + if path: + screenshots.append(path) + print(f" 截图 {actual_index + 1}") + + # 检测是否到底 + current_hash = get_image_hash(path) + if last_hash: + sim = images_similarity(last_hash, current_hash) + if sim > 0.95: + no_change += 1 + if no_change >= 2: + print(f"\n检测到到底,共新增截图 {len(screenshots)} 张") + break + else: + no_change = 0 + last_hash = current_hash + + scroll_down(wechat_window) + + print(f"\n完成!新增 {len(screenshots)} 张截图") + print(f"总截图数: {start_index + len(screenshots)}") + print(f"截图保存目录: {save_dir}") + + # 提示运行OCR + print("\n接下来请运行 batch_ocr_full.py 进行OCR识别") + + +if __name__ == '__main__': + main() diff --git a/static/contacts_manager.html b/static/contacts_manager.html new file mode 100644 index 0000000..13c58a6 --- /dev/null +++ b/static/contacts_manager.html @@ -0,0 +1,842 @@ + + + + + + 微信联系人祝福管理 + + + + + +
+
+

微信联系人祝福管理

+

点击姓名/分类/祝福语可编辑,勾选发送选择框

+
+ + +
+
+
0
+
总联系人
+
+
+
0
+
已选择
+
+
+ + +
+
+
+ 新增联系人 + +
+
+
+
+
+ + +
+
+ + +
+
+ + +
+
+ +
+
+
+
+
+
+ + + + + +
+ + + + + + + + + + +
+ + +
+ + + + + + + + + + + + + + + + + + + +
ID姓名搜索姓名分类自定义内容祝福语发送操作
加载中...
+
+ + + +
+ + + + + + + + + + + +
+ +
+ + + + + diff --git a/update_search_name.py b/update_search_name.py new file mode 100644 index 0000000..e3ec78a --- /dev/null +++ b/update_search_name.py @@ -0,0 +1,50 @@ +# -*- coding: utf-8 -*- +""" +清理 search_name 字段:移除特殊符号、英文单词、数字、表情符号 +""" +import sqlite3 +import re + +DB_PATH = r"D:\夏骥\微信研究\contacts.db" + + +def clean_search_name(name): + """清理搜索姓名:只保留中文""" + if not name: + return '' + + # 只保留中文字符 + cleaned = re.sub(r'[^\u4e00-\u9fa5]', '', name) + + return cleaned.strip() + + +def main(): + conn = sqlite3.connect(DB_PATH) + cursor = conn.cursor() + + # 更新所有记录 + cursor.execute('SELECT id, search_name FROM contacts') + rows = cursor.fetchall() + updated = 0 + for row in rows: + id_, search_name = row + cleaned = clean_search_name(search_name) + cursor.execute('UPDATE contacts SET search_name = ? WHERE id = ?', (cleaned, id_)) + updated += 1 + + conn.commit() + print(f'已更新 {updated} 条记录') + + # 显示示例 + cursor.execute('SELECT id, name, search_name FROM contacts LIMIT 20') + print('\n示例数据:') + for row in cursor.fetchall(): + print(f' {row[0]}: name="{row[1]}" -> search_name="{row[2]}"') + + conn.close() + print('\n完成!') + + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/wechat_controls_output.txt b/wechat_controls_output.txt new file mode 100644 index 0000000..d6d69b0 --- /dev/null +++ b/wechat_controls_output.txt @@ -0,0 +1,8 @@ +================================================================================ +微信窗口控件树 - 标准UIAutomation +================================================================================ + +微信窗口: Name=微信, ClassName=Qt51514QWindowIcon + +ControlType: WindowControl ClassName: Qt51514QWindowIcon Name: 微信 AutomationId: Rect: (-3,15,1044,1033)[1047x1018] Depth: 0 + ControlType: PaneControl ClassName: MMUIRenderSubWindowHW Name: MMUIRenderSubWindowHW AutomationId: Rect: (5,15,1036,1025)[1031x1010] Depth: 1 diff --git a/wechat_controls_raw.txt b/wechat_controls_raw.txt new file mode 100644 index 0000000..d1d2cd2 --- /dev/null +++ b/wechat_controls_raw.txt @@ -0,0 +1,6 @@ +================================================================================ +微信窗口控件树 - RawViewWalker +================================================================================ + +ControlType: 窗口 ClassName: Qt51514QWindowIcon Name: 微信 AutomationId: Rect: (-3,15,1044,1033) Depth: 0 + ControlType: 窗格 ClassName: MMUIRenderSubWindowHW Name: MMUIRenderSubWindowHW AutomationId: Rect: (5,15,1036,1025) Depth: 1 diff --git a/wechat_controls_win32.txt b/wechat_controls_win32.txt new file mode 100644 index 0000000..46a0d8f --- /dev/null +++ b/wechat_controls_win32.txt @@ -0,0 +1,6 @@ +================================================================================ +微信窗口控件树 - Win32 API +================================================================================ + +HWND: 0x401BE ClassName: Qt51514QWindowIcon Title: 微信 Rect: (-3, 15, 1044, 1033) Depth: 0 + HWND: 0x1109FE ClassName: MMUIRenderSubWindowHW Title: MMUIRenderSubWindowHW Rect: (5, 15, 1036, 1025) Depth: 1