Files
weixin-holiday-message/dedupe_contacts.py

56 lines
1.5 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# -*- coding: utf-8 -*-
"""
根据 search_name 去重,只保留一条记录
"""
import sqlite3
DB_PATH = r"D:\夏骥\微信研究\contacts.db"
def main():
conn = sqlite3.connect(DB_PATH)
cursor = conn.cursor()
# 1. 查找重复的 search_name
cursor.execute('''
SELECT search_name, COUNT(*) as cnt
FROM contacts
WHERE search_name != ''
GROUP BY search_name
HAVING COUNT(*) > 1
ORDER BY cnt DESC
''')
duplicates = cursor.fetchall()
print(f'发现 {len(duplicates)} 个重复的搜索姓名')
# 2. 对每个重复的 search_name只保留 id 最小的一条
deleted_count = 0
for search_name, cnt in duplicates:
# 获取该 search_name 的所有 id
cursor.execute('''
SELECT id FROM contacts
WHERE search_name = ?
ORDER BY id
''', (search_name,))
ids = [row[0] for row in cursor.fetchall()]
# 保留第一个,删除其他的
keep_id = ids[0]
delete_ids = ids[1:]
if delete_ids:
placeholders = ','.join('?' * len(delete_ids))
cursor.execute(f'DELETE FROM contacts WHERE id IN ({placeholders})', delete_ids)
deleted_count += len(delete_ids)
print(f' "{search_name}": 保留 id={keep_id}, 删除 {len(delete_ids)}')
conn.commit()
conn.close()
print(f'\n共删除 {deleted_count} 条重复记录')
if __name__ == '__main__':
main()