Files
weixin-holiday-message/dedupe_contacts.py

56 lines
1.5 KiB
Python
Raw Permalink Normal View History

# -*- coding: utf-8 -*-
"""
根据 search_name 去重只保留一条记录
"""
import sqlite3
DB_PATH = r"D:\夏骥\微信研究\contacts.db"
def main():
conn = sqlite3.connect(DB_PATH)
cursor = conn.cursor()
# 1. 查找重复的 search_name
cursor.execute('''
SELECT search_name, COUNT(*) as cnt
FROM contacts
WHERE search_name != ''
GROUP BY search_name
HAVING COUNT(*) > 1
ORDER BY cnt DESC
''')
duplicates = cursor.fetchall()
print(f'发现 {len(duplicates)} 个重复的搜索姓名')
# 2. 对每个重复的 search_name只保留 id 最小的一条
deleted_count = 0
for search_name, cnt in duplicates:
# 获取该 search_name 的所有 id
cursor.execute('''
SELECT id FROM contacts
WHERE search_name = ?
ORDER BY id
''', (search_name,))
ids = [row[0] for row in cursor.fetchall()]
# 保留第一个,删除其他的
keep_id = ids[0]
delete_ids = ids[1:]
if delete_ids:
placeholders = ','.join('?' * len(delete_ids))
cursor.execute(f'DELETE FROM contacts WHERE id IN ({placeholders})', delete_ids)
deleted_count += len(delete_ids)
print(f' "{search_name}": 保留 id={keep_id}, 删除 {len(delete_ids)}')
conn.commit()
conn.close()
print(f'\n共删除 {deleted_count} 条重复记录')
if __name__ == '__main__':
main()