Compare commits

...

2 Commits

2 changed files with 30 additions and 13 deletions

View File

@@ -434,8 +434,8 @@ class DatabaseViewer(QMainWindow):
url = url_item.text() if url_item else ""
date = date_item.text() if date_item else ""
# 用空格组合信息
info = f"{title} {url} {date}".strip()
# 按照要求的格式组合信息:"日期 标题\n链接"
info = f"{date} {title}\n{url}".strip()
all_info.append(info)
# 将所有信息用换行符连接

View File

@@ -84,19 +84,36 @@ def parse_file_content(file_path):
return []
def check_duplicate(title, date_str):
    """Return True if an article with this title was stored in the last three days.

    The window covers ``date_str`` itself plus the two preceding calendar
    days (the day before yesterday, yesterday and today, relative to
    ``date_str``).

    Args:
        title: Article title, matched exactly against ``articles.title``.
        date_str: Reference date formatted as ``YYYY-MM-DD``.

    Returns:
        True when at least one row in ``articles`` has the same title and a
        ``source_date`` inside the three-day window, otherwise False.

    Raises:
        ValueError: If ``date_str`` does not match ``%Y-%m-%d``.
    """
    from datetime import datetime, timedelta

    # Parse first so a malformed date fails before a connection is opened.
    current_date = datetime.strptime(date_str, '%Y-%m-%d')
    yesterday = current_date - timedelta(days=1)
    day_before_yesterday = current_date - timedelta(days=2)

    conn = sqlite3.connect('tophub_data.db')
    try:
        cursor = conn.cursor()
        # The reference day is passed through unchanged, so it is compared
        # exactly as the caller supplied it; the two earlier days are
        # re-formatted from the parsed date.
        cursor.execute('''
            SELECT COUNT(*) FROM articles
            WHERE title = ? AND source_date IN (?, ?, ?)
        ''', (title,
              day_before_yesterday.strftime('%Y-%m-%d'),
              yesterday.strftime('%Y-%m-%d'),
              date_str))
        count = cursor.fetchone()[0]
        logger.info(f"检查标题 '{title}' 在最近三天的重复情况: 找到 {count} 条相同记录")
        return count > 0
    finally:
        # Always release the connection, even if the query raises.
        conn.close()
def classify_title(title):
"""调用API对标题进行分类"""
@@ -193,7 +210,7 @@ def process_temp_files():
# 检查重复
if check_duplicate(article['title'], source_date):
logger.info(f"跳过重复文章: {article['title']}")
logger.info(f"跳过重复文章(最近三天已存在): {article['title']}")
continue
# 分类标题