Files
tophux_scrape/check_interested_values.py
xiaji 25da264413 第一次提交。
其中爬取是tophub_scraper.py
数据入库是 tophub_add_data_to_db.py
查看当前数据内容是 db_viewer.py
2025-11-09 17:20:44 +08:00

50 lines
1.8 KiB
Python

#!/usr/bin/env python3
import sqlite3
from loguru import logger
def check_interested_values():
    """Log summary statistics for the ``is_interested`` column of ``articles``.

    Opens ``tophub_data.db`` (path relative to the current working
    directory) and logs three things through ``logger``:

    1. the minimum, maximum and average of ``is_interested``;
    2. the number of rows for each distinct ``is_interested`` value;
    3. the five rows with the highest ``is_interested`` (id, first 30
       characters of the title, and the value).

    Returns:
        bool: ``True`` when every query ran and was logged, ``False`` when
        an error occurred (the error is logged, not raised).
    """
    conn = None
    try:
        # Connect to the database.
        conn = sqlite3.connect('tophub_data.db')
        cursor = conn.cursor()

        # Minimum, maximum and average of is_interested.
        cursor.execute("SELECT MIN(is_interested), MAX(is_interested), AVG(is_interested) FROM articles")
        min_val, max_val, avg_val = cursor.fetchone()
        # The aggregates come back as NULL (None) when the table is empty or
        # the column holds only NULLs; None cannot be formatted with ".2f".
        avg_text = f"{avg_val:.2f}" if avg_val is not None else "N/A"
        logger.info("is_interested字段统计:")
        logger.info(f" 最小值: {min_val}")
        logger.info(f" 最大值: {max_val}")
        logger.info(f" 平均值: {avg_text}")

        # Row count for each distinct value.
        cursor.execute("SELECT is_interested, COUNT(*) FROM articles GROUP BY is_interested ORDER BY is_interested")
        distribution = cursor.fetchall()
        logger.info("\nis_interested值分布:")
        for value, count in distribution:
            logger.info(f" {value}: {count} 条记录")

        # A few sample rows, highest is_interested first.
        cursor.execute("SELECT id, title, is_interested FROM articles ORDER BY is_interested DESC LIMIT 5")
        examples = cursor.fetchall()
        logger.info("\n示例记录:")
        for article_id, title, interested in examples:
            # A NULL title would make the slice raise; show it as empty text.
            short_title = (title or "")[:30]
            logger.info(f" ID: {article_id}, 标题: {short_title}..., is_interested: {interested}")

        return True
    except sqlite3.Error as e:
        logger.error(f"数据库操作出错: {str(e)}")
        return False
    except Exception as e:
        logger.error(f"查询数据时出错: {str(e)}")
        return False
    finally:
        # Close the connection on every path, including the error paths,
        # so a failed query does not leak the handle.
        if conn is not None:
            conn.close()
if __name__ == "__main__":
    # Script entry point: register a log-file sink for this run, then
    # execute the check. The sink is added only here, not on import.
    logger.add(
        "check_interested_values.log",
        rotation="10 MB",
        level="INFO",
    )
    check_interested_values()