第一次提交。
其中,爬取脚本是 tophub_scraper.py,数据入库脚本是 tophub_add_data_to_db.py,查看当前数据内容的脚本是 db_viewer.py。
This commit is contained in:
50
check_interested_values.py
Normal file
50
check_interested_values.py
Normal file
@@ -0,0 +1,50 @@
|
||||
#!/usr/bin/env python3
|
||||
import sqlite3
|
||||
from loguru import logger
|
||||
|
||||
def check_interested_values():
    """Log summary statistics for the ``is_interested`` column of ``articles``.

    Connects to ``tophub_data.db`` (path relative to the current working
    directory) and logs, in order:

    * the minimum, maximum and average of ``is_interested``;
    * the number of rows for each distinct ``is_interested`` value;
    * the five rows with the highest ``is_interested`` (id, title, value).

    Returns:
        bool: ``True`` when all queries completed, ``False`` when an
        exception was caught (the error is logged, never re-raised).
    """
    # Bound before the try so the finally clause can tell whether
    # connect() itself succeeded.
    conn = None
    try:
        conn = sqlite3.connect('tophub_data.db')
        cursor = conn.cursor()

        # Overall range of the column.
        cursor.execute("SELECT MIN(is_interested), MAX(is_interested), AVG(is_interested) FROM articles")
        min_val, max_val, avg_val = cursor.fetchone()

        # The aggregates come back as NULL (None) when the table is empty or
        # every value is NULL; applying ':.2f' to None would raise TypeError
        # and make a successful query look like a failure.
        avg_text = "None" if avg_val is None else f"{avg_val:.2f}"

        logger.info("is_interested字段统计:")
        logger.info(f" 最小值: {min_val}")
        logger.info(f" 最大值: {max_val}")
        logger.info(f" 平均值: {avg_text}")

        # Row count per distinct value.
        cursor.execute("SELECT is_interested, COUNT(*) FROM articles GROUP BY is_interested ORDER BY is_interested")
        distribution = cursor.fetchall()

        logger.info("\nis_interested值分布:")
        for value, count in distribution:
            logger.info(f" {value}: {count} 条记录")

        # A few sample rows, highest is_interested first.
        cursor.execute("SELECT id, title, is_interested FROM articles ORDER BY is_interested DESC LIMIT 5")
        examples = cursor.fetchall()

        logger.info("\n示例记录:")
        for article_id, title, interested in examples:
            # A NULL title cannot be sliced; show it as an empty string.
            title_text = "" if title is None else str(title)
            logger.info(f" ID: {article_id}, 标题: {title_text[:30]}..., is_interested: {interested}")

        return True

    except sqlite3.Error as e:
        logger.error(f"数据库操作出错: {str(e)}")
        return False
    except Exception as e:
        logger.error(f"查询数据时出错: {str(e)}")
        return False
    finally:
        # Release the connection on every path, including failed queries.
        if conn is not None:
            conn.close()
|
||||
|
||||
if __name__ == "__main__":
    # When run as a script: register a log-file sink (INFO level, rotated
    # at 10 MB), then run the check once.
    logger.add(
        "check_interested_values.log",
        level="INFO",
        rotation="10 MB",
    )
    check_interested_values()
|
||||
Reference in New Issue
Block a user