118 lines
4.3 KiB
Python
118 lines
4.3 KiB
Python
|
|
#!/usr/bin/env python3
|
|||
|
|
# -*- coding: utf-8 -*-
|
|||
|
|
"""
|
|||
|
|
Script that fills in the follows column of the product_analysis table.

Converts the user_count text stored in the products table into a number and
writes it to product_analysis.follows.
|
|||
|
|
"""
|
|||
|
|
|
|||
|
|
import os
import re
import sqlite3
import sys
from decimal import Decimal
from typing import Optional

from loguru import logger
|
|||
|
|
|
|||
|
|
# Logging setup: discard the handlers loguru already has, then attach a single
# stderr sink that emits INFO and above using the colourised layout below.
logger.remove()
logger.add(
    sys.stderr,
    level="INFO",
    format=(
        "<green>{time:YYYY-MM-DD HH:mm:ss}</green> | "
        "<level>{level: <8}</level> | "
        "<cyan>{name}</cyan>:<cyan>{function}</cyan>:<cyan>{line}</cyan> - "
        "<level>{message}</level>"
    ),
)
|
|||
|
|
|
|||
|
|
class FollowsFiller:
|
|||
|
|
"""用于填补follows字段内容的类"""
|
|||
|
|
|
|||
|
|
def __init__(self, db_path):
|
|||
|
|
self.db_path = db_path
|
|||
|
|
self.api_url = "http://localhost:11434/api/generate"
|
|||
|
|
|
|||
|
|
def connect_to_database(self) -> sqlite3.Connection:
    """Open a new connection to the SQLite database at ``self.db_path``.

    Returns:
        The freshly opened connection. The caller is responsible for
        closing it.

    Raises:
        Exception: Any error raised while connecting is logged and then
            re-raised unchanged.
    """
    try:
        connection = sqlite3.connect(self.db_path)
        logger.success(f"成功连接到数据库: {self.db_path}")
        return connection
    except Exception as err:
        # Log for the operator, but let the caller decide how to react.
        logger.error(f"连接数据库失败: {err}")
        raise
|
|||
|
|
|
|||
|
|
def check_table_structure(self) -> bool:
    """Verify that the database has the tables and column this script uses.

    Checks, in order, that the ``products`` table exists, that the
    ``product_analysis`` table exists, and that ``product_analysis`` has a
    ``follows`` column. The first check that fails is logged as an error
    and ends the verification.

    Returns:
        True when all three checks pass, False as soon as one fails.
    """
    logger.info("正在检查数据库表结构...")

    connection = self.connect_to_database()
    cur = connection.cursor()

    # Pairs of (existence query, message logged when the table is missing).
    table_checks = (
        (
            "SELECT name FROM sqlite_master WHERE type='table' AND name='products'",
            "products表不存在",
        ),
        (
            "SELECT name FROM sqlite_master WHERE type='table' AND name='product_analysis'",
            "product_analysis表不存在",
        ),
    )

    try:
        for query, missing_message in table_checks:
            cur.execute(query)
            row = cur.fetchone()
            if not row:
                logger.error(missing_message)
                return False

        # Index 1 of each PRAGMA table_info row is compared against the
        # column name below.
        cur.execute("PRAGMA table_info(product_analysis)")
        column_names = {info[1] for info in cur.fetchall()}
        if 'follows' not in column_names:
            logger.error("product_analysis表没有follows字段")
            return False

        logger.success("数据库表结构检查通过")
        return True
    finally:
        # Runs on every exit path, including the early ``return False`` ones.
        connection.close()
|
|||
|
|
|
|||
|
|
def convert_user_count_to_number(self, user_count: str) -> Optional[int]:
    """Convert a follower-count label into an integer.

    The first number found in the text is used. Commas inside it are
    treated as thousands separators, and an optional ``K``/``M``/``B``
    suffix (any case) directly after the number scales it by a thousand,
    a million or a billion. A trailing "follower"/"followers" word is
    removed first; any other surrounding text is ignored. Fractions left
    after scaling are truncated.

    Args:
        user_count: Count text such as "53 followers", "1,234 followers"
            or "1.9K followers". Non-string values are converted with
            ``str()`` before parsing.

    Returns:
        The parsed count, or None when the value is empty/falsy or
        contains no number.
    """
    if not user_count or str(user_count).strip() == "":
        logger.info(f"空的用户数量: {user_count}")
        return None

    multipliers = {'k': 1000, 'm': 1000000, 'b': 1000000000}

    try:
        # Drop the trailing "follower(s)" word so a suffix glued to it
        # (e.g. "1.9Kfollowers") is still recognised below.
        cleaned = re.sub(
            r'\s*followers?\s*$', '', str(user_count).strip(), flags=re.IGNORECASE
        )

        # The lookahead keeps the first letter of an ordinary word
        # ("53 members") from being mistaken for a unit suffix.
        match = re.search(
            r'(\d[\d,]*(?:\.\d+)?)\s*([kmb](?![a-z]))?',
            cleaned,
            flags=re.IGNORECASE,
        )
        if match is None:
            raise ValueError("no numeric value found")

        # Decimal keeps the scaling exact; binary floats can land just
        # below the intended integer (1.001 * 1000 -> 1000.9999...), which
        # int() would then truncate to the wrong value.
        number = Decimal(match.group(1).replace(',', ''))
        suffix = (match.group(2) or '').lower()
        return int(number * multipliers.get(suffix, 1))
    except (ValueError, ArithmeticError) as e:
        logger.error(f"转换用户数量失败: {user_count}, 错误: {e}")
        return None
|
|||
|
|
|
|||
|
|
def fill_follows_field(self):
|
|||
|
|
"""填补product_analysis表中的follows字段内容"""
|
|||
|
|
logger.info("=== 开始填补follows字段内容 ===")
|
|||
|
|
|
|||
|
|
conn = self.connect_to_database()
|
|||
|
|
cursor = conn.cursor()
|
|||
|
|
|
|||
|
|
try:
|
|||
|
|
# 查询所有产品及其对应的分析记录
|
|||
|
|
cursor.execute("""
|
|||
|
|
SELECT p.id, p.name, p.user_count, pa.id as analysis_id, pa.follows
|
|||
|
|
FROM products p
|
|||
|
|
LEFT JOIN product_analysis pa ON p.name = pa.original_name
|
|||
|
|
WHERE pa.id IS NOT NULL
|
|||
|
|
""")
|
|||
|
|
|
|||
|
|
products = cursor.fetchall()
|
|||
|
|
logger.info(f"找到 {len(products)} 个产品及其分析记录")
|
|||
|
|
|
|||
|
|
if not products:
|
|||
|
|
logger.info("没有发现需要填补follows字段的记录")
|
|||
|
|
return
|
|||
|
|
|