#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Back-fill the ``follows`` column of the ``product_analysis`` table.

Converts the textual ``user_count`` stored in the ``products`` table into a
number and writes it to ``product_analysis.follows``.
"""

import os
import re
import sqlite3
import sys
from decimal import Decimal
from typing import Optional

from loguru import logger

# Configure logging: drop loguru's default sink and log INFO+ to stderr.
logger.remove()
logger.add(sys.stderr, level="INFO", format="{time:YYYY-MM-DD HH:mm:ss} | {level: <8} | {name}:{function}:{line} - {message}")

# Trailing "follower"/"followers" label (any case) plus surrounding spaces.
_FOLLOWERS_SUFFIX_RE = re.compile(r"\s*followers?\s*$", re.IGNORECASE)

# Multipliers for the unit suffixes recognised in a user-count string.
_UNIT_MULTIPLIERS = {"k": 1_000, "m": 1_000_000}


class FollowsFiller:
    """Fill in the ``follows`` column of ``product_analysis``."""

    def __init__(self, db_path):
        """Store the database location.

        Args:
            db_path: Path handed to ``sqlite3.connect``.
        """
        self.db_path = db_path
        # NOTE(review): not read by any method visible in this part of the
        # file; kept because code elsewhere may rely on it.
        self.api_url = "http://localhost:11434/api/generate"

    def connect_to_database(self) -> sqlite3.Connection:
        """Open and return a new connection to the SQLite database.

        Raises:
            Exception: Whatever ``sqlite3.connect`` raised, re-raised after
                being logged.
        """
        try:
            conn = sqlite3.connect(self.db_path)
            logger.success(f"成功连接到数据库: {self.db_path}")
            return conn
        except Exception as e:
            logger.error(f"连接数据库失败: {e}")
            raise

    def check_table_structure(self) -> bool:
        """Check that the tables and column this script needs exist.

        Returns:
            True when both ``products`` and ``product_analysis`` exist and
            ``product_analysis`` has a ``follows`` column; False otherwise.
        """
        logger.info("正在检查数据库表结构...")

        conn = self.connect_to_database()
        cursor = conn.cursor()

        try:
            # Make sure the products table exists.
            cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='products'")
            if not cursor.fetchone():
                logger.error("products表不存在")
                return False

            # Make sure the product_analysis table exists.
            cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='product_analysis'")
            if not cursor.fetchone():
                logger.error("product_analysis表不存在")
                return False

            # Make sure product_analysis has a follows column; the column
            # name is the second field of each PRAGMA table_info row.
            cursor.execute("PRAGMA table_info(product_analysis)")
            columns = [col[1] for col in cursor.fetchall()]
            if 'follows' not in columns:
                logger.error("product_analysis表没有follows字段")
                return False

            logger.success("数据库表结构检查通过")
            return True

        finally:
            conn.close()

    def convert_user_count_to_number(self, user_count: str) -> Optional[int]:
        """Convert a user-count text into an integer.

        Args:
            user_count: Follower-count text such as "53 followers" or
                "1.9K followers".

        Returns:
            The parsed count, or None when the input is empty or cannot be
            parsed.
        """
        if user_count is None or not str(user_count).strip():
            logger.info(f"空的用户数量: {user_count}")
            return None

        try:
            # Drop the trailing "followers" label and surrounding whitespace.
            cleaned = _FOLLOWERS_SUFFIX_RE.sub("", str(user_count).strip())

            # Handle K/M unit suffixes (case-insensitive).
            multiplier = _UNIT_MULTIPLIERS.get(cleaned[-1:].lower())
            if multiplier is not None:
                # Decimal keeps the scaling exact: binary floats can land
                # just below the intended integer and int() would then
                # truncate one short.  Commas are thousands separators.
                magnitude = Decimal(cleaned[:-1].replace(",", ""))
                return int(magnitude * multiplier)

            # No unit suffix: keep only the digits and convert directly.
            return int(re.sub(r"[^\d]", "", cleaned))
        except (ValueError, ArithmeticError) as e:
            logger.error(f"转换用户数量失败: {user_count}, 错误: {e}")
            return None

    def fill_follows_field(self):
        """Fill in the ``follows`` column of ``product_analysis``."""
        logger.info("=== 开始填补follows字段内容 ===")

        conn = self.connect_to_database()
        cursor = conn.cursor()

        try:
            # Fetch every product together with its analysis record; the
            # WHERE clause keeps only products that have one.
            cursor.execute("""
                SELECT p.id, p.name, p.user_count, pa.id as analysis_id, pa.follows
                FROM products p
                LEFT JOIN product_analysis pa ON p.name = pa.original_name
                WHERE pa.id IS NOT NULL
            """)

            products = cursor.fetchall()
            logger.info(f"找到 {len(products)} 个产品及其分析记录")

            if not products:
                logger.info("没有发现需要填补follows字段的记录")
                return

            # NOTE(review): the remainder of this method was outside the
            # reviewed portion of the file and is not reproduced here;
            # reconcile with the original before merging.
        finally:
            conn.close()