今日更新数据

2025-12-04 21:27:40 +08:00
parent 9e20d439bf
commit deea6764cf
11 changed files with 19856 additions and 31946 deletions
--- a/product/fill_follows_field.py
+++ b/product/fill_follows_field.py
@@ -1,118 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-填补product_analysis表中follows字段内容的脚本
-用于将products表中的user_count转换为数字并更新到product_analysis.follows字段
-"""
-
-import sqlite3
-import os
-import sys
-from loguru import logger
-
-# 配置日志
-logger.remove()
-logger.add(sys.stderr, level="INFO", format="<green>{time:YYYY-MM-DD HH:mm:ss}</green> | <level>{level: <8}</level> | <cyan>{name}</cyan>:<cyan>{function}</cyan>:<cyan>{line}</cyan> - <level>{message}</level>")
-
-class FollowsFiller:
-    """用于填补follows字段内容的类"""
-    
-    def __init__(self, db_path):
-        self.db_path = db_path
-        self.api_url = "http://localhost:11434/api/generate"
-    
-    def connect_to_database(self) -> sqlite3.Connection:
-        """连接到SQLite数据库"""
-        try:
-            conn = sqlite3.connect(self.db_path)
-            logger.success(f"成功连接到数据库: {self.db_path}")
-            return conn
-        except Exception as e:
-            logger.error(f"连接数据库失败: {e}")
-            raise
-    
-    def check_table_structure(self) -> bool:
-        """检查数据库表结构是否正确"""
-        logger.info("正在检查数据库表结构...")
-        
-        conn = self.connect_to_database()
-        cursor = conn.cursor()
-        
-        try:
-            # 检查products表是否存在
-            cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='products'")
-            if not cursor.fetchone():
-                logger.error("products表不存在")
-                return False
-            
-            # 检查product_analysis表是否存在
-            cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='product_analysis'")
-            if not cursor.fetchone():
-                logger.error("product_analysis表不存在")
-                return False
-            
-            # 检查product_analysis表是否有follows字段
-            cursor.execute("PRAGMA table_info(product_analysis)")
-            columns = [col[1] for col in cursor.fetchall()]
-            if 'follows' not in columns:
-                logger.error("product_analysis表没有follows字段")
-                return False
-            
-            logger.success("数据库表结构检查通过")
-            return True
-        finally:
-            conn.close()
-    
-    def convert_user_count_to_number(self, user_count: str) -> int:
-        """将user_count文本转换为数字
-        
-        Args:
-            user_count: 用户数量文本，如"53 followers"或"1.9K followers"
-            
-        Returns:
-            转换后的数字
-        """
-        if not user_count or user_count.strip() == "":
-            logger.info(f"空的用户数量: {user_count}")
-            return None
-        
-        try:
-            # 移除多余空格和"followers"等文本
-            import re
-            cleaned = re.sub(r'\s*followers?\s*$', '', user_count.strip(), flags=re.IGNORECASE)
-            
-            # 处理K/M等单位
-            if cleaned.endswith('K') or cleaned.endswith('k'):
-                return int(float(cleaned[:-1]) * 1000)
-            elif cleaned.endswith('M') or cleaned.endswith('m'):
-                return int(float(cleaned[:-1]) * 1000000)
-            else:
-                # 直接转换为整数
-                return int(re.sub(r'[^\d]', '', cleaned))
-        except Exception as e:
-            logger.error(f"转换用户数量失败: {user_count}, 错误: {e}")
-            return None
-    
-    def fill_follows_field(self):
-        """填补product_analysis表中的follows字段内容"""
-        logger.info("=== 开始填补follows字段内容 ===")
-        
-        conn = self.connect_to_database()
-        cursor = conn.cursor()
-        
-        try:
-            # 查询所有产品及其对应的分析记录
-            cursor.execute("""
-                SELECT p.id, p.name, p.user_count, pa.id as analysis_id, pa.follows
-                FROM products p
-                LEFT JOIN product_analysis pa ON p.name = pa.original_name
-                WHERE pa.id IS NOT NULL
-            """)
-            
-            products = cursor.fetchall()
-            logger.info(f"找到 {len(products)} 个产品及其分析记录")
-            
-            if not products:
-                logger.info("没有发现需要填补follows字段的记录")
-                return
-           
--- a/product/integrated_product_system.py
+++ b/product/integrated_product_system.py
@@ -794,8 +794,107 @@ class IntegratedProductSystem:
                conn.close()
                logger.info("数据库连接已关闭")
    
+    def reanalyze_invalid_difficulty_scores(self):
+        """Re-analyze product_analysis rows whose difficulty_score is 1 and overwrite their difficulty, score and AI response."""
+        logger.info("=== 开始重新分析无效难度评分 ===")
+        
+        conn = None
+        try:
+            # Open the database connection
+            conn = self.connect_to_database()
+            cursor = conn.cursor()
+            
+            # Select the records whose difficulty_score is 1
+            cursor.execute("""
+                SELECT id, original_name, product_intro, development_difficulty, ai_response
+                FROM product_analysis
+                WHERE difficulty_score = 1
+            """)
+            
+            invalid_records = cursor.fetchall()
+            logger.info(f"找到 {len(invalid_records)} 条difficulty_score为1的记录需要重新分析")
+            
+            if not invalid_records:
+                logger.info("没有发现需要重新分析的无效难度评分记录")
+                return
+            
+            # Re-analyze the difficulty of each selected record
+            updated_count = 0
+            for i, (analysis_id, name, introduction, development_difficulty, ai_response) in enumerate(invalid_records, 1):  # development_difficulty and ai_response are unpacked but not used below
+                logger.info(f"重新分析记录 {i}/{len(invalid_records)}: {name}")
+                
+                # Ask the AI API to re-analyze this product's difficulty
+                logger.info(f"重新调用Ollama API分析产品难度: {name}")
+                
+                # Build the request payload (model, prompt, stream=False) around a difficulty-specific prompt
+                prompt = f"这个是【{name}】，简介内容是【{introduction}】。请重新分析这个产品的开发难度，特别是对于一个人加上AI辅助能否开发这个产品，请详细回答。返回的内容是产品名称/产品简介/开发难度。返回的例子一：notion/这个是笔记产品等等/一个人开发难度较高"
+                
+                data = {
+                    "model": "qwen3:8b",
+                    "prompt": prompt,
+                    "stream": False
+                }
+                
+                headers = {
+                    "Content-Type": "application/json"
+                }
+                
+                try:
+                    # POST the JSON payload to self.api_url with a 60-second timeout
+                    response = requests.post(
+                        self.api_url,
+                        headers=headers,
+                        data=json.dumps(data, ensure_ascii=False),
+                        timeout=60
+                    )
+                    
+                    if response.status_code == 200:
+                        result = response.json()
+                        new_ai_response = result.get("response", "").strip()
+                        logger.success(f"成功重新分析产品 '{name}'")
+                        
+                        # Parse the new response; the first element of the returned tuple is discarded
+                        _, new_difficulty, new_difficulty_score = self.parse_ai_response(new_ai_response)
+                        
+                        # Hard-sounding products: lift scores below 70 into the 70-90 range. NOTE(review): '高' also matches '不高' - confirm this is intended
+                        difficulty_lower = new_difficulty.lower()
+                        if any(keyword in difficulty_lower for keyword in ['高', '很难', '非常难', '复杂', '困难']):
+                            if new_difficulty_score < 70:
+                                new_difficulty_score = max(70, min(90, new_difficulty_score + 60))
+                                logger.info(f"调整很难产品的难度分数为: {new_difficulty_score} (70-90区间)")
+                        
+                        # Write the new difficulty, score and raw AI response back to this row
+                        cursor.execute("""
+                            UPDATE product_analysis 
+                            SET development_difficulty = ?, 
+                                difficulty_score = ?, 
+                                ai_response = ?
+                            WHERE id = ?
+                        """, (new_difficulty, new_difficulty_score, new_ai_response, analysis_id))
+                        
+                        conn.commit()  # committed once per record, inside the loop
+                        updated_count += 1
+                        logger.success(f"成功更新产品 '{name}' 的难度分数为 {new_difficulty_score}")
+                    else:
+                        logger.error(f"API调用失败: {response.status_code}, {response.text}")
+                except Exception as e:  # a failure on one record is logged and the loop moves on
+                    logger.error(f"重新分析产品 '{name}' 失败: {e}")
+                
+                # Pause 2 seconds between records (skipped after the last one) to avoid calling the API too often
+                if i < len(invalid_records):
+                    time.sleep(2)
+            
+            logger.success(f"无效难度评分重新分析完成! 成功更新 {updated_count} 条记录")
+            
+        except Exception as e:
+            logger.error(f"重新分析无效难度评分过程中出错: {e}")
+        finally:
+            if conn:
+                conn.close()
+                logger.info("数据库连接已关闭")
+    
+    
    async def run_full_workflow_async(self, max_products=None, analyze_only=False):
-        """异步运行完整工作流程：抓取+分析+补充缺失分数+更新关注数"""
+        """异步运行完整工作流程：抓取+分析+补充缺失分数+更新关注数+重新分析无效难度评分"""
        logger.info("=== 开始全功能产品系统工作流程 ===")
        
        # 初始化数据库
@@ -820,6 +919,10 @@ class IntegratedProductSystem:
        logger.info("步骤4: 开始分析并更新产品关注数...")
        self.analyze_follower_counts()
        
+        # 步骤5: 重新分析invalid难度评分
+        logger.info("步骤5: 开始重新分析invalid难度评分...")
+        self.reanalyze_invalid_difficulty_scores()
+        
        logger.success("=== 全功能产品系统工作流程完成 ===")
    
    def run_full_workflow(self, max_products=None, analyze_only=False):
--- a/product/products.db
+++ b/product/products.db