增加follows字段
This commit is contained in:
42
.trae/documents/实现用户关注数转换功能.md
Normal file
42
.trae/documents/实现用户关注数转换功能.md
Normal file
@@ -0,0 +1,42 @@
|
||||
## 实现计划

### 1. 数据库结构更新

- **修改`init_database`方法**:在`product_analysis`表中添加`follows`字段,用于存储转换后的用户关注数
|
||||
|
||||
### 2. 添加用户关注数转换方法

- **创建`convert_user_count_to_number`方法**:使用Ollama API将`user_count`文本转换为数字
- 处理不同格式:"53 followers" → 53,"1.9K followers" → 1900
- 调用Ollama API进行智能转换
- 返回转换后的数字
|
||||
|
||||
### 3. 集成到现有分析流程

- **修改`get_product_data`方法**:在查询中包含`user_count`和`url`字段
- **更新`analyze_products`方法**:
  - 扩展返回值处理,包含`user_count`和`url`
  - 在分析过程中调用转换方法处理关注数
  - 将转换后的数字传递给保存方法
|
||||
|
||||
### 4. 更新数据保存方法

- **修改`save_analysis_result`方法**:添加`follows`参数,将转换后的关注数保存到数据库
|
||||
|
||||
### 5. 添加关注数分析更新功能

- **创建`analyze_follower_counts`方法**:
  - 查询所有产品及其分析记录
  - 对每个产品转换`user_count`并更新`product_analysis.follows`
  - 处理已有分析记录的关注数更新
|
||||
|
||||
### 6. 完善工作流程

- **更新`run_full_workflow_async`方法**:添加第4步,执行关注数分析更新
|
||||
|
||||
## 预期效果

- 新的`product_analysis`表将包含`follows`字段,存储转换后的数字关注数
- 新分析的产品将自动转换并保存关注数
- 已有产品将通过额外步骤更新关注数
- 使用Ollama API确保转换准确性
|
||||
|
||||
## 关键技术点

- SQLite数据库表结构修改
- Ollama API调用与结果解析
- 文本到数字的智能转换
- 现有代码的无缝集成
- 批量数据处理与更新
|
||||
File diff suppressed because it is too large
Load Diff
118
product/fill_follows_field.py
Normal file
118
product/fill_follows_field.py
Normal file
@@ -0,0 +1,118 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
填补product_analysis表中follows字段内容的脚本
|
||||
用于将products表中的user_count转换为数字并更新到product_analysis.follows字段
|
||||
"""
|
||||
|
||||
import sqlite3
|
||||
import os
|
||||
import sys
|
||||
from loguru import logger
|
||||
|
||||
# 配置日志
|
||||
logger.remove()
|
||||
logger.add(sys.stderr, level="INFO", format="<green>{time:YYYY-MM-DD HH:mm:ss}</green> | <level>{level: <8}</level> | <cyan>{name}</cyan>:<cyan>{function}</cyan>:<cyan>{line}</cyan> - <level>{message}</level>")
|
||||
|
||||
class FollowsFiller:
|
||||
"""用于填补follows字段内容的类"""
|
||||
|
||||
def __init__(self, db_path):
|
||||
self.db_path = db_path
|
||||
self.api_url = "http://localhost:11434/api/generate"
|
||||
|
||||
def connect_to_database(self) -> sqlite3.Connection:
    """Open a connection to the SQLite file at ``self.db_path``.

    Returns:
        The newly opened connection; the caller is responsible for closing it.

    Raises:
        Exception: Whatever ``sqlite3.connect`` raised, re-raised after the
            failure has been logged.
    """
    try:
        connection = sqlite3.connect(self.db_path)
    except Exception as exc:
        logger.error(f"连接数据库失败: {exc}")
        raise
    else:
        logger.success(f"成功连接到数据库: {self.db_path}")
        return connection
|
||||
|
||||
def check_table_structure(self) -> bool:
    """Check that the schema needed to back-fill ``follows`` is present.

    Three checks run in order: the ``products`` table exists, the
    ``product_analysis`` table exists, and ``product_analysis`` has a
    ``follows`` column. The first failing check is logged and ends the run.

    Returns:
        True when all three checks pass, False otherwise.
    """
    logger.info("正在检查数据库表结构...")

    connection = self.connect_to_database()
    cur = connection.cursor()

    # (existence query, message logged when the table is missing)
    required_tables = (
        (
            "SELECT name FROM sqlite_master WHERE type='table' AND name='products'",
            "products表不存在",
        ),
        (
            "SELECT name FROM sqlite_master WHERE type='table' AND name='product_analysis'",
            "product_analysis表不存在",
        ),
    )

    try:
        for existence_query, missing_message in required_tables:
            cur.execute(existence_query)
            if cur.fetchone() is None:
                logger.error(missing_message)
                return False

        # PRAGMA table_info yields one row per column; index 1 is the name.
        cur.execute("PRAGMA table_info(product_analysis)")
        column_names = {row[1] for row in cur.fetchall()}
        if 'follows' not in column_names:
            logger.error("product_analysis表没有follows字段")
            return False

        logger.success("数据库表结构检查通过")
        return True
    finally:
        # Close the connection on every exit path, including early returns.
        connection.close()
|
||||
|
||||
def convert_user_count_to_number(self, user_count: str) -> "int | None":
    """Parse a follower-count label into an integer.

    A trailing "follower"/"followers" word is dropped, thousands separators
    are removed, and an optional K / M / B suffix is applied as a
    multiplier. The multiplication uses ``Decimal`` so that labels such as
    "1.005K" are not under-counted by binary floating-point truncation.

    Args:
        user_count: Label text, e.g. "53 followers" or "1.9K followers".
            Non-string values are converted with ``str`` before parsing.

    Returns:
        The follower count, or None when the input is empty or contains no
        recognisable number.
    """
    import re
    from decimal import Decimal

    if not user_count or str(user_count).strip() == "":
        logger.info(f"空的用户数量: {user_count}")
        return None

    multipliers = {'': 1, 'k': 10 ** 3, 'm': 10 ** 6, 'b': 10 ** 9}

    try:
        # Drop the trailing "follower(s)" word, then thousands separators.
        cleaned = re.sub(r'\s*followers?\s*$', '', str(user_count).strip(), flags=re.IGNORECASE)
        compact = cleaned.replace(',', '')

        # First standalone number with an optional magnitude suffix. The
        # look-arounds stop the match from starting or ending in the middle
        # of a longer alphanumeric token (e.g. "53Kfans").
        token = re.search(
            r'(?<![0-9A-Za-z.])(\d+(?:\.\d+)?|\.\d+)\s*([KkMmBb]?)(?![A-Za-z0-9])',
            compact,
        )
        if token is None:
            raise ValueError("no numeric value found")

        # Decimal keeps "1.9" * 1000 exact; int() then drops any fraction.
        return int(Decimal(token.group(1)) * multipliers[token.group(2).lower()])
    except Exception as e:
        logger.error(f"转换用户数量失败: {user_count}, 错误: {e}")
        return None
|
||||
|
||||
def fill_follows_field(self):
|
||||
"""填补product_analysis表中的follows字段内容"""
|
||||
logger.info("=== 开始填补follows字段内容 ===")
|
||||
|
||||
conn = self.connect_to_database()
|
||||
cursor = conn.cursor()
|
||||
|
||||
try:
|
||||
# 查询所有产品及其对应的分析记录
|
||||
cursor.execute("""
|
||||
SELECT p.id, p.name, p.user_count, pa.id as analysis_id, pa.follows
|
||||
FROM products p
|
||||
LEFT JOIN product_analysis pa ON p.name = pa.original_name
|
||||
WHERE pa.id IS NOT NULL
|
||||
""")
|
||||
|
||||
products = cursor.fetchall()
|
||||
logger.info(f"找到 {len(products)} 个产品及其分析记录")
|
||||
|
||||
if not products:
|
||||
logger.info("没有发现需要填补follows字段的记录")
|
||||
return
|
||||
|
||||
@@ -112,10 +112,18 @@ class IntegratedProductSystem:
|
||||
ai_response TEXT,
|
||||
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
||||
difficulty_score INTEGER,
|
||||
product_link TEXT
|
||||
product_link TEXT,
|
||||
follows INTEGER
|
||||
)
|
||||
''')
|
||||
|
||||
# 为现有表添加follows字段(如果不存在)
|
||||
cursor.execute("PRAGMA table_info(product_analysis)")
|
||||
columns = [col[1] for col in cursor.fetchall()]
|
||||
if 'follows' not in columns:
|
||||
cursor.execute("ALTER TABLE product_analysis ADD COLUMN follows INTEGER")
|
||||
logger.info("已为product_analysis表添加follows字段")
|
||||
|
||||
conn.commit()
|
||||
conn.close()
|
||||
logger.success("产品数据库初始化完成")
|
||||
@@ -262,9 +270,9 @@ class IntegratedProductSystem:
|
||||
try:
|
||||
cursor = conn.cursor()
|
||||
|
||||
# 查询products表中的name和introduction字段
|
||||
# 查询products表中的id、name、introduction、user_count和url字段
|
||||
cursor.execute("""
|
||||
SELECT id, name, introduction
|
||||
SELECT id, name, introduction, user_count, url
|
||||
FROM products
|
||||
WHERE name IS NOT NULL AND introduction IS NOT NULL
|
||||
AND name != '' AND introduction != ''
|
||||
@@ -274,8 +282,8 @@ class IntegratedProductSystem:
|
||||
logger.info(f"从数据库获取到 {len(products)} 个产品")
|
||||
|
||||
# 显示前几个产品作为示例
|
||||
for i, (id, name, intro) in enumerate(products[:3], 1):
|
||||
logger.info(f"示例产品{i}: ID={id}, 名称='{name}', 简介='{intro[:50]}...'")
|
||||
for i, (id, name, intro, user_count, url) in enumerate(products[:3], 1):
|
||||
logger.info(f"示例产品{i}: ID={id}, 名称='{name}', 简介='{intro[:50]}...', 用户数='{user_count}', URL='{url}'")
|
||||
|
||||
return products
|
||||
|
||||
@@ -321,6 +329,64 @@ class IntegratedProductSystem:
|
||||
logger.error(f"调用Ollama AI API时出错: {e}")
|
||||
return None
|
||||
|
||||
def convert_user_count_to_number(self, user_count: str) -> Optional[int]:
    """Convert a follower-count label such as "1.9K followers" to an integer.

    Well-formed labels (a number with optional thousands separators, an
    optional K / M / B suffix and an optional trailing "followers") are
    parsed locally with exact decimal arithmetic, so the common case is
    deterministic and needs no network round trip. Any other label is sent
    to the generation endpoint at ``self.api_url`` and the number is
    extracted from the model's reply.

    Args:
        user_count: Label text, e.g. "53 followers" or "1.9K followers".

    Returns:
        The follower count, or None when the input is empty, the API call
        fails, or no number can be extracted from the reply.
    """
    import re
    from decimal import Decimal

    if not user_count or str(user_count).strip() == "":
        logger.info(f"空的用户数量: {user_count}")
        return None

    # A standalone number with an optional magnitude suffix.
    number_pattern = re.compile(
        r'(?<![0-9A-Za-z.])(\d+(?:\.\d+)?|\.\d+)\s*([KkMmBb]?)(?![A-Za-z0-9])'
    )
    multipliers = {'': 1, 'k': 10 ** 3, 'm': 10 ** 6, 'b': 10 ** 9}

    def to_int(match):
        # Decimal keeps e.g. "1.9" * 1000 exact; int() drops any fraction.
        return int(Decimal(match.group(1)) * multipliers[match.group(2).lower()])

    # Fast path: the whole label is a plain number plus optional suffix.
    label = re.sub(r'\s*followers?\s*$', '', str(user_count).strip(), flags=re.IGNORECASE)
    local_match = number_pattern.fullmatch(label.replace(',', ''))
    if local_match:
        return to_int(local_match)

    try:
        logger.info(f"正在转换用户数量: {user_count}")

        # Prompt dedicated to follower-count conversion.
        prompt = f"请将以下用户数量文本转换为纯数字,不要包含任何其他内容:\n{user_count}\n\n转换规则:\n- 直接数字:如'53 followers' → 53\n- K表示千:如'1.9K followers' → 1900\n- M表示百万:如'2.5M followers' → 2500000\n- 只返回数字,不要添加任何单位或解释"

        data = {
            "model": "qwen3:8b",
            "prompt": prompt,
            "stream": False
        }

        headers = {
            "Content-Type": "application/json"
        }

        # Send explicit UTF-8 bytes: the payload contains non-ASCII text, and
        # a str body may be encoded as Latin-1 further down the HTTP stack.
        response = requests.post(
            self.api_url,
            headers=headers,
            data=json.dumps(data, ensure_ascii=False).encode("utf-8"),
            timeout=30
        )

        if response.status_code != 200:
            logger.error(f"Ollama API调用失败: {response.status_code}, {response.text}")
            return None

        converted = response.json().get("response", "").strip()

        # Reasoning models may prepend a <think>...</think> section whose
        # digits must not be mistaken for the answer; remove it, then drop
        # thousands separators before searching for the number.
        answer = re.sub(r'<think>.*?</think>', '', converted, flags=re.DOTALL | re.IGNORECASE)
        number_match = number_pattern.search(answer.replace(',', ''))
        if number_match is None:
            logger.error(f"无法从转换结果中提取数字: {converted}")
            return None

        value = to_int(number_match)
        logger.success(f"成功转换用户数量: {user_count} → {value}")
        return value

    except Exception as e:
        logger.error(f"转换用户数量时出错: {e}")
        return None
|
||||
|
||||
def parse_ai_response(self, response: str) -> Tuple[str, str, str, int]:
|
||||
"""解析AI响应内容,提取产品名称、简介、难度描述和难度分数"""
|
||||
try:
|
||||
@@ -398,8 +464,8 @@ class IntegratedProductSystem:
|
||||
def save_analysis_result(self, conn: sqlite3.Connection,
|
||||
original_name: str, difficulty: str,
|
||||
ai_response: str, difficulty_score: int = None,
|
||||
product_link: str = None):
|
||||
"""保存分析结果到数据库,包括难度分数和产品链接"""
|
||||
product_link: str = None, follows: int = None):
|
||||
"""保存分析结果到数据库,包括难度分数、产品链接和关注数"""
|
||||
try:
|
||||
cursor = conn.cursor()
|
||||
|
||||
@@ -409,12 +475,12 @@ class IntegratedProductSystem:
|
||||
|
||||
cursor.execute("""
|
||||
INSERT INTO product_analysis
|
||||
(original_name, development_difficulty, difficulty_score, ai_response, product_link)
|
||||
VALUES (?, ?, ?, ?, ?)
|
||||
""", (original_name, difficulty, difficulty_score, ai_response, product_link))
|
||||
(original_name, development_difficulty, difficulty_score, ai_response, product_link, follows)
|
||||
VALUES (?, ?, ?, ?, ?, ?)
|
||||
""", (original_name, difficulty, difficulty_score, ai_response, product_link, follows))
|
||||
|
||||
conn.commit()
|
||||
logger.success(f"保存分析结果成功: {original_name}, 难度分数: {difficulty_score}")
|
||||
logger.success(f"保存分析结果成功: {original_name}, 难度分数: {difficulty_score}, 关注数: {follows}")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"保存分析结果失败: {e}")
|
||||
@@ -450,7 +516,7 @@ class IntegratedProductSystem:
|
||||
# 逐个分析产品
|
||||
success_count = 0
|
||||
skip_count = 0
|
||||
for i, (original_id, name, introduction) in enumerate(products_to_analyze, 1):
|
||||
for i, (original_id, name, introduction, user_count, url) in enumerate(products_to_analyze, 1):
|
||||
logger.info(f"\n分析进度: {i}/{len(products_to_analyze)} - {name}")
|
||||
|
||||
# 检查产品是否已存在
|
||||
@@ -462,7 +528,7 @@ class IntegratedProductSystem:
|
||||
# 显示API调用状态
|
||||
logger.info(f"正在提交API请求... 进度: {i}/{len(products_to_analyze)}")
|
||||
|
||||
# 调用AI API
|
||||
# 调用AI API分析产品
|
||||
ai_response = self.call_ollama_ai_api(name, introduction)
|
||||
|
||||
if ai_response:
|
||||
@@ -472,8 +538,13 @@ class IntegratedProductSystem:
|
||||
# 解析响应
|
||||
product_intro, difficulty, difficulty_score = self.parse_ai_response(ai_response)
|
||||
|
||||
# 保存结果(不再保存product_intro,避免与ai_response重复)
|
||||
self.save_analysis_result(conn, name, difficulty, ai_response, difficulty_score)
|
||||
# 转换用户关注数
|
||||
follows = None
|
||||
if user_count:
|
||||
follows = self.convert_user_count_to_number(user_count)
|
||||
|
||||
# 保存结果
|
||||
self.save_analysis_result(conn, name, difficulty, ai_response, difficulty_score, url, follows)
|
||||
success_count += 1
|
||||
|
||||
# 显示完成状态
|
||||
@@ -660,8 +731,71 @@ class IntegratedProductSystem:
|
||||
conn.close()
|
||||
logger.info("数据库连接已关闭")
|
||||
|
||||
def analyze_follower_counts(self):
    """Back-fill ``product_analysis.follows`` from ``products.user_count``.

    Every product with a non-empty ``user_count`` is joined to its analysis
    row by name. Products without an analysis row are skipped; for the
    others the label is converted with ``convert_user_count_to_number`` and,
    when that yields a value, written to the matching analysis row and
    committed immediately. Errors are logged, not raised, and the
    connection is always closed.
    """
    logger.info("=== 开始分析产品关注数 ===")

    conn = None
    try:
        conn = self.connect_to_database()
        cursor = conn.cursor()

        # All products that carry a follower label, with their analysis row
        # (if any).
        cursor.execute("""
            SELECT p.id, p.name, p.user_count, pa.id as analysis_id
            FROM products p
            LEFT JOIN product_analysis pa ON p.name = pa.original_name
            WHERE p.user_count IS NOT NULL AND p.user_count != ''
        """)

        rows = cursor.fetchall()
        total = len(rows)
        logger.info(f"找到 {total} 个需要更新关注数的产品")

        if total == 0:
            logger.info("没有发现需要更新关注数的产品")
            return

        updated_count = 0
        for position, row in enumerate(rows, start=1):
            _product_id, name, user_count, analysis_id = row
            logger.info(f"处理产品关注数 {position}/{total}: {name}, 用户数: {user_count}")

            # LEFT JOIN leaves analysis_id empty for unanalysed products.
            if not analysis_id:
                logger.info(f"产品 '{name}' 没有对应的分析记录,跳过")
                continue

            follows = self.convert_user_count_to_number(user_count)

            if follows is None:
                logger.warning(f"无法为产品 '{name}' 转换关注数")
            else:
                cursor.execute("""
                    UPDATE product_analysis
                    SET follows = ?
                    WHERE id = ?
                """, (follows, analysis_id))
                conn.commit()
                updated_count += 1
                logger.success(f"成功更新产品 '{name}' 的关注数为 {follows}")

            # Pause between products (not after the last one) to avoid
            # calling the API too frequently.
            if position != total:
                time.sleep(2)

        logger.success(f"关注数分析完成! 成功更新 {updated_count} 个产品的关注数")

    except Exception as e:
        logger.error(f"分析关注数过程中出错: {e}")
    finally:
        if conn is not None:
            conn.close()
            logger.info("数据库连接已关闭")
|
||||
|
||||
async def run_full_workflow_async(self, max_products=None, analyze_only=False):
|
||||
"""异步运行完整工作流程:抓取+分析+补充缺失分数"""
|
||||
"""异步运行完整工作流程:抓取+分析+补充缺失分数+更新关注数"""
|
||||
logger.info("=== 开始全功能产品系统工作流程 ===")
|
||||
|
||||
# 初始化数据库
|
||||
@@ -682,6 +816,10 @@ class IntegratedProductSystem:
|
||||
logger.info("步骤3: 开始分析并补充缺失的难度分数...")
|
||||
self.analyze_missing_scores()
|
||||
|
||||
# 步骤4: 分析并更新产品关注数
|
||||
logger.info("步骤4: 开始分析并更新产品关注数...")
|
||||
self.analyze_follower_counts()
|
||||
|
||||
logger.success("=== 全功能产品系统工作流程完成 ===")
|
||||
|
||||
def run_full_workflow(self, max_products=None, analyze_only=False):
|
||||
@@ -723,11 +861,11 @@ async def main():
|
||||
chrome_bat_path = os.path.join(os.path.dirname(__file__), "start_chrome.bat")
|
||||
logger.info(f"正在运行Chrome启动脚本: {chrome_bat_path}")
|
||||
try:
|
||||
# 运行批处理程序,等待其完成
|
||||
subprocess.run([chrome_bat_path], check=True, shell=True)
|
||||
logger.success("Chrome启动脚本执行成功")
|
||||
except subprocess.CalledProcessError as e:
|
||||
logger.error(f"Chrome启动脚本执行失败: {e}")
|
||||
# 异步运行批处理程序,不等待其完成
|
||||
subprocess.Popen([chrome_bat_path], shell=True)
|
||||
logger.success("Chrome启动脚本已启动")
|
||||
except Exception as e:
|
||||
logger.error(f"Chrome启动脚本启动失败: {e}")
|
||||
except FileNotFoundError:
|
||||
logger.error(f"未找到Chrome启动脚本: {chrome_bat_path}")
|
||||
|
||||
|
||||
Binary file not shown.
76
product/update_database.py
Normal file
76
product/update_database.py
Normal file
@@ -0,0 +1,76 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
数据库更新脚本
|
||||
用于更新现有的product_analysis表,添加follows字段
|
||||
"""
|
||||
|
||||
import sqlite3
|
||||
import os
|
||||
from loguru import logger
|
||||
import sys
|
||||
|
||||
# 配置日志
|
||||
logger.remove()
|
||||
logger.add(sys.stderr, level="INFO", format="<green>{time:YYYY-MM-DD HH:mm:ss}</green> | <level>{level: <8}</level> | <cyan>{name}</cyan>:<cyan>{function}</cyan>:<cyan>{line}</cyan> - <level>{message}</level>")
|
||||
|
||||
def update_product_analysis_table(db_path):
    """Add the ``follows`` INTEGER column to ``product_analysis`` if missing.

    Args:
        db_path: Path of the SQLite database file to migrate.

    Returns:
        True when the column is present after the call (it already existed
        or was just added); False when the ``product_analysis`` table does
        not exist or any error occurred.
    """
    logger.info(f"开始更新数据库: {db_path}")

    # Bound before the try so the finally clause can tell whether a
    # connection was actually opened.
    conn = None
    try:
        conn = sqlite3.connect(db_path)
        cursor = conn.cursor()

        # The migration only applies to an existing table.
        cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='product_analysis'")
        if cursor.fetchone() is None:
            logger.error("product_analysis表不存在,无法更新")
            return False

        # PRAGMA table_info yields one row per column; index 1 is the name.
        cursor.execute("PRAGMA table_info(product_analysis)")
        columns = {col[1] for col in cursor.fetchall()}
        if 'follows' in columns:
            logger.info("follows字段已经存在,无需更新")
            return True

        cursor.execute("ALTER TABLE product_analysis ADD COLUMN follows INTEGER")
        conn.commit()

        logger.success("成功为product_analysis表添加follows字段")
        return True

    except Exception as e:
        logger.error(f"更新数据库失败: {e}")
        return False
    finally:
        # Single cleanup point for every exit path.
        if conn is not None:
            conn.close()
|
||||
|
||||
def main():
    """Run the migration on ``products.db`` next to this script and exit.

    The process exits with status 0 when ``update_product_analysis_table``
    reports success and with status 1 otherwise.
    """
    # The default database sits in the same directory as this script.
    db_file = os.path.join(os.path.dirname(__file__), "products.db")

    logger.info("=== 数据库更新脚本开始执行 ===")

    if update_product_analysis_table(db_file):
        logger.success("=== 数据库更新成功 ===")
        sys.exit(0)

    logger.error("=== 数据库更新失败 ===")
    sys.exit(1)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Reference in New Issue
Block a user