今日更新数据

This commit is contained in:
2025-12-04 21:27:40 +08:00
parent 9e20d439bf
commit deea6764cf
11 changed files with 19856 additions and 31946 deletions

5810
2025年12月4日203727.txt Normal file

File diff suppressed because it is too large Load Diff

Binary file not shown.

After

Width:  |  Height:  |  Size: 592 KiB

File diff suppressed because it is too large Load Diff

View File

@@ -1,118 +0,0 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
填补product_analysis表中follows字段内容的脚本
用于将products表中的user_count转换为数字并更新到product_analysis.follows字段
"""
import sqlite3
import os
import sys
from loguru import logger
# Configure logging: remove the handlers loguru currently has registered, then
# add a single stderr sink that emits INFO and above using a colourised
# "time | level | name:function:line - message" layout.
logger.remove()
logger.add(sys.stderr, level="INFO", format="<green>{time:YYYY-MM-DD HH:mm:ss}</green> | <level>{level: <8}</level> | <cyan>{name}</cyan>:<cyan>{function}</cyan>:<cyan>{line}</cyan> - <level>{message}</level>")
class FollowsFiller:
"""用于填补follows字段内容的类"""
def __init__(self, db_path):
self.db_path = db_path
self.api_url = "http://localhost:11434/api/generate"
def connect_to_database(self) -> sqlite3.Connection:
    """Open a connection to the SQLite database at ``self.db_path``.

    Returns:
        The opened ``sqlite3.Connection``; the caller is responsible for
        closing it.

    Raises:
        Exception: Whatever error occurred while connecting is logged and
            then re-raised unchanged.
    """
    try:
        connection = sqlite3.connect(self.db_path)
        logger.success(f"成功连接到数据库: {self.db_path}")
    except Exception as exc:
        logger.error(f"连接数据库失败: {exc}")
        raise
    else:
        return connection
def check_table_structure(self) -> bool:
    """Verify that the database has the tables and column this script needs.

    Checks, in order, that the ``products`` table exists, that the
    ``product_analysis`` table exists, and that ``product_analysis`` has a
    ``follows`` column. The first failed check is logged as an error.

    Returns:
        True when all three checks pass, False otherwise.
    """
    logger.info("正在检查数据库表结构...")
    conn = self.connect_to_database()
    cursor = conn.cursor()

    # (existence query, message logged when the table is missing), in check order.
    table_checks = (
        (
            "SELECT name FROM sqlite_master WHERE type='table' AND name='products'",
            "products表不存在",
        ),
        (
            "SELECT name FROM sqlite_master WHERE type='table' AND name='product_analysis'",
            "product_analysis表不存在",
        ),
    )

    try:
        for query, missing_message in table_checks:
            cursor.execute(query)
            if not cursor.fetchone():
                logger.error(missing_message)
                return False

        # Each PRAGMA table_info row carries the column name at index 1.
        cursor.execute("PRAGMA table_info(product_analysis)")
        has_follows = any(info[1] == 'follows' for info in cursor.fetchall())
        if not has_follows:
            logger.error("product_analysis表没有follows字段")
            return False

        logger.success("数据库表结构检查通过")
        return True
    finally:
        # Runs on every exit path, including the early returns above.
        conn.close()
def convert_user_count_to_number(self, user_count: str) -> "int | None":
    """Convert a follower-count label into an integer.

    Args:
        user_count: Follower text such as "53 followers" or "1.9K followers".
            A non-string value (for example an integer read from the
            database) is converted with ``str()`` before parsing.

    Returns:
        The parsed count, or ``None`` when the input is empty or cannot be
        parsed (the failure is logged).
    """
    # Local import: ``re`` is not among the imports at the top of the file.
    import re

    text = "" if user_count is None else str(user_count).strip()
    if not user_count or text == "":
        logger.info(f"空的用户数量: {user_count}")
        return None

    # Case-insensitive unit suffixes and the factor each one stands for.
    multipliers = {"k": 1_000, "m": 1_000_000}

    try:
        # Drop a trailing "follower"/"followers" word and surrounding spaces.
        cleaned = re.sub(r'\s*followers?\s*$', '', text, flags=re.IGNORECASE)

        factor = multipliers.get(cleaned[-1:].lower())
        if factor is not None:
            # round() rather than int(): float("1.005") * 1000 evaluates to
            # 1004.9999999999999, which int() would truncate to 1004.
            return round(float(cleaned[:-1]) * factor)

        # No unit suffix: keep digits only, so "1,234" parses as 1234.
        return int(re.sub(r'[^\d]', '', cleaned))
    except (ValueError, OverflowError) as e:
        logger.error(f"转换用户数量失败: {user_count}, 错误: {e}")
        return None
def fill_follows_field(self):
"""填补product_analysis表中的follows字段内容"""
logger.info("=== 开始填补follows字段内容 ===")
conn = self.connect_to_database()
cursor = conn.cursor()
try:
# 查询所有产品及其对应的分析记录
cursor.execute("""
SELECT p.id, p.name, p.user_count, pa.id as analysis_id, pa.follows
FROM products p
LEFT JOIN product_analysis pa ON p.name = pa.original_name
WHERE pa.id IS NOT NULL
""")
products = cursor.fetchall()
logger.info(f"找到 {len(products)} 个产品及其分析记录")
if not products:
logger.info("没有发现需要填补follows字段的记录")
return

View File

@@ -794,8 +794,107 @@ class IntegratedProductSystem:
conn.close() conn.close()
logger.info("数据库连接已关闭") logger.info("数据库连接已关闭")
def reanalyze_invalid_difficulty_scores(self):
    """Re-run the AI difficulty analysis for rows whose difficulty_score is 1.

    Selects every ``product_analysis`` row with ``difficulty_score = 1``,
    posts the product name and introduction to ``self.api_url`` (Ollama
    generate request format, model ``qwen3:8b``), parses the reply with
    ``self.parse_ai_response`` and writes the new difficulty text, score and
    raw response back to the row. Each successful update is committed
    immediately. A failure on one record is logged and does not stop the
    remaining records; a failure outside the per-record loop is logged and
    ends the run. The database connection is always closed.

    Returns:
        None.
    """
    logger.info("=== 开始重新分析无效难度评分 ===")

    # Markers in the difficulty text that identify a "very hard" product.
    # The list previously also held an empty string, which is a substring of
    # every text and therefore made the keyword check always succeed.
    hard_keywords = ('很难', '非常难', '复杂', '困难')
    headers = {
        "Content-Type": "application/json"
    }

    conn = None
    try:
        conn = self.connect_to_database()
        cursor = conn.cursor()

        # Rows whose score is 1 are treated as invalid and re-analysed.
        cursor.execute("""
            SELECT id, original_name, product_intro, development_difficulty, ai_response
            FROM product_analysis
            WHERE difficulty_score = 1
        """)
        invalid_records = cursor.fetchall()
        total = len(invalid_records)
        logger.info(f"找到 {total} 条difficulty_score为1的记录需要重新分析")

        if not invalid_records:
            logger.info("没有发现需要重新分析的无效难度评分记录")
            return

        updated_count = 0
        # The previous difficulty text and AI response are selected but not used.
        for i, (analysis_id, name, introduction, _old_difficulty, _old_response) in enumerate(invalid_records, 1):
            logger.info(f"重新分析记录 {i}/{total}: {name}")
            logger.info(f"重新调用Ollama API分析产品难度: {name}")

            # Request payload in Ollama generate format, dedicated to difficulty analysis.
            prompt = f"这个是【{name}】,简介内容是【{introduction}】。请重新分析这个产品的开发难度特别是对于一个人加上AI辅助能否开发这个产品请详细回答。返回的内容是产品名称/产品简介/开发难度。返回的例子一notion/这个是笔记产品等等/一个人开发难度较高"
            data = {
                "model": "qwen3:8b",
                "prompt": prompt,
                "stream": False
            }

            try:
                # Encode explicitly: the payload contains non-ASCII text and a
                # str body would otherwise be encoded by the HTTP stack with
                # whatever default it uses.
                response = requests.post(
                    self.api_url,
                    headers=headers,
                    data=json.dumps(data, ensure_ascii=False).encode("utf-8"),
                    timeout=60
                )

                if response.status_code == 200:
                    result = response.json()
                    new_ai_response = result.get("response", "").strip()
                    logger.success(f"成功重新分析产品 '{name}'")

                    # Only the difficulty text and score of the parsed reply are used.
                    _, new_difficulty, new_difficulty_score = self.parse_ai_response(new_ai_response)

                    # A product described as very hard must score within 70-90.
                    difficulty_lower = new_difficulty.lower()
                    if any(keyword in difficulty_lower for keyword in hard_keywords):
                        if new_difficulty_score < 70:
                            new_difficulty_score = max(70, min(90, new_difficulty_score + 60))
                            logger.info(f"调整很难产品的难度分数为: {new_difficulty_score} (70-90区间)")

                    cursor.execute("""
                        UPDATE product_analysis
                        SET development_difficulty = ?,
                            difficulty_score = ?,
                            ai_response = ?
                        WHERE id = ?
                    """, (new_difficulty, new_difficulty_score, new_ai_response, analysis_id))
                    # Commit per record so earlier updates survive a later failure.
                    conn.commit()
                    updated_count += 1
                    logger.success(f"成功更新产品 '{name}' 的难度分数为 {new_difficulty_score}")
                else:
                    logger.error(f"API调用失败: {response.status_code}, {response.text}")
            except Exception as e:
                # Best effort: log the failure and continue with the next record.
                logger.error(f"重新分析产品 '{name}' 失败: {e}")

            # Pause between calls (not after the last one) to avoid hammering the API.
            if i < total:
                time.sleep(2)

        logger.success(f"无效难度评分重新分析完成! 成功更新 {updated_count} 条记录")
    except Exception as e:
        logger.error(f"重新分析无效难度评分过程中出错: {e}")
    finally:
        if conn:
            conn.close()
            logger.info("数据库连接已关闭")
async def run_full_workflow_async(self, max_products=None, analyze_only=False): async def run_full_workflow_async(self, max_products=None, analyze_only=False):
"""异步运行完整工作流程:抓取+分析+补充缺失分数+更新关注数""" """异步运行完整工作流程:抓取+分析+补充缺失分数+更新关注数+重新分析无效难度评分"""
logger.info("=== 开始全功能产品系统工作流程 ===") logger.info("=== 开始全功能产品系统工作流程 ===")
# 初始化数据库 # 初始化数据库
@@ -820,6 +919,10 @@ class IntegratedProductSystem:
logger.info("步骤4: 开始分析并更新产品关注数...") logger.info("步骤4: 开始分析并更新产品关注数...")
self.analyze_follower_counts() self.analyze_follower_counts()
# 步骤5: 重新分析invalid难度评分
logger.info("步骤5: 开始重新分析invalid难度评分...")
self.reanalyze_invalid_difficulty_scores()
logger.success("=== 全功能产品系统工作流程完成 ===") logger.success("=== 全功能产品系统工作流程完成 ===")
def run_full_workflow(self, max_products=None, analyze_only=False): def run_full_workflow(self, max_products=None, analyze_only=False):

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 354 KiB

After

Width:  |  Height:  |  Size: 374 KiB

View File

@@ -1,11 +1,11 @@
=== Product Hunt 产品信息 === === Product Hunt 产品信息 ===
产品名称: QuickWidgets 产品名称: Monifi
产品简介: The Quickwidgets is lightweight and diverse functions widgets 产品简介: An Excel alternative to manage your personal finances. Track income, expenses, and savings without spreadsheets. Free budgeting app with real-time reports and smart categorization.
制作人发言: 未获取 制作人发言:
用户数: 37 followers 用户数: 32 followers
提取时间: 2025-12-03 18:53:22 提取时间: 2025-12-04 21:05:56

File diff suppressed because it is too large Load Diff

Binary file not shown.

File diff suppressed because it is too large Load Diff