今日更新数据
This commit is contained in:
5810
2025年12月4日203727.txt
Normal file
5810
2025年12月4日203727.txt
Normal file
File diff suppressed because it is too large
Load Diff
BIN
debug_maker_link_failure.png
Normal file
BIN
debug_maker_link_failure.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 592 KiB |
File diff suppressed because it is too large
Load Diff
@@ -1,118 +0,0 @@
|
|||||||
#!/usr/bin/env python3
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
"""
|
|
||||||
填补product_analysis表中follows字段内容的脚本
|
|
||||||
用于将products表中的user_count转换为数字并更新到product_analysis.follows字段
|
|
||||||
"""
|
|
||||||
|
|
||||||
import sqlite3
|
|
||||||
import os
|
|
||||||
import sys
|
|
||||||
from loguru import logger
|
|
||||||
|
|
||||||
# 配置日志
|
|
||||||
logger.remove()
|
|
||||||
logger.add(sys.stderr, level="INFO", format="<green>{time:YYYY-MM-DD HH:mm:ss}</green> | <level>{level: <8}</level> | <cyan>{name}</cyan>:<cyan>{function}</cyan>:<cyan>{line}</cyan> - <level>{message}</level>")
|
|
||||||
|
|
||||||
class FollowsFiller:
|
|
||||||
"""用于填补follows字段内容的类"""
|
|
||||||
|
|
||||||
def __init__(self, db_path):
    """Remember the settings this filler works with.

    Args:
        db_path: Path of the SQLite database file; it is handed to
            ``sqlite3.connect`` by ``connect_to_database``.
    """
    # HTTP endpoint on localhost; presumably a local Ollama server
    # (TODO confirm) - none of the methods visible here read it.
    self.api_url = "http://localhost:11434/api/generate"
    self.db_path = db_path
|
|
||||||
|
|
||||||
def connect_to_database(self) -> sqlite3.Connection:
    """Open a connection to the SQLite database at ``self.db_path``.

    Returns:
        The newly opened connection. Closing it is left to the caller.

    Raises:
        Exception: Any error raised while connecting (or while logging the
            success message) is logged and then re-raised unchanged.
    """
    try:
        database = self.db_path
        connection = sqlite3.connect(database)
        logger.success(f"成功连接到数据库: {database}")
    except Exception as error:
        # Log for the operator, but let the caller decide how to react.
        logger.error(f"连接数据库失败: {error}")
        raise
    return connection
|
|
||||||
|
|
||||||
def check_table_structure(self) -> bool:
    """Verify that the database has the tables and column this script needs.

    The checks run in order: the ``products`` table exists, the
    ``product_analysis`` table exists, and ``product_analysis`` has a
    ``follows`` column. The first failing check is logged as an error and
    ends the verification.

    Returns:
        True when every check passes, False as soon as one fails.
    """
    logger.info("正在检查数据库表结构...")

    connection = self.connect_to_database()
    db_cursor = connection.cursor()

    try:
        # Both tables must exist before the column check makes sense.
        for table_name in ("products", "product_analysis"):
            db_cursor.execute(
                f"SELECT name FROM sqlite_master WHERE type='table' AND name='{table_name}'"
            )
            if db_cursor.fetchone() is None:
                logger.error(f"{table_name}表不存在")
                return False

        # The column name is read from index 1 of each table_info row.
        db_cursor.execute("PRAGMA table_info(product_analysis)")
        column_names = [row[1] for row in db_cursor.fetchall()]
        if 'follows' not in column_names:
            logger.error("product_analysis表没有follows字段")
            return False

        logger.success("数据库表结构检查通过")
        return True
    finally:
        # Runs on every exit path, including the early ``return False`` ones.
        connection.close()
|
|
||||||
|
|
||||||
def convert_user_count_to_number(self, user_count: str) -> "int | None":
    """Convert a follower-count text into an integer.

    Args:
        user_count: Follower-count text such as "53 followers" or
            "1.9K followers". Values that are not strings (for example an
            integer read from the database) are converted with ``str()``.

    Returns:
        The parsed count, or None when the input is None/blank or cannot
        be parsed (the failure is logged).
    """
    import re
    from decimal import Decimal

    text = "" if user_count is None else str(user_count).strip()
    if not text:
        logger.info(f"空的用户数量: {user_count}")
        return None

    # Multipliers for the unit suffixes, matched case-insensitively.
    multipliers = {"k": 1_000, "m": 1_000_000}

    try:
        # Drop a trailing "follower"/"followers" word and surrounding spaces.
        cleaned = re.sub(r'\s*followers?\s*$', '', text, flags=re.IGNORECASE)

        multiplier = multipliers.get(cleaned[-1:].lower())
        if multiplier is not None:
            # Decimal keeps the scaling exact; float arithmetic can land just
            # below the intended value (1.005 * 1000 -> 1004.9999999999999),
            # which int() would then truncate to 1004.
            magnitude = Decimal(cleaned[:-1].replace(",", ""))
            return int(magnitude * multiplier)

        # No unit suffix: keep only the digits (this also removes thousands
        # separators such as "1,234").
        return int(re.sub(r'[^\d]', '', cleaned))
    except (ValueError, ArithmeticError) as e:
        # ValueError: no digits / NaN; ArithmeticError covers
        # decimal.InvalidOperation and OverflowError.
        logger.error(f"转换用户数量失败: {user_count}, 错误: {e}")
        return None
|
|
||||||
|
|
||||||
def fill_follows_field(self):
|
|
||||||
"""填补product_analysis表中的follows字段内容"""
|
|
||||||
logger.info("=== 开始填补follows字段内容 ===")
|
|
||||||
|
|
||||||
conn = self.connect_to_database()
|
|
||||||
cursor = conn.cursor()
|
|
||||||
|
|
||||||
try:
|
|
||||||
# 查询所有产品及其对应的分析记录
|
|
||||||
cursor.execute("""
|
|
||||||
SELECT p.id, p.name, p.user_count, pa.id as analysis_id, pa.follows
|
|
||||||
FROM products p
|
|
||||||
LEFT JOIN product_analysis pa ON p.name = pa.original_name
|
|
||||||
WHERE pa.id IS NOT NULL
|
|
||||||
""")
|
|
||||||
|
|
||||||
products = cursor.fetchall()
|
|
||||||
logger.info(f"找到 {len(products)} 个产品及其分析记录")
|
|
||||||
|
|
||||||
if not products:
|
|
||||||
logger.info("没有发现需要填补follows字段的记录")
|
|
||||||
return
|
|
||||||
|
|
||||||
@@ -794,8 +794,107 @@ class IntegratedProductSystem:
|
|||||||
conn.close()
|
conn.close()
|
||||||
logger.info("数据库连接已关闭")
|
logger.info("数据库连接已关闭")
|
||||||
|
|
||||||
|
def reanalyze_invalid_difficulty_scores(self):
    """Re-run the AI difficulty analysis for rows whose difficulty_score is 1.

    Every ``product_analysis`` row with ``difficulty_score = 1`` is sent to
    the Ollama API again and its ``development_difficulty``,
    ``difficulty_score`` and ``ai_response`` columns are overwritten with the
    new result. Rows are committed one by one, so a failure on one row is
    logged and does not undo or stop the others. Errors raised outside the
    per-row work are logged and not re-raised; the connection is always
    closed.
    """
    logger.info("=== 开始重新分析无效难度评分 ===")

    conn = None
    try:
        conn = self.connect_to_database()
        cursor = conn.cursor()

        # Select the rows that still carry a difficulty_score of 1.
        cursor.execute("""
            SELECT id, original_name, product_intro, development_difficulty, ai_response
            FROM product_analysis
            WHERE difficulty_score = 1
        """)

        invalid_records = cursor.fetchall()
        total = len(invalid_records)
        logger.info(f"找到 {total} 条difficulty_score为1的记录需要重新分析")

        if not invalid_records:
            logger.info("没有发现需要重新分析的无效难度评分记录")
            return

        updated_count = 0
        # The stored difficulty text and AI response are selected but not
        # needed for the re-analysis, hence the throwaway names.
        for i, (analysis_id, name, introduction, _old_difficulty, _old_response) in enumerate(invalid_records, 1):
            logger.info(f"重新分析记录 {i}/{total}: {name}")

            if self._reanalyze_record_difficulty(conn, cursor, analysis_id, name, introduction):
                updated_count += 1

            # Pause between records (not after the last one) so the API is
            # not called too frequently.
            if i < total:
                time.sleep(2)

        logger.success(f"无效难度评分重新分析完成! 成功更新 {updated_count} 条记录")

    except Exception as e:
        logger.error(f"重新分析无效难度评分过程中出错: {e}")
    finally:
        if conn:
            conn.close()
            logger.info("数据库连接已关闭")


def _reanalyze_record_difficulty(self, conn, cursor, analysis_id, name, introduction):
    """Ask the model to re-rate one product and store the result.

    Args:
        conn: Open database connection, used to commit the update.
        cursor: Cursor on ``conn`` used to run the UPDATE.
        analysis_id: ``product_analysis.id`` of the row to update.
        name: Product name inserted into the prompt and log messages.
        introduction: Product introduction inserted into the prompt.

    Returns:
        True when the row was updated and committed; False when the API
        answered with a non-200 status or any step raised (both are logged).
    """
    logger.info(f"重新调用Ollama API分析产品难度: {name}")

    # Request payload in the Ollama generate format, focused on difficulty.
    prompt = f"这个是【{name}】,简介内容是【{introduction}】。请重新分析这个产品的开发难度,特别是对于一个人加上AI辅助能否开发这个产品,请详细回答。返回的内容是产品名称/产品简介/开发难度。返回的例子一:notion/这个是笔记产品等等/一个人开发难度较高"
    payload = {
        "model": "qwen3:8b",
        "prompt": prompt,
        "stream": False,
    }
    headers = {
        "Content-Type": "application/json"
    }

    try:
        response = requests.post(
            self.api_url,
            headers=headers,
            # Send explicit UTF-8 bytes: the prompt is non-ASCII, and a str
            # body may be encoded as ISO-8859-1 by the HTTP layer (depending
            # on the installed versions), which raises UnicodeEncodeError.
            data=json.dumps(payload, ensure_ascii=False).encode("utf-8"),
            timeout=60,
        )

        if response.status_code != 200:
            logger.error(f"API调用失败: {response.status_code}, {response.text}")
            return False

        new_ai_response = response.json().get("response", "").strip()
        logger.success(f"成功重新分析产品 '{name}'")

        # parse_ai_response yields a 3-tuple; its second and third items are
        # used here as the difficulty text and the numeric score.
        _, new_difficulty, new_difficulty_score = self.parse_ai_response(new_ai_response)

        # When the difficulty text reads as "hard", lift a score below 70
        # into the 70-90 band.
        difficulty_lower = new_difficulty.lower()
        if any(keyword in difficulty_lower for keyword in ('高', '很难', '非常难', '复杂', '困难')):
            if new_difficulty_score < 70:
                new_difficulty_score = max(70, min(90, new_difficulty_score + 60))
                logger.info(f"调整很难产品的难度分数为: {new_difficulty_score} (70-90区间)")

        cursor.execute("""
            UPDATE product_analysis
            SET development_difficulty = ?,
                difficulty_score = ?,
                ai_response = ?
            WHERE id = ?
        """, (new_difficulty, new_difficulty_score, new_ai_response, analysis_id))

        # Commit per record so finished rows survive a later failure.
        conn.commit()
        logger.success(f"成功更新产品 '{name}' 的难度分数为 {new_difficulty_score}")
        return True
    except Exception as e:
        # Best-effort per record: log and let the caller continue.
        logger.error(f"重新分析产品 '{name}' 失败: {e}")
        return False
|
||||||
|
|
||||||
async def run_full_workflow_async(self, max_products=None, analyze_only=False):
|
async def run_full_workflow_async(self, max_products=None, analyze_only=False):
|
||||||
"""异步运行完整工作流程:抓取+分析+补充缺失分数+更新关注数"""
|
"""异步运行完整工作流程:抓取+分析+补充缺失分数+更新关注数+重新分析无效难度评分"""
|
||||||
logger.info("=== 开始全功能产品系统工作流程 ===")
|
logger.info("=== 开始全功能产品系统工作流程 ===")
|
||||||
|
|
||||||
# 初始化数据库
|
# 初始化数据库
|
||||||
@@ -820,6 +919,10 @@ class IntegratedProductSystem:
|
|||||||
logger.info("步骤4: 开始分析并更新产品关注数...")
|
logger.info("步骤4: 开始分析并更新产品关注数...")
|
||||||
self.analyze_follower_counts()
|
self.analyze_follower_counts()
|
||||||
|
|
||||||
|
# 步骤5: 重新分析invalid难度评分
|
||||||
|
logger.info("步骤5: 开始重新分析invalid难度评分...")
|
||||||
|
self.reanalyze_invalid_difficulty_scores()
|
||||||
|
|
||||||
logger.success("=== 全功能产品系统工作流程完成 ===")
|
logger.success("=== 全功能产品系统工作流程完成 ===")
|
||||||
|
|
||||||
def run_full_workflow(self, max_products=None, analyze_only=False):
|
def run_full_workflow(self, max_products=None, analyze_only=False):
|
||||||
|
|||||||
Binary file not shown.
Binary file not shown.
|
Before Width: | Height: | Size: 354 KiB After Width: | Height: | Size: 374 KiB |
@@ -1,11 +1,11 @@
|
|||||||
=== Product Hunt 产品信息 ===
|
=== Product Hunt 产品信息 ===
|
||||||
|
|
||||||
产品名称: QuickWidgets
|
产品名称: Monifi
|
||||||
|
|
||||||
产品简介: The Quickwidgets is lightweight and diverse functions widgets
|
产品简介: An Excel alternative to manage your personal finances. Track income, expenses, and savings without spreadsheets. Free budgeting app with real-time reports and smart categorization.
|
||||||
|
|
||||||
制作人发言: 未获取
|
制作人发言:
|
||||||
|
|
||||||
用户数: 37 followers
|
用户数: 32 followers
|
||||||
|
|
||||||
提取时间: 2025-12-03 18:53:22
|
提取时间: 2025-12-04 21:05:56
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
BIN
tophub_data.db
BIN
tophub_data.db
Binary file not shown.
1099
tophub_scraper.log
1099
tophub_scraper.log
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user