更新今日的数据内容以及更新readme的内容

This commit is contained in:
2025-12-05 18:11:59 +08:00
parent deea6764cf
commit afeb00ccc4
12 changed files with 10620 additions and 15490 deletions

View File

@@ -732,7 +732,7 @@ class IntegratedProductSystem:
logger.info("数据库连接已关闭")
def analyze_follower_counts(self):
"""分析并更新产品的关注数"""
"""分析并更新产品的关注数仅当follows字段为空或不存在时更新"""
logger.info("=== 开始分析产品关注数 ===")
conn = None
@@ -741,12 +741,14 @@ class IntegratedProductSystem:
conn = self.connect_to_database()
cursor = conn.cursor()
# 查询所有产品及其对应的分析记录
# 查询所有产品及其对应的分析记录仅包括follows字段为空或不存在的记录
cursor.execute("""
SELECT p.id, p.name, p.user_count, pa.id as analysis_id
SELECT p.id, p.name, p.user_count, pa.id as analysis_id, pa.follows
FROM products p
LEFT JOIN product_analysis pa ON p.name = pa.original_name
WHERE p.user_count IS NOT NULL AND p.user_count != ''
AND pa.id IS NOT NULL
AND (pa.follows IS NULL OR pa.follows = '')
""")
products = cursor.fetchall()
@@ -758,7 +760,7 @@ class IntegratedProductSystem:
# 为每个产品转换user_count并更新到product_analysis.follows
updated_count = 0
for i, (product_id, name, user_count, analysis_id) in enumerate(products, 1):
for i, (product_id, name, user_count, analysis_id, current_follows) in enumerate(products, 1):
logger.info(f"处理产品关注数 {i}/{len(products)}: {name}, 用户数: {user_count}")
if not analysis_id:
@@ -893,8 +895,90 @@ class IntegratedProductSystem:
conn.close()
logger.info("数据库连接已关闭")
def fill_missing_product_links(self):
    """Backfill empty ``product_analysis.product_link`` values from tophub_data.db.

    ``tophub_data.db`` is looked up in the parent of the directory that
    contains this file. Every ``articles.url`` containing ``producthunt.com``
    is loaded from it, and each ``product_analysis`` row whose
    ``product_link`` is NULL or empty receives the first URL that contains
    the row's ``original_name`` (case-insensitive substring match).

    Rows whose ``original_name`` is NULL or blank are skipped. An empty
    string is a substring of every URL, so matching on it would assign an
    unrelated link, and ``None`` has no ``lower()`` and would abort the
    whole run.

    Returns:
        None. Progress and failures are reported through ``logger``; any
        exception raised during the run is logged and not propagated.
    """
    logger.info("=== 开始补全缺失的product_link字段 ===")

    # Bail out early when the source database is not present.
    tophub_db_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "tophub_data.db")
    if not os.path.exists(tophub_db_path):
        logger.error(f"tophub_data.db不存在: {tophub_db_path}")
        return

    conn_product = None
    conn_tophub = None
    try:
        # Open both databases: the product database and the tophub source.
        conn_product = self.connect_to_database()
        cursor_product = conn_product.cursor()
        conn_tophub = sqlite3.connect(tophub_db_path)
        cursor_tophub = conn_tophub.cursor()
        logger.success(f"成功连接到tophub_data.db: {tophub_db_path}")

        # Rows that still need a product_link.
        cursor_product.execute("""
            SELECT id, original_name
            FROM product_analysis
            WHERE product_link IS NULL OR product_link = ''
        """)
        missing_link_records = cursor_product.fetchall()
        logger.info(f"找到 {len(missing_link_records)} 条product_link为空的记录需要补全")
        if not missing_link_records:
            logger.info("没有发现需要补全product_link的记录")
            return

        # Candidate Product Hunt URLs from the tophub database.
        cursor_tophub.execute("SELECT url FROM articles WHERE url LIKE '%producthunt.com%'")
        tophub_urls = [row[0] for row in cursor_tophub.fetchall()]
        logger.info(f"从tophub_data.db获取到 {len(tophub_urls)} 个producthunt链接")
        if not tophub_urls:
            logger.error("从tophub_data.db中没有找到producthunt链接")
            return

        # Lower-case each URL once instead of once per (record, URL) pair.
        lowered_urls = [(url, url.lower()) for url in tophub_urls]

        updated_count = 0
        total = len(missing_link_records)
        for i, (analysis_id, original_name) in enumerate(missing_link_records, 1):
            logger.info(f"处理记录 {i}/{total}: {original_name}")

            # Normalise the name; NULL and blank names cannot be matched safely.
            needle = "" if original_name is None else str(original_name).strip().lower()
            if not needle:
                logger.warning(f"记录 {analysis_id} 的original_name为空,跳过")
                continue

            # Simple matching rule: first URL that contains the product name.
            # NOTE(review): names containing spaces will not match hyphenated
            # URL slugs — confirm whether slug normalisation is wanted here.
            matched_url = next(
                (url for url, lowered in lowered_urls if needle in lowered),
                None,
            )

            if matched_url is None:
                logger.warning(f"无法为产品 '{original_name}' 找到匹配的链接")
                continue

            cursor_product.execute("""
                UPDATE product_analysis
                SET product_link = ?
                WHERE id = ?
            """, (matched_url, analysis_id))
            # Commit per row so earlier updates survive a later failure.
            conn_product.commit()
            updated_count += 1
            logger.success(f"成功为产品 '{original_name}' 补全链接: {matched_url}")

        logger.success(f"product_link补全完成! 成功更新 {updated_count} 条记录")
    except Exception as e:
        logger.error(f"补全product_link过程中出错: {e}")
    finally:
        # Always release both connections, including on early return.
        if conn_product:
            conn_product.close()
        if conn_tophub:
            conn_tophub.close()
        logger.info("数据库连接已关闭")
async def run_full_workflow_async(self, max_products=None, analyze_only=False):
"""异步运行完整工作流程:抓取+分析+补充缺失分数+更新关注数+重新分析无效难度评分"""
"""异步运行完整工作流程:抓取+分析+补充缺失分数+更新关注数+重新分析无效难度评分+补全product_link"""
logger.info("=== 开始全功能产品系统工作流程 ===")
# 初始化数据库
@@ -923,6 +1007,10 @@ class IntegratedProductSystem:
logger.info("步骤5: 开始重新分析invalid难度评分...")
self.reanalyze_invalid_difficulty_scores()
# 步骤6: 补全缺失的product_link字段
logger.info("步骤6: 开始补全缺失的product_link字段...")
self.fill_missing_product_links()
logger.success("=== 全功能产品系统工作流程完成 ===")
def run_full_workflow(self, max_products=None, analyze_only=False):