更新今日的数据内容以及更新readme的内容
This commit is contained in:
@@ -732,7 +732,7 @@ class IntegratedProductSystem:
|
||||
logger.info("数据库连接已关闭")
|
||||
|
||||
def analyze_follower_counts(self):
|
||||
"""分析并更新产品的关注数"""
|
||||
"""分析并更新产品的关注数,仅当follows字段为空或不存在时更新"""
|
||||
logger.info("=== 开始分析产品关注数 ===")
|
||||
|
||||
conn = None
|
||||
@@ -741,12 +741,14 @@ class IntegratedProductSystem:
|
||||
conn = self.connect_to_database()
|
||||
cursor = conn.cursor()
|
||||
|
||||
# 查询所有产品及其对应的分析记录
|
||||
# 查询所有产品及其对应的分析记录,仅包括follows字段为空或不存在的记录
|
||||
cursor.execute("""
|
||||
SELECT p.id, p.name, p.user_count, pa.id as analysis_id
|
||||
SELECT p.id, p.name, p.user_count, pa.id as analysis_id, pa.follows
|
||||
FROM products p
|
||||
LEFT JOIN product_analysis pa ON p.name = pa.original_name
|
||||
WHERE p.user_count IS NOT NULL AND p.user_count != ''
|
||||
AND pa.id IS NOT NULL
|
||||
AND (pa.follows IS NULL OR pa.follows = '')
|
||||
""")
|
||||
|
||||
products = cursor.fetchall()
|
||||
@@ -758,7 +760,7 @@ class IntegratedProductSystem:
|
||||
|
||||
# 为每个产品转换user_count并更新到product_analysis.follows
|
||||
updated_count = 0
|
||||
for i, (product_id, name, user_count, analysis_id) in enumerate(products, 1):
|
||||
for i, (product_id, name, user_count, analysis_id, current_follows) in enumerate(products, 1):
|
||||
logger.info(f"处理产品关注数 {i}/{len(products)}: {name}, 用户数: {user_count}")
|
||||
|
||||
if not analysis_id:
|
||||
@@ -893,8 +895,90 @@ class IntegratedProductSystem:
|
||||
conn.close()
|
||||
logger.info("数据库连接已关闭")
|
||||
|
||||
def fill_missing_product_links(self):
    """Fill empty ``product_link`` values in ``product_analysis`` from tophub_data.db.

    Rows of ``product_analysis`` whose ``product_link`` is NULL or ``''`` are
    matched against the Product Hunt URLs stored in the ``articles`` table of
    ``tophub_data.db`` (looked up in the parent of the directory containing
    this file). A URL matches when the row's ``original_name`` occurs in it,
    compared case-insensitively; the first matching URL wins.

    Rows whose ``original_name`` is NULL or blank are skipped: a NULL name
    cannot be compared, and an empty string is a substring of every URL, so
    it would otherwise be assigned an arbitrary (wrong) link.

    Each successful update is committed immediately, so progress made before
    a later failure is kept. Any exception is logged and swallowed; both
    database connections are always closed.

    Returns:
        None.
    """
    logger.info("=== 开始补全缺失的product_link字段 ===")

    # Bail out early if the auxiliary database is missing.
    tophub_db_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "tophub_data.db")
    if not os.path.exists(tophub_db_path):
        logger.error(f"tophub_data.db不存在: {tophub_db_path}")
        return

    conn_product = None
    conn_tophub = None
    try:
        # Open both databases.
        conn_product = self.connect_to_database()
        cursor_product = conn_product.cursor()

        conn_tophub = sqlite3.connect(tophub_db_path)
        cursor_tophub = conn_tophub.cursor()
        logger.success(f"成功连接到tophub_data.db: {tophub_db_path}")

        # Collect the analysis rows that still lack a link.
        cursor_product.execute("""
            SELECT id, original_name
            FROM product_analysis
            WHERE product_link IS NULL OR product_link = ''
        """)

        missing_link_records = cursor_product.fetchall()
        logger.info(f"找到 {len(missing_link_records)} 条product_link为空的记录需要补全")

        if not missing_link_records:
            logger.info("没有发现需要补全product_link的记录")
            return

        # Load every Product Hunt URL known to tophub_data.db.
        cursor_tophub.execute("SELECT url FROM articles WHERE url LIKE '%producthunt.com%'")
        tophub_urls = [row[0] for row in cursor_tophub.fetchall()]
        logger.info(f"从tophub_data.db获取到 {len(tophub_urls)} 个producthunt链接")

        if not tophub_urls:
            logger.error("从tophub_data.db中没有找到producthunt链接")
            return

        # Lower-case each URL once instead of once per (record, URL) pair.
        searchable_urls = [(url.lower(), url) for url in tophub_urls]

        updated_count = 0
        total = len(missing_link_records)
        for i, (analysis_id, original_name) in enumerate(missing_link_records, 1):
            logger.info(f"处理记录 {i}/{total}: {original_name}")

            # A NULL name would raise on .lower(); a blank name would match
            # every URL. Neither can yield a meaningful link, so skip them.
            needle = original_name.strip().lower() if isinstance(original_name, str) else ""
            if not needle:
                logger.warning(f"记录 {analysis_id} 的original_name为空,跳过")
                continue

            # Simple matching rule: the product name appears inside the URL.
            matched_url = next(
                (url for lowered, url in searchable_urls if needle in lowered),
                None,
            )

            if matched_url:
                cursor_product.execute("""
                    UPDATE product_analysis
                    SET product_link = ?
                    WHERE id = ?
                """, (matched_url, analysis_id))
                # Commit per row so earlier updates survive a later failure.
                conn_product.commit()
                updated_count += 1
                logger.success(f"成功为产品 '{original_name}' 补全链接: {matched_url}")
            else:
                logger.warning(f"无法为产品 '{original_name}' 找到匹配的链接")

        logger.success(f"product_link补全完成! 成功更新 {updated_count} 条记录")

    except Exception as e:
        logger.error(f"补全product_link过程中出错: {e}")
    finally:
        # Always release both connections, whichever path was taken.
        if conn_product:
            conn_product.close()
        if conn_tophub:
            conn_tophub.close()
        logger.info("数据库连接已关闭")
|
||||
|
||||
async def run_full_workflow_async(self, max_products=None, analyze_only=False):
|
||||
"""异步运行完整工作流程:抓取+分析+补充缺失分数+更新关注数+重新分析无效难度评分"""
|
||||
"""异步运行完整工作流程:抓取+分析+补充缺失分数+更新关注数+重新分析无效难度评分+补全product_link"""
|
||||
logger.info("=== 开始全功能产品系统工作流程 ===")
|
||||
|
||||
# 初始化数据库
|
||||
@@ -923,6 +1007,10 @@ class IntegratedProductSystem:
|
||||
logger.info("步骤5: 开始重新分析invalid难度评分...")
|
||||
self.reanalyze_invalid_difficulty_scores()
|
||||
|
||||
# 步骤6: 补全缺失的product_link字段
|
||||
logger.info("步骤6: 开始补全缺失的product_link字段...")
|
||||
self.fill_missing_product_links()
|
||||
|
||||
logger.success("=== 全功能产品系统工作流程完成 ===")
|
||||
|
||||
def run_full_workflow(self, max_products=None, analyze_only=False):
|
||||
|
||||
Reference in New Issue
Block a user