更新今日数据
This commit is contained in:
@@ -946,11 +946,46 @@ class IntegratedProductSystem:
|
||||
|
||||
# 查找匹配的URL
|
||||
matched_url = None
|
||||
|
||||
# 将产品名称转换为URL友好格式
|
||||
import re
|
||||
# 先转为小写;随后移除特殊字符,并将空格、点号替换为连字符
|
||||
url_friendly_name = original_name.lower()
|
||||
# 移除除英文字母、数字、空白、点号和连字符以外的所有字符(中文等非ASCII字符也会被移除,结果可能为空字符串)
|
||||
url_friendly_name = re.sub(r'[^a-zA-Z0-9\s.-]', '', url_friendly_name)
|
||||
# 将空格、点号替换为连字符
|
||||
url_friendly_name = re.sub(r'[\s.]+', '-', url_friendly_name)
|
||||
# 移除多余的连字符
|
||||
url_friendly_name = re.sub(r'-+', '-', url_friendly_name).strip('-')
|
||||
|
||||
logger.debug(f"URL友好名称: {url_friendly_name}")
|
||||
|
||||
# 尝试多种匹配方式
|
||||
for url in tophub_urls:
|
||||
# 简单的匹配逻辑:如果产品名称在URL中出现
|
||||
if original_name.lower() in url.lower():
|
||||
url_lower = url.lower()
|
||||
|
||||
# 方式1: URL友好名称作为子串出现在小写化的URL任意位置(并未限定在URL路径中的产品部分)
|
||||
if url_friendly_name in url_lower:
|
||||
matched_url = url
|
||||
logger.debug(f"匹配方式1成功: {url}")
|
||||
break
|
||||
|
||||
# 方式2: 检查URL是否包含产品名称的主要部分(按连字符分割)
|
||||
name_parts = url_friendly_name.split('-')
|
||||
# 如果名称包含至少2个部分,检查前两个部分是否都在URL中
|
||||
if len(name_parts) >= 2:
|
||||
if name_parts[0] in url_lower and name_parts[1] in url_lower:
|
||||
matched_url = url
|
||||
logger.debug(f"匹配方式2成功: {url}")
|
||||
break
|
||||
|
||||
# 方式3: 检查产品名称的前3个部分是否都在URL中(针对超过3个部分的较长名称);注意:满足此条件的URL必然已被方式2匹配,因此该分支实际上不会被触发
|
||||
if len(name_parts) > 3:
|
||||
# 检查前3个主要部分
|
||||
if all(part in url_lower for part in name_parts[:3]):
|
||||
matched_url = url
|
||||
logger.debug(f"匹配方式3成功: {url}")
|
||||
break
|
||||
|
||||
if matched_url:
|
||||
# 更新product_link字段
|
||||
|
||||
Reference in New Issue
Block a user