102 lines
3.7 KiB
Python
102 lines
3.7 KiB
Python
|
|
# Call the Zhipu API to rewrite the category of every article.
|
|||
|
|
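# Read the articles table from the SQLite db file, send each row's title (the second column) to the API, and write the returned category back to the db file.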
|
|||
|
|
|
|||
|
|
import os
import sqlite3
import time

from loguru import logger
from zhipuai import ZhipuAI
|
|||
|
|
|
|||
|
|
# Configure logging: add a file sink that receives INFO-and-above records and
# rotates the log file at 10 MB.
logger.add("db_modify_zhipu.log", rotation="10 MB", level="INFO")

# Initialise the API client.
# The API key is read from the ZHIPUAI_API_KEY environment variable so that the
# secret is never committed to source control.
# NOTE(review): the key that used to be hard-coded on this line must be treated
# as leaked and rotated in the Zhipu console.
# NOTE(review): when the variable is unset, None is passed as api_key;
# presumably the SDK rejects that at construction time - confirm.
client = ZhipuAI(api_key=os.environ.get("ZHIPUAI_API_KEY"))
|
|||
|
|
|
|||
|
|
def get_simplified_category(title):
    """Ask the Zhipu chat API for a short category name for an article title.

    Args:
        title: Article title to classify. Non-string values are converted
            with ``str()`` before use.

    Returns:
        The model's reply with surrounding whitespace stripped, or ``None``
        when the title is missing or blank, the reply is empty, or the
        request raises an exception.
    """
    # Validate before the request: a missing or blank title cannot be
    # classified, so do not spend an API call on it.
    title_text = "" if title is None else str(title)
    if not title_text.strip():
        logger.error("获取分类失败: 标题为空")
        return None

    try:
        # Build the chat-completion request: the system message fixes the
        # assistant's role, the user message carries the title to classify.
        response = client.chat.completions.create(
            model="glm-4-flash",
            messages=[
                {
                    "role": "system",
                    "content": "你是一个专业的分类助手。请根据文章标题,提供一个3-6个汉字的简化分类名称,去除空格和特殊符号,更容易理解,并保持原意。",
                },
                {
                    "role": "user",
                    "content": f"对以下文字内容进行分类,返回结果为类别,如\"社会新闻\",\"机器人\",\"金融\",\"历史\",\"购物\",\"新质生产力\"等等。目的:只返回2-6个汉字,不返回其它内容。内容:'{title_text}'",
                },
            ],
            temperature=0.7,
        )

        # Extract the reply text; the content may be None or whitespace only,
        # which is reported as a failure instead of being returned.
        content = response.choices[0].message.content
        category = content.strip() if content else ""
        if not category:
            logger.error("获取分类失败: 返回内容为空")
            return None

        logger.info(f"标题: {title_text[:30]}... -> 分类: {category}")
        return category

    except Exception as e:
        # Best-effort by design: any request or parsing failure is logged and
        # signalled to the caller as None rather than propagated.
        logger.error(f"获取分类失败: {str(e)}")
        return None
|
|||
|
|
|
|||
|
|
def update_database_categories(db_path='tophub_data.db'):
    """Rewrite the ``category`` column of the ``articles`` table.

    Every row whose category is not already short and punctuation-free has
    its title sent to ``get_simplified_category``; a non-empty result is
    written back and committed immediately, so progress made before an
    interruption is kept.

    Args:
        db_path: Path of the SQLite database file. Defaults to the file the
            script has always used.

    Returns:
        None. Progress and the final updated/failed counts are logged.
    """
    # Characters whose presence marks a category as "not yet simplified".
    special_chars = " ,.!?;:,。!?;:"

    # Connect to the database.
    conn = sqlite3.connect(db_path)

    try:
        cursor = conn.cursor()

        # Load every record up front so the loop below can commit freely.
        cursor.execute("SELECT id, title, category FROM articles")
        records = cursor.fetchall()

        logger.info(f"共找到 {len(records)} 条记录需要处理")

        updated_count = 0
        failed_count = 0

        for record_id, title, current_category in records:
            # Skip categories that are already simplified (at most 6
            # characters and no special characters). The isinstance check
            # keeps a non-text value from raising in len().
            if (
                isinstance(current_category, str)
                and current_category
                and len(current_category) <= 6
                and not any(c in current_category for c in special_chars)
            ):
                logger.info(f"跳过记录 {record_id},分类已简化: {current_category}")
                continue

            # A NULL or blank title must not abort the whole run: count the
            # row as failed and move on without calling the API.
            title_text = "" if title is None else str(title)
            if not title_text.strip():
                failed_count += 1
                logger.error(f"记录 {record_id} 的标题为空,已跳过")
                continue

            logger.info(f"处理记录 {record_id}: {title_text[:30]}...")

            # Ask the API for the new category.
            new_category = get_simplified_category(title_text)

            if new_category:
                # Persist right away so each success survives a later error.
                cursor.execute(
                    "UPDATE articles SET category = ? WHERE id = ?",
                    (new_category, record_id),
                )
                conn.commit()
                updated_count += 1
                logger.info(f"已更新记录 {record_id} 的分类为: {new_category}")
            else:
                failed_count += 1
                logger.error(f"无法获取记录 {record_id} 的新分类")

            # Pause between API calls to avoid hitting the service too often.
            time.sleep(1)

        logger.info(f"处理完成! 成功更新 {updated_count} 条记录,失败 {failed_count} 条记录")

    except Exception as e:
        # Only the uncommitted statement (if any) is rolled back; rows
        # committed earlier in the loop stay updated.
        logger.error(f"更新数据库时出错: {str(e)}")
        conn.rollback()
    finally:
        conn.close()
|
|||
|
|
|
|||
|
|
def _run():
    """Script entry point: log start, update all categories, log completion."""
    logger.info("开始更新数据库分类...")
    update_database_categories()
    logger.info("程序执行完成")


if __name__ == "__main__":
    _run()
|