102 lines
3.7 KiB
Python
102 lines
3.7 KiB
Python
# 调用智谱的api,修改每一个项目的分类
|
||
# 从db文件读取表,读取第二个项,标题,根据标题,提交到api,获取回复,返回,并更新到db文件
|
||
|
||
import os
import sqlite3
import time

from loguru import logger
from zhipuai import ZhipuAI
|
||
|
||
# Configure logging: in addition to loguru's default sink, write records of
# level INFO and above to a log file that is rotated at 10 MB.
logger.add("db_modify_zhipu.log", rotation="10 MB", level="INFO")

# Initialise the ZhipuAI client.
# The API key is read from the ZHIPUAI_API_KEY environment variable rather
# than being hard-coded, so the secret is never stored in the source file.
# NOTE(review): a key was previously committed here in plain text; it should
# be treated as leaked and rotated.
_api_key = os.environ.get("ZHIPUAI_API_KEY")
if not _api_key:
    # Fail fast with a clear message instead of failing on every request.
    raise RuntimeError("未设置环境变量 ZHIPUAI_API_KEY,无法初始化智谱客户端")
client = ZhipuAI(api_key=_api_key)
|
||
|
||
def get_simplified_category(title):
    """Ask the ZhipuAI chat API for a short category name for *title*.

    Args:
        title: Article title to classify.

    Returns:
        The category text returned by the model, with surrounding whitespace
        and quotation marks removed, or ``None`` when *title* is empty, the
        request fails, or the model returns no usable text.
    """
    # A missing or blank title cannot be classified, so skip the API call.
    # Previously a None title was sent to the model as the text "None" and
    # then failed on slicing, wasting one request per such record.
    if title is None or not str(title).strip():
        return None

    try:
        # Build and send the chat-completion request.
        response = client.chat.completions.create(
            model="glm-4-flash",
            messages=[
                {
                    "role": "system",
                    "content": "你是一个专业的分类助手。请根据文章标题,提供一个3-6个汉字的简化分类名称,去除空格和特殊符号,更容易理解,并保持原意。"
                },
                {
                    "role": "user",
                    "content": f"对以下文字内容进行分类,返回结果为类别,如\"社会新闻\",\"机器人\",\"金融\",\"历史\",\"购物\",\"新质生产力\"等等。目的:只返回2-6个汉字,不返回其它内容。内容:'{title}'"
                },
            ],
            temperature=0.7,
        )

        # Extract the reply text; the content field may be None.
        content = response.choices[0].message.content
        # The examples in the prompt are quoted, so the model may echo the
        # quotes back; strip them so only the bare category name is stored.
        category = (content or "").strip().strip("\"'“”‘’「」").strip()
        if not category:
            logger.error("获取分类失败: 模型返回内容为空")
            return None

        logger.info(f"标题: {str(title)[:30]}... -> 分类: {category}")
        return category

    except Exception as e:
        # Deliberate best-effort boundary: any API or response-shape failure
        # is logged and reported to the caller as None.
        logger.error(f"获取分类失败: {str(e)}")
        return None
|
||
|
||
def _is_simplified_category(category):
    """Return True when *category* already looks simplified.

    A category counts as simplified when it is a non-empty string of at most
    six characters that contains none of the separator/punctuation characters
    checked below. Non-string values (including NULL) are never simplified.
    """
    if not isinstance(category, str) or not category:
        return False
    if len(category) > 6:
        return False
    return not any(c in category for c in " ,.!?;:,。!?;:")


def update_database_categories(db_path='tophub_data.db', delay=1):
    """Rewrite the ``category`` of every article that is not yet simplified.

    Reads ``id``, ``title`` and ``category`` from the ``articles`` table,
    asks ``get_simplified_category`` for a new category for each record whose
    current category is not already simplified, and commits each successful
    update immediately so progress survives an interrupted run.

    Args:
        db_path: Path of the SQLite database file. Defaults to the file the
            script has always used.
        delay: Seconds to sleep after each API request to limit the request
            rate. Values <= 0 disable the pause.

    Returns:
        None. Progress and the final success/failure counts are logged.
    """
    conn = sqlite3.connect(db_path)

    try:
        cursor = conn.cursor()

        # Load every record up front; the loop below updates rows one by one.
        cursor.execute("SELECT id, title, category FROM articles")
        records = cursor.fetchall()

        logger.info(f"共找到 {len(records)} 条记录需要处理")

        updated_count = 0
        failed_count = 0

        for record_id, title, current_category in records:
            # Skip categories that are already short and punctuation-free.
            if _is_simplified_category(current_category):
                logger.info(f"跳过记录 {record_id},分类已简化: {current_category}")
                continue

            # A NULL title used to raise on slicing, which hit the outer
            # except and aborted the whole run; normalise it to text first.
            title_text = "" if title is None else str(title)
            logger.info(f"处理记录 {record_id}: {title_text[:30]}...")

            # Nothing to classify: count as a failure without an API call.
            if not title_text.strip():
                failed_count += 1
                logger.error(f"无法获取记录 {record_id} 的新分类")
                continue

            new_category = get_simplified_category(title_text)

            if new_category:
                cursor.execute(
                    "UPDATE articles SET category = ? WHERE id = ?",
                    (new_category, record_id),
                )
                # Commit per record so completed work is never lost.
                conn.commit()
                updated_count += 1
                logger.info(f"已更新记录 {record_id} 的分类为: {new_category}")
            else:
                failed_count += 1
                logger.error(f"无法获取记录 {record_id} 的新分类")

            # Pause between requests to avoid calling the API too frequently.
            if delay > 0:
                time.sleep(delay)

        logger.info(f"处理完成! 成功更新 {updated_count} 条记录,失败 {failed_count} 条记录")

    except Exception as e:
        # Discard any uncommitted statement; earlier per-record commits stay.
        logger.error(f"更新数据库时出错: {str(e)}")
        conn.rollback()
    finally:
        conn.close()
|
||
|
||
def main():
    """Run the category update once, logging the start and end of the run."""
    logger.info("开始更新数据库分类...")
    update_database_categories()
    logger.info("程序执行完成")


# Script entry point: only run the update when executed directly.
if __name__ == "__main__":
    main()
|