Files
guba-indicator/llm_analyzer.py

163 lines
5.4 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
大模型分析模块 - 调用LLM API分析评论情感
"""
import json
import time
import re
from typing import Dict, Optional, Tuple
from openai import OpenAI, OpenAIError
class LLMAnalyzer:
    """Sentiment analyzer that asks an OpenAI-compatible chat model to score comments.

    The model is instructed (see ``SYSTEM_PROMPT``) to reply with a JSON object
    holding an integer ``score`` in 0-100 and a short ``label``.  Replies that
    are not clean JSON are still mined for a score/label on a best-effort basis.
    """

    SYSTEM_PROMPT = """你是一个专业的情感分析助手。你的任务是分析股吧/论坛评论的情感倾向,判断投资者对该股票的态度。
评分规则:
- 0-30: 极度悲观/看空(利空、暴跌、绝望等情绪)
- 31-50: 偏悲观/中性(担忧、谨慎、观望等情绪)
- 51-70: 偏乐观/中性(看好、希望、期待等情绪)
- 71-100: 极度乐观/看涨(利好、暴涨、兴奋等情绪)
请直接输出一个JSON格式的结果包含两个字段
- score: 0-100的整数评分
- label: 简短的态度描述(如"强烈看跌""谨慎观望""温和看涨""强烈看涨"等)
注意:
1. 只返回JSON不要有其他文字
2. 如果无法判断返回50和"无法判断"
3. 分析要客观,不要被表面文字迷惑
"""

    # ASCII quotes plus the CJK / full-width quote marks a model may emit.
    # Written as escapes so the source contains no look-alike characters.
    _QUOTE_CHARS = "\"'\u201c\u201d\u2018\u2019\u300c\u300d"
    _QUOTED_RE = re.compile(
        "[{0}]([^{0}]+)[{0}]".format(re.escape(_QUOTE_CHARS))
    )
    # First "{" through last "}" - pulls a JSON object out of code fences/prose.
    _JSON_OBJECT_RE = re.compile(r"\{.*\}", re.DOTALL)
    # A 1-3 digit run not touching other digits.  Lookarounds are used instead
    # of \b because CJK characters count as \w, so \b never fires in "评分75分".
    _NUMBER_RE = re.compile(r"(?<!\d)\d{1,3}(?!\d)")
    # JSON key names that must not be mistaken for the label in the text fallback.
    _JSON_KEYS = frozenset({"score", "label"})

    def __init__(self, config: Dict):
        """Store settings from *config* and build a client if an API key is present.

        Args:
            config: Mapping that may contain ``base_url``, ``api_key``,
                ``model``, ``timeout`` and ``retry_times``; missing keys fall
                back to the defaults below.  The mapping is kept by reference.
        """
        self.config = config
        self.base_url = config.get('base_url', 'https://api.openai.com/v1')
        self.api_key = config.get('api_key', '')
        self.model = config.get('model', 'gpt-3.5-turbo')
        self.timeout = config.get('timeout', 30)
        self.retry_times = config.get('retry_times', 3)
        self.client = None
        if self.api_key:
            self._init_client()

    def _init_client(self):
        """Create the OpenAI client from the current settings.

        Construction errors are printed rather than raised; in that case
        ``self.client`` keeps whatever value it had before the call.
        """
        try:
            self.client = OpenAI(
                api_key=self.api_key,
                base_url=self.base_url,
                timeout=self.timeout
            )
        except Exception as e:
            print(f"初始化LLM客户端失败: {e}")

    def update_config(self, config: Dict):
        """Merge *config* into the stored settings and rebuild the client.

        Only keys present in *config* override the current values.  When the
        resulting API key is empty the client is dropped, so ``analyze`` stops
        issuing requests with a stale key and agrees with ``is_configured``.
        """
        self.config.update(config)
        self.base_url = config.get('base_url', self.base_url)
        self.api_key = config.get('api_key', self.api_key)
        self.model = config.get('model', self.model)
        self.timeout = config.get('timeout', self.timeout)
        self.retry_times = config.get('retry_times', self.retry_times)
        if self.api_key:
            self._init_client()
        else:
            self.client = None

    def analyze(self, comment: str) -> Tuple[Optional[int], Optional[str]]:
        """Score a single comment.

        Args:
            comment: The comment text to analyze.

        Returns:
            ``(score, label)`` on success.  On failure the score is ``None``
            and the second item is a short reason: no client configured, empty
            comment, or every attempt failed.
        """
        if not self.client:
            return None, "LLM未配置"
        if not comment or not comment.strip():
            return None, "评论为空"

        for attempt in range(self.retry_times):
            try:
                response = self.client.chat.completions.create(
                    model=self.model,
                    messages=[
                        {"role": "system", "content": self.SYSTEM_PROMPT},
                        {"role": "user", "content": f"请分析以下评论的情感倾向:\n\n{comment}"}
                    ],
                    temperature=0.3,
                    max_tokens=200
                )
                # The content field can be None; treat that as an empty reply
                # so the attempt counts as unparseable and is retried instead
                # of raising and aborting the whole loop.
                content = response.choices[0].message.content
                score, label = self._parse_response((content or "").strip())
                if score is not None:
                    return score, label
            except OpenAIError as e:
                print(f"API调用失败 (尝试 {attempt + 1}/{self.retry_times}): {e}")
                if attempt < self.retry_times - 1:
                    time.sleep(2 ** attempt)  # exponential backoff: 1s, 2s, 4s, ...
            except Exception as e:
                # Anything that is not an API error is not expected to be
                # fixed by retrying, so stop immediately.
                print(f"分析过程出错: {e}")
                break
        return None, "分析失败"

    def _load_json_object(self, text: str) -> Optional[dict]:
        """Return the JSON object found in *text*, or ``None`` if there is none.

        The whole text is tried first, then the span from the first ``{`` to
        the last ``}`` (covers replies wrapped in a Markdown fence or prose).
        Valid JSON that is not an object (a bare number, a list) is ignored.
        """
        candidates = [text]
        embedded = self._JSON_OBJECT_RE.search(text)
        if embedded is not None and embedded.group() != text:
            candidates.append(embedded.group())
        for candidate in candidates:
            try:
                parsed = json.loads(candidate)
            except ValueError:  # json.JSONDecodeError is a ValueError subclass
                continue
            if isinstance(parsed, dict):
                return parsed
        return None

    def _coerce_score(self, value) -> Optional[int]:
        """Turn a raw ``score`` value into an int clamped to 0-100, else ``None``."""
        if isinstance(value, bool):
            # bool is an int subclass; True/False is never a meaningful score.
            return None
        try:
            number = int(float(value))
        except (TypeError, ValueError, OverflowError):
            if not isinstance(value, str):
                return None
            # Strings such as "75分": take the first short digit run.
            found = self._NUMBER_RE.search(value)
            if found is None:
                return None
            number = int(found.group())
        return max(0, min(100, number))

    def _parse_response(self, response: str) -> Tuple[Optional[int], Optional[str]]:
        """Extract ``(score, label)`` from the model's reply.

        A JSON object (bare, fenced or embedded in prose) is preferred; a
        missing ``score`` defaults to 50 and a missing/blank ``label`` to the
        "cannot judge" text.  Otherwise the first 1-3 digit number in the text
        is used as the score and the first quoted string that is not a JSON
        key name as the label, falling back to the first 20 characters of the
        first line.

        Returns:
            ``(score, label)`` with the score clamped to 0-100, or
            ``(None, "解析失败")`` when no usable score can be found.
        """
        text = (response or "").strip()

        payload = self._load_json_object(text)
        if payload is not None:
            score = self._coerce_score(payload.get('score', 50))
            if score is None:
                return None, "解析失败"
            label = payload.get('label', '无法判断')
            if not isinstance(label, str) or not label.strip():
                label = '无法判断'
            return score, label

        # Free-text fallback.
        number = self._NUMBER_RE.search(text)
        if number is None:
            return None, "解析失败"
        score = max(0, min(100, int(number.group())))
        label = next(
            (
                quoted
                for quoted in self._QUOTED_RE.findall(text)
                if quoted.strip().lower() not in self._JSON_KEYS
            ),
            None,
        )
        if label is None:
            label = text.split('\n')[0][:20]
        return score, label

    def analyze_batch(self, comments: list, delay: float = 1.0) -> list:
        """Analyze comments one after another.

        Args:
            comments: Comment strings to analyze.
            delay: Seconds to sleep between consecutive calls (none after the
                last one; ``0`` or less disables the pause).

        Returns:
            One dict per comment, in input order, with keys ``content``,
            ``score`` and ``label``.
        """
        results = []
        total = len(comments)
        for position, comment in enumerate(comments, start=1):
            print(f"分析评论 {position}/{total}...")
            score, label = self.analyze(comment)
            results.append({
                'content': comment,
                'score': score,
                'label': label
            })
            if delay > 0 and position < total:
                time.sleep(delay)
        return results

    def is_configured(self) -> bool:
        """Return True when both a client and a non-empty API key are set."""
        return bool(self.client and self.api_key)