Initial commit: 股吧人气指示器 - PySide6桌面悬浮工具
This commit is contained in:
163
llm_analyzer.py
Normal file
163
llm_analyzer.py
Normal file
@@ -0,0 +1,163 @@
|
||||
"""
|
||||
大模型分析模块 - 调用LLM API分析评论情感
|
||||
"""
|
||||
import json
|
||||
import time
|
||||
import re
|
||||
from typing import Dict, Optional, Tuple
|
||||
from openai import OpenAI, OpenAIError
|
||||
|
||||
|
||||
class LLMAnalyzer:
    """Sentiment analyzer that scores forum comments via a chat-completion API.

    The analyzer sends each comment to an OpenAI-compatible endpoint together
    with ``SYSTEM_PROMPT`` and expects a JSON reply containing ``score``
    (0-100) and ``label`` (short textual stance description).

    Recognised config keys (all optional): ``base_url``, ``api_key``,
    ``model``, ``timeout``, ``retry_times``.
    """

    SYSTEM_PROMPT = """你是一个专业的情感分析助手。你的任务是分析股吧/论坛评论的情感倾向,判断投资者对该股票的态度。

评分规则:
- 0-30: 极度悲观/看空(利空、暴跌、绝望等情绪)
- 31-50: 偏悲观/中性(担忧、谨慎、观望等情绪)
- 51-70: 偏乐观/中性(看好、希望、期待等情绪)
- 71-100: 极度乐观/看涨(利好、暴涨、兴奋等情绪)

请直接输出一个JSON格式的结果,包含两个字段:
- score: 0-100的整数评分
- label: 简短的态度描述(如"强烈看跌"、"谨慎观望"、"温和看涨"、"强烈看涨"等)

注意:
1. 只返回JSON,不要有其他文字
2. 如果无法判断,返回50和"无法判断"
3. 分析要客观,不要被表面文字迷惑
"""

    def __init__(self, config: Dict):
        """Store the configuration and create the API client if a key is set.

        Args:
            config: Settings dictionary; missing keys fall back to defaults.
        """
        self.config = config
        self.base_url = config.get('base_url', 'https://api.openai.com/v1')
        self.api_key = config.get('api_key', '')
        self.model = config.get('model', 'gpt-3.5-turbo')
        self.timeout = config.get('timeout', 30)
        self.retry_times = config.get('retry_times', 3)

        self.client = None
        if self.api_key:
            self._init_client()

    def _init_client(self):
        """Create the OpenAI client from the current settings.

        On failure the error is printed and ``self.client`` is reset to
        ``None`` so that a stale client built from older settings is never
        used by mistake.
        """
        try:
            self.client = OpenAI(
                api_key=self.api_key,
                base_url=self.base_url,
                timeout=self.timeout
            )
        except Exception as e:
            self.client = None
            print(f"初始化LLM客户端失败: {e}")

    def update_config(self, config: Dict):
        """Merge new settings and rebuild the client accordingly.

        Args:
            config: Partial settings; keys that are absent keep their
                current values.
        """
        self.config.update(config)
        self.base_url = config.get('base_url', self.base_url)
        self.api_key = config.get('api_key', self.api_key)
        self.model = config.get('model', self.model)
        self.timeout = config.get('timeout', self.timeout)
        self.retry_times = config.get('retry_times', self.retry_times)

        if self.api_key:
            self._init_client()
        else:
            # The key was removed: drop the old client, otherwise analyze()
            # would keep calling the API with the previous credentials.
            self.client = None

    def analyze(self, comment: str) -> Tuple[Optional[int], Optional[str]]:
        """Analyze a single comment.

        Args:
            comment: Raw comment text.

        Returns:
            ``(score, label)``. ``score`` is ``None`` when the analyzer is
            not configured, the comment is blank, or every attempt failed;
            ``label`` then carries the reason.
        """
        if not self.client:
            return None, "LLM未配置"

        if not comment or not comment.strip():
            return None, "评论为空"

        for attempt in range(self.retry_times):
            try:
                response = self.client.chat.completions.create(
                    model=self.model,
                    messages=[
                        {"role": "system", "content": self.SYSTEM_PROMPT},
                        {"role": "user", "content": f"请分析以下评论的情感倾向:\n\n{comment}"}
                    ],
                    temperature=0.3,
                    max_tokens=200
                )

                # content may be None; treat that as an empty reply so the
                # attempt counts as a parse failure and can be retried.
                result_text = (response.choices[0].message.content or "").strip()
                score, label = self._parse_response(result_text)

                if score is not None:
                    return score, label

            except OpenAIError as e:
                print(f"API调用失败 (尝试 {attempt + 1}/{self.retry_times}): {e}")
                if attempt < self.retry_times - 1:
                    time.sleep(2 ** attempt)  # exponential backoff
            except Exception as e:
                # Unexpected errors are not retried.
                print(f"分析过程出错: {e}")
                break

        return None, "分析失败"

    @staticmethod
    def _coerce_score(value) -> Optional[int]:
        """Convert *value* to an int clamped to 0-100, or ``None`` if impossible."""
        try:
            return max(0, min(100, int(float(value))))
        except (TypeError, ValueError, OverflowError):
            return None

    def _parse_response(self, response: str) -> Tuple[Optional[int], Optional[str]]:
        """Extract ``(score, label)`` from the model's reply.

        Parsing is attempted in this order:

        1. the whole reply as JSON;
        2. the first ``{...}`` span inside the reply (covers replies wrapped
           in Markdown code fences or surrounded by extra text);
        3. free text: first 1-3 digit number as score, first quoted string
           (or the first 20 characters of the first line) as label.

        Returns:
            ``(score, label)`` with ``score`` clamped to 0-100, or
            ``(None, "解析失败")`` when nothing usable is found.
        """
        if not response:
            return None, "解析失败"

        candidates = [response]
        embedded = re.search(r'\{.*\}', response, re.DOTALL)
        if embedded and embedded.group(0) != response:
            candidates.append(embedded.group(0))

        for candidate in candidates:
            try:
                result = json.loads(candidate)
            except json.JSONDecodeError:
                continue
            # Only a JSON object can carry the expected fields.
            if not isinstance(result, dict):
                continue
            score = self._coerce_score(result.get('score', 50))
            if score is None:
                continue
            label = result.get('label', '无法判断')
            if not isinstance(label, str):
                label = str(label)
            return score, label

        # Fall back to pulling a number out of free text.
        numbers = re.findall(r'\b(\d{1,3})\b', response)
        if numbers:
            score = max(0, min(100, int(numbers[0])))

            # Prefer a quoted phrase as the label.
            label_match = re.search(r'["\']([^"\']+)["\']', response)
            if label_match:
                label = label_match.group(1)
            else:
                label = response.split('\n')[0][:20] if response else '无法判断'

            return score, label

        return None, "解析失败"

    def analyze_batch(self, comments: list, delay: float = 1.0) -> list:
        """Analyze several comments sequentially.

        Args:
            comments: Comment strings to analyze.
            delay: Seconds to wait between consecutive calls; no wait
                follows the last comment.

        Returns:
            One dict per comment with keys ``content``, ``score``, ``label``.
        """
        results = []
        total = len(comments)

        for i, comment in enumerate(comments):
            print(f"分析评论 {i + 1}/{total}...")
            score, label = self.analyze(comment)
            results.append({
                'content': comment,
                'score': score,
                'label': label
            })

            if delay > 0 and i < total - 1:
                time.sleep(delay)

        return results

    def is_configured(self) -> bool:
        """Return True when both an API key and a client are available."""
        return bool(self.client and self.api_key)
|
||||
Reference in New Issue
Block a user