""" 大模型分析模块 - 调用LLM API分析评论情感 """ import json import time import re from typing import Dict, Optional, Tuple from openai import OpenAI, OpenAIError class LLMAnalyzer: """大模型情感分析器""" SYSTEM_PROMPT = """你是一个专业的情感分析助手。你的任务是分析股吧/论坛评论的情感倾向,判断投资者对该股票的态度。 评分规则: - 0-30: 极度悲观/看空(利空、暴跌、绝望等情绪) - 31-50: 偏悲观/中性(担忧、谨慎、观望等情绪) - 51-70: 偏乐观/中性(看好、希望、期待等情绪) - 71-100: 极度乐观/看涨(利好、暴涨、兴奋等情绪) 请直接输出一个JSON格式的结果,包含两个字段: - score: 0-100的整数评分 - label: 简短的态度描述(如"强烈看跌"、"谨慎观望"、"温和看涨"、"强烈看涨"等) 注意: 1. 只返回JSON,不要有其他文字 2. 如果无法判断,返回50和"无法判断" 3. 分析要客观,不要被表面文字迷惑 """ def __init__(self, config: Dict): self.config = config self.base_url = config.get('base_url', 'https://api.openai.com/v1') self.api_key = config.get('api_key', '') self.model = config.get('model', 'gpt-3.5-turbo') self.timeout = config.get('timeout', 30) self.retry_times = config.get('retry_times', 3) self.client = None if self.api_key: self._init_client() def _init_client(self): """初始化OpenAI客户端""" try: self.client = OpenAI( api_key=self.api_key, base_url=self.base_url, timeout=self.timeout ) except Exception as e: print(f"初始化LLM客户端失败: {e}") def update_config(self, config: Dict): """更新配置""" self.config.update(config) self.base_url = config.get('base_url', self.base_url) self.api_key = config.get('api_key', self.api_key) self.model = config.get('model', self.model) self.timeout = config.get('timeout', self.timeout) self.retry_times = config.get('retry_times', self.retry_times) if self.api_key: self._init_client() def analyze(self, comment: str) -> Tuple[Optional[int], Optional[str]]: """ 分析单条评论 返回 (score, label) """ if not self.client: return None, "LLM未配置" if not comment or not comment.strip(): return None, "评论为空" for attempt in range(self.retry_times): try: response = self.client.chat.completions.create( model=self.model, messages=[ {"role": "system", "content": self.SYSTEM_PROMPT}, {"role": "user", "content": f"请分析以下评论的情感倾向:\n\n{comment}"} ], temperature=0.3, max_tokens=200 ) result_text = response.choices[0].message.content.strip() score, label = self._parse_response(result_text) if score is not None: return score, label except OpenAIError as e: print(f"API调用失败 (尝试 {attempt + 1}/{self.retry_times}): {e}") if attempt < self.retry_times - 1: time.sleep(2 ** attempt) # 指数退避 except Exception as e: print(f"分析过程出错: {e}") break return None, "分析失败" def _parse_response(self, response: str) -> Tuple[Optional[int], Optional[str]]: """解析LLM返回的结果""" try: # 尝试直接解析JSON result = json.loads(response) score = result.get('score', 50) label = result.get('label', '无法判断') # 验证分数范围 score = max(0, min(100, int(score))) return score, label except json.JSONDecodeError: # 尝试从文本中提取 pass # 尝试从文本中提取数字 numbers = re.findall(r'\b(\d{1,3})\b', response) if numbers: score = int(numbers[0]) score = max(0, min(100, score)) # 提取标签 label_match = re.search(r'["']([^"']+)["']', response) if label_match: label = label_match.group(1) else: label = response.split('\n')[0][:20] if response else '无法判断' return score, label return None, "解析失败" def analyze_batch(self, comments: list, delay: float = 1.0) -> list: """ 批量分析评论 delay: 每次调用之间的延迟(秒) """ results = [] for i, comment in enumerate(comments): print(f"分析评论 {i + 1}/{len(comments)}...") score, label = self.analyze(comment) results.append({ 'content': comment, 'score': score, 'label': label }) if delay > 0 and i < len(comments) - 1: time.sleep(delay) return results def is_configured(self) -> bool: """检查是否已配置""" return bool(self.client and self.api_key)