# llm_analyzer.py

"""
大模型分析模块 - 调用LLM API分析评论情感
"""
import json
import time
import re
from typing import Dict, Optional, Tuple
from openai import OpenAI, OpenAIError


class LLMAnalyzer:
    """Sentiment analyzer that scores forum comments through an LLM chat API.

    The analyzer sends each comment to an OpenAI-compatible chat completion
    endpoint together with ``SYSTEM_PROMPT`` and parses the reply into a
    ``(score, label)`` pair, where ``score`` is an integer clamped to 0-100.
    """

    SYSTEM_PROMPT = """你是一个专业的情感分析助手。你的任务是分析股吧/论坛评论的情感倾向，判断投资者对该股票的态度。

评分规则：
- 0-30: 极度悲观/看空（利空、暴跌、绝望等情绪）
- 31-50: 偏悲观/中性（担忧、谨慎、观望等情绪）
- 51-70: 偏乐观/中性（看好、希望、期待等情绪）
- 71-100: 极度乐观/看涨（利好、暴涨、兴奋等情绪）

请直接输出一个JSON格式的结果，包含两个字段：
- score: 0-100的整数评分
- label: 简短的态度描述（如"强烈看跌"、"谨慎观望"、"温和看涨"、"强烈看涨"等）

注意：
1. 只返回JSON，不要有其他文字
2. 如果无法判断，返回50和"无法判断"
3. 分析要客观，不要被表面文字迷惑
"""

    def __init__(self, config: Dict):
        """Store the settings from *config* and build a client if a key is set.

        Args:
            config: Settings dict. Recognised keys are ``base_url``,
                ``api_key``, ``model``, ``timeout`` and ``retry_times``;
                missing keys fall back to the defaults below.
        """
        self.config = config
        self.base_url = config.get('base_url', 'https://api.openai.com/v1')
        self.api_key = config.get('api_key', '')
        self.model = config.get('model', 'gpt-3.5-turbo')
        self.timeout = config.get('timeout', 30)
        self.retry_times = config.get('retry_times', 3)

        self.client = None
        if self.api_key:
            self._init_client()

    def _init_client(self) -> None:
        """Create the OpenAI client from the current settings.

        On failure the error is printed and ``self.client`` is reset to
        ``None`` so that a client built from outdated settings is never kept.
        """
        try:
            self.client = OpenAI(
                api_key=self.api_key,
                base_url=self.base_url,
                timeout=self.timeout
            )
        except Exception as e:
            print(f"初始化LLM客户端失败: {e}")
            self.client = None

    def update_config(self, config: Dict) -> None:
        """Merge *config* into the current settings and rebuild the client.

        Args:
            config: Partial settings dict; keys that are absent keep their
                current values.
        """
        self.config.update(config)
        self.base_url = config.get('base_url', self.base_url)
        self.api_key = config.get('api_key', self.api_key)
        self.model = config.get('model', self.model)
        self.timeout = config.get('timeout', self.timeout)
        self.retry_times = config.get('retry_times', self.retry_times)

        if self.api_key:
            self._init_client()
        else:
            # The key was cleared: drop the old client so analyze() and
            # is_configured() agree that the analyzer is not usable.
            self.client = None

    def analyze(self, comment: str) -> Tuple[Optional[int], Optional[str]]:
        """Analyze a single comment.

        Args:
            comment: The comment text to score.

        Returns:
            ``(score, label)`` on success. On failure ``score`` is ``None``
            and ``label`` carries a short reason string.
        """
        if not self.client:
            return None, "LLM未配置"

        if not comment or not comment.strip():
            return None, "评论为空"

        # Always make at least one attempt, even if retry_times was set to 0.
        attempts = max(1, int(self.retry_times))

        for attempt in range(attempts):
            try:
                response = self.client.chat.completions.create(
                    model=self.model,
                    messages=[
                        {"role": "system", "content": self.SYSTEM_PROMPT},
                        {"role": "user", "content": f"请分析以下评论的情感倾向：\n\n{comment}"}
                    ],
                    temperature=0.3,
                    max_tokens=200
                )

                # content may be None (e.g. filtered or empty completion);
                # treat that as an unparseable reply and retry.
                result_text = (response.choices[0].message.content or "").strip()
                score, label = self._parse_response(result_text)

                if score is not None:
                    return score, label

            except OpenAIError as e:
                print(f"API调用失败 (尝试 {attempt + 1}/{attempts}): {e}")
                if attempt < attempts - 1:
                    time.sleep(2 ** attempt)  # exponential backoff
            except Exception as e:
                # Anything else is treated as non-retryable.
                print(f"分析过程出错: {e}")
                break

        return None, "分析失败"

    def _parse_response(self, response: str) -> Tuple[Optional[int], Optional[str]]:
        """Parse the raw LLM reply into ``(score, label)``.

        Parsing is attempted in this order:

        1. The whole reply as a JSON object.
        2. The first ``{...}`` span inside the reply (covers replies wrapped
           in Markdown code fences or surrounded by extra text).
        3. A loose fallback: the first 1-3 digit number as the score and the
           first quoted string (or the start of the first line) as the label.

        Args:
            response: The text returned by the model.

        Returns:
            ``(score, label)`` with ``score`` clamped to 0-100, or
            ``(None, "解析失败")`` when nothing usable is found.
        """
        if not response:
            return None, "解析失败"

        candidates = [response]
        embedded = re.search(r'\{.*\}', response, re.DOTALL)
        if embedded and embedded.group(0) != response:
            candidates.append(embedded.group(0))

        for candidate in candidates:
            try:
                result = json.loads(candidate)
            except json.JSONDecodeError:
                continue
            if not isinstance(result, dict):
                # Valid JSON but not an object (e.g. a bare number).
                continue
            try:
                # float() first so "75.5" and 75.5 are both accepted.
                score = int(float(result.get('score', 50)))
            except (TypeError, ValueError, OverflowError):
                continue
            label = result.get('label') or '无法判断'
            return max(0, min(100, score)), str(label)

        # Fallback: pull a number out of free text. Digit lookarounds are used
        # instead of \b because CJK characters count as word characters, so
        # \b would not match in text such as "75分".
        numbers = re.findall(r'(?<!\d)(\d{1,3})(?!\d)', response)
        if not numbers:
            return None, "解析失败"

        score = max(0, min(100, int(numbers[0])))

        # First quoted string (ASCII or CJK quotation marks) is the label.
        label_match = re.search(r'''["'“”‘’]([^"'“”‘’]+)["'“”‘’]''', response)
        if label_match:
            label = label_match.group(1)
        else:
            label = response.split('\n')[0][:20]

        return score, label

    def analyze_batch(self, comments: list, delay: float = 1.0) -> list:
        """Analyze several comments sequentially.

        Args:
            comments: Comment strings to analyze.
            delay: Seconds to sleep between consecutive calls; no sleep
                happens after the last comment.

        Returns:
            A list of dicts with keys ``content``, ``score`` and ``label``,
            one per input comment and in the same order.
        """
        results = []
        total = len(comments)

        for i, comment in enumerate(comments):
            print(f"分析评论 {i + 1}/{total}...")
            score, label = self.analyze(comment)
            results.append({
                'content': comment,
                'score': score,
                'label': label
            })

            if delay > 0 and i < total - 1:
                time.sleep(delay)

        return results

    def is_configured(self) -> bool:
        """Return True when both a client and an API key are present."""
        return bool(self.client and self.api_key)