""" 数据库模块 - SQLite存储评论和分析结果 """ import sqlite3 import hashlib import json from datetime import datetime from typing import List, Dict, Optional, Tuple from pathlib import Path class DatabaseManager: """数据库管理器""" def __init__(self, db_path: str = "guba.db"): self.db_path = Path(db_path) self._init_db() def _init_db(self): """初始化数据库表""" conn = self._get_connection() cursor = conn.cursor() # 评论表 cursor.execute(''' CREATE TABLE IF NOT EXISTS comments ( id INTEGER PRIMARY KEY AUTOINCREMENT, content TEXT NOT NULL, content_hash TEXT UNIQUE NOT NULL, url TEXT, created_at TEXT, fetched_at TEXT DEFAULT CURRENT_TIMESTAMP, analyzed INTEGER DEFAULT 0, sentiment_score REAL, analyzed_at TEXT ) ''') # 分析历史表 cursor.execute(''' CREATE TABLE IF NOT EXISTS analysis_history ( id INTEGER PRIMARY KEY AUTOINCREMENT, comment_id INTEGER, sentiment_score REAL NOT NULL, analysis_text TEXT, created_at TEXT DEFAULT CURRENT_TIMESTAMP, FOREIGN KEY (comment_id) REFERENCES comments(id) ) ''') # 配置表 cursor.execute(''' CREATE TABLE IF NOT EXISTS config ( key TEXT PRIMARY KEY, value TEXT, updated_at TEXT DEFAULT CURRENT_TIMESTAMP ) ''') conn.commit() conn.close() def _get_connection(self) -> sqlite3.Connection: """获取数据库连接""" return sqlite3.connect(str(self.db_path)) @staticmethod def hash_content(content: str) -> str: """计算内容哈希值用于去重""" return hashlib.md5(content.encode('utf-8')).hexdigest() def is_comment_exists(self, content_hash: str) -> bool: """检查评论是否已存在""" conn = self._get_connection() cursor = conn.cursor() cursor.execute('SELECT 1 FROM comments WHERE content_hash = ?', (content_hash,)) exists = cursor.fetchone() is not None conn.close() return exists def add_comment(self, content: str, url: str = None) -> Optional[int]: """添加评论,返回评论ID""" content_hash = self.hash_content(content) if self.is_comment_exists(content_hash): return None # 已存在 conn = self._get_connection() cursor = conn.cursor() cursor.execute(''' INSERT INTO comments (content, content_hash, url, created_at) VALUES (?, ?, ?, ?) ''', (content, content_hash, url, datetime.now().isoformat())) comment_id = cursor.lastrowid conn.commit() conn.close() return comment_id def add_comments_batch(self, comments: List[Dict]) -> List[int]: """批量添加评论,返回新添加的ID列表""" new_ids = [] conn = self._get_connection() cursor = conn.cursor() for comment in comments: content = comment.get('content', '') url = comment.get('url') content_hash = self.hash_content(content) if self.is_comment_exists(content_hash): continue cursor.execute(''' INSERT INTO comments (content, content_hash, url, created_at) VALUES (?, ?, ?, ?) ''', (content, content_hash, url, datetime.now().isoformat())) new_ids.append(cursor.lastrowid) conn.commit() conn.close() return new_ids def get_unanalyzed_comments(self, limit: int = 50) -> List[Dict]: """获取未分析的评论""" conn = self._get_connection() cursor = conn.cursor() cursor.execute(''' SELECT id, content, url FROM comments WHERE analyzed = 0 ORDER BY fetched_at ASC LIMIT ? ''', (limit,)) rows = cursor.fetchall() conn.close() return [{'id': row[0], 'content': row[1], 'url': row[2]} for row in rows] def mark_analyzed(self, comment_id: int, sentiment_score: float, analysis_text: str): """标记评论已分析""" conn = self._get_connection() cursor = conn.cursor() # 更新评论状态 cursor.execute(''' UPDATE comments SET analyzed = 1, sentiment_score = ?, analyzed_at = ? WHERE id = ? ''', (sentiment_score, datetime.now().isoformat(), comment_id)) # 添加分析历史 cursor.execute(''' INSERT INTO analysis_history (comment_id, sentiment_score, analysis_text) VALUES (?, ?, ?) ''', (comment_id, sentiment_score, analysis_text)) conn.commit() conn.close() def get_latest_sentiment_score(self) -> Optional[float]: """获取最新的情感分数""" conn = self._get_connection() cursor = conn.cursor() cursor.execute(''' SELECT sentiment_score FROM comments WHERE analyzed = 1 AND sentiment_score IS NOT NULL ORDER BY analyzed_at DESC LIMIT 1 ''') row = cursor.fetchone() conn.close() return row[0] if row else None def get_all_scores(self) -> List[float]: """获取所有已分析的分数""" conn = self._get_connection() cursor = conn.cursor() cursor.execute(''' SELECT sentiment_score FROM comments WHERE analyzed = 1 AND sentiment_score IS NOT NULL ORDER BY analyzed_at DESC ''') rows = cursor.fetchall() conn.close() return [row[0] for row in rows if row[0] is not None] def get_comment_count(self) -> int: """获取评论总数""" conn = self._get_connection() cursor = conn.cursor() cursor.execute('SELECT COUNT(*) FROM comments') count = cursor.fetchone()[0] conn.close() return count def get_analyzed_count(self) -> int: """获取已分析评论数""" conn = self._get_connection() cursor = conn.cursor() cursor.execute('SELECT COUNT(*) FROM comments WHERE analyzed = 1') count = cursor.fetchone()[0] conn.close() return count def get_recent_comments(self, limit: int = 10) -> List[Dict]: """获取最近的评论""" conn = self._get_connection() cursor = conn.cursor() cursor.execute(''' SELECT id, content, sentiment_score, analyzed_at FROM comments ORDER BY fetched_at DESC LIMIT ? ''', (limit,)) rows = cursor.fetchall() conn.close() return [ {'id': row[0], 'content': row[1][:50] + '...' if len(row[1]) > 50 else row[1], 'score': row[2], 'analyzed_at': row[3]} for row in rows ]