feat(translator): 添加transformers作为备选模型加载方式

支持使用transformers库作为llama-cpp-python的备选方案加载模型
新增模型加载失败时的自动回退机制
更新requirements.txt,添加transformers和torch依赖
This commit is contained in:
2026-01-16 11:08:34 +08:00
parent 1e7755f8e7
commit 2659fdd6ac
3 changed files with 107 additions and 33 deletions

7
models/Modelfile Normal file
View File

@@ -0,0 +1,7 @@
FROM ./HY-MT1.5-1.8B_bf16_Q4_K_M.gguf
PARAMETER temperature 0.7
PARAMETER top_p 0.95
PARAMETER num_ctx 2048
SYSTEM 你是一个专业的翻译助手,根据以下要求将中文翻译成英文。

View File

@@ -3,4 +3,6 @@ llama-cpp-python
python-docx
loguru
psutil
GPUtil
GPUtil
transformers
torch

View File

@@ -6,10 +6,18 @@ try:
from llama_cpp import Llama
llama_cpp_available = True
except ImportError:
logger.warning("llama-cpp-python库未找到禁用翻译功能")
logger.warning("llama-cpp-python库未找到尝试使用transformers库")
Llama = None
llama_cpp_available = False
# 尝试导入transformers库
try:
from transformers import AutoModelForCausalLM, AutoTokenizer
transformers_available = True
except ImportError:
logger.warning("transformers库未找到")
transformers_available = False
class Translator:
def __init__(self, model_path=None):
self.model = None
@@ -17,13 +25,11 @@ class Translator:
self.is_ready = False
self.model_name = ""
self.llama_cpp_available = llama_cpp_available
self.transformers_available = transformers_available
self.use_transformers = False
def load_model(self, model_path=None):
"""加载模型"""
if not self.llama_cpp_available:
logger.error("llama-cpp-python库未找到无法加载模型")
return False
if model_path:
self.model_path = model_path
@@ -31,22 +37,73 @@ class Translator:
logger.error("未提供模型路径")
return False
if not os.path.exists(self.model_path):
logger.error(f"模型文件不存在: {self.model_path}")
return False
try:
logger.info(f"开始加载模型: {self.model_path}")
self.model = Llama(
model_path=self.model_path,
n_ctx=2048,
n_threads=4,
n_gpu_layers=100 # 尽可能使用GPU加速
)
self.is_ready = True
self.model_name = os.path.basename(self.model_path)
logger.info(f"模型加载成功: {self.model_name}")
return True
if self.llama_cpp_available:
if os.path.exists(self.model_path):
try:
self.model = Llama(
model_path=self.model_path,
n_ctx=2048,
n_threads=4,
n_gpu_layers=100
)
self.use_transformers = False
self.is_ready = True
self.model_name = os.path.basename(self.model_path)
logger.info(f"模型加载成功: {self.model_name}")
return True
except Exception as e:
logger.warning(f"使用llama-cpp-python加载模型失败: {e}")
else:
logger.warning(f"模型文件不存在: {self.model_path}")
if self.transformers_available:
try:
from transformers import AutoConfig
if os.path.exists(self.model_path):
config = AutoConfig.from_pretrained(
self.model_path,
local_files_only=True,
trust_remote_code=True
)
self.model = AutoModelForCausalLM.from_pretrained(
self.model_path,
local_files_only=True,
trust_remote_code=True,
torch_dtype="auto"
)
self.tokenizer = AutoTokenizer.from_pretrained(
self.model_path,
local_files_only=True,
trust_remote_code=True
)
else:
config = AutoConfig.from_pretrained(
self.model_path,
trust_remote_code=True
)
self.model = AutoModelForCausalLM.from_pretrained(
self.model_path,
trust_remote_code=True,
torch_dtype="auto"
)
self.tokenizer = AutoTokenizer.from_pretrained(
self.model_path,
trust_remote_code=True
)
self.use_transformers = True
self.is_ready = True
self.model_name = self.model_path
logger.info(f"使用transformers加载模型成功: {self.model_name}")
return True
except Exception as e:
logger.error(f"使用transformers加载模型失败: {e}")
return False
logger.error("没有可用的模型加载方式")
return False
except Exception as e:
logger.error(f"模型加载失败: {e}")
self.is_ready = False
@@ -54,10 +111,6 @@ class Translator:
def translate(self, text, context="", terms=None):
"""执行翻译"""
if not self.llama_cpp_available:
logger.error("llama-cpp-python库未找到无法执行翻译")
return ""
if not self.is_ready or not self.model:
logger.error("模型未就绪,无法执行翻译")
return ""
@@ -68,16 +121,28 @@ class Translator:
logger.info(f"开始翻译,输入长度: {len(text)} 字符")
# 调用模型进行翻译
output = self.model(
prompt,
max_tokens=2048,
temperature=0.7,
top_p=0.95,
stop=["\n原文:", "\n译文:", "\n###"]
)
if self.use_transformers:
import torch
inputs = self.tokenizer(prompt, return_tensors="pt")
with torch.no_grad():
outputs = self.model.generate(
**inputs,
max_new_tokens=2048,
temperature=0.7,
top_p=0.95,
do_sample=True
)
translated_text = self.tokenizer.decode(outputs[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)
else:
output = self.model(
prompt,
max_tokens=2048,
temperature=0.7,
top_p=0.95,
stop=["\n原文:", "\n译文:", "\n###"]
)
translated_text = output["choices"][0]["text"].strip()
translated_text = output["choices"][0]["text"].strip()
logger.info(f"翻译完成,输出长度: {len(translated_text)} 字符")
return translated_text