def _safe_format(template: str, vars_: Mapping[str, Any]) -> str:
    """Format *template* with *vars_* without crashing on imperfect templates.

    Purpose: prompt templates that users saved in the database may be from an
    older version (supporting only some placeholders); newer code passing
    additional variables must not crash on them.

    Brace handling (one left-to-right pass, same scan order as ``str.format``):

    - ``{{`` and ``}}`` are the standard ``str.format`` escapes and are kept
      as written, so they render as a single ``{`` / ``}``.
    - ``{name}`` where ``name`` starts with a letter or underscore and
      continues with letters, digits, ``_``, ``.``, ``[`` or ``]`` is kept as
      a real placeholder.
    - Any other lone ``{`` or ``}`` (for example the braces of an example
      JSON object such as ``{"k": "v"}``) is doubled so ``str.format`` emits
      it literally instead of raising ``ValueError``.

    Because tokens are recognised in a single pass, a placeholder directly
    followed by a closing brace (``{"count": {count}}``) and a placeholder
    wrapped in escaped braces (``{{{name}}}``) are both substituted correctly.

    Known limitations:

    - A literal ``}}`` that is not adjacent to a placeholder (for example the
      end of nested JSON ``{"a": {"b": 1}}``) is indistinguishable from the
      ``str.format`` escape and therefore renders as a single ``}``.
    - Placeholders carrying a conversion or format spec (``{x!r}``,
      ``{x:>10}``) do not match the placeholder pattern and are emitted as
      literal text.

    Args:
        template: The prompt template text.
        vars_: Values made available to the placeholders. They are wrapped in
            ``_SafeDict`` (defined elsewhere in this module; presumably it
            tolerates missing keys -- confirm against its definition).

    Returns:
        The formatted string, or *template* unchanged if formatting still
        fails after brace normalisation (a warning is logged in that case).
    """
    import re

    brace_token_re = re.compile(
        r"\{\{|\}\}"                          # explicit str.format escapes
        r"|\{[A-Za-z_][A-Za-z0-9_.\[\]]*\}"   # well-formed placeholder
        r"|[{}]"                              # stray literal brace
    )

    def _normalize(m: re.Match) -> str:
        token = m.group(0)
        # Only a lone brace is one character long; double it so that
        # str.format treats it as a literal. Every other token is already
        # valid format syntax and passes through untouched.
        return token * 2 if len(token) == 1 else token

    prepared = brace_token_re.sub(_normalize, template)

    try:
        return prepared.format_map(_SafeDict(vars_))
    except (KeyError, IndexError, ValueError, AttributeError, TypeError) as e:
        # Last-resort fallback. The placeholder pattern admits attribute and
        # index lookups ({a.b}, {a[0]}), which can raise AttributeError or
        # TypeError in addition to the key/index/syntax errors; in every such
        # case return the template as written rather than crash.
        logger.warning("_safe_format 解析失败,按原文返回: %s", e)
        return template