🎨 增加并发访问
This commit is contained in:
691
tools/analyze_compression.py
Normal file
691
tools/analyze_compression.py
Normal file
@@ -0,0 +1,691 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
离线分析上下文压缩管道表现。
|
||||
|
||||
从 Docker 日志中提取压缩相关的 tracing 结构化数据,
|
||||
生成汇总报告,帮助评估五层压缩管道的实际效果。
|
||||
|
||||
使用方法:
|
||||
python3 tools/analyze_compression.py logs/docker.log
|
||||
python3 tools/analyze_compression.py --top 10 logs/docker.log
|
||||
python3 tools/analyze_compression.py --csv output.csv logs/docker.log
|
||||
python3 tools/analyze_compression.py --json logs/docker.log
|
||||
cat logs/docker.log | python3 tools/analyze_compression.py -
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import csv
|
||||
import json
|
||||
import re
|
||||
import sys
|
||||
from collections import defaultdict
|
||||
from dataclasses import asdict, dataclass, field
|
||||
from typing import Any, Dict, List, Optional, Sequence
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
# ANSI stripping (reuses the pattern from diagnose_improper_request.py)
# ---------------------------------------------------------------------------

# Covers common CSI sequences (including the rare ':' parameter separator)
# so escape codes cannot pollute URL/field parsing.
ANSI_RE = re.compile(r"\x1b\[[0-9;:?]*[A-Za-z]")
|
||||
|
||||
|
||||
def strip_ansi(s: str) -> str:
    """Return *s* with all ANSI CSI escape sequences removed."""
    return re.sub(r"\x1b\[[0-9;:?]*[A-Za-z]", "", s)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
# Timestamp extraction
# ---------------------------------------------------------------------------

# ISO 8601 timestamp (near line start), with or without a timezone suffix
TIMESTAMP_RE = re.compile(r"(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2})")
|
||||
|
||||
|
||||
def extract_timestamp(line: str) -> Optional[str]:
    """Return the leading ISO timestamp (second precision) of a log line, or None."""
    # Only scan the first 40 characters: the timestamp, if any, is at the start.
    match = re.search(r"(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2})", line[:40])
    if match is None:
        return None
    return match.group(1)
|
||||
|
||||
|
||||
def hour_bucket(ts: str) -> str:
    """Truncate an ISO timestamp to hour precision: 2025-01-15T10:23:45 -> 2025-01-15T10."""
    return ts[: len("YYYY-MM-DDTHH")]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
# tracing key=value parsing
# ---------------------------------------------------------------------------

# Values may be ints/floats, double-quoted strings, or bare tokens (no spaces/commas).
KV_RE = re.compile(r"(\w+)=(\d+(?:\.\d+)?|\"[^\"]*\"|[^\s,]+)")
|
||||
|
||||
|
||||
def parse_kv(line: str) -> Dict[str, str]:
    """Extract every key=value pair from a structured tracing line."""
    pairs: Dict[str, str] = {}
    for match in re.finditer(r"(\w+)=(\d+(?:\.\d+)?|\"[^\"]*\"|[^\s,]+)", line):
        # Surrounding double quotes, if present, are not part of the value.
        pairs[match.group(1)] = match.group(2).strip('"')
    return pairs
|
||||
|
||||
|
||||
def kv_int(kv: Dict[str, str], key: str, default: int = 0) -> int:
    """Look up *key* in *kv* and convert it to int; *default* on a missing or bad value."""
    try:
        return int(kv[key])
    except (KeyError, ValueError):
        return default
|
||||
|
||||
|
||||
def kv_float(kv: Dict[str, str], key: str, default: float = 0.0) -> float:
    """Look up *key* in *kv* and convert it to float; *default* on a missing or bad value."""
    try:
        return float(kv[key])
    except (KeyError, ValueError):
        return default
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 数据模型
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@dataclass
class RequestRecord:
    """Data from one `Received POST /v1/messages request` log line."""
    line_no: int
    timestamp: Optional[str] = None  # ISO timestamp (second precision), if present
    model: str = ""
    max_tokens: int = 0
    stream: bool = True
    message_count: int = 0
    estimated_input_tokens: int = 0  # pairing key used later by _merge_records
||||
|
||||
|
||||
@dataclass
class CompressionRecord:
    """Data from one compression-statistics log line (MARKER_COMPRESSION)."""
    line_no: int
    timestamp: Optional[str] = None
    estimated_input_tokens: int = 0  # pairing key used later by _merge_records
    bytes_saved_total: int = 0
    # Per-layer byte savings of the five-layer compression pipeline:
    whitespace_bytes_saved: int = 0
    thinking_bytes_saved: int = 0
    tool_result_bytes_saved: int = 0
    tool_use_input_bytes_saved: int = 0
    history_turns_removed: int = 0
    history_bytes_saved: int = 0
|
||||
|
||||
|
||||
@dataclass
class ContextUsageRecord:
    """Data from one contextUsageEvent log line."""
    line_no: int
    context_usage_percentage: float = 0.0  # e.g. 67.2 means 67.2% of the window
    actual_input_tokens: int = 0
|
||||
|
||||
|
||||
@dataclass
class RejectionRecord:
    """Data from one upstream context-too-long rejection log line."""
    line_no: int
    kiro_request_body_bytes: int = 0
|
||||
|
||||
|
||||
@dataclass
class AdaptiveShrinkRecord:
    """Data from one adaptive secondary-compression trigger log line."""
    line_no: int
    timestamp: Optional[str] = None
    conversation_id: Optional[str] = None
    initial_bytes: int = 0   # request-body size before the extra shrink passes
    final_bytes: int = 0     # size after shrinking
    threshold: int = 0       # configured size threshold that triggered shrinking
    iters: int = 0           # number of shrink iterations performed
    additional_history_turns_removed: int = 0
|
||||
|
||||
|
||||
@dataclass
class LocalRejectRecord:
    """Data from one local over-limit rejection log line (request never sent)."""
    line_no: int
    timestamp: Optional[str] = None
    conversation_id: Optional[str] = None
    request_body_bytes: int = 0
    image_bytes: int = 0
    effective_bytes: int = 0  # size actually compared against the threshold
    threshold: int = 0
|
||||
|
||||
|
||||
@dataclass
class MergedRequest:
    """A request record joined with its matching compression/context-usage lines."""
    line_no: int = 0
    timestamp: Optional[str] = None
    model: str = ""
    max_tokens: int = 0
    stream: bool = True
    message_count: int = 0
    estimated_input_tokens: int = 0
    # Compression statistics (zero unless has_compression is True)
    bytes_saved_total: int = 0
    whitespace_bytes_saved: int = 0
    thinking_bytes_saved: int = 0
    tool_result_bytes_saved: int = 0
    tool_use_input_bytes_saved: int = 0
    history_turns_removed: int = 0
    history_bytes_saved: int = 0
    has_compression: bool = False  # True once a CompressionRecord was paired
    # Context-window usage (None when no contextUsageEvent was paired)
    context_usage_percentage: Optional[float] = None
    actual_input_tokens: Optional[int] = None
    # Compression ratio in percent, derived in _merge_records
    compression_rate: float = 0.0
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 日志解析
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Log-line markers (must match the Chinese tracing messages emitted by the service)
MARKER_REQUEST = "Received POST /v1/messages request"
MARKER_COMPRESSION = "输入压缩完成"
MARKER_CONTEXT_USAGE = "收到 contextUsageEvent"
MARKER_REJECTION = "上游拒绝请求:输入上下文过长"
MARKER_ADAPTIVE_SHRINK = "请求体超过阈值,已执行自适应二次压缩"
MARKER_LOCAL_REJECT = "请求体超过安全阈值,拒绝发送"

# contextUsageEvent line format: 收到 contextUsageEvent: 67.2%, 计算 input_tokens: 12345
CONTEXT_USAGE_RE = re.compile(
    r"收到 contextUsageEvent:\s*([\d.]+)%.*?input_tokens:\s*(\d+)"
)
|
||||
|
||||
|
||||
def parse_log(
    lines: Sequence[str],
    *,
    min_tokens: int = 0,
    model_pattern: Optional[str] = None,
) -> tuple[
    list[MergedRequest],
    list[RejectionRecord],
    list[AdaptiveShrinkRecord],
    list[LocalRejectRecord],
    int,
]:
    """
    Parse log lines and return a 5-tuple:
    (merged_requests, rejections, adaptive_shrinks, local_rejects, total_lines).

    Correlation strategy: request lines and compression-stat lines that appear
    close together are paired by matching estimated_input_tokens plus
    line-number proximity (gap <= 50 lines) — see _merge_records.

    min_tokens drops request and compression lines whose estimated_input_tokens
    is below the threshold; model_pattern is a case-insensitive regex applied
    to the request's model name.
    """
    requests: list[RequestRecord] = []
    compressions: list[CompressionRecord] = []
    context_usages: list[ContextUsageRecord] = []
    rejections: list[RejectionRecord] = []
    adaptive_shrinks: list[AdaptiveShrinkRecord] = []
    local_rejects: list[LocalRejectRecord] = []

    model_re = re.compile(model_pattern, re.IGNORECASE) if model_pattern else None

    for idx, raw_line in enumerate(lines):
        line_no = idx + 1
        line = strip_ansi(raw_line)

        if MARKER_REQUEST in line:
            kv = parse_kv(line)
            model = kv.get("model", "")
            if model_re and not model_re.search(model):
                continue
            est = kv_int(kv, "estimated_input_tokens")
            if est < min_tokens:
                continue
            requests.append(RequestRecord(
                line_no=line_no,
                timestamp=extract_timestamp(line),
                model=model,
                max_tokens=kv_int(kv, "max_tokens"),
                stream=kv.get("stream", "true") == "true",
                message_count=kv_int(kv, "message_count"),
                estimated_input_tokens=est,
            ))

        elif MARKER_COMPRESSION in line:
            kv = parse_kv(line)
            est = kv_int(kv, "estimated_input_tokens")
            # Keep the filter symmetric with the request branch so pairing
            # by estimated_input_tokens still works after filtering.
            if est < min_tokens:
                continue
            compressions.append(CompressionRecord(
                line_no=line_no,
                timestamp=extract_timestamp(line),
                estimated_input_tokens=est,
                bytes_saved_total=kv_int(kv, "bytes_saved_total"),
                whitespace_bytes_saved=kv_int(kv, "whitespace_bytes_saved"),
                thinking_bytes_saved=kv_int(kv, "thinking_bytes_saved"),
                tool_result_bytes_saved=kv_int(kv, "tool_result_bytes_saved"),
                tool_use_input_bytes_saved=kv_int(kv, "tool_use_input_bytes_saved"),
                history_turns_removed=kv_int(kv, "history_turns_removed"),
                history_bytes_saved=kv_int(kv, "history_bytes_saved"),
            ))

        elif MARKER_CONTEXT_USAGE in line:
            m = CONTEXT_USAGE_RE.search(line)
            if m:
                context_usages.append(ContextUsageRecord(
                    line_no=line_no,
                    context_usage_percentage=float(m.group(1)),
                    actual_input_tokens=int(m.group(2)),
                ))

        elif MARKER_REJECTION in line:
            kv = parse_kv(line)
            rejections.append(RejectionRecord(
                line_no=line_no,
                kiro_request_body_bytes=kv_int(kv, "kiro_request_body_bytes"),
            ))

        elif MARKER_ADAPTIVE_SHRINK in line:
            kv = parse_kv(line)
            adaptive_shrinks.append(AdaptiveShrinkRecord(
                line_no=line_no,
                timestamp=extract_timestamp(line),
                conversation_id=kv.get("conversation_id"),
                initial_bytes=kv_int(kv, "initial_bytes"),
                final_bytes=kv_int(kv, "final_bytes"),
                threshold=kv_int(kv, "threshold"),
                iters=kv_int(kv, "iters"),
                additional_history_turns_removed=kv_int(kv, "additional_history_turns_removed"),
            ))

        elif MARKER_LOCAL_REJECT in line:
            kv = parse_kv(line)
            local_rejects.append(LocalRejectRecord(
                line_no=line_no,
                timestamp=extract_timestamp(line),
                conversation_id=kv.get("conversation_id"),
                request_body_bytes=kv_int(kv, "request_body_bytes"),
                image_bytes=kv_int(kv, "image_bytes"),
                effective_bytes=kv_int(kv, "effective_bytes"),
                threshold=kv_int(kv, "threshold"),
            ))

    # --- Correlate request lines with compression-stat / context-usage lines ---
    merged = _merge_records(requests, compressions, context_usages)

    return merged, rejections, adaptive_shrinks, local_rejects, len(lines)
|
||||
|
||||
|
||||
def _merge_records(
    requests: list[RequestRecord],
    compressions: list[CompressionRecord],
    context_usages: list[ContextUsageRecord],
) -> list[MergedRequest]:
    """
    Join request lines with compression-statistics lines.

    Strategy: for each request, greedily take the first not-yet-used
    compression line within the next 50 log lines whose
    estimated_input_tokens matches exactly, and the first not-yet-used
    contextUsageEvent within the next 500 lines. Greedy first-match in
    request order can mis-pair under heavy concurrency, but line proximity
    keeps that rare in practice. O(len(requests) * len(compressions)).
    """
    merged: list[MergedRequest] = []
    used_comp_indices: set[int] = set()
    used_ctx_indices: set[int] = set()

    for req in requests:
        mr = MergedRequest(
            line_no=req.line_no,
            timestamp=req.timestamp,
            model=req.model,
            max_tokens=req.max_tokens,
            stream=req.stream,
            message_count=req.message_count,
            estimated_input_tokens=req.estimated_input_tokens,
        )

        # Find the matching compression-statistics line
        for ci, comp in enumerate(compressions):
            if ci in used_comp_indices:
                continue
            # Line proximity: the compression line must be within 50 lines after the request
            if not (0 < comp.line_no - req.line_no <= 50):
                continue
            # estimated_input_tokens must match exactly
            if comp.estimated_input_tokens != req.estimated_input_tokens:
                continue
            # Match found: copy the per-layer statistics over
            mr.bytes_saved_total = comp.bytes_saved_total
            mr.whitespace_bytes_saved = comp.whitespace_bytes_saved
            mr.thinking_bytes_saved = comp.thinking_bytes_saved
            mr.tool_result_bytes_saved = comp.tool_result_bytes_saved
            mr.tool_use_input_bytes_saved = comp.tool_use_input_bytes_saved
            mr.history_turns_removed = comp.history_turns_removed
            mr.history_bytes_saved = comp.history_bytes_saved
            mr.has_compression = True
            used_comp_indices.add(ci)
            break

        # Find the matching contextUsageEvent (within 500 lines after the request)
        for ui, ctx in enumerate(context_usages):
            if ui in used_ctx_indices:
                continue
            if not (0 < ctx.line_no - req.line_no <= 500):
                continue
            mr.context_usage_percentage = ctx.context_usage_percentage
            mr.actual_input_tokens = ctx.actual_input_tokens
            used_ctx_indices.add(ui)
            break

        # Compute the compression rate from the estimated token count,
        # assuming 1 token ~= 4 bytes.
        if mr.estimated_input_tokens > 0 and mr.bytes_saved_total > 0:
            estimated_bytes = mr.estimated_input_tokens * 4
            mr.compression_rate = mr.bytes_saved_total / estimated_bytes * 100

        merged.append(mr)

    return merged
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 统计计算
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def median(values: list[float]) -> float:
    """Return the median of *values*; 0.0 for an empty list."""
    if not values:
        return 0.0
    ordered = sorted(values)
    mid, odd = divmod(len(ordered), 2)
    if odd:
        return ordered[mid]
    return (ordered[mid - 1] + ordered[mid]) / 2
|
||||
|
||||
|
||||
def percentile(values: list[float], p: float) -> float:
    """Return the p-th percentile of *values* using linear interpolation.

    *p* is clamped to [0, 100]: the original raised IndexError for p > 100
    and could index with a negative position for p < 0. An empty list
    yields 0.0.
    """
    if not values:
        return 0.0
    s = sorted(values)
    # Clamp out-of-range percentiles instead of indexing out of bounds.
    p = min(max(p, 0.0), 100.0)
    k = (len(s) - 1) * p / 100
    f = int(k)
    c = min(f + 1, len(s) - 1)
    return s[f] + (s[c] - s[f]) * (k - f)
|
||||
|
||||
|
||||
def fmt_bytes(n: int) -> str:
    """Format a byte count as a grouped integer with a human-readable MB/KB hint."""
    if n >= 1_000_000:
        hint = f" ({n / 1_000_000:.1f} MB)"
    elif n >= 1_000:
        hint = f" ({n / 1_000:.1f} KB)"
    else:
        hint = ""
    return f"{n:,}{hint}"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 报告生成
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def generate_report(
    merged: list[MergedRequest],
    rejections: list[RejectionRecord],
    adaptive_shrinks: list[AdaptiveShrinkRecord],
    local_rejects: list[LocalRejectRecord],
    total_lines: int,
    *,
    top_n: int = 5,
) -> str:
    """Render the plain-text analysis report (section headers are Chinese by design)."""
    lines: list[str] = []
    w = lines.append

    w("=== 上下文压缩分析报告 ===")
    w("")
    w(f"扫描行数: {total_lines:,}")
    w(f"匹配请求: {len(merged)}")
    with_comp = [r for r in merged if r.has_compression]
    w(f"有压缩统计: {len(with_comp)}")
    w("")

    # Nothing to analyze: emit the header plus a notice and stop.
    if not with_comp:
        w("未找到压缩统计数据。")
        return "\n".join(lines)

    # --- Overall summary ---
    total_saved = sum(r.bytes_saved_total for r in with_comp)
    avg_saved = total_saved // len(with_comp) if with_comp else 0
    rates = [r.compression_rate for r in with_comp if r.compression_rate > 0]
    median_rate = median(rates)

    w("--- 总体概览 ---")
    w(f"总节省字节: {fmt_bytes(total_saved)}")
    w(f"平均每请求节省: {avg_saved:,} bytes")
    w(f"压缩率中位数: {median_rate:.1f}%")
    w("")

    # --- Per-layer contribution of the five compression layers ---
    ws_total = sum(r.whitespace_bytes_saved for r in with_comp)
    th_total = sum(r.thinking_bytes_saved for r in with_comp)
    tr_total = sum(r.tool_result_bytes_saved for r in with_comp)
    tu_total = sum(r.tool_use_input_bytes_saved for r in with_comp)
    hi_total = sum(r.history_bytes_saved for r in with_comp)

    def layer_line(name: str, val: int) -> str:
        # One aligned report line per layer: total bytes, share of total, per-request average.
        pct = val / total_saved * 100 if total_saved > 0 else 0
        avg = val // len(with_comp) if with_comp else 0
        return f"  {name:<18}{val:>12,} bytes ({pct:>5.1f}%)  avg {avg:,}/req"

    w("--- 各层贡献 ---")
    w(layer_line("空白压缩:", ws_total))
    w(layer_line("thinking 截断:", th_total))
    w(layer_line("tool_result:", tr_total))
    w(layer_line("tool_use_input:", tu_total))
    w(layer_line("历史截断:", hi_total))
    w("")

    # --- History-truncation detail ---
    with_history = [r for r in with_comp if r.history_turns_removed > 0]
    w("--- 历史截断详情 ---")
    w(f"触发历史截断的请求: {len(with_history)}/{len(with_comp)} ({len(with_history)/len(with_comp)*100:.1f}%)")
    if with_history:
        turns = [r.history_turns_removed for r in with_history]
        w(f"平均移除轮数: {sum(turns)/len(turns):.1f}")
        w(f"最大移除轮数: {max(turns)}")
    w("")

    # --- Context-window usage (from contextUsageEvent lines) ---
    with_ctx = [r for r in merged if r.context_usage_percentage is not None]
    w("--- 上下文窗口使用 (contextUsageEvent) ---")
    if with_ctx:
        usages = [r.context_usage_percentage for r in with_ctx]
        avg_usage = sum(usages) / len(usages)
        over_80 = sum(1 for u in usages if u > 80)
        over_95 = sum(1 for u in usages if u > 95)
        overflow = sum(1 for u in usages if u >= 100)
        w(f"平均使用率: {avg_usage:.1f}%")
        w(f">80% 使用率的请求: {over_80} ({over_80/len(with_ctx)*100:.1f}%)")
        w(f">95% 使用率的请求: {over_95} ({over_95/len(with_ctx)*100:.1f}%)")
        w(f"100% (溢出): {overflow} ({overflow/len(with_ctx)*100:.1f}%)")
    else:
        w("无 contextUsageEvent 数据(需要 DEBUG 日志级别)")
    w("")

    # --- Upstream rejections (context too long) ---
    w("--- 上游拒绝 ---")
    w(f"输入过长拒绝: {len(rejections)} 次")
    w("")

    # --- Adaptive secondary compression ---
    w("--- 自适应二次压缩 ---")
    w(f"触发次数: {len(adaptive_shrinks)}")
    if adaptive_shrinks:
        initial_avg = sum(r.initial_bytes for r in adaptive_shrinks) // len(adaptive_shrinks)
        final_avg = sum(r.final_bytes for r in adaptive_shrinks) // len(adaptive_shrinks)
        iters_avg = sum(r.iters for r in adaptive_shrinks) / len(adaptive_shrinks)
        hist_avg = sum(r.additional_history_turns_removed for r in adaptive_shrinks) / len(adaptive_shrinks)
        w(f"平均压缩前: {fmt_bytes(initial_avg)}")
        w(f"平均压缩后: {fmt_bytes(final_avg)}")
        w(f"平均迭代次数: {iters_avg:.1f}")
        w(f"平均额外移除轮数: {hist_avg:.1f}")
    w("")

    # --- Local rejections (request body over limit, never sent upstream) ---
    w("--- 本地拒绝 (请求体超限) ---")
    w(f"拒绝发送: {len(local_rejects)} 次")
    if local_rejects:
        # Show the five largest offenders by effective size.
        top = sorted(local_rejects, key=lambda r: r.effective_bytes, reverse=True)[:5]
        for r in top:
            w(
                "  line={line} effective={eff} threshold={th} body={body} image={img} conversationId={cid}".format(
                    line=r.line_no,
                    eff=r.effective_bytes,
                    th=r.threshold,
                    body=r.request_body_bytes,
                    img=r.image_bytes,
                    cid=r.conversation_id or "None",
                )
            )
    w("")

    # --- Top-N most compressed requests ---
    sorted_by_saved = sorted(with_comp, key=lambda r: r.bytes_saved_total, reverse=True)
    w(f"--- 高压缩请求 TOP-{top_n} ---")
    for i, r in enumerate(sorted_by_saved[:top_n], 1):
        w(f"  #{i} line={r.line_no} saved={r.bytes_saved_total:,} rate={r.compression_rate:.1f}% model={r.model} tokens={r.estimated_input_tokens:,}")
    w("")

    # --- Samples of requests where compression saved nothing ---
    no_comp = [r for r in with_comp if r.bytes_saved_total == 0]
    w("--- 低效/无压缩请求样本 ---")
    if no_comp:
        for r in no_comp[:5]:
            w(f"  line={r.line_no} saved=0 tokens={r.estimated_input_tokens:,} message_count={r.message_count}")
    else:
        w("  (无)")
    w("")

    # --- Hourly trend (requests with a parseable timestamp only) ---
    hourly: Dict[str, list[MergedRequest]] = defaultdict(list)
    for r in with_comp:
        if r.timestamp:
            hourly[hour_bucket(r.timestamp)].append(r)

    if hourly:
        w("--- 时间趋势 (按小时) ---")
        for hour in sorted(hourly.keys()):
            reqs = hourly[hour]
            avg_s = sum(r.bytes_saved_total for r in reqs) // len(reqs)
            ctx_reqs = [r for r in reqs if r.context_usage_percentage is not None]
            avg_ctx = sum(r.context_usage_percentage for r in ctx_reqs) / len(ctx_reqs) if ctx_reqs else 0
            ctx_str = f" avg_context_usage={avg_ctx:.1f}%" if ctx_reqs else ""
            w(f"  {hour}: requests={len(reqs)} avg_saved={avg_s:,}{ctx_str}")
        w("")

    return "\n".join(lines)
|
||||
|
||||
|
||||
def generate_json_report(
    merged: list[MergedRequest],
    rejections: list[RejectionRecord],
    adaptive_shrinks: list[AdaptiveShrinkRecord],
    local_rejects: list[LocalRejectRecord],
    total_lines: int,
) -> str:
    """Build the machine-readable JSON summary (same totals as the text report)."""
    compressed = [r for r in merged if r.has_compression]
    saved = sum(r.bytes_saved_total for r in compressed)

    def layer_total(attr: str) -> int:
        # Sum one per-layer savings attribute across all compressed requests.
        return sum(getattr(r, attr) for r in compressed)

    report = {
        "total_lines": total_lines,
        "matched_requests": len(merged),
        "with_compression": len(compressed),
        "total_bytes_saved": saved,
        "avg_bytes_saved": saved // len(compressed) if compressed else 0,
        "layers": {
            "whitespace": layer_total("whitespace_bytes_saved"),
            "thinking": layer_total("thinking_bytes_saved"),
            "tool_result": layer_total("tool_result_bytes_saved"),
            "tool_use_input": layer_total("tool_use_input_bytes_saved"),
            "history": layer_total("history_bytes_saved"),
        },
        "rejections": len(rejections),
        "adaptive_shrinks": len(adaptive_shrinks),
        "local_rejects": len(local_rejects),
    }
    return json.dumps(report, indent=2, ensure_ascii=False)
|
||||
|
||||
|
||||
def write_csv(merged: list[MergedRequest], path: str) -> None:
    """Export one CSV row per merged request to *path* (UTF-8, header row first)."""
    fieldnames = [
        "line_no", "timestamp", "model", "max_tokens", "stream",
        "message_count", "estimated_input_tokens", "bytes_saved_total",
        "whitespace_bytes_saved", "thinking_bytes_saved",
        "tool_result_bytes_saved", "tool_use_input_bytes_saved",
        "history_turns_removed", "history_bytes_saved",
        "compression_rate", "context_usage_percentage", "actual_input_tokens",
    ]
    with open(path, "w", newline="", encoding="utf-8") as fh:
        writer = csv.DictWriter(fh, fieldnames=fieldnames)
        writer.writeheader()
        for record in merged:
            # asdict() yields every dataclass field; keep only the exported columns.
            data = asdict(record)
            writer.writerow({name: data[name] for name in fieldnames})
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# CLI 入口
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def main(argv: list[str]) -> int:
    """CLI entry point: parse args, read the log, print the report, optionally export CSV.

    Returns a process exit code (0 on success, 2 when the log file is missing).
    """
    parser = argparse.ArgumentParser(
        description="分析上下文压缩管道表现",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=__doc__,
    )
    parser.add_argument(
        "logfile", nargs="?", default="logs/docker.log",
        help="日志文件路径,使用 '-' 从 stdin 读取(默认: logs/docker.log)"
    )
    parser.add_argument("--top", type=int, default=5, help="高压缩请求 TOP-N(默认: 5)")
    parser.add_argument("--csv", metavar="FILE", help="导出每条请求的明细为 CSV")
    parser.add_argument("--json", action="store_true", help="JSON 格式输出汇总")
    parser.add_argument("--min-tokens", type=int, default=0, help="仅分析 estimated_input_tokens >= N 的请求")
    parser.add_argument("--model", metavar="PATTERN", help="按模型名过滤(正则)")
    args = parser.parse_args(argv)

    # Read the log ('-' means stdin; undecodable bytes are replaced, not fatal)
    if args.logfile == "-":
        log_lines = sys.stdin.read().splitlines()
    else:
        try:
            with open(args.logfile, "r", encoding="utf-8", errors="replace") as f:
                log_lines = f.read().splitlines()
        except FileNotFoundError:
            print(f"ERROR: 日志文件不存在: {args.logfile}", file=sys.stderr)
            return 2

    # Parse
    merged, rejections, adaptive_shrinks, local_rejects, total_lines = parse_log(
        log_lines,
        min_tokens=args.min_tokens,
        model_pattern=args.model,
    )

    # Output (JSON summary or full text report)
    if args.json:
        print(generate_json_report(merged, rejections, adaptive_shrinks, local_rejects, total_lines))
    else:
        print(generate_report(merged, rejections, adaptive_shrinks, local_rejects, total_lines, top_n=args.top))

    # Optional CSV export (status goes to stderr so stdout stays pipeable)
    if args.csv:
        write_csv(merged, args.csv)
        print(f"CSV 已导出: {args.csv}", file=sys.stderr)

    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Propagate main()'s return value as the process exit code.
    raise SystemExit(main(sys.argv[1:]))
|
||||
646
tools/diagnose_improper_request.py
Normal file
646
tools/diagnose_improper_request.py
Normal file
@@ -0,0 +1,646 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
离线诊断 `Improperly formed request`(上游 400)常见成因。
|
||||
|
||||
使用方法:
|
||||
python3 tools/diagnose_improper_request.py logs/docker.log
|
||||
|
||||
脚本会从日志中提取 `request_body=...{json}`,对请求做一组启发式校验并输出汇总与样本。
|
||||
目标是快速定位“项目侧可修复的请求构造问题”,而不是复现上游的完整校验逻辑。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
from collections import Counter, defaultdict
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, Dict, Iterable, List, Optional, Tuple
|
||||
|
||||
|
||||
# Covers common CSI sequences (including the rare ':' parameter separator)
# so escape codes cannot pollute URL/field parsing.
ANSI_RE = re.compile(r"\x1b\[[0-9;:?]*[A-Za-z]")
# key=value pairs in tracing lines: ints/floats, quoted strings, or bare tokens.
KV_RE = re.compile(r"(\w+)=(\d+(?:\.\d+)?|\"[^\"]*\"|[^\s,]+)")
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class RequestSummary:
    """Structural summary of one logged request body."""
    line_no: int
    conversation_id: Optional[str]  # None when absent or not a string
    content_len: int      # -1 when unknown (non-string content or truncated log)
    tools_n: int          # -1 when unknown
    tool_results_n: int   # -1 when unknown
    history_n: int        # -1 when unknown
    json_len: int         # compact-JSON length (approx. payload size)
|
||||
|
||||
|
||||
def strip_ansi(s: str) -> str:
    """Drop all ANSI CSI escape sequences from *s*."""
    cleaned = re.sub(r"\x1b\[[0-9;:?]*[A-Za-z]", "", s)
    return cleaned
|
||||
|
||||
|
||||
def parse_kv(line: str) -> Dict[str, str]:
    """Extract all key=value pairs from a structured tracing line."""
    matches = re.finditer(r"(\w+)=(\d+(?:\.\d+)?|\"[^\"]*\"|[^\s,]+)", line)
    # Quotes around quoted values are stripped; later duplicates win.
    return {key: value.strip('"') for key, value in (m.groups() for m in matches)}
|
||||
|
||||
|
||||
def iter_request_bodies(log_text: str) -> Iterable[Tuple[int, Dict[str, Any]]]:
    """Yield (line_no, body_dict) for every request-body JSON found in the log.

    Supports two log formats:
    1. sensitive-logs mode: request_body={"conversationState":...} (may be truncated)
    2. normal mode: kiro_request_body_bytes=135777 (no JSON content)

    For truncated JSON, raw_decode is tried first; on failure a regex-based
    partial parse (_partial_parse_request_body) is yielded instead, tagged
    with "_partial": True. response_body= lines are excluded by the
    negative-lookbehind regex.
    """
    decoder = json.JSONDecoder()
    # Two sources:
    # 1. handler DEBUG: "Kiro request body: {json}" (pre-send, complete content)
    # 2. provider ERROR: "request_body={json}" (after a 400, may be truncated)
    kiro_body_marker = "Kiro request body: "
    body_re = re.compile(r"(?<![a-z_])request_body=")

    for line_no, line in enumerate(log_text.splitlines(), 1):
        clean = strip_ansi(line)

        # Source 1: the handler's DEBUG log (preferred — more complete content)
        kiro_idx = clean.find(kiro_body_marker)
        if kiro_idx != -1:
            brace = clean.find("{", kiro_idx + len(kiro_body_marker))
            if brace == -1:
                continue
        elif "request_body=" in clean:
            # Source 2: the provider's ERROR log
            match = body_re.search(clean)
            if not match:
                continue
            brace = clean.find("{", match.end())
            if brace == -1:
                continue
        else:
            continue

        # raw_decode parses the first complete JSON object and ignores any
        # trailing tracing fields on the same line.
        try:
            body, _ = decoder.raw_decode(clean, brace)
        except json.JSONDecodeError:
            # JSON was truncated (sensitive-logs truncate_middle); salvage what we can.
            body_str = clean[brace:]
            yield line_no, _partial_parse_request_body(body_str, line_no)
            continue

        if isinstance(body, dict):
            yield line_no, body
|
||||
|
||||
|
||||
def _partial_parse_request_body(truncated_json: str, line_no: int) -> Dict[str, Any]:
|
||||
"""从截断的 JSON 中尽量提取结构信息。
|
||||
|
||||
即使 JSON 不完整,也能通过正则提取 conversationId、工具数量等关键字段,
|
||||
用于启发式诊断。
|
||||
"""
|
||||
info: Dict[str, Any] = {"_partial": True, "_raw_len": len(truncated_json)}
|
||||
|
||||
# 提取 conversationId
|
||||
m = re.search(r'"conversationId"\s*:\s*"([^"]+)"', truncated_json)
|
||||
if m:
|
||||
info["_conversationId"] = m.group(1)
|
||||
|
||||
# 统计 toolUseId 出现次数(近似 tool_use 数量)
|
||||
info["_toolUseId_count"] = len(re.findall(r'"toolUseId"', truncated_json))
|
||||
|
||||
# 统计 toolSpecification 出现次数(近似 tool 定义数量)
|
||||
info["_toolSpec_count"] = len(re.findall(r'"toolSpecification"', truncated_json))
|
||||
|
||||
# 统计 assistantResponseMessage 出现次数(近似 history 轮数)
|
||||
info["_assistant_msg_count"] = len(re.findall(r'"assistantResponseMessage"', truncated_json))
|
||||
|
||||
# 统计 userInputMessage 出现次数
|
||||
info["_user_msg_count"] = len(re.findall(r'"userInputMessage"', truncated_json))
|
||||
|
||||
return info
|
||||
|
||||
|
||||
def _get(d: Dict[str, Any], path: str, default: Any = None) -> Any:
|
||||
cur: Any = d
|
||||
for part in path.split("."):
|
||||
if not isinstance(cur, dict):
|
||||
return default
|
||||
cur = cur.get(part)
|
||||
return cur if cur is not None else default
|
||||
|
||||
|
||||
def summarize(body: Dict[str, Any], line_no: int) -> RequestSummary:
    """Condense one parsed (or partially parsed) request body into a RequestSummary."""
    if body.get("_partial"):
        # Truncated log line: fall back to the regex-derived approximations.
        # -1 marks fields that cannot be known from a partial parse.
        return RequestSummary(
            line_no=line_no,
            conversation_id=body.get("_conversationId"),
            content_len=-1,
            tools_n=body.get("_toolSpec_count", -1),
            tool_results_n=body.get("_toolUseId_count", -1),
            history_n=body.get("_assistant_msg_count", -1),
            json_len=body.get("_raw_len", 0),
        )

    msg = "conversationState.currentMessage.userInputMessage"
    conversation_id = _get(body, "conversationState.conversationId")
    content = _get(body, f"{msg}.content", "")
    tools = _get(body, f"{msg}.userInputMessageContext.tools", [])
    tool_results = _get(body, f"{msg}.userInputMessageContext.toolResults", [])
    history = _get(body, "conversationState.history", [])

    def list_len(value: Any) -> int:
        # -1 signals "present but not a list" (shape problem upstream).
        return len(value) if isinstance(value, list) else -1

    # Compact serialization approximates the on-the-wire payload size.
    json_len = len(json.dumps(body, ensure_ascii=False, separators=(",", ":")))

    return RequestSummary(
        line_no=line_no,
        conversation_id=conversation_id if isinstance(conversation_id, str) else None,
        content_len=len(content) if isinstance(content, str) else -1,
        tools_n=list_len(tools),
        tool_results_n=list_len(tool_results),
        history_n=list_len(history),
        json_len=json_len,
    )
|
||||
|
||||
|
||||
def find_issues(
    body: Dict[str, Any],
    *,
    max_history_messages: int,
    large_payload_bytes: int,
    huge_payload_bytes: int,
) -> List[str]:
    """Run heuristic checks on one parsed request body and return issue codes.

    Codes prefixed ``E_`` are likely hard errors (common upstream-400 causes);
    ``W_`` codes are soft warnings. An empty list means no heuristic matched.
    """
    # Partially-parsed requests only allow limited diagnostics.
    if body.get("_partial"):
        issues: List[str] = ["W_TRUNCATED_LOG"]
        raw_len = body.get("_raw_len", 0)
        if raw_len > large_payload_bytes:
            issues.append("W_PAYLOAD_LARGE")
        return issues

    issues = []

    cs = body.get("conversationState") or {}
    cm = _get(body, "conversationState.currentMessage.userInputMessage", {})
    ctx = cm.get("userInputMessageContext") or {}

    content = cm.get("content")
    images = cm.get("images") or []
    tools = ctx.get("tools") or []
    tool_results = ctx.get("toolResults") or []
    history = cs.get("history") or []

    # Empty content is classified by what else the message carries.
    if isinstance(content, str) and content.strip() == "":
        if images:
            issues.append("E_CONTENT_EMPTY_WITH_IMAGES")
        elif tool_results:
            issues.append("E_CONTENT_EMPTY_WITH_TOOL_RESULTS")
        else:
            issues.append("E_CONTENT_EMPTY")

    # Tool specification checks: description / input schema.
    empty_desc: List[str] = []
    missing_schema: List[str] = []
    missing_type: List[str] = []
    for t in tools if isinstance(tools, list) else []:
        if not isinstance(t, dict):
            issues.append("E_TOOL_SHAPE_INVALID")
            continue
        spec = t.get("toolSpecification")
        if not isinstance(spec, dict):
            issues.append("E_TOOL_SPEC_MISSING")
            continue

        name = spec.get("name")
        name_s = name if isinstance(name, str) else "<noname>"

        desc = spec.get("description")
        if isinstance(desc, str) and desc.strip() == "":
            empty_desc.append(name_s)

        inp = spec.get("inputSchema")
        js = inp.get("json") if isinstance(inp, dict) else None
        if isinstance(js, dict):
            if "$schema" not in js:
                missing_schema.append(name_s)
            if "type" not in js:
                missing_type.append(name_s)
        else:
            issues.append("E_TOOL_INPUT_SCHEMA_NOT_OBJECT")

    if empty_desc:
        issues.append("E_TOOL_DESCRIPTION_EMPTY")
    if missing_schema:
        issues.append("W_TOOL_SCHEMA_MISSING_$SCHEMA")
    if missing_type:
        issues.append("W_TOOL_SCHEMA_MISSING_TYPE")

    # Can each tool_result be matched to a tool_use in history? (heuristic)
    tool_use_ids: set[str] = set()
    history_tool_result_ids: set[str] = set()
    tool_def_names_ci: set[str] = set()

    # Collect defined tool names, case-insensitively.
    for t in tools if isinstance(tools, list) else []:
        spec = t.get("toolSpecification") if isinstance(t, dict) else None
        if isinstance(spec, dict) and isinstance(spec.get("name"), str):
            tool_def_names_ci.add(spec["name"].lower())

    for h in history if isinstance(history, list) else []:
        if not isinstance(h, dict):
            continue
        am = h.get("assistantResponseMessage")
        if not isinstance(am, dict):
            # May also be a user message (carrying tool_results).
            um = h.get("userInputMessage")
            if not isinstance(um, dict):
                continue
            uctx = um.get("userInputMessageContext")
            if not isinstance(uctx, dict):
                continue
            trs = uctx.get("toolResults")
            if not isinstance(trs, list):
                continue
            for tr in trs:
                if not isinstance(tr, dict):
                    continue
                tid = tr.get("toolUseId")
                if isinstance(tid, str):
                    history_tool_result_ids.add(tid)
            continue

        tus = am.get("toolUses")
        if isinstance(tus, list):
            for tu in tus:
                if not isinstance(tu, dict):
                    continue
                tid = tu.get("toolUseId")
                if isinstance(tid, str):
                    tool_use_ids.add(tid)
                # A history tool_use name must be defined in tools
                # (a common upstream constraint).
                nm = tu.get("name")
                if isinstance(nm, str) and tool_def_names_ci and nm.lower() not in tool_def_names_ci:
                    issues.append("E_HISTORY_TOOL_USE_NAME_NOT_IN_TOOLS")

        # The same history entry may ALSO contain a userInputMessage
        # (rare, but handled for compatibility).
        um = h.get("userInputMessage")
        if isinstance(um, dict):
            uctx = um.get("userInputMessageContext")
            if isinstance(uctx, dict):
                trs = uctx.get("toolResults")
                if isinstance(trs, list):
                    for tr in trs:
                        if not isinstance(tr, dict):
                            continue
                        tid = tr.get("toolUseId")
                        if isinstance(tid, str):
                            history_tool_result_ids.add(tid)

    # Every tool_result inside history must match a tool_use in history
    # (otherwise a 400 is very likely).
    if history_tool_result_ids and tool_use_ids:
        if any(tid not in tool_use_ids for tid in history_tool_result_ids):
            issues.append("E_HISTORY_TOOL_RESULT_ORPHAN")
    elif history_tool_result_ids and not tool_use_ids:
        issues.append("E_HISTORY_TOOL_RESULT_ORPHAN")

    # currentMessage tool_results must match a tool_use in history.
    orphan_results = 0
    current_tool_result_ids: set[str] = set()
    if tool_use_ids and isinstance(tool_results, list):
        for tr in tool_results:
            if not isinstance(tr, dict):
                continue
            tid = tr.get("toolUseId")
            if isinstance(tid, str):
                current_tool_result_ids.add(tid)
                if tid not in tool_use_ids:
                    orphan_results += 1
    if orphan_results:
        issues.append("W_TOOL_RESULT_ORPHAN")

    # Every history tool_use must appear among history/currentMessage
    # tool_results (otherwise a 400 is very likely).
    all_tool_result_ids = history_tool_result_ids | current_tool_result_ids
    if tool_use_ids and all_tool_result_ids:
        if any(tid not in all_tool_result_ids for tid in tool_use_ids):
            issues.append("E_HISTORY_TOOL_USE_ORPHAN")
    elif tool_use_ids and not all_tool_result_ids:
        issues.append("E_HISTORY_TOOL_USE_ORPHAN")

    # Over-long history (strong heuristic; frequently co-occurs with 400s).
    if isinstance(history, list) and len(history) > max_history_messages:
        issues.append("W_HISTORY_TOO_LONG")

    # Payload size (strong heuristic; the upstream may enforce an opaque
    # hard limit).
    json_len = len(json.dumps(body, ensure_ascii=False, separators=(",", ":")))
    if json_len > huge_payload_bytes:
        issues.append("W_PAYLOAD_HUGE")
    elif json_len > large_payload_bytes:
        issues.append("W_PAYLOAD_LARGE")

    return issues
||||
def main(argv: List[str]) -> int:
    """CLI entry point: scan a docker log and report likely 400 causes.

    Returns a process exit code (0 = completed, 2 = log file missing).

    NOTE(review): relies on ``os``, ``Counter``, ``iter_request_bodies``
    and ``RequestSummary`` being provided by this file's import/header
    section (outside this chunk) — confirm.
    """
    parser = argparse.ArgumentParser(
        description="离线诊断 Improperly formed request(上游 400)常见成因"
    )
    parser.add_argument("log", nargs="?", default="logs/docker.log", help="docker.log 路径")
    parser.add_argument("--max-samples", type=int, default=5, help="每类问题输出样本数量")
    parser.add_argument("--dump-dir", default=None, help="可选:把 request_body JSON 按行号落盘")
    parser.add_argument("--max-history", type=int, default=100, help="history 过长阈值(启发式)")
    # The upstream has a hard limit around ~5 MiB; default to 4.5 MiB as the
    # "approaching the limit" warning threshold.
    parser.add_argument("--large-bytes", type=int, default=4_718_592, help="payload 大阈值(启发式)")
    parser.add_argument("--huge-bytes", type=int, default=8_388_608, help="payload 巨大阈值(启发式)")
    args = parser.parse_args(argv)

    log_path = args.log
    try:
        log_text = open(log_path, "r", encoding="utf-8", errors="replace").read()
    except FileNotFoundError:
        print(f"ERROR: log file not found: {log_path}", file=sys.stderr)
        return 2

    dump_dir = args.dump_dir
    if dump_dir:
        os.makedirs(dump_dir, exist_ok=True)

    # Phase 0: scan for the project's own "body over limit, refusing to send"
    # rejections (verifies that the local 4.5 MiB truncate/reject is working).
    print("=" * 60)
    print("Phase 0: 扫描本地请求体超限拒绝")
    print("=" * 60)
    local_rejects = _scan_local_rejects(log_text)
    if local_rejects:
        for r in local_rejects[: args.max_samples]:
            print(
                "\n [line {line}] effective_bytes={eff} threshold={th} body={body} image={img} conversationId={cid}".format(
                    line=r.get("line_no"),
                    eff=r.get("effective_bytes", "?"),
                    th=r.get("threshold", "?"),
                    body=r.get("request_body_bytes", "?"),
                    img=r.get("image_bytes", "?"),
                    cid=r.get("conversation_id") or "None",
                )
            )
        if len(local_rejects) > args.max_samples:
            print(f"\n ... ({len(local_rejects) - args.max_samples} more)")
    else:
        print(" 未发现本地请求体超限拒绝")
    print("")

    # Phase 1: scan for 400 "Improperly formed request" ERROR lines and
    # correlate each with the nearest logged request body.
    print("=" * 60)
    print("Phase 1: 扫描 400 Improperly formed request 错误")
    print("=" * 60)
    error_lines = _scan_400_errors(
        log_text,
        max_history_messages=args.max_history,
        large_payload_bytes=args.large_bytes,
        huge_payload_bytes=args.huge_bytes,
    )
    if error_lines:
        for el in error_lines:
            print(f"\n [line {el['line_no']}] bytes={el.get('body_bytes', '?')} "
                  f"url={el.get('url', '?')}")
            if "_req_body_line" in el:
                print(f" ↳ 关联请求体: line {el['_req_body_line']}"
                      f"{' (truncated)' if el.get('_req_body_partial') else ''}")
            if "summary" in el:
                s = el["summary"]
                print(f" ↳ conversationId={s.conversation_id or 'None'} "
                      f"content_len={s.content_len} tools={s.tools_n} "
                      f"toolResults={s.tool_results_n} history={s.history_n} "
                      f"json_len={s.json_len}")
            if "issues" in el and el["issues"]:
                print(f" ↳ issues: {', '.join(el['issues'])}")
            elif "_req_body" in el and el["_req_body"].get("_partial"):
                body = el["_req_body"]
                print(f" ↳ partial: toolSpecs={body.get('_toolSpec_count', '?')} "
                      f"toolUseIds={body.get('_toolUseId_count', '?')} "
                      f"assistantMsgs={body.get('_assistant_msg_count', '?')} "
                      f"userMsgs={body.get('_user_msg_count', '?')} "
                      f"raw_len={body.get('_raw_len', '?')}")
    else:
        print(" 未发现 400 Improperly formed request 错误")
    print()

    # Phase 2: parse every logged request_body entry and tally heuristic issues.
    print("=" * 60)
    print("Phase 2: 解析 request_body 条目")
    print("=" * 60)

    issue_counter: Counter[str] = Counter()
    issues_to_samples: Dict[str, List[RequestSummary]] = defaultdict(list)
    total = 0
    partial_count = 0

    for line_no, body in iter_request_bodies(log_text):
        total += 1
        if body.get("_partial"):
            partial_count += 1
        summary = summarize(body, line_no)
        issues = find_issues(
            body,
            max_history_messages=args.max_history,
            large_payload_bytes=args.large_bytes,
            huge_payload_bytes=args.huge_bytes,
        )

        # Optionally dump each body to disk for minimized replay / diffing.
        if dump_dir:
            out_path = os.path.join(dump_dir, f"req_line_{line_no}.json")
            with open(out_path, "w", encoding="utf-8") as f:
                json.dump(body, f, ensure_ascii=False, indent=2)

        if not issues:
            issues = ["(NO_HEURISTIC_MATCH)"]

        # De-duplicate per request so one body counts each issue at most once.
        for issue in set(issues):
            issue_counter[issue] += 1
            if len(issues_to_samples[issue]) < args.max_samples:
                issues_to_samples[issue].append(summary)

    print(f"Parsed request_body entries: {total} (complete: {total - partial_count}, truncated: {partial_count})")
    print("")

    if not issue_counter:
        print("No request_body entries found.")
        if not error_lines:
            print("\nHint: 如果使用非 sensitive-logs 模式,日志中不包含 request_body 内容。")
            print(" 请使用 --features sensitive-logs 重新编译,或检查 kiro_request_body_bytes 字段。")
        return 0

    print("Issue counts:")
    for issue, cnt in issue_counter.most_common():
        print(f" {cnt:4d} {issue}")
    print("")

    print("Samples:")
    for issue, cnt in issue_counter.most_common():
        samples = issues_to_samples.get(issue) or []
        if not samples:
            continue
        print(f"- {issue} (showing {len(samples)}/{cnt})")
        for s in samples:
            print(
                " line={line} conversationId={cid} content_len={cl} tools={tn} toolResults={trn} history={hn} json_len={jl}".format(
                    line=s.line_no,
                    cid=s.conversation_id or "None",
                    cl=s.content_len,
                    tn=s.tools_n,
                    trn=s.tool_results_n,
                    hn=s.history_n,
                    jl=s.json_len,
                )
            )
        print("")

    return 0
||||
def _scan_400_errors(
    log_text: str,
    *,
    max_history_messages: int,
    large_payload_bytes: int,
    huge_payload_bytes: int,
) -> List[Dict[str, Any]]:
    """Scan the log for 400 "Improperly formed request" error lines and
    correlate each with the nearest logged request body.

    For each 400 error, nearby lines are searched for a request body
    (an inline ``request_body=`` dump below the error, or an earlier
    'Kiro request body:' DEBUG line); the body is parsed and run through
    the heuristic diagnostics.
    """
    lines = log_text.splitlines()
    results = []
    body_bytes_re = re.compile(r"(?:kiro_)?request_body_bytes=(\d+)")
    url_re = re.compile(r"request_url=(\S+)")
    decoder = json.JSONDecoder()

    for line_no_0, line in enumerate(lines):
        if "Improperly formed request" not in line:
            continue
        clean = strip_ansi(line)
        # Avoid counting the same error twice across multiple log lines
        # (both the provider ERROR and the handler WARN can contain the
        # substring); only count provider ERRORs, which usually carry
        # request_url=...
        if "request_url=" not in clean:
            continue
        entry: Dict[str, Any] = {"line_no": line_no_0 + 1}

        m = body_bytes_re.search(clean)
        if m:
            entry["body_bytes"] = int(m.group(1))
        else:
            # Provider ERROR lines usually lack the body size; walk back up
            # to 30 lines to the nearest build-log / handler WARN that has it.
            for back in range(1, min(31, line_no_0 + 1)):
                prev = strip_ansi(lines[line_no_0 - back])
                m2 = body_bytes_re.search(prev)
                if not m2:
                    continue
                entry["body_bytes"] = int(m2.group(1))
                entry["_body_bytes_line"] = line_no_0 - back + 1
                break

        m = url_re.search(clean)
        if m:
            entry["url"] = m.group(1)

        req_body = None

        # Lookbehind: don't match identifiers that merely end in
        # "request_body" (e.g. some_request_body=).
        body_re = re.compile(r"(?<![a-z_])request_body=")

        # 1) Look forward for request_body=... within the error block
        #    (the provider often logs headers/body on subsequent lines).
        for fwd in range(0, min(31, len(lines) - line_no_0)):
            cand = strip_ansi(lines[line_no_0 + fwd])
            match = body_re.search(cand)
            if not match:
                continue
            brace = cand.find("{", match.end())
            if brace == -1:
                continue
            try:
                req_body, _ = decoder.raw_decode(cand, brace)
            except json.JSONDecodeError:
                # Truncated JSON in the log: salvage what we can.
                entry["_req_body_partial"] = True
                req_body = _partial_parse_request_body(cand[brace:], line_no_0 + fwd + 1)
            entry["_req_body_line"] = line_no_0 + fwd + 1
            break

        # 2) Otherwise look backward (max 20 lines) for the handler DEBUG
        #    line "Kiro request body: ".
        if req_body is None:
            for back in range(1, min(21, line_no_0 + 1)):
                prev_line = strip_ansi(lines[line_no_0 - back])
                marker = "Kiro request body: "
                idx = prev_line.find(marker)
                if idx == -1:
                    continue
                brace = prev_line.find("{", idx + len(marker))
                if brace == -1:
                    break
                try:
                    req_body, _ = decoder.raw_decode(prev_line, brace)
                except json.JSONDecodeError:
                    entry["_req_body_partial"] = True
                    req_body = _partial_parse_request_body(prev_line[brace:], line_no_0 - back + 1)
                entry["_req_body_line"] = line_no_0 - back + 1
                break

        if req_body and isinstance(req_body, dict):
            entry["_req_body"] = req_body
            issues = find_issues(
                req_body,
                max_history_messages=max_history_messages,
                large_payload_bytes=large_payload_bytes,
                huge_payload_bytes=huge_payload_bytes,
            )
            # Strong signal based on the actual request byte count (the JSON
            # in the log may be truncated/redacted, so json_len is not a
            # reliable size indicator here).
            body_bytes = entry.get("body_bytes")
            if isinstance(body_bytes, int):
                if body_bytes > huge_payload_bytes:
                    issues.append("W_BODY_BYTES_HUGE")
                elif body_bytes > large_payload_bytes:
                    issues.append("W_BODY_BYTES_LARGE")
            entry["issues"] = sorted(set(issues))
            entry["summary"] = summarize(req_body, entry.get("_req_body_line", 0))

        results.append(entry)

    return results
|
||||
def _scan_local_rejects(log_text: str) -> List[Dict[str, Any]]:
    """Scan for the local "request body over safety threshold, refusing
    to send" rejection log lines and extract their key/value fields."""
    marker = "请求体超过安全阈值,拒绝发送"
    found: List[Dict[str, Any]] = []
    for idx, raw_line in enumerate(log_text.splitlines(), 1):
        if marker not in raw_line:
            continue
        fields = parse_kv(strip_ansi(raw_line))
        record: Dict[str, Any] = {
            "line_no": idx,
            "conversation_id": fields.get("conversation_id"),
        }
        for key in ("request_body_bytes", "image_bytes", "effective_bytes", "threshold"):
            record[key] = _safe_int(fields.get(key))
        found.append(record)
    return found
|
||||
def _safe_int(v: Optional[str]) -> Optional[int]:
|
||||
if v is None:
|
||||
return None
|
||||
try:
|
||||
return int(v)
|
||||
except ValueError:
|
||||
return None
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Propagate main()'s integer exit status to the shell.
    raise SystemExit(main(sys.argv[1:]))
|
||||
896
tools/event-viewer.html
Normal file
896
tools/event-viewer.html
Normal file
@@ -0,0 +1,896 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="zh-CN">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>AWS Event Stream Viewer</title>
|
||||
<style>
|
||||
* {
|
||||
box-sizing: border-box;
|
||||
margin: 0;
|
||||
padding: 0;
|
||||
}
|
||||
body {
|
||||
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif;
|
||||
background: #0d1117;
|
||||
color: #c9d1d9;
|
||||
min-height: 100vh;
|
||||
padding: 20px;
|
||||
}
|
||||
.container {
|
||||
max-width: 1400px;
|
||||
margin: 0 auto;
|
||||
}
|
||||
h1 {
|
||||
color: #58a6ff;
|
||||
margin-bottom: 20px;
|
||||
font-size: 24px;
|
||||
}
|
||||
.input-section {
|
||||
margin-bottom: 20px;
|
||||
}
|
||||
.input-section label {
|
||||
display: block;
|
||||
margin-bottom: 8px;
|
||||
color: #8b949e;
|
||||
font-size: 14px;
|
||||
}
|
||||
.input-controls {
|
||||
display: flex;
|
||||
gap: 10px;
|
||||
margin-bottom: 10px;
|
||||
flex-wrap: wrap;
|
||||
}
|
||||
.format-select {
|
||||
padding: 8px 16px;
|
||||
border: 1px solid #30363d;
|
||||
border-radius: 6px;
|
||||
background: #21262d;
|
||||
color: #c9d1d9;
|
||||
font-size: 14px;
|
||||
}
|
||||
textarea {
|
||||
width: 100%;
|
||||
height: 200px;
|
||||
padding: 12px;
|
||||
border: 1px solid #30363d;
|
||||
border-radius: 6px;
|
||||
background: #161b22;
|
||||
color: #c9d1d9;
|
||||
font-family: 'SF Mono', Monaco, 'Courier New', monospace;
|
||||
font-size: 13px;
|
||||
resize: vertical;
|
||||
}
|
||||
textarea:focus {
|
||||
outline: none;
|
||||
border-color: #58a6ff;
|
||||
}
|
||||
.btn {
|
||||
padding: 10px 20px;
|
||||
border: none;
|
||||
border-radius: 6px;
|
||||
cursor: pointer;
|
||||
font-size: 14px;
|
||||
font-weight: 500;
|
||||
transition: all 0.2s;
|
||||
}
|
||||
.btn-primary {
|
||||
background: #238636;
|
||||
color: white;
|
||||
}
|
||||
.btn-primary:hover {
|
||||
background: #2ea043;
|
||||
}
|
||||
.btn-secondary {
|
||||
background: #21262d;
|
||||
color: #c9d1d9;
|
||||
border: 1px solid #30363d;
|
||||
}
|
||||
.btn-secondary:hover {
|
||||
background: #30363d;
|
||||
}
|
||||
.results {
|
||||
margin-top: 20px;
|
||||
}
|
||||
.stats {
|
||||
display: flex;
|
||||
gap: 20px;
|
||||
margin-bottom: 20px;
|
||||
flex-wrap: wrap;
|
||||
}
|
||||
.stat-card {
|
||||
background: #161b22;
|
||||
border: 1px solid #30363d;
|
||||
border-radius: 6px;
|
||||
padding: 16px 20px;
|
||||
min-width: 150px;
|
||||
}
|
||||
.stat-card .label {
|
||||
color: #8b949e;
|
||||
font-size: 12px;
|
||||
margin-bottom: 4px;
|
||||
}
|
||||
.stat-card .value {
|
||||
color: #58a6ff;
|
||||
font-size: 24px;
|
||||
font-weight: 600;
|
||||
}
|
||||
.message-list {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: 12px;
|
||||
}
|
||||
.message {
|
||||
background: #161b22;
|
||||
border: 1px solid #30363d;
|
||||
border-radius: 8px;
|
||||
overflow: hidden;
|
||||
}
|
||||
.message-header {
|
||||
display: flex;
|
||||
justify-content: space-between;
|
||||
align-items: center;
|
||||
padding: 12px 16px;
|
||||
background: #21262d;
|
||||
border-bottom: 1px solid #30363d;
|
||||
cursor: pointer;
|
||||
}
|
||||
.message-header:hover {
|
||||
background: #30363d;
|
||||
}
|
||||
.message-info {
|
||||
display: flex;
|
||||
gap: 12px;
|
||||
align-items: center;
|
||||
flex-wrap: wrap;
|
||||
}
|
||||
.message-index {
|
||||
background: #30363d;
|
||||
color: #8b949e;
|
||||
padding: 2px 8px;
|
||||
border-radius: 4px;
|
||||
font-size: 12px;
|
||||
font-weight: 500;
|
||||
}
|
||||
.message-type {
|
||||
padding: 2px 8px;
|
||||
border-radius: 4px;
|
||||
font-size: 12px;
|
||||
font-weight: 500;
|
||||
}
|
||||
.message-type.event {
|
||||
background: rgba(56, 139, 253, 0.15);
|
||||
color: #58a6ff;
|
||||
}
|
||||
.message-type.error {
|
||||
background: rgba(248, 81, 73, 0.15);
|
||||
color: #f85149;
|
||||
}
|
||||
.message-type.exception {
|
||||
background: rgba(210, 153, 34, 0.15);
|
||||
color: #d29922;
|
||||
}
|
||||
.event-type {
|
||||
color: #7ee787;
|
||||
font-size: 13px;
|
||||
}
|
||||
.message-size {
|
||||
color: #8b949e;
|
||||
font-size: 12px;
|
||||
}
|
||||
.expand-icon {
|
||||
color: #8b949e;
|
||||
transition: transform 0.2s;
|
||||
}
|
||||
.message.expanded .expand-icon {
|
||||
transform: rotate(90deg);
|
||||
}
|
||||
.message-content {
|
||||
display: none;
|
||||
padding: 16px;
|
||||
}
|
||||
.message.expanded .message-content {
|
||||
display: block;
|
||||
}
|
||||
.section-title {
|
||||
color: #8b949e;
|
||||
font-size: 12px;
|
||||
font-weight: 500;
|
||||
margin-bottom: 8px;
|
||||
text-transform: uppercase;
|
||||
}
|
||||
.headers-table {
|
||||
width: 100%;
|
||||
border-collapse: collapse;
|
||||
margin-bottom: 16px;
|
||||
font-size: 13px;
|
||||
}
|
||||
.headers-table th,
|
||||
.headers-table td {
|
||||
text-align: left;
|
||||
padding: 8px 12px;
|
||||
border-bottom: 1px solid #21262d;
|
||||
}
|
||||
.headers-table th {
|
||||
color: #8b949e;
|
||||
font-weight: 500;
|
||||
background: #0d1117;
|
||||
}
|
||||
.headers-table td {
|
||||
font-family: 'SF Mono', Monaco, 'Courier New', monospace;
|
||||
}
|
||||
.header-name {
|
||||
color: #ff7b72;
|
||||
}
|
||||
.header-type {
|
||||
color: #d2a8ff;
|
||||
}
|
||||
.header-value {
|
||||
color: #a5d6ff;
|
||||
}
|
||||
.payload-container {
|
||||
background: #0d1117;
|
||||
border-radius: 6px;
|
||||
overflow: hidden;
|
||||
}
|
||||
.payload-tabs {
|
||||
display: flex;
|
||||
border-bottom: 1px solid #21262d;
|
||||
}
|
||||
.payload-tab {
|
||||
padding: 8px 16px;
|
||||
background: transparent;
|
||||
border: none;
|
||||
color: #8b949e;
|
||||
cursor: pointer;
|
||||
font-size: 13px;
|
||||
border-bottom: 2px solid transparent;
|
||||
margin-bottom: -1px;
|
||||
}
|
||||
.payload-tab.active {
|
||||
color: #58a6ff;
|
||||
border-bottom-color: #58a6ff;
|
||||
}
|
||||
.payload-content {
|
||||
padding: 12px;
|
||||
font-family: 'SF Mono', Monaco, 'Courier New', monospace;
|
||||
font-size: 13px;
|
||||
overflow-x: auto;
|
||||
max-height: 400px;
|
||||
overflow-y: auto;
|
||||
}
|
||||
.payload-json {
|
||||
white-space: pre-wrap;
|
||||
word-break: break-word;
|
||||
}
|
||||
.payload-raw {
|
||||
white-space: pre;
|
||||
color: #8b949e;
|
||||
}
|
||||
.payload-hex {
|
||||
white-space: pre;
|
||||
color: #8b949e;
|
||||
font-size: 12px;
|
||||
}
|
||||
.json-key {
|
||||
color: #ff7b72;
|
||||
}
|
||||
.json-string {
|
||||
color: #a5d6ff;
|
||||
}
|
||||
.json-number {
|
||||
color: #79c0ff;
|
||||
}
|
||||
.json-boolean {
|
||||
color: #ff7b72;
|
||||
}
|
||||
.json-null {
|
||||
color: #8b949e;
|
||||
}
|
||||
.error-box {
|
||||
background: rgba(248, 81, 73, 0.1);
|
||||
border: 1px solid rgba(248, 81, 73, 0.4);
|
||||
border-radius: 6px;
|
||||
padding: 12px 16px;
|
||||
color: #f85149;
|
||||
margin-bottom: 16px;
|
||||
}
|
||||
.hex-view {
|
||||
display: grid;
|
||||
grid-template-columns: 80px 1fr 1fr;
|
||||
gap: 8px;
|
||||
font-family: 'SF Mono', Monaco, 'Courier New', monospace;
|
||||
font-size: 12px;
|
||||
}
|
||||
.hex-offset {
|
||||
color: #8b949e;
|
||||
}
|
||||
.hex-bytes {
|
||||
color: #7ee787;
|
||||
}
|
||||
.hex-ascii {
|
||||
color: #d2a8ff;
|
||||
}
|
||||
.raw-section {
|
||||
margin-top: 20px;
|
||||
padding: 16px;
|
||||
background: #161b22;
|
||||
border: 1px solid #30363d;
|
||||
border-radius: 8px;
|
||||
}
|
||||
.raw-section h3 {
|
||||
color: #8b949e;
|
||||
font-size: 14px;
|
||||
margin-bottom: 12px;
|
||||
}
|
||||
.copy-btn {
|
||||
padding: 4px 8px;
|
||||
font-size: 12px;
|
||||
margin-left: 8px;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<div class="container">
|
||||
<h1>AWS Event Stream Viewer</h1>
|
||||
|
||||
<div class="input-section">
|
||||
<label>粘贴二进制数据(支持 Hex / Base64 格式)</label>
|
||||
<div class="input-controls">
|
||||
<select id="inputFormat" class="format-select">
|
||||
<option value="auto">自动检测</option>
|
||||
<option value="hex">Hex</option>
|
||||
<option value="base64">Base64</option>
|
||||
</select>
|
||||
<button class="btn btn-primary" onclick="parseInput()">解析</button>
|
||||
<button class="btn btn-secondary" onclick="clearAll()">清空</button>
|
||||
<button class="btn btn-secondary" onclick="loadExample()">加载示例</button>
|
||||
</div>
|
||||
<textarea id="inputData" placeholder="粘贴 Hex 数据 (如: 00 00 00 3e 00 00 00 1d...) 或 Base64 编码数据..."></textarea>
|
||||
</div>
|
||||
|
||||
<div id="error" class="error-box" style="display: none;"></div>
|
||||
|
||||
<div class="results" id="results" style="display: none;">
|
||||
<div class="stats" id="stats"></div>
|
||||
<div class="message-list" id="messageList"></div>
|
||||
</div>
|
||||
|
||||
<div class="raw-section" id="rawSection" style="display: none;">
|
||||
<h3>原始字节数据 <button class="btn btn-secondary copy-btn" onclick="copyRawHex()">复制 Hex</button></h3>
|
||||
<div class="payload-hex" id="rawHex"></div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<script>
|
||||
// CRC32 (IEEE / ISO-HDLC, reflected polynomial 0xEDB88320) lookup table.
const CRC32_TABLE = (() => {
    const table = new Uint32Array(256);
    for (let n = 0; n < 256; n++) {
        let c = n;
        for (let k = 8; k > 0; k--) {
            c = (c & 1) !== 0 ? (0xEDB88320 ^ (c >>> 1)) : (c >>> 1);
        }
        table[n] = c >>> 0;
    }
    return table;
})();

// Compute the CRC32 checksum of a byte sequence (Uint8Array / array-like).
function crc32(data) {
    let state = 0xFFFFFFFF;
    for (let i = 0; i < data.length; i++) {
        state = CRC32_TABLE[(state ^ data[i]) & 0xFF] ^ (state >>> 8);
    }
    return (state ^ 0xFFFFFFFF) >>> 0;
}
|
||||
|
||||
// Event-stream header value types: wire type code -> display name and
// fixed value size in bytes (-1 = variable length; those values carry a
// 16-bit big-endian length prefix on the wire).
const VALUE_TYPES = {
    0: { name: 'BoolTrue', size: 0 },
    1: { name: 'BoolFalse', size: 0 },
    2: { name: 'Byte', size: 1 },
    3: { name: 'Short', size: 2 },
    4: { name: 'Integer', size: 4 },
    5: { name: 'Long', size: 8 },
    6: { name: 'ByteArray', size: -1 },
    7: { name: 'String', size: -1 },
    8: { name: 'Timestamp', size: 8 },
    9: { name: 'UUID', size: 16 }
};
|
||||
|
||||
// Guess the input encoding: returns 'hex', 'base64', or 'unknown'.
//
// Bug fix: hex must be checked BEFORE base64. Every even-length hex
// string (e.g. the placeholder example "00 00 00 3e ...") is also a
// syntactically valid base64 string, so the old base64-first order
// misdetected typical hex dumps and corrupted the parse.
function detectFormat(input) {
    const cleaned = input.replace(/[\s\n\r]/g, '');

    // Pure hex with an even digit count: treat as hex.
    if (/^[0-9A-Fa-f]+$/.test(cleaned) && cleaned.length % 2 === 0) {
        return 'hex';
    }

    // Base64: charset + padding check, then verify it actually decodes.
    if (/^[A-Za-z0-9+/]+=*$/.test(cleaned) && cleaned.length % 4 === 0) {
        try {
            atob(cleaned);
            return 'base64';
        } catch {}
    }

    // Odd-length hex (likely a truncated paste) still classifies as hex,
    // matching the previous behavior; the parser reports the length error.
    if (/^[0-9A-Fa-f\s]+$/.test(input)) {
        return 'hex';
    }

    return 'unknown';
}
|
||||
|
||||
// Decode user-pasted text into a Uint8Array according to *format*
// ('auto' | 'hex' | 'base64'). Throws on malformed or unrecognized input.
function parseInputData(input, format) {
    const trimmed = input.trim();
    const resolved = format === 'auto' ? detectFormat(trimmed) : format;

    if (resolved === 'base64') {
        let binary;
        try {
            binary = atob(trimmed.replace(/[\s\n\r]/g, ''));
        } catch (e) {
            throw new Error('Base64 解码失败: ' + e.message);
        }
        const out = new Uint8Array(binary.length);
        for (let i = 0; i < binary.length; i++) {
            out[i] = binary.charCodeAt(i);
        }
        return out;
    }

    if (resolved === 'hex') {
        // Strip whitespace and optional "0x" prefixes before decoding.
        const digits = trimmed.replace(/[\s\n\r]/g, '').replace(/0x/gi, '');
        if (digits.length % 2 !== 0) {
            throw new Error('Hex 数据长度必须是偶数');
        }
        const out = new Uint8Array(digits.length / 2);
        for (let i = 0; i < digits.length; i += 2) {
            out[i / 2] = parseInt(digits.substr(i, 2), 16);
        }
        return out;
    }

    throw new Error('无法识别输入格式');
}
|
||||
|
||||
// Read a big-endian unsigned 32-bit integer at *offset*.
function readUint32BE(data, offset) {
    let value = 0;
    for (let i = 0; i < 4; i++) {
        value = (value << 8) | data[offset + i];
    }
    return value >>> 0;
}
|
||||
|
||||
// Read a big-endian unsigned 16-bit integer at *offset*.
function readUint16BE(data, offset) {
    return ((data[offset] << 8) + data[offset + 1]) >>> 0;
}
|
||||
|
||||
// Read a big-endian 64-bit value at *offset* as a BigInt
// (bytes are folded in unsigned, matching the original hi/lo composition).
function readInt64BE(data, offset) {
    let result = 0n;
    for (let i = 0; i < 8; i++) {
        result = (result << 8n) | BigInt(data[offset + i]);
    }
    return result;
}
|
||||
|
||||
// Parse the event-stream header block of one frame.
// Wire format per header: 1-byte name length, name (UTF-8), 1-byte value
// type code, then the value (fixed-size or 16-bit-length-prefixed,
// depending on the type). Returns an array of {name, type, typeCode, value}.
function parseHeaders(data, headerLength) {
    const headers = [];
    let offset = 0;

    while (offset < headerLength) {
        // Read the header-name length.
        const nameLength = data[offset];
        offset++;

        // Zero-length or overrunning name: stop (malformed/trailing bytes).
        if (nameLength === 0 || offset + nameLength > headerLength) {
            break;
        }

        // Read the header name.
        const name = new TextDecoder().decode(data.slice(offset, offset + nameLength));
        offset += nameLength;

        if (offset >= headerLength) break;

        // Read the value-type code.
        const valueType = data[offset];
        offset++;

        const typeInfo = VALUE_TYPES[valueType] || { name: 'Unknown', size: 0 };
        let value;
        let valueSize = typeInfo.size;

        // Decode the value according to its type code.
        switch (valueType) {
            case 0: // BoolTrue
                value = true;
                break;
            case 1: // BoolFalse
                value = false;
                break;
            case 2: // Byte
                value = data[offset];
                offset++;
                break;
            case 3: // Short
                value = readUint16BE(data, offset);
                offset += 2;
                break;
            case 4: // Integer
                value = readUint32BE(data, offset);
                offset += 4;
                break;
            case 5: // Long (rendered as a decimal string to avoid BigInt leaking out)
                value = readInt64BE(data, offset).toString();
                offset += 8;
                break;
            case 6: // ByteArray: 16-bit length prefix, rendered as spaced hex
                valueSize = readUint16BE(data, offset);
                offset += 2;
                value = Array.from(data.slice(offset, offset + valueSize)).map(b => b.toString(16).padStart(2, '0')).join(' ');
                offset += valueSize;
                break;
            case 7: // String: 16-bit length prefix, UTF-8
                valueSize = readUint16BE(data, offset);
                offset += 2;
                value = new TextDecoder().decode(data.slice(offset, offset + valueSize));
                offset += valueSize;
                break;
            case 8: // Timestamp: 64-bit epoch millis, rendered as ISO 8601
                value = new Date(Number(readInt64BE(data, offset))).toISOString();
                offset += 8;
                break;
            case 9: // UUID: 16 raw bytes formatted as 8-4-4-4-12 hex
                const uuidBytes = data.slice(offset, offset + 16);
                value = Array.from(uuidBytes).map(b => b.toString(16).padStart(2, '0')).join('');
                value = `${value.slice(0,8)}-${value.slice(8,12)}-${value.slice(12,16)}-${value.slice(16,20)}-${value.slice(20)}`;
                offset += 16;
                break;
            default:
                value = '(unknown type)';
        }

        headers.push({ name, type: typeInfo.name, typeCode: valueType, value });
    }

    return headers;
}
|
||||
|
||||
// Parse a single event-stream message frame starting at *offset*.
// Frame layout: 4-byte total length, 4-byte header length, 4-byte prelude
// CRC, headers, payload, 4-byte message CRC (all big-endian).
// Returns null when fewer bytes than a complete frame remain; throws when
// the declared length is implausible.
function parseFrame(data, offset) {
    // Minimum frame: 12-byte prelude + 4-byte trailing CRC.
    if (data.length - offset < 16) {
        return null;
    }

    const totalLength = readUint32BE(data, offset);
    const headerLength = readUint32BE(data, offset + 4);
    const preludeCrc = readUint32BE(data, offset + 8);

    // Sanity bound on the declared frame size (16 B .. 16 MiB).
    if (totalLength < 16 || totalLength > 16 * 1024 * 1024) {
        throw new Error(`消息长度异常: ${totalLength}`);
    }

    if (data.length - offset < totalLength) {
        return null;
    }

    // Verify the prelude CRC (covers the first 8 bytes of the frame).
    const actualPreludeCrc = crc32(data.slice(offset, offset + 8));
    const preludeCrcValid = preludeCrc === actualPreludeCrc;

    // Verify the message CRC (covers everything except the last 4 bytes).
    const messageCrc = readUint32BE(data, offset + totalLength - 4);
    const actualMessageCrc = crc32(data.slice(offset, offset + totalLength - 4));
    const messageCrcValid = messageCrc === actualMessageCrc;

    // Parse the header block.
    const headersStart = offset + 12;
    const headersEnd = headersStart + headerLength;
    const headers = parseHeaders(data.slice(headersStart, headersEnd), headerLength);

    // Extract the payload (between the headers and the trailing CRC).
    const payloadStart = headersEnd;
    const payloadEnd = offset + totalLength - 4;
    const payload = data.slice(payloadStart, payloadEnd);

    // Pull the well-known framing headers, with permissive defaults.
    const messageType = headers.find(h => h.name === ':message-type')?.value || 'event';
    const eventType = headers.find(h => h.name === ':event-type')?.value || '';
    const contentType = headers.find(h => h.name === ':content-type')?.value || 'application/json';

    return {
        totalLength,
        headerLength,
        preludeCrc: { expected: preludeCrc, actual: actualPreludeCrc, valid: preludeCrcValid },
        messageCrc: { expected: messageCrc, actual: actualMessageCrc, valid: messageCrcValid },
        headers,
        payload,
        messageType,
        eventType,
        contentType,
        rawBytes: data.slice(offset, offset + totalLength)
    };
}
|
||||
|
||||
// Walk the buffer and collect every parseable frame.
// On a parse error we resynchronise by advancing one byte and retrying.
function parseAllMessages(data) {
    const frames = [];
    let cursor = 0;

    while (cursor < data.length) {
        let frame = null;
        try {
            frame = parseFrame(data, cursor);
        } catch (e) {
            console.error('Parse error at offset', cursor, e);
            cursor += 1; // skip one byte and try to find the next frame boundary
            continue;
        }
        if (!frame) {
            break; // incomplete trailing data — stop cleanly
        }
        frames.push(frame);
        cursor += frame.totalLength;
    }

    return frames;
}
|
||||
|
||||
// Render a JSON value as syntax-highlighted HTML (2 spaces per nesting level).
// All user-controlled text is HTML-escaped before being embedded, so the
// result is safe to assign to innerHTML.
function formatJson(obj, indent = 0) {
    // BUG FIX: the previous replacements mapped each character to itself
    // (e.g. replace(/</g, '<')), leaving markup characters unescaped in the
    // generated HTML. Escape to the proper entities instead.
    const escapeHtml = (s) => s
        .replace(/&/g, '&amp;')
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;')
        .replace(/"/g, '&quot;');

    const spaces = '  '.repeat(indent);

    if (obj === null) {
        return '<span class="json-null">null</span>';
    }

    if (typeof obj === 'boolean') {
        return `<span class="json-boolean">${obj}</span>`;
    }

    if (typeof obj === 'number') {
        return `<span class="json-number">${obj}</span>`;
    }

    if (typeof obj === 'string') {
        return `<span class="json-string">"${escapeHtml(obj)}"</span>`;
    }

    if (Array.isArray(obj)) {
        if (obj.length === 0) return '[]';
        const items = obj.map(item => spaces + '  ' + formatJson(item, indent + 1));
        return '[\n' + items.join(',\n') + '\n' + spaces + ']';
    }

    if (typeof obj === 'object') {
        const keys = Object.keys(obj);
        if (keys.length === 0) return '{}';
        const items = keys.map(key => {
            // Backslash-escape embedded quotes (JSON display), then HTML-escape.
            const escapedKey = escapeHtml(key.replace(/"/g, '\\"'));
            return spaces + '  ' + `<span class="json-key">"${escapedKey}"</span>: ` + formatJson(obj[key], indent + 1);
        });
        return '{\n' + items.join(',\n') + '\n' + spaces + '}';
    }

    // Fallback for anything else (undefined, functions, symbols).
    return String(obj);
}
|
||||
|
||||
// 格式化 Hex 视图
|
||||
function formatHexView(data) {
|
||||
const lines = [];
|
||||
for (let i = 0; i < data.length; i += 16) {
|
||||
const offset = i.toString(16).padStart(8, '0');
|
||||
const bytes = [];
|
||||
const ascii = [];
|
||||
|
||||
for (let j = 0; j < 16; j++) {
|
||||
if (i + j < data.length) {
|
||||
bytes.push(data[i + j].toString(16).padStart(2, '0'));
|
||||
const char = data[i + j];
|
||||
ascii.push(char >= 32 && char <= 126 ? String.fromCharCode(char) : '.');
|
||||
} else {
|
||||
bytes.push(' ');
|
||||
ascii.push(' ');
|
||||
}
|
||||
}
|
||||
|
||||
lines.push(`<span class="hex-offset">${offset}</span> <span class="hex-bytes">${bytes.join(' ')}</span> <span class="hex-ascii">${ascii.join('')}</span>`);
|
||||
}
|
||||
return lines.join('\n');
|
||||
}
|
||||
|
||||
// Render one parsed frame as an expandable HTML card.
// `index` is the zero-based position of the frame in the message list; it is
// baked into element ids and the inline onclick handlers.
function renderMessage(message, index) {
    // Minimal HTML escaper for text that ends up inside innerHTML.
    // BUG FIX: the previous replacements mapped '<' to '<' etc. (no-ops),
    // so header values and payload text were injected unescaped.
    const esc = (s) => String(s)
        .replace(/&/g, '&amp;')
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;');

    // CSS class for the type badge; anything that is not error/exception is 'event'.
    const messageTypeClass = message.messageType === 'error' ? 'error' :
                             message.messageType === 'exception' ? 'exception' : 'event';

    // Best-effort payload decoding: show text if decodable, JSON view if parseable.
    let payloadText = '';
    let payloadJson = null;
    try {
        payloadText = new TextDecoder().decode(message.payload);
        try {
            payloadJson = JSON.parse(payloadText);
        } catch {}
    } catch {}

    const headersHtml = message.headers.map(h => `
        <tr>
            <td><span class="header-name">${h.name}</span></td>
            <td><span class="header-type">${h.type}</span></td>
            <td><span class="header-value">${typeof h.value === 'string' ? esc(h.value) : h.value}</span></td>
        </tr>
    `).join('');

    // Green check when the CRC matched, red cross with expected/actual otherwise.
    const crcStatus = (crc) => crc.valid
        ? '<span style="color: #7ee787;">✓</span>'
        : `<span style="color: #f85149;">✗ (expected: ${crc.expected.toString(16)}, got: ${crc.actual.toString(16)})</span>`;

    return `
        <div class="message" id="message-${index}">
            <div class="message-header" onclick="toggleMessage(${index})">
                <div class="message-info">
                    <span class="message-index">#${index + 1}</span>
                    <span class="message-type ${messageTypeClass}">${message.messageType}</span>
                    ${message.eventType ? `<span class="event-type">${message.eventType}</span>` : ''}
                </div>
                <div style="display: flex; align-items: center; gap: 12px;">
                    <span class="message-size">${message.totalLength} bytes</span>
                    <span class="expand-icon">▶</span>
                </div>
            </div>
            <div class="message-content">
                <div class="section-title">CRC 校验</div>
                <table class="headers-table" style="margin-bottom: 16px;">
                    <tr><td>Prelude CRC</td><td>${crcStatus(message.preludeCrc)}</td></tr>
                    <tr><td>Message CRC</td><td>${crcStatus(message.messageCrc)}</td></tr>
                </table>

                <div class="section-title">Headers (${message.headers.length})</div>
                <table class="headers-table">
                    <thead>
                        <tr><th>Name</th><th>Type</th><th>Value</th></tr>
                    </thead>
                    <tbody>
                        ${headersHtml}
                    </tbody>
                </table>

                <div class="section-title">Payload (${message.payload.length} bytes)</div>
                <div class="payload-container">
                    <div class="payload-tabs">
                        <button class="payload-tab active" onclick="switchPayloadTab(${index}, 'json')">JSON</button>
                        <button class="payload-tab" onclick="switchPayloadTab(${index}, 'raw')">Raw</button>
                        <button class="payload-tab" onclick="switchPayloadTab(${index}, 'hex')">Hex</button>
                    </div>
                    <div class="payload-content">
                        <div class="payload-json" id="payload-json-${index}">${payloadJson ? formatJson(payloadJson) : `<span style="color: #8b949e;">${esc(payloadText) || '(empty)'}</span>`}</div>
                        <div class="payload-raw" id="payload-raw-${index}" style="display: none;">${esc(payloadText) || '(empty)'}</div>
                        <div class="payload-hex" id="payload-hex-${index}" style="display: none;">${formatHexView(message.payload)}</div>
                    </div>
                </div>
            </div>
        </div>
    `;
}
|
||||
|
||||
// Most recently parsed input bytes; null until parseInput() succeeds.
// Read by copyRawHex(), cleared by clearAll().
let parsedData = null;
|
||||
|
||||
// Entry point for the "parse" button: read the textarea, decode it with the
// selected format, parse all event-stream frames and render stats + cards.
// Reads #inputData/#inputFormat; writes #error/#results/#rawSection/#rawHex/
// #stats/#messageList and the module-level `parsedData`.
function parseInput() {
    const input = document.getElementById('inputData').value;
    const format = document.getElementById('inputFormat').value;
    const errorBox = document.getElementById('error');
    const resultsBox = document.getElementById('results');
    const rawSection = document.getElementById('rawSection');

    // Hide any output from a previous run before re-parsing.
    errorBox.style.display = 'none';
    resultsBox.style.display = 'none';
    rawSection.style.display = 'none';

    if (!input.trim()) {
        errorBox.textContent = '请输入数据';
        errorBox.style.display = 'block';
        return;
    }

    try {
        // parseInputData converts the text (hex/base64/...) into a byte array.
        const data = parseInputData(input, format);
        parsedData = data; // kept for copyRawHex()

        // Show a raw hex dump of the whole input.
        document.getElementById('rawHex').innerHTML = formatHexView(data);
        rawSection.style.display = 'block';

        const messages = parseAllMessages(data);

        if (messages.length === 0) {
            errorBox.textContent = '未能解析出任何消息。请检查输入数据格式。';
            errorBox.style.display = 'block';
            return;
        }

        // Tally message-type and event-type counts for the stat cards.
        const eventTypes = {};
        const messageTypes = {};
        messages.forEach(m => {
            messageTypes[m.messageType] = (messageTypes[m.messageType] || 0) + 1;
            if (m.eventType) {
                eventTypes[m.eventType] = (eventTypes[m.eventType] || 0) + 1;
            }
        });

        document.getElementById('stats').innerHTML = `
            <div class="stat-card">
                <div class="label">总消息数</div>
                <div class="value">${messages.length}</div>
            </div>
            <div class="stat-card">
                <div class="label">总字节数</div>
                <div class="value">${data.length}</div>
            </div>
            <div class="stat-card">
                <div class="label">消息类型</div>
                <div class="value" style="font-size: 14px;">${Object.entries(messageTypes).map(([k, v]) => `${k}: ${v}`).join(', ')}</div>
            </div>
            <div class="stat-card">
                <div class="label">事件类型</div>
                <div class="value" style="font-size: 14px;">${Object.entries(eventTypes).map(([k, v]) => `${k}: ${v}`).join(', ') || '-'}</div>
            </div>
        `;

        // Render one expandable card per parsed frame.
        document.getElementById('messageList').innerHTML = messages.map((m, i) => renderMessage(m, i)).join('');
        resultsBox.style.display = 'block';

    } catch (e) {
        errorBox.textContent = '解析错误: ' + e.message;
        errorBox.style.display = 'block';
    }
}
|
||||
|
||||
// Expand or collapse the message card with the given index.
function toggleMessage(index) {
    document.getElementById(`message-${index}`).classList.toggle('expanded');
}
|
||||
|
||||
// Switch the visible payload view (json/raw/hex) for message `index` and
// highlight the clicked tab button.
// NOTE(review): relies on the implicit global `event` set by the inline
// onclick handler — confirm before calling this outside that context.
function switchPayloadTab(index, tab) {
    const tabButtons = document.querySelectorAll(`#message-${index} .payload-tab`);
    tabButtons.forEach(btn => btn.classList.remove('active'));
    event.target.classList.add('active');

    for (const view of ['json', 'raw', 'hex']) {
        document.getElementById(`payload-${view}-${index}`).style.display =
            view === tab ? 'block' : 'none';
    }
}
|
||||
|
||||
// Reset the UI: empty the input box, hide error/result/raw panes and drop
// the previously parsed bytes.
function clearAll() {
    document.getElementById('inputData').value = '';
    for (const id of ['error', 'results', 'rawSection']) {
        document.getElementById(id).style.display = 'none';
    }
    parsedData = null;
}
|
||||
|
||||
// Copy the parsed input bytes to the clipboard as space-separated hex pairs.
// No-op when nothing has been parsed yet.
function copyRawHex() {
    if (!parsedData) return;
    const toHex = (b) => b.toString(16).padStart(2, '0');
    const hexDump = Array.from(parsedData, toHex).join(' ');
    navigator.clipboard.writeText(hexDump).then(() => {
        alert('已复制到剪贴板');
    });
}
|
||||
|
||||
// Populate the input box with a small example event-stream frame and switch
// the format selector to hex.
function loadExample() {
    // Build a sample event-stream message:
    // a simple assistantResponseEvent frame whose JSON payload is
    // {"content":"Hello, World!","stop":false}, followed by its message CRC.
    const example = `
00 00 00 8f 00 00 00 47 7d 83 6e 75 0d 3a 6d 65
73 73 61 67 65 2d 74 79 70 65 07 00 05 65 76 65
6e 74 0b 3a 65 76 65 6e 74 2d 74 79 70 65 07 00
16 61 73 73 69 73 74 61 6e 74 52 65 73 70 6f 6e
73 65 45 76 65 6e 74 0d 3a 63 6f 6e 74 65 6e 74
2d 74 79 70 65 07 00 10 61 70 70 6c 69 63 61 74
69 6f 6e 2f 6a 73 6f 6e 7b 22 63 6f 6e 74 65 6e
74 22 3a 22 48 65 6c 6c 6f 2c 20 57 6f 72 6c 64
21 22 2c 22 73 74 6f 70 22 3a 66 61 6c 73 65 7d
c7 8d c2 bc
    `.trim();
    document.getElementById('inputData').value = example;
    document.getElementById('inputFormat').value = 'hex';
}
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
145
tools/test_empty_content.py
Normal file
145
tools/test_empty_content.py
Normal file
@@ -0,0 +1,145 @@
|
||||
#!/usr/bin/env python3
|
||||
"""测试空消息内容和 prefill 处理的改进"""
|
||||
|
||||
import json
|
||||
import requests
|
||||
|
||||
# Base URL of the proxy under test and the API key it expects.
BASE_URL = "http://localhost:8080"
API_KEY = "test-key"
|
||||
|
||||
def safe_print_response(response):
    """Print a response body, falling back to raw text when it is not JSON.

    Returns the decoded JSON object on success, or ``None`` when the body
    cannot be decoded as JSON.
    """
    try:
        payload = response.json()
    except (json.JSONDecodeError, ValueError):
        print(f"响应 (非 JSON): {response.text}")
        return None
    print(f"响应: {json.dumps(payload, indent=2, ensure_ascii=False)}")
    return payload
|
||||
|
||||
def test_empty_content():
    """An empty string message body must be rejected with HTTP 400."""
    print("测试 1: 空消息内容")
    headers = {
        "x-api-key": API_KEY,
        "anthropic-version": "2023-06-01",
        "content-type": "application/json",
    }
    body = {
        "model": "claude-sonnet-4",
        "max_tokens": 1024,
        "messages": [{"role": "user", "content": ""}],
    }
    response = requests.post(f"{BASE_URL}/v1/messages", headers=headers, json=body)
    print(f"状态码: {response.status_code}")
    data = safe_print_response(response)
    assert response.status_code == 400, "应返回 400 错误"
    if data:
        assert "消息内容为空" in data.get("error", {}).get("message", ""), "错误消息应包含'消息内容为空'"
    print("✓ 测试通过\n")
|
||||
|
||||
def test_empty_text_blocks():
    """A message whose text blocks are all whitespace must be rejected with 400."""
    print("测试 2: 仅包含空白文本块")
    headers = {
        "x-api-key": API_KEY,
        "anthropic-version": "2023-06-01",
        "content-type": "application/json",
    }
    # Both blocks contain only whitespace, so the message is effectively empty.
    body = {
        "model": "claude-sonnet-4",
        "max_tokens": 1024,
        "messages": [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": " "},
                    {"type": "text", "text": "\n\t"},
                ],
            }
        ],
    }
    response = requests.post(f"{BASE_URL}/v1/messages", headers=headers, json=body)
    print(f"状态码: {response.status_code}")
    data = safe_print_response(response)
    assert response.status_code == 400, "应返回 400 错误"
    if data:
        assert "消息内容为空" in data.get("error", {}).get("message", ""), "错误消息应包含'消息内容为空'"
    print("✓ 测试通过\n")
|
||||
|
||||
def test_prefill_with_empty_user():
    """An empty user message must still be rejected when followed by an assistant prefill."""
    print("测试 3: Prefill 场景下空 user 消息")
    headers = {
        "x-api-key": API_KEY,
        "anthropic-version": "2023-06-01",
        "content-type": "application/json",
    }
    body = {
        "model": "claude-sonnet-4",
        "max_tokens": 1024,
        "messages": [
            {"role": "user", "content": ""},
            {"role": "assistant", "content": "Hi there"},
        ],
    }
    response = requests.post(f"{BASE_URL}/v1/messages", headers=headers, json=body)
    print(f"状态码: {response.status_code}")
    data = safe_print_response(response)
    assert response.status_code == 400, "应返回 400 错误"
    if data:
        assert "消息内容为空" in data.get("error", {}).get("message", ""), "错误消息应包含'消息内容为空'"
    print("✓ 测试通过\n")
|
||||
|
||||
def test_valid_message():
    """Control case: a normal non-empty message should be accepted (HTTP 200)."""
    print("测试 4: 正常消息(对照组)")
    headers = {
        "x-api-key": API_KEY,
        "anthropic-version": "2023-06-01",
        "content-type": "application/json",
    }
    body = {
        "model": "claude-sonnet-4",
        "max_tokens": 50,
        "messages": [{"role": "user", "content": "Say 'test' only"}],
    }
    response = requests.post(f"{BASE_URL}/v1/messages", headers=headers, json=body)
    print(f"状态码: {response.status_code}")
    if response.status_code == 200:
        print("✓ 测试通过:正常消息处理成功\n")
    else:
        # Not asserted: this is a best-effort control; show the body for debugging.
        safe_print_response(response)
        print()
|
||||
|
||||
if __name__ == "__main__":
    banner = "=" * 60
    print(banner)
    print("空消息内容验证测试")
    print(banner + "\n")

    try:
        # Run each scenario in order; the first assertion failure aborts the run.
        for case in (
            test_empty_content,
            test_empty_text_blocks,
            test_prefill_with_empty_user,
            test_valid_message,
        ):
            case()
        print(banner)
        print("所有测试通过!")
        print(banner)
    except AssertionError as e:
        print(f"\n✗ 测试失败: {e}")
    except requests.exceptions.ConnectionError:
        print("\n✗ 无法连接到服务器,请确保服务正在运行")
    except Exception as e:
        # Top-level boundary of a standalone script: report and exit quietly.
        print(f"\n✗ 发生错误: {e}")
|
||||
Reference in New Issue
Block a user