🎨 增加并发访问

This commit is contained in:
2026-03-05 21:28:41 +08:00
commit 84c66ccaa7
114 changed files with 35396 additions and 0 deletions

View File

@@ -0,0 +1,691 @@
#!/usr/bin/env python3
"""
离线分析上下文压缩管道表现。
从 Docker 日志中提取压缩相关的 tracing 结构化数据,
生成汇总报告,帮助评估五层压缩管道的实际效果。
使用方法:
python3 tools/analyze_compression.py logs/docker.log
python3 tools/analyze_compression.py --top 10 logs/docker.log
python3 tools/analyze_compression.py --csv output.csv logs/docker.log
python3 tools/analyze_compression.py --json logs/docker.log
cat logs/docker.log | python3 tools/analyze_compression.py -
"""
from __future__ import annotations
import argparse
import csv
import json
import re
import sys
from collections import defaultdict
from dataclasses import asdict, dataclass, field
from typing import Any, Dict, List, Optional, Sequence
# ---------------------------------------------------------------------------
# ANSI stripping (same pattern as diagnose_improper_request.py)
# ---------------------------------------------------------------------------
# Covers common CSI sequences (including the rare ':' parameter separator) so
# escape codes do not pollute URL/field parsing.
ANSI_RE = re.compile(r"\x1b\[[0-9;:?]*[A-Za-z]")


def strip_ansi(s: str) -> str:
    """Return *s* with all ANSI CSI escape sequences removed."""
    return ANSI_RE.sub("", s)
# ---------------------------------------------------------------------------
# Timestamp extraction
# ---------------------------------------------------------------------------
# ISO 8601 timestamp near the start of a line; works with or without a
# trailing timezone suffix.
TIMESTAMP_RE = re.compile(r"(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2})")


def extract_timestamp(line: str) -> Optional[str]:
    """Return the leading ISO timestamp (second precision) or None."""
    # Only the first 40 characters are scanned; the timestamp is a log prefix.
    match = TIMESTAMP_RE.search(line[:40])
    if match is None:
        return None
    return match.group(1)


def hour_bucket(ts: str) -> str:
    """Truncate to the hour: 2025-01-15T10:23:45 -> 2025-01-15T10."""
    return ts[:13]
# ---------------------------------------------------------------------------
# tracing key=value parsing
# ---------------------------------------------------------------------------
KV_RE = re.compile(r"(\w+)=(\d+(?:\.\d+)?|\"[^\"]*\"|[^\s,]+)")


def parse_kv(line: str) -> Dict[str, str]:
    """Extract every key=value pair from a structured tracing line."""
    pairs: Dict[str, str] = {}
    for match in KV_RE.finditer(line):
        # Quoted values keep their inner text; bare values are taken as-is.
        pairs[match.group(1)] = match.group(2).strip('"')
    return pairs
def kv_int(kv: Dict[str, str], key: str, default: int = 0) -> int:
    """Read *key* from *kv* as an int; return *default* when absent or unparsable."""
    raw = kv.get(key)
    if raw is not None:
        try:
            return int(raw)
        except ValueError:
            pass
    return default
def kv_float(kv: Dict[str, str], key: str, default: float = 0.0) -> float:
    """Read *key* from *kv* as a float; return *default* when absent or unparsable."""
    raw = kv.get(key)
    if raw is not None:
        try:
            return float(raw)
        except ValueError:
            pass
    return default
# ---------------------------------------------------------------------------
# Data model
# ---------------------------------------------------------------------------
@dataclass
class RequestRecord:
    """Data extracted from a single request log line (MARKER_REQUEST)."""
    line_no: int  # 1-based line number in the scanned log
    timestamp: Optional[str] = None  # ISO timestamp, second precision, or None
    model: str = ""
    max_tokens: int = 0
    stream: bool = True
    message_count: int = 0
    estimated_input_tokens: int = 0
@dataclass
class CompressionRecord:
    """Data extracted from a single compression-stat log line (MARKER_COMPRESSION)."""
    line_no: int  # 1-based line number in the scanned log
    timestamp: Optional[str] = None
    estimated_input_tokens: int = 0  # used as the correlation key with RequestRecord
    bytes_saved_total: int = 0  # sum over all compression layers
    whitespace_bytes_saved: int = 0
    thinking_bytes_saved: int = 0
    tool_result_bytes_saved: int = 0
    tool_use_input_bytes_saved: int = 0
    history_turns_removed: int = 0
    history_bytes_saved: int = 0
@dataclass
class ContextUsageRecord:
    """Data extracted from a contextUsageEvent log line (MARKER_CONTEXT_USAGE)."""
    line_no: int  # 1-based line number in the scanned log
    context_usage_percentage: float = 0.0
    actual_input_tokens: int = 0
@dataclass
class RejectionRecord:
    """Data extracted from an upstream-rejection log line (MARKER_REJECTION)."""
    line_no: int  # 1-based line number in the scanned log
    kiro_request_body_bytes: int = 0
@dataclass
class AdaptiveShrinkRecord:
    """Data from a log line marking adaptive second-pass compression (MARKER_ADAPTIVE_SHRINK)."""
    line_no: int  # 1-based line number in the scanned log
    timestamp: Optional[str] = None
    conversation_id: Optional[str] = None
    initial_bytes: int = 0  # request body size before the second pass
    final_bytes: int = 0  # request body size after the second pass
    threshold: int = 0
    iters: int = 0  # number of shrink iterations performed
    additional_history_turns_removed: int = 0
@dataclass
class LocalRejectRecord:
    """Data from a local over-limit rejection log line (MARKER_LOCAL_REJECT)."""
    line_no: int  # 1-based line number in the scanned log
    timestamp: Optional[str] = None
    conversation_id: Optional[str] = None
    request_body_bytes: int = 0
    image_bytes: int = 0
    effective_bytes: int = 0  # the size actually compared against the threshold
    threshold: int = 0
@dataclass
class MergedRequest:
    """A request correlated with its compression stats and context usage."""
    line_no: int = 0
    timestamp: Optional[str] = None
    model: str = ""
    max_tokens: int = 0
    stream: bool = True
    message_count: int = 0
    estimated_input_tokens: int = 0
    # Compression stats (populated when a CompressionRecord was matched)
    bytes_saved_total: int = 0
    whitespace_bytes_saved: int = 0
    thinking_bytes_saved: int = 0
    tool_result_bytes_saved: int = 0
    tool_use_input_bytes_saved: int = 0
    history_turns_removed: int = 0
    history_bytes_saved: int = 0
    has_compression: bool = False
    # Context usage (from a matched contextUsageEvent line, if any)
    context_usage_percentage: Optional[float] = None
    actual_input_tokens: Optional[int] = None
    # Compression ratio in percent (bytes saved vs. estimated input bytes)
    compression_rate: float = 0.0
# ---------------------------------------------------------------------------
# Log parsing
# ---------------------------------------------------------------------------
# Literal marker substrings emitted by the server's tracing statements.
# They must match the producer byte-for-byte (including the Chinese text).
MARKER_REQUEST = "Received POST /v1/messages request"
MARKER_COMPRESSION = "输入压缩完成"
MARKER_CONTEXT_USAGE = "收到 contextUsageEvent"
MARKER_REJECTION = "上游拒绝请求:输入上下文过长"
MARKER_ADAPTIVE_SHRINK = "请求体超过阈值,已执行自适应二次压缩"
MARKER_LOCAL_REJECT = "请求体超过安全阈值,拒绝发送"
# contextUsageEvent line format: 收到 contextUsageEvent: 67.2%, 计算 input_tokens: 12345
CONTEXT_USAGE_RE = re.compile(
    r"收到 contextUsageEvent:\s*([\d.]+)%.*?input_tokens:\s*(\d+)"
)
def parse_log(
    lines: Sequence[str],
    *,
    min_tokens: int = 0,
    model_pattern: Optional[str] = None,
) -> tuple[
    list[MergedRequest],
    list[RejectionRecord],
    list[AdaptiveShrinkRecord],
    list[LocalRejectRecord],
    int,
]:
    """
    Parse log lines into the 5-tuple
    (merged_requests, rejections, adaptive_shrinks, local_rejects, total_lines).

    Correlation strategy: request lines and compression-stat lines appearing in
    sequence are matched on estimated_input_tokens plus line proximity
    (gap <= 50 lines); see _merge_records.

    min_tokens drops request/compression lines whose estimated_input_tokens is
    below the threshold; model_pattern is a case-insensitive regex filter on
    the request's model name.
    """
    requests: list[RequestRecord] = []
    compressions: list[CompressionRecord] = []
    context_usages: list[ContextUsageRecord] = []
    rejections: list[RejectionRecord] = []
    adaptive_shrinks: list[AdaptiveShrinkRecord] = []
    local_rejects: list[LocalRejectRecord] = []
    model_re = re.compile(model_pattern, re.IGNORECASE) if model_pattern else None
    for idx, raw_line in enumerate(lines):
        line_no = idx + 1
        # ANSI codes are stripped first so marker / key=value matching is reliable.
        line = strip_ansi(raw_line)
        if MARKER_REQUEST in line:
            kv = parse_kv(line)
            model = kv.get("model", "")
            if model_re and not model_re.search(model):
                continue
            est = kv_int(kv, "estimated_input_tokens")
            if est < min_tokens:
                continue
            requests.append(RequestRecord(
                line_no=line_no,
                timestamp=extract_timestamp(line),
                model=model,
                max_tokens=kv_int(kv, "max_tokens"),
                stream=kv.get("stream", "true") == "true",
                message_count=kv_int(kv, "message_count"),
                estimated_input_tokens=est,
            ))
        elif MARKER_COMPRESSION in line:
            kv = parse_kv(line)
            est = kv_int(kv, "estimated_input_tokens")
            # min_tokens is applied here too so both sides of the correlation agree.
            if est < min_tokens:
                continue
            compressions.append(CompressionRecord(
                line_no=line_no,
                timestamp=extract_timestamp(line),
                estimated_input_tokens=est,
                bytes_saved_total=kv_int(kv, "bytes_saved_total"),
                whitespace_bytes_saved=kv_int(kv, "whitespace_bytes_saved"),
                thinking_bytes_saved=kv_int(kv, "thinking_bytes_saved"),
                tool_result_bytes_saved=kv_int(kv, "tool_result_bytes_saved"),
                tool_use_input_bytes_saved=kv_int(kv, "tool_use_input_bytes_saved"),
                history_turns_removed=kv_int(kv, "history_turns_removed"),
                history_bytes_saved=kv_int(kv, "history_bytes_saved"),
            ))
        elif MARKER_CONTEXT_USAGE in line:
            # This line uses free-form text, not key=value pairs, hence the regex.
            m = CONTEXT_USAGE_RE.search(line)
            if m:
                context_usages.append(ContextUsageRecord(
                    line_no=line_no,
                    context_usage_percentage=float(m.group(1)),
                    actual_input_tokens=int(m.group(2)),
                ))
        elif MARKER_REJECTION in line:
            kv = parse_kv(line)
            rejections.append(RejectionRecord(
                line_no=line_no,
                kiro_request_body_bytes=kv_int(kv, "kiro_request_body_bytes"),
            ))
        elif MARKER_ADAPTIVE_SHRINK in line:
            kv = parse_kv(line)
            adaptive_shrinks.append(AdaptiveShrinkRecord(
                line_no=line_no,
                timestamp=extract_timestamp(line),
                conversation_id=kv.get("conversation_id"),
                initial_bytes=kv_int(kv, "initial_bytes"),
                final_bytes=kv_int(kv, "final_bytes"),
                threshold=kv_int(kv, "threshold"),
                iters=kv_int(kv, "iters"),
                additional_history_turns_removed=kv_int(kv, "additional_history_turns_removed"),
            ))
        elif MARKER_LOCAL_REJECT in line:
            kv = parse_kv(line)
            local_rejects.append(LocalRejectRecord(
                line_no=line_no,
                timestamp=extract_timestamp(line),
                conversation_id=kv.get("conversation_id"),
                request_body_bytes=kv_int(kv, "request_body_bytes"),
                image_bytes=kv_int(kv, "image_bytes"),
                effective_bytes=kv_int(kv, "effective_bytes"),
                threshold=kv_int(kv, "threshold"),
            ))
    # --- correlate request lines with compression-stat lines ---
    merged = _merge_records(requests, compressions, context_usages)
    return merged, rejections, adaptive_shrinks, local_rejects, len(lines)
def _merge_records(
    requests: list[RequestRecord],
    compressions: list[CompressionRecord],
    context_usages: list[ContextUsageRecord],
) -> list[MergedRequest]:
    """
    Correlate request lines with compression-stat lines.

    Strategy: for each request line, look within the following 50 lines for a
    compression-stat line with the same estimated_input_tokens. Matching is
    greedy in request order and each stat / contextUsage record is consumed at
    most once. Complexity is O(len(requests) * len(compressions)), which is
    acceptable for offline analysis.
    """
    merged: list[MergedRequest] = []
    used_comp_indices: set[int] = set()
    used_ctx_indices: set[int] = set()
    for req in requests:
        mr = MergedRequest(
            line_no=req.line_no,
            timestamp=req.timestamp,
            model=req.model,
            max_tokens=req.max_tokens,
            stream=req.stream,
            message_count=req.message_count,
            estimated_input_tokens=req.estimated_input_tokens,
        )
        # Find the matching compression-stat line.
        for ci, comp in enumerate(compressions):
            if ci in used_comp_indices:
                continue
            # Line proximity: the stat line must be within 50 lines AFTER the request.
            if not (0 < comp.line_no - req.line_no <= 50):
                continue
            # estimated_input_tokens must match exactly.
            if comp.estimated_input_tokens != req.estimated_input_tokens:
                continue
            # Match found: copy all per-layer savings onto the merged record.
            mr.bytes_saved_total = comp.bytes_saved_total
            mr.whitespace_bytes_saved = comp.whitespace_bytes_saved
            mr.thinking_bytes_saved = comp.thinking_bytes_saved
            mr.tool_result_bytes_saved = comp.tool_result_bytes_saved
            mr.tool_use_input_bytes_saved = comp.tool_use_input_bytes_saved
            mr.history_turns_removed = comp.history_turns_removed
            mr.history_bytes_saved = comp.history_bytes_saved
            mr.has_compression = True
            used_comp_indices.add(ci)
            break
        # Find the matching contextUsageEvent (within 500 lines after the request).
        for ui, ctx in enumerate(context_usages):
            if ui in used_ctx_indices:
                continue
            if not (0 < ctx.line_no - req.line_no <= 500):
                continue
            mr.context_usage_percentage = ctx.context_usage_percentage
            mr.actual_input_tokens = ctx.actual_input_tokens
            used_ctx_indices.add(ui)
            break
        # Compression rate based on estimated tokens (assumes 1 token ~= 4 bytes).
        if mr.estimated_input_tokens > 0 and mr.bytes_saved_total > 0:
            estimated_bytes = mr.estimated_input_tokens * 4
            mr.compression_rate = mr.bytes_saved_total / estimated_bytes * 100
        merged.append(mr)
    return merged
# ---------------------------------------------------------------------------
# Statistics helpers
# ---------------------------------------------------------------------------
def median(values: list[float]) -> float:
    """Return the median of *values*, or 0.0 for an empty list."""
    if not values:
        return 0.0
    ordered = sorted(values)
    mid = len(ordered) // 2
    if len(ordered) % 2:
        # Odd count: the middle element.
        return ordered[mid]
    # Even count: mean of the two middle elements.
    return (ordered[mid - 1] + ordered[mid]) / 2
def percentile(values: list[float], p: float) -> float:
    """Return the p-th percentile of *values* via linear interpolation (0.0 if empty)."""
    if not values:
        return 0.0
    ordered = sorted(values)
    # Fractional rank into the sorted data; lo/hi bracket it.
    rank = (len(ordered) - 1) * p / 100
    lo = int(rank)
    hi = min(lo + 1, len(ordered) - 1)
    frac = rank - lo
    return ordered[lo] + (ordered[hi] - ordered[lo]) * frac
def fmt_bytes(n: int) -> str:
    """Format a byte count as a human-readable string (with KB/MB hint)."""
    if n >= 1_000_000:
        suffix = f" ({n / 1_000_000:.1f} MB)"
    elif n >= 1_000:
        suffix = f" ({n / 1_000:.1f} KB)"
    else:
        suffix = ""
    return f"{n:,}{suffix}"
# ---------------------------------------------------------------------------
# Report generation
# ---------------------------------------------------------------------------
def generate_report(
    merged: list[MergedRequest],
    rejections: list[RejectionRecord],
    adaptive_shrinks: list[AdaptiveShrinkRecord],
    local_rejects: list[LocalRejectRecord],
    total_lines: int,
    *,
    top_n: int = 5,
) -> str:
    """Render the plain-text analysis report, section by section."""
    lines: list[str] = []
    w = lines.append  # shorthand: each w(...) call appends one report line
    w("=== 上下文压缩分析报告 ===")
    w("")
    w(f"扫描行数: {total_lines:,}")
    w(f"匹配请求: {len(merged)}")
    with_comp = [r for r in merged if r.has_compression]
    w(f"有压缩统计: {len(with_comp)}")
    w("")
    if not with_comp:
        w("未找到压缩统计数据。")
        return "\n".join(lines)
    # --- overall summary ---
    total_saved = sum(r.bytes_saved_total for r in with_comp)
    avg_saved = total_saved // len(with_comp) if with_comp else 0
    rates = [r.compression_rate for r in with_comp if r.compression_rate > 0]
    median_rate = median(rates)
    w("--- 总体概览 ---")
    w(f"总节省字节: {fmt_bytes(total_saved)}")
    w(f"平均每请求节省: {avg_saved:,} bytes")
    w(f"压缩率中位数: {median_rate:.1f}%")
    w("")
    # --- per-layer contribution ---
    ws_total = sum(r.whitespace_bytes_saved for r in with_comp)
    th_total = sum(r.thinking_bytes_saved for r in with_comp)
    tr_total = sum(r.tool_result_bytes_saved for r in with_comp)
    tu_total = sum(r.tool_use_input_bytes_saved for r in with_comp)
    hi_total = sum(r.history_bytes_saved for r in with_comp)
    def layer_line(name: str, val: int) -> str:
        # One formatted row: layer name, byte total, share of total, per-request avg.
        pct = val / total_saved * 100 if total_saved > 0 else 0
        avg = val // len(with_comp) if with_comp else 0
        return f" {name:<18}{val:>12,} bytes ({pct:>5.1f}%) avg {avg:,}/req"
    w("--- 各层贡献 ---")
    w(layer_line("空白压缩:", ws_total))
    w(layer_line("thinking 截断:", th_total))
    w(layer_line("tool_result:", tr_total))
    w(layer_line("tool_use_input:", tu_total))
    w(layer_line("历史截断:", hi_total))
    w("")
    # --- history-trim details ---
    with_history = [r for r in with_comp if r.history_turns_removed > 0]
    w("--- 历史截断详情 ---")
    w(f"触发历史截断的请求: {len(with_history)}/{len(with_comp)} ({len(with_history)/len(with_comp)*100:.1f}%)")
    if with_history:
        turns = [r.history_turns_removed for r in with_history]
        w(f"平均移除轮数: {sum(turns)/len(turns):.1f}")
        w(f"最大移除轮数: {max(turns)}")
    w("")
    # --- context-window usage ---
    with_ctx = [r for r in merged if r.context_usage_percentage is not None]
    w("--- 上下文窗口使用 (contextUsageEvent) ---")
    if with_ctx:
        usages = [r.context_usage_percentage for r in with_ctx]
        avg_usage = sum(usages) / len(usages)
        over_80 = sum(1 for u in usages if u > 80)
        over_95 = sum(1 for u in usages if u > 95)
        overflow = sum(1 for u in usages if u >= 100)
        w(f"平均使用率: {avg_usage:.1f}%")
        w(f">80% 使用率的请求: {over_80} ({over_80/len(with_ctx)*100:.1f}%)")
        w(f">95% 使用率的请求: {over_95} ({over_95/len(with_ctx)*100:.1f}%)")
        w(f"100% (溢出): {overflow} ({overflow/len(with_ctx)*100:.1f}%)")
    else:
        w("无 contextUsageEvent 数据(需要 DEBUG 日志级别)")
    w("")
    # --- upstream rejections ---
    w("--- 上游拒绝 ---")
    w(f"输入过长拒绝: {len(rejections)}")
    w("")
    # --- adaptive second-pass compression ---
    w("--- 自适应二次压缩 ---")
    w(f"触发次数: {len(adaptive_shrinks)}")
    if adaptive_shrinks:
        initial_avg = sum(r.initial_bytes for r in adaptive_shrinks) // len(adaptive_shrinks)
        final_avg = sum(r.final_bytes for r in adaptive_shrinks) // len(adaptive_shrinks)
        iters_avg = sum(r.iters for r in adaptive_shrinks) / len(adaptive_shrinks)
        hist_avg = sum(r.additional_history_turns_removed for r in adaptive_shrinks) / len(adaptive_shrinks)
        w(f"平均压缩前: {fmt_bytes(initial_avg)}")
        w(f"平均压缩后: {fmt_bytes(final_avg)}")
        w(f"平均迭代次数: {iters_avg:.1f}")
        w(f"平均额外移除轮数: {hist_avg:.1f}")
    w("")
    # --- local rejections (request body over limit) ---
    w("--- 本地拒绝 (请求体超限) ---")
    w(f"拒绝发送: {len(local_rejects)}")
    if local_rejects:
        # Show the five largest offenders by effective size.
        top = sorted(local_rejects, key=lambda r: r.effective_bytes, reverse=True)[:5]
        for r in top:
            w(
                " line={line} effective={eff} threshold={th} body={body} image={img} conversationId={cid}".format(
                    line=r.line_no,
                    eff=r.effective_bytes,
                    th=r.threshold,
                    body=r.request_body_bytes,
                    img=r.image_bytes,
                    cid=r.conversation_id or "None",
                )
            )
    w("")
    # --- top-N highest-saving requests ---
    sorted_by_saved = sorted(with_comp, key=lambda r: r.bytes_saved_total, reverse=True)
    w(f"--- 高压缩请求 TOP-{top_n} ---")
    for i, r in enumerate(sorted_by_saved[:top_n], 1):
        w(f" #{i} line={r.line_no} saved={r.bytes_saved_total:,} rate={r.compression_rate:.1f}% model={r.model} tokens={r.estimated_input_tokens:,}")
    w("")
    # --- samples of requests with no compression savings ---
    no_comp = [r for r in with_comp if r.bytes_saved_total == 0]
    w("--- 低效/无压缩请求样本 ---")
    if no_comp:
        for r in no_comp[:5]:
            w(f" line={r.line_no} saved=0 tokens={r.estimated_input_tokens:,} message_count={r.message_count}")
    else:
        w(" (无)")
    w("")
    # --- hourly trend ---
    hourly: Dict[str, list[MergedRequest]] = defaultdict(list)
    for r in with_comp:
        if r.timestamp:
            hourly[hour_bucket(r.timestamp)].append(r)
    if hourly:
        w("--- 时间趋势 (按小时) ---")
        for hour in sorted(hourly.keys()):
            reqs = hourly[hour]
            avg_s = sum(r.bytes_saved_total for r in reqs) // len(reqs)
            ctx_reqs = [r for r in reqs if r.context_usage_percentage is not None]
            avg_ctx = sum(r.context_usage_percentage for r in ctx_reqs) / len(ctx_reqs) if ctx_reqs else 0
            ctx_str = f" avg_context_usage={avg_ctx:.1f}%" if ctx_reqs else ""
            w(f" {hour}: requests={len(reqs)} avg_saved={avg_s:,}{ctx_str}")
        w("")
    return "\n".join(lines)
def generate_json_report(
    merged: list[MergedRequest],
    rejections: list[RejectionRecord],
    adaptive_shrinks: list[AdaptiveShrinkRecord],
    local_rejects: list[LocalRejectRecord],
    total_lines: int,
) -> str:
    """Render the summary report as pretty-printed JSON."""
    compressed = [r for r in merged if r.has_compression]
    saved = sum(r.bytes_saved_total for r in compressed)

    def layer_total(attr: str) -> int:
        # Sum one per-layer byte counter across all compressed requests.
        return sum(getattr(r, attr) for r in compressed)

    report = {
        "total_lines": total_lines,
        "matched_requests": len(merged),
        "with_compression": len(compressed),
        "total_bytes_saved": saved,
        "avg_bytes_saved": saved // len(compressed) if compressed else 0,
        "layers": {
            "whitespace": layer_total("whitespace_bytes_saved"),
            "thinking": layer_total("thinking_bytes_saved"),
            "tool_result": layer_total("tool_result_bytes_saved"),
            "tool_use_input": layer_total("tool_use_input_bytes_saved"),
            "history": layer_total("history_bytes_saved"),
        },
        "rejections": len(rejections),
        "adaptive_shrinks": len(adaptive_shrinks),
        "local_rejects": len(local_rejects),
    }
    return json.dumps(report, indent=2, ensure_ascii=False)
def write_csv(merged: list[MergedRequest], path: str) -> None:
    """Export one CSV row per merged request to *path*."""
    fieldnames = [
        "line_no", "timestamp", "model", "max_tokens", "stream",
        "message_count", "estimated_input_tokens", "bytes_saved_total",
        "whitespace_bytes_saved", "thinking_bytes_saved",
        "tool_result_bytes_saved", "tool_use_input_bytes_saved",
        "history_turns_removed", "history_bytes_saved",
        "compression_rate", "context_usage_percentage", "actual_input_tokens",
    ]
    with open(path, "w", newline="", encoding="utf-8") as handle:
        writer = csv.DictWriter(handle, fieldnames=fieldnames)
        writer.writeheader()
        for record in merged:
            full = asdict(record)
            # Keep only (and order by) the declared columns.
            writer.writerow({name: full[name] for name in fieldnames})
# ---------------------------------------------------------------------------
# CLI entry point
# ---------------------------------------------------------------------------
def main(argv: list[str]) -> int:
    """Parse CLI args, run the analysis, print the report; returns exit code."""
    parser = argparse.ArgumentParser(
        description="分析上下文压缩管道表现",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=__doc__,
    )
    parser.add_argument(
        "logfile", nargs="?", default="logs/docker.log",
        help="日志文件路径,使用 '-' 从 stdin 读取(默认: logs/docker.log"
    )
    parser.add_argument("--top", type=int, default=5, help="高压缩请求 TOP-N默认: 5")
    parser.add_argument("--csv", metavar="FILE", help="导出每条请求的明细为 CSV")
    parser.add_argument("--json", action="store_true", help="JSON 格式输出汇总")
    parser.add_argument("--min-tokens", type=int, default=0, help="仅分析 estimated_input_tokens >= N 的请求")
    parser.add_argument("--model", metavar="PATTERN", help="按模型名过滤(正则)")
    args = parser.parse_args(argv)
    # Read the log ('-' means stdin); decode errors are replaced, not fatal.
    if args.logfile == "-":
        log_lines = sys.stdin.read().splitlines()
    else:
        try:
            with open(args.logfile, "r", encoding="utf-8", errors="replace") as f:
                log_lines = f.read().splitlines()
        except FileNotFoundError:
            print(f"ERROR: 日志文件不存在: {args.logfile}", file=sys.stderr)
            return 2
    # Parse all record types in one pass.
    merged, rejections, adaptive_shrinks, local_rejects, total_lines = parse_log(
        log_lines,
        min_tokens=args.min_tokens,
        model_pattern=args.model,
    )
    # Emit either the JSON summary or the full text report.
    if args.json:
        print(generate_json_report(merged, rejections, adaptive_shrinks, local_rejects, total_lines))
    else:
        print(generate_report(merged, rejections, adaptive_shrinks, local_rejects, total_lines, top_n=args.top))
    # Optional per-request CSV export (confirmation goes to stderr,
    # keeping stdout clean for the report itself).
    if args.csv:
        write_csv(merged, args.csv)
        print(f"CSV 已导出: {args.csv}", file=sys.stderr)
    return 0
if __name__ == "__main__":
    # Script entry point: propagate main()'s exit code to the shell.
    raise SystemExit(main(sys.argv[1:]))

View File

@@ -0,0 +1,646 @@
#!/usr/bin/env python3
"""
离线诊断 `Improperly formed request`(上游 400常见成因。
使用方法:
python3 tools/diagnose_improper_request.py logs/docker.log
脚本会从日志中提取 `request_body=...{json}`,对请求做一组启发式校验并输出汇总与样本。
目标是快速定位“项目侧可修复的请求构造问题”,而不是复现上游的完整校验逻辑。
"""
from __future__ import annotations
import argparse
import json
import os
import re
import sys
from collections import Counter, defaultdict
from dataclasses import dataclass
from typing import Any, Dict, Iterable, List, Optional, Tuple
# Covers common CSI sequences (including the rare ':' parameter separator) so
# escape codes do not pollute URL/field parsing.
ANSI_RE = re.compile(r"\x1b\[[0-9;:?]*[A-Za-z]")
# key=value pairs as emitted by tracing-style structured log lines.
KV_RE = re.compile(r"(\w+)=(\d+(?:\.\d+)?|\"[^\"]*\"|[^\s,]+)")
@dataclass(frozen=True)
class RequestSummary:
    """Compact per-request-body summary used in diagnosis output."""
    line_no: int  # 1-based log line the body was found on
    conversation_id: Optional[str]
    content_len: int  # -1 when unknown (partial parse / non-string content)
    tools_n: int  # number of tool definitions (-1 when unknown)
    tool_results_n: int  # number of tool results (-1 when unknown)
    history_n: int  # number of history entries (-1 when unknown)
    json_len: int  # compact-serialized body size (raw length for partials)
def strip_ansi(s: str) -> str:
    """Return *s* with all ANSI CSI escape sequences removed."""
    return ANSI_RE.sub("", s)


def parse_kv(line: str) -> Dict[str, str]:
    """Extract every key=value pair from a structured tracing line."""
    pairs: Dict[str, str] = {}
    for match in KV_RE.finditer(line):
        # Quoted values keep their inner text; bare values are taken as-is.
        pairs[match.group(1)] = match.group(2).strip('"')
    return pairs
def iter_request_bodies(log_text: str) -> Iterable[Tuple[int, Dict[str, Any]]]:
    """Yield (line_no, request-body dict) pairs extracted from the log.

    Two log formats are supported:
    1. sensitive-logs mode: request_body={"conversationState":...} (may be truncated)
    2. normal mode: kiro_request_body_bytes=135777 (no JSON content)

    Truncated JSON falls back to a best-effort partial parse (dict with "_*"
    keys). response_body false positives are excluded via a negative
    lookbehind on the "request_body=" pattern.
    """
    decoder = json.JSONDecoder()
    # Two sources:
    # 1. handler DEBUG: "Kiro request body: {json}" (pre-send, complete content)
    # 2. provider ERROR: "request_body={json}" (after a 400, may be truncated)
    kiro_body_marker = "Kiro request body: "
    body_re = re.compile(r"(?<![a-z_])request_body=")
    for line_no, line in enumerate(log_text.splitlines(), 1):
        clean = strip_ansi(line)
        # Source 1: the handler's DEBUG log (preferred — more complete content).
        kiro_idx = clean.find(kiro_body_marker)
        if kiro_idx != -1:
            brace = clean.find("{", kiro_idx + len(kiro_body_marker))
            if brace == -1:
                continue
        elif "request_body=" in clean:
            # Source 2: the provider's ERROR log.
            match = body_re.search(clean)
            if not match:
                continue
            brace = clean.find("{", match.end())
            if brace == -1:
                continue
        else:
            continue
        # raw_decode parses the first complete JSON object, ignoring any
        # trailing tracing fields on the same line.
        try:
            body, _ = decoder.raw_decode(clean, brace)
        except json.JSONDecodeError:
            # JSON truncated (sensitive-logs truncate_middle): salvage what we can.
            body_str = clean[brace:]
            yield line_no, _partial_parse_request_body(body_str, line_no)
            continue
        if isinstance(body, dict):
            yield line_no, body
def _partial_parse_request_body(truncated_json: str, line_no: int) -> Dict[str, Any]:
"""从截断的 JSON 中尽量提取结构信息。
即使 JSON 不完整,也能通过正则提取 conversationId、工具数量等关键字段
用于启发式诊断。
"""
info: Dict[str, Any] = {"_partial": True, "_raw_len": len(truncated_json)}
# 提取 conversationId
m = re.search(r'"conversationId"\s*:\s*"([^"]+)"', truncated_json)
if m:
info["_conversationId"] = m.group(1)
# 统计 toolUseId 出现次数(近似 tool_use 数量)
info["_toolUseId_count"] = len(re.findall(r'"toolUseId"', truncated_json))
# 统计 toolSpecification 出现次数(近似 tool 定义数量)
info["_toolSpec_count"] = len(re.findall(r'"toolSpecification"', truncated_json))
# 统计 assistantResponseMessage 出现次数(近似 history 轮数)
info["_assistant_msg_count"] = len(re.findall(r'"assistantResponseMessage"', truncated_json))
# 统计 userInputMessage 出现次数
info["_user_msg_count"] = len(re.findall(r'"userInputMessage"', truncated_json))
return info
def _get(d: Dict[str, Any], path: str, default: Any = None) -> Any:
cur: Any = d
for part in path.split("."):
if not isinstance(cur, dict):
return default
cur = cur.get(part)
return cur if cur is not None else default
def summarize(body: Dict[str, Any], line_no: int) -> RequestSummary:
    """Condense one request body (full or partial) into a RequestSummary."""
    # Bodies recovered from truncated logs carry pre-counted "_*" fields.
    if body.get("_partial"):
        return RequestSummary(
            line_no=line_no,
            conversation_id=body.get("_conversationId"),
            content_len=-1,
            tools_n=body.get("_toolSpec_count", -1),
            tool_results_n=body.get("_toolUseId_count", -1),
            history_n=body.get("_assistant_msg_count", -1),
            json_len=body.get("_raw_len", 0),
        )
    base = "conversationState.currentMessage.userInputMessage"
    conversation_id = _get(body, "conversationState.conversationId")
    content = _get(body, f"{base}.content", "")
    tools = _get(body, f"{base}.userInputMessageContext.tools", [])
    tool_results = _get(body, f"{base}.userInputMessageContext.toolResults", [])
    history = _get(body, "conversationState.history", [])
    json_len = len(json.dumps(body, ensure_ascii=False, separators=(",", ":")))

    def length_or_unknown(value: Any, expected: type) -> int:
        # -1 signals "not the expected shape" to downstream reporting.
        return len(value) if isinstance(value, expected) else -1

    return RequestSummary(
        line_no=line_no,
        conversation_id=conversation_id if isinstance(conversation_id, str) else None,
        content_len=length_or_unknown(content, str),
        tools_n=length_or_unknown(tools, list),
        tool_results_n=length_or_unknown(tool_results, list),
        history_n=length_or_unknown(history, list),
        json_len=json_len,
    )
def find_issues(
    body: Dict[str, Any],
    *,
    max_history_messages: int,
    large_payload_bytes: int,
    huge_payload_bytes: int,
) -> List[str]:
    """Run heuristic validity checks on one request body; return issue codes (E_*/W_*)."""
    # Partially-parsed (truncated) requests only support limited diagnosis.
    if body.get("_partial"):
        issues: List[str] = ["W_TRUNCATED_LOG"]
        raw_len = body.get("_raw_len", 0)
        if raw_len > large_payload_bytes:
            issues.append("W_PAYLOAD_LARGE")
        return issues
    issues = []
    cs = body.get("conversationState") or {}
    cm = _get(body, "conversationState.currentMessage.userInputMessage", {})
    ctx = cm.get("userInputMessageContext") or {}
    content = cm.get("content")
    images = cm.get("images") or []
    tools = ctx.get("tools") or []
    tool_results = ctx.get("toolResults") or []
    history = cs.get("history") or []
    # Empty content: classified by what else the message carries.
    if isinstance(content, str) and content.strip() == "":
        if images:
            issues.append("E_CONTENT_EMPTY_WITH_IMAGES")
        elif tool_results:
            issues.append("E_CONTENT_EMPTY_WITH_TOOL_RESULTS")
        else:
            issues.append("E_CONTENT_EMPTY")
    # Tool-spec checks (description / schema shape).
    empty_desc: List[str] = []
    missing_schema: List[str] = []
    missing_type: List[str] = []
    for t in tools if isinstance(tools, list) else []:
        if not isinstance(t, dict):
            issues.append("E_TOOL_SHAPE_INVALID")
            continue
        spec = t.get("toolSpecification")
        if not isinstance(spec, dict):
            issues.append("E_TOOL_SPEC_MISSING")
            continue
        name = spec.get("name")
        name_s = name if isinstance(name, str) else "<noname>"
        desc = spec.get("description")
        if isinstance(desc, str) and desc.strip() == "":
            empty_desc.append(name_s)
        inp = spec.get("inputSchema")
        js = inp.get("json") if isinstance(inp, dict) else None
        if isinstance(js, dict):
            if "$schema" not in js:
                missing_schema.append(name_s)
            if "type" not in js:
                missing_type.append(name_s)
        else:
            issues.append("E_TOOL_INPUT_SCHEMA_NOT_OBJECT")
    if empty_desc:
        issues.append("E_TOOL_DESCRIPTION_EMPTY")
    if missing_schema:
        issues.append("W_TOOL_SCHEMA_MISSING_$SCHEMA")
    if missing_type:
        issues.append("W_TOOL_SCHEMA_MISSING_TYPE")
    # Can each tool_result be traced back to a tool_use in history? (heuristic)
    tool_use_ids: set[str] = set()
    history_tool_result_ids: set[str] = set()
    tool_def_names_ci: set[str] = set()
    for t in tools if isinstance(tools, list) else []:
        spec = t.get("toolSpecification") if isinstance(t, dict) else None
        if isinstance(spec, dict) and isinstance(spec.get("name"), str):
            tool_def_names_ci.add(spec["name"].lower())
    for h in history if isinstance(history, list) else []:
        if not isinstance(h, dict):
            continue
        am = h.get("assistantResponseMessage")
        if not isinstance(am, dict):
            # May also be a user message (carrying tool_result entries).
            um = h.get("userInputMessage")
            if not isinstance(um, dict):
                continue
            uctx = um.get("userInputMessageContext")
            if not isinstance(uctx, dict):
                continue
            trs = uctx.get("toolResults")
            if not isinstance(trs, list):
                continue
            for tr in trs:
                if not isinstance(tr, dict):
                    continue
                tid = tr.get("toolUseId")
                if isinstance(tid, str):
                    history_tool_result_ids.add(tid)
            continue
        tus = am.get("toolUses")
        if isinstance(tus, list):
            for tu in tus:
                if not isinstance(tu, dict):
                    continue
                tid = tu.get("toolUseId")
                if isinstance(tid, str):
                    tool_use_ids.add(tid)
                # A history tool_use name must be defined in tools (common upstream constraint).
                nm = tu.get("name")
                if isinstance(nm, str) and tool_def_names_ci and nm.lower() not in tool_def_names_ci:
                    issues.append("E_HISTORY_TOOL_USE_NAME_NOT_IN_TOOLS")
        # The same history entry may also carry a userInputMessage (rare but tolerated).
        um = h.get("userInputMessage")
        if isinstance(um, dict):
            uctx = um.get("userInputMessageContext")
            if isinstance(uctx, dict):
                trs = uctx.get("toolResults")
                if isinstance(trs, list):
                    for tr in trs:
                        if not isinstance(tr, dict):
                            continue
                        tid = tr.get("toolUseId")
                        if isinstance(tid, str):
                            history_tool_result_ids.add(tid)
    # Every history tool_result must match a history tool_use (otherwise very likely a 400).
    if history_tool_result_ids and tool_use_ids:
        if any(tid not in tool_use_ids for tid in history_tool_result_ids):
            issues.append("E_HISTORY_TOOL_RESULT_ORPHAN")
    elif history_tool_result_ids and not tool_use_ids:
        issues.append("E_HISTORY_TOOL_RESULT_ORPHAN")
    # currentMessage tool_results must also match a history tool_use.
    orphan_results = 0
    current_tool_result_ids: set[str] = set()
    if tool_use_ids and isinstance(tool_results, list):
        for tr in tool_results:
            if not isinstance(tr, dict):
                continue
            tid = tr.get("toolUseId")
            if isinstance(tid, str):
                current_tool_result_ids.add(tid)
                if tid not in tool_use_ids:
                    orphan_results += 1
    if orphan_results:
        issues.append("W_TOOL_RESULT_ORPHAN")
    # Every history tool_use must be answered somewhere (history or currentMessage).
    all_tool_result_ids = history_tool_result_ids | current_tool_result_ids
    if tool_use_ids and all_tool_result_ids:
        if any(tid not in all_tool_result_ids for tid in tool_use_ids):
            issues.append("E_HISTORY_TOOL_USE_ORPHAN")
    elif tool_use_ids and not all_tool_result_ids:
        issues.append("E_HISTORY_TOOL_USE_ORPHAN")
    # Overlong history (strong heuristic; frequently co-occurs with 400s in the logs).
    if isinstance(history, list) and len(history) > max_history_messages:
        issues.append("W_HISTORY_TOO_LONG")
    # Payload size (strong heuristic; upstream may enforce opaque hard limits).
    json_len = len(json.dumps(body, ensure_ascii=False, separators=(",", ":")))
    if json_len > huge_payload_bytes:
        issues.append("W_PAYLOAD_HUGE")
    elif json_len > large_payload_bytes:
        issues.append("W_PAYLOAD_LARGE")
    return issues
def main(argv: List[str]) -> int:
    """Parse CLI args and run the three diagnosis phases; returns exit code (0 ok, 2 missing log)."""
    parser = argparse.ArgumentParser(
        description="离线诊断 Improperly formed request上游 400常见成因"
    )
    parser.add_argument("log", nargs="?", default="logs/docker.log", help="docker.log 路径")
    parser.add_argument("--max-samples", type=int, default=5, help="每类问题输出样本数量")
    parser.add_argument("--dump-dir", default=None, help="可选:把 request_body JSON 按行号落盘")
    parser.add_argument("--max-history", type=int, default=100, help="history 过长阈值(启发式)")
    # Upstream enforces a hard limit around 5 MiB; 4.5 MiB is used as the
    # default "at risk" warning threshold.
    parser.add_argument("--large-bytes", type=int, default=4_718_592, help="payload 大阈值(启发式)")
    parser.add_argument("--huge-bytes", type=int, default=8_388_608, help="payload 巨大阈值(启发式)")
    args = parser.parse_args(argv)
    log_path = args.log
    try:
        log_text = open(log_path, "r", encoding="utf-8", errors="replace").read()
    except FileNotFoundError:
        print(f"ERROR: log file not found: {log_path}", file=sys.stderr)
        return 2
    dump_dir = args.dump_dir
    if dump_dir:
        os.makedirs(dump_dir, exist_ok=True)
    # Phase 0: scan the project-side "request body over limit" local rejections
    # first (verifies whether the 4.5 MiB truncate/reject path is firing).
    print("=" * 60)
    print("Phase 0: 扫描本地请求体超限拒绝")
    print("=" * 60)
    local_rejects = _scan_local_rejects(log_text)
    if local_rejects:
        for r in local_rejects[: args.max_samples]:
            print(
                "\n [line {line}] effective_bytes={eff} threshold={th} body={body} image={img} conversationId={cid}".format(
                    line=r.get("line_no"),
                    eff=r.get("effective_bytes", "?"),
                    th=r.get("threshold", "?"),
                    body=r.get("request_body_bytes", "?"),
                    img=r.get("image_bytes", "?"),
                    cid=r.get("conversation_id") or "None",
                )
            )
        if len(local_rejects) > args.max_samples:
            print(f"\n ... ({len(local_rejects) - args.max_samples} more)")
    else:
        print(" 未发现本地请求体超限拒绝")
    print("")
    # Phase 1: scan ERROR lines for 400 "Improperly formed request" and pull context.
    print("=" * 60)
    print("Phase 1: 扫描 400 Improperly formed request 错误")
    print("=" * 60)
    error_lines = _scan_400_errors(
        log_text,
        max_history_messages=args.max_history,
        large_payload_bytes=args.large_bytes,
        huge_payload_bytes=args.huge_bytes,
    )
    if error_lines:
        for el in error_lines:
            print(f"\n [line {el['line_no']}] bytes={el.get('body_bytes', '?')} "
                  f"url={el.get('url', '?')}")
            if "_req_body_line" in el:
                print(f" ↳ 关联请求体: line {el['_req_body_line']}"
                      f"{' (truncated)' if el.get('_req_body_partial') else ''}")
            if "summary" in el:
                s = el["summary"]
                print(f" ↳ conversationId={s.conversation_id or 'None'} "
                      f"content_len={s.content_len} tools={s.tools_n} "
                      f"toolResults={s.tool_results_n} history={s.history_n} "
                      f"json_len={s.json_len}")
            if "issues" in el and el["issues"]:
                print(f" ↳ issues: {', '.join(el['issues'])}")
            elif "_req_body" in el and el["_req_body"].get("_partial"):
                body = el["_req_body"]
                print(f" ↳ partial: toolSpecs={body.get('_toolSpec_count', '?')} "
                      f"toolUseIds={body.get('_toolUseId_count', '?')} "
                      f"assistantMsgs={body.get('_assistant_msg_count', '?')} "
                      f"userMsgs={body.get('_user_msg_count', '?')} "
                      f"raw_len={body.get('_raw_len', '?')}")
    else:
        print(" 未发现 400 Improperly formed request 错误")
    print()
    # Phase 2: parse request_body entries and run the heuristics over each.
    print("=" * 60)
    print("Phase 2: 解析 request_body 条目")
    print("=" * 60)
    issue_counter: Counter[str] = Counter()
    issues_to_samples: Dict[str, List[RequestSummary]] = defaultdict(list)
    total = 0
    partial_count = 0
    for line_no, body in iter_request_bodies(log_text):
        total += 1
        if body.get("_partial"):
            partial_count += 1
        summary = summarize(body, line_no)
        issues = find_issues(
            body,
            max_history_messages=args.max_history,
            large_payload_bytes=args.large_bytes,
            huge_payload_bytes=args.huge_bytes,
        )
        # Optional dump to disk for minimized replay / diff debugging.
        if dump_dir:
            out_path = os.path.join(dump_dir, f"req_line_{line_no}.json")
            with open(out_path, "w", encoding="utf-8") as f:
                json.dump(body, f, ensure_ascii=False, indent=2)
        if not issues:
            issues = ["(NO_HEURISTIC_MATCH)"]
        # De-duplicate per request so each issue counts once per body.
        for issue in set(issues):
            issue_counter[issue] += 1
            if len(issues_to_samples[issue]) < args.max_samples:
                issues_to_samples[issue].append(summary)
    print(f"Parsed request_body entries: {total} (complete: {total - partial_count}, truncated: {partial_count})")
    print("")
    if not issue_counter:
        print("No request_body entries found.")
        if not error_lines:
            print("\nHint: 如果使用非 sensitive-logs 模式,日志中不包含 request_body 内容。")
            print(" 请使用 --features sensitive-logs 重新编译,或检查 kiro_request_body_bytes 字段。")
        return 0
    print("Issue counts:")
    for issue, cnt in issue_counter.most_common():
        print(f" {cnt:4d} {issue}")
    print("")
    print("Samples:")
    for issue, cnt in issue_counter.most_common():
        samples = issues_to_samples.get(issue) or []
        if not samples:
            continue
        print(f"- {issue} (showing {len(samples)}/{cnt})")
        for s in samples:
            print(
                " line={line} conversationId={cid} content_len={cl} tools={tn} toolResults={trn} history={hn} json_len={jl}".format(
                    line=s.line_no,
                    cid=s.conversation_id or "None",
                    cl=s.content_len,
                    tn=s.tools_n,
                    trn=s.tool_results_n,
                    hn=s.history_n,
                    jl=s.json_len,
                )
            )
        print("")
    return 0
def _scan_400_errors(
    log_text: str,
    *,
    max_history_messages: int,
    large_payload_bytes: int,
    huge_payload_bytes: int,
) -> List[Dict[str, Any]]:
    """Scan the log for 400 "Improperly formed request" errors and associate
    each one with the nearest logged request body.

    For every matching error line, look nearby for a request body dump —
    first forward for a ``request_body=`` field in the same error block,
    then backward for a handler DEBUG line ``"Kiro request body: "`` —
    parse it, and run the heuristic diagnostics over it.

    Returns a list of entry dicts, each containing at least ``line_no`` and,
    when available, ``body_bytes``, ``url``, ``issues`` and ``summary``.
    """
    lines = log_text.splitlines()
    results: List[Dict[str, Any]] = []
    # Matches both `request_body_bytes=` and the prefixed `kiro_request_body_bytes=`.
    body_bytes_re = re.compile(r"(?:kiro_)?request_body_bytes=(\d+)")
    url_re = re.compile(r"request_url=(\S+)")
    decoder = json.JSONDecoder()
    for line_no_0, line in enumerate(lines):
        if "Improperly formed request" not in line:
            continue
        clean = strip_ansi(line)
        # Avoid counting the same error twice (both the provider ERROR and the
        # handler WARN can contain this substring): only keep provider ERROR
        # lines, which normally carry request_url=...
        if "request_url=" not in clean:
            continue
        entry: Dict[str, Any] = {"line_no": line_no_0 + 1}
        m = body_bytes_re.search(clean)
        if m:
            entry["body_bytes"] = int(m.group(1))
        else:
            # Provider ERROR lines usually omit the body size; associate the
            # nearest earlier build-log / handler WARN line that has it
            # (look back at most 30 lines).
            for back in range(1, min(31, line_no_0 + 1)):
                prev = strip_ansi(lines[line_no_0 - back])
                m2 = body_bytes_re.search(prev)
                if not m2:
                    continue
                entry["body_bytes"] = int(m2.group(1))
                entry["_body_bytes_line"] = line_no_0 - back + 1
                break
        m = url_re.search(clean)
        if m:
            entry["url"] = m.group(1)
        req_body = None
        # Negative lookbehind keeps e.g. `kiro_request_body=` from matching.
        body_re = re.compile(r"(?<![a-z_])request_body=")
        # 1) Search forward for request_body=... within the error block (the
        #    provider often logs headers/body on the following lines).
        for fwd in range(0, min(31, len(lines) - line_no_0)):
            cand = strip_ansi(lines[line_no_0 + fwd])
            match = body_re.search(cand)
            if not match:
                continue
            brace = cand.find("{", match.end())
            if brace == -1:
                continue
            try:
                req_body, _ = decoder.raw_decode(cand, brace)
            except json.JSONDecodeError:
                # Truncated JSON: fall back to best-effort partial parsing.
                entry["_req_body_partial"] = True
                req_body = _partial_parse_request_body(cand[brace:], line_no_0 + fwd + 1)
            entry["_req_body_line"] = line_no_0 + fwd + 1
            break
        # 2) If not found, search backward for the handler DEBUG line
        #    "Kiro request body:" (at most 20 lines back).
        if req_body is None:
            for back in range(1, min(21, line_no_0 + 1)):
                prev_line = strip_ansi(lines[line_no_0 - back])
                marker = "Kiro request body: "
                idx = prev_line.find(marker)
                if idx == -1:
                    continue
                brace = prev_line.find("{", idx + len(marker))
                if brace == -1:
                    break
                try:
                    req_body, _ = decoder.raw_decode(prev_line, brace)
                except json.JSONDecodeError:
                    entry["_req_body_partial"] = True
                    req_body = _partial_parse_request_body(prev_line[brace:], line_no_0 - back + 1)
                entry["_req_body_line"] = line_no_0 - back + 1
                break
        if req_body and isinstance(req_body, dict):
            entry["_req_body"] = req_body
            issues = find_issues(
                req_body,
                max_history_messages=max_history_messages,
                large_payload_bytes=large_payload_bytes,
                huge_payload_bytes=huge_payload_bytes,
            )
            # Strong signal based on the actual body byte count (the logged
            # JSON may be truncated/redacted, so json_len is unreliable for
            # size judgements here).
            body_bytes = entry.get("body_bytes")
            if isinstance(body_bytes, int):
                if body_bytes > huge_payload_bytes:
                    issues.append("W_BODY_BYTES_HUGE")
                elif body_bytes > large_payload_bytes:
                    issues.append("W_BODY_BYTES_LARGE")
            entry["issues"] = sorted(set(issues))
            entry["summary"] = summarize(req_body, entry.get("_req_body_line", 0))
        results.append(entry)
    return results
def _scan_local_rejects(log_text: str) -> List[Dict[str, Any]]:
    """Collect log entries where a request body was rejected locally for
    exceeding the safety threshold, one record per matching line."""
    marker = "请求体超过安全阈值,拒绝发送"
    hits: List[Dict[str, Any]] = []
    for idx, raw_line in enumerate(log_text.splitlines(), start=1):
        if marker not in raw_line:
            continue
        fields = parse_kv(strip_ansi(raw_line))
        record: Dict[str, Any] = {
            "line_no": idx,
            "conversation_id": fields.get("conversation_id"),
        }
        # Numeric fields share the same lenient int parsing.
        for key in ("request_body_bytes", "image_bytes", "effective_bytes", "threshold"):
            record[key] = _safe_int(fields.get(key))
        hits.append(record)
    return hits
def _safe_int(v: Optional[str]) -> Optional[int]:
if v is None:
return None
try:
return int(v)
except ValueError:
return None
# CLI entry point: propagate main()'s integer return value as the process
# exit status.
if __name__ == "__main__":
    raise SystemExit(main(sys.argv[1:]))

896
tools/event-viewer.html Normal file
View File

@@ -0,0 +1,896 @@
<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>AWS Event Stream Viewer</title>
<style>
* {
box-sizing: border-box;
margin: 0;
padding: 0;
}
body {
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif;
background: #0d1117;
color: #c9d1d9;
min-height: 100vh;
padding: 20px;
}
.container {
max-width: 1400px;
margin: 0 auto;
}
h1 {
color: #58a6ff;
margin-bottom: 20px;
font-size: 24px;
}
.input-section {
margin-bottom: 20px;
}
.input-section label {
display: block;
margin-bottom: 8px;
color: #8b949e;
font-size: 14px;
}
.input-controls {
display: flex;
gap: 10px;
margin-bottom: 10px;
flex-wrap: wrap;
}
.format-select {
padding: 8px 16px;
border: 1px solid #30363d;
border-radius: 6px;
background: #21262d;
color: #c9d1d9;
font-size: 14px;
}
textarea {
width: 100%;
height: 200px;
padding: 12px;
border: 1px solid #30363d;
border-radius: 6px;
background: #161b22;
color: #c9d1d9;
font-family: 'SF Mono', Monaco, 'Courier New', monospace;
font-size: 13px;
resize: vertical;
}
textarea:focus {
outline: none;
border-color: #58a6ff;
}
.btn {
padding: 10px 20px;
border: none;
border-radius: 6px;
cursor: pointer;
font-size: 14px;
font-weight: 500;
transition: all 0.2s;
}
.btn-primary {
background: #238636;
color: white;
}
.btn-primary:hover {
background: #2ea043;
}
.btn-secondary {
background: #21262d;
color: #c9d1d9;
border: 1px solid #30363d;
}
.btn-secondary:hover {
background: #30363d;
}
.results {
margin-top: 20px;
}
.stats {
display: flex;
gap: 20px;
margin-bottom: 20px;
flex-wrap: wrap;
}
.stat-card {
background: #161b22;
border: 1px solid #30363d;
border-radius: 6px;
padding: 16px 20px;
min-width: 150px;
}
.stat-card .label {
color: #8b949e;
font-size: 12px;
margin-bottom: 4px;
}
.stat-card .value {
color: #58a6ff;
font-size: 24px;
font-weight: 600;
}
.message-list {
display: flex;
flex-direction: column;
gap: 12px;
}
.message {
background: #161b22;
border: 1px solid #30363d;
border-radius: 8px;
overflow: hidden;
}
.message-header {
display: flex;
justify-content: space-between;
align-items: center;
padding: 12px 16px;
background: #21262d;
border-bottom: 1px solid #30363d;
cursor: pointer;
}
.message-header:hover {
background: #30363d;
}
.message-info {
display: flex;
gap: 12px;
align-items: center;
flex-wrap: wrap;
}
.message-index {
background: #30363d;
color: #8b949e;
padding: 2px 8px;
border-radius: 4px;
font-size: 12px;
font-weight: 500;
}
.message-type {
padding: 2px 8px;
border-radius: 4px;
font-size: 12px;
font-weight: 500;
}
.message-type.event {
background: rgba(56, 139, 253, 0.15);
color: #58a6ff;
}
.message-type.error {
background: rgba(248, 81, 73, 0.15);
color: #f85149;
}
.message-type.exception {
background: rgba(210, 153, 34, 0.15);
color: #d29922;
}
.event-type {
color: #7ee787;
font-size: 13px;
}
.message-size {
color: #8b949e;
font-size: 12px;
}
.expand-icon {
color: #8b949e;
transition: transform 0.2s;
}
.message.expanded .expand-icon {
transform: rotate(90deg);
}
.message-content {
display: none;
padding: 16px;
}
.message.expanded .message-content {
display: block;
}
.section-title {
color: #8b949e;
font-size: 12px;
font-weight: 500;
margin-bottom: 8px;
text-transform: uppercase;
}
.headers-table {
width: 100%;
border-collapse: collapse;
margin-bottom: 16px;
font-size: 13px;
}
.headers-table th,
.headers-table td {
text-align: left;
padding: 8px 12px;
border-bottom: 1px solid #21262d;
}
.headers-table th {
color: #8b949e;
font-weight: 500;
background: #0d1117;
}
.headers-table td {
font-family: 'SF Mono', Monaco, 'Courier New', monospace;
}
.header-name {
color: #ff7b72;
}
.header-type {
color: #d2a8ff;
}
.header-value {
color: #a5d6ff;
}
.payload-container {
background: #0d1117;
border-radius: 6px;
overflow: hidden;
}
.payload-tabs {
display: flex;
border-bottom: 1px solid #21262d;
}
.payload-tab {
padding: 8px 16px;
background: transparent;
border: none;
color: #8b949e;
cursor: pointer;
font-size: 13px;
border-bottom: 2px solid transparent;
margin-bottom: -1px;
}
.payload-tab.active {
color: #58a6ff;
border-bottom-color: #58a6ff;
}
.payload-content {
padding: 12px;
font-family: 'SF Mono', Monaco, 'Courier New', monospace;
font-size: 13px;
overflow-x: auto;
max-height: 400px;
overflow-y: auto;
}
.payload-json {
white-space: pre-wrap;
word-break: break-word;
}
.payload-raw {
white-space: pre;
color: #8b949e;
}
.payload-hex {
white-space: pre;
color: #8b949e;
font-size: 12px;
}
.json-key {
color: #ff7b72;
}
.json-string {
color: #a5d6ff;
}
.json-number {
color: #79c0ff;
}
.json-boolean {
color: #ff7b72;
}
.json-null {
color: #8b949e;
}
.error-box {
background: rgba(248, 81, 73, 0.1);
border: 1px solid rgba(248, 81, 73, 0.4);
border-radius: 6px;
padding: 12px 16px;
color: #f85149;
margin-bottom: 16px;
}
.hex-view {
display: grid;
grid-template-columns: 80px 1fr 1fr;
gap: 8px;
font-family: 'SF Mono', Monaco, 'Courier New', monospace;
font-size: 12px;
}
.hex-offset {
color: #8b949e;
}
.hex-bytes {
color: #7ee787;
}
.hex-ascii {
color: #d2a8ff;
}
.raw-section {
margin-top: 20px;
padding: 16px;
background: #161b22;
border: 1px solid #30363d;
border-radius: 8px;
}
.raw-section h3 {
color: #8b949e;
font-size: 14px;
margin-bottom: 12px;
}
.copy-btn {
padding: 4px 8px;
font-size: 12px;
margin-left: 8px;
}
</style>
</head>
<body>
<div class="container">
<h1>AWS Event Stream Viewer</h1>
<div class="input-section">
<label>粘贴二进制数据(支持 Hex / Base64 格式)</label>
<div class="input-controls">
<select id="inputFormat" class="format-select">
<option value="auto">自动检测</option>
<option value="hex">Hex</option>
<option value="base64">Base64</option>
</select>
<button class="btn btn-primary" onclick="parseInput()">解析</button>
<button class="btn btn-secondary" onclick="clearAll()">清空</button>
<button class="btn btn-secondary" onclick="loadExample()">加载示例</button>
</div>
<textarea id="inputData" placeholder="粘贴 Hex 数据 (如: 00 00 00 3e 00 00 00 1d...) 或 Base64 编码数据..."></textarea>
</div>
<div id="error" class="error-box" style="display: none;"></div>
<div class="results" id="results" style="display: none;">
<div class="stats" id="stats"></div>
<div class="message-list" id="messageList"></div>
</div>
<div class="raw-section" id="rawSection" style="display: none;">
<h3>原始字节数据 <button class="btn btn-secondary copy-btn" onclick="copyRawHex()">复制 Hex</button></h3>
<div class="payload-hex" id="rawHex"></div>
</div>
</div>
<script>
// CRC32 (IEEE/ISO-HDLC) lookup table: one precomputed entry per byte value.
const CRC32_TABLE = (() => {
    const table = new Uint32Array(256);
    for (let n = 0; n < 256; n++) {
        let value = n;
        for (let bit = 0; bit < 8; bit++) {
            value = (value & 1) ? (0xEDB88320 ^ (value >>> 1)) : (value >>> 1);
        }
        table[n] = value >>> 0;
    }
    return table;
})();
// CRC32 (IEEE) over a byte sequence; returns an unsigned 32-bit integer.
function crc32(data) {
    let acc = 0xFFFFFFFF;
    for (const byte of data) {
        acc = CRC32_TABLE[(acc ^ byte) & 0xFF] ^ (acc >>> 8);
    }
    return (acc ^ 0xFFFFFFFF) >>> 0;
}
// AWS event-stream header value types: wire-type code -> { name, size }.
// `size` is the fixed byte width of the value; -1 marks variable-length
// types (ByteArray/String), which carry a 16-bit big-endian length prefix.
const VALUE_TYPES = {
    0: { name: 'BoolTrue', size: 0 },
    1: { name: 'BoolFalse', size: 0 },
    2: { name: 'Byte', size: 1 },
    3: { name: 'Short', size: 2 },
    4: { name: 'Integer', size: 4 },
    5: { name: 'Long', size: 8 },
    6: { name: 'ByteArray', size: -1 },
    7: { name: 'String', size: -1 },
    8: { name: 'Timestamp', size: 8 },
    9: { name: 'UUID', size: 16 }
};
// Guess whether pasted text is Base64 or Hex; returns 'unknown' when neither
// fits. Base64 is tried first (validated with a real atob decode).
function detectFormat(input) {
    const compact = input.replace(/[\s\n\r]/g, '');
    if (/^[A-Za-z0-9+/]+=*$/.test(compact) && compact.length % 4 === 0) {
        try {
            atob(compact);
            return 'base64';
        } catch {}
    }
    return /^[0-9A-Fa-f\s]+$/.test(input) ? 'hex' : 'unknown';
}
// Decode the pasted text into a Uint8Array. `format` is 'hex', 'base64', or
// 'auto' (in which case detectFormat decides). Throws on undecodable input.
function parseInputData(input, format) {
    const trimmed = input.trim();
    const chosen = format === 'auto' ? detectFormat(trimmed) : format;
    if (chosen === 'base64') {
        try {
            const binary = atob(trimmed.replace(/[\s\n\r]/g, ''));
            return new Uint8Array([...binary].map(c => c.charCodeAt(0)));
        } catch (e) {
            throw new Error('Base64 解码失败: ' + e.message);
        }
    }
    if (chosen === 'hex') {
        const digits = trimmed.replace(/[\s\n\r]/g, '').replace(/0x/gi, '');
        if (digits.length % 2 !== 0) {
            throw new Error('Hex 数据长度必须是偶数');
        }
        const out = new Uint8Array(digits.length / 2);
        for (let pos = 0; pos < digits.length; pos += 2) {
            out[pos / 2] = parseInt(digits.substr(pos, 2), 16);
        }
        return out;
    }
    throw new Error('无法识别输入格式');
}
// --- Big-endian integer readers --------------------------------------------
// Read an unsigned 32-bit big-endian integer at `offset`.
function readUint32BE(data, offset) {
    return (data[offset] << 24 | data[offset + 1] << 16 | data[offset + 2] << 8 | data[offset + 3]) >>> 0;
}
// Read an unsigned 16-bit big-endian integer at `offset`.
function readUint16BE(data, offset) {
    return (data[offset] << 8 | data[offset + 1]) >>> 0;
}
// Read a signed 64-bit big-endian integer at `offset` as a BigInt.
// Fix: the previous version combined the halves without sign-extension, so
// negative values (negative Long headers, pre-1970 timestamps) decoded as
// huge positive numbers. BigInt.asIntN reinterprets the 64-bit pattern as
// two's-complement signed; non-negative values are unchanged.
function readInt64BE(data, offset) {
    const high = readUint32BE(data, offset);
    const low = readUint32BE(data, offset + 4);
    return BigInt.asIntN(64, (BigInt(high) << 32n) | BigInt(low));
}
// Parse AWS event-stream headers out of `data` (the raw header bytes).
// Wire format per header: [1B name length][name][1B value type][value], where
// variable-length values (ByteArray/String) carry a 16-bit BE length prefix.
// Returns an array of { name, type, typeCode, value } records.
// NOTE(review): value reads are not bounds-checked against headerLength; a
// truncated buffer could read zero-filled bytes — confirm callers only pass
// whole frames (parseFrame does).
function parseHeaders(data, headerLength) {
    const headers = [];
    let offset = 0;
    while (offset < headerLength) {
        // Read the header-name length; a zero length or an overrun means the
        // data is malformed (or padding), so stop parsing.
        const nameLength = data[offset];
        offset++;
        if (nameLength === 0 || offset + nameLength > headerLength) {
            break;
        }
        // Read the header name (decoded as UTF-8).
        const name = new TextDecoder().decode(data.slice(offset, offset + nameLength));
        offset += nameLength;
        if (offset >= headerLength) break;
        // Read the value-type code byte.
        const valueType = data[offset];
        offset++;
        const typeInfo = VALUE_TYPES[valueType] || { name: 'Unknown', size: 0 };
        let value;
        let valueSize = typeInfo.size;
        // Decode the value according to its wire type.
        switch (valueType) {
            case 0: // BoolTrue — no value bytes
                value = true;
                break;
            case 1: // BoolFalse — no value bytes
                value = false;
                break;
            case 2: // Byte
                value = data[offset];
                offset++;
                break;
            case 3: // Short
                value = readUint16BE(data, offset);
                offset += 2;
                break;
            case 4: // Integer
                value = readUint32BE(data, offset);
                offset += 4;
                break;
            case 5: // Long — rendered as a decimal string (BigInt-backed)
                value = readInt64BE(data, offset).toString();
                offset += 8;
                break;
            case 6: // ByteArray — 16-bit BE length prefix, shown as hex bytes
                valueSize = readUint16BE(data, offset);
                offset += 2;
                value = Array.from(data.slice(offset, offset + valueSize)).map(b => b.toString(16).padStart(2, '0')).join(' ');
                offset += valueSize;
                break;
            case 7: // String — 16-bit BE length prefix, decoded as UTF-8
                valueSize = readUint16BE(data, offset);
                offset += 2;
                value = new TextDecoder().decode(data.slice(offset, offset + valueSize));
                offset += valueSize;
                break;
            case 8: // Timestamp — assumes ms since epoch, rendered ISO-8601
                value = new Date(Number(readInt64BE(data, offset))).toISOString();
                offset += 8;
                break;
            case 9: // UUID — 16 raw bytes, formatted 8-4-4-4-12
                const uuidBytes = data.slice(offset, offset + 16);
                value = Array.from(uuidBytes).map(b => b.toString(16).padStart(2, '0')).join('');
                value = `${value.slice(0,8)}-${value.slice(8,12)}-${value.slice(12,16)}-${value.slice(16,20)}-${value.slice(20)}`;
                offset += 16;
                break;
            default:
                value = '(unknown type)';
        }
        headers.push({ name, type: typeInfo.name, typeCode: valueType, value });
    }
    return headers;
}
// Parse a single event-stream frame starting at `offset`.
// Frame layout (all integers big-endian):
//   [4B total length][4B header length][4B prelude CRC]
//   [headers][payload][4B message CRC]
// Returns null when the remaining bytes cannot hold a complete frame;
// throws when the declared total length is implausible.
function parseFrame(data, offset) {
    // 12-byte prelude plus the 4-byte message CRC is the minimum frame size.
    if (data.length - offset < 16) {
        return null;
    }
    const totalLength = readUint32BE(data, offset);
    const headerLength = readUint32BE(data, offset + 4);
    const preludeCrc = readUint32BE(data, offset + 8);
    // Sanity bound: reject declared lengths under 16 bytes or over 16 MiB.
    if (totalLength < 16 || totalLength > 16 * 1024 * 1024) {
        throw new Error(`消息长度异常: ${totalLength}`);
    }
    if (data.length - offset < totalLength) {
        return null;
    }
    // Prelude CRC covers the first 8 bytes (the two length fields).
    const actualPreludeCrc = crc32(data.slice(offset, offset + 8));
    const preludeCrcValid = preludeCrc === actualPreludeCrc;
    // Message CRC covers the whole frame except the trailing CRC itself.
    const messageCrc = readUint32BE(data, offset + totalLength - 4);
    const actualMessageCrc = crc32(data.slice(offset, offset + totalLength - 4));
    const messageCrcValid = messageCrc === actualMessageCrc;
    // Headers sit immediately after the 12-byte prelude.
    const headersStart = offset + 12;
    const headersEnd = headersStart + headerLength;
    const headers = parseHeaders(data.slice(headersStart, headersEnd), headerLength);
    // Payload fills the gap between headers and the trailing message CRC.
    const payloadStart = headersEnd;
    const payloadEnd = offset + totalLength - 4;
    const payload = data.slice(payloadStart, payloadEnd);
    // Well-known headers describing the message; defaults mirror AWS behavior.
    const messageType = headers.find(h => h.name === ':message-type')?.value || 'event';
    const eventType = headers.find(h => h.name === ':event-type')?.value || '';
    const contentType = headers.find(h => h.name === ':content-type')?.value || 'application/json';
    return {
        totalLength,
        headerLength,
        preludeCrc: { expected: preludeCrc, actual: actualPreludeCrc, valid: preludeCrcValid },
        messageCrc: { expected: messageCrc, actual: actualMessageCrc, valid: messageCrcValid },
        headers,
        payload,
        messageType,
        eventType,
        contentType,
        rawBytes: data.slice(offset, offset + totalLength)
    };
}
// Walk the buffer and parse every complete frame. On a framing error, log it
// and resynchronize by advancing one byte at a time.
function parseAllMessages(data) {
    const frames = [];
    let cursor = 0;
    while (cursor < data.length) {
        try {
            const frame = parseFrame(data, cursor);
            if (!frame) {
                break;
            }
            frames.push(frame);
            cursor += frame.totalLength;
        } catch (err) {
            console.error('Parse error at offset', cursor, err);
            cursor++;
        }
    }
    return frames;
}
// Render a JS value as syntax-highlighted JSON HTML (recursive; `indent` is
// the current nesting depth).
// Fix: object keys are now HTML-escaped the same way string values are.
// Previously only double quotes in keys were backslash-escaped, so a key
// containing '<', '>' or '&' was injected verbatim into the DOM.
function formatJson(obj, indent = 0) {
    const spaces = ' '.repeat(indent);
    if (obj === null) {
        return '<span class="json-null">null</span>';
    }
    if (typeof obj === 'boolean') {
        return `<span class="json-boolean">${obj}</span>`;
    }
    if (typeof obj === 'number') {
        return `<span class="json-number">${obj}</span>`;
    }
    if (typeof obj === 'string') {
        const escaped = obj.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;').replace(/"/g, '&quot;');
        return `<span class="json-string">"${escaped}"</span>`;
    }
    if (Array.isArray(obj)) {
        if (obj.length === 0) return '[]';
        const items = obj.map(item => spaces + ' ' + formatJson(item, indent + 1));
        return '[\n' + items.join(',\n') + '\n' + spaces + ']';
    }
    if (typeof obj === 'object') {
        const keys = Object.keys(obj);
        if (keys.length === 0) return '{}';
        const items = keys.map(key => {
            // HTML-escape keys exactly like string values (fix).
            const escapedKey = key.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;').replace(/"/g, '&quot;');
            return spaces + ' ' + `<span class="json-key">"${escapedKey}"</span>: ` + formatJson(obj[key], indent + 1);
        });
        return '{\n' + items.join(',\n') + '\n' + spaces + '}';
    }
    return String(obj);
}
// Render bytes as a classic hex-dump: offset column, 16 hex bytes, ASCII
// column (non-printable bytes shown as '.'). Returns newline-joined HTML.
function formatHexView(data) {
    const rows = [];
    for (let base = 0; base < data.length; base += 16) {
        const offset = base.toString(16).padStart(8, '0');
        const hexCells = [];
        const asciiCells = [];
        for (let col = 0; col < 16; col++) {
            const pos = base + col;
            if (pos < data.length) {
                const byte = data[pos];
                hexCells.push(byte.toString(16).padStart(2, '0'));
                asciiCells.push(byte >= 32 && byte <= 126 ? String.fromCharCode(byte) : '.');
            } else {
                hexCells.push('  ');
                asciiCells.push(' ');
            }
        }
        rows.push(`<span class="hex-offset">${offset}</span> <span class="hex-bytes">${hexCells.join(' ')}</span> <span class="hex-ascii">${asciiCells.join('')}</span>`);
    }
    return rows.join('\n');
}
// Render one parsed frame as an expandable message card; returns an HTML
// string. `index` is the zero-based frame number, used for unique element ids
// and for the inline onclick handlers (toggleMessage / switchPayloadTab).
function renderMessage(message, index) {
    // Badge colour class derived from the :message-type header.
    const messageTypeClass = message.messageType === 'error' ? 'error' :
        message.messageType === 'exception' ? 'exception' : 'event';
    let payloadText = '';
    let payloadJson = null;
    // Best-effort payload decode: it may be neither valid UTF-8 nor JSON,
    // so both failures are swallowed and the raw/hex tabs remain usable.
    try {
        payloadText = new TextDecoder().decode(message.payload);
        try {
            payloadJson = JSON.parse(payloadText);
        } catch {}
    } catch {}
    // One table row per header; string values get minimal HTML escaping.
    const headersHtml = message.headers.map(h => `
        <tr>
            <td><span class="header-name">${h.name}</span></td>
            <td><span class="header-type">${h.type}</span></td>
            <td><span class="header-value">${typeof h.value === 'string' ? h.value.replace(/</g, '&lt;').replace(/>/g, '&gt;') : h.value}</span></td>
        </tr>
    `).join('');
    // Render a CRC check as a green check mark, or a red cross with the
    // expected/actual values in hex.
    const crcStatus = (crc) => crc.valid
        ? '<span style="color: #7ee787;">&#10003;</span>'
        : `<span style="color: #f85149;">&#10007; (expected: ${crc.expected.toString(16)}, got: ${crc.actual.toString(16)})</span>`;
    return `
    <div class="message" id="message-${index}">
        <div class="message-header" onclick="toggleMessage(${index})">
            <div class="message-info">
                <span class="message-index">#${index + 1}</span>
                <span class="message-type ${messageTypeClass}">${message.messageType}</span>
                ${message.eventType ? `<span class="event-type">${message.eventType}</span>` : ''}
            </div>
            <div style="display: flex; align-items: center; gap: 12px;">
                <span class="message-size">${message.totalLength} bytes</span>
                <span class="expand-icon">&#9654;</span>
            </div>
        </div>
        <div class="message-content">
            <div class="section-title">CRC 校验</div>
            <table class="headers-table" style="margin-bottom: 16px;">
                <tr><td>Prelude CRC</td><td>${crcStatus(message.preludeCrc)}</td></tr>
                <tr><td>Message CRC</td><td>${crcStatus(message.messageCrc)}</td></tr>
            </table>
            <div class="section-title">Headers (${message.headers.length})</div>
            <table class="headers-table">
                <thead>
                    <tr><th>Name</th><th>Type</th><th>Value</th></tr>
                </thead>
                <tbody>
                    ${headersHtml}
                </tbody>
            </table>
            <div class="section-title">Payload (${message.payload.length} bytes)</div>
            <div class="payload-container">
                <div class="payload-tabs">
                    <button class="payload-tab active" onclick="switchPayloadTab(${index}, 'json')">JSON</button>
                    <button class="payload-tab" onclick="switchPayloadTab(${index}, 'raw')">Raw</button>
                    <button class="payload-tab" onclick="switchPayloadTab(${index}, 'hex')">Hex</button>
                </div>
                <div class="payload-content">
                    <div class="payload-json" id="payload-json-${index}">${payloadJson ? formatJson(payloadJson) : `<span style="color: #8b949e;">${payloadText.replace(/</g, '&lt;').replace(/>/g, '&gt;') || '(empty)'}</span>`}</div>
                    <div class="payload-raw" id="payload-raw-${index}" style="display: none;">${payloadText.replace(/</g, '&lt;').replace(/>/g, '&gt;') || '(empty)'}</div>
                    <div class="payload-hex" id="payload-hex-${index}" style="display: none;">${formatHexView(message.payload)}</div>
                </div>
            </div>
        </div>
    </div>
    `;
}
// Last successfully decoded input bytes; consumed by copyRawHex().
let parsedData = null;
// Handle the parse button: decode the textarea content, show the raw hex
// dump, parse all event-stream frames, and render statistics plus one
// expandable card per message. All failures surface in the error box.
function parseInput() {
    const input = document.getElementById('inputData').value;
    const format = document.getElementById('inputFormat').value;
    const errorBox = document.getElementById('error');
    const resultsBox = document.getElementById('results');
    const rawSection = document.getElementById('rawSection');
    // Reset all output panes before re-rendering.
    errorBox.style.display = 'none';
    resultsBox.style.display = 'none';
    rawSection.style.display = 'none';
    if (!input.trim()) {
        errorBox.textContent = '请输入数据';
        errorBox.style.display = 'block';
        return;
    }
    try {
        const data = parseInputData(input, format);
        parsedData = data;
        // Show the raw hex dump first so it stays visible even when frame
        // parsing finds nothing.
        document.getElementById('rawHex').innerHTML = formatHexView(data);
        rawSection.style.display = 'block';
        const messages = parseAllMessages(data);
        if (messages.length === 0) {
            errorBox.textContent = '未能解析出任何消息。请检查输入数据格式。';
            errorBox.style.display = 'block';
            return;
        }
        // Aggregate message-type / event-type counts for the stat cards.
        const eventTypes = {};
        const messageTypes = {};
        messages.forEach(m => {
            messageTypes[m.messageType] = (messageTypes[m.messageType] || 0) + 1;
            if (m.eventType) {
                eventTypes[m.eventType] = (eventTypes[m.eventType] || 0) + 1;
            }
        });
        document.getElementById('stats').innerHTML = `
            <div class="stat-card">
                <div class="label">总消息数</div>
                <div class="value">${messages.length}</div>
            </div>
            <div class="stat-card">
                <div class="label">总字节数</div>
                <div class="value">${data.length}</div>
            </div>
            <div class="stat-card">
                <div class="label">消息类型</div>
                <div class="value" style="font-size: 14px;">${Object.entries(messageTypes).map(([k, v]) => `${k}: ${v}`).join(', ')}</div>
            </div>
            <div class="stat-card">
                <div class="label">事件类型</div>
                <div class="value" style="font-size: 14px;">${Object.entries(eventTypes).map(([k, v]) => `${k}: ${v}`).join(', ') || '-'}</div>
            </div>
        `;
        document.getElementById('messageList').innerHTML = messages.map((m, i) => renderMessage(m, i)).join('');
        resultsBox.style.display = 'block';
    } catch (e) {
        errorBox.textContent = '解析错误: ' + e.message;
        errorBox.style.display = 'block';
    }
}
// Expand or collapse the message card with the given index.
function toggleMessage(index) {
    document.getElementById(`message-${index}`).classList.toggle('expanded');
}
// Switch the visible payload rendering (json/raw/hex) for message `index`.
// NOTE(review): relies on the implicit global `event` to find the clicked
// tab button — fine with the inline onclick handlers generated by
// renderMessage, but would break if handlers were attached via
// addEventListener; confirm before refactoring the handlers.
function switchPayloadTab(index, tab) {
    const tabs = document.querySelectorAll(`#message-${index} .payload-tab`);
    tabs.forEach(t => t.classList.remove('active'));
    event.target.classList.add('active');
    document.getElementById(`payload-json-${index}`).style.display = tab === 'json' ? 'block' : 'none';
    document.getElementById(`payload-raw-${index}`).style.display = tab === 'raw' ? 'block' : 'none';
    document.getElementById(`payload-hex-${index}`).style.display = tab === 'hex' ? 'block' : 'none';
}
// Reset the tool: empty the input box, hide all output panes, and drop the
// cached bytes.
function clearAll() {
    document.getElementById('inputData').value = '';
    for (const id of ['error', 'results', 'rawSection']) {
        document.getElementById(id).style.display = 'none';
    }
    parsedData = null;
}
// Copy the last decoded bytes to the clipboard as space-separated hex pairs.
function copyRawHex() {
    if (!parsedData) return;
    const hex = Array.from(parsedData, b => b.toString(16).padStart(2, '0')).join(' ');
    navigator.clipboard.writeText(hex).then(() => {
        alert('已复制到剪贴板');
    });
}
// Fill the input box with a hand-crafted example frame and select Hex mode.
// The example is a single assistantResponseEvent whose payload is
// {"content":"Hello, World!","stop":false}.
function loadExample() {
    const example = `
00 00 00 8f 00 00 00 47 7d 83 6e 75 0d 3a 6d 65
73 73 61 67 65 2d 74 79 70 65 07 00 05 65 76 65
6e 74 0b 3a 65 76 65 6e 74 2d 74 79 70 65 07 00
16 61 73 73 69 73 74 61 6e 74 52 65 73 70 6f 6e
73 65 45 76 65 6e 74 0d 3a 63 6f 6e 74 65 6e 74
2d 74 79 70 65 07 00 10 61 70 70 6c 69 63 61 74
69 6f 6e 2f 6a 73 6f 6e 7b 22 63 6f 6e 74 65 6e
74 22 3a 22 48 65 6c 6c 6f 2c 20 57 6f 72 6c 64
21 22 2c 22 73 74 6f 70 22 3a 66 61 6c 73 65 7d
c7 8d c2 bc
    `.trim();
    document.getElementById('inputData').value = example;
    document.getElementById('inputFormat').value = 'hex';
}
</script>
</body>
</html>

145
tools/test_empty_content.py Normal file
View File

@@ -0,0 +1,145 @@
#!/usr/bin/env python3
"""测试空消息内容和 prefill 处理的改进"""
import json
import requests
BASE_URL = "http://localhost:8080"
API_KEY = "test-key"
def safe_print_response(response):
    """Print the HTTP response body; fall back to raw text when it is not JSON.

    Returns the decoded JSON object, or None when decoding failed.
    """
    try:
        payload = response.json()
    except (json.JSONDecodeError, ValueError):
        print(f"响应 (非 JSON): {response.text}")
        return None
    print(f"响应: {json.dumps(payload, indent=2, ensure_ascii=False)}")
    return payload
def test_empty_content():
    """An entirely empty user message must be rejected with HTTP 400."""
    print("测试 1: 空消息内容")
    headers = {
        "x-api-key": API_KEY,
        "anthropic-version": "2023-06-01",
        "content-type": "application/json",
    }
    payload = {
        "model": "claude-sonnet-4",
        "max_tokens": 1024,
        "messages": [{"role": "user", "content": ""}],
    }
    response = requests.post(f"{BASE_URL}/v1/messages", headers=headers, json=payload)
    print(f"状态码: {response.status_code}")
    data = safe_print_response(response)
    assert response.status_code == 400, "应返回 400 错误"
    if data:
        assert "消息内容为空" in data.get("error", {}).get("message", ""), "错误消息应包含'消息内容为空'"
    print("✓ 测试通过\n")
def test_empty_text_blocks():
    """A message whose text blocks are all whitespace must be rejected with 400."""
    print("测试 2: 仅包含空白文本块")
    whitespace_blocks = [
        {"type": "text", "text": " "},
        {"type": "text", "text": "\n\t"},
    ]
    response = requests.post(
        f"{BASE_URL}/v1/messages",
        headers={
            "x-api-key": API_KEY,
            "anthropic-version": "2023-06-01",
            "content-type": "application/json",
        },
        json={
            "model": "claude-sonnet-4",
            "max_tokens": 1024,
            "messages": [{"role": "user", "content": whitespace_blocks}],
        },
    )
    print(f"状态码: {response.status_code}")
    data = safe_print_response(response)
    assert response.status_code == 400, "应返回 400 错误"
    if data:
        assert "消息内容为空" in data.get("error", {}).get("message", ""), "错误消息应包含'消息内容为空'"
    print("✓ 测试通过\n")
def test_prefill_with_empty_user():
    """An empty user message must still be rejected when followed by an
    assistant prefill turn."""
    print("测试 3: Prefill 场景下空 user 消息")
    body = {
        "model": "claude-sonnet-4",
        "max_tokens": 1024,
        "messages": [
            {"role": "user", "content": ""},
            {"role": "assistant", "content": "Hi there"},
        ],
    }
    response = requests.post(
        f"{BASE_URL}/v1/messages",
        headers={
            "x-api-key": API_KEY,
            "anthropic-version": "2023-06-01",
            "content-type": "application/json",
        },
        json=body,
    )
    print(f"状态码: {response.status_code}")
    data = safe_print_response(response)
    assert response.status_code == 400, "应返回 400 错误"
    if data:
        assert "消息内容为空" in data.get("error", {}).get("message", ""), "错误消息应包含'消息内容为空'"
    print("✓ 测试通过\n")
def test_valid_message():
    """Control case: a normal non-empty message should be accepted (200)."""
    print("测试 4: 正常消息(对照组)")
    response = requests.post(
        f"{BASE_URL}/v1/messages",
        headers={
            "x-api-key": API_KEY,
            "anthropic-version": "2023-06-01",
            "content-type": "application/json",
        },
        json={
            "model": "claude-sonnet-4",
            "max_tokens": 50,
            "messages": [{"role": "user", "content": "Say 'test' only"}],
        },
    )
    print(f"状态码: {response.status_code}")
    if response.status_code != 200:
        # Not an assertion: this is the control case, so just show the body.
        safe_print_response(response)
        print()
    else:
        print("✓ 测试通过:正常消息处理成功\n")
if __name__ == "__main__":
    # Run every case in order; stop at the first assertion failure and keep
    # connection problems from surfacing as raw tracebacks.
    banner = "=" * 60
    print(banner)
    print("空消息内容验证测试")
    print(banner + "\n")
    try:
        for case in (
            test_empty_content,
            test_empty_text_blocks,
            test_prefill_with_empty_user,
            test_valid_message,
        ):
            case()
        print(banner)
        print("所有测试通过!")
        print(banner)
    except AssertionError as e:
        print(f"\n✗ 测试失败: {e}")
    except requests.exceptions.ConnectionError:
        print("\n✗ 无法连接到服务器,请确保服务正在运行")
    except Exception as e:
        print(f"\n✗ 发生错误: {e}")