Files
MiPi_TEST/analyze_captures.py

306 lines
12 KiB
Python
Raw Normal View History

2026-04-08 12:55:34 +01:00
"""
analyze_captures.py
Groups MIPI oscilloscope CSV files by capture, runs csv_preprocessor on each,
then sends the compact summaries to the Claude API for trend analysis.
Usage:
python analyze_captures.py # all captures in ./data
python analyze_captures.py --last N # most recent N captures only
python analyze_captures.py --capture 0001 # single capture by number
"""
import argparse
2026-04-08 15:42:51 +01:00
import html
2026-04-08 12:55:34 +01:00
import sys
2026-04-08 14:19:31 +01:00
from datetime import datetime
2026-04-08 12:55:34 +01:00
from pathlib import Path
import anthropic
2026-04-08 15:42:51 +01:00
from dotenv import load_dotenv
load_dotenv(Path(__file__).parent / ".env")
2026-04-08 12:55:34 +01:00
2026-04-09 08:45:57 +01:00
from csv_preprocessor import (
analyze_file, analyze_lp_file, analyze_1v8_file,
group_captures, ChannelMetrics, LPMetrics, V1V8Metrics,
)
2026-04-08 12:55:34 +01:00
2026-04-08 15:42:51 +01:00
# Locations relative to this script: raw oscilloscope CSVs in, HTML reports out.
DATA_DIR = Path(__file__).parent / "data"
REPORTS_DIR = Path(__file__).parent / "reports"

CLAUDE_MODEL = "claude-opus-4-6"
# Fixed domain framing for every API call; the per-run measurement data is
# supplied separately in the user prompt assembled by build_prompt().
SYSTEM_PROMPT = (
    "You are an expert in MIPI D-PHY signal integrity analysis. "
    "You will be given compact pre-processed summaries of oscilloscope captures "
    "from a MIPI CLK and DAT0 differential pair, plus 1.8 V supply rail measurements. "
    "The MIPI PHY (NXP i.MX 8M Mini) drives LP states from the 1.8 V VDDIO. "
    "Each capture has up to four data sets: "
    "sig (high-res HS quality), proto (long-window HS stats), "
    "lp (single-ended LP-11/LP-00/HS burst including SoT sequence), "
    "and pwr (1.8 V supply captured during the LP→HS transition). "
    "Analyse the data for trends, degradation, anomalies, or consistent spec concerns "
    "across captures. Be concise and actionable."
)
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def process_capture(
    ts: str,
    num: int,
    files: dict[str, Path],
    verbose: bool = False,
) -> "tuple[str, list[ChannelMetrics | LPMetrics | V1V8Metrics]]":
    """
    Run the pre-processor on all CSV files for one capture.

    Args:
        ts:      Capture timestamp string (used only in the header line).
        num:     Capture number (zero-padded to four digits in the header).
        files:   Mapping of file-role key (e.g. "proto_clk") to CSV path.
        verbose: When True, also print each file's summary to stdout.

    Returns:
        (text_summary, list_of_metrics).  The metrics list mixes
        ChannelMetrics (sig/proto), LPMetrics (lp) and V1V8Metrics (pwr)
        entries, in processing order.  Missing files produce a one-line
        note instead of crashing, and a per-file analysis error is recorded
        as an ERROR line so one bad CSV cannot abort the whole capture.
    """
    lines = [f"=== Capture {num:04d} {ts} ==="]
    metrics_list: list[ChannelMetrics | LPMetrics | V1V8Metrics] = []
    for key in ("proto_clk", "proto_dat", "sig_clk", "sig_dat", "lp_clk", "lp_dat", "pwr_1v8"):
        if key not in files:
            # pwr_1v8 comes from optional hardware (Rigol scope), so its
            # absence is expected rather than an error.
            if key == "pwr_1v8":
                lines.append(f" [{key}] NOT CAPTURED (Rigol not connected or no droop)")
            else:
                lines.append(f" [{key}] MISSING")
            continue
        try:
            if key.startswith("lp_"):
                m = analyze_lp_file(files[key])
            elif key == "pwr_1v8":
                m = analyze_1v8_file(files[key])
            else:
                m = analyze_file(files[key])
            lines.append(m.summary())
            metrics_list.append(m)
            if verbose:
                print(m.summary())
        except Exception as exc:  # deliberate best-effort: record and continue
            lines.append(f" [{key}] ERROR: {exc}")
    return "\n".join(lines), metrics_list
def build_prompt(all_summaries: list[str]) -> str:
    """Assemble the user prompt sent to Claude from per-capture summaries.

    Args:
        all_summaries: One pre-processed text summary per capture, in
            chronological order.

    Returns:
        A single prompt string: a legend describing the data sets, the
        blank-line-joined summaries, then a numbered list of analysis tasks.
    """
    body = "\n\n".join(all_summaries)
    return (
        "Below are pre-processed summaries of MIPI D-PHY captures. "
        # Legend kept consistent with SYSTEM_PROMPT: up to FOUR data sets
        # (the original said "three passes" but listed four).
        "Each capture has up to four data sets (sig/proto/lp per lane — CLK and DAT0 — plus one pwr):\n"
        " sig — high-res HS differential (rise/fall times)\n"
        " proto — long-window HS differential (jitter, clock freq, amplitude)\n"
        " lp — single-ended LP state capture (LP-11 voltage, SoT sequence, HS bursts)\n"
        " pwr — 1.8 V supply rail captured during LP→HS transition (droop, ripple, spec)\n\n"
        f"{body}\n\n"
        "Please:\n"
        "1. Identify any consistent spec concerns (HS voltage, LP-11 voltage, LP-low timing).\n"
        "2. Highlight any trends over captures (amplitude drift, jitter, LP-11 voltage, 1.8 V droop, etc.).\n"
        "3. Flag anomalies — missing LP transitions, short LP-low, unexpected burst counts.\n"
        "4. Correlate 1.8 V supply droop/ripple with MIPI LP anomalies — does droop depth or ripple "
        " correlate with SoT timing violations, short LP-low plateaux, or LP-11 voltage drops? "
        " If pwr data is absent, note that supply correlation could not be assessed.\n"
        "5. For any ERROR or WARNING lines in the summaries, explain the most likely cause "
        " (e.g. missing file, bad trigger, signal absent, probe issue, supply marginal) and what to check.\n"
        "6. Provide specific, actionable recommendations to address all identified issues and anomalies.\n"
        "7. Summarise overall signal health in 2-3 sentences."
    )
2026-04-08 15:42:51 +01:00
def save_html_report(analysis: str, token_line: str, keys: list[tuple[str, int]]) -> Path:
    """Write a timestamped HTML report to the reports/ directory.

    Args:
        analysis:   Plain-text analysis returned by Claude.
        token_line: Human-readable token-usage summary shown in the footer.
        keys:       Sorted (timestamp, capture_num) tuples covered by the
                    report; only the first/last capture numbers are shown.

    Returns:
        Path of the HTML file that was written.
    """
    import re  # only needed here; kept function-local to avoid touching the module imports

    # parents=True so a missing intermediate directory cannot fail the write
    REPORTS_DIR.mkdir(parents=True, exist_ok=True)
    now = datetime.now()
    filename = now.strftime("%Y%m%d_%H%M%S_analysis.html")
    path = REPORTS_DIR / filename
    cap_range = (
        f"Capture {keys[0][1]:04d}"
        if len(keys) == 1
        else f"Captures {keys[0][1]:04d}{keys[-1][1]:04d}"
    )
    date_str = now.strftime("%Y-%m-%d %H:%M:%S")

    # Convert plain text analysis to basic HTML (preserve line breaks, bold **)
    def text_to_html(text: str) -> str:
        escaped = html.escape(text)
        # **bold**
        escaped = re.sub(r'\*\*(.+?)\*\*', r'<strong>\1</strong>', escaped)
        # Blank lines → paragraph breaks
        paragraphs = re.split(r'\n{2,}', escaped)
        parts = []
        for para in paragraphs:
            lines = para.strip().splitlines()
            if not lines:
                continue
            # Numbered or bullet list (detected from the first line only)
            if lines[0].lstrip().startswith(('1.', '2.', '3.', '-', '*')):
                items = ''.join(f'<li>{l.lstrip("0123456789.-* ")}</li>' for l in lines if l.strip())
                tag = 'ol' if lines[0].lstrip()[0].isdigit() else 'ul'
                parts.append(f'<{tag}>{items}</{tag}>')
            else:
                parts.append('<p>' + '<br>'.join(lines) + '</p>')
        return '\n'.join(parts)

    body_html = text_to_html(analysis)
    html_content = f"""<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>MIPI Analysis {cap_range}</title>
<style>
body {{ font-family: Arial, sans-serif; max-width: 900px; margin: 40px auto; padding: 0 20px; color: #222; }}
h1 {{ color: #1a3a5c; border-bottom: 2px solid #1a3a5c; padding-bottom: 8px; }}
.meta {{ color: #555; font-size: 0.95em; margin-top: -8px; margin-bottom: 24px; }}
p {{ line-height: 1.6; }}
ol, ul {{ line-height: 1.8; padding-left: 24px; }}
li {{ margin: 4px 0; }}
.tokens {{ color: #888; font-size: 0.8em; margin-top: 32px; border-top: 1px solid #ddd; padding-top: 8px; }}
@media print {{ body {{ margin: 20px; }} }}
</style>
</head>
<body>
<h1>MIPI D-PHY Analysis Report</h1>
<p class="meta">
<strong>Generated:</strong> {date_str} &nbsp;|&nbsp;
<strong>Scope:</strong> {cap_range} &nbsp;|&nbsp;
<strong>Model:</strong> {CLAUDE_MODEL}
</p>
{body_html}
<p class="tokens">{html.escape(token_line)}</p>
</body>
</html>
"""
    path.write_text(html_content, encoding="utf-8")
    return path
2026-04-08 12:55:34 +01:00
# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------
2026-04-08 14:19:31 +01:00
def run_analysis(last: int = 10) -> None:
    """
    Called by mgmt_worker after each file transfer.
    Analyses the most recent `last` captures and prints the Claude report.
    """
    groups = group_captures(DATA_DIR)
    if not groups:
        print("[ANALYSIS] No captures found.")
        return

    recent = sorted(groups)[-last:]
    print(f"\n[ANALYSIS] Processing {len(recent)} most-recent capture(s)...")

    # process_capture() returns (summary_text, metrics); only the text is needed.
    summaries = [process_capture(ts, num, groups[(ts, num)])[0] for ts, num in recent]

    prompt = build_prompt(summaries)
    print(f"[ANALYSIS] Sending {len(prompt):,} chars to {CLAUDE_MODEL}...")
    reply = anthropic.Anthropic().messages.create(
        model=CLAUDE_MODEL,
        max_tokens=3072,
        system=SYSTEM_PROMPT,
        messages=[{"role": "user", "content": prompt}],
    )

    analysis = reply.content[0].text
    token_line = f"Tokens: {reply.usage.input_tokens} in / {reply.usage.output_tokens} out"

    # ── Console ───────────────────────────────────────────────────────────
    bar = "=" * 60
    print(f"\n{bar}")
    print("CLAUDE ANALYSIS")
    print(bar)
    print(analysis)
    print(f"({token_line})")
    print(bar + "\n")

    # ── HTML report ───────────────────────────────────────────────────────
    report_path = save_html_report(analysis, token_line, recent)
    print(f"[ANALYSIS] Report saved to {report_path}")
2026-04-08 12:55:34 +01:00
def main() -> None:
    """CLI entry point: discover captures, pre-process them, and query Claude.

    Exits with status 1 when no CSVs are found, the requested capture does
    not exist, or --capture is not a valid number.
    """
    parser = argparse.ArgumentParser(description="Analyse MIPI CSV captures with Claude")
    parser.add_argument("--last", type=int, default=None, metavar="N",
                        help="Process only the N most recent captures")
    parser.add_argument("--capture", type=str, default=None, metavar="NUM",
                        help="Process a single capture number (e.g. 0001)")
    parser.add_argument("--verbose", action="store_true",
                        help="Print per-file summaries to stdout")
    parser.add_argument("--dry-run", action="store_true",
                        help="Print summaries and prompt but do not call Claude API")
    args = parser.parse_args()

    # --- Discover and filter captures ---
    groups = group_captures(DATA_DIR)
    if not groups:
        print(f"No CSV files found in {DATA_DIR}", file=sys.stderr)
        sys.exit(1)
    keys = sorted(groups.keys())  # sorted by (timestamp, capture_num)
    if args.capture is not None:
        # A non-numeric --capture used to crash with a raw ValueError traceback;
        # report it like the other CLI errors instead.
        try:
            target_num = int(args.capture)
        except ValueError:
            print(f"Invalid capture number: {args.capture}", file=sys.stderr)
            sys.exit(1)
        keys = [k for k in keys if k[1] == target_num]
        if not keys:
            print(f"Capture {args.capture} not found.", file=sys.stderr)
            sys.exit(1)
    if args.last is not None:
        keys = keys[-args.last:]
    print(f"Processing {len(keys)} capture(s) from {DATA_DIR}\n")

    # --- Run pre-processor ---
    all_summaries: list[str] = []
    for ts, num in keys:
        summary_text, _ = process_capture(ts, num, groups[(ts, num)], verbose=args.verbose)
        all_summaries.append(summary_text)
        if not args.verbose:
            print(f" Processed capture {num:04d} {ts}")

    # --- Build Claude prompt ---
    prompt = build_prompt(all_summaries)
    if args.dry_run:
        print("\n--- Prompt that would be sent to Claude ---")
        print(prompt)
        return

    # --- Call Claude API ---
    print(f"\nSending {len(prompt):,} characters to {CLAUDE_MODEL}...\n")
    client = anthropic.Anthropic()
    message = client.messages.create(
        model=CLAUDE_MODEL,
        max_tokens=3072,
        system=SYSTEM_PROMPT,
        messages=[{"role": "user", "content": prompt}],
    )
    analysis = message.content[0].text
    token_line = f"Tokens: {message.usage.input_tokens} in / {message.usage.output_tokens} out"
    separator = "=" * 60

    # Console
    print(f"\n{separator}\nCLAUDE ANALYSIS\n{separator}")
    print(analysis)
    print(f"({token_line})")
    print(separator)

    # HTML report
    report_path = save_html_report(analysis, token_line, keys)
    print(f"\nReport saved to {report_path}")
2026-04-08 12:55:34 +01:00
# Script entry point; the module is also importable (run_analysis is called
# by mgmt_worker) without triggering the CLI.
if __name__ == "__main__":
    main()