"""
analyze_captures.py
Groups MIPI oscilloscope CSV files by capture, runs csv_preprocessor on each,
then sends the compact summaries to the Claude API for trend analysis.
Usage:
python analyze_captures.py # all captures in ./data
python analyze_captures.py --last N # most recent N captures only
python analyze_captures.py --capture 0001 # single capture by number
"""
import argparse
import sys
from datetime import datetime
from pathlib import Path
import anthropic
import requests
from csv_preprocessor import analyze_file, analyze_lp_file, group_captures, ChannelMetrics, LPMetrics
# Paths and endpoints -------------------------------------------------------
DATA_DIR = Path(__file__).parent / "data"                   # capture CSVs live here
ANALYSIS_LOG = Path(__file__).parent / "analysis_log.txt"   # reports are appended here
DISPLAY_URL = "http://192.168.45.8:5000/display"            # LAN display endpoint (best-effort POST)

# Claude configuration ------------------------------------------------------
CLAUDE_MODEL = "claude-opus-4-6"
SYSTEM_PROMPT = (
    "You are an expert in MIPI D-PHY signal integrity analysis. "
    "You will be given compact pre-processed summaries of oscilloscope captures "
    "from a MIPI CLK and DAT0 differential pair. "
    "Each capture has three passes: sig (high-res HS quality), proto (long-window HS stats), "
    "and lp (single-ended, shows LP-11/LP-00/HS burst structure including the SoT sequence). "
    "Analyse the data for trends, degradation, anomalies, or consistent spec concerns "
    "across captures. Be concise and actionable."
)
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def process_capture(
    ts: str,
    num: int,
    files: dict[str, Path],
    verbose: bool = False,
) -> tuple[str, list[ChannelMetrics | LPMetrics]]:
    """
    Run the pre-processor on all CSV files for one capture.

    Args:
        ts: Capture timestamp string (used only in the header line).
        num: Capture number (zero-padded to four digits in the header).
        files: Mapping of pass key ("proto_clk", "lp_dat", ...) to CSV path.
        verbose: When True, also print each file's summary to stdout.

    Returns:
        (text_summary, list_of_metrics). Missing files produce a one-line
        note instead of crashing; a per-file analysis error is recorded in
        the summary and that file is skipped.
    """
    lines = [f"=== Capture {num:04d} {ts} ==="]
    # NOTE: the return annotation previously said list[ChannelMetrics], but
    # the list actually mixes ChannelMetrics (sig/proto) and LPMetrics (lp).
    metrics_list: list[ChannelMetrics | LPMetrics] = []
    for key in ("proto_clk", "proto_dat", "sig_clk", "sig_dat", "lp_clk", "lp_dat"):
        if key not in files:
            lines.append(f" [{key}] MISSING")
            continue
        try:
            # LP passes are single-ended and need the dedicated analyzer.
            if key.startswith("lp_"):
                m = analyze_lp_file(files[key])
            else:
                m = analyze_file(files[key])
            lines.append(m.summary())
            metrics_list.append(m)
            if verbose:
                print(m.summary())
        except Exception as exc:
            # Keep going: one bad CSV must not abort the whole capture.
            lines.append(f" [{key}] ERROR: {exc}")
    return "\n".join(lines), metrics_list
def build_prompt(all_summaries: list[str]) -> str:
    """
    Assemble the user prompt sent to Claude from per-capture summaries.

    Args:
        all_summaries: One pre-processed text summary per capture.

    Returns:
        The full prompt: a preamble describing the three passes, the
        summaries joined by blank lines, and a numbered task list.
    """
    body = "\n\n".join(all_summaries)
    return (
        "Below are pre-processed summaries of MIPI D-PHY captures. "
        "Each capture has three passes per lane (CLK and DAT0):\n"
        " sig — high-res HS differential (rise/fall times)\n"
        " proto — long-window HS differential (jitter, clock freq, amplitude)\n"
        " lp — single-ended LP state capture (LP-11 voltage, SoT sequence, HS bursts)\n\n"
        f"{body}\n\n"
        "Please:\n"
        "1. Identify any consistent spec concerns (HS voltage, LP-11 voltage, LP-low timing).\n"
        "2. Highlight any trends over captures (amplitude drift, jitter, LP-11 voltage, etc.).\n"
        "3. Flag anomalies — missing LP transitions, short LP-low, unexpected burst counts.\n"
        # Fix: "23 sentences" was a garbled "2-3 sentences" (dash lost in transfer).
        "4. Summarise overall signal health in 2-3 sentences."
    )
# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------
def run_analysis(last: int = 10) -> None:
    """
    Analyse the most recent `last` captures and report via Claude.

    Called by mgmt_worker after each file transfer. Prints the report to
    the console, appends it to ANALYSIS_LOG, and best-effort POSTs it to
    DISPLAY_URL.

    Args:
        last: Number of most-recent captures to include.
    """
    groups = group_captures(DATA_DIR)
    if not groups:
        print("[ANALYSIS] No captures found.")
        return
    keys = sorted(groups.keys())[-last:]
    print(f"\n[ANALYSIS] Processing {len(keys)} most-recent capture(s)...")
    all_summaries: list[str] = []
    for ts, num in keys:
        summary_text, _ = process_capture(ts, num, groups[(ts, num)])
        all_summaries.append(summary_text)
    prompt = build_prompt(all_summaries)
    print(f"[ANALYSIS] Sending {len(prompt):,} chars to {CLAUDE_MODEL}...")
    client = anthropic.Anthropic()
    message = client.messages.create(
        model=CLAUDE_MODEL,
        max_tokens=1024,
        system=SYSTEM_PROMPT,
        messages=[{"role": "user", "content": prompt}],
    )
    analysis = message.content[0].text
    token_line = f"Tokens: {message.usage.input_tokens} in / {message.usage.output_tokens} out"

    # ── Console ───────────────────────────────────────────────────────────
    separator = "=" * 60
    print(f"\n{separator}")
    print("CLAUDE ANALYSIS")
    print(separator)
    print(analysis)
    print(f"({token_line})")
    print(separator + "\n")

    # ── Append to log file ────────────────────────────────────────────────
    # Distinct name so we don't clobber the loop variable `ts` above.
    stamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    with open(ANALYSIS_LOG, "a", encoding="utf-8") as f:
        # Fix: the capture range previously rendered as "00010005" — the
        # separating dash between the two numbers had been lost.
        f.write(f"\n{'='*60}\n{stamp} — captures {keys[0][1]:04d}-{keys[-1][1]:04d}\n{'='*60}\n")
        f.write(analysis)
        f.write(f"\n({token_line})\n")
    print(f"[ANALYSIS] Report appended to {ANALYSIS_LOG}")

    # ── Send to display (best effort — failure must not kill the worker) ──
    try:
        requests.post(DISPLAY_URL, json={"text": analysis}, timeout=5)
        print("[ANALYSIS] Report sent to display.")
    except Exception as e:
        print(f"[ANALYSIS] Display send failed: {e}")
def main() -> None:
    """
    CLI entry point: analyse MIPI CSV captures with Claude.

    Discovers captures under DATA_DIR, optionally filters them with
    --capture / --last, pre-processes each one, then either prints the
    prompt (--dry-run) or calls the Claude API and reports the result to
    console, log file, and display.
    """
    parser = argparse.ArgumentParser(description="Analyse MIPI CSV captures with Claude")
    parser.add_argument("--last", type=int, default=None, metavar="N",
                        help="Process only the N most recent captures")
    parser.add_argument("--capture", type=str, default=None, metavar="NUM",
                        help="Process a single capture number (e.g. 0001)")
    parser.add_argument("--verbose", action="store_true",
                        help="Print per-file summaries to stdout")
    parser.add_argument("--dry-run", action="store_true",
                        help="Print summaries and prompt but do not call Claude API")
    args = parser.parse_args()

    # --- Discover and filter captures ---
    groups = group_captures(DATA_DIR)
    if not groups:
        print(f"No CSV files found in {DATA_DIR}", file=sys.stderr)
        sys.exit(1)
    keys = sorted(groups.keys())  # sorted by (timestamp, capture_num)
    if args.capture is not None:
        target_num = int(args.capture)
        keys = [k for k in keys if k[1] == target_num]
        if not keys:
            print(f"Capture {args.capture} not found.", file=sys.stderr)
            sys.exit(1)
    if args.last is not None:
        keys = keys[-args.last:]
    print(f"Processing {len(keys)} capture(s) from {DATA_DIR}\n")

    # --- Run pre-processor ---
    all_summaries: list[str] = []
    for ts, num in keys:
        summary_text, _ = process_capture(ts, num, groups[(ts, num)], verbose=args.verbose)
        all_summaries.append(summary_text)
        if not args.verbose:
            print(f" Processed capture {num:04d} {ts}")

    # --- Build Claude prompt ---
    prompt = build_prompt(all_summaries)
    if args.dry_run:
        print("\n--- Prompt that would be sent to Claude ---")
        print(prompt)
        return

    # --- Call Claude API ---
    print(f"\nSending {len(prompt):,} characters to {CLAUDE_MODEL}...\n")
    client = anthropic.Anthropic()
    message = client.messages.create(
        model=CLAUDE_MODEL,
        max_tokens=1024,
        system=SYSTEM_PROMPT,
        messages=[{"role": "user", "content": prompt}],
    )
    analysis = message.content[0].text
    token_line = f"Tokens: {message.usage.input_tokens} in / {message.usage.output_tokens} out"
    separator = "=" * 60
    stamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    # Console
    print(f"\n{separator}\nCLAUDE ANALYSIS\n{separator}")
    print(analysis)
    print(f"({token_line})")
    print(separator)

    # Log file
    with open(ANALYSIS_LOG, "a", encoding="utf-8") as f:
        f.write(f"\n{separator}\n{stamp}\n{separator}\n")
        f.write(analysis)
        f.write(f"\n({token_line})\n")
    print(f"\nReport appended to {ANALYSIS_LOG}")

    # Display (best effort — failure is reported, not fatal)
    try:
        requests.post(DISPLAY_URL, json={"text": analysis}, timeout=5)
        print("Report sent to display.")
    except Exception as e:
        print(f"Display send failed: {e}")


if __name__ == "__main__":
    main()