starting to work

David Rice
2026-04-18 06:01:12 +01:00
parent 3ed974a2a5
commit 1f19834906


@@ -18,6 +18,7 @@ import math
 import threading
 import time as _time
 import argparse
+import subprocess
 import urllib.request
 import urllib.parse
 import xml.etree.ElementTree as ET
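
Note: the new import backs the `_speak` helper added below, and `subprocess.run([...], capture_output=True)` blocks until espeak-ng exits. That is fine here because `_handle_command` runs on its own daemon thread, but if speech ever needs to overlap the render loop, a non-blocking variant is straightforward. A sketch; `_speak_async` is hypothetical and not part of this commit:

    import subprocess

    def _speak_async(text: str) -> subprocess.Popen:
        # Returns immediately; espeak-ng keeps talking in the background.
        # stdout/stderr are discarded rather than captured.
        return subprocess.Popen(
            ['espeak-ng', '-s', '150', text],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )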
@@ -238,31 +239,80 @@ def _weather_worker(locations: list[tuple[float, float, str]]) -> None:
 # WAKE WORD
 # ═══════════════════════════════════════════════════════════════════════════════
 _OWW_MODEL = '/home/dfr84/Python/JARVIS/Jarvis.onnx'
-_OWW_THRESHOLD = 0.5
+_OWW_THRESHOLD = 0.6
+_OWW_DEBOUNCE = 2  # consecutive frames above threshold to trigger
 _OWW_CHUNK = 1280
+_LISTEN_SECONDS = 4.0
+_RESPONSE_HOLD = 3.0
+
+# state: 'idle' | 'listening' | 'positive' | 'negative'
+_wake: dict = {'state': 'idle', 'detected_at': 0.0, 'idle_since': 0.0}
+_WAKE_COOLDOWN = 4.0  # seconds after response before listening again
+
+_FACE_GREEN = ( 40, 200, 80)
+_FACE_BLUE = ( 40, 80, 220)
+_FACE_RED = (200, 50, 50)
+
+
+def _lerp_color(c1: tuple, c2: tuple, t: float) -> tuple:
+    return tuple(int(c1[i] + (c2[i] - c1[i]) * t) for i in range(3))
+
+
+def _speak(text: str) -> None:
+    subprocess.run(['espeak-ng', '-s', '150', text], capture_output=True)
+
+
+def _handle_command(raw: bytes, n_ch: int, native_hz: int) -> None:
+    try:
+        import speech_recognition as sr
+        import numpy as np
+        audio_np = np.frombuffer(raw, dtype=np.int16)
+        if n_ch > 1:
+            audio_np = audio_np.reshape(-1, n_ch)[:, 0]
+        target_hz = 16000
+        if native_hz != target_hz:
+            ratio = target_hz / native_hz
+            new_len = int(len(audio_np) * ratio)
+            indices = np.round(np.linspace(0, len(audio_np) - 1, new_len)).astype(int)
+            audio_np = audio_np[indices]
+        audio_data = sr.AudioData(audio_np.tobytes(), target_hz, 2)
+        text = sr.Recognizer().recognize_google(audio_data).lower()
+        if 'are you there' in text:
+            _wake['state'] = 'positive'
+            _speak('Yes, I am here')
+        else:
+            _wake['state'] = 'negative'
+            _speak("Sorry, I didn't understand")
+    except Exception:
+        _wake['state'] = 'negative'
+        _speak("Sorry, I didn't understand")
+    _time.sleep(_RESPONSE_HOLD)
+    _wake['idle_since'] = _time.time()
+    _wake['state'] = 'idle'
+
+
-# Shared wake state — written by audio thread, read by render thread
-_wake: dict = {'active': False, 'detected_at': 0.0}
 def _wake_worker() -> None:
     try:
         import pyaudio
         import numpy as np
         from openwakeword.model import Model
-    except ImportError as e:
-        print(f'[WAKE] Missing dependency: {e} — wake word disabled')
+    except ImportError:
         return
     try:
         model = Model(wakeword_model_paths=[_OWW_MODEL])
         audio = pyaudio.PyAudio()
         dev_info = audio.get_device_info_by_index(_args.mic)
         n_ch = int(dev_info['maxInputChannels'])
         native_hz = int(dev_info['defaultSampleRate'])
         target_hz = 16000
+        # frames_per_buffer scaled so we always get ~_OWW_CHUNK samples at 16 kHz
        buf_frames = int(_OWW_CHUNK * native_hz / target_hz)
         stream = audio.open(
@@ -274,35 +324,49 @@ def _wake_worker() -> None:
             frames_per_buffer=buf_frames,
         )
+        cmd_frames: list[bytes] = []
+        hit_count = 0
         while True:
             data = stream.read(buf_frames, exception_on_overflow=False)
-            audio_data = np.frombuffer(data, dtype=np.int16)
-            if n_ch > 1:
-                audio_data = audio_data.reshape(-1, n_ch)[:, 0]
-            # resample to 16 kHz
-            if native_hz != target_hz:
-                ratio = target_hz / native_hz
-                new_len = int(len(audio_data) * ratio)
-                indices = np.round(np.linspace(0, len(audio_data) - 1, new_len)).astype(int)
-                audio_data = audio_data[indices]
-            prediction = model.predict(audio_data)
-            for score in prediction.values():
-                if score >= _OWW_THRESHOLD:
-                    _wake['active'] = True
-                    _wake['detected_at'] = _time.time()
-                    break
+            state = _wake['state']
+
+            if state == 'idle':
+                audio_np = np.frombuffer(data, dtype=np.int16)
+                if n_ch > 1:
+                    audio_np = audio_np.reshape(-1, n_ch)[:, 0]
+                if native_hz != target_hz:
+                    ratio = target_hz / native_hz
+                    new_len = int(len(audio_np) * ratio)
+                    indices = np.round(np.linspace(0, len(audio_np) - 1, new_len)).astype(int)
+                    audio_np = audio_np[indices]
+                if _time.time() - _wake['idle_since'] < _WAKE_COOLDOWN:
+                    hit_count = 0
+                    continue
+                triggered = any(s >= _OWW_THRESHOLD for s in model.predict(audio_np).values())
+                if triggered:
+                    hit_count += 1
+                else:
+                    hit_count = 0
+                if hit_count >= _OWW_DEBOUNCE:
+                    hit_count = 0
+                    _wake['state'] = 'listening'
+                    _wake['detected_at'] = _time.time()
+                    cmd_frames.clear()
+
+            elif state == 'listening':
+                cmd_frames.append(data)
+                if _time.time() - _wake['detected_at'] >= _LISTEN_SECONDS:
+                    _wake['state'] = 'processing'
+                    threading.Thread(
+                        target=_handle_command,
+                        args=(b''.join(cmd_frames), n_ch, native_hz),
+                        daemon=True,
+                    ).start()
+                    cmd_frames.clear()
-    except Exception as e:
-        import traceback
-        print(f'[WAKE] {type(e).__name__}: {e}')
-        traceback.print_exc()
+    except Exception:
+        pass
-
-
-def _lerp_color(c1: tuple, c2: tuple, t: float) -> tuple:
-    return tuple(int(c1[i] + (c2[i] - c1[i]) * t) for i in range(3))
-
-
-_FACE_GREEN = ( 40, 200, 80)
-_FACE_BLUE = ( 40, 80, 220)
-_WAKE_DURATION = 8.0  # seconds before returning to idle
 
 # ═══════════════════════════════════════════════════════════════════════════════
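
This hunk replaces the old one-shot trigger with a state machine: idle frames are downmixed, resampled, and scored; _OWW_DEBOUNCE consecutive frames at or above _OWW_THRESHOLD move the worker to 'listening'; after _LISTEN_SECONDS of buffered audio it hands off to _handle_command on a daemon thread, and _WAKE_COOLDOWN suppresses retriggering right after a response. The debounce rule can be sanity-checked in isolation; a minimal sketch with hypothetical frame scores standing in for model.predict() outputs, not code from this commit:

    _OWW_THRESHOLD = 0.6
    _OWW_DEBOUNCE = 2

    def debounced(scores: list[float]) -> bool:
        # Reset the streak on any sub-threshold frame, exactly as the
        # worker resets hit_count; trigger only on a long enough streak.
        hits = 0
        for s in scores:
            hits = hits + 1 if s >= _OWW_THRESHOLD else 0
            if hits >= _OWW_DEBOUNCE:
                return True
        return False

    assert not debounced([0.2, 0.7, 0.1, 0.8])  # isolated spikes are ignored
    assert debounced([0.3, 0.7, 0.8])           # two consecutive hits trigger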
@@ -596,14 +660,15 @@ def main() -> None:
             pygame.quit(); return

         # ── Wake-word face colour ─────────────────────────────────────────────
-        if _wake['active']:
+        _ws = _wake['state']
+        if _ws in ('listening', 'processing'):
             elapsed = _time.time() - _wake['detected_at']
-            if elapsed >= _WAKE_DURATION:
-                _wake['active'] = False
-                face_color = None
-            else:
-                t = (math.sin(elapsed * math.pi * 2.0) + 1.0) / 2.0  # 0→1, 1 Hz
-                face_color = _lerp_color(_FACE_GREEN, _FACE_BLUE, t)
+            t = (math.sin(elapsed * math.pi * 2.0) + 1.0) / 2.0
+            face_color = _lerp_color(_FACE_GREEN, _FACE_BLUE, t)
+        elif _ws == 'positive':
+            face_color = _FACE_GREEN
+        elif _ws == 'negative':
+            face_color = _FACE_RED
         else:
             face_color = None
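
While listening or processing, the face pulses between green and blue at 1 Hz: sin(2*pi*elapsed) completes one cycle per second, and (sin + 1) / 2 remaps [-1, 1] to [0, 1] for the interpolation helper. A standalone worked check, reusing the constants and helper from the diff:

    import math

    _FACE_GREEN = (40, 200, 80)
    _FACE_BLUE = (40, 80, 220)

    def _lerp_color(c1, c2, t):
        return tuple(int(c1[i] + (c2[i] - c1[i]) * t) for i in range(3))

    for elapsed in (0.0, 0.25, 0.5, 0.75):
        t = (math.sin(elapsed * math.pi * 2.0) + 1.0) / 2.0
        print(elapsed, _lerp_color(_FACE_GREEN, _FACE_BLUE, t))
    # 0.0  -> t = 0.5 -> (40, 140, 150)  midpoint
    # 0.25 -> t = 1.0 -> (40, 80, 220)   full blue
    # 0.75 -> t = 0.0 -> (40, 200, 80)   full green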