starting to work
This commit is contained in:
153
display.py
153
display.py
@@ -18,6 +18,7 @@ import math
|
|||||||
import threading
|
import threading
|
||||||
import time as _time
|
import time as _time
|
||||||
import argparse
|
import argparse
|
||||||
|
import subprocess
|
||||||
import urllib.request
|
import urllib.request
|
||||||
import urllib.parse
|
import urllib.parse
|
||||||
import xml.etree.ElementTree as ET
|
import xml.etree.ElementTree as ET
|
||||||
@@ -238,31 +239,80 @@ def _weather_worker(locations: list[tuple[float, float, str]]) -> None:
|
|||||||
# WAKE WORD
|
# WAKE WORD
|
||||||
# ═══════════════════════════════════════════════════════════════════════════════
|
# ═══════════════════════════════════════════════════════════════════════════════
|
||||||
|
|
||||||
_OWW_MODEL = '/home/dfr84/Python/JARVIS/Jarvis.onnx'
|
_OWW_MODEL = '/home/dfr84/Python/JARVIS/Jarvis.onnx'
|
||||||
_OWW_THRESHOLD = 0.5
|
_OWW_THRESHOLD = 0.6
|
||||||
_OWW_CHUNK = 1280
|
_OWW_DEBOUNCE = 2 # consecutive frames above threshold to trigger
|
||||||
|
_OWW_CHUNK = 1280
|
||||||
|
_LISTEN_SECONDS = 4.0
|
||||||
|
_RESPONSE_HOLD = 3.0
|
||||||
|
|
||||||
|
# state: 'idle' | 'listening' | 'processing' | 'positive' | 'negative'
|
||||||
|
_wake: dict = {'state': 'idle', 'detected_at': 0.0, 'idle_since': 0.0}
|
||||||
|
_WAKE_COOLDOWN = 4.0 # seconds after response before listening again
|
||||||
|
|
||||||
|
_FACE_GREEN = ( 40, 200, 80)
|
||||||
|
_FACE_BLUE = ( 40, 80, 220)
|
||||||
|
_FACE_RED = (200, 50, 50)
|
||||||
|
|
||||||
|
|
||||||
|
def _lerp_color(c1: tuple, c2: tuple, t: float) -> tuple:
|
||||||
|
return tuple(int(c1[i] + (c2[i] - c1[i]) * t) for i in range(3))
|
||||||
|
|
||||||
|
|
||||||
|
def _speak(text: str) -> None:
|
||||||
|
subprocess.run(['espeak-ng', '-s', '150', text], capture_output=True)
|
||||||
|
|
||||||
|
|
||||||
|
def _handle_command(raw: bytes, n_ch: int, native_hz: int) -> None:
|
||||||
|
try:
|
||||||
|
import speech_recognition as sr
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
audio_np = np.frombuffer(raw, dtype=np.int16)
|
||||||
|
if n_ch > 1:
|
||||||
|
audio_np = audio_np.reshape(-1, n_ch)[:, 0]
|
||||||
|
|
||||||
|
target_hz = 16000
|
||||||
|
if native_hz != target_hz:
|
||||||
|
ratio = target_hz / native_hz
|
||||||
|
new_len = int(len(audio_np) * ratio)
|
||||||
|
indices = np.round(np.linspace(0, len(audio_np) - 1, new_len)).astype(int)
|
||||||
|
audio_np = audio_np[indices]
|
||||||
|
|
||||||
|
audio_data = sr.AudioData(audio_np.tobytes(), target_hz, 2)
|
||||||
|
text = sr.Recognizer().recognize_google(audio_data).lower()
|
||||||
|
|
||||||
|
if 'are you there' in text:
|
||||||
|
_wake['state'] = 'positive'
|
||||||
|
_speak('Yes, I am here')
|
||||||
|
else:
|
||||||
|
_wake['state'] = 'negative'
|
||||||
|
_speak("Sorry, I didn't understand")
|
||||||
|
|
||||||
|
except Exception:
|
||||||
|
_wake['state'] = 'negative'
|
||||||
|
_speak("Sorry, I didn't understand")
|
||||||
|
|
||||||
|
_time.sleep(_RESPONSE_HOLD)
|
||||||
|
_wake['idle_since'] = _time.time()
|
||||||
|
_wake['state'] = 'idle'
|
||||||
|
|
||||||
# Shared wake state — written by audio thread, read by render thread
|
|
||||||
_wake: dict = {'active': False, 'detected_at': 0.0}
|
|
||||||
|
|
||||||
def _wake_worker() -> None:
|
def _wake_worker() -> None:
|
||||||
try:
|
try:
|
||||||
import pyaudio
|
import pyaudio
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from openwakeword.model import Model
|
from openwakeword.model import Model
|
||||||
except ImportError as e:
|
except ImportError:
|
||||||
print(f'[WAKE] Missing dependency: {e} — wake word disabled')
|
|
||||||
return
|
return
|
||||||
|
|
||||||
try:
|
try:
|
||||||
model = Model(wakeword_model_paths=[_OWW_MODEL])
|
model = Model(wakeword_model_paths=[_OWW_MODEL])
|
||||||
|
audio = pyaudio.PyAudio()
|
||||||
audio = pyaudio.PyAudio()
|
dev_info = audio.get_device_info_by_index(_args.mic)
|
||||||
dev_info = audio.get_device_info_by_index(_args.mic)
|
|
||||||
n_ch = int(dev_info['maxInputChannels'])
|
n_ch = int(dev_info['maxInputChannels'])
|
||||||
native_hz = int(dev_info['defaultSampleRate'])
|
native_hz = int(dev_info['defaultSampleRate'])
|
||||||
target_hz = 16000
|
target_hz = 16000
|
||||||
# frames_per_buffer scaled so we always get ~_OWW_CHUNK samples at 16 kHz
|
|
||||||
buf_frames = int(_OWW_CHUNK * native_hz / target_hz)
|
buf_frames = int(_OWW_CHUNK * native_hz / target_hz)
|
||||||
|
|
||||||
stream = audio.open(
|
stream = audio.open(
|
||||||
@@ -274,35 +324,49 @@ def _wake_worker() -> None:
|
|||||||
frames_per_buffer=buf_frames,
|
frames_per_buffer=buf_frames,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
cmd_frames: list[bytes] = []
|
||||||
|
hit_count = 0
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
data = stream.read(buf_frames, exception_on_overflow=False)
|
data = stream.read(buf_frames, exception_on_overflow=False)
|
||||||
audio_data = np.frombuffer(data, dtype=np.int16)
|
state = _wake['state']
|
||||||
if n_ch > 1:
|
|
||||||
audio_data = audio_data.reshape(-1, n_ch)[:, 0]
|
if state == 'idle':
|
||||||
# resample to 16 kHz
|
audio_np = np.frombuffer(data, dtype=np.int16)
|
||||||
if native_hz != target_hz:
|
if n_ch > 1:
|
||||||
ratio = target_hz / native_hz
|
audio_np = audio_np.reshape(-1, n_ch)[:, 0]
|
||||||
new_len = int(len(audio_data) * ratio)
|
if native_hz != target_hz:
|
||||||
indices = np.round(np.linspace(0, len(audio_data) - 1, new_len)).astype(int)
|
ratio = target_hz / native_hz
|
||||||
audio_data = audio_data[indices]
|
new_len = int(len(audio_np) * ratio)
|
||||||
prediction = model.predict(audio_data)
|
indices = np.round(np.linspace(0, len(audio_np) - 1, new_len)).astype(int)
|
||||||
for score in prediction.values():
|
audio_np = audio_np[indices]
|
||||||
if score >= _OWW_THRESHOLD:
|
if _time.time() - _wake['idle_since'] < _WAKE_COOLDOWN:
|
||||||
_wake['active'] = True
|
hit_count = 0
|
||||||
|
continue
|
||||||
|
triggered = any(s >= _OWW_THRESHOLD for s in model.predict(audio_np).values())
|
||||||
|
if triggered:
|
||||||
|
hit_count += 1
|
||||||
|
else:
|
||||||
|
hit_count = 0
|
||||||
|
if hit_count >= _OWW_DEBOUNCE:
|
||||||
|
hit_count = 0
|
||||||
|
_wake['state'] = 'listening'
|
||||||
_wake['detected_at'] = _time.time()
|
_wake['detected_at'] = _time.time()
|
||||||
break
|
cmd_frames.clear()
|
||||||
except Exception as e:
|
|
||||||
import traceback
|
|
||||||
print(f'[WAKE] {type(e).__name__}: {e}')
|
|
||||||
traceback.print_exc()
|
|
||||||
|
|
||||||
|
elif state == 'listening':
|
||||||
|
cmd_frames.append(data)
|
||||||
|
if _time.time() - _wake['detected_at'] >= _LISTEN_SECONDS:
|
||||||
|
_wake['state'] = 'processing'
|
||||||
|
threading.Thread(
|
||||||
|
target=_handle_command,
|
||||||
|
args=(b''.join(cmd_frames), n_ch, native_hz),
|
||||||
|
daemon=True,
|
||||||
|
).start()
|
||||||
|
cmd_frames.clear()
|
||||||
|
|
||||||
def _lerp_color(c1: tuple, c2: tuple, t: float) -> tuple:
|
except Exception:
|
||||||
return tuple(int(c1[i] + (c2[i] - c1[i]) * t) for i in range(3))
|
pass
|
||||||
|
|
||||||
_FACE_GREEN = ( 40, 200, 80)
|
|
||||||
_FACE_BLUE = ( 40, 80, 220)
|
|
||||||
_WAKE_DURATION = 8.0 # seconds before returning to idle
|
|
||||||
|
|
||||||
|
|
||||||
# ═══════════════════════════════════════════════════════════════════════════════
|
# ═══════════════════════════════════════════════════════════════════════════════
|
||||||
@@ -596,14 +660,15 @@ def main() -> None:
|
|||||||
pygame.quit(); return
|
pygame.quit(); return
|
||||||
|
|
||||||
# ── Wake-word face colour ─────────────────────────────────────────────
|
# ── Wake-word face colour ─────────────────────────────────────────────
|
||||||
if _wake['active']:
|
_ws = _wake['state']
|
||||||
|
if _ws in ('listening', 'processing'):
|
||||||
elapsed = _time.time() - _wake['detected_at']
|
elapsed = _time.time() - _wake['detected_at']
|
||||||
if elapsed >= _WAKE_DURATION:
|
t = (math.sin(elapsed * math.pi * 2.0) + 1.0) / 2.0
|
||||||
_wake['active'] = False
|
face_color = _lerp_color(_FACE_GREEN, _FACE_BLUE, t)
|
||||||
face_color = None
|
elif _ws == 'positive':
|
||||||
else:
|
face_color = _FACE_GREEN
|
||||||
t = (math.sin(elapsed * math.pi * 2.0) + 1.0) / 2.0 # 0→1, 1 Hz
|
elif _ws == 'negative':
|
||||||
face_color = _lerp_color(_FACE_GREEN, _FACE_BLUE, t)
|
face_color = _FACE_RED
|
||||||
else:
|
else:
|
||||||
face_color = None
|
face_color = None
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user