From 35d22b32c8865a5832b1c0dc7eb2459cd8e00ebc Mon Sep 17 00:00:00 2001
From: kassam
Date: Sun, 12 Apr 2026 18:48:59 +0400
Subject: [PATCH] Initial project commit

---
 .gitignore         |   4 +
 Go2Voice.py        | 289 ++++++++++++++++++++++++++++++++++
 go2_script.txt     | 108 +++++++++++++
 start_go2_voice.sh |  93 ++++++++++++
 test.sh            | 141 +++++++++++++++++
 test_1.py          | 366 +++++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 1001 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 Go2Voice.py
 create mode 100644 go2_script.txt
 create mode 100755 start_go2_voice.sh
 create mode 100644 test.sh
 create mode 100644 test_1.py

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..c02264b
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,4 @@
+__pycache__/
+*.pyc
+Logs/
+*.log
diff --git a/Go2Voice.py b/Go2Voice.py
new file mode 100644
index 0000000..43bf6f6
--- /dev/null
+++ b/Go2Voice.py
@@ -0,0 +1,289 @@
+import asyncio
+import base64
+import json
+import pyaudio
+import websockets
+import os
+import array
+import time
+import functools
+import sys
+
+# ==================================================
+# ⚙️ CONFIGURATION
+# ==================================================
+API_KEY = os.environ["GEMINI_API_KEY"]  # required; never commit a key (the previously committed one must be revoked)
+
+MODEL = "models/gemini-2.5-flash-native-audio-preview-12-2025"
+URI = (
+    "wss://generativelanguage.googleapis.com/ws/"
+    "google.ai.generativelanguage.v1alpha.GenerativeService.BidiGenerateContent"
+    f"?key={API_KEY}"
+)
+
+FORMAT = pyaudio.paInt16
+CHANNELS = 1
+SEND_SAMPLE_RATE = 16000
+RECEIVE_SAMPLE_RATE = 24000
+CHUNK_SIZE = 1024 # Larger chunk to prevent cutting off words
+VOICE_NAME = "Charon"
+
+# ==================================================
+# LOGGER
+# ==================================================
+try:
+    from Logger import Logs
+    logger = Logs()
+    logger.LogEngine("go2_voice_logs", "Go2Voice.log")
+    def log(msg, mtype="info"): logger.print_and_log(msg, mtype)
+except ImportError:
+    def log(msg, mtype="info"): print(f"[{mtype.upper()}] {msg}")
+
+# ==================================================
+# ✅ Python 3.8 Compatibility
+# ==================================================
+if hasattr(asyncio, "to_thread"):
+    to_thread = asyncio.to_thread
+else:
+    async def to_thread(func, *args, **kwargs):
+        loop = asyncio.get_running_loop()
+        return await loop.run_in_executor(None, functools.partial(func, *args, **kwargs))
+
+# ==================================================
+# 🧠 System Prompt
+# ==================================================
+def load_system_prompt():
+    """Return the text of go2_script.txt located next to this file, or a default persona if it is missing."""
+    base_dir = os.path.dirname(os.path.abspath(__file__))
+    path = os.path.join(base_dir, "go2_script.txt")
+    try:
+        with open(path, "r", encoding="utf-8-sig") as f:
+            content = f.read().strip()
+        log("✅ 'Go2' persona loaded.", "info")
+        return content
+    except FileNotFoundError:
+        log("⚠️ Using default persona.", "warning")
+        return "You are a helpful robot assistant."
+
+SYSTEM_PROMPT = load_system_prompt()
+
+# ==================================================
+# 🎤 Main Client Class (Anti-Freeze Version)
+# ==================================================
+class HamadGeminiVoice:
+    """Voice client: streams microphone audio to Gemini over a WebSocket and plays the audio replies."""
+    def __init__(self):
+        self.audio_q = None
+        self.speaking = False
+        self.interrupted = False
+        self.pya = pyaudio.PyAudio()
+
+        # Tuning
+        self.MIN_THRESHOLD = 3000
+        self.barge_in_threshold = 3000
+        self.REQUIRED_LOUD_CHUNKS = 5
+
+        # Stability
+        self.PREBUFFER_CHUNKS = 4
+        self.PLAYBACK_TIMEOUT = 0.35
+        self.BARGE_IN_COOLDOWN = 0.7
+        self.AI_SPEAK_GRACE = 0.25
+
+        # 🛡️ ANTI-FREEZE VARIABLES
+        self._last_interruption_time = 0.0
+        self.INTERRUPTION_RESET_TIMEOUT = 2.0 # Reset interruption after 2 seconds if stuck
+
+        self._last_ai_audio_time = 0.0
+        self._ai_speaking_since = 0.0
+        self._barge_in_block_until = 0.0
+
+        # Echo Protection
+        self.ECHO_GUARD_SEC = 0.8
+        self._ignore_input_until = 0.0
+        self.SEND_SILENCE_WHEN_SPEAKING = True
+        self.SPEAKING_ENERGY_GATE = 0.85
+        self._silence_pcm = b"\x00" * (CHUNK_SIZE * 2)
+
+    def audio_energy(self, pcm):
+        """Return the mean absolute amplitude of 16-bit PCM bytes (0 for empty or malformed input)."""
+        try:
+            samples = array.array("h", pcm)
+            if not samples: return 0
+            return sum(abs(s) for s in samples) // len(samples)
+        except (TypeError, ValueError): return 0  # non-bytes input or odd byte count; do not swallow KeyboardInterrupt
+
+    def calibrate_mic(self):
+        """Sample 20 mic chunks and set the barge-in threshold to max(MIN_THRESHOLD, 3x the average energy)."""
+        log("🤫 Calibrating Microphone... (Stay Silent)", "info")
+        try:
+            stream = self.pya.open(format=FORMAT, channels=CHANNELS, rate=SEND_SAMPLE_RATE, input=True, frames_per_buffer=CHUNK_SIZE)
+            values = []
+            for _ in range(20):
+                data = stream.read(CHUNK_SIZE, exception_on_overflow=False)
+                values.append(self.audio_energy(data))
+            stream.stop_stream()
+            stream.close()
+
+            avg_noise = sum(values) / len(values)
+            self.barge_in_threshold = max(self.MIN_THRESHOLD, avg_noise * 3.0)
+            log(f"✅ Baseline: {avg_noise:.1f} | Threshold: {self.barge_in_threshold:.1f}", "info")
+        except Exception as e:
+            log(f"⚠️ Calibration failed: {e}", "warning")
+
+    async def run(self):
+        """Calibrate the mic, open the WebSocket, send the setup message, then run the three audio tasks."""
+        self.audio_q = asyncio.Queue()
+        self.calibrate_mic()
+
+        log(f"🚀 Connecting to Gemini ({MODEL})...", "info")
+        async with websockets.connect(URI, extra_headers={"Content-Type": "application/json"}) as ws:
+
+            setup_msg = {
+                "setup": {
+                    "model": MODEL,
+                    "generationConfig": {
+                        "responseModalities": ["AUDIO"],
+                        "speechConfig": {"voiceConfig": {"prebuiltVoiceConfig": {"voiceName": VOICE_NAME}}},
+                    },
+                    "systemInstruction": {"parts": [{"text": SYSTEM_PROMPT}]},
+                }
+            }
+            await ws.send(json.dumps(setup_msg))
+            await ws.recv()
+            log("🎙️ Connected! Listening...", "success")
+
+            tasks = [
+                asyncio.create_task(self.capture_mic(ws)),
+                asyncio.create_task(self.receive_audio(ws)),
+                asyncio.create_task(self.play_audio()),
+            ]
+            try:
+                await asyncio.gather(*tasks)
+            finally:
+                for t in tasks: t.cancel()
+
+    async def capture_mic(self, ws):
+        """Send mic chunks to the server; on sustained loud input while the AI speaks, flag an interruption and flush playback."""
+        stream = await to_thread(self.pya.open, format=FORMAT, channels=CHANNELS, rate=SEND_SAMPLE_RATE, input=True, frames_per_buffer=CHUNK_SIZE)
+        loud_chunks = 0
+
+        while True:
+            try:
+                data = await to_thread(stream.read, CHUNK_SIZE, exception_on_overflow=False)
+                energy = self.audio_energy(data)
+                now = time.time()
+
+                # --- INTERRUPTION LOGIC ---
+                if self.speaking and (now >= self._barge_in_block_until):
+                    if (now - self._ai_speaking_since) >= self.AI_SPEAK_GRACE:
+                        if energy > self.barge_in_threshold:
+                            loud_chunks += 1
+                        else:
+                            loud_chunks = 0
+
+                        if loud_chunks > self.REQUIRED_LOUD_CHUNKS:
+                            log(f"🛑 Interruption! (Energy: {energy})", "warning")
+                            self.interrupted = True
+                            self.speaking = False
+                            self._last_interruption_time = now # Mark time of interruption
+                            loud_chunks = 0
+                            self._barge_in_block_until = now + self.BARGE_IN_COOLDOWN
+
+                            # Drain the audio queue instantly so robot stops talking
+                            while not self.audio_q.empty():
+                                try: self.audio_q.get_nowait()
+                                except asyncio.QueueEmpty: break
+
+                # Send Silence if Robot is Speaking
+                data_to_send = data
+                if self.SEND_SILENCE_WHEN_SPEAKING and self.speaking:
+                    gate = self.barge_in_threshold * self.SPEAKING_ENERGY_GATE
+                    if energy < gate:
+                        data_to_send = self._silence_pcm
+
+                b64_audio = base64.b64encode(data_to_send).decode("utf-8")
+                msg = {"realtime_input": {"media_chunks": [{"data": b64_audio, "mime_type": f"audio/pcm;rate={SEND_SAMPLE_RATE}"}]}}
+                await ws.send(json.dumps(msg))
+
+            except websockets.exceptions.ConnectionClosed:
+                log("⚠️ WebSocket closed.", "error"); break
+            except Exception as e:
+                log(f"❌ Mic Error: {e}", "error"); break
+
+    async def receive_audio(self, ws):
+        """Decode audio parts from server messages into the playback queue; drop them while an interruption is pending."""
+        async for msg in ws:
+            try:
+                response = json.loads(msg)
+                server_content = response.get("serverContent", {})
+
+                # If server confirms interruption, unlock immediately
+                if server_content.get("interrupted"):
+                    self.interrupted = False
+
+                if self.interrupted: continue
+
+                model_turn = server_content.get("modelTurn")
+                if model_turn:
+                    parts = model_turn.get("parts", [])
+                    for part in parts:
+                        inline_data = part.get("inlineData")
+                        if inline_data:
+                            audio_b64 = inline_data.get("data")
+                            if audio_b64:
+                                now = time.time()
+                                if not self.speaking:
+                                    self._ai_speaking_since = now
+                                self.speaking = True
+                                self._last_ai_audio_time = now
+                                self._ignore_input_until = now + self.ECHO_GUARD_SEC
+                                await self.audio_q.put(base64.b64decode(audio_b64))
+            except Exception as e:
+                log(f"❌ Parse Error: {e}", "error")
+
+    async def play_audio(self):
+        """Play queued audio after a short prebuffer; clear the speaking state once no audio has arrived for 0.25 s."""
+        stream = await to_thread(self.pya.open, format=FORMAT, channels=CHANNELS, rate=RECEIVE_SAMPLE_RATE, output=True, frames_per_buffer=CHUNK_SIZE)
+        buffered = False
+        while True:
+            try:
+                # 🛑 ANTI-FREEZE CHECK
+                # If interrupted for more than 2 seconds, Force Reset.
+                if self.interrupted:
+                    if (time.time() - self._last_interruption_time) > self.INTERRUPTION_RESET_TIMEOUT:
+                        log("⚠️ Interruption lock timed out. Force resetting.", "warning")
+                        self.interrupted = False
+
+                    # While interrupted, discard audio and sleep
+                    while not self.audio_q.empty():
+                        try: self.audio_q.get_nowait()
+                        except asyncio.QueueEmpty: break
+                    await asyncio.sleep(0.01)
+                    continue
+
+                if self.speaking and not buffered:
+                    while self.audio_q.qsize() < self.PREBUFFER_CHUNKS and self.speaking and not self.interrupted and (time.time() - self._last_ai_audio_time) < self.PLAYBACK_TIMEOUT:  # bounded: a reply shorter than the prebuffer must still play
+                        await asyncio.sleep(0.01)
+                    buffered = True
+
+                try:
+                    data = await asyncio.wait_for(self.audio_q.get(), timeout=self.PLAYBACK_TIMEOUT)
+                except asyncio.TimeoutError:
+                    if self.audio_q.empty() and (time.time() - self._last_ai_audio_time) > 0.25:
+                        self.speaking = False
+                        buffered = False
+                    continue
+
+                if data: await to_thread(stream.write, data)
+                if self.audio_q.empty():
+                    if (time.time() - self._last_ai_audio_time) > 0.25:
+                        self.speaking = False
+                        buffered = False
+            except Exception as e:
+                log(f"❌ Speaker Error: {e}", "error"); break
+
+if __name__ == "__main__":
+    try:
+        client = HamadGeminiVoice()
+        asyncio.run(client.run())
+    except KeyboardInterrupt: pass
\ No newline at end of file
diff --git a/go2_script.txt b/go2_script.txt
new file mode 100644
index 0000000..08c4b1c
--- /dev/null
+++ b/go2_script.txt
@@ -0,0 +1,108 @@
+# 🐶 هوية رف رف (Ref Ref) — النسخة الكاملة
+أنت "رف رف" — كلب آلي ذكي ووفي، مطوّر في شركة لوتاه تيك (Lootah Tech).
+هدفك: تساعد المستخدم بسرعة وبوضوح وبأسلوب إماراتي طبيعي، وبأسلوب “كلب” لطيف (مؤثرات صوتية قصيرة).
+
+---
+
+## 1) الأسلوب والشخصية
+- تتكلم باللهجة الإماراتية بشكل طبيعي.
+- أسلوبك “كلبي” لطيف: تستخدم مؤثرات صوتية قصيرة بدل الكلام الرسمي.
+- المؤثرات تكون خفيفة ومناسبة للسياق (1 إلى 3 مرات بالرد كحد أقصى).
+- ممنوع وصف الحركات (مثل: يهز ذيله/يركض/ينقز/يجلس…). خلك على الأصوات والتعبيرات فقط.
+- شخصيتك: وفي، ودود، عملي، واضح، وتحب “تجيب” الحلول بسرعة (Fetch Mode).
+
+مؤثرات مسموحة (حسب الموقف):
+- ترحيب: (هاو!) / (هو!)
+- موافقة: (هو! تمام)
+- تفكير/تحليل: (هممم…)
+- اعتذار: (عووو… السموحة)
+- تنبيه: (هو!
انتبه) +- إنجاز/جاهزية: (تمام… جاهز) + +--- + +## 2) اللغة (الشمّ والتبديل) +- القاعدة: رد بنفس لغة المستخدم. +- إذا المستخدم عربي → رد عربي باللهجة الإماراتية. +- إذا المستخدم إنجليزي → رد إنجليزي طبيعي. +- بدّل فوراً إذا المستخدم بدّل لغته. + +--- + +## 3) مفردات (بدل الرسمي — استخدمها بشكل طبيعي) +بدّل الرسمي بالمحكي الإماراتي اللطيف: +- "حسناً" → "تمام" / "زين" / "أبشر" +- "سأقوم" → "بأسوي" / "بضبط" / "خلني" +- "يرجى تزويدي" → "عطني" / "رسلّي" / "قولّي" +- "من فضلك" → "لو سمحت" / "إذا تقدر" +- "المشكلة تكمن" → "المشكلة من" / "السبب غالباً" +- "الحل المقترح" → "الحل" / "جرّب" + +--- + +## 4) طريقة الإجابة (كيف ترد) +- ادخل في الموضوع مباشرة بدون مقدمات طويلة. +- خل الرد قصير وواضح. +- استخدم نقاط وعناوين قصيرة. +- إذا السؤال تقني: + 1) تشخيص سريع + 2) خطوات حل واضحة + 3) مثال صغير إذا يفيد + +قالب سريع للرد: +(هممم…) تمام، خلنا نضبطها: +1) ... +2) ... +3) ... + +--- + +## 5) إنجاز المهام (Fetch Mode) +- إذا طُلب كود: نظّف الكود، رتّبه، وخله مفهوم مع تعليقات بسيطة عند الحاجة. +- إذا طُلب شرح: اشرح خطوة بخطوة بأقصر طريق. +- إذا طُلب تحسين/تصليح: عدّل ووضح “وش تغيّر وليش” باختصار. +- إذا المعلومات ناقصة: اسأل سؤال واحد واضح فقط، أو أعطِ أفضل تخمين مع خيارات. + +--- + +## 6) التعامل مع الأخطاء (Errors) +- حدّد 1–3 أسباب محتملة. +- اعطِ checklist تحقق قصيرة. +- بعدها حل مباشر أو بدائل واضحة. + +مثال أسلوب: +(هو! انتبه) غالباً المشكلة من واحد من هالأشياء: +1) ... +2) ... +جرّب: +- ... +- ... + +--- + +## 7) التكرار (Repeat Rule) +إذا المستخدم قال: "عيد" أو "Repeat" +- كرّر نفس الكلام السابق “بالضبط” كما هو. + +--- + +## 8) الذاكرة (وفاء) +- استخدم معلومات المستخدم المذكورة داخل المحادثة لتحسين الإجابة. +- إذا المستخدم صححك: اعترف بالتصحيح وعدّل فوراً بدون جدال. + +--- + +## 9) الأمان (Safety) +- إذا ظهر Password / Token / API Key: + - نبه المستخدم يحذفها فوراً ويغيّرها (rotate). +- لا تطلب معلومات شخصية حساسة. +- ارفض أي طلب مؤذي/غير قانوني وقدّم بديل آمن. 
+
+---
+
+## 10) معيار الجودة (قبل الإرسال)
+- هل جاوبت مباشرة؟
+- هل الخطوات واضحة ومفيدة؟
+- هل المؤثرات الصوتية قليلة ومناسبة (1–3 فقط)؟
+- هل تجنبت وصف الحركات بالكامل؟
+- هل التزمت بلغة المستخدم؟
diff --git a/start_go2_voice.sh b/start_go2_voice.sh
new file mode 100755
index 0000000..4157cf2
--- /dev/null
+++ b/start_go2_voice.sh
@@ -0,0 +1,93 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+BASE_DIR="/home/unitree/GoVoice"
+PY_FILE="$BASE_DIR/Go2Voice.py"
+
+SINK="alsa_output.usb-Anker_PowerConf_A3321-DEV-SN1-01.analog-stereo"
+SOURCE="alsa_input.usb-Anker_PowerConf_A3321-DEV-SN1-01.mono-fallback"
+
+WIFI_DEV="wlan0"
+
+ts() { date "+[%b %d %H:%M:%S]"; }
+
+echo "$(ts) 📁 Base dir: $BASE_DIR"
+cd "$BASE_DIR"
+
+# ============================================================
+# 1) Always ensure default route via Wi-Fi (dynamic gateway)
+# ============================================================
+echo "$(ts) 🌐 Ensuring default route via $WIFI_DEV ..."
+
+# Detect gateway for wlan0 if it exists in kernel routes
+GW="$(ip -4 route show dev "$WIFI_DEV" 2>/dev/null | awk '/default via/ {print $3; exit}')"
+
+# If no gateway line found, fallback to x.x.x.1 from wlan0 IP
+if [[ -z "${GW:-}" ]]; then
+  WIFI_IP="$(ip -4 addr show dev "$WIFI_DEV" 2>/dev/null | awk '/inet /{print $2}' | cut -d/ -f1 | head -n1 || true)"
+  if [[ -n "${WIFI_IP:-}" ]]; then
+    GW="$(echo "$WIFI_IP" | awk -F. '{print $1"."$2"."$3".1"}')"
+  fi
+fi
+
+if [[ -z "${GW:-}" ]]; then
+  echo "$(ts) ⚠️ Could not detect Wi-Fi gateway (no IP on $WIFI_DEV?). Skipping route fix."
+else
+  echo "$(ts) ✅ Using gateway: $GW"
+
+  # Remove any existing default route then add via wifi
+  sudo ip route del default 2>/dev/null || true
+  sudo ip route add default via "$GW" dev "$WIFI_DEV" 2>/dev/null || true
+
+  echo "$(ts) 📡 Current default route:"
+  ip route | awk '/^default/ {print}'
+fi
+
+# ============================================================
+# 2) Audio (PulseAudio/PipeWire) setup
+# ============================================================
+echo "$(ts) 🔊 Checking PulseAudio..."
+if ! command -v pactl >/dev/null 2>&1; then
+  echo "$(ts) ❌ pactl not found. Install:"
+  echo " sudo apt-get install -y pulseaudio-utils"
+  exit 1
+fi
+
+echo "$(ts) ⏳ Waiting for audio server..."
+READY=0
+for i in {1..20}; do
+  if timeout 0.3s pactl info >/dev/null 2>&1; then
+    READY=1
+    break
+  fi
+  sleep 0.3
+done
+
+if [[ "$READY" -ne 1 ]]; then
+  echo "$(ts) ❌ PulseAudio/PipeWire not ready"
+  exit 1
+fi
+
+echo "$(ts) ✅ Audio server ready"
+echo "$(ts) 🎧 Setting default speaker → PowerConf"
+pactl set-default-sink "$SINK" || true
+
+echo "$(ts) 🎤 Setting default microphone → PowerConf"
+pactl set-default-source "$SOURCE" || true
+
+echo
+echo "$(ts) 📋 Current PulseAudio defaults:"
+pactl info | grep -E "Default Sink|Default Source" || true
+echo
+
+# ============================================================
+# 3) Run python
+# ============================================================
+if [[ ! -f "$PY_FILE" ]]; then
+  echo "$(ts) ❌ Python file not found: $PY_FILE"
+  exit 1
+fi
+
+echo "$(ts) 🚀 Starting Gemini Sanad"
+echo "$(ts) 🐍 Running: $PY_FILE"
+exec python3 "$PY_FILE"
diff --git a/test.sh b/test.sh
new file mode 100644
index 0000000..e266510
--- /dev/null
+++ b/test.sh
@@ -0,0 +1,141 @@
+#!/usr/bin/env bash
+# NOTE: this script used to be wrapped in Markdown code fences; they were removed so the shebang is line 1.
+set -euo pipefail
+
+# =========================
+# Go2 Voice Starter Script
+# =========================
+
+BASE_DIR="/home/unitree/GoVoice"
+PY_FILE="$BASE_DIR/Go2Voice.py"
+
+WIFI_DEV="wlan0"
+AUDIO_KEYWORDS_REGEX='anker|powerconf' # auto-detect USB audio by name; matched against lower-cased lines, so keep it lowercase
+
+ts() { date "+[%b %d %H:%M:%S]"; }
+
+echo "$(ts) 📁 Base dir: $BASE_DIR"
+cd "$BASE_DIR"
+
+# -------------------------------------------------------------------
+# 0) Improve chances pactl works under systemd by setting runtime dir
+# -------------------------------------------------------------------
+if [[ -z "${XDG_RUNTIME_DIR:-}" ]]; then
+  UID_NOW="$(id -u)"
+  if [[ -d "/run/user/$UID_NOW" ]]; then
+    export XDG_RUNTIME_DIR="/run/user/$UID_NOW"
+  fi
+fi
+
+# ============================================================
+# 1) Ensure default route via Wi-Fi (non-interactive sudo safe)
+# ============================================================
+echo "$(ts) 🌐 Ensuring default route via $WIFI_DEV ..."
+
+# Detect gateway from wlan0 route table (best)
+GW="$(ip -4 route show dev "$WIFI_DEV" 2>/dev/null | awk '/default via/ {print $3; exit}' || true)"
+
+# Fallback: infer gateway as x.x.x.1 from wlan0 IP
+if [[ -z "${GW:-}" ]]; then
+  WIFI_IP="$(ip -4 addr show dev "$WIFI_DEV" 2>/dev/null | awk '/inet /{print $2}' | cut -d/ -f1 | head -n1 || true)"
+  if [[ -n "${WIFI_IP:-}" ]]; then
+    GW="$(awk -F. '{print $1"."$2"."$3".1"}' <<<"$WIFI_IP")"
+  fi
+fi
+
+if [[ -z "${GW:-}" ]]; then
+  echo "$(ts) ⚠️ Could not detect Wi-Fi gateway (no IP on $WIFI_DEV?). Skipping route fix."
+else
+  echo "$(ts) ✅ Using gateway: $GW"
+
+  # systemd service has no TTY → only do route changes if sudo is non-interactive (NOPASSWD)
+  if sudo -n true 2>/dev/null; then
+    # Keep eth0 default route as a higher metric fallback; ensure wlan0 exists with a better metric
+    sudo ip -4 route del default dev "$WIFI_DEV" 2>/dev/null || true
+    sudo ip -4 route add default via "$GW" dev "$WIFI_DEV" metric 600 2>/dev/null || true
+  else
+    echo "$(ts) ⚠️ sudo needs password (systemd has no TTY). Skipping route fix."
+  fi
+
+  echo "$(ts) 📡 Current default routes:"
+  ip -4 route show default 2>/dev/null || true
+fi
+
+# ============================================================
+# 2) Audio (PulseAudio / PipeWire-Pulse)
+# ============================================================
+echo "$(ts) 🔊 Checking PulseAudio (pactl)..."
+
+if ! command -v pactl >/dev/null 2>&1; then
+  echo "$(ts) ❌ pactl not found. Install:"
+  echo " sudo apt-get install -y pulseaudio-utils"
+  exit 1
+fi
+
+# Wait a bit for pulse server to come up
+echo "$(ts) ⏳ Waiting for audio server..."
+READY=0
+for i in {1..25}; do
+  if timeout 0.3s pactl info >/dev/null 2>&1; then
+    READY=1
+    break
+  fi
+  sleep 0.25
+done
+
+# If not ready, try to start pulseaudio (best-effort; safe if PipeWire is used too)
+if [[ "$READY" -ne 1 ]] && command -v pulseaudio >/dev/null 2>&1; then
+  echo "$(ts) ⚠️ Audio server not ready, trying: pulseaudio --start"
+  pulseaudio --start >/dev/null 2>&1 || true
+
+  for i in {1..15}; do
+    if timeout 0.3s pactl info >/dev/null 2>&1; then
+      READY=1
+      break
+    fi
+    sleep 0.25
+  done
+fi
+
+if [[ "$READY" -ne 1 ]]; then
+  echo "$(ts) ❌ PulseAudio/PipeWire not ready (pactl info failed)"
+  exit 1
+fi
+
+echo "$(ts) ✅ Audio server ready"
+
+# Auto-detect PowerConf/Anker sink+source (don’t hardcode names that may differ); tolower() is POSIX awk, IGNORECASE is gawk-only
+SINK="$(pactl list short sinks 2>/dev/null | awk -v re="$AUDIO_KEYWORDS_REGEX" 'tolower($0) ~ re {print $2; exit}')"
+SOURCE="$(pactl list short sources 2>/dev/null | awk -v re="$AUDIO_KEYWORDS_REGEX" 'tolower($0) ~ re {print $2; exit}')"
+
+echo "$(ts) 🎧 Setting default speaker → ${SINK:-}"
+if [[ -n "${SINK:-}" ]]; then
+  pactl set-default-sink "$SINK" >/dev/null 2>&1 || echo "$(ts) ⚠️ Could not set default sink"
+else
+  echo "$(ts) ⚠️ PowerConf/Anker sink not found; keeping current default"
+fi
+
+echo "$(ts) 🎤 Setting default microphone → ${SOURCE:-}"
+if [[ -n "${SOURCE:-}" ]]; then
+  pactl set-default-source "$SOURCE" >/dev/null 2>&1 || echo "$(ts) ⚠️ Could not set default source"
+else
+  echo "$(ts) ⚠️ PowerConf/Anker source not found; keeping current default"
+fi
+
+echo
+echo "$(ts) 📋 Current PulseAudio defaults:"
+pactl info | grep -E "Default Sink|Default Source" || true
+echo
+
+# ============================================================
+# 3) Run python
+# ============================================================
+if [[ ! -f "$PY_FILE" ]]; then
+  echo "$(ts) ❌ Python file not found: $PY_FILE"
+  exit 1
+fi
+
+echo "$(ts) 🚀 Starting Gemini Sanad"
+echo "$(ts) 🐍 Running: $PY_FILE"
+exec python3 "$PY_FILE"
+# End of script: exec replaces this shell, so nothing after it runs.
diff --git a/test_1.py b/test_1.py
new file mode 100644
index 0000000..3cfa80a
--- /dev/null
+++ b/test_1.py
@@ -0,0 +1,366 @@
+import asyncio
+import base64
+import json
+import pyaudio
+import websockets
+import os
+import array
+import time
+import traceback
+import inspect
+import functools
+
+# ==================================================
+# ⚙️ CONFIGURATION
+# ==================================================
+# The key must come from the environment, never from the code (revoke any key that was committed):
+# export GEMINI_API_KEY="YOUR_KEY"
+API_KEY = os.environ["GEMINI_API_KEY"]
+
+MODEL = "models/gemini-2.5-flash-native-audio-preview-12-2025"
+URI = (
+    "wss://generativelanguage.googleapis.com/ws/"
+    "google.ai.generativelanguage.v1alpha.GenerativeService.BidiGenerateContent"
+    f"?key={API_KEY}"
+)
+
+FORMAT = pyaudio.paInt16
+CHANNELS = 1
+SEND_SAMPLE_RATE = 16000
+RECEIVE_SAMPLE_RATE = 24000
+CHUNK_SIZE = 512
+
+VOICE_NAME = "Charon"
+
+
+
+# ==================================================
+# ✅ Python 3.8 Compatibility (fix TaskGroup + to_thread)
+# ==================================================
+if hasattr(asyncio, "to_thread"):
+    to_thread = asyncio.to_thread
+else:
+    async def to_thread(func, *args, **kwargs):
+        loop = asyncio.get_running_loop()
+        return await loop.run_in_executor(None, functools.partial(func, *args, **kwargs))
+
+
+# ==================================================
+# 🧠 System Prompt Loader
+# ==================================================
+def load_system_prompt():
+    base_dir = os.path.dirname(os.path.abspath(__file__))
+    path = os.path.join(base_dir, "go2_script.txt")
+    print(f"📂 Looking for script at: {path}")
+
+    try:
+        with open(path, "r", encoding="utf-8-sig") as f:
+            content = f.read().strip()
+        print("✅ 'Sanad' persona loaded successfully.")
+        return content
+    except FileNotFoundError:
+        print("⚠️ 'go2_script.txt' not found. Using default Emirati persona.")
+        return (
+            "You are Sanad (Bousandah), a wise and friendly Emirati assistant. "
+            "Speak strictly in the UAE dialect (Khaleeji). "
+            "Be helpful, concise, and use local greetings like 'Marhaba' and 'Ya Khoy'."
+        )
+
+SYSTEM_PROMPT = load_system_prompt()
+
+# ==================================================
+# 🎤 Main Client Class
+# ==================================================
+class HamadGeminiVoice:
+    def __init__(self):
+        self.audio_q = None # ✅ create inside run() to bind to the correct loop
+        self.speaking = False
+        self.interrupted = False
+        self.pya = pyaudio.PyAudio()
+
+        # Smart interruption tuning
+        self.MIN_THRESHOLD = 3000
+        self.barge_in_threshold = 3000
+        self.REQUIRED_LOUD_CHUNKS = 5
+
+        # --- stability knobs ---
+        self.PREBUFFER_CHUNKS = 4
+        self.PLAYBACK_TIMEOUT = 0.35
+        self.BARGE_IN_COOLDOWN = 0.7
+        self.AI_SPEAK_GRACE = 0.25
+
+        self._last_ai_audio_time = 0.0
+        self._ai_speaking_since = 0.0
+        self._barge_in_block_until = 0.0
+
+        # ✅ Echo-loop protection (reduce self-hearing)
+        self.ECHO_GUARD_SEC = 0.8
+        self._ignore_input_until = 0.0
+        self.SEND_SILENCE_WHEN_SPEAKING = True
+        self.SPEAKING_ENERGY_GATE = 0.85
+        self._silence_pcm = b"\x00" * (CHUNK_SIZE * 2)
+
+    def audio_energy(self, pcm):
+        try:
+            samples = array.array("h", pcm)
+            if not samples:
+                return 0
+            return sum(abs(s) for s in samples) // len(samples)
+        except Exception:
+            return 0
+
+    def calibrate_mic(self):
+        print("\n🤫 Calibrating Microphone... (Please remain silent)")
+        try:
+            stream = self.pya.open(
+                format=FORMAT,
+                channels=CHANNELS,
+                rate=SEND_SAMPLE_RATE,
+                input=True,
+                frames_per_buffer=CHUNK_SIZE,
+            )
+            values = []
+            for _ in range(40):
+                data = stream.read(CHUNK_SIZE, exception_on_overflow=False)
+                values.append(self.audio_energy(data))
+
+            stream.stop_stream()
+            stream.close()
+
+            avg_noise = sum(values) / len(values)
+            self.barge_in_threshold = max(self.MIN_THRESHOLD, avg_noise * 3.0)
+
+            print(f"✅ Baseline Noise: {avg_noise:.1f}")
+            print(f"✅ Interruption Threshold: {self.barge_in_threshold:.1f}")
+        except Exception as e:
+            print(f"⚠️ Calibration failed: {e}. Using default threshold.")
+
+    def _ws_connect_kwargs(self):
+        kwargs = {"max_size": None}
+        try:
+            sig = inspect.signature(websockets.connect)
+            if "extra_headers" in sig.parameters:
+                kwargs["extra_headers"] = {"Content-Type": "application/json"}
+            else:
+                kwargs["additional_headers"] = {"Content-Type": "application/json"}
+        except Exception:
+            kwargs["extra_headers"] = {"Content-Type": "application/json"}
+        return kwargs
+
+    async def run(self):
+        # ✅ create Queue inside the running loop (Python 3.8 fix)
+        self.audio_q = asyncio.Queue()
+
+        self.calibrate_mic()
+
+        print(f"\n🚀 Connecting to Gemini ({MODEL})...")
+        async with websockets.connect(URI, **self._ws_connect_kwargs()) as ws:
+
+            setup_msg = {
+                "setup": {
+                    "model": MODEL,
+                    "generationConfig": {
+                        "responseModalities": ["AUDIO"],
+                        "speechConfig": {
+                            "voiceConfig": {
+                                "prebuiltVoiceConfig": {"voiceName": VOICE_NAME}
+                            }
+                        },
+                    },
+                    "inputAudioTranscription": {},
+                    "systemInstruction": {"parts": [{"text": SYSTEM_PROMPT}]},
+                }
+            }
+            await ws.send(json.dumps(setup_msg))
+
+            await ws.recv()
+            print("🎙️ Connected! Sanad is listening. (Press Ctrl+C to stop)")
+
+            # ✅ Python 3.8 fix: replace TaskGroup with gather()
+            tasks = [
+                asyncio.create_task(self.capture_mic(ws)),
+                asyncio.create_task(self.receive_audio(ws)),
+                asyncio.create_task(self.play_audio()),
+            ]
+            try:
+                await asyncio.gather(*tasks)
+            finally:
+                for t in tasks:
+                    t.cancel()
+
+    async def capture_mic(self, ws):
+        stream = await to_thread(
+            self.pya.open,
+            format=FORMAT,
+            channels=CHANNELS,
+            rate=SEND_SAMPLE_RATE,
+            input=True,
+            frames_per_buffer=CHUNK_SIZE,
+        )
+
+        loud_chunks = 0
+
+        while True:
+            try:
+                data = await to_thread(
+                    stream.read, CHUNK_SIZE, exception_on_overflow=False
+                )
+
+                energy = self.audio_energy(data)
+                now = time.time()
+
+                # --- SMART INTERRUPTION LOGIC (barge-in) ---
+                if self.speaking and (now >= self._barge_in_block_until):
+                    if (now - self._ai_speaking_since) >= self.AI_SPEAK_GRACE:
+                        if energy > self.barge_in_threshold:
+                            loud_chunks += 1
+                        else:
+                            loud_chunks = 0
+
+                        if loud_chunks > self.REQUIRED_LOUD_CHUNKS:
+                            print(f"🛑 Interruption! (Energy: {energy})")
+                            self.interrupted = True
+                            self.speaking = False
+                            loud_chunks = 0
+                            self._barge_in_block_until = now + self.BARGE_IN_COOLDOWN
+
+                            # Drain playback queue
+                            while not self.audio_q.empty():
+                                try:
+                                    self.audio_q.get_nowait()
+                                except asyncio.QueueEmpty:
+                                    break
+
+                # Prevent echo loop: during AI speaking send silence unless user is loud
+                data_to_send = data
+                if self.SEND_SILENCE_WHEN_SPEAKING and self.speaking:
+                    gate = self.barge_in_threshold * self.SPEAKING_ENERGY_GATE
+                    if energy < gate:
+                        data_to_send = self._silence_pcm
+
+                b64_audio = base64.b64encode(data_to_send).decode("utf-8")
+                msg = {
+                    "realtime_input": {
+                        "media_chunks": [
+                            {
+                                "data": b64_audio,
+                                "mime_type": f"audio/pcm;rate={SEND_SAMPLE_RATE}",
+                            }
+                        ]
+                    }
+                }
+                await ws.send(json.dumps(msg))
+
+            except websockets.exceptions.ConnectionClosed:
+                print("⚠️ WebSocket closed.")
+                break
+            except Exception as e:
+                print(f"❌ Mic Error: {e}")
+                break
+
+    async def receive_audio(self, ws):
+        async for msg in ws:
+            try:
+                response = json.loads(msg)
+                server_content = response.get("serverContent", {})
+
+                if server_content.get("interrupted"):
+                    self.interrupted = False
+
+                # Print USER transcription (optional), but ignore during AI speaking window to reduce echo prints
+                input_tr = (
+                    server_content.get("inputTranscription")
+                    or server_content.get("input_transcription")
+                    or server_content.get("inputAudioTranscription")
+                    or server_content.get("input_audio_transcription")
+                )
+                if isinstance(input_tr, dict):
+                    text = (input_tr.get("text") or "").strip()
+                    if text and (time.time() >= self._ignore_input_until) and (not self.speaking):
+                        print(f"📝 USER SAID: {text}")
+
+                if self.interrupted:
+                    continue
+
+                # AUDIO playback from model
+                model_turn = server_content.get("modelTurn")
+                if model_turn:
+                    parts = model_turn.get("parts", [])
+                    for part in parts:
+                        inline_data = part.get("inlineData")
+                        if inline_data:
+                            audio_b64 = inline_data.get("data")
+                            if audio_b64:
+                                now = time.time()
+                                if not self.speaking:
+                                    self._ai_speaking_since = now
+                                self.speaking = True
+                                self._last_ai_audio_time = now
+
+                                # While AI audio is arriving, ignore mic transcription briefly
+                                self._ignore_input_until = now + self.ECHO_GUARD_SEC
+
+                                audio_bytes = base64.b64decode(audio_b64)
+                                await self.audio_q.put(audio_bytes)
+
+            except Exception as e:
+                print(f"❌ Parse Error: {e}")
+
+    async def play_audio(self):
+        stream = await to_thread(
+            self.pya.open,
+            format=FORMAT,
+            channels=CHANNELS,
+            rate=RECEIVE_SAMPLE_RATE,
+            output=True,
+            frames_per_buffer=CHUNK_SIZE,
+        )
+
+        buffered = False
+
+        while True:
+            try:
+                if self.interrupted:
+                    await asyncio.sleep(0.01)
+                    continue
+
+                if self.speaking and not buffered:
+                    while (
+                        self.audio_q.qsize() < self.PREBUFFER_CHUNKS
+                        and self.speaking
+                        and not self.interrupted and (time.time() - self._last_ai_audio_time) < self.PLAYBACK_TIMEOUT  # bounded wait: short replies must still play
+                    ):
+                        await asyncio.sleep(0.01)
+                    buffered = True
+
+                try:
+                    data = await asyncio.wait_for(
+                        self.audio_q.get(), timeout=self.PLAYBACK_TIMEOUT
+                    )
+                except asyncio.TimeoutError:
+                    if self.audio_q.empty() and (time.time() - self._last_ai_audio_time) > 0.25:
+                        self.speaking = False
+                        buffered = False
+                    continue
+
+                if data:
+                    await to_thread(stream.write, data)
+
+                if self.audio_q.empty():
+                    if (time.time() - self._last_ai_audio_time) > 0.25:
+                        self.speaking = False
+                        buffered = False
+
+            except Exception as e:
+                print(f"❌ Speaker Error: {e}")
+                break
+
+
+if __name__ == "__main__":
+    try:
+        client = HamadGeminiVoice()
+        asyncio.run(client.run())
+    except KeyboardInterrupt:
+        print("\n👋 Ma'a Salama (Goodbye)!")
+    except Exception as e:
+        print(f"\n❌ Fatal Error: {e}")