diff --git a/apk/app/index.tsx b/apk/app/index.tsx index 37848e9..5c425d7 100644 --- a/apk/app/index.tsx +++ b/apk/app/index.tsx @@ -196,6 +196,7 @@ export default function RecorderScreen() { ? trimmedUrl : `${trimmedUrl}/audio/upload`; if (uploadUrl) { + setIsUploading(true); try { const mimeType = buildMimeType(uri); const extension = buildFileExtension(uri); @@ -232,9 +233,12 @@ export default function RecorderScreen() { strings.uploadFailed, error instanceof Error ? error.message : "", ); + } finally { + setIsUploading(false); } } else { setStatusMessage(strings.noBackendUrl); + setIsUploading(false); } } catch (error) { recordingRef.current = null; diff --git a/backend/.env.example b/backend/.env.example index e285a33..edcd250 100644 --- a/backend/.env.example +++ b/backend/.env.example @@ -6,4 +6,8 @@ RASPBERRY_PI_PORT=8000 QUIBOT_TOKEN=MY_SECRET_TOKEN # Backend server config -PORT=3000 +PORT=5000 + +LLAMA_CPP_URL=https://ollama.epsem.aranroig.com/v1/chat/completions +LLAMA_PREAMBLE=./prompts/preamble.md +LLAMA_API_KEY=your_api_key \ No newline at end of file diff --git a/backend/.gitignore b/backend/.gitignore index aa0926a..80f310c 100644 --- a/backend/.gitignore +++ b/backend/.gitignore @@ -2,3 +2,5 @@ node_modules/ dist/ .env *.log +quibot-audio-*.txt +**/quibot-audio-*.txt diff --git a/backend/prompts/preamble.md b/backend/prompts/preamble.md new file mode 100644 index 0000000..4c74175 --- /dev/null +++ b/backend/prompts/preamble.md @@ -0,0 +1,4 @@ +Ets la QuiBot, un robot femení que ajuda als nens a aprendre sobre química. Disposes de dos rodes i dos braços. +Has de ser educada i tenir perspectiva de gènere. + + diff --git a/backend/quibot-audio-1781783002989.txt b/backend/quibot-audio-1781783002989.txt deleted file mode 100644 index 1c79d4a..0000000 --- a/backend/quibot-audio-1781783002989.txt +++ /dev/null @@ -1 +0,0 @@ -Col·la, pítalo, la ola, ola. 
diff --git a/backend/quibot-audio-1781783032108.txt b/backend/quibot-audio-1781783032108.txt deleted file mode 100644 index 78ddaff..0000000 --- a/backend/quibot-audio-1781783032108.txt +++ /dev/null @@ -1 +0,0 @@ -Hola, què tal, hola, hola, hola, hola... diff --git a/backend/quibot-audio-1781783047628.txt b/backend/quibot-audio-1781783047628.txt deleted file mode 100644 index 6d8a56e..0000000 --- a/backend/quibot-audio-1781783047628.txt +++ /dev/null @@ -1 +0,0 @@ -Hola, que tal, bon dia. diff --git a/backend/src/config.ts b/backend/src/config.ts index 7c71801..7126d50 100644 --- a/backend/src/config.ts +++ b/backend/src/config.ts @@ -1,4 +1,5 @@ import dotenv from 'dotenv'; +import { readFileSync } from 'fs'; dotenv.config(); @@ -6,6 +7,12 @@ let _raspberryHost = process.env.RASPBERRY_PI_HOST ?? 'http://raspberrypi.local' let _raspberryPort = Number(process.env.RASPBERRY_PI_PORT) || 8000; let _token = process.env.QUIBOT_TOKEN ?? 'MY_SECRET_TOKEN'; const APP_PORT = Number(process.env.PORT) || 5000; +const llamacppUrl = process.env.LLAMA_CPP_URL ?? ''; +const llamacppApiKey = process.env.LLAMA_API_KEY ?? ''; +const llamaPreambleRaw = process.env.LLAMA_PREAMBLE ?? ''; +const llamacppPreamble = llamaPreambleRaw.endsWith('.md') + ? 
readFileSync(llamaPreambleRaw, 'utf-8') + : llamaPreambleRaw; export const getRaspberryHost = () => _raspberryHost; export const getRaspberryPort = () => _raspberryPort; @@ -31,4 +38,8 @@ export const getConfig = () => ({ token: getToken(), }); +export const getLlamacppUrl = () => llamacppUrl; +export const getLlamacppApiKey = () => llamacppApiKey; +export const getLlamacppPreamble = () => llamacppPreamble; + export const getAppPort = () => APP_PORT; diff --git a/backend/src/controllers/audio.controller.ts b/backend/src/controllers/audio.controller.ts index f439d34..d9fd146 100644 --- a/backend/src/controllers/audio.controller.ts +++ b/backend/src/controllers/audio.controller.ts @@ -1,15 +1,14 @@ import { Router } from 'express'; import multer from 'multer'; -import { execFile } from 'child_process'; -import { tmpdir } from 'os'; import { join } from 'path'; +import { tmpdir } from 'os'; +import { rm, writeFile } from 'fs'; import { promisify } from 'util'; -import { writeFile, unlink } from 'fs'; +import { whisperService } from '../services/whisper.service.js'; import { raspiService } from '../services/raspi.service.js'; - -const execFileAsync = promisify(execFile); +import { llamacppService } from '../services/llama.service.js'; +const unlinkAsync = promisify(rm); const writeFileAsync = promisify(writeFile); -const unlinkAsync = promisify(unlink); const router = Router(); @@ -69,11 +68,9 @@ router.post('/process/:filename', async (req, res) => { } }); -const whisperModel = process.env.WHISPER_MODEL ?? 'base'; -const whisperLanguage = process.env.WHISPER_LANGUAGE ?? 
'ca'; - router.post('/upload', upload.single('file'), async (req, res) => { let tmpFile: string | undefined; + let tmpTxt: string | undefined; try { if (!req.file) { return res.status(400).json({ error: 'No audio file provided' }); @@ -83,23 +80,24 @@ router.post('/upload', upload.single('file'), async (req, res) => { tmpFile = join(tmpdir(), `quibot-audio-${Date.now()}.${ext}`); await writeFileAsync(tmpFile, req.file.buffer); - console.log(`[whisper] Model: ${whisperModel}, Language: ${whisperLanguage}, File: ${tmpFile}`); + const transcription = await whisperService.transcribe(tmpFile); + console.log(transcription); - const { stdout, stderr } = await execFileAsync('whisper', [ - tmpFile, - '--model', whisperModel, - '--language', whisperLanguage, - '--output_format', 'txt', - ], { maxBuffer: 50 * 1024 * 1024 }); + const txtPath = join(tmpdir(), `quibot-audio-${Date.now()}.txt`); + tmpTxt = txtPath; + await writeFileAsync(txtPath, transcription); - if (stderr) { - console.log(`[whisper] stderr: ${stderr}`); - } - - const transcription = stdout.trim(); + const llmResponse = await llamacppService.chatWithPreamble(transcription).catch( + (err: unknown) => { + const msg = err instanceof Error ? 
err.message : String(err); + console.error(`[audio] llama.cpp failed: ${msg}`); + return undefined; + }, + ); res.json({ transcription, + llmResponse, originalFilename: req.file.originalname, }); } catch (err: unknown) { @@ -113,6 +111,13 @@ router.post('/upload', upload.single('file'), async (req, res) => { // ignore cleanup errors } } + if (tmpTxt) { + try { + await unlinkAsync(tmpTxt); + } catch { + // ignore cleanup errors + } + } } }); diff --git a/backend/src/index.ts b/backend/src/index.ts index e806080..f83c8a2 100644 --- a/backend/src/index.ts +++ b/backend/src/index.ts @@ -2,6 +2,7 @@ import express from 'express'; import cors from 'cors'; import router from './routes/router.js'; import { getAppPort, getConfig } from './config.js'; +import { whisperService } from './services/whisper.service.js'; const app = express(); @@ -20,6 +21,18 @@ app.get('/health', (_req, res) => { res.json({ status: 'ok', settings }); }); -app.listen(getAppPort(), () => { +const server = app.listen(getAppPort(), () => { console.log(`QuiBot backend listening on port ${getAppPort()}`); + whisperService.spawn(); }); + +async function shutdown(signal: string) { + console.log(`[server] ${signal} received, shutting down...`); + server.close(async () => { + await whisperService.shutdown(); + process.exit(0); + }); +} + +process.on('SIGINT', () => shutdown('SIGINT')); +process.on('SIGTERM', () => shutdown('SIGTERM')); diff --git a/backend/src/services/llama.service.ts b/backend/src/services/llama.service.ts new file mode 100644 index 0000000..dd3b323 --- /dev/null +++ b/backend/src/services/llama.service.ts @@ -0,0 +1,54 @@ +import { getLlamacppUrl, getLlamacppApiKey, getLlamacppPreamble } from '../config.js'; + +interface LlamaRequest { + messages: Array<{ role: string; content: string }>; +} + +interface LlamaChatChoice { + message: { + content: string; + }; +} + +interface LlamaResponse { + choices?: LlamaChatChoice[]; +} + +export const llamacppService = { + async chat(messages: 
Array<{ role: string; content: string }>): Promise { + const apiUrl = getLlamacppUrl(); + if (!apiUrl) { + return ''; + } + + const apiKey = getLlamacppApiKey(); + const headers: Record = { 'Content-Type': 'application/json' }; + if (apiKey) { + headers['Authorization'] = `Bearer ${apiKey}`; + } + + const response = await fetch(apiUrl, { + method: 'POST', + headers, + body: JSON.stringify({ messages } satisfies LlamaRequest), + }); + + if (!response.ok) { + const text = await response.text().catch(() => ''); + throw new Error(`llama.cpp request failed (${response.status}): ${text.slice(0, 300)}`); + } + + const data = (await response.json()) as LlamaResponse; + const content = data.choices?.[0]?.message?.content?.trim() ?? ''; + return content; + }, + + async chatWithPreamble(userText: string): Promise { + const preamble = getLlamacppPreamble(); + const messages = preamble ? [ + { role: 'system', content: preamble }, + { role: 'user', content: userText }, + ] : [{ role: 'user', content: userText }]; + return this.chat(messages); + }, +}; diff --git a/backend/src/services/whisper.service.ts b/backend/src/services/whisper.service.ts new file mode 100644 index 0000000..bcecfdc --- /dev/null +++ b/backend/src/services/whisper.service.ts @@ -0,0 +1,218 @@ +import { spawn, ChildProcess } from 'child_process'; +import { join } from 'path'; +import { fileURLToPath } from 'url'; +import { randomUUID } from 'crypto'; + +const __filename = fileURLToPath(import.meta.url); +const __dirname = join(__filename, '..'); + +const SCRIPT_DIR = join(__dirname, '..'); + +const PYTHON = join(SCRIPT_DIR, '../.venv/bin/python3'); + +const whisperModel = process.env.WHISPER_MODEL ?? 'base'; +const whisperLanguage = process.env.WHISPER_LANGUAGE ?? 
'ca'; + +interface TranscriptResult { + msgId: string; + text?: string; + error?: string; +} + +interface InitResult { + type: 'init_ok' | 'init_error'; +} + +class WhisperService { + private proc: ChildProcess | null = null; + private onInitResolve: (() => void) | null = null; + private onInitReject: ((err: Error) => void) | null = null; + + spawn(): void { + if (this.proc) return; + + const scriptPath = join(SCRIPT_DIR, 'whisper-worker.py'); + + this.proc = spawn(PYTHON, [scriptPath], { + stdio: ['pipe', 'pipe', 'pipe'], + env: { ...process.env }, + }); + + if (!this.proc.stdout || !this.proc.stderr || !this.proc.stdin) { + console.error('[whisper-svc] Missing stdin/stdout/stderr'); + this.proc = null; + return; + } + + const proc = this.proc; + if (!proc?.stdout) return; + let buf = ''; + proc.stdout.on('data', (chunk: Buffer) => { + buf += chunk.toString(); + while (true) { + const nl = buf.indexOf('\n'); + if (nl === -1) break; + const line = buf.slice(0, nl).trim(); + buf = buf.slice(nl + 1); + if (!line) continue; + try { + const msg = JSON.parse(line); + if (msg.type === 'ready') { + console.log('[whisper-svc] Worker ready, sending init...'); + proc.stdin!.write( + JSON.stringify({ type: 'init', model: whisperModel, language: whisperLanguage }) + '\n', + ); + } else if (msg.type === 'init_ok') { + console.log(`[whisper-svc] Model loaded (model=${whisperModel}, lang=${whisperLanguage})`); + if (this.onInitResolve) { + const r = this.onInitResolve; + this.onInitResolve = null; + this.onInitReject = null; + r(); + } + } else if (msg.type === 'init_error') { + const err = new Error(`whisper-svc init failed: ${msg.error || 'unknown'}`); + if (this.onInitReject) { + const r = this.onInitReject; + this.onInitResolve = null; + this.onInitReject = null; + r(err); + } + } else if (msg.type === 'transcript' || msg.type === 'error') { + this.resolveTranscript(msg.msgId, msg); + } + } catch { /* skip */ } + } + }); + + const stderr = proc.stderr; + if (stderr) { + 
stderr.on('data', (chunk: Buffer) => { + const text = chunk.toString().trim(); + if (text) console.log(`[whisper-svc] stderr: ${text}`); + }); + } + + proc.on('exit', (code, signal) => { + console.log(`[whisper-svc] Exited code=${code} signal=${signal}`); + this.proc = null; + }); + + proc.on('error', (err) => { + console.error(`[whisper-svc] Error: ${err.message}`); + this.proc = null; + }); + } + + private pending: Map void> = new Map(); + + private resolveTranscript(msgId: string, msg: { type?: string; text?: string; error?: string }) { + const cb = this.pending.get(msgId); + this.pending.delete(msgId); + if (cb) { + if (msg.type === 'error') { + cb({ + msgId, + text: msg.text, + error: msg.error ?? msg.text ?? 'unknown error', + }); + } else { + cb({ msgId, text: msg.text }); + } + } + } + + private waitForInit(): Promise { + if (this.onInitResolve) return Promise.resolve(); // already initializing + + return new Promise((resolve, reject) => { + let cleared = false; + const timer = setTimeout(() => { + if (cleared) return; + cleared = true; + this.onInitReject = null; + reject(new Error('whisper-svc init timed out')); + }, 90_000); + this.onInitResolve = () => { + if (cleared) return; + cleared = true; + clearTimeout(timer); + resolve(); + }; + this.onInitReject = (err: Error) => { + if (cleared) return; + cleared = true; + clearTimeout(timer); + reject(err); + }; + }); + } + + async transcribe(audioPath: string): Promise { + if (!this.proc) { + this.spawn(); + } + + await this.waitForInit(); + + const msgId = randomUUID() + '-' + Date.now(); + + return new Promise((resolve, reject) => { + let cleared = false; + let timer: ReturnType | null = null; + + const resolvePromise = (result: TranscriptResult) => { + if (cleared) return; + cleared = true; + if (timer) clearTimeout(timer); + if (result.error) { + reject(new Error(`whisper-svc: ${result.error}`)); + } else if (result.text) { + resolve(result.text.trim()); + } else { + reject(new Error('whisper-svc: empty 
response')); + } + }; + + this.pending.set(msgId, resolvePromise); + + timer = setTimeout(() => { + if (cleared) return; + cleared = true; + this.pending.delete(msgId); + reject(new Error('whisper-svc: transcription timed out')); + }, 120_000); + + const proc = this.proc; + if (proc && proc.stdin) { + proc.stdin.write( + JSON.stringify({ type: 'transcribe', path: audioPath, msgId }) + '\n', + ); + } else { + cleared = true; + if (timer) clearTimeout(timer); + this.pending.delete(msgId); + reject(new Error('whisper subprocess not running')); + } + }); + } + + async shutdown(): Promise { + const proc = this.proc; + if (proc) { + try { + proc.stdin!.end(); + await new Promise((resolve) => { + proc.on('exit', () => resolve()); + setTimeout(() => { + if (!proc.killed) proc.kill('SIGTERM'); + resolve(); + }, 3000); + }); + } catch { /* ignore */ } + this.proc = null; + } + } +} + +export const whisperService = new WhisperService(); diff --git a/backend/src/whisper-worker.py b/backend/src/whisper-worker.py new file mode 100644 index 0000000..f265ca9 --- /dev/null +++ b/backend/src/whisper-worker.py @@ -0,0 +1,55 @@ +#!/usr/bin/env python3 +"""Persistent Whisper transcription worker – single subprocess, model loaded once.""" + +import sys +import json + + +def main(): + from faster_whisper import WhisperModel + + model_path = "base" + language = "ca" + model = None + + # Signal node that the process is alive and listening + print(json.dumps({"type": "ready"}), flush=True) + + for line in sys.stdin: + line = line.strip() + if not line: + continue + + try: + msg = json.loads(line) + except json.JSONDecodeError: + continue + + if msg.get("type") == "init": + model_path = msg.get("model", "base") + language = msg.get("language", "ca") or "ca" + print(f"[whisper-worker] Loading model='{model_path}' language='{language}'", file=sys.stderr, flush=True) + model = WhisperModel(model_path, device="cpu", compute_type="int8") + print(json.dumps({"type": "init_ok"}), flush=True) + 
continue + + if msg.get("type") == "transcribe": + audio_path = msg.get("path") + msg_id = msg.get("msgId", "") + if not audio_path: + print(json.dumps({"type": "error", "text": "no path provided", "msgId": msg_id}), flush=True) + continue + + try: + segments, info = model.transcribe(audio_path, language=language or None) + transcript = "" + for seg in segments: + transcript += seg.text + " " + result_text = transcript.strip() + print(json.dumps({"type": "transcript", "text": result_text, "msgId": msg_id}), flush=True) + except Exception as exc: + print(json.dumps({"type": "error", "text": str(exc), "msgId": msg_id}), flush=True) + + +if __name__ == "__main__": + main()