LLM connection

2026-06-18 21:16:28 +02:00
parent 9a23863320
commit 5086743a11
13 changed files with 393 additions and 26 deletions
--- a/backend/.env.example
+++ b/backend/.env.example
@@ -6,4 +6,8 @@ RASPBERRY_PI_PORT=8000
 QUIBOT_TOKEN=MY_SECRET_TOKEN

 # Backend server config
-PORT=3000
+PORT=5000
+
+LLAMA_CPP_URL=https://ollama.epsem.aranroig.com/v1/chat/completions
+LLAMA_PREAMBLE=./prompts/preamble.md
+LLAMA_API_KEY=your_api_key
--- a/backend/.gitignore
+++ b/backend/.gitignore
@@ -2,3 +2,5 @@ node_modules/
 dist/
 .env
 *.log
+quibot-audio-*.txt
+**/quibot-audio-*.txt
--- a/backend/prompts/preamble.md
+++ b/backend/prompts/preamble.md
@@ -0,0 +1,4 @@
+Ets la QuiBot, un robot femení que ajuda als nens a aprendre sobre química. Disposes de dues rodes i dos braços.
+Has de ser educada i tenir perspectiva de gènere.
+
+
--- a/backend/quibot-audio-1781783002989.txt
+++ b/backend/quibot-audio-1781783002989.txt
@@ -1 +0,0 @@
-Col·la, pítalo, la ola, ola.
--- a/backend/quibot-audio-1781783032108.txt
+++ b/backend/quibot-audio-1781783032108.txt
@@ -1 +0,0 @@
-Hola, què tal, hola, hola, hola, hola...
--- a/backend/quibot-audio-1781783047628.txt
+++ b/backend/quibot-audio-1781783047628.txt
@@ -1 +0,0 @@
-Hola, que tal, bon dia.
--- a/backend/src/config.ts
+++ b/backend/src/config.ts
@@ -1,4 +1,5 @@
 import dotenv from 'dotenv';
+import { readFileSync } from 'fs';

 dotenv.config();

@@ -6,6 +7,12 @@ let _raspberryHost = process.env.RASPBERRY_PI_HOST ?? 'http://raspberrypi.local'
 let _raspberryPort = Number(process.env.RASPBERRY_PI_PORT) || 8000;
 let _token = process.env.QUIBOT_TOKEN ?? 'MY_SECRET_TOKEN';
 const APP_PORT = Number(process.env.PORT) || 5000;
+// LLM endpoint settings, read once from the environment when this module loads.
+const llamacppUrl = process.env.LLAMA_CPP_URL ?? '';
+const llamacppApiKey = process.env.LLAMA_API_KEY ?? '';
+// LLAMA_PREAMBLE holds either the prompt text itself or, when the value ends
+// in `.md`, the path of a file whose UTF-8 contents become the prompt.
+const llamaPreambleRaw = process.env.LLAMA_PREAMBLE ?? '';
+let llamacppPreamble = llamaPreambleRaw;
+if (llamaPreambleRaw.endsWith('.md')) {
+  llamacppPreamble = readFileSync(llamaPreambleRaw, 'utf-8');
+}

 export const getRaspberryHost = () => _raspberryHost;
 export const getRaspberryPort = () => _raspberryPort;
@@ -31,4 +38,8 @@ export const getConfig = () => ({
  token: getToken(),
 });

+/** Returns the configured chat endpoint URL. */
+export const getLlamacppUrl = (): string => llamacppUrl;
+/** Returns the configured API key for the chat endpoint. */
+export const getLlamacppApiKey = (): string => llamacppApiKey;
+/** Returns the configured preamble text for the chat endpoint. */
+export const getLlamacppPreamble = (): string => llamacppPreamble;
+
 export const getAppPort = () => APP_PORT;
--- a/backend/src/controllers/audio.controller.ts
+++ b/backend/src/controllers/audio.controller.ts
@@ -1,15 +1,14 @@
 import { Router } from 'express';
 import multer from 'multer';
-import { execFile } from 'child_process';
-import { tmpdir } from 'os';
 import { join } from 'path';
+import { tmpdir } from 'os';
+import { rm, writeFile } from 'fs';
 import { promisify } from 'util';
-import { writeFile, unlink } from 'fs';
+import { whisperService } from '../services/whisper.service.js';
 import { raspiService } from '../services/raspi.service.js';
-
-const execFileAsync = promisify(execFile);
+import { llamacppService } from '../services/llama.service.js';
+const unlinkAsync = promisify(rm);
 const writeFileAsync = promisify(writeFile);
-const unlinkAsync = promisify(unlink);

 const router = Router();

@@ -69,11 +68,9 @@ router.post('/process/:filename', async (req, res) => {
  }
 });

-const whisperModel = process.env.WHISPER_MODEL ?? 'base';
-const whisperLanguage = process.env.WHISPER_LANGUAGE ?? 'ca';
-
 router.post('/upload', upload.single('file'), async (req, res) => {
  let tmpFile: string | undefined;
+  let tmpTxt: string | undefined;
  try {
    if (!req.file) {
      return res.status(400).json({ error: 'No audio file provided' });
@@ -83,23 +80,24 @@ router.post('/upload', upload.single('file'), async (req, res) => {
    tmpFile = join(tmpdir(), `quibot-audio-${Date.now()}.${ext}`);
    await writeFileAsync(tmpFile, req.file.buffer);

-    console.log(`[whisper] Model: ${whisperModel}, Language: ${whisperLanguage}, File: ${tmpFile}`);
+    const transcription = await whisperService.transcribe(tmpFile);
+    console.log(transcription);

-    const { stdout, stderr } = await execFileAsync('whisper', [
-      tmpFile,
-      '--model', whisperModel,
-      '--language', whisperLanguage,
-      '--output_format', 'txt',
-    ], { maxBuffer: 50 * 1024 * 1024 });
+    const txtPath = join(tmpdir(), `quibot-audio-${Date.now()}.txt`);
+    tmpTxt = txtPath;
+    await writeFileAsync(txtPath, transcription);

-    if (stderr) {
-      console.log(`[whisper] stderr: ${stderr}`);
-    }
-
-    const transcription = stdout.trim();
+    const llmResponse = await llamacppService.chatWithPreamble(transcription).catch(
+      (err: unknown) => {
+        const msg = err instanceof Error ? err.message : String(err);
+        console.error(`[audio] llama.cpp failed: ${msg}`);
+        return undefined;
+      },
+    );

    res.json({
      transcription,
+      llmResponse,
      originalFilename: req.file.originalname,
    });
  } catch (err: unknown) {
@@ -113,6 +111,13 @@ router.post('/upload', upload.single('file'), async (req, res) => {
        // ignore cleanup errors
      }
    }
+    if (tmpTxt) {
+      try {
+        await unlinkAsync(tmpTxt);
+      } catch {
+        // ignore cleanup errors
+      }
+    }
  }
 });

--- a/backend/src/index.ts
+++ b/backend/src/index.ts
@@ -2,6 +2,7 @@ import express from 'express';
 import cors from 'cors';
 import router from './routes/router.js';
 import { getAppPort, getConfig } from './config.js';
+import { whisperService } from './services/whisper.service.js';

 const app = express();

@@ -20,6 +21,18 @@ app.get('/health', (_req, res) => {
  res.json({ status: 'ok', settings });
 });

-app.listen(getAppPort(), () => {
+const server = app.listen(getAppPort(), () => {
  console.log(`QuiBot backend listening on port ${getAppPort()}`);
+  whisperService.spawn();
 });
+
+/**
+ * Logs the received signal, stops the HTTP server and, once the server has
+ * closed, shuts the whisper worker down and exits the process with code 0.
+ */
+async function shutdown(signal: string) {
+  console.log(`[server] ${signal} received, shutting down...`);
+  server.close(() => {
+    whisperService.shutdown().then(() => process.exit(0));
+  });
+}
+
+// Run the same shutdown path for both termination signals.
+for (const signal of ['SIGINT', 'SIGTERM'] as const) {
+  process.on(signal, () => shutdown(signal));
+}
--- a/backend/src/services/llama.service.ts
+++ b/backend/src/services/llama.service.ts
@@ -0,0 +1,54 @@
+import { getLlamacppUrl, getLlamacppApiKey, getLlamacppPreamble } from '../config.js';
+
+/** JSON body posted to the chat endpoint. */
+interface LlamaRequest {
+  messages: Array<{ role: string; content: string }>;
+}
+
+/** One entry of the `choices` array in the endpoint's reply. */
+interface LlamaChatChoice {
+  message: {
+    content: string;
+  };
+}
+
+/** The part of the reply body that this service reads. */
+interface LlamaResponse {
+  choices?: LlamaChatChoice[];
+}
+
+export const llamacppService = {
+  /**
+   * POSTs `messages` to the configured endpoint and returns the trimmed
+   * content of the first choice.
+   *
+   * @returns '' when no endpoint URL is configured (no request is made) or
+   *   when the reply carries no content.
+   * @throws Error when the HTTP status is not OK; the message holds the
+   *   status code and at most the first 300 characters of the response body.
+   */
+  async chat(messages: Array<{ role: string; content: string }>): Promise<string> {
+    const endpoint = getLlamacppUrl();
+    if (!endpoint) return '';
+
+    // The Authorization header is only sent when an API key is configured.
+    const token = getLlamacppApiKey();
+    const headers: Record<string, string> = token
+      ? { 'Content-Type': 'application/json', Authorization: `Bearer ${token}` }
+      : { 'Content-Type': 'application/json' };
+
+    const payload = { messages } satisfies LlamaRequest;
+    const reply = await fetch(endpoint, {
+      method: 'POST',
+      headers,
+      body: JSON.stringify(payload),
+    });
+
+    if (!reply.ok) {
+      // Reading the body is best-effort; a failed read yields an empty detail.
+      const detail = await reply.text().catch(() => '');
+      throw new Error(`llama.cpp request failed (${reply.status}): ${detail.slice(0, 300)}`);
+    }
+
+    const parsed = (await reply.json()) as LlamaResponse;
+    const firstChoice = parsed.choices?.[0];
+    return firstChoice?.message?.content?.trim() ?? '';
+  },
+
+  /**
+   * Sends `userText` as the user message, preceded by the configured preamble
+   * as a system message when the preamble is non-empty.
+   */
+  async chatWithPreamble(userText: string): Promise<string> {
+    const messages = [{ role: 'user', content: userText }];
+    const systemPrompt = getLlamacppPreamble();
+    if (systemPrompt) {
+      messages.unshift({ role: 'system', content: systemPrompt });
+    }
+    return this.chat(messages);
+  },
+};
--- a/backend/src/services/whisper.service.ts
+++ b/backend/src/services/whisper.service.ts
@@ -0,0 +1,218 @@
+import { spawn, ChildProcess } from 'child_process';
+import { join } from 'path';
+import { fileURLToPath } from 'url';
+import { randomUUID } from 'crypto';
+
+// Path of this module file and of the directory that contains it.
+const __filename = fileURLToPath(import.meta.url);
+const __dirname = join(__filename, '..');
+
+// One level above this module's directory.
+const SCRIPT_DIR = join(__dirname, '..');
+
+// Python interpreter inside the `.venv` that sits one level above SCRIPT_DIR.
+const PYTHON = join(SCRIPT_DIR, '..', '.venv', 'bin', 'python3');
+
+// Whisper settings from the environment; unset variables fall back to
+// model `base` and language `ca`.
+const {
+  WHISPER_MODEL: whisperModel = 'base',
+  WHISPER_LANGUAGE: whisperLanguage = 'ca',
+} = process.env;
+
+/** Outcome of one `transcribe` request, matched to its caller through `msgId`. */
+interface TranscriptResult {
+  msgId: string;
+  text?: string;
+  error?: string;
+}
+
+/** Marker type for the two possible results of the model-loading handshake. */
+interface InitResult {
+  type: 'init_ok' | 'init_error';
+}
+
+/** Fields this service reads from a JSON line printed by the worker. */
+interface WorkerMessage {
+  type?: string;
+  msgId?: string;
+  text?: string;
+  error?: string;
+}
+
+/**
+ * Owns a single long-lived `whisper-worker.py` subprocess and exchanges
+ * newline-delimited JSON with it over stdin/stdout.
+ *
+ * Handshake: the worker prints `ready`, the service answers with `init`, and
+ * the worker replies `init_ok` or `init_error`. Transcription requests carry a
+ * `msgId` that the worker echoes back so replies can be matched to callers.
+ */
+class WhisperService {
+  private proc: ChildProcess | null = null;
+  /**
+   * Settles when the current worker has finished (or failed) loading its
+   * model. It is created in `spawn()` so that an `init_ok` arriving before
+   * the first `transcribe()` call is still remembered. Null while no worker
+   * is running.
+   */
+  private initPromise: Promise<void> | null = null;
+  private onInitResolve: (() => void) | null = null;
+  private onInitReject: ((err: Error) => void) | null = null;
+  /** Callbacks of in-flight transcription requests, keyed by `msgId`. */
+  private pending: Map<string, (result: TranscriptResult) => void> = new Map();
+
+  /** Starts the worker subprocess and wires up its streams; no-op when one is already running. */
+  spawn(): void {
+    if (this.proc) return;
+
+    const scriptPath = join(SCRIPT_DIR, 'whisper-worker.py');
+    const child = spawn(PYTHON, [scriptPath], {
+      stdio: ['pipe', 'pipe', 'pipe'],
+      env: { ...process.env },
+    });
+
+    const { stdin, stdout, stderr } = child;
+    if (!stdin || !stdout || !stderr) {
+      console.error('[whisper-svc] Missing stdin/stdout/stderr');
+      return;
+    }
+
+    this.proc = child;
+    this.initPromise = this.beginInit();
+
+    // Decode in the stream so multi-byte characters split across chunks stay intact.
+    stdout.setEncoding('utf8');
+    let buf = '';
+    stdout.on('data', (chunk: string) => {
+      buf += chunk;
+      let nl = buf.indexOf('\n');
+      while (nl !== -1) {
+        const line = buf.slice(0, nl).trim();
+        buf = buf.slice(nl + 1);
+        // Ignore output from a worker that is no longer the current one.
+        if (line && this.proc === child) this.handleLine(line, stdin);
+        nl = buf.indexOf('\n');
+      }
+    });
+
+    stderr.setEncoding('utf8');
+    stderr.on('data', (chunk: string) => {
+      const text = chunk.trim();
+      if (text) console.log(`[whisper-svc] stderr: ${text}`);
+    });
+
+    // Without a listener, a write to a dead worker (EPIPE) would surface as
+    // an unhandled 'error' event on the stream.
+    stdin.on('error', (err: Error) => {
+      console.error(`[whisper-svc] stdin error: ${err.message}`);
+    });
+
+    child.on('exit', (code, signal) => {
+      console.log(`[whisper-svc] Exited code=${code} signal=${signal}`);
+      this.handleWorkerGone(child, `worker exited (code=${code} signal=${signal})`);
+    });
+
+    child.on('error', (err) => {
+      console.error(`[whisper-svc] Error: ${err.message}`);
+      this.handleWorkerGone(child, `worker error: ${err.message}`);
+    });
+  }
+
+  /** Creates the promise that tracks the init handshake of the worker just spawned (90 s timeout). */
+  private beginInit(): Promise<void> {
+    const init = new Promise<void>((resolve, reject) => {
+      const timer = setTimeout(() => {
+        this.onInitResolve = null;
+        this.onInitReject = null;
+        reject(new Error('whisper-svc init timed out'));
+      }, 90_000);
+      this.onInitResolve = () => {
+        clearTimeout(timer);
+        resolve();
+      };
+      this.onInitReject = (err: Error) => {
+        clearTimeout(timer);
+        reject(err);
+      };
+    });
+    // Nobody may be awaiting yet (startup spawn); mark the rejection as handled.
+    init.catch(() => undefined);
+    return init;
+  }
+
+  /** Parses one stdout line from the worker and dispatches on its `type`. */
+  private handleLine(line: string, stdin: NonNullable<ChildProcess['stdin']>): void {
+    let parsed: unknown;
+    try {
+      parsed = JSON.parse(line);
+    } catch {
+      return; // not a protocol message
+    }
+    if (typeof parsed !== 'object' || parsed === null) return;
+    const msg = parsed as WorkerMessage;
+
+    switch (msg.type) {
+      case 'ready':
+        console.log('[whisper-svc] Worker ready, sending init...');
+        stdin.write(
+          JSON.stringify({ type: 'init', model: whisperModel, language: whisperLanguage }) + '\n',
+        );
+        break;
+      case 'init_ok': {
+        console.log(`[whisper-svc] Model loaded (model=${whisperModel}, lang=${whisperLanguage})`);
+        const done = this.onInitResolve;
+        this.onInitResolve = null;
+        this.onInitReject = null;
+        if (done) {
+          done();
+        } else {
+          // The handshake promise already timed out; the model is usable after all.
+          this.initPromise = Promise.resolve();
+        }
+        break;
+      }
+      case 'init_error': {
+        const fail = this.onInitReject;
+        this.onInitResolve = null;
+        this.onInitReject = null;
+        if (fail) {
+          fail(new Error(`whisper-svc init failed: ${msg.error || msg.text || 'unknown'}`));
+        }
+        break;
+      }
+      case 'transcript':
+      case 'error':
+        this.resolveTranscript(msg.msgId ?? '', msg);
+        break;
+      default:
+        break;
+    }
+  }
+
+  /**
+   * Clears all state tied to `child` once it has exited or errored, so that
+   * waiting callers fail right away instead of running into their timeouts.
+   * No-op when `child` is not the current worker.
+   */
+  private handleWorkerGone(child: ChildProcess, reason: string): void {
+    if (this.proc !== child) return;
+    this.proc = null;
+    this.initPromise = null;
+
+    const fail = this.onInitReject;
+    this.onInitResolve = null;
+    this.onInitReject = null;
+    if (fail) fail(new Error(`whisper-svc: ${reason}`));
+
+    const waiting = [...this.pending];
+    this.pending.clear();
+    for (const [msgId, cb] of waiting) {
+      cb({ msgId, error: reason });
+    }
+  }
+
+  /** Hands a worker reply to the request registered under `msgId`, if it is still pending. */
+  private resolveTranscript(msgId: string, msg: { type?: string; text?: string; error?: string }) {
+    const cb = this.pending.get(msgId);
+    this.pending.delete(msgId);
+    if (!cb) return;
+    if (msg.type === 'error') {
+      cb({
+        msgId,
+        text: msg.text,
+        error: msg.error ?? msg.text ?? 'unknown error',
+      });
+    } else {
+      cb({ msgId, text: msg.text });
+    }
+  }
+
+  /** Resolves once the current worker has loaded its model; rejects when init failed or no worker runs. */
+  private waitForInit(): Promise<void> {
+    return this.initPromise ?? Promise.reject(new Error('whisper subprocess not running'));
+  }
+
+  /**
+   * Transcribes the audio file at `audioPath` with the worker, spawning the
+   * worker first when none is running.
+   *
+   * @returns The trimmed transcript text.
+   * @throws Error when init fails or times out, the worker is not running or
+   *   goes away, the worker reports an error, the reply has no text, or no
+   *   reply arrives within 120 s.
+   */
+  async transcribe(audioPath: string): Promise<string> {
+    if (!this.proc) {
+      this.spawn();
+    }
+
+    await this.waitForInit();
+
+    const stdin = this.proc?.stdin;
+    if (!stdin) {
+      throw new Error('whisper subprocess not running');
+    }
+
+    const msgId = randomUUID() + '-' + Date.now();
+
+    return new Promise<string>((resolve, reject) => {
+      const timer = setTimeout(() => {
+        this.pending.delete(msgId);
+        reject(new Error('whisper-svc: transcription timed out'));
+      }, 120_000);
+
+      this.pending.set(msgId, (result: TranscriptResult) => {
+        clearTimeout(timer);
+        if (result.error) {
+          reject(new Error(`whisper-svc: ${result.error}`));
+        } else if (result.text) {
+          resolve(result.text.trim());
+        } else {
+          reject(new Error('whisper-svc: empty response'));
+        }
+      });
+
+      stdin.write(JSON.stringify({ type: 'transcribe', path: audioPath, msgId }) + '\n');
+    });
+  }
+
+  /**
+   * Asks the worker to stop by closing its stdin and waits for it to exit;
+   * after 3 s without an exit the worker is sent SIGTERM. Never rejects.
+   */
+  async shutdown(): Promise<void> {
+    const proc = this.proc;
+    if (!proc) return;
+    try {
+      proc.stdin?.end();
+      await new Promise<void>((resolve) => {
+        const timer = setTimeout(() => {
+          if (!proc.killed) proc.kill('SIGTERM');
+          resolve();
+        }, 3000);
+        proc.once('exit', () => {
+          clearTimeout(timer);
+          resolve();
+        });
+      });
+    } catch { /* ignore */ }
+    // No-op when the exit handler already ran; otherwise releases waiters and timers.
+    this.handleWorkerGone(proc, 'worker shut down');
+  }
+}
+
+export const whisperService = new WhisperService();
--- a/backend/src/whisper-worker.py
+++ b/backend/src/whisper-worker.py
@@ -0,0 +1,55 @@
+#!/usr/bin/env python3
+"""Persistent Whisper transcription worker – single subprocess, model loaded once."""
+
+import sys
+import json
+
+
+def main():
+    from faster_whisper import WhisperModel
+
+    model_path = "base"
+    language = "ca"
+    model = None
+
+    # Signal node that the process is alive and listening
+    print(json.dumps({"type": "ready"}), flush=True)
+
+    for line in sys.stdin:
+        line = line.strip()
+        if not line:
+            continue
+
+        try:
+            msg = json.loads(line)
+        except json.JSONDecodeError:
+            continue
+
+        if msg.get("type") == "init":
+            model_path = msg.get("model", "base")
+            language = msg.get("language", "ca") or "ca"
+            print(f"[whisper-worker] Loading model='{model_path}' language='{language}'", file=sys.stderr, flush=True)
+            model = WhisperModel(model_path, device="cpu", compute_type="int8")
+            print(json.dumps({"type": "init_ok"}), flush=True)
+            continue
+
+        if msg.get("type") == "transcribe":
+            audio_path = msg.get("path")
+            msg_id = msg.get("msgId", "")
+            if not audio_path:
+                print(json.dumps({"type": "error", "text": "no path provided", "msgId": msg_id}), flush=True)
+                continue
+
+            try:
+                segments, info = model.transcribe(audio_path, language=language or None)
+                transcript = ""
+                for seg in segments:
+                    transcript += seg.text + " "
+                result_text = transcript.strip()
+                print(json.dumps({"type": "transcript", "text": result_text, "msgId": msg_id}), flush=True)
+            except Exception as exc:
+                print(json.dumps({"type": "error", "text": str(exc), "msgId": msg_id}), flush=True)
+
+
+if __name__ == "__main__":
+    main()