si

2026-06-18 21:46:44 +02:00
parent 5086743a11
commit 76a56d1a42
5 changed files with 1834 additions and 1305 deletions
--- a/apk/app/index.tsx
+++ b/apk/app/index.tsx
@@ -1,4 +1,5 @@
 import { Audio, InterruptionModeAndroid, InterruptionModeIOS } from "expo-av";
+import * as Speech from "expo-speech";
 import { router, useFocusEffect } from "expo-router";
 import { useCallback, useEffect, useRef, useState } from "react";
 import Svg, { Path } from "react-native-svg";
@@ -58,6 +59,8 @@ export default function RecorderScreen() {
  const [recordingMs, setRecordingMs] = useState(0);
  const [statusMessage, setStatusMessage] = useState("");
  const [responsePreview, setResponsePreview] = useState("");
+  const [llmResponseText, setLlmResponseText] = useState("");
+  const [transcriptionText, setTranscriptionText] = useState("");
  const [isUploading, setIsUploading] = useState(false);
  const [isHolding, setIsHolding] = useState(false);
  const recordingRef = useRef<Audio.Recording | null>(null);
@@ -120,9 +123,60 @@ export default function RecorderScreen() {
    };
  }, [recording]);

+  /**
+   * Reads `text` aloud with expo-speech and waits until the utterance ends.
+   *
+   * Any speech already in progress is stopped first, and the audio mode is
+   * switched to playback (recording disabled) before speaking. Failures while
+   * switching the audio mode or starting speech are logged, not thrown.
+   *
+   * @param text Text to speak; empty or whitespace-only text is skipped.
+   * @returns `false` when the utterance was interrupted (its `onStopped`
+   *   callback fired, e.g. after `Speech.stop()`); `true` otherwise, i.e. it
+   *   finished, was skipped as empty, or failed with an error.
+   */
+  async function speak(text: string): Promise<boolean> {
+    if (!text || !text.trim()) {
+      console.log("[TTS] Skipping empty text");
+      return true;
+    }
+
+    console.log("[TTS] ===== START speak =====");
+    Speech.stop();
+    // NOTE(review): short pause after stop() kept from the original code;
+    // presumably lets the speech engine settle — confirm it is still needed.
+    await new Promise((r) => setTimeout(r, 100));
+
+    try {
+      await Audio.setAudioModeAsync({
+        allowsRecordingIOS: false,
+        playsInSilentModeIOS: true,
+        interruptionModeAndroid: InterruptionModeAndroid.DoNotMix,
+        interruptionModeIOS: InterruptionModeIOS.DoNotMix,
+        shouldDuckAndroid: true,
+        staysActiveInBackground: false,
+      });
+      console.log("[TTS] Audio mode reset OK");
+    } catch (err) {
+      // Best effort: speech is still attempted with whatever mode is active.
+      console.log("[TTS] Audio mode error:", err);
+    }
+
+    const lang = locale === "ca" ? "ca-ES" : "en-US";
+    // NOTE(review): delay kept from the original code; presumably gives the
+    // audio session time to switch modes — confirm.
+    await new Promise((r) => setTimeout(r, 800));
+
+    console.log("[TTS] Calling Speech.speak. Text length:", text.length, "Lang:", lang);
+    // Speech.speak() only queues the utterance and returns immediately, so the
+    // promise is settled from the callbacks. Without this, the next speak()
+    // call would run Speech.stop() while this text is still being read.
+    return new Promise<boolean>((resolve) => {
+      try {
+        Speech.speak(text, {
+          language: lang,
+          onDone: () => {
+            console.log("[TTS] ✅ onDone fired");
+            resolve(true);
+          },
+          onStopped: () => {
+            console.log("[TTS] onStopped fired");
+            resolve(false);
+          },
+          onError: (error) => {
+            console.log("[TTS] ❌ onError:", error);
+            resolve(true);
+          },
+        });
+        console.log("[TTS] Speech.speak() call returned OK");
+      } catch (err) {
+        console.log("[TTS] Speech.speak() threw:", err);
+        resolve(true);
+      }
+    });
+  }
+
+  /**
+   * Speaks the given texts one after another, pausing 1.5 s after each.
+   *
+   * Each text is spoken only after the previous one has ended. The sequence is
+   * abandoned as soon as an utterance is interrupted, so a stop request (for
+   * example when a new recording starts) also cancels the remaining texts.
+   * An interruption that happens during the pause between two utterances is
+   * not detected.
+   *
+   * @param texts Texts to speak, in order; an empty array is a no-op.
+   */
+  async function speakSequentially(texts: string[]) {
+    for (const text of texts) {
+      const completed = await speak(text);
+      if (!completed) {
+        console.log("[TTS] Sequence interrupted, skipping remaining texts");
+        return;
+      }
+      await new Promise((r) => setTimeout(r, 1500));
+    }
+  }
+
  async function startRecording() {
    try {
+      Speech.stop();
+      setTranscriptionText("");
      setResponsePreview("");
+      setLlmResponseText("");
      setRecordingUri(null);

      const permission = await Audio.requestPermissionsAsync();
@@ -167,6 +221,8 @@ export default function RecorderScreen() {
      return;
    }

+    console.log("[APP] stopRecordingAndUpload called");
+
    try {
      const activeRecording = recordingRef.current;
      const currentStatus = await activeRecording.getStatusAsync();
@@ -220,12 +276,40 @@ export default function RecorderScreen() {
          });

          const responseText = await response.text();
-          setResponsePreview(responseText.slice(0, 400));

          if (!response.ok) {
            throw new Error(`${response.status}. ${responseText}`);
          }

+          try {
+            const data = JSON.parse(responseText);
+            setResponsePreview(responseText.slice(0, 400));
+
+            const textsToSpeak: string[] = [];
+
+            if (data.transcription) {
+              setTranscriptionText(data.transcription);
+              textsToSpeak.push(data.transcription);
+            }
+
+            if (data.llmResponse) {
+              setLlmResponseText(data.llmResponse);
+              textsToSpeak.push(data.llmResponse);
+            }
+
+            if (textsToSpeak.length > 0) {
+              setStatusMessage(strings.voiceMessageSent + ". " + strings.playing);
+              void speakSequentially(textsToSpeak);
+            } else {
+              setLlmResponseText("");
+            }
+          } catch (parseError) {
+            console.log("[APP] JSON parse failed:", parseError, "Response was:", responseText.substring(0, 200));
+            setResponsePreview(responseText.slice(0, 400));
+            setTranscriptionText("");
+            setLlmResponseText("");
+          }
+
          setStatusMessage(strings.voiceMessageSent);
        } catch (error) {
          setStatusMessage(strings.uploadFailed);
@@ -283,7 +367,10 @@ export default function RecorderScreen() {
    try {
      setIsUploading(true);
      setStatusMessage(strings.uploadingRecording);
+      Speech.stop();
+      setTranscriptionText("");
      setResponsePreview("");
+      setLlmResponseText("");

      const mimeType = buildMimeType(targetUri);
      const extension = buildFileExtension(targetUri);
@@ -308,12 +395,39 @@ export default function RecorderScreen() {
      });

      const responseText = await response.text();
-      setResponsePreview(responseText.slice(0, 400));

      if (!response.ok) {
        throw new Error(`${response.status}. ${responseText}`);
      }

+      try {
+        const data = JSON.parse(responseText);
+        setResponsePreview(responseText.slice(0, 400));
+
+        const textsToSpeak: string[] = [];
+
+        if (data.transcription) {
+          setTranscriptionText(data.transcription);
+          textsToSpeak.push(data.transcription);
+        }
+
+        if (data.llmResponse) {
+          setLlmResponseText(data.llmResponse);
+          textsToSpeak.push(data.llmResponse);
+        }
+
+        if (textsToSpeak.length > 0) {
+          setStatusMessage(strings.voiceMessageSent + ". " + strings.playing);
+          void speakSequentially(textsToSpeak);
+        } else {
+          setLlmResponseText("");
+        }
+      } catch {
+        setResponsePreview(responseText.slice(0, 400));
+        setTranscriptionText("");
+        setLlmResponseText("");
+      }
+
      setStatusMessage(strings.uploadComplete);
    } catch (error) {
      setStatusMessage(strings.uploadFailed);
@@ -326,6 +440,11 @@ export default function RecorderScreen() {
    }
  }

+  /**
+   * Press handler for the speaker button: replays the current transcription
+   * and AI reply, in that order, skipping whichever is empty.
+   */
+  function handleSpeak() {
+    const replayQueue: string[] = [];
+    for (const candidate of [transcriptionText, llmResponseText]) {
+      if (candidate) {
+        replayQueue.push(candidate);
+      }
+    }
+    // Fire-and-forget: playback runs in the background of the press event.
+    void speakSequentially(replayQueue);
+  }
+
  const releaseLabel = t("releaseToStop", locale);
  const holdLabel = t("holdToRecord", locale);
  const openSettingsLabel = t("openSettingsHint", locale);
@@ -419,7 +538,30 @@ export default function RecorderScreen() {
                  : openSettingsLabel}
            </Text>

-            {responsePreview ? (
+            {transcriptionText ? (
+              <View style={styles.transcriptionBox}>
+                <Text style={styles.transcriptionLabel}>{strings.yourMessage}</Text>
+                <Text style={styles.transcriptionText}>{transcriptionText}</Text>
+              </View>
+            ) : null}
+
+            {llmResponseText ? (
+              <View style={styles.llmResponseBox}>
+                <View style={styles.llmResponseHeader}>
+                  <Text style={styles.llmResponseLabel}>{strings.aiReply}</Text>
+                  <Pressable onPress={handleSpeak} style={styles.speakButton}>
+                    <Svg width="20" height="20" viewBox="0 0 24 24" fill="none">
+                      <Path d="M11 5L6 9H2v6h4l5 4V5z" fill="#13304a" />
+                      <Path d="M15.5 8.5a5.5 5.5 0 0 1 0 7" stroke="#13304a" strokeWidth="2" strokeLinecap="round" />
+                      <Path d="M18.5 5.5a9 9 0 0 1 0 13" stroke="#13304a" strokeWidth="2" strokeLinecap="round" />
+                    </Svg>
+                  </Pressable>
+                </View>
+                <Text style={styles.llmResponseText}>{llmResponseText}</Text>
+              </View>
+            ) : null}
+
+            {responsePreview && !transcriptionText && !llmResponseText ? (
              <View style={styles.responseBox}>
                <Text style={styles.responseLabel}>{serverResponseLabel}</Text>
                <Text style={styles.responseText}>{responsePreview}</Text>
@@ -557,4 +699,58 @@ const styles = StyleSheet.create({
    fontSize: 14,
    lineHeight: 20,
  },
+  llmResponseBox: {
+    backgroundColor: "#e8f4e8",
+    borderRadius: 16,
+    gap: 6,
+    marginTop: 4,
+    padding: 14,
+    borderWidth: 1,
+    borderColor: "#b8d9b8",
+  },
+  llmResponseHeader: {
+    flexDirection: "row",
+    alignItems: "center",
+    justifyContent: "space-between",
+  },
+  llmResponseLabel: {
+    color: "#2a6a2a",
+    fontSize: 13,
+    fontWeight: "700",
+    textTransform: "uppercase",
+    textAlign: "center",
+  },
+  llmResponseText: {
+    color: "#2d4a2d",
+    fontSize: 16,
+    lineHeight: 24,
+  },
+  speakButton: {
+    backgroundColor: "#f7f0e0",
+    borderRadius: 20,
+    padding: 6,
+    borderWidth: 1,
+    borderColor: "#dccfb9",
+  },
+  transcriptionBox: {
+    backgroundColor: "#e8ecf4",
+    borderRadius: 16,
+    gap: 6,
+    marginTop: 4,
+    padding: 14,
+    borderWidth: 1,
+    borderColor: "#b8c9d9",
+  },
+  transcriptionLabel: {
+    color: "#1a4a6a",
+    fontSize: 13,
+    fontWeight: "700",
+    textTransform: "uppercase",
+    textAlign: "center",
+  },
+  transcriptionText: {
+    color: "#1f3a52",
+    fontSize: 16,
+    lineHeight: 24,
+  },
 });