Files
quibot/apk/app/index.tsx
2026-06-18 21:46:44 +02:00

757 lines
21 KiB
TypeScript

import { Audio, InterruptionModeAndroid, InterruptionModeIOS } from "expo-av";
import * as Speech from "expo-speech";
import { router, useFocusEffect } from "expo-router";
import { useCallback, useEffect, useRef, useState } from "react";
import Svg, { Path } from "react-native-svg";
import {
ActivityIndicator,
Alert,
KeyboardAvoidingView,
Platform,
Pressable,
ScrollView,
StyleSheet,
Text,
View,
} from "react-native";
import { loadRecorderSettings } from "@/lib/recorder-settings";
import { getStrings, type Locale, t } from "@/lib/translations";
/**
 * Formats a millisecond duration as a zero-padded `MM:SS` string.
 *
 * Negative or non-finite inputs are treated as zero so the timer never
 * renders values such as `-1:-1` or `NaN:NaN`. Minutes are not capped, so an
 * hour-long duration renders as `60:00`.
 *
 * @param durationMs - Elapsed time in milliseconds.
 * @returns The duration formatted as `MM:SS`.
 */
function formatDuration(durationMs: number): string {
  const safeMs = Number.isFinite(durationMs) && durationMs > 0 ? durationMs : 0;
  const totalSeconds = Math.floor(safeMs / 1000);
  const minutes = Math.floor(totalSeconds / 60);
  const seconds = totalSeconds % 60;
  const pad = (value: number): string => value.toString().padStart(2, "0");
  return `${pad(minutes)}:${pad(seconds)}`;
}
/**
 * Extracts the lower-cased file extension from a recording URI.
 *
 * Only the last path segment is inspected, after dropping any `?query` or
 * `#fragment`, so dots in directory names or in query values are never
 * mistaken for an extension.
 *
 * @param uri - URI of the recorded file.
 * @returns The extension without its leading dot, or `undefined` when the
 *   last path segment has none.
 */
function extractUriExtension(uri: string): string | undefined {
  const path = uri.split(/[?#]/, 1)[0] ?? "";
  const fileName = path.slice(path.lastIndexOf("/") + 1);
  const dotIndex = fileName.lastIndexOf(".");
  if (dotIndex === -1) {
    return undefined;
  }
  const extension = fileName.slice(dotIndex + 1).toLowerCase();
  return extension || undefined;
}

/**
 * Maps a recording URI to the MIME type sent with the upload.
 *
 * @param uri - URI of the recorded file.
 * @returns The MIME type for known extensions (`wav`, `caf`, `webm`, `mp3`);
 *   `"audio/m4a"` for anything else, including URIs without an extension.
 */
function buildMimeType(uri: string): string {
  switch (extractUriExtension(uri)) {
    case "wav":
      return "audio/wav";
    case "caf":
      return "audio/x-caf";
    case "webm":
      return "audio/webm";
    case "mp3":
      return "audio/mpeg";
    default:
      return "audio/m4a";
  }
}

/**
 * Returns the file extension to use for the uploaded file name.
 *
 * @param uri - URI of the recorded file.
 * @returns The URI's lower-cased extension, or `"m4a"` when it has none.
 */
function buildFileExtension(uri: string): string {
  return extractUriExtension(uri) ?? "m4a";
}
/**
 * Push-to-talk recorder screen.
 *
 * Records audio while the microphone button is held, uploads the clip to the
 * configured backend's `/audio/upload` endpoint on release, then shows the
 * returned transcription / LLM reply and reads them aloud.
 */
export default function RecorderScreen() {
// Upload settings; filled in from loadRecorderSettings() by refreshSettings.
const [backendUrl, setBackendUrl] = useState("");
const [authToken, setAuthToken] = useState("");
const [fieldName, setFieldName] = useState("file");
// UI language and its string table; both start as "ca" until settings load.
const [locale, setLocale] = useState<Locale>("ca");
const [strings, setStrings] = useState(() => getStrings("ca"));
// Active recording (null while idle), URI of the last finished clip, and the
// duration in milliseconds shown by the timer.
const [recording, setRecording] = useState<Audio.Recording | null>(null);
const [recordingUri, setRecordingUri] = useState<string | null>(null);
const [recordingMs, setRecordingMs] = useState(0);
// Text shown to the user: status line, raw response preview (first 400
// characters of the response body), the LLM reply, and the transcription.
const [statusMessage, setStatusMessage] = useState("");
const [responsePreview, setResponsePreview] = useState("");
const [llmResponseText, setLlmResponseText] = useState("");
const [transcriptionText, setTranscriptionText] = useState("");
// True while an upload request is in flight.
const [isUploading, setIsUploading] = useState(false);
// True while the microphone button is being held down.
const [isHolding, setIsHolding] = useState(false);
// Set and cleared together with the `recording` state, but readable
// synchronously from the async handlers.
const recordingRef = useRef<Audio.Recording | null>(null);
// Mirror of the latest `strings` table. The memoized callback below has an
// empty dependency list, so reading `strings` directly inside it would always
// yield the table from the first render (the default locale).
const latestStringsRef = useRef(strings);
useEffect(() => {
  latestStringsRef.current = strings;
}, [strings]);

/**
 * Focus-effect callback: reloads the persisted recorder settings and copies
 * them into component state.
 *
 * The returned cleanup marks the load as stale so that a result arriving
 * after the screen lost focus is ignored.
 */
const refreshSettings = useCallback(() => {
  let isActive = true;

  void (async () => {
    try {
      const settings = await loadRecorderSettings();
      if (!isActive) {
        return;
      }
      setBackendUrl(settings.backendUrl);
      setAuthToken(settings.authToken);
      setFieldName(settings.fieldName);
      setLocale(settings.language);
      setStrings(getStrings(settings.language));
    } catch {
      if (isActive) {
        // Use the current locale's message rather than the first-render one.
        setStatusMessage(latestStringsRef.current.loadError);
      }
    }
  })();

  return () => {
    isActive = false;
  };
}, []);
useFocusEffect(refreshSettings);
// While a recording is active, poll its status every 250 ms to drive the
// on-screen timer.
useEffect(() => {
  if (!recording) {
    return;
  }
  // Cleared on cleanup so a poll that resolves after the recording changed
  // cannot overwrite the final duration set by the stop handler.
  let isCurrent = true;
  const interval = setInterval(() => {
    recording
      .getStatusAsync()
      .then((status) => {
        if (isCurrent && typeof status.durationMillis === "number") {
          setRecordingMs(status.durationMillis);
        }
      })
      // Best-effort poll: a failed status read just skips this tick instead
      // of surfacing as an unhandled promise rejection.
      .catch(() => undefined);
  }, 250);
  return () => {
    isCurrent = false;
    clearInterval(interval);
  };
}, [recording]);
// When `recording` changes or the screen unmounts, stop and unload the
// recording this effect was created for. Failures are deliberately ignored.
useEffect(() => {
  const release = () => {
    if (!recording) {
      return;
    }
    void recording.stopAndUnloadAsync().catch(() => undefined);
  };
  return release;
}, [recording]);
/**
 * Reads `text` aloud with expo-speech in the current locale.
 *
 * Any ongoing speech is stopped first and the audio mode is switched out of
 * recording mode. The returned promise settles only once the utterance has
 * finished, was stopped, or failed, so callers that await it can queue the
 * next utterance without cutting this one off.
 *
 * @param text - Text to speak; empty or whitespace-only text is skipped.
 */
async function speak(text: string): Promise<void> {
  if (!text || !text.trim()) {
    console.log("[TTS] Skipping empty text");
    return;
  }
  console.log("[TTS] ===== START speak =====");
  Speech.stop();
  await new Promise((r) => setTimeout(r, 100));
  try {
    await Audio.setAudioModeAsync({
      allowsRecordingIOS: false,
      playsInSilentModeIOS: true,
      interruptionModeAndroid: InterruptionModeAndroid.DoNotMix,
      interruptionModeIOS: InterruptionModeIOS.DoNotMix,
      shouldDuckAndroid: true,
      staysActiveInBackground: false,
    });
    console.log("[TTS] Audio mode reset OK");
  } catch (err) {
    // A failed audio-mode switch is logged only; speech is still attempted.
    console.log("[TTS] Audio mode error:", err);
  }
  const lang = locale === "ca" ? "ca-ES" : "en-US";
  await new Promise((r) => setTimeout(r, 800));
  console.log("[TTS] Calling Speech.speak. Text length:", text.length, "Lang:", lang);
  // Speech.speak() only enqueues the utterance and returns immediately, so
  // wrap its completion callbacks in a promise and wait for one of them.
  await new Promise<void>((resolve) => {
    try {
      Speech.speak(text, {
        language: lang,
        onDone: () => {
          console.log("[TTS] ✅ onDone fired");
          resolve();
        },
        onStopped: () => {
          console.log("[TTS] onStopped fired");
          resolve();
        },
        onError: (error) => {
          console.log("[TTS] ❌ onError:", error);
          resolve();
        },
      });
      console.log("[TTS] Speech.speak() call returned OK");
    } catch (err) {
      console.log("[TTS] Speech.speak() threw:", err);
      resolve();
    }
  });
}
// Identifies the most recent speakSequentially() run so that an older run can
// tell it has been superseded and stop queuing further utterances.
const speechRunIdRef = useRef(0);

/**
 * Speaks the given texts one after another, pausing 1.5 s between them.
 *
 * The run is abandoned when a newer run has started or when a recording is
 * active: speak() stops current speech and switches the audio mode out of
 * recording mode, so continuing would disturb both.
 *
 * @param texts - Utterances to speak, in order.
 */
async function speakSequentially(texts: string[]): Promise<void> {
  const runId = ++speechRunIdRef.current;
  for (let i = 0; i < texts.length; i++) {
    if (runId !== speechRunIdRef.current || recordingRef.current) {
      return;
    }
    await speak(texts[i]);
    // Pause between utterances; nothing follows the last one.
    if (i < texts.length - 1) {
      await new Promise((r) => setTimeout(r, 1500));
    }
  }
}
/**
 * Starts a new microphone recording.
 *
 * Stops any speech, clears the previous exchange from the UI, asks for
 * microphone permission, switches the audio mode to recording and creates a
 * HIGH_QUALITY recording. A denied permission or any thrown error is reported
 * through the status line and an alert.
 */
async function startRecording() {
  try {
    // Reset everything left over from the previous exchange.
    Speech.stop();
    setTranscriptionText("");
    setResponsePreview("");
    setLlmResponseText("");
    setRecordingUri(null);

    const { granted } = await Audio.requestPermissionsAsync();
    if (!granted) {
      setStatusMessage(strings.micPermissionDenied);
      Alert.alert(strings.micAccessRequiredTitle, strings.micAccessRequiredMsg);
      return;
    }

    await Audio.setAudioModeAsync({
      allowsRecordingIOS: true,
      interruptionModeAndroid: InterruptionModeAndroid.DoNotMix,
      interruptionModeIOS: InterruptionModeIOS.DoNotMix,
      playsInSilentModeIOS: true,
      shouldDuckAndroid: true,
      staysActiveInBackground: false,
    });

    const { recording: startedRecording } = await Audio.Recording.createAsync(
      Audio.RecordingOptionsPresets.HIGH_QUALITY,
    );
    // Keep the ref and the state in step: the ref serves the async handlers,
    // the state drives the timer and cleanup effects.
    recordingRef.current = startedRecording;
    setRecording(startedRecording);
    setRecordingMs(0);
    setStatusMessage(strings.recording);
  } catch (error) {
    const detail = error instanceof Error ? error.message : "";
    setStatusMessage(strings.couldNotStartRecording);
    Alert.alert(strings.recordingFailedTitle, detail);
  }
}
/**
 * Stops the active recording and uploads the resulting clip to the backend.
 *
 * Steps:
 * 1. Stop and unload the recording and switch the audio mode out of
 *    recording mode. A failure here is reported and ends the flow.
 * 2. If no backend URL is configured, show `noBackendUrl` and stop.
 * 3. POST the clip as multipart form data to `<backendUrl>/audio/upload`,
 *    with a Bearer token when one is configured.
 * 4. Show the `transcription` / `llmResponse` strings from a JSON response
 *    and read them aloud; a non-JSON body is shown as a raw preview only.
 *
 * Does nothing when no recording is active.
 */
async function stopRecordingAndUpload() {
  const activeRecording = recordingRef.current;
  if (!activeRecording) {
    return;
  }
  // Claim the recording immediately so an overlapping call cannot stop it twice.
  recordingRef.current = null;
  console.log("[APP] stopRecordingAndUpload called");

  let uri: string | null;
  try {
    const currentStatus = await activeRecording.getStatusAsync();
    const durationMillis = currentStatus.durationMillis ?? 0;
    await activeRecording.stopAndUnloadAsync();
    await Audio.setAudioModeAsync({
      allowsRecordingIOS: false,
      playsInSilentModeIOS: true,
    });
    uri = activeRecording.getURI();
    setRecording(null);
    setRecordingMs(durationMillis);
  } catch (error) {
    setRecording(null);
    setStatusMessage(strings.stopFailedTitle);
    Alert.alert(
      strings.stopFailedTitle,
      error instanceof Error ? error.message : "",
    );
    return;
  }

  if (!uri) {
    setStatusMessage(strings.readyToRecord);
    return;
  }
  setRecordingUri(uri);
  setStatusMessage(strings.finishedUpload);

  // Check the configured URL itself: once "/audio/upload" is appended the
  // result is never empty, so testing the final URL could not detect a
  // missing setting.
  const trimmedUrl = backendUrl.trim().replace(/\/+$/, "");
  if (!trimmedUrl) {
    setStatusMessage(strings.noBackendUrl);
    setIsUploading(false);
    return;
  }
  const uploadUrl = trimmedUrl.endsWith("/audio/upload")
    ? trimmedUrl
    : `${trimmedUrl}/audio/upload`;

  setIsUploading(true);
  try {
    const formData = new FormData();
    // The { name, type, uri } file descriptor is not part of the DOM
    // FormData typings, hence the cast.
    formData.append(fieldName.trim() || "file", {
      name: `recording-${Date.now()}.${buildFileExtension(uri)}`,
      type: buildMimeType(uri),
      uri: uri,
    } as never);

    const headers: Record<string, string> = {};
    if (authToken.trim()) {
      headers.Authorization = `Bearer ${authToken.trim()}`;
    }

    const response = await fetch(uploadUrl, {
      method: "POST",
      headers,
      body: formData,
    });
    const responseText = await response.text();
    if (!response.ok) {
      throw new Error(`${response.status}. ${responseText}`);
    }
    setResponsePreview(responseText.slice(0, 400));

    // Accept only string fields: anything else would break <Text> rendering
    // and text-to-speech.
    let transcription = "";
    let llmResponse = "";
    try {
      const data: unknown = JSON.parse(responseText);
      if (typeof data === "object" && data !== null) {
        const fields = data as Record<string, unknown>;
        if (typeof fields.transcription === "string") {
          transcription = fields.transcription;
        }
        if (typeof fields.llmResponse === "string") {
          llmResponse = fields.llmResponse;
        }
      }
    } catch (parseError) {
      console.log("[APP] JSON parse failed:", parseError, "Response was:", responseText.substring(0, 200));
    }
    setTranscriptionText(transcription);
    setLlmResponseText(llmResponse);

    const textsToSpeak = [transcription, llmResponse].filter(Boolean);
    if (textsToSpeak.length > 0) {
      // Keep the "playing" status visible instead of overwriting it at once.
      setStatusMessage(strings.voiceMessageSent + ". " + strings.playing);
      void speakSequentially(textsToSpeak);
    } else {
      setStatusMessage(strings.voiceMessageSent);
    }
  } catch (error) {
    setStatusMessage(strings.uploadFailed);
    Alert.alert(
      strings.uploadFailed,
      error instanceof Error ? error.message : "",
    );
  } finally {
    setIsUploading(false);
  }
}
// Tracks the press state synchronously. The `isHolding` state only reaches
// the handlers on the next render, which is too late when press-in and
// press-out happen back to back.
const isHoldingRef = useRef(false);

/**
 * Press-in handler for the microphone button: starts recording unless an
 * upload is in progress.
 */
async function handlePressIn() {
  if (isUploading) return;
  isHoldingRef.current = true;
  setIsHolding(true);
  await startRecording();
  // The button may have been released while the recorder was still starting.
  // In that case handlePressOut found nothing to stop, so finish the
  // recording here instead of leaving it running.
  if (!isHoldingRef.current) {
    await stopRecordingAndUpload();
  }
}

/**
 * Press-out handler for the microphone button: stops the recording and
 * uploads it. Ignored when no press is in progress.
 */
async function handlePressOut() {
  if (!isHoldingRef.current) return;
  isHoldingRef.current = false;
  setIsHolding(false);
  await stopRecordingAndUpload();
}
/**
 * Uploads a finished recording to `<backendUrl>/audio/upload`.
 *
 * Shows an alert and returns when no backend URL is configured. On success
 * the `transcription` / `llmResponse` strings from a JSON response are shown
 * and read aloud; a non-JSON body is shown as a raw preview only. Failures
 * are reported through the status line and an alert.
 *
 * @param uriOverride - URI of the clip to upload; defaults to the last
 *   finished recording. Nothing happens when neither is available.
 */
async function uploadRecording(uriOverride?: string) {
  const targetUri = uriOverride ?? recordingUri;
  if (!targetUri) {
    return;
  }

  // Check the configured URL itself: once "/audio/upload" is appended the
  // result is never empty, so testing the final URL could not detect a
  // missing setting.
  const trimmedUrl = backendUrl.trim().replace(/\/+$/, "");
  if (!trimmedUrl) {
    Alert.alert(strings.missingBackendUrlTitle, strings.missingBackendUrlMsg);
    return;
  }
  const uploadUrl = trimmedUrl.endsWith("/audio/upload")
    ? trimmedUrl
    : `${trimmedUrl}/audio/upload`;

  try {
    setIsUploading(true);
    setStatusMessage(strings.uploadingRecording);
    Speech.stop();
    setTranscriptionText("");
    setResponsePreview("");
    setLlmResponseText("");

    const formData = new FormData();
    // The { name, type, uri } file descriptor is not part of the DOM
    // FormData typings, hence the cast.
    formData.append(fieldName.trim() || "file", {
      name: `recording-${Date.now()}.${buildFileExtension(targetUri)}`,
      type: buildMimeType(targetUri),
      uri: targetUri,
    } as never);

    const headers: Record<string, string> = {};
    if (authToken.trim()) {
      headers.Authorization = `Bearer ${authToken.trim()}`;
    }

    const response = await fetch(uploadUrl, {
      method: "POST",
      headers,
      body: formData,
    });
    const responseText = await response.text();
    if (!response.ok) {
      throw new Error(`${response.status}. ${responseText}`);
    }
    setResponsePreview(responseText.slice(0, 400));

    // Accept only string fields: anything else would break <Text> rendering
    // and text-to-speech.
    let transcription = "";
    let llmResponse = "";
    try {
      const data: unknown = JSON.parse(responseText);
      if (typeof data === "object" && data !== null) {
        const fields = data as Record<string, unknown>;
        if (typeof fields.transcription === "string") {
          transcription = fields.transcription;
        }
        if (typeof fields.llmResponse === "string") {
          llmResponse = fields.llmResponse;
        }
      }
    } catch {
      // Not JSON: only the raw preview set above is shown.
    }
    setTranscriptionText(transcription);
    setLlmResponseText(llmResponse);

    const textsToSpeak = [transcription, llmResponse].filter(Boolean);
    if (textsToSpeak.length > 0) {
      // Keep the "playing" status visible instead of overwriting it at once.
      setStatusMessage(strings.voiceMessageSent + ". " + strings.playing);
      void speakSequentially(textsToSpeak);
    } else {
      setStatusMessage(strings.uploadComplete);
    }
  } catch (error) {
    setStatusMessage(strings.uploadFailed);
    Alert.alert(
      strings.uploadFailed,
      error instanceof Error ? error.message : "",
    );
  } finally {
    setIsUploading(false);
  }
}
/**
 * Speaker-button handler: replays the transcription followed by the LLM
 * reply, skipping whichever of the two is empty.
 */
function handleSpeak() {
  const queue: string[] = [];
  for (const candidate of [transcriptionText, llmResponseText]) {
    if (candidate) {
      queue.push(candidate);
    }
  }
  void speakSequentially(queue);
}
// Labels resolved through t() for the current locale.
// NOTE(review): recorderTitleLabel is not referenced in the JSX below.
const releaseLabel = t("releaseToStop", locale);
const holdLabel = t("holdToRecord", locale);
const openSettingsLabel = t("openSettingsHint", locale);
const appTitleLabel = t("appTitle", locale);
const recorderTitleLabel = t("recorderTitle", locale);
const serverResponseLabel = t("serverResponse", locale);
// Layout: a hero row (app badge plus a settings shortcut that navigates to
// "/settings") above a panel holding the timer, the hold-to-record button
// (replaced by a spinner while uploading), the status and helper texts and,
// when present, the transcription, the LLM reply with its replay button, or
// the raw response preview (shown only when neither of the other two is set).
return (
<View style={styles.safeArea}>
<KeyboardAvoidingView
style={styles.keyboardAvoidingView}
behavior={Platform.OS === "ios" ? "padding" : undefined}
>
<ScrollView
style={styles.scrollView}
contentContainerStyle={styles.content}
keyboardShouldPersistTaps="handled"
>
<View style={styles.hero}>
<View style={styles.heroTopRow}>
<View style={styles.heroBadge}>
<Text style={styles.heroBadgeText}>{appTitleLabel}</Text>
</View>
<Pressable
onPress={() => router.push("/settings")}
hitSlop={10}
style={styles.settingsCog}
>
<Svg width="20" height="20" viewBox="0 0 24 24" fill="none">
<Path
d="M12 15a3 3 0 1 0 0-6 3 3 0 0 0 0 6Z"
fill="#d3deea"
/>
<Path
d="M19.4 15a1.65 1.65 0 0 0 .33 1.82l.06.06a2 2 0 1 1-2.83 2.83l-.06-.06a1.65 1.65 0 0 0-1.82-.33 1.65 1.65 0 0 0-1 1.51V21a2 2 0 0 1-4 0v-.09a1.65 1.65 0 0 0-1.08-1.51 1.65 1.65 0 0 0-1.82.33l-.06.06a2 2 0 1 1-2.83-2.83l-.06-.06a1.65 1.65 0 0 0 .33-1.82 1.65 1.65 0 0 0-1.51-1H3a2 2 0 0 1 0-4h.09a1.65 1.65 0 0 0 1.51-1.08 1.65 1.65 0 0 0-.33-1.82l-.06-.06a2 2 0 1 1 2.83-2.83l.06.06a1.65 1.65 0 0 0 1.82.33H9a1.65 1.65 0 0 0 1-1.51V3a2 2 0 0 1 4 0v.09a1.65 1.65 0 0 0 1.08 1.51 1.65 1.65 0 0 0 1.82-.33l.06-.06a2 2 0 1 1 2.83 2.83l-.06.06a1.65 1.65 0 0 0-.33 1.82V9c.26.604.852.997 1.51 1H21a2 2 0 0 1 0 4h-.09a1.65 1.65 0 0 0-1.51 1.08Z"
stroke="#d3deea"
strokeWidth="1.5"
/>
</Svg>
</Pressable>
</View>
</View>
<View style={styles.panel}>
<Text style={[styles.meterValueCentered, isHolding && { color: "#d04f2d" }]}>
{formatDuration(recordingMs)}
</Text>
<Pressable
disabled={isUploading}
onPressIn={handlePressIn}
onPressOut={handlePressOut}
style={[
styles.micButton,
isHolding ? styles.holdingButton : styles.idleButton,
isUploading && styles.buttonDisabled,
]}
>
{isUploading ? (
<ActivityIndicator color="#fff6f3" size="large" />
) : (
<Svg width="64" height="64" viewBox="0 0 24 24" fill="none">
<Path
d="M12 3a3 3 0 0 0-3 3v6a3 3 0 0 0 6 0V6a3 3 0 0 0-3-3z"
fill="#fff6f3"
stroke="#fff6f3"
strokeWidth="1"
/>
<Path
d="M19 10v1a7 7 0 0 1-14 0v-1"
stroke="#fff6f3"
strokeWidth="2"
strokeLinecap="round"
/>
<Path
d="M12 18v3"
stroke="#fff6f3"
strokeWidth="2"
strokeLinecap="round"
/>
</Svg>
)}
</Pressable>
<Text style={styles.statusText}>{statusMessage || strings.readyToRecord}</Text>
<Text style={styles.helperText}>
{isHolding
? releaseLabel
: backendUrl.trim()
? holdLabel
: openSettingsLabel}
</Text>
{transcriptionText ? (
<View style={styles.transcriptionBox}>
<Text style={styles.transcriptionLabel}>{strings.yourMessage}</Text>
<Text style={styles.transcriptionText}>{transcriptionText}</Text>
</View>
) : null}
{llmResponseText ? (
<View style={styles.llmResponseBox}>
<View style={styles.llmResponseHeader}>
<Text style={styles.llmResponseLabel}>{strings.aiReply}</Text>
<Pressable onPress={handleSpeak} style={styles.speakButton}>
<Svg width="20" height="20" viewBox="0 0 24 24" fill="none">
<Path d="M11 5L6 9H2v6h4l5 4V5z" fill="#13304a" />
<Path d="M15.5 8.5a5.5 5.5 0 0 1 0 7" stroke="#13304a" strokeWidth="2" strokeLinecap="round" />
<Path d="M18.5 5.5a9 9 0 0 1 0 13" stroke="#13304a" strokeWidth="2" strokeLinecap="round" />
</Svg>
</Pressable>
</View>
<Text style={styles.llmResponseText}>{llmResponseText}</Text>
</View>
) : null}
{responsePreview && !transcriptionText && !llmResponseText ? (
<View style={styles.responseBox}>
<Text style={styles.responseLabel}>{serverResponseLabel}</Text>
<Text style={styles.responseText}>{responsePreview}</Text>
</View>
) : null}
</View>
</ScrollView>
</KeyboardAvoidingView>
</View>
);
}
/**
 * Styles for the recorder screen.
 *
 * Palette: navy `#13304a` for the idle button and labels, red `#d04f2d` for
 * the recording state, cream backgrounds for the page and panel.
 */
const styles = StyleSheet.create({
  // --- Page scaffolding ---
  safeArea: {
    flex: 1,
    backgroundColor: "#f4efe4",
  },
  keyboardAvoidingView: {
    flex: 1,
  },
  scrollView: {
    flex: 1,
  },
  // Used as the ScrollView's contentContainerStyle. `flexGrow` (rather than
  // `flex`) lets the container fill the viewport for centering while still
  // growing with its content, so long replies remain scrollable.
  content: {
    flexGrow: 1,
    alignItems: "center",
    justifyContent: "center",
    paddingVertical: 32,
    paddingHorizontal: 20,
    gap: 18,
  },

  // --- Header row: app badge and settings shortcut ---
  hero: {
    backgroundColor: "transparent",
    paddingHorizontal: 22,
    paddingTop: 40,
  },
  heroTopRow: {
    alignItems: "center",
    flexDirection: "row",
    justifyContent: "space-between",
  },
  heroBadge: {
    backgroundColor: "#f2b15d",
    borderRadius: 999,
    paddingHorizontal: 12,
    paddingVertical: 6,
  },
  heroBadgeText: {
    color: "#13304a",
    fontSize: 12,
    fontWeight: "700",
    letterSpacing: 0.5,
    textTransform: "uppercase",
  },
  settingsCog: {
    alignItems: "center",
    justifyContent: "center",
    width: 40,
    height: 40,
    borderRadius: 999,
    backgroundColor: "#13304a",
    marginLeft: 12,
  },

  // --- Recorder panel: timer, microphone button, status texts ---
  panel: {
    backgroundColor: "#fffaf1",
    borderColor: "#dccfb9",
    borderRadius: 24,
    borderWidth: 1,
    gap: 12,
    padding: 18,
    alignSelf: "center",
    maxWidth: 340,
  },
  meterValueCentered: {
    color: "#d04f2d",
    fontSize: 40,
    fontWeight: "800",
    textAlign: "center",
  },
  micButton: {
    alignItems: "center",
    borderRadius: 999,
    height: 164,
    justifyContent: "center",
    marginVertical: 6,
    width: 164,
    alignSelf: "center",
  },
  idleButton: {
    backgroundColor: "#13304a",
  },
  holdingButton: {
    backgroundColor: "#d04f2d",
    transform: [{ scale: 1.08 }],
  },
  // NOTE(review): micButtonText and recordingLabel are not referenced by the
  // component in this file; kept in case they are still wanted.
  micButtonText: {
    color: "#fff6f3",
    fontSize: 20,
    fontWeight: "800",
  },
  recordingLabel: {
    fontSize: 18,
  },
  buttonDisabled: {
    opacity: 0.45,
  },
  statusText: {
    color: "#1f2d3d",
    fontSize: 15,
    lineHeight: 21,
    textAlign: "center",
  },
  helperText: {
    color: "#665f54",
    fontSize: 13,
    lineHeight: 18,
    textAlign: "center",
  },

  // --- Raw server response preview ---
  responseBox: {
    backgroundColor: "#f7f0e0",
    borderRadius: 16,
    gap: 6,
    marginTop: 4,
    padding: 14,
  },
  responseLabel: {
    color: "#13304a",
    fontSize: 13,
    fontWeight: "700",
    textTransform: "uppercase",
    textAlign: "center",
  },
  responseText: {
    color: "#36475a",
    fontSize: 14,
    lineHeight: 20,
  },

  // --- LLM reply box with replay button ---
  llmResponseBox: {
    backgroundColor: "#e8f4e8",
    borderRadius: 16,
    gap: 6,
    marginTop: 4,
    padding: 14,
    borderWidth: 1,
    borderColor: "#b8d9b8",
  },
  llmResponseHeader: {
    flexDirection: "row",
    alignItems: "center",
    justifyContent: "space-between",
  },
  llmResponseLabel: {
    color: "#2a6a2a",
    fontSize: 13,
    fontWeight: "700",
    textTransform: "uppercase",
    textAlign: "center",
  },
  llmResponseText: {
    color: "#2d4a2d",
    fontSize: 16,
    lineHeight: 24,
  },
  speakButton: {
    backgroundColor: "#f7f0e0",
    borderRadius: 20,
    padding: 6,
    borderWidth: 1,
    borderColor: "#dccfb9",
  },

  // --- Transcription box ---
  transcriptionBox: {
    backgroundColor: "#e8ecf4",
    borderRadius: 16,
    gap: 6,
    marginTop: 4,
    padding: 14,
    borderWidth: 1,
    borderColor: "#b8c9d9",
  },
  transcriptionLabel: {
    color: "#1a4a6a",
    fontSize: 13,
    fontWeight: "700",
    textTransform: "uppercase",
    textAlign: "center",
  },
  transcriptionText: {
    color: "#1f3a52",
    fontSize: 16,
    lineHeight: 24,
  },
});