diff --git a/README.md b/README.md index e35216d..1b29cf4 100644 --- a/README.md +++ b/README.md @@ -25,15 +25,15 @@ OpenAI Realtime API over WebRTC Push-To-Talk Android Phone[/Mobile] + Watch[/Wea ## TODOs (Not necessarily in any order) -1. Background service (easy to do wrong; I want to get the UX right) -2. Localize strings (I committed the sin of hard coding strings) -3. Tests (another sin I committed) -4. Get `Stop` working better -5. **Standalone** `Wear` version (lower priority; requires adding tiles for settings, conversation, etc) -6. Add `text` input/output feature -7. Learn Tool/Function integration -8. Find way to integrate with Gmail/Tasks/Keep/etc -9. Find way to save conversations to https://chatgpt.com/ history +1. Localize strings (I committed the sin of hard coding strings) +2. Tests (another sin I committed) +3. Get `Stop` working better +4. **Standalone** `Wear` version (lower priority; requires adding tiles for settings, conversation, etc) +5. Add `text` input/output feature +6. Learn Tool/Function integration +7. Find way to integrate with Gmail/Tasks/Keep/etc +8. Find way to save conversations to https://chatgpt.com/ history +9. Implement a `VoiceInteractionService`? https://developer.android.com/reference/android/service/voice/VoiceInteractionService ## Development * If Mobile or Wear physical device wireless debugging does not connect in Android Studio: diff --git a/mobile/src/main/AndroidManifest.xml b/mobile/src/main/AndroidManifest.xml index 9fd85f6..e4eb9f3 100644 --- a/mobile/src/main/AndroidManifest.xml +++ b/mobile/src/main/AndroidManifest.xml @@ -14,6 +14,14 @@ + + + + + + + + + + + + + + \ No newline at end of file diff --git a/mobile/src/main/java/com/swooby/alfredai/AlfredAiApp.kt b/mobile/src/main/java/com/swooby/alfredai/AlfredAiApp.kt index aaa56ea..379e55d 100644 --- a/mobile/src/main/java/com/swooby/alfredai/AlfredAiApp.kt +++ b/mobile/src/main/java/com/swooby/alfredai/AlfredAiApp.kt @@ -9,6 +9,7 @@ import androidx.lifecycle.ViewModelProvider import androidx.lifecycle.ViewModelStore import androidx.lifecycle.ViewModelStoreOwner +/* @MainThread inline fun ComponentActivity.appViewModels(): Lazy { return ViewModelLazy( @@ -18,6 +19,7 @@ inline fun ComponentActivity.appViewModels(): Lazy { defaultViewModelCreationExtras } ) } +*/ class AlfredAiApp : Application(), ViewModelStoreOwner { @@ -25,8 +27,8 @@ class AlfredAiApp : Application(), ViewModelStoreOwner val mobileViewModel by lazy { ViewModelProvider( - this, - ViewModelProvider.AndroidViewModelFactory.getInstance(this) + owner = this, + factory = ViewModelProvider.AndroidViewModelFactory.getInstance(this) )[MobileViewModel::class.java] } diff --git a/mobile/src/main/java/com/swooby/alfredai/MobileActivity.kt b/mobile/src/main/java/com/swooby/alfredai/MobileActivity.kt index bb85263..f9e0506 100644 --- a/mobile/src/main/java/com/swooby/alfredai/MobileActivity.kt +++ b/mobile/src/main/java/com/swooby/alfredai/MobileActivity.kt @@ -136,10 +136,10 @@ class MobileActivity : ComponentActivity() { private const val TAG = "PushToTalkActivity" } - private val mobileViewModel: MobileViewModel by appViewModels() + private val mobileViewModel by lazy { (application as AlfredAiApp).mobileViewModel } override fun onCreate(savedInstanceState: Bundle?) { - Log.d(TAG, "onCreate()") + Log.d(TAG, "onCreate(savedInstanceState=$savedInstanceState)") super.onCreate(savedInstanceState) enableEdgeToEdge() setContent { @@ -147,18 +147,15 @@ class MobileActivity : ComponentActivity() { } } - override fun onDestroy() { - Log.d(TAG, "onDestroy()") - super.onDestroy() - - // Temporary, until Background/Foreground Service is implemented - mobileViewModel.realtimeClient?.disconnect() + override fun onStart() { + super.onStart() + mobileViewModel.onStartOrResume(this) } -} -enum class ConversationSpeaker { - Local, - Remote, + override fun onStop() { + super.onStop() + mobileViewModel.onPauseOrStop(this) + } } @OptIn(ExperimentalMaterial3Api::class, ExperimentalFoundationApi::class) @@ -185,12 +182,6 @@ fun MobileApp(mobileViewModel: MobileViewModel? = null) { ) val debugConnectDelayMillis = if (BuildConfig.DEBUG && false) 10_000L else 0L - @Suppress( - "SimplifyBooleanWithConstants", - //"KotlinConstantConditions", - ) - val debugLogConversation = BuildConfig.DEBUG && true - @Suppress( "SimplifyBooleanWithConstants", "KotlinConstantConditions", @@ -201,37 +192,35 @@ fun MobileApp(mobileViewModel: MobileViewModel? = null) { //region Conversation // - data class ConversationItem( - val id: String?, - val speaker: ConversationSpeaker, - val initialText: String - ) { - var text by mutableStateOf(initialText) - } - - val conversationItems = remember { mutableStateListOf() } - @Suppress( - "SimplifyBooleanWithConstants", - "KotlinConstantConditions", - ) - if (BuildConfig.DEBUG && false) { - fun generateRandomSentence(): String { - val subjects = listOf("The cat", "The dog", "The bird", "The fish") - val verbs = listOf("jumps", "runs", "flies", "swims") - val objects = listOf("over the fence", "in the park", "through the air", "in the water") - return "${subjects.random()} ${verbs.random()} ${objects.random()}." - } + fun mockConversationItems(): List { + val conversationItems = mutableListOf() + @Suppress( + "SimplifyBooleanWithConstants", + "KotlinConstantConditions", + ) + if (BuildConfig.DEBUG && false) { + fun generateRandomSentence(): String { + val subjects = listOf("The cat", "The dog", "The bird", "The fish") + val verbs = listOf("jumps", "runs", "flies", "swims") + val objects = listOf("over the fence", "in the park", "through the air", "in the water") + return "${subjects.random()} ${verbs.random()} ${objects.random()}." + } - for (i in 0..20) { - conversationItems.add( - ConversationItem( - id = "$i", - speaker = ConversationSpeaker.entries.random(), - initialText = generateRandomSentence() + for (i in 0..20) { + conversationItems.add( + MobileViewModel.ConversationItem( + id = "$i", + speaker = MobileViewModel.ConversationSpeaker.entries.random(), + initialText = generateRandomSentence() + ) ) - ) + } } + return conversationItems } + + val conversationItems = mobileViewModel?.conversationItems ?: mockConversationItems() + val conversationListState = rememberLazyListState() LaunchedEffect(Unit) { snapshotFlow { conversationItems.lastOrNull()?.text } @@ -281,7 +270,7 @@ fun MobileApp(mobileViewModel: MobileViewModel? = null) { isConnectSwitchOn = true if (!isConnectingOrConnected) { isConnectingOrConnected = true - conversationItems.clear() + mobileViewModel.conversationItemsClear() jobConnect = CoroutineScope(Dispatchers.IO).launch { if (debugConnectDelayMillis > 0) { delay(debugConnectDelayMillis) @@ -350,6 +339,10 @@ fun MobileApp(mobileViewModel: MobileViewModel? = null) { Manifest.permission.BLUETOOTH, Manifest.permission.BLUETOOTH_CONNECT, Manifest.permission.BLUETOOTH_SCAN, + Manifest.permission.FOREGROUND_SERVICE, + //Manifest.permission.FOREGROUND_SERVICE_MICROPHONE, + //Manifest.permission.FOREGROUND_SERVICE_REMOTE_MESSAGING, + Manifest.permission.FOREGROUND_SERVICE_SPECIAL_USE, ) } @@ -522,88 +515,25 @@ fun MobileApp(mobileViewModel: MobileViewModel? = null) { return false } - override fun onServerEventConversationCreated( - realtimeServerEventConversationCreated: RealtimeServerEventConversationCreated - ) { - if (debugLogConversation) { - Log.d(TAG, "onServerEventConversationCreated($realtimeServerEventConversationCreated)") - } + override fun onServerEventConversationCreated(realtimeServerEventConversationCreated: RealtimeServerEventConversationCreated) { } - override fun onServerEventConversationItemCreated( - realtimeServerEventConversationItemCreated: RealtimeServerEventConversationItemCreated - ) { - if (debugLogConversation) { - Log.d(TAG, "onServerEventConversationItemCreated($realtimeServerEventConversationItemCreated)") - } - val item = realtimeServerEventConversationItemCreated.item - val id = item.id - val content = item.content - val text = content?.joinToString(separator = "") { it.text ?: "" } ?: "" - if (text.isNotBlank()) { - if (debugLogConversation) { - Log.w(TAG, "onServerEventConversationItemCreated: conversationItems.add(ConversationItem(id=${quote(id)}, initialText=${quote(text)}") - } - conversationItems.add( - ConversationItem( - id = id, - speaker = ConversationSpeaker.Local, - initialText = text - ) - ) - } + override fun onServerEventConversationItemCreated(realtimeServerEventConversationItemCreated: RealtimeServerEventConversationItemCreated) { } - override fun onServerEventConversationItemDeleted( - realtimeServerEventConversationItemDeleted: RealtimeServerEventConversationItemDeleted - ) { - if (debugLogConversation) { - Log.d(TAG, "onServerEventConversationItemDeleted($realtimeServerEventConversationItemDeleted)") - } + override fun onServerEventConversationItemDeleted(realtimeServerEventConversationItemDeleted: RealtimeServerEventConversationItemDeleted) { } - override fun onServerEventConversationItemInputAudioTranscriptionCompleted( - realtimeServerEventConversationItemInputAudioTranscriptionCompleted: RealtimeServerEventConversationItemInputAudioTranscriptionCompleted - ) { - if (debugLogConversation) { - Log.d(TAG, "onServerEventConversationItemInputAudioTranscriptionCompleted($realtimeServerEventConversationItemInputAudioTranscriptionCompleted)") - } - val id = realtimeServerEventConversationItemInputAudioTranscriptionCompleted.itemId - val transcript = - realtimeServerEventConversationItemInputAudioTranscriptionCompleted.transcript.trim() // DO TRIM! - if (transcript.isNotBlank()) { - if (debugLogConversation) { - Log.w(TAG, "onServerEventConversationItemInputAudioTranscriptionCompleted: conversationItems.add(ConversationItem(id=${quote(id)}, initialText=${quote(transcript)}") - } - conversationItems.add( - ConversationItem( - id = id, - speaker = ConversationSpeaker.Local, - initialText = transcript - ) - ) - } + override fun onServerEventConversationItemInputAudioTranscriptionCompleted(realtimeServerEventConversationItemInputAudioTranscriptionCompleted: RealtimeServerEventConversationItemInputAudioTranscriptionCompleted) { } - override fun onServerEventConversationItemInputAudioTranscriptionFailed( - realtimeServerEventConversationItemInputAudioTranscriptionFailed: RealtimeServerEventConversationItemInputAudioTranscriptionFailed - ) { - if (debugLogConversation) { - Log.d(TAG, "onServerEventConversationItemInputAudioTranscriptionFailed($realtimeServerEventConversationItemInputAudioTranscriptionFailed)") - } + override fun onServerEventConversationItemInputAudioTranscriptionFailed(realtimeServerEventConversationItemInputAudioTranscriptionFailed: RealtimeServerEventConversationItemInputAudioTranscriptionFailed) { } - override fun onServerEventConversationItemTruncated( - realtimeServerEventConversationItemTruncated: RealtimeServerEventConversationItemTruncated - ) { - if (debugLogConversation) { - Log.d(TAG, "onServerEventConversationItemTruncated($realtimeServerEventConversationItemTruncated)") - } + override fun onServerEventConversationItemTruncated(realtimeServerEventConversationItemTruncated: RealtimeServerEventConversationItemTruncated) { } - override fun onServerEventError( - realtimeServerEventError: RealtimeServerEventError - ) { + override fun onServerEventError(realtimeServerEventError: RealtimeServerEventError) { Log.d(TAG, "onServerEventError($realtimeServerEventError)") val error = realtimeServerEventError.error val text = error.message @@ -615,27 +545,19 @@ fun MobileApp(mobileViewModel: MobileViewModel? = null) { ) } - override fun onServerEventInputAudioBufferCleared( - realtimeServerEventInputAudioBufferCleared: RealtimeServerEventInputAudioBufferCleared - ) { + override fun onServerEventInputAudioBufferCleared(realtimeServerEventInputAudioBufferCleared: RealtimeServerEventInputAudioBufferCleared) { Log.d(TAG, "onServerEventInputAudioBufferCleared($realtimeServerEventInputAudioBufferCleared)") } - override fun onServerEventInputAudioBufferCommitted( - realtimeServerEventInputAudioBufferCommitted: RealtimeServerEventInputAudioBufferCommitted - ) { + override fun onServerEventInputAudioBufferCommitted(realtimeServerEventInputAudioBufferCommitted: RealtimeServerEventInputAudioBufferCommitted) { Log.d(TAG, "onServerEventInputAudioBufferCommitted($realtimeServerEventInputAudioBufferCommitted)") } - override fun onServerEventInputAudioBufferSpeechStarted( - realtimeServerEventInputAudioBufferSpeechStarted: RealtimeServerEventInputAudioBufferSpeechStarted - ) { + override fun onServerEventInputAudioBufferSpeechStarted(realtimeServerEventInputAudioBufferSpeechStarted: RealtimeServerEventInputAudioBufferSpeechStarted) { Log.d(TAG, "onServerEventInputAudioBufferSpeechStarted($realtimeServerEventInputAudioBufferSpeechStarted)") } - override fun onServerEventInputAudioBufferSpeechStopped( - realtimeServerEventInputAudioBufferSpeechStopped: RealtimeServerEventInputAudioBufferSpeechStopped - ) { + override fun onServerEventInputAudioBufferSpeechStopped(realtimeServerEventInputAudioBufferSpeechStopped: RealtimeServerEventInputAudioBufferSpeechStopped) { Log.d(TAG, "onServerEventInputAudioBufferSpeechStopped($realtimeServerEventInputAudioBufferSpeechStopped)") } @@ -652,166 +574,68 @@ fun MobileApp(mobileViewModel: MobileViewModel? = null) { } } - override fun onServerEventRateLimitsUpdated( - realtimeServerEventRateLimitsUpdated: RealtimeServerEventRateLimitsUpdated - ) { + override fun onServerEventRateLimitsUpdated(realtimeServerEventRateLimitsUpdated: RealtimeServerEventRateLimitsUpdated) { Log.d(TAG, "onServerEventRateLimitsUpdated($realtimeServerEventRateLimitsUpdated)") } - override fun onServerEventResponseAudioDelta( - realtimeServerEventResponseAudioDelta: RealtimeServerEventResponseAudioDelta - ) { + override fun onServerEventResponseAudioDelta(realtimeServerEventResponseAudioDelta: RealtimeServerEventResponseAudioDelta) { Log.d(TAG, "onServerEventResponseAudioDelta($realtimeServerEventResponseAudioDelta)") } - override fun onServerEventResponseAudioDone( - realtimeServerEventResponseAudioDone: RealtimeServerEventResponseAudioDone - ) { + override fun onServerEventResponseAudioDone(realtimeServerEventResponseAudioDone: RealtimeServerEventResponseAudioDone) { Log.d(TAG, "onServerEventResponseAudioDone($realtimeServerEventResponseAudioDone)") } - override fun onServerEventResponseAudioTranscriptDelta( - realtimeServerEventResponseAudioTranscriptDelta: RealtimeServerEventResponseAudioTranscriptDelta - ) { - if (debugLogConversation) { - Log.d(TAG, "onServerEventResponseAudioTranscriptDelta($realtimeServerEventResponseAudioTranscriptDelta)") - } - val id = realtimeServerEventResponseAudioTranscriptDelta.itemId - val delta = - realtimeServerEventResponseAudioTranscriptDelta.delta // DO **NOT** TRIM! - - // Append this delta to any current in progress conversation, - // or create a new conversation - val index = conversationItems.indexOfFirst { it.id == id } - if (debugLogConversation) { - Log.w(TAG, "onServerEventResponseAudioTranscriptDelta: id=$id, delta=$delta, index=$index") - } - if (index == -1) { - val conversationItem = ConversationItem( - id = id, - speaker = ConversationSpeaker.Remote, - initialText = delta, - ) - if (debugLogConversation) { - Log.w(TAG, "conversationItems.add($conversationItem)") - } - conversationItems.add(conversationItem) - } else { - val conversationItem = conversationItems[index] - conversationItem.text += delta - if (debugLogConversation) { - Log.w(TAG, "conversationItems.set($index, $conversationItem)") - } - conversationItems[index] = conversationItem - } + override fun onServerEventResponseAudioTranscriptDelta(realtimeServerEventResponseAudioTranscriptDelta: RealtimeServerEventResponseAudioTranscriptDelta) { } - override fun onServerEventResponseAudioTranscriptDone( - realtimeServerEventResponseAudioTranscriptDone: RealtimeServerEventResponseAudioTranscriptDone - ) { - if (debugLogConversation) { - Log.d(TAG, "onServerEventResponseAudioTranscriptDone($realtimeServerEventResponseAudioTranscriptDone)") - } + override fun onServerEventResponseAudioTranscriptDone(realtimeServerEventResponseAudioTranscriptDone: RealtimeServerEventResponseAudioTranscriptDone) { } - override fun onServerEventResponseContentPartAdded( - realtimeServerEventResponseContentPartAdded: RealtimeServerEventResponseContentPartAdded - ) { - if (debugLogConversation) { - Log.d(TAG, "onServerEventResponseContentPartAdded($realtimeServerEventResponseContentPartAdded)") - } + override fun onServerEventResponseContentPartAdded(realtimeServerEventResponseContentPartAdded: RealtimeServerEventResponseContentPartAdded) { } - override fun onServerEventResponseContentPartDone( - realtimeServerEventResponseContentPartDone: RealtimeServerEventResponseContentPartDone - ) { - if (debugLogConversation) { - Log.d(TAG, "onServerEventResponseContentPartDone($realtimeServerEventResponseContentPartDone)") - } + override fun onServerEventResponseContentPartDone(realtimeServerEventResponseContentPartDone: RealtimeServerEventResponseContentPartDone) { } - override fun onServerEventResponseCreated( - realtimeServerEventResponseCreated: RealtimeServerEventResponseCreated - ) { - if (debugLogConversation) { - Log.d(TAG, "onServerEventResponseCreated($realtimeServerEventResponseCreated)") - } + override fun onServerEventResponseCreated(realtimeServerEventResponseCreated: RealtimeServerEventResponseCreated) { } - override fun onServerEventResponseDone( - realtimeServerEventResponseDone: RealtimeServerEventResponseDone - ) { - if (debugLogConversation) { - Log.d(TAG, "onServerEventResponseDone($realtimeServerEventResponseDone)") - } - realtimeServerEventResponseDone.response.output?.forEach { outputConversationItem -> - if (debugLogConversation) { - Log.w(TAG, "onServerEventResponseDone: outputConversationItem=$outputConversationItem") - } - val id = outputConversationItem.id ?: return@forEach - val index = conversationItems.indexOfFirst { it.id == id } - if (index != -1) { - val conversationItem = conversationItems[index] - if (debugLogConversation) { - Log.w(TAG, "onServerEventResponseDone: removing $conversationItem at index=$index") - } - conversationItems.removeAt(index) - if (debugLogConversation) { - Log.w(TAG, "onServerEventResponseDone: adding $conversationItem at end") - } - conversationItems.add(conversationItem) - } - } + override fun onServerEventResponseDone(realtimeServerEventResponseDone: RealtimeServerEventResponseDone) { } - override fun onServerEventResponseFunctionCallArgumentsDelta( - realtimeServerEventResponseFunctionCallArgumentsDelta: RealtimeServerEventResponseFunctionCallArgumentsDelta - ) { + override fun onServerEventResponseFunctionCallArgumentsDelta(realtimeServerEventResponseFunctionCallArgumentsDelta: RealtimeServerEventResponseFunctionCallArgumentsDelta) { Log.d(TAG, "onServerEventResponseFunctionCallArgumentsDelta($realtimeServerEventResponseFunctionCallArgumentsDelta)") } - override fun onServerEventResponseFunctionCallArgumentsDone( - realtimeServerEventResponseFunctionCallArgumentsDone: RealtimeServerEventResponseFunctionCallArgumentsDone - ) { + override fun onServerEventResponseFunctionCallArgumentsDone(realtimeServerEventResponseFunctionCallArgumentsDone: RealtimeServerEventResponseFunctionCallArgumentsDone) { Log.d(TAG, "onServerEventResponseFunctionCallArgumentsDone($realtimeServerEventResponseFunctionCallArgumentsDone)") } - override fun onServerEventResponseOutputItemAdded( - realtimeServerEventResponseOutputItemAdded: RealtimeServerEventResponseOutputItemAdded - ) { + override fun onServerEventResponseOutputItemAdded(realtimeServerEventResponseOutputItemAdded: RealtimeServerEventResponseOutputItemAdded) { Log.d(TAG, "onServerEventResponseOutputItemAdded($realtimeServerEventResponseOutputItemAdded)") } - override fun onServerEventResponseOutputItemDone( - realtimeServerEventResponseOutputItemDone: RealtimeServerEventResponseOutputItemDone - ) { + override fun onServerEventResponseOutputItemDone(realtimeServerEventResponseOutputItemDone: RealtimeServerEventResponseOutputItemDone) { Log.d(TAG, "onServerEventResponseOutputItemDone($realtimeServerEventResponseOutputItemDone)") } - override fun onServerEventResponseTextDelta( - realtimeServerEventResponseTextDelta: RealtimeServerEventResponseTextDelta - ) { + override fun onServerEventResponseTextDelta(realtimeServerEventResponseTextDelta: RealtimeServerEventResponseTextDelta) { Log.d(TAG, "onServerEventResponseTextDelta($realtimeServerEventResponseTextDelta)") } - override fun onServerEventResponseTextDone( - realtimeServerEventResponseTextDone: RealtimeServerEventResponseTextDone - ) { + override fun onServerEventResponseTextDone(realtimeServerEventResponseTextDone: RealtimeServerEventResponseTextDone) { Log.d(TAG, "onServerEventResponseTextDone($realtimeServerEventResponseTextDone)") } - override fun onServerEventSessionCreated( - realtimeServerEventSessionCreated: RealtimeServerEventSessionCreated - ) { + override fun onServerEventSessionCreated(realtimeServerEventSessionCreated: RealtimeServerEventSessionCreated) { Log.d(TAG, "onServerEventSessionCreated($realtimeServerEventSessionCreated)") if (debugToastVerbose) { showToast(context = context, text = "Session Created", forceInvokeOnMain = true) } } - override fun onServerEventSessionUpdated( - realtimeServerEventSessionUpdated: RealtimeServerEventSessionUpdated - ) { + override fun onServerEventSessionUpdated(realtimeServerEventSessionUpdated: RealtimeServerEventSessionUpdated) { Log.d(TAG, "onServerEventSessionUpdated($realtimeServerEventSessionUpdated)") if (debugToastVerbose) { showToast(context = context, text = "Session Updated", forceInvokeOnMain = true) @@ -957,86 +781,106 @@ fun MobileApp(mobileViewModel: MobileViewModel? = null) { modifier = Modifier.padding(start = 4.dp), text = "Conversation:", ) - IconButton(onClick = { conversationItems.clear() }) { + IconButton(onClick = { mobileViewModel?.conversationItemsClear() }) { Icon( painterResource(id = R.drawable.baseline_clear_all_24), contentDescription = "Clear All", ) } } - LazyColumn( - modifier = Modifier - .border( - 1.dp, - Color.LightGray, - shape = RoundedCornerShape(8.dp) + if (conversationItems.isEmpty()) { + Box( + modifier = Modifier + .border( + 1.dp, + Color.LightGray, + shape = RoundedCornerShape(8.dp) + ) + .fillMaxWidth() + .weight(1f), + contentAlignment = Alignment.Center + ) { + Text( + text = "No conversation items", + textAlign = TextAlign.Center, ) - .fillMaxWidth() - .weight(1f), - contentPadding = PaddingValues(8.dp), - verticalArrangement = Arrangement.spacedBy(8.dp), - state = conversationListState, - ) { - items( - count = conversationItems.size, - ) { index -> - val item = conversationItems[index] - val avatarResId: Int - val horizontalAlignment: Arrangement.Horizontal - val contentAlignment: Alignment - val textAlign: TextAlign - when (item.speaker) { - ConversationSpeaker.Local -> { - avatarResId = R.drawable.baseline_person_24 - horizontalAlignment = Arrangement.Start - contentAlignment = Alignment.CenterStart - textAlign = TextAlign.Start - } - ConversationSpeaker.Remote -> { - avatarResId = R.drawable.baseline_memory_24 - horizontalAlignment = Arrangement.End - contentAlignment = Alignment.CenterEnd - textAlign = TextAlign.End - } - } - Row( - modifier = Modifier - .fillMaxWidth(), - verticalAlignment = Alignment.CenterVertically, - horizontalArrangement = horizontalAlignment - ) { - if (item.speaker == ConversationSpeaker.Local) { - Icon( - painterResource(id = avatarResId), - contentDescription = "Local", - ) - Spacer(modifier = Modifier.width(8.dp)) + } + } else { + LazyColumn( + modifier = Modifier + .border( + 1.dp, + Color.LightGray, + shape = RoundedCornerShape(8.dp) + ) + .fillMaxWidth() + .weight(1f), + verticalArrangement = Arrangement.spacedBy(8.dp), + contentPadding = PaddingValues(8.dp), + state = conversationListState, + ) { + items( + count = conversationItems.size, + ) { index -> + val item = conversationItems[index] + val avatarResId: Int + val horizontalAlignment: Arrangement.Horizontal + val contentAlignment: Alignment + val textAlign: TextAlign + when (item.speaker) { + MobileViewModel.ConversationSpeaker.Local -> { + avatarResId = R.drawable.baseline_person_24 + horizontalAlignment = Arrangement.Start + contentAlignment = Alignment.CenterStart + textAlign = TextAlign.Start + } + + MobileViewModel.ConversationSpeaker.Remote -> { + avatarResId = R.drawable.baseline_memory_24 + horizontalAlignment = Arrangement.End + contentAlignment = Alignment.CenterEnd + textAlign = TextAlign.End + } } - Box( + Row( modifier = Modifier - .weight(1f), - contentAlignment = contentAlignment + .fillMaxWidth(), + verticalAlignment = Alignment.CenterVertically, + horizontalArrangement = horizontalAlignment ) { - SelectionContainer { - Text( - modifier = Modifier - .border( - 1.dp, - Color.Gray, - shape = RoundedCornerShape(8.dp) - ) - .padding(8.dp), - text = item.text, - textAlign = textAlign, + if (item.speaker == MobileViewModel.ConversationSpeaker.Local) { + Icon( + painterResource(id = avatarResId), + contentDescription = "Local", + ) + Spacer(modifier = Modifier.width(8.dp)) + } + Box( + modifier = Modifier + .weight(1f), + contentAlignment = contentAlignment + ) { + SelectionContainer { + Text( + modifier = Modifier + .border( + 1.dp, + Color.Gray, + shape = RoundedCornerShape(8.dp) + ) + .padding(8.dp), + text = item.text, + textAlign = textAlign, + ) + } + } + if (item.speaker == MobileViewModel.ConversationSpeaker.Remote) { + Spacer(modifier = Modifier.width(8.dp)) + Icon( + painterResource(id = avatarResId), + contentDescription = "Remote", ) } - } - if (item.speaker == ConversationSpeaker.Remote) { - Spacer(modifier = Modifier.width(8.dp)) - Icon( - painterResource(id = avatarResId), - contentDescription = "Remote", - ) } } } diff --git a/mobile/src/main/java/com/swooby/alfredai/MobileForegroundService.kt b/mobile/src/main/java/com/swooby/alfredai/MobileForegroundService.kt new file mode 100644 index 0000000..c32600d --- /dev/null +++ b/mobile/src/main/java/com/swooby/alfredai/MobileForegroundService.kt @@ -0,0 +1,272 @@ +package com.swooby.alfredai + +import android.app.Notification +import android.app.NotificationChannel +import android.app.NotificationManager +import android.app.PendingIntent +import android.app.Service +import android.content.BroadcastReceiver +import android.content.Context +import android.content.Intent +import android.content.IntentFilter +import android.content.pm.ServiceInfo +import android.util.Log +import android.widget.Toast +import androidx.core.app.NotificationCompat +import androidx.core.app.Person +import androidx.core.graphics.drawable.IconCompat +import com.swooby.alfredai.AppUtils.showToast +import com.swooby.alfredai.MobileViewModel.ConnectionState +import kotlinx.coroutines.CoroutineScope +import kotlinx.coroutines.Dispatchers +import kotlinx.coroutines.Job +import kotlinx.coroutines.cancel +import kotlinx.coroutines.launch + +class MobileForegroundService : Service() { + companion object { + private const val TAG = "MobileForegroundService" + + /** + * Must compliment AndroidManifest's `foregroundServiceType` value for this service. + */ + const val FOREGROUND_SERVICE_TYPE = + //ServiceInfo.FOREGROUND_SERVICE_TYPE_MICROPHONE + //ServiceInfo.FOREGROUND_SERVICE_TYPE_REMOTE_MESSAGING + ServiceInfo.FOREGROUND_SERVICE_TYPE_SPECIAL_USE + + private const val ACTION_DISCONNECT = "com.swooby.alfredai.action.DISCONNECT" + + private const val CHANNEL_ID = "FOREGROUND_SERVICE_CHANNEL" + private const val CHANNEL_NAME = "Foreground Service Channel" + private const val CHANNEL_DESCRIPTION = "Non-dismissible notifications for session status" + private const val NOTIFICATION_ID_SERVICE = 101 + + private const val CONNECTION_NAME = "OpenAI Realtime" + + fun start(context: Context) { + val intent = Intent(context, MobileForegroundService::class.java) + context.startForegroundService(intent) + } + + fun stop(context: Context) { + val intent = Intent(context, MobileForegroundService::class.java) + context.stopService(intent) + } + } + + private val serviceScope = CoroutineScope(Dispatchers.Main + Job()) + + private val mobileViewModel by lazy { (application as AlfredAiApp).mobileViewModel } + + private var isForegroundStarted = false + + override fun onBind(intent: Intent?) = null + + override fun onCreate() { + Log.d(TAG, "onCreate()") + super.onCreate() + createNotificationChannel() + observeConnectionStateForNotifications() + } + + override fun onDestroy() { + Log.d(TAG, "onDestroy()") + super.onDestroy() + isForegroundStarted = false + serviceScope.cancel() + } + + override fun onStartCommand(intent: Intent?, flags: Int, startId: Int): Int { + Log.d(TAG, "onStartCommand(intent=$intent)") + when (intent?.action) { + ACTION_DISCONNECT -> { + Log.i(TAG, "onStartCommand: ACTION_DISCONNECT") + disconnectAndStopForegroundService() + } + } + return START_STICKY + } + + private fun observeConnectionStateForNotifications() { + serviceScope.launch { + mobileViewModel.connectionStateFlow.collect { connectionState -> + Log.i(TAG, "connectionStateFlow: connectionState=$connectionState") + showNotification(connectionState) + } + } + } + + private fun showNotification(connectionState: ConnectionState) { + Log.d(TAG, "showNotification(connectionState=$connectionState)") + when (connectionState) { + ConnectionState.Connecting -> { + try { + isForegroundStarted = true + startForeground( + NOTIFICATION_ID_SERVICE, + createNotification( + contentTitle = "AlfredAI: Connecting...", + contentText = "Connecting to $CONNECTION_NAME..." + ), + FOREGROUND_SERVICE_TYPE, + ) + } catch (e: SecurityException) { + Log.e(TAG, "connectionStateFlow: SecurityException: ${e.message}") + isForegroundStarted = false + /* + + I see this if I press the `Home` button immediately after launching the app... + ...IF WE ARE USING THE PREFERRED [in AndroidManifest.xml]: + * `` + * `` + + `MobileForegroundService com.swooby.alfredai E onStartCommand: SecurityException: + Starting FGS with type microphone callerApp=ProcessRecord{28abe4 12711:com.swooby.alfredai/u0a211} + targetSDK=34 requires permissions: all of the permissions allOf=true [android.permission.FOREGROUND_SERVICE_MICROPHONE] + any of the permissions allOf=false [android.permission.CAPTURE_AUDIO_HOTWORD, android.permission.CAPTURE_AUDIO_OUTPUT, + android.permission.CAPTURE_MEDIA_OUTPUT, android.permission.CAPTURE_TUNER_AUDIO_INPUT, + android.permission.CAPTURE_VOICE_COMMUNICATION_OUTPUT, android.permission.RECORD_AUDIO] + and the app must be in the eligible state/exemptions to access the foreground only permission` + + The key phrase is that last line. + + https://developer.android.com/develop/background-work/services/fgs/service-types#microphone + + "Note: The RECORD_AUDIO runtime permission is subject to while-in-use restrictions. + For this reason, you cannot create a microphone foreground service while your app + is in the background ..." + + https://developer.android.com/develop/background-work/services/fgs/restrictions-bg-start#wiu-restrictions + + "For while-in-use permissions, this causes a potential problem. If your app has + a while-in-use permission, it only has that permission while it's in the foreground. + This means if your app is in the background, and it tries to create a foreground + service of type camera, location, or microphone, the system sees that your app + doesn't currently have the required permissions, and it throws a SecurityException." + "For this reason, if your foreground service needs a while-in-use permission, you + must call Context.startForegroundService() or Context.bindService() while your + app has a visible activity, unless the service falls into one of the `defined exemptions`." + + https://developer.android.com/develop/background-work/services/fgs/restrictions-bg-start#wiu-restrictions-exemptions + + None of the exemptions realistically apply to this app. + + They naively think that it is acceptable to only start a service when the Activity + is visible. + Sure, we could fairly simply write code to wait until the Activity UI is visible + and **then** start the service, but it is still easy to hit `Home` at the right/wrong + moment before `startForeground` is called and still cause this code to fail. + + I could see creating a `MutableStateFlow` that is observed for changes when an Activity + is added, and only then trying to immediately call `startForeground`, but even that + still screams of `race-condition` problems. + + They introduced a race-condition back in API26 (2017), and have continued to kick + the can down the road on that race-condition, making it worse by piling on even + more pointless restrictions rather than fix the problem: + https://stackoverflow.com/a/49418249/252308 (2017/06) + https://issuetracker.google.com/issues/76112072 (2018/05) + + Instead of fighting all of this, I have chosen to implement the foreground service as: + * `` + * `` + This works perfectly fine for non-PlayStore apps. + If this app ever gets on the PlayStore then they can review it and accept or reject + its specialUse case then. + If they reject then I could always try the observable `MutableStateFlow` option. + + For more info, see: + https://developer.android.com/develop/background-work/services/fgs/launch + + */ + showToast(this@MobileForegroundService, "Don't press `Home` while the app is starting!", Toast.LENGTH_LONG) + disconnectAndStopForegroundService() + } + } + ConnectionState.Connected -> { + updateNotification( + contentTitle = "AlfredAI: Connected", + contentText = "Connected to $CONNECTION_NAME" + ) + } + ConnectionState.Disconnected -> { + disconnectAndStopForegroundService() + } + } + } + + private fun disconnectAndStopForegroundService() { + Log.d(TAG, "disconnectAndStopForegroundService()") + if (isForegroundStarted) { + isForegroundStarted = false + Log.d(TAG, "disconnectAndStopForegroundService: disconnecting") + mobileViewModel.disconnect() + Log.d(TAG, "disconnectAndStopForegroundService: showing disconnected notification") + updateNotification( + contentTitle = "AlfredAI: Disconnected", + contentText = "Disconnected from $CONNECTION_NAME" + ) + Log.d(TAG, "disconnectAndStopForegroundService: stopping foreground service") + stopForeground(STOP_FOREGROUND_REMOVE) + stopSelf() + } + } + + private val notificationManager by lazy { getSystemService(NotificationManager::class.java) } + + private fun createNotificationChannel() { + val importance = NotificationManager.IMPORTANCE_LOW + val channel = NotificationChannel(CHANNEL_ID, CHANNEL_NAME, importance).apply { + description = CHANNEL_DESCRIPTION + } + notificationManager.createNotificationChannel(channel) + } + + private fun createNotification(contentTitle: String, contentText: String): Notification { + val context = this + + val notificationPendingIntent = PendingIntent.getActivity( + context, + 0, + Intent(context, MobileActivity::class.java).apply { + flags = Intent.FLAG_ACTIVITY_NEW_TASK or Intent.FLAG_ACTIVITY_CLEAR_TASK + }, + PendingIntent.FLAG_UPDATE_CURRENT or PendingIntent.FLAG_IMMUTABLE + ) + + val disconnectPendingIntent = PendingIntent.getService( + context, + 0, + Intent(context, MobileForegroundService::class.java).apply { + action = ACTION_DISCONNECT + }, + PendingIntent.FLAG_UPDATE_CURRENT or PendingIntent.FLAG_IMMUTABLE + ) + + val builder = NotificationCompat.Builder(context, CHANNEL_ID) + .setContentTitle(contentTitle) + .setContentText(contentText) + .setSmallIcon(R.drawable.alfredai_24) + .setPriority(NotificationCompat.PRIORITY_LOW) + .setOngoing(true) + .setAutoCancel(false) + .setContentIntent(notificationPendingIntent) + if (isForegroundStarted) { + val person = Person.Builder() + .setName(CONNECTION_NAME) + .setIcon(IconCompat.createWithResource(context, R.drawable.alfredai_24)) + .setImportant(true) + .setBot(true) + .build() + builder + .setStyle(NotificationCompat.CallStyle.forOngoingCall(person, disconnectPendingIntent)) + } + return builder.build() + } + + private fun updateNotification(contentTitle: String, contentText: String) { + val notification = createNotification(contentTitle, contentText) + notificationManager.notify(NOTIFICATION_ID_SERVICE, notification) + } +} diff --git a/mobile/src/main/java/com/swooby/alfredai/MobileViewModel.kt b/mobile/src/main/java/com/swooby/alfredai/MobileViewModel.kt index cfb92e0..da64304 100644 --- a/mobile/src/main/java/com/swooby/alfredai/MobileViewModel.kt +++ b/mobile/src/main/java/com/swooby/alfredai/MobileViewModel.kt @@ -1,10 +1,23 @@ package com.swooby.alfredai +import android.app.Activity import android.app.Application +import android.app.Notification +import android.app.NotificationChannel +import android.app.NotificationManager +import android.app.PendingIntent +import android.content.Context +import android.content.Intent import android.util.Log import androidx.compose.runtime.getValue +import androidx.compose.runtime.mutableStateListOf import androidx.compose.runtime.mutableStateOf import androidx.compose.runtime.setValue +import androidx.core.app.NotificationCompat +import androidx.lifecycle.viewModelScope +import com.openai.infrastructure.ApiClient +import com.openai.infrastructure.ClientError +import com.openai.infrastructure.ClientException import com.openai.models.RealtimeServerEventConversationCreated import com.openai.models.RealtimeServerEventConversationItemCreated import com.openai.models.RealtimeServerEventConversationItemDeleted @@ -12,6 +25,7 @@ import com.openai.models.RealtimeServerEventConversationItemInputAudioTranscript import com.openai.models.RealtimeServerEventConversationItemInputAudioTranscriptionFailed import com.openai.models.RealtimeServerEventConversationItemTruncated import com.openai.models.RealtimeServerEventError +import com.openai.models.RealtimeServerEventErrorError import com.openai.models.RealtimeServerEventInputAudioBufferCleared import com.openai.models.RealtimeServerEventInputAudioBufferCommitted import com.openai.models.RealtimeServerEventInputAudioBufferSpeechStarted @@ -39,6 +53,7 @@ import com.openai.models.RealtimeSessionModel import com.openai.models.RealtimeSessionVoice import com.swooby.alfredai.PushToTalkPreferences.Companion.getMaxResponseOutputTokens import com.swooby.alfredai.Utils.playAudioResourceOnce +import com.swooby.alfredai.Utils.quote import com.swooby.alfredai.openai.realtime.RealtimeClient import com.swooby.alfredai.openai.realtime.RealtimeClient.RealtimeClientListener import com.swooby.alfredai.openai.realtime.RealtimeClient.ServerEventOutputAudioBufferAudioStopped @@ -47,16 +62,30 @@ import com.twilio.audioswitch.AudioSwitch import kotlinx.coroutines.CoroutineScope import kotlinx.coroutines.Dispatchers import kotlinx.coroutines.Job +import kotlinx.coroutines.delay import kotlinx.coroutines.flow.MutableStateFlow import kotlinx.coroutines.flow.asStateFlow import kotlinx.coroutines.launch +import org.json.JSONObject import java.math.BigDecimal +import java.net.UnknownHostException class MobileViewModel(application: Application) : SharedViewModel(application) { companion object { - const val DEBUG = true + const val DEBUG = false + + private const val CHANNEL_ID = "SESSION_STATUS_CHANNEL" + private const val CHANNEL_NAME = "Session Status Channel" + private const val CHANNEL_DESCRIPTION = "Dismissible notifications for notable session status events" + private const val NOTIFICATION_ID_SESSION = 1001 + + @Suppress( + "SimplifyBooleanWithConstants", + "KotlinConstantConditions", + ) + val debugSimulateSessionExpired = BuildConfig.DEBUG && false } override val TAG: String @@ -66,16 +95,89 @@ class MobileViewModel(application: Application) : override val remoteCapabilityName: String get() = "verify_remote_alfredai_wear_app" + private var jobDebugFakeSessionExpired: Job? = null + override fun init() { super.init() + + createNotificationChannel() + audioSwitchStart() + + observeConnectionForServiceStartStop() + + if (debugSimulateSessionExpired) { + jobDebugFakeSessionExpired = viewModelScope.launch { + delay(20_000L) // Delay for 20 seconds... + // Create a fake RealtimeServerEventError + val fakeError = RealtimeServerEventError( + eventId = "fake-event-id", + type = RealtimeServerEventError.Type.error, + error = RealtimeServerEventErrorError( + type = "invalid_request_error", + message = "Your session hit the maximum duration of 30 minutes.", + code = "session_expired", + ) + ) + listener.onServerEventSessionExpired(fakeError) + Log.d(TAG, "Debug: Fake session expired event triggered.") + disconnect() + } + } } override fun close() { super.close() + disconnect() + onDisconnecting() + onDisconnected() audioSwitchStop() } + fun disconnect() { + if (realtimeClient?.isConnected == true) { + realtimeClient?.disconnect() + } + } + + /** + * Called by listener.onDisconnected() + */ + private fun onDisconnecting() { + if (jobDebugFakeSessionExpired?.isActive == true) { + jobDebugFakeSessionExpired?.cancel() + } + _connectionStateFlow.value = ConnectionState.Disconnected + } + + /** + * Called by listener.onDisconnected() + */ + private fun onDisconnected() { + audioSwitch.deactivate() + } + + // + //region In-Use-Activities Detection + // + + private val inUseActivities = mutableSetOf() + + private val hasNoInUseActivities: Boolean + get() = inUseActivities.isEmpty() + + fun onStartOrResume(activity: Activity) { + inUseActivities.add(activity) + } + + fun onPauseOrStop(activity: Activity) { + inUseActivities.remove(activity) + } + + // + //endregion + // + // //region Preferences // @@ -242,10 +344,16 @@ class MobileViewModel(application: Application) : _realtimeClient?.disconnect() + val httpClient = if (DEBUG) + RealtimeClient.httpLoggingClient + else + ApiClient.defaultClient + _realtimeClient = RealtimeClient( - getApplication(), - prefs.apiKey, - sessionConfig, + applicationContext = getApplication(), + dangerousApiKey = prefs.apiKey, + sessionConfig = sessionConfig, + httpClient = httpClient, debug = DEBUG ) _realtimeClient?.addListener(listener) @@ -347,6 +455,125 @@ class MobileViewModel(application: Application) : } } } + // + //region Conversation + // + + @Suppress( + "SimplifyBooleanWithConstants", + //"KotlinConstantConditions", + ) + val debugLogConversation = BuildConfig.DEBUG && true + + enum class ConversationSpeaker { + Local, + Remote, + } + + data class ConversationItem( + val id: String?, + val speaker: ConversationSpeaker, + val initialText: String + ) { + var text by mutableStateOf(initialText) + } + + private val _conversationItems = mutableStateListOf() + val conversationItems: List + get() = _conversationItems + + fun conversationItemsClear() { + _conversationItems.clear() + } + + // + //endregion + // + + // + //region Service Start/Stop & Notifications + // + + enum class ConnectionState { + Connecting, + Connected, + Disconnected, + } + + private val _connectionStateFlow = MutableStateFlow(ConnectionState.Disconnected) + val connectionStateFlow = _connectionStateFlow.asStateFlow() + + private fun observeConnectionForServiceStartStop() { + viewModelScope.launch { + connectionStateFlow.collect { connectionState -> + Log.i(TAG, "connectionStateFlow: connectionState=$connectionState") + when (connectionState) { + ConnectionState.Connecting -> { + MobileForegroundService.start(getApplication()) + } + ConnectionState.Disconnected -> { + MobileForegroundService.stop(getApplication()) + } + else -> {} + } + } + } + } + + private val notificationManager by lazy { getApplication().getSystemService(NotificationManager::class.java) } + + private fun createNotificationChannel() { + val importance = NotificationManager.IMPORTANCE_DEFAULT + val channel = NotificationChannel(CHANNEL_ID, CHANNEL_NAME, importance).apply { + description = CHANNEL_DESCRIPTION + } + notificationManager.createNotificationChannel(channel) + } + + private fun createNotification(contentTitle: String, contentText: String): Notification { + val context = getApplication() + + val notificationPendingIntent = PendingIntent.getActivity( + context, + 0, + Intent(context, MobileActivity::class.java).apply { + flags = Intent.FLAG_ACTIVITY_NEW_TASK or Intent.FLAG_ACTIVITY_CLEAR_TASK + }, + PendingIntent.FLAG_UPDATE_CURRENT or PendingIntent.FLAG_IMMUTABLE + ) + + return NotificationCompat.Builder(context, CHANNEL_ID) + .setContentTitle(contentTitle) + .setContentText(contentText) + .setSmallIcon(R.drawable.alfredai_24) + .setPriority(NotificationCompat.PRIORITY_HIGH) + .setAutoCancel(true) // Makes the notification dismissible + .setOnlyAlertOnce(true) // Prevents multiple alerts if the notification is updated + .setContentIntent(notificationPendingIntent) + .build() + } + + private fun updateNotification(contentTitle: String, contentText: String) { + val notification = createNotification(contentTitle, contentText) + notificationManager.notify(NOTIFICATION_ID_SESSION, notification) + } + + private fun showNotificationSessionExpired(contentText: String) { + updateNotification("AlfredAI: Session Expired", contentText) + } + + private fun showNotificationMysteriousUnknownHostException() { + val message = "Mysterious \"Unable to resolve host `api.openai.com`\" error; Try again." + updateNotification("AlfredAI: Client Error", quote(message)) + } + + private fun showNotificationSessionClientError(message: String) { + updateNotification("AlfredAI: Client Error", quote(message)) + } + + // + //endregion + // // //region persistent RealtimeClientListener @@ -355,29 +582,74 @@ class MobileViewModel(application: Application) : private val listeners = mutableListOf() private val listener = object : RealtimeClientListener { override fun onConnecting() { + _connectionStateFlow.value = ConnectionState.Connecting + + conversationItemsClear() + listeners.forEach { it.onConnecting() } } override fun onError(error: Exception) { + when (error) { + is UnknownHostException -> { + /* + 2025-01-17 17:43:21.190 27419-27475 okhttp.OkHttpClient com.swooby.alfredai I <-- HTTP FAILED: java.net.UnknownHostException: Unable to resolve host "api.openai.com": No address associated with hostname + 2025-01-17 17:43:21.190 27419-27475 RealtimeClient com.swooby.alfredai E connect: exception=java.net.UnknownHostException: Unable to resolve host "api.openai.com": No address associated with hostname + 2025-01-17 17:43:21.190 27419-27475 PushToTalkActivity com.swooby.alfredai D onError(java.net.UnknownHostException: Unable to resolve host "api.openai.com": No address associated with hostname) + 2025-01-17 17:43:21.191 27419-27475 RealtimeClient com.swooby.alfredai D +disconnect() + 2025-01-17 17:43:21.191 27419-27475 RealtimeClient com.swooby.alfredai D -disconnect() + 2025-01-17 17:43:21.191 27419-27475 RealtimeClient com.swooby.alfredai D connect: ephemeralApiKey=null + 2025-01-17 17:43:21.191 27419-27475 PushToTalkActivity com.swooby.alfredai D onError(com.openai.infrastructure.ClientException: No Ephemeral API Key In Response) + */ + showNotificationMysteriousUnknownHostException() + } + + is ClientException -> { + var message: String? = error.message ?: error.toString() + val response = error.response as? ClientError<*> + if (response != null) { + var expectedJsonBody = response.body + if (expectedJsonBody == null) { + message = response.message + } + if (expectedJsonBody == null && message == null) { + expectedJsonBody = "{ \"error\": { \"code\": \"-1\" }" + } + if (expectedJsonBody != null) { + val jsonObject = JSONObject(expectedJsonBody.toString()) + val jsonError = jsonObject.getJSONObject("error") + val errorCode = jsonError.getString("code") + message = errorCode + } + } + showNotificationSessionClientError(message!!) + } + } listeners.forEach { it.onError(error) } } override fun onConnected() { + _connectionStateFlow.value = ConnectionState.Connected + audioSwitch.activate() + listeners.forEach { it.onConnected() } } override fun onDisconnected() { - audioSwitch.deactivate() + this@MobileViewModel.onDisconnecting() + listeners.forEach { it.onDisconnected() } + + this@MobileViewModel.onDisconnected() } override fun onBinaryMessageReceived(data: ByteArray): Boolean { @@ -395,42 +667,85 @@ class MobileViewModel(application: Application) : } override fun onServerEventConversationCreated(realtimeServerEventConversationCreated: RealtimeServerEventConversationCreated) { + if (debugLogConversation) { + Log.d(TAG, "onServerEventConversationCreated($realtimeServerEventConversationCreated)") + } listeners.forEach { it.onServerEventConversationCreated(realtimeServerEventConversationCreated) } } override fun onServerEventConversationItemCreated(realtimeServerEventConversationItemCreated: RealtimeServerEventConversationItemCreated) { + if (debugLogConversation) { + Log.d(TAG, "onServerEventConversationItemCreated($realtimeServerEventConversationItemCreated)") + } + val item = realtimeServerEventConversationItemCreated.item + val id = item.id + val content = item.content + val text = content?.joinToString(separator = "") { it.text ?: "" } ?: "" + if (text.isNotBlank()) { + if (debugLogConversation) { + Log.w(TAG, "onServerEventConversationItemCreated: conversationItems.add(ConversationItem(id=${quote(id)}, initialText=${quote(text)}") + } + _conversationItems.add( + ConversationItem( + id = id, + speaker = ConversationSpeaker.Local, + initialText = text + ) + ) + } listeners.forEach { it.onServerEventConversationItemCreated(realtimeServerEventConversationItemCreated) } } override fun onServerEventConversationItemDeleted(realtimeServerEventConversationItemDeleted: RealtimeServerEventConversationItemDeleted) { + if (debugLogConversation) { + Log.d(TAG, "onServerEventConversationItemDeleted($realtimeServerEventConversationItemDeleted)") + } listeners.forEach { it.onServerEventConversationItemDeleted(realtimeServerEventConversationItemDeleted) } } - override fun onServerEventConversationItemInputAudioTranscriptionCompleted( - realtimeServerEventConversationItemInputAudioTranscriptionCompleted: RealtimeServerEventConversationItemInputAudioTranscriptionCompleted - ) { + override fun onServerEventConversationItemInputAudioTranscriptionCompleted(realtimeServerEventConversationItemInputAudioTranscriptionCompleted: RealtimeServerEventConversationItemInputAudioTranscriptionCompleted) { + if (debugLogConversation) { + Log.d(TAG, "onServerEventConversationItemInputAudioTranscriptionCompleted($realtimeServerEventConversationItemInputAudioTranscriptionCompleted)") + } + val id = realtimeServerEventConversationItemInputAudioTranscriptionCompleted.itemId + val transcript = + realtimeServerEventConversationItemInputAudioTranscriptionCompleted.transcript.trim() // DO TRIM! + if (transcript.isNotBlank()) { + if (debugLogConversation) { + Log.w(TAG, "onServerEventConversationItemInputAudioTranscriptionCompleted: conversationItems.add(ConversationItem(id=${quote(id)}, initialText=${quote(transcript)}") + } + _conversationItems.add( + ConversationItem( + id = id, + speaker = ConversationSpeaker.Local, + initialText = transcript + ) + ) + } listeners.forEach { it.onServerEventConversationItemInputAudioTranscriptionCompleted(realtimeServerEventConversationItemInputAudioTranscriptionCompleted) } } - override fun onServerEventConversationItemInputAudioTranscriptionFailed( - realtimeServerEventConversationItemInputAudioTranscriptionFailed: RealtimeServerEventConversationItemInputAudioTranscriptionFailed - ) { + override fun onServerEventConversationItemInputAudioTranscriptionFailed(realtimeServerEventConversationItemInputAudioTranscriptionFailed: RealtimeServerEventConversationItemInputAudioTranscriptionFailed) { + if (debugLogConversation) { + Log.d(TAG, "onServerEventConversationItemInputAudioTranscriptionFailed($realtimeServerEventConversationItemInputAudioTranscriptionFailed)") + } listeners.forEach { it.onServerEventConversationItemInputAudioTranscriptionFailed(realtimeServerEventConversationItemInputAudioTranscriptionFailed) } } - override fun onServerEventConversationItemTruncated( - realtimeServerEventConversationItemTruncated: RealtimeServerEventConversationItemTruncated - ) { + override fun onServerEventConversationItemTruncated(realtimeServerEventConversationItemTruncated: RealtimeServerEventConversationItemTruncated) { + if (debugLogConversation) { + Log.d(TAG, "onServerEventConversationItemTruncated($realtimeServerEventConversationItemTruncated)") + } listeners.forEach { it.onServerEventConversationItemTruncated(realtimeServerEventConversationItemTruncated) } @@ -448,25 +763,19 @@ class MobileViewModel(application: Application) : } } - override fun onServerEventInputAudioBufferCommitted( - realtimeServerEventInputAudioBufferCommitted: RealtimeServerEventInputAudioBufferCommitted - ) { + override fun onServerEventInputAudioBufferCommitted(realtimeServerEventInputAudioBufferCommitted: RealtimeServerEventInputAudioBufferCommitted) { listeners.forEach { it.onServerEventInputAudioBufferCommitted(realtimeServerEventInputAudioBufferCommitted) } } - override fun onServerEventInputAudioBufferSpeechStarted( - realtimeServerEventInputAudioBufferSpeechStarted: RealtimeServerEventInputAudioBufferSpeechStarted - ) { + override fun onServerEventInputAudioBufferSpeechStarted(realtimeServerEventInputAudioBufferSpeechStarted: RealtimeServerEventInputAudioBufferSpeechStarted) { listeners.forEach { it.onServerEventInputAudioBufferSpeechStarted(realtimeServerEventInputAudioBufferSpeechStarted) } } - override fun onServerEventInputAudioBufferSpeechStopped( - realtimeServerEventInputAudioBufferSpeechStopped: RealtimeServerEventInputAudioBufferSpeechStopped - ) { + override fun onServerEventInputAudioBufferSpeechStopped(realtimeServerEventInputAudioBufferSpeechStopped: RealtimeServerEventInputAudioBufferSpeechStopped) { listeners.forEach { it.onServerEventInputAudioBufferSpeechStopped(realtimeServerEventInputAudioBufferSpeechStopped) } @@ -496,43 +805,101 @@ class MobileViewModel(application: Application) : } } - override fun onServerEventResponseAudioTranscriptDelta( - realtimeServerEventResponseAudioTranscriptDelta: RealtimeServerEventResponseAudioTranscriptDelta - ) { + override fun onServerEventResponseAudioTranscriptDelta(realtimeServerEventResponseAudioTranscriptDelta: RealtimeServerEventResponseAudioTranscriptDelta) { + if (debugLogConversation) { + Log.d(TAG, "onServerEventResponseAudioTranscriptDelta($realtimeServerEventResponseAudioTranscriptDelta)") + } + val id = realtimeServerEventResponseAudioTranscriptDelta.itemId + val delta = + realtimeServerEventResponseAudioTranscriptDelta.delta // DO **NOT** TRIM! + + // Append this delta to any current in progress conversation, + // or create a new conversation + val index = _conversationItems.indexOfFirst { it.id == id } + if (debugLogConversation) { + Log.w(TAG, "onServerEventResponseAudioTranscriptDelta: id=$id, delta=$delta, index=$index") + } + if (index == -1) { + val conversationItem = ConversationItem( + id = id, + speaker = ConversationSpeaker.Remote, + initialText = delta, + ) + if (debugLogConversation) { + Log.w(TAG, "conversationItems.add($conversationItem)") + } + _conversationItems.add(conversationItem) + } else { + val conversationItem = _conversationItems[index] + conversationItem.text += delta + if (debugLogConversation) { + Log.w(TAG, "conversationItems.set($index, $conversationItem)") + } + _conversationItems[index] = conversationItem + } listeners.forEach { it.onServerEventResponseAudioTranscriptDelta(realtimeServerEventResponseAudioTranscriptDelta) } } - override fun onServerEventResponseAudioTranscriptDone( - realtimeServerEventResponseAudioTranscriptDone: RealtimeServerEventResponseAudioTranscriptDone - ) { + override fun onServerEventResponseAudioTranscriptDone(realtimeServerEventResponseAudioTranscriptDone: RealtimeServerEventResponseAudioTranscriptDone) { + if (debugLogConversation) { + Log.d(TAG, "onServerEventResponseAudioTranscriptDone($realtimeServerEventResponseAudioTranscriptDone)") + } listeners.forEach { it.onServerEventResponseAudioTranscriptDone(realtimeServerEventResponseAudioTranscriptDone) } } - override fun onServerEventResponseContentPartAdded( - realtimeServerEventResponseContentPartAdded: RealtimeServerEventResponseContentPartAdded - ) { + override fun onServerEventResponseContentPartAdded(realtimeServerEventResponseContentPartAdded: RealtimeServerEventResponseContentPartAdded) { + if (debugLogConversation) { + Log.d(TAG, "onServerEventResponseContentPartAdded($realtimeServerEventResponseContentPartAdded)") + } listeners.forEach { it.onServerEventResponseContentPartAdded(realtimeServerEventResponseContentPartAdded) } } override fun onServerEventResponseContentPartDone(realtimeServerEventResponseContentPartDone: RealtimeServerEventResponseContentPartDone) { + if (debugLogConversation) { + Log.d(TAG, "onServerEventResponseContentPartDone($realtimeServerEventResponseContentPartDone)") + } listeners.forEach { it.onServerEventResponseContentPartDone(realtimeServerEventResponseContentPartDone) } } override fun onServerEventResponseCreated(realtimeServerEventResponseCreated: RealtimeServerEventResponseCreated) { + if (debugLogConversation) { + Log.d(TAG, "onServerEventResponseCreated($realtimeServerEventResponseCreated)") + } listeners.forEach { it.onServerEventResponseCreated(realtimeServerEventResponseCreated) } } override fun onServerEventResponseDone(realtimeServerEventResponseDone: RealtimeServerEventResponseDone) { + if (debugLogConversation) { + Log.d(TAG, "onServerEventResponseDone($realtimeServerEventResponseDone)") + } + realtimeServerEventResponseDone.response.output?.forEach { outputConversationItem -> + if (debugLogConversation) { + Log.w(TAG, "onServerEventResponseDone: outputConversationItem=$outputConversationItem") + } + val id = outputConversationItem.id ?: return@forEach + val index = _conversationItems.indexOfFirst { it.id == id } + if (index != -1) { + val conversationItem = _conversationItems[index] + if (debugLogConversation) { + Log.w(TAG, "onServerEventResponseDone: removing $conversationItem at index=$index") + } + _conversationItems.removeAt(index) + if (debugLogConversation) { + Log.w(TAG, "onServerEventResponseDone: adding $conversationItem at end") + } + _conversationItems.add(conversationItem) + } + } listeners.forEach { it.onServerEventResponseDone(realtimeServerEventResponseDone) } @@ -591,6 +958,11 @@ class MobileViewModel(application: Application) : } override fun onServerEventSessionExpired(realtimeServerEventError: RealtimeServerEventError) { + if (hasNoInUseActivities) { + val message = realtimeServerEventError.error.message + showNotificationSessionExpired(message) + } + listeners.forEach { it.onServerEventSessionExpired(realtimeServerEventError) } diff --git a/mobile/src/main/res/drawable/alfredai_24.xml b/mobile/src/main/res/drawable/alfredai_24.xml new file mode 100644 index 0000000..0be13d3 --- /dev/null +++ b/mobile/src/main/res/drawable/alfredai_24.xml @@ -0,0 +1,45 @@ + + + + + + + + + + + + + + + + + + + + + diff --git a/shared/src/main/java/com/swooby/alfredai/SharedViewModel.kt b/shared/src/main/java/com/swooby/alfredai/SharedViewModel.kt index e9069ef..2d321d5 100644 --- a/shared/src/main/java/com/swooby/alfredai/SharedViewModel.kt +++ b/shared/src/main/java/com/swooby/alfredai/SharedViewModel.kt @@ -46,11 +46,13 @@ abstract class SharedViewModel(application: Application) : } open fun init() { + Log.d(TAG, "init()") messageClient.addListener(messageClientListener) searchForRemoteAppNode() } open fun close() { + Log.d(TAG, "close()") messageClient.removeListener(messageClientListener) } diff --git a/shared/src/main/java/com/swooby/alfredai/Utils.kt b/shared/src/main/java/com/swooby/alfredai/Utils.kt index b64caaf..a88e2c1 100644 --- a/shared/src/main/java/com/swooby/alfredai/Utils.kt +++ b/shared/src/main/java/com/swooby/alfredai/Utils.kt @@ -119,6 +119,13 @@ object Utils { android.Manifest.permission.BLUETOOTH_SCAN, -> "Nearby devices" + android.Manifest.permission.POST_NOTIFICATIONS, + android.Manifest.permission.FOREGROUND_SERVICE, + //android.Manifest.permission.FOREGROUND_SERVICE_MICROPHONE, + //android.Manifest.permission.FOREGROUND_SERVICE_REMOTE_MESSAGING, + android.Manifest.permission.FOREGROUND_SERVICE_SPECIAL_USE, + -> "Notifications" + //... else diff --git a/shared/src/main/java/com/swooby/alfredai/openai/realtime/RealtimeClient.kt b/shared/src/main/java/com/swooby/alfredai/openai/realtime/RealtimeClient.kt index fe4f0d7..42eb747 100644 --- a/shared/src/main/java/com/swooby/alfredai/openai/realtime/RealtimeClient.kt +++ b/shared/src/main/java/com/swooby/alfredai/openai/realtime/RealtimeClient.kt @@ -1,8 +1,6 @@ package com.swooby.alfredai.openai.realtime import android.content.Context -import android.media.AudioDeviceInfo -import android.media.AudioManager import com.openai.apis.RealtimeApi import com.openai.infrastructure.ApiClient import com.openai.infrastructure.ClientException @@ -87,11 +85,19 @@ import java.nio.ByteBuffer class RealtimeClient(private val applicationContext: Context, private val dangerousApiKey: String, private var sessionConfig: RealtimeSessionCreateRequest, - httpClient: OkHttpClient = httpLoggingClient, + httpClient: OkHttpClient = ApiClient.defaultClient, private val debug:Boolean = false) { companion object { private val log = RealtimeLog(RealtimeClient::class) + @Suppress( + "SimplifyBooleanWithConstants", + "KotlinConstantConditions", + ) + private val debugInduceMysteriousUnknownHostException = BuildConfig.DEBUG && false + + private const val MYSTERIOUS_UNKNOWN_HOST_EXCEPTION_MESSAGE = "Unable to resolve host \"api.openai.com\": No address associated with hostname" + val httpLoggingClient = OkHttpClient.Builder() .addInterceptor(HttpLoggingInterceptor().apply { level = HttpLoggingInterceptor.Level.BODY @@ -119,6 +125,8 @@ class RealtimeClient(private val applicationContext: Context, _isConnectingOrConnected = value if (value) { notifyConnecting() + } else { + notifyDisconnected() } } } @@ -191,11 +199,8 @@ class RealtimeClient(private val applicationContext: Context, * https://platform.openai.com/docs/api-reference/realtime-sessions/create */ val realtimeSessionCreateResponse: RealtimeSessionCreateResponse? = try { - @Suppress("SimplifyBooleanWithConstants", "KotlinConstantConditions") - val debugInduceError = BuildConfig.DEBUG && false - @Suppress("KotlinConstantConditions") - if (debugInduceError) { - throw UnknownHostException("Unable to resolve host \"api.openai.com\": No address associated with hostname") + if (debugInduceMysteriousUnknownHostException) { + throw UnknownHostException(MYSTERIOUS_UNKNOWN_HOST_EXCEPTION_MESSAGE) } realtime.createRealtimeSession(sessionConfig) } catch (exception: Exception) { @@ -307,19 +312,7 @@ class RealtimeClient(private val applicationContext: Context, } ) ?: throw IllegalStateException("Failed to create PeerConnection") - // - //region Audio - // - // TODO: abstract this out and allow routing of audio to different audio devices, - // especially bluetooth headset/earplugs/headphones - // TODO: For debugging purposes, is there a way to hear/echo the captured microphone audio? - // Closest thing I can find is to enable session input_audio_transcription, - // but that costs more money! :/ setLocalAudioMicrophone(peerConnectionFactory) - //setLocalAudioSpeaker(applicationContext) - // - //endregion - // val dataChannelInit = DataChannel.Init() val logDc = RealtimeLog(DataChannel::class) @@ -485,23 +478,10 @@ class RealtimeClient(private val applicationContext: Context, localAudioTrackMicrophoneSender = peerConnection?.addTrack(localAudioTrackMicrophone) } - private fun getSpeakerphone(audioManager: AudioManager): AudioDeviceInfo? { - val availableDevices = audioManager.availableCommunicationDevices - return availableDevices.firstOrNull { it.type == AudioDeviceInfo.TYPE_BUILTIN_SPEAKER } - } - - private fun setLocalAudioSpeaker(context: Context) { - val audioManager = context.getSystemService(Context.AUDIO_SERVICE) as AudioManager - audioManager.mode = AudioManager.MODE_IN_COMMUNICATION - getSpeakerphone(audioManager)?.also { - audioManager.setCommunicationDevice(it) - } - } - /** * Essentially unmute or mute the speaker */ - fun setLocalAudioTrackSpeakerEnabled(enabled: Boolean) { + private fun setLocalAudioTrackSpeakerEnabled(enabled: Boolean) { log.d("setLocalAudioTrackSpeakerEnabled($enabled)") remoteAudioTracks.forEach { it.setEnabled(enabled)