diff --git a/README.md b/README.md
index e35216d..1b29cf4 100644
--- a/README.md
+++ b/README.md
@@ -25,15 +25,15 @@ OpenAI Realtime API over WebRTC Push-To-Talk Android Phone[/Mobile] + Watch[/Wea
## TODOs
(Not necessarily in any order)
-1. Background service (easy to do wrong; I want to get the UX right)
-2. Localize strings (I committed the sin of hard coding strings)
-3. Tests (another sin I committed)
-4. Get `Stop` working better
-5. **Standalone** `Wear` version (lower priority; requires adding tiles for settings, conversation, etc)
-6. Add `text` input/output feature
-7. Learn Tool/Function integration
-8. Find way to integrate with Gmail/Tasks/Keep/etc
-9. Find way to save conversations to https://chatgpt.com/ history
+1. Localize strings (I committed the sin of hard coding strings)
+2. Tests (another sin I committed)
+3. Get `Stop` working better
+4. **Standalone** `Wear` version (lower priority; requires adding tiles for settings, conversation, etc)
+5. Add `text` input/output feature
+6. Learn Tool/Function integration
+7. Find a way to integrate with Gmail/Tasks/Keep/etc
+8. Find a way to save conversations to https://chatgpt.com/ history
+9. Implement a `VoiceInteractionService`? https://developer.android.com/reference/android/service/voice/VoiceInteractionService
## Development
* If Mobile or Wear physical device wireless debugging does not connect in Android Studio:
diff --git a/mobile/src/main/AndroidManifest.xml b/mobile/src/main/AndroidManifest.xml
index 9fd85f6..e4eb9f3 100644
--- a/mobile/src/main/AndroidManifest.xml
+++ b/mobile/src/main/AndroidManifest.xml
@@ -14,6 +14,14 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/mobile/src/main/java/com/swooby/alfredai/AlfredAiApp.kt b/mobile/src/main/java/com/swooby/alfredai/AlfredAiApp.kt
index aaa56ea..379e55d 100644
--- a/mobile/src/main/java/com/swooby/alfredai/AlfredAiApp.kt
+++ b/mobile/src/main/java/com/swooby/alfredai/AlfredAiApp.kt
@@ -9,6 +9,7 @@ import androidx.lifecycle.ViewModelProvider
import androidx.lifecycle.ViewModelStore
import androidx.lifecycle.ViewModelStoreOwner
+/*
@MainThread
inline fun ComponentActivity.appViewModels(): Lazy {
return ViewModelLazy(
@@ -18,6 +19,7 @@ inline fun ComponentActivity.appViewModels(): Lazy
{ defaultViewModelCreationExtras }
)
}
+*/
class AlfredAiApp : Application(), ViewModelStoreOwner
{
@@ -25,8 +27,8 @@ class AlfredAiApp : Application(), ViewModelStoreOwner
val mobileViewModel by lazy {
ViewModelProvider(
- this,
- ViewModelProvider.AndroidViewModelFactory.getInstance(this)
+ owner = this,
+ factory = ViewModelProvider.AndroidViewModelFactory.getInstance(this)
)[MobileViewModel::class.java]
}
diff --git a/mobile/src/main/java/com/swooby/alfredai/MobileActivity.kt b/mobile/src/main/java/com/swooby/alfredai/MobileActivity.kt
index bb85263..f9e0506 100644
--- a/mobile/src/main/java/com/swooby/alfredai/MobileActivity.kt
+++ b/mobile/src/main/java/com/swooby/alfredai/MobileActivity.kt
@@ -136,10 +136,10 @@ class MobileActivity : ComponentActivity() {
private const val TAG = "PushToTalkActivity"
}
- private val mobileViewModel: MobileViewModel by appViewModels()
+ private val mobileViewModel by lazy { (application as AlfredAiApp).mobileViewModel }
override fun onCreate(savedInstanceState: Bundle?) {
- Log.d(TAG, "onCreate()")
+ Log.d(TAG, "onCreate(savedInstanceState=$savedInstanceState)")
super.onCreate(savedInstanceState)
enableEdgeToEdge()
setContent {
@@ -147,18 +147,15 @@ class MobileActivity : ComponentActivity() {
}
}
- override fun onDestroy() {
- Log.d(TAG, "onDestroy()")
- super.onDestroy()
-
- // Temporary, until Background/Foreground Service is implemented
- mobileViewModel.realtimeClient?.disconnect()
+ override fun onStart() {
+ super.onStart()
+ mobileViewModel.onStartOrResume(this)
}
-}
-enum class ConversationSpeaker {
- Local,
- Remote,
+ override fun onStop() {
+ super.onStop()
+ mobileViewModel.onPauseOrStop(this)
+ }
}
@OptIn(ExperimentalMaterial3Api::class, ExperimentalFoundationApi::class)
@@ -185,12 +182,6 @@ fun MobileApp(mobileViewModel: MobileViewModel? = null) {
)
val debugConnectDelayMillis = if (BuildConfig.DEBUG && false) 10_000L else 0L
- @Suppress(
- "SimplifyBooleanWithConstants",
- //"KotlinConstantConditions",
- )
- val debugLogConversation = BuildConfig.DEBUG && true
-
@Suppress(
"SimplifyBooleanWithConstants",
"KotlinConstantConditions",
@@ -201,37 +192,35 @@ fun MobileApp(mobileViewModel: MobileViewModel? = null) {
//region Conversation
//
- data class ConversationItem(
- val id: String?,
- val speaker: ConversationSpeaker,
- val initialText: String
- ) {
- var text by mutableStateOf(initialText)
- }
-
- val conversationItems = remember { mutableStateListOf() }
- @Suppress(
- "SimplifyBooleanWithConstants",
- "KotlinConstantConditions",
- )
- if (BuildConfig.DEBUG && false) {
- fun generateRandomSentence(): String {
- val subjects = listOf("The cat", "The dog", "The bird", "The fish")
- val verbs = listOf("jumps", "runs", "flies", "swims")
- val objects = listOf("over the fence", "in the park", "through the air", "in the water")
- return "${subjects.random()} ${verbs.random()} ${objects.random()}."
- }
+ fun mockConversationItems(): List {
+ val conversationItems = mutableListOf()
+ @Suppress(
+ "SimplifyBooleanWithConstants",
+ "KotlinConstantConditions",
+ )
+ if (BuildConfig.DEBUG && false) {
+ fun generateRandomSentence(): String {
+ val subjects = listOf("The cat", "The dog", "The bird", "The fish")
+ val verbs = listOf("jumps", "runs", "flies", "swims")
+ val objects = listOf("over the fence", "in the park", "through the air", "in the water")
+ return "${subjects.random()} ${verbs.random()} ${objects.random()}."
+ }
- for (i in 0..20) {
- conversationItems.add(
- ConversationItem(
- id = "$i",
- speaker = ConversationSpeaker.entries.random(),
- initialText = generateRandomSentence()
+ for (i in 0..20) {
+ conversationItems.add(
+ MobileViewModel.ConversationItem(
+ id = "$i",
+ speaker = MobileViewModel.ConversationSpeaker.entries.random(),
+ initialText = generateRandomSentence()
+ )
)
- )
+ }
}
+ return conversationItems
}
+
+ val conversationItems = mobileViewModel?.conversationItems ?: mockConversationItems()
+
val conversationListState = rememberLazyListState()
LaunchedEffect(Unit) {
snapshotFlow { conversationItems.lastOrNull()?.text }
@@ -281,7 +270,7 @@ fun MobileApp(mobileViewModel: MobileViewModel? = null) {
isConnectSwitchOn = true
if (!isConnectingOrConnected) {
isConnectingOrConnected = true
- conversationItems.clear()
+ mobileViewModel.conversationItemsClear()
jobConnect = CoroutineScope(Dispatchers.IO).launch {
if (debugConnectDelayMillis > 0) {
delay(debugConnectDelayMillis)
@@ -350,6 +339,10 @@ fun MobileApp(mobileViewModel: MobileViewModel? = null) {
Manifest.permission.BLUETOOTH,
Manifest.permission.BLUETOOTH_CONNECT,
Manifest.permission.BLUETOOTH_SCAN,
+ Manifest.permission.FOREGROUND_SERVICE,
+ //Manifest.permission.FOREGROUND_SERVICE_MICROPHONE,
+ //Manifest.permission.FOREGROUND_SERVICE_REMOTE_MESSAGING,
+ Manifest.permission.FOREGROUND_SERVICE_SPECIAL_USE,
)
}
@@ -522,88 +515,25 @@ fun MobileApp(mobileViewModel: MobileViewModel? = null) {
return false
}
- override fun onServerEventConversationCreated(
- realtimeServerEventConversationCreated: RealtimeServerEventConversationCreated
- ) {
- if (debugLogConversation) {
- Log.d(TAG, "onServerEventConversationCreated($realtimeServerEventConversationCreated)")
- }
+ override fun onServerEventConversationCreated(realtimeServerEventConversationCreated: RealtimeServerEventConversationCreated) {
}
- override fun onServerEventConversationItemCreated(
- realtimeServerEventConversationItemCreated: RealtimeServerEventConversationItemCreated
- ) {
- if (debugLogConversation) {
- Log.d(TAG, "onServerEventConversationItemCreated($realtimeServerEventConversationItemCreated)")
- }
- val item = realtimeServerEventConversationItemCreated.item
- val id = item.id
- val content = item.content
- val text = content?.joinToString(separator = "") { it.text ?: "" } ?: ""
- if (text.isNotBlank()) {
- if (debugLogConversation) {
- Log.w(TAG, "onServerEventConversationItemCreated: conversationItems.add(ConversationItem(id=${quote(id)}, initialText=${quote(text)}")
- }
- conversationItems.add(
- ConversationItem(
- id = id,
- speaker = ConversationSpeaker.Local,
- initialText = text
- )
- )
- }
+ override fun onServerEventConversationItemCreated(realtimeServerEventConversationItemCreated: RealtimeServerEventConversationItemCreated) {
}
- override fun onServerEventConversationItemDeleted(
- realtimeServerEventConversationItemDeleted: RealtimeServerEventConversationItemDeleted
- ) {
- if (debugLogConversation) {
- Log.d(TAG, "onServerEventConversationItemDeleted($realtimeServerEventConversationItemDeleted)")
- }
+ override fun onServerEventConversationItemDeleted(realtimeServerEventConversationItemDeleted: RealtimeServerEventConversationItemDeleted) {
}
- override fun onServerEventConversationItemInputAudioTranscriptionCompleted(
- realtimeServerEventConversationItemInputAudioTranscriptionCompleted: RealtimeServerEventConversationItemInputAudioTranscriptionCompleted
- ) {
- if (debugLogConversation) {
- Log.d(TAG, "onServerEventConversationItemInputAudioTranscriptionCompleted($realtimeServerEventConversationItemInputAudioTranscriptionCompleted)")
- }
- val id = realtimeServerEventConversationItemInputAudioTranscriptionCompleted.itemId
- val transcript =
- realtimeServerEventConversationItemInputAudioTranscriptionCompleted.transcript.trim() // DO TRIM!
- if (transcript.isNotBlank()) {
- if (debugLogConversation) {
- Log.w(TAG, "onServerEventConversationItemInputAudioTranscriptionCompleted: conversationItems.add(ConversationItem(id=${quote(id)}, initialText=${quote(transcript)}")
- }
- conversationItems.add(
- ConversationItem(
- id = id,
- speaker = ConversationSpeaker.Local,
- initialText = transcript
- )
- )
- }
+ override fun onServerEventConversationItemInputAudioTranscriptionCompleted(realtimeServerEventConversationItemInputAudioTranscriptionCompleted: RealtimeServerEventConversationItemInputAudioTranscriptionCompleted) {
}
- override fun onServerEventConversationItemInputAudioTranscriptionFailed(
- realtimeServerEventConversationItemInputAudioTranscriptionFailed: RealtimeServerEventConversationItemInputAudioTranscriptionFailed
- ) {
- if (debugLogConversation) {
- Log.d(TAG, "onServerEventConversationItemInputAudioTranscriptionFailed($realtimeServerEventConversationItemInputAudioTranscriptionFailed)")
- }
+ override fun onServerEventConversationItemInputAudioTranscriptionFailed(realtimeServerEventConversationItemInputAudioTranscriptionFailed: RealtimeServerEventConversationItemInputAudioTranscriptionFailed) {
}
- override fun onServerEventConversationItemTruncated(
- realtimeServerEventConversationItemTruncated: RealtimeServerEventConversationItemTruncated
- ) {
- if (debugLogConversation) {
- Log.d(TAG, "onServerEventConversationItemTruncated($realtimeServerEventConversationItemTruncated)")
- }
+ override fun onServerEventConversationItemTruncated(realtimeServerEventConversationItemTruncated: RealtimeServerEventConversationItemTruncated) {
}
- override fun onServerEventError(
- realtimeServerEventError: RealtimeServerEventError
- ) {
+ override fun onServerEventError(realtimeServerEventError: RealtimeServerEventError) {
Log.d(TAG, "onServerEventError($realtimeServerEventError)")
val error = realtimeServerEventError.error
val text = error.message
@@ -615,27 +545,19 @@ fun MobileApp(mobileViewModel: MobileViewModel? = null) {
)
}
- override fun onServerEventInputAudioBufferCleared(
- realtimeServerEventInputAudioBufferCleared: RealtimeServerEventInputAudioBufferCleared
- ) {
+ override fun onServerEventInputAudioBufferCleared(realtimeServerEventInputAudioBufferCleared: RealtimeServerEventInputAudioBufferCleared) {
Log.d(TAG, "onServerEventInputAudioBufferCleared($realtimeServerEventInputAudioBufferCleared)")
}
- override fun onServerEventInputAudioBufferCommitted(
- realtimeServerEventInputAudioBufferCommitted: RealtimeServerEventInputAudioBufferCommitted
- ) {
+ override fun onServerEventInputAudioBufferCommitted(realtimeServerEventInputAudioBufferCommitted: RealtimeServerEventInputAudioBufferCommitted) {
Log.d(TAG, "onServerEventInputAudioBufferCommitted($realtimeServerEventInputAudioBufferCommitted)")
}
- override fun onServerEventInputAudioBufferSpeechStarted(
- realtimeServerEventInputAudioBufferSpeechStarted: RealtimeServerEventInputAudioBufferSpeechStarted
- ) {
+ override fun onServerEventInputAudioBufferSpeechStarted(realtimeServerEventInputAudioBufferSpeechStarted: RealtimeServerEventInputAudioBufferSpeechStarted) {
Log.d(TAG, "onServerEventInputAudioBufferSpeechStarted($realtimeServerEventInputAudioBufferSpeechStarted)")
}
- override fun onServerEventInputAudioBufferSpeechStopped(
- realtimeServerEventInputAudioBufferSpeechStopped: RealtimeServerEventInputAudioBufferSpeechStopped
- ) {
+ override fun onServerEventInputAudioBufferSpeechStopped(realtimeServerEventInputAudioBufferSpeechStopped: RealtimeServerEventInputAudioBufferSpeechStopped) {
Log.d(TAG, "onServerEventInputAudioBufferSpeechStopped($realtimeServerEventInputAudioBufferSpeechStopped)")
}
@@ -652,166 +574,68 @@ fun MobileApp(mobileViewModel: MobileViewModel? = null) {
}
}
- override fun onServerEventRateLimitsUpdated(
- realtimeServerEventRateLimitsUpdated: RealtimeServerEventRateLimitsUpdated
- ) {
+ override fun onServerEventRateLimitsUpdated(realtimeServerEventRateLimitsUpdated: RealtimeServerEventRateLimitsUpdated) {
Log.d(TAG, "onServerEventRateLimitsUpdated($realtimeServerEventRateLimitsUpdated)")
}
- override fun onServerEventResponseAudioDelta(
- realtimeServerEventResponseAudioDelta: RealtimeServerEventResponseAudioDelta
- ) {
+ override fun onServerEventResponseAudioDelta(realtimeServerEventResponseAudioDelta: RealtimeServerEventResponseAudioDelta) {
Log.d(TAG, "onServerEventResponseAudioDelta($realtimeServerEventResponseAudioDelta)")
}
- override fun onServerEventResponseAudioDone(
- realtimeServerEventResponseAudioDone: RealtimeServerEventResponseAudioDone
- ) {
+ override fun onServerEventResponseAudioDone(realtimeServerEventResponseAudioDone: RealtimeServerEventResponseAudioDone) {
Log.d(TAG, "onServerEventResponseAudioDone($realtimeServerEventResponseAudioDone)")
}
- override fun onServerEventResponseAudioTranscriptDelta(
- realtimeServerEventResponseAudioTranscriptDelta: RealtimeServerEventResponseAudioTranscriptDelta
- ) {
- if (debugLogConversation) {
- Log.d(TAG, "onServerEventResponseAudioTranscriptDelta($realtimeServerEventResponseAudioTranscriptDelta)")
- }
- val id = realtimeServerEventResponseAudioTranscriptDelta.itemId
- val delta =
- realtimeServerEventResponseAudioTranscriptDelta.delta // DO **NOT** TRIM!
-
- // Append this delta to any current in progress conversation,
- // or create a new conversation
- val index = conversationItems.indexOfFirst { it.id == id }
- if (debugLogConversation) {
- Log.w(TAG, "onServerEventResponseAudioTranscriptDelta: id=$id, delta=$delta, index=$index")
- }
- if (index == -1) {
- val conversationItem = ConversationItem(
- id = id,
- speaker = ConversationSpeaker.Remote,
- initialText = delta,
- )
- if (debugLogConversation) {
- Log.w(TAG, "conversationItems.add($conversationItem)")
- }
- conversationItems.add(conversationItem)
- } else {
- val conversationItem = conversationItems[index]
- conversationItem.text += delta
- if (debugLogConversation) {
- Log.w(TAG, "conversationItems.set($index, $conversationItem)")
- }
- conversationItems[index] = conversationItem
- }
+ override fun onServerEventResponseAudioTranscriptDelta(realtimeServerEventResponseAudioTranscriptDelta: RealtimeServerEventResponseAudioTranscriptDelta) {
}
- override fun onServerEventResponseAudioTranscriptDone(
- realtimeServerEventResponseAudioTranscriptDone: RealtimeServerEventResponseAudioTranscriptDone
- ) {
- if (debugLogConversation) {
- Log.d(TAG, "onServerEventResponseAudioTranscriptDone($realtimeServerEventResponseAudioTranscriptDone)")
- }
+ override fun onServerEventResponseAudioTranscriptDone(realtimeServerEventResponseAudioTranscriptDone: RealtimeServerEventResponseAudioTranscriptDone) {
}
- override fun onServerEventResponseContentPartAdded(
- realtimeServerEventResponseContentPartAdded: RealtimeServerEventResponseContentPartAdded
- ) {
- if (debugLogConversation) {
- Log.d(TAG, "onServerEventResponseContentPartAdded($realtimeServerEventResponseContentPartAdded)")
- }
+ override fun onServerEventResponseContentPartAdded(realtimeServerEventResponseContentPartAdded: RealtimeServerEventResponseContentPartAdded) {
}
- override fun onServerEventResponseContentPartDone(
- realtimeServerEventResponseContentPartDone: RealtimeServerEventResponseContentPartDone
- ) {
- if (debugLogConversation) {
- Log.d(TAG, "onServerEventResponseContentPartDone($realtimeServerEventResponseContentPartDone)")
- }
+ override fun onServerEventResponseContentPartDone(realtimeServerEventResponseContentPartDone: RealtimeServerEventResponseContentPartDone) {
}
- override fun onServerEventResponseCreated(
- realtimeServerEventResponseCreated: RealtimeServerEventResponseCreated
- ) {
- if (debugLogConversation) {
- Log.d(TAG, "onServerEventResponseCreated($realtimeServerEventResponseCreated)")
- }
+ override fun onServerEventResponseCreated(realtimeServerEventResponseCreated: RealtimeServerEventResponseCreated) {
}
- override fun onServerEventResponseDone(
- realtimeServerEventResponseDone: RealtimeServerEventResponseDone
- ) {
- if (debugLogConversation) {
- Log.d(TAG, "onServerEventResponseDone($realtimeServerEventResponseDone)")
- }
- realtimeServerEventResponseDone.response.output?.forEach { outputConversationItem ->
- if (debugLogConversation) {
- Log.w(TAG, "onServerEventResponseDone: outputConversationItem=$outputConversationItem")
- }
- val id = outputConversationItem.id ?: return@forEach
- val index = conversationItems.indexOfFirst { it.id == id }
- if (index != -1) {
- val conversationItem = conversationItems[index]
- if (debugLogConversation) {
- Log.w(TAG, "onServerEventResponseDone: removing $conversationItem at index=$index")
- }
- conversationItems.removeAt(index)
- if (debugLogConversation) {
- Log.w(TAG, "onServerEventResponseDone: adding $conversationItem at end")
- }
- conversationItems.add(conversationItem)
- }
- }
+ override fun onServerEventResponseDone(realtimeServerEventResponseDone: RealtimeServerEventResponseDone) {
}
- override fun onServerEventResponseFunctionCallArgumentsDelta(
- realtimeServerEventResponseFunctionCallArgumentsDelta: RealtimeServerEventResponseFunctionCallArgumentsDelta
- ) {
+ override fun onServerEventResponseFunctionCallArgumentsDelta(realtimeServerEventResponseFunctionCallArgumentsDelta: RealtimeServerEventResponseFunctionCallArgumentsDelta) {
Log.d(TAG, "onServerEventResponseFunctionCallArgumentsDelta($realtimeServerEventResponseFunctionCallArgumentsDelta)")
}
- override fun onServerEventResponseFunctionCallArgumentsDone(
- realtimeServerEventResponseFunctionCallArgumentsDone: RealtimeServerEventResponseFunctionCallArgumentsDone
- ) {
+ override fun onServerEventResponseFunctionCallArgumentsDone(realtimeServerEventResponseFunctionCallArgumentsDone: RealtimeServerEventResponseFunctionCallArgumentsDone) {
Log.d(TAG, "onServerEventResponseFunctionCallArgumentsDone($realtimeServerEventResponseFunctionCallArgumentsDone)")
}
- override fun onServerEventResponseOutputItemAdded(
- realtimeServerEventResponseOutputItemAdded: RealtimeServerEventResponseOutputItemAdded
- ) {
+ override fun onServerEventResponseOutputItemAdded(realtimeServerEventResponseOutputItemAdded: RealtimeServerEventResponseOutputItemAdded) {
Log.d(TAG, "onServerEventResponseOutputItemAdded($realtimeServerEventResponseOutputItemAdded)")
}
- override fun onServerEventResponseOutputItemDone(
- realtimeServerEventResponseOutputItemDone: RealtimeServerEventResponseOutputItemDone
- ) {
+ override fun onServerEventResponseOutputItemDone(realtimeServerEventResponseOutputItemDone: RealtimeServerEventResponseOutputItemDone) {
Log.d(TAG, "onServerEventResponseOutputItemDone($realtimeServerEventResponseOutputItemDone)")
}
- override fun onServerEventResponseTextDelta(
- realtimeServerEventResponseTextDelta: RealtimeServerEventResponseTextDelta
- ) {
+ override fun onServerEventResponseTextDelta(realtimeServerEventResponseTextDelta: RealtimeServerEventResponseTextDelta) {
Log.d(TAG, "onServerEventResponseTextDelta($realtimeServerEventResponseTextDelta)")
}
- override fun onServerEventResponseTextDone(
- realtimeServerEventResponseTextDone: RealtimeServerEventResponseTextDone
- ) {
+ override fun onServerEventResponseTextDone(realtimeServerEventResponseTextDone: RealtimeServerEventResponseTextDone) {
Log.d(TAG, "onServerEventResponseTextDone($realtimeServerEventResponseTextDone)")
}
- override fun onServerEventSessionCreated(
- realtimeServerEventSessionCreated: RealtimeServerEventSessionCreated
- ) {
+ override fun onServerEventSessionCreated(realtimeServerEventSessionCreated: RealtimeServerEventSessionCreated) {
Log.d(TAG, "onServerEventSessionCreated($realtimeServerEventSessionCreated)")
if (debugToastVerbose) {
showToast(context = context, text = "Session Created", forceInvokeOnMain = true)
}
}
- override fun onServerEventSessionUpdated(
- realtimeServerEventSessionUpdated: RealtimeServerEventSessionUpdated
- ) {
+ override fun onServerEventSessionUpdated(realtimeServerEventSessionUpdated: RealtimeServerEventSessionUpdated) {
Log.d(TAG, "onServerEventSessionUpdated($realtimeServerEventSessionUpdated)")
if (debugToastVerbose) {
showToast(context = context, text = "Session Updated", forceInvokeOnMain = true)
@@ -957,86 +781,106 @@ fun MobileApp(mobileViewModel: MobileViewModel? = null) {
modifier = Modifier.padding(start = 4.dp),
text = "Conversation:",
)
- IconButton(onClick = { conversationItems.clear() }) {
+ IconButton(onClick = { mobileViewModel?.conversationItemsClear() }) {
Icon(
painterResource(id = R.drawable.baseline_clear_all_24),
contentDescription = "Clear All",
)
}
}
- LazyColumn(
- modifier = Modifier
- .border(
- 1.dp,
- Color.LightGray,
- shape = RoundedCornerShape(8.dp)
+ if (conversationItems.isEmpty()) {
+ Box(
+ modifier = Modifier
+ .border(
+ 1.dp,
+ Color.LightGray,
+ shape = RoundedCornerShape(8.dp)
+ )
+ .fillMaxWidth()
+ .weight(1f),
+ contentAlignment = Alignment.Center
+ ) {
+ Text(
+ text = "No conversation items",
+ textAlign = TextAlign.Center,
)
- .fillMaxWidth()
- .weight(1f),
- contentPadding = PaddingValues(8.dp),
- verticalArrangement = Arrangement.spacedBy(8.dp),
- state = conversationListState,
- ) {
- items(
- count = conversationItems.size,
- ) { index ->
- val item = conversationItems[index]
- val avatarResId: Int
- val horizontalAlignment: Arrangement.Horizontal
- val contentAlignment: Alignment
- val textAlign: TextAlign
- when (item.speaker) {
- ConversationSpeaker.Local -> {
- avatarResId = R.drawable.baseline_person_24
- horizontalAlignment = Arrangement.Start
- contentAlignment = Alignment.CenterStart
- textAlign = TextAlign.Start
- }
- ConversationSpeaker.Remote -> {
- avatarResId = R.drawable.baseline_memory_24
- horizontalAlignment = Arrangement.End
- contentAlignment = Alignment.CenterEnd
- textAlign = TextAlign.End
- }
- }
- Row(
- modifier = Modifier
- .fillMaxWidth(),
- verticalAlignment = Alignment.CenterVertically,
- horizontalArrangement = horizontalAlignment
- ) {
- if (item.speaker == ConversationSpeaker.Local) {
- Icon(
- painterResource(id = avatarResId),
- contentDescription = "Local",
- )
- Spacer(modifier = Modifier.width(8.dp))
+ }
+ } else {
+ LazyColumn(
+ modifier = Modifier
+ .border(
+ 1.dp,
+ Color.LightGray,
+ shape = RoundedCornerShape(8.dp)
+ )
+ .fillMaxWidth()
+ .weight(1f),
+ verticalArrangement = Arrangement.spacedBy(8.dp),
+ contentPadding = PaddingValues(8.dp),
+ state = conversationListState,
+ ) {
+ items(
+ count = conversationItems.size,
+ ) { index ->
+ val item = conversationItems[index]
+ val avatarResId: Int
+ val horizontalAlignment: Arrangement.Horizontal
+ val contentAlignment: Alignment
+ val textAlign: TextAlign
+ when (item.speaker) {
+ MobileViewModel.ConversationSpeaker.Local -> {
+ avatarResId = R.drawable.baseline_person_24
+ horizontalAlignment = Arrangement.Start
+ contentAlignment = Alignment.CenterStart
+ textAlign = TextAlign.Start
+ }
+
+ MobileViewModel.ConversationSpeaker.Remote -> {
+ avatarResId = R.drawable.baseline_memory_24
+ horizontalAlignment = Arrangement.End
+ contentAlignment = Alignment.CenterEnd
+ textAlign = TextAlign.End
+ }
}
- Box(
+ Row(
modifier = Modifier
- .weight(1f),
- contentAlignment = contentAlignment
+ .fillMaxWidth(),
+ verticalAlignment = Alignment.CenterVertically,
+ horizontalArrangement = horizontalAlignment
) {
- SelectionContainer {
- Text(
- modifier = Modifier
- .border(
- 1.dp,
- Color.Gray,
- shape = RoundedCornerShape(8.dp)
- )
- .padding(8.dp),
- text = item.text,
- textAlign = textAlign,
+ if (item.speaker == MobileViewModel.ConversationSpeaker.Local) {
+ Icon(
+ painterResource(id = avatarResId),
+ contentDescription = "Local",
+ )
+ Spacer(modifier = Modifier.width(8.dp))
+ }
+ Box(
+ modifier = Modifier
+ .weight(1f),
+ contentAlignment = contentAlignment
+ ) {
+ SelectionContainer {
+ Text(
+ modifier = Modifier
+ .border(
+ 1.dp,
+ Color.Gray,
+ shape = RoundedCornerShape(8.dp)
+ )
+ .padding(8.dp),
+ text = item.text,
+ textAlign = textAlign,
+ )
+ }
+ }
+ if (item.speaker == MobileViewModel.ConversationSpeaker.Remote) {
+ Spacer(modifier = Modifier.width(8.dp))
+ Icon(
+ painterResource(id = avatarResId),
+ contentDescription = "Remote",
)
}
- }
- if (item.speaker == ConversationSpeaker.Remote) {
- Spacer(modifier = Modifier.width(8.dp))
- Icon(
- painterResource(id = avatarResId),
- contentDescription = "Remote",
- )
}
}
}
diff --git a/mobile/src/main/java/com/swooby/alfredai/MobileForegroundService.kt b/mobile/src/main/java/com/swooby/alfredai/MobileForegroundService.kt
new file mode 100644
index 0000000..c32600d
--- /dev/null
+++ b/mobile/src/main/java/com/swooby/alfredai/MobileForegroundService.kt
@@ -0,0 +1,272 @@
+package com.swooby.alfredai
+
+import android.app.Notification
+import android.app.NotificationChannel
+import android.app.NotificationManager
+import android.app.PendingIntent
+import android.app.Service
+import android.content.BroadcastReceiver
+import android.content.Context
+import android.content.Intent
+import android.content.IntentFilter
+import android.content.pm.ServiceInfo
+import android.util.Log
+import android.widget.Toast
+import androidx.core.app.NotificationCompat
+import androidx.core.app.Person
+import androidx.core.graphics.drawable.IconCompat
+import com.swooby.alfredai.AppUtils.showToast
+import com.swooby.alfredai.MobileViewModel.ConnectionState
+import kotlinx.coroutines.CoroutineScope
+import kotlinx.coroutines.Dispatchers
+import kotlinx.coroutines.Job
+import kotlinx.coroutines.cancel
+import kotlinx.coroutines.launch
+
+class MobileForegroundService : Service() {
+ companion object {
+ private const val TAG = "MobileForegroundService"
+
+ /**
+ * Must complement AndroidManifest's `foregroundServiceType` value for this service.
+ */
+ const val FOREGROUND_SERVICE_TYPE =
+ //ServiceInfo.FOREGROUND_SERVICE_TYPE_MICROPHONE
+ //ServiceInfo.FOREGROUND_SERVICE_TYPE_REMOTE_MESSAGING
+ ServiceInfo.FOREGROUND_SERVICE_TYPE_SPECIAL_USE
+
+ private const val ACTION_DISCONNECT = "com.swooby.alfredai.action.DISCONNECT"
+
+ private const val CHANNEL_ID = "FOREGROUND_SERVICE_CHANNEL"
+ private const val CHANNEL_NAME = "Foreground Service Channel"
+ private const val CHANNEL_DESCRIPTION = "Non-dismissible notifications for session status"
+ private const val NOTIFICATION_ID_SERVICE = 101
+
+ private const val CONNECTION_NAME = "OpenAI Realtime"
+
+ fun start(context: Context) {
+ val intent = Intent(context, MobileForegroundService::class.java)
+ context.startForegroundService(intent)
+ }
+
+ fun stop(context: Context) {
+ val intent = Intent(context, MobileForegroundService::class.java)
+ context.stopService(intent)
+ }
+ }
+
+ private val serviceScope = CoroutineScope(Dispatchers.Main + Job())
+
+ private val mobileViewModel by lazy { (application as AlfredAiApp).mobileViewModel }
+
+ private var isForegroundStarted = false
+
+ override fun onBind(intent: Intent?) = null
+
+ override fun onCreate() {
+ Log.d(TAG, "onCreate()")
+ super.onCreate()
+ createNotificationChannel()
+ observeConnectionStateForNotifications()
+ }
+
+ override fun onDestroy() {
+ Log.d(TAG, "onDestroy()")
+ super.onDestroy()
+ isForegroundStarted = false
+ serviceScope.cancel()
+ }
+
+ override fun onStartCommand(intent: Intent?, flags: Int, startId: Int): Int {
+ Log.d(TAG, "onStartCommand(intent=$intent)")
+ when (intent?.action) {
+ ACTION_DISCONNECT -> {
+ Log.i(TAG, "onStartCommand: ACTION_DISCONNECT")
+ disconnectAndStopForegroundService()
+ }
+ }
+ return START_STICKY
+ }
+
+ private fun observeConnectionStateForNotifications() {
+ serviceScope.launch {
+ mobileViewModel.connectionStateFlow.collect { connectionState ->
+ Log.i(TAG, "connectionStateFlow: connectionState=$connectionState")
+ showNotification(connectionState)
+ }
+ }
+ }
+
+ private fun showNotification(connectionState: ConnectionState) {
+ Log.d(TAG, "showNotification(connectionState=$connectionState)")
+ when (connectionState) {
+ ConnectionState.Connecting -> {
+ try {
+ isForegroundStarted = true
+ startForeground(
+ NOTIFICATION_ID_SERVICE,
+ createNotification(
+ contentTitle = "AlfredAI: Connecting...",
+ contentText = "Connecting to $CONNECTION_NAME..."
+ ),
+ FOREGROUND_SERVICE_TYPE,
+ )
+ } catch (e: SecurityException) {
+ Log.e(TAG, "connectionStateFlow: SecurityException: ${e.message}")
+ isForegroundStarted = false
+ /*
+
+ I see this if I press the `Home` button immediately after launching the app...
+ ...IF WE ARE USING THE PREFERRED [in AndroidManifest.xml]:
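+ * `<uses-permission android:name="android.permission.FOREGROUND_SERVICE_MICROPHONE" />`
+ * `<service ... android:foregroundServiceType="microphone" />`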
+
+ `MobileForegroundService com.swooby.alfredai E onStartCommand: SecurityException:
+ Starting FGS with type microphone callerApp=ProcessRecord{28abe4 12711:com.swooby.alfredai/u0a211}
+ targetSDK=34 requires permissions: all of the permissions allOf=true [android.permission.FOREGROUND_SERVICE_MICROPHONE]
+ any of the permissions allOf=false [android.permission.CAPTURE_AUDIO_HOTWORD, android.permission.CAPTURE_AUDIO_OUTPUT,
+ android.permission.CAPTURE_MEDIA_OUTPUT, android.permission.CAPTURE_TUNER_AUDIO_INPUT,
+ android.permission.CAPTURE_VOICE_COMMUNICATION_OUTPUT, android.permission.RECORD_AUDIO]
+ and the app must be in the eligible state/exemptions to access the foreground only permission`
+
+ The key phrase is that last line.
+
+ https://developer.android.com/develop/background-work/services/fgs/service-types#microphone
+
+ "Note: The RECORD_AUDIO runtime permission is subject to while-in-use restrictions.
+ For this reason, you cannot create a microphone foreground service while your app
+ is in the background ..."
+
+ https://developer.android.com/develop/background-work/services/fgs/restrictions-bg-start#wiu-restrictions
+
+ "For while-in-use permissions, this causes a potential problem. If your app has
+ a while-in-use permission, it only has that permission while it's in the foreground.
+ This means if your app is in the background, and it tries to create a foreground
+ service of type camera, location, or microphone, the system sees that your app
+ doesn't currently have the required permissions, and it throws a SecurityException."
+ "For this reason, if your foreground service needs a while-in-use permission, you
+ must call Context.startForegroundService() or Context.bindService() while your
+ app has a visible activity, unless the service falls into one of the `defined exemptions`."
+
+ https://developer.android.com/develop/background-work/services/fgs/restrictions-bg-start#wiu-restrictions-exemptions
+
+ None of the exemptions realistically apply to this app.
+
+ They naively think that it is acceptable to only start a service when the Activity
+ is visible.
+ Sure, we could fairly simply write code to wait until the Activity UI is visible
+ and **then** start the service, but it is still easy to hit `Home` at the right/wrong
+ moment before `startForeground` is called and still cause this code to fail.
+
+ I could see creating a `MutableStateFlow` that is observed for changes when an Activity
+ is added, and only then trying to immediately call `startForeground`, but even that
+ still screams of `race-condition` problems.
+
+ They introduced a race-condition back in API26 (2017), and have continued to kick
+ the can down the road on that race-condition, making it worse by piling on even
+ more pointless restrictions rather than fix the problem:
+ https://stackoverflow.com/a/49418249/252308 (2017/06)
+ https://issuetracker.google.com/issues/76112072 (2018/05)
+
+ Instead of fighting all of this, I have chosen to implement the foreground service as:
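+ * `<uses-permission android:name="android.permission.FOREGROUND_SERVICE_SPECIAL_USE" />`
+ * `<service ... android:foregroundServiceType="specialUse" />`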
+ This works perfectly fine for non-PlayStore apps.
+ If this app ever gets on the PlayStore then they can review it and accept or reject
+ its specialUse case then.
+ If they reject then I could always try the observable `MutableStateFlow` option.
+
+ For more info, see:
+ https://developer.android.com/develop/background-work/services/fgs/launch
+
+ */
+ showToast(this@MobileForegroundService, "Don't press `Home` while the app is starting!", Toast.LENGTH_LONG)
+ disconnectAndStopForegroundService()
+ }
+ }
+ ConnectionState.Connected -> {
+ updateNotification(
+ contentTitle = "AlfredAI: Connected",
+ contentText = "Connected to $CONNECTION_NAME"
+ )
+ }
+ ConnectionState.Disconnected -> {
+ disconnectAndStopForegroundService()
+ }
+ }
+ }
+
+ /**
+ * Disconnects the view model, posts the "Disconnected" notification, then stops this service.
+ * Only logs the call when [isForegroundStarted] is not set.
+ */
+ private fun disconnectAndStopForegroundService() {
+ Log.d(TAG, "disconnectAndStopForegroundService()")
+ if (!isForegroundStarted) return
+ isForegroundStarted = false
+ Log.d(TAG, "disconnectAndStopForegroundService: disconnecting")
+ mobileViewModel.disconnect()
+ Log.d(TAG, "disconnectAndStopForegroundService: showing disconnected notification")
+ updateNotification("AlfredAI: Disconnected", "Disconnected from $CONNECTION_NAME")
+ Log.d(TAG, "disconnectAndStopForegroundService: stopping foreground service")
+ stopForeground(STOP_FOREGROUND_REMOVE)
+ stopSelf()
+ }
+
+ private val notificationManager by lazy { getSystemService(NotificationManager::class.java) }
+
+ /** Registers the low-importance channel identified by [CHANNEL_ID]. */
+ private fun createNotificationChannel() {
+ notificationManager.createNotificationChannel(
+ NotificationChannel(CHANNEL_ID, CHANNEL_NAME, NotificationManager.IMPORTANCE_LOW)
+ .also { it.description = CHANNEL_DESCRIPTION }
+ )
+ }
+
+ /**
+ * Builds the notification shown for this service; tapping it opens [MobileActivity].
+ * While [isForegroundStarted] is set, it is additionally styled as an ongoing call whose
+ * hang-up intent delivers [ACTION_DISCONNECT] to this service.
+ */
+ private fun createNotification(contentTitle: String, contentText: String): Notification {
+ val pendingIntentFlags = PendingIntent.FLAG_UPDATE_CURRENT or PendingIntent.FLAG_IMMUTABLE
+
+ val openActivityIntent = Intent(this, MobileActivity::class.java)
+ openActivityIntent.flags = Intent.FLAG_ACTIVITY_NEW_TASK or Intent.FLAG_ACTIVITY_CLEAR_TASK
+ val contentPendingIntent = PendingIntent.getActivity(this, 0, openActivityIntent, pendingIntentFlags)
+
+ // Created unconditionally, even when the call style below is not applied.
+ val disconnectIntent = Intent(this, MobileForegroundService::class.java)
+ disconnectIntent.action = ACTION_DISCONNECT
+ val hangUpPendingIntent = PendingIntent.getService(this, 0, disconnectIntent, pendingIntentFlags)
+
+ val builder = NotificationCompat.Builder(this, CHANNEL_ID)
+ builder.setContentTitle(contentTitle)
+ builder.setContentText(contentText)
+ builder.setSmallIcon(R.drawable.alfredai_24)
+ builder.setPriority(NotificationCompat.PRIORITY_LOW)
+ builder.setOngoing(true)
+ builder.setAutoCancel(false)
+ builder.setContentIntent(contentPendingIntent)
+
+ // Without a started foreground state the plain notification is returned.
+ if (!isForegroundStarted) {
+ return builder.build()
+ }
+
+ val callee = Person.Builder()
+ .setName(CONNECTION_NAME)
+ .setIcon(IconCompat.createWithResource(this, R.drawable.alfredai_24))
+ .setImportant(true)
+ .setBot(true)
+ .build()
+
+ builder.setStyle(NotificationCompat.CallStyle.forOngoingCall(callee, hangUpPendingIntent))
+ return builder.build()
+ }
+
+ /** Rebuilds the service notification and posts it under [NOTIFICATION_ID_SERVICE]. */
+ private fun updateNotification(contentTitle: String, contentText: String) {
+ notificationManager.notify(NOTIFICATION_ID_SERVICE, createNotification(contentTitle, contentText))
+ }
+}
diff --git a/mobile/src/main/java/com/swooby/alfredai/MobileViewModel.kt b/mobile/src/main/java/com/swooby/alfredai/MobileViewModel.kt
index cfb92e0..da64304 100644
--- a/mobile/src/main/java/com/swooby/alfredai/MobileViewModel.kt
+++ b/mobile/src/main/java/com/swooby/alfredai/MobileViewModel.kt
@@ -1,10 +1,23 @@
package com.swooby.alfredai
+import android.app.Activity
import android.app.Application
+import android.app.Notification
+import android.app.NotificationChannel
+import android.app.NotificationManager
+import android.app.PendingIntent
+import android.content.Context
+import android.content.Intent
import android.util.Log
import androidx.compose.runtime.getValue
+import androidx.compose.runtime.mutableStateListOf
import androidx.compose.runtime.mutableStateOf
import androidx.compose.runtime.setValue
+import androidx.core.app.NotificationCompat
+import androidx.lifecycle.viewModelScope
+import com.openai.infrastructure.ApiClient
+import com.openai.infrastructure.ClientError
+import com.openai.infrastructure.ClientException
import com.openai.models.RealtimeServerEventConversationCreated
import com.openai.models.RealtimeServerEventConversationItemCreated
import com.openai.models.RealtimeServerEventConversationItemDeleted
@@ -12,6 +25,7 @@ import com.openai.models.RealtimeServerEventConversationItemInputAudioTranscript
import com.openai.models.RealtimeServerEventConversationItemInputAudioTranscriptionFailed
import com.openai.models.RealtimeServerEventConversationItemTruncated
import com.openai.models.RealtimeServerEventError
+import com.openai.models.RealtimeServerEventErrorError
import com.openai.models.RealtimeServerEventInputAudioBufferCleared
import com.openai.models.RealtimeServerEventInputAudioBufferCommitted
import com.openai.models.RealtimeServerEventInputAudioBufferSpeechStarted
@@ -39,6 +53,7 @@ import com.openai.models.RealtimeSessionModel
import com.openai.models.RealtimeSessionVoice
import com.swooby.alfredai.PushToTalkPreferences.Companion.getMaxResponseOutputTokens
import com.swooby.alfredai.Utils.playAudioResourceOnce
+import com.swooby.alfredai.Utils.quote
import com.swooby.alfredai.openai.realtime.RealtimeClient
import com.swooby.alfredai.openai.realtime.RealtimeClient.RealtimeClientListener
import com.swooby.alfredai.openai.realtime.RealtimeClient.ServerEventOutputAudioBufferAudioStopped
@@ -47,16 +62,30 @@ import com.twilio.audioswitch.AudioSwitch
import kotlinx.coroutines.CoroutineScope
import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.Job
+import kotlinx.coroutines.delay
import kotlinx.coroutines.flow.MutableStateFlow
import kotlinx.coroutines.flow.asStateFlow
import kotlinx.coroutines.launch
+import org.json.JSONObject
import java.math.BigDecimal
+import java.net.UnknownHostException
class MobileViewModel(application: Application) :
SharedViewModel(application)
{
companion object {
- const val DEBUG = true
+ const val DEBUG = false
+
+ private const val CHANNEL_ID = "SESSION_STATUS_CHANNEL"
+ private const val CHANNEL_NAME = "Session Status Channel"
+ private const val CHANNEL_DESCRIPTION = "Dismissible notifications for notable session status events"
+ private const val NOTIFICATION_ID_SESSION = 1001
+
+ @Suppress(
+ "SimplifyBooleanWithConstants",
+ "KotlinConstantConditions",
+ )
+ val debugSimulateSessionExpired = BuildConfig.DEBUG && false
}
override val TAG: String
@@ -66,16 +95,89 @@ class MobileViewModel(application: Application) :
override val remoteCapabilityName: String
get() = "verify_remote_alfredai_wear_app"
+ private var jobDebugFakeSessionExpired: Job? = null
+
override fun init() {
super.init()
+
+ createNotificationChannel()
+
audioSwitchStart()
+
+ observeConnectionForServiceStartStop()
+ // Debug-only path: fake a session-expired server event 20 seconds after init, then disconnect.
+ if (!debugSimulateSessionExpired) return
+ jobDebugFakeSessionExpired = viewModelScope.launch {
+ delay(20_000L)
+ val simulatedExpiry = RealtimeServerEventErrorError(
+ type = "invalid_request_error",
+ message = "Your session hit the maximum duration of 30 minutes.",
+ code = "session_expired",
+ )
+ listener.onServerEventSessionExpired(
+ RealtimeServerEventError(
+ eventId = "fake-event-id",
+ type = RealtimeServerEventError.Type.error,
+ error = simulatedExpiry,
+ )
+ )
+ Log.d(TAG, "Debug: Fake session expired event triggered.")
+ disconnect()
+ }
}
override fun close() {
super.close()
+ disconnect() // only acts when realtimeClient reports isConnected
+ onDisconnecting() // cancels the debug job and publishes ConnectionState.Disconnected
+ onDisconnected() // deactivates audioSwitch
audioSwitchStop()
}
+ fun disconnect() {
+ realtimeClient
+ ?.takeIf { it.isConnected == true } // skip when there is no client or it is not connected
+ ?.disconnect()
+ }
+
+ /**
+ * Cancels a still-active debug session-expiry job and publishes [ConnectionState.Disconnected].
+ * Invoked at the start of listener.onDisconnected(), and from close().
+ */
+ private fun onDisconnecting() {
+ jobDebugFakeSessionExpired
+ ?.takeIf { it.isActive }?.cancel()
+ _connectionStateFlow.value = ConnectionState.Disconnected
+ }
+
+ /**
+ * Deactivates [audioSwitch]; invoked at the end of listener.onDisconnected() and from close().
+ */
+ private fun onDisconnected() {
+ audioSwitch.deactivate()
+ }
+
+ //
+ //region In-Use-Activities Detection
+ //
+
+ /** Activities added by [onStartOrResume] and not yet removed by [onPauseOrStop]. */
+ private val inUseActivities = mutableSetOf<Activity>()
+ /** True while [inUseActivities] is empty. */
+ private val hasNoInUseActivities: Boolean get() = inUseActivities.isEmpty()
+ /** Adds [activity] to the in-use set. */
+ fun onStartOrResume(activity: Activity) {
+ inUseActivities.add(activity)
+ }
+ /** Removes [activity] from the in-use set. */
+ fun onPauseOrStop(activity: Activity) {
+ inUseActivities.remove(activity)
+ }
+
+ //
+ //endregion
+ //
+
//
//region Preferences
//
@@ -242,10 +344,16 @@ class MobileViewModel(application: Application) :
_realtimeClient?.disconnect()
+ val httpClient = if (DEBUG)
+ RealtimeClient.httpLoggingClient
+ else
+ ApiClient.defaultClient
+
_realtimeClient = RealtimeClient(
- getApplication(),
- prefs.apiKey,
- sessionConfig,
+ applicationContext = getApplication(),
+ dangerousApiKey = prefs.apiKey,
+ sessionConfig = sessionConfig,
+ httpClient = httpClient,
debug = DEBUG
)
_realtimeClient?.addListener(listener)
@@ -347,6 +455,125 @@ class MobileViewModel(application: Application) :
}
}
}
+ //
+ //region Conversation
+ //
+
+ @Suppress(
+ "SimplifyBooleanWithConstants",
+ //"KotlinConstantConditions",
+ )
+ val debugLogConversation = BuildConfig.DEBUG && true
+
+ enum class ConversationSpeaker {
+ Local,
+ Remote,
+ }
+
+ data class ConversationItem(
+ val id: String?,
+ val speaker: ConversationSpeaker,
+ val initialText: String // seed value for [text]
+ ) {
+ var text by mutableStateOf(initialText) // body property: not part of the generated equals/hashCode/toString/copy
+ }
+
+ /** Backing list for [conversationItems]; the element type must be stated because nothing else fixes it. */
+ private val _conversationItems = mutableStateListOf<ConversationItem>()
+ /** Read-only view of the backing list. */
+ val conversationItems: List<ConversationItem> get() = _conversationItems
+ /** Removes every entry from the backing list. */
+ fun conversationItemsClear() {
+ _conversationItems.clear() }
+
+ //
+ //endregion
+ //
+
+ //
+ //region Service Start/Stop & Notifications
+ //
+
+ enum class ConnectionState {
+ Connecting,
+ Connected,
+ Disconnected,
+ }
+
+ private val _connectionStateFlow = MutableStateFlow(ConnectionState.Disconnected)
+ val connectionStateFlow = _connectionStateFlow.asStateFlow()
+
+ /**
+ * Collects [connectionStateFlow] in [viewModelScope]: starts [MobileForegroundService]
+ * on Connecting and stops it on Disconnected.
+ */
+ private fun observeConnectionForServiceStartStop() {
+ viewModelScope.launch {
+ connectionStateFlow.collect { connectionState ->
+ Log.i(TAG, "connectionStateFlow: connectionState=$connectionState")
+ when (connectionState) {
+ ConnectionState.Connecting -> MobileForegroundService.start(getApplication())
+ ConnectionState.Disconnected -> MobileForegroundService.stop(getApplication())
+ ConnectionState.Connected -> Unit // listed explicitly (no else) so a new state must be handled here
+ }
+ }
+ }
+ }
+
+ /** System [NotificationManager], looked up from the Application on first use. */
+ private val notificationManager by lazy { getApplication<Application>().getSystemService(NotificationManager::class.java) }
+
+ /** Registers the default-importance channel identified by [CHANNEL_ID]. */
+ private fun createNotificationChannel() {
+ val channel = NotificationChannel(CHANNEL_ID, CHANNEL_NAME, NotificationManager.IMPORTANCE_DEFAULT)
+ channel.description = CHANNEL_DESCRIPTION
+ notificationManager.createNotificationChannel(channel)
+ }
+
+ /**
+ * Builds a dismissible session-status notification that opens [MobileActivity] when tapped.
+ */
+ private fun createNotification(contentTitle: String, contentText: String): Notification {
+ val context = getApplication<Application>() // explicit type argument: T cannot be inferred here
+ val openActivityIntent = Intent(context, MobileActivity::class.java).apply {
+ flags = Intent.FLAG_ACTIVITY_NEW_TASK or Intent.FLAG_ACTIVITY_CLEAR_TASK
+ }
+ val notificationPendingIntent = PendingIntent.getActivity(
+ context, 0, openActivityIntent,
+ PendingIntent.FLAG_UPDATE_CURRENT or PendingIntent.FLAG_IMMUTABLE,
+ )
+ return NotificationCompat.Builder(context, CHANNEL_ID)
+ .setContentTitle(contentTitle)
+ .setContentText(contentText)
+ .setSmallIcon(R.drawable.alfredai_24)
+ .setPriority(NotificationCompat.PRIORITY_HIGH)
+ .setAutoCancel(true) // Makes the notification dismissible
+ .setOnlyAlertOnce(true) // Prevents multiple alerts if the notification is updated
+ .setContentIntent(notificationPendingIntent)
+ .build()
+ }
+
+ /** Builds a session-status notification and posts it under [NOTIFICATION_ID_SESSION]. */
+ private fun updateNotification(contentTitle: String, contentText: String) {
+ notificationManager.notify(NOTIFICATION_ID_SESSION, createNotification(contentTitle, contentText))
+ }
+
+ /** Posts the "Session Expired" notification with [contentText] as its body. */
+ private fun showNotificationSessionExpired(contentText: String) =
+ updateNotification("AlfredAI: Session Expired", contentText)
+
+ /** Posts a "Client Error" notification for the unresolved `api.openai.com` host case. */
+ private fun showNotificationMysteriousUnknownHostException() = showNotificationSessionClientError(
+ "Mysterious \"Unable to resolve host `api.openai.com`\" error; Try again."
+ )
+
+ /** Posts a "Client Error" notification whose body is the quoted [message]. */
+ private fun showNotificationSessionClientError(message: String) =
+ updateNotification("AlfredAI: Client Error", quote(message))
+
+ //
+ //endregion
+ //
//
//region persistent RealtimeClientListener
@@ -355,29 +582,74 @@ class MobileViewModel(application: Application) :
private val listeners = mutableListOf<RealtimeClientListener>() // element type stated explicitly; it cannot be inferred from an empty call
private val listener = object : RealtimeClientListener {
override fun onConnecting() {
+ _connectionStateFlow.value = ConnectionState.Connecting
+
+ conversationItemsClear()
+
listeners.forEach {
it.onConnecting()
}
}
+ /** Posts a notification for unresolved-host and client errors, then forwards [error] to [listeners]. */
override fun onError(error: Exception) {
+ when (error) {
+ is UnknownHostException -> {
+ /*
+ 2025-01-17 17:43:21.190 27419-27475 okhttp.OkHttpClient com.swooby.alfredai I <-- HTTP FAILED: java.net.UnknownHostException: Unable to resolve host "api.openai.com": No address associated with hostname
+ 2025-01-17 17:43:21.190 27419-27475 RealtimeClient com.swooby.alfredai E connect: exception=java.net.UnknownHostException: Unable to resolve host "api.openai.com": No address associated with hostname
+ 2025-01-17 17:43:21.190 27419-27475 PushToTalkActivity com.swooby.alfredai D onError(java.net.UnknownHostException: Unable to resolve host "api.openai.com": No address associated with hostname)
+ 2025-01-17 17:43:21.191 27419-27475 RealtimeClient com.swooby.alfredai D +disconnect()
+ 2025-01-17 17:43:21.191 27419-27475 RealtimeClient com.swooby.alfredai D -disconnect()
+ 2025-01-17 17:43:21.191 27419-27475 RealtimeClient com.swooby.alfredai D connect: ephemeralApiKey=null
+ 2025-01-17 17:43:21.191 27419-27475 PushToTalkActivity com.swooby.alfredai D onError(com.openai.infrastructure.ClientException: No Ephemeral API Key In Response)
+ */
+ showNotificationMysteriousUnknownHostException()
+ }
+
+ is ClientException -> {
+ var message: String? = error.message ?: error.toString()
+ val response = error.response as? ClientError<*>
+ if (response != null) {
+ var expectedJsonBody = response.body
+ if (expectedJsonBody == null) message = response.message
+ if (expectedJsonBody == null && message == null) {
+ // Placeholder body; it must be well-formed JSON or the parse below fails.
+ expectedJsonBody = "{ \"error\": { \"code\": \"-1\" } }"
+ }
+ if (expectedJsonBody != null) {
+ // The body is not guaranteed to be JSON carrying error.code; keep the current message if it is not.
+ message = runCatching { JSONObject(expectedJsonBody.toString()) }.getOrNull()
+ ?.optJSONObject("error")?.optString("code")?.takeIf { it.isNotEmpty() }
+ ?: message
+ }
+ }
+ showNotificationSessionClientError(message ?: error.toString())
+ }
+ }
listeners.forEach {
it.onError(error)
}
}
override fun onConnected() {
+ _connectionStateFlow.value = ConnectionState.Connected
+
audioSwitch.activate()
+
listeners.forEach {
it.onConnected()
}
}
override fun onDisconnected() {
- audioSwitch.deactivate()
+ this@MobileViewModel.onDisconnecting()
+
listeners.forEach {
it.onDisconnected()
}
+
+ this@MobileViewModel.onDisconnected()
}
override fun onBinaryMessageReceived(data: ByteArray): Boolean {
@@ -395,42 +667,85 @@ class MobileViewModel(application: Application) :
}
override fun onServerEventConversationCreated(realtimeServerEventConversationCreated: RealtimeServerEventConversationCreated) {
+ if (debugLogConversation) {
+ Log.d(TAG, "onServerEventConversationCreated($realtimeServerEventConversationCreated)")
+ }
listeners.forEach {
it.onServerEventConversationCreated(realtimeServerEventConversationCreated)
}
}
override fun onServerEventConversationItemCreated(realtimeServerEventConversationItemCreated: RealtimeServerEventConversationItemCreated) {
+ if (debugLogConversation) {
+ Log.d(TAG, "onServerEventConversationItemCreated($realtimeServerEventConversationItemCreated)")
+ }
+ // Join the text of every content part; parts without text contribute nothing.
+ val createdItem = realtimeServerEventConversationItemCreated.item
+ val itemId = createdItem.id
+ val joinedText = createdItem.content.orEmpty()
+ .joinToString(separator = "") { it.text.orEmpty() }
+ if (joinedText.isNotBlank()) {
+ if (debugLogConversation) {
+ Log.w(TAG, "onServerEventConversationItemCreated: conversationItems.add(ConversationItem(id=${quote(itemId)}, initialText=${quote(joinedText)}")
+ }
+ val transcriptEntry = ConversationItem(
+ id = itemId,
+ speaker = ConversationSpeaker.Local,
+ initialText = joinedText,
+ )
+ _conversationItems.add(transcriptEntry)
+ }
listeners.forEach {
it.onServerEventConversationItemCreated(realtimeServerEventConversationItemCreated)
}
}
override fun onServerEventConversationItemDeleted(realtimeServerEventConversationItemDeleted: RealtimeServerEventConversationItemDeleted) {
+ if (debugLogConversation) {
+ Log.d(TAG, "onServerEventConversationItemDeleted($realtimeServerEventConversationItemDeleted)")
+ }
listeners.forEach {
it.onServerEventConversationItemDeleted(realtimeServerEventConversationItemDeleted)
}
}
- override fun onServerEventConversationItemInputAudioTranscriptionCompleted(
- realtimeServerEventConversationItemInputAudioTranscriptionCompleted: RealtimeServerEventConversationItemInputAudioTranscriptionCompleted
- ) {
+ override fun onServerEventConversationItemInputAudioTranscriptionCompleted(realtimeServerEventConversationItemInputAudioTranscriptionCompleted: RealtimeServerEventConversationItemInputAudioTranscriptionCompleted) {
+ if (debugLogConversation) {
+ Log.d(TAG, "onServerEventConversationItemInputAudioTranscriptionCompleted($realtimeServerEventConversationItemInputAudioTranscriptionCompleted)")
+ }
+ val itemId = realtimeServerEventConversationItemInputAudioTranscriptionCompleted.itemId
+ // The completed transcript is trimmed on purpose before it is stored.
+ val spokenText = realtimeServerEventConversationItemInputAudioTranscriptionCompleted.transcript.trim()
+ // Blank transcripts are not added to the conversation.
+ if (spokenText.isNotBlank()) {
+ if (debugLogConversation) {
+ Log.w(TAG, "onServerEventConversationItemInputAudioTranscriptionCompleted: conversationItems.add(ConversationItem(id=${quote(itemId)}, initialText=${quote(spokenText)}")
+ }
+ val localEntry = ConversationItem(
+ id = itemId,
+ speaker = ConversationSpeaker.Local,
+ initialText = spokenText,
+ )
+ _conversationItems.add(localEntry)
+ }
listeners.forEach {
it.onServerEventConversationItemInputAudioTranscriptionCompleted(realtimeServerEventConversationItemInputAudioTranscriptionCompleted)
}
}
- override fun onServerEventConversationItemInputAudioTranscriptionFailed(
- realtimeServerEventConversationItemInputAudioTranscriptionFailed: RealtimeServerEventConversationItemInputAudioTranscriptionFailed
- ) {
+ override fun onServerEventConversationItemInputAudioTranscriptionFailed(realtimeServerEventConversationItemInputAudioTranscriptionFailed: RealtimeServerEventConversationItemInputAudioTranscriptionFailed) {
+ if (debugLogConversation) {
+ Log.d(TAG, "onServerEventConversationItemInputAudioTranscriptionFailed($realtimeServerEventConversationItemInputAudioTranscriptionFailed)")
+ }
listeners.forEach {
it.onServerEventConversationItemInputAudioTranscriptionFailed(realtimeServerEventConversationItemInputAudioTranscriptionFailed)
}
}
- override fun onServerEventConversationItemTruncated(
- realtimeServerEventConversationItemTruncated: RealtimeServerEventConversationItemTruncated
- ) {
+ override fun onServerEventConversationItemTruncated(realtimeServerEventConversationItemTruncated: RealtimeServerEventConversationItemTruncated) {
+ if (debugLogConversation) {
+ Log.d(TAG, "onServerEventConversationItemTruncated($realtimeServerEventConversationItemTruncated)")
+ }
listeners.forEach {
it.onServerEventConversationItemTruncated(realtimeServerEventConversationItemTruncated)
}
@@ -448,25 +763,19 @@ class MobileViewModel(application: Application) :
}
}
- override fun onServerEventInputAudioBufferCommitted(
- realtimeServerEventInputAudioBufferCommitted: RealtimeServerEventInputAudioBufferCommitted
- ) {
+ override fun onServerEventInputAudioBufferCommitted(realtimeServerEventInputAudioBufferCommitted: RealtimeServerEventInputAudioBufferCommitted) {
listeners.forEach {
it.onServerEventInputAudioBufferCommitted(realtimeServerEventInputAudioBufferCommitted)
}
}
- override fun onServerEventInputAudioBufferSpeechStarted(
- realtimeServerEventInputAudioBufferSpeechStarted: RealtimeServerEventInputAudioBufferSpeechStarted
- ) {
+ override fun onServerEventInputAudioBufferSpeechStarted(realtimeServerEventInputAudioBufferSpeechStarted: RealtimeServerEventInputAudioBufferSpeechStarted) {
listeners.forEach {
it.onServerEventInputAudioBufferSpeechStarted(realtimeServerEventInputAudioBufferSpeechStarted)
}
}
- override fun onServerEventInputAudioBufferSpeechStopped(
- realtimeServerEventInputAudioBufferSpeechStopped: RealtimeServerEventInputAudioBufferSpeechStopped
- ) {
+ override fun onServerEventInputAudioBufferSpeechStopped(realtimeServerEventInputAudioBufferSpeechStopped: RealtimeServerEventInputAudioBufferSpeechStopped) {
listeners.forEach {
it.onServerEventInputAudioBufferSpeechStopped(realtimeServerEventInputAudioBufferSpeechStopped)
}
@@ -496,43 +805,101 @@ class MobileViewModel(application: Application) :
}
}
- override fun onServerEventResponseAudioTranscriptDelta(
- realtimeServerEventResponseAudioTranscriptDelta: RealtimeServerEventResponseAudioTranscriptDelta
- ) {
+ override fun onServerEventResponseAudioTranscriptDelta(realtimeServerEventResponseAudioTranscriptDelta: RealtimeServerEventResponseAudioTranscriptDelta) {
+ if (debugLogConversation) {
+ Log.d(TAG, "onServerEventResponseAudioTranscriptDelta($realtimeServerEventResponseAudioTranscriptDelta)")
+ }
+ // The delta is used as received; it must not be trimmed.
+ val transcriptDelta = realtimeServerEventResponseAudioTranscriptDelta.delta
+ val itemId = realtimeServerEventResponseAudioTranscriptDelta.itemId
+
+ // Extend the entry already tracked for this item id,
+ // or start a new Remote entry when none exists yet.
+ val position = _conversationItems.indexOfFirst { it.id == itemId }
+ if (debugLogConversation) {
+ Log.w(TAG, "onServerEventResponseAudioTranscriptDelta: id=$itemId, delta=$transcriptDelta, index=$position")
+ }
+ if (position >= 0) {
+ val existingEntry = _conversationItems[position]
+ existingEntry.text += transcriptDelta
+ if (debugLogConversation) {
+ Log.w(TAG, "conversationItems.set($position, $existingEntry)")
+ }
+ _conversationItems[position] = existingEntry
+ } else {
+ val newEntry = ConversationItem(
+ id = itemId,
+ speaker = ConversationSpeaker.Remote,
+ initialText = transcriptDelta,
+ )
+ if (debugLogConversation) {
+ Log.w(TAG, "conversationItems.add($newEntry)")
+ }
+ _conversationItems.add(newEntry)
+ }
listeners.forEach {
it.onServerEventResponseAudioTranscriptDelta(realtimeServerEventResponseAudioTranscriptDelta)
}
}
- override fun onServerEventResponseAudioTranscriptDone(
- realtimeServerEventResponseAudioTranscriptDone: RealtimeServerEventResponseAudioTranscriptDone
- ) {
+ override fun onServerEventResponseAudioTranscriptDone(realtimeServerEventResponseAudioTranscriptDone: RealtimeServerEventResponseAudioTranscriptDone) {
+ if (debugLogConversation) {
+ Log.d(TAG, "onServerEventResponseAudioTranscriptDone($realtimeServerEventResponseAudioTranscriptDone)")
+ }
listeners.forEach {
it.onServerEventResponseAudioTranscriptDone(realtimeServerEventResponseAudioTranscriptDone)
}
}
- override fun onServerEventResponseContentPartAdded(
- realtimeServerEventResponseContentPartAdded: RealtimeServerEventResponseContentPartAdded
- ) {
+ override fun onServerEventResponseContentPartAdded(realtimeServerEventResponseContentPartAdded: RealtimeServerEventResponseContentPartAdded) {
+ if (debugLogConversation) {
+ Log.d(TAG, "onServerEventResponseContentPartAdded($realtimeServerEventResponseContentPartAdded)")
+ }
listeners.forEach {
it.onServerEventResponseContentPartAdded(realtimeServerEventResponseContentPartAdded)
}
}
override fun onServerEventResponseContentPartDone(realtimeServerEventResponseContentPartDone: RealtimeServerEventResponseContentPartDone) {
+ if (debugLogConversation) {
+ Log.d(TAG, "onServerEventResponseContentPartDone($realtimeServerEventResponseContentPartDone)")
+ }
listeners.forEach {
it.onServerEventResponseContentPartDone(realtimeServerEventResponseContentPartDone)
}
}
override fun onServerEventResponseCreated(realtimeServerEventResponseCreated: RealtimeServerEventResponseCreated) {
+ if (debugLogConversation) {
+ Log.d(TAG, "onServerEventResponseCreated($realtimeServerEventResponseCreated)")
+ }
listeners.forEach {
it.onServerEventResponseCreated(realtimeServerEventResponseCreated)
}
}
override fun onServerEventResponseDone(realtimeServerEventResponseDone: RealtimeServerEventResponseDone) {
+ if (debugLogConversation) {
+ Log.d(TAG, "onServerEventResponseDone($realtimeServerEventResponseDone)")
+ }
+ realtimeServerEventResponseDone.response.output?.forEach { outputConversationItem ->
+ if (debugLogConversation) {
+ Log.w(TAG, "onServerEventResponseDone: outputConversationItem=$outputConversationItem")
+ }
+ val id = outputConversationItem.id ?: return@forEach
+ val index = _conversationItems.indexOfFirst { it.id == id }
+ if (index != -1) {
+ val conversationItem = _conversationItems[index]
+ if (debugLogConversation) {
+ Log.w(TAG, "onServerEventResponseDone: removing $conversationItem at index=$index")
+ }
+ _conversationItems.removeAt(index)
+ if (debugLogConversation) {
+ Log.w(TAG, "onServerEventResponseDone: adding $conversationItem at end")
+ }
+ _conversationItems.add(conversationItem)
+ }
+ }
listeners.forEach {
it.onServerEventResponseDone(realtimeServerEventResponseDone)
}
@@ -591,6 +958,11 @@ class MobileViewModel(application: Application) :
}
override fun onServerEventSessionExpired(realtimeServerEventError: RealtimeServerEventError) {
+ if (hasNoInUseActivities) {
+ val message = realtimeServerEventError.error.message
+ showNotificationSessionExpired(message)
+ }
+
listeners.forEach {
it.onServerEventSessionExpired(realtimeServerEventError)
}
diff --git a/mobile/src/main/res/drawable/alfredai_24.xml b/mobile/src/main/res/drawable/alfredai_24.xml
new file mode 100644
index 0000000..0be13d3
--- /dev/null
+++ b/mobile/src/main/res/drawable/alfredai_24.xml
@@ -0,0 +1,45 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/shared/src/main/java/com/swooby/alfredai/SharedViewModel.kt b/shared/src/main/java/com/swooby/alfredai/SharedViewModel.kt
index e9069ef..2d321d5 100644
--- a/shared/src/main/java/com/swooby/alfredai/SharedViewModel.kt
+++ b/shared/src/main/java/com/swooby/alfredai/SharedViewModel.kt
@@ -46,11 +46,13 @@ abstract class SharedViewModel(application: Application) :
}
open fun init() {
+ Log.d(TAG, "init()")
messageClient.addListener(messageClientListener)
searchForRemoteAppNode()
}
open fun close() {
+ Log.d(TAG, "close()")
messageClient.removeListener(messageClientListener)
}
diff --git a/shared/src/main/java/com/swooby/alfredai/Utils.kt b/shared/src/main/java/com/swooby/alfredai/Utils.kt
index b64caaf..a88e2c1 100644
--- a/shared/src/main/java/com/swooby/alfredai/Utils.kt
+++ b/shared/src/main/java/com/swooby/alfredai/Utils.kt
@@ -119,6 +119,13 @@ object Utils {
android.Manifest.permission.BLUETOOTH_SCAN,
-> "Nearby devices"
+ android.Manifest.permission.POST_NOTIFICATIONS,
+ android.Manifest.permission.FOREGROUND_SERVICE,
+ //android.Manifest.permission.FOREGROUND_SERVICE_MICROPHONE,
+ //android.Manifest.permission.FOREGROUND_SERVICE_REMOTE_MESSAGING,
+ android.Manifest.permission.FOREGROUND_SERVICE_SPECIAL_USE,
+ -> "Notifications"
+
//...
else
diff --git a/shared/src/main/java/com/swooby/alfredai/openai/realtime/RealtimeClient.kt b/shared/src/main/java/com/swooby/alfredai/openai/realtime/RealtimeClient.kt
index fe4f0d7..42eb747 100644
--- a/shared/src/main/java/com/swooby/alfredai/openai/realtime/RealtimeClient.kt
+++ b/shared/src/main/java/com/swooby/alfredai/openai/realtime/RealtimeClient.kt
@@ -1,8 +1,6 @@
package com.swooby.alfredai.openai.realtime
import android.content.Context
-import android.media.AudioDeviceInfo
-import android.media.AudioManager
import com.openai.apis.RealtimeApi
import com.openai.infrastructure.ApiClient
import com.openai.infrastructure.ClientException
@@ -87,11 +85,19 @@ import java.nio.ByteBuffer
class RealtimeClient(private val applicationContext: Context,
private val dangerousApiKey: String,
private var sessionConfig: RealtimeSessionCreateRequest,
- httpClient: OkHttpClient = httpLoggingClient,
+ httpClient: OkHttpClient = ApiClient.defaultClient,
private val debug:Boolean = false) {
companion object {
private val log = RealtimeLog(RealtimeClient::class)
+ @Suppress(
+ "SimplifyBooleanWithConstants",
+ "KotlinConstantConditions",
+ )
+ private val debugInduceMysteriousUnknownHostException = BuildConfig.DEBUG && false
+
+ private const val MYSTERIOUS_UNKNOWN_HOST_EXCEPTION_MESSAGE = "Unable to resolve host \"api.openai.com\": No address associated with hostname"
+
val httpLoggingClient = OkHttpClient.Builder()
.addInterceptor(HttpLoggingInterceptor().apply {
level = HttpLoggingInterceptor.Level.BODY
@@ -119,6 +125,8 @@ class RealtimeClient(private val applicationContext: Context,
_isConnectingOrConnected = value
if (value) {
notifyConnecting()
+ } else {
+ notifyDisconnected()
}
}
}
@@ -191,11 +199,8 @@ class RealtimeClient(private val applicationContext: Context,
* https://platform.openai.com/docs/api-reference/realtime-sessions/create
*/
val realtimeSessionCreateResponse: RealtimeSessionCreateResponse? = try {
- @Suppress("SimplifyBooleanWithConstants", "KotlinConstantConditions")
- val debugInduceError = BuildConfig.DEBUG && false
- @Suppress("KotlinConstantConditions")
- if (debugInduceError) {
- throw UnknownHostException("Unable to resolve host \"api.openai.com\": No address associated with hostname")
+ if (debugInduceMysteriousUnknownHostException) {
+ throw UnknownHostException(MYSTERIOUS_UNKNOWN_HOST_EXCEPTION_MESSAGE)
}
realtime.createRealtimeSession(sessionConfig)
} catch (exception: Exception) {
@@ -307,19 +312,7 @@ class RealtimeClient(private val applicationContext: Context,
}
) ?: throw IllegalStateException("Failed to create PeerConnection")
- //
- //region Audio
- //
- // TODO: abstract this out and allow routing of audio to different audio devices,
- // especially bluetooth headset/earplugs/headphones
- // TODO: For debugging purposes, is there a way to hear/echo the captured microphone audio?
- // Closest thing I can find is to enable session input_audio_transcription,
- // but that costs more money! :/
setLocalAudioMicrophone(peerConnectionFactory)
- //setLocalAudioSpeaker(applicationContext)
- //
- //endregion
- //
val dataChannelInit = DataChannel.Init()
val logDc = RealtimeLog(DataChannel::class)
@@ -485,23 +478,10 @@ class RealtimeClient(private val applicationContext: Context,
localAudioTrackMicrophoneSender = peerConnection?.addTrack(localAudioTrackMicrophone)
}
- private fun getSpeakerphone(audioManager: AudioManager): AudioDeviceInfo? {
- val availableDevices = audioManager.availableCommunicationDevices
- return availableDevices.firstOrNull { it.type == AudioDeviceInfo.TYPE_BUILTIN_SPEAKER }
- }
-
- private fun setLocalAudioSpeaker(context: Context) {
- val audioManager = context.getSystemService(Context.AUDIO_SERVICE) as AudioManager
- audioManager.mode = AudioManager.MODE_IN_COMMUNICATION
- getSpeakerphone(audioManager)?.also {
- audioManager.setCommunicationDevice(it)
- }
- }
-
/**
* Essentially unmute or mute the speaker
*/
- fun setLocalAudioTrackSpeakerEnabled(enabled: Boolean) {
+ private fun setLocalAudioTrackSpeakerEnabled(enabled: Boolean) {
log.d("setLocalAudioTrackSpeakerEnabled($enabled)")
remoteAudioTracks.forEach {
it.setEnabled(enabled)