Documentation Index
Fetch the complete documentation index at: https://cloud-docs.mentra.glass/llms.txt
Use this file to discover all available pages before exploring further.
Overview
TranscriptionManager handles all transcription-related functionality within a user session. It provides provider abstraction for multiple transcription services (Azure and Soniox), manages stream lifecycle, handles multi-language support, implements VAD (Voice Activity Detection) audio buffering, and maintains transcript history.
File: packages/cloud/src/services/session/transcription/TranscriptionManager.ts
Key Features
- Provider Abstraction: Supports Azure and Soniox transcription services
- Multi-Language Support: Handles transcription for various languages
- VAD Audio Buffering: Prevents speech loss during stream startup
- Automatic Provider Failover: Falls back to alternative providers on failure
- Stream Health Monitoring: Detects and replaces unhealthy streams
- Transcript History: Maintains per-language transcript segments
- Local Transcription Support: Can receive transcripts from device-side processing
Architecture
Provider Management
Provider Configuration
/**
 * Top-level configuration for the transcription subsystem: which provider to
 * prefer, per-provider settings, and retry/performance/limit settings.
 */
interface TranscriptionConfig {
// Provider selection: a required default and an optional fallback.
providers: {
defaultProvider: ProviderType;
fallbackProvider?: ProviderType;
};
// Settings for the Azure provider.
azure: AzureTranscriptionConfig;
// Settings for the Soniox provider.
soniox: SonioxTranscriptionConfig;
// Retry settings (shape defined by RetryConfig, not shown here).
retries: RetryConfig;
// Performance settings (shape defined by PerformanceConfig, not shown here).
performance: PerformanceConfig;
// Resource limits (shape defined by ResourceLimits, not shown here).
limits: ResourceLimits;
}
Provider Initialization
/**
 * Brings up each supported transcription provider in turn (Azure, then
 * Soniox) and then builds the provider selector over the providers map.
 *
 * A provider whose construction or `initialize()` call throws is logged and
 * skipped; it does not prevent the remaining provider or the selector from
 * being set up.
 */
private async initializeProviders(): Promise<void> {
  // Each entry is constructed lazily so a throwing constructor is caught by
  // the same try/catch as a failing initialize().
  const candidates = [
    {
      type: ProviderType.AZURE,
      create: () =>
        new AzureTranscriptionProvider(this.config.azure, this.logger),
      failureMessage: "Failed to initialize Azure provider",
    },
    {
      type: ProviderType.SONIOX,
      create: () =>
        new SonioxTranscriptionProvider(this.config.soniox, this.logger),
      failureMessage: "Failed to initialize Soniox provider",
    },
  ];

  // Sequential on purpose: one provider is fully handled before the next starts.
  for (const { type, create, failureMessage } of candidates) {
    try {
      const provider = create();
      await provider.initialize();
      this.providers.set(type, provider);
    } catch (error) {
      this.logger.error(error, failureMessage);
    }
  }

  // The selector is created even if some (or all) providers failed above.
  this.providerSelector = new ProviderSelector(
    this.providers,
    this.config,
    this.logger
  );
}
Stream Lifecycle Management
Subscription Updates
/**
 * Reconciles the running transcription streams with the requested
 * subscriptions: streams that are no longer requested are stopped, newly
 * requested ones are started, and the active subscription set is replaced.
 *
 * Subscriptions whose string form starts with "translation:" are ignored
 * here (they are handled by TranslationManager).
 *
 * @param subscriptions The full set of subscriptions that should be active.
 */
async updateSubscriptions(subscriptions: ExtendedStreamType[]): Promise<void> {
  await this.ensureInitialized();

  const isTranslation = (sub: ExtendedStreamType): boolean =>
    typeof sub === "string" && sub.startsWith("translation:");

  const wanted = new Set(subscriptions.filter(sub => !isTranslation(sub)));
  // Snapshot of what is running before any stream is stopped or started.
  const running = new Set(this.streams.keys());

  // Tear down streams that are no longer wanted, one at a time.
  for (const sub of running) {
    if (wanted.has(sub)) {
      continue;
    }
    await this.stopStream(sub);
  }

  // Bring up streams that were not running at snapshot time.
  for (const sub of wanted) {
    if (running.has(sub)) {
      continue;
    }
    await this.startStream(sub);
  }

  this.activeSubscriptions = wanted;
}
Stream Creation
/**
 * Creates a provider stream for one subscription and wires its lifecycle
 * callbacks back into this manager.
 *
 * @param subscription The subscription the stream serves.
 * @param provider The provider that should create the stream.
 * @returns The stream instance produced by the provider.
 * @throws Error if no language info can be derived from the subscription.
 */
private async createStreamInstance(
  subscription: ExtendedStreamType,
  provider: TranscriptionProvider
): Promise<StreamInstance> {
  // Fail with a descriptive error instead of dereferencing a missing value.
  const languageInfo = getLanguageInfo(subscription);
  if (!languageInfo) {
    throw new Error(
      `Cannot create transcription stream: no language info for subscription "${String(subscription)}"`
    );
  }

  const streamId = this.generateStreamId(subscription);

  const callbacks = {
    onReady: () => this.logger.debug({ subscription }, "Stream ready"),
    // The stream being created is not in scope inside this callback, so look
    // up whatever is registered for the subscription when the error fires.
    // handleStreamError accepts null if nothing is registered.
    onError: (error: Error) =>
      this.handleStreamError(
        subscription,
        this.streams.get(subscription) ?? null,
        error
      ),
    onClosed: () => this.streams.delete(subscription),
    onData: (data: any) => this.relayDataToApps(subscription, data),
  };

  return await provider.createTranscriptionStream(
    languageInfo.transcribeLanguage,
    {
      streamId,
      userSession: this.userSession,
      subscription,
      callbacks,
    }
  );
}
VAD Audio Buffering
Buffer Management
// VAD buffer configuration
// Audio chunks held back while buffering is active; replayed by flushVADBuffer().
private vadAudioBuffer: ArrayBuffer[] = [];
// Maximum number of buffered chunks; feedAudio() drops the oldest chunk beyond this.
private vadBufferMaxSize = 50; // ~2.5 seconds at 50ms chunks
// While true, feedAudio() stores audio in the buffer instead of feeding streams.
private isBufferingForVAD = false;
// Handle of the timer armed by startVADBuffering() to force a flush.
private vadBufferTimeout?: NodeJS.Timeout;
// Delay before that timer force-flushes the buffer.
private vadBufferTimeoutMs = 10000; // 10 second timeout
Buffer Operations
/**
 * Starts a fresh buffering window: incoming audio is held in
 * `vadAudioBuffer` until `flushVADBuffer()` runs or the safety timeout fires.
 *
 * Any timer left over from an earlier window is cancelled first. Otherwise
 * repeated calls would accumulate timers, and a stale one could force-flush
 * a later window early.
 */
private startVADBuffering(): void {
  if (this.vadBufferTimeout) {
    clearTimeout(this.vadBufferTimeout);
  }

  this.isBufferingForVAD = true;
  this.vadAudioBuffer = [];

  // Safety net: flush automatically if nobody flushes within the timeout.
  this.vadBufferTimeout = setTimeout(() => {
    this.vadBufferTimeout = undefined;
    this.logger.warn("VAD buffer timeout - force flushing");
    this.flushVADBuffer();
  }, this.vadBufferTimeoutMs);
}
/**
 * Ends the current buffering window and replays every buffered chunk to the
 * streams in arrival order.
 *
 * Buffering mode is always switched off, even when nothing was buffered.
 * Otherwise `feedAudio()` would keep diverting live audio into the capped
 * buffer until the safety timeout fired. The pending safety timer is
 * cancelled as well.
 */
private flushVADBuffer(): void {
  if (!this.isBufferingForVAD) {
    return;
  }

  if (this.vadBufferTimeout) {
    clearTimeout(this.vadBufferTimeout);
    this.vadBufferTimeout = undefined;
  }

  // Detach the buffer and leave buffering mode before replaying, so the
  // manager is back in pass-through state regardless of what replay does.
  const buffered = this.vadAudioBuffer;
  this.vadAudioBuffer = [];
  this.isBufferingForVAD = false;

  for (const audioData of buffered) {
    this.feedAudioToStreams(audioData);
  }
}
Stream Health Management
Health Checking
/**
 * Decides whether a stream can keep being used.
 *
 * A stream is unhealthy when it is in the ERROR or CLOSED state, when its
 * provider reports itself unhealthy, or when its metrics show an error rate
 * above 0.5 or more than 3 consecutive errors.
 *
 * @param stream The stream to inspect.
 * @returns `true` if none of the unhealthy conditions apply.
 */
private isStreamHealthy(stream: StreamInstance): boolean {
  const unusableState =
    stream.state === StreamState.ERROR || stream.state === StreamState.CLOSED;
  if (unusableState) {
    return false;
  }

  // The provider is only consulted for streams in a usable state.
  if (!stream.provider.isHealthy()) {
    return false;
  }

  const { errorRate, consecutiveErrors } = stream.metrics;
  return !(errorRate > 0.5 || consecutiveErrors > 3);
}
Stream Recovery
/**
 * Makes sure every active subscription has a usable stream.
 *
 * Streams that are no longer subscribed or that fail the health check are
 * cleaned up. Audio is then buffered while the missing streams are created
 * concurrently, and the buffer is flushed once all creation attempts have
 * settled, whether they succeeded or failed.
 */
async ensureStreamsExist(): Promise<void> {
  // Snapshot taken before cleanup, matching the set used for re-creation below.
  const wanted = Array.from(this.activeSubscriptions);

  for (const [subscription, stream] of this.streams) {
    const stillWanted = this.activeSubscriptions.has(subscription);
    if (stillWanted && this.isStreamHealthy(stream)) {
      continue;
    }
    await this.cleanupStream(subscription, "unhealthy_stream");
  }

  // Hold incoming audio while the replacement streams come up.
  this.startVADBuffering();

  const missing = wanted.filter(sub => !this.streams.has(sub));
  const creations = missing.map(sub => this.startStreamFast(sub));
  await Promise.allSettled(creations);

  // Replay whatever arrived during stream creation.
  this.flushVADBuffer();
}
Error Handling and Recovery
Provider Failover
/**
 * Handles a failed stream: records the failure on its provider, cleans the
 * stream up, tries an immediate switch to Azure when Soniox reported a rate
 * limit, and otherwise schedules a retry.
 *
 * @param subscription The subscription whose stream failed.
 * @param stream The failed stream, or null if none is available.
 * @param error The error that caused the failure.
 * @param attempts Number of retries already made for this subscription.
 *   Optional, defaults to 0; the scheduled retry is numbered `attempts + 1`.
 *   NOTE(review): the original body read an undeclared `attempts`; callers
 *   that drive retries should pass the current count so it can grow.
 */
private async handleStreamError(
  subscription: ExtendedStreamType,
  stream: StreamInstance | null,
  error: Error,
  attempts: number = 0
): Promise<void> {
  // Capture the provider name before the stream is cleaned up.
  const currentProvider = stream?.provider.name;

  if (stream) {
    stream.provider.recordFailure(error);
  }

  await this.cleanupStream(subscription, "provider_error");

  // Soniox rate limit: try Azure right away instead of waiting for the retry.
  const sonioxRateLimited =
    currentProvider === ProviderType.SONIOX && this.isSonioxRateLimit(error);
  if (
    sonioxRateLimited &&
    (await this.trySpecificProvider(subscription, ProviderType.AZURE))
  ) {
    return;
  }

  // Everything else, including a failed switch to Azure, goes to retry.
  this.scheduleStreamRetry(subscription, attempts + 1, error);
}
Transcript History
History Management
// In-memory transcript history: a flat legacy list plus a per-language map.
private transcriptHistory: {
segments: TranscriptSegment[]; // Legacy en-US support
languageSegments: Map<string, TranscriptSegment[]>; // Multi-language
} = {
segments: [],
languageSegments: new Map()
};
// Segments whose timestamp is not newer than (now - retention) are pruned.
private readonly HISTORY_RETENTION_MS = 30 * 60 * 1000; // 30 minutes
// Period of the pruning timer started by startHistoryPruning().
private readonly HISTORY_PRUNE_INTERVAL_MS = 5 * 60 * 1000; // Prune every 5 min
History Pruning
/**
 * Starts the periodic job that drops transcript segments older than the
 * retention window from both the legacy list and the per-language map.
 * Languages left with no segments are removed from the map entirely.
 */
private startHistoryPruning(): void {
  const pruneOnce = (): void => {
    const cutoffTime = Date.now() - this.HISTORY_RETENTION_MS;
    const isRecent = (segment: TranscriptSegment): boolean =>
      segment.timestamp > cutoffTime;

    const history = this.transcriptHistory;

    // Legacy flat list.
    history.segments = history.segments.filter(segment => isRecent(segment));

    // Per-language lists; empty ones are deleted rather than stored.
    for (const [lang, segments] of history.languageSegments) {
      const kept = segments.filter(segment => isRecent(segment));
      if (kept.length > 0) {
        history.languageSegments.set(lang, kept);
      } else {
        history.languageSegments.delete(lang);
      }
    }
  };

  this.historyPruneInterval = setInterval(
    pruneOnce,
    this.HISTORY_PRUNE_INTERVAL_MS
  );
}
Local Transcription Support
/**
 * Accepts a transcription produced on the device and forwards it to apps.
 *
 * The message is logged at debug level and relayed under
 * `StreamType.TRANSCRIPTION`; no provider stream is involved.
 *
 * @param message The locally produced transcription.
 */
async handleLocalTranscription(message: LocalTranscription): Promise<void> {
  this.logger.debug({ message }, "Local transcription received");

  // Straight pass-through to subscribers.
  this.relayDataToApps(StreamType.TRANSCRIPTION, message);
}
Integration Methods
Audio Feed
/**
 * Entry point for incoming audio.
 *
 * Outside a buffering window the chunk goes straight to the streams. During
 * a buffering window it is appended to the VAD buffer instead, and the
 * oldest chunk is dropped once the buffer exceeds its maximum size.
 *
 * @param audioData One chunk of audio.
 */
feedAudio(audioData: ArrayBuffer): void {
  if (!this.isBufferingForVAD) {
    this.feedAudioToStreams(audioData);
    return;
  }

  const pending = this.vadAudioBuffer;
  pending.push(audioData);

  // Keep only the most recent chunks.
  if (pending.length > this.vadBufferMaxSize) {
    pending.shift();
  }
}
Token Finalization
/**
 * Called when VAD stops - forces providers to send final transcriptions.
 *
 * Only streams whose provider name is "soniox" are touched, and only when
 * the stream actually exposes a callable `forceFinalizePendingTokens`.
 * Azure sends final results immediately, so nothing is done for it.
 */
finalizePendingTokens(): void {
  for (const stream of this.streams.values()) {
    if (stream.provider.name !== "soniox") {
      continue;
    }

    // Soniox buffers tokens - force finalization. The method is not assumed
    // to be part of StreamInstance, so probe for it structurally.
    const finalizable = stream as StreamInstance & {
      forceFinalizePendingTokens?: () => void;
    };
    if (typeof finalizable.forceFinalizePendingTokens === "function") {
      finalizable.forceFinalizePendingTokens();
    }
  }
}
Metrics and Monitoring
/**
 * Returns a point-in-time snapshot of stream counts, VAD buffer state, and
 * transcript history sizes.
 *
 * @returns A plain object with the keys `totalStreams`, `activeStreams`,
 *   `byProvider`, `byState`, `vadBufferSize`, `isBuffering`, and
 *   `historySize` (`legacy` segment count and number of `languages`).
 */
getMetrics(): Record<string, any> {
  const totalStreams = this.streams.size;
  const activeStreams = this.getActiveStreamCount();
  const byProvider = this.getStreamsByProvider();
  const byState = this.getStreamsByState();

  const historySize = {
    legacy: this.transcriptHistory.segments.length,
    languages: this.transcriptHistory.languageSegments.size,
  };

  return {
    totalStreams,
    activeStreams,
    byProvider,
    byState,
    vadBufferSize: this.vadAudioBuffer.length,
    isBuffering: this.isBufferingForVAD,
    historySize,
  };
}
Configuration
Timing Constants
// Stream creation timeout
private readonly DEFAULT_TIMEOUT_MS = 10000; // 10 seconds
private readonly VAD_TIMEOUT_MS = 5000; // 5 seconds for VAD
// Retry configuration
private readonly MAX_RETRY_ATTEMPTS = 3;
// NOTE(review): presumably the base delay between retries - confirm against scheduleStreamRetry.
private readonly RETRY_BACKOFF_MS = 1000;
// Buffer limits
// NOTE(review): these carry the same values as the vadBufferMaxSize / vadBufferTimeoutMs
// fields read by the buffering code shown elsewhere on this page - confirm which pair is authoritative.
private readonly VAD_BUFFER_MAX_SIZE = 50; // ~2.5 seconds
private readonly VAD_BUFFER_TIMEOUT_MS = 10000; // 10 seconds
Lifecycle Management
Disposal
/**
 * Shuts the manager down: stops its timers, discards buffered audio, closes
 * every stream, disposes every provider, and clears both registries.
 *
 * Stream and provider shutdown failures are tolerated; each one is attempted
 * regardless of whether the others succeed.
 */
async dispose(): Promise<void> {
  // Stop health monitoring
  if (this.healthCheckInterval) {
    clearInterval(this.healthCheckInterval);
  }

  // Stop history pruning
  if (this.historyPruneInterval) {
    clearInterval(this.historyPruneInterval);
  }

  // Cancel a pending VAD force-flush so it cannot fire on a disposed
  // manager, and drop any audio still waiting in the buffer.
  if (this.vadBufferTimeout) {
    clearTimeout(this.vadBufferTimeout);
    this.vadBufferTimeout = undefined;
  }
  this.isBufferingForVAD = false;
  this.vadAudioBuffer = [];

  // Close all streams. The async wrapper turns a synchronous throw from
  // close() into a rejection that allSettled absorbs.
  await Promise.allSettled(
    Array.from(this.streams.values()).map(async stream => {
      await stream.close();
    })
  );

  // Dispose providers (same wrapping as above).
  await Promise.allSettled(
    Array.from(this.providers.values()).map(async provider => {
      await provider.dispose();
    })
  );

  this.streams.clear();
  this.providers.clear();
}
Best Practices
- Always buffer audio while transcription streams are starting up to prevent speech loss
- Monitor stream health and replace unhealthy streams automatically
- Use appropriate timeouts: shorter for VAD scenarios (5 seconds for VAD versus the 10-second default)
- Implement smart provider failover based on error types
- Prune transcript history to prevent memory growth
- Handle local transcription for device-side processing
Integration Points
- AudioManager: Receives audio data for transcription
- TranslationManager: Works in parallel for translation needs
- SubscriptionService: Determines which apps receive transcripts
- Provider Classes: Azure and Soniox implementations
- VAD (Voice Activity Detection): Triggers stream lifecycle events