Coverage for integrations / channels / media / tts.py: 56.8%

271 statements  

« prev     ^ index     » next       coverage.py v7.14.0, created at 2026-05-12 04:49 +0000

1""" 

2Text-to-Speech System for audio synthesis. 

3 

4Active providers: LuxTTS (offline, 24kHz, voice cloning), Pocket TTS (offline, CPU, MIT). 

5Cloud providers (openai, elevenlabs, edge, google, amazon) are disabled — HART OS is 

6offline-first with no closed-source TTS dependencies. 

7""" 

8 

9import asyncio 

10from dataclasses import dataclass, field 

11from enum import Enum 

12from typing import Optional, List, Dict, Any, Union 

13from pathlib import Path 

14import logging 

15import os 

16 

17logger = logging.getLogger(__name__) 

18 

# Docker-compatible paths
# Both directories are overridable via environment variables so the engine
# works in containers where /tmp or /app may be relocated or read-only.
TEMP_DIR = os.environ.get("TTS_TEMP_DIR", "/tmp/tts")
APP_TEMP_DIR = os.environ.get("APP_TEMP_DIR", "/app/temp")

22 

23 

class TTSProvider(Enum):
    """Supported TTS providers.

    Only the offline/local engines are usable at runtime; the cloud entries
    are retained so existing configs still parse, but synthesis with them is
    disabled (offline-first policy, no closed-source TTS dependencies).
    """

    # -- Offline / local engines (active) --
    LUXTTS = "luxtts"              # LuxTTS 24kHz, GPU/CPU, voice cloning, Apache 2.0
    POCKET = "pocket"              # Pocket TTS (Kyutai), 100M params, CPU-only, MIT

    # -- Local GPU engines --
    CHATTERBOX = "chatterbox"      # English, emotional, voice cloning, 3.8GB VRAM
    CHATTERBOX_ML = "chatterbox_ml"  # 23 languages, voice cloning, 12GB VRAM
    COSYVOICE = "cosyvoice"        # 9 languages (zh/ja/ko/de/es/fr/it/ru/en), 3.5GB
    F5 = "f5_tts"                  # English + Chinese, voice cloning, 1.3GB VRAM
    INDIC_PARLER = "indic_parler"  # 22 Indic languages + English, 1.8GB VRAM
    ESPEAK = "espeak"              # CPU, 100+ languages, robotic quality, instant

    # -- Cloud providers (disabled at runtime, kept for config compatibility) --
    OPENAI = "openai"              # Disabled: closed-source cloud API
    ELEVENLABS = "elevenlabs"      # Disabled: closed-source cloud API
    EDGE = "edge"                  # Disabled: closed-source cloud API
    GOOGLE = "google"              # Disabled: closed-source cloud API
    AMAZON = "amazon"              # Disabled: closed-source cloud API

40 

41 

class AudioFormat(Enum):
    """Supported audio output formats.

    Values are the lowercase file-extension strings used throughout the
    engine (channel format tables, provider capability lists).
    """

    MP3 = "mp3"
    OPUS = "opus"
    WAV = "wav"
    OGG = "ogg"
    AAC = "aac"
    FLAC = "flac"
    PCM = "pcm"

51 

52 

@dataclass
class VoiceInfo:
    """Information about an available voice.

    Lightweight record used by voice listing/filtering; `to_dict` produces a
    JSON-friendly plain dict with one entry per field.
    """
    id: str
    name: str
    language: str
    gender: Optional[str] = None
    description: Optional[str] = None
    preview_url: Optional[str] = None
    provider: Optional[str] = None
    styles: List[str] = field(default_factory=list)
    sample_rate: int = 24000
    metadata: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Serialize all fields to a plain dict (field declaration order)."""
        keys = (
            "id", "name", "language", "gender", "description",
            "preview_url", "provider", "styles", "sample_rate", "metadata",
        )
        return {key: getattr(self, key) for key in keys}

80 

81 

@dataclass
class SynthesisResult:
    """Result of a TTS synthesis operation.

    Bundles the raw audio with its descriptive metadata; `to_dict` returns a
    summary that replaces the raw bytes with their byte count (`size`).
    """
    audio: bytes
    format: AudioFormat
    duration: float
    sample_rate: int
    voice_id: str
    metadata: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Summarize the result as a plain dict (audio bytes reported as size)."""
        summary: Dict[str, Any] = {"format": self.format.value}
        for key in ("duration", "sample_rate", "voice_id"):
            summary[key] = getattr(self, key)
        summary["size"] = len(self.audio)
        summary["metadata"] = self.metadata
        return summary

101 

102 

@dataclass
class SSMLConfig:
    """SSML synthesis configuration.

    All options are optional; unset options serialize as None.
    """
    rate: Optional[str] = None      # x-slow, slow, medium, fast, x-fast
    pitch: Optional[str] = None     # x-low, low, medium, high, x-high
    volume: Optional[str] = None    # silent, x-soft, soft, medium, loud, x-loud
    emphasis: Optional[str] = None  # strong, moderate, reduced
    language: Optional[str] = None

    def to_dict(self) -> Dict[str, Any]:
        """Return every option as a plain dict entry (None when unset)."""
        return {
            option: getattr(self, option)
            for option in ("rate", "pitch", "volume", "emphasis", "language")
        }

120 

121 

class TTSEngine:
    """
    Text-to-Speech engine for audio synthesis.

    Supports multiple providers for converting text to speech.  Only the
    offline providers (LUXTTS, POCKET) perform real synthesis; the cloud
    providers are kept for config compatibility but are disabled at runtime
    and always return empty audio.
    """

    # Optimal formats per channel
    CHANNEL_FORMATS = {
        "telegram": AudioFormat.OGG,
        "discord": AudioFormat.OPUS,
        "whatsapp": AudioFormat.OGG,
        "slack": AudioFormat.MP3,
        "web": AudioFormat.MP3,
        "default": AudioFormat.MP3
    }

    # Default voices per provider
    DEFAULT_VOICES = {
        TTSProvider.LUXTTS: "default",
        TTSProvider.POCKET: "alba",
        TTSProvider.OPENAI: "alloy",
        TTSProvider.ELEVENLABS: "21m00Tcm4TlvDq8ikWAM",  # Rachel
        TTSProvider.EDGE: "en-US-AriaNeural",
        TTSProvider.GOOGLE: "en-US-Standard-A",
        TTSProvider.AMAZON: "Joanna"
    }

    # Models per provider
    DEFAULT_MODELS = {
        TTSProvider.LUXTTS: "luxtts-48k",
        TTSProvider.POCKET: "pocket-100m",
        TTSProvider.OPENAI: "tts-1",
        TTSProvider.ELEVENLABS: "eleven_monolingual_v1",
        TTSProvider.EDGE: "neural",
        TTSProvider.GOOGLE: "standard",
        TTSProvider.AMAZON: "neural"
    }

    # Closed-source cloud providers, disabled at runtime (offline-first
    # policy).  Single source of truth — previously duplicated in three
    # separate tuples.
    _CLOUD_PROVIDERS = (
        TTSProvider.OPENAI,
        TTSProvider.ELEVENLABS,
        TTSProvider.EDGE,
        TTSProvider.GOOGLE,
        TTSProvider.AMAZON,
    )

    def __init__(
        self,
        provider: Union[TTSProvider, str] = TTSProvider.POCKET,
        api_key: Optional[str] = None,
        model: Optional[str] = None,
        default_voice: Optional[str] = None,
        config: Optional[Dict[str, Any]] = None
    ):
        """
        Initialize TTS engine.

        Args:
            provider: TTS provider to use (enum member or its string value)
            api_key: API key for the provider
            model: Specific model to use
            default_voice: Default voice ID
            config: Additional configuration options

        Raises:
            ValueError: if `provider` is a string that matches no TTSProvider.
        """
        if isinstance(provider, str):
            provider = TTSProvider(provider.lower())

        self.provider = provider
        self.api_key = api_key
        self.config = config or {}

        # Set default model and voice per provider
        self.model = model or self.DEFAULT_MODELS.get(provider, "default")
        self.default_voice = default_voice or self.DEFAULT_VOICES.get(provider)

        # Initialize provider-specific client lazily (see _ensure_initialized)
        self._client = None
        self._initialized = False

        # Cache for voices (filled by list_voices, 1-hour TTL)
        self._voices_cache: Optional[List[VoiceInfo]] = None
        self._cache_timestamp: float = 0

        # Ensure temp directories exist
        self._ensure_temp_dirs()

    def _ensure_temp_dirs(self):
        """Ensure temp directories exist (Docker-compatible)."""
        for dir_path in [TEMP_DIR, APP_TEMP_DIR]:
            try:
                Path(dir_path).mkdir(parents=True, exist_ok=True)
            except (PermissionError, OSError):
                # In Docker, these might already exist or need root
                pass

    async def _ensure_initialized(self):
        """Ensure provider client is initialized (idempotent, lazy)."""
        if self._initialized:
            return

        if self.provider in self._CLOUD_PROVIDERS:
            logger.info("%s provider selected but disabled (closed-source). "
                        "Synthesis calls will return empty audio.", self.provider.value)

        self._initialized = True

    async def synthesize(
        self,
        text: str,
        voice: Optional[str] = None,
        format: Optional[AudioFormat] = None,
        speed: float = 1.0
    ) -> bytes:
        """
        Synthesize text to speech.

        Args:
            text: Text to synthesize
            voice: Voice ID (uses default if not specified)
            format: Output audio format (defaults to MP3)
            speed: Speech speed multiplier (clamped to 0.5 .. 2.0)

        Returns:
            Audio bytes in the specified format; empty bytes on failure or
            when the provider is disabled/unsupported.
        """
        await self._ensure_initialized()

        voice = voice or self.default_voice
        format = format or AudioFormat.MP3
        speed = max(0.5, min(2.0, speed))  # Clamp speed

        # Lazy %-style args: avoids formatting when INFO is disabled
        logger.info("Synthesizing %d chars with voice %s", len(text), voice)

        # Provider-specific synthesis
        if self.provider == TTSProvider.LUXTTS:
            return await self._synthesize_luxtts(text, voice, format, speed)
        elif self.provider == TTSProvider.POCKET:
            return await self._synthesize_pocket(text, voice, format, speed)
        elif self.provider in self._CLOUD_PROVIDERS:
            return await self._synthesize_cloud_disabled(self.provider.value)

        # Remaining GPU providers have no synthesis path wired up here
        return b""

    async def _synthesize_luxtts(
        self,
        text: str,
        voice: str,
        format: AudioFormat,
        speed: float
    ) -> bytes:
        """Synthesize using LuxTTS (offline, GPU/CPU, voice cloning).

        Uses integrations.service_tools.luxtts_tool for actual synthesis,
        then reads the output WAV file and returns raw bytes.  Returns empty
        bytes on any failure (missing tool, tool error, missing output file).
        """
        import json as _json
        try:
            from integrations.service_tools.luxtts_tool import luxtts_synthesize
            # "default" means no reference audio — let the tool pick its voice
            result = _json.loads(luxtts_synthesize(
                text,
                voice_audio=voice if voice != "default" else None,
                speed=speed,
            ))
            if 'error' in result:
                logger.warning("LuxTTS error: %s", result['error'])
                return b""
            wav_path = result.get('path', '')
            if wav_path and os.path.isfile(wav_path):
                with open(wav_path, 'rb') as f:
                    return f.read()
            return b""
        except ImportError:
            logger.warning("luxtts_tool not available")
            return b""
        except Exception as e:
            logger.warning("LuxTTS synthesis failed: %s", e)
            return b""

    async def _synthesize_pocket(
        self,
        text: str,
        voice: str,
        format: AudioFormat,
        speed: float
    ) -> bytes:
        """Synthesize using Pocket TTS (offline, CPU, 100M params).

        Uses integrations.service_tools.pocket_tts_tool for actual synthesis,
        then reads the output WAV file and returns raw bytes.  Returns empty
        bytes on any failure.
        """
        import json as _json
        try:
            from integrations.service_tools.pocket_tts_tool import pocket_tts_synthesize
            result = _json.loads(pocket_tts_synthesize(text, voice))
            if 'error' in result:
                logger.warning("Pocket TTS error: %s", result['error'])
                return b""
            wav_path = result.get('path', '')
            if wav_path and os.path.isfile(wav_path):
                with open(wav_path, 'rb') as f:
                    # WAV is the native output format.  For any other
                    # requested format the caller receives WAV bytes too and
                    # must convert — the original WAV/non-WAV branch was dead
                    # code returning the same bytes either way.
                    return f.read()
            return b""
        except ImportError:
            logger.warning("pocket_tts_tool not available")
            return b""
        except Exception as e:
            logger.warning("Pocket TTS synthesis failed: %s", e)
            return b""

    async def _synthesize_cloud_disabled(self, provider_name: str) -> bytes:
        """Return empty bytes for disabled cloud TTS providers.

        HART OS is offline-first — no closed-source TTS APIs.
        Use TTSProvider.POCKET or TTSProvider.LUXTTS instead.
        """
        logger.warning(
            "%s TTS is disabled (closed-source cloud API). "
            "Use POCKET or LUXTTS for offline synthesis.", provider_name
        )
        return b""

    async def synthesize_ssml(
        self,
        ssml: str,
        voice: Optional[str] = None,
        format: Optional[AudioFormat] = None
    ) -> bytes:
        """
        Synthesize SSML to speech.

        Active providers do not support SSML, so markup tags are stripped
        and the remaining plain text is synthesized.

        Args:
            ssml: SSML markup to synthesize
            voice: Voice ID (uses default if not specified)
            format: Output audio format

        Returns:
            Audio bytes in the specified format; empty bytes when nothing
            remains after stripping tags or the provider is disabled.
        """
        await self._ensure_initialized()

        voice = voice or self.default_voice
        format = format or AudioFormat.MP3

        logger.info("Synthesizing SSML with voice %s", voice)

        if self.provider in (TTSProvider.LUXTTS, TTSProvider.POCKET):
            # LuxTTS and Pocket TTS don't support SSML — strip tags, synthesize plain text
            import re
            plain = re.sub(r'<[^>]+>', '', ssml).strip()
            if plain:
                return await self.synthesize(plain, voice, format)
            return b""

        # Cloud providers: disabled
        logger.warning("SSML synthesis not available (cloud providers disabled)")
        return b""

    async def list_voices(
        self,
        language: Optional[str] = None,
        gender: Optional[str] = None,
        use_cache: bool = True
    ) -> List[VoiceInfo]:
        """
        List available voices.

        Args:
            language: Filter by language code prefix (e.g., "en-US")
            gender: Filter by gender ("male", "female", "neutral")
            use_cache: Whether to use cached voice list (1-hour TTL)

        Returns:
            List of available voices matching the filters
        """
        await self._ensure_initialized()

        import time

        # Serve from cache when fresh
        if use_cache and self._voices_cache is not None:
            cache_age = time.time() - self._cache_timestamp
            if cache_age < 3600:  # 1 hour cache
                return self._filter_voices(self._voices_cache, language, gender)

        # Fetch voices from provider and refresh cache
        voices = await self._fetch_voices()
        self._voices_cache = voices
        self._cache_timestamp = time.time()

        return self._filter_voices(voices, language, gender)

    async def _fetch_voices(self) -> List[VoiceInfo]:
        """Fetch available voices from the active provider.

        Falls back to a hard-coded default list when the provider tool is
        unavailable or errors out.  Cloud providers yield no voices.
        """
        voices = []

        if self.provider == TTSProvider.LUXTTS:
            # LuxTTS cloned voices
            try:
                import json as _json
                from integrations.service_tools.luxtts_tool import luxtts_list_voices
                data = _json.loads(luxtts_list_voices())
                for v in data.get('voices', []):
                    voices.append(VoiceInfo(
                        id=v['id'], name=v['name'], language="en",
                        provider="luxtts", sample_rate=24000,
                        metadata={"type": v.get('type', 'cloned')},
                    ))
            except Exception:
                # Exception already covers ImportError — the original
                # (ImportError, Exception) tuple was redundant.  Best-effort:
                # fall back to the single built-in default voice.
                voices.append(VoiceInfo(
                    id="default", name="Default", language="en",
                    provider="luxtts", sample_rate=24000,
                ))
        elif self.provider == TTSProvider.POCKET:
            # Pocket TTS built-in + cloned voices
            try:
                import json as _json
                from integrations.service_tools.pocket_tts_tool import pocket_tts_list_voices
                data = _json.loads(pocket_tts_list_voices())
                for v in data.get('voices', []):
                    voices.append(VoiceInfo(
                        id=v['id'], name=v['name'], language="en",
                        provider="pocket", metadata={"type": v.get('type', 'builtin')},
                    ))
            except Exception:
                # Fallback: correct 8 built-in voices (pocket-tts 1.1.1)
                for name in ["alba", "marius", "javert", "jean",
                             "fantine", "cosette", "eponine", "azelma"]:
                    voices.append(VoiceInfo(
                        id=name, name=name.title(), language="en", provider="pocket",
                    ))
        # Cloud providers: disabled, no voices to list

        return voices

    def _filter_voices(
        self,
        voices: List[VoiceInfo],
        language: Optional[str],
        gender: Optional[str]
    ) -> List[VoiceInfo]:
        """Filter voices by language prefix and/or exact gender (case-insensitive)."""
        filtered = voices

        if language:
            filtered = [v for v in filtered if v.language.lower().startswith(language.lower())]

        if gender:
            filtered = [v for v in filtered if v.gender and v.gender.lower() == gender.lower()]

        return filtered

    def get_optimal_format(self, channel: str) -> str:
        """
        Get optimal audio format for a channel.

        Args:
            channel: Channel name (telegram, discord, etc.)

        Returns:
            Optimal format string (opus, mp3, wav, ogg); MP3 for unknown channels.
        """
        format_enum = self.CHANNEL_FORMATS.get(
            channel.lower(),
            self.CHANNEL_FORMATS["default"]
        )
        return format_enum.value

    def get_supported_formats(self) -> List[str]:
        """Get list of supported output formats for the current provider."""
        formats = {
            TTSProvider.LUXTTS: ["wav"],
            TTSProvider.POCKET: ["wav"],
            TTSProvider.OPENAI: ["mp3", "opus", "aac", "flac", "wav", "pcm"],
            TTSProvider.ELEVENLABS: ["mp3", "wav", "ogg"],
            TTSProvider.EDGE: ["mp3", "wav", "ogg"],
            TTSProvider.GOOGLE: ["mp3", "wav", "ogg"],
            TTSProvider.AMAZON: ["mp3", "ogg", "pcm"]
        }
        return formats.get(self.provider, ["mp3", "wav"])

    def get_max_text_length(self) -> int:
        """Get maximum text length (chars) for a single synthesis request."""
        limits = {
            TTSProvider.LUXTTS: 10000,   # Local — no API limits, just memory
            TTSProvider.POCKET: 10000,   # Local — no API limits, just memory
            TTSProvider.OPENAI: 4096,
            TTSProvider.ELEVENLABS: 5000,
            TTSProvider.EDGE: 10000,
            TTSProvider.GOOGLE: 5000,
            TTSProvider.AMAZON: 3000
        }
        return limits.get(self.provider, 4096)

    async def synthesize_long_text(
        self,
        text: str,
        voice: Optional[str] = None,
        format: Optional[AudioFormat] = None
    ) -> bytes:
        """
        Synthesize long text by chunking at sentence boundaries.

        Args:
            text: Text to synthesize (can exceed max length)
            voice: Voice ID
            format: Output audio format

        Returns:
            Combined audio bytes
        """
        max_length = self.get_max_text_length()

        if len(text) <= max_length:
            return await self.synthesize(text, voice, format)

        # Split text into chunks at sentence boundaries
        chunks = self._split_text(text, max_length)

        # Synthesize each chunk sequentially (order matters for concatenation)
        audio_parts = []
        for chunk in chunks:
            audio = await self.synthesize(chunk, voice, format)
            audio_parts.append(audio)

        # Combine audio parts
        return self._combine_audio(audio_parts, format or AudioFormat.MP3)

    def _split_text(self, text: str, max_length: int) -> List[str]:
        """Split text into chunks of at most max_length chars at sentence boundaries.

        A single sentence longer than max_length becomes its own (oversized)
        chunk — sentences are never split mid-way.
        """
        sentences = []
        current = ""

        # Simple sentence splitting on terminal punctuation
        for char in text:
            current += char
            # NOTE: current is never empty here (char was just appended), so
            # the original `len(current) > 0` guard was redundant.
            if char in ".!?":
                sentences.append(current.strip())
                current = ""

        if current.strip():
            sentences.append(current.strip())

        # Greedily pack sentences into chunks
        chunks = []
        current_chunk = ""

        for sentence in sentences:
            if len(current_chunk) + len(sentence) + 1 <= max_length:
                current_chunk += (" " if current_chunk else "") + sentence
            else:
                if current_chunk:
                    chunks.append(current_chunk)
                current_chunk = sentence

        if current_chunk:
            chunks.append(current_chunk)

        return chunks

    def _combine_audio(self, parts: List[bytes], format: AudioFormat) -> bytes:
        """Combine multiple audio parts by byte concatenation.

        For MP3/OGG/OPUS streams simple concatenation is usually playable.
        For WAV/PCM this is a placeholder — a real implementation would merge
        headers/frames with an audio library.  (The original had two branches
        that both returned the same concatenation.)
        """
        return b"".join(parts)

    async def save_to_file(
        self,
        text: str,
        file_path: str,
        voice: Optional[str] = None,
        format: Optional[AudioFormat] = None
    ) -> str:
        """
        Synthesize and save to file, creating parent directories as needed.

        Args:
            text: Text to synthesize
            file_path: Output file path
            voice: Voice ID
            format: Output audio format

        Returns:
            Path to saved file
        """
        audio = await self.synthesize(text, voice, format)

        path = Path(file_path)
        path.parent.mkdir(parents=True, exist_ok=True)

        with open(path, 'wb') as f:
            f.write(audio)

        return str(path)

    def build_ssml(
        self,
        text: str,
        config: Optional[SSMLConfig] = None
    ) -> str:
        """
        Build SSML from text and configuration.

        Args:
            text: Plain text (not escaped — caller must pre-escape XML chars)
            config: SSML configuration options

        Returns:
            SSML markup string wrapped in <speak>
        """
        config = config or SSMLConfig()

        ssml_parts = ['<speak>']

        # Add prosody if configured
        prosody_attrs = []
        if config.rate:
            prosody_attrs.append(f'rate="{config.rate}"')
        if config.pitch:
            prosody_attrs.append(f'pitch="{config.pitch}"')
        if config.volume:
            prosody_attrs.append(f'volume="{config.volume}"')

        if prosody_attrs:
            ssml_parts.append(f'<prosody {" ".join(prosody_attrs)}>')

        # Add emphasis if configured
        if config.emphasis:
            ssml_parts.append(f'<emphasis level="{config.emphasis}">')
            ssml_parts.append(text)
            ssml_parts.append('</emphasis>')
        else:
            ssml_parts.append(text)

        if prosody_attrs:
            ssml_parts.append('</prosody>')

        ssml_parts.append('</speak>')

        return "".join(ssml_parts)

    def estimate_duration(self, text: str, speed: float = 1.0) -> float:
        """
        Estimate audio duration for text.

        Args:
            text: Text to estimate
            speed: Speech speed multiplier

        Returns:
            Estimated duration in seconds
        """
        # Average speaking rate is ~150 words per minute
        words = len(text.split())
        base_duration = (words / 150) * 60  # seconds
        return base_duration / speed

    def get_provider_info(self) -> Dict[str, Any]:
        """Get information about the current provider."""
        return {
            "provider": self.provider.value,
            "model": self.model,
            "default_voice": self.default_voice,
            "max_text_length": self.get_max_text_length(),
            "supported_formats": self.get_supported_formats(),
            "ssml_support": False,  # No active provider supports SSML
            "offline": self.provider not in self._CLOUD_PROVIDERS,
            "disabled": self.provider in self._CLOUD_PROVIDERS,
        }