Coverage for integrations/vision/ltx2_server.py: 20.9% (163 statements)


1""" 

2LTX-Video Generation Server 

3Optimized for NVIDIA RTX 3070 (8GB VRAM) 

4 

5Uses: diffusers LTXPipeline with memory optimizations 

6 

7Runs on localhost:5002 

8Endpoint: POST /generate, POST /generate_long 

9 

10Usage: 

11 python ltx2_server.py 

12""" 

import os
import time
import uuid
import torch
import logging
from flask import Flask, request, jsonify, send_file
from threading import Lock

# G10: Resolve LTX server port from port_registry / env var instead of hardcoded 5002
_LTX_PORT = int(os.environ.get('HART_LTX_PORT', '5002'))
_LTX_BASE_URL = os.environ.get('HART_LTX_URL', f'http://localhost:{_LTX_PORT}')
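# Example override (hypothetical port value; same pattern as the env vars above):
#   HART_LTX_PORT=5010 python ltx2_server.py  ->  serves on http://localhost:5010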

try:
    from integrations.service_tools.vram_manager import clear_cuda_cache
except ImportError:
    def clear_cuda_cache():
        try:
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
            if hasattr(torch.backends, 'mps') and torch.backends.mps.is_available():
                torch.mps.empty_cache()
        except Exception:
            pass

# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

app = Flask(__name__)

# Global pipeline and lock for thread safety
pipeline = None
model_lock = Lock()

# Paths
BASE_DIR = os.path.dirname(__file__)
OUTPUT_DIR = os.path.join(BASE_DIR, "coding", "ltx_outputs")
os.makedirs(OUTPUT_DIR, exist_ok=True)


def load_pipeline():
    """Load LTX-Video pipeline optimized for 8GB VRAM"""
    global pipeline

    if pipeline is not None:
        return pipeline

    logger.info("Loading LTX-Video model (optimized for 8GB VRAM)...")

    try:
        from diffusers import LTXPipeline

        # LTX-Video models that work on 8GB VRAM
        model_options = [
            "Lightricks/LTX-Video-0.9.1",  # Stable release
            "Lightricks/LTX-Video",        # Latest
        ]

        for model_id in model_options:
            try:
                logger.info(f"Trying model: {model_id}")
                pipeline = LTXPipeline.from_pretrained(
                    model_id,
                    torch_dtype=torch.bfloat16,
                )
                logger.info(f"Loaded: {model_id}")
                break
            except Exception as e:
                logger.warning(f"Model {model_id} failed: {e}")
                continue

        if pipeline is None:
            raise RuntimeError("Could not load any LTX-Video model")

        # Memory optimizations for 8GB VRAM
        logger.info("Applying memory optimizations...")

        # CPU offloading - keeps model in CPU, moves to GPU only during inference
        pipeline.enable_model_cpu_offload()

        # VAE optimizations
        pipeline.vae.enable_tiling()
        pipeline.vae.enable_slicing()
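        # Note: tiling decodes the latent in spatial tiles and slicing splits
        # the VAE batch into smaller chunks; both trade some speed for a
        # lower peak-VRAM footprint during decode.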

        logger.info("LTX-Video ready with CPU offload + VAE tiling/slicing")
        return pipeline

    except Exception as e:
        logger.error(f"Failed to load model: {e}")
        raise


@app.route('/health', methods=['GET'])
def health():
    """Health check endpoint"""
    return jsonify({
        "status": "ok",
        "model_loaded": pipeline is not None,
        "model": "LTX-Video (diffusers)",
        "gpu": torch.cuda.get_device_name(0) if torch.cuda.is_available() else "N/A",
        "cuda_available": torch.cuda.is_available(),
        "vram_total_gb": round(torch.cuda.get_device_properties(0).total_memory / 1e9, 2) if torch.cuda.is_available() else 0,
        "vram_used_gb": round(torch.cuda.memory_allocated(0) / 1e9, 2) if torch.cuda.is_available() else 0
    })
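# Quick check (assumes the default port; adjust if HART_LTX_PORT is set):
#   curl http://localhost:5002/health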


@app.route('/generate', methods=['POST'])
def generate_video():
    """
    Generate video from text prompt using LTX-Video

    Request JSON:
        {
            "prompt": "A cartoon cat walking in a magical garden",
            "num_frames": 49,
            "width": 704,
            "height": 480,
            "num_inference_steps": 30,
            "guidance_scale": 3.0,
            "fps": 24,
            "seed": 12345  # optional
        }
    """

    global pipeline

    try:
        data = request.get_json()

        if not data or 'prompt' not in data:
            return jsonify({"error": "Missing 'prompt' in request"}), 400

        # Extract parameters with RTX 3070 (8GB) optimized defaults
        prompt = data.get('prompt')

        # LTX-Video on 8GB VRAM settings:
        #   - 512x320 = safe
        #   - 704x480 = medium (with CPU offload)
        #   - 49-97 frames = 2-4 seconds
        num_frames = min(data.get('num_frames', 49), 97)
        width = data.get('width', 704)
        height = data.get('height', 480)

        # Snap to model constraints: width/height divisible by 32, frames = 8n + 1
        width = (width // 32) * 32
        height = (height // 32) * 32
        num_frames = ((num_frames - 1) // 8) * 8 + 1
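        # e.g. a request for 50 frames snaps down: ((50 - 1) // 8) * 8 + 1 = 49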

        num_inference_steps = data.get('num_inference_steps', 30)
        guidance_scale = data.get('guidance_scale', 3.0)
        fps = data.get('fps', 24)
        seed = data.get('seed', int(time.time()) % 2147483647)

        logger.info(f"Generating video: {prompt[:50]}...")
        logger.info(f"Parameters: {width}x{height}, {num_frames} frames, {num_inference_steps} steps, seed={seed}")

        # Load pipeline if not already loaded
        with model_lock:
            if pipeline is None:
                load_pipeline()

        # Clear CUDA cache before generation
        clear_cuda_cache()

        # Generate video
        start_time = time.time()
        video_id = str(uuid.uuid4())[:8]
        output_filename = f"ltx_{video_id}_{int(time.time())}.mp4"
        output_path = os.path.join(OUTPUT_DIR, output_filename)

        with model_lock:
            logger.info("Using LTX-Video diffusers pipeline")

            generator = torch.Generator(device="cpu").manual_seed(seed)

            output = pipeline(
                prompt=prompt,
                width=width,
                height=height,
                num_frames=num_frames,
                num_inference_steps=num_inference_steps,
                guidance_scale=guidance_scale,
                generator=generator,
            )

        # Save video frames
        video_frames = output.frames[0]
        try:
            from diffusers.utils import export_to_video
            export_to_video(video_frames, output_path, fps=fps)
        except ImportError:
            # Fallback writer; convert frames explicitly since imageio
            # expects numpy arrays rather than PIL images
            import imageio
            import numpy as np
            imageio.mimwrite(output_path, [np.asarray(f) for f in video_frames], fps=fps)

        generation_time = time.time() - start_time
        logger.info(f"Video generated in {generation_time:.2f}s: {output_path}")

        # Clear cache after generation
        clear_cuda_cache()

        return jsonify({
            "status": "success",
            "video_path": output_path,
            "video_url": f"{_LTX_BASE_URL}/video/{output_filename}",
            "output_url": f"{_LTX_BASE_URL}/video/{output_filename}",
            "generation_time_seconds": round(generation_time, 2),
            "parameters": {
                "width": width,
                "height": height,
                "num_frames": num_frames,
                "num_inference_steps": num_inference_steps,
                "seed": seed
            }
        })

    except torch.cuda.OutOfMemoryError:
        clear_cuda_cache()
        logger.error("CUDA out of memory! Try reducing resolution or num_frames")
        return jsonify({
            "error": "GPU out of memory. Try reducing width/height (e.g., 512x320) or num_frames (e.g., 33)"
        }), 507

    except Exception as e:
        logger.error(f"Generation failed: {e}")
        return jsonify({"error": str(e)}), 500


@app.route('/video/<filename>', methods=['GET'])
def serve_video(filename):
    """Serve generated video files"""
    video_path = os.path.join(OUTPUT_DIR, filename)
    if os.path.exists(video_path):
        return send_file(video_path, mimetype='video/mp4')
    return jsonify({"error": "Video not found"}), 404


@app.route('/list', methods=['GET'])
def list_videos():
    """List all generated videos"""
    videos = []
    for f in os.listdir(OUTPUT_DIR):
        if f.endswith('.mp4'):
            videos.append({
                "filename": f,
                "url": f"{_LTX_BASE_URL}/video/{f}",
                "size_mb": round(os.path.getsize(os.path.join(OUTPUT_DIR, f)) / 1e6, 2)
            })
    return jsonify({"videos": videos})


@app.route('/clear_cache', methods=['POST'])
def clear_cache():
    """Clear CUDA cache to free up VRAM"""
    clear_cuda_cache()
    return jsonify({
        "status": "cache_cleared",
        "vram_used_gb": round(torch.cuda.memory_allocated(0) / 1e9, 2) if torch.cuda.is_available() else 0
    })


@app.route('/generate_long', methods=['POST'])
def generate_long_video():
    """
    Generate longer videos (10-30 seconds) by stitching chunks

    For a 20-second video at 25 fps = 500 frames.
    Strategy: generate fixed-size chunks independently (same prompt, varied
    seed) and drop the overlapping frames when concatenating. Note that
    chunks are not conditioned on each other's frames, so continuity
    between chunks is approximate.

    Request JSON:
        {
            "prompt": "A serene landscape with mountains and flowing river",
            "duration_seconds": 20,
            "width": 512,
            "height": 320,
            "fps": 25
        }
    """

    global pipeline

    try:
        data = request.get_json()

        if not data or 'prompt' not in data:
            return jsonify({"error": "Missing 'prompt' in request"}), 400

        prompt = data.get('prompt')
        duration_seconds = min(data.get('duration_seconds', 10), 30)  # Cap at 30s
        width = (data.get('width', 512) // 32) * 32
        height = (data.get('height', 320) // 32) * 32
        fps = data.get('fps', 25)
        seed = data.get('seed', int(time.time()) % 2147483647)

        # Calculate frames needed
        total_frames_needed = int(duration_seconds * fps)

        # Chunk settings: generate 49 frames per chunk with an 8-frame overlap
        frames_per_chunk = 49  # Must be (n*8)+1
        overlap_frames = 8
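        # Worked example: 20 s at 25 fps = 500 frames. Chunk 1 contributes 49
        # frames and each later chunk contributes 49 - 8 = 41, so 12 chunks
        # yield exactly 49 + 11 * 41 = 500 frames.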

        logger.info(f"Generating {duration_seconds}s video ({total_frames_needed} frames)")
        logger.info(f"Strategy: {frames_per_chunk} frames/chunk with {overlap_frames} overlap")

        # Load pipeline
        with model_lock:
            if pipeline is None:
                load_pipeline()

        start_time = time.time()
        all_frames = []
        chunk_num = 0

        while len(all_frames) < total_frames_needed:
            chunk_num += 1
            logger.info(f"Generating chunk {chunk_num} (frames {len(all_frames)}-{len(all_frames) + frames_per_chunk})")

            clear_cuda_cache()

            chunk_seed = seed + chunk_num

            with model_lock:
                # Use diffusers LTX-Video pipeline
                generator = torch.Generator(device="cpu").manual_seed(chunk_seed)
                output = pipeline(
                    prompt=prompt,
                    width=width,
                    height=height,
                    num_frames=frames_per_chunk,
                    num_inference_steps=25,  # Fewer steps for speed in long videos
                    guidance_scale=3.0,
                    generator=generator,
                )
                chunk_frames = list(output.frames[0])  # Convert to list of frames

            # Add frames (skip overlap frames for subsequent chunks)
            if len(all_frames) == 0:
                all_frames.extend(chunk_frames)
            else:
                # Skip the first overlap_frames to avoid duplicates
                all_frames.extend(chunk_frames[overlap_frames:])

            logger.info(f"Total frames so far: {len(all_frames)}")

        # Trim to exact length
        all_frames = all_frames[:total_frames_needed]

        # Save final video (convert PIL frames to arrays for imageio)
        import imageio
        import numpy as np

        video_id = str(uuid.uuid4())[:8]
        output_filename = f"ltx2_long_{video_id}_{int(time.time())}.mp4"
        output_path = os.path.join(OUTPUT_DIR, output_filename)

        imageio.mimwrite(output_path, [np.asarray(f) for f in all_frames], fps=fps, codec='libx264')

        generation_time = time.time() - start_time
        logger.info(f"Long video generated in {generation_time:.2f}s: {output_path}")

        return jsonify({
            "status": "success",
            "video_path": output_path,
            "video_url": f"{_LTX_BASE_URL}/video/{output_filename}",
            "duration_seconds": duration_seconds,
            "total_frames": len(all_frames),
            "chunks_generated": chunk_num,
            "generation_time_seconds": round(generation_time, 2)
        })

    except torch.cuda.OutOfMemoryError:
        clear_cuda_cache()
        logger.error("CUDA OOM during long video generation")
        return jsonify({"error": "GPU out of memory. Try a smaller resolution (384x256)"}), 507

    except Exception as e:
        logger.error(f"Long video generation failed: {e}")
        return jsonify({"error": str(e)}), 500


@app.route('/unload', methods=['POST'])
def unload_model():
    """Unload model to free VRAM"""
    global pipeline
    with model_lock:
        if pipeline is not None:
            del pipeline
            pipeline = None
            clear_cuda_cache()
    return jsonify({"status": "model_unloaded"})


if __name__ == '__main__':
    print("""
    ================================================================
    |               LTX-Video Generation Server                    |
    |            Optimized for RTX 3070 (8GB VRAM)                 |
    |            Using: diffusers + CPU Offloading                 |
    ================================================================
    |  Model: Lightricks/LTX-Video (auto-downloaded from HF)       |
    ================================================================
    |  Endpoints:                                                  |
    |    POST /generate      - Generate short video (2-4s)         |
    |    POST /generate_long - Generate long video (10-30s scenes) |
    |    GET  /health        - Check server status                 |
    |    GET  /video/<file>  - Serve generated video               |
    |    GET  /list          - List all generated videos           |
    |    POST /clear_cache   - Clear CUDA cache                    |
    |    POST /unload        - Unload model from VRAM              |
    ================================================================
    |  RTX 3070 (8GB) Recommended Settings:                        |
    |    Safe:   512x320, 49 frames (~2s), 25 steps                |
    |    Medium: 704x480, 49 frames (~2s), 30 steps                |
    |    Max:    704x480, 97 frames (~4s), 30 steps                |
    ================================================================
    |  Memory Optimizations Enabled:                               |
    |    - CPU Offloading (model in CPU, inference on GPU)         |
    |    - VAE Tiling & Slicing                                    |
    ================================================================
    """)

    # Check CUDA availability
    if torch.cuda.is_available():
        print(f"GPU: {torch.cuda.get_device_name(0)}")
        print(f"VRAM: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")
    else:
        print("WARNING: CUDA not available! GPU generation requires CUDA.")

    print(f"\nStarting server on {_LTX_BASE_URL}")
    print("Model will be downloaded from HuggingFace on first request...")
    print("First request may take a few minutes to download the model.\n")

    # Run Flask server
    app.run(host='0.0.0.0', port=_LTX_PORT, threaded=True)