Coverage for integrations/vision/ltx2_server.py: 20.9% of 163 statements (coverage.py v7.14.0, created at 2026-05-12 04:49 +0000)
1"""
2LTX-Video Generation Server
3Optimized for NVIDIA RTX 3070 (8GB VRAM)
5Uses: diffusers LTXPipeline with memory optimizations
7Runs on localhost:5002
8Endpoint: POST /generate, POST /generate_long
10Usage:
11 python ltx2_server.py
12"""
import os
import time
import uuid
import torch
import logging
from flask import Flask, request, jsonify, send_file
from threading import Lock

# G10: Resolve LTX server port from port_registry / env var instead of hardcoded 5002
_LTX_PORT = int(os.environ.get('HART_LTX_PORT', '5002'))
_LTX_BASE_URL = os.environ.get('HART_LTX_URL', f'http://localhost:{_LTX_PORT}')
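# Illustrative overrides (variable names taken from the lookups above; the
# hostname is a placeholder):
#   HART_LTX_PORT=5010 python ltx2_server.py
#   HART_LTX_URL=http://gpu-box.local:5010 python ltx2_server.py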

try:
    from integrations.service_tools.vram_manager import clear_cuda_cache
except ImportError:
    # Fallback: best-effort cache clearing when vram_manager is unavailable
    def clear_cuda_cache():
        try:
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
            if hasattr(torch.backends, 'mps') and torch.backends.mps.is_available():
                torch.mps.empty_cache()
        except Exception:
            pass

# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

app = Flask(__name__)

# Global pipeline and lock for thread safety
pipeline = None
model_lock = Lock()

# Paths
BASE_DIR = os.path.dirname(__file__)
OUTPUT_DIR = os.path.join(BASE_DIR, "coding", "ltx_outputs")
os.makedirs(OUTPUT_DIR, exist_ok=True)


def load_pipeline():
    """Load the LTX-Video pipeline, optimized for 8GB VRAM"""
    global pipeline

    if pipeline is not None:
        return pipeline

    logger.info("Loading LTX-Video model (optimized for 8GB VRAM)...")

    try:
        from diffusers import LTXPipeline

        # LTX-Video models that work on 8GB VRAM
        model_options = [
            "Lightricks/LTX-Video-0.9.1",  # Stable release
            "Lightricks/LTX-Video",        # Latest
        ]

        for model_id in model_options:
            try:
                logger.info(f"Trying model: {model_id}")
                pipeline = LTXPipeline.from_pretrained(
                    model_id,
                    torch_dtype=torch.bfloat16,
                )
                logger.info(f"Loaded: {model_id}")
                break
            except Exception as e:
                logger.warning(f"Model {model_id} failed: {e}")
                continue

        if pipeline is None:
            raise RuntimeError("Could not load any LTX-Video model")

        # Memory optimizations for 8GB VRAM
        logger.info("Applying memory optimizations...")

        # CPU offloading: keeps the model on the CPU and moves submodules
        # to the GPU only while they run during inference
        pipeline.enable_model_cpu_offload()

        # VAE optimizations: decode in tiles and slices to cap peak VRAM
        pipeline.vae.enable_tiling()
        pipeline.vae.enable_slicing()
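
        # Note: if 8GB is still not enough, diffusers pipelines also expose
        # enable_sequential_cpu_offload(), which offloads at a finer
        # granularity and trades substantial speed for a lower VRAM floor.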

        logger.info("LTX-Video ready with CPU offload + VAE tiling/slicing")
        return pipeline

    except Exception as e:
        logger.error(f"Failed to load model: {e}")
        raise


@app.route('/health', methods=['GET'])
def health():
    """Health check endpoint"""
    return jsonify({
        "status": "ok",
        "model_loaded": pipeline is not None,
        "model": "LTX-Video (diffusers)",
        "gpu": torch.cuda.get_device_name(0) if torch.cuda.is_available() else "N/A",
        "cuda_available": torch.cuda.is_available(),
        "vram_total_gb": round(torch.cuda.get_device_properties(0).total_memory / 1e9, 2) if torch.cuda.is_available() else 0,
        "vram_used_gb": round(torch.cuda.memory_allocated(0) / 1e9, 2) if torch.cuda.is_available() else 0
    })
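
# Illustrative /health response before the model is loaded (values vary by
# machine; the fields mirror the jsonify() call above):
#   {"status": "ok", "model_loaded": false, "model": "LTX-Video (diffusers)",
#    "gpu": "NVIDIA GeForce RTX 3070", "cuda_available": true,
#    "vram_total_gb": 8.59, "vram_used_gb": 0.0}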


@app.route('/generate', methods=['POST'])
def generate_video():
    """
    Generate video from text prompt using LTX-Video

    Request JSON:
    {
        "prompt": "A cartoon cat walking in a magical garden",
        "num_frames": 49,
        "width": 704,
        "height": 480,
        "num_inference_steps": 30,
        "guidance_scale": 3.0,
        "fps": 24,
        "seed": 12345  # optional
    }
    """
    global pipeline

    try:
        data = request.get_json()

        if not data or 'prompt' not in data:
            return jsonify({"error": "Missing 'prompt' in request"}), 400

        # Extract parameters with RTX 3070 (8GB) optimized defaults
        prompt = data.get('prompt')

        # LTX-Video on 8GB VRAM settings:
        #   - 512x320 = safe
        #   - 704x480 = medium (with CPU offload)
        #   - 49-97 frames = 2-4 seconds
        num_frames = min(data.get('num_frames', 49), 97)
        width = data.get('width', 704)
        height = data.get('height', 480)

        # Ensure divisibility: width/height by 32, frame count of the form 8n + 1
        width = (width // 32) * 32
        height = (height // 32) * 32
        num_frames = ((num_frames - 1) // 8) * 8 + 1
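
        # Worked example of the rounding above (illustrative): a request for
        # 720x500 with 50 frames becomes 704x480 with 49 frames, since
        # 720 // 32 * 32 = 704, 500 // 32 * 32 = 480, and (50 - 1) // 8 * 8 + 1 = 49.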

        num_inference_steps = data.get('num_inference_steps', 30)
        guidance_scale = data.get('guidance_scale', 3.0)
        fps = data.get('fps', 24)
        # Default seed: current time folded into the signed 32-bit range
        seed = data.get('seed', int(time.time()) % 2147483647)

        logger.info(f"Generating video: {prompt[:50]}...")
        logger.info(f"Parameters: {width}x{height}, {num_frames} frames, {num_inference_steps} steps, seed={seed}")

        # Load pipeline if not already loaded
        with model_lock:
            if pipeline is None:
                load_pipeline()

        # Clear CUDA cache before generation
        clear_cuda_cache()

        # Generate video
        start_time = time.time()
        video_id = str(uuid.uuid4())[:8]
        output_filename = f"ltx_{video_id}_{int(time.time())}.mp4"
        output_path = os.path.join(OUTPUT_DIR, output_filename)

        with model_lock:
            logger.info("Using LTX-Video diffusers pipeline")
            generator = torch.Generator(device="cpu").manual_seed(seed)

            output = pipeline(
                prompt=prompt,
                width=width,
                height=height,
                num_frames=num_frames,
                num_inference_steps=num_inference_steps,
                guidance_scale=guidance_scale,
                generator=generator,
            )

        # Save video frames
        video_frames = output.frames[0]
        try:
            from diffusers.utils import export_to_video
            export_to_video(video_frames, output_path, fps=fps)
        except ImportError:
            import imageio
            imageio.mimwrite(output_path, video_frames, fps=fps)

        generation_time = time.time() - start_time
        logger.info(f"Video generated in {generation_time:.2f}s: {output_path}")

        # Clear cache after generation
        clear_cuda_cache()

        return jsonify({
            "status": "success",
            "video_path": output_path,
            "video_url": f"{_LTX_BASE_URL}/video/{output_filename}",
            "output_url": f"{_LTX_BASE_URL}/video/{output_filename}",
            "generation_time_seconds": round(generation_time, 2),
            "parameters": {
                "width": width,
                "height": height,
                "num_frames": num_frames,
                "num_inference_steps": num_inference_steps,
                "seed": seed
            }
        })

    except torch.cuda.OutOfMemoryError:
        clear_cuda_cache()
        logger.error("CUDA out of memory! Try reducing resolution or num_frames")
        return jsonify({
            "error": "GPU out of memory. Try reducing width/height (e.g., 512x320) or num_frames (e.g., 33)"
        }), 507

    except Exception as e:
        logger.error(f"Generation failed: {e}")
        return jsonify({"error": str(e)}), 500


@app.route('/video/<filename>', methods=['GET'])
def serve_video(filename):
    """Serve generated video files"""
    # Guard against path traversal: only serve plain basenames from OUTPUT_DIR
    filename = os.path.basename(filename)
    video_path = os.path.join(OUTPUT_DIR, filename)
    if os.path.exists(video_path):
        return send_file(video_path, mimetype='video/mp4')
    return jsonify({"error": "Video not found"}), 404


@app.route('/list', methods=['GET'])
def list_videos():
    """List all generated videos"""
    videos = []
    for f in os.listdir(OUTPUT_DIR):
        if f.endswith('.mp4'):
            videos.append({
                "filename": f,
                "url": f"{_LTX_BASE_URL}/video/{f}",
                "size_mb": round(os.path.getsize(os.path.join(OUTPUT_DIR, f)) / 1e6, 2)
            })
    return jsonify({"videos": videos})


@app.route('/clear_cache', methods=['POST'])
def clear_cache():
    """Clear CUDA cache to free up VRAM"""
    clear_cuda_cache()
    return jsonify({
        "status": "cache_cleared",
        "vram_used_gb": round(torch.cuda.memory_allocated(0) / 1e9, 2) if torch.cuda.is_available() else 0
    })


@app.route('/generate_long', methods=['POST'])
def generate_long_video():
    """
    Generate longer videos (10-30 seconds) by stitching chunks together

    A 20-second video at 25 fps is 500 frames.
    Strategy: generate fixed-size chunks (independently seeded from the same
    prompt) and concatenate them, dropping the overlap frames between chunks.

    Request JSON:
    {
        "prompt": "A serene landscape with mountains and flowing river",
        "duration_seconds": 20,
        "width": 512,
        "height": 320,
        "fps": 25
    }
    """
    global pipeline

    try:
        data = request.get_json()

        if not data or 'prompt' not in data:
            return jsonify({"error": "Missing 'prompt' in request"}), 400

        prompt = data.get('prompt')
        duration_seconds = min(data.get('duration_seconds', 10), 30)  # Cap at 30s
        width = (data.get('width', 512) // 32) * 32
        height = (data.get('height', 320) // 32) * 32
        fps = data.get('fps', 25)
        seed = data.get('seed', int(time.time()) % 2147483647)

        # Calculate frames needed
        total_frames_needed = int(duration_seconds * fps)

        # Chunk settings: generate 49 frames per chunk with an 8-frame overlap
        frames_per_chunk = 49  # Must be of the form 8n + 1
        overlap_frames = 8
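
        # Worked example (illustrative): 20 s at 25 fps needs 500 frames; the
        # first chunk contributes 49 frames and each later chunk 49 - 8 = 41,
        # so 12 chunks suffice (49 + 11 * 41 = 500).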

        logger.info(f"Generating {duration_seconds}s video ({total_frames_needed} frames)")
        logger.info(f"Strategy: {frames_per_chunk} frames/chunk with {overlap_frames} overlap")

        # Load pipeline
        with model_lock:
            if pipeline is None:
                load_pipeline()

        start_time = time.time()
        all_frames = []
        chunk_num = 0

        while len(all_frames) < total_frames_needed:
            chunk_num += 1
            logger.info(f"Generating chunk {chunk_num} (frames {len(all_frames)}-{len(all_frames) + frames_per_chunk})")

            clear_cuda_cache()

            chunk_seed = seed + chunk_num

            with model_lock:
                # Use diffusers LTX-Video pipeline
                generator = torch.Generator(device="cpu").manual_seed(chunk_seed)
                output = pipeline(
                    prompt=prompt,
                    width=width,
                    height=height,
                    num_frames=frames_per_chunk,
                    num_inference_steps=25,  # Fewer steps for speed in long videos
                    guidance_scale=3.0,
                    generator=generator,
                )
                chunk_frames = list(output.frames[0])  # Convert to a list of frames

            # Add frames (drop the leading overlap frames for subsequent chunks)
            if len(all_frames) == 0:
                all_frames.extend(chunk_frames)
            else:
                all_frames.extend(chunk_frames[overlap_frames:])

            logger.info(f"Total frames so far: {len(all_frames)}")

        # Trim to exact length
        all_frames = all_frames[:total_frames_needed]

        # Save final video
        import imageio
        video_id = str(uuid.uuid4())[:8]
        output_filename = f"ltx2_long_{video_id}_{int(time.time())}.mp4"
        output_path = os.path.join(OUTPUT_DIR, output_filename)

        imageio.mimwrite(output_path, all_frames, fps=fps, codec='libx264')

        generation_time = time.time() - start_time
        logger.info(f"Long video generated in {generation_time:.2f}s: {output_path}")

        return jsonify({
            "status": "success",
            "video_path": output_path,
            "video_url": f"{_LTX_BASE_URL}/video/{output_filename}",
            "duration_seconds": duration_seconds,
            "total_frames": len(all_frames),
            "chunks_generated": chunk_num,
            "generation_time_seconds": round(generation_time, 2)
        })

    except torch.cuda.OutOfMemoryError:
        clear_cuda_cache()
        logger.error("CUDA OOM during long video generation")
        return jsonify({"error": "GPU out of memory. Try smaller resolution (384x256)"}), 507

    except Exception as e:
        logger.error(f"Long video generation failed: {e}")
        return jsonify({"error": str(e)}), 500


@app.route('/unload', methods=['POST'])
def unload_model():
    """Unload model to free VRAM"""
    global pipeline
    with model_lock:
        if pipeline is not None:
            del pipeline
            pipeline = None
            clear_cuda_cache()
    return jsonify({"status": "model_unloaded"})


if __name__ == '__main__':
    print("""
    ================================================================
    |                 LTX-Video Generation Server                  |
    |               Optimized for RTX 3070 (8GB VRAM)              |
    |               Using: diffusers + CPU Offloading              |
    ================================================================
    |  Model: Lightricks/LTX-Video (auto-downloaded from HF)       |
    ================================================================
    |  Endpoints:                                                  |
    |    POST /generate      - Generate short video (2-4s)         |
    |    POST /generate_long - Generate long video (10-30s scenes) |
    |    GET  /health        - Check server status                 |
    |    GET  /video/<file>  - Serve generated video               |
    |    GET  /list          - List all generated videos           |
    |    POST /clear_cache   - Clear CUDA cache                    |
    |    POST /unload        - Unload model from VRAM              |
    ================================================================
    |  RTX 3070 (8GB) Recommended Settings:                        |
    |    Safe:   512x320, 49 frames (~2s), 25 steps                |
    |    Medium: 704x480, 49 frames (~2s), 30 steps                |
    |    Max:    704x480, 97 frames (~4s), 30 steps                |
    ================================================================
    |  Memory Optimizations Enabled:                               |
    |    - CPU Offloading (model in CPU, inference on GPU)         |
    |    - VAE Tiling & Slicing                                    |
    ================================================================
    """)

    # Check CUDA availability
    if torch.cuda.is_available():
        print(f"GPU: {torch.cuda.get_device_name(0)}")
        print(f"VRAM: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")
    else:
        print("WARNING: CUDA not available! GPU generation requires CUDA.")

    print(f"\nStarting server on {_LTX_BASE_URL}")
    print("Model will be downloaded from HuggingFace on first request...")
    print("First request may take a few minutes to download the model.\n")

    # Run Flask server
    app.run(host='0.0.0.0', port=_LTX_PORT, threaded=True)