Coverage for integrations/vision/ltx2_server.py: 20.9% of 163 statements (coverage.py v7.14.0, created at 2026-05-12 04:49 +0000)
1"""
2LTX-Video Generation Server
3Optimized for NVIDIA RTX 3070 (8GB VRAM)
5Uses: diffusers LTXPipeline with memory optimizations
7Runs on localhost:5002
8Endpoint: POST /generate, POST /generate_long
10Usage:
11 python ltx2_server.py
12"""
import os
import time
import uuid
import torch
import logging
from flask import Flask, request, jsonify, send_file
from threading import Lock

# G10: Resolve LTX server port from port_registry / env var instead of hardcoded 5002
_LTX_PORT = int(os.environ.get('HART_LTX_PORT', '5002'))
_LTX_BASE_URL = os.environ.get('HART_LTX_URL', f'http://localhost:{_LTX_PORT}')
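# Illustrative overrides (variable names taken from the lookups above; the
# hostname is a placeholder):
#   HART_LTX_PORT=5010 python ltx2_server.py
#   HART_LTX_URL=http://gpu-box.local:5010 python ltx2_server.py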

try:
    from integrations.service_tools.vram_manager import clear_cuda_cache
except ImportError:
    # Fallback: best-effort cache clearing when vram_manager is unavailable
    def clear_cuda_cache():
        try:
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
            if hasattr(torch.backends, 'mps') and torch.backends.mps.is_available():
                torch.mps.empty_cache()
        except Exception:
            pass

# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

app = Flask(__name__)

# Global pipeline and lock for thread safety
pipeline = None
model_lock = Lock()

# Paths
BASE_DIR = os.path.dirname(__file__)
OUTPUT_DIR = os.path.join(BASE_DIR, "coding", "ltx_outputs")
os.makedirs(OUTPUT_DIR, exist_ok=True)


def load_pipeline():
    """Load the LTX-Video pipeline, optimized for 8GB VRAM"""
    global pipeline

    if pipeline is not None:
        return pipeline

    logger.info("Loading LTX-Video model (optimized for 8GB VRAM)...")

    try:
        from diffusers import LTXPipeline

        # LTX-Video models that work on 8GB VRAM
        model_options = [
            "Lightricks/LTX-Video-0.9.1",  # Stable release
            "Lightricks/LTX-Video",        # Latest
        ]

        for model_id in model_options:
            try:
                logger.info(f"Trying model: {model_id}")
                pipeline = LTXPipeline.from_pretrained(
                    model_id,
                    torch_dtype=torch.bfloat16,
                )
                logger.info(f"Loaded: {model_id}")
                break
            except Exception as e:
                logger.warning(f"Model {model_id} failed: {e}")
                continue

        if pipeline is None:
            raise RuntimeError("Could not load any LTX-Video model")

        # Memory optimizations for 8GB VRAM
        logger.info("Applying memory optimizations...")

        # CPU offloading: keeps the model on the CPU and moves submodules
        # to the GPU only while they run during inference
        pipeline.enable_model_cpu_offload()

        # VAE optimizations: decode in tiles and slices to cap peak VRAM
        pipeline.vae.enable_tiling()
        pipeline.vae.enable_slicing()
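
        # Note: if 8GB is still not enough, diffusers pipelines also expose
        # enable_sequential_cpu_offload(), which offloads at a finer
        # granularity and trades substantial speed for a lower VRAM floor.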

        logger.info("LTX-Video ready with CPU offload + VAE tiling/slicing")
        return pipeline

    except Exception as e:
        logger.error(f"Failed to load model: {e}")
        raise


@app.route('/health', methods=['GET'])
def health():
    """Health check endpoint"""
    return jsonify({
        "status": "ok",
        "model_loaded": pipeline is not None,
        "model": "LTX-Video (diffusers)",
        "gpu": torch.cuda.get_device_name(0) if torch.cuda.is_available() else "N/A",
        "cuda_available": torch.cuda.is_available(),
        "vram_total_gb": round(torch.cuda.get_device_properties(0).total_memory / 1e9, 2) if torch.cuda.is_available() else 0,
        "vram_used_gb": round(torch.cuda.memory_allocated(0) / 1e9, 2) if torch.cuda.is_available() else 0
    })
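
# Illustrative /health response before the model is loaded (values vary by
# machine; the fields mirror the jsonify() call above):
#   {"status": "ok", "model_loaded": false, "model": "LTX-Video (diffusers)",
#    "gpu": "NVIDIA GeForce RTX 3070", "cuda_available": true,
#    "vram_total_gb": 8.59, "vram_used_gb": 0.0}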


@app.route('/generate', methods=['POST'])
def generate_video():
    """
    Generate video from text prompt using LTX-Video

    Request JSON:
    {
        "prompt": "A cartoon cat walking in a magical garden",
        "num_frames": 49,
        "width": 704,
        "height": 480,
        "num_inference_steps": 30,
        "guidance_scale": 3.0,
        "fps": 24,
        "seed": 12345  # optional
    }
    """
    global pipeline

    try:
        data = request.get_json()

        if not data or 'prompt' not in data:
            return jsonify({"error": "Missing 'prompt' in request"}), 400

        # Extract parameters with RTX 3070 (8GB) optimized defaults
        prompt = data.get('prompt')

        # LTX-Video on 8GB VRAM settings:
        #   - 512x320 = safe
        #   - 704x480 = medium (with CPU offload)
        #   - 49-97 frames = 2-4 seconds
        num_frames = min(data.get('num_frames', 49), 97)
        width = data.get('width', 704)
        height = data.get('height', 480)

        # Ensure divisibility: width/height by 32, frame count of the form 8n + 1
        width = (width // 32) * 32
        height = (height // 32) * 32
        num_frames = ((num_frames - 1) // 8) * 8 + 1
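
        # Worked example of the rounding above (illustrative): a request for
        # 720x500 with 50 frames becomes 704x480 with 49 frames, since
        # 720 // 32 * 32 = 704, 500 // 32 * 32 = 480, and (50 - 1) // 8 * 8 + 1 = 49.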

        num_inference_steps = data.get('num_inference_steps', 30)
        guidance_scale = data.get('guidance_scale', 3.0)
        fps = data.get('fps', 24)
        # Default seed: current time folded into the signed 32-bit range
        seed = data.get('seed', int(time.time()) % 2147483647)

        logger.info(f"Generating video: {prompt[:50]}...")
        logger.info(f"Parameters: {width}x{height}, {num_frames} frames, {num_inference_steps} steps, seed={seed}")

        # Load pipeline if not already loaded
        with model_lock:
            if pipeline is None:
                load_pipeline()

        # Clear CUDA cache before generation
        clear_cuda_cache()

        # Generate video
        start_time = time.time()
        video_id = str(uuid.uuid4())[:8]
        output_filename = f"ltx_{video_id}_{int(time.time())}.mp4"
        output_path = os.path.join(OUTPUT_DIR, output_filename)

        with model_lock:
            logger.info("Using LTX-Video diffusers pipeline")
            generator = torch.Generator(device="cpu").manual_seed(seed)

            output = pipeline(
                prompt=prompt,
                width=width,
                height=height,
                num_frames=num_frames,
                num_inference_steps=num_inference_steps,
                guidance_scale=guidance_scale,
                generator=generator,
            )

        # Save video frames
        video_frames = output.frames[0]
        try:
            from diffusers.utils import export_to_video
            export_to_video(video_frames, output_path, fps=fps)
        except ImportError:
            import imageio
            imageio.mimwrite(output_path, video_frames, fps=fps)

        generation_time = time.time() - start_time
        logger.info(f"Video generated in {generation_time:.2f}s: {output_path}")

        # Clear cache after generation
        clear_cuda_cache()

        return jsonify({
            "status": "success",
            "video_path": output_path,
            "video_url": f"{_LTX_BASE_URL}/video/{output_filename}",
            "output_url": f"{_LTX_BASE_URL}/video/{output_filename}",
            "generation_time_seconds": round(generation_time, 2),
            "parameters": {
                "width": width,
                "height": height,
                "num_frames": num_frames,
                "num_inference_steps": num_inference_steps,
                "seed": seed
            }
        })

    except torch.cuda.OutOfMemoryError:
        clear_cuda_cache()
        logger.error("CUDA out of memory! Try reducing resolution or num_frames")
        return jsonify({
            "error": "GPU out of memory. Try reducing width/height (e.g., 512x320) or num_frames (e.g., 33)"
        }), 507

    except Exception as e:
        logger.error(f"Generation failed: {e}")
        return jsonify({"error": str(e)}), 500


@app.route('/video/<filename>', methods=['GET'])
def serve_video(filename):
    """Serve generated video files"""
    # Guard against path traversal: only serve plain basenames from OUTPUT_DIR
    filename = os.path.basename(filename)
    video_path = os.path.join(OUTPUT_DIR, filename)
    if os.path.exists(video_path):
        return send_file(video_path, mimetype='video/mp4')
    return jsonify({"error": "Video not found"}), 404


@app.route('/list', methods=['GET'])
def list_videos():
    """List all generated videos"""
    videos = []
    for f in os.listdir(OUTPUT_DIR):
        if f.endswith('.mp4'):
            videos.append({
                "filename": f,
                "url": f"{_LTX_BASE_URL}/video/{f}",
                "size_mb": round(os.path.getsize(os.path.join(OUTPUT_DIR, f)) / 1e6, 2)
            })
    return jsonify({"videos": videos})


@app.route('/clear_cache', methods=['POST'])
def clear_cache():
    """Clear CUDA cache to free up VRAM"""
    clear_cuda_cache()
    return jsonify({
        "status": "cache_cleared",
        "vram_used_gb": round(torch.cuda.memory_allocated(0) / 1e9, 2) if torch.cuda.is_available() else 0
    })


@app.route('/generate_long', methods=['POST'])
def generate_long_video():
    """
    Generate longer videos (10-30 seconds) by stitching chunks together

    A 20-second video at 25 fps is 500 frames.
    Strategy: generate fixed-size chunks (independently seeded from the same
    prompt) and concatenate them, dropping the overlap frames between chunks.

    Request JSON:
    {
        "prompt": "A serene landscape with mountains and flowing river",
        "duration_seconds": 20,
        "width": 512,
        "height": 320,
        "fps": 25
    }
    """
    global pipeline

    try:
        data = request.get_json()

        if not data or 'prompt' not in data:
            return jsonify({"error": "Missing 'prompt' in request"}), 400

        prompt = data.get('prompt')
        duration_seconds = min(data.get('duration_seconds', 10), 30)  # Cap at 30s
        width = (data.get('width', 512) // 32) * 32
        height = (data.get('height', 320) // 32) * 32
        fps = data.get('fps', 25)
        seed = data.get('seed', int(time.time()) % 2147483647)

        # Calculate frames needed
        total_frames_needed = int(duration_seconds * fps)

        # Chunk settings: generate 49 frames per chunk with an 8-frame overlap
        frames_per_chunk = 49  # Must be of the form 8n + 1
        overlap_frames = 8
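
        # Worked example (illustrative): 20 s at 25 fps needs 500 frames; the
        # first chunk contributes 49 frames and each later chunk 49 - 8 = 41,
        # so 12 chunks suffice (49 + 11 * 41 = 500).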

        logger.info(f"Generating {duration_seconds}s video ({total_frames_needed} frames)")
        logger.info(f"Strategy: {frames_per_chunk} frames/chunk with {overlap_frames} overlap")

        # Load pipeline
        with model_lock:
            if pipeline is None:
                load_pipeline()

        start_time = time.time()
        all_frames = []
        chunk_num = 0

        while len(all_frames) < total_frames_needed:
            chunk_num += 1
            logger.info(f"Generating chunk {chunk_num} (frames {len(all_frames)}-{len(all_frames) + frames_per_chunk})")

            clear_cuda_cache()

            chunk_seed = seed + chunk_num

            with model_lock:
                # Use diffusers LTX-Video pipeline
                generator = torch.Generator(device="cpu").manual_seed(chunk_seed)
                output = pipeline(
                    prompt=prompt,
                    width=width,
                    height=height,
                    num_frames=frames_per_chunk,
                    num_inference_steps=25,  # Fewer steps for speed in long videos
                    guidance_scale=3.0,
                    generator=generator,
                )
                chunk_frames = list(output.frames[0])  # Convert to a list of frames

            # Add frames (drop the leading overlap frames for subsequent chunks)
            if len(all_frames) == 0:
                all_frames.extend(chunk_frames)
            else:
                all_frames.extend(chunk_frames[overlap_frames:])

            logger.info(f"Total frames so far: {len(all_frames)}")

        # Trim to exact length
        all_frames = all_frames[:total_frames_needed]

        # Save final video
        import imageio
        video_id = str(uuid.uuid4())[:8]
        output_filename = f"ltx2_long_{video_id}_{int(time.time())}.mp4"
        output_path = os.path.join(OUTPUT_DIR, output_filename)

        imageio.mimwrite(output_path, all_frames, fps=fps, codec='libx264')

        generation_time = time.time() - start_time
        logger.info(f"Long video generated in {generation_time:.2f}s: {output_path}")

        return jsonify({
            "status": "success",
            "video_path": output_path,
            "video_url": f"{_LTX_BASE_URL}/video/{output_filename}",
            "duration_seconds": duration_seconds,
            "total_frames": len(all_frames),
            "chunks_generated": chunk_num,
            "generation_time_seconds": round(generation_time, 2)
        })

    except torch.cuda.OutOfMemoryError:
        clear_cuda_cache()
        logger.error("CUDA OOM during long video generation")
        return jsonify({"error": "GPU out of memory. Try smaller resolution (384x256)"}), 507

    except Exception as e:
        logger.error(f"Long video generation failed: {e}")
        return jsonify({"error": str(e)}), 500


@app.route('/unload', methods=['POST'])
def unload_model():
    """Unload model to free VRAM"""
    global pipeline
    with model_lock:
        if pipeline is not None:
            del pipeline
            pipeline = None
            clear_cuda_cache()
    return jsonify({"status": "model_unloaded"})


if __name__ == '__main__':
    print("""
    ================================================================
    |                 LTX-Video Generation Server                  |
    |               Optimized for RTX 3070 (8GB VRAM)              |
    |               Using: diffusers + CPU Offloading              |
    ================================================================
    |  Model: Lightricks/LTX-Video (auto-downloaded from HF)       |
    ================================================================
    |  Endpoints:                                                  |
    |    POST /generate      - Generate short video (2-4s)         |
    |    POST /generate_long - Generate long video (10-30s scenes) |
    |    GET  /health        - Check server status                 |
    |    GET  /video/<file>  - Serve generated video               |
    |    GET  /list          - List all generated videos           |
    |    POST /clear_cache   - Clear CUDA cache                    |
    |    POST /unload        - Unload model from VRAM              |
    ================================================================
    |  RTX 3070 (8GB) Recommended Settings:                        |
    |    Safe:   512x320, 49 frames (~2s), 25 steps                |
    |    Medium: 704x480, 49 frames (~2s), 30 steps                |
    |    Max:    704x480, 97 frames (~4s), 30 steps                |
    ================================================================
    |  Memory Optimizations Enabled:                               |
    |    - CPU Offloading (model in CPU, inference on GPU)         |
    |    - VAE Tiling & Slicing                                    |
    ================================================================
    """)

    # Check CUDA availability
    if torch.cuda.is_available():
        print(f"GPU: {torch.cuda.get_device_name(0)}")
        print(f"VRAM: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")
    else:
        print("WARNING: CUDA not available! GPU generation requires CUDA.")

    print(f"\nStarting server on {_LTX_BASE_URL}")
    print("Model will be downloaded from HuggingFace on first request...")
    print("First request may take a few minutes to download the model.\n")

    # Run Flask server
    app.run(host='0.0.0.0', port=_LTX_PORT, threaded=True)