Coverage for integrations/vision/minicpm_server.py: 0.0% (112 statements)
1"""
2MiniCPM Vision Server — stripped standalone Flask server for the sidecar.
4Derived from the root minicpm.py but with configurable model directory,
5no external config.json dependency, and proper CLI args.
7Usage:
8 python -m integrations.vision.minicpm_server --model_dir ~/.hevolve/models/minicpm --port 9891
9"""
import argparse
import asyncio
import logging
import os
import sys
import time
import uuid
from concurrent.futures import ThreadPoolExecutor
from logging.handlers import RotatingFileHandler
from threading import Lock

from flask import Flask, request, jsonify
from werkzeug.utils import secure_filename

logger = logging.getLogger('minicpm_server')

app = Flask(__name__)
_model = None
_tokenizer = None
_device = None
_executor = ThreadPoolExecutor(max_workers=2)
_last_processing_time = {}
_last_processing_lock = Lock()

UPLOAD_FOLDER = os.path.join(os.path.dirname(__file__), '_uploads')
ALLOWED_EXTENSIONS = {'png', 'jpg', 'jpeg', 'gif', 'bmp', 'webp'}


def _init_model(model_dir: str, device: str = 'cuda:0'):
    """Load MiniCPM-V-2 onto the specified device (CUDA, MPS, or CPU)."""
    global _model, _tokenizer, _device
    import torch
    from transformers import AutoModel, AutoTokenizer

    logger.info(f"Loading MiniCPM from {model_dir} on {device}")
    _device = device
    dtype = torch.float16 if device != 'cpu' else torch.float32
    _model = AutoModel.from_pretrained(
        model_dir,
        trust_remote_code=True,
        torch_dtype=dtype,
    ).to(device=device, dtype=dtype)
    _model.eval()
    _tokenizer = AutoTokenizer.from_pretrained(
        model_dir,
        trust_remote_code=True,
    )
    logger.info(f"MiniCPM model loaded on {device} ({dtype})")


def _process_image_sync(image, prompt: str) -> str:
    """Run MiniCPM inference synchronously. Returns description string."""
    import torch
    msgs = [{'role': 'user', 'content': prompt}]
    with torch.inference_mode():
        res, _, _ = _model.chat(
            image=image,
            msgs=msgs,
            context=None,
            tokenizer=_tokenizer,
            sampling=True,
            temperature=0.7,
        )
    return res
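
# _executor (and the asyncio import above) are not used by the synchronous
# Flask handlers below. A minimal sketch of how inference could be offloaded
# to the pool from async code, purely illustrative and not wired up here:
#
#   async def _process_image_async(image, prompt: str) -> str:
#       loop = asyncio.get_running_loop()
#       return await loop.run_in_executor(
#           _executor, _process_image_sync, image, prompt)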


def _allowed_file(filename):
    return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS


@app.route('/')
def index():
    return jsonify({'status': 'working', 'model': 'MiniCPM-V-2'})


@app.route('/status', methods=['GET'])
def status():
    return jsonify({
        'status': 'running',
        'model_loaded': _model is not None,
        'device': str(_device),
    })
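
# Illustrative /status response once the model is loaded (values depend on
# the host; "cuda:0" is just an example):
#
#   {"status": "running", "model_loaded": true, "device": "cuda:0"}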


@app.route('/upload', methods=['POST'])
def upload():
    """Process an image with an optional prompt. Returns {"result": "description"}."""
    from PIL import Image

    if 'file' not in request.files:
        return jsonify({'error': 'No file part'}), 400

    file = request.files['file']
    if file.filename == '':
        return jsonify({'error': 'No selected file'}), 400
    if not _allowed_file(file.filename):
        return jsonify({'error': 'File type not allowed'}), 400

    prompt = request.form.get(
        'prompt',
        'you are looking at user\'s camera feed, describe this image in 20 words',
    )
    user_id = request.form.get('user_id', '0')

    # Rate limit: 4 second throttle per user
    with _last_processing_lock:
        last_time = _last_processing_time.get(user_id, 0)
        if time.time() - last_time < 4 and last_time > 0:
            return jsonify({'result': '', 'throttled': True}), 429
        _last_processing_time[user_id] = time.time()

    try:
        os.makedirs(UPLOAD_FOLDER, exist_ok=True)
        filename = f"{uuid.uuid4().hex}_{secure_filename(file.filename)}"
        filepath = os.path.join(UPLOAD_FOLDER, filename)
        file.save(filepath)

        image = Image.open(filepath).convert('RGB').resize((255, 255))
        result = _process_image_sync(image, prompt)

        # Clean up saved file
        try:
            os.remove(filepath)
        except OSError:
            pass

        return jsonify({'result': result})
    except Exception as e:
        logger.error(f"Error processing image: {e}")
        return jsonify({'error': 'Processing failed'}), 500
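
# Example multipart request against /upload (a sketch; the host, port, image
# path, and user_id value are assumptions):
#
#   curl -F "file=@frame.jpg" -F "user_id=42" \
#        -F "prompt=describe this image in 20 words" \
#        http://localhost:9891/upload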


@app.route('/describe', methods=['POST'])
def describe_raw():
    """Accept raw image bytes (no multipart) + query param prompt."""
    from PIL import Image
    import io

    prompt = request.args.get(
        'prompt',
        'you are looking at user\'s camera feed, describe this image in 20 words',
    )

    try:
        image = Image.open(io.BytesIO(request.data)).convert('RGB').resize((255, 255))
        result = _process_image_sync(image, prompt)
        return jsonify({'result': result})
    except Exception as e:
        logger.error(f"Error in describe_raw: {e}")
        return jsonify({'error': 'Processing failed'}), 500
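
# Posting raw bytes to /describe, sketched with the `requests` package (which
# is not a dependency of this module; host, port, and file name are assumptions):
#
#   import requests
#   with open('frame.jpg', 'rb') as f:
#       resp = requests.post(
#           'http://localhost:9891/describe',
#           params={'prompt': 'describe this image in 20 words'},
#           data=f.read(),
#       )
#   print(resp.json()['result'])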


def main():
    parser = argparse.ArgumentParser(description='MiniCPM Vision Sidecar')
    parser.add_argument('--model_dir', default=os.path.join(
        os.path.expanduser('~'), '.hevolve', 'models', 'minicpm',
    ))
    from core.port_registry import get_port
    parser.add_argument('--port', type=int, default=get_port('vision'))
    parser.add_argument('--host', default='0.0.0.0')
    parser.add_argument('--device', default=None)
    parser.add_argument('--log_file', default='minicpm_sidecar.log')
    args = parser.parse_args()

    # Logging setup
    handler = RotatingFileHandler(args.log_file, maxBytes=500_000, backupCount=2)
    handler.setLevel(logging.INFO)
    fmt = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    handler.setFormatter(fmt)
    logger.addHandler(handler)
    logger.addHandler(logging.StreamHandler(sys.stdout))
    logger.setLevel(logging.INFO)

    # Auto-detect device if not specified
    device = args.device
    if device is None:
        import torch
        if torch.cuda.is_available():
            device = 'cuda:0'
        elif hasattr(torch.backends, 'mps') and torch.backends.mps.is_available():
            device = 'mps'
        else:
            device = 'cpu'
        logger.info(f"Auto-detected device: {device}")

    # Load model
    _init_model(args.model_dir, device)

    # Serve
    from waitress import serve
    logger.info(f"MiniCPM sidecar starting on {args.host}:{args.port}")
    serve(app, host=args.host, port=args.port, threads=4)


if __name__ == '__main__':
    main()