Coverage for integrations/vision/minicpm_server.py: 0.0%

112 statements  

coverage.py v7.14.0, created at 2026-05-12 04:49 +0000

1""" 

2MiniCPM Vision Server — stripped standalone Flask server for the sidecar. 

3 

4Derived from the root minicpm.py but with configurable model directory, 

5no external config.json dependency, and proper CLI args. 

6 

7Usage: 

8 python -m integrations.vision.minicpm_server --model_dir ~/.hevolve/models/minicpm --port 9891 

9""" 

import argparse
import asyncio
import logging
import os
import sys
import time
import uuid
from concurrent.futures import ThreadPoolExecutor
from logging.handlers import RotatingFileHandler
from threading import Lock

from flask import Flask, request, jsonify
from werkzeug.utils import secure_filename

logger = logging.getLogger('minicpm_server')

app = Flask(__name__)
_model = None
_tokenizer = None
_device = None
_executor = ThreadPoolExecutor(max_workers=2)
_last_processing_time = {}
_last_processing_lock = Lock()

UPLOAD_FOLDER = os.path.join(os.path.dirname(__file__), '_uploads')
ALLOWED_EXTENSIONS = {'png', 'jpg', 'jpeg', 'gif', 'bmp', 'webp'}



def _init_model(model_dir: str, device: str = 'cuda:0'):
    """Load MiniCPM-V-2 onto the specified device (CUDA, MPS, or CPU)."""
    global _model, _tokenizer, _device
    import torch
    from transformers import AutoModel, AutoTokenizer

    logger.info(f"Loading MiniCPM from {model_dir} on {device}")
    _device = device
    dtype = torch.float16 if device != 'cpu' else torch.float32
    _model = AutoModel.from_pretrained(
        model_dir,
        trust_remote_code=True,
        torch_dtype=dtype,
    ).to(device=device, dtype=dtype)
    _model.eval()
    _tokenizer = AutoTokenizer.from_pretrained(
        model_dir,
        trust_remote_code=True,
    )
    logger.info(f"MiniCPM model loaded on {device} ({dtype})")



def _process_image_sync(image, prompt: str) -> str:
    """Run MiniCPM inference synchronously. Returns description string."""
    import torch
    msgs = [{'role': 'user', 'content': prompt}]
    with torch.inference_mode():
        res, _, _ = _model.chat(
            image=image,
            msgs=msgs,
            context=None,
            tokenizer=_tokenizer,
            sampling=True,
            temperature=0.7,
        )
    return res



def _allowed_file(filename):
    return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS



@app.route('/')
def index():
    return jsonify({'status': 'working', 'model': 'MiniCPM-V-2'})


@app.route('/status', methods=['GET'])
def status():
    return jsonify({
        'status': 'running',
        'model_loaded': _model is not None,
        'device': str(_device),
    })


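# Example health check against /status (a sketch, not part of the server; assumes
# the sidecar is reachable at http://localhost:9891 and that the `requests`
# package is installed — adjust host/port to your deployment):
#
#     import requests
#     info = requests.get('http://localhost:9891/status', timeout=5).json()
#     print(info)  # e.g. {'status': 'running', 'model_loaded': True, 'device': 'cuda:0'}

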

@app.route('/upload', methods=['POST'])
def upload():
    """Process an image with an optional prompt. Returns {"result": "description"}."""
    from PIL import Image

    if 'file' not in request.files:
        return jsonify({'error': 'No file part'}), 400

    file = request.files['file']
    if file.filename == '':
        return jsonify({'error': 'No selected file'}), 400
    if not _allowed_file(file.filename):
        return jsonify({'error': 'File type not allowed'}), 400

    prompt = request.form.get(
        'prompt',
        'you are looking at user\'s camera feed, describe this image in 20 words',
    )
    user_id = request.form.get('user_id', '0')

    # Rate limit: 4 second throttle per user
    with _last_processing_lock:
        last_time = _last_processing_time.get(user_id, 0)
        if time.time() - last_time < 4 and last_time > 0:
            return jsonify({'result': '', 'throttled': True}), 429
        _last_processing_time[user_id] = time.time()

    try:
        os.makedirs(UPLOAD_FOLDER, exist_ok=True)
        filename = f"{uuid.uuid4().hex}_{secure_filename(file.filename)}"
        filepath = os.path.join(UPLOAD_FOLDER, filename)
        file.save(filepath)

        image = Image.open(filepath).convert('RGB').resize((255, 255))
        result = _process_image_sync(image, prompt)

        # Clean up saved file
        try:
            os.remove(filepath)
        except OSError:
            pass

        return jsonify({'result': result})
    except Exception as e:
        logger.error(f"Error processing image: {e}")
        return jsonify({'error': 'Processing failed'}), 500


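# Example client call for /upload (a sketch, not part of the server; assumes the
# sidecar listens on http://localhost:9891, that `requests` is installed, and that
# frame.jpg exists locally — all of these are placeholders):
#
#     import requests
#     with open('frame.jpg', 'rb') as f:
#         resp = requests.post(
#             'http://localhost:9891/upload',
#             files={'file': ('frame.jpg', f, 'image/jpeg')},
#             data={'prompt': 'describe this image in 20 words', 'user_id': '42'},
#         )
#     print(resp.status_code, resp.json())
#     # 200 -> {'result': '<description>'}; 429 -> {'result': '', 'throttled': True}
#     # when the same user_id posts again within the 4-second throttle window.

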

@app.route('/describe', methods=['POST'])
def describe_raw():
    """Accept raw image bytes (no multipart) + query param prompt."""
    from PIL import Image
    import io

    prompt = request.args.get(
        'prompt',
        'you are looking at user\'s camera feed, describe this image in 20 words',
    )

    try:
        image = Image.open(io.BytesIO(request.data)).convert('RGB').resize((255, 255))
        result = _process_image_sync(image, prompt)
        return jsonify({'result': result})
    except Exception as e:
        logger.error(f"Error in describe_raw: {e}")
        return jsonify({'error': 'Processing failed'}), 500


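# Example client call for /describe (a sketch, not part of the server; assumes the
# same local sidecar as above — the request body is the raw image bytes and the
# prompt travels as a query parameter):
#
#     import requests
#     with open('frame.jpg', 'rb') as f:
#         resp = requests.post(
#             'http://localhost:9891/describe',
#             params={'prompt': 'describe this image in 20 words'},
#             data=f.read(),
#             headers={'Content-Type': 'application/octet-stream'},
#         )
#     print(resp.json())  # {'result': '<description>'} on success

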

def main():
    parser = argparse.ArgumentParser(description='MiniCPM Vision Sidecar')
    parser.add_argument('--model_dir', default=os.path.join(
        os.path.expanduser('~'), '.hevolve', 'models', 'minicpm',
    ))
    from core.port_registry import get_port
    parser.add_argument('--port', type=int, default=get_port('vision'))
    parser.add_argument('--host', default='0.0.0.0')
    parser.add_argument('--device', default=None)
    parser.add_argument('--log_file', default='minicpm_sidecar.log')
    args = parser.parse_args()

    # Logging setup
    handler = RotatingFileHandler(args.log_file, maxBytes=500_000, backupCount=2)
    handler.setLevel(logging.INFO)
    fmt = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    handler.setFormatter(fmt)
    logger.addHandler(handler)
    logger.addHandler(logging.StreamHandler(sys.stdout))
    logger.setLevel(logging.INFO)

    # Auto-detect device if not specified
    device = args.device
    if device is None:
        import torch
        if torch.cuda.is_available():
            device = 'cuda:0'
        elif hasattr(torch.backends, 'mps') and torch.backends.mps.is_available():
            device = 'mps'
        else:
            device = 'cpu'
        logger.info(f"Auto-detected device: {device}")

    # Load model
    _init_model(args.model_dir, device)

    # Serve
    from waitress import serve
    logger.info(f"MiniCPM sidecar starting on {args.host}:{args.port}")
    serve(app, host=args.host, port=args.port, threads=4)


if __name__ == '__main__':
    main()