Coverage for integrations / vlm / mobile.py: 66.3%

95 statements  

« prev     ^ index     » next       coverage.py v7.14.0, created at 2026-05-12 04:49 +0000

1""" 

2integrations.vlm.mobile — Android + iOS surface for the VLM stack. 

3 

4Phases 8 + 9 of memory/vlm_best_of_all_worlds_plan.md §6 / §7. 

5 

6**Android (Phase 8)** — full participant. An on-device companion 

7service (Kotlin, in a sibling Nunba-HART-Companion sub-project) 

8exposes the Accessibility tree + MediaProjection capture over the 

9PeerLink ``compute`` channel. HARTOS Python here exposes the 

10client side: shape contracts, dispatch helpers, and per-platform 

11guards so callers don't need to ``sys.platform`` branch themselves. 

12 

13**iOS (Phase 9)** — sandbox forbids cross-app capture and dispatch. 

14Functions return ``{'status': 'platform_unsupported', 'platform': 

15'ios', 'reason': '...'}`` so callers can fall back to URL-scheme 

16launchers + Shortcuts (the only Apple-permitted dispatch). 

17 

18The Android companion app is out of scope for this module — it 

19ships separately in Nunba-HART-Companion/android/. This module 

20defines the wire protocol both sides agree on. 

21 

22Wire protocol (compute channel, JSON-encoded): 

23 REQUEST (HARTOS → companion): 

24 { 

25 'type': 'android_list_windows' | 'android_capture_window' 

26 | 'android_get_node_tree' | 'android_dispatch_action', 

27 'request_id': 'uuid-...', 

28 'window_id': '...' (optional; this client sends it for capture_window and get_node_tree), 

29 'action': {...} (optional, for dispatch), 

 'ts': <float> (sender's time.time(); included in every request built by this module), 

30 } 

31 RESPONSE (companion → HARTOS): 

32 { 

33 'type': '<request_type>_result', 

34 'request_id': 'uuid-...', 

35 'status': 'ok' | 'error' | 'platform_unsupported', 

36 'error': '...' (when status=error), 

37 'data': {...} (shape per request type — see callers below), 

38 } 

39""" 

40 

41import logging 

42import os 

43import platform 

44import sys 

45import time 

46import uuid 

47from typing import List, Optional 

48 

49logger = logging.getLogger('hevolve.vlm.mobile') 

50 

51 

52# ─── Platform detection ────────────────────────────────────────────── 

53 

def _detect_mobile_platform() -> str:
    """Return one of 'android', 'ios', or '' (desktop / unknown).

    Checks, in order (first match wins):

    1. ``HEVOLVE_FORCE_PLATFORM`` env var — honoured when, after trimming
       whitespace and lower-casing, it is ``'android'`` or ``'ios'``
       (intended for testing). Any other value is ignored.
    2. Android: the ``ANDROID_ARGUMENT`` env var is present, or
       ``sys.platform == 'android'``.
    3. iOS: ``sys.platform == 'ios'``, or ``platform.system()`` is
       ``'Darwin'`` and ``platform.machine()`` starts with ``'iP'``
       (iPhone/iPad/iPod).

    Returns:
        ``'android'``, ``'ios'``, or ``''`` when none of the checks match.
    """
    forced = os.environ.get('HEVOLVE_FORCE_PLATFORM', '').strip().lower()
    if forced in ('android', 'ios'):
        return forced
    if 'ANDROID_ARGUMENT' in os.environ or sys.platform == 'android':
        return 'android'
    # Newer CPython iOS builds identify themselves through sys.platform
    # and do not report platform.system() == 'Darwin', so this check must
    # come in addition to the Darwin/machine heuristic below.
    if sys.platform == 'ios':
        return 'ios'
    if platform.system() == 'Darwin' and platform.machine().startswith('iP'):
        return 'ios'
    return ''

70 

71 

72# ─── iOS stubs (Phase 9) ───────────────────────────────────────────── 

73 

# Template for every iOS "cannot do this" reply. Treated as read-only:
# _ios_unsupported_response() hands out copies, never this dict itself.
_IOS_UNSUPPORTED = dict(
    status='platform_unsupported',
    platform='ios',
    reason=(
        'iOS sandbox forbids cross-app screen capture and action '
        'dispatch from third-party apps. Use URL schemes / '
        'Shortcuts for Apple-permitted dispatch, or run Nunba '
        'in-app for in-Nunba grounding only.'
    ),
)


def _ios_unsupported_response(extra: Optional[dict] = None) -> dict:
    """Build a fresh iOS-unsupported envelope.

    Args:
        extra: optional mapping merged on top of the template; its keys
            win over the template's on collision. ``None`` or an empty
            mapping leaves the template fields untouched.

    Returns:
        A new dict (shallow copy of ``_IOS_UNSUPPORTED`` plus *extra*),
        so callers may mutate it without affecting the shared template.
    """
    envelope = {**_IOS_UNSUPPORTED}
    if not extra:
        return envelope
    envelope.update(extra)
    return envelope

92 

93 

94# ─── Android client (Phase 8) ──────────────────────────────────────── 

95 

def list_android_windows(*, peer_dispatch=None,
                         timeout: float = 5.0) -> List[dict]:
    """Enumerate Android app windows + activities visible to the
    companion app.

    Args:
        peer_dispatch: optional callable
            ``peer_dispatch(channel, payload, timeout) -> response_dict``
            for sending to the paired companion device. When None,
            the request goes to the local companion socket instead,
            which is only reachable when HARTOS itself is running ON
            the Android device.
        timeout: max wait for companion response, in seconds.

    Returns:
        On success, a new list holding the entries of the companion's
        ``data['windows']`` field. Per the wire protocol each entry is
        expected to look like
            {window_id, package, activity, title, rect, monitor_idx,
             is_foreground, is_accessible}
        (the shape is defined by the companion and is not validated
        here).

        An empty list when no companion is reachable, the companion
        reports a non-``'ok'`` status, or the response is malformed
        (non-dict response, missing/null ``data``, non-list
        ``windows``).

        On iOS, a single-element list containing the standard
        ``{'status': 'platform_unsupported', 'platform': 'ios', ...}``
        envelope with ``'request': 'list_windows'`` added.
    """
    plat = _detect_mobile_platform()
    if plat == 'ios':
        return [_ios_unsupported_response({'request': 'list_windows'})]
    if plat != 'android' and peer_dispatch is None:
        # Caller is running on a desktop and asking about Android —
        # only reachable via PeerLink. Without peer_dispatch we
        # can't talk to the companion, so return empty.
        logger.debug(
            'list_android_windows: no peer_dispatch and not on Android')
        return []
    payload = {
        'type': 'android_list_windows',
        'request_id': str(uuid.uuid4()),
        'ts': time.time(),
    }
    response = _send_to_companion(payload, peer_dispatch, timeout)
    # peer_dispatch is caller-supplied; guard against it handing back
    # something that is not a response dict.
    if not isinstance(response, dict):
        return []
    if response.get('status') != 'ok':
        logger.debug('list_android_windows companion error: %s',
                     response.get('error'))
        return []
    # 'data' may be absent or an explicit JSON null; neither may crash.
    data = response.get('data')
    windows = data.get('windows') if isinstance(data, dict) else None
    if not isinstance(windows, (list, tuple)):
        return []
    return list(windows)

142 

143 

def capture_android_window(window_id: str, *, peer_dispatch=None,
                           timeout: float = 5.0) -> Optional[bytes]:
    """Capture an Android window's pixels via the companion app.

    Only works when:
      * HARTOS is on the device with the companion reachable over its
        local socket, OR
      * peer_dispatch routes to a paired Android via PeerLink.

    Args:
        window_id: identifier of the window to capture, forwarded
            unchanged in the request payload.
        peer_dispatch: optional callable
            ``peer_dispatch(channel, payload, timeout) -> response_dict``.
            When None the local companion socket is tried.
        timeout: max wait for companion response, in seconds.

    Returns:
        The bytes obtained by base64-decoding the response's
        ``data['jpeg_base64']`` field (JPEG per the wire protocol), or
        None when running on iOS, no companion is reachable, the
        companion reports a non-``'ok'`` status, or the payload is
        missing, empty, or not decodable.
    """
    plat = _detect_mobile_platform()
    if plat == 'ios':
        return None
    payload = {
        'type': 'android_capture_window',
        'request_id': str(uuid.uuid4()),
        'window_id': window_id,
        'ts': time.time(),
    }
    response = _send_to_companion(payload, peer_dispatch, timeout)
    if not isinstance(response, dict) or response.get('status') != 'ok':
        return None
    # 'data' may be absent or an explicit JSON null; neither may crash.
    data = response.get('data')
    b64 = data.get('jpeg_base64') if isinstance(data, dict) else None
    if not b64:
        return None
    import base64
    try:
        return base64.b64decode(b64)
    except (ValueError, TypeError) as e:
        # binascii.Error (bad padding etc.) is a ValueError subclass;
        # TypeError covers a non-str/bytes field value.
        logger.debug('capture_android_window: undecodable jpeg_base64: %s',
                     e)
        return None

175 

176 

def get_android_node_tree(*, window_id: Optional[str] = None,
                          peer_dispatch=None,
                          timeout: float = 5.0) -> Optional[dict]:
    """Fetch the AccessibilityNodeInfo tree of the foreground window
    (or *window_id* if specified). This is often a SUPERIOR signal
    to VLM grounding on Android — text/contentDescription/clickable
    flags are exposed directly without pixel reasoning. Most agents
    operate primarily by tree matching and only fall back to VLM
    when the UI is canvas-rendered (games, Compose without semantics).

    Args:
        window_id: window to inspect; sent as-is in the request
            (``None`` when the caller does not specify one).
        peer_dispatch: optional callable
            ``peer_dispatch(channel, payload, timeout) -> response_dict``.
            When None the local companion socket is tried.
        timeout: max wait for companion response, in seconds.

    Returns:
        The response's ``data['tree']`` value — per the wire protocol a
        dict ``{root: {class, text, content_description, clickable,
        bounds, children: [...]}}`` (not validated here) — or None when
        no companion is reachable, the companion reports a
        non-``'ok'`` status, or ``data`` is missing/null.

        NOTE: on iOS this returns the standard
        ``{'status': 'platform_unsupported', 'platform': 'ios', ...}``
        envelope (with ``'request': 'get_node_tree'``) rather than
        None, so callers must check for a ``'status'`` key before
        treating the result as a tree.
    """
    plat = _detect_mobile_platform()
    if plat == 'ios':
        return _ios_unsupported_response({'request': 'get_node_tree'})
    payload = {
        'type': 'android_get_node_tree',
        'request_id': str(uuid.uuid4()),
        'window_id': window_id,
        'ts': time.time(),
    }
    response = _send_to_companion(payload, peer_dispatch, timeout)
    if not isinstance(response, dict) or response.get('status') != 'ok':
        return None
    # 'data' may be absent or an explicit JSON null; neither may crash.
    data = response.get('data')
    if not isinstance(data, dict):
        return None
    return data.get('tree')

204 

205 

def dispatch_android_action(action: dict, *,
                            peer_dispatch=None,
                            timeout: float = 5.0) -> dict:
    """Forward a VLM-emitted action to the Android companion.

    The action is passed through untouched; translating it into an
    Android call is the companion's job. Mapping as noted by the
    original author (implemented companion-side, not in this module):
      ``left_click [x,y]``   → AccessibilityService.dispatchGesture
                               OR node.performAction(ACTION_CLICK)
      ``type "text"``        → node.performAction(ACTION_SET_TEXT)
      ``key "BACK"|"HOME"``  → performGlobalAction(GLOBAL_ACTION_BACK)
      ``scroll_down``        → dispatchGesture swipe
      ``open_file_gui "X"``  → Intent.ACTION_VIEW launcher

    Args:
        action: the action description to execute.
        peer_dispatch: optional callable
            ``peer_dispatch(channel, payload, timeout) -> response_dict``.
            When None the local companion socket is tried.
        timeout: max wait for companion response, in seconds.

    Returns:
        The companion's response as received; ``{'status':
        'no_response'}`` when nothing (or an empty response) came back;
        the iOS-unsupported envelope when running on iOS.
    """
    if _detect_mobile_platform() == 'ios':
        return _ios_unsupported_response({'request': 'dispatch_action'})
    request = {
        'type': 'android_dispatch_action',
        'request_id': str(uuid.uuid4()),
        'action': action,
        'ts': time.time(),
    }
    reply = _send_to_companion(request, peer_dispatch, timeout)
    if reply:
        return reply
    return {'status': 'no_response'}

230 

231 

232# ─── Companion transport ───────────────────────────────────────────── 

233 

def _send_to_companion(payload: dict, peer_dispatch, timeout: float
                       ) -> Optional[dict]:
    """Deliver *payload* to the companion app on a best-effort basis.

    Transport selection:
      * *peer_dispatch* given — call it as
        ``peer_dispatch('compute', payload, timeout=timeout)`` and
        return whatever it returns. If it raises, the error is logged
        at debug level and None is returned; the local socket is NOT
        tried as a fallback.
      * *peer_dispatch* is None — delegate to the local UNIX-socket
        transport, which itself returns None when no companion is
        reachable.
    """
    if peer_dispatch is None:
        return _send_via_local_socket(payload, timeout)
    try:
        reply = peer_dispatch('compute', payload, timeout=timeout)
    except Exception as e:
        logger.debug(f'peer_dispatch failed: {e}')
        return None
    return reply

251 

252 

def _send_via_local_socket(payload: dict, timeout: float
                           ) -> Optional[dict]:
    """UNIX-socket transport for Termux / on-device deployment.

    Framing is newline-delimited JSON: the request is sent as one JSON
    document followed by ``'\\n'`` and the reply is read up to the first
    ``'\\n'`` (or until the peer closes the connection).

    Args:
        payload: JSON-serializable request dict.
        timeout: socket timeout in seconds. It is applied via
            ``settimeout`` and therefore bounds each blocking socket
            operation individually, not the exchange as a whole.

    Returns:
        The decoded reply dict, or None when not running on Android,
        the socket path does not exist, any transport/encoding error
        occurs, or the reply is not a JSON object.
    """
    if _detect_mobile_platform() != 'android':
        return None
    import json
    import socket as _sk
    sock_path = os.environ.get(
        'HEVOLVE_ANDROID_COMPANION_SOCK',
        '/data/data/com.termux/files/usr/var/run/nunba-companion.sock')
    if not os.path.exists(sock_path):
        logger.debug('companion socket missing at %s', sock_path)
        return None
    try:
        with _sk.socket(_sk.AF_UNIX, _sk.SOCK_STREAM) as s:
            s.settimeout(timeout)
            s.connect(sock_path)
            s.sendall((json.dumps(payload) + '\n').encode('utf-8'))
            # Collect chunks and join once; only the newest chunk needs
            # scanning for the terminator.
            chunks = []
            while True:
                chunk = s.recv(8192)
                if not chunk:
                    break
                chunks.append(chunk)
                if b'\n' in chunk:
                    break
        # Keep only the first line: bytes after the terminator would
        # otherwise make json.loads fail with "Extra data".
        line = b''.join(chunks).partition(b'\n')[0]
        reply = json.loads(line.decode('utf-8').strip())
    except Exception as e:
        # Deliberately broad: this is a best-effort transport boundary
        # and callers treat None as "no companion reachable".
        logger.debug('local socket transport failed: %s', e)
        return None
    if not isinstance(reply, dict):
        # Callers use dict methods on the result; reject other JSON types.
        logger.debug('local socket transport: non-object reply ignored')
        return None
    return reply