Coverage for integrations / vlm / mobile.py: 66.3%
95 statements
« prev ^ index » next coverage.py v7.14.0, created at 2026-05-12 04:49 +0000
« prev ^ index » next coverage.py v7.14.0, created at 2026-05-12 04:49 +0000
1"""
2integrations.vlm.mobile — Android + iOS surface for the VLM stack.
4Phases 8 + 9 of memory/vlm_best_of_all_worlds_plan.md §6 / §7.
6**Android (Phase 8)** — full participant. An on-device companion
7service (Kotlin, in a sibling Nunba-HART-Companion sub-project)
8exposes the Accessibility tree + MediaProjection capture over the
9PeerLink ``compute`` channel. HARTOS Python here exposes the
10client side: shape contracts, dispatch helpers, and per-platform
11guards so callers don't need to ``sys.platform`` branch themselves.
13**iOS (Phase 9)** — sandbox forbids cross-app capture and dispatch.
14Functions return ``{'status': 'platform_unsupported', 'platform':
15'ios', 'reason': '...'}`` so callers can fall back to URL-scheme
16launchers + Shortcuts (the only Apple-permitted dispatch).
18The Android companion app is out of scope for this module — it
19ships separately in Nunba-HART-Companion/android/. This module
20defines the wire protocol both sides agree on.
22Wire protocol (compute channel, JSON-encoded):
23 REQUEST (HARTOS → companion):
24 {
25 'type': 'android_list_windows' | 'android_capture_window'
26 | 'android_get_node_tree' | 'android_dispatch_action',
27 'request_id': 'uuid-...',
28 'window_id': '...' (optional, for capture/dispatch),
29 'action': {...} (optional, for dispatch),
30 }
31 RESPONSE (companion → HARTOS):
32 {
33 'type': '<request_type>_result',
34 'request_id': 'uuid-...',
35 'status': 'ok' | 'error' | 'platform_unsupported',
36 'error': '...' (when status=error),
37 'data': {...} (shape per request type — see callers below),
38 }
39"""
41import logging
42import os
43import platform
44import sys
45import time
46import uuid
47from typing import List, Optional
49logger = logging.getLogger('hevolve.vlm.mobile')
52# ─── Platform detection ──────────────────────────────────────────────
def _detect_mobile_platform() -> str:
    """Return 'android', 'ios', or '' (desktop / unknown) for this process.

    Checks are applied in this order; the first match wins:

    1. ``HEVOLVE_FORCE_PLATFORM`` set to ``android`` or ``ios``
       (case-insensitive, surrounding whitespace ignored) — an explicit
       override intended for testing. Any other value is ignored.
    2. Android: the ``ANDROID_ARGUMENT`` environment variable is present,
       or ``sys.platform == 'android'``.
    3. iOS: ``sys.platform == 'ios'``, or the legacy signature of
       ``platform.system() == 'Darwin'`` together with a
       ``platform.machine()`` value starting with ``'iP'``
       (iPhone / iPad / iPod).

    Returns:
        ``'android'``, ``'ios'``, or the empty string.
    """
    forced = os.environ.get('HEVOLVE_FORCE_PLATFORM', '').strip().lower()
    if forced in ('android', 'ios'):
        return forced
    if 'ANDROID_ARGUMENT' in os.environ or sys.platform == 'android':
        return 'android'
    # Official CPython iOS builds identify through sys.platform; on those
    # builds platform.system() is not 'Darwin', so the legacy check below
    # would miss them.
    if sys.platform == 'ios':
        return 'ios'
    if platform.system() == 'Darwin' and platform.machine().startswith('iP'):
        return 'ios'
    return ''
72# ─── iOS stubs (Phase 9) ─────────────────────────────────────────────
# Template envelope handed back for every capability that cannot be
# offered on iOS. Treat it as read-only: callers receive copies.
_IOS_UNSUPPORTED = dict(
    status='platform_unsupported',
    platform='ios',
    reason=(
        'iOS sandbox forbids cross-app screen capture and action '
        'dispatch from third-party apps. Use URL schemes / '
        'Shortcuts for Apple-permitted dispatch, or run Nunba '
        'in-app for in-Nunba grounding only.'
    ),
)
def _ios_unsupported_response(extra: Optional[dict] = None) -> dict:
    """Build a fresh iOS "platform_unsupported" envelope.

    Args:
        extra: optional key/value pairs merged on top of the standard
            envelope (they win on key collisions).

    Returns:
        A new dict on every call, so the shared template is never
        handed out or mutated. Callers JSON-serialize it as needed.
    """
    envelope = _IOS_UNSUPPORTED.copy()
    envelope.update(extra or {})
    return envelope
94# ─── Android client (Phase 8) ────────────────────────────────────────
def list_android_windows(*, peer_dispatch=None,
                         timeout: float = 5.0) -> List[dict]:
    """Enumerate Android app windows + activities visible to the
    companion app.

    Args:
        peer_dispatch: optional callable
            ``peer_dispatch(channel, payload, timeout) -> response_dict``
            for sending to the paired companion device. When None,
            the request goes to the local companion socket, which is
            only attempted when HARTOS itself is running ON the
            Android device; on any other platform an empty list is
            returned without sending anything.
        timeout: max wait for companion response, in seconds.

    Returns:
        The ``data.windows`` list from the companion response. Per the
        wire protocol each entry is a window dict:
            [{window_id, package, activity, title, rect, monitor_idx,
              is_foreground, is_accessible}]
        An empty list is returned when no companion is reachable, when
        the companion reports a non-``ok`` status, or when the response
        is malformed (non-dict response, missing/null ``data``,
        non-list ``windows``). On iOS the result is a single-element
        list holding the ``platform_unsupported`` envelope with
        ``request='list_windows'``.
    """
    plat = _detect_mobile_platform()
    if plat == 'ios':
        return [_ios_unsupported_response({'request': 'list_windows'})]
    if plat != 'android' and peer_dispatch is None:
        # Caller is running on a desktop and asking about Android —
        # only reachable via PeerLink. Without peer_dispatch we
        # can't talk to the companion, so return empty.
        logger.debug(
            'list_android_windows: no peer_dispatch and not on Android')
        return []
    payload = {
        'type': 'android_list_windows',
        'request_id': str(uuid.uuid4()),
        'ts': time.time(),
    }
    response = _send_to_companion(payload, peer_dispatch, timeout)
    # The transport is caller-supplied, so do not assume it produced
    # a dict.
    if not isinstance(response, dict):
        return []
    if response.get('status') != 'ok':
        logger.debug('list_android_windows companion error: %s',
                     response.get('error'))
        return []
    # 'data' may be absent or explicitly null; either way there are no
    # windows to report.
    data = response.get('data')
    windows = data.get('windows') if isinstance(data, dict) else None
    if not isinstance(windows, (list, tuple)):
        return []
    return list(windows)
def capture_android_window(window_id: str, *, peer_dispatch=None,
                           timeout: float = 5.0) -> Optional[bytes]:
    """Capture an Android window's pixels via MediaProjection.

    Only works when:
      * HARTOS is on the device with companion installed + accessibility
        service enabled, OR
      * peer_dispatch routes to a paired Android via PeerLink.

    Args:
        window_id: identifier of the window to capture, sent to the
            companion unchanged.
        peer_dispatch: optional callable
            ``peer_dispatch(channel, payload, timeout) -> response_dict``;
            when None the local companion socket is tried.
        timeout: max wait for companion response, in seconds.

    Returns:
        The decoded bytes of ``data.jpeg_base64`` (JPEG per the wire
        protocol), or None when running on iOS (sandbox), when no
        companion answers, when the status is not ``ok``, or when the
        image field is missing, empty, or not valid base64.
    """
    plat = _detect_mobile_platform()
    if plat == 'ios':
        return None
    payload = {
        'type': 'android_capture_window',
        'request_id': str(uuid.uuid4()),
        'window_id': window_id,
        'ts': time.time(),
    }
    response = _send_to_companion(payload, peer_dispatch, timeout)
    if not isinstance(response, dict) or response.get('status') != 'ok':
        return None
    # 'data' may be absent or explicitly null in a malformed reply.
    data = response.get('data')
    b64 = data.get('jpeg_base64') if isinstance(data, dict) else None
    if not b64:
        return None
    import base64
    try:
        # Default (non-strict) decoding on purpose: encoders that wrap
        # their output with line breaks must keep working.
        return base64.b64decode(b64)
    except (ValueError, TypeError) as exc:
        # binascii.Error is a ValueError; TypeError covers a field that
        # is not str/bytes-like at all.
        logger.debug('capture_android_window: undecodable jpeg_base64: %s',
                     exc)
        return None
def get_android_node_tree(*, window_id: Optional[str] = None,
                          peer_dispatch=None,
                          timeout: float = 5.0) -> Optional[dict]:
    """Fetch the AccessibilityNodeInfo tree of the foreground window
    (or *window_id* if specified).

    This is often a SUPERIOR signal to VLM grounding on Android —
    text/contentDescription/clickable flags are exposed directly
    without pixel reasoning. Most agents operate primarily by tree
    matching and only fall back to VLM when the UI is canvas-rendered
    (games, Compose without semantics).

    Args:
        window_id: window to inspect; sent as ``None`` when omitted.
        peer_dispatch: optional callable
            ``peer_dispatch(channel, payload, timeout) -> response_dict``;
            when None the local companion socket is tried.
        timeout: max wait for companion response, in seconds.

    Returns:
        Tree dict ``{root: {class, text, content_description,
        clickable, bounds, children: [...]}}`` taken from
        ``data.tree``, or None on failure (no companion, non-``ok``
        status, missing/null ``data``, or a non-dict tree).

        NOTE: on iOS this returns the ``platform_unsupported`` envelope
        (with ``request='get_node_tree'``) rather than None, so callers
        should check for a ``status`` key before treating the result
        as a tree.
    """
    plat = _detect_mobile_platform()
    if plat == 'ios':
        return _ios_unsupported_response({'request': 'get_node_tree'})
    payload = {
        'type': 'android_get_node_tree',
        'request_id': str(uuid.uuid4()),
        'window_id': window_id,
        'ts': time.time(),
    }
    response = _send_to_companion(payload, peer_dispatch, timeout)
    if not isinstance(response, dict) or response.get('status') != 'ok':
        return None
    # 'data' may be absent or explicitly null in a malformed reply.
    data = response.get('data')
    tree = data.get('tree') if isinstance(data, dict) else None
    return tree if isinstance(tree, dict) else None
def dispatch_android_action(action: dict, *,
                            peer_dispatch=None,
                            timeout: float = 5.0) -> dict:
    """Forward one VLM-emitted action to the Android companion to run.

    How the companion is expected to translate actions:
      ``left_click [x,y]``   → AccessibilityService.dispatchGesture
                               OR node.performAction(ACTION_CLICK)
      ``type "text"``        → node.performAction(ACTION_SET_TEXT)
      ``key "BACK"|"HOME"``  → performGlobalAction(GLOBAL_ACTION_BACK)
      ``scroll_down``        → dispatchGesture swipe
      ``open_file_gui "X"``  → Intent.ACTION_VIEW launcher

    Args:
        action: the action description, embedded unchanged in the
            request under ``'action'``.
        peer_dispatch: optional transport callable; when None the
            local companion socket is tried.
        timeout: max wait for companion response, in seconds.

    Returns:
        The companion's response as received. When nothing (or an
        empty response) comes back, ``{'status': 'no_response'}``.
        On iOS, the ``platform_unsupported`` envelope with
        ``request='dispatch_action'``.
    """
    if _detect_mobile_platform() == 'ios':
        return _ios_unsupported_response({'request': 'dispatch_action'})
    request = {
        'type': 'android_dispatch_action',
        'request_id': str(uuid.uuid4()),
        'action': action,
        'ts': time.time(),
    }
    reply = _send_to_companion(request, peer_dispatch, timeout)
    if reply:
        return reply
    # Locally synthesized marker: the companion never answered.
    return {'status': 'no_response'}
232# ─── Companion transport ─────────────────────────────────────────────
def _send_to_companion(payload: dict, peer_dispatch, timeout: float
                       ) -> Optional[dict]:
    """Deliver *payload* to the companion app on a best-effort basis.

    Transport selection (mirrors plan §10's resolver):
      1. ``peer_dispatch`` when the caller supplied one (typically a
         wrapper around the PeerLink compute channel). It is invoked as
         ``peer_dispatch('compute', payload, timeout=timeout)``.
      2. Otherwise the local UNIX socket (companion on the same device).
      3. None when neither path yields a response.

    Any exception raised by ``peer_dispatch`` is logged at debug level
    and turned into None; there is no fallback to the local socket in
    that case.
    """
    if peer_dispatch is None:
        return _send_via_local_socket(payload, timeout)
    try:
        reply = peer_dispatch('compute', payload, timeout=timeout)
    except Exception as e:
        logger.debug(f'peer_dispatch failed: {e}')
        reply = None
    return reply
def _send_via_local_socket(payload: dict, timeout: float
                           ) -> Optional[dict]:
    """UNIX-socket transport for Termux / on-device deployment.

    Sends *payload* as one newline-terminated JSON document and reads
    the reply up to the first newline (or until the peer closes the
    connection).

    The socket path comes from ``HEVOLVE_ANDROID_COMPANION_SOCK`` and
    defaults to
    ``/data/data/com.termux/files/usr/var/run/nunba-companion.sock``.

    Args:
        payload: JSON-serializable request dict.
        timeout: socket timeout in seconds; it applies to each socket
            operation individually, not to the exchange as a whole.

    Returns:
        The decoded response dict, or None when not running on
        Android, when the socket file is missing, on any transport or
        decoding error, or when the reply is not a JSON object.
    """
    if _detect_mobile_platform() != 'android':
        return None
    import json
    import socket as _sk
    sock_path = os.environ.get(
        'HEVOLVE_ANDROID_COMPANION_SOCK',
        '/data/data/com.termux/files/usr/var/run/nunba-companion.sock')
    if not os.path.exists(sock_path):
        logger.debug('companion socket missing at %s', sock_path)
        return None
    try:
        with _sk.socket(_sk.AF_UNIX, _sk.SOCK_STREAM) as s:
            s.settimeout(timeout)
            s.connect(sock_path)
            s.sendall((json.dumps(payload) + '\n').encode('utf-8'))
            # Collect chunks in a list and look for the terminator only
            # in the newest chunk: replies can be large (base64 image
            # data), and re-scanning / re-concatenating one growing
            # buffer per chunk is quadratic.
            chunks = []
            while True:
                chunk = s.recv(8192)
                if not chunk:
                    break
                chunks.append(chunk)
                if b'\n' in chunk:
                    break
        text = b''.join(chunks).decode('utf-8').lstrip()
        if not text:
            logger.debug('local socket transport failed: empty reply')
            return None
        # raw_decode parses the first JSON document and ignores whatever
        # follows it, so bytes received after the terminating newline
        # do not invalidate an otherwise good reply.
        reply, _end = json.JSONDecoder().raw_decode(text)
    except Exception as e:
        # Deliberately broad: this transport is best-effort and must
        # never raise into the caller.
        logger.debug('local socket transport failed: %s', e)
        return None
    if not isinstance(reply, dict):
        logger.debug('local socket transport failed: reply is %s, not a '
                     'JSON object', type(reply).__name__)
        return None
    return reply