Coverage for core / user_lang.py: 22.9%

105 statements  

« prev     ^ index     » next       coverage.py v7.14.0, created at 2026-05-12 04:49 +0000

1"""core.user_lang — canonical read/write for the user's preferred language. 

2 

3Before this module existed, the preferred-language signal leaked across 

4five unsynchronised readers: 

5 1. llama_config._read_preferred_lang() (boot: reads JSON file) 

6 2. /chat request body data.get('preferred_lang') (per-turn override) 

7 3. HART_USER_LANGUAGE env var (headless/CI override) 

8 4. hart_onboarding.get_node_identity() (first-run onboarding) 

9 5. user_context.py cloud profile (cross-device sync) 

10 

11And a single writer (hart_intelligence_entry._persist_language) with 

12three buggy guards that caused "hart_language.json stuck at first value 

13ever written" (see commit ef674b7). 

14 

15This module owns BOTH sides: 

16 

17 get_preferred_lang(request_override=None) -> str 

18 Precedence: request > hart_language.json > env > node_identity > 'en' 

19 Cached by mtime — ~1µs per call when file unchanged. 

20 

21 set_preferred_lang(lang) -> bool 

22 Atomic tmp+os.replace+fsync. Idempotent (skips write if current 

23 value matches). No `!= 'en'` guard, no `not_exists` guard — just 

24 "if different, write it, fire listeners". 

25 

26 on_lang_change(callback) -> None 

27 Subscribe to transition events. Callback receives (old, new). 

28 Fires in a daemon thread so /chat hot path isn't stalled. 

29 

30The intent each of the 5 sources served is preserved: 

31 - Request override → honored by `request_override` param. 

32 - File (boot) → is the persisted value; read by this module. 

33 - Env override → headless deployments; still respected. 

34 - node_identity → read once at onboarding; written into the file. 

35 - Cloud profile → still read by user_context.py for LLM prompt 

36 string (cross-device), unchanged. 

37""" 

38from __future__ import annotations 

39 

40import json 

41import logging 

42import os 

43import threading 

44from pathlib import Path 

45from typing import Callable, List, Optional, Tuple 

46 

47from core.constants import SUPPORTED_LANG_DICT 

48 

49logger = logging.getLogger(__name__) 

50 

51 

# Absolute location of the persisted language file:
# <home>/Documents/Nunba/data/hart_language.json
_HART_LANG_PATH = os.path.join(
    os.path.expanduser('~'),
    *('Documents', 'Nunba', 'data', 'hart_language.json'),
)

56 

57 

58# ── Read-side cache (mtime-invalidated) ───────────────────────────── 

59 

# Guards every read and write of `_cache`.
_cache_lock = threading.Lock()
# 'value' is the last language read from (or written to) the file;
# 'mtime' is the file's st_mtime_ns at that moment (0 = nothing cached).
_cache: dict = dict(value=None, mtime=0)

62 

63 

def _load_from_file() -> Optional[str]:
    """Return the language stored in `hart_language.json`, or None.

    The file's nanosecond mtime acts as the cache key: when it matches
    the cached stamp the cached value is returned without opening the
    file. None is returned when the file cannot be stat'ed, cannot be
    read or parsed, or holds a value whose first two characters are not
    a key of SUPPORTED_LANG_DICT — callers then fall back to other
    sources.
    """
    try:
        file_stat = os.stat(_HART_LANG_PATH)
    except OSError:
        return None
    stamp = file_stat.st_mtime_ns

    # Fast path: file unchanged since the value was cached.
    with _cache_lock:
        if _cache['value'] is not None and _cache['mtime'] == stamp:
            return _cache['value']

    # Slow path: parse the file. Any failure (I/O, bad JSON, unexpected
    # JSON shape) is treated as "no usable value".
    try:
        with open(_HART_LANG_PATH, encoding='utf-8') as fh:
            payload = json.load(fh) or {}
        stored = payload.get('language')
        usable = bool(stored) and stored[:2] in SUPPORTED_LANG_DICT
    except Exception:
        return None
    if not usable:
        return None

    # Remember the value together with the stamp taken before the read.
    with _cache_lock:
        _cache['value'] = stored
        _cache['mtime'] = stamp
    return stored

87 

88 

def _load_from_env() -> Optional[str]:
    """Return the `HART_USER_LANGUAGE` env var if it names a supported
    language, else None.

    Surrounding whitespace is stripped; the value is accepted when its
    first two characters are a key of SUPPORTED_LANG_DICT and is
    returned in full (stripped), not truncated.
    """
    candidate = os.environ.get('HART_USER_LANGUAGE', '').strip()
    if not candidate:
        return None
    return candidate if candidate[:2] in SUPPORTED_LANG_DICT else None

94 

95 

def _load_from_node_identity() -> Optional[str]:
    """Return the language recorded in the onboarding node identity, or
    None.

    Best-effort: `hart_onboarding` is imported lazily inside the call,
    and any failure (import error, lookup error, unexpected value type)
    yields None instead of propagating to the caller.
    """
    try:
        from hart_onboarding import get_node_identity
        identity = get_node_identity() or {}
        chosen = identity.get('language', '')
        # Accept only values whose two-character prefix is supported.
        return chosen if chosen and chosen[:2] in SUPPORTED_LANG_DICT else None
    except Exception:
        return None

108 

109 

def get_preferred_lang(request_override: Optional[str] = None) -> str:
    """Resolve the user's preferred language.

    Precedence (first match wins):
      1. `request_override` — the /chat handler passes
         `data.get('preferred_lang')` here so per-turn UI selections
         always win.
      2. `hart_language.json` on disk — persisted across boots.
      3. `HART_USER_LANGUAGE` env var — headless / CI override.
      4. hart_onboarding node identity — first-run onboarding answer.
      5. Hard default `'en'`.

    A candidate is accepted when its first two characters are a key of
    SUPPORTED_LANG_DICT; the accepted value is returned as given, not
    truncated to that prefix.

    `request_override` originates from a request body, so it may be any
    JSON type. Non-string or empty overrides are ignored (resolution
    falls through to the next source) rather than raising.
    """
    # Only a non-empty string can be a language code; anything else
    # (int, list, dict, ...) would raise on slicing or on the hashed
    # membership test, so it is skipped up front.
    if isinstance(request_override, str) and request_override:
        if request_override[:2] in SUPPORTED_LANG_DICT:
            return request_override

    # Remaining sources, in precedence order; each returns None when it
    # has nothing valid to offer.
    for loader in (_load_from_file, _load_from_env, _load_from_node_identity):
        value = loader()
        if value:
            return value
    return 'en'

138 

139 

140# ── Write-side + on-change subscriber bus ─────────────────────────── 

141 

# Guards every read and write of `_listeners`.
_listeners_lock = threading.Lock()
# Registered (old_lang, new_lang) callbacks, in registration order.
_listeners: List[Callable[[Optional[str], str], None]] = list()

144 

145 

def on_lang_change(callback: Callable[[Optional[str], str], None]) -> None:
    """Subscribe `callback` to language transitions.

    The callback is appended to the module-level listener list and will
    later be invoked as `callback(old_lang, new_lang)`. There is no
    de-duplication here: registering the same callable twice stores it
    twice.
    """
    with _listeners_lock:
        _listeners.append(callback)

152 

153 

def _fire_listeners(old: Optional[str], new: str) -> None:
    """Notify every registered listener of an (old, new) transition.

    The listener list is copied under the lock, then the callbacks run
    sequentially on a freshly started daemon thread named
    'user-lang-change', so the caller returns without waiting for them.
    A listener that raises is logged at warning level and does not stop
    the remaining listeners.
    """
    with _listeners_lock:
        pending = tuple(_listeners)

    def _notify_all() -> None:
        for listener in pending:
            try:
                listener(old, new)
            except Exception as exc:
                logger.warning(f"on_lang_change listener {listener!r} failed: {exc}")

    worker = threading.Thread(
        target=_notify_all, name='user-lang-change', daemon=True,
    )
    worker.start()

168 

169 

def set_preferred_lang(lang: Optional[str]) -> bool:
    """Persist the user's language choice to `hart_language.json`
    atomically AND fire `on_lang_change` listeners on transition.

    Idempotent — if the current on-disk value equals `lang`, no write
    occurs, no listeners fire, returns True.

    Returns False on:
      * invalid `lang`: None, empty, not a string, or a string whose
        first two characters are not a key of SUPPORTED_LANG_DICT
      * write failure (OSError such as disk full or permission denied)
        — the original file stays intact because the new content goes
        to a `.tmp` sibling that is only then moved over it with
        `os.replace`.
    """
    # Reject non-strings up front: callers may hand over raw request
    # data, and slicing / hashing an int or list would raise instead of
    # producing the documented False.
    if not isinstance(lang, str) or not lang:
        return False
    if lang[:2] not in SUPPORTED_LANG_DICT:
        return False

    # Read current (skip if unchanged)
    current = _load_from_file()
    if current == lang:
        return True  # idempotent — no write, no listener fire

    tmp = _HART_LANG_PATH + '.tmp'
    try:
        os.makedirs(os.path.dirname(_HART_LANG_PATH), exist_ok=True)
        with open(tmp, 'w', encoding='utf-8') as f:
            json.dump({'language': lang}, f)
            f.flush()
            try:
                os.fsync(f.fileno())
            except OSError:
                # fsync is best-effort; a failure here must not abort
                # the write.
                pass
        os.replace(tmp, _HART_LANG_PATH)
    except OSError as e:
        # Clean up tmp and leave original file intact
        try:
            if os.path.isfile(tmp):
                os.remove(tmp)
        except OSError:
            pass
        logger.warning(f"set_preferred_lang({lang!r}) failed: {e}")
        return False

    # Prime the cache with the new value and the new file's mtime so the
    # next get_preferred_lang sees it without re-reading the file. An
    # mtime of 0 (stat failed) never matches a real stamp, forcing a
    # re-read.
    with _cache_lock:
        _cache['value'] = lang
        try:
            _cache['mtime'] = os.stat(_HART_LANG_PATH).st_mtime_ns
        except OSError:
            _cache['mtime'] = 0

    _fire_listeners(current, lang)
    return True

224 

225 

226__all__ = [ 

227 'get_preferred_lang', 

228 'set_preferred_lang', 

229 'on_lang_change', 

230]