Coverage for core / _transformers_lazy_guard.py: 91.2%

34 statements  

coverage.py v7.14.0, created at 2026-05-12 04:49 +0000

1"""Install transformers `_LazyModule.__getattr__` recursion guards. 

2 

3This module exists solely for its **import side-effect** — it patches 

4`transformers` to prevent the ``_LazyModule.__getattr__`` re-entry 

5recursion that hangs splash / agent_daemon / Hypercorn workers. 

6 

7Background (carried forward verbatim from the original install site at 

8`hart_intelligence_entry.py:80-209`, commit a6d2dca): 

9 

10The guard was originally installed at the top of HIE because HIE was 

11the single canonical entry point into HARTOS — anyone touching the 

12package went through HIE first. That assumption broke when modules 

13under ``core/`` and ``integrations/`` started getting imported via 

14non-HIE paths (e.g. Nunba's bg_import → ``models.catalog`` → 

15``integrations.service_tools.model_catalog`` → ``registry`` → 

16``core.labeled_tool``). Those paths now reach into `langchain.agents` 

17→ `transformers` BEFORE HIE has loaded the guard. 

18 

19Fix: extract the guard to a tiny standalone module imported by both 

20``core/__init__.py`` and ``integrations/__init__.py``. Every HARTOS 

21entry path now triggers the guard before any transformers attribute 

22access. Patch is idempotent (``_hartos_reentry_guarded`` sentinel) so 

23re-import during a single interpreter session is a no-op; HIE's 

24original install block still runs unchanged as a third defense. 

25 

26Two patches: 

27 

 1. GPT2TokenizerFast direct-bind. ``transformers.__dict__`` gets
    ``GPT2TokenizerFast`` resolved to its real class so the very first
    ``from transformers import GPT2TokenizerFast`` (langchain_core
    does this transitively) is a plain dict hit, not a lazy lookup.

 2. ``_LazyModule.__getattr__`` re-entry guard. Wraps the method
    with ``threading.local()`` state. When the same ``(module, name)``
    pair is requested again on the SAME thread while the original
    lookup is still in flight (which is what ``hasattr`` does inside
    ``__getattr__``), raise ``AttributeError`` immediately. ``hasattr``
    swallows ``AttributeError`` → returns ``False`` → caller proceeds
    without recursion. Cross-name recursion is preserved (only the
    pathological same-name re-entry is short-circuited).
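
For reference, the ``hasattr`` contract the guard leans on, illustrated
with a hypothetical minimal class (not HARTOS code): ``hasattr`` invokes
``__getattr__`` and converts ``AttributeError`` into ``False``::

    class _Probe:
        def __getattr__(self, name):
            raise AttributeError(name)

    assert hasattr(_Probe(), 'missing') is False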

See ``memory/feedback_transformers_lazy_module_patch.md`` for the
follow-up tracking the upstream HuggingFace fix.
"""
from __future__ import annotations


def _install_gpt2_direct_bind() -> None:
    """Resolve `transformers.GPT2TokenizerFast` eagerly so the very first
    consumer lookup hits a populated `__dict__` instead of the lazy graph."""
    try:
        import transformers as _tf
        from transformers.models.gpt2.tokenization_gpt2_fast import (
            GPT2TokenizerFast as _gpt2_fast,
        )
        if 'GPT2TokenizerFast' not in _tf.__dict__:
            _tf.__dict__['GPT2TokenizerFast'] = _gpt2_fast
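            # With the class bound in ``__dict__``, the very first
            # ``from transformers import GPT2TokenizerFast`` is a plain
            # attribute hit; ``__getattr__`` only fires on lookup misses,
            # so the lazy path is never entered for this symbol.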

    except Exception:
        # transformers is not installed, the version moved the submodule, or
        # the bundled hevolvearmor strip removed it. The recursion guard
        # below still applies; this direct-bind is a one-symbol fast path.
        pass


def _install_lazy_module_reentry_guard() -> None:
    """Wrap `_LazyModule.__getattr__` with a threading.local re-entry guard.

    Idempotent — the sentinel `_hartos_reentry_guarded` ensures repeated
    calls in the same interpreter are no-ops.
    """
    try:
        import threading
        from transformers.utils import import_utils as _tf_iu

        _LazyModule = getattr(_tf_iu, '_LazyModule', None)
        if _LazyModule is None:
            return
        if getattr(_LazyModule, '_hartos_reentry_guarded', False):
            return  # already wrapped — idempotent

        _orig_getattr = _LazyModule.__getattr__
        _resolving = threading.local()
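        # ``threading.local()`` gives each worker thread its own in-progress
        # set, so concurrent first-time resolutions on other threads are
        # never short-circuited by this guard.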

        # Bind the original method and the thread-local via default args so
        # the closure carries its own references (the caller's module body
        # can be GC'd safely).
        def _hartos_guarded_getattr(
                self, name, _orig=_orig_getattr, _local=_resolving):
            in_progress = getattr(_local, 'set', None)
            if in_progress is None:
                in_progress = set()
                _local.set = in_progress
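            # The set stores (id(self), name) pairs, plain ints and strings,
            # so it never holds a reference to the lazy module object itself.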

            key = (id(self), name)
            if key in in_progress:
                # Same (module, name) re-entry on this thread — a `hasattr`
                # probe asking "is this bound yet?". We're mid-resolution,
                # so the answer is "not yet". AttributeError lets `hasattr`
                # return False, which is the intended semantic.
                raise AttributeError(
                    f"module {self.__name__!r} has no attribute {name!r} "
                    f"(HARTOS re-entry guard: same-name __getattr__ "
                    f"recursion broken)"
                )
            in_progress.add(key)
            try:
                return _orig(self, name)
            finally:
                in_progress.discard(key)
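            # The ``finally`` clears the key even when resolution raises, so
            # a later lookup of the same attribute is not blocked by a
            # stale entry.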

        _LazyModule.__getattr__ = _hartos_guarded_getattr
        _LazyModule._hartos_reentry_guarded = True
    except Exception:
        # transformers is absent, the version moved _LazyModule, or the
        # guard was already installed elsewhere — fall through. Worker
        # threads will hit the lazy path and may pay the recursion once,
        # but the GPT2TokenizerFast direct-bind above still covers the
        # common entry symbol.
        pass


# Run on import — every entry path that imports this module gets both
# guards before any transformers attribute access can fire.
_install_gpt2_direct_bind()
_install_lazy_module_reentry_guard()
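
# Consumption sketch (illustrative, not copied from those files; the exact
# import line in ``core/__init__.py`` / ``integrations/__init__.py`` may
# differ):
#
#     from . import _transformers_lazy_guard  # noqa: F401  (side-effect only)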