Coverage for integrations / social / media_curator_service.py: 100.0%

85 statements  

« prev     ^ index     » next       coverage.py v7.14.0, created at 2026-05-12 04:49 +0000

1""" 

2HevolveSocial - Conversational social-media curator service (closes #401). 

3 

4Parses natural-language user feedback into structured curator intents 

5so the seeded social_media_curator_agent can act on them deterministi- 

6cally (or via richer LLM parsing when the model is available). 

7 

8Design contract (project_encounter_icebreaker.md §11 + the seeded 

9goal at goal_seeding.SEED_BOOTSTRAP_GOALS slug='social_media_ 

10curator_agent'): 

11 

12 * INPUT: a user utterance (voice → STT'd to text, or typed). 

13 * OUTPUT: a CuratorIntent — what the user is asking for, in 

14 structured form, plus confidence + the raw text for audit. 

15 * NEVER auto-publishes — caller stages the action behind a 

16 user-approval tap, same flow as the icebreaker (no_autosend + 

17 consent_required gates from the seeded goal). 

18 * Topology gate: drafting / scheduling work that READS user memory 

19 or runs an LLM is consent-gated on central topology, same rule 

20 as icebreaker_service.draft_icebreaker. This module's parser 

21 itself is pure (no memory access, no LLM by default), so it 

22 runs unconditionally on any topology — the consent gate fires 

23 one layer up when the parser's intent triggers a downstream 

24 memory read or LLM call. 

25 

26The deterministic keyword classifier covers the bulk of the 

27"this one's cool / skip / caption it / post Friday" vocabulary the 

28seeded goal lists. Callers that have an LLM available can pass 

29``llm_callback`` to upgrade the classifier — same fall-through 

30contract as draft_icebreaker (LLM result wins; on raise/empty/ 

31non-string, falls back to the deterministic path). 

32""" 

33from __future__ import annotations 

34 

35import logging 

36import re 

37from dataclasses import dataclass, field 

38from typing import Any, Callable, Optional 

39 

40logger = logging.getLogger('hevolve_social') 

41 

42 

# Intent kinds the curator understands.  The string values are a stable
# contract: clients pattern-match on them.
INTENT_APPROVE = 'approve'              # "post this", "share it", "looks great"
INTENT_REJECT = 'reject'                # "skip", "not this", "pass"
INTENT_CAPTION_STYLE = 'caption_style'  # "caption with hiking vibe"
INTENT_SCHEDULE = 'schedule'            # "post Friday morning"
INTENT_CHANNELS = 'channels'            # "on twitter and linkedin"
INTENT_UNKNOWN = 'unknown'

# Every kind a CuratorIntent may carry, including the "no match" kind.
VALID_INTENTS = frozenset((
    INTENT_APPROVE,
    INTENT_REJECT,
    INTENT_CAPTION_STYLE,
    INTENT_SCHEDULE,
    INTENT_CHANNELS,
    INTENT_UNKNOWN,
))

55 

56 

# Platform names recognised in an utterance, matched as lowercased
# substrings.  This is the single source of truth: add new names or
# aliases here and never inline a platform list elsewhere in the
# curator pipeline.
_PLATFORM_TOKENS: tuple[str, ...] = (
    'twitter',
    'x.com',
    'linkedin',
    'mastodon',
    'bluesky',
    'instagram',
    'facebook',
    'reddit',
    'threads',
)

# Alias -> canonical platform name.
# NOTE(review): 'x ' is not listed in _PLATFORM_TOKENS, and the channel
# classifier only looks up tokens taken from that tuple, so this alias
# looks unreachable today — confirm whether it is still wanted.
_PLATFORM_ALIASES: dict[str, str] = {'x.com': 'twitter', 'x ': 'twitter'}

69 

70 

@dataclass(frozen=True)
class CuratorIntent:
    """Immutable, structured result of parsing one user utterance.

    Attributes:
        kind: the intent kind; expected to be one of VALID_INTENTS.
        payload: intent-specific details (for example ``{'style': ...}``
            or ``{'channels': [...]}``); empty when the kind carries none.
        confidence: how sure the parser is about ``kind``; 0.0 by default.
        raw_text: the utterance the intent was parsed from, kept for audit.
    """

    kind: str
    payload: dict[str, Any] = field(default_factory=dict)
    confidence: float = 0.0
    raw_text: str = ''

78 

79 

80# ────────────────────────────────────────────────────────────────────── 

81# Deterministic keyword/regex classifiers — one per intent. Each 

82# returns a (payload_dict, confidence) tuple, or None for "no match". 

83# Pure functions; tested in isolation. 

84# ────────────────────────────────────────────────────────────────────── 

85 

86 

# Optional demonstrative object ("this" / "that" / "it") trailing a verb.
_OPTIONAL_OBJECT = r'(?:\s+(?:this|that|it))?'

# Approval phrasing: "post (it)", "share (this)", "looks great", "approve",
# "keep (that)".  Word-bounded on both sides and case-insensitive.
_APPROVE_RX = re.compile(
    r'\b('
    + '|'.join((
        'post' + _OPTIONAL_OBJECT,
        'share' + _OPTIONAL_OBJECT,
        r'looks\s+(?:great|cool|good|nice)',
        'approve',
        'keep' + _OPTIONAL_OBJECT,
    ))
    + r')\b',
    re.IGNORECASE,
)

# Rejection phrasing: "skip (it)", "not this", "pass", "reject", "drop (it)",
# "nop" / "nope".
# NOTE(review): `nope?` makes only the trailing "e" optional, so a bare "no"
# does not match — confirm that is intended.
_REJECT_RX = re.compile(
    r'\b('
    + '|'.join((
        'skip' + _OPTIONAL_OBJECT,
        r'not\s+(?:this|that|it)',
        'pass',
        'reject',
        'drop' + _OPTIONAL_OBJECT,
        'nope?',
    ))
    + r')\b',
    re.IGNORECASE,
)

# "caption [it|this|that] [with|in|using] [a] <style> [vibe|tone|style|mood]".
# Group 1 captures the style word: a letter followed by 1-30 word characters
# or hyphens.
_CAPTION_RX = re.compile(
    r'caption(?:\s+(?:it|this|that))?\s+(?:with|in|using)?\s*(?:a\s+)?'
    r'([a-z][\w\-]{1,30})\s*(?:vibe|tone|style|mood)?',
    re.IGNORECASE,
)

107 

# Scheduling vocabulary.  Deliberately naive, but it covers the phrases
# the seeded goal lists.
_DAY_TOKENS = (
    'monday',
    'tuesday',
    'wednesday',
    'thursday',
    'friday',
    'saturday',
    'sunday',
)
_TIME_OF_DAY = (
    'morning',
    'noon',
    'afternoon',
    'evening',
    'night',
)

114 

115 

def _classify_approve(text: str) -> Optional[tuple[dict, float]]:
    """Detect approval phrasing in ``text``.

    Returns:
        ``({}, 0.85)`` when ``_APPROVE_RX`` finds a match anywhere in the
        utterance (approval carries no payload), otherwise ``None``.
    """
    hit = _APPROVE_RX.search(text)
    return ({}, 0.85) if hit is not None else None

120 

121 

def _classify_reject(text: str) -> Optional[tuple[dict, float]]:
    """Detect rejection phrasing in ``text``.

    Returns:
        ``({}, 0.85)`` when ``_REJECT_RX`` finds a match anywhere in the
        utterance (rejection carries no payload), otherwise ``None``.
    """
    if _REJECT_RX.search(text) is None:
        return None
    return {}, 0.85

126 

127 

def _classify_caption_style(text: str) -> Optional[tuple[dict, float]]:
    """Extract the requested caption style from a "caption ..." utterance.

    Args:
        text: the user utterance.

    Returns:
        ``({'style': <lowercased word>}, 0.75)`` when ``_CAPTION_RX``
        matches and its capture group holds a real style word; ``None``
        when the regex does not match or the captured word is only one of
        the pattern's own filler words.
    """
    m = _CAPTION_RX.search(text)
    if m is None:
        return None
    style = m.group(1).lower().strip()
    # The object ("it"/"this"/"that") and the preposition ("with"/"in"/
    # "using") are optional in the regex.  When nothing capturable follows
    # them (e.g. "caption it", "caption using"), the regex backtracks and
    # captures that filler word itself, so those are not real styles.
    # Ordinary words such as adverbs ("caption it nicely") are NOT
    # filtered here and come back as the style.
    if style in {'it', 'this', 'that', 'a', 'with', 'in', 'using'}:
        return None
    return {'style': style}, 0.75

139 

140 

def _first_unshadowed(tokens: tuple[str, ...], haystack: str) -> Optional[str]:
    """Return the first token (in tuple order) that genuinely occurs in ``haystack``.

    A hit does not count when it is only part of a longer token from the
    SAME tuple: 'noon' inside 'afternoon' is ignored, so 'afternoon' is
    reported instead.  Returns ``None`` when no token occurs.
    """
    for token in tokens:
        if token not in haystack:
            continue
        # Blank out longer vocabulary entries that contain this token, then
        # check whether the token still occurs somewhere on its own.
        masked = haystack
        for other in tokens:
            if other != token and token in other:
                masked = masked.replace(other, ' ')
        if token in masked:
            return token
    return None


def _classify_schedule(text: str) -> Optional[tuple[dict, float]]:
    """Detect scheduling hints (weekday, time of day, relative day).

    Args:
        text: the user utterance; matching is case-insensitive substring
            matching against the scheduling vocabularies.

    Returns:
        ``(payload, confidence)`` where ``payload`` holds any of the keys
        ``'day'``, ``'time_of_day'`` and ``'relative'`` that were found,
        and ``confidence`` is ``0.55 + 0.15`` per key found, capped at
        0.95.  ``None`` when no hint is present.
    """
    lc = text.lower()
    day = _first_unshadowed(_DAY_TOKENS, lc)
    # Plain substring search used to report 'noon' for "afternoon" because
    # 'noon' is listed first; the helper skips such embedded hits.  'night'
    # is still reported for "tonight", as before, since 'tonight' belongs
    # to a different vocabulary.
    tod = _first_unshadowed(_TIME_OF_DAY, lc)
    # "tomorrow" / "today" / "tonight" also count as a schedule signal.
    relative = _first_unshadowed(('tomorrow', 'today', 'tonight'), lc)
    if not (day or tod or relative):
        return None
    payload: dict[str, Any] = {}
    if day:
        payload['day'] = day
    if tod:
        payload['time_of_day'] = tod
    if relative:
        payload['relative'] = relative
    # Confidence scales with how specific the schedule hint is.
    confidence = 0.55 + 0.15 * len(payload)
    return payload, min(confidence, 0.95)

162 

163 

def _classify_channels(text: str) -> Optional[tuple[dict, float]]:
    """Detect which platforms the utterance names.

    Every entry of ``_PLATFORM_TOKENS`` is tested as a substring of the
    lowercased utterance, then mapped through ``_PLATFORM_ALIASES`` to its
    canonical name.

    Returns:
        ``({'channels': [...]}, 0.7)`` with canonical names in
        ``_PLATFORM_TOKENS`` order and without duplicates, or ``None``
        when no platform is mentioned.
    """
    lowered = text.lower()
    # dict.fromkeys drops duplicates while keeping first-seen order.
    channels = list(dict.fromkeys(
        _PLATFORM_ALIASES.get(name, name)
        for name in _PLATFORM_TOKENS
        if name in lowered
    ))
    if channels:
        return {'channels': channels}, 0.7
    return None

175 

176 

177# Order matters: more specific classifiers first. schedule / channels 

178# beat approve / reject because "post Friday morning" should resolve 

179# as schedule, not approve, even though "post" is in the approve regex. 

# Signature shared by every deterministic classifier.
_ClassifierFn = Callable[[str], Optional[tuple[dict, float]]]

# (intent kind, classifier) pairs, listed in the order they are tried.
_CLASSIFIERS: tuple[tuple[str, _ClassifierFn], ...] = (
    (INTENT_CAPTION_STYLE, _classify_caption_style),
    (INTENT_SCHEDULE, _classify_schedule),
    (INTENT_CHANNELS, _classify_channels),
    (INTENT_APPROVE, _classify_approve),
    (INTENT_REJECT, _classify_reject),
)

187 

188 

def _intent_from_llm(
    raw: str,
    llm_callback: Callable[[str], dict],
) -> Optional[CuratorIntent]:
    """Ask ``llm_callback`` for an intent; return None if its answer is unusable.

    Unusable means: the callback raised, returned a non-dict, returned a
    kind outside VALID_INTENTS, or returned a payload / confidence that
    cannot be coerced to ``dict`` / ``float``.
    """
    try:
        candidate = llm_callback(raw)
        if not isinstance(candidate, dict):
            return None
        kind = str(candidate.get('kind', '')).lower()
        if kind not in VALID_INTENTS:
            return None
        return CuratorIntent(
            kind=kind,
            payload=dict(candidate.get('payload') or {}),
            confidence=float(candidate.get('confidence', 0.5)),
            raw_text=raw,
        )
    except Exception as exc:  # noqa: BLE001
        # Best-effort upgrade only: any failure falls through to the
        # deterministic classifiers.
        logger.debug(
            'media_curator: llm_callback raised, falling back: %s',
            exc,
        )
        return None


def parse_curator_command(
    text: str,
    *,
    llm_callback: Optional[Callable[[str], dict]] = None,
) -> CuratorIntent:
    """Turn a user utterance into a structured CuratorIntent.

    Args:
        text: user utterance (voice → STT'd, or typed).  Surrounding
            whitespace is stripped; empty or None input yields an
            INTENT_UNKNOWN intent with an empty ``raw_text``.
        llm_callback: optional richer parser.  It receives the stripped
            text and should return a dict with 'kind', 'payload' and
            'confidence'.  Its answer is used when it does not raise and
            its kind (lowercased) is in VALID_INTENTS; a missing
            confidence defaults to 0.5.  Otherwise the deterministic
            classifiers run.

    Returns:
        A CuratorIntent, never None.  ``kind`` is INTENT_UNKNOWN with
        confidence 0.0 when nothing matches, so callers can branch on
        ``.kind`` directly.
    """
    raw = (text or '').strip()
    if not raw:
        return CuratorIntent(kind=INTENT_UNKNOWN, raw_text='', confidence=0.0)

    if llm_callback is not None:
        from_llm = _intent_from_llm(raw, llm_callback)
        if from_llm is not None:
            return from_llm

    # Deterministic path: the first classifier that reports a match wins.
    for kind, classify in _CLASSIFIERS:
        outcome = classify(raw)
        if outcome is None:
            continue
        payload, confidence = outcome
        return CuratorIntent(
            kind=kind,
            payload=payload,
            confidence=confidence,
            raw_text=raw,
        )
    return CuratorIntent(kind=INTENT_UNKNOWN, raw_text=raw, confidence=0.0)