Coverage for integrations / social / media_curator_service.py: 100.0%
85 statements
« prev ^ index » next coverage.py v7.14.0, created at 2026-05-12 04:49 +0000
« prev ^ index » next coverage.py v7.14.0, created at 2026-05-12 04:49 +0000
1"""
2HevolveSocial - Conversational social-media curator service (closes #401).
4Parses natural-language user feedback into structured curator intents
5so the seeded social_media_curator_agent can act on them deterministi-
6cally (or via richer LLM parsing when the model is available).
8Design contract (project_encounter_icebreaker.md §11 + the seeded
9goal at goal_seeding.SEED_BOOTSTRAP_GOALS slug='social_media_
10curator_agent'):
12 * INPUT: a user utterance (voice → STT'd to text, or typed).
13 * OUTPUT: a CuratorIntent — what the user is asking for, in
14 structured form, plus confidence + the raw text for audit.
15 * NEVER auto-publishes — caller stages the action behind a
16 user-approval tap, same flow as the icebreaker (no_autosend +
17 consent_required gates from the seeded goal).
18 * Topology gate: drafting / scheduling work that READS user memory
19 or runs an LLM is consent-gated on central topology, same rule
20 as icebreaker_service.draft_icebreaker. This module's parser
21 itself is pure (no memory access, no LLM by default), so it
22 runs unconditionally on any topology — the consent gate fires
23 one layer up when the parser's intent triggers a downstream
24 memory read or LLM call.
The deterministic keyword classifier covers the bulk of the
"this one's cool / skip / caption it / post Friday" vocabulary the
seeded goal lists.  Callers that have an LLM available can pass
``llm_callback`` to upgrade the classifier — same fall-through
pattern as draft_icebreaker: the LLM result wins when the callback
returns a dict whose ``kind`` is in VALID_INTENTS; if it raises,
returns a non-dict, or returns an unrecognised kind, parsing falls
back to the deterministic path.
32"""
33from __future__ import annotations
35import logging
36import re
37from dataclasses import dataclass, field
38from typing import Any, Callable, Optional
40logger = logging.getLogger('hevolve_social')
# Intent kinds the curator understands.  The string values are stable
# identifiers — clients pattern-match on them.
INTENT_APPROVE = 'approve'              # "post this", "share it", "looks great"
INTENT_REJECT = 'reject'                # "skip", "not this", "pass"
INTENT_CAPTION_STYLE = 'caption_style'  # "caption with hiking vibe"
INTENT_SCHEDULE = 'schedule'            # "post Friday morning"
INTENT_CHANNELS = 'channels'            # "on twitter and linkedin"
INTENT_UNKNOWN = 'unknown'

# The closed set of kinds accepted from an LLM callback.
VALID_INTENTS = frozenset((
    INTENT_APPROVE,
    INTENT_REJECT,
    INTENT_CAPTION_STYLE,
    INTENT_SCHEDULE,
    INTENT_CHANNELS,
    INTENT_UNKNOWN,
))
57# Recognised platform names. Lowercased substring match against the
58# user utterance. Add aliases here, single source — never inline a
59# platform list elsewhere in the curator pipeline.
60_PLATFORM_TOKENS: tuple[str, ...] = (
61 'twitter', 'x.com', 'linkedin', 'mastodon', 'bluesky',
62 'instagram', 'facebook', 'reddit', 'threads',
63)
65_PLATFORM_ALIASES: dict[str, str] = {
66 'x.com': 'twitter',
67 'x ': 'twitter',
68}
@dataclass(frozen=True)
class CuratorIntent:
    """Immutable result of parsing a single user utterance.

    Attributes:
        kind: the intent identifier; one of VALID_INTENTS.
        payload: intent-specific details (empty when there are none).
        confidence: score attached to the classification; 0.0 by default.
        raw_text: the utterance the intent was derived from.
    """

    kind: str
    payload: dict[str, Any] = field(default_factory=dict)
    confidence: float = 0.0
    raw_text: str = ''
# ──────────────────────────────────────────────────────────────────────
# Deterministic keyword/regex classifiers — one per intent.  Each
# returns a (payload_dict, confidence) tuple on a match, or None when
# the utterance does not match.  Pure functions; tested in isolation.
# ──────────────────────────────────────────────────────────────────────
87_APPROVE_RX = re.compile(
88 r'\b(post(?:\s+(?:this|that|it))?|share(?:\s+(?:this|that|it))?|'
89 r'looks\s+(?:great|cool|good|nice)|approve|keep(?:\s+(?:this|that|it))?'
90 r')\b',
91 re.IGNORECASE,
92)
94_REJECT_RX = re.compile(
95 r'\b(skip(?:\s+(?:this|that|it))?|not\s+(?:this|that|it)|pass'
96 r'|reject|drop(?:\s+(?:this|that|it))?|nope?'
97 r')\b',
98 re.IGNORECASE,
99)
101_CAPTION_RX = re.compile(
102 r'caption(?:\s+(?:it|this|that))?\s+'
103 r'(?:with|in|using)?\s*(?:a\s+)?'
104 r'([a-z][\w\-]{1,30})\s*(?:vibe|tone|style|mood)?',
105 re.IGNORECASE,
106)
108# Time hints — naive but covers the seeded vocabulary.
109_DAY_TOKENS = (
110 'monday', 'tuesday', 'wednesday', 'thursday',
111 'friday', 'saturday', 'sunday',
112)
113_TIME_OF_DAY = ('morning', 'noon', 'afternoon', 'evening', 'night')
def _classify_approve(text: str) -> Optional[tuple[dict, float]]:
    """Return ``({}, 0.85)`` if *text* contains an approval phrase, else None."""
    approved = _APPROVE_RX.search(text) is not None
    return ({}, 0.85) if approved else None
def _classify_reject(text: str) -> Optional[tuple[dict, float]]:
    """Return ``({}, 0.85)`` if *text* contains a rejection phrase, else None."""
    rejected = _REJECT_RX.search(text) is not None
    return ({}, 0.85) if rejected else None
def _classify_caption_style(text: str) -> Optional[tuple[dict, float]]:
    """Extract a requested caption style from *text*.

    Returns:
        ``({'style': <lowercased word>}, 0.75)`` when the caption pattern
        matches and captures a usable style word; None when the pattern
        does not match or only a filler word was captured.
    """
    m = _CAPTION_RX.search(text)
    if m is None:
        return None
    style = m.group(1).lower().strip()
    # The object words and prepositions are optional in the pattern, so
    # when the utterance stops right after one of them ("caption it",
    # "caption with", "caption using") that word itself lands in the
    # style group.  Discard those hits.  'using' is listed alongside
    # 'with' and 'in' because the pattern treats all three the same way.
    # Other trailing words (e.g. the adverb in "caption it nicely") are
    # NOT filtered and are returned as the style.
    if style in {'it', 'this', 'that', 'a', 'with', 'in', 'using'}:
        return None
    return {'style': style}, 0.75
def _classify_schedule(text: str) -> Optional[tuple[dict, float]]:
    """Extract day / time-of-day / relative-date hints from *text*.

    Hints are matched as whole words (a plural "s" is tolerated) rather
    than as raw substrings.  Substring matching let a short token shadow
    a longer one containing it: "afternoon" was reported as "noon"
    because "noon" is checked first.

    Returns:
        ``(payload, confidence)`` where payload holds any of the keys
        ``day``, ``time_of_day`` and ``relative``; None when the
        utterance carries no schedule hint.  Confidence is
        ``0.55 + 0.15 * <number of hints>``, capped at 0.95.
    """
    words = set(re.findall(r'[a-z]+', text.lower()))
    # Fold simple plurals ("fridays", "mornings") onto the singular so
    # they keep matching, as they did under substring matching.
    words |= {w[:-1] for w in words if w.endswith('s')}

    def first_hit(vocabulary):
        # First vocabulary entry (in declaration order) present as a word.
        return next((token for token in vocabulary if token in words), None)

    day = first_hit(_DAY_TOKENS)
    tod = first_hit(_TIME_OF_DAY)
    # "tomorrow" / "today" / "tonight" also count as a schedule signal.
    relative = first_hit(('tomorrow', 'today', 'tonight'))
    if tod is None and 'tonight' in words:
        # "tonight" names a time of day as well as a date.  The earlier
        # substring match reported it as 'night'; keep doing so, but
        # explicitly instead of by accident.
        tod = 'night'

    hints = (('day', day), ('time_of_day', tod), ('relative', relative))
    payload: dict[str, Any] = {key: value for key, value in hints if value}
    if not payload:
        return None
    # Confidence scales with how specific the schedule hint is.
    confidence = 0.55 + 0.15 * len(payload)
    return payload, min(confidence, 0.95)
def _classify_channels(text: str) -> Optional[tuple[dict, float]]:
    """Collect the platforms named in *text*.

    Every entry of _PLATFORM_TOKENS that occurs as a substring of the
    lowercased text is mapped through _PLATFORM_ALIASES to its canonical
    name; duplicates are dropped and first-seen order is kept.

    Returns:
        ``({'channels': [...]}, 0.7)`` when at least one platform is
        found, otherwise None.
    """
    haystack = text.lower()
    canonical_names = (
        _PLATFORM_ALIASES.get(token, token)
        for token in _PLATFORM_TOKENS
        if token in haystack
    )
    # dict.fromkeys de-duplicates while preserving insertion order.
    channels = list(dict.fromkeys(canonical_names))
    return ({'channels': channels}, 0.7) if channels else None
# Order matters: the first classifier that matches wins, so the more
# specific ones come first.  caption / schedule / channels are tried
# before approve / reject because "post Friday morning" should resolve
# as schedule, not approve, even though "post" is in the approve regex.
_CLASSIFIERS: tuple[
    tuple[str, Callable[[str], Optional[tuple[dict, float]]]], ...
] = (
    (INTENT_CAPTION_STYLE, _classify_caption_style),
    (INTENT_SCHEDULE, _classify_schedule),
    (INTENT_CHANNELS, _classify_channels),
    (INTENT_APPROVE, _classify_approve),
    (INTENT_REJECT, _classify_reject),
)
def parse_curator_command(
    text: str,
    *,
    llm_callback: Optional[Callable[[str], dict]] = None,
) -> CuratorIntent:
    """Turn a user utterance into a structured CuratorIntent.

    Args:
        text: the utterance (speech-to-text output or typed input).
            Surrounding whitespace is stripped before parsing.
        llm_callback: optional richer parser.  It receives the stripped
            text and should return a dict with the keys ``kind``,
            ``payload`` and ``confidence``.  Its answer is used when the
            call does not raise and the lowercased ``kind`` is in
            VALID_INTENTS (a missing ``confidence`` defaults to 0.5).
            In every other case the deterministic classifiers run.

    Returns:
        Always a CuratorIntent, never None.  ``kind`` is INTENT_UNKNOWN
        for empty input or when no classifier matches, so callers can
        branch on ``.kind`` directly.
    """
    utterance = (text or '').strip()
    if not utterance:
        return CuratorIntent(kind=INTENT_UNKNOWN, raw_text='', confidence=0.0)

    if llm_callback is not None:
        try:
            candidate = llm_callback(utterance)
            llm_kind = (
                str(candidate.get('kind', '')).lower()
                if isinstance(candidate, dict)
                else None
            )
            if llm_kind in VALID_INTENTS:
                # Coercion failures below are caught too and trigger
                # the deterministic fallback.
                llm_payload = dict(candidate.get('payload') or {})
                llm_confidence = float(candidate.get('confidence', 0.5))
                return CuratorIntent(
                    kind=llm_kind,
                    payload=llm_payload,
                    confidence=llm_confidence,
                    raw_text=utterance,
                )
        except Exception as exc:  # noqa: BLE001
            logger.debug(
                'media_curator: llm_callback raised, falling back: %s',
                exc,
            )

    # Deterministic path: the first classifier that matches decides.
    for kind, classify in _CLASSIFIERS:
        outcome = classify(utterance)
        if outcome is None:
            continue
        payload, confidence = outcome
        return CuratorIntent(
            kind=kind,
            payload=payload,
            confidence=confidence,
            raw_text=utterance,
        )
    return CuratorIntent(kind=INTENT_UNKNOWN, raw_text=utterance, confidence=0.0)