Coverage for integrations/social/media_curator

"""
HevolveSocial - Conversational social-media curator service (closes #401).

Parses natural-language user feedback into structured curator intents
so the seeded social_media_curator_agent can act on them
deterministically (or via richer LLM parsing when the model is available).

Design contract (project_encounter_icebreaker.md §11 + the seeded
goal at goal_seeding.SEED_BOOTSTRAP_GOALS
slug='social_media_curator_agent'):

  * INPUT:  a user utterance (voice → STT'd to text, or typed).
  * OUTPUT: a CuratorIntent — what the user is asking for, in
    structured form, plus confidence + the raw text for audit.
  * NEVER auto-publishes — the caller stages the action behind a
    user-approval tap, same flow as the icebreaker (no_autosend +
    consent_required gates from the seeded goal).
  * Topology gate: drafting / scheduling work that READS user memory
    or runs an LLM is consent-gated on central topology, same rule
    as icebreaker_service.draft_icebreaker.  This module's parser
    itself is pure (no memory access, no LLM by default), so it
    runs unconditionally on any topology — the consent gate fires
    one layer up when the parser's intent triggers a downstream
    memory read or LLM call.

The deterministic keyword classifier covers the bulk of the
"this one's cool / skip / caption it / post Friday" vocabulary the
seeded goal lists.  Callers that have an LLM available can pass
``llm_callback`` to upgrade the classifier — same fall-through
contract as draft_icebreaker (LLM result wins; on raise/empty/
non-string, falls back to the deterministic path).
"""

33from __future__ import annotations

35import logging

36import re

37from dataclasses import dataclass, field

38from typing import Any, Callable, Optional

logger = logging.getLogger('hevolve_social')


# Intent kinds emitted by the parser.  The string values are part of the
# public contract (clients pattern-match on them), so treat them as stable.
# Example utterances for each kind:
#   approve       -> "post this", "share it", "looks great"
#   reject        -> "skip", "not this", "pass"
#   caption_style -> "caption with hiking vibe"
#   schedule      -> "post Friday morning"
#   channels      -> "on twitter and linkedin"
INTENT_APPROVE = 'approve'
INTENT_REJECT = 'reject'
INTENT_CAPTION_STYLE = 'caption_style'
INTENT_SCHEDULE = 'schedule'
INTENT_CHANNELS = 'channels'
INTENT_UNKNOWN = 'unknown'

# Closed set of every kind a CuratorIntent may carry.
VALID_INTENTS = frozenset((
    INTENT_APPROVE,
    INTENT_REJECT,
    INTENT_CAPTION_STYLE,
    INTENT_SCHEDULE,
    INTENT_CHANNELS,
    INTENT_UNKNOWN,
))

57# Recognised platform names. Lowercased substring match against the

58# user utterance. Add aliases here, single source — never inline a

59# platform list elsewhere in the curator pipeline.

60_PLATFORM_TOKENS: tuple[str, ...] = (

61 'twitter', 'x.com', 'linkedin', 'mastodon', 'bluesky',

62 'instagram', 'facebook', 'reddit', 'threads',

63)

65_PLATFORM_ALIASES: dict[str, str] = {

66 'x.com': 'twitter',

67 'x ': 'twitter',

68}

@dataclass(frozen=True)
class CuratorIntent:
    """Structured result of parsing a single user utterance.

    Instances are frozen: fields cannot be reassigned after construction.
    """

    # Intent kind; expected to be a member of VALID_INTENTS.
    kind: str
    # Intent-specific details; a fresh empty dict per instance by default.
    payload: dict[str, Any] = field(default_factory=dict)
    # Score attached by whichever classifier produced the intent.
    confidence: float = 0.0
    # The utterance the intent was parsed from.
    raw_text: str = ''

80# ──────────────────────────────────────────────────────────────────────

81# Deterministic keyword/regex classifiers — one per intent. Each

82# returns (payload_dict_or_None, confidence) where None means "no match".

83# Pure functions; tested in isolation.

84# ──────────────────────────────────────────────────────────────────────

87_APPROVE_RX = re.compile(

88 r'\b(post(?:\s+(?:this|that|it))?|share(?:\s+(?:this|that|it))?|'

90 r')\b',

91 re.IGNORECASE,

92)

94_REJECT_RX = re.compile(

97 r')\b',

98 re.IGNORECASE,

99)

100

101_CAPTION_RX = re.compile(

102 r'caption(?:\s+(?:it|this|that))?\s+'

103 r'(?:with|in|using)?\s*(?:a\s+)?'

104 r'([a-z][\w\-]{1,30})\s*(?:vibe|tone|style|mood)?',

105 re.IGNORECASE,

106)

107

108# Time hints — naive but covers the seeded vocabulary.

109_DAY_TOKENS = (

110 'monday', 'tuesday', 'wednesday', 'thursday',

111 'friday', 'saturday', 'sunday',

112)

113_TIME_OF_DAY = ('morning', 'noon', 'afternoon', 'evening', 'night')

114

115

def _classify_approve(text: str) -> Optional[tuple[dict, float]]:
    """Detect an approval phrase anywhere in *text*.

    Returns:
        ``({}, 0.85)`` when ``_APPROVE_RX`` finds a match, otherwise None.
        Approval carries no payload.
    """
    hit = _APPROVE_RX.search(text)
    return ({}, 0.85) if hit else None

120

121

def _classify_reject(text: str) -> Optional[tuple[dict, float]]:
    """Detect a rejection phrase anywhere in *text*.

    Returns:
        ``({}, 0.85)`` when ``_REJECT_RX`` finds a match, otherwise None.
        Rejection carries no payload.
    """
    hit = _REJECT_RX.search(text)
    return ({}, 0.85) if hit else None

126

127

# Words that can land in the style capture group only because the pattern's
# optional pronoun / preposition / article parts were skipped during
# backtracking (e.g. a bare "caption it" or "caption using").  They are
# never a real style.
_CAPTION_FILLER_WORDS = frozenset({
    'it', 'this', 'that',       # pronouns
    'with', 'in', 'using',      # prepositions
    'a', 'an', 'the',           # articles
})


def _classify_caption_style(text: str) -> Optional[tuple[dict, float]]:
    """Extract a requested caption style from *text*.

    Args:
        text: the user utterance.

    Returns:
        ``({'style': <lowercased word>}, 0.75)`` when ``_CAPTION_RX``
        matches and its first group is a real word, otherwise None.
    """
    m = _CAPTION_RX.search(text)
    if not m:
        return None
    style = m.group(1).lower().strip()
    # A filler word in the capture means the utterance named no style at
    # all; report "no match" so a later classifier can have a go instead
    # of returning a nonsense style such as 'using' or 'an'.
    if style in _CAPTION_FILLER_WORDS:
        return None
    return {'style': style}, 0.75

139

140

def _first_whole_word(candidates: tuple[str, ...], lowered: str) -> Optional[str]:
    """Return the first candidate present in *lowered* as a whole word.

    A trailing plural "s" is tolerated ("fridays", "mornings").  Matching
    on word boundaries instead of raw substrings keeps "afternoon" from
    being read as "noon" and "tonight" from being read as "night".
    """
    for word in candidates:
        if re.search(r'\b' + re.escape(word) + r's?\b', lowered):
            return word
    return None


def _classify_schedule(text: str) -> Optional[tuple[dict, float]]:
    """Pick day / time-of-day / relative-day hints out of *text*.

    Args:
        text: the user utterance.

    Returns:
        None when no hint is present.  Otherwise ``(payload, confidence)``
        where payload holds whichever of ``'day'``, ``'time_of_day'`` and
        ``'relative'`` were found, and confidence is 0.55 plus 0.15 per
        hint, capped at 0.95.
    """
    lc = text.lower()
    day = _first_whole_word(_DAY_TOKENS, lc)
    tod = _first_whole_word(_TIME_OF_DAY, lc)
    # "tomorrow" / "today" / "tonight" also count as a schedule signal.
    relative = _first_whole_word(('tomorrow', 'today', 'tonight'), lc)
    if not (day or tod or relative):
        return None
    payload: dict[str, Any] = {}
    if day:
        payload['day'] = day
    if tod:
        payload['time_of_day'] = tod
    if relative:
        payload['relative'] = relative
    # Confidence scales with how specific the schedule hint is.
    confidence = 0.55 + 0.15 * len(payload)
    return payload, min(confidence, 0.95)

162

163

def _classify_channels(text: str) -> Optional[tuple[dict, float]]:
    """List the platforms mentioned in *text*.

    Every entry of ``_PLATFORM_TOKENS`` is tried as a substring of the
    lowercased utterance; hits are mapped through ``_PLATFORM_ALIASES``
    to their canonical name.

    Returns:
        ``({'channels': [...]}, 0.7)`` with canonical names de-duplicated
        in ``_PLATFORM_TOKENS`` order, or None when no platform is named.
    """
    lowered = text.lower()
    canonical_hits = [
        _PLATFORM_ALIASES.get(name, name)
        for name in _PLATFORM_TOKENS
        if name in lowered
    ]
    # dict.fromkeys drops repeats while keeping first-seen order.
    channels = list(dict.fromkeys(canonical_hits))
    if channels:
        return {'channels': channels}, 0.7
    return None

175

176

# Classifiers are tried top to bottom and the first hit wins, so order
# matters: the more specific ones come first.  caption-style / schedule /
# channels are consulted before approve / reject so that an utterance like
# "post Friday morning" resolves as a schedule rather than an approval,
# even though "post" is also in the approve regex.
_CLASSIFIERS: tuple[
    tuple[str, Callable[[str], Optional[tuple[dict, float]]]],
    ...,
] = (
    (INTENT_CAPTION_STYLE, _classify_caption_style),
    (INTENT_SCHEDULE, _classify_schedule),
    (INTENT_CHANNELS, _classify_channels),
    (INTENT_APPROVE, _classify_approve),
    (INTENT_REJECT, _classify_reject),
)

187

188

def parse_curator_command(
    text: str,
    *,
    llm_callback: Optional[Callable[[str], dict]] = None,
) -> CuratorIntent:
    """Parse a user utterance into a structured CuratorIntent.

    Args:
        text: user utterance (voice → STT'd, or typed).  None or
            whitespace-only input yields an INTENT_UNKNOWN result.
        llm_callback: optional richer parser; takes the stripped text and
            returns a dict {'kind', 'payload', 'confidence'}.  Its result
            is used when the call does not raise, the result is a dict,
            'kind' (case-insensitive) is in VALID_INTENTS and
            'confidence' is a real number.  Otherwise the deterministic
            classifiers are used.

    Returns:
        CuratorIntent.  Always returns SOME intent — `kind=INTENT_UNKNOWN`
        with confidence 0.0 when nothing matches, so callers can branch
        on .kind without None-checking the return.  Confidence taken from
        llm_callback is clamped to the range [0.0, 1.0].
    """
    raw = (text or '').strip()
    if not raw:
        return CuratorIntent(kind=INTENT_UNKNOWN, raw_text='', confidence=0.0)

    if llm_callback is not None:
        # Everything that touches the callback's output stays inside the
        # try: a malformed payload or confidence (dict()/float() raising)
        # is handled the same way as the callback itself raising.
        try:
            cand = llm_callback(raw)
            if isinstance(cand, dict):
                k = str(cand.get('kind', '')).lower()
                if k in VALID_INTENTS:
                    score = float(cand.get('confidence', 0.5))
                    # NaN is the only float that is not equal to itself;
                    # treat it as a malformed result and fall through.
                    if score == score:
                        return CuratorIntent(
                            kind=k,
                            payload=dict(cand.get('payload') or {}),
                            # Models sometimes answer on another scale
                            # (e.g. 95); keep the value a probability.
                            confidence=min(max(score, 0.0), 1.0),
                            raw_text=raw,
                        )
        except Exception as exc:  # noqa: BLE001
            logger.debug(
                'media_curator: llm_callback raised, falling back: %s',
                exc,
            )

    # Deterministic path: first classifier to report a hit wins.
    for kind, classify in _CLASSIFIERS:
        result = classify(raw)
        if result is not None:
            payload, confidence = result
            return CuratorIntent(
                kind=kind,
                payload=payload,
                confidence=confidence,
                raw_text=raw,
            )
    return CuratorIntent(kind=INTENT_UNKNOWN, raw_text=raw, confidence=0.0)

Coverage for integrations / social / media_curator_service.py: 100.0%

85 statements