Coverage for security/action_classifier.py: 100.0%

"""
Action Classifier — Destructive Action Detection + Preview Gate

Classifies action text as 'safe', 'destructive', or 'unknown'.
Used by lifecycle_hooks.py to gate destructive operations behind
an opt-in preview approval flow.

Usage:
    from security.action_classifier import classify_action, should_preview

    cls = classify_action("DELETE FROM users WHERE id=5")  # 'destructive'
    if should_preview(action_text, preview_enabled=True):
        # Route to PREVIEW_PENDING state
"""

16import re

17import logging

18from typing import Literal

# Named logger used for security-related messages from this module.
logger = logging.getLogger('hevolve_security')

# Closed set of labels an action can be classified as.
ActionClass = Literal['safe', 'destructive', 'unknown']

# Patterns indicating destructive operations.
# Each pattern is applied with .search(), so a match anywhere in the action
# text is enough to flag the whole action.
DESTRUCTIVE_PATTERNS = [
    # rm targeting an absolute or home-relative path, preceded by any number
    # of option groups (e.g. "rm -rf /x", "rm -r -f /x", "rm -rfv ~/x").
    re.compile(r'\brm\s+(?:-[\w-]+\s+)*[/~]', re.I),
    re.compile(r'\bDELETE\s+FROM\b', re.I),
    re.compile(r'\bDROP\s+(TABLE|DATABASE|INDEX|SCHEMA)\b', re.I),
    re.compile(r'\bTRUNCATE\s+(TABLE)?\b', re.I),
    # Windows drive format, e.g. "format C:".
    re.compile(r'\bformat\s+[a-zA-Z]:', re.I),
    re.compile(r'\bmkfs\b', re.I),
    re.compile(r'\bdd\s+if=', re.I),
    # git history/worktree rewriting:
    #   * push with --force / -f anywhere among the push arguments; the
    #     argument scan stops at ';', '|' and '&' so a flag belonging to a
    #     later chained command is not attributed to the push
    #   * reset --hard
    #   * clean with a short-option group containing 'f' (-f, -fd, -df, ...)
    re.compile(
        r'\bgit\s+(?:'
        r'push\s+(?:[^\s;|&]+\s+)*?(?:--force|-f\b)'
        r'|reset\s+--hard'
        r'|clean\s+-[a-z]*f'
        r')',
        re.I,
    ),
    re.compile(r'\bkill\s+-9\b', re.I),
    re.compile(r'\bshutdown\b', re.I),
    re.compile(r'\breboot\b', re.I),
]

# Patterns that are clearly read-only / safe.
# Each pattern is applied with .search(), so a match anywhere in the action
# text counts.
SAFE_PATTERNS = [
    # SELECT is only treated as read-only when no INTO follows it. re.S lets
    # the lookahead cross line breaks so a multi-line "SELECT ... INTO ..."
    # statement is not mistaken for a plain read.
    re.compile(r'\bSELECT\b(?!.*\bINTO\b)', re.I | re.S),
    re.compile(r'\bcat\s', re.I),
    re.compile(r'\bls\b', re.I),
    re.compile(r'\bgit\s+(status|log|diff|show|branch)\b', re.I),
]

def classify_action(action_text: str) -> ActionClass:
    """
    Classify an action as safe, destructive, or unknown.

    Destructive takes priority: if both safe and destructive patterns match,
    the action is classified as destructive (fail-safe).

    Patterns are matched with ``search``, i.e. anywhere in the text. A single
    safe match is therefore enough to return 'safe' even when the text also
    contains other, unrecognized commands.

    Args:
        action_text: The action text to inspect.

    Returns:
        'destructive' if any entry of DESTRUCTIVE_PATTERNS matches,
        otherwise 'safe' if any entry of SAFE_PATTERNS matches,
        otherwise 'unknown'. Empty, whitespace-only, None, or non-string
        input is reported as 'unknown'.
    """
    # Anything that is not a non-blank string cannot be inspected; report it
    # as 'unknown' instead of raising so the caller can still gate it.
    if not isinstance(action_text, str) or not action_text.strip():
        return 'unknown'

    # Destructive check first: once it matches, the safe patterns are moot.
    if any(p.search(action_text) for p in DESTRUCTIVE_PATTERNS):
        # Only the first 80 characters are logged; lazy %-formatting defers
        # string building until the record is actually emitted.
        logger.info("Action classified as DESTRUCTIVE: %s", action_text[:80])
        return 'destructive'

    if any(p.search(action_text) for p in SAFE_PATTERNS):
        return 'safe'

    return 'unknown'

def should_preview(action_text: str, preview_enabled: bool = False) -> bool:
    """
    Decide whether an action must pass through the preview approval flow.

    Preview is opt-in: with it disabled nothing is ever previewed and the
    action is not classified at all. With it enabled, every action that
    classify_action labels 'destructive' or 'unknown' requires approval.

    Args:
        action_text: The action to classify
        preview_enabled: Whether the user/agent has opted into preview mode

    Returns:
        True if the action should be routed to PREVIEW_PENDING
    """
    if preview_enabled:
        verdict = classify_action(action_text)
        return verdict == 'destructive' or verdict == 'unknown'

    return False

Coverage for security / action_classifier.py: 100.0%

23 statements