Coverage for security / action_classifier.py: 100.0%

23 statements  

« prev     ^ index     » next       coverage.py v7.14.0, created at 2026-05-12 04:49 +0000

1""" 

2Action Classifier — Destructive Action Detection + Preview Gate 

3 

4Classifies action text as 'safe', 'destructive', or 'unknown'. 

5Used by lifecycle_hooks.py to gate destructive operations behind 

6an opt-in preview approval flow. 

7 

8Usage: 

9 from security.action_classifier import classify_action, should_preview 

10 

11 cls = classify_action("DELETE FROM users WHERE id=5") # 'destructive' 

12 if should_preview(action_text, preview_enabled=True): 

13 # Route to PREVIEW_PENDING state 

14""" 

15 

16import re 

17import logging 

18from typing import Literal 

19 

# Closed set of labels an action can be assigned by this module.
ActionClass = Literal['safe', 'destructive', 'unknown']

# Module-level logger, registered under the 'hevolve_security' name.
logger = logging.getLogger('hevolve_security')

23 

# Patterns indicating destructive operations.
# All patterns are case-insensitive; a single hit anywhere in the text is
# enough for the text to count as destructive.
DESTRUCTIVE_PATTERNS = [
    # Generic destructive verbs appearing as whole words.
    re.compile(r'\b(delete|remove|drop|truncate|destroy|overwrite|erase|purge|wipe)\b', re.I),
    # `rm` followed by any argument (flags or target). The target is not
    # required to start with '/', so relative paths, globs and '~' are caught
    # as well (e.g. "rm -rf build", "rm *.log").
    re.compile(r'\brm\s+\S', re.I),
    # SQL statements that remove rows or schema objects.
    re.compile(r'\bDELETE\s+FROM\b', re.I),
    re.compile(r'\bDROP\s+(TABLE|DATABASE|INDEX|SCHEMA)\b', re.I),
    re.compile(r'\bTRUNCATE\s+(TABLE)?\b', re.I),
    # Disk / filesystem level commands.
    re.compile(r'\bformat\s+[a-zA-Z]:', re.I),
    re.compile(r'\bmkfs\b', re.I),
    re.compile(r'\bdd\s+if=', re.I),
    # History-rewriting or working-tree-discarding git commands. Both the long
    # (--force, which also covers --force-with-lease) and short (-f) spelling
    # of a forced push are recognised.
    re.compile(r'\bgit\s+(push\s+(?:--force|-f\b)|reset\s+--hard|clean\s+-fd)', re.I),
    # Process / host level disruption.
    re.compile(r'\bkill\s+-9\b', re.I),
    re.compile(r'\bshutdown\b', re.I),
    re.compile(r'\breboot\b', re.I),
]

39 

# Patterns that are clearly read-only / safe.
# All patterns are case-insensitive; a single hit marks the text as safe.
SAFE_PATTERNS = [
    # Generic read-only verbs appearing as whole words.
    re.compile(r'\b(read|get|list|show|describe|explain|search|query|fetch|view|check|status)\b', re.I),
    # A SELECT counts as read-only only when no INTO follows it. re.S lets the
    # lookahead see past line breaks, so a multi-line "SELECT ...\nINTO ..."
    # (which writes a new table) is not mistaken for a plain query.
    re.compile(r'\bSELECT\b(?!.*\bINTO\b)', re.I | re.S),
    # Read-only shell commands.
    re.compile(r'\bcat\s', re.I),
    re.compile(r'\bls\b', re.I),
    # Inspection-only git sub-commands.
    re.compile(r'\bgit\s+(status|log|diff|show|branch)\b', re.I),
]

48 

49 

def classify_action(action_text: str) -> ActionClass:
    """
    Label an action text as 'safe', 'destructive', or 'unknown'.

    Empty or whitespace-only text is 'unknown'. A destructive match always
    wins over a safe match, so text that hits both pattern lists is reported
    as 'destructive' (fail-safe). Text matching neither list is 'unknown'.
    """
    # Nothing meaningful to inspect: no text at all, or only whitespace.
    if not (action_text and action_text.strip()):
        return 'unknown'

    # Destructive patterns are consulted first so they take priority.
    for destructive_rx in DESTRUCTIVE_PATTERNS:
        if destructive_rx.search(action_text):
            # Only the first 80 characters of the action are logged.
            logger.info(f"Action classified as DESTRUCTIVE: {action_text[:80]}")
            return 'destructive'

    # No destructive hit; a single safe match is enough to call it safe.
    for safe_rx in SAFE_PATTERNS:
        if safe_rx.search(action_text):
            return 'safe'

    return 'unknown'

71 

72 

def should_preview(action_text: str, preview_enabled: bool = False) -> bool:
    """
    Decide whether an action must pass through the preview approval flow.

    Preview is opt-in: with preview disabled nothing is ever routed to
    approval. With preview enabled, every action classified as 'destructive'
    or 'unknown' requires approval; only 'safe' actions skip it.

    Args:
        action_text: The action to classify
        preview_enabled: Whether the user/agent has opted into preview mode

    Returns:
        True if the action should be routed to PREVIEW_PENDING
    """
    needs_approval = False
    if preview_enabled:
        # Classification only happens once preview mode is switched on.
        label = classify_action(action_text)
        needs_approval = label in ('destructive', 'unknown')
    return needs_approval