# Source code for vyra_base.state.health_layer

"""
Health Layer - High-level API for system health monitoring and management.

This layer monitors system integrity:
- Health status tracking (OK, Warning, Overloaded, Faulted, Critical)
- Warning and fault reporting
- Load management
- Recovery coordination
- Escalation to lifecycle layer

Thread-safe wrapper around StateMachine for health management.
"""

import logging
from typing import Optional, Dict, Any

from .state_machine import StateMachine
from .state_types import HealthState
from .state_events import StateEvent, EventType


logger = logging.getLogger(__name__)


class HealthLayer:
    """
    High-level API for health state management.

    Wraps a StateMachine instance and exposes intuitive methods for health
    monitoring:
    - report_warning() / clear_warning() - Raise and clear non-critical issues
    - report_fault() - Report a critical fault
    - recover() - Attempt recovery from a fault
    - check_and_report() - Evaluate metrics against a warning threshold
    - emergency_stop() - Send an emergency-stop event to the state machine

    Every reporting method only sends an event to the state machine and then
    reads the health state back; which transition actually happens is decided
    by the state machine, not by this class.

    Example:
        >>> health = HealthLayer(fsm)
        >>> health.report_warning({'cpu': '85%'})
        >>> health.is_degraded()
        True
        >>> health.clear_warning()
    """

    def __init__(self, fsm: StateMachine):
        """
        Initialize health layer.

        Args:
            fsm: StateMachine instance to control
        """
        self.fsm = fsm
        logger.info("HealthLayer initialized")

    # -------------------------------------------------------------------------
    # State Query
    # -------------------------------------------------------------------------

    def get_state(self) -> HealthState:
        """Get current health state as reported by the state machine."""
        return self.fsm.get_health_state()

    def get_state_name(self) -> str:
        """Get current health state as string (the state's ``value``)."""
        return self.get_state().value

    def is_healthy(self) -> bool:
        """Check if the health state is HEALTHY."""
        return self.get_state() == HealthState.HEALTHY

    def is_warning(self) -> bool:
        """Check if the health state is WARNING."""
        return self.get_state() == HealthState.WARNING

    def is_critical(self) -> bool:
        """Check if the health state is CRITICAL."""
        return self.get_state() == HealthState.CRITICAL

    def is_degraded(self) -> bool:
        """Check if health is degraded (WARNING or CRITICAL)."""
        return self.get_state() in (HealthState.WARNING, HealthState.CRITICAL)

    def is_operational_safe(self) -> bool:
        """Check if safe for operational tasks (HEALTHY or WARNING)."""
        return self.get_state() in (HealthState.HEALTHY, HealthState.WARNING)

    # -------------------------------------------------------------------------
    # Health Reporting
    # -------------------------------------------------------------------------

    def _send_health_event(
        self, event_type: EventType, payload: Optional[Dict[str, Any]]
    ):
        """Build a health-originated StateEvent and hand it to the state machine.

        Returns whatever ``StateMachine.send_event`` returns.
        """
        event = StateEvent(event_type, payload=payload, origin_layer="health")
        return self.fsm.send_event(event)

    def report_warning(self, warning_info: Optional[Dict[str, Any]] = None) -> HealthState:
        """
        Report non-critical warning.

        Sends a WARN event. Expected transition: Healthy -> Warning.

        Args:
            warning_info: Warning details (metrics, thresholds, etc.)

        Returns:
            Health state read back after the event was sent
        """
        self._send_health_event(EventType.WARN, warning_info)
        logger.warning(f"Warning reported: {warning_info}")
        return self.get_state()

    def clear_warning(self, clearance_info: Optional[Dict[str, Any]] = None) -> HealthState:
        """
        Clear active warnings.

        Sends a CLEAR_WARNING event. Expected transition: Warning -> Healthy.

        Args:
            clearance_info: Clearance details

        Returns:
            Health state read back after the event was sent
        """
        self._send_health_event(EventType.CLEAR_WARNING, clearance_info)
        logger.info(f"Warning cleared: {clearance_info}")
        return self.get_state()

    def report_fault(self, fault_info: Optional[Dict[str, Any]] = None) -> HealthState:
        """
        Report critical fault.

        Sends a FAULT event. Expected transition: Healthy/Warning -> Critical.

        Args:
            fault_info: Fault details

        Returns:
            Health state read back after the event was sent
        """
        self._send_health_event(EventType.FAULT, fault_info)
        logger.error(f"Fault reported: {fault_info}")
        return self.get_state()

    def recover(self, recovery_info: Optional[Dict[str, Any]] = None) -> HealthState:
        """
        Attempt recovery from fault.

        Sends a RECOVER event. Expected transition: Critical -> Healthy/Warning.

        Args:
            recovery_info: Recovery details

        Returns:
            Health state read back after the event was sent
        """
        self._send_health_event(EventType.RECOVER, recovery_info)
        logger.info(f"Recovery attempted: {recovery_info}")
        return self.get_state()

    # -------------------------------------------------------------------------
    # Convenience Methods
    # -------------------------------------------------------------------------

    @staticmethod
    def _first_numeric_metric(metrics: Dict[str, Any]) -> Optional[float]:
        """Return the first int/float value in *metrics* (bools excluded), or None."""
        for candidate in metrics.values():
            # bool is a subclass of int: a flag such as {'throttled': True}
            # must not be compared against the threshold as the number 1.
            if isinstance(candidate, (int, float)) and not isinstance(candidate, bool):
                return candidate
        return None

    def check_and_report(
        self,
        metrics: Dict[str, Any],
        warning_threshold: Optional[float] = None
    ) -> HealthState:
        """
        Check metrics and automatically report appropriate health state.

        Only the first numeric (non-bool) value found in ``metrics`` is
        evaluated. If it reaches ``warning_threshold`` a warning is reported;
        otherwise an active warning is cleared. Without any numeric metric
        nothing is reported.

        Args:
            metrics: System metrics to evaluate
            warning_threshold: Threshold for warning state; ``None`` disables
                the threshold check (``0`` is a valid threshold)

        Returns:
            New health state after evaluation

        Example:
            >>> health.check_and_report(
            ...     {'cpu_usage': 0.85},
            ...     warning_threshold=0.7,
            ... )
        """
        value = self._first_numeric_metric(metrics)
        if value is None:
            logger.debug("No numeric metrics for threshold check")
            return self.get_state()

        # Compare against None explicitly so that a threshold of 0 / 0.0 is
        # honoured instead of being treated as "no threshold".
        if warning_threshold is not None and value >= warning_threshold:
            return self.report_warning(metrics)

        if self.is_warning():
            # Metrics below warning threshold - clear if currently warning
            return self.clear_warning(metrics)

        return self.get_state()

    def emergency_stop(self, reason: str) -> Dict[str, str]:
        """
        Trigger emergency stop.

        Sends an EMERGENCY_STOP event carrying ``{"reason": reason}``. How the
        lifecycle, operational and health layers react is defined by the state
        machine and is not visible in this class.

        Args:
            reason: Emergency stop reason

        Returns:
            The result of ``StateMachine.send_event`` (annotated as the new
            state of all layers)
        """
        return self._send_health_event(EventType.EMERGENCY_STOP, {"reason": reason})

    # -------------------------------------------------------------------------
    # Callbacks
    # -------------------------------------------------------------------------

    def on_state_change(self, callback, priority: int = 0) -> None:
        """
        Register callback for health state changes.

        Args:
            callback: Function(layer, old_state, new_state)
            priority: Callback priority (higher = earlier execution)
        """
        self.fsm.subscribe("health", callback, priority)

    # -------------------------------------------------------------------------
    # Diagnostics
    # -------------------------------------------------------------------------

    def get_info(self) -> Dict[str, Any]:
        """
        Get health layer information.

        Returns:
            Dictionary with the health state name, the derived status flags
            and the current lifecycle state value
        """
        return {
            "state": self.get_state_name(),
            "is_healthy": self.is_healthy(),
            "is_degraded": self.is_degraded(),
            "is_operational_safe": self.is_operational_safe(),
            "lifecycle_state": self.fsm.get_lifecycle_state().value,
        }

    def get_severity_level(self) -> int:
        """
        Get health severity as numeric level.

        Returns:
            0 = HEALTHY, 1 = WARNING, 2 = CRITICAL. Any state missing from
            the map also yields 0.
        """
        severity_map = {
            HealthState.HEALTHY: 0,
            HealthState.WARNING: 1,
            HealthState.CRITICAL: 2,
        }
        return severity_map.get(self.get_state(), 0)

    def __repr__(self) -> str:
        """String representation showing state name and severity level."""
        return f"HealthLayer(state={self.get_state_name()}, severity={self.get_severity_level()})"