Skip to main content

hypercall_admin/monitoring/
recovery.rs

1//! Recovery safety gate admin endpoints.
2
3use axum::{
4    extract::State,
5    http::StatusCode,
6    response::{IntoResponse, Response},
7};
8use sonic_rs::json;
9
10use crate::state::AdminState;
11use hypercall_runtime_api::recovery_safety::{
12    RecoverySafetyAlertPoint, RecoverySafetyBuildInfo, RecoverySafetyMonitoringResponse,
13    RecoverySafetyReport, RecoverySafetyStatus,
14};
15use hypercall_runtime_api::sonic_json::SonicJson;
16
17/// GET /recovery-safety - Last startup recovery safety gate result.
18#[utoipa::path(
19    get,
20    path = "/recovery-safety",
21    responses(
22        (status = 200, description = "Recovery safety gate passed", body = RecoverySafetyMonitoringResponse),
23        (status = 401, description = "Invalid or missing X-Admin-Key header"),
24        (status = 503, description = "Recovery safety gate failed or has not run"),
25    ),
26    tag = "Health",
27    security(("admin_key" = []))
28)]
29pub async fn recovery_safety(State(app_state): State<AdminState>) -> Response {
30    let report = latest_recovery_safety_report(&app_state);
31
32    match report {
33        Some(report) => {
34            let status = if report.status == RecoverySafetyStatus::Pass {
35                StatusCode::OK
36            } else {
37                StatusCode::SERVICE_UNAVAILABLE
38            };
39            let response = RecoverySafetyMonitoringResponse {
40                status: report.status,
41                build_info: RecoverySafetyBuildInfo {
42                    version: app_state.build_info.version.clone(),
43                    commit: app_state.build_info.commit.clone(),
44                    git_ref: app_state.build_info.git_ref.clone(),
45                    build_time: app_state.build_info.build_time.clone(),
46                },
47                report,
48            };
49            (status, SonicJson(response)).into_response()
50        }
51        None => (
52            StatusCode::SERVICE_UNAVAILABLE,
53            SonicJson(json!({
54                "status": "fail",
55                "error": "startup recovery safety gate has not reported"
56            })),
57        )
58            .into_response(),
59    }
60}
61
62/// GET /recovery-safety/alert - Numeric recovery safety gate signal for Grafana alerts.
63#[utoipa::path(
64    get,
65    path = "/recovery-safety/alert",
66    responses(
67        (status = 200, description = "Recovery safety alert datapoint", body = [RecoverySafetyAlertPoint]),
68        (status = 401, description = "Invalid or missing X-Admin-Key header"),
69    ),
70    tag = "Health",
71    security(("admin_key" = []))
72)]
73pub async fn recovery_safety_alert(State(app_state): State<AdminState>) -> Response {
74    let point = recovery_safety_alert_point(latest_recovery_safety_report(&app_state));
75
76    (StatusCode::OK, SonicJson(vec![point])).into_response()
77}
78
79fn latest_recovery_safety_report(app_state: &AdminState) -> Option<RecoverySafetyReport> {
80    app_state
81        .recovery_safety_report
82        .read()
83        .expect("recovery safety report lock poisoned")
84        .clone()
85}
86
87fn recovery_safety_alert_point(report: Option<RecoverySafetyReport>) -> RecoverySafetyAlertPoint {
88    match report {
89        Some(report) if report.status == RecoverySafetyStatus::Pass => RecoverySafetyAlertPoint {
90            status: RecoverySafetyStatus::Pass,
91            value: 0,
92            message: "startup recovery safety gate passed".to_string(),
93        },
94        Some(report) => {
95            let message = report
96                .checks
97                .iter()
98                .find(|check| check.status == RecoverySafetyStatus::Fail)
99                .map(|check| check.message.clone())
100                .unwrap_or_else(|| "startup recovery safety gate failed".to_string());
101            RecoverySafetyAlertPoint {
102                status: RecoverySafetyStatus::Fail,
103                value: 1,
104                message,
105            }
106        }
107        None => RecoverySafetyAlertPoint {
108            status: RecoverySafetyStatus::Fail,
109            value: 1,
110            message: "startup recovery safety gate has not reported".to_string(),
111        },
112    }
113}