Skip to main content

hypercall_runtime_api/
drain.rs

1//! Shared drain-marker helpers.
2//!
3//! The drain marker is a file next to the engine WAL that signals the API to
4//! keep refusing traffic across a restart. These helpers are pure (path / fs /
5//! atomic) so both the `hypercall-api` readiness path and the admin lifecycle
6//! surface use the same implementation. State-coupled quiesce orchestration
7//! lives with each caller, since it operates on caller-specific channels.
8
9use crate::sonic_json::SonicJson;
10use axum::{http::StatusCode, response::Response};
11use std::path::{Path, PathBuf};
12use std::sync::atomic::{AtomicBool, Ordering};
13
/// Derive the drain marker location from the engine WAL path.
///
/// The marker keeps the WAL's directory and file stem; only the extension
/// is replaced with `drain` (it is appended when the WAL file name has no
/// extension).
pub fn drain_marker_path(wal_path: &Path) -> PathBuf {
    wal_path.with_extension("drain")
}
20
21/// Persist the drain marker so a restart resumes in the drained state.
22pub async fn persist_drain_marker(marker_path: &Path) -> Result<(), String> {
23    if let Some(parent) = marker_path.parent() {
24        tokio::fs::create_dir_all(parent)
25            .await
26            .map_err(|error| format!("failed to create drain marker directory: {error}"))?;
27    }
28    tokio::fs::write(marker_path, b"drained\n")
29        .await
30        .map_err(|error| format!("failed to persist drain marker: {error}"))
31}
32
33/// Keep the API drained and persist the marker when a quiesce request was
34/// accepted but its outcome is unknown, so a restart stays safe.
35pub async fn keep_drain_after_accepted_quiesce_failure(
36    is_draining: &AtomicBool,
37    marker_path: &Path,
38    status: StatusCode,
39    message: &'static str,
40) -> Response {
41    use axum::response::IntoResponse;
42    is_draining.store(true, Ordering::SeqCst);
43    let marker_persist_error = persist_drain_marker(marker_path).await.err();
44    (
45        status,
46        SonicJson(sonic_rs::json!({
47            "status": "error",
48            "message": message,
49            "draining": true,
50            "drain_marker": marker_path,
51            "marker_persist_error": marker_persist_error,
52        })),
53    )
54        .into_response()
55}
56
#[cfg(test)]
mod tests {
    use super::*;

    /// The marker shares the WAL's directory and stem, differing only in
    /// its extension.
    #[test]
    fn drain_marker_path_sits_next_to_wal() {
        assert_eq!(
            drain_marker_path(Path::new("/data/wal/engine-journal.wal")),
            Path::new("/data/wal/engine-journal.drain")
        );
    }

    /// A WAL path without an extension gains `.drain` rather than losing
    /// part of its file name.
    #[test]
    fn drain_marker_path_appends_extension_when_wal_has_none() {
        assert_eq!(
            drain_marker_path(Path::new("/data/wal/engine-journal")),
            Path::new("/data/wal/engine-journal.drain")
        );
    }

    /// An accepted-but-unknown quiesce must leave the API drained, write the
    /// marker, and report the caller-supplied status.
    #[tokio::test]
    async fn accepted_quiesce_failure_keeps_drain_flag_and_marker() {
        let dir = tempfile::tempdir().expect("tempdir");
        let marker_path = dir.path().join("engine-journal.drain");
        let is_draining = AtomicBool::new(false);

        let response = keep_drain_after_accepted_quiesce_failure(
            &is_draining,
            &marker_path,
            StatusCode::GATEWAY_TIMEOUT,
            "engine quiesce timed out after request was accepted",
        )
        .await;

        assert_eq!(
            response.status(),
            StatusCode::GATEWAY_TIMEOUT,
            "response must carry the status supplied by the caller"
        );
        assert!(
            is_draining.load(Ordering::SeqCst),
            "accepted-but-unknown quiesce must keep API drain enabled"
        );
        assert!(
            marker_path.exists(),
            "accepted-but-unknown quiesce must persist drain marker for restart"
        );
    }
}
92}