Skip to content

Commit d01d106

Browse files
refactor(proto): move phase and current_policy_version into status (#1565)
* refactor(proto): move phase and current_policy_version into SandboxStatus Move phase and current_policy_version from SandboxSpec into SandboxStatus to correctly model mutable runtime state. Update all callers in the gateway server, TUI, and Python SDK to read and write these fields through SandboxStatus accessors. Signed-off-by: Derek Carr <decarr@redhat.com> * fix(server): preserve sandbox status on statusless driver updates When a driver update arrives without a status payload (e.g. before Kubernetes populates the status subresource), preserve the stored phase, conditions, and current policy version instead of resetting them. Adds a regression test covering the edge case. Signed-off-by: Derek Carr <decarr@redhat.com> --------- Signed-off-by: Derek Carr <decarr@redhat.com>
1 parent 7873f61 commit d01d106

16 files changed

Lines changed: 325 additions & 221 deletions

File tree

crates/openshell-cli/src/run.rs

Lines changed: 15 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1855,11 +1855,11 @@ pub async fn sandbox_create(
18551855
.into_diagnostic()?
18561856
.into_inner();
18571857

1858-
let mut last_phase = sandbox.phase;
1858+
let mut last_phase = sandbox.phase();
18591859
let mut last_error_reason = String::new();
18601860
let mut last_condition_message = ready_false_condition_message(sandbox.status.as_ref());
18611861
// Track whether we have seen a non-Ready phase during the watch.
1862-
let mut saw_non_ready = SandboxPhase::try_from(sandbox.phase) != Ok(SandboxPhase::Ready);
1862+
let mut saw_non_ready = SandboxPhase::try_from(sandbox.phase()) != Ok(SandboxPhase::Ready);
18631863
let provision_timeout = Duration::from_secs(
18641864
std::env::var("OPENSHELL_PROVISION_TIMEOUT")
18651865
.ok()
@@ -1912,8 +1912,8 @@ pub async fn sandbox_create(
19121912
let evt = item.into_diagnostic()?;
19131913
match evt.payload {
19141914
Some(openshell_core::proto::sandbox_stream_event::Payload::Sandbox(s)) => {
1915-
let phase = SandboxPhase::try_from(s.phase).unwrap_or(SandboxPhase::Unknown);
1916-
last_phase = s.phase;
1915+
let phase = SandboxPhase::try_from(s.phase()).unwrap_or(SandboxPhase::Unknown);
1916+
last_phase = s.phase();
19171917
if let Some(message) = ready_false_condition_message(s.status.as_ref()) {
19181918
last_condition_message = Some(message);
19191919
}
@@ -2474,7 +2474,7 @@ pub async fn sandbox_get(
24742474
};
24752475
println!(" {} {}", "Id:".dimmed(), id);
24762476
println!(" {} {}", "Name:".dimmed(), name);
2477-
println!(" {} {}", "Phase:".dimmed(), phase_name(sandbox.phase));
2477+
println!(" {} {}", "Phase:".dimmed(), phase_name(sandbox.phase()));
24782478
println!(
24792479
" {} {}",
24802480
"Resource version:".dimmed(),
@@ -2558,11 +2558,11 @@ pub async fn sandbox_exec_grpc(
25582558
.ok_or_else(|| miette::miette!("sandbox not found"))?;
25592559

25602560
// Verify the sandbox is ready before issuing the exec.
2561-
if SandboxPhase::try_from(sandbox.phase) != Ok(SandboxPhase::Ready) {
2561+
if SandboxPhase::try_from(sandbox.phase()) != Ok(SandboxPhase::Ready) {
25622562
return Err(miette::miette!(
25632563
"sandbox '{}' is not ready (phase: {}); wait for it to reach Ready state",
25642564
name,
2565-
phase_name(sandbox.phase)
2565+
phase_name(sandbox.phase())
25662566
));
25672567
}
25682568

@@ -2770,11 +2770,11 @@ async fn fetch_ready_sandbox_for_forward(
27702770
.sandbox
27712771
.ok_or_else(|| miette::miette!("sandbox '{name}' not found"))?;
27722772

2773-
if SandboxPhase::try_from(sandbox.phase) != Ok(SandboxPhase::Ready) {
2773+
if SandboxPhase::try_from(sandbox.phase()) != Ok(SandboxPhase::Ready) {
27742774
return Err(miette::miette!(
27752775
"sandbox '{}' is no longer ready (phase: {}); stopping service forward",
27762776
name,
2777-
phase_name(sandbox.phase)
2777+
phase_name(sandbox.phase())
27782778
));
27792779
}
27802780

@@ -3218,8 +3218,8 @@ pub async fn sandbox_list(
32183218

32193219
// Print rows
32203220
for sandbox in sandboxes {
3221-
let phase = phase_name(sandbox.phase);
3222-
let phase_colored = match SandboxPhase::try_from(sandbox.phase) {
3221+
let phase = phase_name(sandbox.phase());
3222+
let phase_colored = match SandboxPhase::try_from(sandbox.phase()) {
32233223
Ok(SandboxPhase::Ready) => phase.green().to_string(),
32243224
Ok(SandboxPhase::Error) => phase.red().to_string(),
32253225
Ok(SandboxPhase::Provisioning) => phase.yellow().to_string(),
@@ -3247,8 +3247,8 @@ fn sandbox_to_json(sandbox: &Sandbox) -> serde_json::Value {
32473247
"labels": labels,
32483248
"resource_version": meta.map_or(0, |m| m.resource_version),
32493249
"created_at": format_epoch_ms(meta.map_or(0, |m| m.created_at_ms)),
3250-
"phase": phase_name(sandbox.phase),
3251-
"current_policy_version": sandbox.current_policy_version,
3250+
"phase": phase_name(sandbox.phase()),
3251+
"current_policy_version": sandbox.current_policy_version(),
32523252
})
32533253
}
32543254

@@ -7671,15 +7671,14 @@ mod tests {
76717671
let status = SandboxStatus {
76727672
sandbox_name: "gpu".to_string(),
76737673
agent_pod: "gpu-pod".to_string(),
7674-
agent_fd: String::new(),
7675-
sandbox_fd: String::new(),
76767674
conditions: vec![SandboxCondition {
76777675
r#type: "Ready".to_string(),
76787676
status: "False".to_string(),
76797677
reason: "Unschedulable".to_string(),
76807678
message: "Another GPU sandbox may already be using the available GPU.".to_string(),
76817679
last_transition_time: String::new(),
76827680
}],
7681+
..Default::default()
76837682
};
76847683

76857684
assert_eq!(
@@ -7693,15 +7692,14 @@ mod tests {
76937692
let status = SandboxStatus {
76947693
sandbox_name: "gpu".to_string(),
76957694
agent_pod: "gpu-pod".to_string(),
7696-
agent_fd: String::new(),
7697-
sandbox_fd: String::new(),
76987695
conditions: vec![SandboxCondition {
76997696
r#type: "Scheduled".to_string(),
77007697
status: "True".to_string(),
77017698
reason: "Scheduled".to_string(),
77027699
message: "Sandbox scheduled".to_string(),
77037700
last_transition_time: String::new(),
77047701
}],
7702+
..Default::default()
77057703
};
77067704

77077705
assert!(ready_false_condition_message(Some(&status)).is_none());

crates/openshell-cli/tests/provider_commands_integration.rs

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -126,8 +126,6 @@ impl OpenShell for TestOpenShell {
126126
}),
127127
spec: None,
128128
status: None,
129-
phase: 0,
130-
current_policy_version: 0,
131129
}),
132130
}))
133131
}

crates/openshell-cli/tests/sandbox_create_lifecycle_integration.rs

Lines changed: 31 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -79,18 +79,19 @@ impl OpenShell for TestOpenShell {
7979
name
8080
};
8181

82-
Ok(Response::new(SandboxResponse {
83-
sandbox: Some(Sandbox {
84-
metadata: Some(openshell_core::proto::datamodel::v1::ObjectMeta {
85-
id: format!("id-{sandbox_name}"),
86-
name: sandbox_name,
87-
created_at_ms: 0,
88-
labels: HashMap::new(),
89-
resource_version: 0,
90-
}),
91-
phase: SandboxPhase::Provisioning as i32,
92-
..Sandbox::default()
82+
let mut sandbox = Sandbox {
83+
metadata: Some(openshell_core::proto::datamodel::v1::ObjectMeta {
84+
id: format!("id-{sandbox_name}"),
85+
name: sandbox_name,
86+
created_at_ms: 0,
87+
labels: HashMap::new(),
88+
resource_version: 0,
9389
}),
90+
..Sandbox::default()
91+
};
92+
sandbox.set_phase(SandboxPhase::Provisioning as i32);
93+
Ok(Response::new(SandboxResponse {
94+
sandbox: Some(sandbox),
9495
}))
9596
}
9697

@@ -99,18 +100,19 @@ impl OpenShell for TestOpenShell {
99100
request: tonic::Request<GetSandboxRequest>,
100101
) -> Result<Response<SandboxResponse>, Status> {
101102
let name = request.into_inner().name;
102-
Ok(Response::new(SandboxResponse {
103-
sandbox: Some(Sandbox {
104-
metadata: Some(openshell_core::proto::datamodel::v1::ObjectMeta {
105-
id: format!("id-{name}"),
106-
name,
107-
created_at_ms: 0,
108-
labels: HashMap::new(),
109-
resource_version: 0,
110-
}),
111-
phase: SandboxPhase::Ready as i32,
112-
..Sandbox::default()
103+
let mut sandbox = Sandbox {
104+
metadata: Some(openshell_core::proto::datamodel::v1::ObjectMeta {
105+
id: format!("id-{name}"),
106+
name,
107+
created_at_ms: 0,
108+
labels: HashMap::new(),
109+
resource_version: 0,
113110
}),
111+
..Sandbox::default()
112+
};
113+
sandbox.set_phase(SandboxPhase::Ready as i32);
114+
Ok(Response::new(SandboxResponse {
115+
sandbox: Some(sandbox),
114116
}))
115117
}
116118

@@ -351,38 +353,34 @@ impl OpenShell for TestOpenShell {
351353
let vm_log_churn_before_ready = self.state.vm_log_churn_before_ready.load(Ordering::SeqCst);
352354

353355
tokio::spawn(async move {
354-
let provisioning = Sandbox {
356+
let mut provisioning = Sandbox {
355357
metadata: Some(openshell_core::proto::datamodel::v1::ObjectMeta {
356358
id: sandbox_id.clone(),
357359
name: sandbox_id.trim_start_matches("id-").to_string(),
358360
created_at_ms: 0,
359361
labels: HashMap::new(),
360362
resource_version: 0,
361363
}),
362-
phase: SandboxPhase::Provisioning as i32,
363364
..Sandbox::default()
364365
};
365-
let error = Sandbox {
366-
phase: SandboxPhase::Error as i32,
366+
provisioning.set_phase(SandboxPhase::Provisioning as i32);
367+
let mut error = Sandbox {
367368
status: Some(SandboxStatus {
368369
sandbox_name: sandbox_id.trim_start_matches("id-").to_string(),
369-
agent_pod: String::new(),
370-
agent_fd: String::new(),
371-
sandbox_fd: String::new(),
372370
conditions: vec![SandboxCondition {
373371
r#type: "Ready".to_string(),
374372
status: "False".to_string(),
375373
reason: "ProcessExited".to_string(),
376374
message: "VM process exited with status 0".to_string(),
377375
last_transition_time: String::new(),
378376
}],
377+
..Default::default()
379378
}),
380379
..provisioning.clone()
381380
};
382-
let ready = Sandbox {
383-
phase: SandboxPhase::Ready as i32,
384-
..provisioning.clone()
385-
};
381+
error.set_phase(SandboxPhase::Error as i32);
382+
let mut ready = provisioning.clone();
383+
ready.set_phase(SandboxPhase::Ready as i32);
386384

387385
let _ = tx
388386
.send(Ok(SandboxStreamEvent {

crates/openshell-core/src/metadata.rs

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
//! These traits provide uniform access to `ObjectMeta` fields across all resource types.
77
88
use crate::proto::{
9-
InferenceRoute, ObjectForTest, Provider, Sandbox, ServiceEndpoint, SshSession,
9+
InferenceRoute, ObjectForTest, Provider, Sandbox, SandboxStatus, ServiceEndpoint, SshSession,
1010
StoredProviderCredentialRefreshState, StoredProviderProfile,
1111
};
1212
use std::collections::HashMap;
@@ -69,6 +69,26 @@ impl GetResourceVersion for Sandbox {
6969
}
7070
}
7171

72+
impl Sandbox {
73+
pub fn phase(&self) -> i32 {
74+
self.status.as_ref().map_or(0, |s| s.phase)
75+
}
76+
77+
pub fn set_phase(&mut self, phase: i32) {
78+
self.status.get_or_insert_with(SandboxStatus::default).phase = phase;
79+
}
80+
81+
pub fn current_policy_version(&self) -> u32 {
82+
self.status.as_ref().map_or(0, |s| s.current_policy_version)
83+
}
84+
85+
pub fn set_current_policy_version(&mut self, version: u32) {
86+
self.status
87+
.get_or_insert_with(SandboxStatus::default)
88+
.current_policy_version = version;
89+
}
90+
}
91+
7292
// Implementations for Provider
7393
impl ObjectId for Provider {
7494
fn object_id(&self) -> &str {

0 commit comments

Comments
 (0)