Skip to content

Failure modes #72

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 19 commits into
base: dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@ missing_inline_in_public_items = { level = "allow", priority = 127 } # let rus
missing_trait_methods = { level = "allow", priority = 127 } # allow in favor of rustc `implement the missing item`
module_name_repetitions = { level = "allow", priority = 127 } # allow use of module name in type names
multiple_inherent_impl = { level = "allow", priority = 127 } # required in best practice to limit exposure over UniFFI
multiple_crate_versions = { level = "allow", priority = 127 } # allow multiple versions of the same crate
must_use_candidate = { level = "allow", priority = 127 } # omitting #[must_use] ok
mod_module_files = { level = "allow", priority = 127 } # mod directories ok
non_ascii_literal = { level = "allow", priority = 127 } # non-ascii char in string literal ok
Expand Down
7 changes: 5 additions & 2 deletions src/core/crypto.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ pub fn hash_buffer(buffer: impl AsRef<[u8]>) -> String {
/// Will return error if unable to access file.
pub fn hash_file(filepath: impl AsRef<Path>) -> Result<String> {
hash_stream(
&mut File::open(&filepath).context(selector::InvalidFilepath {
&mut File::open(&filepath).context(selector::InvalidFileOrDirPath {
path: filepath.as_ref(),
})?,
)
Expand All @@ -62,7 +62,10 @@ pub fn hash_file(filepath: impl AsRef<Path>) -> Result<String> {
pub fn hash_dir(dirpath: impl AsRef<Path>) -> Result<String> {
let summary: BTreeMap<String, String> = dirpath
.as_ref()
.read_dir()?
.read_dir()
.context(selector::InvalidFileOrDirPath {
path: dirpath.as_ref(),
})?
.map(|path| {
let access_path = path?.path();
Ok((
Expand Down
4 changes: 3 additions & 1 deletion src/core/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -90,8 +90,10 @@
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
match &self.kind {
Kind::EmptyResponseWhenLoadingContainerAltImage { backtrace, .. }
| Kind::FailedToExtractRunInfo { backtrace, .. }

Check warning on line 93 in src/core/error.rs

View check run for this annotation

Codecov / codecov/patch

src/core/error.rs#L93

Added line #L93 was not covered by tests
| Kind::FailedToStartPod { backtrace, .. }
| Kind::GeneratedNamesOverflow { backtrace, .. }
| Kind::InvalidFilepath { backtrace, .. }
| Kind::InvalidFileOrDirPath { backtrace, .. }
| Kind::InvalidPodResultTerminatedDatetime { backtrace, .. }
| Kind::KeyMissing { backtrace, .. }
| Kind::NoAnnotationFound { backtrace, .. }
Expand Down
238 changes: 165 additions & 73 deletions src/core/orchestrator/docker.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,9 @@
},
};
use bollard::{
container::{Config, CreateContainerOptions, ListContainersOptions},
container::{Config, CreateContainerOptions, ListContainersOptions, RemoveContainerOptions},
models::{ContainerStateStatusEnum, HostConfig},
secret::{ContainerInspectResponse, ContainerSummary},
};
use chrono::DateTime;
use futures_util::future::join_all;
Expand All @@ -34,6 +35,11 @@
.expect("Invalid image tag regex.")
});

#[expect(clippy::expect_used, reason = "Valid static regex")]
static RE_FOR_CMD: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(r#"[^\s"']+|"[^"]*"|'[^']*'"#).expect("Invalid model metadata regex.")
});

impl LocalDockerOrchestrator {
fn prepare_mount_binds(
namespace_lookup: &HashMap<String, PathBuf>,
Expand Down Expand Up @@ -108,7 +114,11 @@
- Pod commands will always have at least 1 element
"#
)]
pub(crate) fn prepare_container_start_inputs(
/// Prepare the inputs for starting a container.
///
/// # Errors
/// Will fail if pod job is invalid
pub fn prepare_container_start_inputs(
namespace_lookup: &HashMap<String, PathBuf>,
pod_job: &PodJob,
image: String,
Expand Down Expand Up @@ -143,11 +153,15 @@
serde_json::to_string(&pod_job)?,
),
]);
let command = pod_job
.pod
.command
.split_whitespace()
.map(String::from)
let command = RE_FOR_CMD
.captures_iter(&pod_job.pod.command)
.map(|capture| {
capture
.extract::<0>()
.0
.to_owned()
.replace(['\'', '\"'], "")
})
.collect::<Vec<_>>();

Ok((
Expand Down Expand Up @@ -178,10 +192,7 @@
))
}
#[expect(
clippy::cast_sign_loss,
clippy::string_slice,
clippy::cast_precision_loss,
clippy::cast_possible_truncation,
clippy::indexing_slicing,
reason = r#"
- Timestamp and memory should always have a value > 0
Expand All @@ -194,7 +205,7 @@
pub(crate) async fn list_containers(
&self,
filters: HashMap<String, Vec<String>>, // https://docs.rs/bollard/latest/bollard/container/struct.ListContainersOptions.html#structfield.filters
) -> Result<impl Iterator<Item = (String, RunInfo)>> {
) -> Result<impl Iterator<Item = Result<(String, RunInfo)>>> {
Ok(join_all(
self.api
.list_containers(Some(ListContainersOptions {
Expand All @@ -218,70 +229,151 @@
)
.await
.into_iter()
.filter_map(|result: Result<_>| {
let (container_name, container_summary, container_spec) = result.ok()?;
let terminated_timestamp =
DateTime::parse_from_rfc3339(container_spec.state.as_ref()?.finished_at.as_ref()?)
.ok()?
.timestamp() as u64;
Some((
.map(|result: Result<_>| {
let (container_name, container_summary, container_inspect_response) = match result {
Ok((container_name, container_summary, container_inspect_response)) => (
container_name,
container_summary,
container_inspect_response,
),
Err(error) => {
return Err(error);

Check warning on line 240 in src/core/orchestrator/docker.rs

View check run for this annotation

Codecov / codecov/patch

src/core/orchestrator/docker.rs#L239-L240

Added lines #L239 - L240 were not covered by tests
}
};

Ok(
Self::extract_run_info(&container_summary, &container_inspect_response)
.map(|run_info| (container_name.clone(), run_info))
.context(selector::FailedToExtractRunInfo { container_name })?,
)
}))
}
pub(crate) async fn delete_container(&self, container_name: &str) -> Result<()> {
self.api
.remove_container(
container_name,
RunInfo {
image: container_spec.config.as_ref()?.image.as_ref()?.clone(),
created: container_summary.created? as u64,
terminated: (terminated_timestamp > 0).then_some(terminated_timestamp),
env_vars: container_spec
.config
.as_ref()?
.env
Some(RemoveContainerOptions {
force: true,
..Default::default()
}),
)
.await?;
Ok(())
}

#[expect(
clippy::cast_sign_loss,
clippy::cast_precision_loss,
clippy::cast_possible_truncation,
reason = r#"
- Timestamp and memory should always have a value > 0
- Container will always have a name with more than 1 character
- No issue in core casting if between 0 - 3.40e38(f32:MAX)
- No issue in exit code casting if between -3.27e4(i16:MIN) - 3.27e4(i16:MAX)
- Containers will always have at least 1 name with at least 2 characters
- This functions requires a lot of boilerplate code to extract the run info
"#
)]
fn extract_run_info(
container_summary: &ContainerSummary,
container_inspect_response: &ContainerInspectResponse,
) -> Option<RunInfo> {
let terminated_timestamp = DateTime::parse_from_rfc3339(
container_inspect_response
.state
.as_ref()?
.finished_at
.as_ref()?,
)
.ok()?
.timestamp() as u64;
Some(RunInfo {
image: container_inspect_response
.config
.as_ref()?
.image
.as_ref()?
.clone(),
created: container_summary.created? as u64,
terminated: (terminated_timestamp > 0).then_some(terminated_timestamp),
env_vars: container_inspect_response
.config
.as_ref()?
.env
.as_ref()?
.iter()
.filter_map(|x| {
x.split_once('=')
.map(|(key, value)| (key.to_owned(), value.to_owned()))
})
.collect(),
command: format!(
"{} {}",
container_inspect_response
.config
.as_ref()?
.entrypoint
.as_ref()?
.join(" "),
container_inspect_response
.config
.as_ref()?
.cmd
.as_ref()?
.join(" ")
),
status: match (
container_inspect_response.state.as_ref()?.status?,
container_inspect_response.state.as_ref()?.exit_code? as i16,
) {
(ContainerStateStatusEnum::RUNNING, _) => Status::Running,
(ContainerStateStatusEnum::EXITED, 0) => Status::Completed,
(ContainerStateStatusEnum::EXITED | ContainerStateStatusEnum::DEAD, code) => {
Status::Failed(code)
}
(
ContainerStateStatusEnum::CREATED | ContainerStateStatusEnum::RESTARTING,
code,
) => {
if container_inspect_response
.state
.as_ref()?
.iter()
.filter_map(|x| {
x.split_once('=')
.map(|(key, value)| (key.to_owned(), value.to_owned()))
})
.collect(),
command: format!(
"{} {}",
container_spec
.config
.as_ref()?
.entrypoint
.as_ref()?
.join(" "),
container_spec.config.as_ref()?.cmd.as_ref()?.join(" ")
),
status: match (
container_spec.state.as_ref()?.status.as_ref()?,
container_spec.state.as_ref()?.exit_code? as i16,
) {
(ContainerStateStatusEnum::RUNNING, _) => Status::Running,
(ContainerStateStatusEnum::EXITED, 0) => Status::Completed,
(ContainerStateStatusEnum::EXITED, code) => Status::Failed(code),
_ => todo!(),
},
mounts: container_spec
.mounts
.error
.as_ref()?
.iter()
.map(|mount_point| {
Some(format!(
"{}:{}{}",
mount_point.source.as_ref()?,
mount_point.destination.as_ref()?,
mount_point
.mode
.as_ref()
.map_or_else(String::new, |mode| format!(":{mode}"))
))
})
.collect::<Option<Vec<_>>>()?,
labels: container_spec.config.as_ref()?.labels.as_ref()?.clone(),
cpu_limit: container_spec.host_config.as_ref()?.nano_cpus? as f32
/ 10_f32.powi(9), // ncpu, ucores=3, mcores=6, cores=9
memory_limit: container_spec.host_config.as_ref()?.memory? as u64,
},
))
}))
.is_empty()
{
Status::Queued
} else {
Status::Failed(code)
}
}
_ => Status::Unknown,
},
mounts: container_inspect_response
.mounts
.as_ref()?
.iter()
.map(|mount_point| {
Some(format!(
"{}:{}{}",
mount_point.source.as_ref()?,
mount_point.destination.as_ref()?,
mount_point
.mode
.as_ref()
.map_or_else(String::new, |mode| format!(":{mode}"))
))
})
.collect::<Option<Vec<_>>>()?,
labels: container_inspect_response
.config
.as_ref()?
.labels
.as_ref()?
.clone(),
cpu_limit: container_inspect_response.host_config.as_ref()?.nano_cpus? as f32
/ 10_f32.powi(9), // ncpu, ucores=3, mcores=6, cores=9
memory_limit: container_inspect_response.host_config.as_ref()?.memory? as u64,
})
}
}
21 changes: 20 additions & 1 deletion src/uniffi/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,25 @@ pub(crate) enum Kind {
path: PathBuf,
backtrace: Option<Backtrace>,
},
#[snafu(display(
"Failed to extract run info from the container image file: {container_name}."
))]
FailedToExtractRunInfo {
container_name: String,
backtrace: Option<Backtrace>,
},
#[snafu(display(
"Fail to start pod with container_name: {container_name} with error: {source}"
))]
FailedToStartPod {
container_name: String,
source: BollardError,
backtrace: Option<Backtrace>,
},
#[snafu(display("Out of generated random names."))]
GeneratedNamesOverflow { backtrace: Option<Backtrace> },
#[snafu(display("{source} ({path:?})."))]
InvalidFilepath {
InvalidFileOrDirPath {
path: PathBuf,
source: io::Error,
backtrace: Option<Backtrace>,
Expand Down Expand Up @@ -122,4 +137,8 @@ impl OrcaError {
pub const fn is_purged_pod_run(&self) -> bool {
matches!(self.kind, Kind::NoMatchingPodRun { .. })
}
/// Returns `true` if the error was caused by an invalid file or directory path.
pub const fn is_failed_to_start_pod(&self) -> bool {
matches!(self.kind, Kind::FailedToStartPod { .. })
}
}
4 changes: 4 additions & 0 deletions src/uniffi/model.rs
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,8 @@ pub struct PodResult {
pub created: u64,
/// Time in epoch when terminated in seconds.
pub terminated: u64,
/// Output logs of container
pub logs: String,
}

impl PodResult {
Expand All @@ -225,6 +227,7 @@ impl PodResult {
status: Status,
created: u64,
terminated: u64,
logs: String,
) -> Result<Self> {
let pod_result_no_hash = Self {
annotation,
Expand All @@ -234,6 +237,7 @@ impl PodResult {
status,
created,
terminated,
logs,
};
Ok(Self {
hash: hash_buffer(to_yaml(&pod_result_no_hash)?),
Expand Down
Loading