mirror of
https://github.com/pezkuwichain/pezkuwi-subxt.git
synced 2026-06-12 04:11:07 +00:00
PVF: remove audit log access (#2461)
This commit is contained in:
@@ -522,4 +522,4 @@ test-syscalls:
|
||||
- if [[ "$CI_JOB_STATUS" == "failed" ]]; then
|
||||
printf "The x86_64 syscalls used by the worker binaries have changed. Please review if this is expected and update polkadot/scripts/list-syscalls/*-worker-syscalls as needed.\n";
|
||||
fi
|
||||
allow_failure: true # TODO: remove this once we have an idea how often the syscall lists will change
|
||||
allow_failure: false # this rarely triggers in practice
|
||||
|
||||
@@ -18,7 +18,6 @@
|
||||
|
||||
use crate::{
|
||||
artifacts::ArtifactPathId,
|
||||
security,
|
||||
worker_intf::{
|
||||
clear_worker_dir_path, framed_recv, framed_send, spawn_with_program_path, IdleWorker,
|
||||
SpawnErr, WorkerDir, WorkerHandle, JOB_TIMEOUT_WALL_CLOCK_FACTOR,
|
||||
@@ -33,7 +32,7 @@ use polkadot_node_core_pvf_common::{
|
||||
execute::{Handshake, WorkerResponse},
|
||||
worker_dir, SecurityStatus,
|
||||
};
|
||||
use polkadot_parachain_primitives::primitives::{ValidationCodeHash, ValidationResult};
|
||||
use polkadot_parachain_primitives::primitives::ValidationResult;
|
||||
use polkadot_primitives::ExecutorParams;
|
||||
use std::{path::Path, time::Duration};
|
||||
use tokio::{io, net::UnixStream};
|
||||
@@ -132,10 +131,7 @@ pub async fn start_work(
|
||||
artifact.path.display(),
|
||||
);
|
||||
|
||||
let artifact_path = artifact.path.clone();
|
||||
with_worker_dir_setup(worker_dir, pid, &artifact.path, |worker_dir| async move {
|
||||
let audit_log_file = security::AuditLogFile::try_open_and_seek_to_end().await;
|
||||
|
||||
if let Err(error) = send_request(&mut stream, &validation_params, execution_timeout).await {
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
@@ -160,10 +156,7 @@ pub async fn start_work(
|
||||
handle_response(
|
||||
response,
|
||||
pid,
|
||||
&artifact.id.code_hash,
|
||||
&artifact_path,
|
||||
execution_timeout,
|
||||
audit_log_file
|
||||
)
|
||||
.await,
|
||||
Err(error) => {
|
||||
@@ -217,10 +210,7 @@ pub async fn start_work(
|
||||
async fn handle_response(
|
||||
response: WorkerResponse,
|
||||
worker_pid: u32,
|
||||
validation_code_hash: &ValidationCodeHash,
|
||||
artifact_path: &Path,
|
||||
execution_timeout: Duration,
|
||||
audit_log_file: Option<security::AuditLogFile>,
|
||||
) -> WorkerResponse {
|
||||
if let WorkerResponse::Ok { duration, .. } = response {
|
||||
if duration > execution_timeout {
|
||||
@@ -238,25 +228,6 @@ async fn handle_response(
|
||||
}
|
||||
}
|
||||
|
||||
if let WorkerResponse::JobDied { err: _, job_pid } = response {
|
||||
// The job died. Check if it was due to a seccomp violation.
|
||||
//
|
||||
// NOTE: Log, but don't change the outcome. Not all validators may have
|
||||
// auditing enabled, so we don't want attackers to abuse a non-deterministic
|
||||
// outcome.
|
||||
for syscall in security::check_seccomp_violations_for_job(audit_log_file, job_pid).await {
|
||||
gum::error!(
|
||||
target: LOG_TARGET,
|
||||
%worker_pid,
|
||||
%job_pid,
|
||||
%syscall,
|
||||
?validation_code_hash,
|
||||
?artifact_path,
|
||||
"A forbidden syscall was attempted! This is a violation of our seccomp security policy. Report an issue ASAP!"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
response
|
||||
}
|
||||
|
||||
|
||||
@@ -19,7 +19,6 @@
|
||||
use crate::{
|
||||
artifacts::ArtifactId,
|
||||
metrics::Metrics,
|
||||
security,
|
||||
worker_intf::{
|
||||
clear_worker_dir_path, framed_recv, framed_send, spawn_with_program_path, IdleWorker,
|
||||
SpawnErr, WorkerDir, WorkerHandle, JOB_TIMEOUT_WALL_CLOCK_FACTOR,
|
||||
@@ -134,7 +133,6 @@ pub async fn start_work(
|
||||
pid,
|
||||
|tmp_artifact_file, mut stream, worker_dir| async move {
|
||||
let preparation_timeout = pvf.prep_timeout();
|
||||
let audit_log_file = security::AuditLogFile::try_open_and_seek_to_end().await;
|
||||
|
||||
if let Err(err) = send_request(&mut stream, &pvf).await {
|
||||
gum::warn!(
|
||||
@@ -170,7 +168,6 @@ pub async fn start_work(
|
||||
&pvf,
|
||||
&cache_path,
|
||||
preparation_timeout,
|
||||
audit_log_file,
|
||||
)
|
||||
.await,
|
||||
Ok(Err(err)) => {
|
||||
@@ -211,34 +208,13 @@ async fn handle_response(
|
||||
pvf: &PvfPrepData,
|
||||
cache_path: &Path,
|
||||
preparation_timeout: Duration,
|
||||
audit_log_file: Option<security::AuditLogFile>,
|
||||
) -> Outcome {
|
||||
let PrepareWorkerSuccess { checksum, stats: PrepareStats { cpu_time_elapsed, memory_stats } } =
|
||||
match result.clone() {
|
||||
Ok(result) => result,
|
||||
// Timed out on the child. This should already be logged by the child.
|
||||
Err(PrepareError::TimedOut) => return Outcome::TimedOut,
|
||||
Err(PrepareError::JobDied { err, job_pid }) => {
|
||||
// The job died. Check if it was due to a seccomp violation.
|
||||
//
|
||||
// NOTE: Log, but don't change the outcome. Not all validators may have
|
||||
// auditing enabled, so we don't want attackers to abuse a non-deterministic
|
||||
// outcome.
|
||||
for syscall in
|
||||
security::check_seccomp_violations_for_job(audit_log_file, job_pid).await
|
||||
{
|
||||
gum::error!(
|
||||
target: LOG_TARGET,
|
||||
%worker_pid,
|
||||
%job_pid,
|
||||
%syscall,
|
||||
?pvf,
|
||||
"A forbidden syscall was attempted! This is a violation of our seccomp security policy. Report an issue ASAP!"
|
||||
);
|
||||
}
|
||||
|
||||
return Outcome::JobDied { err, job_pid }
|
||||
},
|
||||
Err(PrepareError::JobDied { err, job_pid }) => return Outcome::JobDied { err, job_pid },
|
||||
Err(PrepareError::OutOfMemory) => return Outcome::OutOfMemory,
|
||||
Err(err) => return Outcome::Concluded { worker, result: Err(err) },
|
||||
};
|
||||
|
||||
@@ -17,10 +17,6 @@
|
||||
use crate::{Config, SecurityStatus, LOG_TARGET};
|
||||
use futures::join;
|
||||
use std::{fmt, path::Path};
|
||||
use tokio::{
|
||||
fs::{File, OpenOptions},
|
||||
io::{AsyncReadExt, AsyncSeekExt, SeekFrom},
|
||||
};
|
||||
|
||||
const SECURE_MODE_ANNOUNCEMENT: &'static str =
|
||||
"In the next release this will be a hard error by default.
|
||||
@@ -35,7 +31,6 @@ const SECURE_MODE_ANNOUNCEMENT: &'static str =
|
||||
pub async fn check_security_status(config: &Config) -> SecurityStatus {
|
||||
let Config { prepare_worker_program_path, cache_path, .. } = config;
|
||||
|
||||
// TODO: add check that syslog is available and that seccomp violations are logged?
|
||||
let (landlock, seccomp, change_root) = join!(
|
||||
check_landlock(prepare_worker_program_path),
|
||||
check_seccomp(prepare_worker_program_path),
|
||||
@@ -303,147 +298,3 @@ async fn check_seccomp(
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const AUDIT_LOG_PATH: &'static str = "/var/log/audit/audit.log";
|
||||
const SYSLOG_PATH: &'static str = "/var/log/syslog";
|
||||
|
||||
/// System audit log.
|
||||
pub struct AuditLogFile {
|
||||
file: File,
|
||||
path: &'static str,
|
||||
}
|
||||
|
||||
impl AuditLogFile {
|
||||
/// Looks for an audit log file on the system and opens it, seeking to the end to skip any
|
||||
/// events from before this was called.
|
||||
///
|
||||
/// A bit of a verbose name, but it should clue future refactorers not to move calls closer to
|
||||
/// where the `AuditLogFile` is used.
|
||||
pub async fn try_open_and_seek_to_end() -> Option<Self> {
|
||||
let mut path = AUDIT_LOG_PATH;
|
||||
let mut file = match OpenOptions::new().read(true).open(AUDIT_LOG_PATH).await {
|
||||
Ok(file) => Ok(file),
|
||||
Err(_) => {
|
||||
path = SYSLOG_PATH;
|
||||
OpenOptions::new().read(true).open(SYSLOG_PATH).await
|
||||
},
|
||||
}
|
||||
.ok()?;
|
||||
|
||||
let _pos = file.seek(SeekFrom::End(0)).await;
|
||||
|
||||
Some(Self { file, path })
|
||||
}
|
||||
|
||||
async fn read_new_since_open(mut self) -> String {
|
||||
let mut buf = String::new();
|
||||
let _len = self.file.read_to_string(&mut buf).await;
|
||||
buf
|
||||
}
|
||||
}
|
||||
|
||||
/// Check if a seccomp violation occurred for the given job process. As the syslog may be in a
|
||||
/// different location, or seccomp auditing may be disabled, this function provides a best-effort
|
||||
/// attempt only.
|
||||
///
|
||||
/// The `audit_log_file` must have been obtained before the job started. It only allows reading
|
||||
/// entries that were written since it was obtained, so that we do not consider events from previous
|
||||
/// processes with the same pid. This can still be racy, but it's unlikely and fine for a
|
||||
/// best-effort attempt.
|
||||
pub async fn check_seccomp_violations_for_job(
|
||||
audit_log_file: Option<AuditLogFile>,
|
||||
job_pid: i32,
|
||||
) -> Vec<u32> {
|
||||
let audit_event_pid_field = format!("pid={job_pid}");
|
||||
|
||||
let audit_log_file = match audit_log_file {
|
||||
Some(file) => {
|
||||
gum::trace!(
|
||||
target: LOG_TARGET,
|
||||
%job_pid,
|
||||
audit_log_path = ?file.path,
|
||||
"checking audit log for seccomp violations",
|
||||
);
|
||||
file
|
||||
},
|
||||
None => {
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
%job_pid,
|
||||
"could not open either {AUDIT_LOG_PATH} or {SYSLOG_PATH} for reading audit logs"
|
||||
);
|
||||
return vec![]
|
||||
},
|
||||
};
|
||||
let events = audit_log_file.read_new_since_open().await;
|
||||
|
||||
let mut violations = vec![];
|
||||
for event in events.lines() {
|
||||
if let Some(syscall) = parse_audit_log_for_seccomp_event(event, &audit_event_pid_field) {
|
||||
violations.push(syscall);
|
||||
}
|
||||
}
|
||||
|
||||
violations
|
||||
}
|
||||
|
||||
fn parse_audit_log_for_seccomp_event(event: &str, audit_event_pid_field: &str) -> Option<u32> {
|
||||
const SECCOMP_AUDIT_EVENT_TYPE: &'static str = "type=1326";
|
||||
|
||||
// Do a series of simple .contains instead of a regex, because I'm not sure if the fields are
|
||||
// guaranteed to always be in the same order.
|
||||
if !event.contains(SECCOMP_AUDIT_EVENT_TYPE) || !event.contains(&audit_event_pid_field) {
|
||||
return None
|
||||
}
|
||||
|
||||
// Get the syscall. Let's avoid a dependency on regex just for this.
|
||||
for field in event.split(" ") {
|
||||
if let Some(syscall) = field.strip_prefix("syscall=") {
|
||||
return syscall.parse::<u32>().ok()
|
||||
}
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_parse_audit_log_for_seccomp_event() {
|
||||
let audit_event_pid_field = "pid=2559058";
|
||||
|
||||
assert_eq!(
|
||||
parse_audit_log_for_seccomp_event(
|
||||
r#"Oct 24 13:15:24 build kernel: [5883980.283910] audit: type=1326 audit(1698153324.786:23): auid=0 uid=0 gid=0 ses=2162 subj=unconfined pid=2559058 comm="polkadot-prepar" exe="/root/paritytech/polkadot-sdk-2/target/debug/polkadot-prepare-worker" sig=31 arch=c000003e syscall=53 compat=0 ip=0x7f7542c80d5e code=0x80000000"#,
|
||||
audit_event_pid_field
|
||||
),
|
||||
Some(53)
|
||||
);
|
||||
// pid is wrong
|
||||
assert_eq!(
|
||||
parse_audit_log_for_seccomp_event(
|
||||
r#"Oct 24 13:15:24 build kernel: [5883980.283910] audit: type=1326 audit(1698153324.786:23): auid=0 uid=0 gid=0 ses=2162 subj=unconfined pid=2559057 comm="polkadot-prepar" exe="/root/paritytech/polkadot-sdk-2/target/debug/polkadot-prepare-worker" sig=31 arch=c000003e syscall=53 compat=0 ip=0x7f7542c80d5e code=0x80000000"#,
|
||||
audit_event_pid_field
|
||||
),
|
||||
None
|
||||
);
|
||||
// type is wrong
|
||||
assert_eq!(
|
||||
parse_audit_log_for_seccomp_event(
|
||||
r#"Oct 24 13:15:24 build kernel: [5883980.283910] audit: type=1327 audit(1698153324.786:23): auid=0 uid=0 gid=0 ses=2162 subj=unconfined pid=2559057 comm="polkadot-prepar" exe="/root/paritytech/polkadot-sdk-2/target/debug/polkadot-prepare-worker" sig=31 arch=c000003e syscall=53 compat=0 ip=0x7f7542c80d5e code=0x80000000"#,
|
||||
audit_event_pid_field
|
||||
),
|
||||
None
|
||||
);
|
||||
// no syscall field
|
||||
assert_eq!(
|
||||
parse_audit_log_for_seccomp_event(
|
||||
r#"Oct 24 13:15:24 build kernel: [5883980.283910] audit: type=1327 audit(1698153324.786:23): auid=0 uid=0 gid=0 ses=2162 subj=unconfined pid=2559057 comm="polkadot-prepar" exe="/root/paritytech/polkadot-sdk-2/target/debug/polkadot-prepare-worker" sig=31 arch=c000003e compat=0 ip=0x7f7542c80d5e code=0x80000000"#,
|
||||
audit_event_pid_field
|
||||
),
|
||||
None
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user