From 8c2ea190855ad11ebf7683b99e271897fca02511 Mon Sep 17 00:00:00 2001 From: Koute Date: Mon, 1 Nov 2021 17:46:32 +0900 Subject: [PATCH] Strip out control codes from the logged messages (#10081) * Strip out control codes from the logged messages * Also strip away C1 control codes * Add extra comments * Clear the buffer after flushing; rename `write` to `flush` * Move control code stripping into its own function * Also strip out control codes from panic messages * Also strip out Unicode left-to-right/right-to-left control codes --- substrate/Cargo.lock | 2 + .../tracing/src/logging/event_format.rs | 65 +++++++++++++------ substrate/client/tracing/src/logging/mod.rs | 32 +++++++++ substrate/primitives/panic-handler/Cargo.toml | 2 + substrate/primitives/panic-handler/src/lib.rs | 61 +++++++++++++++++ 5 files changed, 142 insertions(+), 20 deletions(-) diff --git a/substrate/Cargo.lock b/substrate/Cargo.lock index 3434e01d13..1ddf355c89 100644 --- a/substrate/Cargo.lock +++ b/substrate/Cargo.lock @@ -9462,6 +9462,8 @@ name = "sp-panic-handler" version = "3.0.0" dependencies = [ "backtrace", + "lazy_static", + "regex", ] [[package]] diff --git a/substrate/client/tracing/src/logging/event_format.rs b/substrate/client/tracing/src/logging/event_format.rs index 5a21192d69..944901d803 100644 --- a/substrate/client/tracing/src/logging/event_format.rs +++ b/substrate/client/tracing/src/logging/event_format.rs @@ -62,7 +62,7 @@ where S: Subscriber + for<'a> LookupSpan<'a>, N: for<'a> FormatFields<'a> + 'static, { - let writer = &mut MaybeColorWriter::new(self.enable_color, writer); + let writer = &mut ControlCodeSanitizer::new(!self.enable_color, writer); let normalized_meta = event.normalized_metadata(); let meta = normalized_meta.as_ref().unwrap_or_else(|| event.metadata()); time::write(&self.timer, writer, self.enable_color)?; @@ -100,10 +100,18 @@ where } } + // The writer only sanitizes its output once it's flushed, so if we don't actually need + // to sanitize everything we need to flush out what was already buffered as-is and only + // force-sanitize what follows. + if !writer.sanitize { + writer.flush()?; + writer.sanitize = true; + } + ctx.format_fields(writer, event)?; writeln!(writer)?; - writer.write() + writer.flush() } } @@ -294,43 +302,60 @@ where } } -/// A writer that may write to `inner_writer` with colors. +/// A writer which (optionally) strips out terminal control codes from the logs. /// -/// This is used by [`EventFormat`] to kill colors when `enable_color` is `false`. +/// This is used by [`EventFormat`] to sanitize the log messages. /// -/// It is required to call [`MaybeColorWriter::write`] after all writes are done, +/// It is required to call [`ControlCodeSanitizer::flush`] after all writes are done, /// because the content of these writes is buffered and will only be written to the /// `inner_writer` at that point. -struct MaybeColorWriter<'a> { - enable_color: bool, +struct ControlCodeSanitizer<'a> { + sanitize: bool, buffer: String, inner_writer: &'a mut dyn fmt::Write, } -impl<'a> fmt::Write for MaybeColorWriter<'a> { +impl<'a> fmt::Write for ControlCodeSanitizer<'a> { fn write_str(&mut self, buf: &str) -> fmt::Result { self.buffer.push_str(buf); Ok(()) } } -impl<'a> MaybeColorWriter<'a> { +// NOTE: When making any changes here make sure to also change this function in `sp-panic-handler`. +fn strip_control_codes(input: &str) -> std::borrow::Cow { + lazy_static::lazy_static! { + static ref RE: Regex = Regex::new(r#"(?x) + \x1b\[[^m]+m| # VT100 escape codes + [ + \x00-\x09\x0B-\x1F # ASCII control codes / Unicode C0 control codes, except \n + \x7F # ASCII delete + \u{80}-\u{9F} # Unicode C1 control codes + \u{202A}-\u{202E} # Unicode left-to-right / right-to-left control characters + \u{2066}-\u{2069} # Same as above + ] + "#).expect("regex parsing doesn't fail; qed"); + } + + RE.replace_all(input, "") +} + +impl<'a> ControlCodeSanitizer<'a> { /// Creates a new instance. - fn new(enable_color: bool, inner_writer: &'a mut dyn fmt::Write) -> Self { - Self { enable_color, inner_writer, buffer: String::new() } + fn new(sanitize: bool, inner_writer: &'a mut dyn fmt::Write) -> Self { + Self { sanitize, inner_writer, buffer: String::new() } } /// Write the buffered content to the `inner_writer`. - fn write(&mut self) -> fmt::Result { - lazy_static::lazy_static! { - static ref RE: Regex = Regex::new("\x1b\\[[^m]+m").expect("Error initializing color regex"); + fn flush(&mut self) -> fmt::Result { + if self.sanitize { + let replaced = strip_control_codes(&self.buffer); + self.inner_writer.write_str(&replaced)? + } else { + self.inner_writer.write_str(&self.buffer)? } - if !self.enable_color { - let replaced = RE.replace_all(&self.buffer, ""); - self.inner_writer.write_str(&replaced) - } else { - self.inner_writer.write_str(&self.buffer) - } + self.buffer.clear(); + Ok(()) } } diff --git a/substrate/client/tracing/src/logging/mod.rs b/substrate/client/tracing/src/logging/mod.rs index 7f995615a2..49807098d1 100644 --- a/substrate/client/tracing/src/logging/mod.rs +++ b/substrate/client/tracing/src/logging/mod.rs @@ -595,4 +595,36 @@ mod tests { assert!(stderr.contains(&line)); } } + + #[test] + fn control_characters_are_always_stripped_out_from_the_log_messages() { + const RAW_LINE: &str = "$$START$$\x1B[1;32mIn\u{202a}\u{202e}\u{2066}\u{2069}ner\n\r\x7ftext!\u{80}\u{9f}\x1B[0m$$END$$"; + const SANITIZED_LINE: &str = "$$START$$Inner\ntext!$$END$$"; + + let output = run_test_in_another_process( + "control_characters_are_always_stripped_out_from_the_log_messages", + || { + std::env::set_var("RUST_LOG", "trace"); + let mut builder = LoggerBuilder::new(""); + builder.with_colors(true); + builder.init().unwrap(); + log::error!("{}", RAW_LINE); + }, + ); + + if let Some(output) = output { + let stderr = String::from_utf8(output.stderr).unwrap(); + // The log messages should always be sanitized. + assert!(!stderr.contains(RAW_LINE)); + assert!(stderr.contains(SANITIZED_LINE)); + + // The part where the timestamp, the logging level, etc. is printed out doesn't + // always have to be sanitized unless it's necessary, and here it shouldn't be. + assert!(stderr.contains("\x1B[31mERROR\x1B[0m")); + + // Make sure the logs aren't being duplicated. + assert_eq!(stderr.find("ERROR"), stderr.rfind("ERROR")); + assert_eq!(stderr.find(SANITIZED_LINE), stderr.rfind(SANITIZED_LINE)); + } + } } diff --git a/substrate/primitives/panic-handler/Cargo.toml b/substrate/primitives/panic-handler/Cargo.toml index c961d5b089..890cc277bd 100644 --- a/substrate/primitives/panic-handler/Cargo.toml +++ b/substrate/primitives/panic-handler/Cargo.toml @@ -15,3 +15,5 @@ targets = ["x86_64-unknown-linux-gnu"] [dependencies] backtrace = "0.3.38" +regex = "1.5.4" +lazy_static = "1.4.0" diff --git a/substrate/primitives/panic-handler/src/lib.rs b/substrate/primitives/panic-handler/src/lib.rs index 75b057cebf..eddb31a7f2 100644 --- a/substrate/primitives/panic-handler/src/lib.rs +++ b/substrate/primitives/panic-handler/src/lib.rs @@ -25,6 +25,7 @@ //! temporarily be disabled by using an [`AbortGuard`]. use backtrace::Backtrace; +use regex::Regex; use std::{ cell::Cell, io::{self, Write}, @@ -125,6 +126,24 @@ impl Drop for AbortGuard { } } +// NOTE: When making any changes here make sure to also change this function in `sc-tracing`. +fn strip_control_codes(input: &str) -> std::borrow::Cow { + lazy_static::lazy_static! { + static ref RE: Regex = Regex::new(r#"(?x) + \x1b\[[^m]+m| # VT100 escape codes + [ + \x00-\x09\x0B-\x1F # ASCII control codes / Unicode C0 control codes, except \n + \x7F # ASCII delete + \u{80}-\u{9F} # Unicode C1 control codes + \u{202A}-\u{202E} # Unicode left-to-right / right-to-left control characters + \u{2066}-\u{2069} # Same as above + ] + "#).expect("regex parsing doesn't fail; qed"); + } + + RE.replace_all(input, "") +} + /// Function being called when a panic happens. fn panic_hook(info: &PanicInfo, report_url: &str, version: &str) { let location = info.location(); @@ -139,6 +158,8 @@ fn panic_hook(info: &PanicInfo, report_url: &str, version: &str) { }, }; + let msg = strip_control_codes(&msg); + let thread = thread::current(); let name = thread.name().unwrap_or(""); @@ -181,4 +202,44 @@ mod tests { let _guard = AbortGuard::force_abort(); std::panic::catch_unwind(|| panic!()).ok(); } + + fn run_test_in_another_process( + test_name: &str, + test_body: impl FnOnce(), + ) -> Option { + if std::env::var("RUN_FORKED_TEST").is_ok() { + test_body(); + None + } else { + let output = std::process::Command::new(std::env::current_exe().unwrap()) + .arg(test_name) + .env("RUN_FORKED_TEST", "1") + .output() + .unwrap(); + + assert!(output.status.success()); + Some(output) + } + } + + #[test] + fn control_characters_are_always_stripped_out_from_the_panic_messages() { + const RAW_LINE: &str = "$$START$$\x1B[1;32mIn\u{202a}\u{202e}\u{2066}\u{2069}ner\n\r\x7ftext!\u{80}\u{9f}\x1B[0m$$END$$"; + const SANITIZED_LINE: &str = "$$START$$Inner\ntext!$$END$$"; + + let output = run_test_in_another_process( + "control_characters_are_always_stripped_out_from_the_panic_messages", + || { + set("test", "1.2.3"); + let _guard = AbortGuard::force_unwind(); + let _ = std::panic::catch_unwind(|| panic!("{}", RAW_LINE)); + }, + ); + + if let Some(output) = output { + let stderr = String::from_utf8(output.stderr).unwrap(); + assert!(!stderr.contains(RAW_LINE)); + assert!(stderr.contains(SANITIZED_LINE)); + } + } }