diff --git a/Cargo.lock b/Cargo.lock index b58a40e0..171b7d5d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -96,12 +96,6 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4aa90d7ce82d4be67b64039a3d588d38dbcc6736577de4a847025ce5b0c468d1" -[[package]] -name = "android-tzdata" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" - [[package]] name = "android_system_properties" version = "0.1.5" @@ -582,17 +576,16 @@ dependencies = [ [[package]] name = "chrono" -version = "0.4.41" +version = "0.4.42" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c469d952047f47f91b68d1cba3f10d63c11d73e4636f24f08daf0278abf01c4d" +checksum = "145052bdd345b87320e369255277e3fb5152762ad123a901ef5c262dd38fe8d2" dependencies = [ - "android-tzdata", "iana-time-zone", "js-sys", "num-traits", "serde", "wasm-bindgen", - "windows-link", + "windows-link 0.2.1", ] [[package]] @@ -2796,11 +2789,14 @@ dependencies = [ name = "rayhunter-check" version = "0.7.1" dependencies = [ + "chrono", "clap", "futures", "log", "pcap-file-tokio", "rayhunter", + "serde", + "serde_json", "simple_logger", "tokio", "walkdir", @@ -3162,18 +3158,28 @@ checksum = "d767eb0aabc880b29956c35734170f26ed551a859dbd361d140cdbeca61ab1e2" [[package]] name = "serde" -version = "1.0.219" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", + "serde_derive", +] + +[[package]] +name = "serde_core" +version = "1.0.228" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.219" +version = "1.0.228" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" dependencies = [ "proc-macro2", "quote", @@ -3182,14 +3188,15 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.140" +version = "1.0.145" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "20068b6e96dc6c9bd23e01df8827e6c7e1f2fddd43c21810382803c136b99373" +checksum = "402a6f66d8c709116cf22f558eab210f5a50187f702eb4d7e5ef38d9a7f1c79c" dependencies = [ "itoa", "memchr", "ryu", "serde", + "serde_core", ] [[package]] @@ -4048,7 +4055,7 @@ checksum = "46ec44dc15085cea82cf9c78f85a9114c463a369786585ad2882d1ff0b0acf40" dependencies = [ "windows-implement 0.60.0", "windows-interface 0.59.1", - "windows-link", + "windows-link 0.1.1", "windows-result 0.3.3", "windows-strings", ] @@ -4103,6 +4110,12 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "76840935b766e1b0a05c0066835fb9ec80071d4c09a16f6bd5f7e655e3c14c38" +[[package]] +name = "windows-link" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" + [[package]] name = "windows-result" version = "0.1.2" @@ -4118,7 +4131,7 @@ version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"4b895b5356fc36103d0f64dd1e94dfa7ac5633f1c9dd6e80fe9ec4adef69e09d" dependencies = [ - "windows-link", + "windows-link 0.1.1", ] [[package]] @@ -4127,7 +4140,7 @@ version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2a7ab927b2637c19b3dbe0965e75d8f2d30bdd697a1516191cad2ec4df8fb28a" dependencies = [ - "windows-link", + "windows-link 0.1.1", ] [[package]] diff --git a/check/Cargo.toml b/check/Cargo.toml index 54f916b2..17fb7d55 100644 --- a/check/Cargo.toml +++ b/check/Cargo.toml @@ -10,5 +10,12 @@ log = "0.4.20" tokio = { version = "1.44.2", default-features = false, features = ["fs", "signal", "process", "rt-multi-thread"] } pcap-file-tokio = "0.1.0" clap = { version = "4.5.2", features = ["derive"] } -simple_logger = "5.0.0" walkdir = "2.5.0" +serde_json = "1.0.145" +serde = "1.0.228" +chrono = "0.4.42" + +[dependencies.simple_logger] +version = "5.0.0" +# Log to stderr so that report output doesn't potentially collide with system logs +features = ["stderr"] diff --git a/check/src/main.rs b/check/src/main.rs index 47c8e131..175cca07 100644 --- a/check/src/main.rs +++ b/check/src/main.rs @@ -1,18 +1,40 @@ -use clap::Parser; +use clap::{Parser, ValueEnum}; use futures::TryStreamExt; use log::{debug, error, info, warn}; use pcap_file_tokio::pcapng::{Block, PcapNgReader}; use rayhunter::{ - analysis::analyzer::{AnalysisRow, AnalyzerConfig, EventType, Harness}, + analysis::analyzer::{ + AnalysisRow, AnalyzerConfig, DetectionRow, Event, EventType, Harness, ReportMetadata, + }, diag::DataType, gsmtap_parser, pcap::GsmtapPcapWriter, qmdl::QmdlReader, }; +use serde::Serialize; use std::{collections::HashMap, future, path::PathBuf, pin::pin}; -use tokio::fs::File; +use tokio::fs::{File, OpenOptions}; +use tokio::io::{AsyncWriteExt, BufWriter}; use walkdir::WalkDir; +#[derive(ValueEnum, Copy, Clone, Debug, Default)] +enum ReportFormat { + /// Log detections to stdout + #[default] + Log, + /// Write a newline-delimited JSON file for each report + Ndjson, +} + +impl std::fmt::Display for ReportFormat { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + ReportFormat::Log => write!(f, "log"), + ReportFormat::Ndjson => write!(f, "ndjson"), + } + } +} + #[derive(Parser, Debug)] #[command(version, about)] struct Args { @@ -22,6 +44,14 @@ struct Args { #[arg(short = 'P', long, help = "Convert qmdl files to pcap before analysis")] pcapify: bool, + #[arg( + short = 'r', + long, + help = "Generate a report for each capture analyzed", + default_value_t = ReportFormat::default() + )] + report: ReportFormat, + #[arg(long, help = "Show why some packets were skipped during analysis")] show_skipped: bool, @@ -32,43 +62,42 @@ struct Args { debug: bool, } -#[derive(Default)] -struct Report { +#[derive(Default, Debug, Clone, Serialize)] +struct Summary { skipped_reasons: HashMap, total_messages: u32, - warnings: u32, skipped: u32, +} + +#[derive(Default)] +struct LogReport { + show_skipped: bool, + warnings: u32, file_path: String, } -impl Report { - fn new(file_path: &str) -> Self { - Report { +impl LogReport { + fn new(file_path: &str, show_skipped: bool) -> Self { + LogReport { file_path: file_path.to_string(), + show_skipped, ..Default::default() } } - fn process_row(&mut self, row: AnalysisRow) { - self.total_messages += 1; - if let Some(reason) = row.skipped_message_reason { - *self.skipped_reasons.entry(reason).or_insert(0) += 1; - self.skipped += 1; - return; - } - for maybe_event in row.events { - let Some(event) = maybe_event else { 
continue }; - let Some(timestamp) = row.packet_timestamp else { - continue; - }; + fn process_row(&mut self, row: DetectionRow) { + for event in row.events { match event.event_type { EventType::Informational => { - info!("{}: INFO - {} {}", self.file_path, timestamp, event.message,); + info!( + "{}: INFO - {} {}", + self.file_path, row.packet_timestamp, event.message, + ); } EventType::Low | EventType::Medium | EventType::High => { warn!( "{}: WARNING (Severity: {:?}) - {} {}", - self.file_path, event.event_type, timestamp, event.message, + self.file_path, event.event_type, row.packet_timestamp, event.message, ); self.warnings += 1; } @@ -76,27 +105,147 @@ } } - fn print_summary(&self, show_skipped: bool) { - if show_skipped && self.skipped > 0 { + fn finish(&self, summary: &Summary) { + if self.show_skipped && summary.skipped > 0 { info!("{}: messages skipped:", self.file_path); - for (reason, count) in self.skipped_reasons.iter() { + for (reason, count) in summary.skipped_reasons.iter() { info!(" - {count}: \"{reason}\""); } } info!( "{}: {} messages analyzed, {} warnings, {} messages skipped", - self.file_path, self.total_messages, self.warnings, self.skipped + self.file_path, summary.total_messages, self.warnings, summary.skipped ); } } -async fn analyze_pcap(pcap_path: &str, show_skipped: bool) { +struct NdjsonReport { + writer: BufWriter<File>, +} + +// The `ndjson` report has the same output format as the daemon analysis report. +// See also: [Newline Delimited JSON](https://docs.mulesoft.com/dataweave/latest/dataweave-formats-ndjson) +impl NdjsonReport { + async fn new(file_path: &str, metadata: &ReportMetadata) -> std::io::Result<Self> { + let mut report_path = PathBuf::from(file_path); + report_path.set_extension("ndjson"); + let writer = OpenOptions::new() + .write(true) + .truncate(true) + .create(true) + .open(&report_path) + .await + .map(BufWriter::new)?; + + let mut r = NdjsonReport { writer }; + + // Analysis metadata is written as the first line of the ndjson report + r.write(metadata).await?; + + Ok(r) + } + + async fn write<T: Serialize>(&mut self, value: &T) -> std::io::Result<()> { + let mut value_str = serde_json::to_string(value).unwrap(); + value_str.push('\n'); + self.writer.write_all(value_str.as_bytes()).await + } + + async fn process_row(&mut self, row: DetectionRow) { + self.write(&row).await.expect("failed to write ndjson row"); + } + + async fn finish(&mut self, _summary: &Summary) { + self.writer + .flush() + .await + .expect("failed to flush ndjson report"); + } +} + +enum ReportDest { + Log(LogReport), + Ndjson(NdjsonReport), +} + +struct Report { + show_skipped: bool, + summary: Summary, + dest: ReportDest, +} + +impl Report { + async fn build( + format: ReportFormat, + harness: &Harness, + show_skipped: bool, + path_str: &str, + ) -> Self { + let dest = match format { + ReportFormat::Log => { + let r = LogReport::new(path_str, show_skipped); + ReportDest::Log(r) + } + ReportFormat::Ndjson => { + let metadata = harness.get_metadata(); + let ndjson_report = NdjsonReport::new(path_str, &metadata) + .await + .expect("failed to create ndjson report"); + ReportDest::Ndjson(ndjson_report) + } + }; + + Report::new_with_dest(show_skipped, dest) + } + fn new_with_dest(show_skipped: bool, dest: ReportDest) -> Self { + Report { + show_skipped, + summary: Summary::default(), + dest, + } + } + + async fn process_row(&mut self, row: AnalysisRow) { + self.summary.total_messages += 1; + if let Some(ref reason) = row.skipped_message_reason { + *self + .summary
.skipped_reasons + .entry(reason.clone()) + .or_insert(0) += 1; + self.summary.skipped += 1; + + if !self.show_skipped { + return; + } + } + + let det = DetectionRow::try_from(row).ok(); + if let Some(detection) = det { + match &mut self.dest { + ReportDest::Log(r) => r.process_row(detection), + ReportDest::Ndjson(r) => r.process_row(detection).await, + } + } + } + + async fn finish(&mut self) { + match &mut self.dest { + ReportDest::Log(r) => r.finish(&self.summary), + ReportDest::Ndjson(r) => r.finish(&self.summary).await, + } + } +} + +async fn analyze_pcap(pcap_path: &str, args: &Args) { let mut harness = Harness::new_with_config(&AnalyzerConfig::default()); let pcap_file = &mut File::open(&pcap_path).await.expect("failed to open file"); let mut pcap_reader = PcapNgReader::new(pcap_file) .await .expect("failed to read PCAP file"); - let mut report = Report::new(pcap_path); + + let mut report = Report::build(args.report, &harness, args.show_skipped, pcap_path).await; + while let Some(Ok(block)) = pcap_reader.next_block().await { let row = match block { Block::EnhancedPacket(packet) => harness.analyze_pcap_packet(packet), @@ -105,12 +254,12 @@ async fn analyze_pcap(pcap_path: &str, show_skipped: bool) { continue; } }; - report.process_row(row); + report.process_row(row).await; } - report.print_summary(show_skipped); + report.finish().await; } -async fn analyze_qmdl(qmdl_path: &str, show_skipped: bool) { +async fn analyze_qmdl(qmdl_path: &str, args: &Args) { let mut harness = Harness::new_with_config(&AnalyzerConfig::default()); let qmdl_file = &mut File::open(&qmdl_path).await.expect("failed to open file"); let file_size = qmdl_file @@ -124,17 +273,17 @@ async fn analyze_qmdl(qmdl_path: &str, show_skipped: bool) { .as_stream() .try_filter(|container| future::ready(container.data_type == DataType::UserSpace)) ); - let mut report = Report::new(qmdl_path); + let mut report = Report::build(args.report, &harness, args.show_skipped, qmdl_path).await; while let Some(container) = qmdl_stream .try_next() .await .expect("failed getting QMDL container") { for row in harness.analyze_qmdl_messages(container) { - report.process_row(row); + report.process_row(row).await; } } - report.print_summary(show_skipped); + report.finish().await; } async fn pcapify(qmdl_path: &PathBuf) { @@ -206,16 +355,17 @@ async fn main() { let path_str = path.to_str().unwrap(); // instead of relying on the QMDL extension, can we check if a file is // QMDL by inspecting the contents? 
+ if name_str.ends_with(".qmdl") { info!("**** Beginning analysis of {name_str}"); - analyze_qmdl(path_str, args.show_skipped).await; + analyze_qmdl(path_str, &args).await; if args.pcapify { pcapify(&path.to_path_buf()).await; } } else if name_str.ends_with(".pcap") || name_str.ends_with(".pcapng") { // TODO: if we've already analyzed a QMDL, skip its corresponding pcap info!("**** Beginning analysis of {name_str}"); - analyze_pcap(path_str, args.show_skipped).await; + analyze_pcap(path_str, &args).await; } } } diff --git a/lib/src/analysis/analyzer.rs b/lib/src/analysis/analyzer.rs index d2ca26fb..3ca89b04 100644 --- a/lib/src/analysis/analyzer.rs +++ b/lib/src/analysis/analyzer.rs @@ -298,6 +298,35 @@ impl<'de> Deserialize<'de> for AnalysisRow { } } +#[derive(Serialize, Clone)] +pub struct DetectionRow { + pub packet_timestamp: DateTime<FixedOffset>, + pub events: Vec<Event>, + pub skipped_message_reason: Option<String>, +} + +impl TryFrom<AnalysisRow> for DetectionRow { + type Error = &'static str; + + fn try_from(ar: AnalysisRow) -> Result<Self, Self::Error> { + let events: Vec<Event> = ar.events.into_iter().flatten().collect(); + + if events.is_empty() { + return Err("No detection events in analysis row"); + } + + let Some(timestamp) = ar.packet_timestamp else { + return Err("Missing packet timestamp"); + }; + + Ok(DetectionRow { + packet_timestamp: timestamp, + events, + skipped_message_reason: ar.skipped_message_reason, + }) + } +} + pub struct Harness { analyzers: Vec<Box<dyn Analyzer>>, packet_num: usize,
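Note for reviewers: a minimal sketch of consuming the .ndjson report this PR produces, assuming (per NdjsonReport above) a single ReportMetadata line followed by one serialized DetectionRow per line. The read_report helper is hypothetical, and serde_json::Value stands in for the concrete types since ReportMetadata's fields aren't shown in this diff:

use std::fs::File;
use std::io::{BufRead, BufReader};

fn read_report(path: &str) -> std::io::Result<()> {
    let reader = BufReader::new(File::open(path)?);
    let mut lines = reader.lines();

    // First line: the analysis metadata written by NdjsonReport::new
    if let Some(metadata_line) = lines.next() {
        let metadata: serde_json::Value = serde_json::from_str(&metadata_line?)?;
        println!("metadata: {metadata}");
    }

    // Every subsequent line is one DetectionRow
    for line in lines {
        let row: serde_json::Value = serde_json::from_str(&line?)?;
        println!("detection: {row}");
    }
    Ok(())
}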