Skip to content

Commit 16b81ff

Browse files
Merge pull request #210 from WithSecureLabs/feat/mft_resident_streams
feat: extract, decode and write MFT streams
2 parents 8799abb + 86daeb0 commit 16b81ff

File tree

7 files changed

+259
-22
lines changed

7 files changed

+259
-22
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ once_cell = "1.0"
3030
prettytable-rs = "0.10"
3131
quick-xml = { version = "0.37", features = ["serialize"] }
3232
rayon = "1.5"
33+
rand = "0.8"
3334
regex = "1.6"
3435
rustc-hash = "2.0"
3536
serde = { version = "1.0", features = ["derive"] }

src/file/mft.rs

Lines changed: 197 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,30 +1,213 @@
1-
use std::path::Path;
2-
use std::{fs::File, io::BufReader};
1+
use std::io::{BufReader, Write};
2+
use std::path::{self, Path, PathBuf};
3+
use std::{fs::create_dir_all, fs::File, ops::RangeInclusive, str::FromStr};
34

4-
use mft::csv::FlatMftEntryWithName;
5-
use mft::MftParser;
6-
use serde_json::Value as Json;
5+
use anyhow::{anyhow, Error, Result};
6+
use mft::{
7+
attribute::MftAttributeType,
8+
csv::FlatMftEntryWithName,
9+
entry::{MftEntry, ZERO_HEADER},
10+
MftParser,
11+
};
12+
use serde::Serialize;
13+
use serde_json::{json, Value as Json};
714

815
pub type Mft = Json;
916

1017
pub struct Parser {
1118
pub inner: MftParser<BufReader<File>>,
19+
ranges: Option<Ranges>,
20+
pub data_streams_directory: Option<PathBuf>,
21+
pub decode_data_streams: bool,
22+
}
23+
24+
#[derive(Serialize)]
25+
struct DataStreams {
26+
stream_name: String,
27+
stream_number: usize,
28+
stream_data: String,
29+
}
30+
31+
/// An ordered list of inclusive index ranges.
struct Ranges(Vec<RangeInclusive<usize>>);

impl Ranges {
    /// Iterates over every index of every range, in the order the ranges were given.
    ///
    /// Overlapping ranges yield their shared indices more than once.
    pub fn chain(&self) -> impl Iterator<Item = usize> + '_ {
        self.0.iter().flat_map(|range| range.clone())
    }
}
38+
39+
impl FromStr for Ranges {
40+
type Err = Error;
41+
42+
fn from_str(s: &str) -> Result<Self> {
43+
let mut ranges = vec![];
44+
for x in s.split(',') {
45+
// range
46+
if x.contains('-') {
47+
let range: Vec<&str> = x.split('-').collect();
48+
if range.len() != 2 {
49+
return Err(anyhow!(
50+
"Failed to parse ranges: Range should contain exactly one `-`, found {}",
51+
x
52+
));
53+
}
54+
55+
ranges.push(range[0].parse()?..=range[1].parse()?);
56+
} else {
57+
let n = x.parse()?;
58+
ranges.push(n..=n);
59+
}
60+
}
61+
62+
Ok(Ranges(ranges))
63+
}
1264
}
1365

1466
impl Parser {
15-
pub fn load(file: &Path) -> crate::Result<Self> {
67+
pub fn load(
68+
file: &Path,
69+
data_streams_directory: Option<PathBuf>,
70+
decode_data_streams: bool,
71+
) -> crate::Result<Self> {
1672
let parser = MftParser::from_path(file)?;
17-
Ok(Self { inner: parser })
73+
Ok(Self {
74+
inner: parser,
75+
ranges: None,
76+
data_streams_directory,
77+
decode_data_streams,
78+
})
1879
}
1980

2081
pub fn parse(&mut self) -> impl Iterator<Item = crate::Result<Json>> + '_ {
21-
// FIXME: Due to the nested borrowing we still have to do a full pass which is memory
22-
// hungry but there is no easy way around this for now...
23-
let entries = self.inner.iter_entries().collect::<Vec<_>>();
24-
entries.into_iter().map(|e| match e {
25-
Ok(e) => serde_json::to_value(FlatMftEntryWithName::from_entry(&e, &mut self.inner))
26-
.map_err(|e| e.into()),
27-
Err(e) => anyhow::bail!(e),
82+
// Code is adapted MFT Library implementation of the mft_dump.rs file
83+
// Reference: https://github.com/omerbenamram/mft/blob/6767bb5d3787b5532a7a5a07532f0c6b4e22413d/src/bin/mft_dump.rs#L289
84+
85+
if let Some(data_streams_dir) = &self.data_streams_directory {
86+
if !data_streams_dir.exists() {
87+
create_dir_all(data_streams_dir).expect("Failed to create data streams directory");
88+
}
89+
}
90+
91+
let number_of_entries = self.inner.get_entry_count();
92+
93+
let take_ranges = self.ranges.take();
94+
95+
let entries = match take_ranges {
96+
Some(ref ranges) => Box::new(ranges.chain()),
97+
None => Box::new(0..number_of_entries as usize) as Box<dyn Iterator<Item = usize>>,
98+
};
99+
100+
let collected_entries: Vec<_> = entries
101+
.filter_map(|i| {
102+
let entry = self.inner.get_entry(i as u64);
103+
match entry {
104+
Ok(entry) => match &entry.header.signature {
105+
// Skip entries with zero headers
106+
ZERO_HEADER => None,
107+
_ => Some(entry),
108+
},
109+
Err(error) => {
110+
cs_eyellowln!("{}", error);
111+
None
112+
}
113+
}
114+
})
115+
.collect();
116+
117+
collected_entries.into_iter().map(|e| {
118+
// Get the MFT entry base details from the entry using FlatMftEntryWithName
119+
match serde_json::to_value(FlatMftEntryWithName::from_entry(&e, &mut self.inner)) {
120+
Ok(mut val) => {
121+
// Extract the DataStreams from the MFT entry
122+
val["DataStreams"] = extract_data_streams(self, &e)?;
123+
Ok(val)
124+
}
125+
Err(e) => Err(anyhow::Error::from(e)),
126+
}
127+
})
128+
}
129+
}
130+
131+
pub fn extract_data_streams(parser: &mut Parser, entry: &MftEntry) -> crate::Result<Json> {
132+
// This function is used to extract the data streams from the MFT entry.
133+
// It will attempt to write the data streams to the output path if provided.
134+
// It will attempt to decode the data streams if the decode_data_streams flag is set.
135+
136+
// Code is based on the MFT Library implementation of the mft_dump.rs file
137+
// Reference: https://github.com/omerbenamram/mft/blob/6767bb5d3787b5532a7a5a07532f0c6b4e22413d/src/bin/mft_dump.rs#L289
138+
139+
let mut data_streams = vec![];
140+
141+
for (i, (name, stream)) in entry
142+
.iter_attributes()
143+
.filter_map(|a| a.ok())
144+
.filter_map(|a| {
145+
if a.header.type_code == MftAttributeType::DATA {
146+
let name = a.header.name.clone();
147+
a.data.into_data().map(|data| (name, data))
148+
} else {
149+
None
150+
}
28151
})
152+
.enumerate()
153+
{
154+
if let Some(data_streams_dir) = &parser.data_streams_directory {
155+
if let Some(path) = parser.inner.get_full_path_for_entry(entry)? {
156+
// Replace file path seperators with underscores
157+
158+
let sanitized_path = path
159+
.to_string_lossy()
160+
.chars()
161+
.map(|c| if path::is_separator(c) { '_' } else { c })
162+
.collect::<String>();
163+
164+
let output_path: String = data_streams_dir
165+
.join(&sanitized_path)
166+
.to_string_lossy()
167+
.to_string();
168+
169+
// Generate 6 characters random hex string
170+
let random: String = (0..6)
171+
.map(|_| format!("{:02x}", rand::random::<u8>()))
172+
.fold(String::new(), |acc, hex| format!("{}{}", acc, hex));
173+
174+
let truncated: String = output_path.chars().take(150).collect();
175+
176+
if PathBuf::from(&output_path).exists() {
177+
return Err(anyhow!(
178+
"Data stream output path already exists: {}\n\
179+
Exiting out of precaution.",
180+
output_path
181+
));
182+
}
183+
184+
File::create(format!(
185+
"{path}__{random}_{stream_number}_{stream_name}.disabled",
186+
path = truncated,
187+
random = random,
188+
stream_number = i,
189+
stream_name = name
190+
))?
191+
.write_all(stream.data())?;
192+
}
193+
}
194+
195+
//convert stream.data() to a hex string
196+
let final_data_stream = if parser.decode_data_streams {
197+
String::from_utf8_lossy(stream.data()).to_string()
198+
} else {
199+
stream
200+
.data()
201+
.iter()
202+
.map(|byte| format!("{:02x}", byte))
203+
.fold(String::new(), |acc, hex| format!("{}{}", acc, hex))
204+
};
205+
206+
data_streams.push(DataStreams {
207+
stream_name: name,
208+
stream_number: i,
209+
stream_data: final_data_stream,
210+
});
29211
}
212+
Ok(json!(data_streams))
30213
}

src/file/mod.rs

Lines changed: 23 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,13 @@ pub struct Reader {
101101
}
102102

103103
impl Reader {
104-
pub fn load(file: &Path, load_unknown: bool, skip_errors: bool) -> crate::Result<Self> {
104+
pub fn load(
105+
file: &Path,
106+
load_unknown: bool,
107+
skip_errors: bool,
108+
decode_data_streams: bool,
109+
data_streams_directory: Option<PathBuf>,
110+
) -> crate::Result<Self> {
105111
// NOTE: We don't want to use libmagic because then we have to include databases etc... So
106112
// for now we assume that the file extensions are correct!
107113
match file.extension().and_then(|e| e.to_str()) {
@@ -173,7 +179,11 @@ impl Reader {
173179
})
174180
}
175181
"bin" | "mft" => {
176-
let parser = match MftParser::load(file) {
182+
let parser = match MftParser::load(
183+
file,
184+
data_streams_directory.clone(),
185+
decode_data_streams,
186+
) {
177187
Ok(parser) => parser,
178188
Err(e) => {
179189
if skip_errors {
@@ -266,7 +276,11 @@ impl Reader {
266276
return Ok(Self {
267277
parser: Parser::Evtx(parser),
268278
});
269-
} else if let Ok(parser) = MftParser::load(file) {
279+
} else if let Ok(parser) = MftParser::load(
280+
file,
281+
data_streams_directory.clone(),
282+
decode_data_streams,
283+
) {
270284
return Ok(Self {
271285
parser: Parser::Mft(parser),
272286
});
@@ -311,7 +325,9 @@ impl Reader {
311325
None => {
312326
// Edge cases
313327
if file.file_name().and_then(|e| e.to_str()) == Some("$MFT") {
314-
if let Ok(parser) = MftParser::load(file) {
328+
if let Ok(parser) =
329+
MftParser::load(file, data_streams_directory.clone(), decode_data_streams)
330+
{
315331
return Ok(Self {
316332
parser: Parser::Mft(parser),
317333
});
@@ -322,7 +338,9 @@ impl Reader {
322338
return Ok(Self {
323339
parser: Parser::Evtx(parser),
324340
});
325-
} else if let Ok(parser) = MftParser::load(file) {
341+
} else if let Ok(parser) =
342+
MftParser::load(file, data_streams_directory.clone(), decode_data_streams)
343+
{
326344
return Ok(Self {
327345
parser: Parser::Mft(parser),
328346
});

src/hunt.rs

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -774,7 +774,14 @@ impl Hunter {
774774
file: &'a Path,
775775
cache: &Option<std::fs::File>,
776776
) -> crate::Result<Vec<Detections<'a>>> {
777-
let mut reader = Reader::load(file, self.inner.load_unknown, self.inner.skip_errors)?;
777+
let mut reader = Reader::load(
778+
file,
779+
self.inner.load_unknown,
780+
self.inner.skip_errors,
781+
true,
782+
None,
783+
)?;
784+
778785
let kind = reader.kind();
779786
#[allow(clippy::type_complexity)]
780787
let aggregates: Mutex<

src/main.rs

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,14 @@ enum Command {
8383
/// Continue to hunt when an error is encountered.
8484
#[arg(long = "skip-errors")]
8585
skip_errors: bool,
86+
87+
// MFT Specific Options
88+
/// Attempt to decode all extracted data streams from Hex to UTF-8
89+
#[arg(long = "decode-data-streams", help_heading = "MFT Specific Options")]
90+
decode_data_streams: bool,
91+
/// Extracted data streams will be decoded and written to this directory
92+
#[arg(long = "data-streams-directory", help_heading = "MFT Specific Options")]
93+
data_streams_directory: Option<PathBuf>,
8694
},
8795

8896
/// Hunt through artefacts using detection rules for threat detection.
@@ -408,6 +416,8 @@ fn run() -> Result<()> {
408416
output,
409417
quiet,
410418
skip_errors,
419+
decode_data_streams,
420+
data_streams_directory,
411421
} => {
412422
init_writer(output, false, json, quiet, args.verbose)?;
413423
if !args.no_banner {
@@ -449,7 +459,17 @@ fn run() -> Result<()> {
449459

450460
let mut first = true;
451461
for path in &files {
452-
let mut reader = Reader::load(path, load_unknown, skip_errors)?;
462+
let mut reader = Reader::load(
463+
path,
464+
load_unknown,
465+
skip_errors,
466+
decode_data_streams,
467+
data_streams_directory.clone(),
468+
)?;
469+
470+
// We try to keep the reader and parser as generic as possible.
471+
// However in some cases we need to pass artefact specific arguments to the parser.
472+
// If the argument is not relevant for the artefact, it is ignored.
453473
for result in reader.documents() {
454474
let document = match result {
455475
Ok(document) => document,
@@ -473,6 +493,7 @@ fn run() -> Result<()> {
473493
| Document::Mft(json)
474494
| Document::Esedb(json) => json,
475495
};
496+
476497
if json {
477498
if first {
478499
first = false;

src/search.rs

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -334,7 +334,13 @@ impl Searcher {
334334
}
335335

336336
pub fn search(&self, file: &Path) -> crate::Result<Hits<'_>> {
337-
let reader = Reader::load(file, self.inner.load_unknown, self.inner.skip_errors)?;
337+
let reader = Reader::load(
338+
file,
339+
self.inner.load_unknown,
340+
self.inner.skip_errors,
341+
true,
342+
None,
343+
)?;
338344
Ok(Hits {
339345
reader,
340346
searcher: &self.inner,

0 commit comments

Comments
 (0)