|
1 | | -use std::path::Path; |
2 | | -use std::{fs::File, io::BufReader}; |
| 1 | +use std::io::{BufReader, Write}; |
| 2 | +use std::path::{self, Path, PathBuf}; |
| 3 | +use std::{fs::create_dir_all, fs::File, ops::RangeInclusive, str::FromStr}; |
3 | 4 |
|
4 | | -use mft::csv::FlatMftEntryWithName; |
5 | | -use mft::MftParser; |
6 | | -use serde_json::Value as Json; |
| 5 | +use anyhow::{anyhow, Error, Result}; |
| 6 | +use mft::{ |
| 7 | + attribute::MftAttributeType, |
| 8 | + csv::FlatMftEntryWithName, |
| 9 | + entry::{MftEntry, ZERO_HEADER}, |
| 10 | + MftParser, |
| 11 | +}; |
| 12 | +use serde::Serialize; |
| 13 | +use serde_json::{json, Value as Json}; |
7 | 14 |
|
/// JSON value produced for each parsed MFT entry (a `serde_json::Value`).
pub type Mft = Json;
9 | 16 |
|
/// Wraps an [`MftParser`] together with the options controlling how
/// `$DATA` streams are extracted and rendered.
pub struct Parser {
    /// Underlying MFT parser over a buffered file handle.
    pub inner: MftParser<BufReader<File>>,
    /// Optional filter of entry numbers that `parse` should visit; `None`
    /// means every entry. Nothing in this file sets it to `Some` —
    /// presumably assigned elsewhere in the crate (TODO confirm).
    ranges: Option<Ranges>,
    /// When `Some`, extracted `$DATA` streams are also written out as files
    /// under this directory.
    pub data_streams_directory: Option<PathBuf>,
    /// When `true`, stream contents are decoded as lossy UTF-8 text instead
    /// of being hex-encoded.
    pub decode_data_streams: bool,
}
| 23 | + |
/// Serialized form of a single `$DATA` stream attached to an MFT entry.
#[derive(Serialize)]
struct DataStreams {
    /// Attribute name as read from the `$DATA` attribute header.
    stream_name: String,
    /// Zero-based position of this stream among the entry's `$DATA` attributes.
    stream_number: usize,
    /// Stream contents: lossy UTF-8 text or a hex string, depending on
    /// `Parser::decode_data_streams`.
    stream_data: String,
}
| 30 | + |
/// A set of inclusive entry-number ranges (e.g. parsed from `"1,5-10"`).
struct Ranges(Vec<RangeInclusive<usize>>);

impl Ranges {
    /// Yields every entry number covered by the ranges, one range after
    /// another, in the order the ranges were stored.
    pub fn chain(&self) -> impl Iterator<Item = usize> + '_ {
        self.0.iter().flat_map(|range| range.clone())
    }
}
| 38 | + |
| 39 | +impl FromStr for Ranges { |
| 40 | + type Err = Error; |
| 41 | + |
| 42 | + fn from_str(s: &str) -> Result<Self> { |
| 43 | + let mut ranges = vec![]; |
| 44 | + for x in s.split(',') { |
| 45 | + // range |
| 46 | + if x.contains('-') { |
| 47 | + let range: Vec<&str> = x.split('-').collect(); |
| 48 | + if range.len() != 2 { |
| 49 | + return Err(anyhow!( |
| 50 | + "Failed to parse ranges: Range should contain exactly one `-`, found {}", |
| 51 | + x |
| 52 | + )); |
| 53 | + } |
| 54 | + |
| 55 | + ranges.push(range[0].parse()?..=range[1].parse()?); |
| 56 | + } else { |
| 57 | + let n = x.parse()?; |
| 58 | + ranges.push(n..=n); |
| 59 | + } |
| 60 | + } |
| 61 | + |
| 62 | + Ok(Ranges(ranges)) |
| 63 | + } |
12 | 64 | } |
13 | 65 |
|
impl Parser {
    /// Opens the MFT file at `file` and builds a [`Parser`].
    ///
    /// * `data_streams_directory` — when `Some`, extracted `$DATA` streams
    ///   are additionally written out as files under this directory.
    /// * `decode_data_streams` — when `true`, stream contents are decoded as
    ///   lossy UTF-8 instead of hex-encoded.
    ///
    /// # Errors
    /// Propagates any error from [`MftParser::from_path`].
    pub fn load(
        file: &Path,
        data_streams_directory: Option<PathBuf>,
        decode_data_streams: bool,
    ) -> crate::Result<Self> {
        let parser = MftParser::from_path(file)?;
        Ok(Self {
            inner: parser,
            // No entry filter by default; `ranges` is consumed (taken) by
            // `parse`, and nothing in this file ever sets it to `Some`.
            ranges: None,
            data_streams_directory,
            decode_data_streams,
        })
    }

    /// Walks the MFT and yields one JSON object per readable entry.
    ///
    /// Entries whose header signature is [`ZERO_HEADER`] are skipped;
    /// unreadable entries are reported via `cs_eyellowln!` and dropped rather
    /// than aborting the walk. Each yielded object is the
    /// [`FlatMftEntryWithName`] serialization of the entry with an extra
    /// `"DataStreams"` field appended (see [`extract_data_streams`]).
    pub fn parse(&mut self) -> impl Iterator<Item = crate::Result<Json>> + '_ {
        // Code is adapted MFT Library implementation of the mft_dump.rs file
        // Reference: https://github.com/omerbenamram/mft/blob/6767bb5d3787b5532a7a5a07532f0c6b4e22413d/src/bin/mft_dump.rs#L289

        // NOTE(review): `expect` aborts the whole process on an I/O failure
        // here — consider surfacing this as an `Err` item instead of panicking.
        if let Some(data_streams_dir) = &self.data_streams_directory {
            if !data_streams_dir.exists() {
                create_dir_all(data_streams_dir).expect("Failed to create data streams directory");
            }
        }

        let number_of_entries = self.inner.get_entry_count();

        // Move the filter out of `self` so the boxed iterator below can borrow
        // it locally without conflicting with the later `&mut self.inner` uses.
        let take_ranges = self.ranges.take();

        // Either the caller-supplied entry numbers or every entry in the file.
        let entries = match take_ranges {
            Some(ref ranges) => Box::new(ranges.chain()),
            None => Box::new(0..number_of_entries as usize) as Box<dyn Iterator<Item = usize>>,
        };

        // Eagerly collect entries first: `FlatMftEntryWithName::from_entry`
        // below needs `&mut self.inner`, which cannot be borrowed while the
        // entry iterator still borrows `self.inner`.
        let collected_entries: Vec<_> = entries
            .filter_map(|i| {
                let entry = self.inner.get_entry(i as u64);
                match entry {
                    Ok(entry) => match &entry.header.signature {
                        // Skip entries with zero headers
                        ZERO_HEADER => None,
                        _ => Some(entry),
                    },
                    Err(error) => {
                        // Report and skip unreadable entries (project macro;
                        // presumably a yellow warning line — TODO confirm).
                        cs_eyellowln!("{}", error);
                        None
                    }
                }
            })
            .collect();

        collected_entries.into_iter().map(|e| {
            // Get the MFT entry base details from the entry using FlatMftEntryWithName
            match serde_json::to_value(FlatMftEntryWithName::from_entry(&e, &mut self.inner)) {
                Ok(mut val) => {
                    // Extract the DataStreams from the MFT entry
                    val["DataStreams"] = extract_data_streams(self, &e)?;
                    Ok(val)
                }
                Err(e) => Err(anyhow::Error::from(e)),
            }
        })
    }
}
| 130 | + |
| 131 | +pub fn extract_data_streams(parser: &mut Parser, entry: &MftEntry) -> crate::Result<Json> { |
| 132 | + // This function is used to extract the data streams from the MFT entry. |
| 133 | + // It will attempt to write the data streams to the output path if provided. |
| 134 | + // It will attempt to decode the data streams if the decode_data_streams flag is set. |
| 135 | + |
| 136 | + // Code is based on the MFT Library implementation of the mft_dump.rs file |
| 137 | + // Reference: https://github.com/omerbenamram/mft/blob/6767bb5d3787b5532a7a5a07532f0c6b4e22413d/src/bin/mft_dump.rs#L289 |
| 138 | + |
| 139 | + let mut data_streams = vec![]; |
| 140 | + |
| 141 | + for (i, (name, stream)) in entry |
| 142 | + .iter_attributes() |
| 143 | + .filter_map(|a| a.ok()) |
| 144 | + .filter_map(|a| { |
| 145 | + if a.header.type_code == MftAttributeType::DATA { |
| 146 | + let name = a.header.name.clone(); |
| 147 | + a.data.into_data().map(|data| (name, data)) |
| 148 | + } else { |
| 149 | + None |
| 150 | + } |
28 | 151 | }) |
| 152 | + .enumerate() |
| 153 | + { |
| 154 | + if let Some(data_streams_dir) = &parser.data_streams_directory { |
| 155 | + if let Some(path) = parser.inner.get_full_path_for_entry(entry)? { |
| 156 | + // Replace file path seperators with underscores |
| 157 | + |
| 158 | + let sanitized_path = path |
| 159 | + .to_string_lossy() |
| 160 | + .chars() |
| 161 | + .map(|c| if path::is_separator(c) { '_' } else { c }) |
| 162 | + .collect::<String>(); |
| 163 | + |
| 164 | + let output_path: String = data_streams_dir |
| 165 | + .join(&sanitized_path) |
| 166 | + .to_string_lossy() |
| 167 | + .to_string(); |
| 168 | + |
| 169 | + // Generate 6 characters random hex string |
| 170 | + let random: String = (0..6) |
| 171 | + .map(|_| format!("{:02x}", rand::random::<u8>())) |
| 172 | + .fold(String::new(), |acc, hex| format!("{}{}", acc, hex)); |
| 173 | + |
| 174 | + let truncated: String = output_path.chars().take(150).collect(); |
| 175 | + |
| 176 | + if PathBuf::from(&output_path).exists() { |
| 177 | + return Err(anyhow!( |
| 178 | + "Data stream output path already exists: {}\n\ |
| 179 | + Exiting out of precaution.", |
| 180 | + output_path |
| 181 | + )); |
| 182 | + } |
| 183 | + |
| 184 | + File::create(format!( |
| 185 | + "{path}__{random}_{stream_number}_{stream_name}.disabled", |
| 186 | + path = truncated, |
| 187 | + random = random, |
| 188 | + stream_number = i, |
| 189 | + stream_name = name |
| 190 | + ))? |
| 191 | + .write_all(stream.data())?; |
| 192 | + } |
| 193 | + } |
| 194 | + |
| 195 | + //convert stream.data() to a hex string |
| 196 | + let final_data_stream = if parser.decode_data_streams { |
| 197 | + String::from_utf8_lossy(stream.data()).to_string() |
| 198 | + } else { |
| 199 | + stream |
| 200 | + .data() |
| 201 | + .iter() |
| 202 | + .map(|byte| format!("{:02x}", byte)) |
| 203 | + .fold(String::new(), |acc, hex| format!("{}{}", acc, hex)) |
| 204 | + }; |
| 205 | + |
| 206 | + data_streams.push(DataStreams { |
| 207 | + stream_name: name, |
| 208 | + stream_number: i, |
| 209 | + stream_data: final_data_stream, |
| 210 | + }); |
29 | 211 | } |
| 212 | + Ok(json!(data_streams)) |
30 | 213 | } |
0 commit comments