Refactor arc parsing and add internal tests for it

master
Simon Bruder 2020-07-09 14:47:53 +02:00
parent 85650f474b
commit dad3a7ff13
No known key found for this signature in database
GPG Key ID: 6F03E0000CC5B62F
3 changed files with 187 additions and 64 deletions

View File

@ -1,5 +1,6 @@
use std::collections::HashMap; use std::collections::HashMap;
use std::convert::TryInto; use std::convert::TryInto;
use std::default::Default;
use std::io; use std::io;
use std::io::prelude::*; use std::io::prelude::*;
use std::io::Cursor; use std::io::Cursor;
@ -7,6 +8,7 @@ use std::num;
use std::path::PathBuf; use std::path::PathBuf;
use byteorder::{ReadBytesExt, LE}; use byteorder::{ReadBytesExt, LE};
use derive_more::Deref;
use konami_lz77::decompress; use konami_lz77::decompress;
use log::{debug, info, trace, warn}; use log::{debug, info, trace, warn};
use thiserror::Error; use thiserror::Error;
@ -29,38 +31,80 @@ pub enum Error {
MiniParserError(#[from] mini_parser::MiniParserError), MiniParserError(#[from] mini_parser::MiniParserError),
} }
#[derive(Debug)] type Result<T> = std::result::Result<T, Error>;
#[derive(Debug, Default, PartialEq)]
struct CueEntry { struct CueEntry {
name_offset: usize, path_offset: usize,
data_offset: usize, data_offset: usize,
decompressed_size: usize, decompressed_size: usize,
compressed_size: usize, compressed_size: usize,
} }
impl CueEntry { impl CueEntry {
fn parse(data: &[u8]) -> Result<Self, Error> { fn parse(data: &[u8]) -> Result<Self> {
let mut cursor = Cursor::new(data); let mut cursor = Cursor::new(data);
let name_offset = cursor.read_u32::<LE>()?.try_into()?; let path_offset = cursor.read_u32::<LE>()?.try_into()?;
let data_offset = cursor.read_u32::<LE>()?.try_into()?; let data_offset = cursor.read_u32::<LE>()?.try_into()?;
let decompressed_size = cursor.read_u32::<LE>()?.try_into()?; let decompressed_size = cursor.read_u32::<LE>()?.try_into()?;
let compressed_size = cursor.read_u32::<LE>()?.try_into()?; let compressed_size = cursor.read_u32::<LE>()?.try_into()?;
Ok(Self { Ok(Self {
name_offset, path_offset,
data_offset, data_offset,
decompressed_size, decompressed_size,
compressed_size, compressed_size,
}) })
} }
/// Reads this entry's path string out of the raw archive data.
///
/// The path is stored NUL-terminated at `self.path_offset`; bytes after
/// the first NUL are ignored. Invalid UTF-8 is replaced (lossy), never
/// rejected. Fails if `path_offset` lies outside `data`.
fn parse_path(&self, data: &[u8]) -> Result<PathBuf> {
    let tail = mini_parser::get_slice_range(data, self.path_offset..data.len())?;
    // Collect bytes up to (not including) the NUL terminator.
    let raw_path: Vec<u8> = tail.iter().copied().take_while(|&byte| byte != 0).collect();
    let path = String::from_utf8_lossy(&raw_path).into_owned();
    Ok(PathBuf::from(path))
}
} }
pub struct ARC { #[derive(Debug, Deref, PartialEq)]
pub files: HashMap<PathBuf, Vec<u8>>, struct Cue(HashMap<PathBuf, CueEntry>);
impl Cue {
// Parses the cue (the archive's table of contents). `data` is the raw cue
// region, one 16-byte record per file; `arc_data` is the whole archive,
// needed to resolve each entry's NUL-terminated path string.
fn parse(data: &[u8], arc_data: &[u8]) -> Result<Self> {
let mut cue = HashMap::new();
// Each cue record is four little-endian u32 fields = 4 * 4 bytes.
for chunk in data.chunks(4 * 4) {
let entry = CueEntry::parse(chunk)?;
let path = entry.parse_path(arc_data)?;
trace!(
"Found cue entry with path {} at {} (size {})",
path.display(),
entry.data_offset,
entry.decompressed_size,
);
// NOTE(review): a duplicate path silently overwrites the earlier entry.
cue.insert(path, entry);
}
Ok(Self(cue))
}
} }
impl ARC { #[derive(Debug, PartialEq)]
pub fn parse(data: &[u8]) -> Result<Self, Error> { pub struct ARC<'a> {
data: &'a [u8],
file_count: u32,
version: u32,
cue: Cue,
}
impl<'a> ARC<'a> {
pub fn parse(data: &'a [u8]) -> Result<Self> {
let mut cursor = Cursor::new(data); let mut cursor = Cursor::new(data);
let magic = cursor.read_u32::<LE>()?; let magic = cursor.read_u32::<LE>()?;
@ -82,62 +126,138 @@ impl ARC {
let _compression = cursor.read_u32::<LE>()?; let _compression = cursor.read_u32::<LE>()?;
let mut cue = Vec::new(); let mut cue_data = vec![0u8; (4 * 4 * file_count).try_into().unwrap()];
cursor cursor.read_exact(&mut cue_data)?;
.take((4 * 4 * file_count).into()) let cue = Cue::parse(&cue_data, &data)?;
.read_to_end(&mut cue)?;
let cue: Vec<CueEntry> = cue
.chunks(4 * 4)
.map(CueEntry::parse)
.collect::<Result<_, _>>()?;
let mut files = HashMap::new(); info!("ARC archive has {} files", cue.len());
for entry in cue { Ok(Self {
let path = PathBuf::from( data,
String::from_utf8_lossy( file_count,
&mini_parser::get_slice_range(data, entry.name_offset..data.len())? version,
.iter() cue,
.take_while(|byte| **byte != 0) })
.cloned() }
.collect::<Vec<u8>>(),
)
.into_owned(),
);
trace!("Found entry with path {}", path.display()); pub fn has_file(&self, path: &PathBuf) -> bool {
self.cue.get(path).is_some()
}
let data = mini_parser::get_slice_range( pub fn file_paths(&self) -> Vec<&PathBuf> {
data, self.cue.keys().collect()
entry.data_offset..entry.data_offset + entry.compressed_size, }
)?;
let data = if entry.compressed_size != entry.decompressed_size { /// Gets a single file from the archive.
trace!("Decompressing file"); ///
decompress(data) /// Returns `Ok(None)` when the file does not exist and returns an error when the file could
} else { /// not be read.
trace!("File is not compressed"); pub fn get_file(&self, path: &PathBuf) -> Result<Option<Vec<u8>>> {
data.to_vec() let entry = match self.cue.get(path) {
}; Some(entry) => entry,
None => return Ok(None),
};
if data.len() != entry.decompressed_size { let data = mini_parser::get_slice_range(
return Err(Error::DecompressionSize { self.data,
expected: entry.decompressed_size, entry.data_offset..entry.data_offset + entry.compressed_size,
found: data.len(), )?;
});
}
debug!( let data = if entry.compressed_size != entry.decompressed_size {
"Processed entry with path {} and length {}", trace!("Decompressing file");
path.display(), decompress(data)
data.len() } else {
); trace!("File is not compressed");
data.to_vec()
};
files.insert(path, data); if data.len() != entry.decompressed_size {
return Err(Error::DecompressionSize {
expected: entry.decompressed_size,
found: data.len(),
});
} }
info!("Processed {} files", files.len()); debug!(
"Got file with path {} and length {}",
path.display(),
data.len()
);
Ok(Self { files }) Ok(Some(data))
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_cue_entry_parse() {
    // 16 bytes = four little-endian u32 fields, in cue-record order:
    // path offset, data offset, decompressed size, compressed size.
    let raw = b"\xa0\x00\x00\x00\xc0\x01\x00\x00\x5c\x04\x00\x00\x2d\x02\x00\x00";
    let expected = CueEntry {
        path_offset: 160,
        data_offset: 448,
        decompressed_size: 1116,
        compressed_size: 557,
    };
    assert_eq!(CueEntry::parse(raw).unwrap(), expected);
}
#[quickcheck]
fn test_cue_entry_parse_size(data: Vec<u8>) -> bool {
    // A cue record is four little-endian u32s, i.e. exactly 16 bytes.
    // Shorter input must fail to parse; 16 bytes or more must succeed
    // (bytes past the first 16 are simply not read by the cursor).
    let cue_entry = CueEntry::parse(&data);
    if data.len() >= 16 {
        cue_entry.is_ok()
    } else {
        cue_entry.is_err()
    }
}
#[test]
fn test_cue_entry_parse_path() {
    // Only path_offset matters for path parsing; zero the other fields.
    let entry = CueEntry {
        path_offset: 7,
        ..Default::default()
    };
    // Offset 7 lies outside an empty slice, so this must error.
    entry.parse_path(b"").unwrap_err();
    // The path runs from offset 7 up to (not including) the NUL byte;
    // everything after the NUL is ignored.
    assert_eq!(
        entry.parse_path(b"1234567test/file/name\0after path").unwrap(),
        PathBuf::from("test/file/name")
    );
}
#[test]
// Parses a two-record cue (16 bytes each) against a fake archive blob and
// checks that each record's path is resolved from its path_offset.
fn test_parse_cue() {
// only path_offset is required to have a useful value to test the cue
#[rustfmt::skip]
let cue = Cue::parse(&[
0x02, 0x00, 0x00, 0x00, // first file (path offset 2)
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00,
0x0f, 0x00, 0x00, 0x00, // second file (path offset 15)
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00,
], b"abpath/to/file\0other/file\0z").unwrap();
// Expected: paths read NUL-terminated from offsets 2 and 15 of the blob;
// the surrounding "ab"/"z" filler bytes must not leak into either path.
let mut expected_cue = HashMap::new();
expected_cue.insert(
PathBuf::from("path/to/file"),
CueEntry {
path_offset: 2,
..Default::default()
},
);
expected_cue.insert(
PathBuf::from("other/file"),
CueEntry {
path_offset: 15,
..Default::default()
},
);
assert_eq!(cue, Cue(expected_cue));
} }
} }

View File

@ -78,8 +78,7 @@ impl MusicDB {
let arc = arc::ARC::parse(&data)?; let arc = arc::ARC::parse(&data)?;
let musicdb_data = arc let musicdb_data = arc
.files .get_file(&PathBuf::from("data/gamedata/musicdb.xml"))?
.get(&PathBuf::from("data/gamedata/musicdb.xml"))
.ok_or(Error::NotInArchive)?; .ok_or(Error::NotInArchive)?;
Self::parse(&String::from_utf8(musicdb_data.to_vec())?).map_err(|err| err.into()) Self::parse(&String::from_utf8(musicdb_data.to_vec())?).map_err(|err| err.into())

View File

@ -306,19 +306,23 @@ fn main() -> Result<()> {
.with_context(|| format!("failed to read ARC file {}", &opts.file.display()))?; .with_context(|| format!("failed to read ARC file {}", &opts.file.display()))?;
let arc = ARC::parse(&arc_data).context("failed to parse ARC file")?; let arc = ARC::parse(&arc_data).context("failed to parse ARC file")?;
let files = match opts.single_file { let files = match &opts.single_file {
Some(path) => match arc.files.get(&path) { Some(path) => {
Some(_) => vec![path], if arc.has_file(&path) {
None => return Err(anyhow!("File “{}” not found in archive", path.display())), vec![path]
}, } else {
None => arc.files.keys().cloned().collect(), return Err(anyhow!("File “{}” not found in archive", path.display()));
}
}
None => arc.file_paths(),
}; };
for (path, data) in arc.files.iter() { for path in arc.file_paths() {
if files.contains(&path) { if files.contains(&path) {
if opts.list_files { if opts.list_files {
println!("{}", path.display()); println!("{}", path.display());
} else { } else {
let data = arc.get_file(path)?.unwrap();
info!("Writing {}", path.display()); info!("Writing {}", path.display());
if let Some(parent) = path.parent() { if let Some(parent) = path.parent() {
fs::create_dir_all(parent)?; fs::create_dir_all(parent)?;