Refactor arc parsing and add internal tests for it

master
Simon Bruder 2020-07-09 14:47:53 +02:00
parent 85650f474b
commit dad3a7ff13
No known key found for this signature in database
GPG Key ID: 6F03E0000CC5B62F
3 changed files with 187 additions and 64 deletions

View File

@ -1,5 +1,6 @@
use std::collections::HashMap;
use std::convert::TryInto;
use std::default::Default;
use std::io;
use std::io::prelude::*;
use std::io::Cursor;
@ -7,6 +8,7 @@ use std::num;
use std::path::PathBuf;
use byteorder::{ReadBytesExt, LE};
use derive_more::Deref;
use konami_lz77::decompress;
use log::{debug, info, trace, warn};
use thiserror::Error;
@ -29,38 +31,80 @@ pub enum Error {
MiniParserError(#[from] mini_parser::MiniParserError),
}
#[derive(Debug)]
type Result<T> = std::result::Result<T, Error>;
#[derive(Debug, Default, PartialEq)]
struct CueEntry {
name_offset: usize,
path_offset: usize,
data_offset: usize,
decompressed_size: usize,
compressed_size: usize,
}
impl CueEntry {
fn parse(data: &[u8]) -> Result<Self, Error> {
fn parse(data: &[u8]) -> Result<Self> {
let mut cursor = Cursor::new(data);
let name_offset = cursor.read_u32::<LE>()?.try_into()?;
let path_offset = cursor.read_u32::<LE>()?.try_into()?;
let data_offset = cursor.read_u32::<LE>()?.try_into()?;
let decompressed_size = cursor.read_u32::<LE>()?.try_into()?;
let compressed_size = cursor.read_u32::<LE>()?.try_into()?;
Ok(Self {
name_offset,
path_offset,
data_offset,
decompressed_size,
compressed_size,
})
}
fn parse_path(&self, data: &[u8]) -> Result<PathBuf> {
Ok(PathBuf::from(
String::from_utf8_lossy(
&mini_parser::get_slice_range(data, self.path_offset..data.len())?
.iter()
.take_while(|byte| **byte != 0)
.cloned()
.collect::<Vec<u8>>(),
)
.into_owned(),
))
}
}
pub struct ARC {
pub files: HashMap<PathBuf, Vec<u8>>,
#[derive(Debug, Deref, PartialEq)]
struct Cue(HashMap<PathBuf, CueEntry>);
impl Cue {
fn parse(data: &[u8], arc_data: &[u8]) -> Result<Self> {
let mut cue = HashMap::new();
for chunk in data.chunks(4 * 4) {
let entry = CueEntry::parse(chunk)?;
let path = entry.parse_path(arc_data)?;
trace!(
"Found cue entry with path {} at {} (size {})",
path.display(),
entry.data_offset,
entry.decompressed_size,
);
cue.insert(path, entry);
}
Ok(Self(cue))
}
}
impl ARC {
pub fn parse(data: &[u8]) -> Result<Self, Error> {
#[derive(Debug, PartialEq)]
pub struct ARC<'a> {
data: &'a [u8],
file_count: u32,
version: u32,
cue: Cue,
}
impl<'a> ARC<'a> {
pub fn parse(data: &'a [u8]) -> Result<Self> {
let mut cursor = Cursor::new(data);
let magic = cursor.read_u32::<LE>()?;
@ -82,62 +126,138 @@ impl ARC {
let _compression = cursor.read_u32::<LE>()?;
let mut cue = Vec::new();
cursor
.take((4 * 4 * file_count).into())
.read_to_end(&mut cue)?;
let cue: Vec<CueEntry> = cue
.chunks(4 * 4)
.map(CueEntry::parse)
.collect::<Result<_, _>>()?;
let mut cue_data = vec![0u8; (4 * 4 * file_count).try_into().unwrap()];
cursor.read_exact(&mut cue_data)?;
let cue = Cue::parse(&cue_data, &data)?;
let mut files = HashMap::new();
info!("ARC archive has {} files", cue.len());
for entry in cue {
let path = PathBuf::from(
String::from_utf8_lossy(
&mini_parser::get_slice_range(data, entry.name_offset..data.len())?
.iter()
.take_while(|byte| **byte != 0)
.cloned()
.collect::<Vec<u8>>(),
)
.into_owned(),
);
Ok(Self {
data,
file_count,
version,
cue,
})
}
trace!("Found entry with path {}", path.display());
pub fn has_file(&self, path: &PathBuf) -> bool {
self.cue.get(path).is_some()
}
let data = mini_parser::get_slice_range(
data,
entry.data_offset..entry.data_offset + entry.compressed_size,
)?;
pub fn file_paths(&self) -> Vec<&PathBuf> {
self.cue.keys().collect()
}
let data = if entry.compressed_size != entry.decompressed_size {
trace!("Decompressing file");
decompress(data)
} else {
trace!("File is not compressed");
data.to_vec()
};
/// Gets a single file from the archive.
///
/// Returns `Ok(None)` when the file does not exist and returns an error when the file could
/// not be read.
pub fn get_file(&self, path: &PathBuf) -> Result<Option<Vec<u8>>> {
let entry = match self.cue.get(path) {
Some(entry) => entry,
None => return Ok(None),
};
if data.len() != entry.decompressed_size {
return Err(Error::DecompressionSize {
expected: entry.decompressed_size,
found: data.len(),
});
}
let data = mini_parser::get_slice_range(
self.data,
entry.data_offset..entry.data_offset + entry.compressed_size,
)?;
debug!(
"Processed entry with path {} and length {}",
path.display(),
data.len()
);
let data = if entry.compressed_size != entry.decompressed_size {
trace!("Decompressing file");
decompress(data)
} else {
trace!("File is not compressed");
data.to_vec()
};
files.insert(path, data);
if data.len() != entry.decompressed_size {
return Err(Error::DecompressionSize {
expected: entry.decompressed_size,
found: data.len(),
});
}
info!("Processed {} files", files.len());
debug!(
"Got file with path {} and length {}",
path.display(),
data.len()
);
Ok(Self { files })
Ok(Some(data))
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_cue_entry_parse() {
assert_eq!(
CueEntry::parse(b"\xa0\x00\x00\x00\xc0\x01\x00\x00\x5c\x04\x00\x00\x2d\x02\x00\x00")
.unwrap(),
CueEntry {
path_offset: 160,
data_offset: 448,
decompressed_size: 1116,
compressed_size: 557,
}
);
}
#[quickcheck]
fn test_cue_entry_parse_size(data: Vec<u8>) -> bool {
let cue_entry = CueEntry::parse(&data);
if dbg!(data.len()) >= 16 {
cue_entry.is_ok()
} else {
cue_entry.is_err()
}
}
#[test]
fn test_cue_entry_parse_path() {
let cue_entry = CueEntry {
path_offset: 7,
..Default::default()
};
cue_entry.parse_path(b"").unwrap_err();
let path = cue_entry
.parse_path(b"1234567test/file/name\0after path")
.unwrap();
assert_eq!(path, PathBuf::from("test/file/name"));
}
#[test]
fn test_parse_cue() {
// only path_offset is required to have a useful value to test the cue
#[rustfmt::skip]
let cue = Cue::parse(&[
0x02, 0x00, 0x00, 0x00, // first file (path offset 2)
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00,
0x0f, 0x00, 0x00, 0x00, // second file (path offset 15)
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00,
], b"abpath/to/file\0other/file\0z").unwrap();
let mut expected_cue = HashMap::new();
expected_cue.insert(
PathBuf::from("path/to/file"),
CueEntry {
path_offset: 2,
..Default::default()
},
);
expected_cue.insert(
PathBuf::from("other/file"),
CueEntry {
path_offset: 15,
..Default::default()
},
);
assert_eq!(cue, Cue(expected_cue));
}
}

View File

@ -78,8 +78,7 @@ impl MusicDB {
let arc = arc::ARC::parse(&data)?;
let musicdb_data = arc
.files
.get(&PathBuf::from("data/gamedata/musicdb.xml"))
.get_file(&PathBuf::from("data/gamedata/musicdb.xml"))?
.ok_or(Error::NotInArchive)?;
Self::parse(&String::from_utf8(musicdb_data.to_vec())?).map_err(|err| err.into())

View File

@ -306,19 +306,23 @@ fn main() -> Result<()> {
.with_context(|| format!("failed to read ARC file {}", &opts.file.display()))?;
let arc = ARC::parse(&arc_data).context("failed to parse ARC file")?;
let files = match opts.single_file {
Some(path) => match arc.files.get(&path) {
Some(_) => vec![path],
None => return Err(anyhow!("File “{}” not found in archive", path.display())),
},
None => arc.files.keys().cloned().collect(),
let files = match &opts.single_file {
Some(path) => {
if arc.has_file(&path) {
vec![path]
} else {
return Err(anyhow!("File “{}” not found in archive", path.display()));
}
}
None => arc.file_paths(),
};
for (path, data) in arc.files.iter() {
for path in arc.file_paths() {
if files.contains(&path) {
if opts.list_files {
println!("{}", path.display());
} else {
let data = arc.get_file(path)?.unwrap();
info!("Writing {}", path.display());
if let Some(parent) = path.parent() {
fs::create_dir_all(parent)?;