commit d0d4ccbd55c3ecbe34ac32a39957194bd882ce4b Author: Ethan Simmons Date: Wed Apr 17 02:01:14 2024 -0500 Initial Commit Will write more useful commit messages in the future; the start of this project was extremely rushed. diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ea8c4bf --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +/target diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..047c59c --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,39 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "anyhow" +version = "1.0.82" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f538837af36e6f6a9be0faa67f9a314f8119e4e4b5867c6ab40ed60360142519" + +[[package]] +name = "memchr" +version = "2.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d" + +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + +[[package]] +name = "nom" +version = "7.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +dependencies = [ + "memchr", + "minimal-lexical", +] + +[[package]] +name = "vim_undo_extractor" +version = "0.1.0" +dependencies = [ + "anyhow", + "nom", +] diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..45feca0 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,10 @@ +[package] +name = "vim_undo_extractor" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +anyhow = "1.0.82" +nom = "7.1.3" diff --git a/layout.txt b/layout.txt new file mode 100644 index 0000000..eb1eb21 --- /dev/null +++ 
b/layout.txt @@ -0,0 +1,202 @@ +START_MAGIC: 56 69 6D 9F 55 6E 44 6F E5 "Vim.UnDoF" + +Version: 00 03 + +SHA-256: A5 B4 88 3B 3A AB FB 50 30 CF 8E 57 58 30 AE 44 38 25 A8 A3 DF B2 BD DF D6 FB E6 F2 EB E8 14 45 + +Line Count: 00 00 00 7C + +Line Length : 00 00 00 16 +Line: 20 20 20 20 73 74 64 3A 3A 73 74 72 69 6E 67 20 69 6E 70 75 74 3B " std::string input;" +Line Number: 00 00 00 2B +Column Number: 00 00 00 15 + +Old Head Sequence or 0 if null: 00 00 00 01 +New Head Sequence or 0 if null: 00 00 00 83 +Current Head Sequence or 0 if null: 00 00 00 83 + +Numhead: 00 00 00 83 +Sequence Last: 00 00 00 83 +Sequence Current: 00 00 00 82 + + +Time: 00 00 00 00 66 10 CE 39 + +Optional fields + 04 + 01 + 00 00 00 40 + +End Marker: 00 + +Header Magic: 5F D0 + +Next Pointer: 00 00 00 00 +Previous Pointer: 00 00 00 02 +Alt Next Pointer: 00 00 00 00 +Alt Previous Pointer: 00 00 00 00 + +Sequence: 00 00 00 01 + +Position +Line Number: 00 00 00 54 +Column: 00 00 00 00 +Coladd: 00 00 00 00 + +Cursor VCol: FF FF FF FF +Flags: 00 01 +00 00 + +Marks +00 00 00 00 +00 00 00 00 +00 00 00 00 + +00 00 00 00 +00 00 00 00 +00 00 00 00 + +00 00 00 00 +00 00 00 00 +00 00 00 00 + +00 00 00 00 +00 00 00 00 +00 00 00 00 + +00 00 00 00 +00 00 00 00 +00 00 00 00 + +00 00 00 00 +00 00 00 00 +00 00 00 00 + +00 00 00 00 +00 00 00 00 +00 00 00 00 + +00 00 00 00 +00 00 00 00 +00 00 00 00 + +00 00 00 00 +00 00 00 00 +00 00 00 00 + +00 00 00 00 +00 00 00 00 +00 00 00 00 + +00 00 00 00 +00 00 00 00 +00 00 00 00 + +00 00 00 00 +00 00 00 00 +00 00 00 00 + +00 00 00 00 +00 00 00 00 +00 00 00 00 + +00 00 00 00 +00 00 00 00 +00 00 00 00 + +00 00 00 00 +00 00 00 00 +00 00 00 00 + +00 00 00 00 +00 00 00 00 +00 00 00 00 + +00 00 00 00 +00 00 00 00 +00 00 00 00 + +00 00 00 00 +00 00 00 00 +00 00 00 00 + +00 00 00 00 +00 00 00 00 +00 00 00 00 + +00 00 00 00 +00 00 00 00 +00 00 00 00 + +00 00 00 00 +00 00 00 00 +00 00 00 00 + +00 00 00 00 +00 00 00 00 +00 00 00 00 + +00 00 00 00 +00 00 00 00 +00 00 00 00 + +00 00 00 
00 +00 00 00 00 +00 00 00 00 + +00 00 00 00 +00 00 00 00 +00 00 00 00 + +00 00 00 00 +00 00 00 00 +00 00 00 00 + + +Visual Info: + Start Position: + 00 00 00 00 + 00 00 00 00 + 00 00 00 00 + + End Position: + 00 00 00 00 + 00 00 00 00 + 00 00 00 00 + + Vi Mode: 00 00 00 00 + Vi_curswant: 00 00 00 00 + +Time: 00 00 00 00 66 10 BC DC + +Optional Fields: + 04 + 01 + 00 00 00 00 + +End Marker: 00 + +Entry Magic: F5 18 + +Entry Type: 00 00 00 00 +Entry Data: 54 00 00 00 00 00 00 00 00 05 00 00 00 00 00 00 00 + +Entry Magic: F5 18 + +Entry Type: 00 00 00 00 +Entry Data: 55 00 00 00 04 00 00 00 00 00 00 00 05 +00 00 00 00 00 00 00 05 00 00 00 71 06 00 00 00 +00 00 00 05 00 00 00 00 00 00 00 05 00 00 00 00 +00 00 00 + +Entry End Magic: 35 81 + +Header Magic: 5F D0 +00 00 00 01 00 00 00 03 00 + + + + + + +
diff --git a/src/main.rs b/src/main.rs
new file mode 100644
index 0000000..0a3fa64
--- /dev/null
+++ b/src/main.rs
@@ -0,0 +1,28 @@
+use anyhow::Result;
+
+
+mod undo;
+mod parse;
+
+use undo::UndoFile;
+
+
+/// Parses `./undo_file` and prints the payload of every undo entry, one per line.
+///
+/// # Errors
+/// Returns whatever error `UndoFile::from_path` reports.
+fn main() -> Result<()> {
+    let undo_file = UndoFile::from_path("./undo_file")?;
+
+    for header in undo_file.headers {
+        for entry in header.entries {
+            // Each byte is shown as the code point with the same value; every `u8`
+            // is a valid `char`, so no fallback character is ever needed.
+            let section: String = entry.section.iter().copied().map(char::from).collect();
+
+            println!("{}", section);
+        }
+    }
+
+    Ok(())
+}
+
diff --git a/src/parse.rs b/src/parse.rs
new file mode 100644
index 0000000..e6be60e
--- /dev/null
+++ b/src/parse.rs
@@ -0,0 +1,42 @@
+use nom::{
+    bytes::complete::take,
+    combinator::map_res,
+    number::complete::{be_u32, be_u64},
+    IResult,
+    error::Error,
+};
+
+pub(crate) mod start_header;
+pub(crate) mod header;
+pub(crate) mod entry;
+
+/// Consumes four bytes and decodes them as a big-endian `u32`.
+///
+/// Fails with a recoverable nom error when fewer than four bytes remain.
+pub fn bytes_u32(input: &[u8]) -> IResult<&[u8], u32> {
+    be_u32(input)
+}
+
+/// Consumes eight bytes and decodes them as a big-endian `u64` timestamp field.
+///
+/// Fails with a recoverable nom error when fewer than eight bytes remain.
+fn time(input: &[u8]) -> IResult<&[u8], u64> {
+    be_u64(input)
+}
+
+#[derive(Debug)]
+struct
OptionalFields((u8, u8, u32));
+
+/// Consumes the six-byte optional-field record: two single bytes followed by a
+/// big-endian `u32`.
+// NOTE(review): layout.txt lists the bytes as `04 01 <u32>`; what the two leading
+// bytes mean is not established by this code - confirm against the Vim sources.
+fn optional_fields(input: &[u8]) -> IResult<&[u8], OptionalFields> {
+    let (input, b) = take(6usize)(input)?;
+    let value = u32::from_be_bytes([b[2], b[3], b[4], b[5]]);
+    Ok((input, OptionalFields((b[0], b[1], value))))
+}
diff --git a/src/parse/entry.rs b/src/parse/entry.rs
new file mode 100644
index 0000000..145d19e
--- /dev/null
+++ b/src/parse/entry.rs
@@ -0,0 +1,97 @@
+use nom::{
+    IResult,
+    Parser,
+    Or,
+    error::Error,
+    combinator::map_res,
+    sequence::{tuple, pair},
+    multi::many0,
+    bytes::complete::{tag, take, take_until},
+};
+
+use super::bytes_u32;
+
+/// Two-byte marker that introduces an entry (`F5 18`).
+const ENTRY_MAGIC: &[u8] = b"\xf5\x18";
+/// Two-byte marker that closes a header's run of entries (`35 81`).
+const ENTRY_END_MAGIC: &[u8] = b"\x35\x81";
+
+/// One undo entry: a 32-bit type tag followed by its raw, undecoded payload.
+#[derive(Debug, PartialEq, Eq)]
+pub(crate) struct Entry {
+    pub(crate) entry_type: EntryType,
+    pub(crate) section: Vec<u8>,
+}
+
+/// Entry type tag. No tag values are interpreted yet, so the raw number is kept.
+#[derive(Debug, PartialEq, Eq)]
+enum EntryType {
+    Unknown(u32),
+}
+
+/// Matches the entry magic `F5 18`.
+fn magic(input: &[u8]) -> IResult<&[u8], &[u8]> {
+    tag(ENTRY_MAGIC)(input)
+}
+
+/// Parses one entry that is followed by another entry magic: a big-endian `u32`
+/// type, then every byte up to the next `F5 18`, which is consumed as well.
+fn entry(input: &[u8]) -> IResult<&[u8], Entry> {
+    let (input, raw_type) = bytes_u32(input)?;
+    let (input, section) = take_until(ENTRY_MAGIC)(input)?;
+    let (input, _) = tag(ENTRY_MAGIC)(input)?;
+
+    Ok((
+        input,
+        Entry {
+            entry_type: EntryType::Unknown(raw_type),
+            section: section.to_vec(),
+        }
+    ))
+}
+
+/// Parses every entry located before the entry-end magic `35 81`.
+///
+/// The end magic itself is left in the remaining input. The final entry has no
+/// trailing `F5 18`, so it is whatever `many0(entry)` could not consume.
+///
+/// # Errors
+/// Fails when `35 81` is absent or when fewer than four bytes are left for the
+/// final entry's type tag (previously this case panicked on a slice index).
+fn sections(input: &[u8]) -> IResult<&[u8], Vec<Entry>> {
+    let (input, region) = take_until(ENTRY_END_MAGIC)(input)?;
+    let (last_entry, mut entries) = many0(entry)(region)?;
+    let (section, raw_type) = bytes_u32(last_entry)?;
+    entries.push(Entry {
+        entry_type: EntryType::Unknown(raw_type),
+        section: section.to_vec(),
+    });
+    Ok((input, entries))
+}
+
+/// Parses a complete entry block: entry magic, the entries, then the end magic.
+///
+/// # Errors
+/// Returns the underlying nom error instead of panicking, so combinators such
+/// as `many0` in the caller can stop cleanly when no further block follows.
+pub(crate) fn parse(input: &[u8]) -> IResult<&[u8], Vec<Entry>> {
+    let (input, _) = magic(input)?;
+    let (input, entries) = sections(input)?;
+    let (input, _) = tag(ENTRY_END_MAGIC)(input)?;
+
+    Ok((
+        input,
+        entries,
+    ))
+
+}
+
+#[cfg(test)]
+mod tests {
+
+    use super::{sections, Entry, EntryType};
+
+    /// Three entries precede the `35 81` end marker: two terminated by `F5 18`
+    /// and a final unterminated one. All carry type 0 and the payload `AA AA`.
+    #[test]
+    fn test_sections() {
+        let test_str = b"\x00\x00\x00\x00\xaa\xaa\xf5\x18\x00\x00\x00\x00\xaa\xaa\xf5\x18\x00\x00\x00\x00\xaa\xaa\x35\x81";
+
+        let expected: Vec<Entry> = (0..3)
+            .map(|_| Entry {
+                entry_type: EntryType::Unknown(0),
+                section: vec![b'\xaa', b'\xaa'],
+            })
+            .collect();
+
+        assert_eq!(sections(test_str).unwrap().1, expected)
+    }
+}
diff --git a/src/parse/header.rs b/src/parse/header.rs
new file mode 100644
index 0000000..bf591b2
--- /dev/null
+++ b/src/parse/header.rs
@@ -0,0 +1,175 @@
+use nom::{
+    IResult,
+    error::Error,
+    combinator::map_res,
+    sequence::tuple,
+    bytes::complete::{tag, take},
+};
+
+use super::{bytes_u32, time, optional_fields, OptionalFields};
+
+use crate::parse::entry::Entry;
+
+/// One undo header record, i.e. the fields that follow a `5F D0` header magic.
+///
+/// `entries` starts out empty; the caller attaches the entries parsed after the
+/// header.
+#[derive(Debug)]
+pub(crate) struct Header {
+    pub(crate) next: u32,
+    pub(crate) previous: u32,
+    pub(crate) alt_next: u32,
+    pub(crate) alt_previous: u32,
+    pub(crate) sequence: u32,
+    pub(crate) position: Position,
+    pub(crate) cursor_vcol: u32,
+    pub(crate) flags: Vec<Flag>,
+    pub(crate) marks: Vec<u8>,
+    pub(crate) visual_info: VisualInfo,
+    pub(crate) time: u64,
+    pub(crate) optional_fields: OptionalFields,
+    pub(crate) entries: Vec<Entry>
+}
+
+/// A cursor position stored as three consecutive big-endian `u32` values.
+#[derive(Debug)]
+struct Position {
+    line_number: u32,
+    column_number: u32,
+    coladd: u32,
+}
+
+/// Header flag. Flag bytes are not decoded yet, so every byte maps to `Unknown`.
+#[derive(Debug)]
+enum Flag {
+    Unknown,
+}
+
+/// Visual-selection record: start and end positions followed by two `u32`s.
+#[derive(Debug)]
+struct VisualInfo {
+    start: Position,
+    end: Position,
+    mode: u32,
+    curswant: u32,
+}
+
+/// Matches the header magic `5F D0`.
+fn magic(input: &[u8]) -> IResult<&[u8], &[u8]> {
+    tag(b"\x5f\xd0".as_ref())(input)
+}
+
+/// Parses a [`Position`]: line number, column number and `coladd`, in that order.
+fn position(input: &[u8]) -> IResult<&[u8], Position> {
+    let (input, (line_number, column_number, coladd)) =
+        tuple((bytes_u32, bytes_u32, bytes_u32))(input)?;
+
+    Ok((
+        input,
+        Position {
+            line_number,
+            column_number,
+            coladd,
+        }
+    ))
+}
+
+fn visual_info(input: &[u8]) -> IResult<&[u8], VisualInfo> {
+    map_res(
+        tuple((
+            position,
+            position,
+            bytes_u32,
+            bytes_u32,
+        )),
+        |(start, end, mode, curswant)| {
+            Ok::<VisualInfo, Error<&[u8]>>(
+                VisualInfo {
+                    start,
+                    end,
+                    mode,
+                    curswant,
+                }
+            )
+        }
+    )(input)
+}
+
+/// Consumes the four flag bytes and maps each one to [`Flag::Unknown`]; no flag
+/// values are decoded yet.
+// NOTE(review): layout.txt shows a two-byte `Flags` field followed by `00 00`;
+// whether all four bytes are really flags should be confirmed.
+fn flags(input: &[u8]) -> IResult<&[u8], Vec<Flag>> {
+    let (input, raw) = take(4usize)(input)?;
+    Ok((input, raw.iter().map(|_| Flag::Unknown).collect()))
+}
+
+/// Consumes the 310 raw bytes stored in the header's `marks` field, undecoded.
+fn marks(input: &[u8]) ->IResult<&[u8], &[u8]> {
+    take(310usize)(input)
+}
+
+/// Matches the single `00` byte that terminates the optional fields.
+fn end_marker(input: &[u8]) -> IResult<&[u8], &[u8]> {
+    tag(b"\x00")(input)
+}
+
+/// Parses one undo header, starting at its `5F D0` magic and ending after the
+/// `00` end marker. `entries` is left empty for the caller to fill in.
+///
+/// # Errors
+/// Returns the nom error of the first field that fails to parse. Earlier
+/// versions unwrapped here, which turned any mismatch into a panic and kept
+/// `many0` in the caller from ever seeing a recoverable error.
+pub(crate) fn parse(input: &[u8]) -> IResult<&[u8], Header> {
+    let (input, (
+        _,
+        next,
+        previous,
+        alt_next,
+        alt_previous,
+        sequence,
+        position,
+        cursor_vcol,
+        flags,
+        marks,
+        visual_info,
+        time,
+        optional_fields,
+        _,
+    )) = tuple((
+        magic,
+        bytes_u32,
+        bytes_u32,
+        bytes_u32,
+        bytes_u32,
+        bytes_u32,
+        position,
+        bytes_u32,
+        flags,
+        marks,
+        visual_info,
+        time,
+        optional_fields,
+        end_marker,
+    ))(input)?;
+
+    Ok((
+
+        input,
+        Header {
+            next,
+            previous,
+            alt_next,
+            alt_previous,
+            sequence,
+            position,
+            visual_info,
+            cursor_vcol,
+            flags,
+            marks: marks.to_vec(),
+            time,
+            optional_fields,
+            entries: Vec::new(),
+        }
+    ))
+}
diff --git a/src/parse/start_header.rs b/src/parse/start_header.rs
new file mode 100644
index 0000000..63782c5
--- /dev/null
+++ b/src/parse/start_header.rs
@@ -0,0 +1,154 @@
+use nom::{
+    IResult,
+    error::Error,
+    combinator::map_res,
+    sequence::tuple,
+    bytes::complete::{tag, take},
+};
+
+use super::{bytes_u32, time, optional_fields, OptionalFields};
+
+
+/// Matches the nine-byte file signature (`START_MAGIC` in layout.txt).
+fn magic(input: &[u8]) -> IResult<&[u8], &[u8]> {
+    tag(b"\x56\x69\x6D\x9F\x55\x6E\x44\x6F\xE5")(input)
+}
+
+/// Consumes two bytes and decodes them as the big-endian `u16` format version.
+fn version(input: &[u8]) -> IResult<&[u8], u16> {
+    let (input, b) = take(2usize)(input)?;
+    Ok((input, u16::from_be_bytes([b[0], b[1]])))
+}
+
+fn hash(input: &[u8]) -> IResult<&[u8], &[u8; 32]> {
+    map_res(
+        take(32usize),
+        |b: &[u8]| Ok::<&[u8; 32], Error<&[u8]>>(b.try_into().unwrap())
+    )(input)
+}
+
+/// Parses a length-prefixed line: a big-endian `u32` byte count followed by
+/// that many bytes, which must be valid UTF-8.
+///
+/// # Errors
+/// Invalid UTF-8 is reported as a nom `MapRes` error rather than a panic.
+// NOTE(review): the text is assumed to be UTF-8 here; whether undo files can
+// carry other encodings is not established by this code - confirm.
+fn line_with_length(input: &[u8]) -> IResult<&[u8], String> {
+    let (input, line_length) = bytes_u32(input)?;
+    map_res(
+        take(line_length),
+        |b: &[u8]| String::from_utf8(b.to_vec())
+    )(input)
+}
+
+
+
+/// Matches the single `00` byte that terminates the optional fields.
+fn end_marker(input: &[u8]) -> IResult<&[u8], &[u8]> {
+    tag(b"\x00".as_ref())(input)
+}
+
+/// The file-level header that precedes all undo headers; fields are listed in
+/// on-disk order.
+#[derive(Debug)]
+pub(crate) struct StartHeader {
+    version: u16,
+    hash: Vec<u8>,
+    line_count: u32,
+    line: String,
+    line_number: u32,
+    column_number: u32,
+    old_head_sequence: u32,
+    new_head_sequence: u32,
+    current_head_sequence: u32,
+    numhead: u32,
+    last_sequence: u32,
+    current_sequence: u32,
+    time: u64,
+    optional_fields: OptionalFields,
+}
+
+/// Parses the start header, from the file signature through the `00` end marker.
+///
+/// # Errors
+/// Returns the nom error of the first field that fails to parse instead of
+/// panicking, so callers decide how to report a malformed file.
+pub(crate) fn parse(input: &[u8]) -> IResult<&[u8], StartHeader> {
+    let (input, (
+        _,
+        version,
+        hash,
+        line_count,
+        line,
+        line_number,
+        column_number,
+        old_head_sequence,
+        new_head_sequence,
+        current_head_sequence,
+        numhead,
+        last_sequence,
+        current_sequence,
+        time,
+        optional_fields,
+        _,
+    )) = tuple((
+        magic,
+        version,
+        hash,
+        bytes_u32,
+        line_with_length,
+        bytes_u32,
+        bytes_u32,
+        bytes_u32,
+        bytes_u32,
+        bytes_u32,
+        bytes_u32,
+        bytes_u32,
+        bytes_u32,
+        time,
+        optional_fields,
+        end_marker,
+    ))(input)?;
+
+    Ok(
+        (
+            input,
+            StartHeader {
+                version,
+                hash: hash.to_vec(),
+                line_count,
+                line,
+                line_number,
+                column_number,
+                old_head_sequence,
+                new_head_sequence,
+                current_head_sequence,
+                numhead,
+                last_sequence,
+                current_sequence,
+                time,
+                optional_fields,
+            }
+        )
+    )
+}
+
+#[cfg(test)]
+mod tests {
+    use
super::{magic, version};
+
+    #[test]
+    fn test_magic() { // `magic` must return exactly the nine signature bytes and leave the rest
+        let test_str = b"\x56\x69\x6d\x9f\x55\x6e\x44\x6f\xe5\x00\x03";
+
+        let (_, magic) = magic(test_str).unwrap();
+
+        assert_eq!(magic, b"\x56\x69\x6d\x9f\x55\x6e\x44\x6f\xe5");
+    }
+
+    #[test]
+    fn test_version() { // the leading bytes `00 03` decode as big-endian version 3
+        let test_str = b"\x00\x03\xa5\xb4";
+
+        let (_, version) = version(test_str).unwrap();
+
+        assert_eq!(version, 3);
+    }
+}
diff --git a/src/undo.rs b/src/undo.rs
new file mode 100644
index 0000000..5e466e4
--- /dev/null
+++ b/src/undo.rs
@@ -0,0 +1,50 @@
+use std::fs::File;
+use std::io::Read;
+
+use anyhow::{Result, anyhow};
+
+use crate::parse::start_header::{self, StartHeader};
+use crate::parse::header::{self, Header};
+use crate::parse::entry::{self, Entry};
+
+use nom::{
+    sequence::tuple,
+    multi::many0,
+    bytes::complete::{take_until, tag},
+};
+
+#[derive(Debug)]
+pub(crate) struct UndoFile { // a parsed undo file: the start header plus every undo header found after it
+    pub(crate) start_header: StartHeader,
+    pub(crate) headers: Vec
<Header>,
+}
+
+impl UndoFile {
+    /// Reads the file at `path` and parses it into an [`UndoFile`].
+    ///
+    /// The start header is parsed first. After it, `(header, entries)` pairs are
+    /// collected for as long as they keep parsing, and each header receives the
+    /// entries that followed it.
+    ///
+    /// # Errors
+    /// Returns an error when the file cannot be opened or read, or when the
+    /// start header or the header list is malformed. Earlier versions ignored
+    /// the read result and unwrapped both parser results.
+    pub(crate) fn from_path(path: &str) -> Result<Self> {
+        let mut buffer: Vec<u8> = Vec::new();
+        File::open(path)?.read_to_end(&mut buffer)?;
+
+        let (input, start_header) = start_header::parse(&buffer)
+            .map_err(|e| Self::parse_error("start header", buffer.len(), e))?;
+
+        // After each header/entries pair, skip ahead to the next header magic.
+        let (_, out) = many0(tuple((
+            header::parse,
+            entry::parse,
+            take_until(b"\x5f\xd0".as_ref()),
+        )))(input)
+        .map_err(|e| Self::parse_error("undo headers", buffer.len(), e))?;
+
+        let headers = out
+            .into_iter()
+            .map(|(mut header, entries, _)| {
+                header.entries.extend(entries);
+                header
+            }).collect();
+
+        Ok(
+            Self {
+                start_header,
+                headers,
+            }
+        )
+    }
+
+    /// Converts a nom error, which borrows the input buffer, into an owned
+    /// `anyhow` error naming the failing stage, the error kind and the byte
+    /// offset where parsing stopped.
+    fn parse_error(
+        stage: &str,
+        total_len: usize,
+        err: nom::Err<nom::error::Error<&[u8]>>,
+    ) -> anyhow::Error {
+        match err {
+            nom::Err::Error(e) | nom::Err::Failure(e) => anyhow!(
+                "failed to parse {}: {:?} at byte offset {}",
+                stage,
+                e.code,
+                total_len.saturating_sub(e.input.len())
+            ),
+            nom::Err::Incomplete(_) => {
+                anyhow!("failed to parse {}: input ended unexpectedly", stage)
+            }
+        }
+    }
+
+}
diff --git a/undo_file b/undo_file
new file mode 100644
index 0000000..73370a5
Binary files /dev/null and b/undo_file differ