By MAG


2016-12-23 21:57:33 8 Comments

I'm looking for a general review with emphasis on idiomaticness and error handling.

extern crate byteorder;
extern crate rand;

use std::collections::hash_map::HashMap;
use std::collections::BinaryHeap;

use byteorder::{ReadBytesExt, WriteBytesExt, NativeEndian};
use std::{fmt, error, result, io, path, cmp, fs};
use std::io::Read;
use std::io::Write;


type Result<T> = result::Result<T, HuffmanError>;
const BITS: usize = 8;

#[derive(Debug)]
pub enum HuffmanError {
    Io(io::Error),
    ParseTree,
    AlphabetMismatch,
    Empty,
}

impl fmt::Display for HuffmanError {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        use HuffmanError::*;
        use std::error::Error;
        match *self {
            Io(ref err) => write!(f, "IO error: {}", err),
            ParseTree => write!(f, "Parse tree error: {}", self.description()),
            Empty => write!(f, "Empty error: {}", self.description()),
            AlphabetMismatch => write!(f, "Alphabet mismatch error: {}", self.description()),
        }
    }
}

impl error::Error for HuffmanError {
    fn description(&self) -> &str {
        use HuffmanError::*;
        match *self {
            Io(ref err) => err.description(),
            ParseTree => "invalid encoded huffman tree",
            AlphabetMismatch => "alphabet doesn't match parsed tree",
            Empty => "empty stream",
        }
    }
    fn cause(&self) -> Option<&error::Error> {
        match *self {
            HuffmanError::Io(ref err) => Some(err),
            _ => None,
        }
    }
}

impl From<io::Error> for HuffmanError {
    fn from(err: io::Error) -> HuffmanError {
        HuffmanError::Io(err)
    }
}

#[derive(Eq, Debug)]
enum HuffmanTree {
    Inner {
        frecuency: u32,
        left: Box<HuffmanTree>,
        right: Box<HuffmanTree>,
    },
    Leaf { frecuency: u32, character: char },
}

fn characters_mode(file: &str) -> HashMap<char, u32> {
    let mut table = HashMap::new();
    for c in file.chars() {
        *(table.entry(c).or_insert(0)) += 1;
    }
    table
}

fn create_priority_queue(input: &str) -> BinaryHeap<HuffmanTree> {
    characters_mode(input)
        .iter()
        .map(|(c, f)| {
            HuffmanTree::Leaf {
                character: *c,
                frecuency: *f,
            }
        })
        .collect()
}

impl HuffmanTree {
    fn new(input: &str) -> Result<HuffmanTree> {
        let mut pq = create_priority_queue(input);
        for _ in 1..pq.len() {
            let min1 = pq.pop().unwrap();
            let min2 = pq.pop().unwrap();
            pq.push(min1.join(min2));
        }
        pq.pop().ok_or(HuffmanError::Empty)
    }

    fn frec(&self) -> u32 {
        use HuffmanTree::*;
        match self {
            &Inner { frecuency, .. } |
            &Leaf { frecuency, .. } => frecuency,
        }
    }

    fn join(self, other: HuffmanTree) -> HuffmanTree {
        HuffmanTree::Inner {
            frecuency: self.frec() + other.frec(),
            left: Box::new(self),
            right: Box::new(other),
        }
    }

    fn create_char_mapper_recur(&self,
                                bit_vec: &mut BitVector,
                                map: &mut HashMap<char, BitVector>) {
        use HuffmanTree::*;
        match self {
            &Inner { ref left, ref right, .. } => {
                bit_vec.push(true);
                left.create_char_mapper_recur(bit_vec, map);
                bit_vec.push(false);
                right.create_char_mapper_recur(bit_vec, map);

            }
            &Leaf { character, .. } => {
                map.insert(character, bit_vec.clone());
            }
        }
        bit_vec.pop();
    }

    fn create_char_mapper(&self) -> HashMap<char, BitVector> {
        let mut bit_vec = BitVector::new();
        let mut map = HashMap::new();
        if let &HuffmanTree::Leaf { .. } = self {
            bit_vec.push(true);
        }
        self.create_char_mapper_recur(&mut bit_vec, &mut map);
        map
    }

    fn decode<I, T>(encoded_walk: &mut I, mut chars: &mut T) -> Result<HuffmanTree>
        where I: Iterator<Item = bool>,
              T: Iterator<Item = char>
    {
        match encoded_walk.next() {
            Some(x) if x => {
                let left = Self::decode(encoded_walk, chars)?;
                let right = Self::decode(encoded_walk, chars)?;
                Ok(left.join(right))
            }
            Some(_) => {
                let c = chars.next()
                    .ok_or(HuffmanError::AlphabetMismatch)?;
                Ok(HuffmanTree::Leaf {
                    frecuency: 0,
                    character: c,
                })
            }
            None => Err(HuffmanError::ParseTree),
        }
    }

    fn serialize<W: std::io::Write>(&self, writer: &mut W) -> Result<()> {
        let (encoded_walk, alphabet) = self.encode();

        let walk_bit_len = encoded_walk.len() as u64;
        writer.write_u64::<NativeEndian>(walk_bit_len)?;
        writer.write(&encoded_walk.bits)?;

        let encoded_alphabet = alphabet.as_bytes();
        let alphabet_byte_len = encoded_alphabet.len() as u64;
        writer.write_u64::<NativeEndian>(alphabet_byte_len)?;
        writer.write_all(encoded_alphabet)?;

        Ok(())
    }

    fn de_serialize<R: std::io::Read>(reader: &mut R) -> Result<HuffmanTree> {
        use std::io::Read;

        let walk_len = reader.read_u64::<NativeEndian>()?;
        let mut walk_bytes = Vec::new();
        reader.take((walk_len + BITS as u64 - 1) / BITS as u64)
            .read_to_end(&mut walk_bytes)?;

        let bit_vec = BitVector {
            bits: walk_bytes,
            size: walk_len as usize,
        };

        let chars_len = reader.read_u64::<NativeEndian>()?;
        let mut chars = String::new();
        reader.take(chars_len)
            .read_to_string(&mut chars)?;

        let bit_iter = &mut bit_vec.iter();
        Self::decode(bit_iter, &mut chars.chars())

    }

    fn encode_recur(&self, bit_vec: &mut BitVector, alphabet: &mut String) {
        use HuffmanTree::*;
        match self {
            &Inner { ref left, ref right, .. } => {
                bit_vec.push(true);
                left.encode_recur(bit_vec, alphabet);
                right.encode_recur(bit_vec, alphabet);
            }
            &Leaf { character, .. } => {
                bit_vec.push(false);
                alphabet.push(character);
            }
        }
    }

    fn encode(&self) -> (BitVector, String) {
        let mut bit_vector = BitVector::new();
        let mut alphabet = String::new();
        self.encode_recur(&mut bit_vector, &mut alphabet);
        (bit_vector, alphabet)
    }

    fn encode_string(&self, file_str: &str) -> BitVector {
        let char_map = self.create_char_mapper();
        let itr = file_str.chars()
            .map(|c| char_map.get(&c).expect("Assertion error"));

        let mut bit_vec = BitVector::new();
        for code in itr {
            bit_vec.append(&code);
        }
        bit_vec
    }

    fn decode_string<I>(&self, bit_iter: I) -> String
        where I: Iterator<Item = bool>
    {
        use HuffmanTree::*;
        let mut node = self;
        let mut output = String::new();
        for bit in bit_iter {
            if let &Inner { ref left, ref right, .. } = node {
                node = if bit { left } else { right }
            }
            if let &Leaf { character, .. } = node {
                output.push(character);
                node = self;
            }
        }
        output
    }
}

impl Ord for HuffmanTree {
    fn cmp(&self, other: &HuffmanTree) -> cmp::Ordering {
        self.frec().cmp(&other.frec()).reverse()
    }
}

impl PartialOrd for HuffmanTree {
    fn partial_cmp(&self, other: &HuffmanTree) -> Option<cmp::Ordering> {
        Some(self.cmp(other))
    }
}

impl PartialEq for HuffmanTree {
    fn eq(&self, other: &HuffmanTree) -> bool {
        self.frec() == other.frec()
    }
}

#[derive(Clone, Debug)]
struct BitVector {
    bits: Vec<u8>,
    size: usize,
}

impl BitVector {
    fn new() -> BitVector {
        BitVector {
            bits: Vec::new(),
            size: 0,
        }
    }

    fn push(&mut self, bit: bool) {
        let leftover = self.size % BITS;
        if leftover == 0 {
            self.bits.push(0);
        }
        let last_byte = self.bits
            .last_mut()
            .expect("Assertion error");

        *last_byte |= (bit as u8) << leftover;
        self.size += 1;
    }

    fn pop(&mut self) {
        if self.len() == 0 {
            return;
        }
        let len = self.size - 1;
        self.put(len, false);
        self.size = len;
        if self.len() % BITS == 0 {
            self.bits.pop();
        }
    }

    #[allow(dead_code)]
    fn push_all(&mut self, bits: &[bool]) {
        for bit in bits {
            self.push(*bit);
        }
    }

    fn append(&mut self, other: &BitVector) {
        let leftover = self.size % BITS;
        let empty_bits = BITS - leftover;
        let len = self.bits.len();

        self.bits.extend(other.bits.iter().cloned());
        self.size += other.size;

        if leftover == 0 {
            return;
        }
        for i in len..self.bits.len() {
            self.move_bits(empty_bits, i);
        }
        if (self.size - 1) / BITS != self.bits.len() - 1 {
            self.bits.pop();
        }
    }

    fn check(&self, i: usize) {
        assert!(i < self.size,
                format!("Index out of bounds. Index: {} >= len: {}", i, self.size));
    }

    #[allow(dead_code)]
    fn get(&self, i: usize) -> bool {
        self.check(i);
        (1 & (self.bits[i / BITS] >> (i % BITS))) != 0
    }

    fn put(&mut self, i: usize, bit: bool) {
        self.check(i);
        let (byte, bit_pos) = (i / BITS, i % BITS);
        self.bits[byte] &= !(1 << bit_pos);
        self.bits[byte] |= (bit as u8) << bit_pos;
    }

    fn move_bits(&mut self, bits: usize, i: usize) {
        let leftover = BITS - bits;
        let (from, to) = (self.bits[i], self.bits[i - 1]);
        self.bits[i - 1] = to ^ (from << leftover);
        self.bits[i] = from >> bits;
    }

    fn iter<'a>(&'a self) -> BitVectorIter<'a> {
        let itr = self.bits
            .iter()
            .flat_map(|byte| (0..BITS).map(move |i| ((byte >> i) & 1) != 0))
            .take(self.size);

        BitVectorIter { iter: Box::new(itr) }
    }

    fn len(&self) -> usize {
        self.size
    }

    #[allow(dead_code)]
    fn byte_len(&self) -> usize {
        self.bits.len()
    }
}

struct BitVectorIter<'a> {
    iter: Box<std::iter::Iterator<Item = bool> + 'a>,
}
impl<'a> IntoIterator for &'a BitVector {
    type Item = bool;
    type IntoIter = BitVectorIter<'a>;
    fn into_iter(self) -> Self::IntoIter {
        self.iter()
    }
}
impl<'a> Iterator for BitVectorIter<'a> {
    type Item = bool;
    fn next(&mut self) -> Option<Self::Item> {
        self.iter.next()
    }
}



pub trait HuffmanCompress {
    fn compress<T: std::io::Write>(&mut self, &mut T) -> Result<()>;
}

pub trait HuffmanDeCompress {
    fn de_compress<T: std::io::Write>(&mut self, writer: &mut T) -> Result<()> {
        let string = self.de_compress_to_string()?;
        writer.write_all(&string.as_bytes())?;
        Ok(())
    }
    fn de_compress_to_string(&mut self) -> Result<String>;
}

pub trait HuffmanCodes: HuffmanDeCompress + HuffmanCompress {}
impl<T: std::io::Read> HuffmanCodes for T {}
impl<T: std::io::Read> HuffmanCompress for T {
    fn compress<W: std::io::Write>(&mut self, writer: &mut W) -> Result<()> {
        let mut file_str = String::new();
        self.read_to_string(&mut file_str)?;
        let tree = HuffmanTree::new(&file_str)?;
        let encoded_file = tree.encode_string(&file_str);
        let junk = BITS - (encoded_file.len() % BITS);
        let mask = (((junk != BITS) as i8) << (BITS - 1)) >> (BITS - 1);

        tree.serialize(writer)?;
        writer.write_i8(junk as i8 & mask)?;
        writer.write(&encoded_file.bits)?;
        Ok(())
    }
}

impl<T: std::io::Read> HuffmanDeCompress for T {
    fn de_compress_to_string(&mut self) -> Result<String> {
        let tree = HuffmanTree::de_serialize(self)?;
        let junk = self.read_i8()?;
        let mut bytes = Vec::new();
        self.read_to_end(&mut bytes)?;
        let bit_vec = BitVector {
            size: BITS * bytes.len() - junk as usize,
            bits: bytes,
        };
        let string = tree.decode_string(bit_vec.iter());
        Ok(string)
    }
}


pub fn encode<P, T>(path: P, target: T) -> Result<()>
    where P: AsRef<path::Path>,
          T: AsRef<path::Path>
{
    let f1 = fs::File::open(path)?;
    let mut reader = io::BufReader::new(f1);
    let f2 = fs::File::create(target)?;
    let mut writer = io::BufWriter::new(f2);
    reader.compress(&mut writer)?;
    Ok(())
}

pub fn decode<P>(path: P) -> Result<String>
    where P: AsRef<path::Path>
{
    let f = fs::File::open(path)?;
    let mut reader = &mut io::BufReader::new(f);
    Ok(reader.de_compress_to_string()?)
}



fn read_file<P: AsRef<path::Path>>(path: P) -> std::io::Result<String> {
    let f = fs::File::open(path)?;
    let mut reader = io::BufReader::new(f);
    let mut result = String::new();
    reader.read_to_string(&mut result)?;
    Ok(result)
}

fn rand_string(len: usize) -> String {
    use rand::Rng;
    rand::thread_rng()
        .gen_iter::<char>()
        .take(len)
        .collect()
}

#[test]
fn it_works() {
    for _ in 1..100 {
        let string = rand_string(5000);
        if let Err(err) = test(&string) {
            panic!(format!("{}", err))
        }
    }
}

fn test(text: &str) -> Result<()> {
    const FILE: &'static str = "test";
    const CFILE: &'static str = "compress";
    {
        let f = fs::File::create(FILE)?;
        let mut writer = io::BufWriter::new(f);
        writer.write_all(&text.as_bytes())?;
    }
    encode(FILE, CFILE)?;
    let x = decode(CFILE)?;
    let y = read_file(FILE)?;
    assert_eq!(x, y);
    assert_eq!(x, text);
    Ok(())
}

1 comments

@Shepmaster 2016-12-27 16:30:30

  1. Combine imports like io::Read and io::Write with use std::io::{Read, Write}. In this case, consider using the IO prelude: use std::io::prelude::*.

  2. You've intermixed test and production code, as well as test and production dependencies. This produces compile-time warnings (warning: function is never used: `read_file`, `rand_string`, `test`) and wastes end-user time during compilation and bandwidth during download.

  3. I prefer to use a crate like quick-error to succinctly create error macros. I combined the description and display text where appropriate.

  4. "frecuency" is misspelled; should be frequency.

  5. It's idiomatic to use |&foo| foo instead of |foo| *foo when you can; generally Copy types allow this.

  6. If you are creating and then throwing away the HashMap, just consume it during iteration to not have any references.

  7. Prefer match *foo { Foo:Variant => /* ... */ } over match foo { &Foo::Variant => /* ... */ }. Same for if let.

  8. Pattern match against Some(true) directly instead of using a match guard. I'd also explicitly say Some(false) as there are only the two cases.

  9. Using NativeEndian is suspicious; that means the saved file will not be easily portable across different endian machines.

  10. There's nothing wrong with using reverse, but you could just reverse the order of arguments to cmp.

  11. I like to introduce a key method that Eq, Ord, Hash, etc. all use. This prevents them from drifting out of sync.

  12. I ignored BitVector as it has already been asked about in its own question.

  13. Decompress is usually written as one word with no separation.

  14. &string.as_bytes() returns a &&[u8], more references than needed. string.as_bytes() is sufficient.

  15. The rand crate should be in dev-dependencies and only used within the test module or only included when `#[cfg(test)].

  16. Generating a bunch of random strings feels like a quickcheck style test. I changed it, but the tests fail for strings containing '\0' '\1', maybe more. These are valid characters, but the error is always Empty.

  17. Avoid writing to files in a test. Files are slow compared to memory, and involving any external resources makes things harder. Since Rust tests run in parallel, as soon as you have a second test, you will have conflicts.

#[macro_use]
extern crate quick_error;
extern crate byteorder;

#[cfg(test)]
#[macro_use]
extern crate quickcheck;

use std::collections::hash_map::HashMap;
use std::collections::BinaryHeap;

use byteorder::{ReadBytesExt, WriteBytesExt, NativeEndian};
use std::{result, io, path, cmp, fs};

type Result<T> = result::Result<T, HuffmanError>;
const BITS: usize = 8;

quick_error! {
    #[derive(Debug)]
    pub enum HuffmanError {
        Io(err: io::Error) {
            from()
            cause(err)
            description(err.description())
            display("IO error: {}", err)
        }
        ParseTree {
            description("Parse tree error: invalid encoded huffman tree")
        }
        AlphabetMismatch {
            description("Alphabet mismatch error: alphabet doesn't match parsed tree")
        }
        Empty {
            description("Empty stream")
        }
    }
}

#[derive(Eq, Debug)]
enum HuffmanTree {
    Inner {
        frequency: u32,
        left: Box<HuffmanTree>,
        right: Box<HuffmanTree>,
    },
    Leaf { frequency: u32, character: char },
}

fn characters_mode(file: &str) -> HashMap<char, u32> {
    let mut table = HashMap::new();
    for c in file.chars() {
        *(table.entry(c).or_insert(0)) += 1;
    }
    table
}

fn create_priority_queue(input: &str) -> BinaryHeap<HuffmanTree> {
    characters_mode(input)
        .into_iter()
        .map(|(c, f)| {
            HuffmanTree::Leaf {
                character: c,
                frequency: f,
            }
        })
        .collect()
}

impl HuffmanTree {
    fn new(input: &str) -> Result<HuffmanTree> {
        let mut pq = create_priority_queue(input);
        for _ in 1..pq.len() {
            let min1 = pq.pop().unwrap();
            let min2 = pq.pop().unwrap();
            pq.push(min1.join(min2));
        }
        pq.pop().ok_or(HuffmanError::Empty)
    }

    fn freq(&self) -> u32 {
        use HuffmanTree::*;
        match *self {
            Inner { frequency, .. } |
            Leaf { frequency, .. } => frequency,
        }
    }

    fn join(self, other: HuffmanTree) -> HuffmanTree {
        HuffmanTree::Inner {
            frequency: self.freq() + other.freq(),
            left: Box::new(self),
            right: Box::new(other),
        }
    }

    fn create_char_mapper_recur(&self,
                                bit_vec: &mut BitVector,
                                map: &mut HashMap<char, BitVector>) {
        use HuffmanTree::*;
        match *self {
            Inner { ref left, ref right, .. } => {
                bit_vec.push(true);
                left.create_char_mapper_recur(bit_vec, map);
                bit_vec.push(false);
                right.create_char_mapper_recur(bit_vec, map);
            }
            Leaf { character, .. } => {
                map.insert(character, bit_vec.clone());
            }
        }
        bit_vec.pop();
    }

    fn create_char_mapper(&self) -> HashMap<char, BitVector> {
        let mut bit_vec = BitVector::new();
        let mut map = HashMap::new();
        if let HuffmanTree::Leaf { .. } = *self {
            bit_vec.push(true);
        }
        self.create_char_mapper_recur(&mut bit_vec, &mut map);
        map
    }

    fn decode<I, T>(encoded_walk: &mut I, mut chars: &mut T) -> Result<HuffmanTree>
        where I: Iterator<Item = bool>,
              T: Iterator<Item = char>
    {
        match encoded_walk.next() {
            Some(true) => {
                let left = Self::decode(encoded_walk, chars)?;
                let right = Self::decode(encoded_walk, chars)?;
                Ok(left.join(right))
            }
            Some(false) => {
                let c = chars.next()
                    .ok_or(HuffmanError::AlphabetMismatch)?;
                Ok(HuffmanTree::Leaf {
                    frequency: 0,
                    character: c,
                })
            }
            None => Err(HuffmanError::ParseTree),
        }
    }

    fn serialize<W: std::io::Write>(&self, writer: &mut W) -> Result<()> {
        let (encoded_walk, alphabet) = self.encode();

        let walk_bit_len = encoded_walk.len() as u64;
        writer.write_u64::<NativeEndian>(walk_bit_len)?;
        writer.write(&encoded_walk.bits)?;

        let encoded_alphabet = alphabet.as_bytes();
        let alphabet_byte_len = encoded_alphabet.len() as u64;
        writer.write_u64::<NativeEndian>(alphabet_byte_len)?;
        writer.write_all(encoded_alphabet)?;

        Ok(())
    }

    fn de_serialize<R: std::io::Read>(reader: &mut R) -> Result<HuffmanTree> {
        use std::io::Read;

        let walk_len = reader.read_u64::<NativeEndian>()?;
        let mut walk_bytes = Vec::new();
        reader.take((walk_len + BITS as u64 - 1) / BITS as u64)
            .read_to_end(&mut walk_bytes)?;

        let bit_vec = BitVector {
            bits: walk_bytes,
            size: walk_len as usize,
        };

        let chars_len = reader.read_u64::<NativeEndian>()?;
        let mut chars = String::new();
        reader.take(chars_len)
            .read_to_string(&mut chars)?;

        let bit_iter = &mut bit_vec.iter();
        Self::decode(bit_iter, &mut chars.chars())
    }

    fn encode_recur(&self, bit_vec: &mut BitVector, alphabet: &mut String) {
        use HuffmanTree::*;
        match *self {
            Inner { ref left, ref right, .. } => {
                bit_vec.push(true);
                left.encode_recur(bit_vec, alphabet);
                right.encode_recur(bit_vec, alphabet);
            }
            Leaf { character, .. } => {
                bit_vec.push(false);
                alphabet.push(character);
            }
        }
    }

    fn encode(&self) -> (BitVector, String) {
        let mut bit_vector = BitVector::new();
        let mut alphabet = String::new();
        self.encode_recur(&mut bit_vector, &mut alphabet);
        (bit_vector, alphabet)
    }

    fn encode_string(&self, file_str: &str) -> BitVector {
        let char_map = self.create_char_mapper();
        let itr = file_str.chars()
            .map(|c| char_map.get(&c).expect("Assertion error"));

        let mut bit_vec = BitVector::new();
        for code in itr {
            bit_vec.append(&code);
        }
        bit_vec
    }

    fn decode_string<I>(&self, bit_iter: I) -> String
        where I: Iterator<Item = bool>
    {
        use HuffmanTree::*;
        let mut node = self;
        let mut output = String::new();
        for bit in bit_iter {
            if let Inner { ref left, ref right, .. } = *node {
                node = if bit { left } else { right }
            }
            if let Leaf { character, .. } = *node {
                output.push(character);
                node = self;
            }
        }
        output
    }
}

impl Ord for HuffmanTree {
    fn cmp(&self, other: &HuffmanTree) -> cmp::Ordering {
        self.freq().cmp(&other.freq()).reverse()
    }
}

impl PartialOrd for HuffmanTree {
    fn partial_cmp(&self, other: &HuffmanTree) -> Option<cmp::Ordering> {
        Some(self.cmp(other))
    }
}

impl PartialEq for HuffmanTree {
    fn eq(&self, other: &HuffmanTree) -> bool {
        self.freq() == other.freq()
    }
}

// BitVector elided

pub trait HuffmanCompress {
    fn compress<T: std::io::Write>(&mut self, &mut T) -> Result<()>;
}

pub trait HuffmanDeCompress {
    fn de_compress<T: std::io::Write>(&mut self, writer: &mut T) -> Result<()> {
        let string = self.de_compress_to_string()?;
        writer.write_all(string.as_bytes())?;
        Ok(())
    }
    fn de_compress_to_string(&mut self) -> Result<String>;
}

pub trait HuffmanCodes: HuffmanDeCompress + HuffmanCompress {}

impl<T: std::io::Read> HuffmanCodes for T {}

impl<T: std::io::Read> HuffmanCompress for T {
    fn compress<W: std::io::Write>(&mut self, writer: &mut W) -> Result<()> {
        let mut file_str = String::new();
        self.read_to_string(&mut file_str)?;
        let tree = HuffmanTree::new(&file_str)?;
        let encoded_file = tree.encode_string(&file_str);
        let junk = BITS - (encoded_file.len() % BITS);
        let mask = (((junk != BITS) as i8) << (BITS - 1)) >> (BITS - 1);

        tree.serialize(writer)?;
        writer.write_i8(junk as i8 & mask)?;
        writer.write(&encoded_file.bits)?;
        Ok(())
    }
}

impl<T: std::io::Read> HuffmanDeCompress for T {
    fn de_compress_to_string(&mut self) -> Result<String> {
        let tree = HuffmanTree::de_serialize(self)?;
        let junk = self.read_i8()?;
        let mut bytes = Vec::new();
        self.read_to_end(&mut bytes)?;
        let bit_vec = BitVector {
            size: BITS * bytes.len() - junk as usize,
            bits: bytes,
        };
        let string = tree.decode_string(bit_vec.iter());
        Ok(string)
    }
}

pub fn encode<P, T>(path: P, target: T) -> Result<()>
    where P: AsRef<path::Path>,
          T: AsRef<path::Path>
{
    let f1 = fs::File::open(path)?;
    let mut reader = io::BufReader::new(f1);
    let f2 = fs::File::create(target)?;
    let mut writer = io::BufWriter::new(f2);
    reader.compress(&mut writer)?;
    Ok(())
}

pub fn decode<P>(path: P) -> Result<String>
    where P: AsRef<path::Path>
{
    let f = fs::File::open(path)?;
    let mut reader = &mut io::BufReader::new(f);
    Ok(reader.de_compress_to_string()?)
}
#[cfg(test)]
mod test {
    use quickcheck::TestResult;

    use std::{io, path, fs};
    use super::*;
    use std::io::prelude::*;

    fn read_file<P: AsRef<path::Path>>(path: P) -> io::Result<String> {
        let f = fs::File::open(path)?;
        let mut reader = io::BufReader::new(f);
        let mut result = String::new();
        reader.read_to_string(&mut result)?;
        Ok(result)
    }

    quickcheck! {
        fn prop(text: String) -> super::Result<TestResult> {
            // TODO: What is the appropriate invalid string set?
            if text.is_empty() {
                return Ok(TestResult::discard());
            }

            const FILE: &'static str = "test";
            const CFILE: &'static str = "compress";
            let f = fs::File::create(FILE)?;
            let mut writer = io::BufWriter::new(f);
            writer.write_all(text.as_bytes())?;

            encode(FILE, CFILE)?;
            let x = decode(CFILE)?;
            let y = read_file(FILE)?;

            // TODO: Should probably make these into two separate
            // tests for better error reporting.
            let eq = x == y && x == text;

            Ok(TestResult::from_bool(eq))
        }
    }
}

Bigger questions:

  1. Why use char as the smallest unit instead of u8?

  2. Why force the user to read the whole input all the way? If compressing a 2GiB file, I'd have to allocate 2GiB of memory! new and encode could stream data in and out, allowing the user to choose to allocate in one chunk or not.

  3. Dealing with Files and Paths seems too specific and low-level for this library.

Related Questions

Sponsored Content

0 Answered Questions

Huffman encoding in Rust

3 Answered Questions

[SOLVED] Huffman code implementation

2 Answered Questions

0 Answered Questions

Huffman encoding implementation -follow up

  • 2017-06-06 00:30:26
  • MAG
  • 93 View
  • 2 Score
  • 0 Answer
  • Tags:   rust compression

1 Answered Questions

[SOLVED] C++ Huffman coder and decoder

  • 2017-05-18 19:52:14
  • Yksuh
  • 1213 View
  • 8 Score
  • 1 Answer
  • Tags:   c++ compression

1 Answered Questions

[SOLVED] Huffman algorithm implementation in C++

1 Answered Questions

[SOLVED] C++11 implementation of Huffman-encoding

1 Answered Questions

[SOLVED] Huffman tree Python implementation

3 Answered Questions

[SOLVED] Huffman decoding for video

1 Answered Questions

[SOLVED] Huffman encoding successive-merge function

Sponsored Content