#Encode/Decode base 64 string

7 messages · Page 1 of 1 (latest)

trim dew
#

Hey,
I'm trying to write two functions using the bytes crate: one that takes a String, encodes it as b64 and write it in a BytesMut and another function that does the inverse.

I'm using these lookup tables:

pub const BASE64_LOOKUP_TABLE: &[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
pub const BASE64_REVERSE_LOOKUP_TABLE: [u8; 256] = {
    let mut array = [u8::MAX; 256];
    
    let mut n = 0;

    let mut c = b'A';
    while c <= b'Z' {
        array[c as usize] = n;
        n += 1;
        c += 1;
    }

    let mut c = b'a';
    while c <= b'z' {
        array[c as usize] = n;
        n += 1;
        c += 1;
    }

    let mut c = b'0';
    while c <= b'9' {
        array[c as usize] = n;
        n += 1;
        c += 1;
    }

    array[b'+' as usize] = n;
    n += 1;
    array[b'/' as usize] = n;

    array
};
#

My functions:

pub fn encode_string_base64(data: String, buf: &mut BytesMut) -> &mut BytesMut {
    buf.reserve(data.len() * 3 / 4);

    let mut buffer: u32 = 0;
    let mut bits_collected: u8 = 0;

    for byte in data.as_bytes() {
        let val = crate::constants::BASE64_REVERSE_LOOKUP_TABLE[*byte as usize];

        buffer = (buffer << 6) | (val as u32);
        bits_collected += 6;

        // Process every full 8 bits collected
        if bits_collected >= 8 {
            bits_collected -= 8;
            buf.put_u8((buffer >> bits_collected) as u8);
        }
    }

    buf
}
pub fn decode_string_base64(buf: &mut BytesMut) -> Option<String> {
    if buf.is_empty() {
        return None;
    }

    let mut result = String::new();
    let mut iter = buf.iter();

    while let Some(&b1) = iter.next() {
        let b2 = iter.next().cloned().unwrap_or(0);
        let b3 = iter.next().cloned().unwrap_or(0);

        result.push(crate::constants::BASE64_LOOKUP_TABLE[(b1 >> 2) as usize] as char);
        result.push(crate::constants::BASE64_LOOKUP_TABLE[((b1 & 0x03) << 4 | (b2 >> 4)) as usize] as char);

        if iter.len() == 0 && b2 == 0 {
            result.push('=');
            result.push('=');
            break;
        }

        result.push(crate::constants::BASE64_LOOKUP_TABLE[((b2 & 0x0F) << 2 | (b3 >> 6)) as usize] as char);

        if iter.len() == 0 && b3 == 0 {
            result.push('=');
            break;
        }

        result.push(crate::constants::BASE64_LOOKUP_TABLE[(b3 & 0x3F) as usize] as char);
    }

    Some(result)
}
#

For some reason I don't know why my encode_string_base64 functions adds 2 more bytes to the encoded BytesMut:

let example_str= "G3KpTd7rY3YVAAAAQ2hpbmVzZSBIb21lIFJ1biBLaW5nBAAAAENIUktDAAAAaHR0cHM6Ly9pcGZzLmlvL2lwZnMvUW1kdUd3dzZnZUtiSGVna3V4MUN4YlBpaVZGNWJvREY5dEpCNFN2cEFKc1dzThSw+Jtm05FJV7iZQhfixqQq6W0oVTQGxegsLcBjfj4P4lyGYeApefrQw5kxjveRoBygzg45GugNdJ4E/c/M8Wn8yu0UgN/2/7FPNb+0RuRVWUBT5Nnx9k9/cZxLbXpfDw==";

let encoded_data = base64::decode(strr).unwrap();
let mut buf= BytesMut::new();
encode_string_base64(example_str.to_string(), &mut buf);

println!("base64 crate = {:?}", encoded_data);
println!("my implementation = {:?}", buf.to_vec());

#

This prints:

base64 crate = [27, 114, 169, 77, 222, 235, 99, 118, 21, 0, 0, 0, 67, 104, 105, 110, 101, 115, 101, 32, 72, 111, 109, 101, 32, 82, 117, 110, 32, 75, 105, 110, 103, 4, 0, 0, 0, 67, 72, 82, 75, 67, 0, 0, 0, 104, 116, 116, 112, 115, 58, 47, 47, 105, 112, 102, 115, 46, 105, 111, 47, 105, 112, 102, 115, 47, 81, 109, 100, 117, 71, 119, 119, 54, 103, 101, 75, 98, 72, 101, 103, 107, 117, 120, 49, 67, 120, 98, 80, 105, 105, 86, 70, 53, 98, 111, 68, 70, 57, 116, 74, 66, 52, 83, 118, 112, 65, 74, 115, 87, 115, 78, 20, 176, 248, 155, 102, 211, 145, 73, 87, 184, 153, 66, 23, 226, 198, 164, 42, 233, 109, 40, 85, 52, 6, 197, 232, 44, 45, 192, 99, 126, 62, 15, 226, 92, 134, 97, 224, 41, 121, 250, 208, 195, 153, 49, 142, 247, 145, 160, 28, 160, 206, 14, 57, 26, 232, 13, 116, 158, 4, 253, 207, 204, 241, 105, 252, 202, 237, 20, 128, 223, 246, 255, 177, 79, 53, 191, 180, 70, 228, 85, 89, 64, 83, 228, 217, 241, 246, 79, 127, 113, 156, 75, 109, 122, 95, 15]

my implementation = [27, 114, 169, 77, 222, 235, 99, 118, 21, 0, 0, 0, 67, 104, 105, 110, 101, 115, 101, 32, 72, 111, 109, 101, 32, 82, 117, 110, 32, 75, 105, 110, 103, 4, 0, 0, 0, 67, 72, 82, 75, 67, 0, 0, 0, 104, 116, 116, 112, 115, 58, 47, 47, 105, 112, 102, 115, 46, 105, 111, 47, 105, 112, 102, 115, 47, 81, 109, 100, 117, 71, 119, 119, 54, 103, 101, 75, 98, 72, 101, 103, 107, 117, 120, 49, 67, 120, 98, 80, 105, 105, 86, 70, 53, 98, 111, 68, 70, 57, 116, 74, 66, 52, 83, 118, 112, 65, 74, 115, 87, 115, 78, 20, 176, 248, 155, 102, 211, 145, 73, 87, 184, 153, 66, 23, 226, 198, 164, 42, 233, 109, 40, 85, 52, 6, 197, 232, 44, 45, 192, 99, 126, 62, 15, 226, 92, 134, 97, 224, 41, 121, 250, 208, 195, 153, 49, 142, 247, 145, 160, 28, 160, 206, 14, 57, 26, 232, 13, 116, 158, 4, 253, 207, 204, 241, 105, 252, 202, 237, 20, 128, 223, 246, 255, 177, 79, 53, 191, 180, 70, 228, 85, 89, 64, 83, 228, 217, 241, 246, 79, 127, 113, 156, 75, 109, 122, 95, 15, 63, 255]
#

Also, I'm really not satisfied of the way I wrote the decode_string_base64, but I can't find a better way to map 3 bytes to 4 base64 chars.
But can't think of any better way, any suggestion ?

ripe basalt
trim dew