1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
use crate::convert::*;
use core::hash::{Hasher};

///This constant come from Kunth's prng (Empirically it works better than those from splitmix32).
const MULTIPLE: u64 = crate::MULTIPLE;
const INCREMENT: u64 = 1442695040888963407;
const ROT: u32 = 23; //17

/// A `Hasher` for hashing an arbitrary stream of bytes.
///
/// Instances of [`AHasher`] represent state that is updated while hashing data.
///
/// Each method updates the internal state based on the new data provided. Once
/// all of the data has been provided, the resulting hash can be obtained by calling
/// `finish()`
///
/// [Clone] is also provided in case you wish to calculate hashes for two different items that
/// start with the same data.
///
#[derive(Debug, Clone)]
pub struct AHasher {
    buffer: u64
}

impl AHasher {

    /// Creates a new hasher keyed to the provided key.
    #[inline]
    pub fn new_with_key(key: u64) -> AHasher {
        AHasher { buffer: key }
    }

    /// Creates a new hasher keyed to the provided keys.
    #[inline]
    pub(crate) fn new_with_keys(key1: u64, key2: u64) -> AHasher {
        AHasher { buffer: key1 ^ (key2.rotate_left(ROT)) }
    }

    /// This update function has the goal of updating the buffer with a single multiply
    /// FxHash does this but is venerable to attack. To avoid this input needs to be masked to with an unpredictable value.
    /// However other hashes such as murmurhash have taken that approach but were found venerable to attack.
    /// The attack was based on the idea of reversing the pre-mixing (Which is necessarily reversible otherwise
    /// bits would be lost) then placing a difference in the highest bit before the multiply. Because a multiply
    /// can never affect the bits to the right of it. This version avoids this vulnerability by rotating and
    /// performing a second multiply. This makes it impossible for an attacker to place a single bit
    /// difference between two blocks so as to cancel each other. (While the transform is still reversible if you know the key)
    #[inline(always)]
    fn update(&mut self, new_data: u64) {
        let result: [u64;2] = ((new_data ^ self.buffer) as u128).wrapping_mul(MULTIPLE as u128).convert();
        self.buffer = result[0].wrapping_add(result[1]);
    }

    /// This is similar to the above update function (see it's description). But is designed to run in a loop
    /// that will be unrolled and vectorized. So instead of using the buffer, it uses a 'key' that it updates
    /// and returns. The buffer is only xored at the end. This structure is so that when the method is inlined,
    /// the compiler will unroll any loop this gets placed in and the loop can be automatically vectorized
    /// and the rotates, xors, and multiplies can be paralleled.
    ///
    /// The key needs to be incremented between consecutive calls to prevent (a,b) from hashing the same as (b,a).
    /// The adding of the increment is moved to the bottom rather than the top. This allows one less add to be
    /// performed overall, but more importantly, it follows the multiply, which is expensive. So the CPU can
    /// run another operation afterwords if does not depend on the output of the multiply operation.
    #[inline(always)]
    fn ordered_update(&mut self, new_data: u64, key: u64) -> u64 {
        self.buffer ^= (new_data ^ key).wrapping_mul(MULTIPLE).rotate_left(ROT).wrapping_mul(MULTIPLE);
        key.wrapping_add(INCREMENT)
    }
}

#[inline(never)]
#[no_mangle]
fn hash_test(input: &[u8]) -> u64 {
    let mut a = AHasher::new_with_key(67);
    a.write(input);
    a.finish()
}

/// Provides methods to hash all of the primitive types.
impl Hasher for AHasher {

    #[inline]
    fn write_u8(&mut self, i: u8) {
        self.update(i as u64);
    }

    #[inline]
    fn write_u16(&mut self, i: u16) {
        self.update(i as u64);
    }

    #[inline]
    fn write_u32(&mut self, i: u32) {
        self.update(i as u64);
    }

    #[inline]
    fn write_u64(&mut self, i: u64) {
        self.update(i as u64);
    }

    #[inline]
    fn write_u128(&mut self, i: u128) {
        let data: [u64;2] = i.convert();
        self.update(data[0]);
        self.update(data[1]);
    }

    #[inline]
    fn write_usize(&mut self, i: usize) {
        self.write_u64(i as u64);
    }

    #[inline]
    fn write(&mut self, input: &[u8]) {
        let mut data = input;
        let length = data.len() as u64;
        //Needs to be an add rather than an xor because otherwise it could be canceled with carefully formed input.
        self.buffer = self.buffer.wrapping_add(length.wrapping_mul(MULTIPLE));
        //A 'binary search' on sizes reduces the number of comparisons.
        if data.len() > 8 {
            if data.len() > 16 {
                let tail = data.read_last_u64();
                let mut key: u64 = self.buffer;
                while data.len() > 8 {
                    let (val, rest) = data.read_u64();
                    key = self.ordered_update(val, key);
                    data = rest;
                }
                self.update(tail);
            } else {
                self.update(data.read_u64().0);
                self.update(data.read_last_u64());
            }
        } else {
            if data.len() >= 2 {
                if data.len() >= 4 {
                    let block: [u32; 2] = [data.read_u32().0, data.read_last_u32()];
                    self.update(block.convert());
                } else {
                    let block: [u16; 2] = [data.read_u16().0, data.read_last_u16()];
                    let val: u32 = block.convert();
                    self.update(val as u64);
                }
            } else {
                let value = if data.len() > 0 {
                    data[0] //len 1
                } else {
                    0
                };
                self.update(value as u64);
            }
        }
    }
    #[inline]
    fn finish(&self) -> u64 {
        //self.buffer.wrapping_mul(MULTIPLE).rotate_left(9).wrapping_mul(MULTIPLE)
        self.buffer
    }
}


#[cfg(test)]
mod tests {
    use crate::convert::Convert;
    use crate::fallback_hash::*;

    #[test]
    fn test_hash() {
        let mut hasher = AHasher::new_with_keys(0,0);
        let value: u64 = 1 << 32;
        hasher.update(value);
        let result = hasher.buffer;
        let mut hasher = AHasher::new_with_keys(0,0);
        let value2: u64 = 1;
        hasher.update(value2);
        let result2 = hasher.buffer;
        let result: [u8; 8] = result.convert();
        let result2: [u8; 8] = result2.convert();
        assert_ne!(hex::encode(result), hex::encode(result2));
    }

    #[test]
    fn test_conversion() {
        let input: &[u8] = "dddddddd".as_bytes();
        let bytes: u64 = as_array!(input, 8).convert();
        assert_eq!(bytes, 0x6464646464646464);
    }
}