1
0
Mirror von https://github.com/tkuschel/bees.git synchronisiert 2026-05-08 04:59:37 +02:00

hash: prepare for user-selectable hash functions

Localize the hash function in bees to a single spot to make it easier
to change later (or at runtime).

Remove some code that was using a property of CRC as an optimization.
The optimization doesn't work for other hash functions, and running the
CRC function takes more CPU time than the optimization saved.

Signed-off-by: Zygo Blaxell <bees@furryterror.org>
Dieser Commit ist enthalten in:
Zygo Blaxell
2018-10-15 21:56:13 -04:00
Ursprung b3a8fcb553
Commit 7117cb40c5
3 geänderte Dateien mit 11 neuen und 9 gelöschten Zeilen
+7
Datei anzeigen
@@ -1,5 +1,6 @@
#include "bees.h"
#include "crucible/city.h"
#include "crucible/crc64.h"
#include "crucible/string.h"
@@ -11,6 +12,12 @@
using namespace crucible;
using namespace std;
// Construct a BeesHash from the contents of a byte buffer.
//   ptr - first byte of the data to hash
//   len - number of bytes to hash, starting at ptr
// The hash algorithm is selected here: CRC64 is the active function,
// and the CityHash64 call is kept below as a commented-out alternative.
// Keeping the choice in this constructor is intended to make the hash
// function easy to swap later.
BeesHash::BeesHash(const uint8_t *ptr, size_t len) :
// m_hash(CityHash64(reinterpret_cast<const char *>(ptr), len))
m_hash(Digest::CRC::crc64(ptr, len))
{
}
ostream &
operator<<(ostream &os, const BeesHash &bh)
{
+3 -9
Datei anzeigen
@@ -1,6 +1,5 @@
#include "bees.h"
#include "crucible/crc64.h"
#include "crucible/limits.h"
#include "crucible/ntoa.h"
#include "crucible/string.h"
@@ -964,11 +963,7 @@ BeesBlockData::hash() const
// We can only dedup unaligned EOF blocks against other unaligned EOF blocks,
// so we do NOT round up to a full sum block size.
const Blob &blob = data();
// TODO: It turns out that file formats with 4K block
// alignment and embedded CRC64 do exist, and every block
// of such files has the same hash. Could use a subset
// of SHA1 here instead.
m_hash = Digest::CRC::crc64(blob.data(), blob.size());
m_hash = BeesHash(blob.data(), blob.size());
m_hash_done = true;
BEESCOUNT(block_hash);
}
@@ -980,9 +975,8 @@ bool
BeesBlockData::is_data_zero() const
{
// The CRC64 of zero is zero, so skip some work if we already know the CRC
if (m_hash_done && m_hash != 0) {
return false;
}
// ...but that doesn't work for any other hash function, and it
// saves us next to nothing.
// OK read block (maybe) and check every byte
for (auto c : data()) {
+1
Datei anzeigen
@@ -605,6 +605,7 @@ struct BeesHash {
BeesHash(Type that) : m_hash(that) { }
operator Type() const { return m_hash; }
BeesHash& operator=(const Type that) { m_hash = that; return *this; }
BeesHash(const uint8_t *ptr, size_t len);
private:
Type m_hash;