diff --git a/libfsst.cpp b/libfsst.cpp index 919cb9c..a984238 100644 --- a/libfsst.cpp +++ b/libfsst.cpp @@ -23,7 +23,7 @@ Symbol concat(Symbol a, Symbol b) { u32 length = a.length()+b.length(); if (length > Symbol::maxLength) length = Symbol::maxLength; s.set_code_len(FSST_CODE_MASK, length); - s.val.num = (b.val.num << (8*a.length())) | a.val.num; + s.store_num((b.load_num() << (8*a.length())) | a.load_num()); return s; } } // namespace libfsst @@ -33,7 +33,7 @@ template <> class hash { public: size_t operator()(const libfsst::QSymbol& q) const { - uint64_t k = q.symbol.val.num; + uint64_t k = q.symbol.load_num(); const uint64_t m = 0xc6a4a7935bd1e995; const int r = 47; uint64_t h = 0x8445d61a4e774912 ^ (8*m); @@ -125,7 +125,7 @@ SymbolTable *buildSymbolTable(Counters& counters, vector line, const Symbol s = st->hashTab[idx]; code2 = st->shortCodes[word & 0xFFFF] & FSST_CODE_MASK; word &= (0xFFFFFFFFFFFFFFFF >> (u8) s.icl); - if ((s.icl < FSST_ICL_FREE) & (s.val.num == word)) { + if ((s.icl < FSST_ICL_FREE) & (s.load_num() == word)) { code2 = s.code(); cur += s.length(); } else if (code2 >= FSST_CODE_BASE) { @@ -205,7 +205,7 @@ SymbolTable *buildSymbolTable(Counters& counters, vector line, const } // insert candidates into priority queue (by gain) - auto cmpGn = [](const QSymbol& q1, const QSymbol& q2) { return (q1.gain < q2.gain) || (q1.gain == q2.gain && q1.symbol.val.num > q2.symbol.val.num); }; + auto cmpGn = [](const QSymbol& q1, const QSymbol& q2) { return (q1.gain < q2.gain) || (q1.gain == q2.gain && q1.symbol.load_num() > q2.symbol.load_num()); }; priority_queue,decltype(cmpGn)> pq(cmpGn); for (auto& q : cands) pq.push(q); @@ -337,7 +337,7 @@ static inline size_t compressSIMD(SymbolTable &symbolTable, u8* symbolBase, size Symbol s = symbolTable.hashTab[idx]; out[1] = (u8) word; // speculatively write out escaped byte word &= (0xFFFFFFFFFFFFFFFF >> (u8) s.icl); - if ((s.icl < FSST_ICL_FREE) && s.val.num == word) { + if ((s.icl < FSST_ICL_FREE) && s.load_num() == word) { *out++ = (u8) s.code(); cur += s.length(); } else { // could be a 2-byte or 1-byte code, or miss @@ -398,7 +398,7 @@ static inline size_t compressBulk(SymbolTable &symbolTable, size_t nlines, const Symbol s = symbolTable.hashTab[idx]; out[1] = (u8) word; // speculatively write out escaped byte word &= (0xFFFFFFFFFFFFFFFF >> (u8) s.icl); - if ((s.icl < FSST_ICL_FREE) && s.val.num == word) { + if ((s.icl < FSST_ICL_FREE) && s.load_num() == word) { *out++ = (u8) s.code(); cur += s.length(); } else if (avoidBranch) { // could be a 2-byte or 1-byte code, or miss @@ -535,6 +535,8 @@ extern "C" u32 fsst_export(fsst_encoder_t *encoder, u8 *buf) { (((u64) e->symbolTable->nSymbols) << 8) | FSST_ENDIAN_MARKER; // least significant byte is nonzero + version = swap64_if_be(version); // ensure version is little-endian encoded + /* do not assume unaligned reads here */ memcpy(buf, &version, 8); buf[8] = e->symbolTable->zeroTerminated; @@ -559,6 +561,8 @@ extern "C" u32 fsst_import(fsst_decoder_t *decoder, u8 const *buf) { // version field (first 8 bytes) is now there just for future-proofness, unused still (skipped) memcpy(&version, buf, 8); + version = swap64_if_be(version); // version is always little-endian encoded + if ((version>>32) != FSST_VERSION) return 0; decoder->zeroTerminated = buf[8]&1; memcpy(lenHisto, buf+9, 8); diff --git a/libfsst.hpp b/libfsst.hpp index a29a4af..6e43516 100644 --- a/libfsst.hpp +++ b/libfsst.hpp @@ -60,10 +60,18 @@ typedef uint64_t u64; #define FSST_CODE_MASK (FSST_CODE_MAX-1UL) /* all bits set: indicating a symbol that has not been assigned a code yet */ namespace libfsst { +constexpr inline uint64_t swap64_if_be(uint64_t v) noexcept { +#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + return __builtin_bswap64(v); +#else + return v; // little-endian (or unknown), so no swap needed +#endif +} + inline uint64_t fsst_unaligned_load(u8 const* V) { uint64_t Ret; memcpy(&Ret, V, sizeof(uint64_t)); // compiler will generate efficient code (unaligned load, where possible) - return Ret; + return swap64_if_be(Ret); } struct Symbol { @@ -77,7 +85,7 @@ struct Symbol { Symbol() : icl(0) { val.num = 0; } - explicit Symbol(u8 c, u16 code) : icl((1<<28)|(code<<16)|56) { val.num = c; } // single-char symbol + explicit Symbol(u8 c, u16 code) : icl((1<<28)|(code<<16)|56) { store_num(c); } // single-char symbol explicit Symbol(const char* begin, const char* end) : Symbol(begin, (u32) (end-begin)) {} explicit Symbol(const u8* begin, const u8* end) : Symbol((const char*)begin, (u32) (end-begin)) {} explicit Symbol(const char* input, u32 len) { @@ -92,18 +100,21 @@ struct Symbol { } void set_code_len(u32 code, u32 len) { icl = (len<<28)|(code<<16)|((8-len)*8); } + u64 load_num() const { return swap64_if_be(val.num); } + void store_num(u64 v) { val.num = swap64_if_be(v); } + u32 length() const { return (u32) (icl >> 28); } u16 code() const { return (icl >> 16) & FSST_CODE_MASK; } u32 ignoredBits() const { return (u32) icl; } - u8 first() const { assert( length() >= 1); return 0xFF & val.num; } - u16 first2() const { assert( length() >= 2); return 0xFFFF & val.num; } + u8 first() const { assert( length() >= 1); return 0xFF & load_num(); } + u16 first2() const { assert( length() >= 2); return 0xFFFF & load_num(); } #define FSST_HASH_LOG2SIZE 10 #define FSST_HASH_PRIME 2971215073LL #define FSST_SHIFT 15 #define FSST_HASH(w) (((w)*FSST_HASH_PRIME)^(((w)*FSST_HASH_PRIME)>>FSST_SHIFT)) - size_t hash() const { size_t v = 0xFFFFFF & val.num; return FSST_HASH(v); } // hash on the next 3 bytes + size_t hash() const { size_t v = 0xFFFFFF & load_num(); return FSST_HASH(v); } // hash on the next 3 bytes }; // Symbol that can be put in a queue, ordered on gain @@ -218,7 +229,7 @@ struct SymbolTable { bool taken = (hashTab[idx].icl < FSST_ICL_FREE); if (taken) return false; // collision in hash table hashTab[idx].icl = s.icl; - hashTab[idx].val.num = s.val.num & (0xFFFFFFFFFFFFFFFF >> (u8) s.icl); + hashTab[idx].store_num(s.load_num() & (0xFFFFFFFFFFFFFFFF >> (u8) s.icl)); return true; } bool add(Symbol s) { @@ -239,7 +250,7 @@ struct SymbolTable { /// Find longest expansion, return code (= position in symbol table) u16 findLongestSymbol(Symbol s) const { size_t idx = s.hash() & (hashTabSize-1); - if (hashTab[idx].icl <= s.icl && hashTab[idx].val.num == (s.val.num & (0xFFFFFFFFFFFFFFFF >> ((u8) hashTab[idx].icl)))) { + if (hashTab[idx].icl <= s.icl && hashTab[idx].load_num() == (s.load_num() & (0xFFFFFFFFFFFFFFFF >> ((u8) hashTab[idx].icl)))) { return (hashTab[idx].icl>>16) & FSST_CODE_MASK; // matched a long symbol } if (s.length() >= 2) {