Skip to content
This repository was archived by the owner on Oct 14, 2025. It is now read-only.
38 changes: 34 additions & 4 deletions src/zopfli/blocksplitter.c
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ Author: [email protected] (Jyrki Alakuijala)
#include <stdlib.h>

#include "deflate.h"
#include "squeeze.h"
#include "tree.h"
#include "util.h"

Expand Down Expand Up @@ -62,7 +61,7 @@ static size_t FindMinimum(FindMinimumFun f, void* context,
size_t p[NUM];
double vp[NUM];
size_t besti;
double best;
double best = ZOPFLI_LARGE_FLOAT;
double lastbest = ZOPFLI_LARGE_FLOAT;
size_t pos = start;

Expand All @@ -71,6 +70,10 @@ static size_t FindMinimum(FindMinimumFun f, void* context,

for (i = 0; i < NUM; i++) {
p[i] = start + (i + 1) * ((end - start) / (NUM + 1));
if(pos == p[i]){
vp[i] = best;
continue;
}
vp[i] = f(p[i], context);
}
besti = 0;
Expand Down Expand Up @@ -274,7 +277,7 @@ void ZopfliBlockSplitLZ77(const ZopfliOptions* options,

void ZopfliBlockSplit(const ZopfliOptions* options,
const unsigned char* in, size_t instart, size_t inend,
size_t maxblocks, size_t** splitpoints, size_t* npoints) {
size_t maxblocks, size_t** splitpoints, size_t* npoints, SymbolStats** stats) {
size_t pos = 0;
size_t i;
ZopfliBlockState s;
Expand All @@ -298,19 +301,46 @@ void ZopfliBlockSplit(const ZopfliOptions* options,
ZopfliBlockSplitLZ77(options,
&store, maxblocks,
&lz77splitpoints, &nlz77points);
(*stats) = (SymbolStats*)realloc(*stats, (nlz77points + 1) * sizeof(SymbolStats));

/* Convert LZ77 positions to positions in the uncompressed input. */
pos = instart;
if (nlz77points > 0) {
for (i = 0; i < store.size; i++) {
size_t length = store.dists[i] == 0 ? 1 : store.litlens[i];
if (lz77splitpoints[*npoints] == i) {
if (lz77splitpoints[(*npoints)] == i) {
size_t temp = store.size;
size_t shift = (*npoints) ? lz77splitpoints[*npoints - 1] : 0;
store.size = i - shift;
store.dists += shift;
store.litlens += shift;

InitStats(&((*stats)[*npoints]));
GetStatistics(&store, &((*stats)[*npoints]));
store.size = temp;
store.dists -= shift;
store.litlens -= shift;
ZOPFLI_APPEND_DATA(pos, splitpoints, npoints);
if (*npoints == nlz77points) break;
}
pos += length;
}
size_t shift = lz77splitpoints[*npoints - 1];
store.size -= shift;
store.dists += shift;
store.litlens += shift;

InitStats(&((*stats)[*npoints]));
GetStatistics(&store, &((*stats)[*npoints]));
store.size += shift;
store.dists -= shift;
store.litlens -= shift;
}
else{
InitStats(*stats);
GetStatistics(&store, *stats);
}

assert(*npoints == nlz77points);

free(lz77splitpoints);
Expand Down
3 changes: 2 additions & 1 deletion src/zopfli/blocksplitter.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ ones that enhance it.
#include <stdlib.h>

#include "lz77.h"
#include "squeeze.h"
#include "zopfli.h"


Expand Down Expand Up @@ -59,7 +60,7 @@ npoints: pointer to amount of splitpoints, for the dynamic array. The amount of
*/
void ZopfliBlockSplit(const ZopfliOptions* options,
const unsigned char* in, size_t instart, size_t inend,
size_t maxblocks, size_t** splitpoints, size_t* npoints);
size_t maxblocks, size_t** splitpoints, size_t* npoints, SymbolStats** stats);

/*
Divides the input into equal blocks, does not even take LZ77 lengths into
Expand Down
11 changes: 9 additions & 2 deletions src/zopfli/deflate.c
Original file line number Diff line number Diff line change
Expand Up @@ -813,13 +813,16 @@ void ZopfliDeflatePart(const ZopfliOptions* options, int btype, int final,
unsigned char* bp, unsigned char** out,
size_t* outsize) {
size_t i;
SymbolStats* stats;

/* byte coordinates rather than lz77 index */
size_t* splitpoints_uncompressed = 0;
size_t npoints = 0;
size_t* splitpoints = 0;
double totalcost = 0;
ZopfliLZ77Store lz77;

stats = 0;
/* If btype=2 is specified, it tries all block types. If a lesser btype is
given, then however it forces that one. Neither of the lesser types needs
block splitting as they have no dynamic huffman trees. */
Expand All @@ -845,7 +848,7 @@ void ZopfliDeflatePart(const ZopfliOptions* options, int btype, int final,
if (options->blocksplitting) {
ZopfliBlockSplit(options, in, instart, inend,
options->blocksplittingmax,
&splitpoints_uncompressed, &npoints);
&splitpoints_uncompressed, &npoints, &stats);
splitpoints = (size_t*)malloc(sizeof(*splitpoints) * npoints);
}

Expand All @@ -858,7 +861,7 @@ void ZopfliDeflatePart(const ZopfliOptions* options, int btype, int final,
ZopfliLZ77Store store;
ZopfliInitLZ77Store(in, &store);
ZopfliInitBlockState(options, start, end, 1, &s);
ZopfliLZ77Optimal(&s, in, start, end, options->numiterations, &store);
ZopfliLZ77Optimal(&s, in, start, end, options->numiterations, &store, stats ? &stats[i] : 0);
totalcost += ZopfliCalculateBlockSizeAutoType(&store, 0, store.size);

ZopfliAppendLZ77Store(&store, &lz77);
Expand All @@ -868,6 +871,10 @@ void ZopfliDeflatePart(const ZopfliOptions* options, int btype, int final,
ZopfliCleanLZ77Store(&store);
}

if(stats){
free(stats);
}

/* Second block splitting attempt */
if (options->blocksplitting && npoints > 1) {
size_t* splitpoints2 = 0;
Expand Down
155 changes: 90 additions & 65 deletions src/zopfli/katajainen.c
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,20 @@ Bounded package merge algorithm, based on the paper
Jyrki Katajainen, Alistair Moffat, Andrew Turpin".
*/

#ifdef __cplusplus
#include <algorithm>
extern "C" {
#endif

#include "katajainen.h"
#include <assert.h>
#include <stdlib.h>
#include <limits.h>

#ifdef __cplusplus
}
#endif

typedef struct Node Node;

/*
Expand All @@ -39,13 +48,6 @@ struct Node {
int count; /* Leaf symbol index, or number of leaves before this chain. */
};

/*
Memory pool for nodes.
The pool is represented by a single cursor: callers take a node by reading
`next` and advancing it (e.g. `pool->next++`). The struct itself stores no
capacity or base pointer.
*/
typedef struct NodePool {
Node* next; /* Pointer to a free node in the pool. */
} NodePool;

/*
Initializes a chain node with the given values and marks it as in use.
*/
Expand All @@ -55,64 +57,18 @@ static void InitNode(size_t weight, int count, Node* tail, Node* node) {
node->tail = tail;
}

/*
One step of the Boundary Package-Merge algorithm: appends a new chain to the
list selected by `index`. Depending on the weights, the new chain is either the
next leaf or a package made of the two lookahead chains of the previous list.
lists: The lists of chains; each list holds its two lookahead chains.
leaves: The leaves, one per symbol.
numsymbols: Number of leaves.
pool: The node memory pool; one node is taken from it per created chain.
index: The index of the list in which a new chain or leaf is required.
*/
static void BoundaryPM(Node* (*lists)[2], Node* leaves, int numsymbols,
                       NodePool* pool, int index) {
  Node* const previous = lists[index][1];
  const int leafcount = previous->count; /* Count of last chain of list. */
  Node* fresh;
  size_t packageweight;

  /* List 0 only ever receives leaves; nothing to do once all are used. */
  if (index == 0 && leafcount >= numsymbols) {
    return;
  }

  /* Publish the new node in the list before any recursion, so that the list
     already points at it while deeper steps run. */
  fresh = pool->next++;
  lists[index][0] = previous;
  lists[index][1] = fresh;

  if (index == 0) {
    /* New leaf node in list 0. */
    InitNode(leaves[leafcount].weight, leafcount + 1, 0, fresh);
    return;
  }

  packageweight = lists[index - 1][0]->weight + lists[index - 1][1]->weight;

  if (leafcount < numsymbols && packageweight > leaves[leafcount].weight) {
    /* The next leaf is cheaper than the package: insert the leaf, which
       increments the leaf count of the chain. */
    InitNode(leaves[leafcount].weight, leafcount + 1, previous->tail, fresh);
    return;
  }

  /* Take the package. This consumes both lookahead chains of the previous
     list, so two replacements have to be generated there. */
  InitNode(packageweight, leafcount, lists[index - 1][1], fresh);
  BoundaryPM(lists, leaves, numsymbols, pool, index - 1);
  BoundaryPM(lists, leaves, numsymbols, pool, index - 1);
}

static void BoundaryPMFinal(Node* (*lists)[2],
Node* leaves, int numsymbols, NodePool* pool, int index) {
Node* leaves, int numsymbols, Node* pool, int index) {
int lastcount = lists[index][1]->count; /* Count of last chain of list. */

size_t sum = lists[index - 1][0]->weight + lists[index - 1][1]->weight;

if (lastcount < numsymbols && sum > leaves[lastcount].weight) {
Node* newchain = pool->next;
Node* oldchain = lists[index][1]->tail;

lists[index][1] = newchain;
newchain->count = lastcount + 1;
newchain->tail = oldchain;
lists[index][1] = pool;
pool->count = lastcount + 1;
pool->tail = oldchain;
} else {
lists[index][1]->tail = lists[index - 1][1];
}
Expand All @@ -123,10 +79,10 @@ Initializes each list with as lookahead chains the two leaves with lowest
weights.
*/
static void InitLists(
NodePool* pool, const Node* leaves, int maxbits, Node* (*lists)[2]) {
Node* pool, const Node* leaves, int maxbits, Node* (*lists)[2]) {
int i;
Node* node0 = pool->next++;
Node* node1 = pool->next++;
Node* node0 = pool;
Node* node1 = pool + 1;
InitNode(leaves[0].weight, 1, 0, node0);
InitNode(leaves[1].weight, 2, 0, node1);
for (i = 0; i < maxbits; i++) {
Expand Down Expand Up @@ -162,20 +118,32 @@ static void ExtractBitLengths(Node* chain, Node* leaves, unsigned* bitlengths) {
}
}

#ifndef __cplusplus
/*
Comparator for sorting the leaves. Has the function signature for qsort.
*/
static int LeafComparator(const void* a, const void* b) {
return ((const Node*)a)->weight - ((const Node*)b)->weight;
}
#else
struct {
bool operator()(const Node a, const Node b) {
return (a.weight < b.weight);
}
} cmp;
#endif

#ifdef __cplusplus
extern "C"
#endif
int ZopfliLengthLimitedCodeLengths(
const size_t* frequencies, int n, int maxbits, unsigned* bitlengths) {
NodePool pool;
Node* pool;
int i;
int numsymbols = 0; /* Amount of symbols with frequency > 0. */
int numBoundaryPMRuns;
Node* nodes;
unsigned char stack[16];

/* Array of lists of chains. Each list requires only two lookahead chains at
a time, so each list is an array of two Node*'s. */
Expand Down Expand Up @@ -229,7 +197,11 @@ int ZopfliLengthLimitedCodeLengths(
}
leaves[i].weight = (leaves[i].weight << 9) | leaves[i].count;
}
#ifdef __cplusplus
std::sort(leaves, leaves + numsymbols, cmp);
#else
qsort(leaves, numsymbols, sizeof(Node), LeafComparator);
#endif
for (i = 0; i < numsymbols; i++) {
leaves[i].weight >>= 9;
}
Expand All @@ -240,18 +212,71 @@ int ZopfliLengthLimitedCodeLengths(

/* Initialize node memory pool. */
nodes = (Node*)malloc(maxbits * 2 * numsymbols * sizeof(Node));
pool.next = nodes;
pool = nodes;

lists = (Node* (*)[2])malloc(maxbits * sizeof(*lists));
InitLists(&pool, leaves, maxbits, lists);
InitLists(pool, leaves, maxbits, lists);
pool += 2;

/* In the last list, 2 * numsymbols - 2 active chains need to be created. Two
are already created in the initialization. Each BoundaryPM run creates one. */
numBoundaryPMRuns = 2 * numsymbols - 4;
for (i = 0; i < numBoundaryPMRuns - 1; i++) {
BoundaryPM(lists, leaves, numsymbols, &pool, maxbits - 1);
/*
Performs a Boundary Package-Merge step. Puts a new chain in the given list. The
new chain is, depending on the weights, a leaf or a combination of two chains
from the previous list.
*/
unsigned stackpos;
stack[0] = maxbits - 1;

for (stackpos = 0; ;) {
unsigned char index = stack[stackpos];

int lastcount = lists[index][1]->count; /* Count of last chain of list. */

Node* newchain = pool++;
Node* oldchain = lists[index][1];
size_t sum;

/* These are set up before the recursive calls below, so that there is a list
pointing to the new node, to let the garbage collection know it's in use. */
lists[index][0] = oldchain;
lists[index][1] = newchain;

sum = lists[index - 1][0]->weight + lists[index - 1][1]->weight;

if (lastcount < numsymbols && sum > leaves[lastcount].weight) {
/* New leaf inserted in list, so count is incremented. */
InitNode(leaves[lastcount].weight, lastcount + 1, oldchain->tail, newchain);
} else {
InitNode(sum, lastcount, lists[index - 1][1], newchain);
/* Two lookahead chains of previous list used up, create new ones. */
if (index == 1) {
if (lists[0][1]->count < numsymbols) {
lastcount = lists[0][1]->count;
lists[0][0] = lists[0][1];
lists[0][1] = pool++;
InitNode(leaves[lastcount].weight, lastcount + 1, 0, lists[0][1]);
lastcount++;
if(lastcount < numsymbols){
lists[0][0] = lists[0][1];
lists[0][1] = pool++;
InitNode(leaves[lastcount].weight, lastcount + 1, 0, lists[0][1]);
}
}
}
else {
stack[stackpos++] = index - 1;
stack[stackpos++] = index - 1;
}
}
if (!stackpos--) {
break;
}
}
}
BoundaryPMFinal(lists, leaves, numsymbols, &pool, maxbits - 1);
BoundaryPMFinal(lists, leaves, numsymbols, pool, maxbits - 1);

ExtractBitLengths(lists[maxbits - 1][1], leaves, bitlengths);

Expand Down
3 changes: 3 additions & 0 deletions src/zopfli/katajainen.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,9 @@ maxbits: Maximum bit length, inclusive.
bitlengths: Output, the bitlengths for the symbol prefix codes.
return: 0 for OK, non-0 for error.
*/
#ifdef __cplusplus
extern "C"
#endif
int ZopfliLengthLimitedCodeLengths(
const size_t* frequencies, int n, int maxbits, unsigned* bitlengths);

Expand Down
Loading