#include #include #include #include #ifdef unix #define __cdecl #else #include #endif // link bit-level I/O void arc_put1 (unsigned bit); unsigned arc_get1 (); // This code is adapted from Professor Vitter's // article, Design and Analysis of Dynamic Huffman Codes, // which appeared in JACM October 1987 // A design trade-off has been made to simplify the // code: a node's block is determined dynamically. // The dynamic huffman table weight ranking // is maintained per Professor Vitter's // invariants for algorithm FGK: // 1) leaves preceed internal nodes of the // same weight in a non-decreasing // ranking of weights; // 2) equal weight internal nodes are kept // ordered by their tree positions. // weights are incremented by 2: // leaves always have even weight values; // internal nodes always have odd values. // tree nodes (HuffSize is the alphabet size): // nodes >= HuffSize are internal nodes // nodes < HuffSize are leaf nodes // node 2 * HuffSize - 1 is always the tree root; // node HuffEsc is always the escape node; // the tree is initialized by creating an // escape node as the root. // each new leaf symbol is paired with a new escape // node into the previous escape node, until the // last symbol, which is incorporated into the // tree into the position of the escape node. // The orphaned escape node is left with its // up pointer set to zero, and its nxt // pointing at the last symbol. // table size: 2 * HuffSize // huff_init(alphabet_size) // huff_encode(next_symbol) // next_symbol = huff_decode() // huff_scale(by_bits) -- scale weights and rebalance tree typedef struct { unsigned parity, // 1:right link, 0:left link up, // the next node up the tree prev, // next <= weight node nxt, // next >= weight node dnl, // next node down-left dnr, // next node down-right weight; // node weight } HTable; typedef struct { unsigned esc, // the current tree height size; // the size of the alphabet HTable table[1]; // the coding table starts here } HCoder; HCoder *huff_init (unsigned size) { unsigned esc = 2 * size - 1; HCoder *huff; // create the initial escape node // at the tree root huff = malloc (esc * sizeof(HTable) + sizeof(HCoder)); memset (huff->table, 0, 2 * size * sizeof(HTable)); huff->table[esc].weight = 1; huff->table[esc].nxt = esc; huff->size = size; huff->esc = esc; return huff; } // split escape node to incorporate new symbol void huff_split (HCoder *huff, unsigned node) { unsigned nxt, pair; // the final symbol takes the escape node's left child // tree position, and the orphan escape node links to // the first symbol in the rank list (this last symbol) if( huff->esc == huff->size ) { nxt = huff->table[huff->esc].nxt; huff->table[huff->esc + 1].dnl = node; huff->table[huff->esc].nxt = node; huff->table[huff->esc].up = 0; huff->table[nxt].prev = node; huff->table[node].up = huff->esc + 1; huff->table[node].prev = huff->esc; huff->table[node].parity = 0; huff->table[node].weight = 0; huff->table[node].nxt = nxt; return; } // other symbols initialize a new escape node. // the old escape node is promoted to parent // the new escape node and the new symbol. // HuffEsc is always the current escape node; // promote the old escape node. pair = huff->esc--; huff->table[pair].dnl = huff->esc; huff->table[pair].prev = node; huff->table[pair].dnr = node; // create a new escape node. huff->table[huff->esc].weight = 1; huff->table[huff->esc].parity = 0; huff->table[huff->esc].nxt = node; huff->table[huff->esc].up = pair; // initialize the new symbol to the // right of the new escape node // and beneath the promoted // pair node at pair huff->table[node].prev = huff->esc; huff->table[node].parity = 1; huff->table[node].weight = 0; huff->table[node].nxt = pair; huff->table[node].up = pair; } // send the bits for an escaped symbol void huff_sendid (HCoder *huff, unsigned node) { unsigned empty = 0, max; // count the number of empty nodes // before the symbol in the table while( node-- ) if( !huff->table[node].weight ) empty++; // send LSB of this count first, using // as many bits as are required for // the maximum possible count if( max = huff->esc - huff->size ) do arc_put1 (empty & 1), empty >>= 1; while( max >>= 1 ); } // slide node to right over all leaves of equal weight // return node that follows unsigned huff_slide (HCoder *huff, unsigned node, unsigned idx) { unsigned prev = huff->table[node].prev; unsigned nxt = huff->table[idx].nxt; unsigned slide, up, parity; // find rightmost possible leaf to exchange with if( idx < huff->size ) while( huff->table[nxt].weight == huff->table[idx].weight ) idx = nxt, nxt = huff->table[idx].nxt; // swap the tree positions // of node and idx. up = huff->table[idx].up; if( parity = huff->table[idx].parity ) huff->table[up].dnr = node; else huff->table[up].dnl = node; huff->table[idx].parity = huff->table[node].parity; huff->table[idx].up = huff->table[node].up; huff->table[node].parity = parity; huff->table[node].up = up; up = huff->table[idx].up; if( huff->table[idx].parity ) huff->table[up].dnr = idx; else huff->table[up].dnl = idx; // exchange the ranking positions // of node and idx. slide = huff->table[idx].prev; // simple swap?? if( slide == node ) { huff->table[node].prev = idx; huff->table[idx].nxt = node; } else { huff->table[node].prev = slide; huff->table[slide].nxt = node; slide = huff->table[node].nxt; huff->table[slide].prev = idx; huff->table[idx].nxt = slide; } // we never swap the escape node nor // the root node -- they will always // have the smallest and largest // weights in the tree huff->table[prev].nxt = idx; huff->table[nxt].prev = node; huff->table[idx].prev = prev; return huff->table[node].nxt = nxt; } // increment node weights and re balance the tree. void huff_increment (HCoder *huff, unsigned node) { unsigned nxt, up = huff->table[node].up; // obviate swapping a parent with its child: // increment the leaf and proceed // directly to its parent. if( huff->table[node].nxt == up ) huff->table[node].weight += 2; else up = node; // slide right and go up until reaching the root while( node = up, huff->table[node].up ) { nxt = huff->table[node].nxt; // promote the node to group leader // position by sliding right over // any equal weight nodes while( huff->table[node].weight == huff->table[nxt].weight ) nxt = huff_slide (huff, node, nxt); // increase the weight of the node huff->table[node].weight += 2; // internal nodes go up from this // initial group leader position if( node > huff->size ) up = huff->table[node].up; // slide incremented node over smaller weights to its right while( huff->table[node].weight > huff->table[nxt].weight ) nxt = huff_slide (huff, node, nxt); // symbol nodes slide over first, // then go up from their // final positions if( node < huff->size ) up = huff->table[node].up; } // increase the root's weight huff->table[node].weight += 2; } // scale all weights and rebalance the tree // zero weight nodes are removed from the tree // by sliding them out the left of the rank list void huff_scale (HCoder *huff, unsigned bits) { unsigned node, nxt, prev; unsigned weight; // start from the node following the escape node // proceed across the nodes in weight order // scaling them by the value of bits // clear the current escape node weight to serve // as a stopper for newly zeroed weight leaves huff->table[huff->esc].weight = 0; nxt = huff->table[huff->esc].nxt; while( node = nxt, huff->table[node].up ) { nxt = huff->table[node].nxt; // recompute the weight of internal nodes // and slide out unused ones to the left // with a zero weight if( node > huff->size ) { if( weight = huff->table[huff->table[node].dnr].weight & ~1 ) weight += huff->table[huff->table[node].dnl].weight | 1; // remove zero weight leaves by incrementing HuffEsc // or, if table is full, re-splice escape node into tree // without the zero weight leaf } else if( !(weight = huff->table[node].weight >> bits & ~1) ) if( huff->table[huff->esc].up ) huff->esc++; else { huff->table[huff->esc + 1].dnl = huff->esc; huff->table[huff->esc].up = huff->esc + 1; huff->table[nxt].prev = huff->esc; huff->table[huff->esc].nxt = nxt; } // slide the scaled node back over any // previous nodes with larger weights huff->table[node].weight = weight; prev = huff->table[node].prev; while( weight < huff->table[prev].weight ) huff_slide (huff, prev, node), prev = huff->table[node].prev; } // reset the (possibly new) escape node's weight huff->table[huff->esc].weight = 1; } // encode the next symbol void huff_encode (HCoder *huff, unsigned node) { unsigned emit = 1, bit; unsigned up, idx; // transform illegal symbols to zero if( node >= huff->size ) node = 0; // for a new symbol, direct the receiver to the escape node if( huff->table[node].weight ) idx = node; else idx = huff->esc; // accumulate the code bits by // working up the tree from // the node to the root while( up = huff->table[idx].up ) emit <<= 1, emit |= huff->table[idx].parity, idx = up; // send the code, root selector bit first while( bit = emit & 1, emit >>= 1 ) arc_put1 (bit); // send identification and incorporate // new symbols into the tree if( !huff->table[node].weight ) huff_sendid(huff, node), huff_split(huff, node); // adjust and re-balance the tree huff_increment (huff, node); } // read the identification bits // for an escaped symbol unsigned huff_readid (HCoder *huff) { unsigned empty = 0, bit = 1, node, max; // receive the code, LSB first, reading // only the number of bits necessary to // transmit the maximum possible value if( max = huff->esc - huff->size ) do empty |= arc_get1 () ? bit : 0, bit <<= 1; while( max >>= 1 ); // the count is of zero weight // symbols in the table before // the zero weight new symbol for( node = 0; node < huff->size; node++ ) if( !huff->table[node].weight ) if( !empty-- ) return node; // oops! our count was too big! return 0; } // decode the next symbol unsigned huff_decode (HCoder *huff) { unsigned node = 2 * huff->size - 1; // work down the tree from the root // until reaching either a leaf // or the escape node while( node > huff->esc ) if( arc_get1 () ) node = huff->table[node].dnr; else node = huff->table[node].dnl; // sent to the escape node??? if( node == huff->esc ) node = huff_readid (huff), huff_split (huff, node); // increment weights and rebalance // the coding tree huff_increment (huff, node); return node; } #ifdef HUFFSTANDALONE #include FILE *In = stdin, *Out = stdout; unsigned char ArcBit = 0; int ArcChar = 0; int main (int argc, char **argv) { int mode, size, nxt; unsigned mask = ~0; HCoder *huff; if( argc > 1 ) mode = argv[1][0], argv[1]++; else { printf ("Usage: %s [cdtls]nn infile outfile\nnn -- alphabet size\ninfile -- source file\noutfile -- output file", argv[0]); return 1; } if( argv[1][0] == 's' ) argv[1]++, mask = 8191; if( argc > 3 ) if( !(Out = fopen (argv[3], "w")) ) return 1; #ifndef unix _setmode (_fileno (Out), _O_BINARY); #endif // literal text if( mode == 'l' ) { if( !(size = atoi (argv[1])) ) size = 256; huff = huff_init (size); putc (size >> 8, Out); putc (size, Out); size = strlen (argv[2]); putc (size >> 16, Out); putc (size >> 8, Out); putc (size, Out); while( nxt = *argv[2]++ ) huff_encode(huff, nxt); while( ArcBit ) // flush last few bits arc_put1 (0); return 0; } // alphabet fill if( mode == 't' ) { if( !(size = atoi (argv[1])) ) size = 256; huff = huff_init (size); putc (size >> 8, Out); putc (size, Out); putc (size >> 16, Out); putc (size >> 8, Out); putc (size, Out); for( nxt = 0; nxt < size; nxt++ ) huff_encode(huff, nxt); while( ArcBit ) // flush last few bits arc_put1 (0); return 0; } if( argc > 2 ) if( !(In = fopen (argv[2], "r")) ) return 1; #ifndef unix _setmode (_fileno (In), _O_BINARY); #endif // decompression if( mode == 'd' ) { size = getc(In) << 8; size |= getc(In); huff = huff_init (size); size = getc(In) << 16; size |= getc(In) << 8; size |= getc(In); while( size ) if( nxt = huff_decode(huff), putc (nxt, Out), size-- & mask ) continue; else huff_scale(huff, 1); return 0; } // compression if( !(size = atoi (argv[1])) ) size = 256; huff = huff_init (size); putc (size >> 8, Out); putc (size, Out); fseek(In, 0, 2); size = ftell(In); fseek (In, 0, 0); putc (size >> 16, Out); putc (size >> 8, Out); putc (size, Out); while( size ) if( nxt = getc(In), huff_encode(huff, nxt), size-- & mask ) continue; else huff_scale(huff, 1); while( ArcBit ) // flush last few bits arc_put1 (0); return 0; } void arc_put1 (unsigned bit) { ArcChar <<= 1; if( bit ) ArcChar |= 1; if( ++ArcBit < 8 ) return; putc (ArcChar, Out); ArcChar = ArcBit = 0; } unsigned arc_get1 () { if( !ArcBit ) ArcChar = getc (In), ArcBit = 8; return ArcChar >> --ArcBit & 1; } #endif