root/xdelta30r/xdelta3-fgk.h

Revision 31, 21.6 kB (checked in by nlawren2, 1 year ago)

Moved xdelta30r

  • Property svn:executable set to *
Line 
1 /* xdelta 3 - delta compression tools and library
2  * Copyright (C) 2002, 2006, 2007.  Joshua P. MacDonald
3  *
4  *  This program is free software; you can redistribute it and/or modify
5  *  it under the terms of the GNU General Public License as published by
6  *  the Free Software Foundation; either version 2 of the License, or
7  *  (at your option) any later version.
8  *
9  *  This program is distributed in the hope that it will be useful,
10  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
11  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  *  GNU General Public License for more details.
13  *
14  *  You should have received a copy of the GNU General Public License
15  *  along with this program; if not, write to the Free Software
16  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
17  */
18
19 /* For demonstration purposes only.
20  */
21
22 #ifndef _XDELTA3_FGK_h_
23 #define _XDELTA3_FGK_h_
24
25 /* An implementation of the FGK algorithm described by D.E. Knuth in "Dynamic Huffman
26  * Coding" in Journal of Algorithms 6. */
27
28 /* A 32bit counter (fgk_weight) is used as the frequency counter for nodes in the huffman
29  * tree.  TODO: Need to test for overflow and/or reset stats. */
30
31 typedef struct _fgk_stream fgk_stream;
32 typedef struct _fgk_node   fgk_node;
33 typedef struct _fgk_block  fgk_block;
34 typedef unsigned int       fgk_bit;
35 typedef uint32_t           fgk_weight;
36
37 struct _fgk_block {
38   union {
39     fgk_node  *un_leader;
40     fgk_block *un_freeptr;
41   } un;
42 };
43
44 #define block_leader  un.un_leader
45 #define block_freeptr un.un_freeptr
46
47 /* The code can also support fixed huffman encoding/decoding. */
48 #define IS_ADAPTIVE 1
49
50 /* weight is a count of the number of times this element has been seen in the current
51  * encoding/decoding.  parent, right_child, and left_child are pointers defining the tree
52  * structure.  right and left point to neighbors in an ordered sequence of
53  * weights.  The left child of a node is always guaranteed to have weight not greater than
54  * its sibling.  fgk_blockLeader points to the element with the same weight as itself which is
55  * closest to the next increasing weight block.  */
56 struct _fgk_node
57 {
58   fgk_weight  weight;
59   fgk_node   *parent;
60   fgk_node   *left_child;
61   fgk_node   *right_child;
62   fgk_node   *left;
63   fgk_node   *right;
64   fgk_block  *my_block;
65 };
66
67 /* alphabet_size is the a count of the number of possible leaves in the huffman tree.  The
68  * number of total nodes counting internal nodes is ((2 * alphabet_size) - 1).
69  * zero_freq_count is the number of elements remaining which have zero frequency.
70  * zero_freq_exp and zero_freq_rem satisfy the equation zero_freq_count = 2^zero_freq_exp +
71  * zero_freq_rem.  root_node is the root of the tree, which is initialized to a node with
72  * zero frequency and contains the 0th such element.  free_node contains a pointer to the
73  * next available fgk_node space.  alphabet contains all the elements and is indexed by N.
74  * remaining_zeros points to the head of the list of zeros.  */
75 struct _fgk_stream
76 {
77   int alphabet_size;
78   int zero_freq_count;
79   int zero_freq_exp;
80   int zero_freq_rem;
81   int coded_depth;
82
83   int total_nodes;
84   int total_blocks;
85
86   fgk_bit *coded_bits;
87
88   fgk_block *block_array;
89   fgk_block *free_block;
90
91   fgk_node *decode_ptr;
92   fgk_node *remaining_zeros;
93   fgk_node *alphabet;
94   fgk_node *root_node;
95   fgk_node *free_node;
96 };
97
98 /*********************************************************************/
99 /*                             Encoder                               */
100 /*********************************************************************/
101
102 static fgk_stream*     fgk_alloc           (xd3_stream *stream /*, int alphabet_size */);
103 static void            fgk_init            (fgk_stream *h);
104 static int             fgk_encode_data     (fgk_stream *h,
105                                             int         n);
106 static INLINE fgk_bit  fgk_get_encoded_bit (fgk_stream *h);
107
108 static int             xd3_encode_fgk      (xd3_stream  *stream,
109                                             fgk_stream  *sec_stream,
110                                             xd3_output  *input,
111                                             xd3_output  *output,
112                                             xd3_sec_cfg *cfg);
113
114 /*********************************************************************/
115 /*                             Decoder                               */
116 /*********************************************************************/
117
118 static INLINE int      fgk_decode_bit      (fgk_stream *h,
119                                             fgk_bit     b);
120 static int             fgk_decode_data     (fgk_stream *h);
121 static void            fgk_destroy         (xd3_stream *stream,
122                                             fgk_stream *h);
123
124 static int             xd3_decode_fgk      (xd3_stream     *stream,
125                                             fgk_stream     *sec_stream,
126                                             const uint8_t **input,
127                                             const uint8_t  *const input_end,
128                                             uint8_t       **output,
129                                             const uint8_t  *const output_end);
130
131 /*********************************************************************/
132 /*                             Private                               */
133 /*********************************************************************/
134
135 static unsigned int fgk_find_nth_zero        (fgk_stream *h, int n);
136 static int          fgk_nth_zero             (fgk_stream *h, int n);
137 static void         fgk_update_tree          (fgk_stream *h, int n);
138 static fgk_node*    fgk_increase_zero_weight (fgk_stream *h, int n);
139 static void         fgk_eliminate_zero       (fgk_stream* h, fgk_node *node);
140 static void         fgk_move_right           (fgk_stream *h, fgk_node *node);
141 static void         fgk_promote              (fgk_stream *h, fgk_node *node);
142 static void         fgk_init_node            (fgk_node *node, int i, int size);
143 static fgk_block*   fgk_make_block           (fgk_stream *h, fgk_node *l);
144 static void         fgk_free_block           (fgk_stream *h, fgk_block *b);
145 static void         fgk_factor_remaining     (fgk_stream *h);
146 static INLINE void  fgk_swap_ptrs            (fgk_node **one, fgk_node **two);
147
148 /*********************************************************************/
149 /*                          Basic Routines                           */
150 /*********************************************************************/
151
152 /* returns an initialized huffman encoder for an alphabet with the
153  * given size.  returns NULL if enough memory cannot be allocated */
154 static fgk_stream* fgk_alloc (xd3_stream *stream /*, int alphabet_size0 */)
155 {
156   int alphabet_size0 = ALPHABET_SIZE;
157   fgk_stream *h;
158
159   if ((h = (fgk_stream*) xd3_alloc (stream, 1, sizeof (fgk_stream))) == NULL)
160     {
161       return NULL;
162     }
163
164   h->total_nodes  = (2 * alphabet_size0) - 1;
165   h->total_blocks = (2 * h->total_nodes);
166   h->alphabet     = (fgk_node*)  xd3_alloc (stream, h->total_nodes,    sizeof (fgk_node));
167   h->block_array  = (fgk_block*) xd3_alloc (stream, h->total_blocks,   sizeof (fgk_block));
168   h->coded_bits   = (fgk_bit*)   xd3_alloc (stream, alphabet_size0, sizeof (fgk_bit));
169
170   if (h->coded_bits  == NULL ||
171       h->alphabet    == NULL ||
172       h->block_array == NULL)
173     {
174       fgk_destroy (stream, h);
175       return NULL;
176     }
177
178   h->alphabet_size   = alphabet_size0;
179
180   return h;
181 }
182
183 static void fgk_init (fgk_stream *h)
184 {
185   int i;
186
187   h->root_node       = h->alphabet;
188   h->decode_ptr      = h->root_node;
189   h->free_node       = h->alphabet + h->alphabet_size;
190   h->remaining_zeros = h->alphabet;
191   h->coded_depth     = 0;
192   h->zero_freq_count = h->alphabet_size + 2;
193
194   /* after two calls to factor_remaining, zero_freq_count == alphabet_size */
195   fgk_factor_remaining(h); /* set ZFE and ZFR */
196   fgk_factor_remaining(h); /* set ZFDB according to prev state */
197
198   IF_DEBUG (memset (h->alphabet, 0, sizeof (h->alphabet[0]) * h->total_nodes));
199
200   for (i = 0; i < h->total_blocks-1; i += 1)
201     {
202       h->block_array[i].block_freeptr = &h->block_array[i + 1];
203     }
204
205   h->block_array[h->total_blocks - 1].block_freeptr = NULL;
206   h->free_block = h->block_array;
207
208   /* Zero frequency nodes are inserted in the first alphabet_size
209    * positions, with Value, weight, and a pointer to the next zero
210    * frequency node.  */
211   for (i = h->alphabet_size - 1; i >= 0; i -= 1)
212     {
213       fgk_init_node (h->alphabet + i, i, h->alphabet_size);
214     }
215 }
216
217 static void fgk_swap_ptrs(fgk_node **one, fgk_node **two)
218 {
219   fgk_node *tmp = *one;
220   *one = *two;
221   *two = tmp;
222 }
223
224 /* Takes huffman transmitter h and n, the nth elt in the alphabet, and
225  * returns the number of required to encode n. */
226 static int fgk_encode_data (fgk_stream* h, int n)
227 {
228   fgk_node *target_ptr = h->alphabet + n;
229
230   XD3_ASSERT (n < h->alphabet_size);
231
232   h->coded_depth = 0;
233
234   /* First encode the binary representation of the nth remaining
235    * zero frequency element in reverse such that bit, which will be
236    * encoded from h->coded_depth down to 0 will arrive in increasing
237    * order following the tree path.  If there is only one left, it
238    * is not neccesary to encode these bits. */
239   if (IS_ADAPTIVE && target_ptr->weight == 0)
240     {
241       unsigned int where, shift;
242       int bits;
243
244       where = fgk_find_nth_zero(h, n);
245       shift = 1;
246
247       if (h->zero_freq_rem == 0)
248         {
249           bits = h->zero_freq_exp;
250         }
251       else
252         {
253           bits = h->zero_freq_exp + 1;
254         }
255
256       while (bits > 0)
257         {
258           h->coded_bits[h->coded_depth++] = (shift & where) && 1;
259
260           bits   -= 1;
261           shift <<= 1;
262         };
263
264       target_ptr = h->remaining_zeros;
265     }
266
267   /* The path from root to node is filled into coded_bits in reverse so
268    * that it is encoded in the right order */
269   while (target_ptr != h->root_node)
270     {
271       h->coded_bits[h->coded_depth++] = (target_ptr->parent->right_child == target_ptr);
272
273       target_ptr = target_ptr->parent;
274     }
275
276   if (IS_ADAPTIVE)
277     {
278       fgk_update_tree(h, n);
279     }
280
281   return h->coded_depth;
282 }
283
284 /* Should be called as many times as fgk_encode_data returns.
285  */
286 static INLINE fgk_bit fgk_get_encoded_bit (fgk_stream *h)
287 {
288   XD3_ASSERT (h->coded_depth > 0);
289
290   return h->coded_bits[--h->coded_depth];
291 }
292
293 /* This procedure updates the tree after alphabet[n] has been encoded
294  * or decoded.
295  */
296 static void fgk_update_tree (fgk_stream *h, int n)
297 {
298   fgk_node *incr_node;
299
300   if (h->alphabet[n].weight == 0)
301     {
302       incr_node = fgk_increase_zero_weight (h, n);
303     }
304   else
305     {
306       incr_node = h->alphabet + n;
307     }
308
309   while (incr_node != h->root_node)
310     {
311       fgk_move_right (h, incr_node);
312       fgk_promote    (h, incr_node);
313       incr_node->weight += 1;   /* incr the parent */
314       incr_node = incr_node->parent; /* repeat */
315     }
316
317   h->root_node->weight += 1;
318 }
319
320 static void fgk_move_right (fgk_stream *h, fgk_node *move_fwd)
321 {
322   fgk_node **fwd_par_ptr, **back_par_ptr;
323   fgk_node *move_back, *tmp;
324
325   move_back = move_fwd->my_block->block_leader;
326
327   if (move_fwd         == move_back ||
328       move_fwd->parent == move_back ||
329       move_fwd->weight == 0)
330     {
331       return;
332     }
333
334   move_back->right->left = move_fwd;
335
336   if (move_fwd->left)
337     {
338       move_fwd->left->right = move_back;
339     }
340
341   tmp = move_fwd->right;
342   move_fwd->right = move_back->right;
343
344   if (tmp == move_back)
345     {
346       move_back->right = move_fwd;
347     }
348   else
349     {
350       tmp->left = move_back;
351       move_back->right = tmp;
352     }
353
354   tmp = move_back->left;
355   move_back->left = move_fwd->left;
356
357   if (tmp == move_fwd)
358     {
359       move_fwd->left = move_back;
360     }
361   else
362     {
363       tmp->right = move_fwd;
364       move_fwd->left = tmp;
365     }
366
367   if (move_fwd->parent->right_child == move_fwd)
368     {
369       fwd_par_ptr = &move_fwd->parent->right_child;
370     }
371   else
372     {
373       fwd_par_ptr = &move_fwd->parent->left_child;
374     }
375
376   if (move_back->parent->right_child == move_back)
377     {
378       back_par_ptr = &move_back->parent->right_child;
379     }
380   else
381     {
382       back_par_ptr = &move_back->parent->left_child;
383     }
384
385   fgk_swap_ptrs (&move_fwd->parent, &move_back->parent);
386   fgk_swap_ptrs (fwd_par_ptr, back_par_ptr);
387
388   move_fwd->my_block->block_leader = move_fwd;
389 }
390
391 /* Shifts node, the leader of its block, into the next block. */
392 static void fgk_promote (fgk_stream *h, fgk_node *node)
393 {
394   fgk_node *my_left, *my_right;
395   fgk_block *cur_block;
396
397   my_right  = node->right;
398   my_left   = node->left;
399   cur_block = node->my_block;
400
401   if (node->weight == 0)
402     {
403       return;
404     }
405
406   /* if left is right child, parent of remaining zeros case (?), means parent
407    * has same weight as right child. */
408   if (my_left == node->right_child &&
409       node->left_child &&
410       node->left_child->weight == 0)
411     {
412       XD3_ASSERT (node->left_child == h->remaining_zeros);
413       XD3_ASSERT (node->right_child->weight == (node->weight+1)); /* child weight was already incremented */
414      
415       if (node->weight == (my_right->weight - 1) && my_right != h->root_node)
416         {
417           fgk_free_block (h, cur_block);
418           node->my_block    = my_right->my_block;
419           my_left->my_block = my_right->my_block;
420         }
421
422       return;
423     }
424
425   if (my_left == h->remaining_zeros)
426     {
427       return;
428     }
429
430   /* true if not the leftmost node */
431   if (my_left->my_block == cur_block)
432     {
433       my_left->my_block->block_leader = my_left;
434     }
435   else
436     {
437       fgk_free_block (h, cur_block);
438     }
439
440   /* node->parent != my_right */
441   if ((node->weight == (my_right->weight - 1)) && (my_right != h->root_node))
442     {
443       node->my_block = my_right->my_block;
444     }
445   else
446     {
447       node->my_block = fgk_make_block (h, node);
448     }
449 }
450
451 /* When an element is seen the first time this is called to remove it from the list of
452  * zero weight elements and introduce a new internal node to the tree.  */
453 static fgk_node* fgk_increase_zero_weight (fgk_stream *h, int n)
454 {
455   fgk_node *this_zero, *new_internal, *zero_ptr;
456
457   this_zero = h->alphabet + n;
458
459   if (h->zero_freq_count == 1)
460     {
461       /* this is the last one */
462       this_zero->right_child = NULL;
463
464       if (this_zero->right->weight == 1)
465         {
466           this_zero->my_block = this_zero->right->my_block;
467         }
468       else
469         {
470           this_zero->my_block = fgk_make_block (h, this_zero);
471         }
472
473       h->remaining_zeros = NULL;
474
475       return this_zero;
476     }
477
478   zero_ptr = h->remaining_zeros;
479
480   new_internal = h->free_node++;
481
482   new_internal->parent      = zero_ptr->parent;
483   new_internal->right       = zero_ptr->right;
484   new_internal->weight      = 0;
485   new_internal->right_child = this_zero;
486   new_internal->left        = this_zero;
487
488   if (h->remaining_zeros == h->root_node)
489     {
490       /* This is the first element to be coded */
491       h->root_node           = new_internal;
492       this_zero->my_block    = fgk_make_block (h, this_zero);
493       new_internal->my_block = fgk_make_block (h, new_internal);
494     }
495   else
496     {
497       new_internal->right->left = new_internal;
498
499       if (zero_ptr->parent->right_child == zero_ptr)
500         {
501           zero_ptr->parent->right_child = new_internal;
502         }
503       else
504         {
505           zero_ptr->parent->left_child = new_internal;
506         }
507
508       if (new_internal->right->weight == 1)
509         {
510           new_internal->my_block = new_internal->right->my_block;
511         }
512       else
513         {
514           new_internal->my_block = fgk_make_block (h, new_internal);
515         }
516
517       this_zero->my_block = new_internal->my_block;
518     }
519
520   fgk_eliminate_zero (h, this_zero);
521
522   new_internal->left_child = h->remaining_zeros;
523
524   this_zero->right       = new_internal;
525   this_zero->left        = h->remaining_zeros;
526   this_zero->parent      = new_internal;
527   this_zero->left_child  = NULL;
528   this_zero->right_child = NULL;
529
530   h->remaining_zeros->parent = new_internal;
531   h->remaining_zeros->right  = this_zero;
532
533   return this_zero;
534 }
535
536 /* When a zero frequency element is encoded, it is followed by the binary representation
537  * of the index into the remaining elements.  Sets a cache to the element before it so
538  * that it can be removed without calling this procedure again.  */
539 static unsigned int fgk_find_nth_zero (fgk_stream* h, int n)
540 {
541   fgk_node *target_ptr = h->alphabet + n;
542   fgk_node *head_ptr = h->remaining_zeros;
543   unsigned int idx = 0;
544
545   while (target_ptr != head_ptr)
546     {
547       head_ptr = head_ptr->right_child;
548       idx += 1;
549     }
550
551   return idx;
552 }
553
554 /* Splices node out of the list of zeros. */
555 static void fgk_eliminate_zero (fgk_stream* h, fgk_node *node)
556 {
557   if (h->zero_freq_count == 1)
558     {
559       return;
560     }
561
562   fgk_factor_remaining(h);
563
564