root/xdelta30r/xdelta3-main.h

Revision 31, 90.8 kB (checked in by nlawren2, 10 months ago)

Moved xdelta30r

  • Property svn:executable set to *
Line 
1 /* xdelta 3 - delta compression tools and library
2  * Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007,
3  * Joshua P. MacDonald
4  *
5  *  This program is free software; you can redistribute it and/or modify
6  *  it under the terms of the GNU General Public License as published by
7  *  the Free Software Foundation; either version 2 of the License, or
8  *  (at your option) any later version.
9  *
10  *  This program is distributed in the hope that it will be useful,
11  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  *  GNU General Public License for more details.
14  *
15  *  You should have received a copy of the GNU General Public License
16  *  along with this program; if not, write to the Free Software
17  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
18  */
19
20 /* This is all the extra stuff you need for convenience to users in a command line
21  * application.  It contains these major components:
22  *
23  * 1. VCDIFF tools
24  * 2. external compression support (this is POSIX-specific).
25  * 3. a general read/write loop that handles all of the Xdelta decode/encode/VCDIFF-print
26  *    functions
27  * 4. command-line interpreter
28  * 5. an Xdelta application header which stores default filename, external compression settings
29  * 6. output/error printing
30  * 7. basic file support and OS interface
31  */
32
33 /* TODO list:
34  * 1. do exact gzip-like filename, stdout handling.  make a .vcdiff extension, refuse
35  *    to encode to stdout without -cf, etc.
36  * 2. Allow the user to add a comment string to the app header without disturbing the default
37  *    behavior.
38  * 3. "Source file must be seekable" is not actually true for encoding, given current
39  *    behavior.  Allow non-seekable sources?  It would in theory let you use a fifo for
40  *    the source.
41  */
42
43 /* On error handling and printing:
44  *
45  * The xdelta library sets stream->msg to indicate what condition caused an internal
46  * failure, but many failures originate here and are printed here.  The return convention
47  * is 0 for success, as throughout Xdelta code, but special attention is required here for
48  * the operating system calls with different error handling.  See the main_file_* routines.
49  * All errors in this file have a message printed at the time of occurrence.  Since some of
50  * these calls occur within calls to the library, the error may end up being printed again
51  * with a more general error message.
52  */
53
54 /******************************************************************************************/
55
/* I/O backend selectors (POSIX, stdio, Win32).  Each one defaults to 0 unless the
 * build defines it; they pick the handle type used by struct _main_file. */
56 #ifndef XD3_POSIX
57 #define XD3_POSIX 0
58 #endif
59 #ifndef XD3_STDIO
60 #define XD3_STDIO 0
61 #endif
62 #ifndef XD3_WIN32
63 #define XD3_WIN32 0
64 #endif
65
66 /* Returns the xd3_strerror() message for err_num when there is one, otherwise the
 * system message (strerror(), or FormatMessage() when _WIN32 is defined). */
67 const char* xd3_mainerror(int err_num);
68
69 /* XPR(NT "fmt", ...) expands to fprintf (stderr, "xdelta3: " "fmt", ...), so NT must be
 * followed directly by a string literal; the "xdelta3: " prefix is concatenated to it. */
70 #define XPR fprintf
71 #define NT stderr, "xdelta3: "
72
73 /* If none are set, default to posix. */
74 #if (XD3_POSIX + XD3_STDIO + XD3_WIN32) == 0
75 #undef XD3_POSIX
76 #define XD3_POSIX 1
77 #endif
78
79 /* Handle externally-compressed inputs (enabled unless the build overrides it). */
80 #ifndef EXTERNAL_COMPRESSION
81 #define EXTERNAL_COMPRESSION 1
82 #endif
83
/* Special sentinel value.  Parenthesized so the negative literal always expands as a
 * single operand.
 * NOTE(review): where this value is used is not visible in this part of the file. */
#define PRINTHDR_SPECIAL (-4378291)
85
86 /* The number of soft-config variables.  */
87 #define XD3_SOFTCFG_VARCNT 7
88
89 /* This is used as in XPR(NT XD3_LIB_ERRMSG (stream, ret)) to print an error message
90  * from the library.  It expands to a format string followed by its two arguments
 * (a comma-separated list, not a single expression), so it is only usable as the
 * tail of a printf-style argument list. */
91 #define XD3_LIB_ERRMSG(stream, ret) "%s: %s\n", xd3_errstring (stream), xd3_mainerror (ret)
92
93 #include <stdio.h>  /* fprintf */
94
95 #if XD3_POSIX
96 #include <unistd.h> /* close, read, write... */
97 #include <sys/types.h>
98 #include <fcntl.h>
99 #endif
100
/* Platform headers.  When _WIN32 is defined, the block below also supplies fallbacks
 * for a few POSIX names that are used elsewhere. */
101 #ifndef _WIN32
102 #include <unistd.h> /* lots */
103 #include <sys/time.h> /* gettimeofday() */
104 #include <sys/stat.h> /* stat() and fstat() */
105 #else
/* Map strtoll onto _strtoi64 for this platform. */
106 #define strtoll _strtoi64
107 #include <sys/types.h>
108 #include <sys/stat.h>
/* Fallback: reports "exited" when the low 8 bits of the status word are zero. */
109 #ifndef WIFEXITED
110 #   define WIFEXITED(stat)  (((*((int *) &(stat))) & 0xff) == 0)
111 #endif
/* Fallback: the exit status is taken from bits 8..15 of the status word. */
112 #ifndef WEXITSTATUS
113 #   define WEXITSTATUS(stat) (((*((int *) &(stat))) >> 8) & 0xff)
114 #endif
/* Fallback: every mode is reported as a regular file; the S_IFREG-based test is
 * left disabled in the comments below. */
115 #ifndef S_ISREG
116 //#   ifdef S_IFREG
117 //#       define S_ISREG(m) (((m) & S_IFMT) == S_IFREG)
118 //#   else
119 #       define S_ISREG(m) 1
120 //#   endif
121 #endif /* !S_ISREG */
122
123 // For standard input/output handles
124 static STARTUPINFO winStartupInfo;
125 #endif
126
127 /******************************************************************************************
128  ENUMS and TYPES
129  ******************************************************************************************/
130
131 /* These flags (mainly pertaining to main_read() operations) are set in the
132  * main_file->flags variable.  All are related to external decompression support.
133  *
134  * RD_FIRST causes the external decompression check when the input is first read.
135  *
136  * RD_NONEXTERNAL disables external decompression for reading a compressed input, in the
137  * case of Xdelta inputs.  Note: Xdelta is supported as an external compression type,
138  * which is the reason for this flag.  An example to justify this is: to create a
139  * delta between two files that are VCDIFF-compressed.  Two external Xdelta decoders are
140  * run to supply decompressed source and target inputs to the Xdelta encoder.
 *
 * NOTE(review): RD_EXTERNAL_V1 is not described here and its use is not visible in
 * this part of the file.
 *
 * The values are distinct bits so they can be OR-ed together in one int. */
141 typedef enum
142 {
143   RD_FIRST        = (1 << 0),
144   RD_NONEXTERNAL  = (1 << 1),
145   RD_EXTERNAL_V1  = (1 << 2),
146 } xd3_read_flags;
147
148 /* main_file->mode values: whether a main_file is used for reading or for writing. */
149 typedef enum
150 {
151   XO_READ  = 0,
152   XO_WRITE = 1,
153 } main_file_modes;
154
155 /* Main commands.  For example, CMD_PRINTHDR is the "xdelta printhdr" command.
 * CMD_ENCODE only exists when XD3_ENCODER is non-zero, so the numeric values of the
 * enumerators after it depend on the build; compare by name, not by number. */
156 typedef enum
157 {
158   CMD_NONE = 0,
159   CMD_PRINTHDR,
160   CMD_PRINTHDRS,
161   CMD_PRINTDELTA,
162   CMD_RECODE,
163 #if XD3_ENCODER
164   CMD_ENCODE,
165 #endif
166   CMD_DECODE,
167   CMD_TEST,
168   CMD_CONFIG,
169 } xd3_cmd;
170
/* Default command and "is this the encode command?" test.  With encoder support the
 * default is CMD_ENCODE; without it the default is CMD_DECODE and IS_ENCODE() is a
 * constant 0 that does not evaluate its argument.  The argument is parenthesized so
 * that any expression can be passed safely. */
#if XD3_ENCODER
#define CMD_DEFAULT CMD_ENCODE
#define IS_ENCODE(cmd) ((cmd) == CMD_ENCODE)
#else
#define CMD_DEFAULT CMD_DECODE
#define IS_ENCODE(cmd) (0)
#endif
178
/* Short names for the structures defined below. */
179 typedef struct _main_file        main_file;
180 typedef struct _main_extcomp     main_extcomp;
181 typedef struct _main_blklru      main_blklru;
182 typedef struct _main_blklru_list main_blklru_list;
183
184 /* The main_file object supports abstract system calls like open, close, read, write, seek,
185  * stat.  The program uses these to represent both seekable files and non-seekable files.
186  * Source files must be seekable, but the target input and any output file do not require
187  * seekability.
188  *
 * The type of the `file' handle depends on the I/O backend selected at build time:
 * FILE* for XD3_STDIO, a file descriptor for XD3_POSIX, a HANDLE for XD3_WIN32.
 */
189 struct _main_file
190 {
191 #if XD3_STDIO
192   FILE               *file;
193 #elif XD3_POSIX
194   int                 file;
195 #elif XD3_WIN32
196   HANDLE              file;
197 #endif
198
199   int                 mode;          /* XO_READ or XO_WRITE */
200   const char         *filename;      /* File name or /dev/stdin, /dev/stdout, /dev/stderr. */
201   char               *filename_copy; /* NOTE(review): original comment duplicated `filename'; presumably a writable copy - confirm. */
202   const char         *realname;      /* NOTE(review): original comment duplicated `filename'; exact role not visible here. */
203   const main_extcomp *compressor;    /* External compression struct. */
204   int                 flags;         /* RD_FIRST, RD_NONEXTERNAL, ... */
205   xoff_t              nread;         /* for input position */
206   xoff_t              nwrite;        /* for output position */
207   uint8_t            *snprintf_buf;  /* internal snprintf() use */
208 };
209
210 /* Various strings and magic values used to detect and call external compression.  See
211  * the extcomp_types[] table for examples. */
212 struct _main_extcomp
213 {
214   const char    *recomp_cmdname;  /* command used to recompress, e.g. "gzip" */
215   const char    *recomp_options;  /* its options, e.g. "-cf" */
216
217   const char    *decomp_cmdname;  /* command used to decompress, e.g. "uncompress" */
218   const char    *decomp_options;  /* its options, e.g. "-dcf" */
219
220   const char    *ident;           /* short identifier string, e.g. "G" */
221   const char    *magic;           /* leading magic bytes of the format */
222   int            magic_size;      /* number of bytes in `magic' */
223   int            flags;           /* 0 or read flags such as RD_NONEXTERNAL */
224 };
225
226 /* This file implements a small LRU of source blocks.  For encoding purposes,
227  * we prevent paging in blocks we've already scanned in the source (return
228  * XD3_NOTAVAIL). */
/* Doubly-linked list node embedded in each LRU entry. */
229 struct _main_blklru_list
230 {
231   main_blklru_list  *next;
232   main_blklru_list  *prev;
233 };
234
/* One LRU entry.  NOTE(review): presumably `blk' is the block's data buffer and
 * `blkno' its block number in the source - confirm against the code that fills it. */
235 struct _main_blklru
236 {
237   uint8_t         *blk;
238   xoff_t           blkno;
239   main_blklru_list  link;
240 };
241
242 #define LRU_SIZE 32U
243 #define XD3_MINSRCWINSZ XD3_ALLOCSIZE
244
245 /* ... represented as a list (no cache index).  XD3_MAKELIST is defined elsewhere; it is
 * given the list type, the entry type and the name of the embedded link field. */
246 XD3_MAKELIST(main_blklru_list,main_blklru,link);
247
248 // TODO:
249 // struct _main_state
250 // {
251
252 /* Program options: various command line flags and options.  The initial values here
 * are the same ones reset_defaults() restores. */
253 static int         option_stdout             = 0;
254 static int         option_force              = 0;
255 static int         option_verbose            = 0;
256 static int         option_quiet              = 0;
257 static int         option_use_appheader      = 1;
258 static uint8_t*    option_appheader          = NULL;
259 static int         option_use_secondary      = 0;
260 static char*       option_secondary          = NULL;
261 static int         option_use_checksum       = 1;
262 static int         option_use_altcodetable   = 0;
263 static char*       option_smatch_config      = NULL;
264 static int         option_no_compress        = 0;
265 static int         option_no_output          = 0; /* do not open or write output */
266 static const char *option_source_filename    = NULL;
267
268 static int         option_level              = XD3_DEFAULT_LEVEL;
269 static usize_t     option_iopt_size          = XD3_DEFAULT_IOPT_SIZE;
270 static usize_t     option_winsize            = XD3_DEFAULT_WINSIZE;
271 static usize_t     option_srcwinsz           = XD3_DEFAULT_SRCWINSZ;
272 static usize_t     option_sprevsz            = XD3_DEFAULT_SPREVSZ;
273
274 /* These variables are suppressed to avoid their use w/o support.  main() warns
275  * appropriately. */
276 #if EXTERNAL_COMPRESSION
277 static int         option_decompress_inputs  = 1;
278 static int         option_recompress_outputs = 1;
279 #endif
280
281 /* This is for comparing "printdelta" output without attention to
282  * copy-instruction modes. */
283 #if VCDIFF_TOOLS
284 static int         option_print_cpymode = 1;
285 #endif
286
287 /* Static variables */
/* Debug-only count of live main_malloc() allocations; main_free() decrements it. */
288 IF_DEBUG(static int main_mallocs = 0;)
289
290 static char*          program_name = NULL;
291 static uint8_t*       appheader_used = NULL;
292 static uint8_t*       main_bdata = NULL;
293
294 /* The LRU: obviously this is shared by all callers. */
295 static int               lru_size = 0;
296 static main_blklru      *lru = NULL;  /* array of lru_size elts */
297 static main_blklru_list  lru_list;
298 static main_blklru_list  lru_free;
299 static int               do_not_lru = 0;  /* set to avoid lru, instead discard oldest */
300
/* LRU counters.  NOTE(review): where they are updated is not visible in this part of the file. */
301 static int lru_hits   = 0;
302 static int lru_misses = 0;
303 static int lru_filled = 0;
304
305 /* Hacks for VCDIFF tools */
306 static int allow_fake_source = 0;
307
308 /* State for xdelta3 recode */
309 static xd3_stream *recode_stream = NULL;
310
311 /* This array of compressor types is compiled even if EXTERNAL_COMPRESSION is false just so
312  * the program knows the mapping of IDENT->NAME.
 *
 * Columns follow the field order of struct _main_extcomp: recompress command and
 * options, decompress command and options, ident, magic bytes, magic length, flags. */
313 static main_extcomp extcomp_types[] =
314 {
315   /* The entry for xdelta3 must be 0 because the program_name is set there. */
316   { "xdelta3",  "-cfq",  "xdelta3",    "-dcfq",  "X", "\xd6\xc3\xc4", 3, RD_NONEXTERNAL },
317   { "bzip2",    "-cf",   "bzip2",      "-dcf",   "B", "BZh",          3, 0 },
318   { "gzip",     "-cf",   "gzip",       "-dcf",   "G", "\037\213",     2, 0 },
319   { "compress", "-cf",   "uncompress", "-cf",    "Z", "\037\235",     2, 0 },
320
321   /* TODO: add commandline support for magic-less formats */
322   /*{ "lzma",     "-cf",   "lzma",       "-dcf",   "M", "]\000",        2, 0 },*/
323 };
324
325 // };
326
/* Forward declarations; both functions are defined outside this part of the file. */
327 static void main_get_appheader (xd3_stream *stream, main_file *ifile,
328                                 main_file *output, main_file *sfile);
329
330 static int main_help (void);
331
332 static int
333 main_version (void)
334 {
335   /* $Format: "  DP(RINT \"VERSION=3.$Xdelta3Version$\\n\");" $ */
336   DP(RINT "VERSION=3.0r\n");
337   return EXIT_SUCCESS;
338 }
339
340 static int
341 main_config (void)
342 {
343   main_version ();
344
345   DP(RINT "EXTERNAL_COMPRESSION=%d\n", EXTERNAL_COMPRESSION);
346   DP(RINT "GENERIC_ENCODE_TABLES=%d\n", GENERIC_ENCODE_TABLES);
347   DP(RINT "GENERIC_ENCODE_TABLES_COMPUTE=%d\n", GENERIC_ENCODE_TABLES_COMPUTE);
348   DP(RINT "REGRESSION_TEST=%d\n", REGRESSION_TEST);
349   DP(RINT "SECONDARY_DJW=%d\n", SECONDARY_DJW);
350   DP(RINT "SECONDARY_FGK=%d\n", SECONDARY_FGK);
351   DP(RINT "VCDIFF_TOOLS=%d\n", VCDIFF_TOOLS);
352   DP(RINT "XD3_ALLOCSIZE=%d\n", XD3_ALLOCSIZE);
353   DP(RINT "XD3_DEBUG=%d\n", XD3_DEBUG);
354   DP(RINT "XD3_ENCODER=%d\n", XD3_ENCODER);
355   DP(RINT "XD3_POSIX=%d\n", XD3_POSIX);
356   DP(RINT "XD3_STDIO=%d\n", XD3_STDIO);
357   DP(RINT "XD3_WIN32=%d\n", XD3_WIN32);
358   DP(RINT "XD3_USE_LARGEFILE64=%d\n", XD3_USE_LARGEFILE64);
359   DP(RINT "XD3_DEFAULT_LEVEL=%d\n", XD3_DEFAULT_LEVEL);
360   DP(RINT "XD3_DEFAULT_IOPT_SIZE=%d\n", XD3_DEFAULT_IOPT_SIZE);
361   DP(RINT "XD3_DEFAULT_SPREVSZ=%d\n", XD3_DEFAULT_SPREVSZ);
362   DP(RINT "XD3_DEFAULT_SRCWINSZ=%d\n", XD3_DEFAULT_SRCWINSZ);
363   DP(RINT "XD3_DEFAULT_WINSIZE=%d\n", XD3_DEFAULT_WINSIZE);
364   DP(RINT "XD3_HARDMAXWINSIZE=%d\n", XD3_HARDMAXWINSIZE);
365
366   return EXIT_SUCCESS;
367 }
368
369 static void
370 reset_defaults(void)
371 {
372   option_stdout = 0;
373   option_force = 0;
374   option_verbose = 0;
375   option_quiet = 0;
376   option_appheader = NULL;
377   option_use_secondary = 0;
378   option_secondary = NULL;
379   option_use_altcodetable = 0;
380   option_smatch_config = NULL;
381   option_no_compress = 0;
382   option_no_output = 0;
383   option_source_filename = NULL;
384   program_name = NULL;
385   appheader_used = NULL;
386   main_bdata = NULL;
387   lru_size = 0;
388   lru = NULL;
389   do_not_lru = 0;
390   lru_hits   = 0;
391   lru_misses = 0;
392   lru_filled = 0;
393   allow_fake_source = 0;
394   option_smatch_config = NULL;
395
396   option_use_appheader = 1;
397   option_use_checksum = 1;
398 #if EXTERNAL_COMPRESSION
399   option_decompress_inputs  = 1;
400   option_recompress_outputs = 1;
401 #endif
402 #if VCDIFF_TOOLS
403   option_print_cpymode = 1;
404 #endif
405   option_level = XD3_DEFAULT_LEVEL;
406   option_iopt_size = XD3_DEFAULT_IOPT_SIZE;
407   option_winsize = XD3_DEFAULT_WINSIZE;
408   option_srcwinsz = XD3_DEFAULT_SRCWINSZ;
409   option_sprevsz = XD3_DEFAULT_SPREVSZ;
410 }
411
412 static void*
413 main_malloc1 (usize_t size)
414 {
415   void* r = malloc (size);
416   if (r == NULL) { XPR(NT "malloc: %s\n", xd3_mainerror (ENOMEM)); }
417   else if (option_verbose > 3) { XPR(NT "malloc: %u: %p\n", size, r); }
418   return r;
419 }
420
421 static void*
422 main_malloc (usize_t size)
423 {
424   void *r = main_malloc1 (size);
425    if (r) { IF_DEBUG (main_mallocs += 1); }
426   return r;
427 }
428
429 static void*
430 main_alloc (void   *opaque,
431             usize_t  items,
432             usize_t  size)
433 {
434   return main_malloc1 (items * size);
435 }
436
437 static void
438 main_free1 (void *opaque, void *ptr)
439 {
440   if (option_verbose > 3) { XPR(NT "free: %p\n", ptr); }
441   free (ptr);
442 }
443
444 static void
445 main_free (void *ptr)
446 {
447   if (ptr)
448     {
449       IF_DEBUG (main_mallocs -= 1);
450       main_free1 (NULL, ptr);
451       IF_DEBUG (XD3_ASSERT(main_mallocs >= 0));
452     }
453 }
454
455 /* This ensures that (ret = errno) always indicates failure, in case errno was
456  * accidentally not set.  If this prints there's a bug somewhere. */
457 static int
458 get_errno (void)
459 {
460 #ifndef _WIN32
461   if (errno == 0)
462     {
463       XPR(NT "you found a bug: expected errno != 0\n");
464       errno = XD3_INTERNAL;
465     }
466   return errno;
467 #else
468   DWORD errNum = GetLastError();
469   if (errNum == NO_ERROR) {
470           errNum = XD3_INTERNAL;
471   }
472   return errNum;
473 #endif
474 }
475
/* Maps an error number to a message.  Library error codes are resolved by
 * xd3_strerror(); anything it does not recognize (NULL result) is passed to the
 * operating system: strerror() normally, FormatMessage() when _WIN32 is defined.
 * On Windows the system message lives in a static buffer that is overwritten by
 * the next call. */
const char*
xd3_mainerror(int err_num)
{
#ifdef _WIN32
  static char err_buf[256];
#endif
  const char *lib_msg = xd3_strerror (err_num);

  if (lib_msg != NULL)
    {
      return lib_msg;
    }

#ifndef _WIN32
  return strerror (err_num);
#else
  memset (err_buf, 0, sizeof err_buf);
  FormatMessage (FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS,
		 NULL, err_num,
		 MAKELANGID (LANG_NEUTRAL, SUBLANG_DEFAULT),
		 err_buf, 256, NULL);
  return err_buf;
#endif
}
498
/* Returns a millisecond timestamp.  Only differences between two results are
 * meaningful: the epoch differs per platform and the value is truncated to long.
 *
 * Non-Windows: derived from gettimeofday().
 * Windows: derived from the local time converted to a FILETIME, which counts
 * 100-nanosecond units (100ns * 10,000 = 1ms).  The two 32-bit halves are copied
 * into a ULARGE_INTEGER instead of reading the FILETIME through an __int64 pointer,
 * because a FILETIME is not guaranteed to be 8-byte aligned. */
static long
get_millisecs_now (void)
{
#ifndef _WIN32
  struct timeval tv;

  gettimeofday (& tv, NULL);

  return (tv.tv_sec) * 1000L + (tv.tv_usec) / 1000;
#else
  /* It doesn't matter that the offset is Jan 1, 1601. */
  SYSTEMTIME st;
  FILETIME ft;
  ULARGE_INTEGER ticks;

  GetLocalTime(&st);
  SystemTimeToFileTime(&st, &ft);

  ticks.LowPart  = ft.dwLowDateTime;
  ticks.HighPart = ft.dwHighDateTime;

  return (long)(ticks.QuadPart / 10000);
#endif
}
521
/* Returns the milliseconds elapsed since the previous call and remembers the
 * current time for the next one.  The first call measures from 0, i.e. it returns
 * the raw timestamp.  Nothing here forces the result to be at least 1. */
static long
get_millisecs_since (void)
{
  static long last = 0;
  const long previous = last;

  last = get_millisecs_now ();

  return last - previous;
}
532
533 static char*
534 main_format_bcnt (xoff_t r, char *buf)
535 {
536   static const char* fmts[] = { "B", "KB", "MB", "GB" };
537   int i;
538
539   for (i = 0; i < SIZEOF_ARRAY(fmts); i += 1)
540     {
541       if (r <= (10 * 1024) || i == (-1 + (int)SIZEOF_ARRAY(fmts)))
542             {
543               sprintf (buf, "%"Q"u %s", r, fmts[i]);
544               break;
545             }
546       r /= 1024;
547     }
548   return buf;
549 }
550
551 static char*
552 main_format_rate (xoff_t bytes, long millis, char *buf)
553 {
554   xoff_t r = (xoff_t)(1.0 * bytes / (1.0 * millis / 1000.0));
555   static char lbuf[32];
556
557   main_format_bcnt (r, lbuf);
558   sprintf (buf, "%s/sec", lbuf);
559   return buf;
560 }
561
/* Formats a duration in milliseconds into BUF and returns BUF:
 *   below 1000       -> "<n> ms"
 *   below 10000      -> "<n.n> sec"
 *   otherwise        -> "<n> sec"
 * MILLIS is a signed long, so it is printed with %ld; a negative interval (for
 * example after the clock steps backwards) shows up as a negative number instead of
 * a huge unsigned one.  BUF must be large enough for the result. */
static char*
main_format_millis (long millis, char *buf)
{
  if (millis < 1000)       { sprintf (buf, "%ld ms", millis); }
  else if (millis < 10000) { sprintf (buf, "%.1f sec", millis / 1000.0); }
  else                     { sprintf (buf, "%ld sec", millis / 1000L); }
  return buf;
}
570
571 /* A safe version of strtol for xoff_t. */
572 static int
573 main_strtoxoff (const char* s, xoff_t *xo, char which)
574 {
575   char *e;
576   xoff_t x;
577
578   XD3_ASSERT(s && *s != 0);
579
580   {
581     /* Should check LONG_MIN, LONG_MAX, LLONG_MIN, LLONG_MAX? */
582 #if SIZEOF_XOFF_T == 4
583     long xx = strtol (s, &e, 0);
584 #else
585     long long xx = strtoll (s, &e, 0);
586 #endif
587
588     if (xx < 0)
589       {
590         XPR(NT "-%c: negative integer: %s\n", which, s);
591         return EXIT_FAILURE;
592       }
593
594     x = xx;
595   }
596
597   if (*e != 0)
598     {
599       XPR(NT "-%c: invalid integer: %s\n", which, s);
600       return EXIT_FAILURE;
601     }
602
603   (*xo) = x;
604   return 0;
605 }
606
607 static int
608 main_atou (const char* arg, usize_t *xo, usize_t low, usize_t high, char which)
609 {
610   xoff_t x;
611   int ret;
612
613   if ((ret = main_strtoxoff (arg, & x, which))) { return ret; }
614
615   if (x < low)
616     {
617       XPR(NT "-%c: minimum value: %u\n", which, low);
618       return EXIT_FAILURE;
619     }
620   if (high == 0)
621     {
622       high = USIZE_T_MAX;
623     }
624   if (x > high)
625     {
626       XPR(NT "-%c: maximum value: %u\n", which, high);
627       return EXIT_FAILURE;
628     }
629   (*xo) = (usize_t)x;
630   return 0;
631 }
632
633 /******************************************************************************************
634  FILE BASICS
635  ******************************************************************************************/
636