root/xdelta30r/xdelta3-regtest.py

Revision 31, 32.5 kB (checked in by nlawren2, 10 months ago)

Moved xdelta30r

  • Property svn:executable set to *
Line 
1 #!/usr/bin/python2.5
2 # xdelta 3 - delta compression tools and library
3 # Copyright (C) 2003, 2006, 2007.  Joshua P. MacDonald
4 #
5 #  This program is free software; you can redistribute it and/or modify
6 #  it under the terms of the GNU General Public License as published by
7 #  the Free Software Foundation; either version 2 of the License, or
8 #  (at your option) any later version.
9 #
10 #  This program is distributed in the hope that it will be useful,
11 #  but WITHOUT ANY WARRANTY; without even the implied warranty of
12 #  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 #  GNU General Public License for more details.
14 #
15 #  You should have received a copy of the GNU General Public License
16 #  along with this program; if not, write to the Free Software
17 #  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
18
19 # TODO: test 1.5 vs. greedy
20
21 import os, sys, math, re, time, types, array, random
22 import xdelta3main
23 import xdelta3
24
# Input locations.  The commented-out assignments are alternative
# locations kept from the original file.
#RCSDIR = '/mnt/polaroid/Polaroid/orbit_linux/home/jmacd/PRCS'
#RCSDIR = 'G:/jmacd/PRCS/prcs/b'
RCSDIR = '/tmp/PRCS_read_copy'

#SAMPLEDIR = "C:/sample_data/Wesnoth/tar"
SAMPLEDIR = "/tmp/WESNOTH_tmp/diff"

MIN_SIZE = 0

TIME_TOO_SHORT = 0.050

# Default trial counts for TimedTest: leading trials that are run but not
# measured, then the lower and upper bound on measured trials.
SKIP_TRIALS = 2
MIN_TRIALS = 3
MAX_TRIALS = 15

# When true, TimedTest does not time or verify the decode step.
SKIP_DECODE = 1

# Default TimedTest threshold: measuring stops once the standard
# deviation of the elapsed times, as a percentage of their mean, drops
# below this value.
# 10 = fast 1.5 = slow
MIN_STDDEV_PCT = 1.5

# How many results per round
MAX_RESULTS = 500
TEST_ROUNDS = 500
KEEP_P = 0.5

# For RCS testing, what percent to select
FILE_P = 0.30

# For run-speed tests: RunSpeedTest sizes its input from MIN_RUN up to
# MAX_RUN.
MIN_RUN = 1 * 1000 * 1000
MAX_RUN = 10 * 1000 * 1000

# Testwide defaults, prepended to every Xdelta3Runner command line.
# (Add '-v' here for verbose runs.)
ALL_ARGS = []

# The first 7 args go to -C
SOFT_CONFIG_CNT = 7

CONFIG_ORDER = [
    # The first SOFT_CONFIG_CNT entries.
    'large_look',
    'large_step',
    'small_look',
    'small_chain',
    'small_lchain',
    'max_lazy',
    'long_enough',

    # > SOFT_CONFIG_CNT
    'nocompress',
    'winsize',
    'srcwinsize',
    'sprevsz',
    'iopt',
    'djw',
    'altcode',
]

# Command-line flag for each configuration name past SOFT_CONFIG_CNT.
CONFIG_ARGMAP = {
    'nocompress' : '-N',
    'winsize'    : '-W',
    'srcwinsize' : '-B',
    'sprevsz'    : '-P',
    'iopt'       : '-I',
    'djw'        : '-Sdjw',
    'altcode'    : '-T',
}
93
def INPUT_SPEC(rand):
    """Build the table of per-parameter value generators.

    rand must provide choice(seq) (e.g. a random.Random instance).

    Returns a dict mapping each configuration name to a one-argument
    callable.  The argument is a dict of values already chosen; only
    'small_lchain' reads it, to stay <= the chosen 'small_chain'.
    """
    spec = {}

    # Time/space costs:

    # -C 1,2,3,4,5,6,7
    spec['large_look']   = lambda d: rand.choice([9])
    spec['large_step']   = lambda d: rand.choice([3, 5, 7, 8, 15])
    spec['small_look']   = lambda d: rand.choice([4])
    spec['small_chain']  = lambda d: rand.choice([40, 10, 4, 1])
    spec['small_lchain'] = lambda d: rand.choice(
        [limit for limit in [10, 4, 2, 1] if limit <= d['small_chain']])
    spec['max_lazy']     = lambda d: rand.choice([9, 18, 27, 36, 72, 108])
    spec['long_enough']  = lambda d: rand.choice([9, 18, 27, 36, 72, 108])

    # -N
    spec['nocompress'] = lambda d: rand.choice(['true'])

    # -T
    spec['altcode'] = lambda d: rand.choice(['false'])

    # -S djw
    spec['djw'] = lambda d: rand.choice(['false'])

    # Memory costs:

    # -W
    spec['winsize'] = lambda d: 8 * (1<<20)

    # -B
    spec['srcwinsize'] = lambda d: 64 * (1<<20)

    # -I 0 is unlimited
    spec['iopt'] = lambda d: 0

    # -P only powers of two
    spec['sprevsz'] = lambda d: rand.choice([mult * (1<<16) for mult in [4]])

    return spec
#end
132
# Per-process scratch directory and the files created inside it.
TMPDIR = '/tmp/xd3regtest.%d' % os.getpid()

RUNFILE = os.path.join(TMPDIR, 'run')
DFILE   = os.path.join(TMPDIR, 'output')
RFILE   = os.path.join(TMPDIR, 'recon')

# State identifiers, numbered 0..3 in this order.
HEAD_STATE, BAR_STATE, REV_STATE, DATE_STATE = 0, 1, 2, 3

# File names containing a .gif or .jpg extension.
IGNORE_FILENAME  = re.compile(r'.*\.(gif|jpg).*')

# rcs output
RE_TOTREV  = re.compile(r'total revisions: (\d+)')
RE_BAR     = re.compile('----------------------------')
RE_REV     = re.compile('revision (.+)')
RE_DATE    = re.compile('date: ([^;]+);.*')
# xdelta output
RE_HDRSZ   = re.compile(r'VCDIFF header size: +(\d+)')
RE_EXTCOMP = re.compile('XDELTA ext comp.*')
156
def c2str(c):
    """Return the items of c formatted with %s and joined by single spaces.

    Each item is wrapped in a 1-tuple before formatting: a bare
    '%s' % x treats a tuple x as the argument list, which raises
    TypeError for tuples of other lengths and silently unwraps 1-tuples.
    """
    return ' '.join(['%s' % (x,) for x in c])
#end
160
def SumList(l):
    """Return the items of l folded left-to-right with +.

    There is no start value, so the result has the type of the items
    and an empty input raises TypeError.
    """
    items = list(l)
    if not items:
        raise TypeError('reduce() of empty sequence with no initial value')
    total = items[0]
    for item in items[1:]:
        total = total + item
    return total
#end
164
# Summary statistics for a list of numbers: total, mean, stddev,
# q2 (median), (q3-q1)/2 ("semi-interquartile range") and spread.
class StatList:
    def __init__(self,l,desc):
        """Sort l in place and compute its statistics.

        l    -- list of at least two numbers; it is sorted in place and
                kept (not copied) as self.l
        desc -- label placed at the front of self.str

        s is the sample standard deviation (divides by cnt - 1).
        q1/q2/q3 are elements of the sorted list picked by rounded
        index.  q4 is the largest element plus one, so spread is
        (max - min) + 1.
        """
        n = len(l)
        assert(n > 1)
        l.sort()

        self.cnt   = n
        self.l     = l
        self.total = SumList(l)
        self.mean  = self.total / float(n)

        squared_dev = [(v - self.mean) * (v - self.mean) for v in l]
        self.s = math.sqrt(SumList(squared_dev) / float(n - 1))

        last = n - 1
        self.q0 = l[0]
        self.q1 = l[int(n/4.0+0.5)]
        self.q2 = l[int(n/2.0+0.5)]
        # Only the q3 index can round past the end, so clamp it.
        self.q3 = l[min(last, int((3.0*n)/4.0+0.5))]
        self.q4 = l[last]+1

        self.siqr   = (self.q3-self.q1)/2.0
        self.spread = (self.q4-self.q0)

        fields = (desc, self.total, self.mean, self.s, self.q2,
                  self.siqr, self.spread)
        self.str = '%s %d; mean %d; sdev %d; q2 %d; .5(q3-q1) %.1f; spread %d' % fields
    #end
#end
188
def RunCommand(args, ok = (0,)):
    """Run args as a child process and wait for it to finish.

    args -- sequence of strings; args[0] is looked up on PATH
    ok   -- collection of acceptable results (default: only 0)

    Raises CommandError when the value returned by os.spawnvp is not in
    ok.

    The default is a tuple rather than a list so that no mutable object
    is shared between calls.
    """
    #print 'run command %s' % (' '.join(args))
    status = os.spawnvp(os.P_WAIT, args[0], args)
    if status not in ok:
        raise CommandError(args, 'exited %d' % status)
    #end
#end
196
def RunCommandIO(args,infn,outfn):
    """Run args with stdin read from infn and stdout written to outfn.

    args  -- list of strings; args[0] is looked up on PATH
    infn  -- path opened read-only and installed as descriptor 0
    outfn -- path created/truncated and installed as descriptor 1

    Forks, sets up the redirections in the child, execs the command and
    waits for it.  Raises CommandError if the child does not exit
    normally or exits with a non-zero status.
    """
    pid = os.fork()
    if pid == 0:
        # Child.  Whatever goes wrong here, this process must never
        # return into the caller's code: it is a copy of the whole test
        # harness and would carry on running it a second time.
        try:
            try:
                os.dup2(os.open(infn,os.O_RDONLY),0)
                os.dup2(os.open(outfn,os.O_CREAT|os.O_TRUNC|os.O_WRONLY),1)
                os.execvp(args[0], args)
            except Exception:
                sys.stderr.write('RunCommandIO: cannot run %s: %s\n' %
                                 (args[0], sys.exc_info()[1]))
            #end
        finally:
            # Only reached when setup or exec failed.
            os._exit(127)
        #end
    #end

    status = os.waitpid(pid,0)[1]
    # The exit status only means something after a normal exit, so
    # check for that first.
    if not os.WIFEXITED(status):
        raise CommandError(args, 'terminated abnormally (wait status %d)' % status)
    #end
    code = os.WEXITSTATUS(status)
    if code != 0:
        raise CommandError(args, 'exited %d' % code)
    #end
#end
211
class TimedTest:
    """Time the encode (and optionally decode) step of a runnable.

    The constructor performs all the work.  Afterwards the instance
    holds:
      encode_time -- StatList of elapsed wall-clock seconds per encode
      encode_size -- runnable.EncodeSize(DFILE)
      decode_time -- StatList of decode times, or a placeholder StatList
                     labelled 'not decoded' when SKIP_DECODE is set
    """

    def __init__(self, target, source, runnable,
                 skip_trials = SKIP_TRIALS,
                 min_trials = MIN_TRIALS,
                 max_trials = MAX_TRIALS,
                 min_stddev_pct = MIN_STDDEV_PCT):
        self.target = target
        self.source = source
        self.runnable = runnable

        self.skip_trials = skip_trials
        self.max_trials = max_trials
        # min_trials can never exceed max_trials.
        self.min_trials = min(min_trials, max_trials)
        self.min_stddev_pct = min_stddev_pct

        def encode_once(runner):
            return runner.Encode(self.target, self.source, DFILE)
        #end

        self.encode_time = self.DoTest(DFILE, encode_once)
        self.encode_size = runnable.EncodeSize(DFILE)

        if not SKIP_DECODE:
            def decode_once(runner):
                return runner.Decode(DFILE, self.source, RFILE)
            #end

            self.decode_time = self.DoTest(RFILE, decode_once)

            # verify
            runnable.Verify(self.target, RFILE)
        else:
            self.decode_time = StatList([1, 1], 'not decoded')
        #end
    #end

    def DoTest(self, fname, func):
        """Call func(self.runnable) repeatedly; return wall-time statistics.

        fname is removed (if present) before every trial.  The first
        skip_trials trials are run but not recorded.  After at least
        min_trials recorded trials, the loop stops as soon as the
        standard deviation is below min_stddev_pct percent of the mean,
        or max_trials trials have been recorded.  When skip_trials +
        min_trials <= 2 the samples are duplicated once and the loop
        stops immediately.

        Returns a StatList over the recorded elapsed times.
        """
        warmup   = self.skip_trials
        required = warmup + self.min_trials
        samples  = []
        runs     = 0

        while True:
            try:
                os.remove(fname)
            except OSError:
                pass

            wall_begin = time.time()
            cpu_begin  = time.clock()

            func(self.runnable)

            cpu_used  = time.clock() - cpu_begin
            wall_used = time.time() - wall_begin

            # Clamp to a tiny positive value so that the mean used as a
            # divisor below is never zero.
            wall_used = max(wall_used, 0.0000001)
            cpu_used  = max(cpu_used,  0.0000001)

            runs += 1

            # skip some of the first trials
            if runs > warmup:
                samples.append((cpu_used, wall_used))

            # at least so many
            if runs < required:
                continue

            # With so few trials, duplicate the samples (StatList needs
            # more than one value) and stop after this round.
            forced = required <= 2
            if forced:
                samples = samples + samples
            #end

            stats = StatList([pair[1] for pair in samples], 'elap time')
            rel_dev = float(stats.s) / float(stats.mean)

            exhausted = (runs - warmup) >= self.max_trials
            stable = (100.0 * rel_dev) < self.min_stddev_pct
            if forced or exhausted or stable:
                return stats
            #end
        #end
    #end
#end
301
def Decimals(start, end):
    """Return a list of roughly logarithmically spaced values.

    Beginning with step = start, the multiples 1x..9x of step are
    added; step is then multiplied by ten and the process repeats.  The
    list ends with the first step * 10 that is >= end.

    Example: Decimals(1, 100) -> [1, 2, ..., 9, 10, 20, ..., 90, 100]

    The list is grown with extend() rather than rebuilt with
    'list + range(...)', which recopies it every round and only works
    where range() returns a list.
    """
    values = []
    step = start
    while 1:
        values.extend(range(step, step * 10, step))
        if step * 10 >= end:
            values.append(step * 10)
            break
        step = step * 10
    return values
#end
314
# Raw speed test: for each run length from Decimals(MIN_RUN, MAX_RUN),
# RUNFILE is resized to that length and encoded with no source by
# xdelta3 at three -W window sizes, by the swig module and by gzip; one
# ReportSpeed line is printed per run.
def RunSpeedTest():
    window_runs = ((20, '1MB '),
                   (19, '512k'),
                   (18, '256k'))

    for L in Decimals(MIN_RUN, MAX_RUN):
        SetFileSize(RUNFILE, L)

        for shift, label in window_runs:
            runner = Xdelta3Runner(['-W', str(1<<shift)])
            ReportSpeed(L, TimedTest(RUNFILE, None, runner), label)
        #end

        ReportSpeed(L, TimedTest(RUNFILE, None, Xdelta3Mod1(RUNFILE)), 'swig')

        ReportSpeed(L, TimedTest(RUNFILE, None, GzipRun1()), 'gzip')
    #end
#end
336
def SetFileSize(F,L):
    """Create file F if necessary and set its length to exactly L bytes.

    Existing content beyond L is cut off.  The size is re-read with
    fstat and asserted equal to L.  The descriptor is closed even when
    ftruncate or the size check fails.
    """
    fd = os.open(F, os.O_CREAT | os.O_WRONLY)
    try:
        os.ftruncate(fd,L)
        assert os.fstat(fd).st_size == L
    finally:
        os.close(fd)
    #end
#end
343
def ReportSpeed(L,tr,desc):
    """Write a one-line speed summary to standard output.

    L    -- run length, printed as given
    tr   -- object providing encode_size, encode_time.mean and
            decode_time.mean (means in seconds; printed in ms)
    desc -- label placed at the start of the line

    sys.stdout.write is used instead of the print statement so the
    function is valid on both Python 2 and Python 3; the text written is
    the same.
    """
    line = '%s run length %u: size %u: time %.3f ms: decode %.3f ms' % \
           (desc, L,
            tr.encode_size,
            tr.encode_time.mean * 1000.0,
            tr.decode_time.mean * 1000.0)
    sys.stdout.write(line + '\n')
#end
351
class Xdelta3RunClass:
    """Factory for Xdelta3Runner objects sharing one list of extra args."""

    def __init__(self, extra):
        # extra: list of argument strings handed to every runner created.
        self.extra = extra
    #end

    def __str__(self):
        # The extra arguments separated by single spaces.
        separator = ' '
        return separator.join(self.extra)
    #end

    def New(self):
        # A fresh runner configured with this object's extra arguments.
        runner = Xdelta3Runner(self.extra)
        return runner
    #end
#end
365
class Xdelta3Runner:
    """Runs xdelta3 encode/decode in-process through xdelta3main.main."""

    def __init__(self, extra):
        # extra: list of additional arguments used when encoding.
        self.extra = extra
    #end

    def Encode(self, target, source, output):
        """Encode target (against source, if given) into output.

        The command line is ALL_ARGS + self.extra + ['-e'], then
        '-s source' when source is set, then target and output.
        """
        args = (ALL_ARGS +
                self.extra +
                ['-e'])
        if source:
            args.append('-s')
            args.append(source)
        #end
        args = args + [target, output]
        self.Main(args)
    #end

    def Decode(self, input, source, output):
        """Decode input (against source, if given) into output.

        self.extra is not passed when decoding.
        """
        args = (ALL_ARGS +
                ['-d'])
        if source:
            args.append('-s')
            args.append(source)
        #end
        args = args + [input, output]
        self.Main(args)
    #end

    def Verify(self, target, recon):
        """Compare target and recon with cmp; RunCommand raises on mismatch."""
        RunCommand(('cmp', target, recon))
    #end

    def EncodeSize(self, output):
        """Return the size in bytes of the file output."""
        return os.stat(output).st_size
    #end

    def Main(self, args):
        """Call xdelta3main.main(args), turning any Exception into CommandError.

        The text of the underlying exception is appended to the
        CommandError message so the cause is not lost.
        """
        try:
            xdelta3main.main(args)
        except Exception:
            # sys.exc_info() avoids the 'except X, e' / 'except X as e'
            # spellings, neither of which is accepted by every
            # interpreter version.
            cause = sys.exc_info()[1]
            raise CommandError(args, "xdelta3.main exception: %s" % (cause,))
        #end
    #end
#end
410
class Xdelta3Mod1:
    """Runs encode/decode in memory through the xdelta3 module.

    The path arguments of Encode/Decode/Verify/EncodeSize are ignored;
    the data read from the file given to the constructor is used.
    """

    def __init__(self, file):
        # Read as binary and close the handle right away; the original
        # left the file open and used text mode.
        f = open(file, 'rb')
        try:
            self.target_data = f.read()
        finally:
            f.close()
        #end
    #end

    def Encode(self, ignore1, ignore2, ignore3):
        """Encode self.target_data with no source; store it in self.encoded.

        Raises CommandError when xd3_encode_memory returns non-zero.
        """
        r1, encoded = xdelta3.xd3_encode_memory(self.target_data, None, 1000000, 1<<10)
        if r1 != 0:
            raise CommandError('memory', 'encode failed: %s' % r1)
        #end
        self.encoded = encoded
    #end

    def Decode(self, ignore1, ignore2, ignore3):
        """Decode self.encoded; store the result in self.decoded.

        Raises CommandError when xd3_decode_memory returns non-zero.
        """
        r2, data1 = xdelta3.xd3_decode_memory(self.encoded, None, len(self.target_data))
        if r2 != 0:
            # Report this call's own return code.  The original
            # formatted 'r1', which is not defined in this method.
            raise CommandError('memory', 'decode failed: %s' % r2)
        #end
        self.decoded = data1
    #end

    def Verify(self, ignore1, ignore2):
        """Raise CommandError unless the decoded data equals the input."""
        if self.target_data != self.decoded:
            raise CommandError('memory', 'bad decode')
        #end
    #end

    def EncodeSize(self, ignore1):
        """Return the length of the encoded data."""
        return len(self.encoded)
    #end
#end
442
class GzipRun1:
    """Runs gzip as an external process; it cannot use a source file."""

    def Encode(self, target, source, output):
        """Compress target into output with 'gzip -cf'; source must be None."""
        assert source is None
        RunCommandIO(['gzip', '-cf'], target, output)
    #end

    def Decode(self, input, source, output):
        """Decompress input into output with 'gzip -dcf'; source must be None."""
        assert source is None
        RunCommandIO(['gzip', '-dcf'], input, output)
    #end

    def Verify(self, target, recon):
        """Compare target and recon with cmp; RunCommand raises on mismatch."""
        RunCommand(('cmp', target, recon))
    #end

    def EncodeSize(self, output):
        """Return the size in bytes of the file output."""
        return os.stat(output).st_size
    #end
#end
462
class Xdelta1RunClass:
    """Factory for Xdelta1Runner objects; its string form is 'xdelta1'."""

    def __str__(self):
        label = 'xdelta1'
        return label
    #end

    def New(self):
        # A fresh runner each call.
        runner = Xdelta1Runner()
        return runner
    #end
#end
472
class Xdelta1Runner:
    """Runs the external xdelta1 program; a source file is always required."""

    def Encode(self, target, source, output):
        """Run 'xdelta1 delta -q source target output'."""
        assert source is not None
        args = ['xdelta1', 'delta', '-q', source, target, output]
        # Note: for dumb historical reasons, xdelta1 returns 1 or 0
        # NOTE(review): this note originally sat in Decode; presumably it
        # explains why both 0 and 1 are accepted here -- confirm.
        RunCommand(args, [0, 1])
    #end

    def Decode(self, input, source, output):
        """Run 'xdelta1 patch -q input source output'; only exit 0 is accepted."""
        assert source is not None
        args = ['xdelta1', 'patch', '-q', input, source, output]
        RunCommand(args)
    #end

    def Verify(self, target, recon):
        """Compare target and recon with cmp; RunCommand raises on mismatch."""
        RunCommand(('cmp', target, recon))
    #end

    def EncodeSize(self, output):
        """Return the size in bytes of the file output."""
        return os.stat(output).st_size
    #end
#end
495
# exceptions
class SkipRcsException(Exception):
    """Carries the reason given by the raiser in self.reason.

    Derives from Exception so that it can be raised on interpreters
    that reject non-exception classes, and so that str() shows the
    reason.
    """
    def __init__(self,reason):
        Exception.__init__(self, reason)
        self.reason = reason
    #end
#end
502
class NotEnoughVersions(Exception):
    """Exception carrying no data.

    Derives from Exception so that it can be raised on interpreters
    that reject non-exception classes.
    """
    def __init__(self):
        Exception.__init__(self)
    #end
#end
508
509 class CommandError: