icdiff (21706B)
#!/usr/bin/env python

""" icdiff.py

Author: Jeff Kaufman, derived from difflib.HtmlDiff

License: This code is usable under the same open terms as the rest of
         python.  See: http://www.python.org/psf/license/

"""

import os
import sys
import errno
import difflib
import optparse
import re
import filecmp

# ANSI escape sequences used for all console coloring.  "none" resets
# every attribute.
color_codes = {
    "red":     '\033[0;31m',
    "green":   '\033[0;32m',
    "yellow":  '\033[0;33m',
    "blue":    '\033[0;34m',
    "magenta": '\033[0;35m',
    "cyan":    '\033[0;36m',
    "none":    '\033[m',
    "red_bold":     '\033[1;31m',
    "green_bold":   '\033[1;32m',
    "yellow_bold":  '\033[1;33m',
    "blue_bold":    '\033[1;34m',
    "magenta_bold": '\033[1;35m',
    "cyan_bold":    '\033[1;36m',
}


class ConsoleDiff(object):
    """Console colored side by side comparison with change highlights.

    Based on difflib.HtmlDiff

    This class can be used to create a text-mode table showing a side
    by side, line by line comparison of text with inter-line and
    intra-line change highlights in ansi color escape sequences as
    read by xterm.  The table can be generated in either full or
    contextual difference mode.

    To generate the table, call make_table.

    Usage is almost the same as HtmlDiff except only make_table is
    implemented and the file can be invoked on the command line.
    Run::

      python icdiff.py --help

    for command line usage information.

    """

    def __init__(self, tabsize=8, wrapcolumn=None, linejunk=None,
                 charjunk=difflib.IS_CHARACTER_JUNK, cols=80,
                 line_numbers=False,
                 show_all_spaces=False,
                 highlight=False,
                 no_bold=False):
        """ConsoleDiff instance initializer

        Arguments:
        tabsize -- tab stop spacing, defaults to 8.
        wrapcolumn -- column number where lines are broken and wrapped,
            defaults to None, in which case it is derived from cols (half
            the width, minus room for padding and, if enabled, line
            numbers).
        linejunk, charjunk -- keyword arguments passed into ndiff() (used by
            ConsoleDiff() to generate the side by side differences).  See
            ndiff() documentation for argument default values and descriptions.
        cols -- total width of the output table in characters.
        line_numbers -- prefix every line with its line number.
        show_all_spaces -- highlight every changed whitespace character.
        highlight -- color changes via the background instead of the
            foreground.
        no_bold -- use the non-bold color variants.
        """

        self._tabsize = tabsize
        self.line_numbers = line_numbers
        self.cols = cols
        self.show_all_spaces = show_all_spaces
        self.highlight = highlight
        self.no_bold = no_bold

        if wrapcolumn is None:
            if not line_numbers:
                wrapcolumn = self.cols // 2 - 2
            else:
                # leave room for the 8 character line number column
                wrapcolumn = self.cols // 2 - 10

        self._wrapcolumn = wrapcolumn
        self._linejunk = linejunk
        self._charjunk = charjunk

    def _tab_newline_replace(self, fromlines, tolines):
        """Returns from/to line lists with tabs expanded and newlines removed.

        Instead of tab characters being replaced by the number of spaces
        needed to fill in to the next tab stop, this function will fill
        the space with tab characters.  This is done so that the difference
        algorithms can identify changes in a file when tabs are replaced by
        spaces and vice versa.  At the end of the table generation, the tab
        characters will be replaced with a space.
        """
        def expand_tabs(line):
            # hide real spaces
            line = line.replace(' ', '\0')
            # expand tabs into spaces
            line = line.expandtabs(self._tabsize)
            # replace spaces from expanded tabs back into tab characters
            # (we'll replace them with markup after we do differencing)
            line = line.replace(' ', '\t')
            return line.replace('\0', ' ').rstrip('\n')
        fromlines = [expand_tabs(line) for line in fromlines]
        tolines = [expand_tabs(line) for line in tolines]
        return fromlines, tolines

    def _split_line(self, data_list, line_num, text):
        """Builds list of text lines by splitting text lines at wrap point

        This function will determine if the input text line needs to be
        wrapped (split) into separate lines.  If so, the first wrap point
        will be determined and the first line appended to the output
        text line list.  This function is used recursively to handle
        the second part of the split line to further split it.

        data_list -- output list; (line_num, text) tuples are appended to it
        line_num -- line number of the text, or '>' for a continuation
        text -- line text, possibly containing \\0X ... \\1 change markers
        """
        # if blank line or context separator, just add it to the output list
        if not line_num:
            data_list.append((line_num, text))
            return

        # if line text doesn't need wrapping, just add it to the output list
        size = len(text)
        if (size <= self._wrapcolumn) or ((size - (text.count('\0') * 3)) <= self._wrapcolumn):
            data_list.append((line_num, text))
            return

        # scan text looking for the wrap point, keeping track if the wrap
        # point is inside markers
        i = 0
        n = 0
        mark = ''
        while n < self._wrapcolumn and i < size:
            if text[i] == '\0':
                i += 1
                mark = text[i]
                i += 1
            elif text[i] == '\1':
                i += 1
                mark = ''
            else:
                i += 1
                n += 1

        # wrap point is inside text, break it up into separate lines
        line1 = text[:i]
        line2 = text[i:]

        # if wrap point is inside markers, place end marker at end of first
        # line and start marker at beginning of second line because each
        # line is padded and colorized on its own.
        if mark:
            line1 = line1 + '\1'
            line2 = '\0' + mark + line2

        # tack on first line onto the output list
        data_list.append((line_num, line1))

        # use this routine again to wrap the remaining text
        self._split_line(data_list, '>', line2)

    def _line_wrapper(self, diffs):
        """Returns iterator that splits (wraps) mdiff text lines"""

        # pull from/to data and flags from mdiff iterator
        for fromdata, todata, flag in diffs:
            # check for context separators and pass them through
            if flag is None:
                yield fromdata, todata, flag
                continue
            (fromline, fromtext), (toline, totext) = fromdata, todata
            # for each from/to line split it at the wrap column to form
            # list of text lines.
            fromlist, tolist = [], []
            self._split_line(fromlist, fromline, fromtext)
            self._split_line(tolist, toline, totext)
            # yield from/to line in pairs inserting blank lines as
            # necessary when one side has more wrapped lines
            while fromlist or tolist:
                if fromlist:
                    fromdata = fromlist.pop(0)
                else:
                    fromdata = ('', ' ')
                if tolist:
                    todata = tolist.pop(0)
                else:
                    todata = ('', ' ')
                yield fromdata, todata, flag

    def _collect_lines(self, diffs):
        """Collects mdiff output into separate lists

        Before storing the mdiff from/to data into a list, it is converted
        into a single line of text with console markup.
        """

        fromlist, tolist, flaglist = [], [], []
        # pull from/to data and flags from mdiff style iterator
        for fromdata, todata, flag in diffs:
            try:
                # store the formatted text of the lines into the lists
                fromlist.append(self._format_line(0, flag, *fromdata))
                tolist.append(self._format_line(1, flag, *todata))
            except TypeError:
                # exceptions occur for lines where context separators go
                fromlist.append(None)
                tolist.append(None)
            flaglist.append(flag)
        return fromlist, tolist, flaglist

    def _format_line(self, side, flag, linenum, text):
        """Returns the console text of a "from" / "to" line

        side -- 0 or 1 indicating "from" or "to" text (unused)
        flag -- indicates if difference on line (unused)
        linenum -- line number (used for line number column)
        text -- line text to be marked up
        """
        try:
            lid = '%d' % linenum
        except TypeError:
            # handle blank lines where linenum is '>' or ''
            lid = ''

        text = text.rstrip()

        if not self.line_numbers:
            return text
        return '%s %s' % (self._rpad(lid, 8), text)

    def _real_len(self, s):
        """Returns the displayed width of s.

        Change markers (\\0+, \\0-, \\0^, \\1) and ANSI color escape
        sequences are not counted; a tab counts as one column.
        """
        l = 0
        in_esc = False
        prev = ' '
        for c in s.replace('\0+', "").replace('\0-', "").replace('\0^', "").replace('\1', "").replace('\t', ' '):
            if in_esc:
                if c == "m":
                    in_esc = False
            else:
                if c == "[" and prev == "\033":
                    in_esc = True
                    l -= 1  # we counted prev when we shouldn't have
                else:
                    l += 1
            prev = c

        return l

    def _rpad(self, s, field_width):
        """Returns s right-justified (padding on the left) in field_width."""
        return self._pad(s, field_width) + s

    def _pad(self, s, field_width):
        """Returns the spaces needed to extend s to field_width columns."""
        return " " * (field_width - self._real_len(s))

    def _lpad(self, s, field_width):
        """Returns s left-justified (padding on the right) in field_width."""
        return s + self._pad(s, field_width)

    def _convert_flags(self, fromlist, tolist, flaglist, context, numlines):
        """Makes list of "next" links

        Inherited from HtmlDiff.  make_table ignores the link lists that
        are returned, but relies on this method to substitute placeholder
        rows ('No Differences Found' / 'Empty File') when there is no
        content at all.
        """

        # all anchor names will be generated using the unique "to" prefix

        # process change flags, generating middle column of next anchors/links
        next_id = [''] * len(flaglist)
        next_href = [''] * len(flaglist)
        num_chg, in_change = 0, False
        last = 0
        toprefix = ''
        for i, flag in enumerate(flaglist):
            if flag:
                if not in_change:
                    in_change = True
                    last = i
                    # at the beginning of a change, drop an anchor a few lines
                    # (the context lines) before the change for the previous
                    # link
                    i = max([0, i - numlines])
                    next_id[i] = ' id="difflib_chg_%s_%d"' % (toprefix, num_chg)
                    # at the beginning of a change, drop a link to the next
                    # change
                    num_chg += 1
                    next_href[last] = '<a href="#difflib_chg_%s_%d">n</a>' % (
                        toprefix, num_chg)
            else:
                in_change = False
        # check for cases where there is no content to avoid exceptions
        if not flaglist:
            flaglist = [False]
            next_id = ['']
            next_href = ['']
            last = 0
            if context:
                fromlist = ['No Differences Found']
                tolist = fromlist
            else:
                fromlist = tolist = ['Empty File']
        # if not a change on first line, drop a link
        if not flaglist[0]:
            next_href[0] = '<a href="#difflib_chg_%s_0">f</a>' % toprefix
        # redo the last link to link to the top
        next_href[last] = '<a href="#difflib_chg_%s_top">t</a>' % (toprefix)

        return fromlist, tolist, flaglist, next_href, next_id

    def make_table(self, fromlines, tolines, fromdesc='', todesc='', context=False,
                   numlines=5):
        """Returns table of side by side comparison with change highlights

        Arguments:
        fromlines -- list of "from" lines
        tolines -- list of "to" lines
        fromdesc -- "from" file column header string
        todesc -- "to" file column header string
        context -- set to True for contextual differences (defaults to False
            which shows full differences).
        numlines -- number of context lines.  When context is set True,
            controls number of lines displayed before and after the change.
            When context is False it has no visible effect on the output.
        """

        # change tabs to spaces before it gets more difficult after we insert
        # markup
        fromlines, tolines = self._tab_newline_replace(fromlines, tolines)

        # create diffs iterator which generates side by side from/to data
        if context:
            context_lines = numlines
        else:
            context_lines = None
        diffs = difflib._mdiff(fromlines, tolines, context_lines, linejunk=self._linejunk,
                               charjunk=self._charjunk)

        # set up iterator to wrap lines that exceed desired width
        if self._wrapcolumn:
            diffs = self._line_wrapper(diffs)

        # collect up from/to lines and flags into lists (also format the lines)
        fromlist, tolist, flaglist = self._collect_lines(diffs)

        # normalise the lists (inserts a placeholder row when empty); the
        # HTML link lists it also returns are not used on the console
        fromlist, tolist, flaglist, next_href, next_id = self._convert_flags(
            fromlist, tolist, flaglist, context, numlines)

        s = []

        if fromdesc or todesc:
            s.append((simple_colorize(fromdesc, "blue"),
                      simple_colorize(todesc, "blue")))

        for i, (fromtext, totext, flag) in enumerate(zip(fromlist, tolist, flaglist)):
            if flag is None:
                # mdiff yields None on separator lines; skip the bogus ones
                # generated for the first line
                if i > 0:
                    s.append((simple_colorize('---', "blue"),
                              simple_colorize('---', "blue")))
            else:
                s.append((fromtext, totext))

        half_width = self.cols // 2 - 1
        table_lines = [" ".join(self._lpad(side, half_width) for side in sides)
                       for sides in s]

        return self.colorize("\n".join(table_lines))

    def colorize(self, s):
        """Returns s with change markers replaced by ANSI color sequences.

        \\0+ / \\0- / \\0^ start an added / removed / changed span and \\1
        ends it; any tabs still present become single spaces.
        """
        def background(color):
            # switch a foreground color (bold or plain) to reverse video so
            # the background is painted; plain codes must be handled too or
            # --no-bold would silently disable background highlighting
            return color.replace("\033[1;", "\033[7;").replace("\033[0;", "\033[7;")

        if self.no_bold:
            C_ADD = color_codes["green"]
            C_SUB = color_codes["red"]
            C_CHG = color_codes["yellow"]
        else:
            C_ADD = color_codes["green_bold"]
            C_SUB = color_codes["red_bold"]
            C_CHG = color_codes["yellow_bold"]

        if self.highlight:
            C_ADD, C_SUB, C_CHG = background(C_ADD), background(C_SUB), background(C_CHG)

        C_NONE = color_codes["none"]
        colors = (C_ADD, C_SUB, C_CHG, C_NONE)

        s = s.replace('\0+', C_ADD).replace('\0-', C_SUB).replace('\0^', C_CHG).replace('\1', C_NONE).replace('\t', ' ')

        if self.highlight:
            return s

        if not self.show_all_spaces:
            # A change consisting entirely of whitespace would be invisible
            # in a foreground color, so show it in reverse video instead.
            return re.sub("\033\\[[01];3([123])m(\\s+)(\033\\[)", "\033[7;3\\1m\\2\\3", s)

        def will_see_coloredspace(i, s):
            while i < len(s) and s[i].isspace():
                i += 1
            if i < len(s) and s[i] == '\033':
                return False
            return True

        n_s = []
        in_color = False
        seen_coloredspace = False
        for i, c in enumerate(s):
            # after each completed escape sequence, work out which color
            # (if any) is now active
            if len(n_s) > 6 and n_s[-1] == "m":
                ns_end = "".join(n_s[-7:])
                for color in colors:
                    if ns_end.endswith(color):
                        if color != in_color:
                            seen_coloredspace = False
                        in_color = color
                if ns_end.endswith(C_NONE):
                    in_color = False

            if c.isspace() and in_color and (self.show_all_spaces or not (seen_coloredspace or will_see_coloredspace(i, s))):
                n_s.extend([C_NONE, background(in_color), c, C_NONE, in_color])
            else:
                if in_color:
                    seen_coloredspace = True
                n_s.append(c)

        return "".join(n_s)


def simple_colorize(s, chosen_color):
    """Returns s wrapped in the named color from color_codes and a reset."""
    return "%s%s%s" % (color_codes[chosen_color], s, color_codes["none"])


def start():
    """Parses the command line and runs the requested comparison."""
    # If you change any of these, also update README.
    parser = optparse.OptionParser(usage="usage: %prog [options] left_file right_file",
                                   description="Show differences between files in a two column view.")
    # numeric options are typed so that optparse reports a usage error for
    # bad values instead of a traceback from a later int() call
    parser.add_option("--cols", default=None, type="int",
                      help="specify the width of the screen. Autodetection is Linux only")
    parser.add_option("--head", default=0, type="int",
                      help="consider only the first N lines of each file")
    parser.add_option("--highlight", default=False,
                      action="store_true",
                      help="color by changing the background color instead of the foreground color.  Very fast, ugly, displays all changes")
    parser.add_option("--line-numbers", default=False,
                      action="store_true",
                      help="generate output with line numbers")
    parser.add_option("--no-bold", default=False,
                      action="store_true",
                      help="use non-bold colors; recommended for with solarized")
    parser.add_option("--no-headers", default=False,
                      action="store_true",
                      help="don't label the left and right sides with their file names")
    parser.add_option("--numlines", default=5, type="int",
                      help="how many lines of context to print; can't be combined with --whole-file")
    parser.add_option("--recursive", default=False,
                      action="store_true",
                      help="recursively compare subdirectories")
    parser.add_option("--show-all-spaces", default=False,
                      action="store_true",
                      help="color all non-matching whitespace including that which is not needed for drawing the eye to changes.  Slow, ugly, displays all changes")
    parser.add_option("--version", default=False,
                      action="store_true",
                      help="print version and exit")
    parser.add_option("--whole-file", default=False,
                      action="store_true",
                      help="show the whole file instead of just changed lines and context")

    (options, args) = parser.parse_args()

    if options.version:
        print("icdiff version 1.2.0")
        sys.exit()

    if len(args) != 2:
        parser.print_help()
        sys.exit()

    a, b = args

    if not options.cols:
        def ioctl_GWINSZ(fd):
            # best effort: fcntl/termios are missing on some platforms and
            # the ioctl fails when fd is not a terminal
            try:
                import fcntl
                import termios
                import struct
                cr = struct.unpack('hh', fcntl.ioctl(fd, termios.TIOCGWINSZ, '1234'))
            except Exception:
                return None
            return cr
        cr = ioctl_GWINSZ(0) or ioctl_GWINSZ(1) or ioctl_GWINSZ(2)
        # some terminals report a width of 0; that would produce a negative
        # wrap column, so fall back to the default in that case as well
        if cr and cr[1] > 0:
            options.cols = cr[1]
        else:
            options.cols = 80

    if options.recursive:
        diff_recursively(options, a, b)
    else:
        diff_files(options, a, b)


def diff_recursively(options, a, b):
    """Compares a and b, descending into directories present on both sides.

    Files that compare equal (per filecmp.cmp) are skipped; entries that
    exist on only one side, or whose types differ, are reported in magenta.
    """
    def print_meta(s):
        print(simple_colorize(s, "magenta"))

    if os.path.isfile(a) and os.path.isfile(b):
        if not filecmp.cmp(a, b):
            diff_files(options, a, b)

    elif os.path.isdir(a) and os.path.isdir(b):
        a_contents = set(os.listdir(a))
        b_contents = set(os.listdir(b))

        for child in sorted(a_contents.union(b_contents)):
            if child not in b_contents:
                print_meta("Only in %s: %s" % (a, child))
            elif child not in a_contents:
                print_meta("Only in %s: %s" % (b, child))
            else:
                diff_recursively(options,
                                 os.path.join(a, child),
                                 os.path.join(b, child))

    elif os.path.isdir(a) and os.path.isfile(b):
        print_meta("File %s is a directory while %s is a file" % (a, b))

    elif os.path.isfile(a) and os.path.isdir(b):
        print_meta("File %s is a file while %s is a directory" % (a, b))


def read_lines(path):
    """Returns the lines of the text file at path with normalised newlines.

    Python 2 needs mode "U" for universal newlines.  Python 3 text mode
    already translates newlines, and "U" was removed in Python 3.11.  The
    file is closed before returning.
    """
    if sys.version_info[0] >= 3:
        mode = "r"
    else:
        mode = "U"
    with open(path, mode) as handle:
        return handle.readlines()


def diff_files(options, a, b):
    """Prints the side by side diff of files a and b to stdout.

    options -- parsed command line options (see start())
    Exits with status 1 if either path is a directory.
    """
    headers = a, b
    if options.no_headers:
        headers = None, None

    head = int(options.head)

    for x in [a, b]:
        if os.path.isdir(x):
            sys.stderr.write("error: %s is a directory; did you mean to pass --recursive?\n" % x)
            sys.exit(1)
    lines_a = read_lines(a)
    lines_b = read_lines(b)

    if head != 0:
        lines_a = lines_a[:head]
        lines_b = lines_b[:head]

    print(ConsoleDiff(cols=int(options.cols),
                      show_all_spaces=options.show_all_spaces,
                      highlight=options.highlight,
                      no_bold=options.no_bold,
                      line_numbers=options.line_numbers).make_table(
        lines_a, lines_b, headers[0], headers[1], context=(not options.whole_file), numlines=int(options.numlines)))
    sys.stdout.flush()


if __name__ == "__main__":
    try:
        start()
    except KeyboardInterrupt:
        pass
    except IOError as e:
        # a closed pipe (e.g. output piped into `head`) is not an error
        if e.errno == errno.EPIPE:
            pass
        else:
            raise