mybin

my ~/bin
git clone https://a3nm.net/git/mybin/
Log | Files | Refs | README

icdiff (21706B)


      1 #!/usr/bin/env python
      2 
      3 """ icdiff.py
      4 
      5 Author: Jeff Kaufman, derived from difflib.HtmlDiff
      6 
      7 License: This code is usable under the same open terms as the rest of
      8          python.  See: http://www.python.org/psf/license/
      9 
     10 """
     11 
     12 import os
     13 import sys
     14 import errno
     15 import difflib
     16 import optparse
     17 import re
     18 import filecmp
     19 
     20 color_codes = {
     21     "red":     '\033[0;31m',
     22     "green":   '\033[0;32m',
     23     "yellow":  '\033[0;33m',
     24     "blue":    '\033[0;34m',
     25     "magenta": '\033[0;35m',
     26     "cyan":    '\033[0;36m',
     27     "none":    '\033[m',
     28     "red_bold":     '\033[1;31m',
     29     "green_bold":   '\033[1;32m',
     30     "yellow_bold":  '\033[1;33m',
     31     "blue_bold":    '\033[1;34m',
     32     "magenta_bold": '\033[1;35m',
     33     "cyan_bold":    '\033[1;36m',
     34 }
     35 
     36 class ConsoleDiff(object):
     37     """Console colored side by side comparison with change highlights.
     38 
     39     Based on difflib.HtmlDiff
     40 
    This class can be used to create a text-mode table showing a side
    by side, line by line comparison of text with inter-line and
    intra-line change highlights in ansi color escape sequences as
    read by xterm.  The table can be generated in either full or
    contextual difference mode.
     48 
     49     To generate the table, call make_table.
     50 
    Usage is almost the same as HtmlDiff except only make_table is
    implemented and the file can be invoked on the command line.
     53     Run::
     54 
     55       python icdiff.py --help
     56 
     57     for command line usage information.
     58 
     59     """
     60 
     61     def __init__(self, tabsize=8, wrapcolumn=None, linejunk=None,
     62                  charjunk=difflib.IS_CHARACTER_JUNK, cols=80,
     63                  line_numbers=False,
     64                  show_all_spaces=False,
     65                  highlight=False,
     66                  no_bold=False):
     67         """ConsoleDiff instance initializer
     68 
     69         Arguments:
     70         tabsize -- tab stop spacing, defaults to 8.
     71         wrapcolumn -- column number where lines are broken and wrapped,
     72             defaults to None where lines are not wrapped.
     73         linejunk, charjunk -- keyword arguments passed into ndiff() (used by
     74             ConsoleDiff() to generate the side by side differences).  See
     75             ndiff() documentation for argument default values and descriptions.
     76         """
     77 
     78         self._tabsize = tabsize
     79         self.line_numbers = line_numbers
     80         self.cols = cols
     81         self.show_all_spaces = show_all_spaces
     82         self.highlight = highlight
     83         self.no_bold = no_bold
     84 
     85         if wrapcolumn is None:
     86             if not line_numbers:
     87                 wrapcolumn = self.cols // 2 - 2
     88             else:
     89                 wrapcolumn = self.cols // 2 - 10
     90 
     91         self._wrapcolumn = wrapcolumn
     92         self._linejunk = linejunk
     93         self._charjunk = charjunk
     94 
     95 
     96     def _tab_newline_replace(self, fromlines, tolines):
     97         """Returns from/to line lists with tabs expanded and newlines removed.
     98 
     99         Instead of tab characters being replaced by the number of spaces
    100         needed to fill in to the next tab stop, this function will fill
    101         the space with tab characters.  This is done so that the difference
    102         algorithms can identify changes in a file when tabs are replaced by
    103         spaces and vice versa.  At the end of the table generation, the tab
    104         characters will be replaced with a space.
    105         """
    106         def expand_tabs(line):
    107             # hide real spaces
    108             line = line.replace(' ', '\0')
    109             # expand tabs into spaces
    110             line = line.expandtabs(self._tabsize)
    111             # relace spaces from expanded tabs back into tab characters
    112             # (we'll replace them with markup after we do differencing)
    113             line = line.replace(' ', '\t')
    114             return line.replace('\0', ' ').rstrip('\n')
    115         fromlines = [expand_tabs(line) for line in fromlines]
    116         tolines = [expand_tabs(line) for line in tolines]
    117         return fromlines, tolines
    118 
    119     def _split_line(self, data_list, line_num, text):
    120         """Builds list of text lines by splitting text lines at wrap point
    121 
    122         This function will determine if the input text line needs to be
    123         wrapped (split) into separate lines.  If so, the first wrap point
    124         will be determined and the first line appended to the output
    125         text line list.  This function is used recursively to handle
    126         the second part of the split line to further split it.
    127         """
    128         # if blank line or context separator, just add it to the output list
    129         if not line_num:
    130             data_list.append((line_num, text))
    131             return
    132 
    133         # if line text doesn't need wrapping, just add it to the output list
    134         size = len(text)
    135         if (size <= self._wrapcolumn) or ((size - (text.count('\0') * 3)) <= self._wrapcolumn):
    136             data_list.append((line_num, text))
    137             return
    138 
    139         # scan text looking for the wrap point, keeping track if the wrap
    140         # point is inside markers
    141         i = 0
    142         n = 0
    143         mark = ''
    144         while n < self._wrapcolumn and i < size:
    145             if text[i] == '\0':
    146                 i += 1
    147                 mark = text[i]
    148                 i += 1
    149             elif text[i] == '\1':
    150                 i += 1
    151                 mark = ''
    152             else:
    153                 i += 1
    154                 n += 1
    155 
    156         # wrap point is inside text, break it up into separate lines
    157         line1 = text[:i]
    158         line2 = text[i:]
    159 
    160         # if wrap point is inside markers, place end marker at end of first
    161         # line and start marker at beginning of second line because each
    162         # line will have its own table tag markup around it.
    163         if mark:
    164             line1 = line1 + '\1'
    165             line2 = '\0' + mark + line2
    166 
    167         # tack on first line onto the output list
    168         data_list.append((line_num, line1))
    169 
    170         # use this routine again to wrap the remaining text
    171         self._split_line(data_list, '>', line2)
    172 
    173     def _line_wrapper(self, diffs):
    174         """Returns iterator that splits (wraps) mdiff text lines"""
    175 
    176         # pull from/to data and flags from mdiff iterator
    177         for fromdata, todata, flag in diffs:
    178             # check for context separators and pass them through
    179             if flag is None:
    180                 yield fromdata, todata, flag
    181                 continue
    182             (fromline, fromtext), (toline, totext) = fromdata, todata
    183             # for each from/to line split it at the wrap column to form
    184             # list of text lines.
    185             fromlist, tolist = [], []
    186             self._split_line(fromlist, fromline, fromtext)
    187             self._split_line(tolist, toline, totext)
    188             # yield from/to line in pairs inserting blank lines as
    189             # necessary when one side has more wrapped lines
    190             while fromlist or tolist:
    191                 if fromlist:
    192                     fromdata = fromlist.pop(0)
    193                 else:
    194                     fromdata = ('', ' ')
    195                 if tolist:
    196                     todata = tolist.pop(0)
    197                 else:
    198                     todata = ('', ' ')
    199                 yield fromdata, todata, flag
    200 
    201     def _collect_lines(self, diffs):
    202         """Collects mdiff output into separate lists
    203 
    204         Before storing the mdiff from/to data into a list, it is converted
    205         into a single line of text with console markup.
    206         """
    207 
    208         fromlist, tolist, flaglist = [], [], []
    209         # pull from/to data and flags from mdiff style iterator
    210         for fromdata, todata, flag in diffs:
    211             try:
    212                 # store HTML markup of the lines into the lists
    213                 fromlist.append(self._format_line(0, flag, *fromdata))
    214                 tolist.append(self._format_line(1, flag, *todata))
    215             except TypeError:
    216                 # exceptions occur for lines where context separators go
    217                 fromlist.append(None)
    218                 tolist.append(None)
    219             flaglist.append(flag)
    220         return fromlist, tolist, flaglist
    221 
    222     def _format_line(self, side, flag, linenum, text):
    223         """Returns HTML markup of "from" / "to" text lines
    224 
    225         side -- 0 or 1 indicating "from" or "to" text
    226         flag -- indicates if difference on line
    227         linenum -- line number (used for line number column)
    228         text -- line text to be marked up
    229         """
    230         try:
    231             lid = '%d' % linenum
    232         except TypeError:
    233             # handle blank lines where linenum is '>' or ''
    234             lid = ''
    235 
    236         text = text.rstrip()
    237 
    238         if not self.line_numbers:
    239             return text
    240         return '%s %s' % (self._rpad(lid, 8), text)
    241 
    242     def _real_len(self, s):
    243         l = 0
    244         in_esc = False
    245         prev = ' '
    246         for c in s.replace('\0+', "").replace('\0-', "").replace('\0^', "").replace('\1', "").replace('\t', ' '):
    247             if in_esc:
    248                 if c == "m":
    249                     in_esc = False
    250             else:
    251                 if c == "[" and prev == "\033":
    252                     in_esc = True
    253                     l -= 1 # we counted prev when we shouldn't have
    254                 else:
    255                     l += 1
    256             prev = c
    257 
    258         #print("len '%s' is %d." % (s, l))
    259         return l
    260 
    261 
    262     def _rpad(self, s, field_width):
    263         return self._pad(s, field_width) + s
    264 
    265     def _pad(self, s, field_width):
    266         return " " * (field_width - self._real_len(s))
    267 
    268     def _lpad(self, s, field_width):
    269         target = s + self._pad(s, field_width)
    270         #if self._real_len(target) != field_width:
    271         #    print("Warning: bad line %r is not of length %d" % (target, field_width))
    272         return target
    273 
    274     def _convert_flags(self, fromlist, tolist, flaglist, context, numlines):
    275         """Makes list of "next" links"""
    276 
    277         # all anchor names will be generated using the unique "to" prefix
    278 
    279         # process change flags, generating middle column of next anchors/links
    280         next_id = [''] * len(flaglist)
    281         next_href = [''] * len(flaglist)
    282         num_chg, in_change = 0, False
    283         last = 0
    284         toprefix = ''
    285         for i, flag in enumerate(flaglist):
    286             if flag:
    287                 if not in_change:
    288                     in_change = True
    289                     last = i
    290                     # at the beginning of a change, drop an anchor a few lines
    291                     # (the context lines) before the change for the previous
    292                     # link
    293                     i = max([0, i - numlines])
    294                     next_id[i] = ' id="difflib_chg_%s_%d"' % (toprefix, num_chg)
    295                     # at the beginning of a change, drop a link to the next
    296                     # change
    297                     num_chg += 1
    298                     next_href[last] = '<a href="#difflib_chg_%s_%d">n</a>' % (
    299                          toprefix, num_chg)
    300             else:
    301                 in_change = False
    302         # check for cases where there is no content to avoid exceptions
    303         if not flaglist:
    304             flaglist = [False]
    305             next_id = ['']
    306             next_href = ['']
    307             last = 0
    308             if context:
    309                 fromlist = ['No Differences Found']
    310                 tolist = fromlist
    311             else:
    312                 fromlist = tolist = ['Empty File']
    313         # if not a change on first line, drop a link
    314         if not flaglist[0]:
    315             next_href[0] = '<a href="#difflib_chg_%s_0">f</a>' % toprefix
    316         # redo the last link to link to the top
    317         next_href[last] = '<a href="#difflib_chg_%s_top">t</a>' % (toprefix)
    318 
    319         return fromlist, tolist, flaglist, next_href, next_id
    320 
    def make_table(self, fromlines, tolines, fromdesc='', todesc='', context=False,
                   numlines=5):
        """Returns table of side by side comparison with change highlights

        Arguments:
        fromlines -- list of "from" lines
        tolines -- list of "to" lines
        fromdesc -- "from" file column header string
        todesc -- "to" file column header string
        context -- set to True for contextual differences (defaults to False
            which shows full differences).
        numlines -- number of context lines.  When context is set True,
            controls number of lines displayed before and after the change.
            It is also handed to _convert_flags, whose link and anchor
            lists are not used in the table.

        The result is a single string: one row per line, both sides padded
        to half the table width, with change markers turned into color
        escapes by colorize.
        """
        # widen tabs now; it gets more difficult once markup is inserted
        fromlines, tolines = self._tab_newline_replace(fromlines, tolines)

        # side by side from/to iterator; None requests a full diff
        context_lines = numlines if context else None
        diffs = difflib._mdiff(fromlines, tolines, context_lines,
                               linejunk=self._linejunk,
                               charjunk=self._charjunk)

        # wrap lines that exceed the desired width
        if self._wrapcolumn:
            diffs = self._line_wrapper(diffs)

        # gather (and format) the lines, then substitute the placeholders
        # for empty input
        fromlist, tolist, flaglist = self._collect_lines(diffs)
        fromlist, tolist, flaglist, next_href, next_id = self._convert_flags(
            fromlist, tolist, flaglist, context, numlines)

        rows = []
        if fromdesc or todesc:
            rows.append((simple_colorize(fromdesc, "blue"),
                         simple_colorize(todesc, "blue")))

        for index, flag in enumerate(flaglist):
            if flag is not None:
                rows.append((fromlist[index], tolist[index]))
            elif index > 0:
                # mdiff yields None on separator lines; the bogus one
                # generated for the first line is skipped
                rows.append((simple_colorize('---', "blue"),
                             simple_colorize('---', "blue")))

        table_lines = [
            " ".join([self._lpad(cell, self.cols // 2 - 1) for cell in row])
            for row in rows
        ]

        return self.colorize("\n".join(table_lines))
    392 
    def colorize(self, s):
        """Replace the change markers in s with ANSI color escapes.

        '\\0+', '\\0-' and '\\0^' become the add, delete and change colors,
        '\\1' becomes the reset sequence, and tabs become spaces.  The
        colors are bold unless no_bold is set.

        highlight -- use reverse video for the changes and return at once.
        show_all_spaces -- when false, only changes made up entirely of
            whitespace are switched to reverse video; when true, every
            whitespace character inside a colored span is.

        Returns the colorized string.
        """
        def background(color):
            # Swap the normal (0) or bold (1) attribute for reverse video
            # (7).  Both must be handled, otherwise the no_bold colors would
            # pass through unchanged.
            for attribute in ("\033[0;", "\033[1;"):
                color = color.replace(attribute, "\033[7;")
            return color

        if self.no_bold:
            C_ADD = color_codes["green"]
            C_SUB = color_codes["red"]
            C_CHG = color_codes["yellow"]
        else:
            C_ADD = color_codes["green_bold"]
            C_SUB = color_codes["red_bold"]
            C_CHG = color_codes["yellow_bold"]

        if self.highlight:
            C_ADD, C_SUB, C_CHG = background(C_ADD), background(C_SUB), background(C_CHG)

        C_NONE = color_codes["none"]
        colors = (C_ADD, C_SUB, C_CHG, C_NONE)

        s = s.replace('\0+', C_ADD).replace('\0-', C_SUB).replace('\0^', C_CHG).replace('\1', C_NONE).replace('\t', ' ')

        if self.highlight:
            return s

        if not self.show_all_spaces:
            # A colored span holding nothing but whitespace is switched to
            # reverse video so that the change can still be seen.
            return re.sub("\033\\[[01];3([123])m(\\s+)(\033\\[)", "\033[7;3\\1m\\2\\3", s)

        # From here on show_all_spaces is set: every whitespace character
        # inside a colored span is shown in reverse video.
        n_s = []
        in_color = False
        for c in s:
            # An 'm' just written may have completed an escape sequence;
            # look at the tail of the output to see which color is active.
            if len(n_s) > 6 and n_s[-1] == "m":
                ns_end = "".join(n_s[-7:])
                for color in colors:
                    if ns_end.endswith(color):
                        in_color = color
                if ns_end.endswith(C_NONE):
                    in_color = False

            if c.isspace() and in_color:
                # reset, paint the whitespace, then restore the span color
                n_s.extend([C_NONE, background(in_color), c, C_NONE, in_color])
            else:
                n_s.append(c)

        return "".join(n_s)
    452 
    453 def simple_colorize(s, chosen_color):
    454     return "%s%s%s" % (color_codes[chosen_color], s, color_codes["none"])
    455 
    456 def start():
    457     # If you change any of these, also update README.
    458     parser = optparse.OptionParser(usage="usage: %prog [options] left_file right_file",
    459                                    description="Show differences between files in a two column view.")
    460     parser.add_option("--cols", default=None,
    461                       help="specify the width of the screen. Autodetection is Linux only")
    462     parser.add_option("--head", default=0,
    463                       help="consider only the first N lines of each file")
    464     parser.add_option("--highlight", default=False,
    465                       action="store_true",
    466                       help="color by changing the background color instead of the foreground color.  Very fast, ugly, displays all changes")
    467     parser.add_option("--line-numbers", default=False,
    468                       action="store_true",
    469                       help="generate output with line numbers")
    470     parser.add_option("--no-bold", default=False,
    471                       action="store_true",
    472                       help="use non-bold colors; recommended for with solarized")
    473     parser.add_option("--no-headers", default=False,
    474                       action="store_true",
    475                       help="don't label the left and right sides with their file names")
    476     parser.add_option("--numlines", default=5,
    477                       help="how many lines of context to print; can't be combined with --whole-file")
    478     parser.add_option("--recursive", default=False,
    479                       action="store_true",
    480                       help="recursively compare subdirectories")
    481     parser.add_option("--show-all-spaces", default=False,
    482                       action="store_true",
    483                       help="color all non-matching whitespace including that which is not needed for drawing the eye to changes.  Slow, ugly, displays all changes")
    484     parser.add_option("--version", default=False,
    485                       action="store_true",
    486                       help="print version and exit")
    487     parser.add_option("--whole-file", default=False,
    488                       action="store_true",
    489                       help="show the whole file instead of just changed lines and context")
    490 
    491     (options, args) = parser.parse_args()
    492 
    493     if options.version:
    494         print("icdiff version 1.2.0")
    495         sys.exit()
    496 
    497     if len(args) != 2:
    498         parser.print_help()
    499         sys.exit()
    500 
    501     a, b = args
    502 
    503     if not options.cols:
    504         def ioctl_GWINSZ(fd):
    505             try:
    506                 import fcntl, termios, struct
    507                 cr = struct.unpack('hh', fcntl.ioctl(fd, termios.TIOCGWINSZ, '1234'))
    508             except Exception:
    509                 return None
    510             return cr
    511         cr = ioctl_GWINSZ(0) or ioctl_GWINSZ(1) or ioctl_GWINSZ(2)
    512         if cr:
    513             options.cols = cr[1]
    514         else:
    515             options.cols = 80
    516 
    517     if options.recursive:
    518         diff_recursively(options, a, b)
    519     else:
    520         diff_files(options, a, b)
    521 
    522 def diff_recursively(options, a, b):
    523     def print_meta(s):
    524         print(simple_colorize(s, "magenta"))
    525 
    526     if os.path.isfile(a) and os.path.isfile(b):
    527         if not filecmp.cmp(a, b):
    528             diff_files(options, a, b)
    529 
    530     elif os.path.isdir(a) and os.path.isdir(b):
    531         a_contents = set(os.listdir(a))
    532         b_contents = set(os.listdir(b))
    533 
    534         for child in sorted(a_contents.union(b_contents)):
    535             if child not in b_contents:
    536                 print_meta("Only in %s: %s" % (a, child))
    537             elif child not in a_contents:
    538                 print_meta("Only in %s: %s" % (b, child))
    539             else:
    540                 diff_recursively(options,
    541                                  os.path.join(a, child),
    542                                  os.path.join(b, child))
    543 
    544     elif os.path.isdir(a) and os.path.isfile(b):
    545         print_meta("File %s is a directory while %s is a file" % (a, b))
    546 
    547     elif os.path.isfile(a) and os.path.isdir(b):
    548         print_meta("File %s is a file while %s is a directory" % (a, b))
    549 
    550 def diff_files(options, a, b):
    551     headers = a, b
    552     if options.no_headers:
    553         headers = None, None
    554 
    555     head = int(options.head)
    556 
    557     for x in [a, b]:
    558         if os.path.isdir(x):
    559             sys.stderr.write("error: %s is a directory; did you mean to pass --recursive?\n" % x)
    560             sys.exit(1)
    561     lines_a = open(a, "U").readlines()
    562     lines_b = open(b, "U").readlines()
    563 
    564     if head != 0:
    565         lines_a = lines_a[:head]
    566         lines_b = lines_b[:head]
    567 
    568     print(ConsoleDiff(cols=int(options.cols),
    569                       show_all_spaces=options.show_all_spaces,
    570                       highlight=options.highlight,
    571                       no_bold=options.no_bold,
    572                       line_numbers=options.line_numbers).make_table(
    573         lines_a, lines_b, headers[0], headers[1], context=(not options.whole_file), numlines=int(options.numlines)))
    574     sys.stdout.flush()
    575 
    576 if __name__ == "__main__":
    577     try:
    578         start()
    579     except KeyboardInterrupt:
    580         pass
    581     except IOError as e:
    582         if e.errno == errno.EPIPE:
    583             pass
    584         else:
    585             raise