icdiff - mybin - my ~/bin

commit 5de23a87d235047cb89872c7c29ac735c1cf8dbb
parent 821804834c640320f5219d30e1d7f95b36162e54
Author: Antoine Amarilli <a3nm@a3nm.net>
Date:   Fri, 26 Dec 2014 12:44:33 +0100

icdiff

Diffstat:
icdiff  | 585 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

1 file changed, 585 insertions(+), 0 deletions(-)
diff --git a/icdiff b/icdiff
@@ -0,0 +1,585 @@
+#!/usr/bin/env python
+
+""" icdiff.py
+
+Author: Jeff Kaufman, derived from difflib.HtmlDiff
+
+License: This code is usable under the same open terms as the rest of
+         python.  See: http://www.python.org/psf/license/
+
+"""
+
+import os
+import sys
+import errno
+import difflib
+import optparse
+import re
+import filecmp
+
+color_codes = {
+    "red":     '\033[0;31m',
+    "green":   '\033[0;32m',
+    "yellow":  '\033[0;33m',
+    "blue":    '\033[0;34m',
+    "magenta": '\033[0;35m',
+    "cyan":    '\033[0;36m',
+    "none":    '\033[m',
+    "red_bold":     '\033[1;31m',
+    "green_bold":   '\033[1;32m',
+    "yellow_bold":  '\033[1;33m',
+    "blue_bold":    '\033[1;34m',
+    "magenta_bold": '\033[1;35m',
+    "cyan_bold":    '\033[1;36m',
+}
+
+class ConsoleDiff(object):
+    """Console colored side by side comparison with change highlights.
+
+    Based on difflib.HtmlDiff
+
+    This class can be used to create a text-mode table showing a side
+
+    by side, line by line comparison of text with inter-line and
+    intra-line change highlights in ansi color escape sequences as
+    intra-line change highlights in ansi color escape sequences as
+    read by xterm.  The table can be generated in either full or
+    contextual difference mode.
+
+    To generate the table, call make_table.
+
+    Usage is the almost the same as HtmlDiff except only make_table is
+    implemented and the file can be invoked on the command line.
+    Run::
+
+      python icdiff.py --help
+
+    for command line usage information.
+
+    """
+
+    def __init__(self, tabsize=8, wrapcolumn=None, linejunk=None,
+                 charjunk=difflib.IS_CHARACTER_JUNK, cols=80,
+                 line_numbers=False,
+                 show_all_spaces=False,
+                 highlight=False,
+                 no_bold=False):
+        """ConsoleDiff instance initializer
+
+        Arguments:
+        tabsize -- tab stop spacing, defaults to 8.
+        wrapcolumn -- column number where lines are broken and wrapped,
+            defaults to None where lines are not wrapped.
+        linejunk, charjunk -- keyword arguments passed into ndiff() (used by
+            ConsoleDiff() to generate the side by side differences).  See
+            ndiff() documentation for argument default values and descriptions.
+        """
+
+        self._tabsize = tabsize
+        self.line_numbers = line_numbers
+        self.cols = cols
+        self.show_all_spaces = show_all_spaces
+        self.highlight = highlight
+        self.no_bold = no_bold
+
+        if wrapcolumn is None:
+            if not line_numbers:
+                wrapcolumn = self.cols // 2 - 2
+            else:
+                wrapcolumn = self.cols // 2 - 10
+
+        self._wrapcolumn = wrapcolumn
+        self._linejunk = linejunk
+        self._charjunk = charjunk
+
+
+    def _tab_newline_replace(self, fromlines, tolines):
+        """Returns from/to line lists with tabs expanded and newlines removed.
+
+        Instead of tab characters being replaced by the number of spaces
+        needed to fill in to the next tab stop, this function will fill
+        the space with tab characters.  This is done so that the difference
+        algorithms can identify changes in a file when tabs are replaced by
+        spaces and vice versa.  At the end of the table generation, the tab
+        characters will be replaced with a space.
+        """
+        def expand_tabs(line):
+            # hide real spaces
+            line = line.replace(' ', '\0')
+            # expand tabs into spaces
+            line = line.expandtabs(self._tabsize)
+            # relace spaces from expanded tabs back into tab characters
+            # (we'll replace them with markup after we do differencing)
+            line = line.replace(' ', '\t')
+            return line.replace('\0', ' ').rstrip('\n')
+        fromlines = [expand_tabs(line) for line in fromlines]
+        tolines = [expand_tabs(line) for line in tolines]
+        return fromlines, tolines
+
+    def _split_line(self, data_list, line_num, text):
+        """Builds list of text lines by splitting text lines at wrap point
+
+        This function will determine if the input text line needs to be
+        wrapped (split) into separate lines.  If so, the first wrap point
+        will be determined and the first line appended to the output
+        text line list.  This function is used recursively to handle
+        the second part of the split line to further split it.
+        """
+        # if blank line or context separator, just add it to the output list
+        if not line_num:
+            data_list.append((line_num, text))
+            return
+
+        # if line text doesn't need wrapping, just add it to the output list
+        size = len(text)
+        if (size <= self._wrapcolumn) or ((size - (text.count('\0') * 3)) <= self._wrapcolumn):
+            data_list.append((line_num, text))
+            return
+
+        # scan text looking for the wrap point, keeping track if the wrap
+        # point is inside markers
+        i = 0
+        n = 0
+        mark = ''
+        while n < self._wrapcolumn and i < size:
+            if text[i] == '\0':
+                i += 1
+                mark = text[i]
+                i += 1
+            elif text[i] == '\1':
+                i += 1
+                mark = ''
+            else:
+                i += 1
+                n += 1
+
+        # wrap point is inside text, break it up into separate lines
+        line1 = text[:i]
+        line2 = text[i:]
+
+        # if wrap point is inside markers, place end marker at end of first
+        # line and start marker at beginning of second line because each
+        # line will have its own table tag markup around it.
+        if mark:
+            line1 = line1 + '\1'
+            line2 = '\0' + mark + line2
+
+        # tack on first line onto the output list
+        data_list.append((line_num, line1))
+
+        # use this routine again to wrap the remaining text
+        self._split_line(data_list, '>', line2)
+
+    def _line_wrapper(self, diffs):
+        """Returns iterator that splits (wraps) mdiff text lines"""
+
+        # pull from/to data and flags from mdiff iterator
+        for fromdata, todata, flag in diffs:
+            # check for context separators and pass them through
+            if flag is None:
+                yield fromdata, todata, flag
+                continue
+            (fromline, fromtext), (toline, totext) = fromdata, todata
+            # for each from/to line split it at the wrap column to form
+            # list of text lines.
+            fromlist, tolist = [], []
+            self._split_line(fromlist, fromline, fromtext)
+            self._split_line(tolist, toline, totext)
+            # yield from/to line in pairs inserting blank lines as
+            # necessary when one side has more wrapped lines
+            while fromlist or tolist:
+                if fromlist:
+                    fromdata = fromlist.pop(0)
+                else:
+                    fromdata = ('', ' ')
+                if tolist:
+                    todata = tolist.pop(0)
+                else:
+                    todata = ('', ' ')
+                yield fromdata, todata, flag
+
+    def _collect_lines(self, diffs):
+        """Collects mdiff output into separate lists
+
+        Before storing the mdiff from/to data into a list, it is converted
+        into a single line of text with console markup.
+        """
+
+        fromlist, tolist, flaglist = [], [], []
+        # pull from/to data and flags from mdiff style iterator
+        for fromdata, todata, flag in diffs:
+            try:
+                # store HTML markup of the lines into the lists
+                fromlist.append(self._format_line(0, flag, *fromdata))
+                tolist.append(self._format_line(1, flag, *todata))
+            except TypeError:
+                # exceptions occur for lines where context separators go
+                fromlist.append(None)
+                tolist.append(None)
+            flaglist.append(flag)
+        return fromlist, tolist, flaglist
+
+    def _format_line(self, side, flag, linenum, text):
+        """Returns HTML markup of "from" / "to" text lines
+
+        side -- 0 or 1 indicating "from" or "to" text
+        flag -- indicates if difference on line
+        linenum -- line number (used for line number column)
+        text -- line text to be marked up
+        """
+        try:
+            lid = '%d' % linenum
+        except TypeError:
+            # handle blank lines where linenum is '>' or ''
+            lid = ''
+
+        text = text.rstrip()
+
+        if not self.line_numbers:
+            return text
+        return '%s %s' % (self._rpad(lid, 8), text)
+
+    def _real_len(self, s):
+        l = 0
+        in_esc = False
+        prev = ' '
+        for c in s.replace('\0+', "").replace('\0-', "").replace('\0^', "").replace('\1', "").replace('\t', ' '):
+            if in_esc:
+                if c == "m":
+                    in_esc = False
+            else:
+                if c == "[" and prev == "\033":
+                    in_esc = True
+                    l -= 1 # we counted prev when we shouldn't have
+                else:
+                    l += 1
+            prev = c
+
+        #print("len '%s' is %d." % (s, l))
+        return l
+
+
+    def _rpad(self, s, field_width):
+        return self._pad(s, field_width) + s
+
+    def _pad(self, s, field_width):
+        return " " * (field_width - self._real_len(s))
+
+    def _lpad(self, s, field_width):
+        target = s + self._pad(s, field_width)
+        #if self._real_len(target) != field_width:
+        #    print("Warning: bad line %r is not of length %d" % (target, field_width))
+        return target
+
+    def _convert_flags(self, fromlist, tolist, flaglist, context, numlines):
+        """Makes list of "next" links"""
+
+        # all anchor names will be generated using the unique "to" prefix
+
+        # process change flags, generating middle column of next anchors/links
+        next_id = [''] * len(flaglist)
+        next_href = [''] * len(flaglist)
+        num_chg, in_change = 0, False
+        last = 0
+        toprefix = ''
+        for i, flag in enumerate(flaglist):
+            if flag:
+                if not in_change:
+                    in_change = True
+                    last = i
+                    # at the beginning of a change, drop an anchor a few lines
+                    # (the context lines) before the change for the previous
+                    # link
+                    i = max([0, i - numlines])
+                    next_id[i] = ' id="difflib_chg_%s_%d"' % (toprefix, num_chg)
+                    # at the beginning of a change, drop a link to the next
+                    # change
+                    num_chg += 1
+                    next_href[last] = '<a href="#difflib_chg_%s_%d">n</a>' % (
+                         toprefix, num_chg)
+            else:
+                in_change = False
+        # check for cases where there is no content to avoid exceptions
+        if not flaglist:
+            flaglist = [False]
+            next_id = ['']
+            next_href = ['']
+            last = 0
+            if context:
+                fromlist = ['No Differences Found']
+                tolist = fromlist
+            else:
+                fromlist = tolist = ['Empty File']
+        # if not a change on first line, drop a link
+        if not flaglist[0]:
+            next_href[0] = '<a href="#difflib_chg_%s_0">f</a>' % toprefix
+        # redo the last link to link to the top
+        next_href[last] = '<a href="#difflib_chg_%s_top">t</a>' % (toprefix)
+
+        return fromlist, tolist, flaglist, next_href, next_id
+
+    def make_table(self, fromlines, tolines, fromdesc='', todesc='', context=False,
+                   numlines=5):
+        """Returns table of side by side comparison with change highlights
+
+        Arguments:
+        fromlines -- list of "from" lines
+        tolines -- list of "to" lines
+        fromdesc -- "from" file column header string
+        todesc -- "to" file column header string
+        context -- set to True for contextual differences (defaults to False
+            which shows full differences).
+        numlines -- number of context lines.  When context is set True,
+            controls number of lines displayed before and after the change.
+            When context is False, controls the number of lines to place
+            the "next" link anchors before the next change (so click of
+            "next" link jumps to just before the change).
+        """
+
+        # change tabs to spaces before it gets more difficult after we insert
+        # markkup
+        fromlines, tolines = self._tab_newline_replace(fromlines, tolines)
+
+        # create diffs iterator which generates side by side from/to data
+        if context:
+            context_lines = numlines
+        else:
+            context_lines = None
+        diffs = difflib._mdiff(fromlines, tolines, context_lines, linejunk=self._linejunk,
+                               charjunk=self._charjunk)
+
+
+        # set up iterator to wrap lines that exceed desired width
+        if self._wrapcolumn:
+            diffs = self._line_wrapper(diffs)
+
+        # collect up from/to lines and flags into lists (also format the lines)
+        fromlist, tolist, flaglist = self._collect_lines(diffs)
+
+        # process change flags, generating middle column of next anchors/links
+        fromlist, tolist, flaglist, next_href, next_id = self._convert_flags(
+            fromlist, tolist, flaglist, context, numlines)
+
+        s = []
+
+        if fromdesc or todesc:
+            s.append((simple_colorize(fromdesc, "blue"),
+                      simple_colorize(todesc, "blue")))
+
+        for i in range(len(flaglist)):
+            if flaglist[i] is None:
+                # mdiff yields None on separator lines; skip the bogus ones
+                # generated for the first line
+
+                if i > 0:
+                    s.append((simple_colorize('---', "blue"),
+                              simple_colorize('---', "blue")))
+            else:
+                s.append((fromlist[i], tolist[i]))
+
+        table_lines = []
+        for sides in s:
+            line = []
+            for side in sides:
+                line.append(self._lpad(side, self.cols // 2 - 1))
+            table_lines.append(" ".join(line))
+
+        table_line_string = "\n".join(table_lines)
+
+        colorized_table_line_string = self.colorize(table_line_string)
+
+        return colorized_table_line_string
+
+    def colorize(self, s):
+        def background(color):
+            return color.replace("\033[1;", "\033[7;")
+
+        if self.no_bold:
+            C_ADD = color_codes["green"]
+            C_SUB = color_codes["red"]
+            C_CHG = color_codes["yellow"]
+        else:
+            C_ADD = color_codes["green_bold"]
+            C_SUB = color_codes["red_bold"]
+            C_CHG = color_codes["yellow_bold"]
+
+        if self.highlight:
+            C_ADD, C_SUB, C_CHG = background(C_ADD), background(C_SUB), background(C_CHG)
+
+        C_NONE = color_codes["none"]
+        colors = (C_ADD, C_SUB, C_CHG, C_NONE)
+
+        s = s.replace('\0+', C_ADD).replace('\0-', C_SUB).replace('\0^', C_CHG).replace('\1', C_NONE).replace('\t', ' ')
+
+        if self.highlight:
+            return s
+
+        if not self.show_all_spaces:
+            # If there's a change consisting entirely of whitespace, don't color it.
+            return re.sub("\033\\[[01];3([123])m(\\s+)(\033\\[)", "\033[7;3\\1m\\2\\3", s)
+
+        def will_see_coloredspace(i, s):
+            while i < len(s) and s[i].isspace():
+                i += 1
+            if i < len(s) and s[i] == '\033':
+                return False
+            return True
+
+        n_s = []
+        in_color = False
+        seen_coloredspace = False
+        for i, c in enumerate(s):
+            if len(n_s) > 6 and n_s[-1] == "m":
+                ns_end = "".join(n_s[-7:])
+                for color in colors:
+                    if ns_end.endswith(color):
+                        if color != in_color:
+                            seen_coloredspace = False
+                        in_color = color
+                if ns_end.endswith(C_NONE):
+                    in_color = False
+
+            if c.isspace() and in_color and (self.show_all_spaces or not (seen_coloredspace or will_see_coloredspace(i, s))):
+                n_s.extend([C_NONE, background(in_color), c, C_NONE, in_color])
+            else:
+                if in_color:
+                    seen_coloredspace = True
+                n_s.append(c)
+
+        joined = "".join(n_s)
+
+        return joined
+
+def simple_colorize(s, chosen_color):
+    return "%s%s%s" % (color_codes[chosen_color], s, color_codes["none"])
+
+def start():
+    # If you change any of these, also update README.
+    parser = optparse.OptionParser(usage="usage: %prog [options] left_file right_file",
+                                   description="Show differences between files in a two column view.")
+    parser.add_option("--cols", default=None,
+                      help="specify the width of the screen. Autodetection is Linux only")
+    parser.add_option("--head", default=0,
+                      help="consider only the first N lines of each file")
+    parser.add_option("--highlight", default=False,
+                      action="store_true",
+                      help="color by changing the background color instead of the foreground color.  Very fast, ugly, displays all changes")
+    parser.add_option("--line-numbers", default=False,
+                      action="store_true",
+                      help="generate output with line numbers")
+    parser.add_option("--no-bold", default=False,
+                      action="store_true",
+                      help="use non-bold colors; recommended for with solarized")
+    parser.add_option("--no-headers", default=False,
+                      action="store_true",
+                      help="don't label the left and right sides with their file names")
+    parser.add_option("--numlines", default=5,
+                      help="how many lines of context to print; can't be combined with --whole-file")
+    parser.add_option("--recursive", default=False,
+                      action="store_true",
+                      help="recursively compare subdirectories")
+    parser.add_option("--show-all-spaces", default=False,
+                      action="store_true",
+                      help="color all non-matching whitespace including that which is not needed for drawing the eye to changes.  Slow, ugly, displays all changes")
+    parser.add_option("--version", default=False,
+                      action="store_true",
+                      help="print version and exit")
+    parser.add_option("--whole-file", default=False,
+                      action="store_true",
+                      help="show the whole file instead of just changed lines and context")
+
+    (options, args) = parser.parse_args()
+
+    if options.version:
+        print("icdiff version 1.2.0")
+        sys.exit()
+
+    if len(args) != 2:
+        parser.print_help()
+        sys.exit()
+
+    a, b = args
+
+    if not options.cols:
+        def ioctl_GWINSZ(fd):
+            try:
+                import fcntl, termios, struct
+                cr = struct.unpack('hh', fcntl.ioctl(fd, termios.TIOCGWINSZ, '1234'))
+            except Exception:
+                return None
+            return cr
+        cr = ioctl_GWINSZ(0) or ioctl_GWINSZ(1) or ioctl_GWINSZ(2)
+        if cr:
+            options.cols = cr[1]
+        else:
+            options.cols = 80
+
+    if options.recursive:
+        diff_recursively(options, a, b)
+    else:
+        diff_files(options, a, b)
+
+def diff_recursively(options, a, b):
+    def print_meta(s):
+        print(simple_colorize(s, "magenta"))
+
+    if os.path.isfile(a) and os.path.isfile(b):
+        if not filecmp.cmp(a, b):
+            diff_files(options, a, b)
+
+    elif os.path.isdir(a) and os.path.isdir(b):
+        a_contents = set(os.listdir(a))
+        b_contents = set(os.listdir(b))
+
+        for child in sorted(a_contents.union(b_contents)):
+            if child not in b_contents:
+                print_meta("Only in %s: %s" % (a, child))
+            elif child not in a_contents:
+                print_meta("Only in %s: %s" % (b, child))
+            else:
+                diff_recursively(options,
+                                 os.path.join(a, child),
+                                 os.path.join(b, child))
+
+    elif os.path.isdir(a) and os.path.isfile(b):
+        print_meta("File %s is a directory while %s is a file" % (a, b))
+
+    elif os.path.isfile(a) and os.path.isdir(b):
+        print_meta("File %s is a file while %s is a directory" % (a, b))
+
+def diff_files(options, a, b):
+    headers = a, b
+    if options.no_headers:
+        headers = None, None
+
+    head = int(options.head)
+
+    for x in [a, b]:
+        if os.path.isdir(x):
+            sys.stderr.write("error: %s is a directory; did you mean to pass --recursive?\n" % x)
+            sys.exit(1)
+    lines_a = open(a, "U").readlines()
+    lines_b = open(b, "U").readlines()
+
+    if head != 0:
+        lines_a = lines_a[:head]
+        lines_b = lines_b[:head]
+
+    print(ConsoleDiff(cols=int(options.cols),
+                      show_all_spaces=options.show_all_spaces,
+                      highlight=options.highlight,
+                      no_bold=options.no_bold,
+                      line_numbers=options.line_numbers).make_table(
+        lines_a, lines_b, headers[0], headers[1], context=(not options.whole_file), numlines=int(options.numlines)))
+    sys.stdout.flush()
+
+if __name__ == "__main__":
+    try:
+        start()
+    except KeyboardInterrupt:
+        pass
+    except IOError as e:
+        if e.errno == errno.EPIPE:
+            pass
+        else:
+            raise

	mybin my ~/bin
	git clone https://a3nm.net/git/mybin/
	Log \| Files \| Refs \| README