mybin

my ~/bin
git clone https://a3nm.net/git/mybin/
Log | Files | Refs | README

commit 5de23a87d235047cb89872c7c29ac735c1cf8dbb
parent 821804834c640320f5219d30e1d7f95b36162e54
Author: Antoine Amarilli <a3nm@a3nm.net>
Date:   Fri, 26 Dec 2014 12:44:33 +0100

icdiff

Diffstat:
icdiff | 585+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 585 insertions(+), 0 deletions(-)

#!/usr/bin/env python

""" icdiff.py

Author: Jeff Kaufman, derived from difflib.HtmlDiff

License: This code is usable under the same open terms as the rest of
         python. See: http://www.python.org/psf/license/

"""

import os
import sys
import errno
import difflib
import optparse
import re
import filecmp

# ANSI escape sequences used to color the output.
color_codes = {
    "red": '\033[0;31m',
    "green": '\033[0;32m',
    "yellow": '\033[0;33m',
    "blue": '\033[0;34m',
    "magenta": '\033[0;35m',
    "cyan": '\033[0;36m',
    "none": '\033[m',
    "red_bold": '\033[1;31m',
    "green_bold": '\033[1;32m',
    "yellow_bold": '\033[1;33m',
    "blue_bold": '\033[1;34m',
    "magenta_bold": '\033[1;35m',
    "cyan_bold": '\033[1;36m',
}


class ConsoleDiff(object):
    """Console colored side by side comparison with change highlights.

    Based on difflib.HtmlDiff

    This class can be used to create a text-mode table showing a side
    by side, line by line comparison of text with inter-line and
    intra-line change highlights in ansi color escape sequences as
    read by xterm. The table can be generated in either full or
    contextual difference mode.

    To generate the table, call make_table.

    Usage is the almost the same as HtmlDiff except only make_table is
    implemented and the file can be invoked on the command line.
    Run::

      python icdiff.py --help

    for command line usage information.

    """

    def __init__(self, tabsize=8, wrapcolumn=None, linejunk=None,
                 charjunk=difflib.IS_CHARACTER_JUNK, cols=80,
                 line_numbers=False,
                 show_all_spaces=False,
                 highlight=False,
                 no_bold=False):
        """ConsoleDiff instance initializer

        Arguments:
        tabsize -- tab stop spacing, defaults to 8.
        wrapcolumn -- column number where lines are broken and wrapped.
            Defaults to None, in which case it is derived from cols:
            cols // 2 - 2, or cols // 2 - 10 when line_numbers is set.
        linejunk, charjunk -- keyword arguments passed into difflib._mdiff()
            (used by make_table to generate the side by side differences).
        cols -- total width of the output; each side is padded to
            cols // 2 - 1 characters.
        line_numbers -- prefix every line with a right-aligned, 8 character
            wide line number column.
        show_all_spaces -- in colorize(), give every whitespace character
            inside a colored region a background color.
        highlight -- color changes through the background instead of the
            foreground.
        no_bold -- use the non-bold color codes.
        """

        self._tabsize = tabsize
        self.line_numbers = line_numbers
        self.cols = cols
        self.show_all_spaces = show_all_spaces
        self.highlight = highlight
        self.no_bold = no_bold

        if wrapcolumn is None:
            if not line_numbers:
                wrapcolumn = self.cols // 2 - 2
            else:
                # leave room for the line number column
                wrapcolumn = self.cols // 2 - 10

        self._wrapcolumn = wrapcolumn
        self._linejunk = linejunk
        self._charjunk = charjunk

    def _tab_newline_replace(self, fromlines, tolines):
        """Returns from/to line lists with tabs expanded and newlines removed.

        Instead of tab characters being replaced by the number of spaces
        needed to fill in to the next tab stop, this function will fill
        the space with tab characters.  This is done so that the difference
        algorithms can identify changes in a file when tabs are replaced by
        spaces and vice versa.  At the end of the table generation, the tab
        characters will be replaced with a space.
        """
        def expand_tabs(line):
            # hide real spaces
            line = line.replace(' ', '\0')
            # expand tabs into spaces
            line = line.expandtabs(self._tabsize)
            # replace spaces from expanded tabs back into tab characters
            # (we'll replace them with markup after we do differencing)
            line = line.replace(' ', '\t')
            return line.replace('\0', ' ').rstrip('\n')
        fromlines = [expand_tabs(line) for line in fromlines]
        tolines = [expand_tabs(line) for line in tolines]
        return fromlines, tolines

    def _split_line(self, data_list, line_num, text):
        """Builds list of text lines by splitting text lines at wrap point

        This function determines whether the input text line needs to be
        wrapped (split) into separate lines.  If so, the text is cut at
        successive wrap points; every piece is appended to data_list as a
        (line_num, text) tuple, where continuation pieces carry '>' as
        their line number.

        The splitting is done in a loop rather than recursively so that
        very long lines cannot exhaust the interpreter's recursion limit.

        Arguments:
        data_list -- output list, extended in place.
        line_num -- line number of the text; a false value marks a blank
            line or context separator, which is passed through unchanged.
        text -- line text, possibly containing the '\\0<flag>' ... '\\1'
            change markers produced by difflib._mdiff().
        """
        # A wrap column below 1 can never make progress (no visible
        # character fits on a line), so pass the text through unwrapped.
        if self._wrapcolumn is None or self._wrapcolumn < 1:
            data_list.append((line_num, text))
            return

        while True:
            # if blank line or context separator, just add it to the output
            # list
            if not line_num:
                data_list.append((line_num, text))
                return

            # if line text doesn't need wrapping, just add it to the output
            # list (each marked region costs 3 marker characters: '\0',
            # the flag and '\1')
            size = len(text)
            if (size <= self._wrapcolumn) or ((size - (text.count('\0') * 3)) <= self._wrapcolumn):
                data_list.append((line_num, text))
                return

            # scan text looking for the wrap point, keeping track if the
            # wrap point is inside markers
            i = 0
            n = 0
            mark = ''
            while n < self._wrapcolumn and i < size:
                if text[i] == '\0':
                    i += 1
                    mark = text[i]
                    i += 1
                elif text[i] == '\1':
                    i += 1
                    mark = ''
                else:
                    i += 1
                    n += 1

            # wrap point is inside text, break it up into separate lines
            line1 = text[:i]
            line2 = text[i:]

            # if wrap point is inside markers, place end marker at end of
            # first line and start marker at beginning of second line so
            # that each output line carries balanced markers.
            if mark:
                line1 = line1 + '\1'
                line2 = '\0' + mark + line2

            # tack on first line onto the output list
            data_list.append((line_num, line1))

            # go around again to wrap the remaining text as a continuation
            line_num, text = '>', line2

    def _line_wrapper(self, diffs):
        """Returns iterator that splits (wraps) mdiff text lines"""

        # pull from/to data and flags from mdiff iterator
        for fromdata, todata, flag in diffs:
            # check for context separators and pass them through
            if flag is None:
                yield fromdata, todata, flag
                continue
            (fromline, fromtext), (toline, totext) = fromdata, todata
            # for each from/to line split it at the wrap column to form
            # list of text lines.
            fromlist, tolist = [], []
            self._split_line(fromlist, fromline, fromtext)
            self._split_line(tolist, toline, totext)
            # yield from/to line in pairs inserting blank lines as
            # necessary when one side has more wrapped lines
            while fromlist or tolist:
                if fromlist:
                    fromdata = fromlist.pop(0)
                else:
                    fromdata = ('', ' ')
                if tolist:
                    todata = tolist.pop(0)
                else:
                    todata = ('', ' ')
                yield fromdata, todata, flag

    def _collect_lines(self, diffs):
        """Collects mdiff output into separate lists

        Before storing the mdiff from/to data into a list, it is converted
        into a single line of text by _format_line.  Context separators
        are stored as None on both sides.
        """

        fromlist, tolist, flaglist = [], [], []
        # pull from/to data and flags from mdiff style iterator
        for fromdata, todata, flag in diffs:
            try:
                # store the formatted lines into the lists
                fromlist.append(self._format_line(0, flag, *fromdata))
                tolist.append(self._format_line(1, flag, *todata))
            except TypeError:
                # exceptions occur for lines where context separators go
                fromlist.append(None)
                tolist.append(None)
            flaglist.append(flag)
        return fromlist, tolist, flaglist

    def _format_line(self, side, flag, linenum, text):
        """Returns the formatted text of a "from" / "to" line

        side -- 0 or 1 indicating "from" or "to" text
        flag -- indicates if difference on line
        linenum -- line number (used for line number column)
        text -- line text to be formatted

        Trailing whitespace is stripped; when line numbers are enabled the
        text is prefixed with the line number right-aligned in 8 columns.
        """
        try:
            lid = '%d' % linenum
        except TypeError:
            # handle blank lines where linenum is '>' or ''
            lid = ''

        text = text.rstrip()

        if not self.line_numbers:
            return text
        return '%s %s' % (self._rpad(lid, 8), text)

    def _real_len(self, s):
        """Returns the printed width of s.

        Change markers ('\\0+', '\\0-', '\\0^', '\\1') and ANSI escape
        sequences ('\\033[' up to the closing 'm') are not counted; a tab
        counts as one character.
        """
        l = 0
        in_esc = False
        prev = ' '
        for c in s.replace('\0+', "").replace('\0-', "").replace('\0^', "").replace('\1', "").replace('\t', ' '):
            if in_esc:
                if c == "m":
                    in_esc = False
            else:
                if c == "[" and prev == "\033":
                    in_esc = True
                    l -= 1  # we counted prev when we shouldn't have
                else:
                    l += 1
            prev = c

        return l

    def _rpad(self, s, field_width):
        """Returns s right-aligned (padding on the left) in field_width."""
        return self._pad(s, field_width) + s

    def _pad(self, s, field_width):
        """Returns the spaces needed to fill s up to field_width."""
        return " " * (field_width - self._real_len(s))

    def _lpad(self, s, field_width):
        """Returns s left-aligned (padding on the right) in field_width."""
        return s + self._pad(s, field_width)

    def _convert_flags(self, fromlist, tolist, flaglist, context, numlines):
        """Makes list of "next" links

        Inherited from difflib.HtmlDiff.  The returned next_href / next_id
        lists contain HTML anchors; make_table does not render them but
        relies on this method to substitute placeholder content
        ('No Differences Found' / 'Empty File') when there are no lines.
        """

        # all anchor names will be generated using the unique "to" prefix

        # process change flags, generating middle column of next anchors/links
        next_id = [''] * len(flaglist)
        next_href = [''] * len(flaglist)
        num_chg, in_change = 0, False
        last = 0
        toprefix = ''
        for i, flag in enumerate(flaglist):
            if flag:
                if not in_change:
                    in_change = True
                    last = i
                    # at the beginning of a change, drop an anchor a few lines
                    # (the context lines) before the change for the previous
                    # link
                    i = max([0, i - numlines])
                    next_id[i] = ' id="difflib_chg_%s_%d"' % (toprefix, num_chg)
                    # at the beginning of a change, drop a link to the next
                    # change
                    num_chg += 1
                    next_href[last] = '<a href="#difflib_chg_%s_%d">n</a>' % (
                        toprefix, num_chg)
            else:
                in_change = False
        # check for cases where there is no content to avoid exceptions
        if not flaglist:
            flaglist = [False]
            next_id = ['']
            next_href = ['']
            last = 0
            if context:
                fromlist = ['No Differences Found']
                tolist = fromlist
            else:
                fromlist = tolist = ['Empty File']
        # if not a change on first line, drop a link
        if not flaglist[0]:
            next_href[0] = '<a href="#difflib_chg_%s_0">f</a>' % toprefix
        # redo the last link to link to the top
        next_href[last] = '<a href="#difflib_chg_%s_top">t</a>' % (toprefix)

        return fromlist, tolist, flaglist, next_href, next_id

    def make_table(self, fromlines, tolines, fromdesc='', todesc='', context=False,
                   numlines=5):
        """Returns table of side by side comparison with change highlights

        Arguments:
        fromlines -- list of "from" lines
        tolines -- list of "to" lines
        fromdesc -- "from" file column header string
        todesc -- "to" file column header string
        context -- set to True for contextual differences (defaults to False
            which shows full differences).
        numlines -- number of context lines.  When context is set True,
            controls number of lines displayed before and after the change.
            It is also forwarded to _convert_flags.

        Returns the whole table as one string with embedded ANSI color
        escape sequences and lines joined by newlines.
        """

        # change tabs to spaces before it gets more difficult after we insert
        # markup
        fromlines, tolines = self._tab_newline_replace(fromlines, tolines)

        # create diffs iterator which generates side by side from/to data
        if context:
            context_lines = numlines
        else:
            context_lines = None
        diffs = difflib._mdiff(fromlines, tolines, context_lines, linejunk=self._linejunk,
                               charjunk=self._charjunk)

        # set up iterator to wrap lines that exceed desired width
        if self._wrapcolumn:
            diffs = self._line_wrapper(diffs)

        # collect up from/to lines and flags into lists (also format the lines)
        fromlist, tolist, flaglist = self._collect_lines(diffs)

        # process change flags; this also fills in placeholder rows when
        # there is nothing to show
        fromlist, tolist, flaglist, next_href, next_id = self._convert_flags(
            fromlist, tolist, flaglist, context, numlines)

        s = []

        if fromdesc or todesc:
            s.append((simple_colorize(fromdesc, "blue"),
                      simple_colorize(todesc, "blue")))

        for i, (fromtext, totext, flag) in enumerate(zip(fromlist, tolist, flaglist)):
            if flag is None:
                # mdiff yields None on separator lines; skip the bogus ones
                # generated for the first line
                if i > 0:
                    s.append((simple_colorize('---', "blue"),
                              simple_colorize('---', "blue")))
            else:
                s.append((fromtext, totext))

        table_lines = []
        for sides in s:
            line = []
            for side in sides:
                line.append(self._lpad(side, self.cols // 2 - 1))
            table_lines.append(" ".join(line))

        table_line_string = "\n".join(table_lines)

        colorized_table_line_string = self.colorize(table_line_string)

        return colorized_table_line_string

    def colorize(self, s):
        """Replaces the change markers in s with ANSI color escape sequences.

        '\\0+', '\\0-' and '\\0^' become the add / delete / change colors,
        '\\1' becomes the reset sequence and tabs become spaces.  Depending
        on highlight and show_all_spaces, whitespace inside colored regions
        is additionally given a background color so that it is visible.
        """
        def background(color):
            # turn a bold foreground code into a reverse-video one
            return color.replace("\033[1;", "\033[7;")

        if self.no_bold:
            C_ADD = color_codes["green"]
            C_SUB = color_codes["red"]
            C_CHG = color_codes["yellow"]
        else:
            C_ADD = color_codes["green_bold"]
            C_SUB = color_codes["red_bold"]
            C_CHG = color_codes["yellow_bold"]

        if self.highlight:
            C_ADD, C_SUB, C_CHG = background(C_ADD), background(C_SUB), background(C_CHG)

        C_NONE = color_codes["none"]
        colors = (C_ADD, C_SUB, C_CHG, C_NONE)

        s = s.replace('\0+', C_ADD).replace('\0-', C_SUB).replace('\0^', C_CHG).replace('\1', C_NONE).replace('\t', ' ')

        if self.highlight:
            return s

        if not self.show_all_spaces:
            # A colored region consisting entirely of whitespace would be
            # invisible in a foreground color: rewrite its color code to
            # the reverse-video variant (7;3x) instead.
            return re.sub("\033\\[[01];3([123])m(\\s+)(\033\\[)", "\033[7;3\\1m\\2\\3", s)

        def will_see_coloredspace(i, s):
            # skip the run of whitespace starting at i; report False when
            # it is directly followed by an escape sequence
            while i < len(s) and s[i].isspace():
                i += 1
            if i < len(s) and s[i] == '\033':
                return False
            return True

        n_s = []
        in_color = False
        seen_coloredspace = False
        for i, c in enumerate(s):
            # after an 'm' has been emitted, check whether the output so
            # far ends in one of the color codes and track the active color
            if len(n_s) > 6 and n_s[-1] == "m":
                ns_end = "".join(n_s[-7:])
                for color in colors:
                    if ns_end.endswith(color):
                        if color != in_color:
                            seen_coloredspace = False
                        in_color = color
                if ns_end.endswith(C_NONE):
                    in_color = False

            if c.isspace() and in_color and (self.show_all_spaces or not (seen_coloredspace or will_see_coloredspace(i, s))):
                n_s.extend([C_NONE, background(in_color), c, C_NONE, in_color])
            else:
                if in_color:
                    seen_coloredspace = True
                n_s.append(c)

        joined = "".join(n_s)

        return joined


def simple_colorize(s, chosen_color):
    """Returns s wrapped in the color_codes entry chosen_color and a reset."""
    return "%s%s%s" % (color_codes[chosen_color], s, color_codes["none"])


def start():
    """Command line entry point: parse the options and run the comparison."""
    # If you change any of these, also update README.
    parser = optparse.OptionParser(usage="usage: %prog [options] left_file right_file",
                                   description="Show differences between files in a two column view.")
    parser.add_option("--cols", default=None,
                      help="specify the width of the screen. Autodetection is Linux only")
    parser.add_option("--head", default=0,
                      help="consider only the first N lines of each file")
    parser.add_option("--highlight", default=False,
                      action="store_true",
                      help="color by changing the background color instead of the foreground color. Very fast, ugly, displays all changes")
    parser.add_option("--line-numbers", default=False,
                      action="store_true",
                      help="generate output with line numbers")
    parser.add_option("--no-bold", default=False,
                      action="store_true",
                      help="use non-bold colors; recommended for with solarized")
    parser.add_option("--no-headers", default=False,
                      action="store_true",
                      help="don't label the left and right sides with their file names")
    parser.add_option("--numlines", default=5,
                      help="how many lines of context to print; can't be combined with --whole-file")
    parser.add_option("--recursive", default=False,
                      action="store_true",
                      help="recursively compare subdirectories")
    parser.add_option("--show-all-spaces", default=False,
                      action="store_true",
                      help="color all non-matching whitespace including that which is not needed for drawing the eye to changes. Slow, ugly, displays all changes")
    parser.add_option("--version", default=False,
                      action="store_true",
                      help="print version and exit")
    parser.add_option("--whole-file", default=False,
                      action="store_true",
                      help="show the whole file instead of just changed lines and context")

    (options, args) = parser.parse_args()

    if options.version:
        print("icdiff version 1.2.0")
        sys.exit()

    if len(args) != 2:
        parser.print_help()
        sys.exit()

    a, b = args

    if not options.cols:
        def ioctl_GWINSZ(fd):
            # Best effort: any failure (modules missing, fd not a terminal)
            # just means the width cannot be detected through this fd.
            try:
                import fcntl, termios, struct
                cr = struct.unpack('hh', fcntl.ioctl(fd, termios.TIOCGWINSZ, '1234'))
            except Exception:
                return None
            return cr
        cr = ioctl_GWINSZ(0) or ioctl_GWINSZ(1) or ioctl_GWINSZ(2)
        if cr:
            options.cols = cr[1]
        else:
            options.cols = 80

    if options.recursive:
        diff_recursively(options, a, b)
    else:
        diff_files(options, a, b)


def diff_recursively(options, a, b):
    """Compares a and b, descending into directories present on both sides.

    Files that differ are shown with diff_files; entries that exist on one
    side only, and file/directory mismatches, are reported in magenta.
    """
    def print_meta(s):
        print(simple_colorize(s, "magenta"))

    if os.path.isfile(a) and os.path.isfile(b):
        if not filecmp.cmp(a, b):
            diff_files(options, a, b)

    elif os.path.isdir(a) and os.path.isdir(b):
        a_contents = set(os.listdir(a))
        b_contents = set(os.listdir(b))

        for child in sorted(a_contents.union(b_contents)):
            if child not in b_contents:
                print_meta("Only in %s: %s" % (a, child))
            elif child not in a_contents:
                print_meta("Only in %s: %s" % (b, child))
            else:
                diff_recursively(options,
                                 os.path.join(a, child),
                                 os.path.join(b, child))

    elif os.path.isdir(a) and os.path.isfile(b):
        print_meta("File %s is a directory while %s is a file" % (a, b))

    elif os.path.isfile(a) and os.path.isdir(b):
        print_meta("File %s is a file while %s is a directory" % (a, b))


def _read_lines(path):
    """Returns the lines of the text file at path, closing it afterwards.

    Python 3 text mode already applies universal newlines, and the legacy
    "U" flag is rejected by Python 3.11+; it is only passed on Python 2.
    """
    mode = "r" if sys.version_info[0] >= 3 else "rU"
    with open(path, mode) as handle:
        return handle.readlines()


def diff_files(options, a, b):
    """Prints the side by side comparison of files a and b to stdout.

    options is the parsed option object from start(); the attributes
    no_headers, head, cols, show_all_spaces, highlight, no_bold,
    line_numbers, whole_file and numlines are read.  Exits with status 1
    if either path is a directory.
    """
    headers = a, b
    if options.no_headers:
        headers = None, None

    head = int(options.head)

    for x in [a, b]:
        if os.path.isdir(x):
            sys.stderr.write("error: %s is a directory; did you mean to pass --recursive?\n" % x)
            sys.exit(1)
    lines_a = _read_lines(a)
    lines_b = _read_lines(b)

    if head != 0:
        lines_a = lines_a[:head]
        lines_b = lines_b[:head]

    print(ConsoleDiff(cols=int(options.cols),
                      show_all_spaces=options.show_all_spaces,
                      highlight=options.highlight,
                      no_bold=options.no_bold,
                      line_numbers=options.line_numbers).make_table(
        lines_a, lines_b, headers[0], headers[1], context=(not options.whole_file), numlines=int(options.numlines)))
    sys.stdout.flush()


if __name__ == "__main__":
    try:
        start()
    except KeyboardInterrupt:
        pass
    except IOError as e:
        # a closed pipe (e.g. piping into `head`) is not an error
        if e.errno == errno.EPIPE:
            pass
        else:
            raise