debdu

Measure Debian package disk space usage
git clone https://a3nm.net/git/debdu/
Log | Files | Refs

commit ce9e99275dc5d1494dd665eb31dfe7550c55f9b8
Author: Antoine Amarilli <a3nm@a3nm.net>
Date:   Fri, 25 Jul 2014 01:17:15 +0200

initial

Diffstat:
debdu.py | 76++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 76 insertions(+), 0 deletions(-)

#!/usr/bin/python
"""debdu.py -- measure Debian package disk space usage.

Every manually installed package is charged its own installed size plus a
share of each automatically installed package it depends on: the full size
of an automatic package is split evenly between the packages that depend on
it.  The report lists the manually installed packages, largest first, each
followed by a breakdown of where its size comes from.

Only syntax valid on both Python 2 and Python 3 is used.
"""

# Package name -> node dict.  A node carries:
#   'n_in' : number of distinct installed packages depending on it
#   'auto' : True when the package was automatically installed
#   'size' : installed size of the package itself
#   'deps' : set of names of installed packages it depends on
#   'full' : own size plus shares of automatic dependencies (set by dfs)
graph = {}
# Names of manually installed packages; traversal starts from these.
roots = []
# (full size, name, explanation) tuples, one per manually installed package.
results = []


def init(p):
    """Create the graph node for package name p if it does not exist yet."""
    if p not in graph:
        graph[p] = {'n_in': 0}


def add_dep(name, dep):
    """Record that package `name` depends on package `dep`.

    The edge is counted at most once: `deps` is a set, so a dependency that
    is listed several times (for instance with two version constraints) must
    not inflate the dependency's 'n_in' sharing counter.
    """
    deps = graph[name]['deps']
    if dep in deps:
        return
    deps.add(dep)
    init(dep)
    graph[dep]['n_in'] += 1


def dfs(v):
    """Compute, memoise and return the full size attributed to package v.

    The full size is the package's own size plus, for every automatically
    installed dependency, that dependency's full size divided (integer
    division) by the number of packages depending on it.  Manually installed
    dependencies contribute 0 here because they get their own entry.

    When v is manually installed, a (size, name, explanation) tuple is
    appended to `results`; the explanation is a list of
    (amount, source, auto, n_in) tuples sorted largest first.
    """
    node = graph[v]
    if 'full' in node:
        return node['full']
    # Mark v as being visited.
    # TODO this marking is wrong for cycles: a package reached again while it
    # is still being visited is seen with a full size of 0.
    node['full'] = 0
    total = node['size']
    expl = [(total, 'the package itself', True, 1)]
    # Sorted so that the traversal order (which matters when there are
    # cycles) does not depend on set iteration order.
    for d in sorted(node['deps']):
        dfs(d)
        dep = graph[d]
        if dep['auto']:
            share = dep['full'] // dep['n_in']
            total += share
            expl.append((share, d, dep['auto'], dep['n_in']))
        else:
            expl.append((0, d, dep['auto'], 1))
    node['full'] = total
    expl.sort(reverse=True)
    if not node['auto']:
        results.append((total, v, expl))
    return total


def load_installed(cache, depcache, installed_state):
    """Fill `graph` and `roots` from the installed packages in an apt cache.

    cache           -- apt_pkg.Cache (iterated via .packages, indexed by name)
    depcache        -- apt_pkg.DepCache, asked whether a package is automatic
    installed_state -- the current_state value meaning "installed"
    """
    for package in cache.packages:
        if package.current_state != installed_state:
            continue
        version = package.current_ver
        name = package.name
        auto = depcache.is_auto_installed(package)
        init(name)
        node = graph[name]
        node['auto'] = auto
        if not auto:
            roots.append(name)
        node['size'] = version.installed_size
        node['deps'] = set()
        # Read the property once; only hard 'Depends' entries are followed.
        for or_group in version.depends_list_str.get('Depends', []):
            # TODO of course this is wrong: every installed alternative of an
            # or-group is treated as a dependency.
            for or_dep in or_group:
                dep_name = or_dep[0]
                try:
                    target = cache[dep_name]
                except KeyError:
                    # Wrong wrong wrong!  Names unknown to the cache are
                    # silently skipped.
                    continue
                if target.current_state != installed_state:
                    continue
                add_dep(name, dep_name)


def format_result(result):
    """Return the report lines for one (size, name, explanation) result."""
    size, name, expl = result
    lines = [str(size) + ' ' + name]
    for amount, source, auto, n_in in expl:
        # TODO shared should be between non-descendant packages
        if not auto:
            note = ' manually installed and accounted in a separate entry'
        elif n_in != 1:
            note = " shared between " + str(n_in) + " package(s)"
        else:
            note = ''
        lines.append(' %d from %s%s' % (amount, source, note))
    return lines


def main():
    """Read the apt cache and print the disk usage report."""
    # python-apt is imported here so that the accounting functions above can
    # be imported on systems where it is not installed.
    import apt_pkg

    apt_pkg.init()
    cache = apt_pkg.Cache(None)
    depcache = apt_pkg.DepCache(cache)
    load_installed(cache, depcache, apt_pkg.CURSTATE_INSTALLED)

    for root in roots:
        dfs(root)

    for result in sorted(results, reverse=True):
        for line in format_result(result):
            print(line)
        # Blank separator line; print('') behaves the same on Python 2 and 3.
        print('')


if __name__ == '__main__':
    main()