#!/usr/bin/env python
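# Usage: pass a list of directories on the command line.  The script reports
# file names that are missing from some of the directories and files whose
# contents (MD5 checksums) differ between directories.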

import sys
import os
import os.path
from itertools import ifilter, ifilterfalse, imap, izip
import md5
from pprint import pprint
from sets import Set

class info(object):
    """Record for one scanned file: its directory, file name and MD5 digest."""

    def __init__(self, dir, name, md5sum):
        # Values are stored exactly as given; no validation or normalisation.
        self.dir, self.name, self.md5sum = dir, name, md5sum

    def __repr__(self):
        # Rendered as <dir, name, md5sum> using the repr() of each field.
        fields = (self.dir, self.name, self.md5sum)
        return '<' + ', '.join([repr(field) for field in fields]) + '>'


def md5sum_file(filename):
    """Return the MD5 digest of a file's contents as a hexadecimal string.

    The file is opened in binary mode so that the digest covers the exact
    bytes on disk (text mode may translate line endings or stop early at an
    end-of-file marker on some platforms).  The contents are hashed in
    fixed-size chunks rather than read into memory in one piece, and the
    file is always closed before the function returns.

    filename -- path of the file to hash.

    Raises IOError (OSError on newer interpreters) if the file cannot be
    opened or read.
    """
    # hashlib superseded the stand-alone md5 module in Python 2.5; prefer it
    # and fall back only on interpreters that predate it.
    try:
        from hashlib import md5 as new_md5
    except ImportError:
        from md5 import new as new_md5
    m = new_md5()

    f = open(filename, 'rb')
    try:
        while True:
            chunk = f.read(65536)
            if not chunk:       # empty read means end of file
                break
            m.update(chunk)
    finally:
        f.close()
    return m.hexdigest()

def process_dir(dir):
    """Return an info record for every regular file of interest in dir.

    Entries named CVS, Makefile, Makefile.in or .svn, and entries whose name
    ends in '~', are ignored.  Anything that is not a regular file (for
    example an unexpected subdirectory) is skipped as well, because it
    cannot be checksummed by md5sum_file() and would otherwise abort the
    whole scan with an IOError.

    dir -- directory to scan; the scan is not recursive.

    Returns a list of info objects, in os.listdir() order.
    """
    skip_names = ('CVS', 'Makefile', 'Makefile.in', '.svn')
    records = []
    for name in _filter(os.listdir(dir), skip_names):
        if name.endswith('~'):
            continue
        path = os.path.join(dir, name)
        if not os.path.isfile(path):
            continue
        records.append(info(dir, name, md5sum_file(path)))
    return records


# Return a copy of flist with every item that occurs in skip removed.
# The remaining items keep their relative order and flist itself is left
# unmodified.  skip may be any container that supports the `in` operator.
#
def _filter(flist, skip):
    return [item for item in flist if item not in skip]

def main():
    """Compare the files found in the directories named on the command line.

    Every directory in sys.argv[1:] is scanned with process_dir().  Two
    reports are then printed to standard output:

    * for each directory, the file names that were found in at least one
      other scanned directory but not in this one;
    * for each file name (in sorted order) whose copies do not all share
      one MD5 digest, a mapping of digest -> list of directories.

    Nothing is printed when neither condition occurs.
    """
    dirs = sys.argv[1:]

    # Index the scan results once, in scan order, so that neither report has
    # to rescan the complete result list for every directory or file name.
    names = Set()        # union of all file names seen
    names_by_dir = {}    # directory -> Set of file names found in it
    versions = {}        # file name -> {md5sum: [directories, in scan order]}
    for d in dirs:
        for entry in process_dir(d):
            names.add(entry.name)
            if entry.dir not in names_by_dir:
                names_by_dir[entry.dir] = Set()
            names_by_dir[entry.dir].add(entry.name)
            by_digest = versions.setdefault(entry.name, {})
            by_digest.setdefault(entry.md5sum, []).append(entry.dir)

    # check for missing files across the union of names
    # (a single parenthesised expression prints identically under the
    # Python 2 print statement)
    for d in dirs:
        diff = names.difference(names_by_dir.get(d, Set()))
        if diff:
            print("%s is missing %r" % (d, diff))

    # check for different versions of files
    name_list = list(names)
    name_list.sort()

    for name in name_list:
        vers = versions[name]
        if len(vers) != 1:     # multiple versions
            print("Multiple versions of %s:" % (name,))
            pprint(vers)

# Run the comparison only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == '__main__':
    main()