#!/usr/bin/env python

"""
    usage: gen-xyzzy [-o output] directory...

    Typical usage:
      gen-xyzzy -o filesystem.dat /usr/share/guile/1.8 /usr/local/share/guile/site

    Where /usr/share/guile points to the system guile installation and
    /usr/local/share/guile/site points to the GNU Radio installed guile files


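    Build a single file that contains all of the files found under the
    given directories (the *.scm files from the guile installation and
    from the GR installation; no extension filter is currently applied).
    It's basically a table that maps strings to strings.  That is,
    "foo/bar/filename" -> "file contents", where the name is the file's
    path relative to the directory it was found under.  We could just
    mmap it in read-only, or just read it in.  Reading is more portable,
    so let's do that.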

    File: [ header | directory | strings ]

    All integers are in network byte order (big-endian).

    struct header {
      char magic[8];

      uint32_t	offset_to_directory;	// byte offset from start of file
      uint32_t	size_of_directory;	// bytes
      uint32_t  number_of_dir_entries;

      uint32_t	offset_to_strings;	// byte offset from start of file
      uint32_t  size_of_strings;	// bytes
    };

    struct directory_entry {
      uint32_t	     offset_to_name;	 // from start of strings
      uint32_t	     offset_to_contents; // from start of strings
    }

    Each string starts with a uint32_t length, followed by length bytes.
    There is no trailing \0 in the string.  Each string entry is followed
    with enough padding to bring it up to a multiple of 4 bytes.

    struct string_entry {
      uint32_t	    length;
      unsigned char c[1];		// 0 is nicer, but not portable.
    }
"""


from optparse import OptionParser
import sys
import os
import os.path
from pprint import pprint
import struct


def main():
   """Command-line entry point.

   Parses ``[-o FILENAME] directory...`` and hands the directories to
   doit().  With no directory arguments the help text is printed and the
   process exits with status 1.  Without ``-o`` the result is written to
   standard output.
   """
   parser = OptionParser(usage="usage: %prog [options] directory...")
   parser.add_option("-o", type="string", default=None, metavar="FILENAME",
                     help="Specify output filename [default=stdout]")
   (options, args) = parser.parse_args()

   if not args:
      parser.print_help()
      # Call syntax is valid on both Python 2 and Python 3.
      raise SystemExit(1)

   if options.o:
      output = open(options.o, 'wb')
      try:
         doit(output, args)
      finally:
         # Make sure the file is flushed and closed even if doit() raises.
         output.close()
   else:
      # On Python 3 sys.stdout is a text stream; its .buffer attribute is
      # the underlying binary stream.  Python 2's sys.stdout has no
      # .buffer and already accepts byte strings.
      output = getattr(sys.stdout, 'buffer', sys.stdout)
      doit(output, args)
      output.flush()


def doit(output, dirs):
   """Collect the files under every directory in *dirs* and write them out.

   Each directory is scanned with handle_dir(), which yields
   (relative name, full path) pairs.  When the same relative name shows
   up more than once, the first path seen wins; a warning is written to
   stderr if a later occurrence points at a different path.  The
   surviving pairs are sorted and passed to write_xyzzy().

   output -- writable binary file object
   dirs   -- iterable of directory paths
   """
   uniq = {}
   for d in dirs:
      for key, val in handle_dir(d):
         if key not in uniq:
            uniq[key] = val
         elif val != uniq[key]:
            sys.stderr.write("Duplicate key: %s %s %s\n" % (key, uniq[key], val))

   # sorted() works on both Python 2 lists and Python 3 dict views;
   # calling .sort() on items() only works on Python 2.
   write_xyzzy(output, sorted(uniq.items()))


def handle_dir(directory):
   """Return a list of (relative name, full path) pairs for *directory*.

   The tree is walked top-down with os.walk().  Every file is collected;
   no extension filter is applied.  The relative name is the full path
   with the directory prefix (including its trailing separator) removed.
   """
   prefix = directory if directory.endswith(os.sep) else directory + os.sep
   strip = len(prefix)

   found = []
   for root, _subdirs, files in os.walk(prefix, topdown=True):
      paths = [os.path.join(root, entry) for entry in files]
      found.extend((path[strip:], path) for path in paths)
   return found


def file_length(filename):
   """Return the size in bytes of *filename*, as reported by os.stat()."""
   return os.stat(filename).st_size


def round_up(n):
   """Return the integer n rounded up to the next multiple of 4."""
   bumped = n + 3
   # Clearing the two low bits drops back to a multiple of 4.
   return bumped & ~3


class string_table(object):
   """Accumulates length-prefixed, 4-byte-padded strings into one blob.

   Each entry is a big-endian uint32 byte length, followed by the bytes
   themselves, followed by enough NUL padding to bring the payload up to
   a multiple of 4 bytes.  The assembled blob is available as ``_table``.
   """

   def __init__(self):
      # Pieces are kept in a list and joined on demand; re-joining the
      # whole table on every add_string() call would be quadratic.
      self._chunks = []
      self._size = 0
      self._padding = b'\0\0\0\0'

   @property
   def _table(self):
      """The whole table as a single byte string."""
      if len(self._chunks) > 1:
         # Collapse so repeated reads do not repeat the join.
         self._chunks = [b''.join(self._chunks)]
      if self._chunks:
         return self._chunks[0]
      return b''

   def add_string(self, s):
      """Append *s* to the table and return its byte offset in the table.

      s -- a byte string; text (unicode) strings are encoded as UTF-8
           first so that the stored length is a byte count.
      """
      if not isinstance(s, bytes):
         s = s.encode('utf-8')
      offset = self._size
      len_s = len(s)
      padding = self._padding[0:round_up(len_s) - len_s]
      self._chunks.extend((struct.pack('>I', len_s), s, padding))
      self._size += 4 + len_s + len(padding)
      return offset


def write_xyzzy(f, list_of_tuples, verbose=False):
   """Write an xyzzy file (header, directory, string table) to *f*.

   f              -- writable binary file object
   list_of_tuples -- sequence of (name, filename) pairs; *name* is the
                     key stored in the file, *filename* is the path whose
                     contents are read (in binary mode) and stored
   verbose        -- when true, dump the header fields to stderr

   Layout, all integers big-endian:
     header    : 8-byte magic + 5 uint32 (directory offset, directory
                 size, entry count, strings offset, strings size)
     directory : one (name offset, contents offset) uint32 pair per
                 entry, offsets relative to the start of the strings
     strings   : the string_table blob
   """
   header_format = '>8s5I'
   entry_format = '>2I'
   magic = b'-XyZzY-\0'

   entries = list(list_of_tuples)
   number_of_dir_entries = len(entries)

   st = string_table()

   # Insert names in string table first to help locality
   name_offsets = [st.add_string(name) for name, _filename in entries]

   # Now add file contents, closing each file as soon as it has been read
   content_offsets = []
   for _name, filename in entries:
      with open(filename, 'rb') as src:
         content_offsets.append(st.add_string(src.read()))

   strings = st._table

   # Derive the layout from the struct formats instead of magic numbers.
   offset_to_directory = struct.calcsize(header_format)
   size_of_directory = number_of_dir_entries * struct.calcsize(entry_format)
   offset_to_strings = offset_to_directory + size_of_directory
   size_of_strings = len(strings)

   if verbose:
      # Goes to stderr so it cannot corrupt the data when f is stdout.
      sys.stderr.write("offset_to_directory\t%d\n" % offset_to_directory)
      sys.stderr.write("size_of_directory\t%d\n" % size_of_directory)
      sys.stderr.write("number_of_dir_entries\t%d\n" % number_of_dir_entries)
      sys.stderr.write("offset_to_strings\t%d\n" % offset_to_strings)
      sys.stderr.write("size_of_strings\t\t%d\n" % size_of_strings)

   # Write header
   f.write(struct.pack(header_format,
                       magic,
                       offset_to_directory,
                       size_of_directory,
                       number_of_dir_entries,
                       offset_to_strings,
                       size_of_strings))

   # Write directory
   for name_offset, content_offset in zip(name_offsets, content_offsets):
      f.write(struct.pack(entry_format, name_offset, content_offset))

   # Write string table
   f.write(strings)


# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
   main()