Synopsis - Synopsis/Core/Util.py

    1| # $Id: Util.py,v 1.24 2002/11/19 03:49:49 chalky Exp $
    2| #
    3| # This file is a part of Synopsis.
    4| # Copyright (C) 2000, 2001 Stefan Seefeld
    5| # Copyright (C) 2000, 2001 Stephen Davies
    6| #
    7| # Synopsis is free software; you can redistribute it and/or modify it
    8| # under the terms of the GNU General Public License as published by
    9| # the Free Software Foundation; either version 2 of the License, or
   10| # (at your option) any later version.
   11| #
   12| # This program is distributed in the hope that it will be useful,
   13| # but WITHOUT ANY WARRANTY; without even the implied warranty of
   14| # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   15| # General Public License for more details.
   16| #
   17| # You should have received a copy of the GNU General Public License
   18| # along with this program; if not, write to the Free Software
   19| # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
   20| # 02111-1307, USA.
   21| #
   22| # $Log: Util.py,v $
   23| # Revision 1.24  2002/11/19 03:49:49  chalky
   24| # Sort structs in PyWriter to make diffs of config files easier
   25| #
   26| # Revision 1.23  2002/10/29 15:01:38  chalky
   27| # Support names with spaces
   28| #
   29| # Revision 1.22  2002/10/28 06:15:26  chalky
   30| # Fix double-dot problem when quoting names including an extension (eg: the
   31| # highlighted source files)
   32| #
   33| # Revision 1.21  2002/10/20 02:21:25  chalky
   34| # Move quote function to Core.Util
   35| #
   36| # Revision 1.20  2002/09/28 06:16:19  chalky
   37| # Don't dump file to stdout
   38| #
   39| # Revision 1.19  2002/09/20 10:35:32  chalky
   40| # Allow writing a comment to top of file
   41| #
   42| # Revision 1.18  2002/08/23 04:37:26  chalky
   43| # Huge refactoring of Linker to make it modular, and use a config system similar
   44| # to the HTML package
   45| #
   46| # Revision 1.17  2002/06/22 06:56:31  chalky
   47| # Fixes to PyWriter for nested classes
   48| #
   49| # Revision 1.16  2002/04/26 01:21:13  chalky
   50| # Bugs and cleanups
   51| #
   52| # Revision 1.15  2001/11/05 06:52:11  chalky
   53| # Major backside ui changes
   54| #
   55| # Revision 1.14  2001/07/19 04:03:05  chalky
   56| # New .syn file format.
   57| #
   58| # Revision 1.13  2001/07/10 14:41:22  chalky
   59| # Make treeformatter config nicer
   60| #
   61| # Revision 1.12  2001/06/28 07:22:18  stefan
   62| # more refactoring/cleanup in the HTML formatter
   63| #
   64| # Revision 1.11  2001/06/26 04:32:15  stefan
   65| # A whole slew of changes mostly to fix the HTML formatter's output generation,
   66| # i.e. to make the output more robust towards changes in the layout of files.
   67| #
   68| # the rpm script now works, i.e. it generates source and binary packages.
   69| #
   70| # Revision 1.10  2001/06/21 01:17:27  chalky
   71| # Fixed some paths for the new dir structure
   72| #
   73| # Revision 1.9  2001/04/05 09:54:00  chalky
   74| # More robust _import()
   75| #
   76| # Revision 1.8  2001/03/29 14:03:36  chalky
   77| # Cache current working dir, and use it for file imports in _import()
   78| #
   79| # Revision 1.7  2001/01/24 18:33:38  stefan
   80| # more cleanup
   81| #
   82| # Revision 1.6  2001/01/24 12:48:10  chalky
   83| # Improved error reporting in _import if __import__ fails for some other reason
   84| # than file not found
   85| #
   86| # Revision 1.5  2001/01/22 17:06:15  stefan
   87| # added copyright notice, and switched on logging
   88| #
   89| # Revision 1.4  2001/01/22 06:04:25  stefan
   90| # some advances on cross referencing
   91| #
   92| # Revision 1.3  2001/01/21 19:31:03  stefan
   93| # new and improved import function that accepts file names or modules
   94| #
   95| # Revision 1.2  2001/01/21 06:22:51  chalky
   96| # Added Util.getopt_spec for --spec=file.spec support
   97| #
   98| # Revision 1.1  2001/01/08 19:48:41  stefan
   99| # changes to allow synopsis to be installed
  100| #
  101| # Revision 1.2  2000/12/19 23:44:16  chalky
  102| # Chalky's Big Commit. Loads of changes and new stuff:
  103| # Rewrote HTML formatter so that it creates a page for each module and class,
  104| # with summaries and details and sections on each page. It also creates indexes
  105| # of modules, and for each module, and a frames index to organise them. Oh and
  106| # an inheritance tree. Bug fixes to some other files. The C++ parser now also
  107| # recognises class and functions to some extent, but support is not complete.
  108| # Also wrote a DUMP formatter that verbosely dumps the AST and Types. Renamed
  109| # the original HTML formatter to HTML_Simple. The ASCII formatter was rewritten
  110| # to follow what looked like major changes to the AST ;) It now outputs
  111| # something that should be the same as the input file, modulo whitespace and
  112| # comments.
  113| #
  114| # Revision 1.1  2000/08/01 03:28:26  stefan
  115| # a major rewrite, hopefully much more robust
  116| #
  117|
  118| """Utility functions for IDL compilers
  119|
  120| escapifyString() -- return a string with non-printing characters escaped.
  121| slashName()      -- format a scoped name with '/' separating components.
  122| dotName()        -- format a scoped name with '.' separating components.
  123| ccolonName()     -- format a scoped name with '::' separating components.
  124| pruneScope()     -- remove common prefix from a scoped name.
  125| getopt_spec(args,options,longlist) -- version of getopt that adds transparent --spec= support"""
  126|
  127| import string, getopt, sys, os, os.path, cStringIO, types, re, md5
  128|
  129| # Cache the working directory in effect when this module is loaded. It is sometimes
  130| # changed during output, and file imports must stay relative to this original directory.
  131| _workdir = os.getcwd()
  132|
  133| def slashName(scopedName, our_scope=[]):
  134|     """slashName(list, [list]) -> string
  135|
  136| Return a scoped name given as a list of strings as a single string
  137| with the components separated by '/' characters. If a second list is
  138| given, remove a common prefix using pruneScope()."""
  139|
  140|     pscope = pruneScope(scopedName, our_scope)
  141|     return string.join(pscope, "/")
  142|
  143| def dotName(scopedName, our_scope=[]):
  144|     """dotName(list, [list]) -> string
  145|
  146| Return a scoped name given as a list of strings as a single string
  147| with the components separated by '.' characters. If a second list is
  148| given, remove a common prefix using pruneScope()."""
  149|
  150|     pscope = pruneScope(scopedName, our_scope)
  151|     return string.join(pscope, ".")
  152|
  153| def ccolonName(scopedName, our_scope=[]):
  154|     """ccolonName(list, [list]) -> string
  155|
  156| Return a scoped name given as a list of strings as a single string
  157| with the components separated by '::' strings. If a second list is
  158| given, remove a common prefix using pruneScope()."""
  159|
  160|     pscope = pruneScope(scopedName, our_scope)
  161|     try: return string.join(pscope, "::")
  162|     except TypeError, e:
  163|         import pprint
  164|         pprint.pprint(scopedName)
  165|         pprint.pprint(our_scope)
  166|         if type(pscope) in (type([]), type(())):
  167|             raise TypeError, str(e) + " ..not: list of " + str(type(pscope[0]))
  168|         raise TypeError, str(e) + " ..not: " + str(type(pscope))
  169|
  170| def pruneScope(target_scope, our_scope):
  171|     """pruneScope(list A, list B) -> list
  172|
  173| Given two lists of strings (scoped names), return a copy of list A
  174| with any prefix it shares with B removed.
  175|
  176|   e.g. pruneScope(['A', 'B', 'C', 'D'], ['A', 'B', 'D']) -> ['C', 'D']"""
  177|
  178|     tscope = list(target_scope)
  179|     i = 0
  180|     while len(tscope) > 1 and \
  181|           i < len(our_scope) and \
  182|           tscope[0] == our_scope[i]:
  183|         del tscope[0]
  184|         i = i + 1
  185|     return tscope
  186|
  187| def escapifyString(str):
  188|     """escapifyString(string) -> string
  189|
  190| Return the given string with any non-printing characters escaped."""
  191|
  192|     l = list(str)
  193|     vis = string.letters + string.digits + " _!$%^&*()-=+[]{};'#:@~,./<>?|`"
  194|     for i in range(len(l)):
  195|         if l[i] not in vis:
  196|             l[i] = "\\%03o" % ord(l[i])
  197|     return string.join(l, "")
  198|
  199| def _import(name):
  200|     """import either a module, or a file."""
  201|     arg_name = name #backup for error reporting
  202|     # if name contains slashes, interpret it as a file
  203|     as_file = string.find(name, "/") != -1
  204|     as_file = as_file or name[-3:] == '.py'
  205|     if not as_file:
  206|         components = string.split(name, ".")
  207|         # if one of the components is empty, name is interpreted as a file ('.foo' for example)
  208|         for comp in components:
  209|             if not comp:
  210|                 as_file = 1
  211|                 break
  212|     mod = None
  213|     error_messages = []
  214|     # try as module
  215|     if not as_file:
  216|         import_name = list(components)
  217|         while len(import_name):
  218|         try:
  219|                mod = __import__(string.join(import_name, '.'))
  220|                for comp in components[1:]:
  221|                try:
  222|                       mod = getattr(mod, comp)
  223|                    except AttributeError, msg:
  224|                       print "Error: Unable to find %s in:\n%s"%(
  225|                       comp,repr(mod))
  226|                       print "Error: Importing '%s'\n"%arg_name
  227|                       print "Collected import messages (may not all be errors):"
  228|                       for message in error_messages: print message
  229|                sys.exit(1)
  230|                return mod
  231|             except ImportError, msg:
  232|                # Remove last component and try again
  233|                del import_name[-1]
  234|                msg = "  %s:\n    %s"%(string.join(import_name, '.'), msg)
  235|                error_messages.append(msg)
  236|             except SystemExit, msg: raise
  237|            except:
  238|                print "Unknown error occurred importing",name
  239|                import traceback
  240|                traceback.print_exc()
  241|                sys.exit(1)
  242|
  243|     # try as file
  244|     try:
  245|         if not name[0] == '/': name = _workdir+os.sep+name
  246|         if not os.access(name, os.R_OK): raise ImportError, "Cannot access file %s"%name
  247|         dir = os.path.abspath(os.path.dirname(name))
  248|         name = os.path.basename(name)
  249|         modname = name[:]
  250|         if modname[-3:] == ".py": modname = modname[0:-3]
  251|         if dir not in sys.path: sys.path.insert(0, dir)
  252|         mod = __import__(modname)
  253|     except ImportError, msg:
  254|         sys.path = sys.path[1:]
  255|         sys.stderr.write("Error: Could not find module %s: %s\n"%(name,msg))
  256|         sys.stderr.write("Error: Importing '%s'\n"%arg_name)
  257|         sys.stderr.flush()
  258|         sys.exit(-1)
  259|     return mod
  260|
  261| def import_object(spec, defaultAttr = None, basePackage = ''):
  262|     """Imports an object according to 'spec'. spec must be either a
  263|     string or a tuple of two strings. A tuple of two strings means load the
  264|     module from the first string, and look for an attribute using the second
  265|     string. One string is interpreted according to the optional arguments. The
  266|     default is just to load the named module. 'defaultAttr' means to look for
  267|     the named attribute in the module and return that. 'basePackage' means to
  268|     prepend the named string to the spec before importing. Note that you may
  269|     pass a list instead of a tuple, and it will have the same effect.
  270|
  271|     This is used by the HTML formatter for example, to specify page classes.
  272|     Each class is in a separate module, and each module has a htmlPageAttr
  273|     attribute that references the class of the Page for that module. This
  274|     avoids the need to specify a list of default pages, easing
  275|     maintainability."""
  276|     if type(spec) == types.ListType: spec = tuple(spec)
  277|     if type(spec) == types.TupleType:
  278|         # Tuple means (module-name, attribute-name)
  279|         if len(spec) != 2:
  280|             raise TypeError, "Import tuple must have two strings"
  281|         name, attr = spec
  282|         if type(name) != types.StringType or type(attr) != types.StringType:
  283|             raise TypeError, "Import tuple must have two strings"
  284|         module = _import(name)
  285|         if not hasattr(module, attr):
  286|             raise ImportError, "Module %s has no %s attribute."%spec
  287|         return getattr(module, attr)
  288|     elif type(spec) == types.StringType:
  289|         # String means HTML module with htmlPageClass attribute
  290|         module = _import(basePackage+spec)
  291|         if defaultAttr is not None:
  292|             if not hasattr(module, defaultAttr):
  293|                raise ImportError, "Module %s has no %s attribute."%(spec, defaultAttr)
  294|             return getattr(module, defaultAttr)
  295|         return module
  296|     else:
  297|         raise TypeError, "Import spec must be a string or tuple of two strings."
  298|
  299|
  300| def splitAndStrip(line):
  301|     """Splits a line at the first space, then strips the second argument"""
  302|     pair = string.split(line, ' ', 1)
  303|     return pair[0], string.strip(pair[1])
  304|
  305| def open(filename):
  306|     """open a file, generating all intermediate directories if needed"""
  307|     import __builtin__
  308|     dir, file = os.path.split(filename)
  309|     if dir and not os.path.isdir(dir): os.makedirs(dir)
  310|     return __builtin__.open(filename, 'w+')
  311|
  312| def getopt_spec(args, options, long_options=[]):
  313|     """Transparently add --spec=file support to getopt"""
  314|     long_options.append('spec=')
  315|     opts, remainder = getopt.getopt(args, options, long_options)
  316|     ret = []
  317|     for pair in opts:
  318|         if pair[0] == '--spec':
  319|             f = open(pair[1], 'rt')
  320|             spec_opts = map(splitAndStrip, f.readlines())
  321|             f.close()
  322|             ret.extend(spec_opts)
  323|         else:
  324|             ret.append(pair)
  325|     return ret, remainder
  326|
  327| class PyWriter:
  328|     """A class that allows writing data in such a way that it can be read in
  329|     by just 'exec'ing the file. You should extend it and override write_item()"""
  330|     def __init__(self, ostream):
  331|         self.os = ostream
  332|         self.buffer = cStringIO.StringIO()
  333|         self.imports = {}
  334|         self.__indent = '\n'
  335|         self.__prepend = ''
  336|         self.__class_funcs = {}
  337|         self.__long_lists = {}
  338|         self.__done_struct = 0
  339|     def indent(self):
  340|         self.__indent = self.__indent+'  '
  341|     def outdent(self):
  342|         self.__indent = self.__indent[:-2]
  343|     def ensure_import(self, module, names):
  344|         key = module+names
  345|         if self.imports.has_key(key): return
  346|         self.imports[key] = None
  347|         self.os.write('from %s import %s\n'%(module, names))
  348|     def ensure_struct(self):
  349|         if self.__done_struct == 1: return
  350|         self.os.write('class struct:\n def __init__(self,**args):\n  for k,v in args.items(): setattr(self, k, v)\n\n')
  351|         self.__done_struct = 1
  352|     def write_top(self, str):
  353|         """Writes a string to the top of the file"""
  354|         self.os.write(str)
  355|     def write(self, str):
  356|         # Get cached '\n' if any
  357|         prefix = self.__prepend
  358|         self.__prepend = ''
  359|         # Cache '\n' if any
  360|         if len(str) and str[-1] == '\n':
  361|             self.__prepend = '\n'
  362|             str = str[:-1]
  363|         # Indent any remaining \n's, including cached one
  364|         str = string.replace(prefix + str, '\n', self.__indent)
  365|         self.buffer.write(str)
  366|     def write_item(self, item):
  367|         """Writes arbitrary items by looking up write_Foo functions where Foo
  368|         is the class name of the item"""
  369|         # Use repr() for non-instance types
  370|         if type(item) is types.ListType:
  371|             return self.write_list(item)
  372|         if type(item) is not types.InstanceType:
  373|             return self.write(repr(item))
  374|
  375|         # Check for class in cache
  376|         class_obj = item.__class__
  377|         if self.__class_funcs.has_key(class_obj):
  378|             return self.__class_funcs[class_obj](item)
  379|         # Check for write_Foo method
  380|         func_name = 'write_'+class_obj.__name__
  381|         if not hasattr(self, func_name):
  382|             return self.write(repr(item))
  383|         # Cache method and call it
  384|         func = getattr(self, func_name)
  385|         self.__class_funcs[class_obj] = func
  386|         func(item)
  387|     def flush(self):
  388|         "Writes the buffer to the stream and closes the buffer"
  389|         # Needed to flush the cached '\n'
  390|         if self.__prepend: self.write('')
  391|         self.os.write(self.buffer.getvalue())
  392|         if 0: # for debugging
  393|             sys.stdout.write(self.buffer.getvalue())
  394|         self.buffer.close()
  395|     def long(self, list):
  396|         "Remembers list as wanting 'long' representation (an item per line)"
  397|         self.__long_lists[id(list)] = None
  398|         return list
  399|     def write_list(self, list):
  400|         """Writes a list on one line. If long(list) was previously called, the
  401|         list from its cache and calls write_long_list"""
  402|         if self.__long_lists.has_key(id(list)):
  403|             del self.__long_lists[id(list)]
  404|             return self.write_long_list(list)
  405|         self.write('[')
  406|         comma = 0
  407|         for item in list:
  408|             if comma: self.write(', ')
  409|             else: comma = 1
  410|             self.write_item(item)
  411|         self.write(']')
  412|     def write_long_list(self, list):
  413|         "Writes a list with each item on a new line"
  414|         if not list:
  415|             self.write('[]')
  416|           return
  417|         self.write('[\n')
  418|         self.indent()
  419|         comma = 0
  420|         for item in list:
  421|             if comma:
  422|                self.__prepend = ''
  423|                self.write(',\n')
  424|             else: comma = 1
  425|             self.write_item(item)
  426|         self.outdent()
  427|         self.write('\n]')
  428|     def write_attr(self, name, value):
  429|         self.write(name + ' = ')
  430|         self.write_item(value)
  431|         self.write('\n')
  432|     def flatten_struct(self, struct):
  433|         """Flattens a struct into a (possibly nested) list. A struct is an
  434|         object with only the following members: numbers, strings, sub-structs,
  435|         lists and tuples."""
  436|         t = type(struct)
  437|         if t is types.TupleType:
  438|             return tuple(map(self.flatten_struct, struct))
  439|         if t is types.ListType:
  440|             return map(self.flatten_struct, struct)
  441|         if t in (types.ClassType, types.InstanceType):
  442|             self.ensure_struct()
  443|             flatten_item = lambda kv, self=self: (kv[0], self.flatten_struct(kv[1]))
  444|             filter_item = lambda kv: kv[0] not in ('__init__', '__doc__', '__module__')
  445|             items = map(flatten_item, filter(filter_item, struct.__dict__.items()))
  446|             items.sort()
  447|             return PyWriterStruct(items)
  448|         return struct
  449|     def write_PyWriterStruct(self, struct):
  450|         if not len(struct.dict): return self.write('struct()')
  451|         self.write('struct(')
  452|         self.indent()
  453|         # Write one attribute per line, being sure to allow nested structs
  454|         prefix = '\n'
  455|         for key, val in struct.dict:
  456|             self.write(prefix+str(key)+'=')
  457|             self.write_item(val)
  458|             prefix = ',\n'
  459|         self.outdent()
  460|         self.write('\n)')
  461|
  462| class PyWriterStruct:
  463|     """A utility class that PyWriter uses to dump class objects. Dict is the
  464|     dictionary of the class being dumped."""
  465|     def __init__(self, dict): self.dict = dict
  466|
  467| def quote(name):
  468|     """Quotes a base filename to remove illegal characters and keep it
  469|     within a reasonable length for the filesystem.
  470|
  471|     The md5 hash function is used if the length of the name after quoting is
  472|     more than 100 characters. If it is used, then as many characters at the
  473|     start of the name as possible are kept intact, and the hash appended to
  474|     make 100 characters.
  475|
  476|     Do not pass filenames with meaningful extensions to this function, as the
  477|     hash could destroy them."""
  478|
  479|     original = name # save the old name
  480|
  481|     # a . is usually an extension, eg source page filename: "_page-foo.hpp" + .html
  482|     name = re.sub('\.','_',name)
  483|     # The . is arbitrary..
  484|     name = re.sub('<','.L',name)
  485|     name = re.sub('>','.R',name)
  486|     name = re.sub('$','.l',name)
  487|     name = re.sub('$','.r',name)
  488|     name = re.sub('::','-',name)
  489|     name = re.sub(':','.',name)
  490|     name = re.sub('&','.A',name)
  491|     name = re.sub('\*','.S',name)
  492|     name = re.sub(' ','.s',name)
  493|
  494|     if len(name) > 100:
  495|         hash = md5.md5(original).hexdigest()
  496|         # Keep as much of the name as possible
  497|         name = name[:100 - len(hash)] + hash
  498|     return name
  499|