Synopsis - Synopsis/Core/Executor.py

    1| """
    2| Executors are the implementation of the various actions. The actual Action
    3| objects themselves just contain the data needed to perform the actions, and
    4| are minimal on actual code so that they can be easily serialized. The code and
    5| data needed for the execution of an Action is implemented in the matching
    6| Executor class.
    7| """
    8|
    9| import string, re, os, stat, sys, statcache
   10|
   11| from Action import ActionVisitor
   12| from Synopsis.Core import Util
   13| import AST
   14|
   15| try: import gc
   16| except ImportError: gc = None
   17|
   18|
   19| class Executor:
   20|     """Base class for executor classes, defining the common interface between
   21|     each executor instance."""
   22|     def get_output_names(self):
   23|         """Returns a list of (name, timestamp) tuples, representing the output
   24|         from this executor. The names must be given to get_output in turn to
   25|         retrieve the AST objects, and the timestamp may be used for build
   26|         control."""
   27|         pass
   28|
   29|     def prepare_output(self, name, keep):
   30|         """Prepares an AST object for returning. For most objects, this does
   31|         nothing. In the case of a cacher, this causes it to process each input
   32|         in turn and store the results to disk. This is as opposed to keeping
   33|         each previous input in memory while the next is parsed!
   34|         Returns the AST if keep is set, else None."""
   35|         if keep: return get_output(name)
   36|
   37|     def get_output(self, name):
   38|         """Returns the AST object for the given name. Name must one returned
   39|         from the 'get_output_names' method."""
   40|         pass
   41|
   42|
   43| class ExecutorCreator (ActionVisitor):
   44|     """Creates Executor instances for Action objects"""
   45|     def __init__(self, project, verbose=0):
   46|         self.__project = project
   47|         self.verbose = verbose or project.verbose()
   48|
   49|     def project(self):
   50|         """Returns the project for this creator"""
   51|         return self.__project
   52|
   53|     def create(self, action):
   54|         """Creates an executor for the given action"""
   55|         self.__executor = None
   56|         action.accept(self)
   57|         return self.__executor
   58|
   59|     def visitAction(self, action):
   60|         """This is an unknown or incomplete Action: ignore."""
   61|         print "Warning: Unknown action '%s'"%action.name()
   62|
   63|     def visitSource(self, action):
   64|         self.__executor = SourceExecutor(self, action)
   65|     def visitParser(self, action):
   66|         self.__executor = ParserExecutor(self, action)
   67|     def visitLinker(self, action):
   68|         self.__executor = LinkerExecutor(self, action)
   69|     def visitCacher(self, action):
   70|         self.__executor = CacherExecutor(self, action)
   71|     def visitFormat(self, action):
   72|         self.__executor = FormatExecutor(self, action)
   73|
   74| class SourceExecutor (Executor):
   75|     glob_cache = {}
   76|
   77|     def __init__(self, executor, action):
   78|         self.__executor = executor
   79|         self.__project = executor.project()
   80|         self.__action = action
   81|
   82|     def compile_glob(self, globstr):
   83|         """Returns a compiled regular expression for the given glob string. A
   84|         glob string is something like "*.?pp" which gets translated into
   85|         "^.*\..pp$". Compiled regular expressions are cached in a class
   86|         variable"""
   87|         if self.glob_cache.has_key(globstr):
   88|             return self.glob_cache[globstr]
   89|         glob = string.replace(globstr, '.', '\.')
   90|         glob = string.replace(glob, '?', '.')
   91|         glob = string.replace(glob, '*', '.*')
   92|         glob = re.compile('^%s$'%glob)
   93|         self.glob_cache[globstr] = glob
   94|         return glob
   95|     def get_output_names(self):
   96|         """Expands the paths into a list of filenames, and return those"""
   97|         # Use an 'open' list which contains 3-tuples of 'recurse?', 'path' and
   98|         # 'glob'
   99|         def path_to_tuple(path_obj):
  100|             if path_obj.type == 'Simple':
  101|                if path_obj.dir.find('/') == -1:
  102|                    return (0, '.', path_obj.dir)
  103|                return (0,)+os.path.split(path_obj.dir)
  104|             elif path_obj.type == 'Dir':
  105|                return (0, path_obj.dir, path_obj.glob)
  106|          else:
  107|                return (1, path_obj.dir, path_obj.glob)
  108|
  109|         names = []
  110|         for rule in self.__action.rules():
  111|             if rule.type == 'Simple':
  112|                # Add the specified files if they exist
  113|                for file in rule.files:
  114|                try:
  115|                       filepath = os.path.abspath(file)
  116|                       stats = os.stat(filepath)
  117|                       if stat.S_ISREG(stats[stat.ST_MODE]):
  118|                           names.append((file, stats[stat.ST_MTIME]))
  119|                    except OSError, e:
  120|                       print "Warning:",e
  121|             elif rule.type == 'Glob':
  122|                glob = self.compile_glob(rule.glob)
  123|                dirs = list(rule.dirs)
  124|                while len(dirs):
  125|                    dir = dirs.pop(0)
  126|                    # Get list of files in this dir
  127|                    for file in os.listdir(dir):
  128|                # Stat file
  129|                       filepath = os.path.join(dir, file)
  130|                       stats = os.stat(filepath)
  131|                       if stat.S_ISDIR(stats[stat.ST_MODE]) and rule.recursive:
  132|                           # Add to list of dirs to check
  133|                          dirs.append(filepath)
  134|                       elif stat.S_ISREG(stats[stat.ST_MODE]):
  135|                           # Check if matches glob
  136|                         if glob.match(file):
  137|                              # Strip any "./" from the start of the name
  138|                              if len(filepath) > 2 and filepath[:2] == "./":
  139|                              filepath = filepath[2:]
  140|                              names.append((filepath, stats[stat.ST_MTIME]))
  141|             elif rule.type == 'Exclude':
  142|                glob = self.compile_glob(rule.glob)
  143|                old_names = names
  144|                names = []
  145|                for name in old_names:
  146|                    # Only re-add ones that don't match
  147|                    if not glob.match(name[0]):
  148|                       names.append(name)
  149|
  150|         return names
  151|     def get_output(self, name):
  152|         """Raises an exception, since the SourceAction is only used to
  153|         identify files -- the loading is done by the parsers themselves"""
  154|         raise 'ParseError', "SourceAction doesn't support get_output method."
  155|
  156| class ParserExecutor (Executor):
  157|     """Parses the input files given by its input SourceActions"""
  158|     def __init__(self, executor, action):
  159|         self.__executor = executor
  160|         self.__project = executor.project()
  161|         self.__action = action
  162|         self.__name_map = {}
  163|         self.__is_multi = None
  164|
  165|     def is_multi(self):
  166|         """Returns true if this parser parses multiple source files at once.
  167|         This is determined by the parser type and config options."""
  168|         if self.__is_multi is not None: return self.__is_multi
  169|         config = self.__action.config()
  170|         module = config.name
  171|         if module == "C++":
  172|             if hasattr(config, 'multiple_files'):
  173|                self.__is_multi = config.multiple_files
  174|          else:
  175|                self.__is_multi = 0
  176|         else:
  177|             self.__is_multi = 0
  178|         return self.__is_multi
  179|
  180|     def get_output_names(self):
  181|         """Returns the names from all connected SourceActions, and caches
  182|         which source action they came from"""
  183|         names = []
  184|         # for each input source action...
  185|         for source_action in self.__action.inputs():
  186|             source = self.__executor.create(source_action)
  187|             source_names = source.get_output_names()
  188|             names.extend(source_names)
  189|             for name, timestamp in source_names:
  190|                self.__name_map[name] = source
  191|         # Check multi-file
  192|         if self.is_multi():
  193|             # Only return first name
  194|             return names[0:1]
  195|         return names
  196|
  197|     def get_output(self, name):
  198|         if self.__executor.verbose:
  199|             print self.__action.name()+": Parsing "+name
  200|             sys.stdout.flush()
  201|         config = self.__action.config()
  202|         parser = self.get_parser()
  203|         # Do the parse
  204|         extra_files = None
  205|         if self.is_multi():
  206|             # Find all source files
  207|             extra_files = self.__name_map.keys()
  208|         ast = parser.parse(name, extra_files, [], config)
  209|         # Return the parsed AST
  210|         return ast
  211|
  212|     def get_parser(self):
  213|         """Returns the parser module, using the module name stored in the
  214|         Action object. If the module cannot be loaded, this method will raise
  215|         an exception."""
  216|         module = self.__action.config().name
  217|         try:
  218|             parser = Util._import("Synopsis.Parser." + module)
  219|         except ImportError:
  220|             # TODO: invent some exception to pass up
  221|             sys.stderr.write(cmdname + ": Could not import parser `" + name + "'\n")
  222|             sys.exit(1)
  223|         return parser
  224|
  225|
  226|
  227| class LinkerExecutor (Executor):
  228|     def __init__(self, executor, action):
  229|         self.__executor = executor
  230|         self.__project = executor.project()
  231|         self.__action = action
  232|         self.__inputs = {}
  233|         self.__names = {}
  234|     def get_output_names(self):
  235|         """Links multiple ASTs together, and/or performs other manipulative
  236|         actions on a single AST."""
  237|         # Figure out the output name
  238|         myname = self.__action.name()
  239|         if not myname: myname = 'LinkerOutput'
  240|         myname = myname.replace(' ', '_')
  241|         # Figure out the timestamp
  242|         ts = 0
  243|         for input in self.__action.inputs():
  244|             exec_obj = self.__executor.create(input)
  245|             self.__inputs[input] = exec_obj
  246|             names = exec_obj.get_output_names()
  247|             self.__names[input] = names
  248|             for name, timestamp in names:
  249|                if timestamp > ts:
  250|                   ts = timestamp
  251|         return [ (myname, ts) ]
  252|
  253|     def get_output(self, name):
  254|         # Get input AST(s), probably from a cacher, source or other linker
  255|         # Prepare the inputs
  256|         for input in self.__action.inputs():
  257|             exec_obj = self.__inputs[input]
  258|             names = self.__names[input]
  259|             for iname, timestamp in names:
  260|                exec_obj.prepare_output(iname, 0)
  261|         # Merge the inputs into one AST
  262|         if self.__executor.verbose:
  263|             print self.__action.name()+": Linking "+name
  264|             sys.stdout.flush()
  265|         ast = AST.AST()
  266|         for input in self.__action.inputs():
  267|             exec_obj = self.__inputs[input]
  268|             names = self.__names[input]
  269|             for iname, timestamp in names:
  270|                input_ast = exec_obj.get_output(iname)
  271|                ast.merge(input_ast)
  272|         # Pass merged AST to linker
  273|         module = self.get_linker()
  274|         module.resolve([], ast, self.__action.config())
  275|         # Return linked AST
  276|         return ast
  277|
  278|     def get_linker(self):
  279|         """Returns the linker module, using the module name stored in the
  280|         Action object. If the module cannot be loaded, this method will raise
  281|         an exception."""
  282|         module = self.__action.config().name
  283|         try:
  284|             linker = Util._import("Synopsis.Linker." + module)
  285|         except ImportError:
  286|             # TODO: invent some exception to pass up
  287|             sys.stderr.write(cmdname + ": Could not import linker `" + name + "'\n")
  288|             sys.exit(1)
  289|         return linker
  290|
  291|
  292| class CacherExecutor (Executor):
  293|     def __init__(self, executor, action):
  294|         self.__executor = executor
  295|         self.__project = executor.project()
  296|         self.__action = action
  297|         self.__execs = {}
  298|         self.__timestamps = {}
  299|         self.__input_map = {}
  300|         self.__names = []
  301|     def get_output_names(self):
  302|         action = self.__action
  303|         if action.file:
  304|             # Find file
  305|             stats = os.stat(action.file)
  306|             return action.file, stats[stat.ST_MTIME]
  307|         names = self.__names
  308|         # TODO: add logic here to check timestamps, etc
  309|         for input in action.inputs():
  310|             exec_obj = self.__executor.create(input)
  311|             self.__execs[input] = exec_obj
  312|             in_names = exec_obj.get_output_names()
  313|             names.extend(in_names)
  314|             # Remember which input for each name
  315|             for name, timestamp in in_names:
  316|                self.__input_map[name] = exec_obj
  317|                self.__timestamps[name] = timestamp
  318|         return names
  319|     def get_cache_filename(self, name):
  320|         """Returns the filename of the cache for the input with the given
  321|         name"""
  322|         jname = str(name)
  323|         if jname[0] == '/': jname = jname[1:]
  324|         cache_filename = os.path.join(self.__action.dir, jname)
  325|         if cache_filename[-4:] != ".syn":
  326|             cache_filename = cache_filename + ".syn"
  327|         return cache_filename
  328|     def _get_timestamp(self, filename):
  329|         """Returns the timestamp of the given file, or 0 if not found"""
  330|         try:
  331|             stats = statcache.stat(filename)
  332|             return stats[stat.ST_MTIME]
  333|         except OSError:
  334|             # NB: will catch any type of error caused by the stat call, not
  335|             # just Not Found
  336|             return 0
  337|
  338|     def _is_up_to_date(self, name, cache_filename):
  339|         """Returns true if the input 'name' in file 'cache_filename' is up to
  340|         date. Checks all dependencies"""
  341|         # Check timestamp on cache
  342|         cache_ts = self._get_timestamp(cache_filename)
  343|         if cache_ts == 0 or cache_ts < self.__timestamps[name]:
  344|             # Cache doesn't exist or is older than file
  345|             return 0
  346|         # Load the deps from the file to check that they are all okay
  347|         try:
  348|             deps = AST.load_deps(cache_filename)
  349|         except:
  350|             # Hopefully wrong file version - must create anew
  351|             msg = sys.exc_info()[1]
  352|             print "Warning: Forcing rebuild due to error (%s)"%msg
  353|             return 0
  354|         # Decide basename to use. Must end in a /
  355|         basename = None
  356|         if hasattr(self.__action, 'basename'):
  357|             basename = self.__action.basename
  358|             if len(basename) and basename[-1] != '/':
  359|                basename = basename + '/'
  360|         # Check each dep
  361|         for filename, timestamp in deps:
  362|             # Must match exactly (eg: installing headers from a
  363|             # tarball/package gives files their original timestamp, which may
  364|             # be earlier than the timestamp we last saw!
  365|             if basename and filename[0] != '/':
  366|                # Presume need to prepend basename
  367|                filename = basename + filename
  368|             if timestamp != self._get_timestamp(filename):
  369|                return 0
  370|         # All deps checked out okay!
  371|         return 1
  372|
  373|     def prepare_output(self, name, keep):
  374|         """Prepares the output, which means that it parses it, saves it to
  375|         disk, and forgets about it. If keep is set, return the AST anyway"""
  376|         action = self.__action
  377|         # Check if is a single-file loader (not cache)
  378|         if action.file: return
  379|         cache_filename = self.get_cache_filename(name)
  380|         if self._is_up_to_date(name, cache_filename):
  381|           return
  382|         # Need to regenerate. Find input
  383|         exec_obj = self.__input_map[name]
  384|         ast = exec_obj.get_output(name)
  385|         # Save to cache file
  386|         try:
  387|             # Create dir for file
  388|             dir = os.path.dirname(cache_filename)
  389|             if not os.path.exists(dir):
  390|                print "Warning: creating directory",dir
  391|                os.makedirs(dir)
  392|             AST.save(cache_filename, ast)
  393|         except:
  394|             exc, msg = sys.exc_info()[0:2]
  395|             print "Warning: %s: %s"%(exc, msg)
  396|         if keep: return ast
  397|         elif gc:
  398|             # Try to free up mem
  399|             ast = None
  400|             #gc.set_debug(gc.DEBUG_STATS)
  401|             gc.collect()
  402|
  403|     def get_output(self, name):
  404|         """Gets the output"""
  405|         action = self.__action
  406|         # Check if is a single-file loader (not cache)
  407|         if action.file:
  408|             return AST.load(action.file)
  409|         # Double-check preparedness (may generate output)
  410|         ast = self.prepare_output(name, 1)
  411|         if ast: return ast
  412|         # Should now be able to just load from cache file
  413|         return AST.load(self.get_cache_filename(name))
  414|
  415| class FormatExecutor (Executor):
  416|     """Formats the input AST given by its single input"""
  417|     def __init__(self, executor, action):
  418|         self.__executor = executor
  419|         self.__project = executor.project()
  420|         self.__action = action
  421|         self.__input_exec = None
  422|
  423|     def get_output_names(self):
  424|         inputs = self.__action.inputs()
  425|         if len(inputs) != 1:
  426|             raise 'Error', 'Formatter takes exactly one input AST'
  427|         self.__input_exec = self.__executor.create(inputs[0])
  428|         names = self.__input_exec.get_output_names()
  429|         if len(names) != 1:
  430|             raise 'Error', 'Formatter takes exactly one input AST'
  431|         return names
  432|
  433|     def get_output(self, name):
  434|         # Get input AST, probably from a cache or linker
  435|         ast = self.__input_exec.get_output(name)
  436|         module = self.__action.config().name
  437|         # Pass AST to formatter
  438|         if self.__executor.verbose:
  439|             print self.__action.name()+": Formatting "+name
  440|             sys.stdout.flush()
  441|         try:
  442|             formatter = Util._import("Synopsis.Formatter." + module)
  443|         except ImportError:
  444|             sys.stderr.write(cmdname + ": Could not import formatter `" + module + "'\n")
  445|             sys.exit(1)
  446|         formatter.format([], ast, self.__action.config())
  447|         # Finalize AST (incl. maybe write to disk with timestamp info)
  448|         return None
  449|
  450|