Synopsis - Synopsis/Linker/Comments.py

    1| # $Id: Comments.py,v 1.18 2003/01/20 06:43:02 chalky Exp $
    2| #
    3| # This file is a part of Synopsis.
    4| # Copyright (C) 2000, 2001 Stephen Davies
    5| # Copyright (C) 2000, 2001 Stefan Seefeld
    6| #
    7| # Synopsis is free software; you can redistribute it and/or modify it
    8| # under the terms of the GNU General Public License as published by
    9| # the Free Software Foundation; either version 2 of the License, or
   10| # (at your option) any later version.
   11| #
   12| # This program is distributed in the hope that it will be useful,
   13| # but WITHOUT ANY WARRANTY; without even the implied warranty of
   14| # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   15| # General Public License for more details.
   16| #
   17| # You should have received a copy of the GNU General Public License
   18| # along with this program; if not, write to the Free Software
   19| # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
   20| # 02111-1307, USA.
   21| #
   22| # $Log: Comments.py,v $
   23| # Revision 1.18  2003/01/20 06:43:02  chalky
   24| # Refactored comment processing. Added AST.CommentTag. Linker now determines
   25| # comment summary and extracts tags. Increased AST version number.
   26| #
   27| # Revision 1.17  2002/10/11 11:07:53  chalky
   28| # Added missing parent __init__ call in Group
   29| #
   30| # Revision 1.16  2002/10/11 05:57:17  chalky
   31| # Support suspect comments
   32| #
   33| # Revision 1.15  2002/09/20 10:34:52  chalky
   34| # Add a comment parser for plain old // comments
   35| #
   36| # Revision 1.14  2002/08/23 04:37:26  chalky
   37| # Huge refactoring of Linker to make it modular, and use a config system similar
   38| # to the HTML package
   39| #
   40| # Revision 1.13  2002/04/26 01:21:14  chalky
   41| # Bugs and cleanups
   42| #
   43| # Revision 1.12  2002/04/25 23:54:09  chalky
   44| # Fixed bug caused by new re module in python 2.1 handling groups differently
   45| #
   46| # Revision 1.11  2001/06/11 10:37:49  chalky
   47| # Better grouping support
   48| #
   49| # Revision 1.10  2001/06/08 21:04:38  stefan
   50| # more work on grouping
   51| #
   52| # Revision 1.9  2001/06/08 04:50:13  stefan
   53| # add grouping support
   54| #
   55| # Revision 1.8  2001/05/25 13:45:49  stefan
   56| # fix problem with getopt error reporting
   57| #
   58| # Revision 1.7  2001/04/05 16:21:56  chalky
   59| # Allow user-specified comment processors
   60| #
   61| # Revision 1.6  2001/04/05 11:11:39  chalky
   62| # Many more comments
   63| #
   64| # Revision 1.5  2001/02/12 04:08:09  chalky
   65| # Added config options to HTML and Linker. Config demo has doxy and synopsis styles.
   66| #
   67| # Revision 1.4  2001/02/07 17:00:43  chalky
   68| # Added Qt-style comments support
   69| #
   70| # Revision 1.3  2001/02/07 14:13:51  chalky
   71| # Small fixes.
   72| #
   73| # Revision 1.2  2001/02/07 09:57:00  chalky
   74| # Support for "previous comments" in C++ parser and Comments linker.
   75| #
   76| # Revision 1.1  2001/02/06 06:55:18  chalky
   77| # Initial commit. Support SSD and Java comments. Selection of comments only
   78| # (same as ssd,java formatters in HTML)
   79| #
   80| #
   81|
   82| """Comment Processor"""
   83| # System modules
   84| import sys, string, re, getopt, types
   85|
   86| # Synopsis modules
   87| from Synopsis.Core import AST, Util
   88|
   89| from Synopsis.Linker.Linker import config, Operation
   90|
   91| class CommentProcessor (AST.Visitor):
   92|     """Base class for comment processors.
   93|
   94|     This is an AST visitor, and by default all declarations call process()
   95|     with the current declaration. Subclasses may override just the process
   96|     method.
   97|     """
   98|     def processAll(self, declarations):
   99|         for decl in declarations:
  100|             decl.accept(self)
  101|     def process(self, decl):
  102|         """Process comments for the given declaration"""
  103|     def visitDeclaration(self, decl):
  104|         self.process(decl)
  105|
  106| class SSDComments (CommentProcessor):
  107|     """A class that selects only //. comments."""
  108|     __re_star = r'/\*(.*?)\*/'
  109|     __re_ssd = r'^[ \t]*//\. ?(.*)$'
  110|     def __init__(self):
  111|         "Compiles the regular expressions"
  112|         self.re_star = re.compile(SSDComments.__re_star, re.S)
  113|         self.re_ssd = re.compile(SSDComments.__re_ssd, re.M)
  114|     def process(self, decl):
  115|         "Calls processComment on all comments"
  116|         map(self.processComment, decl.comments())
  117|     def processComment(self, comment):
  118|         """Replaces the text in the comment. It calls strip_star() first to
  119|         remove all multi-line star comments, then follows with parse_ssd().
  120|         """
  121|         text = comment.text()
  122|         text = self.parse_ssd(self.strip_star(text))
  123|         comment.set_text(text)
  124|     def strip_star(self, str):
  125|         """Strips all star-format comments from the string"""
  126|         mo = self.re_star.search(str)
  127|         while mo:
  128|             str = str[:mo.start()] + str[mo.end():]
  129|             mo = self.re_star.search(str)
  130|         return str
  131|     def parse_ssd(self, str):
  132|         """Filters str and returns just the lines that start with //."""
  133|         return string.join(self.re_ssd.findall(str),'\n')
  134|
  135| class JavaComments (CommentProcessor):
  136|     """A class that formats java /** style comments"""
  137|     __re_java = r"/\*\*[ \t]*(?P<text>.*)(?P<lines>(\n[ \t]*\*.*)*?)(\n[ \t]*)?\*/"
  138|     __re_line = r"\n[ \t]*\*[ \t]*(?P<text>.*)"
  139|     def __init__(self):
  140|         "Compiles the regular expressions"
  141|         self.re_java = re.compile(JavaComments.__re_java)
  142|         self.re_line = re.compile(JavaComments.__re_line)
  143|     def process(self, decl):
  144|         "Calls processComment on all comments"
  145|         map(self.processComment, decl.comments())
  146|     def processComment(self, comment):
  147|         """Finds comments in the java format. The format is  /** ... */, and
  148|         it has to cater for all four line forms: "/** ...", " * ...", " */" and
  149|         the one-line "/** ... */".
  150|         """
  151|         text = comment.text()
  152|         text_list = []
  153|         mo = self.re_java.search(text)
  154|         while mo:
  155|             text_list.append(mo.group('text'))
  156|             lines = mo.group('lines')
  157|             if lines:
  158|                mol = self.re_line.search(lines)
  159|                while mol:
  160|                    text_list.append(mol.group('text'))
  161|                    mol = self.re_line.search(lines, mol.end())
  162|             mo = self.re_java.search(text, mo.end())
  163|         text = string.join(text_list,'\n')
  164|         comment.set_text(text)
  165|
  166| class SSComments (CommentProcessor):
  167|     """A class that selects only // comments."""
  168|     __re_star = r'/\*(.*?)\*/'
  169|     __re_ss = r'^[ \t]*// ?(.*)$'
  170|     def __init__(self):
  171|         "Compiles the regular expressions"
  172|         self.re_star = re.compile(SSComments.__re_star, re.S)
  173|         self.re_ss = re.compile(SSComments.__re_ss, re.M)
  174|     def process(self, decl):
  175|         "Calls processComment on all comments"
  176|         map(self.processComment, decl.comments())
  177|     def processComment(self, comment):
  178|         """Replaces the text in the comment. It calls strip_star() first to
  179|         remove all multi-line star comments, then follows with parse_ss().
  180|         """
  181|         text = comment.text()
  182|         text = self.parse_ss(self.strip_star(text))
  183|         comment.set_text(text)
  184|     def strip_star(self, str):
  185|         """Strips all star-format comments from the string"""
  186|         mo = self.re_star.search(str)
  187|         while mo:
  188|             str = str[:mo.start()] + str[mo.end():]
  189|             mo = self.re_star.search(str)
  190|         return str
  191|     def parse_ss(self, str):
  192|         """Filters str and returns just the lines that start with //"""
  193|         return string.join(self.re_ss.findall(str),'\n')
  194|
  195|
  196| class QtComments (CommentProcessor):
  197|     """A class that finds Qt style comments. These have two styles: //! ...
  198|     and /*! ... */. The first means "brief comment" and there must only be
  199|     one. The second type is the detailed comment."""
  200|     __re_brief = r"[ \t]*//!(.*)"
  201|     __re_detail = r"[ \t]*/\*!(.*)\*/[ \t\n]*"
  202|     def __init__(self):
  203|         "Compiles the regular expressions"
  204|         self.re_brief = re.compile(self.__re_brief)
  205|         self.re_detail = re.compile(self.__re_detail, re.S)
  206|     def process(self, decl):
  207|         "Calls processComment on all comments"
  208|         map(self.processComment, decl.comments())
  209|     def processComment(self, comment):
  210|         "Matches either brief or detailed comments"
  211|         text = comment.text()
  212|         mo = self.re_brief.match(text)
  213|         if mo:
  214|             comment.set_text(mo.group(1))
  215|           return
  216|         mo = self.re_detail.match(text)
  217|         if mo:
  218|             comment.set_text(mo.group(1))
  219|           return
  220|         comment.set_text('')
  221|
  222| class Transformer (CommentProcessor):
  223|     """A class that creates a new AST from an old one. This is a helper base for
  224|     more specialized classes that manipulate the AST based on the comments in the nodes"""
  225|     def __init__(self):
  226|         """Constructor"""
  227|         self.__scopestack = []
  228|         self.__currscope = []
  229|     def processAll(self, declarations):
  230|         """Overrides the default processAll() to setup the stack"""
  231|         for decl in declarations: decl.accept(self)
  232|         declarations[:] = self.__currscope
  233|     def push(self):
  234|         """Pushes the current scope onto the stack and starts a new one"""
  235|         self.__scopestack.append(self.__currscope)
  236|         self.__currscope = []
  237|     def pop(self, decl):
  238|         """Pops the current scope from the stack, and appends the given
  239|         declaration to it"""
  240|         self.__currscope = self.__scopestack.pop()
  241|         self.__currscope.append(decl)
  242|     def add(self, decl):
  243|         """Adds the given decl to the current scope"""
  244|         self.__currscope.append(decl)
  245|     def currscope(self):
  246|         """Returns the current scope: a list of declarations"""
  247|         return self.__currscope
  248|
  249| class Dummies (Transformer):
  250|     """A class that deals with dummy declarations and their comments. This
  251|     class just removes them."""
  252|     def visitDeclaration(self, decl):
  253|         """Checks for dummy declarations"""
  254|         if decl.type() == "dummy": return
  255|         self.add(decl)
  256|     def visitScope(self, scope):
  257|         """Visits all children of the scope in a new scope. The value of
  258|         currscope() at the end of the list is used to replace scope's list of
  259|         declarations - hence you can remove (or insert) declarations from the
  260|         list. Such as dummy declarations :)"""
  261|         self.push()
  262|         for decl in scope.declarations(): decl.accept(self)
  263|         scope.declarations()[:] = self.currscope()
  264|         self.pop(scope)
  265|     def visitEnum(self, enum):
  266|         """Does the same as visitScope, but for the enum's list of
  267|         enumerators"""
  268|         self.push()
  269|         for enumor in enum.enumerators(): enumor.accept(self)
  270|         enum.enumerators()[:] = self.currscope()
  271|         self.pop(enum)
  272|     def visitEnumerator(self, enumor):
  273|         """Removes dummy enumerators"""
  274|         if enumor.type() == "dummy": return #This wont work since Core.AST.Enumerator forces type to "enumerator"
  275|         if not len(enumor.name()): return # workaround.
  276|         self.add(enumor)
  277|
  278| class Previous (Dummies):
  279|     """A class that maps comments that begin with '<' to the previous
  280|     declaration"""
  281|     def processAll(self, declarations):
  282|         """decorates processAll() to initialise last and laststack"""
  283|         self.last = None
  284|         self.__laststack = []
  285|         for decl in declarations:
  286|             decl.accept(self)
  287|             self.last = decl
  288|         declarations[:] = self.currscope()
  289|     def push(self):
  290|         """decorates push() to also push 'last' onto 'laststack'"""
  291|         Dummies.push(self)
  292|         self.__laststack.append(self.last)
  293|         self.last = None
  294|     def pop(self, decl):
  295|         """decorates pop() to also pop 'last' from 'laststack'"""
  296|         Dummies.pop(self, decl)
  297|         self.last = self.__laststack.pop()
  298|     def visitScope(self, scope):
  299|         """overrides visitScope() to set 'last' after each declaration"""
  300|         self.removeSuspect(scope)
  301|         self.push()
  302|         for decl in scope.declarations():
  303|             decl.accept(self)
  304|             self.last = decl
  305|         scope.declarations()[:] = self.currscope()
  306|         self.pop(scope)
  307|     def checkPrevious(self, decl):
  308|         """Checks a decl to see if the comment should be moved. If the comment
  309|         begins with a less-than sign, then it is moved to the 'last'
  310|         declaration"""
  311|         if len(decl.comments()) and self.last:
  312|             first = decl.comments()[0]
  313|             if len(first.text()) and first.text()[0] == "<" and self.last:
  314|                first.set_suspect(0) # Remove suspect flag
  315|                first.set_text(first.text()[1:]) # Remove '<'
  316|                self.last.comments().append(first)
  317|                del decl.comments()[0]
  318|     def removeSuspect(self, decl):
  319|         """Removes any suspect comments from the declaration"""
  320|         non_suspect = lambda decl: not decl.is_suspect()
  321|         comments = decl.comments()
  322|         comments[:] = filter(non_suspect, comments)
  323|     def visitDeclaration(self, decl):
  324|         """Calls checkPrevious on the declaration and removes dummies"""
  325|         self.checkPrevious(decl)
  326|         self.removeSuspect(decl)
  327|         if decl.type() != "dummy":
  328|             self.add(decl)
  329|     def visitEnum(self, enum):
  330|         """Does the same as visitScope but for enum and enumerators"""
  331|         self.removeSuspect(enum)
  332|         self.push()
  333|         for enumor in enum.enumerators():
  334|             enumor.accept(self)
  335|             self.last = enumor
  336|         enum.enumerators()[:] = self.currscope()
  337|         self.pop(enum)
  338|     def visitEnumerator(self, enumor):
  339|         """Checks previous comment and removes dummies"""
  340|         self.removeSuspect(enumor)
  341|         self.checkPrevious(enumor)
  342|         if len(enumor.name()): self.add(enumor)
  343|
  344| class Grouper (Transformer):
  345|     """A class that detects grouping tags and moves the enclosed nodes into a subnode (a 'Group')"""
  346|     __re_open = r'^[ \t]*{ ?(.*)$'
  347|     __re_close = r'^[ \t]*} ?(.*)$'
  348|     def __init__(self):
  349|         Transformer.__init__(self)
  350|         self.re_open = re.compile(Grouper.__re_open, re.M)
  351|         self.re_close = re.compile(Grouper.__re_close, re.M)
  352|         self.__groups = []
  353|     def visitDeclaration(self, decl):
  354|         """Checks for grouping tags.
  355|         If an opening tag is found in the middle of a comment, a new Group is generated, the preceeding
  356|         comments are associated with it, and is pushed onto the scope stack as well as the groups stack.
  357|         """
  358|         comments = []
  359|         process_comments = decl.comments()
  360|         while len(process_comments):
  361|             c = process_comments.pop(0)
  362|             open_mo = self.re_open.search(c.text())
  363|             if open_mo:
  364|                # Open group. Name is remainder of line
  365|                 label = open_mo.group(1)
  366|                # The comment before the { becomes the group comment
  367|                if open_mo.start() > 0:
  368|                    text = c.text()[:open_mo.start()]
  369|                    comments.append(AST.Comment(text, c.file(), c.line()))
  370|                 group = AST.Group(decl.file(), decl.line(), decl.language(), "group", [label])
  371|                 group.comments()[:] = comments
  372|                 comments = []
  373|                # The comment after the { becomes the next comment to process
  374|                if open_mo.end() < len(c.text()):
  375|                    text = c.text()[open_mo.end()+1:]
  376|                    process_comments.insert(0, AST.Comment(text, c.file(), c.line()))
  377|                 self.push()
  378|                 self.__groups.append(group)
  379|                continue
  380|             close_mo = self.re_close.search(c.text())
  381|             if close_mo:
  382|                # Fixme: the close group doesn't handle things as well as open
  383|         # does!
  384|                 group = self.__groups.pop()
  385|                 group.declarations()[:] = self.currscope()
  386|                 self.pop(group)
  387|                # The comment before the } is ignored...? maybe post-comment?
  388|                # The comment after the } becomes the next comment to process
  389|                if close_mo.end() < len(c.text()):
  390|                    text = c.text()[close_mo.end()+1:]
  391|                    process_comments.insert(0, AST.Comment(text, c.file(), c.line()))
  392|             else: comments.append(c)
  393|         decl.comments()[:] = comments
  394|         self.add(decl)
  395|     def visitScope(self, scope):
  396|         """Visits all children of the scope in a new scope. The value of
  397|         currscope() at the end of the list is used to replace scope's list of
  398|         declarations - hence you can remove (or insert) declarations from the
  399|         list. Such as dummy declarations :)"""
  400|         self.push()
  401|         for decl in scope.declarations(): decl.accept(self)
  402|         scope.declarations()[:] = self.currscope()
  403|         self.pop(scope)
  404|     def visitEnum(self, enum):
  405|         """Does the same as visitScope, but for the enum's list of
  406|         enumerators"""
  407|         self.push()
  408|         for enumor in enum.enumerators(): enumor.accept(self)
  409|         enum.enumerators()[:] = self.currscope()
  410|         self.pop(enum)
  411|     def visitEnumerator(self, enumor):
  412|         """Removes dummy enumerators"""
  413|         if enumor.type() == "dummy": return #This wont work since Core.AST.Enumerator forces type to "enumerator"
  414|         if not len(enumor.name()): return # workaround.
  415|         self.add(enumor)
  416|
  417| class Summarizer (CommentProcessor):
  418|     """Splits comments into summary/detail parts."""
  419|     re_summary = r"[ \t\n]*(.*?\.)([ \t\n]|$)"
  420|     def __init__(self):
  421|         self.re_summary = re.compile(Summarizer.re_summary, re.S)
  422|     def process(self, decl):
  423|         """Combine and summarize the comments of this declaration."""
  424|         # First combine
  425|         comments = decl.comments()
  426|         if not len(comments):
  427|           return
  428|         comment = comments[0]
  429|         tags = comment.tags()
  430|         if len(comments) > 1:
  431|             # Should be rare to have >1 comment
  432|             for extra in comments[1:]:
  433|                tags.extend(extra.tags())
  434|                comment.set_text(comment.text() + extra.text())
  435|             del comments[1:]
  436|         # Now decide how much of the comment is the summary
  437|         text = comment.text()
  438|         mo = self.re_summary.match(text)
  439|         if mo:
  440|             # Set summary to the sentence
  441|             comment.set_summary(mo.group(1))
  442|         else:
  443|             # Set summary to whole text
  444|             comment.set_summary(text)
  445|
  446| class JavaTags (CommentProcessor):
  447|     """Extracts javadoc-style @tags from the end of comments."""
  448|
  449|     # The regexp to use for finding all the tags
  450|     _re_tags = '\n[ \t]*(?P<tags>@[a-zA-Z]+[ \t]+.*)'
  451|
  452|     def __init__(self):
  453|         self.re_tags = re.compile(self._re_tags,re.M|re.S)
  454|     def process(self, decl):
  455|         """Extract tags from each comment of the given decl"""
  456|         for comment in decl.comments():
  457|             # Find tags
  458|             text = comment.text()
  459|             mo = self.re_tags.search(text)
  460|             if not mo:
  461|                continue
  462|             # A lambda to use in the reduce expression
  463|             joiner = lambda x,y: len(y) and y[0]=='@' and x+[y] or x[:-1]+[x[-1]+' '+y]
  464|
  465|             tags = mo.group('tags')
  466|             text = text[:mo.start('tags')]
  467|             # Split the tag section into lines
  468|             tags = map(string.strip, string.split(tags,'\n'))
  469|             # Join non-tag lines to the previous tag
  470|             tags = reduce(joiner, tags, [])
  471|             # Split the tag lines into @name, rest-of-line pairs
  472|             tags = map(lambda line: string.split(line,' ',1), tags)
  473|             # Convert the pairs into CommentTag objects
  474|             tags = map(lambda pair: AST.CommentTag(pair[0], pair[1]), tags)
  475|             # Store back in comment
  476|             comment.set_text(text)
  477|             comment.tags().extend(tags)
  478|
# Registry of the comment processors defined in this module, keyed by the
# short name under which a processor can be requested as a string entry of
# config.comment_processors (see Comments.__init__).
processors = {
    'ssd': SSDComments,
    'ss' : SSComments,
    'java': JavaComments,
    'qt': QtComments,
    'dummy': Dummies,
    'prev': Previous,
    'group': Grouper,
    'summary' : Summarizer,
    'javatags' : JavaTags,
}
  490|
  491| class Comments(Operation):
  492|     def __init__(self):
  493|         """Constructor, parses the config object"""
  494|         self.processor_list = []
  495|
  496|         if hasattr(config, 'comment_processors'):
  497|             for proc in config.comment_processors:
  498|                if type(proc) == types.StringType:
  499|                    if processors.has_key(proc):
  500|                       self.processor_list.append(processors[proc]())
  501|                else:
  502|                       raise ImportError, 'No such processor: %s'%(proc,)
  503|                elif type(proc) == types.TupleType:
  504|                    mod = Util._import(proc[0])
  505|                    clas = getattr(mod, proc[1])
  506|                    self.processor_list.append(clas())
  507|
  508|     def execute(self, ast):
  509|         declarations = ast.declarations()
  510|         for processor in self.processor_list:
  511|             processor.processAll(declarations)
  512|
# Module-level alias for this module's Operation class.
# NOTE(review): presumably the Linker looks this name up when loading the
# operation -- confirm against Synopsis.Linker.Linker.
linkerOperation = Comments