Modules |
Files |
Inheritance Tree |
Inheritance Graph |
Name Index |
Config
File: Synopsis/Parser/Python/exparse.py
1| """Simple code to extract class & function docstrings from a module.
2|
3| This code is used as an example in the library reference manual in the
4| section on using the parser module. Refer to the manual for a thorough
5| discussion of the operation of this code.
6|
7| The code has been extended by Stephen Davies for the Synopsis project. It now
8| also recognises parameter names and values, and baseclasses. Names are now
9| returned in order also.
10| """
11|
12| import symbol
13| import token
14| import types
15| import string
16| import sys
17| import os
18|
19| import pprint
20|
# Path to the currently-being-processed package
packagepath = ''
# scoped name of the currently-being-processed package
packagename = []

def findModulePath(module):
    """Return the scoped name (a list of name components) for `module'.

    module
        Name of the module, i.e. the first component of an imported name.

    The result depends on the module-level `packagepath' and `packagename':

      - if `packagepath' is empty, an empty list is returned;
      - if `module' exists inside `packagepath' (either as given or with
        a '.py', '.pyc' or '.so' extension), the module is taken to be
        part of the current package and `packagename + [module]' is
        returned;
      - otherwise `[module]' is returned.

    The globals are only read.  Earlier versions appended a '/' to the
    global `packagepath' on every call, so the setting kept growing.
    """
    if not packagepath:
        return []
    # import from inside current package
    for ext in ('', '.py', '.pyc', '.so'):
        if os.path.exists(os.path.join(packagepath, module + ext)):
            return packagename + [module]
    return [module]
37|
def format(tree, depth=-1):
    """Return `tree' with node numbers replaced by readable names.

    tree
        A parse tree tuple, or any element of one.

    depth
        Number of nesting levels to expand.  A tuple reached when the
        remaining depth is 0 is replaced by the string '...'.  The
        default of -1 never reaches 0, so the whole tree is expanded.

    Integers are looked up in symbol.sym_name first and token.tok_name
    second; an integer found in neither table is returned unchanged, as
    is any value that is neither an integer nor a tuple.
    """
    kind = type(tree)
    # Exact type tests (not isinstance), matching the types.IntType /
    # types.TupleType comparisons this function has always used.
    if kind is int:
        return symbol.sym_name.get(tree, token.tok_name.get(tree, tree))
    if kind is not tuple:
        return tree
    if depth == 0:
        return '...'
    # The head of a node is always formatted, whatever the remaining depth.
    head = format(tree[0])
    branches = [format(branch, depth - 1) for branch in tree[1:]]
    return tuple([head] + branches)
56|
def stringify(tree):
    """Flatten `tree' into a single string.

    Integers (the symbol and token numbers) contribute nothing, tuples
    contribute the concatenation of their stringified elements, and any
    other value contributes str(value).  Nothing is inserted between the
    pieces.
    """
    kind = type(tree)
    # Exact type tests, equivalent to the types.IntType / types.TupleType
    # comparisons used elsewhere in this module.
    if kind is int:
        return ''
    if kind is tuple:
        return ''.join([stringify(child) for child in tree])
    return str(tree)
66|
def get_docs(file):
    """Retrieve information from the parse tree of a source file.

    file
        Name of the file to read Python source code from, or an already
        open file object.  A file object must provide read() and a
        `name' attribute.

    Returns a ModuleInfo built from the parse tree; the module is named
    after the file's base name with the extension removed.

    A file opened here (because a name was passed) is closed again
    before returning; a file object supplied by the caller is left open.
    """
    import parser
    opened_here = type(file) == types.StringType
    if opened_here:
        file = open(file)
    try:
        source = file.read()
        basename = os.path.basename(os.path.splitext(file.name)[0])
    finally:
        if opened_here:
            file.close()
    ast = parser.suite(source)
    tup = parser.ast2tuple(ast)
    return ModuleInfo(tup, basename)
81|
# Small helpers for picking apart sequences of parse tree nodes.  A node is
# a sequence whose first element is its symbol/token number.

def name_filter(x):
    """Return true if node `x' is a NAME token."""
    return x[0] == token.NAME

def second_map(x):
    """Return the second element of node `x'."""
    return x[1]

def rest_map(x):
    """Return everything but the first element of node `x'."""
    return x[1:]

def filter_names(list):
    """Keep only the NAME tokens of `list'."""
    return filter(name_filter, list)

def map_second(list):
    """Return the second element of every node in `list'."""
    return map(second_map, list)

def map_rest(list):
    """Return every node in `list' with its first element removed."""
    return map(rest_map, list)

def get_names_only(list):
    """Return the second element of each NAME token in `list', in order."""
    return map(second_map, filter(name_filter, list))
90|
91|
92|
class SuiteInfoBase:
    """Information extracted from one suite of a parse tree.

    Records the suite's docstring, the classes and functions defined
    directly inside it (names are kept in the order they appear in the
    tree) and the names bound by its import statements.
    """
    _docstring = ''
    _name = ''

    def __init__(self, tree = None, env={}):
        """Create empty tables and, if `tree' is given, fill them from it.

        tree
            Parse tree tuple of the suite to analyse, or None.

        env
            Dictionary mapping known names to scoped names (lists of
            name components).  It is copied, never modified, so the
            shared default is safe.
        """
        self._env = {}
        self._env.update(env)
        self._class_info = {}
        self._class_names = []
        self._function_info = {}
        self._function_names = []
        if tree:
            self._extract_info(tree)

    def _extract_info(self, tree):
        """Extract the docstring, definitions and imports from `tree'."""
        # extract docstring
        found, vars = 0, {}
        if len(tree) == 2:
            # The only child is matched against the simple_stmt part of
            # the pattern (presumably a one-line suite -- TODO confirm).
            found, vars = match(DOCSTRING_STMT_PATTERN[1], tree[1])
        elif len(tree) > 3:
            # The statement candidate is the node at index 3 (presumably
            # the one following NEWLINE and INDENT -- TODO confirm).
            # Shorter trees have no such node; indexing them used to
            # raise IndexError.
            found, vars = match(DOCSTRING_STMT_PATTERN, tree[3])
        if found:
            # 'docstring' is the source text of a STRING token; eval()
            # turns that literal into the string it denotes.
            self._docstring = eval(vars['docstring'])
        # discover inner definitions
        for node in tree[1:]:
            self._extract_definition(node)
            self._extract_import(node)

    def _extract_definition(self, node):
        """Record `node' if it is a function or class definition."""
        found, vars = match(COMPOUND_STMT_PATTERN, node)
        if not found:
            return
        cstmt = vars['compound']
        if cstmt[0] == symbol.funcdef:
            name = cstmt[2][1]
            self._function_info[name] = FunctionInfo(cstmt, env=self._env)
            self._function_names.append(name)
        elif cstmt[0] == symbol.classdef:
            name = cstmt[2][1]
            self._class_info[name] = ClassInfo(cstmt, env=self._env)
            self._class_names.append(name)

    def _extract_import(self, node):
        """Add the names bound by `node' to the environment if it is an
        import statement."""
        found, vars = match(IMPORT_STMT_PATTERN, node)
        if not found:
            return
        imp = vars['import_spec']
        if imp[0] != symbol.import_stmt:
            return
        keyword = imp[1][1]
        if keyword == 'import':
            # import dotted_name
            names = map_rest(filter(lambda x: x[0] == symbol.dotted_name, imp[2:]))
            imps = map(get_names_only, names)
            self._addImport(imps)
        elif keyword == 'from':
            # from dotted_name import name, name, ...
            name = get_names_only(imp[2][1:])
            imps = get_names_only(imp[4:])
            self._addFromImport(name, imps)
        else:
            print("Unknown import.")

    def _addImport(self, names):
        """Handle `import a.b, c': `names' is a list of dotted names, each
        a list of components.  The first component of every dotted name
        is mapped to the scoped name found by findModulePath."""
        for name in names:
            link = findModulePath(name[0])
            self._env[name[0]] = link

    def _addFromImport(self, module, names):
        """Handle `from module import names': `module' is a list of name
        components and `names' a list of imported names.  Each imported
        name is mapped to the module's scoped name plus that name."""
        base = findModulePath(module[0]) + module[1:]
        for name in names:
            link = base + [name]
            self._env[name] = link

    def get_docstring(self):
        """Return the docstring, or '' if none was found."""
        return self._docstring

    def get_name(self):
        """Return the name of this suite."""
        return self._name

    def get_class_names(self):
        """Return the names of the classes defined here, in order."""
        return self._class_names

    def get_class_info(self, name):
        """Return the ClassInfo for class `name'; KeyError if unknown."""
        return self._class_info[name]

    def __getitem__(self, name):
        """Return the info for class `name' or, failing that, function
        `name'; KeyError if neither exists."""
        try:
            return self._class_info[name]
        except KeyError:
            return self._function_info[name]
176|
177|
class SuiteFuncInfo:
    """Mixin class providing access to function names and info.

    It reads the `_function_names' list and `_function_info' dictionary
    of the instance it is mixed into.
    """

    def get_function_names(self):
        """Return the list of function names."""
        names = self._function_names
        return names

    def get_function_info(self, name):
        """Return the info stored for function `name'; KeyError if
        there is none."""
        table = self._function_info
        return table[name]
186|
187|
class FunctionInfo(SuiteInfoBase, SuiteFuncInfo):
    """Information about one function definition.

    In addition to what SuiteInfoBase collects from the function body,
    the parameters are recorded as two parallel lists: the parameter
    texts ('*name' and '**name' for the variable-argument forms) and the
    text of each default value ('' when a parameter has no default).
    """

    def __init__(self, tree = None, env={}):
        """Analyse the funcdef tuple `tree'.

        tree
            The funcdef node, or None for an empty FunctionInfo.
            Passing None used to raise TypeError even though it is the
            declared default.

        env
            Name environment handed on to SuiteInfoBase.
        """
        self._params = []
        self._param_defaults = []
        if not tree:
            SuiteInfoBase.__init__(self, None, env)
            return
        self._name = tree[2][1]
        # The last child of the definition is its body.
        SuiteInfoBase.__init__(self, tree[-1] or None, env)
        if tree[3][0] != symbol.parameters:
            return
        if tree[3][2][0] != symbol.varargslist:
            return
        args = list(tree[3][2][1:])
        while args:
            node = args.pop(0)
            kind = node[0]
            if kind == token.COMMA:
                pass
            elif kind == symbol.fpdef:
                self._params.append(stringify(node))
                self._param_defaults.append('')
            elif kind == token.EQUAL:
                # The node after '=' is the default of the last parameter.
                self._param_defaults[-1] = stringify(args.pop(0))
            elif kind == token.DOUBLESTAR:
                self._params.append('**'+stringify(args.pop(0)))
                self._param_defaults.append('')
            elif kind == token.STAR:
                self._params.append('*'+stringify(args.pop(0)))
                self._param_defaults.append('')
            else:
                print("Unknown symbol: %s" % (node,))

    def get_params(self):
        """Return the list of parameter texts."""
        return self._params

    def get_param_defaults(self):
        """Return the list of default value texts, parallel to
        get_params()."""
        return self._param_defaults
220|
221|
class ClassInfo(SuiteInfoBase):
    """Information about one class definition.

    In addition to what SuiteInfoBase collects from the class body, the
    base classes are recorded.  The functions found in the body are
    exposed as the methods of the class.
    """

    def __init__(self, tree = None, env={}):
        """Analyse the classdef tuple `tree'.

        tree
            The classdef node, or None for an empty ClassInfo.  Passing
            None used to raise TypeError even though it is the declared
            default.

        env
            Name environment; used to resolve base class names and
            handed on to SuiteInfoBase.
        """
        self._bases = []
        if not tree:
            SuiteInfoBase.__init__(self, None, env)
            return
        self._name = tree[2][1]
        # The last child of the definition is its body.
        SuiteInfoBase.__init__(self, tree[-1] or None, env)
        if tree[4][0] != symbol.testlist:
            # No base class list.
            return
        for test in tree[4][1:]:
            found, vars = match(TEST_NAME_PATTERN, test)
            if not (found and 'power' in vars):
                continue
            power = vars['power']
            if power[0] != symbol.power: continue
            atom = power[1]
            if atom[0] != symbol.atom: continue
            if atom[1][0] != token.NAME: continue
            # Collect the dotted name: the atom, then every trailer whose
            # third element is a NAME token.
            name = [atom[1][1]]
            for trailer in power[2:]:
                if trailer[2][0] == token.NAME: name.append(trailer[2][1])
            if name[0] in self._env:
                # Known name: store the full scoped name as a list.
                name = self._env[name[0]] + name[1:]
                self._bases.append(name)
            else:
                # Unknown name: only the first component is stored, as a
                # string.  NOTE(review): later components are dropped
                # here -- confirm that callers expect this.
                self._bases.append(name[0])

    def get_method_names(self):
        """Return the names of the functions defined in the class body."""
        return self._function_names

    def get_method_info(self, name):
        """Return the FunctionInfo for method `name'; KeyError if unknown."""
        return self._function_info[name]

    def get_base_names(self):
        """Return the recorded base classes: a list (scoped name) for
        each name found in the environment, a string otherwise."""
        return self._bases
257|
258|
class ModuleInfo(SuiteInfoBase, SuiteFuncInfo):
    """Information about a whole module."""

    def __init__(self, tree = None, name = "<string>"):
        """Analyse the parse tree tuple `tree' of a module called `name'.

        For a module only the node at index 1 can supply the docstring.
        """
        self._name = name
        SuiteInfoBase.__init__(self, tree)
        if tree:
            # SuiteInfoBase looks for the docstring at tree[3] when the
            # tree has more than two elements; for a module that is the
            # wrong node, so drop whatever it picked up before looking
            # at tree[1].
            self._docstring = ''
            found, vars = match(DOCSTRING_STMT_PATTERN, tree[1])
            if found:
                # 'docstring' is the source text of a STRING token;
                # eval() turns that literal into the string it denotes.
                self._docstring = eval(vars["docstring"])
267|
268|
269| from types import ListType, TupleType
270|
def match(pattern, data, vars=None):
    """Match `data' to `pattern', with variable extraction.

    pattern
        Pattern to match against, possibly containing variables.

    data
        Data to be checked and against which variables are extracted.

    vars
        Dictionary of variables which have already been found.  If not
        provided, an empty dictionary is created.

    The `pattern' value may contain variables of the form ['varname'] which
    are allowed to match anything.  The value that is matched is returned as
    part of a dictionary which maps 'varname' to the matched value.  'varname'
    is not required to be a string object, but using strings makes patterns
    and the code which uses them more readable.

    This function returns two values: a boolean indicating whether a match
    was found and a dictionary mapping variable names to their associated
    values.  Variables bound before a mismatch was detected stay in the
    dictionary.

    A tuple pattern never matches data that has no length, and an empty
    tuple pattern matches empty data.
    """
    if vars is None:
        vars = {}
    if type(pattern) is list:           # 'variables' are ['varname']
        vars[pattern[0]] = data
        return 1, vars
    if type(pattern) is not tuple:
        return (pattern == data), vars
    try:
        same_length = len(data) == len(pattern)
    except TypeError:
        # `data' is a leaf (e.g. a number) where the pattern expects a node.
        same_length = 0
    if not same_length:
        return 0, vars
    # Stays true when there is nothing to compare (two empty sequences).
    same = 1
    # The lengths are equal, so pairing the elements loses nothing.
    for sub_pattern, sub_data in zip(pattern, data):
        same, vars = match(sub_pattern, sub_data, vars)
        if not same:
            break
    return same, vars
308|
309|
def _dmatch_report(*parts):
    """Write `parts' to standard output on one line, separated by single
    spaces, the way a print statement with several items does."""
    sys.stdout.write(' '.join([str(part) for part in parts]) + '\n')


def dmatch(pattern, data, vars=None):
    """Debugging match: works like match() but reports every step on
    standard output.

    Takes the same arguments as match() and returns the same pair: a
    flag telling whether `data' matched `pattern' and the dictionary of
    extracted variables.  A tuple pattern never matches data that has no
    length, and an empty tuple pattern matches empty data.
    """
    if vars is None:
        vars = {}
    if type(pattern) is list:           # 'variables' are ['varname']
        vars[pattern[0]] = data
        _dmatch_report("dmatch: pattern is list,", pattern[0], "=", data)
        return 1, vars
    if type(pattern) is not tuple:
        _dmatch_report("dmatch: pattern is not tuple, pattern =",
                       format(pattern), " data =", format(data))
        return (pattern == data), vars
    try:
        same_length = len(data) == len(pattern)
    except TypeError:
        # `data' is a leaf (e.g. a number) where the pattern expects a node.
        same_length = 0
    if not same_length:
        _dmatch_report("dmatch: bad length. data=", format(data, 2),
                       " pattern=", format(pattern, 1))
        return 0, vars
    # Stays true when there is nothing to compare (two empty sequences).
    same = 1
    # The lengths are equal, so pairing the elements loses nothing.
    for sub_pattern, sub_data in zip(pattern, data):
        same, vars = dmatch(sub_pattern, sub_data, vars)
        if not same:
            _dmatch_report("dmatch: not same")
            break
        _dmatch_report("dmatch: same so far")
    _dmatch_report("dmatch: returning", same, vars)
    return same, vars
332|
333|
# This pattern identifies compound statements, allowing them to be readily
# differentiated from simple statements.  The single child of the
# 'compound_stmt' node is extracted as the variable 'compound'.
#
COMPOUND_STMT_PATTERN = (symbol.stmt, (symbol.compound_stmt, ['compound']))
341|
342|
# This pattern will match a 'stmt' node which *might* represent a docstring;
# docstrings require that the statement which provides the docstring be the
# first statement in the class or function, which this pattern does not check.
#
# From 'small_stmt' down to the STRING token every node in the pattern has
# exactly one child, so that part is built by wrapping from the inside out.
# The text of the STRING token is extracted as the variable 'docstring'.
#
_pattern = (token.STRING, ['docstring'])
for _sym in (symbol.atom,
             symbol.power,
             symbol.factor,
             symbol.term,
             symbol.arith_expr,
             symbol.shift_expr,
             symbol.and_expr,
             symbol.xor_expr,
             symbol.expr,
             symbol.comparison,
             symbol.not_test,
             symbol.and_test,
             symbol.test,
             symbol.testlist,
             symbol.expr_stmt,
             symbol.small_stmt):
    _pattern = (_sym, _pattern)
DOCSTRING_STMT_PATTERN = (
    symbol.stmt,
    (symbol.simple_stmt, _pattern, (token.NEWLINE, '')),
)
del _pattern, _sym
370|
# This pattern will match a 'test' node which is a base class
#
# Every node from 'test' down to 'factor' has exactly one child, so the
# pattern is built by wrapping from the inside out.  The child of the
# 'factor' node is extracted as the variable 'power'.
#
_pattern = ['power']
for _sym in (symbol.factor,
             symbol.term,
             symbol.arith_expr,
             symbol.shift_expr,
             symbol.and_expr,
             symbol.xor_expr,
             symbol.expr,
             symbol.comparison,
             symbol.not_test,
             symbol.and_test,
             symbol.test):
    _pattern = (_sym, _pattern)
TEST_NAME_PATTERN = _pattern
del _pattern, _sym
388|
# This pattern will match an import statement: a 'stmt' holding a
# 'simple_stmt' that consists of one 'small_stmt' and the NEWLINE token.
# The child of the 'small_stmt' is extracted as the variable 'import_spec',
# which is either:
#   NAME:import, dotted_name
# or:
#   NAME:from, dotted_name, NAME:import, NAME [, COMMA, NAME]*
# hence you must process it manually (second form has variable length)
IMPORT_STMT_PATTERN = (
    symbol.stmt,
    (symbol.simple_stmt,
     (symbol.small_stmt, ['import_spec']),
     (token.NEWLINE, '')))
404|
405|
406|
407| #
408| # end of file