|
|
# Custom python docstring checks with epydoc 3.0.1
#
# From ThisBlueWiki
#
# If you use epydoc to generate documentation for your Python projects, you
# may also find its check features useful.
#
# The problem is that, as of epydoc 3.0.1, there's no easy way to specify the
# granularity of the checks you'd like done (what pieces of code to check and
# what to require as far as documentation completeness) and get a boolean
# result out of the command line parser. This limits the usefulness of the
# check features in things like pre-commit hooks, etc.
#
# The other problem is that the epydoc API in 3.0.1 is still quite lacking,
# and a lot of the docstrings are simply untrue (e.g. methods don't return
# what they claim, and so forth). So it's difficult to accomplish this
# seemingly simple task using the API.
#
# I'm using epydoc to check for a certain minimum standard of documentation
# before submitting code back to my repositories, and after days of puzzling
# through its internals, I came up with some quick and dirty Python code that
# returns a boolean based on the completeness of the documentation for a
# given project path. This has since been extended quite a bit, but the code
# itself isn't terribly complex, so if you look through it you should be able
# to see how it can be molded to your needs.
#
# Note that epydoc occasionally starts parsing outside the tree and complains
# about things like Object not being documented, etc. Hence the code for
# inclusion and exclusion based on regexes. You should of course replace the
# default inclusion regex of "myproject" either with the name of your
# top-level project package, or with a dynamically generated string which is
# determined based on the package being checked.


class MyDocChecker(object):
    """
    Interacts with epydoc to perform the following functions:
        - Check the completeness of documentation for a given source tree
    """

    def __init__(self):
        pass

    @classmethod
    def check_completeness(cls, package_root, checks=None,
                           include_regexes=('^myproject',),
                           exclude_regexes=None):
        r"""
        Checks documentation completeness using epydoc.

        @param package_root: The path to the top of a python package
            directory to check.
        @param checks: A dict of the checks to run on each of keys "module",
            "class", "function". For example:
            C{checks={"module" : DocChecker.AUTHOR | DocChecker.DESCR,
            "class" : None,
            "function" : DocChecker.PARAM | DocChecker.RETURN,}}
            This will check that all modules have an author and a
            description, will not run any checks on classes, and will check
            that all functions have their parameters and return values
            documented. See the beginning of this method's source code as
            well as
            http://epydoc.sourceforge.net/api/epydoc.checker.DocChecker-class.html
            for details. This parameter must either be None, or have all
            three keys defined. The values for any key can be None (or 0),
            in which case all checks for that type of item will be skipped.
        @param include_regexes: A sequence of regex strings which items must
            match in order to be checked. For example,
            "^mypackage\.database.*" would include all items whose names
            start with mypackage.database.
        @param exclude_regexes: A sequence of regex strings against which
            items will be matched and, if matching, be excluded. Excludes
            take precedence over includes. For example, if include_regexes
            contains "^mypackage\.database.*" and exclude_regexes contains
            ".*\.ObjectNotFound$", then an item named
            mypackage.database.loader would be included, but
            mypackage.database.loader.ObjectNotFound would be excluded from
            checks.
        @return: A tuple: (True if documentation was all okay, string
            containing output messages)
        @rtype: tuple
        @raise KeyError: If C{checks} is given but lacks one of the keys
            "module", "class" or "function".
        @raise Exception: If an AttributeError occurs while running the
            checks (perhaps package_root doesn't exist or doesn't contain a
            python package).
        """
        # epydoc is imported lazily so that merely importing this module
        # does not require epydoc to be installed.
        from epydoc.docbuilder import build_doc_index
        from epydoc.checker import DocChecker

        if checks is None:
            # Set reasonable defaults for what to check for the presence of.
            # See also
            # http://epydoc.sourceforge.net/api/epydoc.checker.DocChecker-class.html
            module_checks = DocChecker.MODULE | DocChecker.DESCR
            class_checks = DocChecker.CLASS | DocChecker.DESCR
            function_checks = (DocChecker.FUNC | DocChecker.DESCR |
                               DocChecker.RETURN | DocChecker.PARAM)
        else:
            # Will raise a KeyError if any of these are not defined.
            module_checks = cls._resolve_checks(DocChecker.MODULE,
                                                checks["module"])
            class_checks = cls._resolve_checks(DocChecker.CLASS,
                                               checks["class"])
            function_checks = cls._resolve_checks(DocChecker.FUNC,
                                                  checks["function"])

        docindex = build_doc_index((package_root,))
        checker = DocChecker(docindex)

        module_messages = {}
        class_messages = {}
        function_messages = {}
        try:
            if module_checks is not None:
                module_messages = cls._collect_warnings(
                    checker, module_checks, include_regexes, exclude_regexes)
            if class_checks is not None:
                class_messages = cls._collect_warnings(
                    checker, class_checks, include_regexes, exclude_regexes)
            if function_checks is not None:
                function_messages = cls._collect_warnings(
                    checker, function_checks, include_regexes,
                    exclude_regexes)
        except AttributeError as e:
            raise Exception(
                "Caught an exception checking documentation in directory %s "
                "(perhaps this directory doesn't exist or doesn't contain a "
                "python package?): %s" % (package_root, e))

        # Only warnings with at least one in-scope item are kept, so any
        # non-empty mapping means a problem was found.
        no_problems = not (module_messages or class_messages or
                           function_messages)

        if no_problems:
            # Wrap in a 1-tuple so that a tuple of regexes is formatted as a
            # single value instead of being unpacked by the % operator.
            messages = (
                "Documentation appears to be complete. Documentation for "
                "items matching these regexes was not checked:\n\n%s"
                % (exclude_regexes,))
        else:
            messages = "".join([
                cls._format_section("MODULES", module_messages),
                cls._format_section("CLASSES", class_messages),
                cls._format_section("FUNCTIONS", function_messages),
            ])

        return (no_problems, messages)

    @staticmethod
    def _resolve_checks(type_flag, requested):
        """
        Combines the item-type flag with the requested property checks.

        @param type_flag: The flag selecting the kind of item to check.
        @param requested: The property checks requested by the caller, or
            None/0 to skip checks for this kind of item.
        @return: C{type_flag | requested}, or None if no checks were
            requested.
        """
        if requested is None or requested == 0:
            return None
        return type_flag | requested

    @classmethod
    def _collect_warnings(cls, checker, check_set, include_regexes,
                          exclude_regexes):
        """
        Runs one set of checks and gathers the warnings for in-scope items.

        @param checker: The checker object to run; its C{check} method is
            called and its C{_warnings} mapping is then read.
        @param check_set: The combined check flags to pass to C{check}.
        @param include_regexes: Regex strings an item name must match.
        @param exclude_regexes: Regex strings that exclude an item name.
        @return: A dict mapping each warning to the list of canonical names
            of the included, non-excluded items it was reported for.
            Warnings with no such items are omitted.
        @rtype: dict
        """
        checker.check(check_set)
        found = {}
        # NOTE: _warnings is a private attribute of the checker; there is no
        # public accessor for the collected warnings used here.
        for warning, items in checker._warnings.items():
            names = [
                str(item.canonical_name)
                for item in items
                if cls._has_name_match(item, include_regexes)
                and not cls._has_name_match(item, exclude_regexes)
            ]
            if names:
                found[warning] = names
        return found

    @staticmethod
    def _format_section(title, messages_by_warning):
        """
        Renders one report section (title, then each warning and its items).

        @param title: The section heading, e.g. "MODULES".
        @param messages_by_warning: A dict mapping warning text to a list of
            item names.
        @return: The formatted section, or an empty string if there is
            nothing to report. Warnings and item names are sorted so the
            output is deterministic.
        @rtype: string
        """
        if not messages_by_warning:
            return ""
        lines = [title]
        for warning in sorted(messages_by_warning):
            lines.append(" " + warning)
            for name in sorted(messages_by_warning[warning]):
                lines.append(" " + name)
        return "\n".join(lines) + "\n"

    @classmethod
    def _has_name_match(cls, item, regexes):
        """
        Checks if the canonical name of item matches any regexes in the
        list regexes.

        @param item: An instance of epydoc.apidoc.ValueDoc (any object with
            a C{canonical_name} attribute works).
        @param regexes: A sequence of regex strings against which the item's
            canonical name will be searched, or None.
        @return: True if item is matched by at least one regex in regexes;
            False otherwise, including when regexes is None or empty.
        @rtype: boolean
        """
        if regexes is None:
            return False
        import re
        name = str(item.canonical_name)
        for pattern in regexes:
            if re.search(pattern, name) is not None:
                return True
        return False