buildframework/helium/external/python/lib/2.5/Sphinx-0.5.1-py2.5.egg/sphinx/search.py
changeset 1 be27ed110b50
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/buildframework/helium/external/python/lib/2.5/Sphinx-0.5.1-py2.5.egg/sphinx/search.py	Wed Oct 28 14:39:48 2009 +0000
@@ -0,0 +1,205 @@
+# -*- coding: utf-8 -*-
+"""
+    sphinx.search
+    ~~~~~~~~~~~~~
+
+    Create a search index for offline search.
+
+    :copyright: 2007-2008 by Armin Ronacher.
+    :license: BSD.
+"""
+import re
+import cPickle as pickle
+from cStringIO import StringIO
+
+from docutils.nodes import Text, NodeVisitor
+
+from sphinx.util.stemmer import PorterStemmer
+from sphinx.util import jsdump, rpartition
+
+
+# Matches runs of word characters; the inline (?u) flag makes \w
+# Unicode-aware under Python 2's re module.
+word_re = re.compile(r'\w+(?u)')
+
+# Very common English words that are excluded from the index to keep it
+# small; compared against *stemmed* lower-case words in IndexBuilder.feed().
+stopwords = set("""
+a  and  are  as  at
+be  but  by
+for
+if  in  into  is  it
+near  no  not
+of  on  or
+such
+that  the  their  then  there  these  they  this  to
+was  will  with
+""".split())
+
+
+class _JavaScriptIndex(object):
+    """
+    The search index as javascript file that calls a function
+    on the documentation search object to register the index.
+    """
+
+    # Wrapper emitted around the jsdump payload so the result is a JS
+    # statement the browser-side search code can execute directly.
+    PREFIX = 'Search.setIndex('
+    SUFFIX = ')'
+
+    def dumps(self, data):
+        """Serialize *data* as a ``Search.setIndex(...)`` JS statement."""
+        return self.PREFIX + jsdump.dumps(data) + self.SUFFIX
+
+    def loads(self, s):
+        """Parse a string produced by :meth:`dumps` back into Python data.
+
+        Raises ``ValueError`` when *s* is empty or not wrapped in the
+        expected prefix/suffix.
+        """
+        # NOTE(review): the payload is sliced before the wrapper is
+        # validated; the checks below still reject malformed input, so the
+        # possibly-garbage slice is never passed to jsdump.loads().
+        data = s[len(self.PREFIX):-len(self.SUFFIX)]
+        if not data or not s.startswith(self.PREFIX) or not \
+           s.endswith(self.SUFFIX):
+            raise ValueError('invalid data')
+        return jsdump.loads(data)
+
+    def dump(self, data, f):
+        """Serialize *data* and write it to the file-like object *f*."""
+        f.write(self.dumps(data))
+
+    def load(self, f):
+        """Read the file-like object *f* and deserialize its contents."""
+        return self.loads(f.read())
+
+
+# Module-level singleton; used as one of IndexBuilder's dump/load formats.
+js_index = _JavaScriptIndex()
+
+
+class Stemmer(PorterStemmer):
+    """
+    All those porter stemmer implementations look hideous.
+    make at least the stem method nicer.
+    """
+
+    def stem(self, word):
+        """Return the Porter stem of *word*, lower-casing it first."""
+        word = word.lower()
+        # The classic PorterStemmer API takes inclusive (start, end)
+        # character indices into the string rather than just the word.
+        return PorterStemmer.stem(self, word, 0, len(word) - 1)
+
+
+class WordCollector(NodeVisitor):
+    """
+    A special visitor that collects words for the `IndexBuilder`.
+    """
+
+    def __init__(self, document):
+        NodeVisitor.__init__(self, document)
+        # Every word found in visited Text nodes, duplicates included,
+        # in document order; consumed by IndexBuilder.feed().
+        self.found_words = []
+
+    def dispatch_visit(self, node):
+        # Only plain Text nodes contribute words; the exact class check
+        # (``is``, not isinstance) deliberately skips Text subclasses and
+        # every other node type.
+        if node.__class__ is Text:
+            self.found_words.extend(word_re.findall(node.astext()))
+
+
+class IndexBuilder(object):
+    """
+    Helper class that creates a searchindex based on the doctrees
+    passed to the `feed` method.
+    """
+    # Supported (de)serialization backends for load()/dump(); both expose
+    # the load/dump file API used below.
+    formats = {
+        'jsdump':   jsdump,
+        'pickle':   pickle
+    }
+
+    def __init__(self, env):
+        # Sphinx build environment; provides env.modules and env.descrefs.
+        self.env = env
+        self._stemmer = Stemmer()
+        # filename -> title
+        self._titles = {}
+        # stemmed word -> set(filenames)
+        self._mapping = {}
+        # desctypes -> index
+        self._desctypes = {}
+
+    def load(self, stream, format):
+        """Reconstruct from frozen data."""
+        if isinstance(format, basestring):
+            format = self.formats[format]
+        frozen = format.load(stream)
+        # if an old index is present, we treat it as not existing.
+        if not isinstance(frozen, dict):
+            raise ValueError('old format')
+        # freeze() stores filenames as a list; positions in that list are
+        # the integer ids used throughout the frozen structure.
+        index2fn = frozen['filenames']
+        self._titles = dict(zip(index2fn, frozen['titles']))
+        self._mapping = {}
+        for k, v in frozen['terms'].iteritems():
+            # get_terms() collapses single-file terms to a bare int;
+            # re-expand either shape back into a set of filenames.
+            if isinstance(v, int):
+                self._mapping[k] = set([index2fn[v]])
+            else:
+                self._mapping[k] = set(index2fn[i] for i in v)
+        # no need to load keywords/desctypes
+
+    def dump(self, stream, format):
+        """Dump the frozen index to a stream."""
+        if isinstance(format, basestring):
+            format = self.formats[format]
+        format.dump(self.freeze(), stream)
+
+    def get_modules(self, fn2index):
+        """Return a mapping of module name -> file id for the frozen index."""
+        rv = {}
+        # env.modules values are 4-tuples whose first item is the
+        # documenting filename; the rest is unused here.
+        for name, (doc, _, _, _) in self.env.modules.iteritems():
+            rv[name] = fn2index[doc]
+        return rv
+
+    def get_descrefs(self, fn2index):
+        """Return {prefix: {name: (file id, desctype id)}} for all descrefs.
+
+        Assigns each desctype a small integer id on first sight, recorded
+        in ``self._desctypes`` for the reverse table built by freeze().
+        """
+        rv = {}
+        dt = self._desctypes
+        for fullname, (doc, desctype) in self.env.descrefs.iteritems():
+            # Split "pkg.mod.attr" into prefix "pkg.mod" and name "attr";
+            # names without a dot get an empty prefix.
+            prefix, name = rpartition(fullname, '.')
+            pdict = rv.setdefault(prefix, {})
+            try:
+                i = dt[desctype]
+            except KeyError:
+                # First occurrence of this desctype: allocate the next id.
+                i = len(dt)
+                dt[desctype] = i
+            pdict[name] = (fn2index[doc], i)
+        return rv
+
+    def get_terms(self, fn2index):
+        """Return {stemmed word: file id or [file ids]} for freezing.
+
+        Single-file terms are stored as a bare int to shrink the index;
+        load() understands both shapes.
+        """
+        rv = {}
+        for k, v in self._mapping.iteritems():
+            if len(v) == 1:
+                fn, = v
+                rv[k] = fn2index[fn]
+            else:
+                rv[k] = [fn2index[fn] for fn in v]
+        return rv
+
+    def freeze(self):
+        """Create a usable data structure for serializing."""
+        # keys() and values() iterate in the same order here, so titles[i]
+        # belongs to filenames[i] (CPython dict guarantee for an unmodified
+        # dict between the two calls).
+        filenames = self._titles.keys()
+        titles = self._titles.values()
+        fn2index = dict((f, i) for (i, f) in enumerate(filenames))
+        return dict(
+            filenames=filenames,
+            titles=titles,
+            terms=self.get_terms(fn2index),
+            descrefs=self.get_descrefs(fn2index),
+            modules=self.get_modules(fn2index),
+            # Invert desctype -> id into id -> desctype for the consumer.
+            desctypes=dict((v, k) for (k, v) in self._desctypes.items()),
+        )
+
+    def prune(self, filenames):
+        """Remove data for all filenames not in the list."""
+        new_titles = {}
+        for filename in filenames:
+            if filename in self._titles:
+                new_titles[filename] = self._titles[filename]
+        self._titles = new_titles
+        # Drop pruned files from every term's file set; empty sets are
+        # kept in the mapping (they simply freeze to empty lists).
+        for wordnames in self._mapping.itervalues():
+            wordnames.intersection_update(filenames)
+
+    def feed(self, filename, title, doctree):
+        """Feed a doctree to the index."""
+        self._titles[filename] = title
+
+        visitor = WordCollector(doctree)
+        doctree.walk(visitor)
+
+        # stem= default binds the bound method once instead of looking it
+        # up on every call.
+        def add_term(word, prefix='', stem=self._stemmer.stem):
+            word = stem(word)
+            # Skip short words, stopwords and pure numbers — low-value
+            # search terms that would bloat the index.
+            if len(word) < 3 or word in stopwords or word.isdigit():
+                return
+            self._mapping.setdefault(prefix + word, set()).add(filename)
+
+        # Title words are indexed as well as body words.
+        for word in word_re.findall(title):
+            add_term(word)
+
+        for word in visitor.found_words:
+            add_term(word)
+