--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/buildframework/helium/external/python/lib/2.5/Sphinx-0.5.1-py2.5.egg/sphinx/search.py Wed Oct 28 14:39:48 2009 +0000
@@ -0,0 +1,205 @@
+# -*- coding: utf-8 -*-
+"""
+ sphinx.search
+ ~~~~~~~~~~~~~
+
+ Create a search index for offline search.
+
+ :copyright: 2007-2008 by Armin Ronacher.
+ :license: BSD.
+"""
+import re
+import cPickle as pickle
+from cStringIO import StringIO
+
+from docutils.nodes import Text, NodeVisitor
+
+from sphinx.util.stemmer import PorterStemmer
+from sphinx.util import jsdump, rpartition
+
+
+word_re = re.compile(r'\w+(?u)')
+
+stopwords = set("""
+a and are as at
+be but by
+for
+if in into is it
+near no not
+of on or
+such
+that the their then there these they this to
+was will with
+""".split())
+
+
+class _JavaScriptIndex(object):
+ """
+ The search index as javascript file that calls a function
+ on the documentation search object to register the index.
+ """
+
+ PREFIX = 'Search.setIndex('
+ SUFFIX = ')'
+
+ def dumps(self, data):
+ return self.PREFIX + jsdump.dumps(data) + self.SUFFIX
+
+ def loads(self, s):
+ data = s[len(self.PREFIX):-len(self.SUFFIX)]
+ if not data or not s.startswith(self.PREFIX) or not \
+ s.endswith(self.SUFFIX):
+ raise ValueError('invalid data')
+ return jsdump.loads(data)
+
+ def dump(self, data, f):
+ f.write(self.dumps(data))
+
+ def load(self, f):
+ return self.loads(f.read())
+
+
+js_index = _JavaScriptIndex()
+
+
+class Stemmer(PorterStemmer):
+ """
+ All those porter stemmer implementations look hideous.
+ make at least the stem method nicer.
+ """
+
+ def stem(self, word):
+ word = word.lower()
+ return PorterStemmer.stem(self, word, 0, len(word) - 1)
+
+
+class WordCollector(NodeVisitor):
+ """
+ A special visitor that collects words for the `IndexBuilder`.
+ """
+
+ def __init__(self, document):
+ NodeVisitor.__init__(self, document)
+ self.found_words = []
+
+ def dispatch_visit(self, node):
+ if node.__class__ is Text:
+ self.found_words.extend(word_re.findall(node.astext()))
+
+
+class IndexBuilder(object):
+ """
+ Helper class that creates a searchindex based on the doctrees
+ passed to the `feed` method.
+ """
+ formats = {
+ 'jsdump': jsdump,
+ 'pickle': pickle
+ }
+
+ def __init__(self, env):
+ self.env = env
+ self._stemmer = Stemmer()
+ # filename -> title
+ self._titles = {}
+ # stemmed word -> set(filenames)
+ self._mapping = {}
+ # desctypes -> index
+ self._desctypes = {}
+
+ def load(self, stream, format):
+ """Reconstruct from frozen data."""
+ if isinstance(format, basestring):
+ format = self.formats[format]
+ frozen = format.load(stream)
+ # if an old index is present, we treat it as not existing.
+ if not isinstance(frozen, dict):
+ raise ValueError('old format')
+ index2fn = frozen['filenames']
+ self._titles = dict(zip(index2fn, frozen['titles']))
+ self._mapping = {}
+ for k, v in frozen['terms'].iteritems():
+ if isinstance(v, int):
+ self._mapping[k] = set([index2fn[v]])
+ else:
+ self._mapping[k] = set(index2fn[i] for i in v)
+ # no need to load keywords/desctypes
+
+ def dump(self, stream, format):
+ """Dump the frozen index to a stream."""
+ if isinstance(format, basestring):
+ format = self.formats[format]
+ format.dump(self.freeze(), stream)
+
+ def get_modules(self, fn2index):
+ rv = {}
+ for name, (doc, _, _, _) in self.env.modules.iteritems():
+ rv[name] = fn2index[doc]
+ return rv
+
+ def get_descrefs(self, fn2index):
+ rv = {}
+ dt = self._desctypes
+ for fullname, (doc, desctype) in self.env.descrefs.iteritems():
+ prefix, name = rpartition(fullname, '.')
+ pdict = rv.setdefault(prefix, {})
+ try:
+ i = dt[desctype]
+ except KeyError:
+ i = len(dt)
+ dt[desctype] = i
+ pdict[name] = (fn2index[doc], i)
+ return rv
+
+ def get_terms(self, fn2index):
+ rv = {}
+ for k, v in self._mapping.iteritems():
+ if len(v) == 1:
+ fn, = v
+ rv[k] = fn2index[fn]
+ else:
+ rv[k] = [fn2index[fn] for fn in v]
+ return rv
+
+ def freeze(self):
+ """Create a usable data structure for serializing."""
+ filenames = self._titles.keys()
+ titles = self._titles.values()
+ fn2index = dict((f, i) for (i, f) in enumerate(filenames))
+ return dict(
+ filenames=filenames,
+ titles=titles,
+ terms=self.get_terms(fn2index),
+ descrefs=self.get_descrefs(fn2index),
+ modules=self.get_modules(fn2index),
+ desctypes=dict((v, k) for (k, v) in self._desctypes.items()),
+ )
+
+ def prune(self, filenames):
+ """Remove data for all filenames not in the list."""
+ new_titles = {}
+ for filename in filenames:
+ if filename in self._titles:
+ new_titles[filename] = self._titles[filename]
+ self._titles = new_titles
+ for wordnames in self._mapping.itervalues():
+ wordnames.intersection_update(filenames)
+
+ def feed(self, filename, title, doctree):
+ """Feed a doctree to the index."""
+ self._titles[filename] = title
+
+ visitor = WordCollector(doctree)
+ doctree.walk(visitor)
+
+ def add_term(word, prefix='', stem=self._stemmer.stem):
+ word = stem(word)
+ if len(word) < 3 or word in stopwords or word.isdigit():
+ return
+ self._mapping.setdefault(prefix + word, set()).add(filename)
+
+ for word in word_re.findall(title):
+ add_term(word)
+
+ for word in visitor.found_words:
+ add_term(word)