buildframework/helium/external/python/lib/2.5/Sphinx-0.5.1-py2.5.egg/sphinx/search.py
changeset 179 d8ac696cc51f
parent 1 be27ed110b50
child 180 e02a83d4c571
child 592 3215c239276a
--- a/buildframework/helium/external/python/lib/2.5/Sphinx-0.5.1-py2.5.egg/sphinx/search.py	Wed Oct 28 14:39:48 2009 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,205 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-    sphinx.search
-    ~~~~~~~~~~~~~
-
-    Create a search index for offline search.
-
-    :copyright: 2007-2008 by Armin Ronacher.
-    :license: BSD.
-"""
-import re
-import cPickle as pickle
-from cStringIO import StringIO
-
-from docutils.nodes import Text, NodeVisitor
-
-from sphinx.util.stemmer import PorterStemmer
-from sphinx.util import jsdump, rpartition
-
-
-word_re = re.compile(r'\w+(?u)')
-
-stopwords = set("""
-a  and  are  as  at
-be  but  by
-for
-if  in  into  is  it
-near  no  not
-of  on  or
-such
-that  the  their  then  there  these  they  this  to
-was  will  with
-""".split())
-
-
-class _JavaScriptIndex(object):
-    """
-    The search index as a JavaScript file that calls a function
-    on the documentation search object to register the index.
-    """
-
-    PREFIX = 'Search.setIndex('
-    SUFFIX = ')'
-
-    def dumps(self, data):
-        return self.PREFIX + jsdump.dumps(data) + self.SUFFIX
-
-    def loads(self, s):
-        data = s[len(self.PREFIX):-len(self.SUFFIX)]
-        if not data or not s.startswith(self.PREFIX) or not \
-           s.endswith(self.SUFFIX):
-            raise ValueError('invalid data')
-        return jsdump.loads(data)
-
-    def dump(self, data, f):
-        f.write(self.dumps(data))
-
-    def load(self, f):
-        return self.loads(f.read())
-
-
-js_index = _JavaScriptIndex()
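
Editorial aside: a small round-trip sketch of the helper above, assuming sphinx.util.jsdump serializes plain dicts of strings, ints and lists (which is all the index contains):

    data = {'filenames': ['intro'], 'terms': {'search': 0}}
    text = js_index.dumps(data)      # 'Search.setIndex({...})'
    loaded = js_index.loads(text)    # parses back to an equal dict
    assert loaded == data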
-
-
-class Stemmer(PorterStemmer):
-    """
-    All those Porter stemmer implementations look hideous;
-    this subclass at least makes the stem method nicer.
-    """
-
-    def stem(self, word):
-        word = word.lower()
-        return PorterStemmer.stem(self, word, 0, len(word) - 1)
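
Editorial aside: the wrapper reduces the awkward PorterStemmer.stem(word, 0, len(word) - 1) call to a single-argument method; illustrative inputs:

    stemmer = Stemmer()
    stemmer.stem('Searching')    # -> 'search'
    stemmer.stem('indexes')      # -> 'index'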
-
-
-class WordCollector(NodeVisitor):
-    """
-    A special visitor that collects words for the `IndexBuilder`.
-    """
-
-    def __init__(self, document):
-        NodeVisitor.__init__(self, document)
-        self.found_words = []
-
-    def dispatch_visit(self, node):
-        if node.__class__ is Text:
-            self.found_words.extend(word_re.findall(node.astext()))
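
Editorial aside: a sketch of how the visitor is driven, assuming this module's namespace; docutils.core.publish_doctree is used here only to obtain a doctree to walk:

    from docutils.core import publish_doctree

    doctree = publish_doctree('Offline search indexes every document.')
    visitor = WordCollector(doctree)
    doctree.walk(visitor)
    # visitor.found_words -> ['Offline', 'search', 'indexes', 'every', 'document']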
-
-
-class IndexBuilder(object):
-    """
-    Helper class that creates a search index based on the doctrees
-    passed to the `feed` method.
-    """
-    formats = {
-        'jsdump':   jsdump,
-        'pickle':   pickle
-    }
-
-    def __init__(self, env):
-        self.env = env
-        self._stemmer = Stemmer()
-        # filename -> title
-        self._titles = {}
-        # stemmed word -> set(filenames)
-        self._mapping = {}
-        # desctype name -> index number
-        self._desctypes = {}
-
-    def load(self, stream, format):
-        """Reconstruct from frozen data."""
-        if isinstance(format, basestring):
-            format = self.formats[format]
-        frozen = format.load(stream)
-        # if an old index is present, we treat it as not existing.
-        if not isinstance(frozen, dict):
-            raise ValueError('old format')
-        index2fn = frozen['filenames']
-        self._titles = dict(zip(index2fn, frozen['titles']))
-        self._mapping = {}
-        for k, v in frozen['terms'].iteritems():
-            if isinstance(v, int):
-                self._mapping[k] = set([index2fn[v]])
-            else:
-                self._mapping[k] = set(index2fn[i] for i in v)
-        # no need to load keywords/desctypes
-
-    def dump(self, stream, format):
-        """Dump the frozen index to a stream."""
-        if isinstance(format, basestring):
-            format = self.formats[format]
-        format.dump(self.freeze(), stream)
-
-    def get_modules(self, fn2index):
-        rv = {}
-        for name, (doc, _, _, _) in self.env.modules.iteritems():
-            rv[name] = fn2index[doc]
-        return rv
-
-    def get_descrefs(self, fn2index):
-        rv = {}
-        dt = self._desctypes
-        for fullname, (doc, desctype) in self.env.descrefs.iteritems():
-            prefix, name = rpartition(fullname, '.')
-            pdict = rv.setdefault(prefix, {})
-            try:
-                i = dt[desctype]
-            except KeyError:
-                i = len(dt)
-                dt[desctype] = i
-            pdict[name] = (fn2index[doc], i)
-        return rv
-
-    def get_terms(self, fn2index):
-        rv = {}
-        for k, v in self._mapping.iteritems():
-            if len(v) == 1:
-                fn, = v
-                rv[k] = fn2index[fn]
-            else:
-                rv[k] = [fn2index[fn] for fn in v]
-        return rv
-
-    def freeze(self):
-        """Create a usable data structure for serializing."""
-        filenames = self._titles.keys()
-        titles = self._titles.values()
-        fn2index = dict((f, i) for (i, f) in enumerate(filenames))
-        return dict(
-            filenames=filenames,
-            titles=titles,
-            terms=self.get_terms(fn2index),
-            descrefs=self.get_descrefs(fn2index),
-            modules=self.get_modules(fn2index),
-            desctypes=dict((v, k) for (k, v) in self._desctypes.items()),
-        )
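
Editorial aside: roughly the shape freeze() produces, with made-up values for illustration:

    # {'filenames': ['intro', 'api'],
    #  'titles':    ['Introduction', 'API Reference'],
    #  'terms':     {'search': 0, 'index': [0, 1]},   # stemmed word -> file index or list
    #  'descrefs':  {'pkg.mod': {'func': (1, 0)}},    # prefix -> name -> (file, desctype)
    #  'modules':   {'pkg.mod': 1},                   # module name -> file index
    #  'desctypes': {0: 'function'}}                  # index -> desctype name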
-
-    def prune(self, filenames):
-        """Remove data for all filenames not in the list."""
-        new_titles = {}
-        for filename in filenames:
-            if filename in self._titles:
-                new_titles[filename] = self._titles[filename]
-        self._titles = new_titles
-        for wordnames in self._mapping.itervalues():
-            wordnames.intersection_update(filenames)
-
-    def feed(self, filename, title, doctree):
-        """Feed a doctree to the index."""
-        self._titles[filename] = title
-
-        visitor = WordCollector(doctree)
-        doctree.walk(visitor)
-
-        def add_term(word, prefix='', stem=self._stemmer.stem):
-            word = stem(word)
-            if len(word) < 3 or word in stopwords or word.isdigit():
-                return
-            self._mapping.setdefault(prefix + word, set()).add(filename)
-
-        for word in word_re.findall(title):
-            add_term(word)
-
-        for word in visitor.found_words:
-            add_term(word)
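
Editorial aside: an end-to-end sketch of how a builder typically drives this class. FakeEnv is a hypothetical stand-in for a real BuildEnvironment, and the import path assumes the bundled Sphinx 0.5.1 egg is on sys.path:

    from cStringIO import StringIO
    from docutils.core import publish_doctree
    from sphinx.search import IndexBuilder, js_index

    class FakeEnv(object):
        modules = {}    # no documented Python modules in this sketch
        descrefs = {}   # no described objects either

    builder = IndexBuilder(FakeEnv())
    builder.feed('intro', 'Introduction',
                 publish_doctree('Searching works offline.'))

    out = StringIO()
    js_index.dump(builder.freeze(), out)   # out now holds 'Search.setIndex({...})'
    # builder.dump(out, 'pickle') would serialize the same data with cPickle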