buildframework/helium/external/python/lib/2.5/Sphinx-0.5.1-py2.5.egg/sphinx/search.py
author Alex Gilkes <alex.gilkes@nokia.com>
Wed, 28 Oct 2009 14:39:48 +0000
changeset 1 be27ed110b50
permissions -rw-r--r--
Bringing in Helium, imaker and cmaker
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
1
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
     1
# -*- coding: utf-8 -*-
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
     2
"""
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
     3
    sphinx.search
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
     4
    ~~~~~~~~~~~~~
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
     5
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
     6
    Create a search index for offline search.
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
     7
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
     8
    :copyright: 2007-2008 by Armin Ronacher.
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
     9
    :license: BSD.
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    10
"""
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    11
import re
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    12
import cPickle as pickle
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    13
from cStringIO import StringIO
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    14
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    15
from docutils.nodes import Text, NodeVisitor
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    16
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    17
from sphinx.util.stemmer import PorterStemmer
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    18
from sphinx.util import jsdump, rpartition
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    19
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    20
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    21
word_re = re.compile(r'\w+(?u)')
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    22
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    23
stopwords = set("""
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    24
a  and  are  as  at
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    25
be  but  by
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    26
for
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    27
if  in  into  is  it
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    28
near  no  not
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    29
of  on  or
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    30
such
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    31
that  the  their  then  there  these  they  this  to
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    32
was  will  with
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    33
""".split())
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    34
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    35
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    36
class _JavaScriptIndex(object):
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    37
    """
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    38
    The search index as javascript file that calls a function
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    39
    on the documentation search object to register the index.
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    40
    """
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    41
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    42
    PREFIX = 'Search.setIndex('
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    43
    SUFFIX = ')'
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    44
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    45
    def dumps(self, data):
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    46
        return self.PREFIX + jsdump.dumps(data) + self.SUFFIX
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    47
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    48
    def loads(self, s):
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    49
        data = s[len(self.PREFIX):-len(self.SUFFIX)]
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    50
        if not data or not s.startswith(self.PREFIX) or not \
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    51
           s.endswith(self.SUFFIX):
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    52
            raise ValueError('invalid data')
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    53
        return jsdump.loads(data)
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    54
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    55
    def dump(self, data, f):
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    56
        f.write(self.dumps(data))
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    57
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    58
    def load(self, f):
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    59
        return self.loads(f.read())
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    60
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    61
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    62
js_index = _JavaScriptIndex()
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    63
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    64
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    65
class Stemmer(PorterStemmer):
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    66
    """
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    67
    All those porter stemmer implementations look hideous.
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    68
    make at least the stem method nicer.
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    69
    """
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    70
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    71
    def stem(self, word):
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    72
        word = word.lower()
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    73
        return PorterStemmer.stem(self, word, 0, len(word) - 1)
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    74
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    75
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    76
class WordCollector(NodeVisitor):
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    77
    """
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    78
    A special visitor that collects words for the `IndexBuilder`.
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    79
    """
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    80
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    81
    def __init__(self, document):
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    82
        NodeVisitor.__init__(self, document)
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    83
        self.found_words = []
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    84
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    85
    def dispatch_visit(self, node):
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    86
        if node.__class__ is Text:
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    87
            self.found_words.extend(word_re.findall(node.astext()))
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    88
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    89
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    90
class IndexBuilder(object):
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    91
    """
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    92
    Helper class that creates a searchindex based on the doctrees
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    93
    passed to the `feed` method.
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    94
    """
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    95
    formats = {
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    96
        'jsdump':   jsdump,
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    97
        'pickle':   pickle
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    98
    }
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    99
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   100
    def __init__(self, env):
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   101
        self.env = env
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   102
        self._stemmer = Stemmer()
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   103
        # filename -> title
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   104
        self._titles = {}
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   105
        # stemmed word -> set(filenames)
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   106
        self._mapping = {}
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   107
        # desctypes -> index
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   108
        self._desctypes = {}
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   109
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   110
    def load(self, stream, format):
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   111
        """Reconstruct from frozen data."""
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   112
        if isinstance(format, basestring):
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   113
            format = self.formats[format]
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   114
        frozen = format.load(stream)
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   115
        # if an old index is present, we treat it as not existing.
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   116
        if not isinstance(frozen, dict):
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   117
            raise ValueError('old format')
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   118
        index2fn = frozen['filenames']
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   119
        self._titles = dict(zip(index2fn, frozen['titles']))
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   120
        self._mapping = {}
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   121
        for k, v in frozen['terms'].iteritems():
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   122
            if isinstance(v, int):
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   123
                self._mapping[k] = set([index2fn[v]])
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   124
            else:
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   125
                self._mapping[k] = set(index2fn[i] for i in v)
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   126
        # no need to load keywords/desctypes
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   127
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   128
    def dump(self, stream, format):
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   129
        """Dump the frozen index to a stream."""
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   130
        if isinstance(format, basestring):
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   131
            format = self.formats[format]
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   132
        format.dump(self.freeze(), stream)
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   133
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   134
    def get_modules(self, fn2index):
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   135
        rv = {}
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   136
        for name, (doc, _, _, _) in self.env.modules.iteritems():
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   137
            rv[name] = fn2index[doc]
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   138
        return rv
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   139
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   140
    def get_descrefs(self, fn2index):
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   141
        rv = {}
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   142
        dt = self._desctypes
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   143
        for fullname, (doc, desctype) in self.env.descrefs.iteritems():
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   144
            prefix, name = rpartition(fullname, '.')
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   145
            pdict = rv.setdefault(prefix, {})
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   146
            try:
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   147
                i = dt[desctype]
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   148
            except KeyError:
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   149
                i = len(dt)
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   150
                dt[desctype] = i
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   151
            pdict[name] = (fn2index[doc], i)
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   152
        return rv
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   153
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   154
    def get_terms(self, fn2index):
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   155
        rv = {}
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   156
        for k, v in self._mapping.iteritems():
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   157
            if len(v) == 1:
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   158
                fn, = v
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   159
                rv[k] = fn2index[fn]
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   160
            else:
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   161
                rv[k] = [fn2index[fn] for fn in v]
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   162
        return rv
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   163
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   164
    def freeze(self):
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   165
        """Create a usable data structure for serializing."""
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   166
        filenames = self._titles.keys()
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   167
        titles = self._titles.values()
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   168
        fn2index = dict((f, i) for (i, f) in enumerate(filenames))
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   169
        return dict(
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   170
            filenames=filenames,
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   171
            titles=titles,
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   172
            terms=self.get_terms(fn2index),
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   173
            descrefs=self.get_descrefs(fn2index),
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   174
            modules=self.get_modules(fn2index),
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   175
            desctypes=dict((v, k) for (k, v) in self._desctypes.items()),
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   176
        )
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   177
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   178
    def prune(self, filenames):
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   179
        """Remove data for all filenames not in the list."""
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   180
        new_titles = {}
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   181
        for filename in filenames:
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   182
            if filename in self._titles:
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   183
                new_titles[filename] = self._titles[filename]
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   184
        self._titles = new_titles
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   185
        for wordnames in self._mapping.itervalues():
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   186
            wordnames.intersection_update(filenames)
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   187
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   188
    def feed(self, filename, title, doctree):
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   189
        """Feed a doctree to the index."""
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   190
        self._titles[filename] = title
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   191
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   192
        visitor = WordCollector(doctree)
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   193
        doctree.walk(visitor)
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   194
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   195
        def add_term(word, prefix='', stem=self._stemmer.stem):
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   196
            word = stem(word)
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   197
            if len(word) < 3 or word in stopwords or word.isdigit():
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   198
                return
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   199
            self._mapping.setdefault(prefix + word, set()).add(filename)
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   200
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   201
        for word in word_re.findall(title):
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   202
            add_term(word)
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   203
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   204
        for word in visitor.found_words:
be27ed110b50 Bringing in Helium, imaker and cmaker
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   205
            add_term(word)