src/extras/examples/dumbfeedparser.py
author Vijayan <ts.vijayan@nokia.com>
Tue, 16 Feb 2010 10:07:05 +0530
changeset 0 ca70ae20a155
permissions -rw-r--r--
Base Python2.0 code
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
0
ca70ae20a155 Base Python2.0 code
Vijayan <ts.vijayan@nokia.com>
parents:
diff changeset
     1
# A simple and limited RSS feed parser used in the RSS reader example.
ca70ae20a155 Base Python2.0 code
Vijayan <ts.vijayan@nokia.com>
parents:
diff changeset
     2
ca70ae20a155 Base Python2.0 code
Vijayan <ts.vijayan@nokia.com>
parents:
diff changeset
     3
# Copyright (c) 2005 Nokia Corporation
ca70ae20a155 Base Python2.0 code
Vijayan <ts.vijayan@nokia.com>
parents:
diff changeset
     4
#
ca70ae20a155 Base Python2.0 code
Vijayan <ts.vijayan@nokia.com>
parents:
diff changeset
     5
# Licensed under the Apache License, Version 2.0 (the "License");
ca70ae20a155 Base Python2.0 code
Vijayan <ts.vijayan@nokia.com>
parents:
diff changeset
     6
# you may not use this file except in compliance with the License.
ca70ae20a155 Base Python2.0 code
Vijayan <ts.vijayan@nokia.com>
parents:
diff changeset
     7
# You may obtain a copy of the License at
ca70ae20a155 Base Python2.0 code
Vijayan <ts.vijayan@nokia.com>
parents:
diff changeset
     8
#
ca70ae20a155 Base Python2.0 code
Vijayan <ts.vijayan@nokia.com>
parents:
diff changeset
     9
#     http://www.apache.org/licenses/LICENSE-2.0
ca70ae20a155 Base Python2.0 code
Vijayan <ts.vijayan@nokia.com>
parents:
diff changeset
    10
#
ca70ae20a155 Base Python2.0 code
Vijayan <ts.vijayan@nokia.com>
parents:
diff changeset
    11
# Unless required by applicable law or agreed to in writing, software
ca70ae20a155 Base Python2.0 code
Vijayan <ts.vijayan@nokia.com>
parents:
diff changeset
    12
# distributed under the License is distributed on an "AS IS" BASIS,
ca70ae20a155 Base Python2.0 code
Vijayan <ts.vijayan@nokia.com>
parents:
diff changeset
    13
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
ca70ae20a155 Base Python2.0 code
Vijayan <ts.vijayan@nokia.com>
parents:
diff changeset
    14
# See the License for the specific language governing permissions and
ca70ae20a155 Base Python2.0 code
Vijayan <ts.vijayan@nokia.com>
parents:
diff changeset
    15
# limitations under the License.
ca70ae20a155 Base Python2.0 code
Vijayan <ts.vijayan@nokia.com>
parents:
diff changeset
    16
ca70ae20a155 Base Python2.0 code
Vijayan <ts.vijayan@nokia.com>
parents:
diff changeset
    17
ca70ae20a155 Base Python2.0 code
Vijayan <ts.vijayan@nokia.com>
parents:
diff changeset
    18
import re
ca70ae20a155 Base Python2.0 code
Vijayan <ts.vijayan@nokia.com>
parents:
diff changeset
    19
import urllib
ca70ae20a155 Base Python2.0 code
Vijayan <ts.vijayan@nokia.com>
parents:
diff changeset
    20
ca70ae20a155 Base Python2.0 code
Vijayan <ts.vijayan@nokia.com>
parents:
diff changeset
    21
def parse(url):
ca70ae20a155 Base Python2.0 code
Vijayan <ts.vijayan@nokia.com>
parents:
diff changeset
    22
    return parse_feed(urllib.urlopen(url).read())
ca70ae20a155 Base Python2.0 code
Vijayan <ts.vijayan@nokia.com>
parents:
diff changeset
    23
ca70ae20a155 Base Python2.0 code
Vijayan <ts.vijayan@nokia.com>
parents:
diff changeset
    24
def parse_feed(text):
ca70ae20a155 Base Python2.0 code
Vijayan <ts.vijayan@nokia.com>
parents:
diff changeset
    25
    feed={}
ca70ae20a155 Base Python2.0 code
Vijayan <ts.vijayan@nokia.com>
parents:
diff changeset
    26
    items=[]
ca70ae20a155 Base Python2.0 code
Vijayan <ts.vijayan@nokia.com>
parents:
diff changeset
    27
    currentitem=[{}]
ca70ae20a155 Base Python2.0 code
Vijayan <ts.vijayan@nokia.com>
parents:
diff changeset
    28
ca70ae20a155 Base Python2.0 code
Vijayan <ts.vijayan@nokia.com>
parents:
diff changeset
    29
    def clean_entities(text): return re.sub('&[#0-9a-z]+;','?',text)
ca70ae20a155 Base Python2.0 code
Vijayan <ts.vijayan@nokia.com>
parents:
diff changeset
    30
    def clean_lf(text): return re.sub('[\n\t\r]',' ',text)
ca70ae20a155 Base Python2.0 code
Vijayan <ts.vijayan@nokia.com>
parents:
diff changeset
    31
ca70ae20a155 Base Python2.0 code
Vijayan <ts.vijayan@nokia.com>
parents:
diff changeset
    32
    def end_a(tag,content): write('LINK(%s)'%gettext())
ca70ae20a155 Base Python2.0 code
Vijayan <ts.vijayan@nokia.com>
parents:
diff changeset
    33
    def start_item(tag,content):
ca70ae20a155 Base Python2.0 code
Vijayan <ts.vijayan@nokia.com>
parents:
diff changeset
    34
        gettext()
ca70ae20a155 Base Python2.0 code
Vijayan <ts.vijayan@nokia.com>
parents:
diff changeset
    35
        write(content)
ca70ae20a155 Base Python2.0 code
Vijayan <ts.vijayan@nokia.com>
parents:
diff changeset
    36
        currentitem[0]={}
ca70ae20a155 Base Python2.0 code
Vijayan <ts.vijayan@nokia.com>
parents:
diff changeset
    37
    def end_item(tag,content):
ca70ae20a155 Base Python2.0 code
Vijayan <ts.vijayan@nokia.com>
parents:
diff changeset
    38
        items.append(currentitem[0])
ca70ae20a155 Base Python2.0 code
Vijayan <ts.vijayan@nokia.com>
parents:
diff changeset
    39
        currentitem[0]={}
ca70ae20a155 Base Python2.0 code
Vijayan <ts.vijayan@nokia.com>
parents:
diff changeset
    40
    def end_link(tag,content):
ca70ae20a155 Base Python2.0 code
Vijayan <ts.vijayan@nokia.com>
parents:
diff changeset
    41
        if within('item'):
ca70ae20a155 Base Python2.0 code
Vijayan <ts.vijayan@nokia.com>
parents:
diff changeset
    42
            currentitem[0]['link']=gettext()
ca70ae20a155 Base Python2.0 code
Vijayan <ts.vijayan@nokia.com>
parents:
diff changeset
    43
    def end_description(tag,content):
ca70ae20a155 Base Python2.0 code
Vijayan <ts.vijayan@nokia.com>
parents:
diff changeset
    44
        if within('item'):
ca70ae20a155 Base Python2.0 code
Vijayan <ts.vijayan@nokia.com>
parents:
diff changeset
    45
            currentitem[0]['summary']=clean_entities(gettext())
ca70ae20a155 Base Python2.0 code
Vijayan <ts.vijayan@nokia.com>
parents:
diff changeset
    46
    def end_title(tag,content):
ca70ae20a155 Base Python2.0 code
Vijayan <ts.vijayan@nokia.com>
parents:
diff changeset
    47
        text=clean_lf(gettext()).strip()
ca70ae20a155 Base Python2.0 code
Vijayan <ts.vijayan@nokia.com>
parents:
diff changeset
    48
        if within('item'):
ca70ae20a155 Base Python2.0 code
Vijayan <ts.vijayan@nokia.com>
parents:
diff changeset
    49
            currentitem[0]['title']=text
ca70ae20a155 Base Python2.0 code
Vijayan <ts.vijayan@nokia.com>
parents:
diff changeset
    50
        elif parentis('channel'):
ca70ae20a155 Base Python2.0 code
Vijayan <ts.vijayan@nokia.com>
parents:
diff changeset
    51
            feed['title']=text
ca70ae20a155 Base Python2.0 code
Vijayan <ts.vijayan@nokia.com>
parents:
diff changeset
    52
            
ca70ae20a155 Base Python2.0 code
Vijayan <ts.vijayan@nokia.com>
parents:
diff changeset
    53
    tagre=re.compile('([^ \n\t]+)(.*)>(.*)',re.S)
ca70ae20a155 Base Python2.0 code
Vijayan <ts.vijayan@nokia.com>
parents:
diff changeset
    54
    tagpath=[]
ca70ae20a155 Base Python2.0 code
Vijayan <ts.vijayan@nokia.com>
parents:
diff changeset
    55
    textbuffer=[[]]
ca70ae20a155 Base Python2.0 code
Vijayan <ts.vijayan@nokia.com>
parents:
diff changeset
    56
    assumed_encoding='latin-1'
ca70ae20a155 Base Python2.0 code
Vijayan <ts.vijayan@nokia.com>
parents:
diff changeset
    57
    lines=text.split('<')
ca70ae20a155 Base Python2.0 code
Vijayan <ts.vijayan@nokia.com>
parents:
diff changeset
    58
    def start_default(tag,content): write(content)
ca70ae20a155 Base Python2.0 code
Vijayan <ts.vijayan@nokia.com>
parents:
diff changeset
    59
    def end_default(tag,content): pass
ca70ae20a155 Base Python2.0 code
Vijayan <ts.vijayan@nokia.com>
parents:
diff changeset
    60
    def tag_default(tag,content): pass
ca70ae20a155 Base Python2.0 code
Vijayan <ts.vijayan@nokia.com>
parents:
diff changeset
    61
    def write(text): textbuffer[0].append(text)
ca70ae20a155 Base Python2.0 code
Vijayan <ts.vijayan@nokia.com>
parents:
diff changeset
    62
    def gettext():
ca70ae20a155 Base Python2.0 code
Vijayan <ts.vijayan@nokia.com>
parents:
diff changeset
    63
        text=''.join(textbuffer[0])
ca70ae20a155 Base Python2.0 code
Vijayan <ts.vijayan@nokia.com>
parents:
diff changeset
    64
        textbuffer[0]=[]
ca70ae20a155 Base Python2.0 code
Vijayan <ts.vijayan@nokia.com>
parents:
diff changeset
    65
        return unicode(text,assumed_encoding)
ca70ae20a155 Base Python2.0 code
Vijayan <ts.vijayan@nokia.com>
parents:
diff changeset
    66
    def current_tag(): return tagpath[-1]
ca70ae20a155 Base Python2.0 code
Vijayan <ts.vijayan@nokia.com>
parents:
diff changeset
    67
    def current_path(): return '/'.join(tagpath)
ca70ae20a155 Base Python2.0 code
Vijayan <ts.vijayan@nokia.com>
parents:
diff changeset
    68
    def within(tag): return tag in tagpath
ca70ae20a155 Base Python2.0 code
Vijayan <ts.vijayan@nokia.com>
parents:
diff changeset
    69
    def parentis(tag): return current_tag()==tag
ca70ae20a155 Base Python2.0 code
Vijayan <ts.vijayan@nokia.com>
parents:
diff changeset
    70
    for k in lines:
ca70ae20a155 Base Python2.0 code
Vijayan <ts.vijayan@nokia.com>
parents:
diff changeset
    71
        m=tagre.match(k)
ca70ae20a155 Base Python2.0 code
Vijayan <ts.vijayan@nokia.com>
parents:
diff changeset
    72
        if m:
ca70ae20a155 Base Python2.0 code
Vijayan <ts.vijayan@nokia.com>
parents:
diff changeset
    73
            (tag,attributes,content)=m.groups()
ca70ae20a155 Base Python2.0 code
Vijayan <ts.vijayan@nokia.com>
parents:
diff changeset
    74
            if tag.startswith('?'):
ca70ae20a155 Base Python2.0 code
Vijayan <ts.vijayan@nokia.com>
parents:
diff changeset
    75
                continue
ca70ae20a155 Base Python2.0 code
Vijayan <ts.vijayan@nokia.com>
parents:
diff changeset
    76
            if tag.startswith('/'):
ca70ae20a155 Base Python2.0 code
Vijayan <ts.vijayan@nokia.com>
parents:
diff changeset
    77
                tagname=tag[1:]
ca70ae20a155 Base Python2.0 code
Vijayan <ts.vijayan@nokia.com>
parents:
diff changeset
    78
                handler='end_%s'%tagname
ca70ae20a155 Base Python2.0 code
Vijayan <ts.vijayan@nokia.com>
parents:
diff changeset
    79
                generic_handler=end_default
ca70ae20a155 Base Python2.0 code
Vijayan <ts.vijayan@nokia.com>
parents:
diff changeset
    80
                if current_tag() != tagname:
ca70ae20a155 Base Python2.0 code
Vijayan <ts.vijayan@nokia.com>
parents:
diff changeset
    81
                    pass # Unbalanced tags, just ignore for now.
ca70ae20a155 Base Python2.0 code
Vijayan <ts.vijayan@nokia.com>
parents:
diff changeset
    82
                del tagpath[-1]
ca70ae20a155 Base Python2.0 code
Vijayan <ts.vijayan@nokia.com>
parents:
diff changeset
    83
            elif tag.endswith('/'):
ca70ae20a155 Base Python2.0 code
Vijayan <ts.vijayan@nokia.com>
parents:
diff changeset
    84
                tagname=tag[0:-1]
ca70ae20a155 Base Python2.0 code
Vijayan <ts.vijayan@nokia.com>
parents:
diff changeset
    85
                handler='tag_%s'%tagname
ca70ae20a155 Base Python2.0 code
Vijayan <ts.vijayan@nokia.com>
parents:
diff changeset
    86
                generic_handler=tag_default
ca70ae20a155 Base Python2.0 code
Vijayan <ts.vijayan@nokia.com>
parents:
diff changeset
    87
            else:
ca70ae20a155 Base Python2.0 code
Vijayan <ts.vijayan@nokia.com>
parents:
diff changeset
    88
                tagname=tag
ca70ae20a155 Base Python2.0 code
Vijayan <ts.vijayan@nokia.com>
parents:
diff changeset
    89
                handler='start_%s'%tagname
ca70ae20a155 Base Python2.0 code
Vijayan <ts.vijayan@nokia.com>
parents:
diff changeset
    90
                generic_handler=start_default
ca70ae20a155 Base Python2.0 code
Vijayan <ts.vijayan@nokia.com>
parents:
diff changeset
    91
                tagpath.append(tagname)
ca70ae20a155 Base Python2.0 code
Vijayan <ts.vijayan@nokia.com>
parents:
diff changeset
    92
            locals().get(handler,generic_handler)(tagname,content)
ca70ae20a155 Base Python2.0 code
Vijayan <ts.vijayan@nokia.com>
parents:
diff changeset
    93
        else:
ca70ae20a155 Base Python2.0 code
Vijayan <ts.vijayan@nokia.com>
parents:
diff changeset
    94
            pass # Malformed line, just ignore.
ca70ae20a155 Base Python2.0 code
Vijayan <ts.vijayan@nokia.com>
parents:
diff changeset
    95
        
ca70ae20a155 Base Python2.0 code
Vijayan <ts.vijayan@nokia.com>
parents:
diff changeset
    96
    feed['entries']=items
ca70ae20a155 Base Python2.0 code
Vijayan <ts.vijayan@nokia.com>
parents:
diff changeset
    97
    return feed
ca70ae20a155 Base Python2.0 code
Vijayan <ts.vijayan@nokia.com>
parents:
diff changeset
    98