FCL/sf/incubator/python: comparison src/extras/examples/dumbfeedparser.py

equal deleted inserted replaced

--1:000000000000
+:ca70ae20a155
+# A simple and limited RSS feed parser used in the RSS reader example.
+# Copyright (c) 2005 Nokia Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import re
+import urllib
def parse(url):
    """Download the feed at *url* and return it parsed by parse_feed().

    The raw response body is handed to parse_feed() unchanged.  The
    response object is always closed, even when reading fails, so the
    underlying connection is not leaked.
    """
    # Resolve urlopen locally so the function works whether the runtime
    # offers the Python 3 (urllib.request) or Python 2 (urllib) layout.
    try:
        from urllib.request import urlopen
    except ImportError:
        from urllib import urlopen
    response = urlopen(url)
    try:
        data = response.read()
    finally:
        response.close()
    return parse_feed(data)
def parse_feed(text):
    """Parse RSS markup and return a dictionary describing the feed.

    This is a deliberately simple tokenizer, not a real XML parser: the
    input is split on '<' and every piece is treated as one tag followed
    by the text up to the next tag.

    text -- the feed markup.  Undecoded (byte) input is decoded as
            latin-1; already decoded text is used as it is.

    Returns a dictionary with:
      'title'   -- text of a <title> whose parent is <channel> (only
                   present when such a title was found)
      'entries' -- list with one dictionary per <item>, holding whichever
                   of 'title', 'link' and 'summary' the item provided.
                   'summary' comes from <description> with every
                   character entity replaced by '?'.

    Known limitations: text that follows a closing or self-closing tag is
    discarded, and markup inside a CDATA section is tokenized like any
    other markup.
    """
    assumed_encoding = 'latin-1'
    # Decode once at the boundary.  Handlers such as end_a() push already
    # processed text back into the buffer, so decoding on every gettext()
    # would try to decode the same text twice.
    if not isinstance(text, type(u'')):
        text = text.decode(assumed_encoding)

    feed = {}
    items = []
    # One-element lists let the nested handlers replace the value without
    # needing to rebind a name of the enclosing function.
    currentitem = [{}]
    textbuffer = [[]]
    tagpath = []  # Names of the currently open tags, outermost first.

    def write(chunk):
        textbuffer[0].append(chunk)

    def gettext():
        """Return the buffered text and empty the buffer."""
        collected = u''.join(textbuffer[0])
        textbuffer[0] = []
        return collected

    def current_tag():
        """Return the innermost open tag, or None when nothing is open."""
        if tagpath:
            return tagpath[-1]
        return None

    def within(tag):
        return tag in tagpath

    def parentis(tag):
        return current_tag() == tag

    def close_tag(tagname):
        """Close tagname together with any children left open inside it.

        Returns False, leaving the stack untouched, when no such tag is
        open (a stray closing tag).
        """
        depth = len(tagpath) - 1
        while depth >= 0:
            if tagpath[depth] == tagname:
                del tagpath[depth:]
                return True
            depth -= 1
        return False

    def clean_entities(value):
        return re.sub('&[#0-9a-z]+;', '?', value)

    def clean_lf(value):
        return re.sub('[\n\t\r]', ' ', value)

    def start_default(tag, content):
        write(content)

    def end_default(tag, content):
        pass

    def tag_default(tag, content):
        pass

    def start_fresh(tag, content):
        # Drop text left behind by earlier sibling elements so it cannot
        # leak into the value captured for this element.
        gettext()
        write(content)

    def start_item(tag, content):
        start_fresh(tag, content)
        currentitem[0] = {}

    def end_item(tag, content):
        items.append(currentitem[0])
        currentitem[0] = {}

    def end_a(tag, content):
        write('LINK(%s)' % gettext())

    def end_link(tag, content):
        if within('item'):
            currentitem[0]['link'] = gettext()

    def end_description(tag, content):
        if within('item'):
            currentitem[0]['summary'] = clean_entities(gettext())

    def end_title(tag, content):
        title = clean_lf(gettext()).strip()
        if within('item'):
            currentitem[0]['title'] = title
        elif parentis('channel'):
            # End handlers run after the tag was popped, so the current
            # tag is the parent of the <title> that just closed.
            feed['title'] = title

    # Explicit dispatch table; anything not listed uses the generic
    # handler of its kind.
    handlers = {
        'start_item': start_item,
        'start_title': start_fresh,
        'start_link': start_fresh,
        'start_description': start_fresh,
        'end_item': end_item,
        'end_a': end_a,
        'end_link': end_link,
        'end_description': end_description,
        'end_title': end_title,
    }

    # Unwrap CDATA sections so that their content is seen as ordinary
    # element text instead of an unknown tag.
    cdata_re = re.compile('<!\\[CDATA\\[(.*?)\\]\\]>', re.S)
    text = cdata_re.sub(lambda found: found.group(1), text)

    # Neither the tag name nor the attributes may contain '>', so a '>'
    # inside the element text stays part of the text.
    tagre = re.compile('([^ \n\t\r>]+)([^>]*)>(.*)', re.S)

    # The piece in front of the first '<' can never be a tag.
    for chunk in text.split('<')[1:]:
        m = tagre.match(chunk)
        if not m:
            continue  # Malformed piece, just ignore.
        (tag, attributes, content) = m.groups()
        if tag[0] in '?!':
            # Processing instruction, comment or DOCTYPE.
            continue
        if tag.startswith('/'):
            tagname = tag[1:]
            if not close_tag(tagname):
                continue  # Closing tag that was never opened, ignore.
            handler = 'end_%s' % tagname
            generic_handler = end_default
        elif tag.endswith('/'):
            tagname = tag[0:-1]
            handler = 'tag_%s' % tagname
            generic_handler = tag_default
        elif attributes.rstrip().endswith('/'):
            # Self-closing tag written with attributes or a space before
            # the slash, e.g. <enclosure url="..." />.
            tagname = tag
            handler = 'tag_%s' % tagname
            generic_handler = tag_default
        else:
            tagname = tag
            handler = 'start_%s' % tagname
            generic_handler = start_default
            tagpath.append(tagname)
        handlers.get(handler, generic_handler)(tagname, content)

    feed['entries'] = items
    return feed