symbian-qemu-0.9.1-12/python-2.6.1/Lib/test/test_robotparser.py
author Brendan Donegan <brendand@symbian.org>
Thu, 01 Jul 2010 14:51:21 +0100
branchnvmemory
changeset 81 0f0bbe5dfe4b
parent 1 2fb8b9db1c86
permissions -rw-r--r--
Added mednvmemory to base_syborg IBY
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
1
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
     1
import unittest, StringIO, robotparser
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
     2
from test import test_support
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
     3
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
     4
class RobotTestCase(unittest.TestCase):
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
     5
    def __init__(self, index, parser, url, good, agent):
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
     6
        unittest.TestCase.__init__(self)
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
     7
        if good:
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
     8
            self.str = "RobotTest(%d, good, %s)" % (index, url)
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
     9
        else:
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    10
            self.str = "RobotTest(%d, bad, %s)" % (index, url)
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    11
        self.parser = parser
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    12
        self.url = url
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    13
        self.good = good
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    14
        self.agent = agent
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    15
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    16
    def runTest(self):
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    17
        if isinstance(self.url, tuple):
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    18
            agent, url = self.url
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    19
        else:
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    20
            url = self.url
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    21
            agent = self.agent
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    22
        if self.good:
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    23
            self.failUnless(self.parser.can_fetch(agent, url))
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    24
        else:
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    25
            self.failIf(self.parser.can_fetch(agent, url))
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    26
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    27
    def __str__(self):
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    28
        return self.str
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    29
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    30
tests = unittest.TestSuite()
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    31
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    32
def RobotTest(index, robots_txt, good_urls, bad_urls,
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    33
              agent="test_robotparser"):
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    34
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    35
    lines = StringIO.StringIO(robots_txt).readlines()
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    36
    parser = robotparser.RobotFileParser()
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    37
    parser.parse(lines)
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    38
    for url in good_urls:
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    39
        tests.addTest(RobotTestCase(index, parser, url, 1, agent))
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    40
    for url in bad_urls:
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    41
        tests.addTest(RobotTestCase(index, parser, url, 0, agent))
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    42
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    43
# Examples from http://www.robotstxt.org/wc/norobots.html (fetched 2002)
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    44
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    45
# 1.
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    46
doc = """
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    47
User-agent: *
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    48
Disallow: /cyberworld/map/ # This is an infinite virtual URL space
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    49
Disallow: /tmp/ # these will soon disappear
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    50
Disallow: /foo.html
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    51
"""
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    52
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    53
good = ['/','/test.html']
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    54
bad = ['/cyberworld/map/index.html','/tmp/xxx','/foo.html']
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    55
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    56
RobotTest(1, doc, good, bad)
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    57
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    58
# 2.
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    59
doc = """
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    60
# robots.txt for http://www.example.com/
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    61
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    62
User-agent: *
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    63
Disallow: /cyberworld/map/ # This is an infinite virtual URL space
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    64
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    65
# Cybermapper knows where to go.
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    66
User-agent: cybermapper
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    67
Disallow:
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    68
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    69
"""
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    70
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    71
good = ['/','/test.html',('cybermapper','/cyberworld/map/index.html')]
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    72
bad = ['/cyberworld/map/index.html']
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    73
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    74
RobotTest(2, doc, good, bad)
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    75
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    76
# 3.
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    77
doc = """
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    78
# go away
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    79
User-agent: *
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    80
Disallow: /
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    81
"""
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    82
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    83
good = []
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    84
bad = ['/cyberworld/map/index.html','/','/tmp/']
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    85
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    86
RobotTest(3, doc, good, bad)
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    87
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    88
# Examples from http://www.robotstxt.org/wc/norobots-rfc.html (fetched 2002)
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    89
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    90
# 4.
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    91
doc = """
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    92
User-agent: figtree
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    93
Disallow: /tmp
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    94
Disallow: /a%3cd.html
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    95
Disallow: /a%2fb.html
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    96
Disallow: /%7ejoe/index.html
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    97
"""
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    98
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    99
good = [] # XFAIL '/a/b.html'
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   100
bad = ['/tmp','/tmp.html','/tmp/a.html',
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   101
       '/a%3cd.html','/a%3Cd.html','/a%2fb.html',
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   102
       '/~joe/index.html'
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   103
       ]
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   104
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   105
RobotTest(4, doc, good, bad, 'figtree')
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   106
RobotTest(5, doc, good, bad, 'FigTree Robot libwww-perl/5.04')
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   107
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   108
# 6.
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   109
doc = """
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   110
User-agent: *
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   111
Disallow: /tmp/
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   112
Disallow: /a%3Cd.html
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   113
Disallow: /a/b.html
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   114
Disallow: /%7ejoe/index.html
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   115
"""
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   116
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   117
good = ['/tmp',] # XFAIL: '/a%2fb.html'
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   118
bad = ['/tmp/','/tmp/a.html',
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   119
       '/a%3cd.html','/a%3Cd.html',"/a/b.html",
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   120
       '/%7Ejoe/index.html']
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   121
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   122
RobotTest(6, doc, good, bad)
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   123
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   124
# From bug report #523041
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   125
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   126
# 7.
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   127
doc = """
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   128
User-Agent: *
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   129
Disallow: /.
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   130
"""
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   131
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   132
good = ['/foo.html']
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   133
bad = [] # Bug report says "/" should be denied, but that is not in the RFC
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   134
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   135
RobotTest(7, doc, good, bad)
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   136
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   137
# From Google: http://www.google.com/support/webmasters/bin/answer.py?hl=en&answer=40364
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   138
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   139
# 8.
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   140
doc = """
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   141
User-agent: Googlebot
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   142
Allow: /folder1/myfile.html
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   143
Disallow: /folder1/
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   144
"""
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   145
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   146
good = ['/folder1/myfile.html']
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   147
bad = ['/folder1/anotherfile.html']
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   148
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   149
RobotTest(8, doc, good, bad, agent="Googlebot")
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   150
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   151
# 9.  This file is incorrect because "Googlebot" is a substring of
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   152
#     "Googlebot-Mobile", so test 10 works just like test 9.
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   153
doc = """
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   154
User-agent: Googlebot
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   155
Disallow: /
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   156
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   157
User-agent: Googlebot-Mobile
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   158
Allow: /
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   159
"""
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   160
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   161
good = []
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   162
bad = ['/something.jpg']
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   163
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   164
RobotTest(9, doc, good, bad, agent="Googlebot")
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   165
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   166
good = []
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   167
bad = ['/something.jpg']
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   168
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   169
RobotTest(10, doc, good, bad, agent="Googlebot-Mobile")
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   170
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   171
# 11.  Get the order correct.
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   172
doc = """
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   173
User-agent: Googlebot-Mobile
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   174
Allow: /
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   175
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   176
User-agent: Googlebot
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   177
Disallow: /
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   178
"""
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   179
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   180
good = []
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   181
bad = ['/something.jpg']
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   182
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   183
RobotTest(11, doc, good, bad, agent="Googlebot")
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   184
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   185
good = ['/something.jpg']
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   186
bad = []
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   187
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   188
RobotTest(12, doc, good, bad, agent="Googlebot-Mobile")
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   189
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   190
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   191
# 13.  Google also got the order wrong in #8.  You need to specify the
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   192
#      URLs from more specific to more general.
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   193
doc = """
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   194
User-agent: Googlebot
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   195
Allow: /folder1/myfile.html
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   196
Disallow: /folder1/
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   197
"""
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   198
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   199
good = ['/folder1/myfile.html']
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   200
bad = ['/folder1/anotherfile.html']
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   201
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   202
RobotTest(13, doc, good, bad, agent="googlebot")
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   203
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   204
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   205
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   206
class TestCase(unittest.TestCase):
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   207
    def runTest(self):
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   208
        test_support.requires('network')
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   209
        # whole site is password-protected.
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   210
        url = 'http://mueblesmoraleda.com'
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   211
        parser = robotparser.RobotFileParser()
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   212
        parser.set_url(url)
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   213
        parser.read()
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   214
        self.assertEqual(parser.can_fetch("*", url+"/robots.txt"), False)
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   215
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   216
def test_main():
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   217
    test_support.run_unittest(tests)
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   218
    TestCase().run()
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   219
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   220
if __name__=='__main__':
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   221
    test_support.verbose = 1
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   222
    test_main()