symbian-qemu-0.9.1-12/python-2.6.1/Lib/test/test_re.py
changeset 1 2fb8b9db1c86
equal deleted inserted replaced
0:ffa851df0825 1:2fb8b9db1c86
       
     1 import sys
       
     2 sys.path = ['.'] + sys.path
       
     3 
       
     4 from test.test_support import verbose, run_unittest
       
     5 import re
       
     6 from re import Scanner
       
     7 import sys, os, traceback
       
     8 from weakref import proxy
       
     9 
       
    10 # Misc tests from Tim Peters' re.doc
       
    11 
       
    12 # WARNING: Don't change details in these tests if you don't know
       
    13 # what you're doing. Some of these tests were carefully modeled to
       
    14 # cover most of the code.
       
    15 
       
    16 import unittest
       
    17 
       
    18 class ReTests(unittest.TestCase):
       
    19 
       
    20     def test_weakref(self):
       
    21         s = 'QabbbcR'
       
    22         x = re.compile('ab+c')
       
    23         y = proxy(x)
       
    24         self.assertEqual(x.findall('QabbbcR'), y.findall('QabbbcR'))
       
    25 
       
    26     def test_search_star_plus(self):
       
    27         self.assertEqual(re.search('x*', 'axx').span(0), (0, 0))
       
    28         self.assertEqual(re.search('x*', 'axx').span(), (0, 0))
       
    29         self.assertEqual(re.search('x+', 'axx').span(0), (1, 3))
       
    30         self.assertEqual(re.search('x+', 'axx').span(), (1, 3))
       
    31         self.assertEqual(re.search('x', 'aaa'), None)
       
    32         self.assertEqual(re.match('a*', 'xxx').span(0), (0, 0))
       
    33         self.assertEqual(re.match('a*', 'xxx').span(), (0, 0))
       
    34         self.assertEqual(re.match('x*', 'xxxa').span(0), (0, 3))
       
    35         self.assertEqual(re.match('x*', 'xxxa').span(), (0, 3))
       
    36         self.assertEqual(re.match('a+', 'xxx'), None)
       
    37 
       
    def bump_num(self, matchobj):
        # Replacement callback for re.sub(): read the whole match as a
        # decimal integer and return that value plus one, as a string.
        return str(int(matchobj.group(0)) + 1)
       
    41 
       
    def test_basic_re_sub(self):
        # Inline flag, callable replacement and the optional count argument.
        self.assertEqual(re.sub("(?i)b+", "x", "bbbb BBBB"), 'x x')
        numbers = '08.2 -2 23x99y'
        self.assertEqual(re.sub(r'\d+', self.bump_num, numbers),
                         '9.3 -3 24x100y')
        self.assertEqual(re.sub(r'\d+', self.bump_num, numbers, 3),
                         '9.3 -3 23x99y')

        # What a callable returns is inserted verbatim; a template string
        # has its escapes expanded.
        self.assertEqual(re.sub('.', lambda m: r"\n", 'x'), '\\n')
        self.assertEqual(re.sub('.', r"\n", 'x'), '\n')

        s = r"\1\1"
        self.assertEqual(re.sub('(.)', s, 'x'), 'xx')
        self.assertEqual(re.sub('(.)', re.escape(s), 'x'), s)
        self.assertEqual(re.sub('(.)', lambda m: s, 'x'), s)

        # Group references by name and by number.
        for pattern, template in (
                ('(?P<a>x)', r'\g<a>\g<a>'),
                ('(?P<a>x)', r'\g<a>\g<1>'),
                ('(?P<unk>x)', r'\g<unk>\g<unk>'),
                ('(?P<unk>x)', r'\g<1>\g<1>'),
        ):
            self.assertEqual(re.sub(pattern, template, 'xx'), 'xxxx')

        # Escapes written in a raw template versus already-cooked
        # control characters.
        self.assertEqual(
            re.sub('a', r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D', 'a'),
            '\t\n\v\r\f\a\b\\B\\Z\a\\A\\w\\W\\s\\S\\d\\D')
        cooked = '\t\n\v\r\f\a'
        self.assertEqual(re.sub('a', cooked, 'a'), cooked)
        self.assertEqual(re.sub('a', cooked, 'a'),
                         ''.join(map(chr, (9, 10, 11, 13, 12, 7))))

        self.assertEqual(re.sub(r'^\s*', 'X', 'test'), 'Xtest')
       
    69 
       
    70     def test_bug_449964(self):
       
    71         # fails for group followed by other escape
       
    72         self.assertEqual(re.sub(r'(?P<unk>x)', '\g<1>\g<1>\\b', 'xx'),
       
    73                          'xx\bxx\b')
       
    74 
       
    75     def test_bug_449000(self):
       
    76         # Test for sub() on escaped characters
       
    77         self.assertEqual(re.sub(r'\r\n', r'\n', 'abc\r\ndef\r\n'),
       
    78                          'abc\ndef\n')
       
    79         self.assertEqual(re.sub('\r\n', r'\n', 'abc\r\ndef\r\n'),
       
    80                          'abc\ndef\n')
       
    81         self.assertEqual(re.sub(r'\r\n', '\n', 'abc\r\ndef\r\n'),
       
    82                          'abc\ndef\n')
       
    83         self.assertEqual(re.sub('\r\n', '\n', 'abc\r\ndef\r\n'),
       
    84                          'abc\ndef\n')
       
    85 
       
    def test_bug_1140(self):
        # re.sub(x, y, u'') should return u'', not '', and
        # re.sub(x, y, '') should return '', not u''.
        # Also:
        # re.sub(x, y, unicode(x)) should return unicode(y), and
        # re.sub(x, y, str(x)) should return
        #     str(y) if isinstance(y, str) else unicode(y).
        for x in 'x', u'x':
            for y in 'y', u'y':
                # Empty subject: the result keeps the subject's type.
                for empty in u'', '':
                    z = re.sub(x, y, empty)
                    self.assertEqual(z, empty)
                    self.assertEqual(type(z), type(empty))
                # Unicode subject: the result is always unicode.
                z = re.sub(x, y, unicode(x))
                self.assertEqual(z, y)
                self.assertEqual(type(z), unicode)
                # Byte-string subject: the result takes the type of y.
                z = re.sub(x, y, str(x))
                self.assertEqual(z, y)
                self.assertEqual(type(z), type(y))
       
   110 
       
   111     def test_bug_1661(self):
       
   112         # Verify that flags do not get silently ignored with compiled patterns
       
   113         pattern = re.compile('.')
       
   114         self.assertRaises(ValueError, re.match, pattern, 'A', re.I)
       
   115         self.assertRaises(ValueError, re.search, pattern, 'A', re.I)
       
   116         self.assertRaises(ValueError, re.findall, pattern, 'A', re.I)
       
   117         self.assertRaises(ValueError, re.compile, pattern, re.I)
       
   118 
       
   119     def test_bug_3629(self):
       
   120         # A regex that triggered a bug in the sre-code validator
       
   121         re.compile("(?P<quote>)(?(quote))")
       
   122 
       
   123     def test_sub_template_numeric_escape(self):
       
   124         # bug 776311 and friends
       
   125         self.assertEqual(re.sub('x', r'\0', 'x'), '\0')
       
   126         self.assertEqual(re.sub('x', r'\000', 'x'), '\000')
       
   127         self.assertEqual(re.sub('x', r'\001', 'x'), '\001')
       
   128         self.assertEqual(re.sub('x', r'\008', 'x'), '\0' + '8')
       
   129         self.assertEqual(re.sub('x', r'\009', 'x'), '\0' + '9')
       
   130         self.assertEqual(re.sub('x', r'\111', 'x'), '\111')
       
   131         self.assertEqual(re.sub('x', r'\117', 'x'), '\117')
       
   132 
       
   133         self.assertEqual(re.sub('x', r'\1111', 'x'), '\1111')
       
   134         self.assertEqual(re.sub('x', r'\1111', 'x'), '\111' + '1')
       
   135 
       
   136         self.assertEqual(re.sub('x', r'\00', 'x'), '\x00')
       
   137         self.assertEqual(re.sub('x', r'\07', 'x'), '\x07')
       
   138         self.assertEqual(re.sub('x', r'\08', 'x'), '\0' + '8')
       
   139         self.assertEqual(re.sub('x', r'\09', 'x'), '\0' + '9')
       
   140         self.assertEqual(re.sub('x', r'\0a', 'x'), '\0' + 'a')
       
   141 
       
   142         self.assertEqual(re.sub('x', r'\400', 'x'), '\0')
       
   143         self.assertEqual(re.sub('x', r'\777', 'x'), '\377')
       
   144 
       
   145         self.assertRaises(re.error, re.sub, 'x', r'\1', 'x')
       
   146         self.assertRaises(re.error, re.sub, 'x', r'\8', 'x')
       
   147         self.assertRaises(re.error, re.sub, 'x', r'\9', 'x')
       
   148         self.assertRaises(re.error, re.sub, 'x', r'\11', 'x')
       
   149         self.assertRaises(re.error, re.sub, 'x', r'\18', 'x')
       
   150         self.assertRaises(re.error, re.sub, 'x', r'\1a', 'x')
       
   151         self.assertRaises(re.error, re.sub, 'x', r'\90', 'x')
       
   152         self.assertRaises(re.error, re.sub, 'x', r'\99', 'x')
       
   153         self.assertRaises(re.error, re.sub, 'x', r'\118', 'x') # r'\11' + '8'
       
   154         self.assertRaises(re.error, re.sub, 'x', r'\11a', 'x')
       
   155         self.assertRaises(re.error, re.sub, 'x', r'\181', 'x') # r'\18' + '1'
       
   156         self.assertRaises(re.error, re.sub, 'x', r'\800', 'x') # r'\80' + '0'
       
   157 
       
   158         # in python2.3 (etc), these loop endlessly in sre_parser.py
       
   159         self.assertEqual(re.sub('(((((((((((x)))))))))))', r'\11', 'x'), 'x')
       
   160         self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\118', 'xyz'),
       
   161                          'xz8')
       
   162         self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\11a', 'xyz'),
       
   163                          'xza')
       
   164 
       
   165     def test_qualified_re_sub(self):
       
   166         self.assertEqual(re.sub('a', 'b', 'aaaaa'), 'bbbbb')
       
   167         self.assertEqual(re.sub('a', 'b', 'aaaaa', 1), 'baaaa')
       
   168 
       
   169     def test_bug_114660(self):
       
   170         self.assertEqual(re.sub(r'(\S)\s+(\S)', r'\1 \2', 'hello  there'),
       
   171                          'hello there')
       
   172 
       
   173     def test_bug_462270(self):
       
   174         # Test for empty sub() behaviour, see SF bug #462270
       
   175         self.assertEqual(re.sub('x*', '-', 'abxd'), '-a-b-d-')
       
   176         self.assertEqual(re.sub('x+', '-', 'abxd'), 'ab-d')
       
   177 
       
   178     def test_symbolic_refs(self):
       
   179         self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a', 'xx')
       
   180         self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<', 'xx')
       
   181         self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g', 'xx')
       
   182         self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a a>', 'xx')
       
   183         self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<1a1>', 'xx')
       
   184         self.assertRaises(IndexError, re.sub, '(?P<a>x)', '\g<ab>', 'xx')
       
   185         self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\g<b>', 'xx')
       
   186         self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\\2', 'xx')
       
   187         self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<-1>', 'xx')
       
   188 
       
   189     def test_re_subn(self):
       
   190         self.assertEqual(re.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2))
       
   191         self.assertEqual(re.subn("b+", "x", "bbbb BBBB"), ('x BBBB', 1))
       
   192         self.assertEqual(re.subn("b+", "x", "xyz"), ('xyz', 0))
       
   193         self.assertEqual(re.subn("b*", "x", "xyz"), ('xxxyxzx', 4))
       
   194         self.assertEqual(re.subn("b*", "x", "xyz", 2), ('xxxyz', 2))
       
   195 
       
   196     def test_re_split(self):
       
   197         self.assertEqual(re.split(":", ":a:b::c"), ['', 'a', 'b', '', 'c'])
       
   198         self.assertEqual(re.split(":*", ":a:b::c"), ['', 'a', 'b', 'c'])
       
   199         self.assertEqual(re.split("(:*)", ":a:b::c"),
       
   200                          ['', ':', 'a', ':', 'b', '::', 'c'])
       
   201         self.assertEqual(re.split("(?::*)", ":a:b::c"), ['', 'a', 'b', 'c'])
       
   202         self.assertEqual(re.split("(:)*", ":a:b::c"),
       
   203                          ['', ':', 'a', ':', 'b', ':', 'c'])
       
   204         self.assertEqual(re.split("([b:]+)", ":a:b::c"),
       
   205                          ['', ':', 'a', ':b::', 'c'])
       
   206         self.assertEqual(re.split("(b)|(:+)", ":a:b::c"),
       
   207                          ['', None, ':', 'a', None, ':', '', 'b', None, '',
       
   208                           None, '::', 'c'])
       
   209         self.assertEqual(re.split("(?:b)|(?::+)", ":a:b::c"),
       
   210                          ['', 'a', '', '', 'c'])
       
   211 
       
   212     def test_qualified_re_split(self):
       
   213         self.assertEqual(re.split(":", ":a:b::c", 2), ['', 'a', 'b::c'])
       
   214         self.assertEqual(re.split(':', 'a:b:c:d', 2), ['a', 'b', 'c:d'])
       
   215         self.assertEqual(re.split("(:)", ":a:b::c", 2),
       
   216                          ['', ':', 'a', ':', 'b::c'])
       
   217         self.assertEqual(re.split("(:*)", ":a:b::c", 2),
       
   218                          ['', ':', 'a', ':', 'b::c'])
       
   219 
       
   220     def test_re_findall(self):
       
   221         self.assertEqual(re.findall(":+", "abc"), [])
       
   222         self.assertEqual(re.findall(":+", "a:b::c:::d"), [":", "::", ":::"])
       
   223         self.assertEqual(re.findall("(:+)", "a:b::c:::d"), [":", "::", ":::"])
       
   224         self.assertEqual(re.findall("(:)(:*)", "a:b::c:::d"), [(":", ""),
       
   225                                                                (":", ":"),
       
   226                                                                (":", "::")])
       
   227 
       
   228     def test_bug_117612(self):
       
   229         self.assertEqual(re.findall(r"(a|(b))", "aba"),
       
   230                          [("a", ""),("b", "b"),("a", "")])
       
   231 
       
   232     def test_re_match(self):
       
   233         self.assertEqual(re.match('a', 'a').groups(), ())
       
   234         self.assertEqual(re.match('(a)', 'a').groups(), ('a',))
       
   235         self.assertEqual(re.match(r'(a)', 'a').group(0), 'a')
       
   236         self.assertEqual(re.match(r'(a)', 'a').group(1), 'a')
       
   237         self.assertEqual(re.match(r'(a)', 'a').group(1, 1), ('a', 'a'))
       
   238 
       
   239         pat = re.compile('((a)|(b))(c)?')
       
   240         self.assertEqual(pat.match('a').groups(), ('a', 'a', None, None))
       
   241         self.assertEqual(pat.match('b').groups(), ('b', None, 'b', None))
       
   242         self.assertEqual(pat.match('ac').groups(), ('a', 'a', None, 'c'))
       
   243         self.assertEqual(pat.match('bc').groups(), ('b', None, 'b', 'c'))
       
   244         self.assertEqual(pat.match('bc').groups(""), ('b', "", 'b', 'c'))
       
   245 
       
   246         # A single group
       
   247         m = re.match('(a)', 'a')
       
   248         self.assertEqual(m.group(0), 'a')
       
   249         self.assertEqual(m.group(0), 'a')
       
   250         self.assertEqual(m.group(1), 'a')
       
   251         self.assertEqual(m.group(1, 1), ('a', 'a'))
       
   252 
       
   253         pat = re.compile('(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?')
       
   254         self.assertEqual(pat.match('a').group(1, 2, 3), ('a', None, None))
       
   255         self.assertEqual(pat.match('b').group('a1', 'b2', 'c3'),
       
   256                          (None, 'b', None))
       
   257         self.assertEqual(pat.match('ac').group(1, 'b2', 3), ('a', None, 'c'))
       
   258 
       
   259     def test_re_groupref_exists(self):
       
   260         self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a)').groups(),
       
   261                          ('(', 'a'))
       
   262         self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a').groups(),
       
   263                          (None, 'a'))
       
   264         self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a)'), None)
       
   265         self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a'), None)
       
   266         self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'ab').groups(),
       
   267                          ('a', 'b'))
       
   268         self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'cd').groups(),
       
   269                          (None, 'd'))
       
   270         self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'cd').groups(),
       
   271                          (None, 'd'))
       
   272         self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'a').groups(),
       
   273                          ('a', ''))
       
   274 
       
   275         # Tests for bug #1177831: exercise groups other than the first group
       
   276         p = re.compile('(?P<g1>a)(?P<g2>b)?((?(g2)c|d))')
       
   277         self.assertEqual(p.match('abc').groups(),
       
   278                          ('a', 'b', 'c'))
       
   279         self.assertEqual(p.match('ad').groups(),
       
   280                          ('a', None, 'd'))
       
   281         self.assertEqual(p.match('abd'), None)
       
   282         self.assertEqual(p.match('ac'), None)
       
   283 
       
   284 
       
   285     def test_re_groupref(self):
       
   286         self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a|').groups(),
       
   287                          ('|', 'a'))
       
   288         self.assertEqual(re.match(r'^(\|)?([^()]+)\1?$', 'a').groups(),
       
   289                          (None, 'a'))
       
   290         self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', 'a|'), None)
       
   291         self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a'), None)
       
   292         self.assertEqual(re.match(r'^(?:(a)|c)(\1)$', 'aa').groups(),
       
   293                          ('a', 'a'))
       
   294         self.assertEqual(re.match(r'^(?:(a)|c)(\1)?$', 'c').groups(),
       
   295                          (None, None))
       
   296 
       
   297     def test_groupdict(self):
       
   298         self.assertEqual(re.match('(?P<first>first) (?P<second>second)',
       
   299                                   'first second').groupdict(),
       
   300                          {'first':'first', 'second':'second'})
       
   301 
       
   302     def test_expand(self):
       
   303         self.assertEqual(re.match("(?P<first>first) (?P<second>second)",
       
   304                                   "first second")
       
   305                                   .expand(r"\2 \1 \g<second> \g<first>"),
       
   306                          "second first second first")
       
   307 
       
   308     def test_repeat_minmax(self):
       
   309         self.assertEqual(re.match("^(\w){1}$", "abc"), None)
       
   310         self.assertEqual(re.match("^(\w){1}?$", "abc"), None)
       
   311         self.assertEqual(re.match("^(\w){1,2}$", "abc"), None)
       
   312         self.assertEqual(re.match("^(\w){1,2}?$", "abc"), None)
       
   313 
       
   314         self.assertEqual(re.match("^(\w){3}$", "abc").group(1), "c")
       
   315         self.assertEqual(re.match("^(\w){1,3}$", "abc").group(1), "c")
       
   316         self.assertEqual(re.match("^(\w){1,4}$", "abc").group(1), "c")
       
   317         self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
       
   318         self.assertEqual(re.match("^(\w){3}?$", "abc").group(1), "c")
       
   319         self.assertEqual(re.match("^(\w){1,3}?$", "abc").group(1), "c")
       
   320         self.assertEqual(re.match("^(\w){1,4}?$", "abc").group(1), "c")
       
   321         self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
       
   322 
       
   323         self.assertEqual(re.match("^x{1}$", "xxx"), None)
       
   324         self.assertEqual(re.match("^x{1}?$", "xxx"), None)
       
   325         self.assertEqual(re.match("^x{1,2}$", "xxx"), None)
       
   326         self.assertEqual(re.match("^x{1,2}?$", "xxx"), None)
       
   327 
       
   328         self.assertNotEqual(re.match("^x{3}$", "xxx"), None)
       
   329         self.assertNotEqual(re.match("^x{1,3}$", "xxx"), None)
       
   330         self.assertNotEqual(re.match("^x{1,4}$", "xxx"), None)
       
   331         self.assertNotEqual(re.match("^x{3,4}?$", "xxx"), None)
       
   332         self.assertNotEqual(re.match("^x{3}?$", "xxx"), None)
       
   333         self.assertNotEqual(re.match("^x{1,3}?$", "xxx"), None)
       
   334         self.assertNotEqual(re.match("^x{1,4}?$", "xxx"), None)
       
   335         self.assertNotEqual(re.match("^x{3,4}?$", "xxx"), None)
       
   336 
       
   337         self.assertEqual(re.match("^x{}$", "xxx"), None)
       
   338         self.assertNotEqual(re.match("^x{}$", "x{}"), None)
       
   339 
       
   340     def test_getattr(self):
       
   341         self.assertEqual(re.match("(a)", "a").pos, 0)
       
   342         self.assertEqual(re.match("(a)", "a").endpos, 1)
       
   343         self.assertEqual(re.match("(a)", "a").string, "a")
       
   344         self.assertEqual(re.match("(a)", "a").regs, ((0, 1), (0, 1)))
       
   345         self.assertNotEqual(re.match("(a)", "a").re, None)
       
   346 
       
   347     def test_special_escapes(self):
       
   348         self.assertEqual(re.search(r"\b(b.)\b",
       
   349                                    "abcd abc bcd bx").group(1), "bx")
       
   350         self.assertEqual(re.search(r"\B(b.)\B",
       
   351                                    "abc bcd bc abxd").group(1), "bx")
       
   352         self.assertEqual(re.search(r"\b(b.)\b",
       
   353                                    "abcd abc bcd bx", re.LOCALE).group(1), "bx")
       
   354         self.assertEqual(re.search(r"\B(b.)\B",
       
   355                                    "abc bcd bc abxd", re.LOCALE).group(1), "bx")
       
   356         self.assertEqual(re.search(r"\b(b.)\b",
       
   357                                    "abcd abc bcd bx", re.UNICODE).group(1), "bx")
       
   358         self.assertEqual(re.search(r"\B(b.)\B",
       
   359                                    "abc bcd bc abxd", re.UNICODE).group(1), "bx")
       
   360         self.assertEqual(re.search(r"^abc$", "\nabc\n", re.M).group(0), "abc")
       
   361         self.assertEqual(re.search(r"^\Aabc\Z$", "abc", re.M).group(0), "abc")
       
   362         self.assertEqual(re.search(r"^\Aabc\Z$", "\nabc\n", re.M), None)
       
   363         self.assertEqual(re.search(r"\b(b.)\b",
       
   364                                    u"abcd abc bcd bx").group(1), "bx")
       
   365         self.assertEqual(re.search(r"\B(b.)\B",
       
   366                                    u"abc bcd bc abxd").group(1), "bx")
       
   367         self.assertEqual(re.search(r"^abc$", u"\nabc\n", re.M).group(0), "abc")
       
   368         self.assertEqual(re.search(r"^\Aabc\Z$", u"abc", re.M).group(0), "abc")
       
   369         self.assertEqual(re.search(r"^\Aabc\Z$", u"\nabc\n", re.M), None)
       
   370         self.assertEqual(re.search(r"\d\D\w\W\s\S",
       
   371                                    "1aa! a").group(0), "1aa! a")
       
   372         self.assertEqual(re.search(r"\d\D\w\W\s\S",
       
   373                                    "1aa! a", re.LOCALE).group(0), "1aa! a")
       
   374         self.assertEqual(re.search(r"\d\D\w\W\s\S",
       
   375                                    "1aa! a", re.UNICODE).group(0), "1aa! a")
       
   376 
       
   377     def test_bigcharset(self):
       
   378         self.assertEqual(re.match(u"([\u2222\u2223])",
       
   379                                   u"\u2222").group(1), u"\u2222")
       
   380         self.assertEqual(re.match(u"([\u2222\u2223])",
       
   381                                   u"\u2222", re.UNICODE).group(1), u"\u2222")
       
   382 
       
   383     def test_anyall(self):
       
   384         self.assertEqual(re.match("a.b", "a\nb", re.DOTALL).group(0),
       
   385                          "a\nb")
       
   386         self.assertEqual(re.match("a.*b", "a\n\nb", re.DOTALL).group(0),
       
   387                          "a\n\nb")
       
   388 
       
   389     def test_non_consuming(self):
       
   390         self.assertEqual(re.match("(a(?=\s[^a]))", "a b").group(1), "a")
       
   391         self.assertEqual(re.match("(a(?=\s[^a]*))", "a b").group(1), "a")
       
   392         self.assertEqual(re.match("(a(?=\s[abc]))", "a b").group(1), "a")
       
   393         self.assertEqual(re.match("(a(?=\s[abc]*))", "a bc").group(1), "a")
       
   394         self.assertEqual(re.match(r"(a)(?=\s\1)", "a a").group(1), "a")
       
   395         self.assertEqual(re.match(r"(a)(?=\s\1*)", "a aa").group(1), "a")
       
   396         self.assertEqual(re.match(r"(a)(?=\s(abc|a))", "a a").group(1), "a")
       
   397 
       
   398         self.assertEqual(re.match(r"(a(?!\s[^a]))", "a a").group(1), "a")
       
   399         self.assertEqual(re.match(r"(a(?!\s[abc]))", "a d").group(1), "a")
       
   400         self.assertEqual(re.match(r"(a)(?!\s\1)", "a b").group(1), "a")
       
   401         self.assertEqual(re.match(r"(a)(?!\s(abc|a))", "a b").group(1), "a")
       
   402 
       
   403     def test_ignore_case(self):
       
   404         self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
       
   405         self.assertEqual(re.match("abc", u"ABC", re.I).group(0), "ABC")
       
   406         self.assertEqual(re.match(r"(a\s[^a])", "a b", re.I).group(1), "a b")
       
   407         self.assertEqual(re.match(r"(a\s[^a]*)", "a bb", re.I).group(1), "a bb")
       
   408         self.assertEqual(re.match(r"(a\s[abc])", "a b", re.I).group(1), "a b")
       
   409         self.assertEqual(re.match(r"(a\s[abc]*)", "a bb", re.I).group(1), "a bb")
       
   410         self.assertEqual(re.match(r"((a)\s\2)", "a a", re.I).group(1), "a a")
       
   411         self.assertEqual(re.match(r"((a)\s\2*)", "a aa", re.I).group(1), "a aa")
       
   412         self.assertEqual(re.match(r"((a)\s(abc|a))", "a a", re.I).group(1), "a a")
       
   413         self.assertEqual(re.match(r"((a)\s(abc|a)*)", "a aa", re.I).group(1), "a aa")
       
   414 
       
   415     def test_category(self):
       
   416         self.assertEqual(re.match(r"(\s)", " ").group(1), " ")
       
   417 
       
   418     def test_getlower(self):
       
   419         import _sre
       
   420         self.assertEqual(_sre.getlower(ord('A'), 0), ord('a'))
       
   421         self.assertEqual(_sre.getlower(ord('A'), re.LOCALE), ord('a'))
       
   422         self.assertEqual(_sre.getlower(ord('A'), re.UNICODE), ord('a'))
       
   423 
       
   424         self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
       
   425         self.assertEqual(re.match("abc", u"ABC", re.I).group(0), "ABC")
       
   426 
       
   427     def test_not_literal(self):
       
   428         self.assertEqual(re.search("\s([^a])", " b").group(1), "b")
       
   429         self.assertEqual(re.search("\s([^a]*)", " bb").group(1), "bb")
       
   430 
       
   431     def test_search_coverage(self):
       
   432         self.assertEqual(re.search("\s(b)", " b").group(1), "b")
       
   433         self.assertEqual(re.search("a\s", "a ").group(0), "a ")
       
   434 
       
   435     def test_re_escape(self):
       
   436         p=""
       
   437         for i in range(0, 256):
       
   438             p = p + chr(i)
       
   439             self.assertEqual(re.match(re.escape(chr(i)), chr(i)) is not None,
       
   440                              True)
       
   441             self.assertEqual(re.match(re.escape(chr(i)), chr(i)).span(), (0,1))
       
   442 
       
   443         pat=re.compile(re.escape(p))
       
   444         self.assertEqual(pat.match(p) is not None, True)
       
   445         self.assertEqual(pat.match(p).span(), (0,256))
       
   446 
       
   447     def test_pickling(self):
       
   448         import pickle
       
   449         self.pickle_test(pickle)
       
   450         import cPickle
       
   451         self.pickle_test(cPickle)
       
   452         # old pickles expect the _compile() reconstructor in sre module
       
   453         import warnings
       
   454         with warnings.catch_warnings():
       
   455             warnings.filterwarnings("ignore", "The sre module is deprecated",
       
   456                                     DeprecationWarning)
       
   457             from sre import _compile
       
   458 
       
   459     def pickle_test(self, pickle):
       
   460         oldpat = re.compile('a(?:b|(c|e){1,2}?|d)+?(.)')
       
   461         s = pickle.dumps(oldpat)
       
   462         newpat = pickle.loads(s)
       
   463         self.assertEqual(oldpat, newpat)
       
   464 
       
   465     def test_constants(self):
       
   466         self.assertEqual(re.I, re.IGNORECASE)
       
   467         self.assertEqual(re.L, re.LOCALE)
       
   468         self.assertEqual(re.M, re.MULTILINE)
       
   469         self.assertEqual(re.S, re.DOTALL)
       
   470         self.assertEqual(re.X, re.VERBOSE)
       
   471 
       
   472     def test_flags(self):
       
   473         for flag in [re.I, re.M, re.X, re.S, re.L]:
       
   474             self.assertNotEqual(re.compile('^pattern$', flag), None)
       
   475 
       
   476     def test_sre_character_literals(self):
       
   477         for i in [0, 8, 16, 32, 64, 127, 128, 255]:
       
   478             self.assertNotEqual(re.match(r"\%03o" % i, chr(i)), None)
       
   479             self.assertNotEqual(re.match(r"\%03o0" % i, chr(i)+"0"), None)
       
   480             self.assertNotEqual(re.match(r"\%03o8" % i, chr(i)+"8"), None)
       
   481             self.assertNotEqual(re.match(r"\x%02x" % i, chr(i)), None)
       
   482             self.assertNotEqual(re.match(r"\x%02x0" % i, chr(i)+"0"), None)
       
   483             self.assertNotEqual(re.match(r"\x%02xz" % i, chr(i)+"z"), None)
       
   484         self.assertRaises(re.error, re.match, "\911", "")
       
   485 
       
   486     def test_sre_character_class_literals(self):
       
   487         for i in [0, 8, 16, 32, 64, 127, 128, 255]:
       
   488             self.assertNotEqual(re.match(r"[\%03o]" % i, chr(i)), None)
       
   489             self.assertNotEqual(re.match(r"[\%03o0]" % i, chr(i)), None)
       
   490             self.assertNotEqual(re.match(r"[\%03o8]" % i, chr(i)), None)
       
   491             self.assertNotEqual(re.match(r"[\x%02x]" % i, chr(i)), None)
       
   492             self.assertNotEqual(re.match(r"[\x%02x0]" % i, chr(i)), None)
       
   493             self.assertNotEqual(re.match(r"[\x%02xz]" % i, chr(i)), None)
       
   494         self.assertRaises(re.error, re.match, "[\911]", "")
       
   495 
       
   496     def test_bug_113254(self):
       
   497         self.assertEqual(re.match(r'(a)|(b)', 'b').start(1), -1)
       
   498         self.assertEqual(re.match(r'(a)|(b)', 'b').end(1), -1)
       
   499         self.assertEqual(re.match(r'(a)|(b)', 'b').span(1), (-1, -1))
       
   500 
       
   501     def test_bug_527371(self):
       
   502         # bug described in patches 527371/672491
       
   503         self.assertEqual(re.match(r'(a)?a','a').lastindex, None)
       
   504         self.assertEqual(re.match(r'(a)(b)?b','ab').lastindex, 1)
       
   505         self.assertEqual(re.match(r'(?P<a>a)(?P<b>b)?b','ab').lastgroup, 'a')
       
   506         self.assertEqual(re.match("(?P<a>a(b))", "ab").lastgroup, 'a')
       
   507         self.assertEqual(re.match("((a))", "a").lastindex, 1)
       
   508 
       
   509     def test_bug_545855(self):
       
   510         # bug 545855 -- This pattern failed to cause a compile error as it
       
   511         # should, instead provoking a TypeError.
       
   512         self.assertRaises(re.error, re.compile, 'foo[a-')
       
   513 
       
   514     def test_bug_418626(self):
       
   515         # bugs 418626 at al. -- Testing Greg Chapman's addition of op code
       
   516         # SRE_OP_MIN_REPEAT_ONE for eliminating recursion on simple uses of
       
   517         # pattern '*?' on a long string.
       
   518         self.assertEqual(re.match('.*?c', 10000*'ab'+'cd').end(0), 20001)
       
   519         self.assertEqual(re.match('.*?cd', 5000*'ab'+'c'+5000*'ab'+'cde').end(0),
       
   520                          20003)
       
   521         self.assertEqual(re.match('.*?cd', 20000*'abc'+'de').end(0), 60001)
       
   522         # non-simple '*?' still used to hit the recursion limit, before the
       
   523         # non-recursive scheme was implemented.
       
   524         self.assertEqual(re.search('(a|b)*?c', 10000*'ab'+'cd').end(0), 20001)
       
   525 
       
   526     def test_bug_612074(self):
       
   527         pat=u"["+re.escape(u"\u2039")+u"]"
       
   528         self.assertEqual(re.compile(pat) and 1, 1)
       
   529 
       
   530     def test_stack_overflow(self):
       
   531         # nasty cases that used to overflow the straightforward recursive
       
   532         # implementation of repeated groups.
       
   533         self.assertEqual(re.match('(x)*', 50000*'x').group(1), 'x')
       
   534         self.assertEqual(re.match('(x)*y', 50000*'x'+'y').group(1), 'x')
       
   535         self.assertEqual(re.match('(x)*?y', 50000*'x'+'y').group(1), 'x')
       
   536 
       
   537     def test_scanner(self):
       
   538         def s_ident(scanner, token): return token
       
   539         def s_operator(scanner, token): return "op%s" % token
       
   540         def s_float(scanner, token): return float(token)
       
   541         def s_int(scanner, token): return int(token)
       
   542 
       
   543         scanner = Scanner([
       
   544             (r"[a-zA-Z_]\w*", s_ident),
       
   545             (r"\d+\.\d*", s_float),
       
   546             (r"\d+", s_int),
       
   547             (r"=|\+|-|\*|/", s_operator),
       
   548             (r"\s+", None),
       
   549             ])
       
   550 
       
   551         self.assertNotEqual(scanner.scanner.scanner("").pattern, None)
       
   552 
       
   553         self.assertEqual(scanner.scan("sum = 3*foo + 312.50 + bar"),
       
   554                          (['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5,
       
   555                            'op+', 'bar'], ''))
       
   556 
       
   557     def test_bug_448951(self):
       
   558         # bug 448951 (similar to 429357, but with single char match)
       
   559         # (Also test greedy matches.)
       
   560         for op in '','?','*':
       
   561             self.assertEqual(re.match(r'((.%s):)?z'%op, 'z').groups(),
       
   562                              (None, None))
       
   563             self.assertEqual(re.match(r'((.%s):)?z'%op, 'a:z').groups(),
       
   564                              ('a:', 'a'))
       
   565 
       
   566     def test_bug_725106(self):
       
   567         # capturing groups in alternatives in repeats
       
   568         self.assertEqual(re.match('^((a)|b)*', 'abc').groups(),
       
   569                          ('b', 'a'))
       
   570         self.assertEqual(re.match('^(([ab])|c)*', 'abc').groups(),
       
   571                          ('c', 'b'))
       
   572         self.assertEqual(re.match('^((d)|[ab])*', 'abc').groups(),
       
   573                          ('b', None))
       
   574         self.assertEqual(re.match('^((a)c|[ab])*', 'abc').groups(),
       
   575                          ('b', None))
       
   576         self.assertEqual(re.match('^((a)|b)*?c', 'abc').groups(),
       
   577                          ('b', 'a'))
       
   578         self.assertEqual(re.match('^(([ab])|c)*?d', 'abcd').groups(),
       
   579                          ('c', 'b'))
       
   580         self.assertEqual(re.match('^((d)|[ab])*?c', 'abc').groups(),
       
   581                          ('b', None))
       
   582         self.assertEqual(re.match('^((a)c|[ab])*?c', 'abc').groups(),
       
   583                          ('b', None))
       
   584 
       
   585     def test_bug_725149(self):
       
   586         # mark_stack_base restoring before restoring marks
       
   587         self.assertEqual(re.match('(a)(?:(?=(b)*)c)*', 'abb').groups(),
       
   588                          ('a', None))
       
   589         self.assertEqual(re.match('(a)((?!(b)*))*', 'abb').groups(),
       
   590                          ('a', None, None))
       
   591 
       
   592     def test_bug_764548(self):
       
   593         # bug 764548, re.compile() barfs on str/unicode subclasses
       
   594         try:
       
   595             unicode
       
   596         except NameError:
       
   597             return  # no problem if we have no unicode
       
   598         class my_unicode(unicode): pass
       
   599         pat = re.compile(my_unicode("abc"))
       
   600         self.assertEqual(pat.match("xyz"), None)
       
   601 
       
   602     def test_finditer(self):
       
   603         iter = re.finditer(r":+", "a:b::c:::d")
       
   604         self.assertEqual([item.group(0) for item in iter],
       
   605                          [":", "::", ":::"])
       
   606 
       
   607     def test_bug_926075(self):
       
   608         try:
       
   609             unicode
       
   610         except NameError:
       
   611             return # no problem if we have no unicode
       
   612         self.assert_(re.compile('bug_926075') is not
       
   613                      re.compile(eval("u'bug_926075'")))
       
   614 
       
   615     def test_bug_931848(self):
       
   616         try:
       
   617             unicode
       
   618         except NameError:
       
   619             pass
       
   620         pattern = eval('u"[\u002E\u3002\uFF0E\uFF61]"')
       
   621         self.assertEqual(re.compile(pattern).split("a.b.c"),
       
   622                          ['a','b','c'])
       
   623 
       
   624     def test_bug_581080(self):
       
   625         iter = re.finditer(r"\s", "a b")
       
   626         self.assertEqual(iter.next().span(), (1,2))
       
   627         self.assertRaises(StopIteration, iter.next)
       
   628 
       
   629         scanner = re.compile(r"\s").scanner("a b")
       
   630         self.assertEqual(scanner.search().span(), (1, 2))
       
   631         self.assertEqual(scanner.search(), None)
       
   632 
       
   633     def test_bug_817234(self):
       
   634         iter = re.finditer(r".*", "asdf")
       
   635         self.assertEqual(iter.next().span(), (0, 4))
       
   636         self.assertEqual(iter.next().span(), (4, 4))
       
   637         self.assertRaises(StopIteration, iter.next)
       
   638 
       
   639     def test_empty_array(self):
       
   640         # SF buf 1647541
       
   641         import array
       
   642         for typecode in 'cbBuhHiIlLfd':
       
   643             a = array.array(typecode)
       
   644             self.assertEqual(re.compile("bla").match(a), None)
       
   645             self.assertEqual(re.compile("").match(a).groups(), ())
       
   646 
       
   647     def test_inline_flags(self):
       
   648         # Bug #1700
       
   649         upper_char = unichr(0x1ea0) # Latin Capital Letter A with Dot Bellow
       
   650         lower_char = unichr(0x1ea1) # Latin Small Letter A with Dot Bellow
       
   651 
       
   652         p = re.compile(upper_char, re.I | re.U)
       
   653         q = p.match(lower_char)
       
   654         self.assertNotEqual(q, None)
       
   655 
       
   656         p = re.compile(lower_char, re.I | re.U)
       
   657         q = p.match(upper_char)
       
   658         self.assertNotEqual(q, None)
       
   659 
       
   660         p = re.compile('(?i)' + upper_char, re.U)
       
   661         q = p.match(lower_char)
       
   662         self.assertNotEqual(q, None)
       
   663 
       
   664         p = re.compile('(?i)' + lower_char, re.U)
       
   665         q = p.match(upper_char)
       
   666         self.assertNotEqual(q, None)
       
   667 
       
   668         p = re.compile('(?iu)' + upper_char)
       
   669         q = p.match(lower_char)
       
   670         self.assertNotEqual(q, None)
       
   671 
       
   672         p = re.compile('(?iu)' + lower_char)
       
   673         q = p.match(upper_char)
       
   674         self.assertNotEqual(q, None)
       
   675 
       
   676     def test_dollar_matches_twice(self):
       
   677         "$ matches the end of string, and just before the terminating \n"
       
   678         pattern = re.compile('$')
       
   679         self.assertEqual(pattern.sub('#', 'a\nb\n'), 'a\nb#\n#')
       
   680         self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a\nb\nc#')
       
   681         self.assertEqual(pattern.sub('#', '\n'), '#\n#')
       
   682 
       
   683         pattern = re.compile('$', re.MULTILINE)
       
   684         self.assertEqual(pattern.sub('#', 'a\nb\n' ), 'a#\nb#\n#' )
       
   685         self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a#\nb#\nc#')
       
   686         self.assertEqual(pattern.sub('#', '\n'), '#\n#')
       
   687 
       
   688 
       
   689 def run_re_tests():
       
   690     from test.re_tests import benchmarks, tests, SUCCEED, FAIL, SYNTAX_ERROR
       
   691     if verbose:
       
   692         print 'Running re_tests test suite'
       
   693     else:
       
   694         # To save time, only run the first and last 10 tests
       
   695         #tests = tests[:10] + tests[-10:]
       
   696         pass
       
   697 
       
   698     for t in tests:
       
   699         sys.stdout.flush()
       
   700         pattern = s = outcome = repl = expected = None
       
   701         if len(t) == 5:
       
   702             pattern, s, outcome, repl, expected = t
       
   703         elif len(t) == 3:
       
   704             pattern, s, outcome = t
       
   705         else:
       
   706             raise ValueError, ('Test tuples should have 3 or 5 fields', t)
       
   707 
       
   708         try:
       
   709             obj = re.compile(pattern)
       
   710         except re.error:
       
   711             if outcome == SYNTAX_ERROR: pass  # Expected a syntax error
       
   712             else:
       
   713                 print '=== Syntax error:', t
       
   714         except KeyboardInterrupt: raise KeyboardInterrupt
       
   715         except:
       
   716             print '*** Unexpected error ***', t
       
   717             if verbose:
       
   718                 traceback.print_exc(file=sys.stdout)
       
   719         else:
       
   720             try:
       
   721                 result = obj.search(s)
       
   722             except re.error, msg:
       
   723                 print '=== Unexpected exception', t, repr(msg)
       
   724             if outcome == SYNTAX_ERROR:
       
   725                 # This should have been a syntax error; forget it.
       
   726                 pass
       
   727             elif outcome == FAIL:
       
   728                 if result is None: pass   # No match, as expected
       
   729                 else: print '=== Succeeded incorrectly', t
       
   730             elif outcome == SUCCEED:
       
   731                 if result is not None:
       
   732                     # Matched, as expected, so now we compute the
       
   733                     # result string and compare it to our expected result.
       
   734                     start, end = result.span(0)
       
   735                     vardict={'found': result.group(0),
       
   736                              'groups': result.group(),
       
   737                              'flags': result.re.flags}
       
   738                     for i in range(1, 100):
       
   739                         try:
       
   740                             gi = result.group(i)
       
   741                             # Special hack because else the string concat fails:
       
   742                             if gi is None:
       
   743                                 gi = "None"
       
   744                         except IndexError:
       
   745                             gi = "Error"
       
   746                         vardict['g%d' % i] = gi
       
   747                     for i in result.re.groupindex.keys():
       
   748                         try:
       
   749                             gi = result.group(i)
       
   750                             if gi is None:
       
   751                                 gi = "None"
       
   752                         except IndexError:
       
   753                             gi = "Error"
       
   754                         vardict[i] = gi
       
   755                     repl = eval(repl, vardict)
       
   756                     if repl != expected:
       
   757                         print '=== grouping error', t,
       
   758                         print repr(repl) + ' should be ' + repr(expected)
       
   759                 else:
       
   760                     print '=== Failed incorrectly', t
       
   761 
       
   762                 # Try the match on a unicode string, and check that it
       
   763                 # still succeeds.
       
   764                 try:
       
   765                     result = obj.search(unicode(s, "latin-1"))
       
   766                     if result is None:
       
   767                         print '=== Fails on unicode match', t
       
   768                 except NameError:
       
   769                     continue # 1.5.2
       
   770                 except TypeError:
       
   771                     continue # unicode test case
       
   772 
       
   773                 # Try the match on a unicode pattern, and check that it
       
   774                 # still succeeds.
       
   775                 obj=re.compile(unicode(pattern, "latin-1"))
       
   776                 result = obj.search(s)
       
   777                 if result is None:
       
   778                     print '=== Fails on unicode pattern match', t
       
   779 
       
   780                 # Try the match with the search area limited to the extent
       
   781                 # of the match and see if it still succeeds.  \B will
       
   782                 # break (because it won't match at the end or start of a
       
   783                 # string), so we'll ignore patterns that feature it.
       
   784 
       
   785                 if pattern[:2] != '\\B' and pattern[-2:] != '\\B' \
       
   786                                and result is not None:
       
   787                     obj = re.compile(pattern)
       
   788                     result = obj.search(s, result.start(0), result.end(0) + 1)
       
   789                     if result is None:
       
   790                         print '=== Failed on range-limited match', t
       
   791 
       
   792                 # Try the match with IGNORECASE enabled, and check that it
       
   793                 # still succeeds.
       
   794                 obj = re.compile(pattern, re.IGNORECASE)
       
   795                 result = obj.search(s)
       
   796                 if result is None:
       
   797                     print '=== Fails on case-insensitive match', t
       
   798 
       
   799                 # Try the match with LOCALE enabled, and check that it
       
   800                 # still succeeds.
       
   801                 obj = re.compile(pattern, re.LOCALE)
       
   802                 result = obj.search(s)
       
   803                 if result is None:
       
   804                     print '=== Fails on locale-sensitive match', t
       
   805 
       
   806                 # Try the match with UNICODE locale enabled, and check
       
   807                 # that it still succeeds.
       
   808                 obj = re.compile(pattern, re.UNICODE)
       
   809                 result = obj.search(s)
       
   810                 if result is None:
       
   811                     print '=== Fails on unicode-sensitive match', t
       
   812 
       
   813 def test_main():
       
   814     run_unittest(ReTests)
       
   815     run_re_tests()
       
   816 
       
   817 if __name__ == "__main__":
       
   818     test_main()