python-2.5.2/win32/Lib/test/test_re.py
changeset 0 ae805ac0140d
equal deleted inserted replaced
-1:000000000000 0:ae805ac0140d
       
     1 import sys
       
     2 sys.path = ['.'] + sys.path
       
     3 
       
     4 from test.test_support import verbose, run_unittest
       
     5 import re
       
     6 from re import Scanner
       
     7 import sys, os, traceback
       
     8 from weakref import proxy
       
     9 
       
    10 # Misc tests from Tim Peters' re.doc
       
    11 
       
    12 # WARNING: Don't change details in these tests if you don't know
       
    13 # what you're doing. Some of these tests were carefully modeled to
       
    14 # cover most of the code.
       
    15 
       
    16 import unittest
       
    17 
       
    18 class ReTests(unittest.TestCase):
       
    19 
       
    20     def test_weakref(self):
       
    21         s = 'QabbbcR'
       
    22         x = re.compile('ab+c')
       
    23         y = proxy(x)
       
    24         self.assertEqual(x.findall('QabbbcR'), y.findall('QabbbcR'))
       
    25 
       
    26     def test_search_star_plus(self):
       
    27         self.assertEqual(re.search('x*', 'axx').span(0), (0, 0))
       
    28         self.assertEqual(re.search('x*', 'axx').span(), (0, 0))
       
    29         self.assertEqual(re.search('x+', 'axx').span(0), (1, 3))
       
    30         self.assertEqual(re.search('x+', 'axx').span(), (1, 3))
       
    31         self.assertEqual(re.search('x', 'aaa'), None)
       
    32         self.assertEqual(re.match('a*', 'xxx').span(0), (0, 0))
       
    33         self.assertEqual(re.match('a*', 'xxx').span(), (0, 0))
       
    34         self.assertEqual(re.match('x*', 'xxxa').span(0), (0, 3))
       
    35         self.assertEqual(re.match('x*', 'xxxa').span(), (0, 3))
       
    36         self.assertEqual(re.match('a+', 'xxx'), None)
       
    37 
       
    def bump_num(self, matchobj):
        # Replacement callback: read the whole match as a decimal integer
        # and return its successor as a string.
        return str(int(matchobj.group(0)) + 1)
       
    41 
       
    def test_basic_re_sub(self):
        # Plain string replacement, then a callable replacement (bump_num),
        # first unlimited and then limited to the first 3 matches.
        self.assertEqual(re.sub("(?i)b+", "x", "bbbb BBBB"), 'x x')
        self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y'),
                         '9.3 -3 24x100y')
        self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', 3),
                         '9.3 -3 23x99y')

        # A callable's return value is inserted as-is, whereas a string
        # template has its backslash escapes processed.
        self.assertEqual(re.sub('.', lambda m: r"\n", 'x'), '\\n')
        self.assertEqual(re.sub('.', r"\n", 'x'), '\n')

        s = r"\1\1"
        self.assertEqual(re.sub('(.)', s, 'x'), 'xx')
        self.assertEqual(re.sub('(.)', re.escape(s), 'x'), s)
        self.assertEqual(re.sub('(.)', lambda m: s, 'x'), s)

        # Group references by name and by number.  The templates are raw
        # strings so the backslash in \g is spelled explicitly instead of
        # relying on Python passing an unrecognised escape through.
        self.assertEqual(re.sub('(?P<a>x)', r'\g<a>\g<a>', 'xx'), 'xxxx')
        self.assertEqual(re.sub('(?P<a>x)', r'\g<a>\g<1>', 'xx'), 'xxxx')
        self.assertEqual(re.sub('(?P<unk>x)', r'\g<unk>\g<unk>', 'xx'), 'xxxx')
        self.assertEqual(re.sub('(?P<unk>x)', r'\g<1>\g<1>', 'xx'), 'xxxx')

        # Escapes inside a raw template versus already-decoded control
        # characters passed straight through.
        self.assertEqual(re.sub('a',r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D','a'),
                         '\t\n\v\r\f\a\b\\B\\Z\a\\A\\w\\W\\s\\S\\d\\D')
        self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'), '\t\n\v\r\f\a')
        self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'),
                         (chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)))

        # A pattern that matches the empty string at the start of the text.
        self.assertEqual(re.sub(r'^\s*', 'X', 'test'), 'Xtest')
       
    69 
       
    70     def test_bug_449964(self):
       
    71         # fails for group followed by other escape
       
    72         self.assertEqual(re.sub(r'(?P<unk>x)', '\g<1>\g<1>\\b', 'xx'),
       
    73                          'xx\bxx\b')
       
    74 
       
    75     def test_bug_449000(self):
       
    76         # Test for sub() on escaped characters
       
    77         self.assertEqual(re.sub(r'\r\n', r'\n', 'abc\r\ndef\r\n'),
       
    78                          'abc\ndef\n')
       
    79         self.assertEqual(re.sub('\r\n', r'\n', 'abc\r\ndef\r\n'),
       
    80                          'abc\ndef\n')
       
    81         self.assertEqual(re.sub(r'\r\n', '\n', 'abc\r\ndef\r\n'),
       
    82                          'abc\ndef\n')
       
    83         self.assertEqual(re.sub('\r\n', '\n', 'abc\r\ndef\r\n'),
       
    84                          'abc\ndef\n')
       
    85 
       
    def test_bug_1140(self):
        # re.sub(x, y, u'') should return u'', not '', and
        # re.sub(x, y, '') should return '', not u''.
        # Also:
        # re.sub(x, y, unicode(x)) should return unicode(y), and
        # re.sub(x, y, str(x)) should return
        #     str(y) if isinstance(y, str) else unicode(y).
        for pattern in 'x', u'x':
            for repl in 'y', u'y':
                # Each row: (subject, expected value, expected result type).
                checks = (
                    (u'', u'', unicode),
                    ('', '', str),
                    (unicode(pattern), repl, unicode),
                    (str(pattern), repl, type(repl)),
                )
                for subject, expected, expected_type in checks:
                    result = re.sub(pattern, repl, subject)
                    self.assertEqual(result, expected)
                    self.assertEqual(type(result), expected_type)
       
   110 
       
   111     def test_sub_template_numeric_escape(self):
       
   112         # bug 776311 and friends
       
   113         self.assertEqual(re.sub('x', r'\0', 'x'), '\0')
       
   114         self.assertEqual(re.sub('x', r'\000', 'x'), '\000')
       
   115         self.assertEqual(re.sub('x', r'\001', 'x'), '\001')
       
   116         self.assertEqual(re.sub('x', r'\008', 'x'), '\0' + '8')
       
   117         self.assertEqual(re.sub('x', r'\009', 'x'), '\0' + '9')
       
   118         self.assertEqual(re.sub('x', r'\111', 'x'), '\111')
       
   119         self.assertEqual(re.sub('x', r'\117', 'x'), '\117')
       
   120 
       
   121         self.assertEqual(re.sub('x', r'\1111', 'x'), '\1111')
       
   122         self.assertEqual(re.sub('x', r'\1111', 'x'), '\111' + '1')
       
   123 
       
   124         self.assertEqual(re.sub('x', r'\00', 'x'), '\x00')
       
   125         self.assertEqual(re.sub('x', r'\07', 'x'), '\x07')
       
   126         self.assertEqual(re.sub('x', r'\08', 'x'), '\0' + '8')
       
   127         self.assertEqual(re.sub('x', r'\09', 'x'), '\0' + '9')
       
   128         self.assertEqual(re.sub('x', r'\0a', 'x'), '\0' + 'a')
       
   129 
       
   130         self.assertEqual(re.sub('x', r'\400', 'x'), '\0')
       
   131         self.assertEqual(re.sub('x', r'\777', 'x'), '\377')
       
   132 
       
   133         self.assertRaises(re.error, re.sub, 'x', r'\1', 'x')
       
   134         self.assertRaises(re.error, re.sub, 'x', r'\8', 'x')
       
   135         self.assertRaises(re.error, re.sub, 'x', r'\9', 'x')
       
   136         self.assertRaises(re.error, re.sub, 'x', r'\11', 'x')
       
   137         self.assertRaises(re.error, re.sub, 'x', r'\18', 'x')
       
   138         self.assertRaises(re.error, re.sub, 'x', r'\1a', 'x')
       
   139         self.assertRaises(re.error, re.sub, 'x', r'\90', 'x')
       
   140         self.assertRaises(re.error, re.sub, 'x', r'\99', 'x')
       
   141         self.assertRaises(re.error, re.sub, 'x', r'\118', 'x') # r'\11' + '8'
       
   142         self.assertRaises(re.error, re.sub, 'x', r'\11a', 'x')
       
   143         self.assertRaises(re.error, re.sub, 'x', r'\181', 'x') # r'\18' + '1'
       
   144         self.assertRaises(re.error, re.sub, 'x', r'\800', 'x') # r'\80' + '0'
       
   145 
       
   146         # in python2.3 (etc), these loop endlessly in sre_parser.py
       
   147         self.assertEqual(re.sub('(((((((((((x)))))))))))', r'\11', 'x'), 'x')
       
   148         self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\118', 'xyz'),
       
   149                          'xz8')
       
   150         self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\11a', 'xyz'),
       
   151                          'xza')
       
   152 
       
   153     def test_qualified_re_sub(self):
       
   154         self.assertEqual(re.sub('a', 'b', 'aaaaa'), 'bbbbb')
       
   155         self.assertEqual(re.sub('a', 'b', 'aaaaa', 1), 'baaaa')
       
   156 
       
   157     def test_bug_114660(self):
       
   158         self.assertEqual(re.sub(r'(\S)\s+(\S)', r'\1 \2', 'hello  there'),
       
   159                          'hello there')
       
   160 
       
   161     def test_bug_462270(self):
       
   162         # Test for empty sub() behaviour, see SF bug #462270
       
   163         self.assertEqual(re.sub('x*', '-', 'abxd'), '-a-b-d-')
       
   164         self.assertEqual(re.sub('x+', '-', 'abxd'), 'ab-d')
       
   165 
       
   166     def test_symbolic_refs(self):
       
   167         self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a', 'xx')
       
   168         self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<', 'xx')
       
   169         self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g', 'xx')
       
   170         self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a a>', 'xx')
       
   171         self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<1a1>', 'xx')
       
   172         self.assertRaises(IndexError, re.sub, '(?P<a>x)', '\g<ab>', 'xx')
       
   173         self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\g<b>', 'xx')
       
   174         self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\\2', 'xx')
       
   175         self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<-1>', 'xx')
       
   176 
       
   177     def test_re_subn(self):
       
   178         self.assertEqual(re.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2))
       
   179         self.assertEqual(re.subn("b+", "x", "bbbb BBBB"), ('x BBBB', 1))
       
   180         self.assertEqual(re.subn("b+", "x", "xyz"), ('xyz', 0))
       
   181         self.assertEqual(re.subn("b*", "x", "xyz"), ('xxxyxzx', 4))
       
   182         self.assertEqual(re.subn("b*", "x", "xyz", 2), ('xxxyz', 2))
       
   183 
       
   184     def test_re_split(self):
       
   185         self.assertEqual(re.split(":", ":a:b::c"), ['', 'a', 'b', '', 'c'])
       
   186         self.assertEqual(re.split(":*", ":a:b::c"), ['', 'a', 'b', 'c'])
       
   187         self.assertEqual(re.split("(:*)", ":a:b::c"),
       
   188                          ['', ':', 'a', ':', 'b', '::', 'c'])
       
   189         self.assertEqual(re.split("(?::*)", ":a:b::c"), ['', 'a', 'b', 'c'])
       
   190         self.assertEqual(re.split("(:)*", ":a:b::c"),
       
   191                          ['', ':', 'a', ':', 'b', ':', 'c'])
       
   192         self.assertEqual(re.split("([b:]+)", ":a:b::c"),
       
   193                          ['', ':', 'a', ':b::', 'c'])
       
   194         self.assertEqual(re.split("(b)|(:+)", ":a:b::c"),
       
   195                          ['', None, ':', 'a', None, ':', '', 'b', None, '',
       
   196                           None, '::', 'c'])
       
   197         self.assertEqual(re.split("(?:b)|(?::+)", ":a:b::c"),
       
   198                          ['', 'a', '', '', 'c'])
       
   199 
       
   200     def test_qualified_re_split(self):
       
   201         self.assertEqual(re.split(":", ":a:b::c", 2), ['', 'a', 'b::c'])
       
   202         self.assertEqual(re.split(':', 'a:b:c:d', 2), ['a', 'b', 'c:d'])
       
   203         self.assertEqual(re.split("(:)", ":a:b::c", 2),
       
   204                          ['', ':', 'a', ':', 'b::c'])
       
   205         self.assertEqual(re.split("(:*)", ":a:b::c", 2),
       
   206                          ['', ':', 'a', ':', 'b::c'])
       
   207 
       
   208     def test_re_findall(self):
       
   209         self.assertEqual(re.findall(":+", "abc"), [])
       
   210         self.assertEqual(re.findall(":+", "a:b::c:::d"), [":", "::", ":::"])
       
   211         self.assertEqual(re.findall("(:+)", "a:b::c:::d"), [":", "::", ":::"])
       
   212         self.assertEqual(re.findall("(:)(:*)", "a:b::c:::d"), [(":", ""),
       
   213                                                                (":", ":"),
       
   214                                                                (":", "::")])
       
   215 
       
   216     def test_bug_117612(self):
       
   217         self.assertEqual(re.findall(r"(a|(b))", "aba"),
       
   218                          [("a", ""),("b", "b"),("a", "")])
       
   219 
       
   220     def test_re_match(self):
       
   221         self.assertEqual(re.match('a', 'a').groups(), ())
       
   222         self.assertEqual(re.match('(a)', 'a').groups(), ('a',))
       
   223         self.assertEqual(re.match(r'(a)', 'a').group(0), 'a')
       
   224         self.assertEqual(re.match(r'(a)', 'a').group(1), 'a')
       
   225         self.assertEqual(re.match(r'(a)', 'a').group(1, 1), ('a', 'a'))
       
   226 
       
   227         pat = re.compile('((a)|(b))(c)?')
       
   228         self.assertEqual(pat.match('a').groups(), ('a', 'a', None, None))
       
   229         self.assertEqual(pat.match('b').groups(), ('b', None, 'b', None))
       
   230         self.assertEqual(pat.match('ac').groups(), ('a', 'a', None, 'c'))
       
   231         self.assertEqual(pat.match('bc').groups(), ('b', None, 'b', 'c'))
       
   232         self.assertEqual(pat.match('bc').groups(""), ('b', "", 'b', 'c'))
       
   233 
       
   234         # A single group
       
   235         m = re.match('(a)', 'a')
       
   236         self.assertEqual(m.group(0), 'a')
       
   237         self.assertEqual(m.group(0), 'a')
       
   238         self.assertEqual(m.group(1), 'a')
       
   239         self.assertEqual(m.group(1, 1), ('a', 'a'))
       
   240 
       
   241         pat = re.compile('(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?')
       
   242         self.assertEqual(pat.match('a').group(1, 2, 3), ('a', None, None))
       
   243         self.assertEqual(pat.match('b').group('a1', 'b2', 'c3'),
       
   244                          (None, 'b', None))
       
   245         self.assertEqual(pat.match('ac').group(1, 'b2', 3), ('a', None, 'c'))
       
   246 
       
   247     def test_re_groupref_exists(self):
       
   248         self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a)').groups(),
       
   249                          ('(', 'a'))
       
   250         self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a').groups(),
       
   251                          (None, 'a'))
       
   252         self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a)'), None)
       
   253         self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a'), None)
       
   254         self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'ab').groups(),
       
   255                          ('a', 'b'))
       
   256         self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'cd').groups(),
       
   257                          (None, 'd'))
       
   258         self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'cd').groups(),
       
   259                          (None, 'd'))
       
   260         self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'a').groups(),
       
   261                          ('a', ''))
       
   262 
       
   263         # Tests for bug #1177831: exercise groups other than the first group
       
   264         p = re.compile('(?P<g1>a)(?P<g2>b)?((?(g2)c|d))')
       
   265         self.assertEqual(p.match('abc').groups(),
       
   266                          ('a', 'b', 'c'))
       
   267         self.assertEqual(p.match('ad').groups(),
       
   268                          ('a', None, 'd'))
       
   269         self.assertEqual(p.match('abd'), None)
       
   270         self.assertEqual(p.match('ac'), None)
       
   271 
       
   272 
       
   273     def test_re_groupref(self):
       
   274         self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a|').groups(),
       
   275                          ('|', 'a'))
       
   276         self.assertEqual(re.match(r'^(\|)?([^()]+)\1?$', 'a').groups(),
       
   277                          (None, 'a'))
       
   278         self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', 'a|'), None)
       
   279         self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a'), None)
       
   280         self.assertEqual(re.match(r'^(?:(a)|c)(\1)$', 'aa').groups(),
       
   281                          ('a', 'a'))
       
   282         self.assertEqual(re.match(r'^(?:(a)|c)(\1)?$', 'c').groups(),
       
   283                          (None, None))
       
   284 
       
   285     def test_groupdict(self):
       
   286         self.assertEqual(re.match('(?P<first>first) (?P<second>second)',
       
   287                                   'first second').groupdict(),
       
   288                          {'first':'first', 'second':'second'})
       
   289 
       
   290     def test_expand(self):
       
   291         self.assertEqual(re.match("(?P<first>first) (?P<second>second)",
       
   292                                   "first second")
       
   293                                   .expand(r"\2 \1 \g<second> \g<first>"),
       
   294                          "second first second first")
       
   295 
       
   296     def test_repeat_minmax(self):
       
   297         self.assertEqual(re.match("^(\w){1}$", "abc"), None)
       
   298         self.assertEqual(re.match("^(\w){1}?$", "abc"), None)
       
   299         self.assertEqual(re.match("^(\w){1,2}$", "abc"), None)
       
   300         self.assertEqual(re.match("^(\w){1,2}?$", "abc"), None)
       
   301 
       
   302         self.assertEqual(re.match("^(\w){3}$", "abc").group(1), "c")
       
   303         self.assertEqual(re.match("^(\w){1,3}$", "abc").group(1), "c")
       
   304         self.assertEqual(re.match("^(\w){1,4}$", "abc").group(1), "c")
       
   305         self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
       
   306         self.assertEqual(re.match("^(\w){3}?$", "abc").group(1), "c")
       
   307         self.assertEqual(re.match("^(\w){1,3}?$", "abc").group(1), "c")
       
   308         self.assertEqual(re.match("^(\w){1,4}?$", "abc").group(1), "c")
       
   309         self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
       
   310 
       
   311         self.assertEqual(re.match("^x{1}$", "xxx"), None)
       
   312         self.assertEqual(re.match("^x{1}?$", "xxx"), None)
       
   313         self.assertEqual(re.match("^x{1,2}$", "xxx"), None)
       
   314         self.assertEqual(re.match("^x{1,2}?$", "xxx"), None)
       
   315 
       
   316         self.assertNotEqual(re.match("^x{3}$", "xxx"), None)
       
   317         self.assertNotEqual(re.match("^x{1,3}$", "xxx"), None)
       
   318         self.assertNotEqual(re.match("^x{1,4}$", "xxx"), None)
       
   319         self.assertNotEqual(re.match("^x{3,4}?$", "xxx"), None)
       
   320         self.assertNotEqual(re.match("^x{3}?$", "xxx"), None)
       
   321         self.assertNotEqual(re.match("^x{1,3}?$", "xxx"), None)
       
   322         self.assertNotEqual(re.match("^x{1,4}?$", "xxx"), None)
       
   323         self.assertNotEqual(re.match("^x{3,4}?$", "xxx"), None)
       
   324 
       
   325         self.assertEqual(re.match("^x{}$", "xxx"), None)
       
   326         self.assertNotEqual(re.match("^x{}$", "x{}"), None)
       
   327 
       
   328     def test_getattr(self):
       
   329         self.assertEqual(re.match("(a)", "a").pos, 0)
       
   330         self.assertEqual(re.match("(a)", "a").endpos, 1)
       
   331         self.assertEqual(re.match("(a)", "a").string, "a")
       
   332         self.assertEqual(re.match("(a)", "a").regs, ((0, 1), (0, 1)))
       
   333         self.assertNotEqual(re.match("(a)", "a").re, None)
       
   334 
       
   335     def test_special_escapes(self):
       
   336         self.assertEqual(re.search(r"\b(b.)\b",
       
   337                                    "abcd abc bcd bx").group(1), "bx")
       
   338         self.assertEqual(re.search(r"\B(b.)\B",
       
   339                                    "abc bcd bc abxd").group(1), "bx")
       
   340         self.assertEqual(re.search(r"\b(b.)\b",
       
   341                                    "abcd abc bcd bx", re.LOCALE).group(1), "bx")
       
   342         self.assertEqual(re.search(r"\B(b.)\B",
       
   343                                    "abc bcd bc abxd", re.LOCALE).group(1), "bx")
       
   344         self.assertEqual(re.search(r"\b(b.)\b",
       
   345                                    "abcd abc bcd bx", re.UNICODE).group(1), "bx")
       
   346         self.assertEqual(re.search(r"\B(b.)\B",
       
   347                                    "abc bcd bc abxd", re.UNICODE).group(1), "bx")
       
   348         self.assertEqual(re.search(r"^abc$", "\nabc\n", re.M).group(0), "abc")
       
   349         self.assertEqual(re.search(r"^\Aabc\Z$", "abc", re.M).group(0), "abc")
       
   350         self.assertEqual(re.search(r"^\Aabc\Z$", "\nabc\n", re.M), None)
       
   351         self.assertEqual(re.search(r"\b(b.)\b",
       
   352                                    u"abcd abc bcd bx").group(1), "bx")
       
   353         self.assertEqual(re.search(r"\B(b.)\B",
       
   354                                    u"abc bcd bc abxd").group(1), "bx")
       
   355         self.assertEqual(re.search(r"^abc$", u"\nabc\n", re.M).group(0), "abc")
       
   356         self.assertEqual(re.search(r"^\Aabc\Z$", u"abc", re.M).group(0), "abc")
       
   357         self.assertEqual(re.search(r"^\Aabc\Z$", u"\nabc\n", re.M), None)
       
   358         self.assertEqual(re.search(r"\d\D\w\W\s\S",
       
   359                                    "1aa! a").group(0), "1aa! a")
       
   360         self.assertEqual(re.search(r"\d\D\w\W\s\S",
       
   361                                    "1aa! a", re.LOCALE).group(0), "1aa! a")
       
   362         self.assertEqual(re.search(r"\d\D\w\W\s\S",
       
   363                                    "1aa! a", re.UNICODE).group(0), "1aa! a")
       
   364 
       
   365     def test_ignore_case(self):
       
   366         self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
       
   367         self.assertEqual(re.match("abc", u"ABC", re.I).group(0), "ABC")
       
   368 
       
   369     def test_bigcharset(self):
       
   370         self.assertEqual(re.match(u"([\u2222\u2223])",
       
   371                                   u"\u2222").group(1), u"\u2222")
       
   372         self.assertEqual(re.match(u"([\u2222\u2223])",
       
   373                                   u"\u2222", re.UNICODE).group(1), u"\u2222")
       
   374 
       
   375     def test_anyall(self):
       
   376         self.assertEqual(re.match("a.b", "a\nb", re.DOTALL).group(0),
       
   377                          "a\nb")
       
   378         self.assertEqual(re.match("a.*b", "a\n\nb", re.DOTALL).group(0),
       
   379                          "a\n\nb")
       
   380 
       
   381     def test_non_consuming(self):
       
   382         self.assertEqual(re.match("(a(?=\s[^a]))", "a b").group(1), "a")
       
   383         self.assertEqual(re.match("(a(?=\s[^a]*))", "a b").group(1), "a")
       
   384         self.assertEqual(re.match("(a(?=\s[abc]))", "a b").group(1), "a")
       
   385         self.assertEqual(re.match("(a(?=\s[abc]*))", "a bc").group(1), "a")
       
   386         self.assertEqual(re.match(r"(a)(?=\s\1)", "a a").group(1), "a")
       
   387         self.assertEqual(re.match(r"(a)(?=\s\1*)", "a aa").group(1), "a")
       
   388         self.assertEqual(re.match(r"(a)(?=\s(abc|a))", "a a").group(1), "a")
       
   389 
       
   390         self.assertEqual(re.match(r"(a(?!\s[^a]))", "a a").group(1), "a")
       
   391         self.assertEqual(re.match(r"(a(?!\s[abc]))", "a d").group(1), "a")
       
   392         self.assertEqual(re.match(r"(a)(?!\s\1)", "a b").group(1), "a")
       
   393         self.assertEqual(re.match(r"(a)(?!\s(abc|a))", "a b").group(1), "a")
       
   394 
       
   395     def test_ignore_case(self):
       
   396         self.assertEqual(re.match(r"(a\s[^a])", "a b", re.I).group(1), "a b")
       
   397         self.assertEqual(re.match(r"(a\s[^a]*)", "a bb", re.I).group(1), "a bb")
       
   398         self.assertEqual(re.match(r"(a\s[abc])", "a b", re.I).group(1), "a b")
       
   399         self.assertEqual(re.match(r"(a\s[abc]*)", "a bb", re.I).group(1), "a bb")
       
   400         self.assertEqual(re.match(r"((a)\s\2)", "a a", re.I).group(1), "a a")
       
   401         self.assertEqual(re.match(r"((a)\s\2*)", "a aa", re.I).group(1), "a aa")
       
   402         self.assertEqual(re.match(r"((a)\s(abc|a))", "a a", re.I).group(1), "a a")
       
   403         self.assertEqual(re.match(r"((a)\s(abc|a)*)", "a aa", re.I).group(1), "a aa")
       
   404 
       
   405     def test_category(self):
       
   406         self.assertEqual(re.match(r"(\s)", " ").group(1), " ")
       
   407 
       
   408     def test_getlower(self):
       
   409         import _sre
       
   410         self.assertEqual(_sre.getlower(ord('A'), 0), ord('a'))
       
   411         self.assertEqual(_sre.getlower(ord('A'), re.LOCALE), ord('a'))
       
   412         self.assertEqual(_sre.getlower(ord('A'), re.UNICODE), ord('a'))
       
   413 
       
   414         self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
       
   415         self.assertEqual(re.match("abc", u"ABC", re.I).group(0), "ABC")
       
   416 
       
   417     def test_not_literal(self):
       
   418         self.assertEqual(re.search("\s([^a])", " b").group(1), "b")
       
   419         self.assertEqual(re.search("\s([^a]*)", " bb").group(1), "bb")
       
   420 
       
   421     def test_search_coverage(self):
       
   422         self.assertEqual(re.search("\s(b)", " b").group(1), "b")
       
   423         self.assertEqual(re.search("a\s", "a ").group(0), "a ")
       
   424 
       
   425     def test_re_escape(self):
       
   426         p=""
       
   427         for i in range(0, 256):
       
   428             p = p + chr(i)
       
   429             self.assertEqual(re.match(re.escape(chr(i)), chr(i)) is not None,
       
   430                              True)
       
   431             self.assertEqual(re.match(re.escape(chr(i)), chr(i)).span(), (0,1))
       
   432 
       
   433         pat=re.compile(re.escape(p))
       
   434         self.assertEqual(pat.match(p) is not None, True)
       
   435         self.assertEqual(pat.match(p).span(), (0,256))
       
   436 
       
    def test_pickling(self):
        # Run the pattern round-trip check (pickle_test) with both the
        # pure-Python and the C pickle implementations.
        import pickle
        self.pickle_test(pickle)
        import cPickle
        self.pickle_test(cPickle)
        # old pickles expect the _compile() reconstructor in sre module
        import warnings
        # Work on a copy of the filter list so it can be put back afterwards.
        original_filters = warnings.filters[:]
        try:
            # Silence the deprecation warning for the duration of the import.
            warnings.filterwarnings("ignore", "The sre module is deprecated",
                                    DeprecationWarning)
            # Only importability is checked; the imported name is not used.
            from sre import _compile
        finally:
            # Restore the saved filters whether or not the import succeeded.
            warnings.filters = original_filters
       
   451 
       
   452     def pickle_test(self, pickle):
       
   453         oldpat = re.compile('a(?:b|(c|e){1,2}?|d)+?(.)')
       
   454         s = pickle.dumps(oldpat)
       
   455         newpat = pickle.loads(s)
       
   456         self.assertEqual(oldpat, newpat)
       
   457 
       
   458     def test_constants(self):
       
   459         self.assertEqual(re.I, re.IGNORECASE)
       
   460         self.assertEqual(re.L, re.LOCALE)
       
   461         self.assertEqual(re.M, re.MULTILINE)
       
   462         self.assertEqual(re.S, re.DOTALL)
       
   463         self.assertEqual(re.X, re.VERBOSE)
       
   464 
       
   465     def test_flags(self):
       
   466         for flag in [re.I, re.M, re.X, re.S, re.L]:
       
   467             self.assertNotEqual(re.compile('^pattern$', flag), None)
       
   468 
       
   469     def test_sre_character_literals(self):
       
   470         for i in [0, 8, 16, 32, 64, 127, 128, 255]:
       
   471             self.assertNotEqual(re.match(r"\%03o" % i, chr(i)), None)
       
   472             self.assertNotEqual(re.match(r"\%03o0" % i, chr(i)+"0"), None)
       
   473             self.assertNotEqual(re.match(r"\%03o8" % i, chr(i)+"8"), None)
       
   474             self.assertNotEqual(re.match(r"\x%02x" % i, chr(i)), None)
       
   475             self.assertNotEqual(re.match(r"\x%02x0" % i, chr(i)+"0"), None)
       
   476             self.assertNotEqual(re.match(r"\x%02xz" % i, chr(i)+"z"), None)
       
   477         self.assertRaises(re.error, re.match, "\911", "")
       
   478 
       
   479     def test_sre_character_class_literals(self):
       
   480         for i in [0, 8, 16, 32, 64, 127, 128, 255]:
       
   481             self.assertNotEqual(re.match(r"[\%03o]" % i, chr(i)), None)
       
   482             self.assertNotEqual(re.match(r"[\%03o0]" % i, chr(i)), None)
       
   483             self.assertNotEqual(re.match(r"[\%03o8]" % i, chr(i)), None)
       
   484             self.assertNotEqual(re.match(r"[\x%02x]" % i, chr(i)), None)
       
   485             self.assertNotEqual(re.match(r"[\x%02x0]" % i, chr(i)), None)
       
   486             self.assertNotEqual(re.match(r"[\x%02xz]" % i, chr(i)), None)
       
   487         self.assertRaises(re.error, re.match, "[\911]", "")
       
   488 
       
   489     def test_bug_113254(self):
       
   490         self.assertEqual(re.match(r'(a)|(b)', 'b').start(1), -1)
       
   491         self.assertEqual(re.match(r'(a)|(b)', 'b').end(1), -1)
       
   492         self.assertEqual(re.match(r'(a)|(b)', 'b').span(1), (-1, -1))
       
   493 
       
   494     def test_bug_527371(self):
       
   495         # bug described in patches 527371/672491
       
   496         self.assertEqual(re.match(r'(a)?a','a').lastindex, None)
       
   497         self.assertEqual(re.match(r'(a)(b)?b','ab').lastindex, 1)
       
   498         self.assertEqual(re.match(r'(?P<a>a)(?P<b>b)?b','ab').lastgroup, 'a')
       
   499         self.assertEqual(re.match("(?P<a>a(b))", "ab").lastgroup, 'a')
       
   500         self.assertEqual(re.match("((a))", "a").lastindex, 1)
       
   501 
       
   502     def test_bug_545855(self):
       
   503         # bug 545855 -- This pattern failed to cause a compile error as it
       
   504         # should, instead provoking a TypeError.
       
   505         self.assertRaises(re.error, re.compile, 'foo[a-')
       
   506 
       
   507     def test_bug_418626(self):
       
   508         # bugs 418626 at al. -- Testing Greg Chapman's addition of op code
       
   509         # SRE_OP_MIN_REPEAT_ONE for eliminating recursion on simple uses of
       
   510         # pattern '*?' on a long string.
       
   511         self.assertEqual(re.match('.*?c', 10000*'ab'+'cd').end(0), 20001)
       
   512         self.assertEqual(re.match('.*?cd', 5000*'ab'+'c'+5000*'ab'+'cde').end(0),
       
   513                          20003)
       
   514         self.assertEqual(re.match('.*?cd', 20000*'abc'+'de').end(0), 60001)
       
   515         # non-simple '*?' still used to hit the recursion limit, before the
       
   516         # non-recursive scheme was implemented.
       
   517         self.assertEqual(re.search('(a|b)*?c', 10000*'ab'+'cd').end(0), 20001)
       
   518 
       
   519     def test_bug_612074(self):
       
   520         pat=u"["+re.escape(u"\u2039")+u"]"
       
   521         self.assertEqual(re.compile(pat) and 1, 1)
       
   522 
       
   523     def test_stack_overflow(self):
       
   524         # nasty cases that used to overflow the straightforward recursive
       
   525         # implementation of repeated groups.
       
   526         self.assertEqual(re.match('(x)*', 50000*'x').group(1), 'x')
       
   527         self.assertEqual(re.match('(x)*y', 50000*'x'+'y').group(1), 'x')
       
   528         self.assertEqual(re.match('(x)*?y', 50000*'x'+'y').group(1), 'x')
       
   529 
       
   530     def test_scanner(self):
       
   531         def s_ident(scanner, token): return token
       
   532         def s_operator(scanner, token): return "op%s" % token
       
   533         def s_float(scanner, token): return float(token)
       
   534         def s_int(scanner, token): return int(token)
       
   535 
       
   536         scanner = Scanner([
       
   537             (r"[a-zA-Z_]\w*", s_ident),
       
   538             (r"\d+\.\d*", s_float),
       
   539             (r"\d+", s_int),
       
   540             (r"=|\+|-|\*|/", s_operator),
       
   541             (r"\s+", None),
       
   542             ])
       
   543 
       
   544         self.assertNotEqual(scanner.scanner.scanner("").pattern, None)
       
   545 
       
   546         self.assertEqual(scanner.scan("sum = 3*foo + 312.50 + bar"),
       
   547                          (['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5,
       
   548                            'op+', 'bar'], ''))
       
   549 
       
   550     def test_bug_448951(self):
       
   551         # bug 448951 (similar to 429357, but with single char match)
       
   552         # (Also test greedy matches.)
       
   553         for op in '','?','*':
       
   554             self.assertEqual(re.match(r'((.%s):)?z'%op, 'z').groups(),
       
   555                              (None, None))
       
   556             self.assertEqual(re.match(r'((.%s):)?z'%op, 'a:z').groups(),
       
   557                              ('a:', 'a'))
       
   558 
       
   559     def test_bug_725106(self):
       
   560         # capturing groups in alternatives in repeats
       
   561         self.assertEqual(re.match('^((a)|b)*', 'abc').groups(),
       
   562                          ('b', 'a'))
       
   563         self.assertEqual(re.match('^(([ab])|c)*', 'abc').groups(),
       
   564                          ('c', 'b'))
       
   565         self.assertEqual(re.match('^((d)|[ab])*', 'abc').groups(),
       
   566                          ('b', None))
       
   567         self.assertEqual(re.match('^((a)c|[ab])*', 'abc').groups(),
       
   568                          ('b', None))
       
   569         self.assertEqual(re.match('^((a)|b)*?c', 'abc').groups(),
       
   570                          ('b', 'a'))
       
   571         self.assertEqual(re.match('^(([ab])|c)*?d', 'abcd').groups(),
       
   572                          ('c', 'b'))
       
   573         self.assertEqual(re.match('^((d)|[ab])*?c', 'abc').groups(),
       
   574                          ('b', None))
       
   575         self.assertEqual(re.match('^((a)c|[ab])*?c', 'abc').groups(),
       
   576                          ('b', None))
       
   577 
       
   578     def test_bug_725149(self):
       
   579         # mark_stack_base restoring before restoring marks
       
   580         self.assertEqual(re.match('(a)(?:(?=(b)*)c)*', 'abb').groups(),
       
   581                          ('a', None))
       
   582         self.assertEqual(re.match('(a)((?!(b)*))*', 'abb').groups(),
       
   583                          ('a', None, None))
       
   584 
       
   585     def test_bug_764548(self):
       
   586         # bug 764548, re.compile() barfs on str/unicode subclasses
       
   587         try:
       
   588             unicode
       
   589         except NameError:
       
   590             return  # no problem if we have no unicode
       
   591         class my_unicode(unicode): pass
       
   592         pat = re.compile(my_unicode("abc"))
       
   593         self.assertEqual(pat.match("xyz"), None)
       
   594 
       
   595     def test_finditer(self):
       
   596         iter = re.finditer(r":+", "a:b::c:::d")
       
   597         self.assertEqual([item.group(0) for item in iter],
       
   598                          [":", "::", ":::"])
       
   599 
       
   600     def test_bug_926075(self):
       
   601         try:
       
   602             unicode
       
   603         except NameError:
       
   604             return # no problem if we have no unicode
       
   605         self.assert_(re.compile('bug_926075') is not
       
   606                      re.compile(eval("u'bug_926075'")))
       
   607 
       
   608     def test_bug_931848(self):
       
   609         try:
       
   610             unicode
       
   611         except NameError:
       
   612             pass
       
   613         pattern = eval('u"[\u002E\u3002\uFF0E\uFF61]"')
       
   614         self.assertEqual(re.compile(pattern).split("a.b.c"),
       
   615                          ['a','b','c'])
       
   616 
       
   617     def test_bug_581080(self):
       
   618         iter = re.finditer(r"\s", "a b")
       
   619         self.assertEqual(iter.next().span(), (1,2))
       
   620         self.assertRaises(StopIteration, iter.next)
       
   621 
       
   622         scanner = re.compile(r"\s").scanner("a b")
       
   623         self.assertEqual(scanner.search().span(), (1, 2))
       
   624         self.assertEqual(scanner.search(), None)
       
   625 
       
   626     def test_bug_817234(self):
       
   627         iter = re.finditer(r".*", "asdf")
       
   628         self.assertEqual(iter.next().span(), (0, 4))
       
   629         self.assertEqual(iter.next().span(), (4, 4))
       
   630         self.assertRaises(StopIteration, iter.next)
       
   631 
       
   632     def test_empty_array(self):
       
   633         # SF buf 1647541
       
   634         import array
       
   635         for typecode in 'cbBuhHiIlLfd':
       
   636             a = array.array(typecode)
       
   637             self.assertEqual(re.compile("bla").match(a), None)
       
   638             self.assertEqual(re.compile("").match(a).groups(), ())
       
   639 
       
   640     def test_inline_flags(self):
       
   641         # Bug #1700
       
   642         upper_char = unichr(0x1ea0) # Latin Capital Letter A with Dot Bellow
       
   643         lower_char = unichr(0x1ea1) # Latin Small Letter A with Dot Bellow
       
   644 
       
   645         p = re.compile(upper_char, re.I | re.U)
       
   646         q = p.match(lower_char)
       
   647         self.assertNotEqual(q, None)
       
   648 
       
   649         p = re.compile(lower_char, re.I | re.U)
       
   650         q = p.match(upper_char)
       
   651         self.assertNotEqual(q, None)
       
   652 
       
   653         p = re.compile('(?i)' + upper_char, re.U)
       
   654         q = p.match(lower_char)
       
   655         self.assertNotEqual(q, None)
       
   656 
       
   657         p = re.compile('(?i)' + lower_char, re.U)
       
   658         q = p.match(upper_char)
       
   659         self.assertNotEqual(q, None)
       
   660 
       
   661         p = re.compile('(?iu)' + upper_char)
       
   662         q = p.match(lower_char)
       
   663         self.assertNotEqual(q, None)
       
   664 
       
   665         p = re.compile('(?iu)' + lower_char)
       
   666         q = p.match(upper_char)
       
   667         self.assertNotEqual(q, None)
       
   668 
       
   669 
       
   670 def run_re_tests():
       
   671     from test.re_tests import benchmarks, tests, SUCCEED, FAIL, SYNTAX_ERROR
       
   672     if verbose:
       
   673         print 'Running re_tests test suite'
       
   674     else:
       
   675         # To save time, only run the first and last 10 tests
       
   676         #tests = tests[:10] + tests[-10:]
       
   677         pass
       
   678 
       
   679     for t in tests:
       
   680         sys.stdout.flush()
       
   681         pattern = s = outcome = repl = expected = None
       
   682         if len(t) == 5:
       
   683             pattern, s, outcome, repl, expected = t
       
   684         elif len(t) == 3:
       
   685             pattern, s, outcome = t
       
   686         else:
       
   687             raise ValueError, ('Test tuples should have 3 or 5 fields', t)
       
   688 
       
   689         try:
       
   690             obj = re.compile(pattern)
       
   691         except re.error:
       
   692             if outcome == SYNTAX_ERROR: pass  # Expected a syntax error
       
   693             else:
       
   694                 print '=== Syntax error:', t
       
   695         except KeyboardInterrupt: raise KeyboardInterrupt
       
   696         except:
       
   697             print '*** Unexpected error ***', t
       
   698             if verbose:
       
   699                 traceback.print_exc(file=sys.stdout)
       
   700         else:
       
   701             try:
       
   702                 result = obj.search(s)
       
   703             except re.error, msg:
       
   704                 print '=== Unexpected exception', t, repr(msg)
       
   705             if outcome == SYNTAX_ERROR:
       
   706                 # This should have been a syntax error; forget it.
       
   707                 pass
       
   708             elif outcome == FAIL:
       
   709                 if result is None: pass   # No match, as expected
       
   710                 else: print '=== Succeeded incorrectly', t
       
   711             elif outcome == SUCCEED:
       
   712                 if result is not None:
       
   713                     # Matched, as expected, so now we compute the
       
   714                     # result string and compare it to our expected result.
       
   715                     start, end = result.span(0)
       
   716                     vardict={'found': result.group(0),
       
   717                              'groups': result.group(),
       
   718                              'flags': result.re.flags}
       
   719                     for i in range(1, 100):
       
   720                         try:
       
   721                             gi = result.group(i)
       
   722                             # Special hack because else the string concat fails:
       
   723                             if gi is None:
       
   724                                 gi = "None"
       
   725                         except IndexError:
       
   726                             gi = "Error"
       
   727                         vardict['g%d' % i] = gi
       
   728                     for i in result.re.groupindex.keys():
       
   729                         try:
       
   730                             gi = result.group(i)
       
   731                             if gi is None:
       
   732                                 gi = "None"
       
   733                         except IndexError:
       
   734                             gi = "Error"
       
   735                         vardict[i] = gi
       
   736                     repl = eval(repl, vardict)
       
   737                     if repl != expected:
       
   738                         print '=== grouping error', t,
       
   739                         print repr(repl) + ' should be ' + repr(expected)
       
   740                 else:
       
   741                     print '=== Failed incorrectly', t
       
   742 
       
   743                 # Try the match on a unicode string, and check that it
       
   744                 # still succeeds.
       
   745                 try:
       
   746                     result = obj.search(unicode(s, "latin-1"))
       
   747                     if result is None:
       
   748                         print '=== Fails on unicode match', t
       
   749                 except NameError:
       
   750                     continue # 1.5.2
       
   751                 except TypeError:
       
   752                     continue # unicode test case
       
   753 
       
   754                 # Try the match on a unicode pattern, and check that it
       
   755                 # still succeeds.
       
   756                 obj=re.compile(unicode(pattern, "latin-1"))
       
   757                 result = obj.search(s)
       
   758                 if result is None:
       
   759                     print '=== Fails on unicode pattern match', t
       
   760 
       
   761                 # Try the match with the search area limited to the extent
       
   762                 # of the match and see if it still succeeds.  \B will
       
   763                 # break (because it won't match at the end or start of a
       
   764                 # string), so we'll ignore patterns that feature it.
       
   765 
       
   766                 if pattern[:2] != '\\B' and pattern[-2:] != '\\B' \
       
   767                                and result is not None:
       
   768                     obj = re.compile(pattern)
       
   769                     result = obj.search(s, result.start(0), result.end(0) + 1)
       
   770                     if result is None:
       
   771                         print '=== Failed on range-limited match', t
       
   772 
       
   773                 # Try the match with IGNORECASE enabled, and check that it
       
   774                 # still succeeds.
       
   775                 obj = re.compile(pattern, re.IGNORECASE)
       
   776                 result = obj.search(s)
       
   777                 if result is None:
       
   778                     print '=== Fails on case-insensitive match', t
       
   779 
       
   780                 # Try the match with LOCALE enabled, and check that it
       
   781                 # still succeeds.
       
   782                 obj = re.compile(pattern, re.LOCALE)
       
   783                 result = obj.search(s)
       
   784                 if result is None:
       
   785                     print '=== Fails on locale-sensitive match', t
       
   786 
       
   787                 # Try the match with UNICODE locale enabled, and check
       
   788                 # that it still succeeds.
       
   789                 obj = re.compile(pattern, re.UNICODE)
       
   790                 result = obj.search(s)
       
   791                 if result is None:
       
   792                     print '=== Fails on unicode-sensitive match', t
       
   793 
       
   794 def test_main():
       
   795     run_unittest(ReTests)
       
   796     run_re_tests()
       
   797 
       
   798 if __name__ == "__main__":
       
   799     test_main()