|
1 """A collection of string operations (most are no longer used). |
|
2 |
|
3 Warning: most of the code you see here isn't normally used nowadays. |
|
4 Beginning with Python 1.6, many of these functions are implemented as |
|
5 methods on the standard string object. They used to be implemented by |
|
6 a built-in module called strop, but strop is now obsolete itself. |
|
7 |
|
8 Public module variables: |
|
9 |
|
10 whitespace -- a string containing all characters considered whitespace |
|
11 lowercase -- a string containing all characters considered lowercase letters |
|
12 uppercase -- a string containing all characters considered uppercase letters |
|
13 letters -- a string containing all characters considered letters |
|
14 digits -- a string containing all characters considered decimal digits |
|
15 hexdigits -- a string containing all characters considered hexadecimal digits |
|
16 octdigits -- a string containing all characters considered octal digits |
|
17 punctuation -- a string containing all characters considered punctuation |
|
18 printable -- a string containing all characters considered printable |
|
19 |
|
20 """ |
|
21 |
|
22 # Some strings for ctype-style character classification |
|
# ctype-style character classes.  The ascii_* names are bound to the very
# same string objects as their unprefixed counterparts.
whitespace = ' \t\n\r\v\f'
lowercase = 'abcdefghijklmnopqrstuvwxyz'
uppercase = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
ascii_lowercase = lowercase
ascii_uppercase = uppercase
letters = lowercase + uppercase
ascii_letters = ascii_lowercase + ascii_uppercase

digits = '0123456789'
octdigits = '01234567'
hexdigits = digits + 'abcdef' + 'ABCDEF'

punctuation = """!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~"""
printable = digits + letters + punctuation + whitespace

# Identity table: the 256 characters chr(0)..chr(255) in order.
# str('') (rather than a bare '') keeps the result a plain string even
# when literals are Unicode because of -U.
_idmap = str('').join(map(chr, range(256)))
|
41 |
|
42 # Functions which aren't available as string methods. |
|
43 |
|
44 # Capitalize the words in a string, e.g. " aBc dEf " -> "Abc Def". |
|
def capwords(s, sep=None):
    """capwords(s, [sep]) -> string

    Break s into words with s.split(sep), capitalize every word with
    its capitalize() method, and glue the words back together with
    sep (or a single space when sep is None or empty).  With the
    default sep, runs of whitespace therefore collapse into one space.

    """
    glue = sep or ' '
    words = []
    for word in s.split(sep):
        words.append(word.capitalize())
    return glue.join(words)
|
55 |
|
56 |
|
57 # Construct a translation string |
|
# Lazily-built list form of _idmap, shared by all maketrans() calls.
_idmapL = None
def maketrans(fromstr, tostr):
    """maketrans(frm, to) -> string

    Return a translation table (a string of 256 bytes long)
    suitable for use in string.translate.  The strings frm and to
    must be of the same length; ValueError is raised otherwise.

    Each character frm[i] is mapped to to[i]; every other character
    maps to itself.

    """
    global _idmapL
    if len(fromstr) != len(tostr):
        # Call form of raise: same behavior as the old
        # "raise ValueError, msg" statement, without the legacy syntax.
        raise ValueError("maketrans arguments must have same length")
    if not _idmapL:
        # Build the identity list once and cache it at module level.
        _idmapL = list(_idmap)
    # Work on a copy so the cached identity list is never modified.
    table = _idmapL[:]
    for src, dst in zip(fromstr, tostr):
        table[ord(src)] = dst
    return ''.join(table)
|
77 |
|
78 |
|
79 |
|
80 #################################################################### |
|
81 import re as _re |
|
82 |
|
class _multimap:
    """Read-only view that layers one mapping on top of another.

    Used by .{safe_,}substitute() to combine the mapping and keyword
    arguments: a key is looked up in the primary mapping first and in
    the secondary mapping only if the primary raises KeyError.
    """
    def __init__(self, primary, secondary):
        self._primary, self._secondary = primary, secondary

    def __getitem__(self, key):
        try:
            value = self._primary[key]
        except KeyError:
            # Not in the primary mapping; a KeyError from the secondary
            # lookup propagates to the caller.
            value = self._secondary[key]
        return value
|
98 |
|
99 |
|
class _TemplateMetaclass(type):
    """Metaclass that compiles the placeholder regex for each class.

    When a class body does not define its own 'pattern', the default
    pattern below is filled in with the class's delimiter (escaped)
    and idpattern.  Either way the result is compiled with IGNORECASE
    and VERBOSE and stored back on the class as 'pattern'.
    """
    pattern = r"""
    %(delim)s(?:
      (?P<escaped>%(delim)s) |   # Escape sequence of two delimiters
      (?P<named>%(id)s)      |   # delimiter and a Python identifier
      {(?P<braced>%(id)s)}   |   # delimiter and a braced identifier
      (?P<invalid>)              # Other ill-formed delimiter exprs
    )
    """

    def __init__(cls, name, bases, dct):
        super(_TemplateMetaclass, cls).__init__(name, bases, dct)
        if 'pattern' not in dct:
            # No class-specific pattern: derive one from the default.
            fields = {
                'delim' : _re.escape(cls.delimiter),
                'id'    : cls.idpattern,
                }
            source = _TemplateMetaclass.pattern % fields
        else:
            source = cls.pattern
        flags = _re.IGNORECASE | _re.VERBOSE
        cls.pattern = _re.compile(source, flags)
|
120 |
|
121 |
|
class Template:
    """A string class for supporting $-substitutions.

    The compiled regular expression in the 'pattern' attribute is
    expected to expose the groups 'named', 'braced', 'escaped' and
    'invalid'; it is installed by the metaclass.
    """
    __metaclass__ = _TemplateMetaclass

    delimiter = '$'
    idpattern = r'[_a-z][_a-z0-9]*'

    def __init__(self, template):
        self.template = template

    # Search for $$, $identifier, ${identifier}, and any bare $'s

    def _invalid(self, mo):
        """Raise ValueError giving the line and column of a bad placeholder."""
        offset = mo.start('invalid')
        preceding = self.template[:offset].splitlines(True)
        if preceding:
            lineno = len(preceding)
            # Column = offset minus the length of all complete earlier lines.
            colno = offset - len(''.join(preceding[:-1]))
        else:
            lineno = colno = 1
        raise ValueError('Invalid placeholder in string: line %d, col %d' %
                         (lineno, colno))

    def substitute(self, *args, **kws):
        """Return the template with every placeholder replaced.

        Accepts at most one positional mapping plus keyword arguments;
        when both are given, keywords are consulted first.  More than
        one positional argument raises TypeError.  Errors from the
        mapping lookup (e.g. KeyError) propagate, and an ill-formed
        placeholder raises ValueError.
        """
        if len(args) > 1:
            raise TypeError('Too many positional arguments')
        if args and kws:
            mapping = _multimap(kws, args[0])
        elif args:
            mapping = args[0]
        else:
            mapping = kws

        # Helper function for .sub()
        def convert(mo):
            # Check the most common path first.
            name = mo.group('named') or mo.group('braced')
            if name is not None:
                # '%s' % (val,) is used instead of str() because the latter
                # will fail if val is a Unicode containing non-ASCII
                # characters.
                return '%s' % (mapping[name],)
            if mo.group('escaped') is not None:
                return self.delimiter
            if mo.group('invalid') is not None:
                self._invalid(mo)
            raise ValueError('Unrecognized named group in pattern',
                             self.pattern)

        return self.pattern.sub(convert, self.template)

    def safe_substitute(self, *args, **kws):
        """Like substitute(), but leave unresolved placeholders in place.

        A placeholder whose lookup raises KeyError is emitted unchanged
        ($name or ${name}), and an ill-formed placeholder yields the
        bare delimiter instead of raising ValueError.
        """
        if len(args) > 1:
            raise TypeError('Too many positional arguments')
        if args and kws:
            mapping = _multimap(kws, args[0])
        elif args:
            mapping = args[0]
        else:
            mapping = kws

        # Helper function for .sub()
        def convert(mo):
            plain = mo.group('named')
            if plain is not None:
                try:
                    # '%s' % (val,) is used instead of str() because the
                    # latter will fail if val is a Unicode containing
                    # non-ASCII characters.
                    return '%s' % (mapping[plain],)
                except KeyError:
                    return self.delimiter + plain
            wrapped = mo.group('braced')
            if wrapped is not None:
                try:
                    return '%s' % (mapping[wrapped],)
                except KeyError:
                    return self.delimiter + '{' + wrapped + '}'
            if mo.group('escaped') is not None:
                return self.delimiter
            if mo.group('invalid') is not None:
                return self.delimiter
            raise ValueError('Unrecognized named group in pattern',
                             self.pattern)

        return self.pattern.sub(convert, self.template)
|
204 |
|
205 |
|
206 |
|
207 #################################################################### |
|
208 # NOTE: Everything below here is deprecated. Use string methods instead. |
|
209 # This stuff will go away in Python 3.0. |
|
210 |
|
211 # Backward compatible names for exceptions |
|
# All four legacy exception names are plain aliases of ValueError.
index_error = atoi_error = atof_error = atol_error = ValueError
|
216 |
|
217 # convert UPPER CASE letters to lower case |
|
def lower(s):
    """lower(s) -> string

    Return the result of s.lower(): a lowercase copy of s.

    """
    lowered = s.lower()
    return lowered
|
225 |
|
226 # Convert lower case letters to UPPER CASE |
|
def upper(s):
    """upper(s) -> string

    Return the result of s.upper(): an uppercase copy of s.

    """
    raised = s.upper()
    return raised
|
234 |
|
235 # Swap lower case letters and UPPER CASE |
|
def swapcase(s):
    """swapcase(s) -> string

    Return the result of s.swapcase(): a copy of s in which
    uppercase characters become lowercase and vice versa.

    """
    swapped = s.swapcase()
    return swapped
|
244 |
|
245 # Strip leading and trailing tabs and spaces |
|
def strip(s, chars=None):
    """strip(s [,chars]) -> string

    Return s.strip(chars): a copy of s without leading and trailing
    whitespace.
    When chars is given and not None, the characters in chars are
    removed instead of whitespace.
    If chars is unicode, S will be converted to unicode before stripping.

    """
    stripped = s.strip(chars)
    return stripped
|
256 |
|
257 # Strip leading tabs and spaces |
|
def lstrip(s, chars=None):
    """lstrip(s [,chars]) -> string

    Return s.lstrip(chars): a copy of s without leading whitespace.
    When chars is given and not None, the characters in chars are
    removed instead of whitespace.

    """
    stripped = s.lstrip(chars)
    return stripped
|
266 |
|
267 # Strip trailing tabs and spaces |
|
def rstrip(s, chars=None):
    """rstrip(s [,chars]) -> string

    Return s.rstrip(chars): a copy of s without trailing whitespace.
    When chars is given and not None, the characters in chars are
    removed instead of whitespace.

    """
    stripped = s.rstrip(chars)
    return stripped
|
276 |
|
277 |
|
278 # Split a string into a list of space/tab-separated words |
|
def split(s, sep=None, maxsplit=-1):
    """split(s [,sep [,maxsplit]]) -> list of strings

    Return s.split(sep, maxsplit): the list of words in s, with sep
    as the delimiter string.  When maxsplit is given, at most
    maxsplit splits are made (so at most maxsplit+1 words come
    back).  When sep is omitted or None, any run of whitespace
    separates words.

    (split and splitfields are synonymous)

    """
    pieces = s.split(sep, maxsplit)
    return pieces
splitfields = split
|
292 |
|
# Split a string into a list of words, working from the end of the string
|
def rsplit(s, sep=None, maxsplit=-1):
    """rsplit(s [,sep [,maxsplit]]) -> list of strings

    Return s.rsplit(sep, maxsplit): the list of words in s, with sep
    as the delimiter string, splitting from the end of the string
    towards the front.  When maxsplit is given, at most maxsplit
    splits are made.  When sep is omitted or None, any run of
    whitespace separates words.
    """
    pieces = s.rsplit(sep, maxsplit)
    return pieces
|
304 |
|
305 # Join fields with optional separator |
|
def join(words, sep = ' '):
    """join(list [,sep]) -> string

    Return sep.join(words): the items of the list concatenated with
    sep between neighbours.  sep defaults to a single space.

    (joinfields and join are synonymous)

    """
    joined = sep.join(words)
    return joined
joinfields = join
|
318 |
|
319 # Find substring, raise exception if not found |
|
def index(s, *args):
    """index(s, sub [,start [,end]]) -> int

    Return s.index(sub [,start [,end]]).  Works like find, except
    that a missing substring raises ValueError.

    """
    position = s.index(*args)
    return position
|
327 |
|
328 # Find last substring, raise exception if not found |
|
def rindex(s, *args):
    """rindex(s, sub [,start [,end]]) -> int

    Return s.rindex(sub [,start [,end]]).  Works like rfind, except
    that a missing substring raises ValueError.

    """
    position = s.rindex(*args)
    return position
|
336 |
|
337 # Count non-overlapping occurrences of substring |
|
def count(s, *args):
    """count(s, sub[, start[,end]]) -> int

    Return s.count(sub [,start [,end]]): how many times substring
    sub occurs in s[start:end].  The optional start and end are
    interpreted as in slice notation.

    """
    occurrences = s.count(*args)
    return occurrences
|
347 |
|
348 # Find substring, return -1 if not found |
|
def find(s, *args):
    """find(s, sub [,start [,end]]) -> int

    Return s.find(sub [,start [,end]]): the lowest index in s at
    which substring sub is found within s[start:end].  The optional
    start and end are interpreted as in slice notation.

    Return -1 on failure.

    """
    position = s.find(*args)
    return position
|
360 |
|
361 # Find last substring, return -1 if not found |
|
def rfind(s, *args):
    """rfind(s, sub [,start [,end]]) -> int

    Return s.rfind(sub [,start [,end]]): the highest index in s at
    which substring sub is found within s[start:end].  The optional
    start and end are interpreted as in slice notation.

    Return -1 on failure.

    """
    position = s.rfind(*args)
    return position
|
373 |
|
374 # for a bit of speed |
|
375 _float = float |
|
376 _int = int |
|
377 _long = long |
|
378 |
|
379 # Convert string to float |
|
def atof(s):
    """atof(s) -> float

    Convert the string s to a floating point number by passing it
    to the float constructor (via the module-level _float alias).

    """
    number = _float(s)
    return number
|
387 |
|
388 |
|
389 # Convert string to integer |
|
def atoi(s , base=10):
    """atoi(s [,base]) -> int

    Convert the string s to an integer in the given base (default
    10) by calling int(s, base) through the _int alias.  The string
    s must consist of one or more digits, possibly preceded by a
    sign.  If base is 0, it is chosen from the leading characters of
    s, 0 for octal, 0x or 0X for hexadecimal.  If base is 16, a
    preceding 0x or 0X is accepted.

    """
    number = _int(s, base)
    return number
|
402 |
|
403 |
|
404 # Convert string to long integer |
|
def atol(s, base=10):
    """atol(s [,base]) -> long

    Convert the string s to a long integer in the given base
    (default 10) by calling long(s, base) through the _long alias.
    The string s must consist of one or more digits, possibly
    preceded by a sign.  If base is 0, it is chosen from the leading
    characters of s, 0 for octal, 0x or 0X for hexadecimal.  If base
    is 16, a preceding 0x or 0X is accepted.  A trailing L or l is
    not accepted, unless base is 0.

    """
    number = _long(s, base)
    return number
|
418 |
|
419 |
|
420 # Left-justify a string |
|
def ljust(s, width, *args):
    """ljust(s, width[, fillchar]) -> string

    Return s.ljust(width[, fillchar]): s left-justified in a field
    of the given width.  Padding uses spaces unless a fillchar is
    supplied.  The string is never truncated.

    """
    padded = s.ljust(width, *args)
    return padded
|
430 |
|
431 # Right-justify a string |
|
def rjust(s, width, *args):
    """rjust(s, width[, fillchar]) -> string

    Return s.rjust(width[, fillchar]): s right-justified in a field
    of the given width.  Padding uses spaces unless a fillchar is
    supplied.  The string is never truncated.

    """
    padded = s.rjust(width, *args)
    return padded
|
441 |
|
442 # Center a string |
|
def center(s, width, *args):
    """center(s, width[, fillchar]) -> string

    Return s.center(width[, fillchar]): s centered in a field of the
    given width.  Padding uses spaces unless a fillchar is supplied.
    The string is never truncated.

    """
    padded = s.center(width, *args)
    return padded
|
452 |
|
453 # Zero-fill a number, e.g., (12, 3) --> '012' and (-3, 3) --> '-03' |
|
454 # Decadent feature: the argument may be a string or a number |
|
455 # (Use of this is deprecated; it should be a string as with ljust c.s.) |
|
456 def zfill(x, width): |
|
457 """zfill(x, width) -> string |
|
458 |
|
459 Pad a numeric string x with zeros on the left, to fill a field |
|
460 of the specified width. The string x is never truncated. |
|
461 |
|
462 """ |
|
463 if not isinstance(x, basestring): |
|
464 x = repr(x) |
|
465 return x.zfill(width) |
|
466 |
|
467 # Expand tabs in a string. |
|
468 # Doesn't take non-printing chars into account, but does understand \n. |
|
def expandtabs(s, tabsize=8):
    """expandtabs(s [,tabsize]) -> string

    Return s.expandtabs(tabsize): a copy of s in which every tab
    character is replaced by as many spaces as the current column
    and the tabsize (default 8) call for.

    """
    expanded = s.expandtabs(tabsize)
    return expanded
|
478 |
|
479 # Character translation through look-up table. |
|
def translate(s, table, deletions=""):
    """translate(s,table [,deletions]) -> string

    Return a copy of s from which every character found in the
    optional deletions argument has been removed and whose remaining
    characters have been mapped through the translation table, which
    must be a string of length 256.  The deletions argument is not
    allowed for Unicode strings.

    """
    if not deletions and table is not None:
        # Adding s[:0] converts an 8-bit table to Unicode when s is
        # Unicode.  This means that table *cannot* be a dictionary --
        # for that feature, use u.translate() directly.
        return s.translate(table + s[:0])
    return s.translate(table, deletions)
|
497 |
|
498 # Capitalize a string, e.g. "aBc dEf" -> "Abc def". |
|
def capitalize(s):
    """capitalize(s) -> string

    Return s.capitalize(): a copy of s in which only the first
    character is capitalized.

    """
    capitalized = s.capitalize()
    return capitalized
|
507 |
|
508 # Substring replacement (global) |
|
def replace(s, old, new, maxsplit=-1):
    """replace (str, old, new[, maxsplit]) -> string

    Return s.replace(old, new, maxsplit): a copy of the string with
    occurrences of substring old replaced by new.  When the optional
    maxsplit is given, only the first maxsplit occurrences are
    replaced; the default of -1 replaces all of them.

    """
    replaced = s.replace(old, new, maxsplit)
    return replaced
|
518 |
|
519 |
|
520 # Try importing optional built-in module "strop" -- if it exists, |
|
521 # it redefines some string operations that are 100-1000 times faster. |
|
522 # It also defines values for whitespace, lowercase and uppercase |
|
523 # that match <ctype.h>'s definitions. |
|
524 |
|
525 try: |
|
526 from strop import maketrans, lowercase, uppercase, whitespace |
|
527 letters = lowercase + uppercase |
|
528 except ImportError: |
|
529 pass # Use the original versions |
|
530 |
|
531 ######################################################################## |
|
532 # the Formatter class |
|
533 # see PEP 3101 for details and purpose of this class |
|
534 |
|
535 # The hard parts are reused from the C implementation. They're |
|
536 # exposed here via the sys module. sys was chosen because it's always |
|
537 # available and doesn't have to be dynamically loaded. |
|
538 |
|
539 # The overall parser is implemented in str._formatter_parser. |
|
540 # The field name parser is implemented in str._formatter_field_name_split |
|
541 |
|
class Formatter(object):
    """Python-level driver for PEP 3101 style string formatting.

    Parsing of the format string and of field names is delegated to
    the string object's _formatter_parser() and
    _formatter_field_name_split() methods; every other step is an
    overridable method of this class.
    """

    def format(self, format_string, *args, **kwargs):
        """Format format_string with the given positional and keyword args."""
        return self.vformat(format_string, args, kwargs)

    def vformat(self, format_string, args, kwargs):
        """Like format(), but takes args and kwargs as explicit containers.

        After formatting, check_unused_args() is called with the set of
        argument keys that were actually referenced.
        """
        used_args = set()
        # 2 is the number of nested levels of format-spec expansion allowed.
        result = self._vformat(format_string, args, kwargs, used_args, 2)
        self.check_unused_args(used_args, args, kwargs)
        return result

    def _vformat(self, format_string, args, kwargs, used_args, recursion_depth):
        """Format one level; recurse (with depth - 1) into each format spec.

        Raises ValueError once recursion_depth drops below zero.
        """
        if recursion_depth < 0:
            raise ValueError('Max string recursion exceeded')
        result = []
        for literal_text, field_name, format_spec, conversion in \
                self.parse(format_string):

            # output the literal text
            if literal_text:
                result.append(literal_text)

            # if there's a field, output it
            if field_name is not None:
                # this is some markup, find the object and do
                #  the formatting

                # given the field_name, find the object it references
                #  and the argument it came from
                obj, arg_used = self.get_field(field_name, args, kwargs)
                used_args.add(arg_used)

                # do any conversion on the resulting object
                obj = self.convert_field(obj, conversion)

                # expand the format spec, if needed
                format_spec = self._vformat(format_spec, args, kwargs,
                                            used_args, recursion_depth-1)

                # format the object and append to the result
                result.append(self.format_field(obj, format_spec))

        return ''.join(result)


    def get_value(self, key, args, kwargs):
        """Return args[key] for an integer key, kwargs[key] otherwise."""
        if isinstance(key, (int, long)):
            return args[key]
        else:
            return kwargs[key]


    def check_unused_args(self, used_args, args, kwargs):
        """Hook called by vformat() after formatting; does nothing here."""
        pass


    def format_field(self, value, format_spec):
        """Format value according to format_spec with the builtin format()."""
        return format(value, format_spec)


    def convert_field(self, value, conversion):
        """Apply a conversion specifier to value.

        'r' gives repr(value), 's' gives str(value) and None returns
        the value unchanged; anything else raises ValueError.
        """
        # do any conversion on the resulting object
        if conversion == 'r':
            return repr(value)
        elif conversion == 's':
            return str(value)
        elif conversion is None:
            return value
        # The message previously misspelled "conversion" as "converion".
        raise ValueError("Unknown conversion specifier {0!s}".format(conversion))


    # returns an iterable that contains tuples of the form:
    # (literal_text, field_name, format_spec, conversion)
    # literal_text can be zero length
    # field_name can be None, in which case there's no
    #  object to format and output
    # if field_name is not None, it is looked up, formatted
    #  with format_spec and conversion and then used
    def parse(self, format_string):
        """Return format_string._formatter_parser() (see comment above)."""
        return format_string._formatter_parser()


    # given a field_name, find the object it references.
    #  field_name:   the field being looked up, e.g. "0.name"
    #                 or "lookup[3]"
    #  args, kwargs: as passed in to vformat
    def get_field(self, field_name, args, kwargs):
        """Resolve field_name to (object, key of the argument it came from)."""
        first, rest = field_name._formatter_field_name_split()

        obj = self.get_value(first, args, kwargs)

        # loop through the rest of the field_name, doing
        #  getattr or getitem as needed
        for is_attr, i in rest:
            if is_attr:
                obj = getattr(obj, i)
            else:
                obj = obj[i]

        return obj, first