symbian-qemu-0.9.1-12/python-2.6.1/Lib/gettext.py
changeset 1 2fb8b9db1c86
equal deleted inserted replaced
0:ffa851df0825 1:2fb8b9db1c86
       
     1 """Internationalization and localization support.
       
     2 
       
     3 This module provides internationalization (I18N) and localization (L10N)
       
     4 support for your Python programs by providing an interface to the GNU gettext
       
     5 message catalog library.
       
     6 
       
     7 I18N refers to the operation by which a program is made aware of multiple
       
     8 languages.  L10N refers to the adaptation of your program, once
       
     9 internationalized, to the local language and cultural habits.
       
    10 
       
    11 """
       
    12 
       
    13 # This module represents the integration of work, contributions, feedback, and
       
    14 # suggestions from the following people:
       
    15 #
       
    16 # Martin von Loewis, who wrote the initial implementation of the underlying
       
    17 # C-based libintlmodule (later renamed _gettext), along with a skeletal
       
    18 # gettext.py implementation.
       
    19 #
       
    20 # Peter Funk, who wrote fintl.py, a fairly complete wrapper around intlmodule,
       
    21 # which also included a pure-Python implementation to read .mo files if
       
    22 # intlmodule wasn't available.
       
    23 #
       
    24 # James Henstridge, who also wrote a gettext.py module, which has some
       
    25 # interesting, but currently unsupported experimental features: the notion of
       
    26 # a Catalog class and instances, and the ability to add to a catalog file via
       
    27 # a Python API.
       
    28 #
       
    29 # Barry Warsaw integrated these modules, wrote the .install() API and code,
       
    30 # and conformed all C and Python code to Python's coding standards.
       
    31 #
       
    32 # Francois Pinard and Marc-Andre Lemburg also contributed valuably to this
       
    33 # module.
       
    34 #
       
    35 # J. David Ibanez implemented plural forms. Bruno Haible fixed some bugs.
       
    36 #
       
    37 # TODO:
       
    38 # - Lazy loading of .mo files.  Currently the entire catalog is loaded into
       
    39 #   memory, but that's probably bad for large translated programs.  Instead,
       
    40 #   the lexical sort of original strings in GNU .mo files should be exploited
       
    41 #   to do binary searches and lazy initializations.  Or you might want to use
       
    42 #   the undocumented double-hash algorithm for .mo files with hash tables, but
       
    43 #   you'll need to study the GNU gettext code to do this.
       
    44 #
       
    45 # - Support Solaris .mo file formats.  Unfortunately, we've been unable to
       
    46 #   find this format documented anywhere.
       
    47 
       
    48 
       
    49 import locale, copy, os, re, struct, sys
       
    50 from errno import ENOENT
       
    51 
       
    52 
       
# Names exported by "from gettext import *".
__all__ = ['NullTranslations', 'GNUTranslations', 'Catalog',
           'find', 'translation', 'install', 'textdomain', 'bindtextdomain',
           'dgettext', 'dngettext', 'gettext', 'ngettext',
           ]

# Locale directory used when a caller does not supply one.
_default_localedir = os.path.join(sys.prefix, 'share', 'locale')
       
    59 
       
    60 
       
    61 def test(condition, true, false):
       
    62     """
       
    63     Implements the C expression:
       
    64 
       
    65       condition ? true : false
       
    66 
       
    67     Required to correctly interpret plural forms.
       
    68     """
       
    69     if condition:
       
    70         return true
       
    71     else:
       
    72         return false
       
    73 
       
    74 
       
    75 def c2py(plural):
       
    76     """Gets a C expression as used in PO files for plural forms and returns a
       
    77     Python lambda function that implements an equivalent expression.
       
    78     """
       
    79     # Security check, allow only the "n" identifier
       
    80     try:
       
    81         from cStringIO import StringIO
       
    82     except ImportError:
       
    83         from StringIO import StringIO
       
    84     import token, tokenize
       
    85     tokens = tokenize.generate_tokens(StringIO(plural).readline)
       
    86     try:
       
    87         danger = [x for x in tokens if x[0] == token.NAME and x[1] != 'n']
       
    88     except tokenize.TokenError:
       
    89         raise ValueError, \
       
    90               'plural forms expression error, maybe unbalanced parenthesis'
       
    91     else:
       
    92         if danger:
       
    93             raise ValueError, 'plural forms expression could be dangerous'
       
    94 
       
    95     # Replace some C operators by their Python equivalents
       
    96     plural = plural.replace('&&', ' and ')
       
    97     plural = plural.replace('||', ' or ')
       
    98 
       
    99     expr = re.compile(r'\!([^=])')
       
   100     plural = expr.sub(' not \\1', plural)
       
   101 
       
   102     # Regular expression and replacement function used to transform
       
   103     # "a?b:c" to "test(a,b,c)".
       
   104     expr = re.compile(r'(.*?)\?(.*?):(.*)')
       
   105     def repl(x):
       
   106         return "test(%s, %s, %s)" % (x.group(1), x.group(2),
       
   107                                      expr.sub(repl, x.group(3)))
       
   108 
       
   109     # Code to transform the plural expression, taking care of parentheses
       
   110     stack = ['']
       
   111     for c in plural:
       
   112         if c == '(':
       
   113             stack.append('')
       
   114         elif c == ')':
       
   115             if len(stack) == 1:
       
   116                 # Actually, we never reach this code, because unbalanced
       
   117                 # parentheses get caught in the security check at the
       
   118                 # beginning.
       
   119                 raise ValueError, 'unbalanced parenthesis in plural form'
       
   120             s = expr.sub(repl, stack.pop())
       
   121             stack[-1] += '(%s)' % s
       
   122         else:
       
   123             stack[-1] += c
       
   124     plural = expr.sub(repl, stack.pop())
       
   125 
       
   126     return eval('lambda n: int(%s)' % plural)
       
   127 
       
   128 
       
   129 
       
   130 def _expand_lang(locale):
       
   131     from locale import normalize
       
   132     locale = normalize(locale)
       
   133     COMPONENT_CODESET   = 1 << 0
       
   134     COMPONENT_TERRITORY = 1 << 1
       
   135     COMPONENT_MODIFIER  = 1 << 2
       
   136     # split up the locale into its base components
       
   137     mask = 0
       
   138     pos = locale.find('@')
       
   139     if pos >= 0:
       
   140         modifier = locale[pos:]
       
   141         locale = locale[:pos]
       
   142         mask |= COMPONENT_MODIFIER
       
   143     else:
       
   144         modifier = ''
       
   145     pos = locale.find('.')
       
   146     if pos >= 0:
       
   147         codeset = locale[pos:]
       
   148         locale = locale[:pos]
       
   149         mask |= COMPONENT_CODESET
       
   150     else:
       
   151         codeset = ''
       
   152     pos = locale.find('_')
       
   153     if pos >= 0:
       
   154         territory = locale[pos:]
       
   155         locale = locale[:pos]
       
   156         mask |= COMPONENT_TERRITORY
       
   157     else:
       
   158         territory = ''
       
   159     language = locale
       
   160     ret = []
       
   161     for i in range(mask+1):
       
   162         if not (i & ~mask):  # if all components for this combo exist ...
       
   163             val = language
       
   164             if i & COMPONENT_TERRITORY: val += territory
       
   165             if i & COMPONENT_CODESET:   val += codeset
       
   166             if i & COMPONENT_MODIFIER:  val += modifier
       
   167             ret.append(val)
       
   168     ret.reverse()
       
   169     return ret
       
   170 
       
   171 
       
   172 
       
   173 class NullTranslations:
       
   174     def __init__(self, fp=None):
       
   175         self._info = {}
       
   176         self._charset = None
       
   177         self._output_charset = None
       
   178         self._fallback = None
       
   179         if fp is not None:
       
   180             self._parse(fp)
       
   181 
       
   182     def _parse(self, fp):
       
   183         pass
       
   184 
       
   185     def add_fallback(self, fallback):
       
   186         if self._fallback:
       
   187             self._fallback.add_fallback(fallback)
       
   188         else:
       
   189             self._fallback = fallback
       
   190 
       
   191     def gettext(self, message):
       
   192         if self._fallback:
       
   193             return self._fallback.gettext(message)
       
   194         return message
       
   195 
       
   196     def lgettext(self, message):
       
   197         if self._fallback:
       
   198             return self._fallback.lgettext(message)
       
   199         return message
       
   200 
       
   201     def ngettext(self, msgid1, msgid2, n):
       
   202         if self._fallback:
       
   203             return self._fallback.ngettext(msgid1, msgid2, n)
       
   204         if n == 1:
       
   205             return msgid1
       
   206         else:
       
   207             return msgid2
       
   208 
       
   209     def lngettext(self, msgid1, msgid2, n):
       
   210         if self._fallback:
       
   211             return self._fallback.lngettext(msgid1, msgid2, n)
       
   212         if n == 1:
       
   213             return msgid1
       
   214         else:
       
   215             return msgid2
       
   216 
       
   217     def ugettext(self, message):
       
   218         if self._fallback:
       
   219             return self._fallback.ugettext(message)
       
   220         return unicode(message)
       
   221 
       
   222     def ungettext(self, msgid1, msgid2, n):
       
   223         if self._fallback:
       
   224             return self._fallback.ungettext(msgid1, msgid2, n)
       
   225         if n == 1:
       
   226             return unicode(msgid1)
       
   227         else:
       
   228             return unicode(msgid2)
       
   229 
       
   230     def info(self):
       
   231         return self._info
       
   232 
       
   233     def charset(self):
       
   234         return self._charset
       
   235 
       
   236     def output_charset(self):
       
   237         return self._output_charset
       
   238 
       
   239     def set_output_charset(self, charset):
       
   240         self._output_charset = charset
       
   241 
       
   242     def install(self, unicode=False, names=None):
       
   243         import __builtin__
       
   244         __builtin__.__dict__['_'] = unicode and self.ugettext or self.gettext
       
   245         if hasattr(names, "__contains__"):
       
   246             if "gettext" in names:
       
   247                 __builtin__.__dict__['gettext'] = __builtin__.__dict__['_']
       
   248             if "ngettext" in names:
       
   249                 __builtin__.__dict__['ngettext'] = (unicode and self.ungettext
       
   250                                                              or self.ngettext)
       
   251             if "lgettext" in names:
       
   252                 __builtin__.__dict__['lgettext'] = self.lgettext
       
   253             if "lngettext" in names:
       
   254                 __builtin__.__dict__['lngettext'] = self.lngettext
       
   255 
       
   256 
       
   257 class GNUTranslations(NullTranslations):
       
   258     # Magic number of .mo files
       
   259     LE_MAGIC = 0x950412deL
       
   260     BE_MAGIC = 0xde120495L
       
   261 
       
   262     def _parse(self, fp):
       
   263         """Override this method to support alternative .mo formats."""
       
   264         unpack = struct.unpack
       
   265         filename = getattr(fp, 'name', '')
       
   266         # Parse the .mo file header, which consists of 5 little endian 32
       
   267         # bit words.
       
   268         self._catalog = catalog = {}
       
   269         self.plural = lambda n: int(n != 1) # germanic plural by default
       
   270         buf = fp.read()
       
   271         buflen = len(buf)
       
   272         # Are we big endian or little endian?
       
   273         magic = unpack('<I', buf[:4])[0]
       
   274         if magic == self.LE_MAGIC:
       
   275             version, msgcount, masteridx, transidx = unpack('<4I', buf[4:20])
       
   276             ii = '<II'
       
   277         elif magic == self.BE_MAGIC:
       
   278             version, msgcount, masteridx, transidx = unpack('>4I', buf[4:20])
       
   279             ii = '>II'
       
   280         else:
       
   281             raise IOError(0, 'Bad magic number', filename)
       
   282         # Now put all messages from the .mo file buffer into the catalog
       
   283         # dictionary.
       
   284         for i in xrange(0, msgcount):
       
   285             mlen, moff = unpack(ii, buf[masteridx:masteridx+8])
       
   286             mend = moff + mlen
       
   287             tlen, toff = unpack(ii, buf[transidx:transidx+8])
       
   288             tend = toff + tlen
       
   289             if mend < buflen and tend < buflen:
       
   290                 msg = buf[moff:mend]
       
   291                 tmsg = buf[toff:tend]
       
   292             else:
       
   293                 raise IOError(0, 'File is corrupt', filename)
       
   294             # See if we're looking at GNU .mo conventions for metadata
       
   295             if mlen == 0:
       
   296                 # Catalog description
       
   297                 lastk = k = None
       
   298                 for item in tmsg.splitlines():
       
   299                     item = item.strip()
       
   300                     if not item:
       
   301                         continue
       
   302                     if ':' in item:
       
   303                         k, v = item.split(':', 1)
       
   304                         k = k.strip().lower()
       
   305                         v = v.strip()
       
   306                         self._info[k] = v
       
   307                         lastk = k
       
   308                     elif lastk:
       
   309                         self._info[lastk] += '\n' + item
       
   310                     if k == 'content-type':
       
   311                         self._charset = v.split('charset=')[1]
       
   312                     elif k == 'plural-forms':
       
   313                         v = v.split(';')
       
   314                         plural = v[1].split('plural=')[1]
       
   315                         self.plural = c2py(plural)
       
   316             # Note: we unconditionally convert both msgids and msgstrs to
       
   317             # Unicode using the character encoding specified in the charset
       
   318             # parameter of the Content-Type header.  The gettext documentation
       
   319             # strongly encourages msgids to be us-ascii, but some appliations
       
   320             # require alternative encodings (e.g. Zope's ZCML and ZPT).  For
       
   321             # traditional gettext applications, the msgid conversion will
       
   322             # cause no problems since us-ascii should always be a subset of
       
   323             # the charset encoding.  We may want to fall back to 8-bit msgids
       
   324             # if the Unicode conversion fails.
       
   325             if '\x00' in msg:
       
   326                 # Plural forms
       
   327                 msgid1, msgid2 = msg.split('\x00')
       
   328                 tmsg = tmsg.split('\x00')
       
   329                 if self._charset:
       
   330                     msgid1 = unicode(msgid1, self._charset)
       
   331                     tmsg = [unicode(x, self._charset) for x in tmsg]
       
   332                 for i in range(len(tmsg)):
       
   333                     catalog[(msgid1, i)] = tmsg[i]
       
   334             else:
       
   335                 if self._charset:
       
   336                     msg = unicode(msg, self._charset)
       
   337                     tmsg = unicode(tmsg, self._charset)
       
   338                 catalog[msg] = tmsg
       
   339             # advance to next entry in the seek tables
       
   340             masteridx += 8
       
   341             transidx += 8
       
   342 
       
   343     def gettext(self, message):
       
   344         missing = object()
       
   345         tmsg = self._catalog.get(message, missing)
       
   346         if tmsg is missing:
       
   347             if self._fallback:
       
   348                 return self._fallback.gettext(message)
       
   349             return message
       
   350         # Encode the Unicode tmsg back to an 8-bit string, if possible
       
   351         if self._output_charset:
       
   352             return tmsg.encode(self._output_charset)
       
   353         elif self._charset:
       
   354             return tmsg.encode(self._charset)
       
   355         return tmsg
       
   356 
       
   357     def lgettext(self, message):
       
   358         missing = object()
       
   359         tmsg = self._catalog.get(message, missing)
       
   360         if tmsg is missing:
       
   361             if self._fallback:
       
   362                 return self._fallback.lgettext(message)
       
   363             return message
       
   364         if self._output_charset:
       
   365             return tmsg.encode(self._output_charset)
       
   366         return tmsg.encode(locale.getpreferredencoding())
       
   367 
       
   368     def ngettext(self, msgid1, msgid2, n):
       
   369         try:
       
   370             tmsg = self._catalog[(msgid1, self.plural(n))]
       
   371             if self._output_charset:
       
   372                 return tmsg.encode(self._output_charset)
       
   373             elif self._charset:
       
   374                 return tmsg.encode(self._charset)
       
   375             return tmsg
       
   376         except KeyError:
       
   377             if self._fallback:
       
   378                 return self._fallback.ngettext(msgid1, msgid2, n)
       
   379             if n == 1:
       
   380                 return msgid1
       
   381             else:
       
   382                 return msgid2
       
   383 
       
   384     def lngettext(self, msgid1, msgid2, n):
       
   385         try:
       
   386             tmsg = self._catalog[(msgid1, self.plural(n))]
       
   387             if self._output_charset:
       
   388                 return tmsg.encode(self._output_charset)
       
   389             return tmsg.encode(locale.getpreferredencoding())
       
   390         except KeyError:
       
   391             if self._fallback:
       
   392                 return self._fallback.lngettext(msgid1, msgid2, n)
       
   393             if n == 1:
       
   394                 return msgid1
       
   395             else:
       
   396                 return msgid2
       
   397 
       
   398     def ugettext(self, message):
       
   399         missing = object()
       
   400         tmsg = self._catalog.get(message, missing)
       
   401         if tmsg is missing:
       
   402             if self._fallback:
       
   403                 return self._fallback.ugettext(message)
       
   404             return unicode(message)
       
   405         return tmsg
       
   406 
       
   407     def ungettext(self, msgid1, msgid2, n):
       
   408         try:
       
   409             tmsg = self._catalog[(msgid1, self.plural(n))]
       
   410         except KeyError:
       
   411             if self._fallback:
       
   412                 return self._fallback.ungettext(msgid1, msgid2, n)
       
   413             if n == 1:
       
   414                 tmsg = unicode(msgid1)
       
   415             else:
       
   416                 tmsg = unicode(msgid2)
       
   417         return tmsg
       
   418 
       
   419 
       
   420 # Locate a .mo file using the gettext strategy
       
   421 def find(domain, localedir=None, languages=None, all=0):
       
   422     # Get some reasonable defaults for arguments that were not supplied
       
   423     if localedir is None:
       
   424         localedir = _default_localedir
       
   425     if languages is None:
       
   426         languages = []
       
   427         for envar in ('LANGUAGE', 'LC_ALL', 'LC_MESSAGES', 'LANG'):
       
   428             val = os.environ.get(envar)
       
   429             if val:
       
   430                 languages = val.split(':')
       
   431                 break
       
   432         if 'C' not in languages:
       
   433             languages.append('C')
       
   434     # now normalize and expand the languages
       
   435     nelangs = []
       
   436     for lang in languages:
       
   437         for nelang in _expand_lang(lang):
       
   438             if nelang not in nelangs:
       
   439                 nelangs.append(nelang)
       
   440     # select a language
       
   441     if all:
       
   442         result = []
       
   443     else:
       
   444         result = None
       
   445     for lang in nelangs:
       
   446         if lang == 'C':
       
   447             break
       
   448         mofile = os.path.join(localedir, lang, 'LC_MESSAGES', '%s.mo' % domain)
       
   449         if os.path.exists(mofile):
       
   450             if all:
       
   451                 result.append(mofile)
       
   452             else:
       
   453                 return mofile
       
   454     return result
       
   455 
       
   456 
       
   457 
       
   458 # a mapping between absolute .mo file path and Translation object
       
   459 _translations = {}
       
   460 
       
   461 def translation(domain, localedir=None, languages=None,
       
   462                 class_=None, fallback=False, codeset=None):
       
   463     if class_ is None:
       
   464         class_ = GNUTranslations
       
   465     mofiles = find(domain, localedir, languages, all=1)
       
   466     if not mofiles:
       
   467         if fallback:
       
   468             return NullTranslations()
       
   469         raise IOError(ENOENT, 'No translation file found for domain', domain)
       
   470     # TBD: do we need to worry about the file pointer getting collected?
       
   471     # Avoid opening, reading, and parsing the .mo file after it's been done
       
   472     # once.
       
   473     result = None
       
   474     for mofile in mofiles:
       
   475         key = os.path.abspath(mofile)
       
   476         t = _translations.get(key)
       
   477         if t is None:
       
   478             t = _translations.setdefault(key, class_(open(mofile, 'rb')))
       
   479         # Copy the translation object to allow setting fallbacks and
       
   480         # output charset. All other instance data is shared with the
       
   481         # cached object.
       
   482         t = copy.copy(t)
       
   483         if codeset:
       
   484             t.set_output_charset(codeset)
       
   485         if result is None:
       
   486             result = t
       
   487         else:
       
   488             result.add_fallback(t)
       
   489     return result
       
   490 
       
   491 
       
   492 def install(domain, localedir=None, unicode=False, codeset=None, names=None):
       
   493     t = translation(domain, localedir, fallback=True, codeset=codeset)
       
   494     t.install(unicode, names)
       
   495 
       
   496 
       
   497 
       
# a mapping between domains and locale directories
_localedirs = {}
# a mapping between domains and codesets
_localecodesets = {}
# current global domain, `messages' used for compatibility with GNU gettext
_current_domain = 'messages'
       
   504 
       
   505 
       
   506 def textdomain(domain=None):
       
   507     global _current_domain
       
   508     if domain is not None:
       
   509         _current_domain = domain
       
   510     return _current_domain
       
   511 
       
   512 
       
   513 def bindtextdomain(domain, localedir=None):
       
   514     global _localedirs
       
   515     if localedir is not None:
       
   516         _localedirs[domain] = localedir
       
   517     return _localedirs.get(domain, _default_localedir)
       
   518 
       
   519 
       
   520 def bind_textdomain_codeset(domain, codeset=None):
       
   521     global _localecodesets
       
   522     if codeset is not None:
       
   523         _localecodesets[domain] = codeset
       
   524     return _localecodesets.get(domain)
       
   525 
       
   526 
       
   527 def dgettext(domain, message):
       
   528     try:
       
   529         t = translation(domain, _localedirs.get(domain, None),
       
   530                         codeset=_localecodesets.get(domain))
       
   531     except IOError:
       
   532         return message
       
   533     return t.gettext(message)
       
   534 
       
   535 def ldgettext(domain, message):
       
   536     try:
       
   537         t = translation(domain, _localedirs.get(domain, None),
       
   538                         codeset=_localecodesets.get(domain))
       
   539     except IOError:
       
   540         return message
       
   541     return t.lgettext(message)
       
   542 
       
   543 def dngettext(domain, msgid1, msgid2, n):
       
   544     try:
       
   545         t = translation(domain, _localedirs.get(domain, None),
       
   546                         codeset=_localecodesets.get(domain))
       
   547     except IOError:
       
   548         if n == 1:
       
   549             return msgid1
       
   550         else:
       
   551             return msgid2
       
   552     return t.ngettext(msgid1, msgid2, n)
       
   553 
       
   554 def ldngettext(domain, msgid1, msgid2, n):
       
   555     try:
       
   556         t = translation(domain, _localedirs.get(domain, None),
       
   557                         codeset=_localecodesets.get(domain))
       
   558     except IOError:
       
   559         if n == 1:
       
   560             return msgid1
       
   561         else:
       
   562             return msgid2
       
   563     return t.lngettext(msgid1, msgid2, n)
       
   564 
       
   565 def gettext(message):
       
   566     return dgettext(_current_domain, message)
       
   567 
       
   568 def lgettext(message):
       
   569     return ldgettext(_current_domain, message)
       
   570 
       
   571 def ngettext(msgid1, msgid2, n):
       
   572     return dngettext(_current_domain, msgid1, msgid2, n)
       
   573 
       
   574 def lngettext(msgid1, msgid2, n):
       
   575     return ldngettext(_current_domain, msgid1, msgid2, n)
       
   576 
       
   577 # dcgettext() has been deemed unnecessary and is not implemented.
       
   578 
       
# James Henstridge's Catalog constructor from GNOME gettext.  Documented usage
# was:
#
#    import gettext
#    cat = gettext.Catalog(PACKAGE, localedir=LOCALEDIR)
#    _ = cat.gettext
#    print _('Hello World')

# The resulting catalog object currently doesn't support access through a
# dictionary API, which was supported (but apparently unused) in GNOME
# gettext.

Catalog = translation