Index: configure.in =================================================================== --- configure.in +++ configure.in 2004-06-26 14:24:40.000000000 -0300 @@ -2044,8 +2044,8 @@ AC_MSG_RESULT(MACHDEP_OBJS) # checks for library functions -AC_CHECK_FUNCS(alarm chown clock confstr ctermid execv \ - fork fpathconf ftime ftruncate \ +AC_CHECK_FUNCS(alarm bind_textdomain_codeset chown clock confstr ctermid \ + execv fork fpathconf ftime ftruncate \ gai_strerror getgroups getlogin getloadavg getpeername getpgid getpid \ getpriority getpwent getsid getwd \ kill killpg lchown lstat mkfifo mknod mktime \ Index: Modules/_localemodule.c =================================================================== --- Modules/_localemodule.c +++ Modules/_localemodule.c 2004-06-26 14:56:30.000000000 -0300 @@ -625,6 +625,24 @@ return PyString_FromString(dirname); } +#ifdef HAVE_BIND_TEXTDOMAIN_CODESET +PyDoc_STRVAR(bind_textdomain_codeset__doc__, +"bind_textdomain_codeset(domain, codeset) -> string\n" +"Bind the C library's domain to codeset."); + +static PyObject* +PyIntl_bind_textdomain_codeset(PyObject* self,PyObject*args) +{ + char *domain,*codeset; + if (!PyArg_ParseTuple(args, "sz", &domain, &codeset)) + return 0; + codeset = bind_textdomain_codeset(domain, codeset); + if (codeset) + return PyString_FromString(codeset); + Py_RETURN_NONE; +} +#endif + #endif static struct PyMethodDef PyLocale_Methods[] = { @@ -654,6 +672,10 @@ textdomain__doc__}, {"bindtextdomain",(PyCFunction)PyIntl_bindtextdomain,METH_VARARGS, bindtextdomain__doc__}, +#ifdef HAVE_BIND_TEXTDOMAIN_CODESET + {"bind_textdomain_codeset",(PyCFunction)PyIntl_bind_textdomain_codeset, + METH_VARARGS, bind_textdomain_codeset__doc__}, +#endif #endif {NULL, NULL} }; Index: Doc/lib/libgettext.tex =================================================================== --- Doc/lib/libgettext.tex +++ Doc/lib/libgettext.tex 2004-06-26 22:37:10.235393976 -0300 @@ -51,6 +51,14 @@ the start of your application.} \end{funcdesc} 
+\begin{funcdesc}{bind_textdomain_codeset}{domain\optional{, codeset}} +Bind the \var{domain} to \var{codeset}, changing the encoding of +strings returned by the \function{gettext()} family of functions. +If \var{codeset} is omitted, then the current binding is returned. + +\versionadded{2.4} +\end{funcdesc} + \begin{funcdesc}{textdomain}{\optional{domain}} Change or query the current global domain. If \var{domain} is \code{None}, then the current global domain is returned, otherwise the @@ -64,11 +72,27 @@ examples below). \end{funcdesc} +\begin{funcdesc}{lgettext}{message} +Equivalent to \function{gettext()}, but the translation is returned +in the preferred system encoding, if no other encoding was explicitly +set with \function{bind_textdomain_codeset()}. + +\versionadded{2.4} +\end{funcdesc} + \begin{funcdesc}{dgettext}{domain, message} Like \function{gettext()}, but look the message up in the specified \var{domain}. \end{funcdesc} +\begin{funcdesc}{ldgettext}{domain, message} +Equivalent to \function{dgettext()}, but the translation is returned +in the preferred system encoding, if no other encoding was explicitly +set with \function{bind_textdomain_codeset()}. + +\versionadded{2.4} +\end{funcdesc} + \begin{funcdesc}{ngettext}{singular, plural, n} Like \function{gettext()}, but consider plural forms. If a translation @@ -87,6 +111,14 @@ \end{funcdesc} +\begin{funcdesc}{lngettext}{singular, plural, n} +Equivalent to \function{ngettext()}, but the translation is returned +in the preferred system encoding, if no other encoding was explicitly +set with \function{bind_textdomain_codeset()}. + +\versionadded{2.4} +\end{funcdesc} + \begin{funcdesc}{dngettext}{domain, singular, plural, n} Like \function{ngettext()}, but look the message up in the specified \var{domain}. 
@@ -94,6 +126,15 @@ \versionadded{2.3} \end{funcdesc} +\begin{funcdesc}{ldngettext}{domain, singular, plural, n} +Equivalent to \function{dngettext()}, but the translation is returned +in the preferred system encoding, if no other encoding was explicitly +set with \function{bind_textdomain_codeset()}. + +\versionadded{2.4} +\end{funcdesc} + + Note that GNU \program{gettext} also defines a \function{dcgettext()} method, but this was deemed not useful and so it is currently @@ -152,8 +193,8 @@ \end{funcdesc} \begin{funcdesc}{translation}{domain\optional{, localedir\optional{, - languages\optional{, - class_,\optional{fallback}}}}} + languages\optional{, class_\optional{, + fallback\optional{, codeset}}}}}} Return a \class{Translations} instance based on the \var{domain}, \var{localedir}, and \var{languages}, which are first passed to \function{find()} to get a list of the @@ -161,7 +202,8 @@ identical \file{.mo} file names are cached. The actual class instantiated is either \var{class_} if provided, otherwise \class{GNUTranslations}. The class's constructor must take a single -file object argument. +file object argument. If provided, \var{codeset} will change the +charset used to encode translated strings. If multiple files are found, later files are used as fallbacks for earlier ones. To allow setting the fallback, \function{copy.copy} @@ -172,13 +214,17 @@ \exception{IOError} if \var{fallback} is false (which is the default), and returns a \class{NullTranslations} instance if \var{fallback} is true. + +\versionchanged[Added the \var{codeset} parameter]{2.4} \end{funcdesc} -\begin{funcdesc}{install}{domain\optional{, localedir\optional{, unicode}}} +\begin{funcdesc}{install}{domain\optional{, localedir\optional{, unicode + \optional{, local\optional{, codeset}}}}} This installs the function \function{_} in Python's builtin namespace, -based on \var{domain}, and \var{localedir} which are passed to the -function \function{translation()}. 
The \var{unicode} flag is passed to -the resulting translation object's \method{install} method. +based on \var{domain}, \var{localedir}, and \var{codeset} which are +passed to the function \function{translation()}. The \var{unicode} +and \var{local} flags are passed to the resulting translation object's +\method{install} method. As seen below, you usually mark the strings in your application that are candidates for translation, by wrapping them in a call to the @@ -191,6 +237,8 @@ For convenience, you want the \function{_()} function to be installed in Python's builtin namespace, so it is easily accessible in all modules of your application. + +\versionchanged[Added the \var{local} and \var{codeset} parameters]{2.4} \end{funcdesc} \subsubsection{The \class{NullTranslations} class} @@ -223,25 +271,39 @@ \end{methoddesc} \begin{methoddesc}[NullTranslations]{gettext}{message} -If a fallback has been set, forward \method{gettext} to the fallback. +If a fallback has been set, forward \method{gettext()} to the fallback. Otherwise, return the translated message. Overridden in derived classes. \end{methoddesc} +\begin{methoddesc}[NullTranslations]{lgettext}{message} +If a fallback has been set, forward \method{lgettext()} to the fallback. +Otherwise, return the translated message. Overridden in derived classes. + +\versionadded{2.4} +\end{methoddesc} + \begin{methoddesc}[NullTranslations]{ugettext}{message} -If a fallback has been set, forward \method{ugettext} to the fallback. +If a fallback has been set, forward \method{ugettext()} to the fallback. Otherwise, return the translated message as a Unicode string. Overridden in derived classes. \end{methoddesc} \begin{methoddesc}[NullTranslations]{ngettext}{singular, plural, n} -If a fallback has been set, forward \method{ngettext} to the fallback. +If a fallback has been set, forward \method{ngettext()} to the fallback. Otherwise, return the translated message. Overridden in derived classes. 
\versionadded{2.3} \end{methoddesc} +\begin{methoddesc}[NullTranslations]{lngettext}{singular, plural, n} +If a fallback has been set, forward \method{lngettext()} to the fallback. +Otherwise, return the translated message. Overridden in derived classes. + +\versionadded{2.4} +\end{methoddesc} + \begin{methoddesc}[NullTranslations]{ungettext}{singular, plural, n} -If a fallback has been set, forward \method{ungettext} to the fallback. +If a fallback has been set, forward \method{ungettext()} to the fallback. Otherwise, return the translated message as a Unicode string. Overridden in derived classes. @@ -256,11 +318,28 @@ Return the ``protected'' \member{_charset} variable. \end{methoddesc} -\begin{methoddesc}[NullTranslations]{install}{\optional{unicode}} -If the \var{unicode} flag is false, this method installs -\method{self.gettext()} into the built-in namespace, binding it to -\samp{_}. If \var{unicode} is true, it binds \method{self.ugettext()} -instead. By default, \var{unicode} is false. +\begin{methoddesc}[NullTranslations]{output_charset}{} +Return the ``protected'' \member{_output_charset} variable, which +defines the encoding used to return translated messages. + +\versionadded{2.4} +\end{methoddesc} + +\begin{methoddesc}[NullTranslations]{set_output_charset}{charset} +Change the ``protected'' \member{_output_charset} variable, which +defines the encoding used to return translated messages. + +\versionadded{2.4} +\end{methoddesc} + +\begin{methoddesc}[NullTranslations]{install}{\optional{unicode + \optional{, local}}} +If the \var{unicode} and \var{local} flags are false, this method +installs \method{self.gettext()} into the built-in namespace, +binding it to \samp{_}. If \var{unicode} is true, it binds +\method{self.ugettext()} instead. Otherwise, if \var{local} is true, +it binds \method{self.lgettext()}. By default, \var{unicode} +and \var{local} are false. 
Note that this is only one way, albeit the most convenient way, to make the \function{_} function available to your application. Because it @@ -277,6 +356,8 @@ This puts \function{_} only in the module's global namespace and so only affects calls within this module. + +\versionchanged[Added the \var{local} parameter]{2.4} \end{methoddesc} \subsubsection{The \class{GNUTranslations} class} @@ -323,6 +404,14 @@ Otherwise, the \var{message} id is returned. \end{methoddesc} +\begin{methoddesc}[GNUTranslations]{lgettext}{message} +Equivalent to \method{gettext()}, but the translation is returned +in the preferred system encoding, if no other encoding was explicitly +set with \method{set_output_charset()}. + +\versionadded{2.4} +\end{methoddesc} + \begin{methoddesc}[GNUTranslations]{ugettext}{message} Look up the \var{message} id in the catalog and return the corresponding message string, as a Unicode string. If there is no @@ -346,6 +435,14 @@ \versionadded{2.3} \end{methoddesc} +\begin{methoddesc}[GNUTranslations]{lngettext}{singular, plural, n} +Equivalent to \method{ngettext()}, but the translation is returned +in the preferred system encoding, if no other encoding was explicitly +set with \method{set_output_charset()}. + +\versionadded{2.4} +\end{methoddesc} + \begin{methoddesc}[GNUTranslations]{ungettext}{singular, plural, n} Do a plural-forms lookup of a message id. 
\var{singular} is used as the message id for purposes of lookup in the catalog, while \var{n} is @@ -495,7 +592,7 @@ \begin{verbatim} import gettext t = gettext.translation('spam', '/usr/share/locale') -_ = t.gettext +_ = t.lgettext \end{verbatim} If your translators were providing you with Unicode strings in their @@ -520,7 +617,7 @@ \begin{verbatim} import gettext -gettext.install('myapplication') +gettext.install('myapplication', local=1) \end{verbatim} If you need to set the locale directory or the \var{unicode} flag, @@ -633,6 +730,21 @@ \program{pygettext} and \program{xpot} both support this through the use of command line switches. +\subsubsection{\function{gettext()} vs. \function{lgettext()}} +In Python 2.4 the \function{lgettext()} family of functions was +introduced. The intention of these functions is to provide an +alternative which is more compliant with the current +implementation of GNU gettext. Unlike \function{gettext()}, which +returns strings encoded with the same codeset used in the +translation file, \function{lgettext()} will return strings +encoded with the preferred system encoding, as returned by +\function{locale.getpreferredencoding()}. Also notice that +Python 2.4 introduces new functions to explicitly choose +the codeset used in translated strings. If a codeset is explicitly +set, even \function{lgettext()} will return translated strings in +the requested codeset, as would be expected in the GNU gettext +implementation. 
+ \subsection{Acknowledgements} The following people contributed code, feedback, design suggestions, @@ -647,4 +759,5 @@ \item Martin von L\"owis \item Fran\c cois Pinard \item Barry Warsaw + \item Gustavo Niemeyer \end{itemize} Index: Doc/lib/liblocale.tex =================================================================== --- Doc/lib/liblocale.tex +++ Doc/lib/liblocale.tex 2004-06-26 15:08:20.000000000 -0300 @@ -469,15 +469,16 @@ The locale module exposes the C library's gettext interface on systems that provide this interface. It consists of the functions \function{gettext()}, \function{dgettext()}, \function{dcgettext()}, -\function{textdomain()}, and \function{bindtextdomain()}. These are -similar to the same functions in the \refmodule{gettext} module, but use -the C library's binary format for message catalogs, and the C -library's search algorithms for locating message catalogs. +\function{textdomain()}, \function{bindtextdomain()}, and +\function{bind_textdomain_codeset()}. These are similar to the same +functions in the \refmodule{gettext} module, but use the C library's +binary format for message catalogs, and the C library's search +algorithms for locating message catalogs. Python applications should normally find no need to invoke these functions, and should use \refmodule{gettext} instead. A known exception to this rule are applications that link use additional C libraries which internally invoke \cfunction{gettext()} or -\function{cdgettext()}. For these applications, it may be necessary to +\function{dcgettext()}. For these applications, it may be necessary to bind the text domain, so that the libraries can properly locate their message catalogs. Index: Lib/gettext.py =================================================================== --- Lib/gettext.py +++ Lib/gettext.py 2004-06-26 23:29:21.646277200 -0300 @@ -46,7 +46,7 @@ # find this format documented anywhere. 
-import copy, os, re, struct, sys +import locale, copy, os, re, struct, sys from errno import ENOENT @@ -171,6 +171,7 @@ def __init__(self, fp=None): self._info = {} self._charset = None + self._output_charset = None self._fallback = None if fp is not None: self._parse(fp) @@ -189,6 +190,11 @@ return self._fallback.gettext(message) return message + def lgettext(self, message): + if self._fallback: + return self._fallback.lgettext(message) + return message + def ngettext(self, msgid1, msgid2, n): if self._fallback: return self._fallback.ngettext(msgid1, msgid2, n) @@ -197,6 +203,14 @@ else: return msgid2 + def lngettext(self, msgid1, msgid2, n): + if self._fallback: + return self._fallback.lngettext(msgid1, msgid2, n) + if n == 1: + return msgid1 + else: + return msgid2 + def ugettext(self, message): if self._fallback: return self._fallback.ugettext(message) @@ -216,9 +230,21 @@ def charset(self): return self._charset - def install(self, unicode=False): + def output_charset(self): + return self._output_charset + + def set_output_charset(self, charset): + self._output_charset = charset + + def install(self, unicode=False, local=False): import __builtin__ - __builtin__.__dict__['_'] = unicode and self.ugettext or self.gettext + if unicode: + gettext = self.ugettext + elif local: + gettext = self.lgettext + else: + gettext = self.gettext + __builtin__.__dict__['_'] = gettext class GNUTranslations(NullTranslations): @@ -315,14 +341,29 @@ return self._fallback.gettext(message) return message # Encode the Unicode tmsg back to an 8-bit string, if possible - if self._charset: + if self._output_charset: + return tmsg.encode(self._output_charset) + elif self._charset: return tmsg.encode(self._charset) return tmsg + def lgettext(self, message): + missing = object() + tmsg = self._catalog.get(message, missing) + if tmsg is missing: + if self._fallback: + return self._fallback.lgettext(message) + return message + if self._output_charset: + return 
tmsg.encode(self._output_charset) + return tmsg.encode(locale.getpreferredencoding()) + def ngettext(self, msgid1, msgid2, n): try: tmsg = self._catalog[(msgid1, self.plural(n))] - if self._charset: + if self._output_charset: + return tmsg.encode(self._output_charset) + elif self._charset: return tmsg.encode(self._charset) return tmsg except KeyError: @@ -333,6 +374,20 @@ else: return msgid2 + def lngettext(self, msgid1, msgid2, n): + try: + tmsg = self._catalog[(msgid1, self.plural(n))] + if self._output_charset: + return tmsg.encode(self._output_charset) + return tmsg.encode(locale.getpreferredencoding()) + except KeyError: + if self._fallback: + return self._fallback.lngettext(msgid1, msgid2, n) + if n == 1: + return msgid1 + else: + return msgid2 + def ugettext(self, message): missing = object() tmsg = self._catalog.get(message, missing) @@ -397,7 +452,7 @@ _translations = {} def translation(domain, localedir=None, languages=None, - class_=None, fallback=False): + class_=None, fallback=False, codeset=None): if class_ is None: class_ = GNUTranslations mofiles = find(domain, localedir, languages, all=1) @@ -414,9 +469,12 @@ t = _translations.get(key) if t is None: t = _translations.setdefault(key, class_(open(mofile, 'rb'))) - # Copy the translation object to allow setting fallbacks. - # All other instance data is shared with the cached object. + # Copy the translation object to allow setting fallbacks and + # output charset. All other instance data is shared with the + # cached object. 
t = copy.copy(t) + if codeset: + t.set_output_charset(codeset) if result is None: result = t else: @@ -424,13 +482,16 @@ return result -def install(domain, localedir=None, unicode=False): - translation(domain, localedir, fallback=True).install(unicode) +def install(domain, localedir=None, unicode=False, local=False, codeset=None): + t = translation(domain, localedir, fallback=True, codeset=codeset) + t.install(unicode, local) # a mapping b/w domains and locale directories _localedirs = {} +# a mapping b/w domains and codesets +_localecodesets = {} # current global domain, `messages' used for compatibility w/ GNU gettext _current_domain = 'messages' @@ -441,25 +502,38 @@ _current_domain = domain return _current_domain - def bindtextdomain(domain, localedir=None): global _localedirs if localedir is not None: _localedirs[domain] = localedir return _localedirs.get(domain, _default_localedir) +def bind_textdomain_codeset(domain, codeset=None): + global _localecodesets + if codeset is not None: + _localecodesets[domain] = codeset + return _localecodesets.get(domain) def dgettext(domain, message): try: - t = translation(domain, _localedirs.get(domain, None)) + t = translation(domain, _localedirs.get(domain, None), + codeset=_localecodesets.get(domain)) except IOError: return message return t.gettext(message) +def ldgettext(domain, message): + try: + t = translation(domain, _localedirs.get(domain, None), + codeset=_localecodesets.get(domain)) + except IOError: + return message + return t.lgettext(message) def dngettext(domain, msgid1, msgid2, n): try: - t = translation(domain, _localedirs.get(domain, None)) + t = translation(domain, _localedirs.get(domain, None), + codeset=_localecodesets.get(domain)) except IOError: if n == 1: return msgid1 @@ -467,14 +541,28 @@ return msgid2 return t.ngettext(msgid1, msgid2, n) +def ldngettext(domain, msgid1, msgid2, n): + try: + t = translation(domain, _localedirs.get(domain, None), + codeset=_localecodesets.get(domain)) + except 
IOError: + if n == 1: + return msgid1 + else: + return msgid2 + return t.lngettext(msgid1, msgid2, n) def gettext(message): return dgettext(_current_domain, message) +def lgettext(message): + return ldgettext(_current_domain, message) def ngettext(msgid1, msgid2, n): return dngettext(_current_domain, msgid1, msgid2, n) +def lngettext(msgid1, msgid2, n): + return ldngettext(_current_domain, msgid1, msgid2, n) # dcgettext() has been deemed unnecessary and is not implemented.