#!/usr/bin/env python
# Demonstration script: how logging.FileHandler handles the `encoding`
# parameter when logging unicode strings.
# (Originally tested on a self-compiled Python 2.6.1 on the then-current
# Debian testing release.)
import codecs   # NOTE(review): unused in this script; kept from the original
import logging
import sys      # NOTE(review): unused in this script; kept from the original

# A Korean name ("Sangsoo") used as the sample unicode log message.
sangsoo = u'\uc0c1\uc218'

# First, not specifying an encoding.  This works, and magically writes
# the string to the logfile UTF-8 encoded, even though no encoding is
# specified.
handler_none = logging.FileHandler(filename='test-none.log', mode='ab',
                                   encoding=None)
l1 = logging.getLogger("test-none")
l1.addHandler(handler_none)
l1.warning(sangsoo)

# The sangsoo string contains a Korean name.  This can, for example, be
# represented in the EUC-KR encoding.  Let's try that:
handler_euckr = logging.FileHandler(filename='test-euc-kr.log', mode='ab',
                                    encoding='EUC-KR')
# For some reason, using a FileHandler with an encoding set seems to use
# 'ascii' instead of the specified encoding.  The following error results
# when running the lines below:
#   UnicodeDecodeError: 'ascii' codec can't decode byte 0xec in position 0:
#   ordinal not in range(128)
l2 = logging.getLogger("test-euc-kr")
l2.addHandler(handler_euckr)
l2.warning(sangsoo)

# Setting any encoding will produce a UnicodeError when the logger is
# supplied with a unicode string -- even 'UTF-8', which worked fine when
# the encoding was set to None.
handler_utf8 = logging.FileHandler(filename='test-utf-8.log', mode='ab',
                                   encoding='UTF-8')
l3 = logging.getLogger("test-utf-8")
l3.addHandler(handler_utf8)
l3.warning(sangsoo)

# The only way I seem to be able to get 'EUC-KR' encoded data in the log
# is by not specifying an encoding to FileHandler, and encoding it myself
# before I give it to .warning().  Like this:
handler_preencoded = logging.FileHandler(filename='test-euc-kr-str.log',
                                         mode='ab', encoding=None)
l4 = logging.getLogger("test-euc-kr-str")
l4.addHandler(handler_preencoded)
l4.warning(sangsoo.encode("EUC-KR"))

# This works, but then what is the purpose of being able to specify an
# 'encoding' on the FileHandler?