#!/usr/bin/env python
# Demonstration script: how logging.FileHandler handles the `encoding`
# parameter when logging unicode strings.
# (Originally tested on a self-compiled Python 2.6.1 on the then-current
# Debian testing release.)
import codecs   # NOTE(review): unused in this script; kept from the original
import logging
import sys      # NOTE(review): unused in this script; kept from the original

# A Korean name ("Sangsoo") used as the sample unicode log message.
sangsoo = u'\uc0c1\uc218'

# First, not specifying an encoding.  This works, and magically writes
# the string to the logfile UTF-8 encoded, even though no encoding is
# specified.
handler_none = logging.FileHandler(filename='test-none.log', mode='ab',
                                   encoding=None)
l1 = logging.getLogger("test-none")
l1.addHandler(handler_none)
l1.warning(sangsoo)

# The sangsoo string contains a Korean name.  This can, for example, be
# represented in the EUC-KR encoding.  Let's try that:
handler_euckr = logging.FileHandler(filename='test-euc-kr.log', mode='ab',
                                    encoding='EUC-KR')
# For some reason, using a FileHandler with an encoding set seems to use
# 'ascii' instead of the specified encoding.  The following error results
# when running the lines below:
#   UnicodeDecodeError: 'ascii' codec can't decode byte 0xec in position 0:
#   ordinal not in range(128)
l2 = logging.getLogger("test-euc-kr")
l2.addHandler(handler_euckr)
l2.warning(sangsoo)

# Setting any encoding will produce a UnicodeError when the logger is
# supplied with a unicode string -- even 'UTF-8', which worked fine when
# the encoding was set to None.
handler_utf8 = logging.FileHandler(filename='test-utf-8.log', mode='ab',
                                   encoding='UTF-8')
l3 = logging.getLogger("test-utf-8")
l3.addHandler(handler_utf8)
l3.warning(sangsoo)

# The only way I seem to be able to get 'EUC-KR' encoded data in the log
# is by not specifying an encoding to FileHandler, and encoding it myself
# before I give it to .warning().  Like this:
handler_preencoded = logging.FileHandler(filename='test-euc-kr-str.log',
                                         mode='ab', encoding=None)
l4 = logging.getLogger("test-euc-kr-str")
l4.addHandler(handler_preencoded)
l4.warning(sangsoo.encode("EUC-KR"))

# This works, but then what is the purpose of being able to specify an
# 'encoding' on the FileHandler?