Changeset 3549:a3c1cb69ad3d in livinglogic.python.xist

Show
Ignore:
Timestamp:
07/30/08 22:11:26 (11 years ago)
Author:
Walter Doerwald <walter@…>
Branch:
default
Message:

Test the imported functions instead of the C module directly.

Files:
1 modified

Legend:

Unmodified
Added
Removed
  • test/test_xml_codec.py

    r3189 r3549  
    1414import codecs 
    1515 
    16 from ll import xml_codec, _xml_codec # registers the codec 
     16from ll import xml_codec # registers the codec 
    1717 
    1818try: 
     
    2525 
    2626def test_detectencoding_str(): 
    27     assert _xml_codec.detectencoding("") is None 
    28     assert _xml_codec.detectencoding("\xef") is None 
    29     assert _xml_codec.detectencoding("\xef\x33") == "utf-8" 
    30     assert _xml_codec.detectencoding("\xef\xbb") is None 
    31     assert _xml_codec.detectencoding("\xef\xbb\x33") == "utf-8" 
    32     assert _xml_codec.detectencoding("\xef\xbb\xbf") == "utf-8-sig" 
    33     assert _xml_codec.detectencoding("\xff") is None 
    34     assert _xml_codec.detectencoding("\xff\x33") == "utf-8" 
    35     assert _xml_codec.detectencoding("\xff\xfe") is None 
    36     assert _xml_codec.detectencoding("\xff\xfe\x33") == "utf-16" 
    37     assert _xml_codec.detectencoding("\xff\xfe\x00") is None 
    38     assert _xml_codec.detectencoding("\xff\xfe\x00\x33") == "utf-16" 
    39     assert _xml_codec.detectencoding("\xff\xfe\x00\x00") == "utf-32" 
    40     assert _xml_codec.detectencoding("\x00") is None 
    41     assert _xml_codec.detectencoding("\x00\x33") == "utf-8" 
    42     assert _xml_codec.detectencoding("\x00\x00") is None 
    43     assert _xml_codec.detectencoding("\x00\x00\x33") == "utf-8" 
    44     assert _xml_codec.detectencoding("\x00\x00\xfe") is None 
    45     assert _xml_codec.detectencoding("\x00\x00\x00\x33") == "utf-8" 
    46     assert _xml_codec.detectencoding("\x00\x00\x00<") == "utf-32-be" 
    47     assert _xml_codec.detectencoding("\x00\x00\xfe\xff") == "utf-32" 
    48     assert _xml_codec.detectencoding("<") is None 
    49     assert _xml_codec.detectencoding("<\x33") == "utf-8" 
    50     assert _xml_codec.detectencoding("<\x00") is None 
    51     assert _xml_codec.detectencoding("<\x00\x33") == "utf-8" 
    52     assert _xml_codec.detectencoding("<\x00\x00") is None 
    53     assert _xml_codec.detectencoding("<\x00\x00\x33") == "utf-8" 
    54     assert _xml_codec.detectencoding("<\x00\x00\x00") == "utf-32-le" 
    55     assert _xml_codec.detectencoding("\x4c") is None 
    56     assert _xml_codec.detectencoding("\x4c\x33") == "utf-8" 
    57     assert _xml_codec.detectencoding("\x4c\x6f") is None 
    58     assert _xml_codec.detectencoding("\x4c\x6f\x33") == "utf-8" 
    59     assert _xml_codec.detectencoding("\x4c\x6f\xa7") is None 
    60     assert _xml_codec.detectencoding("\x4c\x6f\xa7\x33") == "utf-8" 
    61     assert _xml_codec.detectencoding("\x4c\x6f\xa7\x94") == "cp037" 
    62     assert _xml_codec.detectencoding("<?") is None 
    63     assert _xml_codec.detectencoding("<?x") is None 
    64     assert _xml_codec.detectencoding("<?xm") is None 
    65     assert _xml_codec.detectencoding("<?xml") is None 
    66     assert _xml_codec.detectencoding("<?xml\r") is None 
    67     assert _xml_codec.detectencoding("<?xml\rversion='1.0'") is None 
    68     assert _xml_codec.detectencoding("<?xml\rversion='1.0' encoding='x") is None 
    69     assert _xml_codec.detectencoding("<?xml\rversion='1.0' encoding='x'") == "x" 
    70     assert _xml_codec.detectencoding('<?xml\rversion="1.0" encoding="x"') == "x" 
    71     assert _xml_codec.detectencoding('<?xml \r\n\t \r\n\t \r\n\tversion \r\n\t \r\n\t= \r\n\t \r\n\t"1.0" \r\n\t \r\n\t \r\n\tencoding \r\n\t \r\n\t= \r\n\t \r\n\t"x"') == "x" 
    72     assert _xml_codec.detectencoding("<?xml\rversion='1.0' ?>") == "utf-8" 
    73     assert _xml_codec.detectencoding("<?xml\rversion='1.0' Encoding='x'") is None # encoding not recognized (might come later) 
    74     assert _xml_codec.detectencoding("<?xml\rVersion='1.0'") is None 
    75     py.test.raises(ValueError, _xml_codec.detectencoding, "<?xml\rversion='1.0' encoding=''") # empty encoding 
    76     assert _xml_codec.detectencoding("<", False) is None 
    77     assert _xml_codec.detectencoding("<", True) == "utf-8" 
    78     assert _xml_codec.detectencoding("<?", False) is None 
    79     assert _xml_codec.detectencoding("<?", True) == "utf-8" 
     27    assert xml_codec._detectencoding("") is None 
     28    assert xml_codec._detectencoding("\xef") is None 
     29    assert xml_codec._detectencoding("\xef\x33") == "utf-8" 
     30    assert xml_codec._detectencoding("\xef\xbb") is None 
     31    assert xml_codec._detectencoding("\xef\xbb\x33") == "utf-8" 
     32    assert xml_codec._detectencoding("\xef\xbb\xbf") == "utf-8-sig" 
     33    assert xml_codec._detectencoding("\xff") is None 
     34    assert xml_codec._detectencoding("\xff\x33") == "utf-8" 
     35    assert xml_codec._detectencoding("\xff\xfe") is None 
     36    assert xml_codec._detectencoding("\xff\xfe\x33") == "utf-16" 
     37    assert xml_codec._detectencoding("\xff\xfe\x00") is None 
     38    assert xml_codec._detectencoding("\xff\xfe\x00\x33") == "utf-16" 
     39    assert xml_codec._detectencoding("\xff\xfe\x00\x00") == "utf-32" 
     40    assert xml_codec._detectencoding("\x00") is None 
     41    assert xml_codec._detectencoding("\x00\x33") == "utf-8" 
     42    assert xml_codec._detectencoding("\x00\x00") is None 
     43    assert xml_codec._detectencoding("\x00\x00\x33") == "utf-8" 
     44    assert xml_codec._detectencoding("\x00\x00\xfe") is None 
     45    assert xml_codec._detectencoding("\x00\x00\x00\x33") == "utf-8" 
     46    assert xml_codec._detectencoding("\x00\x00\x00<") == "utf-32-be" 
     47    assert xml_codec._detectencoding("\x00\x00\xfe\xff") == "utf-32" 
     48    assert xml_codec._detectencoding("<") is None 
     49    assert xml_codec._detectencoding("<\x33") == "utf-8" 
     50    assert xml_codec._detectencoding("<\x00") is None 
     51    assert xml_codec._detectencoding("<\x00\x33") == "utf-8" 
     52    assert xml_codec._detectencoding("<\x00\x00") is None 
     53    assert xml_codec._detectencoding("<\x00\x00\x33") == "utf-8" 
     54    assert xml_codec._detectencoding("<\x00\x00\x00") == "utf-32-le" 
     55    assert xml_codec._detectencoding("\x4c") is None 
     56    assert xml_codec._detectencoding("\x4c\x33") == "utf-8" 
     57    assert xml_codec._detectencoding("\x4c\x6f") is None 
     58    assert xml_codec._detectencoding("\x4c\x6f\x33") == "utf-8" 
     59    assert xml_codec._detectencoding("\x4c\x6f\xa7") is None 
     60    assert xml_codec._detectencoding("\x4c\x6f\xa7\x33") == "utf-8" 
     61    assert xml_codec._detectencoding("\x4c\x6f\xa7\x94") == "cp037" 
     62    assert xml_codec._detectencoding("<?") is None 
     63    assert xml_codec._detectencoding("<?x") is None 
     64    assert xml_codec._detectencoding("<?xm") is None 
     65    assert xml_codec._detectencoding("<?xml") is None 
     66    assert xml_codec._detectencoding("<?xml\r") is None 
     67    assert xml_codec._detectencoding("<?xml\rversion='1.0'") is None 
     68    assert xml_codec._detectencoding("<?xml\rversion='1.0' encoding='x") is None 
     69    assert xml_codec._detectencoding("<?xml\rversion='1.0' encoding='x'") == "x" 
     70    assert xml_codec._detectencoding('<?xml\rversion="1.0" encoding="x"') == "x" 
     71    assert xml_codec._detectencoding('<?xml \r\n\t \r\n\t \r\n\tversion \r\n\t \r\n\t= \r\n\t \r\n\t"1.0" \r\n\t \r\n\t \r\n\tencoding \r\n\t \r\n\t= \r\n\t \r\n\t"x"') == "x" 
     72    assert xml_codec._detectencoding("<?xml\rversion='1.0' ?>") == "utf-8" 
     73    assert xml_codec._detectencoding("<?xml\rversion='1.0' Encoding='x'") is None # encoding not recognized (might come later) 
     74    assert xml_codec._detectencoding("<?xml\rVersion='1.0'") is None 
     75    py.test.raises(ValueError, xml_codec._detectencoding, "<?xml\rversion='1.0' encoding=''") # empty encoding 
     76    assert xml_codec._detectencoding("<", False) is None 
     77    assert xml_codec._detectencoding("<", True) == "utf-8" 
     78    assert xml_codec._detectencoding("<?", False) is None 
     79    assert xml_codec._detectencoding("<?", True) == "utf-8" 
    8080 
    8181 
    8282def test_detectencoding_unicode(): 
    8383    # Unicode version (only parses the header) 
    84     assert _xml_codec.detectencoding(u'<?xml \r\n\t \r\n\t \r\n\tversion \r\n\t \r\n\t= \r\n\t \r\n\t"1.0" \r\n\t \r\n\t \r\n\tencoding \r\n\t \r\n\t= \r\n\t \r\n\t"x') is None 
    85     assert _xml_codec.detectencoding(u'<?xml \r\n\t \r\n\t \r\n\tversion \r\n\t \r\n\t= \r\n\t \r\n\t"1.0" \r\n\t \r\n\t \r\n\tencoding \r\n\t \r\n\t= \r\n\t \r\n\t"x', True) == "utf-8" 
    86     assert _xml_codec.detectencoding(u'<?xml \r\n\t \r\n\t \r\n\tversion \r\n\t \r\n\t= \r\n\t \r\n\t"1.0" \r\n\t \r\n\t \r\n\tencoding \r\n\t \r\n\t= \r\n\t \r\n\t"x"') == "x" 
     84    assert xml_codec._detectencoding(u'<?xml \r\n\t \r\n\t \r\n\tversion \r\n\t \r\n\t= \r\n\t \r\n\t"1.0" \r\n\t \r\n\t \r\n\tencoding \r\n\t \r\n\t= \r\n\t \r\n\t"x') is None 
     85    assert xml_codec._detectencoding(u'<?xml \r\n\t \r\n\t \r\n\tversion \r\n\t \r\n\t= \r\n\t \r\n\t"1.0" \r\n\t \r\n\t \r\n\tencoding \r\n\t \r\n\t= \r\n\t \r\n\t"x', True) == "utf-8" 
     86    assert xml_codec._detectencoding(u'<?xml \r\n\t \r\n\t \r\n\tversion \r\n\t \r\n\t= \r\n\t \r\n\t"1.0" \r\n\t \r\n\t \r\n\tencoding \r\n\t \r\n\t= \r\n\t \r\n\t"x"') == "x" 
    8787 
    8888 
    8989def test_fixencoding(): 
    9090    s = u'<?xml \r\n\t \r\n\t \r\n\tversion \r\n\t \r\n\t= \r\n\t \r\n\t"1.0" \r\n\t \r\n\t \r\n\tencoding \r\n\t \r\n\t= \r\n\t \r\n\t"x' 
    91     assert _xml_codec.fixencoding(s, u"utf-8") is None 
     91    assert xml_codec._fixencoding(s, u"utf-8") is None 
    9292 
    9393    s = u'<?xml \r\n\t \r\n\t \r\n\tversion \r\n\t \r\n\t= \r\n\t \r\n\t"1.0" \r\n\t \r\n\t \r\n\tencoding \r\n\t \r\n\t= \r\n\t \r\n\t"x' 
    94     assert _xml_codec.fixencoding(s, u"utf-8", True) == s 
     94    assert xml_codec._fixencoding(s, u"utf-8", True) == s 
    9595 
    9696    s = u'<?xml \r\n\t \r\n\t \r\n\tversion \r\n\t \r\n\t= \r\n\t \r\n\t"1.0" \r\n\t \r\n\t \r\n\tencoding \r\n\t \r\n\t= \r\n\t \r\n\t"x"' 
    97     assert _xml_codec.fixencoding(s, u"utf-8") == s.replace('"x"', '"utf-8"') 
     97    assert xml_codec._fixencoding(s, u"utf-8") == s.replace('"x"', '"utf-8"') 
    9898 
    9999 
     
    147147 
    148148def test_decoder(): 
    149     def checkauto(encoding, input=u"<?xml encoding='x'?>gürk\u20ac"): 
     149    def checkauto(encoding, input=u"<?xml version='1.0' encoding='x'?>gürk\u20ac"): 
    150150        # Check stateless decoder 
    151151        d = codecs.getdecoder("xml") 
     
    173173        yield checkauto, "utf-32-be" 
    174174 
    175     def checkdecl(encoding, input=u"<?xml encoding=%r?><gürk>\u20ac</gürk>"): 
     175    def checkdecl(encoding, input=u"<?xml version='1.0' encoding=%r?><gürk>\u20ac</gürk>"): 
    176176        # Check stateless decoder with encoding autodetection 
    177177        d = codecs.getdecoder("xml") 
     
    192192    # Use correct declaration 
    193193    yield checkdecl, "utf-8" 
    194     yield checkdecl, "iso-8859-1", u"<?xml encoding=%r?><gürk/>" 
     194    yield checkdecl, "iso-8859-1", u"<?xml version='1.0' encoding=%r?><gürk/>" 
    195195    yield checkdecl, "iso-8859-15" 
    196196    yield checkdecl, "cp1252" 
    197197 
    198198    # No recursion 
    199     py.test.raises(ValueError, "<?xml encoding='xml'?><gurk/>".decode, "xml") 
     199    py.test.raises(ValueError, "<?xml version='1.0' encoding='xml'?><gurk/>".decode, "xml") 
    200200 
    201201 
    202202def test_encoder(): 
    203     def check(encoding, input=u"<?xml encoding='x'?>gürk\u20ac"): 
     203    def check(encoding, input=u"<?xml version='1.0' encoding='x'?>gürk\u20ac"): 
    204204        # Check stateless encoder with encoding autodetection 
    205205        e = codecs.getencoder("xml") 
     
    228228        yield check, "utf-32-be" 
    229229    yield check, "utf-8" 
    230     yield check, "iso-8859-1", u"<?xml encoding='x'?><gürk/>" 
     230    yield check, "iso-8859-1", u"<?xml version='1.0' encoding='x'?><gürk/>" 
    231231    yield check, "iso-8859-15" 
    232232    yield check, "cp1252" 
    233233 
    234234    # No recursion 
    235     py.test.raises(ValueError, u"<?xml encoding='xml'?><gurk/>".encode, "xml") 
     235    py.test.raises(ValueError, u"<?xml version='1.0' encoding='xml'?><gurk/>".encode, "xml") 
    236236