Comment 1 for bug 1902364

Revision history for this message
Andreas Maier (maiera) wrote :

There are additional, different errors with UCS-2 characters on PyPy3.

I have expanded the test program a little:

#!/usr/bin/env python

import sys
import platform
import traceback
from lxml import etree

def test_fromstring(test, xml_src):
    xml_str = eval(xml_src)
    print("\nExecuting test {}: etree.fromstring({}) (evaluated: {!r})".
          format(test, xml_src, xml_str))
    try:
        xml_obj = etree.fromstring(xml_str)
    except Exception:
        print("Failed, traceback follows:")
        traceback.print_exc()
    else:
        print("Success")

# Each entry pairs a test label with the Python source of an XML string
# literal.  The cases put U+00E9 (inside the BMP) and U+10142 (outside the
# BMP) into element text and into an attribute value.
for case_label, literal_src in (
    ("text2", "u'<FOO>\\u00E9</FOO>'"),
    ("attr2", "u'<FOO NAME=\"\\u00E9\"/>'"),
    ("text4", "u'<FOO>\\U00010142</FOO>'"),
    ("attr4", "u'<FOO NAME=\"\\U00010142\"/>'"),
):
    test_fromstring(case_label, literal_src)

def _report(label, value):
    """Print one row of the version table: left-aligned label, then value."""
    print("%-20s: %s" % (label, value))


# Describe the environment the tests ran in.  Rows are emitted one call at a
# time so each value is looked up immediately before its row is printed.
print("\nVersions:")
_report('Platform system', platform.system())
_report('Platform release', platform.release())
_report('Python impl.', platform.python_implementation())
# Use sys.pypy_version_info when the interpreter provides it; otherwise fall
# back to platform.python_revision().
_report('Python impl. version',
        getattr(sys, 'pypy_version_info', platform.python_revision()))
_report('Python', sys.version_info)
_report('lxml.etree', etree.LXML_VERSION)
_report('libxml used', etree.LIBXML_VERSION)
_report('libxml compiled', etree.LIBXML_COMPILED_VERSION)
_report('libxslt used', etree.LIBXSLT_VERSION)
_report('libxslt compiled', etree.LIBXSLT_COMPILED_VERSION)

#--- end of test program

The results are (all on macOS with lxml version and its libraries' versions as described in the original bug description):

Impl. Python lxml text2 attr2 text4 attr4
---------------------------------------------------------
PyPy 2.7.13 4.6.1 ERR1 ERR2 ERR3 ERR3
PyPy 3.6.9 4.6.1 ERR1 ERR2 ERR3 ERR3
CPython 2.7.16 4.6.1 SUCC SUCC SUCC SUCC
CPython 3.8.6 4.6.1 SUCC SUCC SUCC SUCC
CPython 3.9.0 4.6.1 SUCC SUCC ERR3 ERR3

PyPy 2.7.13 4.5.2 SUCC SUCC SUCC SUCC (!)
PyPy 3.6.9 4.5.2 ERR1 ERR2 ERR3 ERR3
CPython 3.9.0 4.5.2 SUCC SUCC ERR3 ERR3

PyPy 2.7.13 3.8.0 SUCC SUCC SUCC SUCC
PyPy 3.6.9 3.8.0 fails upon `from lxml import etree`

The errors mentioned in the table above are:

ERR1: lxml.etree.XMLSyntaxError: Char 0x0 out of allowed range
ERR2: lxml.etree.XMLSyntaxError: expected '>'
ERR3: lxml.etree.XMLSyntaxError: attributes construct error