There are additional different errors with UCS-2 characters on pypy3.
I have expanded the test program a little:
#!/usr/bin/env python
import sys import platform import traceback from lxml import etree
def test_fromstring(test, xml_src): xml_str = eval(xml_src) print("\nExecuting test {}: etree.fromstring({}) (evaluated: {!r})". format(test, xml_src, xml_str)) try: xml_obj = etree.fromstring(xml_str) except Exception: print("Failed, traceback follows:") traceback.print_exc() else: print("Success")
# Each case passes the Python source of a unicode literal.  The "2" cases use
# U+00E9 (inside the Basic Multilingual Plane); the "4" cases use U+10142
# (outside it).  Each character is tried as element text and as an
# attribute value.
test_fromstring("text2", "u'<FOO>\\u00E9</FOO>'")
test_fromstring("attr2", "u'<FOO NAME=\"\\u00E9\"/>'")
test_fromstring("text4", "u'<FOO>\\U00010142</FOO>'")
test_fromstring("attr4", "u'<FOO NAME=\"\\U00010142\"/>'")
# Report the platform, interpreter and library versions the tests ran against.
print("\nVersions:")
print("%-20s: %s" % ('Platform system', platform.system()))
print("%-20s: %s" % ('Platform release', platform.release()))
print("%-20s: %s" % ('Python impl.', platform.python_implementation()))
# sys.pypy_version_info is used when the interpreter provides it; otherwise
# fall back to the revision string reported by the platform module.
print("%-20s: %s" % ('Python impl. version',
                     getattr(sys, 'pypy_version_info',
                             platform.python_revision())))
print("%-20s: %s" % ('Python', sys.version_info))
print("%-20s: %s" % ('lxml.etree', etree.LXML_VERSION))
print("%-20s: %s" % ('libxml used', etree.LIBXML_VERSION))
print("%-20s: %s" % ('libxml compiled', etree.LIBXML_COMPILED_VERSION))
print("%-20s: %s" % ('libxslt used', etree.LIBXSLT_VERSION))
print("%-20s: %s" % ('libxslt compiled', etree.LIBXSLT_COMPILED_VERSION))
#--- end of test program
The results are (all on macOS with lxml version and its libraries' versions as described in the original bug description):
Impl.    Python  lxml   text2  attr2  text4  attr4
---------------------------------------------------------
PyPy     2.7.13  4.6.1  ERR1   ERR2   ERR3   ERR3
PyPy     3.6.9   4.6.1  ERR1   ERR2   ERR3   ERR3
CPython  2.7.16  4.6.1  SUCC   SUCC   SUCC   SUCC
CPython  3.8.6   4.6.1  SUCC   SUCC   SUCC   SUCC
CPython  3.9.0   4.6.1  SUCC   SUCC   ERR3   ERR3

PyPy     2.7.13  4.5.2  SUCC   SUCC   SUCC   SUCC  (!)
PyPy     3.6.9   4.5.2  ERR1   ERR2   ERR3   ERR3
CPython  3.9.0   4.5.2  SUCC   SUCC   ERR3   ERR3

PyPy     2.7.13  3.8.0  SUCC   SUCC   SUCC   SUCC
PyPy     3.6.9   3.8.0  fails upon from lxml import etree
The errors mentioned in the table above are:
ERR1: lxml.etree.XMLSyntaxError: Char 0x0 out of allowed range
ERR2: lxml.etree.XMLSyntaxError: expected '>'
ERR3: lxml.etree.XMLSyntaxError: attributes construct error
There are additional different errors with UCS-2 characters on pypy3.
I have expanded the test program a little:
#!/usr/bin/env python
import sys
import platform
import traceback
from lxml import etree
def test_fromstring (test, xml_src): "\nExecuting test {}: etree.fromstrin g({}) (evaluated: {!r})".
format( test, xml_src, xml_str)) g(xml_str)
print( "Failed, traceback follows:")
traceback. print_exc( )
print( "Success" )
xml_str = eval(xml_src)
print(
try:
xml_obj = etree.fromstrin
except Exception:
else:
# Each case passes the Python source of a unicode literal.  The "2" cases use
# U+00E9 (inside the Basic Multilingual Plane); the "4" cases use U+10142
# (outside it).  Each character is tried as element text and as an
# attribute value.
test_fromstring("text2", "u'<FOO>\\u00E9</FOO>'")
test_fromstring("attr2", "u'<FOO NAME=\"\\u00E9\"/>'")
test_fromstring("text4", "u'<FOO>\\U00010142</FOO>'")
test_fromstring("attr4", "u'<FOO NAME=\"\\U00010142\"/>'")
# Report the platform, interpreter and library versions the tests ran against.
print("\nVersions:")
print("%-20s: %s" % ('Platform system', platform.system()))
print("%-20s: %s" % ('Platform release', platform.release()))
print("%-20s: %s" % ('Python impl.', platform.python_implementation()))
# sys.pypy_version_info is used when the interpreter provides it; otherwise
# fall back to the revision string reported by the platform module.
print("%-20s: %s" % ('Python impl. version',
                     getattr(sys, 'pypy_version_info',
                             platform.python_revision())))
print("%-20s: %s" % ('Python', sys.version_info))
print("%-20s: %s" % ('lxml.etree', etree.LXML_VERSION))
print("%-20s: %s" % ('libxml used', etree.LIBXML_VERSION))
print("%-20s: %s" % ('libxml compiled', etree.LIBXML_COMPILED_VERSION))
print("%-20s: %s" % ('libxslt used', etree.LIBXSLT_VERSION))
print("%-20s: %s" % ('libxslt compiled', etree.LIBXSLT_COMPILED_VERSION))
#--- end of test program
The results are (all on macOS with lxml version and its libraries' versions as described in the original bug description):
Impl. Python lxml text2 attr2 text4 attr4
---------------------------------------------------------
PyPy 2.7.13 4.6.1 ERR1 ERR2 ERR3 ERR3
PyPy 3.6.9 4.6.1 ERR1 ERR2 ERR3 ERR3
CPython 2.7.16 4.6.1 SUCC SUCC SUCC SUCC
CPython 3.8.6 4.6.1 SUCC SUCC SUCC SUCC
CPython 3.9.0 4.6.1 SUCC SUCC ERR3 ERR3
PyPy 2.7.13 4.5.2 SUCC SUCC SUCC SUCC (!)
PyPy 3.6.9 4.5.2 ERR1 ERR2 ERR3 ERR3
CPython 3.9.0 4.5.2 SUCC SUCC ERR3 ERR3
PyPy 2.7.13 3.8.0 SUCC SUCC SUCC SUCC
PyPy 3.6.9 3.8.0 fails upon from lxml import etree
The errors mentioned in the table above are:
ERR1: lxml.etree.XMLSyntaxError: Char 0x0 out of allowed range
ERR2: lxml.etree.XMLSyntaxError: expected '>'
ERR3: lxml.etree.XMLSyntaxError: attributes construct error