Trying to use html5parser.fromstring with a unicode text input fails with a TypeError (unexpected keyword argument 'useChardet'):
$ python
Python 2.7.6 (default, Oct 26 2016, 20:30:19)
[GCC 4.8.4] on linux2
Type "help", "copyright", "credits" or "license" for more information.
>>> from lxml.html import html5parser
>>> html5parser.fromstring(u'')
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/home/elias/.virtualenvs/tmp-6aaa3c35e219018b/local/lib/python2.7/site-packages/lxml/html/html5parser.py", line 147, in fromstring
guess_charset=guess_charset)
File "/home/elias/.virtualenvs/tmp-6aaa3c35e219018b/local/lib/python2.7/site-packages/lxml/html/html5parser.py", line 64, in document_fromstring
return parser.parse(html, useChardet=guess_charset).getroot()
File "/home/elias/.virtualenvs/tmp-6aaa3c35e219018b/local/lib/python2.7/site-packages/html5lib/html5parser.py", line 235, in parse
self._parse(stream, False, None, *args, **kwargs)
File "/home/elias/.virtualenvs/tmp-6aaa3c35e219018b/local/lib/python2.7/site-packages/html5lib/html5parser.py", line 85, in _parse
self.tokenizer = _tokenizer.HTMLTokenizer(stream, parser=self, **kwargs)
File "/home/elias/.virtualenvs/tmp-6aaa3c35e219018b/local/lib/python2.7/site-packages/html5lib/_tokenizer.py", line 36, in __init__
self.stream = HTMLInputStream(stream, **kwargs)
File "/home/elias/.virtualenvs/tmp-6aaa3c35e219018b/local/lib/python2.7/site-packages/html5lib/_inputstream.py", line 149, in HTMLInputStream
return HTMLUnicodeInputStream(source, **kwargs)
TypeError: __init__() got an unexpected keyword argument 'useChardet'
$ python
Python 3.4.3 (default, Nov 17 2016, 01:08:31)
[GCC 4.8.4] on linux
Type "help", "copyright", "credits" or "license" for more information.
>>> from lxml.html import html5parser
>>> html5parser.fromstring('')
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/home/elias/.virtualenvs/tmp-200d3a9b52ebdd89/lib/python3.4/site-packages/lxml/html/html5parser.py", line 147, in fromstring
guess_charset=guess_charset)
File "/home/elias/.virtualenvs/tmp-200d3a9b52ebdd89/lib/python3.4/site-packages/lxml/html/html5parser.py", line 64, in document_fromstring
return parser.parse(html, useChardet=guess_charset).getroot()
File "/home/elias/.virtualenvs/tmp-200d3a9b52ebdd89/lib/python3.4/site-packages/html5lib/html5parser.py", line 235, in parse
self._parse(stream, False, None, *args, **kwargs)
File "/home/elias/.virtualenvs/tmp-200d3a9b52ebdd89/lib/python3.4/site-packages/html5lib/html5parser.py", line 85, in _parse
self.tokenizer = _tokenizer.HTMLTokenizer(stream, parser=self, **kwargs)
File "/home/elias/.virtualenvs/tmp-200d3a9b52ebdd89/lib/python3.4/site-packages/html5lib/_tokenizer.py", line 36, in __init__
self.stream = HTMLInputStream(stream, **kwargs)
File "/home/elias/.virtualenvs/tmp-200d3a9b52ebdd89/lib/python3.4/site-packages/html5lib/_inputstream.py", line 149, in HTMLInputStream
return HTMLUnicodeInputStream(source, **kwargs)
TypeError: __init__() got an unexpected keyword argument 'useChardet'
Using the latest version of both lxml and html5lib:
>>> import html5lib
>>> html5lib.__version__
u'0.999999999'
>>> import lxml.etree
>>> lxml.etree.LXML_VERSION
(3, 7, 1, 0)
Trying to use html5parser.fromstring with a unicode text input fails with a TypeError (unexpected keyword argument):
$ python
Python 2.7.6 (default, Oct 26 2016, 20:30:19)
[GCC 4.8.4] on linux2
Type "help", "copyright", "credits" or "license" for more information.
>>> from lxml.html import html5parser
>>> html5parser.fromstring(u'')
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/home/elias/.virtualenvs/tmp-6aaa3c35e219018b/local/lib/python2.7/site-packages/lxml/html/html5parser.py", line 147, in fromstring
guess_charset=guess_charset)
File "/home/elias/.virtualenvs/tmp-6aaa3c35e219018b/local/lib/python2.7/site-packages/lxml/html/html5parser.py", line 64, in document_fromstring
return parser.parse(html, useChardet=guess_charset).getroot()
File "/home/elias/.virtualenvs/tmp-6aaa3c35e219018b/local/lib/python2.7/site-packages/html5lib/html5parser.py", line 235, in parse
self._parse(stream, False, None, *args, **kwargs)
File "/home/elias/.virtualenvs/tmp-6aaa3c35e219018b/local/lib/python2.7/site-packages/html5lib/html5parser.py", line 85, in _parse
self.tokenizer = _tokenizer.HTMLTokenizer(stream, parser=self, **kwargs)
File "/home/elias/.virtualenvs/tmp-6aaa3c35e219018b/local/lib/python2.7/site-packages/html5lib/_tokenizer.py", line 36, in __init__
self.stream = HTMLInputStream(stream, **kwargs)
File "/home/elias/.virtualenvs/tmp-6aaa3c35e219018b/local/lib/python2.7/site-packages/html5lib/_inputstream.py", line 149, in HTMLInputStream
return HTMLUnicodeInputStream(source, **kwargs)
TypeError: __init__() got an unexpected keyword argument 'useChardet'
Details about installed packages:
Python : sys.version_info(major=2, minor=7, micro=6, releaselevel='final', serial=0)
lxml.etree : (3, 7, 1, 0)
libxml used : (2, 9, 3)
libxml compiled : (2, 9, 3)
libxslt used : (1, 1, 29)
libxslt compiled : (1, 1, 29)
I also get the same problem using Python 3:
$ python
Python 3.4.3 (default, Nov 17 2016, 01:08:31)
[GCC 4.8.4] on linux
Type "help", "copyright", "credits" or "license" for more information.
>>> from lxml.html import html5parser
>>> html5parser.fromstring('')
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/home/elias/.virtualenvs/tmp-200d3a9b52ebdd89/lib/python3.4/site-packages/lxml/html/html5parser.py", line 147, in fromstring
guess_charset=guess_charset)
File "/home/elias/.virtualenvs/tmp-200d3a9b52ebdd89/lib/python3.4/site-packages/lxml/html/html5parser.py", line 64, in document_fromstring
return parser.parse(html, useChardet=guess_charset).getroot()
File "/home/elias/.virtualenvs/tmp-200d3a9b52ebdd89/lib/python3.4/site-packages/html5lib/html5parser.py", line 235, in parse
self._parse(stream, False, None, *args, **kwargs)
File "/home/elias/.virtualenvs/tmp-200d3a9b52ebdd89/lib/python3.4/site-packages/html5lib/html5parser.py", line 85, in _parse
self.tokenizer = _tokenizer.HTMLTokenizer(stream, parser=self, **kwargs)
File "/home/elias/.virtualenvs/tmp-200d3a9b52ebdd89/lib/python3.4/site-packages/html5lib/_tokenizer.py", line 36, in __init__
self.stream = HTMLInputStream(stream, **kwargs)
File "/home/elias/.virtualenvs/tmp-200d3a9b52ebdd89/lib/python3.4/site-packages/html5lib/_inputstream.py", line 149, in HTMLInputStream
return HTMLUnicodeInputStream(source, **kwargs)
TypeError: __init__() got an unexpected keyword argument 'useChardet'