>>> parse("https://www.google.com")
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/usr/lib/python2.6/dist-packages/lxml/html/__init__.py", line 661, in parse
return etree.parse(filename_or_url, parser, base_url=base_url, **kw)
File "lxml.etree.pyx", line 2706, in lxml.etree.parse (src/lxml/lxml.etree.c:49958)
File "parser.pxi", line 1500, in lxml.etree._parseDocument (src/lxml/lxml.etree.c:71797)
File "parser.pxi", line 1529, in lxml.etree._parseDocumentFromURL (src/lxml/lxml.etree.c:72080)
File "parser.pxi", line 1429, in lxml.etree._parseDocFromFile (src/lxml/lxml.etree.c:71175)
File "parser.pxi", line 975, in lxml.etree._BaseParser._parseDocFromFile (src/lxml/lxml.etree.c:68173)
File "parser.pxi", line 539, in lxml.etree._ParserContext._handleParseResultDoc (src/lxml/lxml.etree.c:64257)
File "parser.pxi", line 625, in lxml.etree._handleParseResult (src/lxml/lxml.etree.c:65178)
File "parser.pxi", line 563, in lxml.etree._raiseParseError (src/lxml/lxml.etree.c:64493)
IOError: Error reading file 'https://www.google.com': failed to load external entity "https://www.google.com"
Forget the "de" domain of Google; its https URI redirects to "http://www.google.com", but "https://www.google.com" really exists and fails:
>>> parse("https://www.google.com")
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/usr/lib/python2.6/dist-packages/lxml/html/__init__.py", line 661, in parse
return etree.parse(filename_or_url, parser, base_url=base_url, **kw)
File "lxml.etree.pyx", line 2706, in lxml.etree.parse (src/lxml/lxml.etree.c:49958)
File "parser.pxi", line 1500, in lxml.etree._parseDocument (src/lxml/lxml.etree.c:71797)
File "parser.pxi", line 1529, in lxml.etree._parseDocumentFromURL (src/lxml/lxml.etree.c:72080)
File "parser.pxi", line 1429, in lxml.etree._parseDocFromFile (src/lxml/lxml.etree.c:71175)
File "parser.pxi", line 975, in lxml.etree._BaseParser._parseDocFromFile (src/lxml/lxml.etree.c:68173)
File "parser.pxi", line 539, in lxml.etree._ParserContext._handleParseResultDoc (src/lxml/lxml.etree.c:64257)
File "parser.pxi", line 625, in lxml.etree._handleParseResult (src/lxml/lxml.etree.c:65178)
File "parser.pxi", line 563, in lxml.etree._raiseParseError (src/lxml/lxml.etree.c:64493)
IOError: Error reading file 'https://www.google.com': failed to load external entity "https://www.google.com"