Read the message on stdin as bytes in _get_input_msg.

_get_input_msg used email.message_from_file(sys.stdin), which reads stdin
through the text layer and can fail with a decoding error when the
message is not in the encoding Python picked for stdin.  This patch
parses the raw bytes with email.message_from_bytes instead.

Two helpers are added:
  * _is_binary_reader(stream, default) - probes a stream with read(0) and
    reports whether it returns bytes; returns `default` if the probe raises.
  * get_binary_stdin() - returns sys.stdin if it is already binary,
    otherwise sys.stdin.buffer; raises RuntimeError if neither works.

Note: _get_input_mbox is not changed by this patch; it still calls
sys.stdin.read().encode("utf8") as shown in the trailing context lines.

--- pyzor-old 2019-11-07 12:36:52.240794793 -0500
+++ pyzor 2019-11-14 15:40:30.881470181 -0500
@@ -171,11 +171,35 @@
 
 
 def _get_input_msg(digester):
-    msg = email.message_from_file(sys.stdin)
+    # Read and process stdin as bytes because we don't know its
+    # encoding. Python-3.x will try to guess -- and can sometimes
+    # guess wrong -- leading to decoding errors in read().
+    msg = email.message_from_bytes(get_binary_stdin().read())
     digested = digester(msg).value
     yield digested
 
 
+def _is_binary_reader(stream, default=False):
+    try:
+        return isinstance(stream.read(0), bytes)
+    except Exception:
+        return default
+
+
+def get_binary_stdin():
+    # sys.stdin might or might not be binary in some extra cases. By
+    # default it's obviously non binary which is the core of the
+    # problem but the docs recommend changing it to binary for such
+    # cases so we need to deal with it.
+    is_binary = _is_binary_reader(sys.stdin, False)
+    if is_binary:
+        return sys.stdin
+    buf = getattr(sys.stdin, 'buffer', None)
+    if buf is not None and _is_binary_reader(buf, True):
+        return buf
+    raise RuntimeError('Did not manage to get binary stdin')
+
+
 def _get_input_mbox(digester):
     tfile = tempfile.NamedTemporaryFile()
     tfile.write(sys.stdin.read().encode("utf8"))