diff -r 9794796d4ced dcpp/SimpleXMLReader.cpp --- a/dcpp/SimpleXMLReader.cpp Sat Nov 12 00:21:38 2016 +0100 +++ b/dcpp/SimpleXMLReader.cpp Sun Dec 18 21:50:18 2016 +0200 @@ -249,9 +249,7 @@ if((state == STATE_ELEMENT_ATTR_VALUE_APOS && c == '\'') || (state == STATE_ELEMENT_ATTR_VALUE_QUOT && c == '"')) { append(attribs.back().second, MAX_VALUE_SIZE, buf.begin() + bufPos, buf.begin() + bufPos + i); - if(!encoding.empty() && encoding != Text::utf8) { - attribs.back().second = Text::toUtf8(attribs.back().second, encoding); - } + decodeString(attribs.back().second); /* attribute value is complete: validate it as UTF-8 or convert it from the declared encoding */ state = STATE_ELEMENT_ATTR; advancePos(i + 1); @@ -736,9 +734,7 @@ } if(oldState == STATE_CONTENT && state != oldState && !value.empty()) { - if(!encoding.empty() && encoding != Text::utf8) { - value = Text::toUtf8(value, encoding); - } + decodeString(value); /* same decoding step for element content, applied before it is passed to cb->data() */ cb->data(value); value.clear(); } @@ -749,6 +745,18 @@ // should never happen return false; -}; +} + +/** Ensures a parsed string is UTF-8, modifying str_ in place when needed. If encoding is empty or compares equal to Text::utf8, str_ is only checked with Text::validateUtf8 and a failure is reported through error(); otherwise str_ is replaced by Text::toUtf8(str_, encoding). */ void SimpleXMLReader::decodeString(string& str_) { + auto isUtf8 = encoding.empty() || compare(encoding, Text::utf8) == 0; /* an empty encoding is handled the same way as UTF-8 */ + + if (isUtf8) { + if (!Text::validateUtf8(str_)) { + error("UTF-8 validation failed"); /* NOTE(review): error() is declared to return bool and the result is ignored here; presumably it throws - confirm */ + } + } else { + str_ = Text::toUtf8(str_, encoding); + } +} } diff -r 9794796d4ced dcpp/SimpleXMLReader.h --- a/dcpp/SimpleXMLReader.h Sat Nov 12 00:21:38 2016 +0100 +++ b/dcpp/SimpleXMLReader.h Sun Dec 18 21:50:18 2016 +0200 @@ -189,6 +189,8 @@ bool spaceOrError(const char* error); bool error(const char* message); + + /** Validates str_ as UTF-8 or converts it to UTF-8 from the reader encoding, in place. */ void decodeString(string& str_); }; diff -r 9794796d4ced dcpp/Text.cpp --- a/dcpp/Text.cpp Sat Nov 12 00:21:38 2016 +0100 +++ b/dcpp/Text.cpp Sun Dec 18 21:50:18 2016 +0200 @@ -387,50 +387,49 @@ return acpToUtf8(str, tmp); if(fromCharset == utf8 || toLower(fromCharset, tmp) == utf8) return utf8ToAcp(str, tmp); +#else + // Initialize the converter + iconv_t cd = iconv_open(toCharset.c_str(), fromCharset.c_str()); + if (cd != (iconv_t)-1) { /* converter opened; if iconv_open failed, control falls through to the unknown-conversion path after #endif */ + size_t rv; + size_t len = str.length() * 2; // optimization + size_t inleft = 
str.length(); + size_t outleft = len; + tmp.resize(len); + const char *inbuf = str.data(); + char *outbuf = (char *)tmp.data(); + while (inleft > 0) { + rv = iconv(cd, (ICONV_CONST char **)&inbuf, &inleft, &outbuf, &outleft); + if (rv == (size_t)-1) { + size_t used = outbuf - tmp.data(); + if (errno == E2BIG) { /* output buffer is full: double it and recompute outbuf from the resized storage */ + len *= 2; + tmp.resize(len); + outbuf = (char *)tmp.data() + used; + outleft = len - used; + } + else if (errno == EILSEQ) { /* invalid input sequence: skip one input byte and put an underscore in its place */ + ++inbuf; + --inleft; + tmp[used] = '_'; /* NOTE(review): outbuf and outleft are not advanced after this write, so the next iconv output appears to land on the same position - confirm */ + } + else { /* any other error: write inleft underscores at the current output position and stop the loop */ + tmp.replace(used, inleft, string(inleft, '_')); + inleft = 0; + } + } + } + iconv_close(cd); + if (outleft > 0) { + tmp.resize(len - outleft); + } + return tmp; + } +#endif // We don't know how to convert arbitrary charsets dcdebug("Unknown conversion from %s to %s\n", fromCharset.c_str(), toCharset.c_str()); - return str; -#else - - // Initialize the converter - iconv_t cd = iconv_open(toCharset.c_str(), fromCharset.c_str()); - if(cd == (iconv_t)-1) - return str; - - size_t rv; - size_t len = str.length() * 2; // optimization - size_t inleft = str.length(); - size_t outleft = len; - tmp.resize(len); - const char *inbuf = str.data(); - char *outbuf = (char *)tmp.data(); - - while(inleft > 0) { - rv = iconv(cd, (ICONV_CONST char **)&inbuf, &inleft, &outbuf, &outleft); - if(rv == (size_t)-1) { - size_t used = outbuf - tmp.data(); - if(errno == E2BIG) { - len *= 2; - tmp.resize(len); - outbuf = (char *)tmp.data() + used; - outleft = len - used; - } else if(errno == EILSEQ) { - ++inbuf; - --inleft; - tmp[used] = '_'; - } else { - tmp.replace(used, inleft, string(inleft, '_')); - inleft = 0; - } - } - } - iconv_close(cd); - if(outleft > 0) { - tmp.resize(len - outleft); - } - return tmp; -#endif + return Util::emptyString; /* reached when no conversion was done; the removed lines returned str here instead */ } }