Index: functions/i18n.php =================================================================== --- functions/i18n.php (revision 14363) +++ functions/i18n.php (working copy) @@ -159,15 +159,17 @@ } /** - * Converts string from given charset to charset, that can be displayed by user translation. + * Converts a string from the given $charset to a character set that + * can be displayed by the current user interface language (translation) * - * Function by default returns html encoded strings, if translation uses different encoding. + * Function by default returns html encoded strings if translation uses + * different encoding. * If Japanese translation is used - function returns string converted to euc-jp * If $charset is not supported - function returns unconverted string. * * sanitizing of html tags is also done by this function. * - * @param string $charset + * @param string $charset The charset of the incoming string * @param string $string Text to be decoded * @param boolean $force_decode converts string to html without $charset!=$default_charset check. * Argument is available since 1.4.5 and 1.5.1. @@ -184,7 +186,7 @@ } /* All HTML special characters are 7 bit and can be replaced first */ - if (! $save_html) $string = sm_encode_html_special_chars ($string); + if (! 
$save_html) $string = sm_encode_html_special_chars($string, ENT_COMPAT, $charset); $charset = strtolower($charset); set_my_charset(); Index: functions/strings.php =================================================================== --- functions/strings.php (revision 14382) +++ functions/strings.php (working copy) @@ -1489,7 +1489,13 @@ * (See http://php.net/manual/function.htmlspecialchars.php ) * (OPTIONAL; default ENT_COMPAT) * @param string $encoding The character encoding to use in the conversion - * (OPTIONAL; default automatic detection) + * (if not one of the character sets supported + * by PHP's htmlspecialchars(), then $encoding + * will be ignored and iso-8859-1 will be used, + * unless a default has been specified in + * $default_htmlspecialchars_encoding in + * config_local.php) (OPTIONAL; default automatic + * detection) * @param boolean $double_encode Whether or not to convert entities that are * already in the string (only supported in * PHP 5.2.3+) (OPTIONAL; default TRUE) @@ -1500,6 +1506,31 @@ function sm_encode_html_special_chars($string, $flags=ENT_COMPAT, $encoding=NULL, $double_encode=TRUE) { + + // charsets supported by PHP's htmlspecialchars + // (move this elsewhere if needed) + // + static $htmlspecialchars_charsets = array( + 'iso-8859-1', 'iso8859-1', + 'iso-8859-5', 'iso8859-5', + 'iso-8859-15', 'iso8859-15', + 'utf-8', + 'cp866', 'ibm866', '866', + 'cp1251', 'windows-1251', 'win-1251', '1251', + 'cp1252', 'windows-1252', '1252', + 'koi8-r', 'koi8-ru', 'koi8r', + 'big5', '950', + 'gb2312', '936', + 'big5-hkscs', + 'shift_jis', 'sjis', 'sjis-win', 'cp932', '932', + 'euc-jp', 'eucjp', 'eucjp-win', + 'macroman', + ); + + + // if not given, set encoding to the charset being + // used by the current user interface language + // if (!$encoding) { global $default_charset; @@ -1508,6 +1539,21 @@ $encoding = $default_charset; } + + // make sure htmlspecialchars() supports the needed encoding + // + if (!in_array(strtolower($encoding), 
$htmlspecialchars_charsets)) + { + // use default from configuration if provided or hard-coded fallback + // + global $default_htmlspecialchars_encoding; + if (!empty($default_htmlspecialchars_encoding)) + $encoding = $default_htmlspecialchars_encoding; + else + $encoding = 'iso-8859-1'; + } + + if (check_php_version(5, 2, 3)) return htmlspecialchars($string, $flags, $encoding, $double_encode);