<?php
// Demo driver: run one sample string through each decoder and print every
// result on its own HTML line. Arguments are fully evaluated before anything
// is written, so diagnostics raised by a decoder appear ahead of its line.
$string = "foo – bar";
echo "source string = {$string}<br>\n";
printf("html_entity_decode = %s<br>\n", html_entity_decode($string, ENT_NOQUOTES, "UTF-8"));
printf("utf8RawUrlDecode = %s<br>\n", utf8RawUrlDecode($string));
printf("Unicode2Charset = %s<br>\n", Unicode2Charset($string, "UTF-8"));
/**
 * Decodes a percent-encoded string, including the non-standard "%uXXXX"
 * escapes (the form emitted by JavaScript's legacy escape()).
 *
 * - "%uXXXX" (lowercase "u" followed by exactly four hex digits) becomes the
 *   decimal HTML numeric character reference "&#N;" for that code point.
 * - "%XX" (exactly two hex digits) becomes the single raw byte with that value.
 * - Anything else, including malformed or truncated escapes such as a
 *   trailing "%" or "%zz", is copied through unchanged.
 *
 * The decoded output is never rescanned, so "%25u0410" yields the literal
 * text "%u0410" rather than a character reference.
 *
 * @param string $source Percent-encoded input.
 * @return string Decoded string; "%uXXXX" escapes are returned as entities.
 */
function utf8RawUrlDecode ($source) {
    $source = (string) $source;

    $decoded = preg_replace_callback(
        '/%(?:u([0-9A-Fa-f]{4})|([0-9A-Fa-f]{2}))/',
        function ($match) {
            // Group 1 is non-empty only for the "%uXXXX" form; for "%XX" it
            // is reported as an empty string and group 2 holds the digits.
            if ($match[1] !== '') {
                // The entity is pure ASCII, so it is already valid UTF-8 and
                // needs no further encoding step.
                return '&#' . hexdec($match[1]) . ';';
            }

            return chr(hexdec($match[2]));
        },
        $source
    );

    // preg_replace_callback() returns null only on an internal PCRE error;
    // fall back to the untouched input instead of returning null.
    return $decoded === null ? $source : $decoded;
}
/**
 * Replaces HTML numeric character references with the referenced character
 * encoded in the target charset.
 *
 * Both decimal ("&#1040;") and hexadecimal ("&#x410;") references are
 * recognised, case-insensitively. A reference is left in the text unchanged
 * when its value lies above U+10FFFF or when iconv() cannot convert the
 * character to the target charset.
 *
 * @param string $str     Text that may contain numeric character references.
 * @param string $charset Target encoding name as understood by iconv().
 * @return string Text with convertible references replaced.
 */
function Unicode2Charset($str, $charset = 'Windows-1251') { // by SiMM, &#xHHHH; addition by John Profic
    return preg_replace_callback(
        '~&#(?:x([\da-f]+)|(\d+));~i',
        function ($match) use ($charset) {
            // Group 1 carries hex digits; it is an empty string when the
            // decimal alternative (group 2) matched instead.
            $codePoint = $match[1] !== '' ? hexdec($match[1]) : (int) $match[2];

            // Values past the Unicode range cannot be packed meaningfully.
            if ($codePoint > 0x10FFFF) {
                return $match[0];
            }

            // A 32-bit little-endian code unit covers the whole Unicode range,
            // including characters above U+FFFF.
            $converted = iconv('UTF-32LE', $charset, pack('V', $codePoint));

            return $converted === false ? $match[0] : $converted;
        },
        $str
    );
}
?>