детектор кодировки

_Dio_

Новичок
детектор кодировки

Кто-нибудь встречал функции, определяющие, в какой кодировке записан русский текст?
 

[ Z ]

Новичок
PHP:
/**
 * Guess which single-byte Cyrillic encoding a Russian text is stored in.
 *
 * Every byte of the text is scored against a table of Russian letter
 * frequencies, once per candidate encoding; the encoding whose interpretation
 * of the bytes yields the most "Russian-looking" letter distribution wins.
 * The function works on raw byte values only, so it does not depend on the
 * encoding of this source file or on the process locale.
 *
 * @param string   $text  Raw bytes of the text to inspect.
 * @param int|bool $short When truthy (default) only the first 200 bytes are examined.
 *
 * @return string One of:
 *                "none" - encoding not detected (no byte scored as a Cyrillic letter)
 *                "w"    - windows-1251
 *                "k"    - KOI8-R
 *                "i"    - ISO-8859-5
 *                "a"    - DOS 866
 */
function detect_encoding($text, $short = 1) {
	// Relative frequency of the 32 letters "а".."я" (without "ё") in Russian
	// text, in alphabetical order: index 0 is "а", index 31 is "я".
	$freq = array(
		0.07890365448505,   0.013981173864895,  0.043050941306755,  0.018687707641196,
		0.027685492801772,  0.089285714285714,  0.0094130675526024, 0.01578073089701,
		0.071151716500554,  0.013427464008859,  0.038898117386489,  0.044435215946844,
		0.032392026578073,  0.072120708748616,  0.11600221483942,   0.024363233665559,
		0.040420819490587,  0.054817275747508,  0.063538205980066,  0.024363233665559,
		0.0016611295681063, 0.0080287929125138, 0.0038759689922481, 0.017303433001107,
		0.008859357696567,  0.0024916943521595, 0.00027685492801772, 0.018410852713178,
		0.017995570321152,  0.002906976744186,  0.0065060908084164, 0.018964562569214,
	);

	// KOI8-R does not store letters alphabetically. Position N in this list is
	// the alphabet index of the letter found at byte 0xC0+N (lower case) and
	// 0xE0+N (upper case): ю а б ц д е ф г х и й к л м н о п я р с т у ж в ь ы з ш э щ ч ъ.
	$koi_order = array(
		30,  0,  1, 22,  4,  5, 20,  3, 21,  8,  9, 10, 11, 12, 13, 14,
		15, 31, 16, 17, 18, 19,  6,  2, 28, 27,  7, 24, 29, 25, 23, 26,
	);

	// byte value => letter frequency, one table per candidate encoding.
	// The key order (w, k, i, a) matters for tie-breaking below.
	$weights = array('w' => array(), 'k' => array(), 'i' => array(), 'a' => array());
	foreach ($freq as $i => $f) {
		// windows-1251: А-Я at 0xC0-0xDF, а-я at 0xE0-0xFF.
		$weights['w'][0xC0 + $i] = $f;
		$weights['w'][0xE0 + $i] = $f;
		// ISO-8859-5: А-Я at 0xB0-0xCF, а-я at 0xD0-0xEF.
		$weights['i'][0xB0 + $i] = $f;
		$weights['i'][0xD0 + $i] = $f;
		// CP866: А-Я at 0x80-0x9F, а-п at 0xA0-0xAF, р-я at 0xE0-0xEF.
		$weights['a'][0x80 + $i] = $f;
		$weights['a'][$i < 16 ? 0xA0 + $i : 0xD0 + $i] = $f;
	}
	foreach ($koi_order as $pos => $i) {
		$weights['k'][0xC0 + $pos] = $freq[$i];
		$weights['k'][0xE0 + $pos] = $freq[$i];
	}

	$text = (string) $text;
	if ($short && strlen($text) > 200) {
		$text = substr($text, 0, 200);
	}

	$scores = array('w' => 0.0, 'k' => 0.0, 'i' => 0.0, 'a' => 0.0);

	// count_chars(..., 1) yields byte value => number of occurrences,
	// listing only the bytes that actually occur in the text.
	foreach (count_chars($text, 1) as $byte => $count) {
		if ($byte < 0x80) {
			continue; // ASCII carries no information about the Cyrillic code page
		}
		foreach ($weights as $code => $table) {
			if (isset($table[$byte])) {
				$scores[$code] += $count * $table[$byte];
			}
		}
	}

	$best = max($scores);
	if ($best <= 0) {
		// Nothing looked like a Cyrillic letter in any candidate encoding.
		return "none";
	}

	// On a tie the later candidate wins (a over i over k over w).
	$r = "none";
	foreach ($scores as $code => $score) {
		if ($score == $best) {
			$r = $code;
		}
	}

	return $r;
}
 
Сверху