/**
 * Converts an XML string to a PHP array.
 * This is the reverse function of array2xml()
 *
 * Mapping rules applied to every element:
 * - If $NSprefix is given and the tag name starts with it, the prefix is stripped.
 * - A tag name of the form "nXXX" (XXX being a canonical integer) becomes the integer key XXX.
 * - A non-empty "index" attribute overrides the tag name as array key.
 * - An element with child elements becomes a nested array.
 * - An element without child elements becomes a string, unless the attribute "base64" is set
 *   (value is base64-decoded) or the attribute "type" is "integer", "double", "boolean" or "array"
 *   (value is cast accordingly; "array" yields an empty array).
 *
 * @param string $string XML content to convert into an array
 * @param string $NSprefix The tag-prefix to resolve, eg. a namespace like "T3:"
 * @param boolean $reportDocTag If set, the document tag will be set in the key "_DOCUMENT_TAG" of the output array (only possible when the document element resolves to an array)
 * @return mixed If the parsing had errors, a string with the error message ("Line N: message") is returned. Otherwise the content of the document tag (normally an array). NULL if no document element was collected.
 * @see array2xml()
 */
static function xml2array($string,$NSprefix='',$reportDocTag=false) {
	// Create parser; keep tag names case-sensitive and keep whitespace-only character data:
	$parser = xml_parser_create();
	$vals = array();
	$index = array();
	xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, 0);
	xml_parser_set_option($parser, XML_OPTION_SKIP_WHITE, 0);

	// PHP5+ charset awareness:
	// From PHP5 on the parser converts the content to its target encoding (utf-8, iso-8859-1 or us-ascii).
	// The target encoding is pinned to utf-8 here so the output charset is predictable.
	// (An earlier ereg()-based sniffing of the encoding declaration was removed: its result was never
	// used, and ereg() no longer exists since PHP 7.0.)
	if (version_compare(PHP_VERSION, '5.0.0', '>=')) {
		xml_parser_set_option($parser, XML_OPTION_TARGET_ENCODING, 'utf-8');
	}

	// Parse content:
	xml_parse_into_struct($parser, $string, $vals, $index);

	// Collect a possible error BEFORE freeing the parser, then free it on every code path:
	$errorCode = xml_get_error_code($parser);
	$errorMessage = '';
	if ($errorCode) {
		$errorMessage = 'Line '.xml_get_current_line_number($parser).': '.xml_error_string($errorCode);
	}
	xml_parser_free($parser);

	// If error, return error message:
	if ($errorCode) {
		return $errorMessage;
	}

	// Init vars:
	$stack = array(array());
	$stacktop = 0;
	$current = array();
	$tagName = '';
	$documentTag = '';
	$prefixLength = strlen($NSprefix);	// Byte length; compared byte-wise below so multibyte prefixes work too.

	// Traverse the parsed XML structure:
	foreach ($vals as $val) {
			// First, process the tag-name (which is used in both cases, whether "complete" or "close")
		$tagName = $val['tag'];
		if (!$documentTag) {
			$documentTag = $tagName;
		}

			// Test for name space:
		if ($prefixLength && strncmp($tagName, $NSprefix, $prefixLength) === 0) {
			$tagName = (string)substr($tagName, $prefixLength);
		}

			// Test for numeric tag, encoded on the form "nXXX".
			// Only canonical integers qualify ("n007" stays a string key since "7" !== "007").
		if (substr($tagName, 0, 1) === 'n') {
			$numericPart = (string)substr($tagName, 1);
			if ((string)(int)$numericPart === $numericPart) {
				$tagName = (int)$numericPart;
			}
		}

			// Test for alternative index value:
		if (isset($val['attributes']['index']) && strlen($val['attributes']['index'])) {
			$tagName = $val['attributes']['index'];
		}

			// Setting tag-values, manage stack (entries of other types, e.g. "cdata", are ignored):
		switch ($val['type']) {
			case 'open':		// If open tag it means there is an array stored in sub-elements. Therefore increase the stackpointer and reset the accumulation array:
				$current[$tagName] = array();	// Setting blank place holder
				$stack[$stacktop++] = $current;
				$current = array();
			break;
			case 'close':	// If the tag is "close" then it is an array which is closing and we decrease the stack pointer.
				$oldCurrent = $current;
				$current = $stack[--$stacktop];
				end($current);	// Going to the end of array to get placeholder key, key($current), and fill in array next:
				$current[key($current)] = $oldCurrent;
				unset($oldCurrent);
			break;
			case 'complete':	// If "complete", then it's a value. If the attribute "base64" is set, then decode the value, otherwise just set it.
				$rawValue = isset($val['value']) ? $val['value'] : '';
				if (isset($val['attributes']['base64']) && $val['attributes']['base64']) {
					$current[$tagName] = base64_decode($rawValue);
				} else {
					$current[$tagName] = (string)$rawValue;	// Cast as a string - a missing value must still yield a key that passes isset()

						// Cast type:
					if (isset($val['attributes']['type'])) {
						switch ((string)$val['attributes']['type']) {
							case 'integer':
								$current[$tagName] = (int)$current[$tagName];
							break;
							case 'double':
								$current[$tagName] = (float)$current[$tagName];
							break;
							case 'boolean':
								$current[$tagName] = (bool)$current[$tagName];
							break;
							case 'array':
								$current[$tagName] = array();	// MUST be an empty array since it is processed as a value; Empty arrays would end up here because they would have no tags inside...
							break;
						}
					}
				}
			break;
		}
	}

	// Nothing was collected under the key of the last processed tag:
	if (!isset($current[$tagName])) {
		return null;
	}

	// The document tag can only be reported inside an array; a scalar document element is returned untouched:
	if ($reportDocTag && is_array($current[$tagName])) {
		$current[$tagName]['_DOCUMENT_TAG'] = $documentTag;
	}

	// Finally return the content of the document tag.
	return $current[$tagName];
}