Parse UTF-16 and UTF-8 BOM in all text uploads

This commit is contained in:
Jakub Vrana 2011-03-07 14:27:03 +01:00
parent db030df487
commit 9ff10f8301
2 changed files with 10 additions and 2 deletions

View file

@ -489,10 +489,18 @@ function get_file($key, $decompress = false) {
if (!$file || $file["error"]) {
return $file["error"];
}
return file_get_contents($decompress && ereg('\\.gz$', $file["name"]) ? "compress.zlib://$file[tmp_name]"
$return = file_get_contents($decompress && ereg('\\.gz$', $file["name"]) ? "compress.zlib://$file[tmp_name]"
: ($decompress && ereg('\\.bz2$', $file["name"]) ? "compress.bzip2://$file[tmp_name]"
: $file["tmp_name"]
)); //! may not be reachable because of open_basedir
if ($decompress) {
if (function_exists("iconv") && ereg("^\xFE\xFF|^\xFF\xFE", $return, $regs)) {
$return = iconv("utf-16", "utf-8", $return);
} else { // not ternary operator to save memory
$return = ereg_replace("^\xEF\xBB\xBF", "", $return); // UTF-8 BOM
}
}
return $return;
}
/** Determine upload error

View file

@ -141,7 +141,7 @@ if ($_POST && !$error) {
queries_redirect(remove_from_uri(), lang('%d item(s) have been affected.', $affected), $result);
}
} elseif (is_string($file = get_file("csv_file", true))) {
$file = preg_replace("~^\xEF\xBB\xBF~", '', $file); //! character set
//! character set
$result = true;
$cols = array_keys($fields);
preg_match_all('~(?>"[^"]*"|[^"\\r\\n]+)+~', $file, $matches);