Parse UTF-16 and UTF-8 BOM in all text uploads

This commit is contained in:
Jakub Vrana 2011-03-07 14:27:03 +01:00
parent db030df487
commit 9ff10f8301
2 changed files with 10 additions and 2 deletions

View file

@ -489,10 +489,18 @@ function get_file($key, $decompress = false) {
if (!$file || $file["error"]) { if (!$file || $file["error"]) {
return $file["error"]; return $file["error"];
} }
return file_get_contents($decompress && ereg('\\.gz$', $file["name"]) ? "compress.zlib://$file[tmp_name]" $return = file_get_contents($decompress && ereg('\\.gz$', $file["name"]) ? "compress.zlib://$file[tmp_name]"
: ($decompress && ereg('\\.bz2$', $file["name"]) ? "compress.bzip2://$file[tmp_name]" : ($decompress && ereg('\\.bz2$', $file["name"]) ? "compress.bzip2://$file[tmp_name]"
: $file["tmp_name"] : $file["tmp_name"]
)); //! may not be reachable because of open_basedir )); //! may not be reachable because of open_basedir
if ($decompress) {
if (function_exists("iconv") && ereg("^\xFE\xFF|^\xFF\xFE", $return, $regs)) {
$return = iconv("utf-16", "utf-8", $return);
} else { // not ternary operator to save memory
$return = ereg_replace("^\xEF\xBB\xBF", "", $return); // UTF-8 BOM
}
}
return $return;
} }
/** Determine upload error /** Determine upload error

View file

@ -141,7 +141,7 @@ if ($_POST && !$error) {
queries_redirect(remove_from_uri(), lang('%d item(s) have been affected.', $affected), $result); queries_redirect(remove_from_uri(), lang('%d item(s) have been affected.', $affected), $result);
} }
} elseif (is_string($file = get_file("csv_file", true))) { } elseif (is_string($file = get_file("csv_file", true))) {
$file = preg_replace("~^\xEF\xBB\xBF~", '', $file); //! character set //! character set
$result = true; $result = true;
$cols = array_keys($fields); $cols = array_keys($fields);
preg_match_all('~(?>"[^"]*"|[^"\\r\\n]+)+~', $file, $matches); preg_match_all('~(?>"[^"]*"|[^"\\r\\n]+)+~', $file, $matches);