Improve speed of CSV import

This commit is contained in:
Jakub Vrana 2013-07-09 11:34:12 -07:00
parent 9de402aaf0
commit 56b0917acd
8 changed files with 74 additions and 37 deletions

View file

@ -237,20 +237,25 @@ if (isset($_GET["mssql"])) {
class Min_Driver extends Min_SQL { class Min_Driver extends Min_SQL {
function insertUpdate($table, $set, $primary) { function insertUpdate($table, $rows, $primary) {
$update = array(); foreach ($rows as $set) {
$where = array(); $update = array();
foreach ($set as $key => $val) { $where = array();
$update[] = "$key = $val"; foreach ($set as $key => $val) {
if (isset($primary[idf_unescape($key)])) { $update[] = "$key = $val";
$where[] = "$key = $val"; if (isset($primary[idf_unescape($key)])) {
$where[] = "$key = $val";
}
}
//! can use only one query for all rows
if (!queries("MERGE " . table($table) . " USING (VALUES(" . implode(", ", $set) . ")) AS source (c" . implode(", c", range(1, count($set))) . ") ON " . implode(" AND ", $where) //! source, c1 - possible conflict
. " WHEN MATCHED THEN UPDATE SET " . implode(", ", $update)
. " WHEN NOT MATCHED THEN INSERT (" . implode(", ", array_keys($set)) . ") VALUES (" . implode(", ", $set) . ");" // ; is mandatory
)) {
return false;
} }
} }
// can use only one query for all rows with different API return true;
return queries("MERGE " . table($table) . " USING (VALUES(" . implode(", ", $set) . ")) AS source (c" . implode(", c", range(1, count($set))) . ") ON " . implode(" AND ", $where) //! source, c1 - possible conflict
. " WHEN MATCHED THEN UPDATE SET " . implode(", ", $update)
. " WHEN NOT MATCHED THEN INSERT (" . implode(", ", array_keys($set)) . ") VALUES (" . implode(", ", $set) . ");" // ; is mandatory
);
} }
} }

View file

@ -237,12 +237,29 @@ if (!defined("DRIVER")) {
return ($set ? parent::insert($table, $set) : queries("INSERT INTO " . table($table) . " ()\nVALUES ()")); return ($set ? parent::insert($table, $set) : queries("INSERT INTO " . table($table) . " ()\nVALUES ()"));
} }
function insertUpdate($table, $set, $primary) { function insertUpdate($table, $rows, $primary) {
foreach ($set as $key => $val) { $columns = array_keys(reset($rows));
$set[$key] = "$key = VALUES($key)"; $prefix = "INSERT INTO " . table($table) . " (" . implode(", ", $columns) . ") VALUES\n";
$values = array();
foreach ($columns as $key) {
$values[$key] = "$key = VALUES($key)";
} }
$update = implode(", ", $set); $suffix = "\nON DUPLICATE KEY UPDATE " . implode(", ", $values);
return queries("INSERT INTO " . table($table) . " SET $update ON DUPLICATE KEY UPDATE $update"); $values = array();
$length = 0;
foreach ($rows as $set) {
$value = "(" . implode(", ", $set) . ")";
if ($values && (strlen($prefix) + $length + strlen($value) + strlen($suffix) > 1e6)) { // 1e6 - default max_allowed_packet
if (!queries($prefix . implode(",\n", $values) . $suffix)) {
return false;
}
$values = array();
$length = 0;
}
$values[] = $value;
$length += strlen($value) + 2; // 2 - strlen(",\n")
}
return queries($prefix . implode(",\n", $values) . $suffix);
} }
} }

View file

@ -153,19 +153,24 @@ if (isset($_GET["pgsql"])) {
class Min_Driver extends Min_SQL { class Min_Driver extends Min_SQL {
function insertUpdate($table, $set, $primary) { function insertUpdate($table, $rows, $primary) {
global $connection; global $connection;
$update = array(); foreach ($rows as $set) {
$where = array(); $update = array();
foreach ($set as $key => $val) { $where = array();
$update[] = "$key = $val"; foreach ($set as $key => $val) {
if (isset($primary[idf_unescape($key)])) { $update[] = "$key = $val";
$where[] = "$key = $val"; if (isset($primary[idf_unescape($key)])) {
$where[] = "$key = $val";
}
}
if (!(($where && queries("UPDATE " . table($table) . " SET " . implode(", ", $update) . " WHERE " . implode(" AND ", $where)) && $connection->affected_rows)
|| queries("INSERT INTO " . table($table) . " (" . implode(", ", array_keys($set)) . ") VALUES (" . implode(", ", $set) . ")")
)) {
return false;
} }
} }
return ($where && queries("UPDATE " . table($table) . " SET " . implode(", ", $update) . " WHERE " . implode(" AND ", $where)) && $connection->affected_rows) return true;
|| queries("INSERT INTO " . table($table) . " (" . implode(", ", array_keys($set)) . ") VALUES (" . implode(", ", $set) . ")")
;
} }
} }

View file

@ -200,8 +200,14 @@ if (isset($_GET["simpledb"])) {
return sdb_request('PutAttributes', $params); return sdb_request('PutAttributes', $params);
} }
function insertUpdate($table, $set, $primary) { function insertUpdate($table, $rows, $primary) {
return $this->update($table, $set, "WHERE `itemName()` = " . q($set["`itemName()`"])); //! use one batch request
foreach ($rows as $set) {
if (!$this->update($table, $set, "WHERE `itemName()` = " . q($set["`itemName()`"]))) {
return false;
}
}
return true;
} }
} }

View file

@ -208,8 +208,12 @@ if (isset($_GET["sqlite"]) || isset($_GET["sqlite2"])) {
class Min_Driver extends Min_SQL { class Min_Driver extends Min_SQL {
function insertUpdate($table, $set, $primary) { function insertUpdate($table, $rows, $primary) {
return queries("REPLACE INTO " . table($table) . " (" . implode(", ", array_keys($set)) . ") VALUES (" . implode(", ", $set) . ")"); $values = array();
foreach ($rows as $set) {
$values[] = "(" . implode(", ", $set) . ")";
}
return queries("REPLACE INTO " . table($table) . " (" . implode(", ", array_keys(reset($rows))) . ") VALUES\n" . implode(",\n", $values));
} }
} }

View file

@ -53,10 +53,10 @@
/** Insert or update data in table /** Insert or update data in table
* @param string * @param string
* @param array * @param array
* @param array columns in keys * @param array of arrays with escaped columns in keys and quoted data in values
* @return bool * @return bool
*/ */
/*abstract*/ function insertUpdate($table, $set, $primary) { /*abstract*/ function insertUpdate($table, $rows, $primary) {
return false; return false;
} }

View file

@ -175,6 +175,7 @@ if ($_POST && !$error) {
$affected = count($matches[0]); $affected = count($matches[0]);
begin(); begin();
$separator = ($_POST["separator"] == "csv" ? "," : ($_POST["separator"] == "tsv" ? "\t" : ";")); $separator = ($_POST["separator"] == "csv" ? "," : ($_POST["separator"] == "tsv" ? "\t" : ";"));
$rows = array();
foreach ($matches[0] as $key => $val) { foreach ($matches[0] as $key => $val) {
preg_match_all("~((?>\"[^\"]*\")+|[^$separator]*)$separator~", $val . $separator, $matches2); preg_match_all("~((?>\"[^\"]*\")+|[^$separator]*)$separator~", $val . $separator, $matches2);
if (!$key && !array_diff($matches2[1], $cols)) { //! doesn't work with column names containing ",\n if (!$key && !array_diff($matches2[1], $cols)) { //! doesn't work with column names containing ",\n
@ -186,12 +187,10 @@ if ($_POST && !$error) {
foreach ($matches2[1] as $i => $col) { foreach ($matches2[1] as $i => $col) {
$set[idf_escape($cols[$i])] = ($col == "" && $fields[$cols[$i]]["null"] ? "NULL" : q(str_replace('""', '"', preg_replace('~^"|"$~', '', $col)))); $set[idf_escape($cols[$i])] = ($col == "" && $fields[$cols[$i]]["null"] ? "NULL" : q(str_replace('""', '"', preg_replace('~^"|"$~', '', $col))));
} }
$result = $driver->insertUpdate($TABLE, $set, $primary); $rows[] = $set;
if (!$result) {
break;
}
} }
} }
$result = (!$rows || $driver->insertUpdate($TABLE, $rows, $primary));
if ($result) { if ($result) {
queries("COMMIT"); queries("COMMIT");
} }

View file

@ -7,6 +7,7 @@ Add label to database selection
Add button for dropping an index Add button for dropping an index
Display number of selected rows Display number of selected rows
Disable underlining links Disable underlining links
Improve speed of CSV import
PostgreSQL: Fix handling of nextval() default values PostgreSQL: Fix handling of nextval() default values
Adminer 3.7.1 (released 2013-06-29): Adminer 3.7.1 (released 2013-06-29):