Misc fixes
Save the crawler's process ID in the `sp_crawling` DB value instead of a simple boolean flag. Each crawler process can then compare the stored value against its own PID before starting, which further guards against the race conditions that still seem to occur occasionally.
This commit is contained in:
parent
eed50c3727
commit
4bbe1d967b
|
@ -2673,7 +2673,7 @@ ORCINUS;
|
||||||
</label>
|
</label>
|
||||||
</td>
|
</td>
|
||||||
<th class="fs-5 text-center os_sorting os_desc" scope="col">
|
<th class="fs-5 text-center os_sorting os_desc" scope="col">
|
||||||
<span data-bs-toggle="tooltip" data-bs-placement="top" title="The number of times this query has been searched for with (unique users / IP addresses) in brackets"
|
<span data-bs-toggle="tooltip" data-bs-placement="top" title="The number of times this query has been searched for with unique users by IP address in (brackets)"
|
||||||
role="button" id="os_queries_hits">Hits</span>
|
role="button" id="os_queries_hits">Hits</span>
|
||||||
<img src="img/arrow-down.svg" alt="Sort" title="Sort order" class="align-middle svg-icon-sm mb-1">
|
<img src="img/arrow-down.svg" alt="Sort" title="Sort order" class="align-middle svg-icon-sm mb-1">
|
||||||
</th>
|
</th>
|
||||||
|
|
|
@ -101,7 +101,7 @@ if (!in_array($_DDATA['tbprefix'].'config', $_DDATA['tables'], true)) {
|
||||||
`sp_sitemap_file` TINYTEXT NOT NULL,
|
`sp_sitemap_file` TINYTEXT NOT NULL,
|
||||||
`sp_sitemap_hostname` TINYTEXT NOT NULL,
|
`sp_sitemap_hostname` TINYTEXT NOT NULL,
|
||||||
`sp_useragent` TINYTEXT NOT NULL,
|
`sp_useragent` TINYTEXT NOT NULL,
|
||||||
`sp_crawling` BOOLEAN NOT NULL,
|
`sp_crawling` INT UNSIGNED NOT NULL,
|
||||||
`sp_cancel` BOOLEAN NOT NULL,
|
`sp_cancel` BOOLEAN NOT NULL,
|
||||||
`sp_progress` TINYTEXT NOT NULL,
|
`sp_progress` TINYTEXT NOT NULL,
|
||||||
`sp_email_success` BOOLEAN NOT NULL,
|
`sp_email_success` BOOLEAN NOT NULL,
|
||||||
|
|
|
@ -474,19 +474,16 @@ switch ($_SERVER['REQUEST_METHOD']) {
|
||||||
if (empty($_POST->action)) $_POST->action = '';
|
if (empty($_POST->action)) $_POST->action = '';
|
||||||
switch ($_POST->action) {
|
switch ($_POST->action) {
|
||||||
case 'crawl':
|
case 'crawl':
|
||||||
if (!empty($_POST->sp_key) &&
|
if (!empty($_POST->sp_key) && OS_getValue('sp_key') &&
|
||||||
$_ODATA['sp_key'] &&
|
|
||||||
$_POST->sp_key == $_ODATA['sp_key']) {
|
$_POST->sp_key == $_ODATA['sp_key']) {
|
||||||
if (OS_getValue('sp_crawling')) {
|
if (OS_getValue('sp_crawling')) {
|
||||||
$response = array(
|
$response = array(
|
||||||
'status' => 'Error',
|
'status' => 'Error',
|
||||||
'message' => 'Crawler is already running; current progress: '.$_ODATA['sp_progress'][0].'/'.$_ODATA['sp_progress'][1]
|
'message' => 'Crawler is already running; current progress: '.$_ODATA['sp_progress'][0].'/'.$_ODATA['sp_progress'][1]
|
||||||
);
|
);
|
||||||
}
|
|
||||||
|
|
||||||
// Go crawl!
|
// Go crawl!
|
||||||
OS_setValue('sp_crawling', 1);
|
} else OS_setValue('sp_crawling', getmypid());
|
||||||
OS_setValue('sp_key', '');
|
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
$response = array(
|
$response = array(
|
||||||
|
@ -494,6 +491,8 @@ switch ($_SERVER['REQUEST_METHOD']) {
|
||||||
'message' => 'Incorrect key to initiate crawler'
|
'message' => 'Incorrect key to initiate crawler'
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
OS_setValue('sp_key', '');
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 'progress':
|
case 'progress':
|
||||||
|
@ -513,6 +512,7 @@ switch ($_SERVER['REQUEST_METHOD']) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// If crawl is in progress, return just the last 15 lines
|
||||||
if ($_ODATA['sp_crawling']) $lines = array_slice($lines, -15);
|
if ($_ODATA['sp_crawling']) $lines = array_slice($lines, -15);
|
||||||
|
|
||||||
$response = array(
|
$response = array(
|
||||||
|
@ -603,7 +603,7 @@ switch ($_SERVER['REQUEST_METHOD']) {
|
||||||
} else $_RDATA['sp_log_clilevel'] = 2;
|
} else $_RDATA['sp_log_clilevel'] = 2;
|
||||||
|
|
||||||
// Start a crawl
|
// Start a crawl
|
||||||
OS_setValue('sp_crawling', 1);
|
OS_setValue('sp_crawling', getmypid());
|
||||||
|
|
||||||
} else die('Crawler is already running; exiting...');
|
} else die('Crawler is already running; exiting...');
|
||||||
} else die($_ODATA['sp_useragent']);
|
} else die($_ODATA['sp_useragent']);
|
||||||
|
@ -619,7 +619,7 @@ switch ($_SERVER['REQUEST_METHOD']) {
|
||||||
die('Crawler is already running; exiting...');
|
die('Crawler is already running; exiting...');
|
||||||
|
|
||||||
// Start a crawl
|
// Start a crawl
|
||||||
OS_setValue('sp_crawling', 1);
|
OS_setValue('sp_crawling', getmypid());
|
||||||
|
|
||||||
} else die($_ODATA['sp_useragent']);
|
} else die($_ODATA['sp_useragent']);
|
||||||
break;
|
break;
|
||||||
|
@ -632,6 +632,12 @@ switch ($_SERVER['REQUEST_METHOD']) {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// One last check for a race condition
|
||||||
|
sleep(1);
|
||||||
|
if (OS_getValue('sp_crawling') != getmypid())
|
||||||
|
die('Crawler is already running; exiting...');
|
||||||
|
|
||||||
|
|
||||||
/* ***** Begin Crawl Execution ************************************* */
|
/* ***** Begin Crawl Execution ************************************* */
|
||||||
register_shutdown_function('OS_crawlCleanUp');
|
register_shutdown_function('OS_crawlCleanUp');
|
||||||
ignore_user_abort(true);
|
ignore_user_abort(true);
|
||||||
|
|
|
@ -495,7 +495,7 @@ if ($_RDATA['s_searchable_pages']) {
|
||||||
if (count($splitter) == 1) {
|
if (count($splitter) == 1) {
|
||||||
// Grab some random content if there were no
|
// Grab some random content if there were no
|
||||||
// matches in the content
|
// matches in the content
|
||||||
$offset = mt_rand(0, mb_strlen($row['content'], 'UTF-8') - $_ODATA['s_limit_matchtext']);
|
$offset = mt_rand(0, max(0, mb_strlen($row['content'], 'UTF-8') - $_ODATA['s_limit_matchtext']));
|
||||||
} else {
|
} else {
|
||||||
$_SDATA['results'][$key]['fragment'][] = $split[0];
|
$_SDATA['results'][$key]['fragment'][] = $split[0];
|
||||||
$offset = floor(max(0, $split[1] - (mb_strlen($term, 'UTF-8') + $_ODATA['s_limit_matchtext']) / 2));
|
$offset = floor(max(0, $split[1] - (mb_strlen($term, 'UTF-8') + $_ODATA['s_limit_matchtext']) / 2));
|
||||||
|
|
Loading…
Reference in a new issue