Misc fixes

Store the crawler's process ID in the sp_crawling DB value instead of a simple boolean flag; each crawler process can then compare the stored value against its own process ID, which further guards against the race conditions that still seem to occur occasionally.
This commit is contained in:
Brian Huisman 2023-10-17 10:36:34 -04:00
parent eed50c3727
commit 4bbe1d967b
4 changed files with 16 additions and 10 deletions

View file

@ -2673,7 +2673,7 @@ ORCINUS;
</label>
</td>
<th class="fs-5 text-center os_sorting os_desc" scope="col">
<span data-bs-toggle="tooltip" data-bs-placement="top" title="The number of times this query has been searched for with (unique users / IP addresses) in brackets"
<span data-bs-toggle="tooltip" data-bs-placement="top" title="The number of times this query has been searched for with unique users by IP address in (brackets)"
role="button" id="os_queries_hits">Hits</span>
<img src="img/arrow-down.svg" alt="Sort" title="Sort order" class="align-middle svg-icon-sm mb-1">
</th>

View file

@ -101,7 +101,7 @@ if (!in_array($_DDATA['tbprefix'].'config', $_DDATA['tables'], true)) {
`sp_sitemap_file` TINYTEXT NOT NULL,
`sp_sitemap_hostname` TINYTEXT NOT NULL,
`sp_useragent` TINYTEXT NOT NULL,
`sp_crawling` BOOLEAN NOT NULL,
`sp_crawling` INT UNSIGNED NOT NULL,
`sp_cancel` BOOLEAN NOT NULL,
`sp_progress` TINYTEXT NOT NULL,
`sp_email_success` BOOLEAN NOT NULL,

View file

@ -474,19 +474,16 @@ switch ($_SERVER['REQUEST_METHOD']) {
if (empty($_POST->action)) $_POST->action = '';
switch ($_POST->action) {
case 'crawl':
if (!empty($_POST->sp_key) &&
$_ODATA['sp_key'] &&
if (!empty($_POST->sp_key) && OS_getValue('sp_key') &&
$_POST->sp_key == $_ODATA['sp_key']) {
if (OS_getValue('sp_crawling')) {
$response = array(
'status' => 'Error',
'message' => 'Crawler is already running; current progress: '.$_ODATA['sp_progress'][0].'/'.$_ODATA['sp_progress'][1]
);
}
// Go crawl!
OS_setValue('sp_crawling', 1);
OS_setValue('sp_key', '');
} else OS_setValue('sp_crawling', getmypid());
} else {
$response = array(
@ -494,6 +491,8 @@ switch ($_SERVER['REQUEST_METHOD']) {
'message' => 'Incorrect key to initiate crawler'
);
}
OS_setValue('sp_key', '');
break;
case 'progress':
@ -513,6 +512,7 @@ switch ($_SERVER['REQUEST_METHOD']) {
}
}
// If crawl is in progress, return just the last 15 lines
if ($_ODATA['sp_crawling']) $lines = array_slice($lines, -15);
$response = array(
@ -603,7 +603,7 @@ switch ($_SERVER['REQUEST_METHOD']) {
} else $_RDATA['sp_log_clilevel'] = 2;
// Start a crawl
OS_setValue('sp_crawling', 1);
OS_setValue('sp_crawling', getmypid());
} else die('Crawler is already running; exiting...');
} else die($_ODATA['sp_useragent']);
@ -619,7 +619,7 @@ switch ($_SERVER['REQUEST_METHOD']) {
die('Crawler is already running; exiting...');
// Start a crawl
OS_setValue('sp_crawling', 1);
OS_setValue('sp_crawling', getmypid());
} else die($_ODATA['sp_useragent']);
break;
@ -632,6 +632,12 @@ switch ($_SERVER['REQUEST_METHOD']) {
}
// One last check for a race condition
sleep(1);
if (OS_getValue('sp_crawling') != getmypid())
die('Crawler is already running; exiting...');
/* ***** Begin Crawl Execution ************************************* */
register_shutdown_function('OS_crawlCleanUp');
ignore_user_abort(true);

View file

@ -495,7 +495,7 @@ if ($_RDATA['s_searchable_pages']) {
if (count($splitter) == 1) {
// Grab some random content if there were no
// matches in the content
$offset = mt_rand(0, mb_strlen($row['content'], 'UTF-8') - $_ODATA['s_limit_matchtext']);
$offset = mt_rand(0, max(0, mb_strlen($row['content'], 'UTF-8') - $_ODATA['s_limit_matchtext']));
} else {
$_SDATA['results'][$key]['fragment'][] = $split[0];
$offset = floor(max(0, $split[1] - (mb_strlen($term, 'UTF-8') + $_ODATA['s_limit_matchtext']) / 2));