From 4bbe1d967baf93842bce8556cab44ad3a5506b80 Mon Sep 17 00:00:00 2001 From: Brian Huisman Date: Tue, 17 Oct 2023 10:36:34 -0400 Subject: [PATCH] Misc fixes Save the process id of the crawler in the sp_crawling DB value instead of just a flag; we can use it to compare and further prevent race conditions which still seem to happen occasionally. --- orcinus/admin.php | 2 +- orcinus/config.php | 2 +- orcinus/crawler.php | 20 +++++++++++++------- orcinus/search.php | 2 +- 4 files changed, 16 insertions(+), 10 deletions(-) diff --git a/orcinus/admin.php b/orcinus/admin.php index 47a4053..0dae315 100644 --- a/orcinus/admin.php +++ b/orcinus/admin.php @@ -2673,7 +2673,7 @@ ORCINUS; - Hits Sort diff --git a/orcinus/config.php b/orcinus/config.php index 52fb2d4..1e1ce8c 100644 --- a/orcinus/config.php +++ b/orcinus/config.php @@ -101,7 +101,7 @@ if (!in_array($_DDATA['tbprefix'].'config', $_DDATA['tables'], true)) { `sp_sitemap_file` TINYTEXT NOT NULL, `sp_sitemap_hostname` TINYTEXT NOT NULL, `sp_useragent` TINYTEXT NOT NULL, - `sp_crawling` BOOLEAN NOT NULL, + `sp_crawling` INT UNSIGNED NOT NULL, `sp_cancel` BOOLEAN NOT NULL, `sp_progress` TINYTEXT NOT NULL, `sp_email_success` BOOLEAN NOT NULL, diff --git a/orcinus/crawler.php b/orcinus/crawler.php index f74c4d7..b8e560f 100644 --- a/orcinus/crawler.php +++ b/orcinus/crawler.php @@ -474,19 +474,16 @@ switch ($_SERVER['REQUEST_METHOD']) { if (empty($_POST->action)) $_POST->action = ''; switch ($_POST->action) { case 'crawl': - if (!empty($_POST->sp_key) && - $_ODATA['sp_key'] && + if (!empty($_POST->sp_key) && OS_getValue('sp_key') && $_POST->sp_key == $_ODATA['sp_key']) { if (OS_getValue('sp_crawling')) { $response = array( 'status' => 'Error', 'message' => 'Crawler is already running; current progress: '.$_ODATA['sp_progress'][0].'/'.$_ODATA['sp_progress'][1] ); - } // Go crawl! - OS_setValue('sp_crawling', 1); - OS_setValue('sp_key', ''); + } else OS_setValue('sp_crawling', getmypid()); } else { $response = array( @@ -494,6 +491,8 @@ switch ($_SERVER['REQUEST_METHOD']) { 'message' => 'Incorrect key to initiate crawler' ); } + + OS_setValue('sp_key', ''); break; case 'progress': @@ -513,6 +512,7 @@ switch ($_SERVER['REQUEST_METHOD']) { } } + // If crawl is in progress, return just the last 15 lines if ($_ODATA['sp_crawling']) $lines = array_slice($lines, -15); $response = array( @@ -603,7 +603,7 @@ switch ($_SERVER['REQUEST_METHOD']) { } else $_RDATA['sp_log_clilevel'] = 2; // Start a crawl - OS_setValue('sp_crawling', 1); + OS_setValue('sp_crawling', getmypid()); } else die('Crawler is already running; exiting...'); } else die($_ODATA['sp_useragent']); @@ -619,7 +619,7 @@ switch ($_SERVER['REQUEST_METHOD']) { die('Crawler is already running; exiting...'); // Start a crawl - OS_setValue('sp_crawling', 1); + OS_setValue('sp_crawling', getmypid()); } else die($_ODATA['sp_useragent']); break; @@ -632,6 +632,12 @@ switch ($_SERVER['REQUEST_METHOD']) { } +// One last check for a race condition +sleep(1); +if (OS_getValue('sp_crawling') != getmypid()) + die('Crawler is already running; exiting...'); + + /* ***** Begin Crawl Execution ************************************* */ register_shutdown_function('OS_crawlCleanUp'); ignore_user_abort(true); diff --git a/orcinus/search.php b/orcinus/search.php index 3087568..715e69c 100644 --- a/orcinus/search.php +++ b/orcinus/search.php @@ -495,7 +495,7 @@ if ($_RDATA['s_searchable_pages']) { if (count($splitter) == 1) { // Grab some random content if there were no // matches in the content - $offset = mt_rand(0, mb_strlen($row['content'], 'UTF-8') - $_ODATA['s_limit_matchtext']); + $offset = mt_rand(0, max(0, mb_strlen($row['content'], 'UTF-8') - $_ODATA['s_limit_matchtext'])); } else { $_SDATA['results'][$key]['fragment'][] = $split[0]; $offset = floor(max(0, $split[1] - (mb_strlen($term, 'UTF-8') + $_ODATA['s_limit_matchtext']) / 2));