>fetchAll())) {
`sp_time_last`=0,
`sp_data_transferred`=0,
`sp_data_stored`=0,
- `sp_links_crawled`=0,
`sp_pages_stored`=0,
`sp_domains`=\'\',
`sp_autodelete`=0,
@@ -280,10 +278,14 @@ function OS_setValue($columnName, $value) {
if (!isset($_ODATA[$columnName])) return 0;
+ $encValue = $value;
+ if (is_array($encValue) || is_object($encValue))
+ $encValue = json_encode($encValue);
+
$update = $_DDATA['pdo']->prepare(
'UPDATE `'.$_DDATA['tbprefix'].'config` SET `'.$columnName.'`=:value;'
);
- $update->execute(array('value' => $value));
+ $update->execute(array('value' => $encValue));
$err = $update->errorInfo();
if ($err[0] != '00000') {
@@ -313,8 +315,10 @@ function OS_getValue($columnName) {
$err = $select->errorInfo();
if ($err[0] == '00000') {
$select = $select->fetchAll();
- if (count($select))
- $_ODATA[$columnName] = $select[0][$columnName];
+ if (count($select)) {
+ $json = json_decode($select[0][$columnName], true);
+ $_ODATA[$columnName] = (!is_null($json)) ? $json : $select[0][$columnName];
+ }
} else if (isset($_SESSION['error']))
$_SESSION['error'][] = 'Could not get live value of \''.$columnName.'\' from config database.';
@@ -361,7 +365,11 @@ $err = $odata->errorInfo();
if ($err[0] == '00000') {
$odata = $odata->fetchAll();
if (count($odata)) {
- $_ODATA = $odata[0];
+ $_ODATA = array();
+ foreach ($odata[0] as $key => $value) {
+ $json = json_decode($value, true);
+ $_ODATA[$key] = (!is_null($json)) ? $json : $value;
+ }
} else throw new Exception('No data in configuration table');
} else throw new Exception('Could not read from configuration table: '.$err[2]);
@@ -706,10 +714,9 @@ if ($err[0] == '00000') {
$_SESSION['error'][] = 'Could not read status data from search database: '.$err[2];
-$_RDATA['sp_domains'] = json_decode($_ODATA['sp_domains'], true);
-if (!is_array($_RDATA['sp_domains'])) $_RDATA['sp_domains'] = array();
-if (count($_RDATA['sp_domains']) == 1 && $_ODATA['jw_hostname'] != key($_RDATA['sp_domains']))
- OS_setValue('jw_hostname', key($_RDATA['sp_domains']));
+if (!is_array($_ODATA['sp_domains'])) $_ODATA['sp_domains'] = array();
+if (count($_ODATA['sp_domains']) == 1 && $_ODATA['jw_hostname'] != key($_ODATA['sp_domains']))
+ OS_setValue('jw_hostname', key($_ODATA['sp_domains']));
// Match Weighting Values
diff --git a/orcinus/crawler.php b/orcinus/crawler.php
index a0f31e5..860e298 100644
--- a/orcinus/crawler.php
+++ b/orcinus/crawler.php
@@ -254,10 +254,6 @@ function OS_crawlCleanUp() {
// var_dump($cookies);
curl_close($_cURL);
- OS_setValue('sp_time_end', time());
- OS_setValue('sp_time_last', $_ODATA['sp_time_end'] - $_ODATA['sp_time_start']);
- OS_setValue('sp_data_transferred', $_RDATA['sp_data_transferred']);
-
// If crawl completed successfully
if ($_RDATA['sp_complete']) {
OS_crawlLog('Cleaning up database tables...', 1);
@@ -299,6 +295,9 @@ function OS_crawlCleanUp() {
// If crawl completed successfully AND we truncated the old table
if ($_RDATA['sp_complete']) {
+ OS_setValue('sp_time_end', time());
+ OS_setValue('sp_time_last', $_ODATA['sp_time_end'] - $_ODATA['sp_time_start']);
+
// Select all rows from the temp table into the existing search table
$insert = $_DDATA['pdo']->query(
'INSERT INTO `'.$_DDATA['tbprefix'].'crawldata`
@@ -330,9 +329,8 @@ function OS_crawlCleanUp() {
'OPTIMIZE TABLE `'.$_DDATA['tbprefix'].'query`;'
);
- OS_setValue('sp_links_crawled', count($_RDATA['sp_links']));
OS_setValue('sp_pages_stored', count($_RDATA['sp_store']));
- OS_setValue('sp_domains', json_encode($_RDATA['sp_domains']));
+ OS_setValue('sp_domains', $_RDATA['sp_domains']);
OS_setValue('sp_time_end_success', $_ODATA['sp_time_end']);
OS_crawlLog('***** Crawl completed in '.$_ODATA['sp_time_last'].'s *****', 1);
@@ -341,7 +339,7 @@ function OS_crawlCleanUp() {
if ($_RDATA['sp_sleep'])
OS_crawlLog('Time spent sleeping: '.(round($_RDATA['sp_sleep'] / 10) / 100).'s', 1);
OS_crawlLog('Time taken by cURL: '.(round($_RDATA['sp_time_curl'] * 100) / 100).'s', 1);
- OS_crawlLog($_ODATA['sp_links_crawled'].' page'.(($_ODATA['sp_links_crawled'] == 1) ? '' : 's').' crawled', 1);
+ OS_crawlLog($_ODATA['sp_progress'][0].' page'.(($_ODATA['sp_progress'][0] == 1) ? '' : 's').' crawled', 1);
OS_crawlLog($_ODATA['sp_pages_stored'].' page'.(($_ODATA['sp_pages_stored'] == 1) ? '' : 's').' stored', 1);
if ($_RDATA['sp_status']['New'])
@@ -398,7 +396,10 @@ function OS_crawlCleanUp() {
// Else the crawl failed
} else {
+ OS_setValue('sp_time_last', $_ODATA['sp_time_end'] - $_ODATA['sp_time_start']);
+
OS_crawlLog('***** Crawl failed; runtime '.$_ODATA['sp_time_last'].'s *****', 1);
+ OS_crawlLog('Total data transferred: '.OS_readSize($_RDATA['sp_data_transferred']), 1);
OS_crawlLog('Search table was NOT updated', 1);
if ($_ODATA['sp_sitemap_file'])
@@ -469,7 +470,7 @@ switch ($_SERVER['REQUEST_METHOD']) {
if ($_ODATA['sp_crawling']) {
$response = array(
'status' => 'Error',
- 'message' => 'Crawler is already running; current progress: '.$_ODATA['sp_progress']
+ 'message' => 'Crawler is already running; current progress: '.$_ODATA['sp_progress'][0].'/'.$_ODATA['sp_progress'][1]
);
}
@@ -506,15 +507,11 @@ switch ($_SERVER['REQUEST_METHOD']) {
$response = array(
'status' => ($_ODATA['sp_crawling']) ? 'Crawling' : 'Complete',
'progress' => $_ODATA['sp_progress'],
- 'time_crawl' => time() - $_ODATA['sp_time_start'],
- 'time_end' => $_ODATA['sp_time_end'],
- 'time_end_success' => $_ODATA['sp_time_end_success'],
- 'time_last' => $_ODATA['sp_time_last'],
- 'timeout_crawl' => $_ODATA['sp_timeout_crawl'],
'data_transferred' => $_ODATA['sp_data_transferred'],
- 'data_stored' => $_ODATA['sp_data_stored'],
- 'links_crawled' => $_ODATA['sp_links_crawled'],
- 'pages_stored' => $_ODATA['sp_pages_stored'],
+ 'time_crawl' => time() - $_ODATA['sp_time_start'],
+ 'time_start' => $_ODATA['sp_time_start'],
+ 'time_end' => $_ODATA['sp_time_end'],
+ 'timeout_crawl' => $_ODATA['sp_timeout_crawl'],
'tail' => trim(implode("\n", $lines))
);
break;
@@ -536,10 +533,7 @@ switch ($_SERVER['REQUEST_METHOD']) {
$log = file_get_contents($_ODATA['sp_log']);
OS_setValue('sp_log', $log."\n".'[ERROR] '.$_POST->reason);
} else OS_setValue('sp_log', '[ERROR] '.$_POST->reason);
- OS_setValue('sp_time_end', time());
- OS_setValue('sp_time_last', time() - $_ODATA['sp_time_start']);
- OS_setValue('sp_data_transferred', 0);
- OS_setValue('sp_data_stored', 0);
+ OS_setValue('sp_time_last', $_ODATA['sp_time_end'] - $_ODATA['sp_time_start']);
// Send failure email to the admin(s)
if ($_MAIL && count($_MAIL->getAllRecipientAddresses()) && $_ODATA['sp_email_failure']) {
@@ -621,10 +615,11 @@ if (function_exists('apache_setenv'))
OS_setValue('sp_crawling', 1);
OS_setValue('sp_cancel', 0);
OS_setValue('sp_time_start', time());
-OS_setValue('sp_links_crawled', 0);
+
+OS_setValue('sp_progress', array(0, 0));
OS_setValue('sp_pages_stored', 0);
-OS_setValue('sp_data_stored', 0);
OS_setValue('sp_data_transferred', 0);
+OS_setValue('sp_data_stored', 0);
OS_setValue('sp_time_last', 0);
@@ -651,7 +646,6 @@ $_RDATA['sp_robots'] = array();
$_RDATA['sp_status'] = array('Orphan' => 0, 'Blocked' => 0, 'Not Found' => 0, 'Updated' => 0, 'New' => 0);
$_RDATA['sp_filter'] = array();
$_RDATA['sp_prev_dls'] = 0;
-$_RDATA['sp_data_transferred'] = 0;
$_RDATA['sp_time_curl'] = 0;
$_RDATA['sp_sleep'] = 0;
$_RDATA['sp_sha1'] = array();
@@ -888,7 +882,11 @@ while ($_cURL && count($_RDATA['sp_queue'])) {
OS_crawlLog('Memory used: '.OS_readSize(memory_get_usage(true)), 1);
OS_crawlLog('Crawling: '.$url.' (Depth: '.$depth.')', 1);
- OS_setValue('sp_progress', count($_RDATA['sp_links']).'/'.(count($_RDATA['sp_links']) + count($_RDATA['sp_queue'])));
+ OS_setValue('sp_progress', array(
+ count($_RDATA['sp_links']),
+ count($_RDATA['sp_links']) + count($_RDATA['sp_queue'])
+ ));
+ OS_setValue('sp_time_end', time());
// Set the correct If-Modified-Since request header
if ($_ODATA['sp_ifmodifiedsince'] && isset($_RDATA['sp_lastmod'][$url])) {
@@ -900,7 +898,7 @@ while ($_cURL && count($_RDATA['sp_queue'])) {
$data = OS_fetchURL($url, $referer);
// Record cURL timing and data info for this fetch
- $_RDATA['sp_data_transferred'] += $data['info']['size_download'];
+ OS_setValue('sp_data_transferred', $_ODATA['sp_data_transferred'] + $data['info']['size_download']);
$_RDATA['sp_time_curl'] += $data['info']['total_time'];
diff --git a/orcinus/js/admin.js b/orcinus/js/admin.js
index 79562a9..8d56d58 100644
--- a/orcinus/js/admin.js
+++ b/orcinus/js/admin.js
@@ -136,12 +136,14 @@ for (let x = 0; x < countUpTimers.length; x++) {
} else this.spans[2].classList.remove('d-none');
let secPlural = (parseInt(this.spans[3].tVar.firstChild.nodeValue) == 1) ? 0 : 1;
- this.spans[3].tVar.nextSibling.nodeValue = ' ' + countUpPeriods[3][secPlural] + ' ago';
+ this.spans[3].tVar.nextSibling.nodeValue = ' ' + countUpPeriods[3][secPlural];
};
- setInterval(function() {
- countUpTimers[x].incrementTime();
- }, 1000);
+ if (countUpTimers[x].classList.contains('active')) {
+ countUpTimers[x].interval = setInterval(function() {
+ countUpTimers[x].incrementTime();
+ }, 1000);
+ }
}
@@ -420,76 +422,38 @@ let os_get_crawl_progress = function(getLog) {
os_crawl_start.innerHTML = 'Crawling...';
os_crawl_navbar.innerHTML = 'Crawling...';
+ let os_countup_time_end = document.getElementById('os_countup_time_end');
+ if (os_countup_time_end) {
+ clearInterval(os_countup_time_end.interval);
+ os_countup_time_end.parentNode.innerHTML = 'Currently crawling... ';
+ }
+
+ let os_countup_time_crawl = document.getElementById('os_countup_time_crawl');
+ if (os_countup_time_crawl) {
+ os_countup_time_crawl.classList.add('active');
+ os_countup_time_crawl.setAttribute('data-start', data.time_start);
+ os_countup_time_crawl.interval = setInterval(function() {
+ os_countup_time_crawl.incrementTime();
+ }, 1000);
+ }
+
// Start an interval progress check
clearInterval(os_crawl_interval);
os_crawl_interval = setInterval(os_get_crawl_progress, 1000);
// Else check if the given time_end is later than the time this
// page was loaded; if so, a crawl has finished after this page
- // was loaded; if we are on the Crawler Management page, update
- // all the info there
+ // was loaded, so reload the page
} else if (os_crawl_loaded < data.time_end) {
- os_crawl_loaded = parseInt((new Date()).getTime() / 1000);
- os_crawl_start.disabled = '';
- os_crawl_start.innerHTML = 'Start Crawl';
-
- let os_countup_time_end = document.getElementById('os_countup_time_end');
- if (os_countup_time_end) {
- os_countup_time_end.setAttribute('data-start', data.time_end);
-
- // Try to locate the warning element
- let pDanger = os_countup_time_end.parentNode.parentNode.querySelector('p.data-text.text-danger');
-
- // If the time_end does not match the time_end_success, then
- // the last crawl did not succeed; show the error message
- if (data.time_end != data.time_end_success) {
- if (!pDanger) {
- let pDanger = document.createElement('p');
- pDanger.classList.add('data-text', 'text-danger');
- let strong = document.createElement('strong');
- strong.appendChild(document.createTextNode('Warning:'));
- pDanger.appendChild(strong);
- pDanger.appendChild(document.createTextNode(' The previous crawl did not complete successfully. Please check the crawl log for more details.'));
- os_countup_time_end.parentNode.parentNode.appendChild(pDanger);
- }
-
- // Else if it matches, it was successful, remove any warning
- } else if (pDanger) pDanger.parentNode.removeChild(pDanger);
-
- // Update the Crawl information items
- let os_crawl_time_last = document.getElementById('os_crawl_time_last');
- os_crawl_time_last.innerHTML = data.time_last + ' s ';
-
- let os_crawl_data_transferred = document.getElementById('os_crawl_data_transferred');
- os_crawl_data_transferred.innerHTML = os_readSize(data.data_transferred, true);
-
- let os_crawl_data_stored = document.getElementById('os_crawl_data_stored');
- let text = '';
- if (data.data_transferred) {
- text += '';
- text += '(' + (Math.round(data.data_stored * 1000 / data.data_transferred) / 10) + '%)';
- text += ' ';
- }
- os_crawl_data_stored.innerHTML = text + os_readSize(data.data_stored, true);
-
- let os_crawl_links_crawled = document.getElementById('os_crawl_links_crawled');
- os_crawl_links_crawled.innerHTML = data.links_crawled;
-
- let os_crawl_pages_stored = document.getElementById('os_crawl_pages_stored');
- text = '';
- if (data.links_crawled) {
- text += '';
- text += '(' + (Math.round(data.pages_stored * 1000 / data.links_crawled) / 10) + '%)';
- text += ' ';
- }
- os_crawl_pages_stored.innerHTML = text + data.pages_stored;
-
- // If we are not on the Crawler Management page, let the user
- // know there is new data, and ask to reload the page
- } else if (window.confirm('A crawl has finished. Reload the page to view new data?'))
- window.location.reload();
+ // Check if the crawler modal window is open
+ if (crawlerModal && crawlerModal.classList.contains('show')) {
+ // Don't refresh the page until the user closes the modal
+ crawlerModal.addEventListener('hide.bs.modal', function() {
+ window.location.reload();
+ }, false);
+ } else window.location.reload();
}
}
@@ -509,12 +473,23 @@ let os_get_crawl_progress = function(getLog) {
os_crawl_log.value = data.tail;
if (os_crawl_interval) {
- data.progress = data.progress.split('/');
os_crawl_progress.value = data.progress[0];
os_crawl_progress.max = data.progress[1];
os_crawl_progress.setAttribute('data-progress', data.progress[0] + ' / ' + data.progress[1]);
os_crawl_progress.innerHTML = Math.ceil(data.progress[0] / data.progress[1]) + '%';
os_crawl_log.scrollTop = os_crawl_log.scrollHeight;
+
+ if (os_crawl_data_transferred)
+ os_crawl_data_transferred.innerHTML = os_readSize(data.data_transferred, true);
+
+ if (os_crawl_data_stored)
+ os_crawl_data_stored.innerHTML = 0;
+
+ if (os_crawl_links_crawled)
+ os_crawl_links_crawled.innerHTML = data.progress[0] + ' / ' + data.progress[1];
+
+ if (os_crawl_pages_stored)
+ os_crawl_pages_stored.innerHTML = 0;
}
if (!os_crawl_start.complete && data.status == 'Complete') {
@@ -542,6 +517,11 @@ let os_crawl_progress = document.getElementById('os_crawl_progress');
let os_crawl_log = document.getElementById('os_crawl_log');
let os_crawl_log_download = document.getElementById('os_crawl_log_download');
+let os_crawl_data_transferred = document.getElementById('os_crawl_data_transferred');
+let os_crawl_data_stored = document.getElementById('os_crawl_data_stored');
+let os_crawl_links_crawled = document.getElementById('os_crawl_links_crawled');
+let os_crawl_pages_stored = document.getElementById('os_crawl_pages_stored');
+
os_crawl_cancel.force = false;
os_crawl_cancel.reason = '';
os_crawl_start.allow_grep = false;
@@ -587,6 +567,21 @@ os_crawl_start.addEventListener('click', function(e) {
os_crawl_start.innerHTML = 'Crawling...';
os_crawl_navbar.innerHTML = 'Crawling...';
+ let os_countup_time_end = document.getElementById('os_countup_time_end');
+ if (os_countup_time_end) {
+ clearInterval(os_countup_time_end.interval);
+ os_countup_time_end.parentNode.innerHTML = 'Currently crawling... ';
+ }
+
+ let os_countup_time_crawl = document.getElementById('os_countup_time_crawl');
+ if (os_countup_time_crawl) {
+ os_countup_time_crawl.classList.add('active');
+ os_countup_time_crawl.setAttribute('data-start', parseInt((new Date()).getTime() / 1000));
+ os_countup_time_crawl.interval = setInterval(function() {
+ os_countup_time_crawl.incrementTime();
+ }, 1000);
+ }
+
fetch(new Request('./crawler.php'), {
method: 'POST',
headers: { 'Content-type': 'application/json' },
diff --git a/orcinus/search.php b/orcinus/search.php
index 352f712..66b4c00 100644
--- a/orcinus/search.php
+++ b/orcinus/search.php
@@ -229,7 +229,7 @@ if ($_RDATA['s_searchable_pages']) {
// Try to json_decode the cache data
// If this step fails, assume there is no cache data
$checkJS = json_decode($_SDATA['cache']['data'], true);
- $_SDATA['cache']['data'] = ($checkJS) ? $checkJS : '';
+ $_SDATA['cache']['data'] = (!is_null($checkJS)) ? $checkJS : '';
}
// Database error accessing the query log
@@ -603,8 +603,8 @@ if ($_RDATA['s_searchable_pages']) {
$_ORCINUS->searchable->searched->results->result_list = array();
// Prepare PCRE for removing base domains
- if (count($_RDATA['sp_domains']) == 1)
- $repStr = '/^'.preg_quote(key($_RDATA['sp_domains']), '/').'/';
+ if (count($_ODATA['sp_domains']) == 1)
+ $repStr = '/^'.preg_quote(key($_ODATA['sp_domains']), '/').'/';
// Do a last once-over of the results
foreach ($resultsPage as $key => $result) {
@@ -633,7 +633,7 @@ if ($_RDATA['s_searchable_pages']) {
$_RESULT->relevance = number_format($result['relevance'], 2, '.', '');
// Remove base domain from URL if they are all the same
- if (count($_RDATA['sp_domains']) == 1)
+ if (count($_ODATA['sp_domains']) == 1)
$result['url'] = preg_replace($repStr, '', $result['url']);
// Highlight the terms in the title, url and matchtext