> > , > , > , ago query( 'SHOW TABLE STATUS LIKE \''.$_DDATA['tbprefix'].'%\';' ); $err = $tableinfo->errorInfo(); if ($err[0] == '00000') { $tableinfo = $tableinfo->fetchAll(); foreach ($tableinfo as $table) { switch ($table['Name']) { case $_DDATA['tbprefix'].'config': $_RDATA['s_config_info'] = $table; break; case $_DDATA['tbprefix'].'crawldata': $_RDATA['s_crawldata_info'] = $table; break; case $_DDATA['tbprefix'].'query': $_RDATA['s_query_info'] = $table; } } } else $_SESSION['error'][] = 'Could not read search database status.'; // Search Database Charsets $charsets = $_DDATA['pdo']->query( 'SELECT `content_charset`, COUNT(*) as `num` FROM `'.$_DDATA['tbprefix'].'crawldata` GROUP BY `content_charset` ORDER BY `num` DESC;' ); $err = $charsets->errorInfo(); if ($err[0] == '00000') { $charsets = $charsets->fetchAll(); foreach ($charsets as $row) { if (!$row['content_charset']) $row['content_charset'] = ''; $_RDATA['s_crawldata_info']['Charsets'][$row['content_charset']] = $row['num']; } } else $_SESSION['error'][] = 'Could not read charset counts from search database.'; // ***** Other runtime data $_RDATA['admin_pagination_options'] = array(25, 50, 100, 250, 500, 1000); if (!in_array($_ODATA['admin_index_pagination'], $_RDATA['admin_pagination_options'])) OS_setValue('admin_index_pagination', 100); $_RDATA['admin_pages'] = array( 'crawler' => 'Crawler', 'index' => 'Page Index', 'search' => 'Search' ); if ($_ODATA['s_limit_query_log']) $_RDATA['admin_pages']['queries'] = 'Query Log'; $_RDATA['index_status_list'] = array( '', 'OK', 'Orphan', 'Updated', 'Unlisted' ); // ***** Set session defaults if (empty($_SESSION['admin_page']) || empty($_RDATA['admin_pages'][$_SESSION['admin_page']])) $_SESSION['admin_page'] = 'crawler'; if (!isset($_SESSION['index_page'])) $_SESSION['index_page'] = 1; if (empty($_SESSION['index_filter_category'])) $_SESSION['index_filter_category'] = ''; if (empty($_SESSION['index_filter_status'])) $_SESSION['index_filter_status'] = ''; if 
(empty($_SESSION['index_filter_text']))
  $_SESSION['index_filter_text'] = '';
if (empty($_SESSION['admin_username']))
  $_SESSION['admin_username'] = '';


/* ***** Authenticate, or handle requests from a logged-in admin ***** */
if (!$_SESSION['admin_username']) {

  // If we are logging in
  if ($_SERVER['REQUEST_METHOD'] == 'POST') {
    if (!empty($_POST['os_submit']) && $_POST['os_submit'] == 'os_admin_login') {

      // Credentials must be plain strings; missing or array-valued
      // fields are treated as empty
      if (empty($_POST['os_admin_username']) || !is_string($_POST['os_admin_username']))
        $_POST['os_admin_username'] = '';
      if (empty($_POST['os_admin_password']) || !is_string($_POST['os_admin_password']))
        $_POST['os_admin_password'] = '';

      // hash_equals() compares in constant time and never applies
      // numeric type juggling (a loose == treats '0e1' and '0e2' as
      // equal). Both checks are evaluated before being combined so the
      // response time does not reveal which field was wrong.
      $usernameOk = hash_equals((string)$_RDATA['admin_username'], $_POST['os_admin_username']);
      $passwordOk = hash_equals((string)$_RDATA['admin_password'], $_POST['os_admin_password']);

      if ($usernameOk && $passwordOk) {

        // Issue a fresh session ID when the privilege level changes
        if (session_status() === PHP_SESSION_ACTIVE)
          session_regenerate_id(true);

        $_SESSION['admin_username'] = $_RDATA['admin_username'];
        $_SESSION['admin_page'] = 'crawler';

        // Redirect so that a page reload does not re-submit the form
        header('Location: '.$_SERVER['REQUEST_URI']);
        exit();

      } else $_SESSION['error'][] = 'Invalid username or password.';
    }
  }

} else {

  /* ***** Handle POST Requests ************************************** */
  if ($_SERVER['REQUEST_METHOD'] == 'POST') {

    // The Content-Type header may be absent, and may carry parameters
    // such as '; charset=utf-8', so compare the media type only
    $contentType = explode(';', $_SERVER['CONTENT_TYPE'] ?? '', 2);
    $contentType = strtolower(trim($contentType[0]));

    // JSON POST request
    // These are usually sent by javascript fetch()
    if ($contentType == 'application/json') {
      $postBody = file_get_contents('php://input');
      $_POST = json_decode($postBody, false);

      // Malformed JSON decodes to null and valid JSON may decode to a
      // scalar or a list; the code below needs an object to work with
      if (!is_object($_POST)) $_POST = new stdClass();

      $response = array();

      if (empty($_POST->action)) $_POST->action = '';
      switch ($_POST->action) {

        // Set the key for initiating the crawler
        case 'setkey':
          if (!$_ODATA['sp_crawling']) {

            // 32 hex characters, the same shape as the md5() digest
            // used previously, but not derived from the system clock
            $spKey = bin2hex(random_bytes(16));
            OS_setValue('sp_key', $spKey);
            OS_setValue('sp_log', '');
            OS_setValue('sp_progress', '0/1');
            $response = array(
              'status' => 'Success',
              'message' => 'Key set to initiate crawler',
              'sp_key' => $spKey
            );

          } else {
            $response = array(
              'status' => 'Error',
              'message' => 'Crawler is already running; current progress: '.$_ODATA['sp_progress']
            );
          }
          break;

        // Download a text file of the most recent crawl or query log
        case 'download':
          if (empty($_POST->content)) $_POST->content = '';
          switch ($_POST->content) {
            case 'crawl_log':
              if (!$_ODATA['sp_crawling']) {
                if ($_ODATA['sp_time_end']) {
                  $lines = explode("\n", $_ODATA['sp_log']);

                  // The grep value ends up in the download filename,
                  // so keep only word characters and dashes
                  if (empty($_POST->grep) || !is_string($_POST->grep)) {
                    $_POST->grep = '';
                  } else $_POST->grep = preg_replace('/[^\w-]/', '', $_POST->grep);

                  switch ($_POST->grep) {
                    case 'all': break;
                    case 'errors': $lines = preg_grep('/^[\[\*]/', $lines); break;
                    default: $lines = preg_grep('/^[\[\*\w\d]/', $lines);
                  }

                  if ($_POST->grep) $_POST->grep = '-'.$_POST->grep;

                  header('Content-type: text/plain; charset='.strtolower($_ODATA['s_charset']));
                  header('Content-disposition: attachment; filename="'.
                    'crawl-log'.$_POST->grep.'_'.date('Y-m-d', $_ODATA['sp_time_end']).'.txt"');

                  // UTF-8 byte order mark
                  if (strtolower($_ODATA['s_charset']) == 'utf-8')
                    echo "\xEF\xBB\xBF";

                  die(implode("\n", $lines));

                } else {
                  $response = array(
                    'status' => 'Error',
                    'message' => 'Crawler has not run yet; no log to download'
                  );
                }
              } else {
                $response = array(
                  'status' => 'Error',
                  'message' => 'Currently crawling; try again later'
                );
              }
              break;

            case 'query_log':
              $querylog = $_DDATA['pdo']->query(
                'SELECT `query`, `results`, `stamp`, INET_NTOA(`ip`) AS `ipaddr`
                   FROM `'.$_DDATA['tbprefix'].'query` ORDER BY `stamp` DESC;'
              );
              $err = $querylog->errorInfo();
              if ($err[0] == '00000') {

                $querylog = $querylog->fetchAll();
                if (count($querylog)) {

                  header('Content-type: text/csv; charset='.strtolower($_ODATA['s_charset']));
                  header('Content-disposition: attachment; filename="'.
                    'query-log_'.date('Y-m-d').'.csv"');

                  $output = fopen('php://output', 'w');

                  // UTF-8 byte order mark
                  if (strtolower($_ODATA['s_charset']) == 'utf-8')
                    fwrite($output, "\xEF\xBB\xBF");

                  $headings = array('Query', 'Results', 'Time Stamp', 'IP');
                  if ($_GEOIP2) $headings[] = 'Country';
                  fputcsv($output, $headings);

                  foreach ($querylog as $line) {
                    $line['stamp'] = date('c', $line['stamp']);

                    // A failed lookup just leaves the country blank
                    if ($_GEOIP2) {
                      try {
                        $geo = $_GEOIP2->country($line['ipaddr']);
                      } catch(Exception $e) { $geo = false; }
                      $line['country'] = ($geo) ? $geo->raw['country']['names']['en'] : '';
                    }

                    fputcsv($output, $line);
                  }

                  fclose($output);
                  die();

                } else {
                  $response = array(
                    'status' => 'Error',
                    'message' => 'The query log is empty; nothing to download'
                  );
                }
              } else {
                $response = array(
                  'status' => 'Error',
                  'message' => 'Could not read the query log database'
                );
              }
              break;

            default:
              $response = array(
                'status' => 'Error',
                'message' => 'Invalid content selected to download'
              );

          }
          break;

        // Not used?
        case 'fetch':
          // Only a string can be used as an option key
          if (empty($_POST->value) || !is_string($_POST->value)) $_POST->value = '';
          if (!empty($_ODATA[$_POST->value])) {
            $response = array(
              'status' => 'Success',
              'message' => trim($_ODATA[$_POST->value])
            );
          } else {
            $response = array(
              'status' => 'Error',
              'message' => 'Invalid value selected to fetch'
            );
          }

      }

      header('Content-type: application/json; charset='.strtolower($_ODATA['s_charset']));
      die(json_encode($response, JSON_INVALID_UTF8_IGNORE));


    // Normal POST request
    } else if (!empty($_POST['os_submit'])) {
      switch ($_POST['os_submit']) {

        // ***** Crawler >> Settings
        case 'os_sp_crawl_config':
          if (isset($_POST['os_sp_starting'])) {

            // Normalise to one URL per line, limited to 4095 bytes
            $_POST['os_sp_starting'] = str_replace("\r\n", "\n", trim($_POST['os_sp_starting']));
            $_POST['os_sp_starting'] = preg_replace('/\n+/', "\n", $_POST['os_sp_starting']);
            $_POST['os_sp_starting'] = substr($_POST['os_sp_starting'], 0, 4095);
            $_POST['os_sp_starting'] = explode("\n", $_POST['os_sp_starting']);

            // Sanitise each URL and drop its #fragment; spaces are
            // shielded as %20 so the URL filter does not remove them
            foreach ($_POST['os_sp_starting'] as $key => $starting) {
              $starting = preg_replace(
                '/#.*$/',
                '',
                filter_var(
                  str_replace(' ', '%20', $starting),
                  FILTER_SANITIZE_URL
                )
              );
              $_POST['os_sp_starting'][$key] = str_replace('%20', ' ', $starting);
            }

            // Keep only entries that have a scheme and an authority
            $_POST['os_sp_starting'] = array_filter($_POST['os_sp_starting'], function($a) {
              return preg_match('/^(([^:\/?#]+):)(\/\/([^\/?#]+))([^?#]*)(\?([^#]*))?(#(.*))?/', $a);
            });

            // Fall back to the install domain if nothing valid remains
            if (!count($_POST['os_sp_starting'])) {
              $_POST['os_sp_starting'][] = $_ODATA['admin_install_domain'].'/';
              $_SESSION['error'][] = 'Cannot have an empty or invalid Starting URLs field.';
            }
OS_setValue('sp_starting', implode("\n", $_POST['os_sp_starting']));
  }

  if (isset($_POST['os_sp_useragent'])) {
    $_POST['os_sp_useragent'] = filter_var($_POST['os_sp_useragent'], FILTER_SANITIZE_SPECIAL_CHARS);
    OS_setValue('sp_useragent', substr($_POST['os_sp_useragent'], 0, 255));
  }

  // Checkboxes: an absent field means the box was unchecked
  if (isset($_POST['os_sp_cookies']) && $_POST['os_sp_cookies'] == '1') {
    $_POST['os_sp_cookies'] = 1;
  } else $_POST['os_sp_cookies'] = 0;
  OS_setValue('sp_cookies', $_POST['os_sp_cookies']);

  if (isset($_POST['os_sp_ifmodifiedsince']) && $_POST['os_sp_ifmodifiedsince'] == '1') {
    $_POST['os_sp_ifmodifiedsince'] = 1;
  } else $_POST['os_sp_ifmodifiedsince'] = 0;
  OS_setValue('sp_ifmodifiedsince', $_POST['os_sp_ifmodifiedsince']);

  if (isset($_POST['os_sp_autodelete']) && $_POST['os_sp_autodelete'] == '1') {
    $_POST['os_sp_autodelete'] = 1;
  } else $_POST['os_sp_autodelete'] = 0;
  OS_setValue('sp_autodelete', $_POST['os_sp_autodelete']);

  // Numeric limits are clamped to the range given in each max/min pair
  if (isset($_POST['os_sp_timeout_url'])) {
    $_POST['os_sp_timeout_url'] = max(1, min(65535, (int)$_POST['os_sp_timeout_url']));
    OS_setValue('sp_timeout_url', (int)$_POST['os_sp_timeout_url']);
  }

  if (isset($_POST['os_sp_timeout_crawl'])) {
    $_POST['os_sp_timeout_crawl'] = max(1, min(65535, (int)$_POST['os_sp_timeout_crawl']));
    OS_setValue('sp_timeout_crawl', (int)$_POST['os_sp_timeout_crawl']);
  }

  if (isset($_POST['os_sp_sleep'])) {
    $_POST['os_sp_sleep'] = max(0, min(65535, (int)$_POST['os_sp_sleep']));
    OS_setValue('sp_sleep', (int)$_POST['os_sp_sleep']);
  }

  if (isset($_POST['os_sp_limit_crawl'])) {
    $_POST['os_sp_limit_crawl'] = max(1, min(65535, (int)$_POST['os_sp_limit_crawl']));
    OS_setValue('sp_limit_crawl', (int)$_POST['os_sp_limit_crawl']);
  }

  if (isset($_POST['os_sp_limit_store'])) {
    $_POST['os_sp_limit_store'] = max(1, min(65535, (int)$_POST['os_sp_limit_store']));
    OS_setValue('sp_limit_store', (int)$_POST['os_sp_limit_store']);
  }

  if (isset($_POST['os_sp_limit_depth'])) {
    $_POST['os_sp_limit_depth'] = max(1, min(255, (int)$_POST['os_sp_limit_depth']));
    OS_setValue('sp_limit_depth', (int)$_POST['os_sp_limit_depth']);
  }

  if (isset($_POST['os_sp_limit_filesize'])) {
    $_POST['os_sp_limit_filesize'] = max(1, min(65535, (int)$_POST['os_sp_limit_filesize']));
    OS_setValue('sp_limit_filesize', (int)$_POST['os_sp_limit_filesize']);
  }

  // Multi-line fields are normalised to one entry per line and
  // limited to 4095 bytes before each line is sanitised
  if (isset($_POST['os_sp_require_url'])) {
    $_POST['os_sp_require_url'] = str_replace("\r\n", "\n", trim($_POST['os_sp_require_url']));
    $_POST['os_sp_require_url'] = preg_replace('/\n+/', "\n", $_POST['os_sp_require_url']);
    $_POST['os_sp_require_url'] = substr($_POST['os_sp_require_url'], 0, 4095);
    $_POST['os_sp_require_url'] = explode("\n", $_POST['os_sp_require_url']);
    foreach ($_POST['os_sp_require_url'] as $key => $require)
      $_POST['os_sp_require_url'][$key] = filter_var($require, FILTER_SANITIZE_URL);
    OS_setValue('sp_require_url', implode("\n", $_POST['os_sp_require_url']));
  }

  if (isset($_POST['os_sp_ignore_url'])) {
    $_POST['os_sp_ignore_url'] = str_replace("\r\n", "\n", trim($_POST['os_sp_ignore_url']));
    $_POST['os_sp_ignore_url'] = preg_replace('/\n+/', "\n", $_POST['os_sp_ignore_url']);
    $_POST['os_sp_ignore_url'] = substr($_POST['os_sp_ignore_url'], 0, 4095);
    $_POST['os_sp_ignore_url'] = explode("\n", $_POST['os_sp_ignore_url']);
    foreach ($_POST['os_sp_ignore_url'] as $key => $require)
      $_POST['os_sp_ignore_url'][$key] = filter_var($require, FILTER_SANITIZE_URL);
    OS_setValue('sp_ignore_url', implode("\n", $_POST['os_sp_ignore_url']));
  }

  if (isset($_POST['os_sp_ignore_ext'])) {
    $_POST['os_sp_ignore_ext'] = preg_replace(
      array('/[^\w\d\. _-]/', '/ {2,}/'),
      array('', ' '),
      trim($_POST['os_sp_ignore_ext'])
    );
    OS_setValue('sp_ignore_ext', substr($_POST['os_sp_ignore_ext'], 0, 4095));
  }

  if (isset($_POST['os_sp_category_default'])) {
    $_POST['os_sp_category_default'] = preg_replace(array('/\s/', '/ {2,}/'), ' ', trim($_POST['os_sp_category_default']));
    $_POST['os_sp_category_default'] = preg_replace('/[^\w \d-]/', '', $_POST['os_sp_category_default']);
    if ($_POST['os_sp_category_default']) {
      OS_setValue('sp_category_default', substr($_POST['os_sp_category_default'], 0, 30));
    } else $_SESSION['error'][] = 'Category names may only contain letters, numbers, spaces or dashes.';
  } else $_SESSION['error'][] = 'Please supply a category name.';

  if (isset($_POST['os_sp_ignore_css'])) {
    $_POST['os_sp_ignore_css'] = preg_replace(
      array('/[^\w\d\. #_:-]/', '/ {2,}/'),
      array('', ' '),
      trim($_POST['os_sp_ignore_css'])
    );
    OS_setValue('sp_ignore_css', substr($_POST['os_sp_ignore_css'], 0, 4095));
  }

  if (isset($_POST['os_sp_title_strip'])) {
    $_POST['os_sp_title_strip'] = str_replace("\r\n", "\n", trim($_POST['os_sp_title_strip']));
    $_POST['os_sp_title_strip'] = preg_replace('/\n+/', "\n", $_POST['os_sp_title_strip']);
    $_POST['os_sp_title_strip'] = substr($_POST['os_sp_title_strip'], 0, 4095);
    $_POST['os_sp_title_strip'] = explode("\n", $_POST['os_sp_title_strip']);
    foreach ($_POST['os_sp_title_strip'] as $key => $require)
      $_POST['os_sp_title_strip'][$key] = filter_var($require, FILTER_SANITIZE_SPECIAL_CHARS);
    OS_setValue('sp_title_strip', implode("\n", $_POST['os_sp_title_strip']));
  }

  $_SESSION['message'][] = 'Crawl settings have been saved.';
  break;


// ***** Crawler >> Administration
case 'os_admin_config':
  if (isset($_POST['os_sp_interval'])) {
    $_POST['os_sp_interval'] = max(0, min(255, (int)$_POST['os_sp_interval']));
    OS_setValue('sp_interval', (int)$_POST['os_sp_interval']);
  }

  // The patterns are anchored so that only a bare HH:MM or HH:MM:SS
  // value is stored, not any string that merely contains one
  if (isset($_POST['os_sp_interval_start'])) {
    if (is_string($_POST['os_sp_interval_start']) &&
        preg_match('/^\d\d:\d\d(:\d\d)?$/D', $_POST['os_sp_interval_start'])) {
      OS_setValue('sp_interval_start', $_POST['os_sp_interval_start']);
    } else $_SESSION['error'][] = 'Unexpected start time format.';
  }

  if (isset($_POST['os_sp_interval_stop'])) {
    if (is_string($_POST['os_sp_interval_stop']) &&
        preg_match('/^\d\d:\d\d(:\d\d)?$/D', $_POST['os_sp_interval_stop'])) {
      OS_setValue('sp_interval_stop', $_POST['os_sp_interval_stop']);
    } else $_SESSION['error'][] = 'Unexpected stop time format.';
  }

  // Strict comparison: only an exact identifier string is accepted
  if (isset($_POST['os_sp_timezone']))
    if (in_array($_POST['os_sp_timezone'], timezone_identifiers_list(), true))
      OS_setValue('sp_timezone', $_POST['os_sp_timezone']);

  if (isset($_POST['os_sp_email_success']) && $_POST['os_sp_email_success'] == '1') {
    $_POST['os_sp_email_success'] = 1;
  } else $_POST['os_sp_email_success'] = 0;
  OS_setValue('sp_email_success', $_POST['os_sp_email_success']);

  if (isset($_POST['os_sp_email_failure']) && $_POST['os_sp_email_failure'] == '1') {
    $_POST['os_sp_email_failure'] = 1;
  } else $_POST['os_sp_email_failure'] = 0;
  OS_setValue('sp_email_failure', $_POST['os_sp_email_failure']);

  if (isset($_POST['os_admin_email'])) {
    if ($_MAIL) {
      $_POST['os_admin_email'] = str_replace("\r\n", "\n", $_POST['os_admin_email']);
      $_POST['os_admin_email'] = preg_replace('/\n+/', "\n", $_POST['os_admin_email']);
      $_POST['os_admin_email'] = substr($_POST['os_admin_email'], 0, 4095);
      $_POST['os_admin_email'] = explode("\n", $_POST['os_admin_email']);

      // Rebuild each line from its first parsed address; lines that
      // yield no address are reported and dropped
      foreach ($_POST['os_admin_email'] as $key => $admin_email) {
        $email = $_MAIL->parseAddresses($admin_email);
        if (count($email)) {
          if ($email[0]['name']) {
            $_POST['os_admin_email'][$key] = $email[0]['name'].' <'.$email[0]['address'].'>';
          } else $_POST['os_admin_email'][$key] = $email[0]['address'];
        } else {
          $_SESSION['error'][] = 'Invalid To: email address \''.$admin_email.'\'.';
          unset($_POST['os_admin_email'][$key]);
        }
      }
      OS_setValue('admin_email', implode("\n", array_values($_POST['os_admin_email'])));

    } else $_SESSION['error'][] = 'PHPMailer needs to be installed to parse new email addresses.';
  }

  $_SESSION['message'][] = 'Crawl administration settings have been saved.';
  break;


// ***** Crawler >> Sitemap
case 'os_sp_sitemap_config':
  if (isset($_POST['os_sp_sitemap_file'])) {
    $_POST['os_sp_sitemap_file'] = substr($_POST['os_sp_sitemap_file'], 0, 255);
    $_POST['os_sp_sitemap_file'] = filter_var($_POST['os_sp_sitemap_file'], FILTER_SANITIZE_URL);

    // An empty filename clears the setting
    if ($_POST['os_sp_sitemap_file']) {
      if (preg_match('/\.xml(\.gz)?$/', $_POST['os_sp_sitemap_file'])) {
        OS_setValue('sp_sitemap_file', $_POST['os_sp_sitemap_file']);
      } else $_SESSION['error'][] = 'Sitemap filename must end with .xml or .xml.gz';
    } else OS_setValue('sp_sitemap_file', '');
  }

  if (isset($_POST['os_sp_sitemap_hostname'])) {
    $_POST['os_sp_sitemap_hostname'] = filter_var($_POST['os_sp_sitemap_hostname'], FILTER_VALIDATE_DOMAIN, FILTER_FLAG_HOSTNAME);
    if ($_POST['os_sp_sitemap_hostname']) {
      OS_setValue('sp_sitemap_hostname', $_POST['os_sp_sitemap_hostname']);
    } else $_SESSION['error'][] = 'Invalid sitemap hostname.';
  }

  $_SESSION['message'][] = 'Sitemap settings have been saved.';
  break;


// ***** Page Index >> With Selected...
case 'os_index_with_selected':
  if (empty($_POST['os_index_pages']))
    $_POST['os_index_pages'] = array();

  if (is_array($_POST['os_index_pages'])) {

    // Each selected page arrives as a base64-encoded checksum that
    // must decode to exactly 20 bytes. A non-string element or any
    // character outside the base64 alphabet invalidates the list.
    $checksums_good = true;
    foreach ($_POST['os_index_pages'] as $key => $content_checksum) {
      $content_checksum = (is_string($content_checksum))
        ? base64_decode($content_checksum, true)
        : false;
      if ($content_checksum && strlen($content_checksum) == 20) {
        $_POST['os_index_pages'][$key] = $content_checksum;
      } else $checksums_good = false;
    }

    if ($checksums_good) {
      if (empty($_POST['os_index_select_action']))
        $_POST['os_index_select_action'] = '';

      // In every action below, the inner 'break' leaves the foreach
      // on the first database error; the 'break' after the loop then
      // leaves the switch
      switch ($_POST['os_index_select_action']) {
        case 'delete':
          $delete = $_DDATA['pdo']->prepare(
            'DELETE FROM `'.$_DDATA['tbprefix'].'crawldata` WHERE `content_checksum`=:content_checksum;'
          );

          foreach ($_POST['os_index_pages'] as $content_checksum) {
            $delete->execute(array('content_checksum' => $content_checksum));
            $err = $delete->errorInfo();
            if ($err[0] != '00000') {
              $_SESSION['error'][] = 'Database error on attempt to delete: '.$err[2];
              break;
            }
          }
          break;

        case 'category':
          if (!empty($_POST['os_apply_new_category'])) {
            $_POST['os_apply_new_category'] = preg_replace(array('/\s/', '/ {2,}/'), ' ', trim($_POST['os_apply_new_category']));
            $_POST['os_apply_new_category'] = preg_replace('/[^\w \d-]/', '', $_POST['os_apply_new_category']);
            $_POST['os_apply_new_category'] = substr($_POST['os_apply_new_category'], 0, 30);

            if ($_POST['os_apply_new_category']) {
              $update = $_DDATA['pdo']->prepare(
                'UPDATE `'.$_DDATA['tbprefix'].'crawldata` SET `category`=:category WHERE `content_checksum`=:content_checksum;'
              );

              foreach ($_POST['os_index_pages'] as $content_checksum) {
                $update->execute(array(
                  'category' => $_POST['os_apply_new_category'],
                  'content_checksum' => $content_checksum
                ));
                $err = $update->errorInfo();
                if ($err[0] != '00000') {
                  $_SESSION['error'][] = 'Database error on attempt to update category: '.$err[2];
                  break;
                }
              }

              // Reset the category filter for the page index
              $_SESSION['index_filter_category'] = '';

            } else $_SESSION['error'][] = 'Category names may only contain letters, numbers, spaces or dashes.';
          } else $_SESSION['error'][] = 'Please supply a category name.';
          break;

        case 'priority':
          // is_numeric() rather than !empty(): a priority of 0 is a
          // legitimate value inside the 0..1 range, and non-numeric
          // input is rejected instead of being stored as 0
          if (isset($_POST['os_apply_new_priority']) && is_numeric($_POST['os_apply_new_priority'])) {
            $_POST['os_apply_new_priority'] = (float)$_POST['os_apply_new_priority'];
            $_POST['os_apply_new_priority'] = max(0, min(1, $_POST['os_apply_new_priority']));
            $_POST['os_apply_new_priority'] = round($_POST['os_apply_new_priority'], 5);

            $update = $_DDATA['pdo']->prepare(
              'UPDATE `'.$_DDATA['tbprefix'].'crawldata` SET `priority`=:priority WHERE `content_checksum`=:content_checksum;'
            );

            foreach ($_POST['os_index_pages'] as $content_checksum) {
              $update->execute(array(
                'priority' => $_POST['os_apply_new_priority'],
                'content_checksum' => $content_checksum
              ));
              $err = $update->errorInfo();
              if ($err[0] != '00000') {
                $_SESSION['error'][] = 'Database error on attempt to update priority: '.$err[2];
                break;
              }
            }

          } else $_SESSION['error'][] = 'Please supply a priority value.';
          break;

        case 'unlisted':
          // Toggles the flag on each selected page
          $update = $_DDATA['pdo']->prepare(
            'UPDATE `'.$_DDATA['tbprefix'].'crawldata` SET `flag_unlisted`=!`flag_unlisted` WHERE `content_checksum`=:content_checksum;'
          );

          foreach ($_POST['os_index_pages'] as $content_checksum) {
            $update->execute(array('content_checksum' => $content_checksum));
            $err = $update->errorInfo();
            if ($err[0] != '00000') {
              $_SESSION['error'][] = 'Database error on attempt to toggle \'unlisted\' status: '.$err[2];
              break;
            }
          }
          break;

        default:
          $_SESSION['error'][] = 'Unknown command.';

      }
    } else $_SESSION['error'][] = 'Bad page checksum(s) given by user.';
  } else $_SESSION['error'][] = 'Badly formed list of pages; could not perform an action.';
  break;


// ***** Page Index >> Text Match filter
case 'os_index_filter_text':
  if (empty($_POST['os_index_filter_text']))
    $_POST['os_index_filter_text'] = '';

  $_POST['os_index_filter_text'] = filter_var($_POST['os_index_filter_text'], FILTER_SANITIZE_URL);
  $_SESSION['index_filter_text'] = $_POST['os_index_filter_text'];

  // A new filter starts again from the first page of results
  $_SESSION['index_page'] = 1;
  break;


// ***** Search >> Search Settings
case 'os_s_search_config':
  if (isset($_POST['os_s_limit_query'])) {
    $_POST['os_s_limit_query'] = max(1, min(255, (int)$_POST['os_s_limit_query']));
    OS_setValue('s_limit_query', (int)$_POST['os_s_limit_query']);
  }

  if (isset($_POST['os_s_limit_terms'])) {
    $_POST['os_s_limit_terms'] = max(1, min(255, (int)$_POST['os_s_limit_terms']));
    OS_setValue('s_limit_terms', (int)$_POST['os_s_limit_terms']);
  }

  if (isset($_POST['os_s_limit_term_length'])) {
    $_POST['os_s_limit_term_length'] = max(1, min(255, (int)$_POST['os_s_limit_term_length']));
    OS_setValue('s_limit_term_length', (int)$_POST['os_s_limit_term_length']);
  }

  // Weights: a missing field keeps the current value; every weight is
  // floored at zero and formatted with one decimal place
  if (!isset($_POST['os_s_weight_title']))
    $_POST['os_s_weight_title'] = $_RDATA['s_weights']['title'];
  $_POST['os_s_weight_title'] = number_format(max(0, (float)$_POST['os_s_weight_title']), 1, '.', '');

  if (!isset($_POST['os_s_weight_body']))
    $_POST['os_s_weight_body'] = $_RDATA['s_weights']['body'];
  $_POST['os_s_weight_body'] = number_format(max(0, (float)$_POST['os_s_weight_body']), 1, '.', '');

  if (!isset($_POST['os_s_weight_keywords']))
    $_POST['os_s_weight_keywords'] = $_RDATA['s_weights']['keywords'];
  $_POST['os_s_weight_keywords'] = number_format(max(0, (float)$_POST['os_s_weight_keywords']), 1, '.', '');

  if (!isset($_POST['os_s_weight_description']))
    $_POST['os_s_weight_description'] = $_RDATA['s_weights']['description'];
  $_POST['os_s_weight_description'] = number_format(max(0, (float)$_POST['os_s_weight_description']), 1, '.', '');

  if (!isset($_POST['os_s_weight_url']))
    $_POST['os_s_weight_url'] = $_RDATA['s_weights']['url'];
  $_POST['os_s_weight_url'] = number_format(max(0, (float)$_POST['os_s_weight_url']), 1, '.', '');

  if (!isset($_POST['os_s_weight_multi']))
    $_POST['os_s_weight_multi'] = $_RDATA['s_weights']['multi'];
  $_POST['os_s_weight_multi'] = number_format(max(0, (float)$_POST['os_s_weight_multi']), 1, '.', '');

  if (!isset($_POST['os_s_weight_important']))
    $_POST['os_s_weight_important'] = $_RDATA['s_weights']['important'];
$_POST['os_s_weight_important'] = number_format(max(0, (float)$_POST['os_s_weight_important']), 1, '.', '');

  if (!isset($_POST['os_s_weight_css_value']))
    $_POST['os_s_weight_css_value'] = $_RDATA['s_weights']['css_value'];
  $_POST['os_s_weight_css_value'] = number_format(max(0, (float)$_POST['os_s_weight_css_value']), 1, '.', '');

  // All weights are stored as a single '%'-separated value; the
  // order of the entries below is the storage order
  OS_setValue('s_weights', implode('%', array(
    $_POST['os_s_weight_title'],
    $_POST['os_s_weight_body'],
    $_POST['os_s_weight_keywords'],
    $_POST['os_s_weight_description'],
    $_POST['os_s_weight_css_value'],
    $_POST['os_s_weight_url'],
    $_POST['os_s_weight_multi'],
    $_POST['os_s_weight_important']
  )));

  if (isset($_POST['os_s_weight_css'])) {
    $_POST['os_s_weight_css'] = preg_replace(
      array('/[^\w\d\. #_:-]/', '/ {2,}/'),
      array('', ' '),
      trim($_POST['os_s_weight_css'])
    );
    OS_setValue('s_weight_css', substr($_POST['os_s_weight_css'], 0, 4095));
  }

  if (isset($_POST['os_s_charset'])) {
    $_POST['os_s_charset'] = substr($_POST['os_s_charset'], 0, 63);
    $_POST['os_s_charset'] = preg_replace('/[^\w\d\.:_-]/', '', $_POST['os_s_charset']);
    OS_setValue('s_charset', $_POST['os_s_charset']);
  }

  if (isset($_POST['os_s_limit_results'])) {
    $_POST['os_s_limit_results'] = max(1, min(255, (int)$_POST['os_s_limit_results']));
    OS_setValue('s_limit_results', (int)$_POST['os_s_limit_results']);
  }

  if (isset($_POST['os_s_results_pagination'])) {
    $_POST['os_s_results_pagination'] = max(1, min(255, (int)$_POST['os_s_results_pagination']));
    OS_setValue('s_results_pagination', (int)$_POST['os_s_results_pagination']);
  }

  if (isset($_POST['os_s_limit_matchtext'])) {
    $_POST['os_s_limit_matchtext'] = max(1, min(65535, (int)$_POST['os_s_limit_matchtext']));
    OS_setValue('s_limit_matchtext', (int)$_POST['os_s_limit_matchtext']);
  }

  // Checkboxes: an absent field means the box was unchecked
  if (isset($_POST['os_s_show_orphans']) && $_POST['os_s_show_orphans'] == '1') {
    $_POST['os_s_show_orphans'] = 1;
  } else $_POST['os_s_show_orphans'] = 0;
  OS_setValue('s_show_orphans', $_POST['os_s_show_orphans']);

  if (isset($_POST['os_s_show_filetype_html']) && $_POST['os_s_show_filetype_html'] == '1') {
    $_POST['os_s_show_filetype_html'] = 1;
  } else $_POST['os_s_show_filetype_html'] = 0;
  OS_setValue('s_show_filetype_html', $_POST['os_s_show_filetype_html']);

  $_SESSION['message'][] = 'Search settings have been saved.';
  break;


// ***** Search >> Search Template
case 'os_s_search_template':
  if (isset($_POST['os_s_result_template'])) {
    $_POST['os_s_result_template'] = str_replace("\r", '', $_POST['os_s_result_template']);
    OS_setValue('s_result_template', substr($_POST['os_s_result_template'], 0, 65535));
    $_SESSION['message'][] = 'Search result template updated.';
  }
  break;


// ***** Search >> Search Result Cache
case 'os_s_cache_config':
  if (isset($_POST['os_s_limit_query_log'])) {
    $_POST['os_s_limit_query_log'] = max(0, min(255, (int)$_POST['os_s_limit_query_log']));
    OS_setValue('s_limit_query_log', (int)$_POST['os_s_limit_query_log']);
  }

  if (isset($_POST['os_s_limit_cache'])) {
    $_POST['os_s_limit_cache'] = max(0, min(65535, (int)$_POST['os_s_limit_cache']));
    OS_setValue('s_limit_cache', (int)$_POST['os_s_limit_cache']);
  }
  break;


// ***** Search >> Search Result Purge
case 'os_s_cache_purge':
  $purge = $_DDATA['pdo']->query(
    'UPDATE `'.$_DDATA['tbprefix'].'query` SET `cache`=\'\';'
  );
  $err = $purge->errorInfo();
  if ($err[0] == '00000') {
    $_RDATA['s_cache_size'] = 0;
    $_SESSION['message'][] = 'Search result cache has been purged.';
  } else $_SESSION['error'][] = 'Could not purge search result cache.';
  break;


// ***** Search >> Offline Javascript
case 'os_jw_config':

// ***** Search >> Write Offline Javascript
case 'os_jw_write':
  // Both actions save the settings first; only 'os_jw_write' carries
  // on past the settings to generate the file
  if (isset($_POST['os_jw_hostname'])) {
    $_POST['os_jw_hostname'] = filter_var($_POST['os_jw_hostname'], FILTER_VALIDATE_DOMAIN, FILTER_FLAG_HOSTNAME);
    if ($_POST['os_jw_hostname']) {
      OS_setValue('jw_hostname', $_POST['os_jw_hostname']);
    } else $_SESSION['error'][] = 'Invalid offline javascript hostname.';
  }

  if (isset($_POST['os_jw_compression'])) {
    $_POST['os_jw_compression'] = max(0, min(100, (int)$_POST['os_jw_compression']));
    OS_setValue('jw_compression', (int)$_POST['os_jw_compression']);
  }

  if ($_POST['os_submit'] == 'os_jw_config') {
    $_SESSION['message'][] = 'Offline javascript search settings have been saved.';
    break;
  }


  // ***** Write to and download the Offline Javascript file
  $query_status = ($_ODATA['s_show_orphans']) ? '(`status`=\'OK\' || `status`=\'Orphan\')' : '`status`=\'OK\'';

  // The hostname is bound as a parameter rather than being escaped
  // with addslashes() and concatenated into the statement
  $select = $_DDATA['pdo']->prepare(
    'SELECT `url`, `title`, `description`, `keywords`, `category`,
            `content_mime`, `weighted`, `content`, `priority`
       FROM `'.$_DDATA['tbprefix'].'crawldata`
         WHERE `flag_unlisted`<>1 AND '.$query_status.' AND
               `url_base` LIKE :url_base;'
  );
  $select->execute(array('url_base' => '%'.$_ODATA['jw_hostname']));
  $err = $select->errorInfo();
  if ($err[0] == '00000') {
    $select = $select->fetchAll();

    // If compression value is less than 100 then get a word
    // list frequency report from all indexed pages
    if ($_ODATA['jw_compression'] < 100) {
      $words = array();
      foreach ($select as $key => $row) {
        // Count each word once per page
        $select[$key]['words'] = array_unique(explode(' ', $row['content']));
        foreach ($select[$key]['words'] as $index => $word) {
          if (!$word) continue;
          if (empty($words[$word])) {
            $words[$word] = 1;
          } else $words[$word]++;
        }
      }

      // Use the word frequency report to create a filter of
      // words that are more common than the compression
      // threshold
      $compressionFilter = array();
      $pageCount = count($select);
      foreach ($words as $word => $count)
        if (($count / $pageCount) * 100 >= $_ODATA['jw_compression'])
          $compressionFilter[] = $word;
    }

    $repStr = '/^'.preg_quote($_ODATA['jw_hostname'], '/').'/';

    foreach ($select as $key => $row) {

      // Use the compression filter to remove all of the most
      // common words from the content of this page
      if ($_ODATA['jw_compression'] < 100) {
        $select[$key]['words'] = array_diff($row['words'], $compressionFilter);
        $select[$key]['words'] = implode(' ', $select[$key]['words']);
      } else $select[$key]['words'] = $row['content'];

      // Remove the common domain from all URLs
      $select[$key]['url'] =
preg_replace($repStr, '', $row['url']); // Format non-.html filenames into .html ones if ($row['content_mime'] == 'text/html') { $rq = explode('?', $select[$key]['url'], 2); if ($rq[0] == '' || $rq[0][strlen($rq[0]) - 1] == '/') $rq[0] .= 'index.html'; if (!preg_match('/\.html?$/', $rq[0])) $rq[0] .= '.html'; $select[$key]['url'] = implode('?', $rq); } } // Start JS file output ob_start(); ?> /* ******************************************************************** * Orcinus Site Search - Offline Javascript Search File * - Generated * - Requires mustache.js * */ function os_preg_quote(str, delimiter) { return (str + '').replace(new RegExp( '[.\\\\+*?\\[\\^\\]$(){}=!<>|:\\' + (delimiter || '') + '-]', 'g'), '\\$&' ); } // ***** Variable Migration let os_rdata = { s_latin: , s_filetypes: , s_category_list: , s_weights: }; Object.keys(os_rdata.s_weights).forEach(key => { os_rdata.s_weights[key] = parseFloat(os_rdata.s_weights[key]); }); let os_odata = { version: '', jw_compression: , s_limit_query: , s_limit_terms: , s_limit_term_length: , s_limit_matchtext: , s_show_filetype_html: , s_results_pagination: , s_limit_results: , s_result_template: }; let os_sdata = { terms: [], formatted: [], results: [], pages: 1, time: (new Date()).getTime() }; let os_request = {}; const os_params = new URLSearchParams(window.location.search); // ***** Page Object Constructor function os_page(content_mime, url, category, priority, title, description, keywords, weighted, content) { this.content_mime = content_mime; this.url = url; this.category = category; this.priority = parseFloat(priority); this.title = title; this.description = description; this.keywords = keywords; this.weighted = weighted; this.content = content; this.matchtext = []; this.relevance = 0; this.multi = -1; this.phrase = 0; } // ***** Search Database let os_crawldata = [ new os_page('', '', '', '', '', '', '', '', ''), ]; // ***** Return list of all pages for typeahead function os_return_all() { let fullList = []; for 
(let x = 0; x < os_crawldata.length; x++) { fullList.push({ title: os_crawldata[x].title, url: os_crawldata[x].url }); } return fullList; } // {{{{{ Create the Mustache template let os_TEMPLATE = { version: os_odata.version, searchable: false, addError: function(text) { if (!this.errors) { this.errors = {}; this.errors.error_list = []; } this.errors.error_list.push(text); } }; // Check if there are rows in the search database if (os_crawldata.length) { os_TEMPLATE.searchable = {}; os_TEMPLATE.searchable.form_action = window.location.pathname; os_TEMPLATE.searchable.limit_query = os_odata.s_limit_query; os_TEMPLATE.searchable.limit_term_length = os_odata.s_limit_term_length; os_request.c = os_params.get('c'); if (!os_request.c || !os_rdata.s_category_list[os_request.c]) os_request.c = ''; if (os_rdata.s_category_list.length > 2) { os_TEMPLATE.searchable.categories = {}; os_TEMPLATE.searchable.categories.category_list = []; Object.keys(os_rdata.s_category_list).forEach(category => { let cat = {}; cat.name = (category == '') ? 
'All Categories' : category; cat.value = category; cat.selected = (os_request.c == category); os_TEMPLATE.searchable.categories.category_list.push(cat); }); } os_request.q = os_params.get('q'); if (!os_request.q) os_request.q = ''; os_request.q = os_request.q.trim().replace(/\s/, ' ').replace(/ {2,}/, ' '); // If there is a text request if (os_request.q) { // If compression level is < 100, remove all quotation marks if (os_odata.jw_compression < 100) os_request.q = os_request.q.replace(/"/g, ''); if (os_request.q.length > os_odata.s_limit_query) { os_request.q = os_request.q.substring(0, os_odata.s_limit_query); os_TEMPLATE.addError('Search query truncated to maximum ' + os_odata.s_limit_query + ' characters'); } os_TEMPLATE.searchable.request_q = os_request.q; // Split request string on quotation marks (") let request = (' ' + os_request.q + ' ').split('"'); for (let x = 0; x < request.length && os_sdata.terms.length < os_odata.s_limit_terms; x++) { // Every second + 1 group of terms just a list of terms if (!(x % 2)) { // Split this list of terms on spaces request[x] = request[x].split(' '); for (let y = 0, t; y < request[x].length; y++) { t = request[x][y]; if (!t) continue // Leading + means important, a MUST match if (t[0] == '+') { // Just count it as a 'phrase' of one word, functionally equivalent os_sdata.terms.push(['phrase', t.substring(1), false]); // Leading - means negative, a MUST exclude } else if (t[0] == '-') { os_sdata.terms.push(['exclude', t.substring(1), false]); // Restrict to a specific filetype (not yet implemented) // Really, we'd only allow HTML, XML and PDF here, maybe JPG? 
} else if (t.toLowerCase().indexOf('filetype:') === 0) { t = t.substring(9).trim(); if (t && os_rdata.s_filetypes[t.toUpperCase()]) os_sdata.terms.push(['filetype', t, false]); // Else if the term is greater than the term length limit, add it } else if (t.length >= os_odata.s_limit_term_length) os_sdata.terms.push(['term', t, false]); } // Every second group of terms is a phrase, a MUST match } else os_sdata.terms.push(['phrase', request[x], false]); } // If we successfully procured some terms if (os_sdata.terms.length) { os_TEMPLATE.searchable.searched = {}; if (os_request.c != '') { os_TEMPLATE.searchable.searched.category = {}; os_TEMPLATE.searchable.searched.category.request_c = os_request.c; } // Prepare PCRE match text for each phrase and term let filetypes = []; for (let x = 0; x < os_sdata.terms.length; x++) { switch (os_sdata.terms[x][0]) { case 'filetype': os_sdata.formatted.push(os_sdata.terms[x][0] + ':' + os_sdata.terms[x][1]); if (os_rdata.s_filetypes[os_sdata.terms[x][1].toUpperCase()]) for (let z = 0; z < os_rdata.s_filetypes[os_sdata.terms[x][1].toUpperCase()].length; z++) filetypes.push(os_rdata.s_filetypes[os_sdata.terms[x][1].toUpperCase()][z]); break; case 'exclude': os_sdata.formatted.push('-' + os_sdata.terms[x][1]); break; case 'phrase': os_sdata.formatted.push('"' + os_sdata.terms[x][1] + '"'); case 'term': if (os_sdata.terms[x][0] == 'term') os_sdata.formatted.push(os_sdata.terms[x][1]); os_sdata.terms[x][2] = os_preg_quote(os_sdata.terms[x][1].toLowerCase(), '/'); Object.keys(os_rdata.s_latin).forEach(key => { for (let y = 0; y < os_rdata.s_latin[key].length; y++) os_sdata.terms[x][2] = os_sdata.terms[x][2].replace(os_rdata.s_latin[key][y], key); if (key.length > 1) { os_sdata.terms[x][2] = os_sdata.terms[x][2].replace(key, '(' + key + '|' + os_rdata.s_latin[key].join('|') + ')'); } else os_sdata.terms[x][2] = os_sdata.terms[x][2].replace(key, '[' + key + os_rdata.s_latin[key].join('') + ']'); }); os_sdata.terms[x][2] = new RegExp('(' + 
os_sdata.terms[x][2] + ')', 'igu'); } } // ***** There is never any cache, so do an actual search for (let y = os_crawldata.length - 1; y >= 0; y--) { if (filetypes.length) { for (let x = 0, allowMime = false; x < filetypes.length; x++) if (os_crawldata[y].content_mime == filetypes[x]) allowMime = true; if (!allowMime) { os_crawldata.splice(y, 1); continue; } } for (let x = 0; x < os_sdata.terms.length; x++) { addRelevance = 0; if (os_sdata.terms[x][0] == 'filetype') { } else if (os_sdata.terms[x][0] == 'exclude') { if (os_crawldata[y].title.match(os_sdata.terms[x][2]) || os_crawldata[y].description.match(os_sdata.terms[x][2]) || os_crawldata[y].keywords.match(os_sdata.terms[x][2]) || os_crawldata[y].weighted.match(os_sdata.terms[x][2]) || os_crawldata[y].content.match(os_sdata.terms[x][2])) os_crawldata.splice(y, 1); } else if (os_sdata.terms[x][0] == 'phrase' || os_sdata.terms[x][0] == 'term') { if (os_sdata.terms[x][0] == 'phrase') os_crawldata[y].phrase++; if (os_crawldata[y].title.match(os_sdata.terms[x][2])) addRelevance += os_rdata.s_weights.title; if (os_crawldata[y].description.match(os_sdata.terms[x][2])) addRelevance += os_rdata.s_weights.description; if (os_crawldata[y].keywords.match(os_sdata.terms[x][2])) addRelevance += os_rdata.s_weights.keywords; if (os_crawldata[y].weighted.match(os_sdata.terms[x][2])) addRelevance += os_rdata.s_weights.css_value; if (os_crawldata[y].content.match(os_sdata.terms[x][2])) addRelevance += os_rdata.s_weights.body; if (addRelevance) { os_crawldata[y].multi++; } else if (os_sdata.terms[x][0] == 'phrase') os_crawldata.splice(y, 1); } } if (addRelevance) { os_crawldata[y].relevance += addRelevance; // Calculate multipliers os_crawldata[y].relevance *= Math.pow(os_rdata.s_weights.multi, os_crawldata[y].multi); os_crawldata[y].relevance *= Math.pow(os_rdata.s_weights.important, os_crawldata[y].phrase); os_crawldata[y].relevance *= os_crawldata[y].priority; } } // Sort the list by relevance value 
os_crawldata.sort(function(a, b) { if (a.relevance == b.relevance) return 0; return (b.relevance > a.relevance) ? 1 : -1; }); // Normalize results from 0 - 100 and delete results with // relevance values < 5% of the top result for (let x = os_crawldata.length - 1; x >= 0; x--) { if (os_crawldata[0].relevance * 0.05 <= os_crawldata[x].relevance) { os_crawldata[x].relevance /= os_crawldata[0].relevance * 0.01; } else os_crawldata.splice(x, 1); } // The final results list is the top slice of this data // limited by the 's_limit_results' value os_sdata.results = os_crawldata.slice(0, os_odata.s_limit_results); // Now loop through the remaining results to generate the // proper match text for each for (let x = 0; x < os_sdata.results.length; x++) { // Add the page description to use as a default match text if (os_sdata.results[x].description.trim()) { os_sdata.results[x].matchtext.push({ rank: 0, text: os_sdata.results[x].description.substring(0, os_odata.s_limit_matchtext) }); } // Loop through each term to capture matchtexts for (let y = 0; y < os_sdata.terms.length; y++) { switch (os_sdata.terms[y][0]) { case 'filetype': break; case 'exclude': break; case 'phrase': case 'term': // Split the content on the current term let splitter = os_sdata.results[x].content.split(os_sdata.terms[y][2]); // For each match, gather the appropriate amount of match // text from either side of it for (let z = 0, caret = 0; z < splitter.length; z++) { caret += splitter[z].length; if (splitter[z].match(os_sdata.terms[y][2]) || splitter.length == 1) { let offset = 0; if (splitter.length == 1) { // Grab some random content if there were no // matches in the content let offset = Math.floor(Math.random() * os_sdata.results[x].content.length - os_odata.s_limit_matchtext); } else offset = Math.floor(Math.max(0, caret - (splitter[z].length + os_odata.s_limit_matchtext) / 2)); let match = os_sdata.results[x].content.substring(offset, offset + os_odata.s_limit_matchtext).trim(); // Add appropriate 
ellipses if (offset + ((splitter[z].length + os_odata.s_limit_matchtext) / 2) < os_sdata.results[x].content.length) match += "\u2026"; if (offset) match = "\u2026" + match; os_sdata.results[x].matchtext.push({ rank: 0, text: match }); } } } } // For each found match text, add a point for every time a // term is found in the match text; triple points for phrase // matches for (let y = 0; y < os_sdata.results[x].matchtext.length; y++) { for (let z = 0; z < os_sdata.terms.length; z++) { switch (os_sdata.terms[z][0]) { case 'filetype': break; case 'exclude': break; case 'phrase': case 'term': let points = os_sdata.results[x].matchtext[y].text.matchAll(os_sdata.terms[z][2]).length; // / (z + 1); if (os_sdata.terms[z][0] == 'phrase') points *= 3; os_sdata.results[x].matchtext[y].rank += points; } } } // Sort the match texts by score os_sdata.results[x].matchtext.sort(function(a, b) { if (b.rank == a.rank) return 0; return (b.rank > a.rank) ? 1 : -1; }); // Use the top-ranked match text as the official match text os_sdata.results[x].matchtext = os_sdata.results[x].matchtext[0].text; // Unset result values we no longer need so they don't // bloat memory unnecessarily os_sdata.results[x].content = null; os_sdata.results[x].keywords = null; os_sdata.results[x].weighted = null; os_sdata.results[x].multi = null; os_sdata.results[x].phrase = null; } // Limit os_request.page to within boundaries os_request.page = parseInt(os_params.get('page')); if (isNaN(os_request.page)) os_request.page = 1; os_request.page = Math.max(1, os_request.page); os_sdata.pages = Math.ceil(os_sdata.results.length / os_odata.s_results_pagination); os_request.page = Math.min(os_sdata.pages, os_request.page); // Get a slice of the results that corresponds to the current // search results pagination page we are on let resultsPage = os_sdata.results.slice( (os_request.page - 1) * os_odata.s_results_pagination, (os_request.page - 1) * os_odata.s_results_pagination + os_odata.s_results_pagination ); if 
(resultsPage.length) { os_TEMPLATE.searchable.searched.results = {}; os_TEMPLATE.searchable.searched.results.result_list = []; // Do a last once-over of the results for (let x = 0, result; x < resultsPage.length; x++) { result = {}; // Don't display filetype of HTML pages result.filetype = ''; Object.keys(os_rdata.s_filetypes).forEach(type => { for (let y = 0; y < os_rdata.s_filetypes[type].length; y++) if (resultsPage[x].content_mime == os_rdata.s_filetypes[type][y]) result.filetype = type; }); // Don't display filetype of HTML pages if (!os_odata.s_show_filetype_html) if (result.filetype == 'HTML') result.filetype = ''; if (result.filetype) result.filetype = '[' + result.filetype + ']'; // Don't display category if there's only one if (Object.keys(os_rdata.s_category_list).length > 2) { result.category = resultsPage[x].category; } else resultsPage[x].category = ''; // Format relevance result.relevance = Math.round(resultsPage[x].relevance * 100) / 100; // Highlight the terms in the title, url and matchtext result.title = resultsPage[x].title; result.url = resultsPage[x].url; result.matchtext = resultsPage[x].matchtext; result.description = resultsPage[x].description; result.title_highlight = resultsPage[x].title; result.url_highlight = resultsPage[x].url; result.matchtext_highlight = resultsPage[x].matchtext; result.description_highlight = resultsPage[x].description; for (let z = 0; z < os_sdata.terms.length; z++) { switch (os_sdata.terms[z][0]) { case 'filetype': break; case 'exclude': break; case 'phrase': case 'term': result.title_highlight = result.title_highlight.replace(os_sdata.terms[z][2], '$1'); result.url_highlight = result.url_highlight.replace(os_sdata.terms[z][2], '$1'); result.matchtext_highlight = result.matchtext_highlight.replace(os_sdata.terms[z][2], '$1'); result.description_highlight = result.description_highlight.replace(os_sdata.terms[z][2], '$1'); } } os_TEMPLATE.searchable.searched.results.result_list.push(result); } // If there are more 
than just one page of results, prepare all // the pagination variables for the template if (os_sdata.pages > 1) { let pagination = {}; pagination.page_gt1 = (os_request.page > 1); pagination.page_minus1 = os_request.page - 1; pagination.page_list = []; for (x = 1; x <= os_sdata.pages; x++) { let page = {}; page.index = x; page.current = (x == os_request.page); pagination.page_list.push(page); } pagination.page_ltpages = (os_request.page < os_sdata.pages); pagination.page_plus1 = os_request.page + 1; os_TEMPLATE.searchable.searched.results.pagination = pagination; } // Final numerical and stopwatch time values os_TEMPLATE.searchable.searched.results.from = Math.min(os_sdata.results.length, (os_request.page - 1) * os_odata.s_results_pagination + 1); os_TEMPLATE.searchable.searched.results.to = Math.min(os_sdata.results.length, os_request.page * os_odata.s_results_pagination); os_TEMPLATE.searchable.searched.results.of = os_sdata.results.length; // os_TEMPLATE.searchable.searched.results.in = Math.round(((new Date()).getTime() - os_sdata.time) / 10) / 100; } // No results } // No valid terms } // No request data } // No searchable pages in search database document.write(mustache.render( os_odata.s_result_template, os_TEMPLATE ));›×™*·,±_²°|≥!#$¢£+≤=•«%½»?"'-] $_JS = ob_get_contents(); ob_end_clean(); header('Content-type: text/javascript; charset='.strtolower($_ODATA['s_charset'])); header('Content-disposition: attachment; filename="offline-search.js"'); mb_convert_encoding($_JS, 'UTF-8', $_ODATA['s_charset']); die($_JS); } else $_SESSION['error'][] = 'Error reading from the search result database: '.$err[2]; break; // ***** Unknown 'os_submit' command default: header('Content-type: text/plain; charset='.strtolower($_ODATA['s_charset'])); var_dump($_POST); exit(); } header('Location: '.$_SERVER['REQUEST_URI']); exit(); // Normal POST request, but without 'os_submit' // These are usually triggered by a javascript form.submit() } else { // Set new Page Index pagination 
value if (!empty($_POST['os_index_hidden_pagination'])) { $_POST['os_index_hidden_pagination'] = (int)$_POST['os_index_hidden_pagination']; if (in_array($_POST['os_index_hidden_pagination'], $_RDATA['admin_pagination_options'])) { OS_setValue('admin_index_pagination', $_POST['os_index_hidden_pagination']); $_SESSION['index_page'] = 1; } header('Location: '.$_SERVER['REQUEST_URI']); exit(); } // Select a Page Index Category filter if (!empty($_POST['os_index_new_filter_category'])) { if (!empty($_RDATA['s_category_list'][$_POST['os_index_new_filter_category']])) { $_SESSION['index_filter_category'] = $_POST['os_index_new_filter_category']; $_SESSION['index_page'] = 1; } header('Location: '.$_SERVER['REQUEST_URI']); exit(); } // Select a Page Index Status filter if (!empty($_POST['os_index_new_filter_status'])) { if (in_array($_POST['os_index_new_filter_status'], $_RDATA['index_status_list'])) { $_SESSION['index_filter_status'] = $_POST['os_index_new_filter_status']; $_SESSION['index_page'] = 1; } header('Location: '.$_SERVER['REQUEST_URI']); exit(); } // Unknown POST command header('Content-type: text/plain; charset='.strtolower($_ODATA['s_charset'])); var_dump($_POST); exit(); } // Select a new Administration UI page } else if (!empty($_GET['page'])) { if (!empty($_RDATA['admin_pages'][$_GET['page']])) $_SESSION['admin_page'] = $_GET['page']; // Select a new page within the Page Index list } else if (isset($_GET['ipage'])) { $_GET['ipage'] = (int)$_GET['ipage']; $_SESSION['index_page'] = $_GET['ipage']; // User has requested to log out } else if (isset($_GET['logout'])) { $_SESSION = array(); $_SESSION['message'][] = 'You have been logged out.'; header('Location: '.$_SERVER['REQUEST_URI']); exit(); } // Perform pre-processing SQL actions that may trigger // $_SESSION errors switch ($_SESSION['admin_page']) { case 'crawler': // Get list of domains from the starting URLs $_RDATA['sp_starting'] = array_filter(array_map('trim', explode("\n", $_ODATA['sp_starting']))); 
$_RDATA['s_starting_domains'] = array();
      foreach ($_RDATA['sp_starting'] as $starting) {
        $starting = parse_url($starting);
        if (!empty($starting['host']))
          $_RDATA['s_starting_domains'][] = $starting['host'];
      }

      // array_unique() preserves keys; reindex so the list stays sequential
      $_RDATA['s_starting_domains'] = array_values(array_unique($_RDATA['s_starting_domains']));

      // With exactly one starting domain it is also the sitemap hostname
      if (count($_RDATA['s_starting_domains']) == 1)
        OS_setValue('sp_sitemap_hostname', $_RDATA['s_starting_domains'][0]);

      break;

    case 'index':
      $_RDATA['page_index_rows'] = false;
      $_RDATA['page_index_found_rows'] = false;

      if ($_RDATA['s_crawldata_info']['Rows']) {

        // ***** Select rows to populate the Page Index table
        // Each filter appears twice under different placeholder names:
        // once compared against its "no filter" value and once against
        // the column itself
        $indexRows = $_DDATA['pdo']->prepare(
          'SELECT SQL_CALC_FOUND_ROWS `url`, `url_base`, `title`, `category`, '.
          '`content_checksum`, `status`, `status_noindex`, `flag_unlisted`, '.
          '`flag_updated`, `priority` '.
          'FROM `'.$_DDATA['tbprefix'].'crawldata` '.
          'WHERE (:text1=\'\' OR `url` LIKE :text2) '.
          'AND (:category1=\'\' OR `category`=:category2) '.
          'AND (:status1=\'\' OR `status`=:status2) '.
          'AND (:flag_unlisted1=\'any\' OR `flag_unlisted`=:flag_unlisted2) '.
          'AND (:flag_updated1=\'any\' OR `flag_updated`=:flag_updated2) '.
          'ORDER BY `url_sort` LIMIT :offset, :pagination;'
        );

        $text = ($_SESSION['index_filter_text']) ? trim($_SESSION['index_filter_text']) : '';
        $category = ($_SESSION['index_filter_category'] != '') ? $_SESSION['index_filter_category'] : '';

        // 'OK' and 'Orphan' filter on the `status` column; 'Unlisted' and
        // 'Updated' filter on their own flag columns instead
        if ($_SESSION['index_filter_status'] == 'OK' || $_SESSION['index_filter_status'] == 'Orphan') {
          $status = $_SESSION['index_filter_status'];
        } else $status = '';
        $unlisted = ($_SESSION['index_filter_status'] == 'Unlisted') ? 1 : 'any';
        $updated = ($_SESSION['index_filter_status'] == 'Updated') ? 1 : 'any';

        // A page number below 1 would produce a negative LIMIT offset and
        // fail the query, so clamp it before computing the offset
        $_SESSION['index_page'] = max(1, (int)$_SESSION['index_page']);
        $_RDATA['page_index_offset'] = ($_SESSION['index_page'] - 1) * $_ODATA['admin_index_pagination'];

        // :text1 carries the raw filter text so the ='' test can succeed;
        // only :text2 is wrapped in LIKE wildcards
        $indexParams = array(
          'text1' => $text,
          'text2' => '%'.$text.'%',
          'category1' => $category,
          'category2' => $category,
          'status1' => $status,
          'status2' => $status,
          'flag_unlisted1' => $unlisted,
          'flag_unlisted2' => $unlisted,
          'flag_updated1' => $updated,
          'flag_updated2' => $updated
        );
        foreach ($indexParams as $name => $value)
          $indexRows->bindValue(':'.$name, $value);

        // LIMIT arguments are bound as integers so the statement is also
        // valid when the driver emulates prepared statements
        $indexRows->bindValue(':offset', (int)$_RDATA['page_index_offset'], PDO::PARAM_INT);
        $indexRows->bindValue(':pagination', (int)$_ODATA['admin_index_pagination'], PDO::PARAM_INT);
        $indexRows->execute();

        $err = $indexRows->errorInfo();
        if ($err[0] == '00000') {
          $_RDATA['page_index_rows'] = $indexRows->fetchAll();

          // Total number of rows matching the filters, ignoring LIMIT
          $foundRows = $_DDATA['pdo']->query('SELECT FOUND_ROWS();');
          $err = $foundRows->errorInfo();
          if ($err[0] == '00000') {
            $foundRows = $foundRows->fetchAll(PDO::FETCH_NUM);
            if (count($foundRows)) {
              $_RDATA['page_index_found_rows'] = $foundRows[0][0];
              $_RDATA['index_pages'] = (int)ceil($_RDATA['page_index_found_rows'] / $_ODATA['admin_index_pagination']);

              // If the requested page is outside page limit
              if ($_SESSION['index_page'] != 1 && ($_SESSION['index_page'] > $_RDATA['index_pages'] || $_SESSION['index_page'] < 1)) {
                $_SESSION['index_page'] = max(1, min($_RDATA['index_pages'], (int)$_SESSION['index_page']));

                // Redirect to a page within the limits. Any existing query
                // string is dropped first: appending '?ipage=' to a URI
                // that already contains '?ipage=N' made the next request
                // parse the old, out-of-range N again and redirect forever
                $redirectPath = explode('?', $_SERVER['REQUEST_URI'], 2)[0];
                header('Location: '.$redirectPath.'?ipage='.$_SESSION['index_page']);
                exit();
              }

            } else $_SESSION['error'][] = 'Database did not return a search table row count.';
          } else $_SESSION['error'][] = 'Database error reading search table row count: '.$err[2];
        } else $_SESSION['error'][] = 'Database error reading search table: '.$err[2];
      } else $_SESSION['message'][] = 'The search database is currently empty.';
      break;

    case 'search':

      // Average hits per hour: First find the oldest `stamp` in the
      // database, then base all averages on the difference between that
      // time and now; also get average number of results
      $_RDATA['s_hours_since_oldest_hit'] = 0;
      $_RDATA['s_hits_per_hour']
= 0; $_RDATA['q_average_results'] = 0; $hits = $_DDATA['pdo']->query( 'SELECT MIN(`stamp`) AS `oldest`, COUNT(*) AS `hits`, AVG(`results`) AS `average` FROM `'.$_DDATA['tbprefix'].'query`;' ); $err = $hits->errorInfo(); if ($err[0] == '00000') { $hits = $hits->fetchAll(); if (count($hits) && !is_null($hits[0]['oldest']) && !is_null($hits[0]['hits'])) { $_RDATA['s_hours_since_oldest_hit'] = (time() - $hits[0]['oldest']) / 3600; $_RDATA['s_hits_per_hour'] = $hits[0]['hits'] / $_RDATA['s_hours_since_oldest_hit']; $_RDATA['q_average_results'] = $hits[0]['average']; } } else $_SESSION['error'][] = 'Could not read hit counts from query log.'; // Median number of results $_RDATA['q_median_results'] = 0; $median = $_DDATA['pdo']->query( 'SELECT `results` FROM `'.$_DDATA['tbprefix'].'query` ORDER BY `results`;' ); $err = $median->errorInfo(); if ($err[0] == '00000') { $median = $median->fetchAll(); if (count($median)) { $index = floor(count($median) / 2); if (count($median) & 1) { $_RDATA['q_median_results'] = $median[$index]['results']; } else { $_RDATA['q_median_results'] = ($median[$index - 1]['results'] + $median[$index]['results']) / 2; } } } else $_SESSION['error'][] = 'Could not read result counts from query log.'; break; case 'queries': $_RDATA['query_log_rows'] = false; $queries = $_DDATA['pdo']->query( 'SELECT *, INET_NTOA(`ip`) AS `ipaddr` FROM `'.$_DDATA['tbprefix'].'query` AS `t` INNER JOIN ( SELECT `query`, COUNT(`query`) AS `hits`, REGEXP_REPLACE(`query`, \'^[[:punct:]]+\', \'\') AS `alpha`, MAX(`stamp`) AS `last_hit`, AVG(`results`) AS `avg_results` FROM `'.$_DDATA['tbprefix'].'query` GROUP BY `query` ) AS `s` ON `s`.`query`=`t`.`query` AND `s`.`last_hit`=`t`.`stamp` ORDER BY `s`.`alpha` ASC;' ); $err = $queries->errorInfo(); if ($err[0] == '00000') { $_RDATA['query_log_rows'] = $queries->fetchAll(); if (count($_RDATA['query_log_rows'])) { $x = 0; // Add the `alpha` sort order as an index foreach ($_RDATA['query_log_rows'] as $key => $query) 
$_RDATA['query_log_rows'][$key]['rownum'] = $x++; // On first load, sort list by # of hits usort($_RDATA['query_log_rows'], function($a, $b) { return $b['hits'] - $a['hits']; }); } else $_SESSION['message'][] = 'The query log is currently empty.'; } else $_SESSION['error'][] = 'Database error reading query log table: '.$err[2]; } } // Not logged in ?> Orcinus Site Search <?php echo $_ODATA['version']; ?> - Administration

Orcinus Site Search

Crawler Management

Crawl Information

  • Warning: The previous crawl did not complete successfully. Please check the crawl log for more details.

  • Crawler has not yet been run. Choose your settings and run your first crawl by using the button in the top menu bar.

Crawl Administration

  • Crawl Scheduling

    Automatic crawls are triggered by people visiting your search page. To allow crawls at any time, set these both to the same time.

  • Send Email on...

    >
    >

    Warning: PHPMailer could not be found or loaded. The application will not be able to send mail until it is installed correctly.

Sitemap Settings

  • Warning: Target sitemap file doesn't exist. Please create it.

    Warning: Target sitemap file is not writable. Please adjust permissions.

  • 1) { ?>

Crawl Settings

  • Options:
    • >
    • >
    • >
  • Timeouts & Delay

  • Maximum Limits

  • Link Filters

  • Categories

    Usually you'll want all your indexed pages in just one category. In some cases however, you may want to offer users an additional way to restrict results by putting groups of pages into multiple categories. You can set page categories from the Page Index.

  • Content Filters

Page Index

$_ODATA['admin_index_pagination']) { ?>
2) { ?> $row) { ?> 2) { ?>

Filters:

2) { ?>
URL Showing pages of Category Status Priority
Per page:
Updated
Unlisted

Search Management

Search Information

    = 1) { ?>
  • = 24) { ?>
  • = 168) { ?>
  • No searches logged yet. To see search statistics here, start using your search engine. Tell your friends!

Query Log & Cache

  • The query log is a rolling log of searches on which the statistics above are based. Longer query log periods will give more accurate statistics, but also require more database space. (max: 255 days)

Offline Search Javascript

    1) { ?>

Search Settings

  • Query Limits

  • Match Weighting

    Additive Information
    Multipliers Information
  • Result Output

    The Output Encoding value should match the encoding of your search results page, and ideally match the character encoding of most of your crawled pages. UTF-8 is strongly recommended.

    Options:
    • >
    • >

Search Result Template

  • This template uses the Mustache templating system. See the Mustache manual for more information. To restore the default template, submit a blank textarea.

Query Log

country($query['ipaddr']); } catch(Exception $e) { $query['geo'] = false; } } ?>
Query Sort Hits Sort Results Sort Last Requested Sort
raw['country']['iso_code'])) { if (file_exists(__DIR__.'/img/flags/'.strtolower($query['geo']->raw['country']['iso_code']).'.png')) { $flag = 'img/flags/'.strtolower($query['geo']->raw['country']['iso_code']).'.png'; $title = $query['geo']->raw['country']['names']['en']; $classname = 'svg-icon-flag'; } else { // Missing flag $flag = 'img/help.svg'; $title = $query['geo']->raw['country']['names']['en']; $classname = 'svg-icon'; } ?> <?php echo htmlspecialchars($title); ?>

Welcome

Log In