> , > , > , ago query( 'SHOW TABLE STATUS LIKE \''.$_DDATA['tbprefix'].'%\';' ); $err = $tableinfo->errorInfo(); if ($err[0] == '00000') { $tableinfo = $tableinfo->fetchAll(); foreach ($tableinfo as $table) { switch ($table['Name']) { case $_DDATA['tbprefix'].'config': $_RDATA['s_config_info'] = $table; break; case $_DDATA['tbprefix'].'crawldata': $_RDATA['s_crawldata_info'] = $table; break; case $_DDATA['tbprefix'].'query': $_RDATA['s_query_info'] = $table; } } } else $_SESSION['error'][] = 'Could not read search database status.'; // Search Database Charsets $charsets = $_DDATA['pdo']->query( 'SELECT `content_charset`, COUNT(*) as `num` FROM `'.$_DDATA['tbprefix'].'crawldata` GROUP BY `content_charset` ORDER BY `num` DESC;' ); $err = $charsets->errorInfo(); if ($err[0] == '00000') { $charsets = $charsets->fetchAll(); foreach ($charsets as $row) { if (!$row['content_charset']) $row['content_charset'] = ''; $_RDATA['s_crawldata_info']['Charsets'][$row['content_charset']] = $row['num']; } } else $_SESSION['error'][] = 'Could not read charset counts from search database.'; // ***** Other runtime data $_RDATA['admin_pagination_options'] = array(25, 50, 100, 250, 500, 1000); if (!in_array($_ODATA['admin_index_pagination'], $_RDATA['admin_pagination_options'])) OS_setValue('admin_index_pagination', 100); $_RDATA['admin_pages'] = array( 'crawler' => 'Crawler', 'index' => 'Page Index', 'search' => 'Search' ); if ($_ODATA['s_limit_query_log']) $_RDATA['admin_pages']['queries'] = 'Query Log'; $_RDATA['index_status_list'] = array( '', 'OK', 'Orphan', 'Updated', 'Unlisted' ); // ***** Set session defaults if (empty($_SESSION['admin_page']) || empty($_RDATA['admin_pages'][$_SESSION['admin_page']])) $_SESSION['admin_page'] = 'crawler'; if (empty($_SESSION['index_page'])) $_SESSION['index_page'] = 1; if (empty($_SESSION['index_filter_category'])) $_SESSION['index_filter_category'] = ''; if (empty($_SESSION['index_filter_status'])) $_SESSION['index_filter_status'] = ''; if 
(empty($_SESSION['index_filter_text'])) $_SESSION['index_filter_text'] = '';
if (empty($_SESSION['admin_username'])) $_SESSION['admin_username'] = '';


/* ***** Login, then POST handling **********************************
 * While nobody is logged in, the only POST that is acted upon is the
 * login form. Once logged in, POST requests are either JSON requests
 * (answered with a JSON document or a file download, after which the
 * script terminates) or normal form submissions.
 */
if (!$_SESSION['admin_username']) {

  if ($_SERVER['REQUEST_METHOD'] == 'POST') {
    if (!empty($_POST['os_submit']) && $_POST['os_submit'] == 'os_admin_login') {

      // Credentials must be plain strings; array-valued fields such as
      // os_admin_username[]=x are treated as empty
      if (empty($_POST['os_admin_username']) || !is_string($_POST['os_admin_username']))
        $_POST['os_admin_username'] = '';
      if (empty($_POST['os_admin_password']) || !is_string($_POST['os_admin_password']))
        $_POST['os_admin_password'] = '';

      // hash_equals() compares byte-for-byte in constant time. A loose
      // `==` treats numeric-looking strings as numbers, so '0e1' would
      // match '0e2' and '1e3' would match '1000'. Both checks are
      // evaluated before they are combined.
      $os_user_ok = hash_equals((string)$_RDATA['admin_username'], $_POST['os_admin_username']);
      $os_pass_ok = hash_equals((string)$_RDATA['admin_password'], $_POST['os_admin_password']);

      if ($os_user_ok && $os_pass_ok) {

        // Issue a fresh session ID for the authenticated session so a
        // pre-login ID cannot be reused (session fixation)
        if (session_status() === PHP_SESSION_ACTIVE) session_regenerate_id(true);

        $_SESSION['admin_username'] = $_RDATA['admin_username'];
        $_SESSION['admin_page'] = 'crawler';

        // Redirect back to the requested URI and stop
        header('Location: '.$_SERVER['REQUEST_URI']);
        exit();

      } else $_SESSION['error'][] = 'Invalid username or password.';
    }
  }

} else {

  /* ***** Handle POST Requests ************************************** */
  if ($_SERVER['REQUEST_METHOD'] == 'POST') {

    // Compare the media type only: the header may be absent, may differ
    // in case, and may carry parameters ("application/json; charset=utf-8")
    $os_content_type = isset($_SERVER['CONTENT_TYPE']) ? (string)$_SERVER['CONTENT_TYPE'] : '';
    $os_content_type = strtolower(trim(explode(';', $os_content_type)[0]));

    // JSON POST request
    // These are usually sent by javascript fetch()
    if ($os_content_type == 'application/json') {
      $postBody = (string)file_get_contents('php://input');
      $_POST = json_decode($postBody, false);

      // json_decode() returns null for malformed input and a scalar or
      // array for non-object documents; the property accesses below
      // need an object
      if (!is_object($_POST)) $_POST = new stdClass();

      $response = array();

      if (empty($_POST->action)) $_POST->action = '';
      switch ($_POST->action) {

        // Set the key for initiating the crawler
        case 'setkey':
          if (!$_ODATA['sp_crawling']) {

            // 32 hex characters, the same shape as an md5() digest, but
            // drawn from the CSPRNG instead of a clock reading
            $md5 = bin2hex(random_bytes(16));
            OS_setValue('sp_key', $md5);
            OS_setValue('sp_log', '');
            OS_setValue('sp_progress', '0/1');

            $response = array(
              'status' => 'Success',
              'message' => 'Key set to initiate crawler',
              'sp_key' => $md5
            );

          } else {
            $response = array(
              'status' => 'Error',
              'message' => 'Crawler is already running; current progress: '.$_ODATA['sp_progress']
            );
          }
          break;

        // Download a text file of the most recent crawl or query log
        case 'download':
          if (empty($_POST->content)) $_POST->content = '';
          switch ($_POST->content) {

            case 'crawl_log':
              if (!$_ODATA['sp_crawling']) {
                if ($_ODATA['sp_time_end']) {
                  $lines = explode("\n", $_ODATA['sp_log']);

                  // 'all' keeps every line, 'errors' keeps lines starting
                  // with '[' or '*', anything else keeps lines starting
                  // with '[', '*' or a word character
                  if (empty($_POST->grep) || !is_string($_POST->grep)) $_POST->grep = '';
                  switch ($_POST->grep) {
                    case 'all':
                      break;
                    case 'errors':
                      $lines = preg_grep('/^[\[\*]/', $lines);
                      break;
                    default:
                      $lines = preg_grep('/^[\[\*\w\d]/', $lines);
                  }

                  // The value becomes part of a response header: keep
                  // only filename-safe characters
                  $_POST->grep = preg_replace('/[^A-Za-z0-9_-]/', '', $_POST->grep);
                  if ($_POST->grep) $_POST->grep = '-'.$_POST->grep;

                  header('Content-type: text/plain; charset='.strtolower($_ODATA['s_charset']));
                  header('Content-disposition: attachment; filename="'.
                    'crawl-log'.$_POST->grep.'_'.date('Y-m-d', (int)$_ODATA['sp_time_end']).'.txt"');
                  die(implode("\n", $lines));

                } else {
                  $response = array(
                    'status' => 'Error',
                    'message' => 'Crawler has not run yet; no log to download'
                  );
                }
              } else {
                $response = array(
                  'status' => 'Error',
                  'message' => 'Currently crawling; try again later'
                );
              }
              break;

            case 'query_log':
              $querylog = $_DDATA['pdo']->query(
                'SELECT `query`, `results`, `stamp`, INET_NTOA(`ip`) AS `ipaddr` FROM `'.$_DDATA['tbprefix'].'query` ORDER BY `stamp` DESC;'
              );

              // query() returns false instead of a statement when PDO is
              // not in exception mode; read the error from the
              // connection in that case
              $err = ($querylog) ? $querylog->errorInfo() : $_DDATA['pdo']->errorInfo();
              if ($querylog && $err[0] == '00000') {

                // Name-keyed rows only, so each selected column is
                // written to the CSV exactly once
                $querylog = $querylog->fetchAll(PDO::FETCH_ASSOC);
                if (count($querylog)) {

                  header('Content-type: text/csv; charset='.strtolower($_ODATA['s_charset']));
                  header('Content-disposition: attachment; filename="'.
                    'query-log_'.date('Y-m-d').'.csv"');

                  $output = fopen('php://output', 'w');

                  $headings = array('Query', 'Results', 'Time Stamp', 'IP');
                  if ($_GEOIP2) $headings[] = 'Country';
                  fputcsv($output, $headings);

                  // NOTE(review): the `query` column is visitor-supplied
                  // text; spreadsheet applications may interpret cells
                  // starting with = + - @ as formulas -- confirm whether
                  // that needs neutralising for this export
                  foreach ($querylog as $line) {
                    $line['stamp'] = date('c', (int)$line['stamp']);

                    if ($_GEOIP2) {
                      try {
                        $geo = $_GEOIP2->country($line['ipaddr']);
                      } catch (Exception $e) {
                        $geo = false;
                      }

                      // Always emit the column so that every row lines
                      // up with the 'Country' heading
                      $line['country'] = ($geo) ? $geo->raw['country']['names']['en'] : '';
                    }

                    fputcsv($output, $line);
                  }

                  fclose($output);
                  die();

                } else {
                  $response = array(
                    'status' => 'Error',
                    'message' => 'The query log is empty; nothing to download'
                  );
                }
              } else {
                $response = array(
                  'status' => 'Error',
                  'message' => 'Could not read the query log database'
                );
              }
              break;

            default:
              $response = array(
                'status' => 'Error',
                'message' => 'Invalid content selected to download'
              );

          }
          break;

        // Not used?
case 'fetch':
  // Returns one non-empty $_ODATA entry, selected by name, in the
  // JSON response.
  // NOTE(review): any entry name is accepted here; if $_ODATA holds
  // values that should not leave the server, restrict this to a list
  // of allowed names -- confirm against the callers.

  // Only a string can be an array offset here, and only a scalar
  // value can be trimmed
  if (empty($_POST->value) || !is_string($_POST->value)) $_POST->value = '';
  if (!empty($_ODATA[$_POST->value]) && is_scalar($_ODATA[$_POST->value])) {
    $response = array(
      'status' => 'Success',
      'message' => trim((string)$_ODATA[$_POST->value])
    );
  } else {
    $response = array(
      'status' => 'Error',
      'message' => 'Invalid value selected to fetch'
    );
  }

}

// Every JSON request that did not already terminate with a download
// ends here with a JSON document
header('Content-type: application/json; charset='.strtolower($_ODATA['s_charset']));
die(json_encode($response, JSON_INVALID_UTF8_IGNORE));


// Normal POST request
} else if (!empty($_POST['os_submit'])) {
  switch ($_POST['os_submit']) {

    // ***** Crawler >> Settings
    case 'os_sp_crawl_config':

      // Starting URLs: one per line. Form fields can arrive as arrays
      // (os_sp_starting[]=x); only strings are processed.
      if (isset($_POST['os_sp_starting']) && is_string($_POST['os_sp_starting'])) {

        // Normalise line endings, collapse blank lines, cap the length
        $os_starting = str_replace("\r\n", "\n", trim($_POST['os_sp_starting']));
        $os_starting = preg_replace('/\n+/', "\n", $os_starting);
        $os_starting = substr($os_starting, 0, 4095);
        $_POST['os_sp_starting'] = explode("\n", $os_starting);

        foreach ($_POST['os_sp_starting'] as $key => $starting) {

          // Spaces are protected as %20 while the URL filter runs, the
          // #fragment is dropped, then %20 is turned back into a space
          $starting = preg_replace(
            '/#.*$/',
            '',
            filter_var(
              str_replace(' ', '%20', $starting),
              FILTER_SANITIZE_URL
            )
          );
          $_POST['os_sp_starting'][$key] = str_replace('%20', ' ', $starting);
        }

        // Keep only entries that have both a scheme and an authority
        $_POST['os_sp_starting'] = array_filter($_POST['os_sp_starting'], function($a) {
          return preg_match('/^(([^:\/?#]+):)(\/\/([^\/?#]+))([^?#]*)(\?([^#]*))?(#(.*))?/', $a);
        });

        // Nothing usable left: fall back to the install domain
        if (!count($_POST['os_sp_starting'])) {
          $_POST['os_sp_starting'][] = $_ODATA['admin_install_domain'].'/';
          $_SESSION['error'][] = 'Cannot have an empty or invalid Starting URLs field.';
        }

        OS_setValue('sp_starting', implode("\n", $_POST['os_sp_starting']));
      }

      if (isset($_POST['os_sp_useragent']) && is_string($_POST['os_sp_useragent'])) {
        $_POST['os_sp_useragent'] = filter_var($_POST['os_sp_useragent'], FILTER_SANITIZE_SPECIAL_CHARS);

        // The filter writes special characters as numeric entities
        // (&#NN;). Cutting at 255 bytes can split one; drop the
        // dangling piece rather than store a broken entity.
        OS_setValue('sp_useragent', preg_replace('/&#?\d*$/', '', substr($_POST['os_sp_useragent'], 0, 255)));
      }

      // Checkbox: on only when the field is exactly the string '1'
      $_POST['os_sp_cookies'] = (isset($_POST['os_sp_cookies']) && $_POST['os_sp_cookies'] === '1') ? 1 : 0;
      OS_setValue('sp_cookies', $_POST['os_sp_cookies']);
// Remaining crawl settings. Checkboxes are on only when the field is
// exactly the string '1'. Numeric settings are cast to int and clamped
// to the range given in each max()/min() pair. Text settings are
// processed only when they arrive as strings, because form fields can
// also arrive as arrays (field[]=x).
$_POST['os_sp_ifmodifiedsince'] = (isset($_POST['os_sp_ifmodifiedsince']) && $_POST['os_sp_ifmodifiedsince'] === '1') ? 1 : 0;
OS_setValue('sp_ifmodifiedsince', $_POST['os_sp_ifmodifiedsince']);

$_POST['os_sp_autodelete'] = (isset($_POST['os_sp_autodelete']) && $_POST['os_sp_autodelete'] === '1') ? 1 : 0;
OS_setValue('sp_autodelete', $_POST['os_sp_autodelete']);

if (isset($_POST['os_sp_timeout_url'])) {
  $_POST['os_sp_timeout_url'] = max(1, min(65535, (int)$_POST['os_sp_timeout_url']));
  OS_setValue('sp_timeout_url', (int)$_POST['os_sp_timeout_url']);
}

if (isset($_POST['os_sp_timeout_crawl'])) {
  $_POST['os_sp_timeout_crawl'] = max(1, min(65535, (int)$_POST['os_sp_timeout_crawl']));
  OS_setValue('sp_timeout_crawl', (int)$_POST['os_sp_timeout_crawl']);
}

if (isset($_POST['os_sp_sleep'])) {
  $_POST['os_sp_sleep'] = max(0, min(65535, (int)$_POST['os_sp_sleep']));
  OS_setValue('sp_sleep', (int)$_POST['os_sp_sleep']);
}

if (isset($_POST['os_sp_limit_crawl'])) {
  $_POST['os_sp_limit_crawl'] = max(1, min(65535, (int)$_POST['os_sp_limit_crawl']));
  OS_setValue('sp_limit_crawl', (int)$_POST['os_sp_limit_crawl']);
}

if (isset($_POST['os_sp_limit_store'])) {
  $_POST['os_sp_limit_store'] = max(1, min(65535, (int)$_POST['os_sp_limit_store']));
  OS_setValue('sp_limit_store', (int)$_POST['os_sp_limit_store']);
}

if (isset($_POST['os_sp_limit_depth'])) {
  $_POST['os_sp_limit_depth'] = max(1, min(255, (int)$_POST['os_sp_limit_depth']));
  OS_setValue('sp_limit_depth', (int)$_POST['os_sp_limit_depth']);
}

if (isset($_POST['os_sp_limit_filesize'])) {
  $_POST['os_sp_limit_filesize'] = max(1, min(65535, (int)$_POST['os_sp_limit_filesize']));
  OS_setValue('sp_limit_filesize', (int)$_POST['os_sp_limit_filesize']);
}

// Line-based lists: normalise line endings, collapse blank lines, cap
// the length at 4095 bytes, then sanitise each line
if (isset($_POST['os_sp_require_url']) && is_string($_POST['os_sp_require_url'])) {
  $os_lines = str_replace("\r\n", "\n", trim($_POST['os_sp_require_url']));
  $os_lines = preg_replace('/\n+/', "\n", $os_lines);
  $os_lines = substr($os_lines, 0, 4095);
  $_POST['os_sp_require_url'] = explode("\n", $os_lines);
  foreach ($_POST['os_sp_require_url'] as $key => $required)
    $_POST['os_sp_require_url'][$key] = filter_var($required, FILTER_SANITIZE_URL);
  OS_setValue('sp_require_url', implode("\n", $_POST['os_sp_require_url']));
}

if (isset($_POST['os_sp_ignore_url']) && is_string($_POST['os_sp_ignore_url'])) {
  $os_lines = str_replace("\r\n", "\n", trim($_POST['os_sp_ignore_url']));
  $os_lines = preg_replace('/\n+/', "\n", $os_lines);
  $os_lines = substr($os_lines, 0, 4095);
  $_POST['os_sp_ignore_url'] = explode("\n", $os_lines);
  foreach ($_POST['os_sp_ignore_url'] as $key => $ignored)
    $_POST['os_sp_ignore_url'][$key] = filter_var($ignored, FILTER_SANITIZE_URL);
  OS_setValue('sp_ignore_url', implode("\n", $_POST['os_sp_ignore_url']));
}

// Space-separated lists: drop every character outside the allowed set,
// then squeeze runs of spaces down to one
if (isset($_POST['os_sp_ignore_ext']) && is_string($_POST['os_sp_ignore_ext'])) {
  $_POST['os_sp_ignore_ext'] = preg_replace(
    array('/[^\w\d\. _-]/', '/ {2,}/'),
    array('', ' '),
    trim($_POST['os_sp_ignore_ext'])
  );
  OS_setValue('sp_ignore_ext', substr($_POST['os_sp_ignore_ext'], 0, 4095));
}

if (isset($_POST['os_sp_ignore_css']) && is_string($_POST['os_sp_ignore_css'])) {
  $_POST['os_sp_ignore_css'] = preg_replace(
    array('/[^\w\d\. #_:-]/', '/ {2,}/'),
    array('', ' '),
    trim($_POST['os_sp_ignore_css'])
  );
  OS_setValue('sp_ignore_css', substr($_POST['os_sp_ignore_css'], 0, 4095));
}

if (isset($_POST['os_sp_title_strip']) && is_string($_POST['os_sp_title_strip'])) {
  $os_lines = str_replace("\r\n", "\n", trim($_POST['os_sp_title_strip']));
  $os_lines = preg_replace('/\n+/', "\n", $os_lines);
  $os_lines = substr($os_lines, 0, 4095);
  $_POST['os_sp_title_strip'] = explode("\n", $os_lines);
  foreach ($_POST['os_sp_title_strip'] as $key => $strip)
    $_POST['os_sp_title_strip'][$key] = filter_var($strip, FILTER_SANITIZE_SPECIAL_CHARS);
  OS_setValue('sp_title_strip', implode("\n", $_POST['os_sp_title_strip']));
}

$_SESSION['message'][] = 'Crawl settings have been saved.';
break;


// ***** Crawler >> Administration
case 'os_admin_config':
  if (isset($_POST['os_sp_interval'])) {
    $_POST['os_sp_interval'] = max(0, min(255, (int)$_POST['os_sp_interval']));
    OS_setValue('sp_interval', (int)$_POST['os_sp_interval']);
  }

  // Times must be exactly HH:MM or HH:MM:SS with in-range fields. The
  // pattern is anchored at both ends (`D` keeps `$` from matching
  // before a trailing newline), so a value that merely contains a
  // time somewhere is rejected instead of being stored as-is.
  if (isset($_POST['os_sp_interval_start'])) {
    if (is_string($_POST['os_sp_interval_start']) &&
        preg_match('/^([01]\d|2[0-3]):[0-5]\d(:[0-5]\d)?$/D', $_POST['os_sp_interval_start'])) {
      OS_setValue('sp_interval_start', $_POST['os_sp_interval_start']);
    } else $_SESSION['error'][] = 'Unexpected start time format.';
  }

  if (isset($_POST['os_sp_interval_stop'])) {
    if (is_string($_POST['os_sp_interval_stop']) &&
        preg_match('/^([01]\d|2[0-3]):[0-5]\d(:[0-5]\d)?$/D', $_POST['os_sp_interval_stop'])) {
      OS_setValue('sp_interval_stop', $_POST['os_sp_interval_stop']);
    } else $_SESSION['error'][] = 'Unexpected stop time format.';
  }

  // Strict membership test: only an exact identifier string is stored
  if (isset($_POST['os_sp_timezone']))
    if (in_array($_POST['os_sp_timezone'], timezone_identifiers_list(), true))
      OS_setValue('sp_timezone', $_POST['os_sp_timezone']);

  $_POST['os_sp_email_success'] = (isset($_POST['os_sp_email_success']) && $_POST['os_sp_email_success'] === '1') ? 1 : 0;
  OS_setValue('sp_email_success', $_POST['os_sp_email_success']);

  // The statement below continues past the end of this section of the
  // file; it is left exactly as found
  if (isset($_POST['os_sp_email_failure']) && $_POST['os_sp_email_failure']
== '1') { $_POST['os_sp_email_failure'] = 1; } else $_POST['os_sp_email_failure'] = 0; OS_setValue('sp_email_failure', $_POST['os_sp_email_failure']); if (isset($_POST['os_admin_email'])) { if ($_MAIL) { $_POST['os_admin_email'] = str_replace("\r\n", "\n", $_POST['os_admin_email']); $_POST['os_admin_email'] = preg_replace('/\n+/', "\n", $_POST['os_admin_email']); $_POST['os_admin_email'] = substr($_POST['os_admin_email'], 0, 4095); $_POST['os_admin_email'] = explode("\n", $_POST['os_admin_email']); foreach ($_POST['os_admin_email'] as $key => $admin_email) { $email = $_MAIL->parseAddresses($admin_email); if (count($email)) { if ($email[0]['name']) { $_POST['os_admin_email'][$key] = $email[0]['name'].' <'.$email[0]['address'].'>'; } else $_POST['os_admin_email'][$key] = $email[0]['address']; } else { $_SESSION['error'][] = 'Invalid To: email address \''.$admin_email.'\'.'; unset($_POST['os_admin_email'][$key]); } } OS_setValue('admin_email', implode("\n", array_values($_POST['os_admin_email']))); } else $_SESSION['error'][] = 'PHPMailer needs to be installed to parse new email addresses.'; } $_SESSION['message'][] = 'Crawl administration settings have been saved.'; break; // ***** Crawler >> Sitemap case 'os_sp_sitemap_config': if (isset($_POST['os_sp_sitemap_file'])) { $_POST['os_sp_sitemap_file'] = substr($_POST['os_sp_sitemap_file'], 0, 255); $_POST['os_sp_sitemap_file'] = filter_var($_POST['os_sp_sitemap_file'], FILTER_SANITIZE_URL); if ($_POST['os_sp_sitemap_file']) { if (preg_match('/\.xml(\.gz)?$/', $_POST['os_sp_sitemap_file'])) { OS_setValue('sp_sitemap_file', $_POST['os_sp_sitemap_file']); } else $_SESSION['error'][] = 'Sitemap filename must end witn .xml or .xml.gz'; } else OS_setValue('sp_sitemap_file', ''); } if (isset($_POST['os_sp_sitemap_hostname'])) { $_POST['os_sp_sitemap_hostname'] = filter_var($_POST['os_sp_sitemap_hostname'], FILTER_VALIDATE_DOMAIN, FILTER_FLAG_HOSTNAME); if ($_POST['os_sp_sitemap_hostname']) { OS_setValue('sp_sitemap_hostname', 
$_POST['os_sp_sitemap_hostname']); } else $_SESSION['error'][] = 'Invalid sitemap hostname.'; } $_SESSION['message'][] = 'Sitemap settings have been saved.'; break; // ***** Page Index >> With Selected... case 'os_index_with_selected': if (empty($_POST['os_index_pages'])) $_POST['os_index_pages'] = array(); if (is_array($_POST['os_index_pages'])) { $checksums_good = true; foreach ($_POST['os_index_pages'] as $key => $content_checksum) { $content_checksum = base64_decode($content_checksum); if ($content_checksum && strlen($content_checksum) == 20) { $_POST['os_index_pages'][$key] = $content_checksum; } else $checksums_good = false; } if ($checksums_good) { if (empty($_POST['os_index_select_action'])) $_POST['os_index_select_action'] = ''; switch ($_POST['os_index_select_action']) { case 'delete': $delete = $_DDATA['pdo']->prepare( 'DELETE FROM `'.$_DDATA['tbprefix'].'crawldata` WHERE `content_checksum`=:content_checksum;' ); foreach ($_POST['os_index_pages'] as $content_checksum) { $delete->execute(array('content_checksum' => $content_checksum)); $err = $delete->errorInfo(); if ($err[0] != '00000') { $_SESSION['error'][] = 'Database error on attempt to delete: '.$err[2]; break; } } break; case 'category': if (!empty($_POST['os_apply_new_category'])) { $_POST['os_apply_new_category'] = preg_replace(array('/\s/', '/ {2,}/'), ' ', trim($_POST['os_apply_new_category'])); $_POST['os_apply_new_category'] = preg_replace('/[^\w \d-]/', '', $_POST['os_apply_new_category']); $_POST['os_apply_new_category'] = substr($_POST['os_apply_new_category'], 0, 30); if ($_POST['os_apply_new_category']) { $update = $_DDATA['pdo']->prepare( 'UPDATE `'.$_DDATA['tbprefix'].'crawldata` SET `category`=:category WHERE `content_checksum`=:content_checksum;' ); foreach ($_POST['os_index_pages'] as $content_checksum) { $update->execute(array( 'category' => $_POST['os_apply_new_category'], 'content_checksum' => $content_checksum )); $err = $update->errorInfo(); if ($err[0] != '00000') { 
$_SESSION['error'][] = 'Database error on attempt to update category: '.$err[2]; break; } } $_SESSION['index_filter_category'] = ''; } else $_SESSION['error'][] = 'Category names may only contain letters, numbers, spaces or dashes.'; } else $_SESSION['error'][] = 'Please supply a category name.'; break; case 'priority': if (!empty($_POST['os_apply_new_priority'])) { $_POST['os_apply_new_priority'] = (float)$_POST['os_apply_new_priority']; $_POST['os_apply_new_priority'] = max(0, min(1, $_POST['os_apply_new_priority'])); $_POST['os_apply_new_priority'] = round($_POST['os_apply_new_priority'], 5); $update = $_DDATA['pdo']->prepare( 'UPDATE `'.$_DDATA['tbprefix'].'crawldata` SET `priority`=:priority WHERE `content_checksum`=:content_checksum;' ); foreach ($_POST['os_index_pages'] as $content_checksum) { $update->execute(array( 'priority' => $_POST['os_apply_new_priority'], 'content_checksum' => $content_checksum )); $err = $update->errorInfo(); if ($err[0] != '00000') { $_SESSION['error'][] = 'Database error on attempt to update priority: '.$err[2]; break; } } } else $_SESSION['error'][] = 'Please supply a priority value.'; break; case 'unlisted': $update = $_DDATA['pdo']->prepare( 'UPDATE `'.$_DDATA['tbprefix'].'crawldata` SET `flag_unlisted`=!`flag_unlisted` WHERE `content_checksum`=:content_checksum;' ); foreach ($_POST['os_index_pages'] as $content_checksum) { $update->execute(array('content_checksum' => $content_checksum)); $err = $update->errorInfo(); if ($err[0] != '00000') { $_SESSION['error'][] = 'Database error on attempt to toggle \'unlisted\' status: '.$err[2]; break; } } break; default: $_SESSION['error'][] = 'Unknown command.'; } } else $_SESSION['error'][] = 'Bad page checksum(s) given by user.'; } else $_SESSION['error'][] = 'Badly formed list of pages; could not perform an action.'; break; // ***** Page Index >> Text Match filter case 'os_index_filter_text': if (empty($_POST['os_index_filter_text'])) $_POST['os_index_filter_text'] = ''; 
$_POST['os_index_filter_text'] = filter_var($_POST['os_index_filter_text'], FILTER_SANITIZE_URL); $_SESSION['index_filter_text'] = $_POST['os_index_filter_text']; $_SESSION['index_page'] = 1; break; // ***** Search >> Search Settings case 'os_s_search_config': if (isset($_POST['os_s_limit_terms'])) { $_POST['os_s_limit_terms'] = max(1, min(255, (int)$_POST['os_s_limit_terms'])); OS_setValue('s_limit_terms', (int)$_POST['os_s_limit_terms']); } if (isset($_POST['os_s_limit_term_length'])) { $_POST['os_s_limit_term_length'] = max(1, min(255, (int)$_POST['os_s_limit_term_length'])); OS_setValue('s_limit_term_length', (int)$_POST['os_s_limit_term_length']); } if (!isset($_POST['os_s_weight_title'])) $_POST['os_s_weight_title'] = $_RDATA['s_weights']['title']; $_POST['os_s_weight_title'] = number_format(max(0, (float)$_POST['os_s_weight_title']), 1, '.', ''); if (!isset($_POST['os_s_weight_body'])) $_POST['os_s_weight_body'] = $_RDATA['s_weights']['body']; $_POST['os_s_weight_body'] = number_format(max(0, (float)$_POST['os_s_weight_body']), 1, '.', ''); if (!isset($_POST['os_s_weight_keywords'])) $_POST['os_s_weight_keywords'] = $_RDATA['s_weights']['keywords']; $_POST['os_s_weight_keywords'] = number_format(max(0, (float)$_POST['os_s_weight_keywords']), 1, '.', ''); if (!isset($_POST['os_s_weight_description'])) $_POST['os_s_weight_description'] = $_RDATA['s_weights']['description']; $_POST['os_s_weight_description'] = number_format(max(0, (float)$_POST['os_s_weight_description']), 1, '.', ''); if (!isset($_POST['os_s_weight_url'])) $_POST['os_s_weight_url'] = $_RDATA['s_weights']['url']; $_POST['os_s_weight_url'] = number_format(max(0, (float)$_POST['os_s_weight_url']), 1, '.', ''); if (!isset($_POST['os_s_weight_multi'])) $_POST['os_s_weight_multi'] = $_RDATA['s_weights']['multi']; $_POST['os_s_weight_multi'] = number_format(max(0, (float)$_POST['os_s_weight_multi']), 1, '.', ''); if (!isset($_POST['os_s_weight_important'])) $_POST['os_s_weight_important'] = 
$_RDATA['s_weights']['important']; $_POST['os_s_weight_important'] = number_format(max(0, (float)$_POST['os_s_weight_important']), 1, '.', ''); if (!isset($_POST['os_s_weight_css_value'])) $_POST['os_s_weight_css_value'] = $_RDATA['s_weights']['css_value']; $_POST['os_s_weight_css_value'] = number_format(max(0, (float)$_POST['os_s_weight_css_value']), 1, '.', ''); OS_setValue('s_weights', implode('%', array( $_POST['os_s_weight_title'], $_POST['os_s_weight_body'], $_POST['os_s_weight_keywords'], $_POST['os_s_weight_description'], $_POST['os_s_weight_css_value'], $_POST['os_s_weight_url'], $_POST['os_s_weight_multi'], $_POST['os_s_weight_important'] ))); if (isset($_POST['os_s_weight_css'])) { $_POST['os_s_weight_css'] = preg_replace( array('/[^\w\d\. #_:-]/', '/ {2,}/'), array('', ' '), trim($_POST['os_s_weight_css']) ); OS_setValue('s_weight_css', substr($_POST['os_s_weight_css'], 0, 4095)); } if (isset($_POST['os_s_charset'])) { $_POST['os_s_charset'] = substr($_POST['os_s_charset'], 0, 63); $_POST['os_s_charset'] = preg_replace('/[^\w\d\.:_-]/', '', $_POST['os_s_charset']); OS_setValue('s_charset', $_POST['os_s_charset']); } if (isset($_POST['os_s_limit_results'])) { $_POST['os_s_limit_results'] = max(1, min(255, (int)$_POST['os_s_limit_results'])); OS_setValue('s_limit_results', (int)$_POST['os_s_limit_results']); } if (isset($_POST['os_s_results_pagination'])) { $_POST['os_s_results_pagination'] = max(1, min(255, (int)$_POST['os_s_results_pagination'])); OS_setValue('s_results_pagination', (int)$_POST['os_s_results_pagination']); } if (isset($_POST['os_s_limit_matchtext'])) { $_POST['os_s_limit_matchtext'] = max(1, min(65535, (int)$_POST['os_s_limit_matchtext'])); OS_setValue('s_limit_matchtext', $_POST['os_s_limit_matchtext']); } if (isset($_POST['os_s_show_orphans']) && $_POST['os_s_show_orphans'] == '1') { $_POST['os_s_show_orphans'] = 1; } else $_POST['os_s_show_orphans'] = 0; OS_setValue('s_show_orphans', $_POST['os_s_show_orphans']); if 
(isset($_POST['os_s_show_filetype_html']) && $_POST['os_s_show_filetype_html'] == '1') { $_POST['os_s_show_filetype_html'] = 1; } else $_POST['os_s_show_filetype_html'] = 0; OS_setValue('s_show_filetype_html', $_POST['os_s_show_filetype_html']); $_SESSION['message'][] = 'Search settings have been saved.'; break; // ***** Search >> Search Template case 'os_s_search_template': if (isset($_POST['os_s_result_template'])) { $_POST['os_s_result_template'] = str_replace("\r", '', $_POST['os_s_result_template']); OS_setValue('s_result_template', substr($_POST['os_s_result_template'], 0, 65535)); $_SESSION['message'][] = 'Search result template updated.'; } break; // ***** Search >> Search Result Cache case 'os_s_cache_config': if (isset($_POST['os_s_limit_query_log'])) { $_POST['os_s_limit_query_log'] = max(0, min(255, (int)$_POST['os_s_limit_query_log'])); OS_setValue('s_limit_query_log', $_POST['os_s_limit_query_log']); } if (isset($_POST['os_s_limit_cache'])) { $_POST['os_s_limit_cache'] = max(0, min(65535, (int)$_POST['os_s_limit_cache'])); OS_setValue('s_limit_cache', $_POST['os_s_limit_cache']); } break; // ***** Search >> Search Result Purge case 'os_s_cache_purge': $purge = $_DDATA['pdo']->query( 'UPDATE `'.$_DDATA['tbprefix'].'query` SET `cache`=\'\';' ); $err = $purge->errorInfo(); if ($err[0] == '00000') { $_RDATA['s_cache_size'] = 0; $_SESSION['message'][] = 'Search result cache has been purged.'; } else $_SESSION['error'][] = 'Could not purge search result cache.'; break; // ***** Search >> Offline Javascript case 'os_jw_config': // ***** Search >> Write Offline Javascript case 'os_jw_write': if (isset($_POST['os_jw_hostname'])) { $_POST['os_jw_hostname'] = filter_var($_POST['os_jw_hostname'], FILTER_VALIDATE_DOMAIN, FILTER_FLAG_HOSTNAME); if ($_POST['os_jw_hostname']) { OS_setValue('jw_hostname', $_POST['os_jw_hostname']); } else $_SESSION['error'][] = 'Invalid sitemap hostname.'; } if (isset($_POST['os_jw_compression'])) { $_POST['os_jw_compression'] = 
max(0, min(100, (int)$_POST['os_jw_compression'])); OS_setValue('jw_compression', (int)$_POST['os_jw_compression']); } if ($_POST['os_submit'] == 'os_jw_config') { $_SESSION['message'][] = 'Offline javascript search settings have been saved.'; break; } // ***** Write to and download the Offline Javascript file $query_status = ($_ODATA['s_show_orphans']) ? '(`status`=\'OK\' || `status`=\'Orphan\')' : '`status`=\'OK\''; $select = $_DDATA['pdo']->query( 'SELECT `url`, `title`, `description`, `keywords`, `category`, `content_mime`, `weighted`, `content`, `priority` FROM `'.$_DDATA['tbprefix'].'crawldata` WHERE `flag_unlisted`<>1 AND '.$query_status.' AND `url_base` LIKE \'%'.addslashes($_ODATA['jw_hostname']).'\';' ); $err = $select->errorInfo(); if ($err[0] == '00000') { $select = $select->fetchAll(); // If compression value is less than 100 then get a word // list frequency report from all indexed pages if ($_ODATA['jw_compression'] < 100) { $words = array(); foreach ($select as $key => $row) { $select[$key]['words'] = array_unique(explode(' ', $row['content'])); foreach ($select[$key]['words'] as $index => $word) { if (!$word) continue; if (empty($words[$word])) { $words[$word] = 1; } else $words[$word]++; } } // Use the word frequency report to create a filter of // words that are more common than the compression // threshold $compressionFilter = array(); Foreach ($words as $word => $count) if (($count / count($select)) * 100 >= $_ODATA['jw_compression']) $compressionFilter[] = $word; } $repStr = '/^'.preg_quote($_ODATA['jw_hostname'], '/').'/'; foreach ($select as $key => $row) { // Use the compression filter to remove all of the most // common words from the content of this page if ($_ODATA['jw_compression'] < 100) { $select[$key]['words'] = array_diff($row['words'], $compressionFilter); $select[$key]['words'] = implode(' ', $select[$key]['words']); } else $select[$key]['words'] = $row['content']; // Remove the common domain from all URLs $select[$key]['url'] = 
preg_replace($repStr, '', $row['url']); // Format non-.html filenames into .html ones if ($row['content_mime'] == 'text/html') { $rq = explode('?', $select[$key]['url'], 2); if ($rq[0] == '' || $rq[0][strlen($rq[0]) - 1] == '/') $rq[0] .= 'index.html'; if (!preg_match('/\.html?$/', $rq[0])) $rq[0] .= '.html'; $select[$key]['url'] = implode('?', $rq); } } // Start JS file output ob_start(); ?> /* ******************************************************************** * Orcinus Site Search - Offline Javascript Search File * - Generated * - Requires mustache.js * */ function os_preg_quote(str, delimiter) { return (str + '').replace(new RegExp( '[.\\\\+*?\\[\\^\\]$(){}=!<>|:\\' + (delimiter || '') + '-]', 'g'), '\\$&' ); } // ***** Variable Migration let os_rdata = { s_latin: , s_filetypes: , s_category_list: , s_weights: }; Object.keys(os_rdata.s_weights).forEach(key => { os_rdata.s_weights[key] = parseFloat(os_rdata.s_weights[key]); }); let os_odata = { jw_compression: , s_limit_terms: , s_limit_term_length: , s_limit_matchtext: , s_show_filetype_html: , s_results_pagination: , s_limit_results: , s_result_template: }; let os_sdata = { terms: [], formatted: [], results: [], pages: 1, time: (new Date()).getTime() }; let os_request = {}; const os_params = new URLSearchParams(window.location.search); // ***** Page Object Constructor function os_page(content_mime, url, category, priority, title, description, keywords, weighted, content) { this.content_mime = content_mime; this.url = url; this.category = category; this.priority = parseFloat(priority); this.title = title; this.description = description; this.keywords = keywords; this.weighted = weighted; this.content = content; this.matchtext = []; this.relevance = 0; this.multi = -1; this.phrase = 0; } // ***** Search Database let os_crawldata = [ new os_page('', '', '', '', '', '', '', '', ''), ]; // ***** Return list of all pages for typeahead function os_return_all() { let fullList = []; for (let x = 0; x < 
os_crawldata.length; x++) { fullList.push({ title: os_crawldata[x].title, url: os_crawldata[x].url }); } return fullList; } // {{{{{ Create the Mustache template let os_TEMPLATE = { version: '', searchable: false, addError: function(text) { if (!this.errors) { this.errors = {}; this.errors.error_list = []; } this.errors.error_list.push(text); } }; // Check if there are rows in the search database if (os_crawldata.length) { os_TEMPLATE.searchable = {}; os_TEMPLATE.searchable.form_action = window.location.pathname; os_TEMPLATE.searchable.limit_term_length = ; os_request.c = os_params.get('c'); if (!os_request.c || !os_rdata.s_category_list[os_request.c]) os_request.c = ''; if (os_rdata.s_category_list.length > 2) { os_TEMPLATE.searchable.categories = {}; os_TEMPLATE.searchable.categories.category_list = []; Object.keys(os_rdata.s_category_list).forEach(category => { let cat = {}; cat.name = (category == '') ? 'All Categories' : category; cat.value = category; cat.selected = (os_request.c == category); os_TEMPLATE.searchable.categories.category_list.push(cat); }); } os_request.q = os_params.get('q'); if (!os_request.q) os_request.q = ''; os_request.q = os_request.q.trim().replace(/\s/, ' ').replace(/ {2,}/, ' '); // If there is a text request if (os_request.q) { // If compression level is < 100, remove all quotation marks if (os_odata.jw_compression < 100) os_request.q = os_request.q.replace(/"/g, ''); if (os_request.q.length > 127) { os_request.q = os_request.q.substring(0, 127); os_TEMPLATE.addError('Search query truncated to maximum 127 characters'); } os_TEMPLATE.searchable.request_q = os_request.q; // Split request string on quotation marks (") let request = (' ' + os_request.q + ' ').split('"'); for (let x = 0; x < request.length && os_sdata.terms.length < os_odata.s_limit_terms; x++) { // Every second + 1 group of terms just a list of terms if (!(x % 2)) { // Split this list of terms on spaces request[x] = request[x].split(' '); for (let y = 0, t; y < 
request[x].length; y++) { t = request[x][y]; if (!t) continue // Leading + means important, a MUST match if (t[0] == '+') { // Just count it as a 'phrase' of one word, functionally equivalent os_sdata.terms.push(['phrase', t.substring(1), false]); // Leading - means negative, a MUST exclude } else if (t[0] == '-') { os_sdata.terms.push(['exclude', t.substring(1), false]); // Restrict to a specific filetype (not yet implemented) // Really, we'd only allow HTML, XML and PDF here, maybe JPG? } else if (t.indexOf('filetype:') === 0) { t = t.substring(9).trim(); if (t && os_rdata.s_filetypes[t.toUpperCase()]) os_sdata.terms.push(['filetype', t, false]); // Else if the term is greater than the term length limit, add it } else if (t.length >= os_odata.s_limit_term_length) os_sdata.terms.push(['term', t, false]); } // Every second group of terms is a phrase, a MUST match } else os_sdata.terms.push(['phrase', request[x], false]); } // If we successfully procured some terms if (os_sdata.terms.length) { os_TEMPLATE.searchable.searched = {}; if (os_request.c != '') { os_TEMPLATE.searchable.searched.category = {}; os_TEMPLATE.searchable.searched.category.request_c = os_request.c; } // Prepare PCRE match text for each phrase and term for (let x = 0; x < os_sdata.terms.length; x++) { switch (os_sdata.terms[x][0]) { case 'filetype': os_sdata.formatted.push(os_sdata.terms[x][0] + ':' + os_sdata.terms[x][1]); break; case 'exclude': os_sdata.formatted.push('-' + os_sdata.terms[x][1]); break; case 'phrase': os_sdata.formatted.push('"' + os_sdata.terms[x][1] + '"'); case 'term': if (os_sdata.terms[x][0] == 'term') os_sdata.formatted.push(os_sdata.terms[x][1]); os_sdata.terms[x][2] = os_preg_quote(os_sdata.terms[x][1].toLowerCase(), '/'); Object.keys(os_rdata.s_latin).forEach(key => { for (let y = 0; y < os_rdata.s_latin[key].length; y++) os_sdata.terms[x][2] = os_sdata.terms[x][2].replace(os_rdata.s_latin[key][y], key); if (key.length > 1) { os_sdata.terms[x][2] = 
os_sdata.terms[x][2].replace(key, '(' + key + '|' + os_rdata.s_latin[key].join('|') + ')'); } else os_sdata.terms[x][2] = os_sdata.terms[x][2].replace(key, '[' + key + os_rdata.s_latin[key].join('') + ']'); }); os_sdata.terms[x][2] = new RegExp('(' + os_sdata.terms[x][2] + ')', 'igu'); } } // ***** There is never any cache, so do an actual search for (let y = os_crawldata.length - 1; y >= 0; y--) { for (let x = 0; x < os_sdata.terms.length; x++) { addRelevance = 0; if (os_sdata.terms[x][0] == 'filetype') { } else if (os_sdata.terms[x][0] == 'exclude') { if (os_crawldata[y].title.match(os_sdata.terms[x][2]) || os_crawldata[y].description.match(os_sdata.terms[x][2]) || os_crawldata[y].keywords.match(os_sdata.terms[x][2]) || os_crawldata[y].weighted.match(os_sdata.terms[x][2]) || os_crawldata[y].content.match(os_sdata.terms[x][2])) os_crawldata.splice(y, 1); } else if (os_sdata.terms[x][0] == 'phrase' || os_sdata.terms[x][0] == 'term') { if (os_sdata.terms[x][0] == 'phrase') os_crawldata[y].phrase++; if (os_crawldata[y].title.match(os_sdata.terms[x][2])) addRelevance += os_rdata.s_weights.title; if (os_crawldata[y].description.match(os_sdata.terms[x][2])) addRelevance += os_rdata.s_weights.description; if (os_crawldata[y].keywords.match(os_sdata.terms[x][2])) addRelevance += os_rdata.s_weights.keywords; if (os_crawldata[y].weighted.match(os_sdata.terms[x][2])) addRelevance += os_rdata.s_weights.css_value; if (os_crawldata[y].content.match(os_sdata.terms[x][2])) addRelevance += os_rdata.s_weights.body; if (addRelevance) { os_crawldata[y].multi++; } else if (os_sdata.terms[x][0] == 'phrase') os_crawldata.splice(y, 1); } } if (addRelevance) { os_crawldata[y].relevance += addRelevance; // Calculate multipliers os_crawldata[y].relevance *= Math.pow(os_rdata.s_weights.multi, os_crawldata[y].multi); os_crawldata[y].relevance *= Math.pow(os_rdata.s_weights.important, os_crawldata[y].phrase); os_crawldata[y].relevance *= os_crawldata[y].priority; } } // Sort the list by 
relevance value os_crawldata.sort(function(a, b) { if (a.relevance == b.relevance) return 0; return (b.relevance > a.relevance) ? 1 : -1; }); // Normalize results from 0 - 100 and delete results with // relevance values < 5% of the top result for (let x = os_crawldata.length - 1; x >= 0; x--) { if (os_crawldata[0].relevance * 0.05 <= os_crawldata[x].relevance) { os_crawldata[x].relevance /= os_crawldata[0].relevance * 0.01; } else os_crawldata.splice(x, 1); } // The final results list is the top slice of this data // limited by the 's_limit_results' value os_sdata.results = os_crawldata.slice(0, os_odata.s_limit_results); // Now loop through the remaining results to generate the // proper match text for each for (let x = 0; x < os_sdata.results.length; x++) { // Add the page description to use as a default match text if (os_sdata.results[x].description.trim()) { os_sdata.results[x].matchtext.push({ rank: 0, text: os_sdata.results[x].description.substring(0, os_odata.s_limit_matchtext) }); } // Loop through each term to capture matchtexts for (let y = 0; y < os_sdata.terms.length; y++) { switch (os_sdata.terms[y][0]) { case 'filetype': break; case 'exclude': break; case 'phrase': case 'term': // Split the content on the current term let splitter = os_sdata.results[x].content.split(os_sdata.terms[y][2]); // For each match, gather the appropriate amount of match // text from either side of it for (let z = 0, caret = 0; z < splitter.length; z++) { caret += splitter[z].length; if (splitter[z].match(os_sdata.terms[y][2]) || splitter.length == 1) { let offset = 0; if (splitter.length == 1) { // Grab some random content if there were no // matches in the content let offset = Math.floor(Math.random() * os_sdata.results[x].content.length - os_odata.s_limit_matchtext); } else offset = Math.floor(Math.max(0, caret - (splitter[z].length + os_odata.s_limit_matchtext) / 2)); let match = os_sdata.results[x].content.substring(offset, offset + os_odata.s_limit_matchtext).trim(); // 
Add appropriate ellipses if (offset + ((splitter[z].length + os_odata.s_limit_matchtext) / 2) < os_sdata.results[x].content.length) match += "\u2026"; if (offset) match = "\u2026" + match; os_sdata.results[x].matchtext.push({ rank: 0, text: match }); } } } } // For each found match text, add a point for every time a // term is found in the match text; triple points for phrase // matches for (let y = 0; y < os_sdata.results[x].matchtext.length; y++) { for (let z = 0; z < os_sdata.terms.length; z++) { switch (os_sdata.terms[z][0]) { case 'filetype': break; case 'exclude': break; case 'phrase': case 'term': let points = os_sdata.results[x].matchtext[y].text.matchAll(os_sdata.terms[z][2]).length; // / (z + 1); if (os_sdata.terms[z][0] == 'phrase') points *= 3; os_sdata.results[x].matchtext[y].rank += points; } } } // Sort the match texts by score os_sdata.results[x].matchtext.sort(function(a, b) { if (b.rank == a.rank) return 0; return (b.rank > a.rank) ? 1 : -1; }); // Use the top-ranked match text as the official match text os_sdata.results[x].matchtext = os_sdata.results[x].matchtext[0].text; // Unset result values we no longer need so they don't // bloat memory unnecessarily os_sdata.results[x].content = null; os_sdata.results[x].keywords = null; os_sdata.results[x].weighted = null; os_sdata.results[x].multi = null; os_sdata.results[x].phrase = null; } // Limit os_request.page to within boundaries os_request.page = parseInt(os_params.get('page')); if (isNaN(os_request.page)) os_request.page = 1; os_request.page = Math.max(1, os_request.page); os_sdata.pages = Math.ceil(os_sdata.results.length / os_odata.s_results_pagination); os_request.page = Math.min(os_sdata.pages, os_request.page); // Get a slice of the results that corresponds to the current // search results pagination page we are on let resultsPage = os_sdata.results.slice( (os_request.page - 1) * os_odata.s_results_pagination, (os_request.page - 1) * os_odata.s_results_pagination + 
os_odata.s_results_pagination ); if (resultsPage.length) { os_TEMPLATE.searchable.searched.results = {}; os_TEMPLATE.searchable.searched.results.result_list = []; // Do a last once-over of the results for (let x = 0, result; x < resultsPage.length; x++) { result = {}; // Don't display filetype of HTML pages result.filetype = ''; Object.keys(os_rdata.s_filetypes).forEach(type => { for (let y = 0; y < os_rdata.s_filetypes[type].length; y++) if (resultsPage[x].content_mime == os_rdata.s_filetypes[type][y]) result.filetype = type; }); // Don't display filetype of HTML pages if (!os_odata.s_show_filetype_html) if (result.filetype == 'HTML') result.filetype = ''; if (result.filetype) result.filetype = '[' + result.filetype + ']'; // Don't display category if there's only one if (Object.keys(os_rdata.s_category_list).length > 2) { result.category = resultsPage[x].category; } else resultsPage[x].category = ''; // Format relevance result.relevance = Math.round(resultsPage[x].relevance * 100) / 100; // Highlight the terms in the title, url and matchtext result.title = resultsPage[x].title; result.url = resultsPage[x].url; result.matchtext = resultsPage[x].matchtext; result.description = resultsPage[x].description; result.title_highlight = resultsPage[x].title; result.url_highlight = resultsPage[x].url; result.matchtext_highlight = resultsPage[x].matchtext; result.description_highlight = resultsPage[x].description; for (let z = 0; z < os_sdata.terms.length; z++) { switch (os_sdata.terms[z][0]) { case 'filetype': break; case 'exclude': break; case 'phrase': case 'term': result.title_highlight = result.title_highlight.replace(os_sdata.terms[z][2], '$1'); result.url_highlight = result.url_highlight.replace(os_sdata.terms[z][2], '$1'); result.matchtext_highlight = result.matchtext_highlight.replace(os_sdata.terms[z][2], '$1'); result.description_highlight = result.description_highlight.replace(os_sdata.terms[z][2], '$1'); } } 
os_TEMPLATE.searchable.searched.results.result_list.push(result); } // If there are more than just one page of results, prepare all // the pagination variables for the template if (os_sdata.pages > 1) { let pagination = {}; pagination.page_gt1 = (os_request.page > 1); pagination.page_minus1 = os_request.page - 1; pagination.page_list = []; for (x = 1; x <= os_sdata.pages; x++) { let page = {}; page.index = x; page.current = (x == os_request.page); pagination.page_list.push(page); } pagination.page_ltpages = (os_request.page < os_sdata.pages); pagination.page_plus1 = os_request.page + 1; os_TEMPLATE.searchable.searched.results.pagination = pagination; } // Final numerical and stopwatch time values os_TEMPLATE.searchable.searched.results.from = Math.min(os_sdata.results.length, (os_request.page - 1) * os_odata.s_results_pagination + 1); os_TEMPLATE.searchable.searched.results.to = Math.min(os_sdata.results.length, os_request.page * os_odata.s_results_pagination); os_TEMPLATE.searchable.searched.results.of = os_sdata.results.length; // os_TEMPLATE.searchable.searched.results.in = Math.round(((new Date()).getTime() - os_sdata.time) / 10) / 100; } // No results } // No valid terms } // No request data } // No searchable pages in search database document.write(mustache.render( os_odata.s_result_template, os_TEMPLATE ));›×™*·,±_²°|≥!#$¢£+≤=•«%½»?"'-] $_JS = ob_get_contents(); ob_end_clean(); header('Content-type: text/javascript; charset='.strtolower($_ODATA['s_charset'])); header('Content-disposition: attachment; filename="offline-search.js"'); mb_convert_encoding($_JS, 'UTF-8', $_ODATA['s_charset']); die($_JS); } else $_SESSION['error'][] = 'Error reading from the search result database: '.$err[2]; break; // ***** Unknown 'os_submit' command default: header('Content-type: text/plain; charset='.strtolower($_ODATA['s_charset'])); var_dump($_POST); exit(); } header('Location: '.$_SERVER['REQUEST_URI']); exit(); // Normal POST request, but without 'os_submit' // These are 
// usually triggered by a javascript form.submit()
} else {

  // Set new Page Index pagination value; only values present in the
  // list of allowed pagination options are stored
  if (!empty($_POST['os_index_hidden_pagination'])) {
    $_POST['os_index_hidden_pagination'] = (int)$_POST['os_index_hidden_pagination'];
    if (in_array($_POST['os_index_hidden_pagination'], $_RDATA['admin_pagination_options'], true)) {
      OS_setValue('admin_index_pagination', $_POST['os_index_hidden_pagination']);
      $_SESSION['index_page'] = 1;
    }
    header('Location: '.$_SERVER['REQUEST_URI']);
    exit();
  }

  // Select a Page Index Category filter; the is_string() check stops an
  // array-typed field from being used as an array offset
  if (!empty($_POST['os_index_new_filter_category'])) {
    if (is_string($_POST['os_index_new_filter_category']) &&
        !empty($_RDATA['s_category_list'][$_POST['os_index_new_filter_category']])) {
      $_SESSION['index_filter_category'] = $_POST['os_index_new_filter_category'];
      $_SESSION['index_page'] = 1;
    }
    header('Location: '.$_SERVER['REQUEST_URI']);
    exit();
  }

  // Select a Page Index Status filter
  if (!empty($_POST['os_index_new_filter_status'])) {
    if (in_array($_POST['os_index_new_filter_status'], $_RDATA['index_status_list'], true)) {
      $_SESSION['index_filter_status'] = $_POST['os_index_new_filter_status'];
      $_SESSION['index_page'] = 1;
    }
    header('Location: '.$_SERVER['REQUEST_URI']);
    exit();
  }

  // Unknown POST command: dump what was received as plain text
  header('Content-type: text/plain; charset='.strtolower($_ODATA['s_charset']));
  var_dump($_POST);
  exit();
}

// Select a new Administration UI page
} else if (!empty($_GET['page'])) {
  if (is_string($_GET['page']) && !empty($_RDATA['admin_pages'][$_GET['page']]))
    $_SESSION['admin_page'] = $_GET['page'];

// Select a new page within the Page Index list
} else if (!empty($_GET['ipage'])) {

  // Never store a page number below 1: the Page Index query derives its
  // LIMIT offset from (index_page - 1) * pagination, so zero or negative
  // input would produce a negative offset
  $_GET['ipage'] = max(1, (int)$_GET['ipage']);
  $_SESSION['index_page'] = $_GET['ipage'];

// User has requested to log out
} else if (isset($_GET['logout'])) {
  $_SESSION = array();

  $_SESSION['message'][] = 'You have been logged out.';
  header('Location: '.$_SERVER['REQUEST_URI']);
  exit();
}


// Perform pre-processing SQL actions that may trigger
// $_SESSION errors
switch ($_SESSION['admin_page']) {
  case 'crawler':

    // Get list of domains from the starting URLs
$_RDATA['sp_starting'] = array_filter(array_map('trim', explode("\n", $_ODATA['sp_starting'])));
      $_RDATA['s_starting_domains'] = array();
      foreach ($_RDATA['sp_starting'] as $starting) {
        $starting = parse_url($starting);
        if (!empty($starting['host']))
          $_RDATA['s_starting_domains'][] = $starting['host'];
      }
      $_RDATA['s_starting_domains'] = array_unique($_RDATA['s_starting_domains']);

      // With exactly one distinct host, store it as the sitemap hostname
      if (count($_RDATA['s_starting_domains']) == 1)
        OS_setValue('sp_sitemap_hostname', $_RDATA['s_starting_domains'][0]);

      break;


    // ***** Page Index: load one page of rows matching the session filters
    case 'index':
      $_RDATA['page_index_rows'] = false;
      $_RDATA['page_index_found_rows'] = false;

      if ($_RDATA['s_crawldata_info']['Rows']) {

        // ***** Select rows to populate the Page Index table
        // Each filter is bound twice: once for the "filter is unset" test
        // and once for the actual column comparison
        $indexRows = $_DDATA['pdo']->prepare(
          'SELECT SQL_CALC_FOUND_ROWS `url`, `url_base`, `title`, `category`, `content_checksum`, `status`, `status_noindex`, `flag_unlisted`, `flag_updated`, `priority` FROM `'.$_DDATA['tbprefix'].'crawldata` WHERE (:text1=\'\' OR `url` LIKE :text2) AND (:category1=\'\' OR `category`=:category2) AND (:status1=\'\' OR `status`=:status2) AND (:flag_unlisted1=\'any\' OR `flag_unlisted`=:flag_unlisted2) AND (:flag_updated1=\'any\' OR `flag_updated`=:flag_updated2) ORDER BY `url_sort` LIMIT :offset, :pagination;'
        );

        $text = ($_SESSION['index_filter_text']) ? trim($_SESSION['index_filter_text']) : '';
        $category = ($_SESSION['index_filter_category'] != '') ? $_SESSION['index_filter_category'] : '';

        // 'OK' and 'Orphan' filter on the `status` column; 'Unlisted' and
        // 'Updated' filter on their respective flag columns instead
        if ($_SESSION['index_filter_status'] == 'OK' || $_SESSION['index_filter_status'] == 'Orphan') {
          $status = $_SESSION['index_filter_status'];
        } else $status = '';
        $unlisted = ($_SESSION['index_filter_status'] == 'Unlisted') ? 1 : 'any';
        $updated = ($_SESSION['index_filter_status'] == 'Updated') ? 1 : 'any';

        // Never let a zero or negative page number produce a negative
        // LIMIT offset
        $_SESSION['index_page'] = max(1, (int)$_SESSION['index_page']);
        $_RDATA['page_index_offset'] = ($_SESSION['index_page'] - 1) * $_ODATA['admin_index_pagination'];

        // :text1 receives the bare filter text so that the `:text1=''`
        // test in the query is true when no text filter is set; :text2
        // carries the wildcard-wrapped pattern for LIKE
        $indexRows->execute(array(
          'text1' => $text,
          'text2' => '%'.$text.'%',
          'category1' => $category,
          'category2' => $category,
          'status1' => $status,
          'status2' => $status,
          'flag_unlisted1' => $unlisted,
          'flag_unlisted2' => $unlisted,
          'flag_updated1' => $updated,
          'flag_updated2' => $updated,
          'offset' => $_RDATA['page_index_offset'],
          'pagination' => $_ODATA['admin_index_pagination']
        ));
        $err = $indexRows->errorInfo();
        if ($err[0] == '00000') {
          $_RDATA['page_index_rows'] = $indexRows->fetchAll();

          // Total number of matching rows, ignoring the LIMIT clause
          // query() returns false on failure, in which case the error
          // details are read from the connection instead
          $foundRows = $_DDATA['pdo']->query('SELECT FOUND_ROWS();');
          $err = ($foundRows === false) ? $_DDATA['pdo']->errorInfo() : $foundRows->errorInfo();
          if ($foundRows !== false && $err[0] == '00000') {
            $foundRows = $foundRows->fetchAll(PDO::FETCH_NUM);
            if (count($foundRows)) {
              $_RDATA['page_index_found_rows'] = $foundRows[0][0];
              $_RDATA['index_pages'] = (int)ceil($_RDATA['page_index_found_rows'] / $_ODATA['admin_index_pagination']);

              // Keep the current page within 1..index_pages
              $_SESSION['index_page'] = max(1, min($_RDATA['index_pages'], (int)$_SESSION['index_page']));

            } else $_SESSION['error'][] = 'Database did not return a search table row count.';
          } else $_SESSION['error'][] = 'Database error reading search table row count: '.$err[2];
        } else $_SESSION['error'][] = 'Database error reading search table: '.$err[2];
      } else $_SESSION['message'][] = 'The search database is currently empty.';

      break;


    // ***** Search: gather query log statistics
    case 'search':

      // Average hits per hour: First find the oldest `stamp` in the
      // database, then base all averages on the difference between that
      // time and now; also get average number of results
      $_RDATA['s_hours_since_oldest_hit'] = 0;
      $_RDATA['s_hits_per_hour'] = 0;
      $_RDATA['q_average_results'] = 0;

      $hits = $_DDATA['pdo']->query(
        'SELECT MIN(`stamp`) AS `oldest`, COUNT(*) AS `hits`, AVG(`results`) AS `average` FROM `'.$_DDATA['tbprefix'].'query`;'
      );
      $err = ($hits === false) ? $_DDATA['pdo']->errorInfo() : $hits->errorInfo();
      if ($hits !== false && $err[0] == '00000') {
        $hits = $hits->fetchAll();
        if (count($hits) && !is_null($hits[0]['oldest']) && !is_null($hits[0]['hits'])) {
          $_RDATA['s_hours_since_oldest_hit'] = (time() - $hits[0]['oldest']) / 3600;

          // The elapsed time is zero when the oldest hit carries the
          // current timestamp; leave the rate at its default of 0 rather
          // than dividing by zero
          if ($_RDATA['s_hours_since_oldest_hit'] > 0)
            $_RDATA['s_hits_per_hour'] = $hits[0]['hits'] / $_RDATA['s_hours_since_oldest_hit'];

          $_RDATA['q_average_results'] = $hits[0]['average'];
        }
      } else $_SESSION['error'][] = 'Could not read hit counts from query log.';

      // Median number of results
      // Rows arrive sorted by `results`: take the middle row, or the mean
      // of the two middle rows when the row count is even
      $_RDATA['q_median_results'] = 0;

      $median = $_DDATA['pdo']->query(
        'SELECT `results` FROM `'.$_DDATA['tbprefix'].'query` ORDER BY `results`;'
      );
      $err = ($median === false) ? $_DDATA['pdo']->errorInfo() : $median->errorInfo();
      if ($median !== false && $err[0] == '00000') {
        $median = $median->fetchAll();
        if (count($median)) {
          $index = intdiv(count($median), 2);
          if (count($median) & 1) {
            $_RDATA['q_median_results'] = $median[$index]['results'];
          } else {
            $_RDATA['q_median_results'] = ($median[$index - 1]['results'] + $median[$index]['results']) / 2;
          }
        }
      } else $_SESSION['error'][] = 'Could not read result counts from query log.';

      break;


    // ***** Query Log: one row per distinct query, taken from its most
    // recent log entry and joined with per-query aggregates
    case 'queries':
      $_RDATA['query_log_rows'] = false;

      $queries = $_DDATA['pdo']->query(
        'SELECT *, INET_NTOA(`ip`) AS `ipaddr` FROM `'.$_DDATA['tbprefix'].'query` AS `t` INNER JOIN ( SELECT `query`, COUNT(`query`) AS `hits`, REGEXP_REPLACE(`query`, \'^[[:punct:]]+\', \'\') AS `alpha`, MAX(`stamp`) AS `last_hit`, AVG(`results`) AS `avg_results` FROM `'.$_DDATA['tbprefix'].'query` GROUP BY `query` ) AS `s` ON `s`.`query`=`t`.`query` AND `s`.`last_hit`=`t`.`stamp` ORDER BY `s`.`alpha` ASC;'
      );
      $err = ($queries === false) ? $_DDATA['pdo']->errorInfo() : $queries->errorInfo();
      if ($queries !== false && $err[0] == '00000') {
        $_RDATA['query_log_rows'] = $queries->fetchAll();
        if (count($_RDATA['query_log_rows'])) {
          $x = 0;

          // Add the `alpha` sort order as an index
          foreach ($_RDATA['query_log_rows'] as $key => $query)
            $_RDATA['query_log_rows'][$key]['rownum'] = $x++;

          // On first load, sort list by # of hits
          usort($_RDATA['query_log_rows'], function($a, $b) {
            return $b['hits'] <=> $a['hits'];
          });

        } else $_SESSION['message'][] = 'The query log is currently empty.';
      } else $_SESSION['error'][] = 'Database error reading query log table: '.$err[2];

  }
} // Not logged in ?> Orcinus Site Search <?php echo $_ODATA['version']; ?> - Administration

Orcinus Site Search

Crawler Management

Crawl Information

  • Warning: The previous crawl did not complete successfully. Please check the crawl log for more details.

  • Crawler has not yet been run. Choose your settings and run your first crawl by using the button in the top menu bar.

Crawl Administration

  • Crawl Scheduling

    Automatic crawls are triggered by people visiting your search page. To allow crawls at any time, set these both to the same time.

  • Send Email on...

    >
    >

    Warning: PHPMailer could not be found or loaded. The application will not be able to send mail until it is installed correctly.

Sitemap Settings

  • Warning: Target sitemap file doesn't exist. Please create it.

    Warning: Target sitemap file is not writable. Please adjust permissions.

  • 1) { ?>

Crawl Settings

  • Options:
    • >
    • >
    • >
  • Timeouts & Delay

  • Maximum Limits

  • Link Filters

  • Content Filters

Page Index

$_ODATA['admin_index_pagination']) { ?>
2) { ?> $row) { ?> 2) { ?>

Filters:

2) { ?>
URL Showing pages of Category Status Priority
Per page:
> Updated
Unlisted

Search Management

Search Information

    = 1) { ?>
  • = 24) { ?>
  • = 168) { ?>
  • No searches logged yet. To see search statistics here, start using your search engine. Tell your friends!

Query Log & Cache

  • The query log is a rolling log of searches on which the statistics above are based. Longer query log periods will give more accurate statistics, but also require more database space. (max: 255 days)

Offline Search Javascript

    1) { ?>

Search Settings

  • Query Limits

  • Match Weighting

    Additive Information
    Multipliers Information
  • Result Output

    Options:
    • >
    • >

Search Result Template

  • This template uses the Mustache templating system. See the Mustache manual for more information. To restore the default template, submit a blank textarea.

Query Log

country($query['ipaddr']); } catch(Exception $e) { $query['geo'] = false; } } ?>
Query Sort Hits Sort Results Sort Last Requested Sort
raw['country']['iso_code']).'.png')) { ?> <?php echo htmlspecialchars($query['geo']->raw['country']['names']['en']); ?>

Welcome

Log In