Enable downloading Query Log CSV

This commit is contained in:
Brian Huisman 2023-04-21 11:27:46 -04:00
parent b93c75e132
commit 8d99e4fd41
4 changed files with 114 additions and 41 deletions

View file

@ -6,15 +6,15 @@
![banner](https://user-images.githubusercontent.com/137631/233513460-93269bbb-f218-4b00-a7bf-7fc4575d15d4.png)
The **Orcinus Site Search** PHP script is an all-in-one website crawler and search engine that extracts searchable content from XML, HTML and PDF files at a single, or multiple websites. It replaces 3rd party, remote search solutions such as Google etc.
The **Orcinus Site Search** PHP script is an all-in-one website crawler and search engine that extracts searchable content from plain text, XML, HTML and PDF files at a single, or multiple websites. It replaces 3rd party, remote search solutions such as Google.
**Orcinus** will crawl your website content on a schedule, or at your command via the admin UI or even by CLI/crontab. Crawler log output conveniently informs you of missing pages, links that redirect, and other errors that you, as a webmaster can fix to keep your user experience tight. A full-featured, responsive administration GUI allows you to adjust crawl settings, view and edit all crawled pages, customize search results, and view a log of all searched queries. You also have complete control over the appearance of your search results with a [convenient templating system](https://mustache.github.io/).
Optionally, **Orcinus** can generate a [sitemap XML or XML.GZ](https://www.sitemaps.org) file of your pages after every crawl, suitable for uploading to Google analytics. It can also export a JavaScript version of the entire search engine that works with offline mirrors, such as those generated by [HTTrack](https://www.httrack.com).
Optionally, **Orcinus** can generate a [sitemap .xml or .xml.gz](https://www.sitemaps.org) file of your pages after every crawl, suitable for uploading to the [Google Search Console](https://search.google.com/search-console/sitemaps). It can also export a JavaScript version of the entire search engine that works with offline mirrors, such as those generated by [HTTrack](https://www.httrack.com).
### Requirements:
- PHP >= 7.2.x
- MySQL / MariaDB
- MySQL / MariaDB >= 10.0.5
### 3rd Party Libraries:
Included:
@ -27,7 +27,6 @@ Optional:
- [Maxmind GeoIP2](https://github.com/maxmind/GeoIP2-php)
## Getting Started
1. Copy the `orcinus` directory to your root web directory.
2. Fill out your SQL and desired credential details in the `orcinus/config.ini.php` file.
3. Visit `yourdomain.com/orcinus/admin.php` in your favourite web browser and log in.

View file

@ -181,7 +181,7 @@ if (!$_SESSION['admin_username']) {
}
break;
// Download a text file log of the most recent crawl
// Download a text file of the most recent crawl or query log
case 'download':
if (empty($_POST->content)) $_POST->content = '';
switch ($_POST->content) {
@ -202,6 +202,7 @@ if (!$_SESSION['admin_username']) {
header('Content-type: text/plain; charset='.strtolower($_ODATA['s_charset']));
header('Content-disposition: attachment; filename="'.
'crawl-log'.$_POST->grep.'_'.date('Y-m-d', $_ODATA['sp_time_end']).'.txt"');
die(implode("\n", $lines));
} else {
@ -218,6 +219,55 @@ if (!$_SESSION['admin_username']) {
}
break;
// Stream the entire query log to the client as a CSV attachment
case 'query_log':
  $querylog = $_DDATA['pdo']->query(
    'SELECT `query`, `results`, `stamp`, INET_NTOA(`ip`) AS `ipaddr`
       FROM `'.$_DDATA['tbprefix'].'query` ORDER BY `stamp` DESC;'
  );
  $err = $querylog->errorInfo();
  if ($err[0] == '00000') {

    // Explicit FETCH_ASSOC: the default FETCH_BOTH mode would duplicate
    // every value as both a numeric and a string key, doubling CSV columns
    $querylog = $querylog->fetchAll(PDO::FETCH_ASSOC);
    if (count($querylog)) {
      header('Content-type: text/csv; charset='.strtolower($_ODATA['s_charset']));
      header('Content-disposition: attachment; filename="'.
             'query-log_'.date('Y-m-d').'.csv"');

      $output = fopen('php://output', 'w');

      $headings = array('Query', 'Results', 'Time Stamp', 'IP');
      if ($_GEOIP2) $headings[] = 'Country';
      fputcsv($output, $headings);

      foreach ($querylog as $line) {
        $line['stamp'] = date('c', $line['stamp']);
        if ($_GEOIP2) {
          try {
            $geo = $_GEOIP2->country($line['ipaddr']);
          } catch (Exception $e) { $geo = false; }

          // Always append the column (empty on a failed lookup) so every
          // row has the same number of fields as the heading row
          $line['country'] = $geo ? ($geo->raw['country']['names']['en'] ?? '') : '';
        }
        fputcsv($output, $line);
      }
      fclose($output);
      die();

    } else {
      $response = array(
        'status' => 'Error',
        'message' => 'The query log is empty; nothing to download'
      );
    }
  } else {
    $response = array(
      'status' => 'Error',
      'message' => 'Could not read the query log database'
    );
  }
  break;
default:
$response = array(
'status' => 'Error',
@ -2804,9 +2854,13 @@ document.write(mustache.render(
* Query Log *********************************************** */
case 'queries': ?>
<section class="row justify-content-center">
<header class="col-12 mb-2">
<header class="col-5 mb-2">
<h2>Query Log</h2>
</header><?php
</header>
<div class="col-7 mb-2 text-end text-nowrap">
<button type="button" class="btn btn-primary" id="os_query_log_download"<?php
if ($_ODATA['sp_crawling']) echo ' disabled="disabled"'; ?>>Download Query Log</button>
</div><?php
if (is_array($_RDATA['query_log_rows']) && count($_RDATA['query_log_rows'])) { ?>
<div class="col-xl-10 col-xxl-8">
@ -3004,7 +3058,7 @@ document.write(mustache.render(
<strong>Note:</strong> You may close this popup and/or leave the page while the crawler is running.
</p>
<button type="button" class="btn btn-primary" id="os_crawl_log_download"<?php
if ($_ODATA['sp_crawling']) echo ' disabled="disabled"'; ?>>Download Log</button>
if ($_ODATA['sp_crawling']) echo ' disabled="disabled"'; ?>>Download Crawl Log</button>
</div>
</div>
</div>

View file

@ -744,7 +744,6 @@ $_RDATA['s_latin'] = array(
);
$_RDATA['s_filetypes'] = array(
'PDF' => array('application/pdf'),
'JPG' => array('image/jpeg'),
'HTML' => array('text/html', 'application/xhtml+xml'),
'XML' => array('text/xml', 'application/xml'),
'TXT' => array('text/plain')

View file

@ -1,6 +1,45 @@
/* ***** Orcinus Site Search - Administration UI Javascript ******** */
/**
* Request a file from the server and trigger a download prompt
*
*/
/**
 * Request a file from the server and trigger a download prompt
 *
 * @param {string} defaultFilename - Fallback name used when the server
 *   sends no (or an unparsable) Content-Disposition header
 * @param {Object} postValues - JSON-serializable body for the POST request
 */
let os_download = function(defaultFilename, postValues) {
  fetch(new Request('./admin.php'), {
    method: 'POST',
    headers: { 'Content-type': 'application/json' },
    body: JSON.stringify(postValues)
  })
  .then((response) => {
    // Surface HTTP failures instead of silently doing nothing
    if (response.status !== 200) {
      alert('Download failed (HTTP ' + response.status + ')');
      return;
    }

    // Header may be absent; guard before .trim() to avoid a TypeError
    let ct = (response.headers.get('content-type') || '').trim();
    if (ct.indexOf('application/json') === 0) {
      // Server reported an error as JSON instead of sending a file
      response.json().then((data) => {
        if (data.status == 'Error')
          alert(data.message);
      });
    } else {
      let cd = response.headers.get('content-disposition');
      if (cd) {
        // match() returns null when no filename="" is present; fall back
        // to the caller-supplied default rather than throwing
        let match = cd.match(/filename="([^"]+)"/);
        let filename = (match && match.length > 1) ? match[1] : defaultFilename;
        response.blob().then((blob) => {
          let file = window.URL.createObjectURL(blob);
          let a = document.createElement('a');
          a.href = file;
          a.download = filename;
          document.body.appendChild(a);
          a.click();
          a.remove();
          // Release the blob reference once the click has been dispatched
          window.URL.revokeObjectURL(file);
        });
      } else alert('Something went wrong!');
    }
  })
  // Network-level failure (server unreachable, CORS, etc.)
  .catch(() => alert('Something went wrong!'));
}
// Enable Popper.js tooltips
let toolTipElems = document.querySelectorAll('[data-bs-toggle="tooltip"]');
let toolTipList = [...toolTipElems].map(elem => new bootstrap.Tooltip(elem));
@ -300,6 +339,16 @@ if (queriesModal) {
}, false);
}
// Wire up the Query Log download button (only present on the Queries tab)
let os_query_log_download = document.getElementById('os_query_log_download');
if (os_query_log_download) {
  os_query_log_download.addEventListener('click', () => {
    os_download('query-log.txt', { action: 'download', content: 'query_log' });
  }, false);
}
/* ***** Crawler Modal ********************************************* */
let os_get_crawl_progress = function() {
@ -512,38 +561,10 @@ os_crawl_cancel.addEventListener('click', function() {
}, false);
os_crawl_log_download.addEventListener('click', function() {
fetch(new Request('./admin.php'), {
method: 'POST',
headers: { 'Content-type': 'application/json' },
body: JSON.stringify({
action: 'download',
content: 'crawl_log',
grep: document.querySelector('input[name="os_crawl_grep"]:checked').value
})
})
.then((response) => {
if (response.status === 200) {
let ct = response.headers.get('content-type').trim();
if (ct == 'application/json') {
response.json().then((data) => {
if (data.status == 'Error')
alert(data.message);
});
} else {
let cd = response.headers.get('content-disposition');
let filename = cd.match(/filename="([^"]+)"/);
filename = (filename.length > 1) ? filename[1] : 'log.txt';
response.blob().then((blob) => {
let file = window.URL.createObjectURL(blob);
let a = document.createElement('a');
a.href = file;
a.download = filename;
document.body.appendChild(a);
a.click();
a.remove();
});
}
}
os_download('crawl-log.txt', {
action: 'download',
content: 'crawl_log',
grep: document.querySelector('input[name="os_crawl_grep"]:checked').value
});
}, false);