Enable downloading Query Log CSV
This commit is contained in:
parent
b93c75e132
commit
8d99e4fd41
|
@ -6,15 +6,15 @@
|
|||
|
||||
![banner](https://user-images.githubusercontent.com/137631/233513460-93269bbb-f218-4b00-a7bf-7fc4575d15d4.png)
|
||||
|
||||
The **Orcinus Site Search** PHP script is an all-in-one website crawler and search engine that extracts searchable content from XML, HTML and PDF files at a single, or multiple websites. It replaces 3rd party, remote search solutions such as Google etc.
|
||||
The **Orcinus Site Search** PHP script is an all-in-one website crawler and search engine that extracts searchable content from plain text, XML, HTML and PDF files at a single, or multiple websites. It replaces 3rd party, remote search solutions such as Google etc.
|
||||
|
||||
**Orcinus** will crawl your website content on a schedule, or at your command via the admin UI or even by CLI/crontab. Crawler log output conveniently informs you of missing pages, links that redirect, and other errors that you, as a webmaster can fix to keep your user experience tight. A full-featured, responsive administration GUI allows you to adjust crawl settings, view and edit all crawled pages, customize search results, and view a log of all searched queries. You also have complete control over the appearance of your search results with a [convenient templating system](https://mustache.github.io/).
|
||||
|
||||
Optionally, **Orcinus** can generate a [sitemap XML or XML.GZ](https://www.sitemaps.org) file of your pages after every crawl, suitable for uploading to Google analytics. It can also export a JavaScript version of the entire search engine that works with offline mirrors, such as those generated by [HTTrack](https://www.httrack.com).
|
||||
Optionally, **Orcinus** can generate a [sitemap .xml or .xml.gz](https://www.sitemaps.org) file of your pages after every crawl, suitable for uploading to the [Google Search Console](https://search.google.com/search-console/sitemaps). It can also export a JavaScript version of the entire search engine that works with offline mirrors, such as those generated by [HTTrack](https://www.httrack.com).
|
||||
|
||||
### Requirements:
|
||||
- PHP >= 7.2
|
||||
- MySQL / MariaDB
|
||||
- MySQL / MariaDB >= 10.0.5
|
||||
|
||||
### 3rd Party Libraries:
|
||||
Included:
|
||||
|
@ -27,7 +27,6 @@ Optional:
|
|||
- [Maxmind GeoIP2](https://github.com/maxmind/GeoIP2-php)
|
||||
|
||||
## Getting Started
|
||||
|
||||
1. Copy the `orcinus` directory to your root web directory.
|
||||
2. Fill out your SQL and desired credential details in the `orcinus/config.ini.php` file.
|
||||
3. Visit `yourdomain.com/orcinus/admin.php` in your favourite web browser and log in.
|
||||
|
|
|
@ -181,7 +181,7 @@ if (!$_SESSION['admin_username']) {
|
|||
}
|
||||
break;
|
||||
|
||||
// Download a text file log of the most recent crawl
|
||||
// Download a text file of the most recent crawl or query log
|
||||
case 'download':
|
||||
if (empty($_POST->content)) $_POST->content = '';
|
||||
switch ($_POST->content) {
|
||||
|
@ -202,6 +202,7 @@ if (!$_SESSION['admin_username']) {
|
|||
header('Content-type: text/plain; charset='.strtolower($_ODATA['s_charset']));
|
||||
header('Content-disposition: attachment; filename="'.
|
||||
'crawl-log'.$_POST->grep.'_'.date('Y-m-d', $_ODATA['sp_time_end']).'.txt"');
|
||||
|
||||
die(implode("\n", $lines));
|
||||
|
||||
} else {
|
||||
|
@ -218,6 +219,55 @@ if (!$_SESSION['admin_username']) {
|
|||
}
|
||||
break;
|
||||
|
||||
case 'query_log':
|
||||
$querylog = $_DDATA['pdo']->query(
|
||||
'SELECT `query`, `results`, `stamp`, INET_NTOA(`ip`) AS `ipaddr`
|
||||
FROM `'.$_DDATA['tbprefix'].'query` ORDER BY `stamp` DESC;'
|
||||
);
|
||||
$err = $querylog->errorInfo();
|
||||
if ($err[0] == '00000') {
|
||||
|
||||
$querylog = $querylog->fetchAll();
|
||||
if (count($querylog)) {
|
||||
|
||||
header('Content-type: text/csv; charset='.strtolower($_ODATA['s_charset']));
|
||||
header('Content-disposition: attachment; filename="'.
|
||||
'query-log_'.date('Y-m-d').'.csv"');
|
||||
|
||||
$output = fopen('php://output', 'w');
|
||||
|
||||
$headings = array('Query', 'Results', 'Time Stamp', 'IP');
|
||||
if ($_GEOIP2) $headings[] = 'Country';
|
||||
|
||||
fputcsv($output, $headings);
|
||||
foreach ($querylog as $line) {
|
||||
$line['stamp'] = date('c', $line['stamp']);
|
||||
|
||||
if ($_GEOIP2) {
|
||||
try {
|
||||
$geo = $_GEOIP2->country($line['ipaddr']);
|
||||
} catch(Exception $e) { $geo = false; }
|
||||
} else $geo = false;
|
||||
if ($geo) $line['country'] = $geo->raw['country']['names']['en'];
|
||||
|
||||
fputcsv($output, $line);
|
||||
}
|
||||
die();
|
||||
|
||||
} else {
|
||||
$response = array(
|
||||
'status' => 'Error',
|
||||
'message' => 'The query log is empty; nothing to download'
|
||||
);
|
||||
}
|
||||
} else {
|
||||
$response = array(
|
||||
'status' => 'Error',
|
||||
'message' => 'Could not read the query log database'
|
||||
);
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
$response = array(
|
||||
'status' => 'Error',
|
||||
|
@ -2804,9 +2854,13 @@ document.write(mustache.render(
|
|||
* Query Log *********************************************** */
|
||||
case 'queries': ?>
|
||||
<section class="row justify-content-center">
|
||||
<header class="col-12 mb-2">
|
||||
<header class="col-5 mb-2">
|
||||
<h2>Query Log</h2>
|
||||
</header><?php
|
||||
</header>
|
||||
<div class="col-7 mb-2 text-end text-nowrap">
|
||||
<button type="button" class="btn btn-primary" id="os_query_log_download"<?php
|
||||
if ($_ODATA['sp_crawling']) echo ' disabled="disabled"'; ?>>Download Query Log</button>
|
||||
</div><?php
|
||||
|
||||
if (is_array($_RDATA['query_log_rows']) && count($_RDATA['query_log_rows'])) { ?>
|
||||
<div class="col-xl-10 col-xxl-8">
|
||||
|
@ -3004,7 +3058,7 @@ document.write(mustache.render(
|
|||
<strong>Note:</strong> You may close this popup and/or leave the page while the crawler is running.
|
||||
</p>
|
||||
<button type="button" class="btn btn-primary" id="os_crawl_log_download"<?php
|
||||
if ($_ODATA['sp_crawling']) echo ' disabled="disabled"'; ?>>Download Log</button>
|
||||
if ($_ODATA['sp_crawling']) echo ' disabled="disabled"'; ?>>Download Crawl Log</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
|
|
@ -744,7 +744,6 @@ $_RDATA['s_latin'] = array(
|
|||
);
|
||||
$_RDATA['s_filetypes'] = array(
|
||||
'PDF' => array('application/pdf'),
|
||||
'JPG' => array('image/jpeg'),
|
||||
'HTML' => array('text/html', 'application/xhtml+xml'),
|
||||
'XML' => array('text/xml', 'application/xml'),
|
||||
'TXT' => array('text/plain')
|
||||
|
|
|
@ -1,6 +1,45 @@
|
|||
/* ***** Orcinus Site Search - Administration UI Javascript ******** */
|
||||
|
||||
|
||||
/**
 * Request a file from the server via a JSON POST and trigger a browser
 * download prompt for the response body.
 *
 * If the server answers with JSON instead of a file, it is treated as an
 * application-level error report and the message is shown to the user.
 *
 * @param {string} defaultFilename  Fallback filename used when the server
 *                                  sends no usable Content-Disposition header
 * @param {Object} postValues       Values serialized as the JSON POST body
 */
let os_download = function(defaultFilename, postValues) {
  fetch(new Request('./admin.php'), {
    method: 'POST',
    headers: { 'Content-type': 'application/json' },
    body: JSON.stringify(postValues)
  })
  .then((response) => {
    if (response.status === 200) {

      // Headers.get() returns null for a missing header; coerce to ''
      // so .trim() cannot throw
      let ct = (response.headers.get('content-type') || '').trim();
      if (ct.indexOf('application/json') === 0) {

        // JSON response means the server reported an error, not a file
        response.json().then((data) => {
          if (data.status == 'Error')
            alert(data.message);
        });

      } else {
        let cd = response.headers.get('content-disposition');
        if (cd) {

          // String.match() returns null when no quoted filename is
          // present; fall back to the caller-supplied default then
          let filename = cd.match(/filename="([^"]+)"/);
          filename = (filename && filename.length > 1) ? filename[1] : defaultFilename;

          response.blob().then((blob) => {

            // Synthesize a click on an invisible anchor pointing at a
            // temporary object URL to open the browser's save dialog
            let file = window.URL.createObjectURL(blob);
            let a = document.createElement('a');
            a.href = file;
            a.download = filename;
            document.body.appendChild(a);
            a.click();
            a.remove();

            // Release the object URL so the blob can be reclaimed
            window.URL.revokeObjectURL(file);
          });

        } else alert('Something went wrong!');
      }
    }
  })
  // Surface network-level failures instead of swallowing them silently
  .catch(() => alert('Something went wrong!'));
}
|
||||
|
||||
|
||||
// Enable Popper.js tooltips
|
||||
let toolTipElems = document.querySelectorAll('[data-bs-toggle="tooltip"]');
|
||||
let toolTipList = [...toolTipElems].map(elem => new bootstrap.Tooltip(elem));
|
||||
|
@ -300,6 +339,16 @@ if (queriesModal) {
|
|||
}, false);
|
||||
}
|
||||
|
||||
// Wire up the "Download Query Log" button (when present on this page)
// to fetch the query log CSV through the shared os_download() helper
let os_query_log_download = document.getElementById('os_query_log_download');
if (os_query_log_download) {
  os_query_log_download.addEventListener('click', () => {
    const payload = {
      action: 'download',
      content: 'query_log'
    };
    os_download('query-log.txt', payload);
  }, false);
}
|
||||
|
||||
|
||||
/* ***** Crawler Modal ********************************************* */
|
||||
let os_get_crawl_progress = function() {
|
||||
|
@ -512,38 +561,10 @@ os_crawl_cancel.addEventListener('click', function() {
|
|||
}, false);
|
||||
|
||||
os_crawl_log_download.addEventListener('click', function() {
|
||||
fetch(new Request('./admin.php'), {
|
||||
method: 'POST',
|
||||
headers: { 'Content-type': 'application/json' },
|
||||
body: JSON.stringify({
|
||||
action: 'download',
|
||||
content: 'crawl_log',
|
||||
grep: document.querySelector('input[name="os_crawl_grep"]:checked').value
|
||||
})
|
||||
})
|
||||
.then((response) => {
|
||||
if (response.status === 200) {
|
||||
let ct = response.headers.get('content-type').trim();
|
||||
if (ct == 'application/json') {
|
||||
response.json().then((data) => {
|
||||
if (data.status == 'Error')
|
||||
alert(data.message);
|
||||
});
|
||||
} else {
|
||||
let cd = response.headers.get('content-disposition');
|
||||
let filename = cd.match(/filename="([^"]+)"/);
|
||||
filename = (filename.length > 1) ? filename[1] : 'log.txt';
|
||||
response.blob().then((blob) => {
|
||||
let file = window.URL.createObjectURL(blob);
|
||||
let a = document.createElement('a');
|
||||
a.href = file;
|
||||
a.download = filename;
|
||||
document.body.appendChild(a);
|
||||
a.click();
|
||||
a.remove();
|
||||
});
|
||||
}
|
||||
}
|
||||
os_download('crawl-log.txt', {
|
||||
action: 'download',
|
||||
content: 'crawl_log',
|
||||
grep: document.querySelector('input[name="os_crawl_grep"]:checked').value
|
||||
});
|
||||
}, false);
|
||||
|
||||
|
|
Loading…
Reference in a new issue