From 358fa42aee82e8ca0be1ec5caff2ff1d8d56e81a Mon Sep 17 00:00:00 2001 From: Brian Huisman Date: Wed, 19 Apr 2023 16:23:42 -0400 Subject: [PATCH] Update crawler.php --- orcinus/crawler.php | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/orcinus/crawler.php b/orcinus/crawler.php index a48f934..eede15b 100644 --- a/orcinus/crawler.php +++ b/orcinus/crawler.php @@ -944,7 +944,7 @@ while ($_cURL && count($_RDATA['sp_queue'])) { // Determine how to parse the content by MIME-type switch ($data['info']['mime_type']) { - /* ***** PLAIN TEXT **************************************** */ + /* ***** PLAIN TEXT ************************************** */ case 'text/plain': $data['content'] = $data['body']; @@ -952,7 +952,7 @@ while ($_cURL && count($_RDATA['sp_queue'])) { break; - /* ***** XML DOCUMENT ************************************** */ + /* ***** XML DOCUMENT ************************************ */ case 'text/xml': case 'application/xml': $data['body'] = preg_replace('//', ' ', $data['body']); @@ -993,7 +993,7 @@ while ($_cURL && count($_RDATA['sp_queue'])) { break; - /* ***** HTML DOCUMENT ************************************* */ + /* ***** HTML DOCUMENT *********************************** */ case 'text/html': case 'application/xhtml+xml': $data['body'] = preg_replace('//', ' ', $data['body']); @@ -1283,7 +1283,7 @@ while ($_cURL && count($_RDATA['sp_queue'])) { break; - /* ***** PDF *********************************************** */ + /* ***** PDF ********************************************* */ case 'application/pdf': if ($_PDF) { try { @@ -1335,10 +1335,7 @@ while ($_cURL && count($_RDATA['sp_queue'])) { break; - /* ***** JPG EXIF? ***************************************** */ - - - /* ***** Unknown MIME-type ********************************* */ + /* ***** Unknown MIME-type ******************************* */ default: $data['error'] = 'Not indexed due to unknown MIME type ('.$data['info']['mime_type'].')'; $data['info']['noindex'] = 'unknown-mime';