Rudimentary keyword generation + cache

This commit is contained in:
Belle Aerni 2023-01-06 02:46:38 -08:00
parent 8ac33d3682
commit 1d020543c0
4 changed files with 107 additions and 9 deletions

View file

@ -3,6 +3,7 @@
namespace AntCMS;
use AntCMS\AntMarkdown;
use AntCMS\AntKeywords;
class AntCMS
{
@ -41,7 +42,8 @@ class AntCMS
public function getPage($page)
{
$page = strtolower($page);
$pagePath = appDir . "/Content/$page.md";
$pagePath = AntDir . "/Content/$page.md";
$AntKeywords = new AntKeywords();
if (file_exists($pagePath)) {
try {
$pageContent = file_get_contents($pagePath);
@ -49,14 +51,14 @@ class AntCMS
// Extract the AntCMS header using the regular expression
preg_match('/--AntCMS--\n(?:Title: (.*)\n)?(?:Author: (.*)\n)?(?:Description: (.*)\n)?(?:Keywords: (.*)\n)?--AntCMS--\n/', $pageContent, $matches);
// Remove the AntCMS section from the content
$pageContent = preg_replace('/--AntCMS--.*--AntCMS--/s', '', $pageContent);
// Extract the values from the $matches array and provide default values if the elements are missing
$title = $matches[1] ?? 'AntCMS';
$author = $matches[2] ?? 'AntCMS';
$description = $matches[3] ?? 'AntCMS';
$keywords = $matches[4] ?? 'AntCMS';
// Remove the AntCMS section from the content
$pageContent = preg_replace('/--AntCMS--.*--AntCMS--/s', '', $pageContent);
$keywords = $matches[4] ?? $AntKeywords->generateKeywords($pageContent);
$result = ['content' => $pageContent, 'title' => $title, 'author' => $author, 'description' => $description, 'keywords' => $keywords];
return $result;
@ -70,7 +72,7 @@ class AntCMS
public function getThemeContent()
{
$themePath = appDir . "/Theme/default_layout.html";
$themePath = AntDir . "/Theme/default_layout.html";
$themeContent = file_get_contents($themePath);
if (!$themeContent) {
@ -79,6 +81,9 @@ class AntCMS
<html>
<head>
<title><!--AntCMS-Title--></title>
<meta name="description" content="<!--AntCMS-Description-->">
<meta name="author" content="<!--AntCMS-Author-->">
<meta name="keywords" content="<!--AntCMS-Keywords-->">
</head>
<body>
<!--AntCMS-Body-->

37
src/AntCMS/Cache.php Normal file
View file

@ -0,0 +1,37 @@
<?php
namespace AntCMS;
/**
 * Minimal file-backed cache.
 *
 * Every entry is stored as "<AntCache>/<key>.cache", where AntCache is the
 * cache-directory constant that must be defined before any method is called.
 * The key is used verbatim in the file name, so callers must pass
 * filesystem-safe keys (for example a hex digest).
 */
class AntCache
{
    /**
     * Stores $content under $key, creating the cache directory when needed.
     *
     * @param string $key     Cache key; becomes the file name.
     * @param mixed  $content Value to store; it is cast to string.
     * @return bool True when the entry was written, false on any failure.
     */
    public function setCache($key, $content)
    {
        $cachePath = $this->cachePath($key);

        try {
            // The filesystem functions report failure through their return
            // value (plus a warning), not through exceptions, so every result
            // is checked explicitly.
            $cacheDir = dirname($cachePath);
            if (!is_dir($cacheDir) && !mkdir($cacheDir, 0775, true) && !is_dir($cacheDir)) {
                return false;
            }

            // LOCK_EX keeps concurrent requests from reading a half-written entry.
            return file_put_contents($cachePath, (string) $content, LOCK_EX) !== false;
        } catch (\Exception $e) {
            // Only reached when an error handler converts warnings to exceptions.
            return false;
        }
    }

    /**
     * Reads the entry stored under $key.
     *
     * @param string $key Cache key.
     * @return string|false The cached content, or false when it is missing or unreadable.
     */
    public function getCache($key)
    {
        $cachePath = $this->cachePath($key);

        // Checking first avoids the warning file_get_contents() emits for a
        // missing or unreadable file.
        if (!is_file($cachePath) || !is_readable($cachePath)) {
            return false;
        }

        try {
            return file_get_contents($cachePath);
        } catch (\Exception $e) {
            // Only reached when an error handler converts warnings to exceptions.
            return false;
        }
    }

    /**
     * Tells whether an entry exists for $key.
     *
     * @param string $key Cache key.
     * @return bool
     */
    public function isCached($key)
    {
        return is_file($this->cachePath($key));
    }

    /**
     * Builds the absolute path of the file backing $key.
     *
     * @param string $key Cache key.
     * @return string
     */
    private function cachePath($key)
    {
        return AntCache . "/$key.cache";
    }
}

52
src/AntCMS/Keywords.php Normal file
View file

@ -0,0 +1,52 @@
<?php
namespace AntCMS;
use AntCMS\AntCache;
/**
 * Rudimentary keyword extraction based on word frequency.
 */
class AntKeywords
{
    /**
     * Returns the most frequent words of $content as a comma-separated list.
     *
     * Words are split on whitespace, stripped of surrounding punctuation and
     * markdown characters, lower-cased, and filtered against a small English
     * stop-word list. Results are cached through AntCache, keyed by both the
     * content and the requested count.
     *
     * @param string $content Text (typically markdown) to analyse.
     * @param int    $count   Maximum number of keywords to return.
     * @return string Keywords joined with ", "; empty when no usable word is found.
     */
    public function generateKeywords($content = '', $count = 15)
    {
        $content = (string) $content;
        // A negative length would make array_slice() drop entries from the end.
        $limit = max(0, (int) $count);

        $cache = new AntCache();
        // The limit is part of the key so that a call with a different $count
        // never receives a list that was cached for another size.
        $cacheKey = hash('sha3-512', $limit . ':' . $content);

        if ($cache->isCached($cacheKey)) {
            $cachedKeywords = $cache->getCache($cacheKey);
            if (is_string($cachedKeywords) && $cachedKeywords !== '') {
                return $cachedKeywords;
            }
        }

        // Stop words, plus the "chf" currency code; every other symbol is
        // removed by the punctuation stripping below.
        $ignored = array_flip([
            'a', 'an', 'and', 'are', 'as', 'at', 'be', 'by', 'for', 'from', 'has', 'have', 'he',
            'in', 'is', 'it', 'its', 'of', 'on', 'that', 'the', 'to', 'was', 'were', 'will', 'with',
            'chf',
        ]);

        // Split on any run of ASCII whitespace so that words separated only by
        // a line break stay separate. The class is spelled out byte by byte so
        // it can never match inside a multi-byte UTF-8 character.
        $tokens = preg_split('/[ \t\r\n\f\x0B]+/', $content, -1, PREG_SPLIT_NO_EMPTY);
        if ($tokens === false) {
            $tokens = [];
        }

        $wordCounts = [];
        foreach ($tokens as $token) {
            // Drop leading/trailing characters that are neither letters nor
            // digits ("word," "**word**" "#heading" ...).
            $word = preg_replace('/^[^\p{L}\p{N}]+|[^\p{L}\p{N}]+$/u', '', $token);
            if ($word === null) {
                // The token is not valid UTF-8; keep it untouched.
                $word = $token;
            }

            $word = function_exists('mb_strtolower') ? mb_strtolower($word, 'UTF-8') : strtolower($word);
            if ($word === '' || isset($ignored[$word])) {
                continue;
            }

            $wordCounts[$word] = ($wordCounts[$word] ?? 0) + 1;
        }

        // Most frequent words first.
        arsort($wordCounts);

        $keywords = implode(', ', array_slice(array_keys($wordCounts), 0, $limit));

        // Empty results are ignored when read back, so there is no point storing them.
        if ($keywords !== '') {
            $cache->setCache($cacheKey, $keywords);
        }

        return $keywords;
    }
}

View file

@ -3,13 +3,17 @@
error_reporting(E_ALL);
ini_set('display_errors', 1);
const appDir = __DIR__;
const AntDir = __DIR__;
const AntCache = __DIR__ . '/Cache';
require_once __DIR__ . '/Vendor/autoload.php';
require_once __DIR__ . '/AntCMS/App.php';
require_once __DIR__ . '/AntCMS/Markdown.php';
require_once __DIR__ . '/AntCMS/Keywords.php';
require_once __DIR__ . '/AntCMS/Cache.php';
use \AntCMS;
$antCms = new AntCMS\AntCMS();
use AntCMS\AntCMS;
$antCms = new AntCMS();
$requestedPage = parse_url($_SERVER['REQUEST_URI'], PHP_URL_PATH);
$indexes = ['/', '/index.php', '/index.html'];
if (in_array($requestedPage, $indexes)) {