# orca-search3
Orca PHP Search - Self-crawling, indexing and search-engine script

View file

<!DOCTYPE html>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<link rel="stylesheet" href="os3/css/search.css">
<title>Orca PHP Search - Offline Javascript Search</title>
<h1>Orca PHP Search - Offline Javascript Search</h1>
<script src="os3/js/mustache.js"></script>
<script src="os3/js/search_output.js"></script>

View file

<?php
// Bootstrap the search engine before any output is sent.
// NOTE(review): presumably os3/search.php defines $_ODATA and $_TEMPLATE — confirm.
require 'os3/search.php';
?><!DOCTYPE html>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<link rel="stylesheet" href="css/search.css">
<title>Orca PHP Search <?php echo $_ODATA['version']; ?></title>
<h1>Orca PHP Search <?php echo $_ODATA['version']; ?></h1>
<?php $_TEMPLATE->render(); ?>

@ -0,0 +1,88 @@
* This file is part of Mustache.php.
* (c) 2010-2017 Justin Hileman
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
/**
 * Mustache class autoloader.
 *
 * Maps `Mustache_Foo_Bar` to `<baseDir>/Mustache/Foo/Bar.php` and requires the
 * file when it exists. Classes outside the `Mustache` prefix are ignored so
 * other registered autoloaders can handle them.
 */
class Mustache_Autoloader
{
    private $baseDir;

    /**
     * An array where the key is the baseDir and the value is an instance of
     * this class.
     *
     * @var array
     */
    private static $instances;

    /**
     * Autoloader constructor.
     *
     * @param string $baseDir Mustache library base directory (default: dirname(__FILE__).'/..')
     */
    public function __construct($baseDir = null)
    {
        if ($baseDir === null) {
            $baseDir = dirname(__FILE__) . '/..';
        }

        // realpath doesn't always work, for example, with stream URIs;
        // fall back to the path exactly as given when it fails.
        $realDir = realpath($baseDir);
        if ($realDir !== false && is_dir($realDir)) {
            $this->baseDir = $realDir;
        } else {
            $this->baseDir = $baseDir;
        }
    }

    /**
     * Register a new instance as an SPL autoloader.
     *
     * Instances are memoized per base directory, so repeated calls with the
     * same $baseDir return the same loader object.
     *
     * @param string $baseDir Mustache library base directory (default: dirname(__FILE__).'/..')
     *
     * @return Mustache_Autoloader Registered Autoloader instance
     */
    public static function register($baseDir = null)
    {
        // A null/empty base directory shares the single default slot (key 0).
        $key = $baseDir ? $baseDir : 0;

        if (!isset(self::$instances[$key])) {
            self::$instances[$key] = new self($baseDir);
        }

        $loader = self::$instances[$key];
        spl_autoload_register(array($loader, 'autoload'));

        return $loader;
    }

    /**
     * Autoload Mustache classes.
     *
     * @param string $class
     */
    public function autoload($class)
    {
        // isset() guards the offset access so an empty name cannot raise an
        // "Uninitialized string offset" warning.
        if (isset($class[0]) && $class[0] === '\\') {
            $class = substr($class, 1);
        }

        // Not one of ours: leave it to other autoloaders.
        if (strpos($class, 'Mustache') !== 0) {
            return;
        }

        $file = sprintf('%s/%s.php', $this->baseDir, str_replace('_', '/', $class));
        if (is_file($file)) {
            require $file;
        }
    }
}

View file

* This file is part of Mustache.php.
* (c) 2010-2017 Justin Hileman
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
/**
 * Mustache Cache interface.
 *
 * Interface for caching and loading Mustache_Template classes
 * generated by the Mustache_Compiler.
 */
interface Mustache_Cache
{
    /**
     * Load a compiled Mustache_Template class from cache.
     *
     * @param string $key
     *
     * @return bool indicates successful class load
     */
    public function load($key);

    /**
     * Cache and load a compiled Mustache_Template class.
     *
     * @param string $key
     * @param string $value
     */
    public function cache($key, $value);

    /**
     * Set a logger instance.
     *
     * @param Mustache_Logger|Psr\Log\LoggerInterface $logger
     */
    public function setLogger($logger = null);
}

@ -0,0 +1,60 @@
* This file is part of Mustache.php.
* (c) 2010-2017 Justin Hileman
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
/**
 * Abstract Mustache Cache class.
 *
 * Provides logging support to child implementations.
 *
 * @abstract
 */
abstract class Mustache_Cache_AbstractCache implements Mustache_Cache
{
    private $logger = null;

    /**
     * Get the current logger instance.
     *
     * @return Mustache_Logger|Psr\Log\LoggerInterface
     */
    public function getLogger()
    {
        return $this->logger;
    }

    /**
     * Set a logger instance.
     *
     * @throws Mustache_Exception_InvalidArgumentException If $logger is neither null nor a supported logger
     *
     * @param Mustache_Logger|Psr\Log\LoggerInterface $logger
     */
    public function setLogger($logger = null)
    {
        // is_a() with a class-name string avoids a hard dependency on the PSR interface being loaded.
        if ($logger !== null && !($logger instanceof Mustache_Logger || is_a($logger, 'Psr\\Log\\LoggerInterface'))) {
            throw new Mustache_Exception_InvalidArgumentException('Expected an instance of Mustache_Logger or Psr\\Log\\LoggerInterface.');
        }

        $this->logger = $logger;
    }

    /**
     * Add a log record if logging is enabled.
     *
     * @param string $level   The logging level
     * @param string $message The log message
     * @param array  $context The log context
     */
    protected function log($level, $message, array $context = array())
    {
        if (isset($this->logger)) {
            $this->logger->log($level, $message, $context);
        }
    }
}

@ -0,0 +1,161 @@
* This file is part of Mustache.php.
* (c) 2010-2017 Justin Hileman
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
/**
 * Mustache Cache filesystem implementation.
 *
 * A FilesystemCache instance caches Mustache Template classes from the filesystem by name:
 *
 *     $cache = new Mustache_Cache_FilesystemCache(dirname(__FILE__).'/cache');
 *     $cache->cache($className, $compiledSource);
 *
 * The FilesystemCache benefits from any opcode caching that may be set up in your environment.
 *
 * NOTE(review): the Mustache_Logger level constants passed to log() below were
 * restored from the upstream library layout — confirm against Mustache_Logger.
 */
class Mustache_Cache_FilesystemCache extends Mustache_Cache_AbstractCache
{
    private $baseDir;
    private $fileMode;

    /**
     * Filesystem cache constructor.
     *
     * @param string $baseDir  Directory for compiled templates
     * @param int    $fileMode Override default permissions for cache files. Defaults to using the system-defined umask
     */
    public function __construct($baseDir, $fileMode = null)
    {
        $this->baseDir  = $baseDir;
        $this->fileMode = $fileMode;
    }

    /**
     * Load the class from cache using `require_once`.
     *
     * @param string $key
     *
     * @return bool
     */
    public function load($key)
    {
        $fileName = $this->getCacheFilename($key);
        if (!is_file($fileName)) {
            return false;
        }

        require_once $fileName;

        return true;
    }

    /**
     * Cache and load the compiled class.
     *
     * @param string $key
     * @param string $value
     */
    public function cache($key, $value)
    {
        $fileName = $this->getCacheFilename($key);

        $this->log(
            Mustache_Logger::DEBUG,
            'Writing to template cache: "{fileName}"',
            array('fileName' => $fileName)
        );

        $this->writeFile($fileName, $value);
        $this->load($key);
    }

    /**
     * Build the cache filename.
     * Subclasses should override for custom cache directory structures.
     *
     * @param string $name
     *
     * @return string
     */
    protected function getCacheFilename($name)
    {
        return sprintf('%s/%s.php', $this->baseDir, $name);
    }

    /**
     * Create cache directory.
     *
     * @throws Mustache_Exception_RuntimeException If unable to create directory
     *
     * @param string $fileName
     *
     * @return string
     */
    private function buildDirectoryForFilename($fileName)
    {
        $dirName = dirname($fileName);
        if (!is_dir($dirName)) {
            $this->log(
                Mustache_Logger::INFO,
                'Creating Mustache template cache directory: "{dirName}"',
                array('dirName' => $dirName)
            );

            // Errors are suppressed on purpose: a concurrent request may create
            // the directory first; the is_dir() re-check below is authoritative.
            @mkdir($dirName, 0777, true);
            // @codeCoverageIgnoreStart
            if (!is_dir($dirName)) {
                throw new Mustache_Exception_RuntimeException(sprintf('Failed to create cache directory "%s".', $dirName));
            }
            // @codeCoverageIgnoreEnd
        }

        return $dirName;
    }

    /**
     * Write cache file.
     *
     * The content is written to a temporary file in the target directory and
     * then renamed into place, so readers never observe a half-written file.
     *
     * @throws Mustache_Exception_RuntimeException If unable to write file
     *
     * @param string $fileName
     * @param string $value
     */
    private function writeFile($fileName, $value)
    {
        $dirName = $this->buildDirectoryForFilename($fileName);

        $this->log(
            Mustache_Logger::DEBUG,
            'Caching compiled template to "{fileName}"',
            array('fileName' => $fileName)
        );

        $tempFile = tempnam($dirName, basename($fileName));

        // tempnam() returns false on failure; passing that on to
        // file_put_contents() would raise an unrelated error instead of the
        // RuntimeException documented above.
        if ($tempFile !== false && false !== @file_put_contents($tempFile, $value)) {
            if (@rename($tempFile, $fileName)) {
                $mode = isset($this->fileMode) ? $this->fileMode : (0666 & ~umask());
                @chmod($fileName, $mode);

                return;
            }

            // @codeCoverageIgnoreStart
            $this->log(
                Mustache_Logger::ERROR,
                'Unable to rename Mustache temp cache file: "{tempName}" -> "{fileName}"',
                array('tempName' => $tempFile, 'fileName' => $fileName)
            );
            // @codeCoverageIgnoreEnd
        }

        // @codeCoverageIgnoreStart
        // Do not leave an orphaned temp file behind in the cache directory.
        if ($tempFile !== false && is_file($tempFile)) {
            @unlink($tempFile);
        }

        throw new Mustache_Exception_RuntimeException(sprintf('Failed to write cache file "%s".', $fileName));
        // @codeCoverageIgnoreEnd
    }
}

@ -0,0 +1,47 @@
* This file is part of Mustache.php.
* (c) 2010-2017 Justin Hileman
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
/**
 * Mustache Cache in-memory implementation.
 *
 * The in-memory cache is used for uncached lambda section templates. It's also useful during development, but is not
 * recommended for production use.
 */
class Mustache_Cache_NoopCache extends Mustache_Cache_AbstractCache
{
    /**
     * Loads nothing. Move along.
     *
     * @param string $key
     *
     * @return bool
     */
    public function load($key)
    {
        return false;
    }

    /**
     * Loads the compiled Mustache Template class without caching.
     *
     * @param string $key
     * @param string $value
     */
    public function cache($key, $value)
    {
        // NOTE(review): log level restored from the upstream library layout — confirm.
        $this->log(
            Mustache_Logger::WARNING,
            'Template cache disabled, evaluating "{className}" class at runtime',
            array('className' => $key)
        );

        // SECURITY: $value is executed as PHP. It must only ever be source
        // produced by the template compiler, never externally supplied text.
        // The leading '?>' lets eval() accept source that begins with '<?php'.
        eval('?>' . $value);
    }
}

View file

* This file is part of Mustache.php.
* (c) 2010-2017 Justin Hileman
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
* Mustache Compiler class.
* This class is responsible for turning a Mustache token parse tree into normal PHP source code.
class Mustache_Compiler
private $pragmas;
private $defaultPragmas = array();
private $sections;
private $blocks;
private $source;
private $indentNextLine;
private $customEscape;
private $entityFlags;
private $charset;
private $strictCallables;
/**
 * Compile a Mustache token parse tree into PHP source code.
 *
 * Resets all per-compilation state (pragmas, collected sections and blocks,
 * indent tracking) before generating code, so one compiler instance can be
 * reused for many templates.
 *
 * @param string $source          Mustache Template source code
 * @param array  $tree            Parse tree of Mustache tokens
 * @param string $name            Mustache Template class name
 * @param bool   $customEscape    (default: false)
 * @param string $charset         (default: 'UTF-8')
 * @param bool   $strictCallables (default: false)
 * @param int    $entityFlags     (default: ENT_COMPAT)
 *
 * @return string Generated PHP source code
 */
public function compile($source, array $tree, $name, $customEscape = false, $charset = 'UTF-8', $strictCallables = false, $entityFlags = ENT_COMPAT)
{
    $this->pragmas         = $this->defaultPragmas;
    $this->sections        = array();
    $this->blocks          = array();
    $this->source          = $source;
    $this->indentNextLine  = true;
    $this->customEscape    = $customEscape;
    $this->entityFlags     = $entityFlags;
    $this->charset         = $charset;
    $this->strictCallables = $strictCallables;

    return $this->writeCode($tree, $name);
}
/**
 * Enable pragmas across all templates, regardless of the presence of pragma
 * tags in the individual templates.
 *
 * @internal Users should set global pragmas in Mustache_Engine, not here :)
 *
 * @param string[] $pragmas
 */
public function setPragmas(array $pragmas)
{
    $this->pragmas = array();
    foreach ($pragmas as $pragma) {
        $this->pragmas[$pragma] = true;
    }

    // compile() resets $this->pragmas from this copy at the start of every run.
    $this->defaultPragmas = $this->pragmas;
}
* Helper function for walking the Mustache token parse tree.
* @throws Mustache_Exception_SyntaxException upon encountering unknown token types
* @param array $tree Parse tree of Mustache tokens
* @param int $level (default: 0)
* @return string Generated PHP source code
private function walk(array $tree, $level = 0)
$code = '';
foreach ($tree as $node) {
switch ($node[Mustache_Tokenizer::TYPE]) {
case Mustache_Tokenizer::T_PRAGMA:
$this->pragmas[$node[Mustache_Tokenizer::NAME]] = true;
case Mustache_Tokenizer::T_SECTION:
$code .= $this->section(
isset($node[Mustache_Tokenizer::FILTERS]) ? $node[Mustache_Tokenizer::FILTERS] : array(),
case Mustache_Tokenizer::T_INVERTED:
$code .= $this->invertedSection(
isset($node[Mustache_Tokenizer::FILTERS]) ? $node[Mustache_Tokenizer::FILTERS] : array(),
case Mustache_Tokenizer::T_PARTIAL:
$code .= $this->partial(
isset($node[Mustache_Tokenizer::DYNAMIC]) ? $node[Mustache_Tokenizer::DYNAMIC] : false,
isset($node[Mustache_Tokenizer::INDENT]) ? $node[Mustache_Tokenizer::INDENT] : '',
case Mustache_Tokenizer::T_PARENT:
$code .= $this->parent(
isset($node[Mustache_Tokenizer::DYNAMIC]) ? $node[Mustache_Tokenizer::DYNAMIC] : false,
isset($node[Mustache_Tokenizer::INDENT]) ? $node[Mustache_Tokenizer::INDENT] : '',
case Mustache_Tokenizer::T_BLOCK_ARG:
$code .= $this->blockArg(
case Mustache_Tokenizer::T_BLOCK_VAR:
$code .= $this->blockVar(
case Mustache_Tokenizer::T_COMMENT:
case Mustache_Tokenizer::T_ESCAPED:
case Mustache_Tokenizer::T_UNESCAPED:
case Mustache_Tokenizer::T_UNESCAPED_2:
$code .= $this->variable(
isset($node[Mustache_Tokenizer::FILTERS]) ? $node[Mustache_Tokenizer::FILTERS] : array(),
$node[Mustache_Tokenizer::TYPE] === Mustache_Tokenizer::T_ESCAPED,
case Mustache_Tokenizer::T_TEXT:
$code .= $this->text($node[Mustache_Tokenizer::VALUE], $level);
throw new Mustache_Exception_SyntaxException(sprintf('Unknown token type: %s', $node[Mustache_Tokenizer::TYPE]), $node);
return $code;
const KLASS = '<?php
class %s extends Mustache_Template
private $lambdaHelper;%s
public function renderInternal(Mustache_Context $context, $indent = \'\')
$this->lambdaHelper = new Mustache_LambdaHelper($this->mustache, $context);
$buffer = \'\';
return $buffer;
const KLASS_NO_LAMBDAS = '<?php
class %s extends Mustache_Template
public function renderInternal(Mustache_Context $context, $indent = \'\')
$buffer = \'\';
return $buffer;
const STRICT_CALLABLE = 'protected $strictCallables = true;';
/**
 * Generate Mustache Template class PHP source.
 *
 * @param array  $tree Parse tree of Mustache tokens
 * @param string $name Mustache Template class name
 *
 * @return string Generated PHP source code
 */
private function writeCode($tree, $name)
{
    // walk() must run first: it populates $this->sections and $this->blocks as a side effect.
    $code     = $this->walk($tree);
    $sections = implode("\n", $this->sections);
    $blocks   = implode("\n", $this->blocks);

    // Templates without sections or blocks use the lighter class skeleton.
    $klass    = empty($this->sections) && empty($this->blocks) ? self::KLASS_NO_LAMBDAS : self::KLASS;
    $callable = $this->strictCallables ? $this->prepare(self::STRICT_CALLABLE) : '';

    return sprintf($this->prepare($klass, 0, false, true), $name, $callable, $code, $sections, $blocks);
}
const BLOCK_VAR = '
$blockFunction = $context->findInBlock(%s);
if (is_callable($blockFunction)) {
$buffer .= call_user_func($blockFunction, $context);
const BLOCK_VAR_ELSE = '} else {%s';
/**
 * Generate Mustache Template inheritance block variable PHP source.
 *
 * @param array  $nodes Array of child tokens
 * @param string $id    Section name
 * @param int    $start Section start offset
 * @param int    $end   Section end offset
 * @param string $otag  Current Mustache opening tag
 * @param string $ctag  Current Mustache closing tag
 * @param int    $level
 *
 * @return string Generated PHP source code
 */
private function blockVar($nodes, $id, $start, $end, $otag, $ctag, $level)
{
    $id = var_export($id, true);

    // The block's own child nodes become the fallback ("else") branch.
    $else = $this->walk($nodes, $level);
    if ($else !== '') {
        $else = sprintf($this->prepare(self::BLOCK_VAR_ELSE, $level + 1, false, true), $else);
    }

    return sprintf($this->prepare(self::BLOCK_VAR, $level), $id, $else);
}
const BLOCK_ARG = '%s => array($this, \'block%s\'),';
/**
 * Generate Mustache Template inheritance block argument PHP source.
 *
 * @param array  $nodes Array of child tokens
 * @param string $id    Section name
 * @param int    $start Section start offset
 * @param int    $end   Section end offset
 * @param string $otag  Current Mustache opening tag
 * @param string $ctag  Current Mustache closing tag
 * @param int    $level
 *
 * @return string Generated PHP source code
 */
private function blockArg($nodes, $id, $start, $end, $otag, $ctag, $level)
{
    // block() registers the generated block method and returns its key.
    $key = $this->block($nodes);
    $id  = var_export($id, true);

    return sprintf($this->prepare(self::BLOCK_ARG, $level), $id, $key);
}
public function block%s($context)
$indent = $buffer = \'\';%s
return $buffer;
/**
 * Generate Mustache Template inheritance block function PHP source.
 *
 * @param array $nodes Array of child tokens
 *
 * @return string key of new block function
 */
private function block($nodes)
{
    $code = $this->walk($nodes, 0);

    // Keyed by a hash of the generated code, so identical blocks share one method.
    $key = ucfirst(md5($code));

    if (!isset($this->blocks[$key])) {
        $this->blocks[$key] = sprintf($this->prepare(self::BLOCK_FUNCTION, 0), $key, $code);
    }

    return $key;
}
const SECTION_CALL = '
$value = $context->%s(%s);%s
$buffer .= $this->section%s($context, $indent, $value);
const SECTION = '
private function section%s(Mustache_Context $context, $indent, $value)
$buffer = \'\';
if (%s) {
$source = %s;
$result = (string) call_user_func($value, $source, %s);
if (strpos($result, \'{{\') === false) {
$buffer .= $result;
} else {
$buffer .= $this->mustache
} elseif (!empty($value)) {
$values = $this->isIterable($value) ? $value : array($value);
foreach ($values as $value) {
return $buffer;
/**
 * Generate Mustache Template section PHP source.
 *
 * @param array    $nodes   Array of child tokens
 * @param string   $id      Section name
 * @param string[] $filters Array of filters
 * @param int      $start   Section start offset
 * @param int      $end     Section end offset
 * @param string   $otag    Current Mustache opening tag
 * @param string   $ctag    Current Mustache closing tag
 * @param int      $level
 *
 * @return string Generated section PHP source code
 */
private function section($nodes, $id, $filters, $start, $end, $otag, $ctag, $level)
{
    // The raw template text of the section body is embedded in the generated code.
    $source   = var_export(substr($this->source, $start, $end - $start), true);
    $callable = $this->getCallable();

    if ($otag !== '{{' || $ctag !== '}}') {
        // Non-default delimiters are carried along as a "set delimiters" tag.
        $delimTag = var_export(sprintf('{{= %s %s =}}', $otag, $ctag), true);
        $helper   = sprintf('$this->lambdaHelper->withDelimiters(%s)', $delimTag);
        $delims   = ', ' . $delimTag;
    } else {
        $helper = '$this->lambdaHelper';
        $delims = '';
    }

    // Sections with identical delimiters and source share one generated method.
    $key = ucfirst(md5($delims . "\n" . $source));

    if (!isset($this->sections[$key])) {
        $this->sections[$key] = sprintf($this->prepare(self::SECTION), $key, $callable, $source, $helper, $delims, $this->walk($nodes, 2));
    }

    $method  = $this->getFindMethod($id);
    $id      = var_export($id, true);
    $filters = $this->getFilters($filters, $level);

    return sprintf($this->prepare(self::SECTION_CALL, $level), $method, $id, $filters, $key);
}
$value = $context->%s(%s);%s
if (empty($value)) {
/**
 * Generate Mustache Template inverted section PHP source.
 *
 * @param array    $nodes   Array of child tokens
 * @param string   $id      Section name
 * @param string[] $filters Array of filters
 * @param int      $level
 *
 * @return string Generated inverted section PHP source code
 */
private function invertedSection($nodes, $id, $filters, $level)
{
    $method  = $this->getFindMethod($id);
    $id      = var_export($id, true);
    $filters = $this->getFilters($filters, $level);

    return sprintf($this->prepare(self::INVERTED_SECTION, $level), $method, $id, $filters, $this->walk($nodes, $level));
}
const DYNAMIC_NAME = '$this->resolveValue($context->%s(%s), $context)';
/**
 * Generate Mustache Template dynamic name resolution PHP source.
 *
 * @param string $id      Tag name
 * @param bool   $dynamic True if the name is dynamic
 *
 * @return string Dynamic name resolution PHP source code
 */
private function resolveDynamicName($id, $dynamic)
{
    // Static names are emitted as a plain string literal.
    if (!$dynamic) {
        return var_export($id, true);
    }

    $method = $this->getFindMethod($id);

    // last() takes no argument, so no id is emitted for the implicit iterator.
    $id = ($method !== 'last') ? var_export($id, true) : '';

    // TODO: filters?

    return sprintf(self::DYNAMIC_NAME, $method, $id);
}
const PARTIAL_INDENT = ', $indent . %s';
const PARTIAL = '
if ($partial = $this->mustache->loadPartial(%s)) {
$buffer .= $partial->renderInternal($context%s);
/**
 * Generate Mustache Template partial call PHP source.
 *
 * @param string $id      Partial name
 * @param bool   $dynamic Partial name is dynamic
 * @param string $indent  Whitespace indent to apply to partial
 * @param int    $level
 *
 * @return string Generated partial call PHP source code
 */
private function partial($id, $dynamic, $indent, $level)
{
    // Only emit the extra indent argument when the partial tag was indented.
    if ($indent !== '') {
        $indentParam = sprintf(self::PARTIAL_INDENT, var_export($indent, true));
    } else {
        $indentParam = '';
    }

    return sprintf(
        $this->prepare(self::PARTIAL, $level),
        $this->resolveDynamicName($id, $dynamic),
        $indentParam
    );
}
const PARENT = '
if ($parent = $this->mustache->loadPartial(%s)) {
$buffer .= $parent->renderInternal($context, $indent);
if ($parent = $this->mustache->loadPartial(%s)) {
$buffer .= $parent->renderInternal($context, $indent);
/**
 * Generate Mustache Template inheritance parent call PHP source.
 *
 * @param string $id       Parent tag name
 * @param bool   $dynamic  Tag name is dynamic
 * @param string $indent   Whitespace indent to apply to parent
 * @param array  $children Child nodes
 * @param int    $level
 *
 * @return string Generated PHP source code
 */
private function parent($id, $dynamic, $indent, array $children, $level)
{
    // Only block-argument children are compiled; other child tokens are dropped.
    $realChildren = array_filter($children, array(__CLASS__, 'onlyBlockArgs'));
    $partialName  = $this->resolveDynamicName($id, $dynamic);

    // Without block arguments there is no block context to set up.
    if (empty($realChildren)) {
        return sprintf($this->prepare(self::PARENT_NO_CONTEXT, $level), $partialName);
    }

    return sprintf(
        $this->prepare(self::PARENT, $level),
        $partialName,
        $this->walk($realChildren, $level + 1)
    );
}
/**
 * Helper method for filtering out non-block-arg tokens.
 *
 * @param array $node
 *
 * @return bool True if $node is a block arg token
 */
private static function onlyBlockArgs(array $node)
{
    return $node[Mustache_Tokenizer::TYPE] === Mustache_Tokenizer::T_BLOCK_ARG;
}
const VARIABLE = '
$value = $this->resolveValue($context->%s(%s), $context);%s
$buffer .= %s($value === null ? \'\' : %s);
/**
 * Generate Mustache Template variable interpolation PHP source.
 *
 * @param string   $id      Variable name
 * @param string[] $filters Array of filters
 * @param bool     $escape  Escape the variable value for output?
 * @param int      $level
 *
 * @return string Generated variable interpolation PHP source
 */
private function variable($id, $filters, $escape, $level)
{
    $method = $this->getFindMethod($id);

    // last() takes no argument, so no id is emitted for the implicit iterator.
    $id      = ($method !== 'last') ? var_export($id, true) : '';
    $filters = $this->getFilters($filters, $level);
    $value   = $escape ? $this->getEscape() : '$value';

    return sprintf($this->prepare(self::VARIABLE, $level), $method, $id, $filters, $this->flushIndent(), $value);
}
const FILTER = '
$filter = $context->%s(%s);
if (!(%s)) {
throw new Mustache_Exception_UnknownFilterException(%s);
$value = call_user_func($filter, $value);%s
/**
 * Generate Mustache Template variable filtering PHP source.
 *
 * Recurses over the filter list: each call emits the code for the first
 * filter and appends the code generated for the remaining ones.
 *
 * @param string[] $filters Array of filters
 * @param int      $level
 *
 * @return string Generated filter PHP source
 */
private function getFilters(array $filters, $level)
{
    if (empty($filters)) {
        return '';
    }

    $name     = array_shift($filters);
    $method   = $this->getFindMethod($name);
    $filter   = ($method !== 'last') ? var_export($name, true) : '';
    $callable = $this->getCallable('$filter');
    $msg      = var_export($name, true);

    return sprintf($this->prepare(self::FILTER, $level), $method, $filter, $callable, $msg, $this->getFilters($filters, $level));
}
const LINE = '$buffer .= "\n";';
const TEXT = '$buffer .= %s%s;';
/**
 * Generate Mustache Template output Buffer call PHP source.
 *
 * @param string $text
 * @param int    $level
 *
 * @return string Generated output Buffer call PHP source
 */
private function text($text, $level)
{
    // Text ending in a newline means the next emitted output starts a new
    // line and therefore needs the indent prefix.
    $indentNextLine = (substr($text, -1) === "\n");
    $code = sprintf($this->prepare(self::TEXT, $level), $this->flushIndent(), var_export($text, true));

    // Updated only after flushIndent() has consumed the previous flag.
    $this->indentNextLine = $indentNextLine;

    return $code;
}
/**
 * Prepare PHP source code snippet for output.
 *
 * @param string $text
 * @param int    $bonus          Additional indent level (default: 0)
 * @param bool   $prependNewline Prepend a newline to the snippet? (default: true)
 * @param bool   $appendNewline  Append a newline to the snippet? (default: false)
 *
 * @return string PHP source code snippet
 */
private function prepare($text, $bonus = 0, $prependNewline = true, $appendNewline = false)
{
    $text = ($prependNewline ? "\n" : '') . trim($text);
    if ($prependNewline) {
        $bonus++;
    }
    if ($appendNewline) {
        $text .= "\n";
    }

    // Strip the 8-space indent the snippet carries in its constant definition
    // and re-indent every line to $bonus levels of four spaces.
    return preg_replace("/\n( {8})?/", "\n" . str_repeat(' ', $bonus * 4), $text);
}
const DEFAULT_ESCAPE = 'htmlspecialchars(%s, %s, %s)';
const CUSTOM_ESCAPE = 'call_user_func($this->mustache->getEscape(), %s)';
/**
 * Get the current escaper.
 *
 * @param string $value (default: '$value')
 *
 * @return string Either a custom callback, or an inline call to `htmlspecialchars`
 */
private function getEscape($value = '$value')
{
    if ($this->customEscape) {
        return sprintf(self::CUSTOM_ESCAPE, $value);
    }

    return sprintf(self::DEFAULT_ESCAPE, $value, var_export($this->entityFlags, true), var_export($this->charset, true));
}
/**
 * Select the appropriate Context `find` method for a given $id.
 *
 * The return value will be one of `find`, `findDot`, `findAnchoredDot` or `last`.
 *
 * @see Mustache_Context::find
 * @see Mustache_Context::findDot
 * @see Mustache_Context::last
 *
 * @param string $id Variable name
 *
 * @return string `find` method name
 */
private function getFindMethod($id)
{
    // A lone dot is the implicit iterator.
    if ($id === '.') {
        return 'last';
    }

    // A leading dot is only treated as anchored when the pragma is enabled.
    if (isset($this->pragmas[Mustache_Engine::PRAGMA_ANCHORED_DOT]) && $this->pragmas[Mustache_Engine::PRAGMA_ANCHORED_DOT]) {
        if (substr($id, 0, 1) === '.') {
            return 'findAnchoredDot';
        }
    }

    if (strpos($id, '.') === false) {
        return 'find';
    }

    return 'findDot';
}
const IS_CALLABLE = '!is_string(%s) && is_callable(%s)';
const STRICT_IS_CALLABLE = 'is_object(%s) && is_callable(%s)';
/**
 * Helper function to compile strict vs lax "is callable" logic.
 *
 * @param string $variable (default: '$value')
 *
 * @return string "is callable" logic
 */
private function getCallable($variable = '$value')
{
    $tpl = $this->strictCallables ? self::STRICT_IS_CALLABLE : self::IS_CALLABLE;

    return sprintf($tpl, $variable, $variable);
}
const LINE_INDENT = '$indent . ';
/**
 * Get the current $indent prefix to write to the buffer.
 *
 * Clears the pending-indent flag as a side effect, so the prefix is emitted
 * at most once per line.
 *
 * @return string "$indent . " or ""
 */
private function flushIndent()
{
    if (!$this->indentNextLine) {
        return '';
    }

    $this->indentNextLine = false;

    return self::LINE_INDENT;
}

View file

* This file is part of Mustache.php.
* (c) 2010-2017 Justin Hileman
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
/**
 * Mustache Template rendering Context.
 */
class Mustache_Context
{
    private $stack      = array();
    private $blockStack = array();

    /**
     * Mustache rendering Context constructor.
     *
     * @param mixed $context Default rendering context (default: null)
     */
    public function __construct($context = null)
    {
        if ($context !== null) {
            $this->stack = array($context);
        }
    }

    /**
     * Push a new Context frame onto the stack.
     *
     * @param mixed $value Object or array to use for context
     */
    public function push($value)
    {
        array_push($this->stack, $value);
    }

    /**
     * Push a new Context frame onto the block context stack.
     *
     * @param mixed $value Object or array to use for block context
     */
    public function pushBlockContext($value)
    {
        array_push($this->blockStack, $value);
    }

    /**
     * Pop the last Context frame from the stack.
     *
     * @return mixed Last Context frame (object or array)
     */
    public function pop()
    {
        return array_pop($this->stack);
    }

    /**
     * Pop the last block Context frame from the stack.
     *
     * @return mixed Last block Context frame (object or array)
     */
    public function popBlockContext()
    {
        return array_pop($this->blockStack);
    }

    /**
     * Get the last Context frame.
     *
     * @return mixed Last Context frame (object or array)
     */
    public function last()
    {
        return end($this->stack);
    }

    /**
     * Find a variable in the Context stack.
     *
     * Starting with the last Context frame (the context of the innermost section), and working back to the top-level
     * rendering context, look for a variable with the given name:
     *
     *  * If the Context frame is an associative array which contains the key $id, returns the value of that element.
     *  * If the Context frame is an object, this will check first for a public method, then a public property named
     *    $id. Failing both of these, it will try `__isset` and `__get` magic methods.
     *  * If a value named $id is not found in any Context frame, returns an empty string.
     *
     * @param string $id Variable name
     *
     * @return mixed Variable value, or '' if not found
     */
    public function find($id)
    {
        return $this->findVariableInStack($id, $this->stack);
    }

    /**
     * Find a 'dot notation' variable in the Context stack.
     *
     * Note that dot notation traversal bubbles through scope differently than the regular find method. After finding
     * the initial chunk of the dotted name, each subsequent chunk is searched for only within the value of the previous
     * result. For example, given the following context stack:
     *
     *     $data = array(
     *         'name' => 'Fred',
     *         'child' => array(
     *             'name' => 'Bob'
     *         ),
     *     );
     *
     * ... and the following Mustache template:
     *
     *     {{ child.name }}
     *
     * ... the `name` value is only searched for within the `child` value of the global Context, not within parent
     * Context frames.
     *
     * @param string $id Dotted variable selector
     *
     * @return mixed Variable value, or '' if not found
     */
    public function findDot($id)
    {
        $chunks = explode('.', $id);
        $first  = array_shift($chunks);
        $value  = $this->findVariableInStack($first, $this->stack);

        foreach ($chunks as $chunk) {
            // A miss anywhere along the path ends the lookup.
            if ($value === '') {
                return $value;
            }

            $value = $this->findVariableInStack($chunk, array($value));
        }

        return $value;
    }

    /**
     * Find an 'anchored dot notation' variable in the Context stack.
     *
     * This is the same as findDot(), except it looks in the top of the context
     * stack for the first value, rather than searching the whole context stack
     * and starting from there.
     *
     * @see Mustache_Context::findDot
     *
     * @throws Mustache_Exception_InvalidArgumentException if given an invalid anchored dot $id
     *
     * @param string $id Dotted variable selector
     *
     * @return mixed Variable value, or '' if not found
     */
    public function findAnchoredDot($id)
    {
        $chunks = explode('.', $id);
        $first  = array_shift($chunks);

        // An anchored id starts with '.', so the first chunk must be empty.
        if ($first !== '') {
            throw new Mustache_Exception_InvalidArgumentException(sprintf('Unexpected id for findAnchoredDot: %s', $id));
        }

        $value = $this->last();

        foreach ($chunks as $chunk) {
            if ($value === '') {
                return $value;
            }

            $value = $this->findVariableInStack($chunk, array($value));
        }

        return $value;
    }

    /**
     * Find an argument in the block context stack.
     *
     * @param string $id
     *
     * @return mixed Variable value, or '' if not found
     */
    public function findInBlock($id)
    {
        foreach ($this->blockStack as $context) {
            if (array_key_exists($id, $context)) {
                return $context[$id];
            }
        }

        return '';
    }

    /**
     * Helper function to find a variable in the Context stack.
     *
     * @see Mustache_Context::find
     *
     * @param string $id    Variable name
     * @param array  $stack Context stack
     *
     * @return mixed Variable value, or '' if not found
     */
    private function findVariableInStack($id, array $stack)
    {
        // Search from the innermost (last pushed) frame outwards.
        for ($i = count($stack) - 1; $i >= 0; $i--) {
            $frame = &$stack[$i];

            switch (gettype($frame)) {
                case 'object':
                    // Closures are values, not containers to look inside.
                    if (!($frame instanceof Closure)) {
                        // Note that is_callable() *will not work here*: it
                        // reports true for any name on objects that define
                        // __call(). See the Mustache.php notes on magic methods.
                        if (method_exists($frame, $id)) {
                            return $frame->$id();
                        }

                        if (isset($frame->$id)) {
                            return $frame->$id;
                        }

                        if ($frame instanceof ArrayAccess && isset($frame[$id])) {
                            return $frame[$id];
                        }
                    }
                    break;

                case 'array':
                    // array_key_exists() so that keys holding null still count as found.
                    if (array_key_exists($id, $frame)) {
                        return $frame[$id];
                    }
                    break;
            }
        }

        return '';
    }
}

View file

* This file is part of Mustache.php.
* (c) 2010-2017 Justin Hileman
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
* A Mustache implementation in PHP.
* {@link}
* Mustache is a framework-agnostic logic-less templating language. It enforces separation of view
* logic from template files. In fact, it is not even possible to embed logic in the template.
* This is very, very rad.
* @author Justin Hileman {@link}
class Mustache_Engine
{
    const VERSION      = '2.14.2';
    const SPEC_VERSION = '1.3.0';

    const PRAGMA_FILTERS      = 'FILTERS';
    const PRAGMA_BLOCKS       = 'BLOCKS';
    const PRAGMA_ANCHORED_DOT = 'ANCHORED-DOT';

    // Known pragmas
    private static $knownPragmas = array(
        self::PRAGMA_FILTERS      => true,
        self::PRAGMA_BLOCKS       => true,
        self::PRAGMA_ANCHORED_DOT => true,
    );

    // Template cache
    private $templates = array();

    // Environment
    private $templateClassPrefix = '__Mustache_';
    private $cache;
    private $lambdaCache;
    private $cacheLambdaTemplates = false;
    private $loader;
    private $partialsLoader;
    private $helpers;
    private $escape;
    private $entityFlags = ENT_COMPAT;
    private $charset = 'UTF-8';
    private $logger;
    private $strictCallables = false;
    private $pragmas = array();
    private $delimiters;

    // Services
    private $tokenizer;
    private $parser;
    private $compiler;

    /**
     * Mustache class constructor.
     *
     * Passing an $options array allows overriding certain Mustache options during instantiation:
     *
     *     $options = array(
     *         // The class prefix for compiled templates. Defaults to '__Mustache_'.
     *         'template_class_prefix' => '__MyTemplates_',
     *
     *         // A Mustache cache instance or a cache directory string for compiled templates.
     *         // Mustache will not cache templates unless this is set.
     *         'cache' => dirname(__FILE__).'/tmp/cache/mustache',
     *
     *         // Override default permissions for cache files. Defaults to using the system-defined umask. It is
     *         // *strongly* recommended that you configure your umask properly rather than overriding permissions here.
     *         'cache_file_mode' => 0666,
     *
     *         // Optionally, enable caching for lambda section templates. This is generally not recommended, as lambda
     *         // sections are often too dynamic to benefit from caching.
     *         'cache_lambda_templates' => true,
     *
     *         // Customize the tag delimiters used by this engine instance. Note that overriding here changes the
     *         // delimiters used to parse all templates and partials loaded by this instance. To override just for a
     *         // single template, use an inline "change delimiters" tag at the start of the template file:
     *         //
     *         //     {{=<% %>=}}
     *         //
     *         'delimiters' => '<% %>',
     *
     *         // A Mustache template loader instance. Uses a StringLoader if not specified.
     *         'loader' => new Mustache_Loader_FilesystemLoader(dirname(__FILE__).'/views'),
     *
     *         // A Mustache loader instance for partials.
     *         'partials_loader' => new Mustache_Loader_FilesystemLoader(dirname(__FILE__).'/views/partials'),
     *
     *         // An array of Mustache partials. Useful for quick-and-dirty string template loading, but not as
     *         // efficient or lazy as a Filesystem (or database) loader.
     *         'partials' => array('foo' => file_get_contents(dirname(__FILE__).'/views/partials/foo.mustache')),
     *
     *         // An array of 'helpers'. Helpers can be global variables or objects, closures (e.g. for higher order
     *         // sections), or any other valid Mustache context value. They will be prepended to the context stack,
     *         // so they will be available in any template loaded by this Mustache instance.
     *         'helpers' => array('i18n' => function ($text) {
     *             // do something translatey here...
     *         }),
     *
     *         // An 'escape' callback, responsible for escaping double-mustache variables.
     *         'escape' => function ($value) {
     *             return htmlspecialchars($value, ENT_COMPAT, 'UTF-8');
     *         },
     *
     *         // Type argument for `htmlspecialchars`. Defaults to ENT_COMPAT. You may prefer ENT_QUOTES.
     *         'entity_flags' => ENT_QUOTES,
     *
     *         // Character set for `htmlspecialchars`. Defaults to 'UTF-8'. Use 'UTF-8'.
     *         'charset' => 'ISO-8859-1',
     *
     *         // A Mustache Logger instance. No logging will occur unless this is set. Using a PSR-3 compatible
     *         // logging library -- such as Monolog -- is highly recommended. A simple stream logger implementation is
     *         // available as well:
     *         'logger' => new Mustache_Logger_StreamLogger('php://stderr'),
     *
     *         // Only treat Closure instances and invokable classes as callable. If true, values like
     *         // `array('ClassName', 'methodName')` and `array($classInstance, 'methodName')`, which are traditionally
     *         // "callable" in PHP, are not called to resolve variables for interpolation or section contexts. This
     *         // helps protect against arbitrary code execution when user input is passed directly into the template.
     *         // This currently defaults to false, but will default to true in v3.0.
     *         'strict_callables' => true,
     *
     *         // Enable pragmas across all templates, regardless of the presence of pragma tags in the individual
     *         // templates.
     *         'pragmas' => [Mustache_Engine::PRAGMA_FILTERS],
     *     );
     *
     * @throws Mustache_Exception_InvalidArgumentException If `template_class_prefix` is empty, `escape` is not
     *                                                     callable, or an unknown pragma is requested
     *
     * @param array $options (default: array())
     */
    public function __construct(array $options = array())
    {
        if (isset($options['template_class_prefix'])) {
            if ((string) $options['template_class_prefix'] === '') {
                throw new Mustache_Exception_InvalidArgumentException('Mustache Constructor "template_class_prefix" must not be empty');
            }

            $this->templateClassPrefix = $options['template_class_prefix'];
        }

        if (isset($options['cache'])) {
            $cache = $options['cache'];

            // A string is treated as a cache directory for a filesystem cache.
            if (is_string($cache)) {
                $mode  = isset($options['cache_file_mode']) ? $options['cache_file_mode'] : null;
                $cache = new Mustache_Cache_FilesystemCache($cache, $mode);
            }

            $this->setCache($cache);
        }

        if (isset($options['cache_lambda_templates'])) {
            $this->cacheLambdaTemplates = (bool) $options['cache_lambda_templates'];
        }

        if (isset($options['loader'])) {
            $this->setLoader($options['loader']);
        }

        if (isset($options['partials_loader'])) {
            $this->setPartialsLoader($options['partials_loader']);
        }

        if (isset($options['partials'])) {
            $this->setPartials($options['partials']);
        }

        if (isset($options['helpers'])) {
            $this->setHelpers($options['helpers']);
        }

        if (isset($options['escape'])) {
            if (!is_callable($options['escape'])) {
                throw new Mustache_Exception_InvalidArgumentException('Mustache Constructor "escape" option must be callable');
            }

            $this->escape = $options['escape'];
        }

        if (isset($options['entity_flags'])) {
            $this->entityFlags = $options['entity_flags'];
        }

        if (isset($options['charset'])) {
            $this->charset = $options['charset'];
        }

        if (isset($options['logger'])) {
            $this->setLogger($options['logger']);
        }

        if (isset($options['strict_callables'])) {
            $this->strictCallables = $options['strict_callables'];
        }

        if (isset($options['delimiters'])) {
            $this->delimiters = $options['delimiters'];
        }

        if (isset($options['pragmas'])) {
            foreach ($options['pragmas'] as $pragma) {
                if (!isset(self::$knownPragmas[$pragma])) {
                    throw new Mustache_Exception_InvalidArgumentException(sprintf('Unknown pragma: "%s".', $pragma));
                }

                // Stored as keys so duplicates collapse; see getPragmas().
                $this->pragmas[$pragma] = true;
            }
        }
    }

    /**
     * Shortcut 'render' invocation.
     *
     * Equivalent to calling `$mustache->loadTemplate($template)->render($context);`
     *
     * @see Mustache_Engine::loadTemplate
     * @see Mustache_Template::render
     *
     * @param string $template
     * @param mixed  $context  (default: array())
     *
     * @return string Rendered template
     */
    public function render($template, $context = array())
    {
        return $this->loadTemplate($template)->render($context);
    }

    /**
     * Get the current Mustache escape callback.
     *
     * @return callable|null
     */
    public function getEscape()
    {
        return $this->escape;
    }

    /**
     * Get the current Mustache entity type to escape.
     *
     * @return int
     */
    public function getEntityFlags()
    {
        return $this->entityFlags;
    }

    /**
     * Get the current Mustache character set.
     *
     * @return string
     */
    public function getCharset()
    {
        return $this->charset;
    }

    /**
     * Get the current globally enabled pragmas.
     *
     * @return array
     */
    public function getPragmas()
    {
        return array_keys($this->pragmas);
    }

    /**
     * Set the Mustache template Loader instance.
     *
     * @param Mustache_Loader $loader
     */
    public function setLoader(Mustache_Loader $loader)
    {
        $this->loader = $loader;
    }

    /**
     * Get the current Mustache template Loader instance.
     *
     * If no Loader instance has been explicitly specified, this method will instantiate and return
     * a StringLoader instance.
     *
     * @return Mustache_Loader
     */
    public function getLoader()
    {
        if (!isset($this->loader)) {
            $this->loader = new Mustache_Loader_StringLoader();
        }

        return $this->loader;
    }

    /**
     * Set the Mustache partials Loader instance.
     *
     * @param Mustache_Loader $partialsLoader
     */
    public function setPartialsLoader(Mustache_Loader $partialsLoader)
    {
        $this->partialsLoader = $partialsLoader;
    }

    /**
     * Get the current Mustache partials Loader instance.
     *
     * If no Loader instance has been explicitly specified, this method will instantiate and return
     * an ArrayLoader instance.
     *
     * @return Mustache_Loader
     */
    public function getPartialsLoader()
    {
        if (!isset($this->partialsLoader)) {
            $this->partialsLoader = new Mustache_Loader_ArrayLoader();
        }

        return $this->partialsLoader;
    }

    /**
     * Set partials for the current partials Loader instance.
     *
     * @throws Mustache_Exception_RuntimeException If the current Loader instance is immutable
     *
     * @param array $partials (default: array())
     */
    public function setPartials(array $partials = array())
    {
        if (!isset($this->partialsLoader)) {
            $this->partialsLoader = new Mustache_Loader_ArrayLoader();
        }

        if (!$this->partialsLoader instanceof Mustache_Loader_MutableLoader) {
            throw new Mustache_Exception_RuntimeException('Unable to set partials on an immutable Mustache Loader instance');
        }

        $this->partialsLoader->setTemplates($partials);
    }

    /**
     * Set an array of Mustache helpers.
     *
     * An array of 'helpers'. Helpers can be global variables or objects, closures (e.g. for higher order sections), or
     * any other valid Mustache context value. They will be prepended to the context stack, so they will be available in
     * any template loaded by this Mustache instance.
     *
     * @throws Mustache_Exception_InvalidArgumentException if $helpers is not an array or Traversable
     *
     * @param array|Traversable $helpers
     */
    public function setHelpers($helpers)
    {
        if (!is_array($helpers) && !$helpers instanceof Traversable) {
            throw new Mustache_Exception_InvalidArgumentException('setHelpers expects an array of helpers');
        }

        foreach ($helpers as $name => $helper) {
            $this->addHelper($name, $helper);
        }
    }

    /**
     * Get the current set of Mustache helpers.
     *
     * The collection is created lazily on first access.
     *
     * @see Mustache_Engine::setHelpers
     *
     * @return Mustache_HelperCollection
     */
    public function getHelpers()
    {
        if (!isset($this->helpers)) {
            $this->helpers = new Mustache_HelperCollection();
        }

        return $this->helpers;
    }

    /**
     * Add a new Mustache helper.
     *
     * @see Mustache_Engine::setHelpers
     *
     * @param string $name
     * @param mixed  $helper
     */
    public function addHelper($name, $helper)
    {
        $this->getHelpers()->add($name, $helper);
    }

    /**
     * Get a Mustache helper by name.
     *
     * @see Mustache_Engine::setHelpers
     *
     * @param string $name
     *
     * @return mixed Helper
     */
    public function getHelper($name)
    {
        return $this->getHelpers()->get($name);
    }

    /**
     * Check whether this Mustache instance has a helper.
     *
     * @see Mustache_Engine::setHelpers
     *
     * @param string $name
     *
     * @return bool True if the helper is present
     */
    public function hasHelper($name)
    {
        return $this->getHelpers()->has($name);
    }

    /**
     * Remove a helper by name.
     *
     * @see Mustache_Engine::setHelpers
     *
     * @param string $name
     */
    public function removeHelper($name)
    {
        $this->getHelpers()->remove($name);
    }

    /**
     * Set the Mustache Logger instance.
     *
     * If the current cache has no logger of its own, it is given this one as well.
     *
     * @throws Mustache_Exception_InvalidArgumentException If logger is not an instance of Mustache_Logger or Psr\Log\LoggerInterface
     *
     * @param Mustache_Logger|Psr\Log\LoggerInterface $logger
     */
    public function setLogger($logger = null)
    {
        // is_a() with a class-name string avoids a hard dependency on the PSR-3 interface.
        if ($logger !== null && !($logger instanceof Mustache_Logger || is_a($logger, 'Psr\\Log\\LoggerInterface'))) {
            throw new Mustache_Exception_InvalidArgumentException('Expected an instance of Mustache_Logger or Psr\\Log\\LoggerInterface.');
        }

        if ($this->getCache()->getLogger() === null) {
            $this->getCache()->setLogger($logger);
        }

        $this->logger = $logger;
    }

    /**
     * Get the current Mustache Logger instance.
     *
     * @return Mustache_Logger|Psr\Log\LoggerInterface
     */
    public function getLogger()
    {
        return $this->logger;
    }

    /**
     * Set the Mustache Tokenizer instance.
     *
     * @param Mustache_Tokenizer $tokenizer
     */
    public function setTokenizer(Mustache_Tokenizer $tokenizer)
    {
        $this->tokenizer = $tokenizer;
    }

    /**
     * Get the current Mustache Tokenizer instance.
     *
     * If no Tokenizer instance has been explicitly specified, this method will instantiate and return a new one.
     *
     * @return Mustache_Tokenizer
     */
    public function getTokenizer()
    {
        if (!isset($this->tokenizer)) {
            $this->tokenizer = new Mustache_Tokenizer();
        }

        return $this->tokenizer;
    }

    /**
     * Set the Mustache Parser instance.
     *
     * @param Mustache_Parser $parser
     */
    public function setParser(Mustache_Parser $parser)
    {
        $this->parser = $parser;
    }

    /**
     * Get the current Mustache Parser instance.
     *
     * If no Parser instance has been explicitly specified, this method will instantiate and return a new one.
     *
     * @return Mustache_Parser
     */
    public function getParser()
    {
        if (!isset($this->parser)) {
            $this->parser = new Mustache_Parser();
        }

        return $this->parser;
    }

    /**
     * Set the Mustache Compiler instance.
     *
     * @param Mustache_Compiler $compiler
     */
    public function setCompiler(Mustache_Compiler $compiler)
    {
        $this->compiler = $compiler;
    }

    /**
     * Get the current Mustache Compiler instance.
     *
     * If no Compiler instance has been explicitly specified, this method will instantiate and return a new one.
     *
     * @return Mustache_Compiler
     */
    public function getCompiler()
    {
        if (!isset($this->compiler)) {
            $this->compiler = new Mustache_Compiler();
        }

        return $this->compiler;
    }

    /**
     * Set the Mustache Cache instance.
     *
     * If this engine already has a logger and the cache has none, the cache is given the engine's logger.
     *
     * @param Mustache_Cache $cache
     */
    public function setCache(Mustache_Cache $cache)
    {
        if (isset($this->logger) && $cache->getLogger() === null) {
            $cache->setLogger($this->getLogger());
        }

        $this->cache = $cache;
    }

    /**
     * Get the current Mustache Cache instance.
     *
     * If no Cache instance has been explicitly specified, this method will instantiate and return a new one.
     *
     * @return Mustache_Cache
     */
    public function getCache()
    {
        if (!isset($this->cache)) {
            $this->setCache(new Mustache_Cache_NoopCache());
        }

        return $this->cache;
    }

    /**
     * Get the current Lambda Cache instance.
     *
     * If 'cache_lambda_templates' is enabled, this is the default cache instance. Otherwise, it is a NoopCache.
     *
     * @see Mustache_Engine::getCache
     *
     * @return Mustache_Cache
     */
    protected function getLambdaCache()
    {
        if ($this->cacheLambdaTemplates) {
            return $this->getCache();
        }

        if (!isset($this->lambdaCache)) {
            $this->lambdaCache = new Mustache_Cache_NoopCache();
        }

        return $this->lambdaCache;
    }

    /**
     * Helper method to generate a Mustache template class.
     *
     * This method must be updated any time options are added which make it so
     * the same template could be parsed and compiled multiple different ways.
     *
     * @param string|Mustache_Source $source
     *
     * @return string Mustache Template class name
     */
    public function getTemplateClassName($source)
    {
        // For the most part, adding a new option here should do the trick.
        //
        // Pick a value here which is unique for each possible way the template
        // could be compiled... but not necessarily unique per option value. See
        // escape below, which only needs to differentiate between 'custom' and
        // 'default' escapes.
        //
        // Keep this list in alphabetical order :)
        $chunks = array(
            'charset'         => $this->charset,
            'delimiters'      => $this->delimiters ? $this->delimiters : '{{ }}',
            'entityFlags'     => $this->entityFlags,
            'escape'          => isset($this->escape) ? 'custom' : 'default',
            'key'             => ($source instanceof Mustache_Source) ? $source->getKey() : 'source',
            'pragmas'         => $this->getPragmas(),
            'strictCallables' => $this->strictCallables,
            'version'         => self::VERSION,
        );

        $key = json_encode($chunks);

        // Template Source instances have already provided their own source key. For strings, just include the whole
        // source string in the md5 hash.
        if (!$source instanceof Mustache_Source) {
            $key .= "\n" . $source;
        }

        return $this->templateClassPrefix . md5($key);
    }

    /**
     * Load a Mustache Template by name.
     *
     * @param string $name
     *
     * @return Mustache_Template
     */
    public function loadTemplate($name)
    {
        return $this->loadSource($this->getLoader()->load($name));
    }

    /**
     * Load a Mustache partial Template by name.
     *
     * This is a helper method used internally by Template instances for loading partial templates. You can most likely
     * ignore it completely.
     *
     * @param string $name
     *
     * @return Mustache_Template|null Null (after logging a warning) when the partial cannot be found
     */
    public function loadPartial($name)
    {
        try {
            if (isset($this->partialsLoader)) {
                $loader = $this->partialsLoader;
            } elseif (isset($this->loader) && !$this->loader instanceof Mustache_Loader_StringLoader) {
                // Fall back to the main loader, unless it is a StringLoader.
                $loader = $this->loader;
            } else {
                throw new Mustache_Exception_UnknownTemplateException($name);
            }

            return $this->loadSource($loader->load($name));
        } catch (Mustache_Exception_UnknownTemplateException $e) {
            // If the named partial cannot be found, log then return null.
            $this->log(
                Mustache_Logger::WARNING,
                'Partial not found: "{name}"',
                array('name' => $e->getTemplateName())
            );
        }
    }

    /**
     * Load a Mustache lambda Template by source.
     *
     * This is a helper method used by Template instances to generate subtemplates for Lambda sections. You can most
     * likely ignore it completely.
     *
     * @param string $source
     * @param string $delims (default: null)
     *
     * @return Mustache_Template
     */
    public function loadLambda($source, $delims = null)
    {
        // Custom delimiters are applied by prepending them as the first line of the source.
        if ($delims !== null) {
            $source = $delims . "\n" . $source;
        }

        return $this->loadSource($source, $this->getLambdaCache());
    }

    /**
     * Instantiate and return a Mustache Template instance by source.
     *
     * Optionally provide a Mustache_Cache instance. This is used internally by Mustache_Engine::loadLambda to respect
     * the 'cache_lambda_templates' configuration option.
     *
     * @see Mustache_Engine::loadTemplate
     * @see Mustache_Engine::loadPartial
     * @see Mustache_Engine::loadLambda
     *
     * @param string|Mustache_Source $source
     * @param Mustache_Cache         $cache  (default: null)
     *
     * @return Mustache_Template
     */
    private function loadSource($source, Mustache_Cache $cache = null)
    {
        $className = $this->getTemplateClassName($source);

        if (!isset($this->templates[$className])) {
            if ($cache === null) {
                $cache = $this->getCache();
            }

            // Only compile when the class is neither already declared nor loadable from the cache.
            if (!class_exists($className, false)) {
                if (!$cache->load($className)) {
                    $compiled = $this->compile($source);
                    $cache->cache($className, $compiled);
                }
            }

            $this->log(
                Mustache_Logger::DEBUG,
                'Instantiating template: "{className}"',
                array('className' => $className)
            );

            $this->templates[$className] = new $className($this);
        }

        return $this->templates[$className];
    }

    /**
     * Helper method to tokenize a Mustache template.
     *
     * @see Mustache_Tokenizer::scan
     *
     * @param string $source
     *
     * @return array Tokens
     */
    private function tokenize($source)
    {
        return $this->getTokenizer()->scan($source, $this->delimiters);
    }

    /**
     * Helper method to parse a Mustache template.
     *
     * @see Mustache_Parser::parse
     *
     * @param string $source
     *
     * @return array Token tree
     */
    private function parse($source)
    {
        $parser = $this->getParser();
        // Forward the globally enabled pragmas before parsing.
        $parser->setPragmas($this->getPragmas());

        return $parser->parse($this->tokenize($source));
    }

    /**
     * Helper method to compile a Mustache template.
     *
     * @see Mustache_Compiler::compile
     *
     * @param string|Mustache_Source $source
     *
     * @return string generated Mustache template class code
     */
    private function compile($source)
    {
        $name = $this->getTemplateClassName($source);

        $this->log(
            Mustache_Logger::INFO,
            'Compiling template to "{className}" class',
            array('className' => $name)
        );

        if ($source instanceof Mustache_Source) {
            $source = $source->getSource();
        }

        $tree = $this->parse($source);

        $compiler = $this->getCompiler();
        // Forward the globally enabled pragmas before compiling.
        $compiler->setPragmas($this->getPragmas());

        return $compiler->compile($source, $tree, $name, isset($this->escape), $this->charset, $this->strictCallables, $this->entityFlags);
    }

    /**
     * Add a log record if logging is enabled.
     *
     * @param int    $level   The logging level
     * @param string $message The log message
     * @param array  $context The log context
     */
    private function log($level, $message, array $context = array())
    {
        if (isset($this->logger)) {
            $this->logger->log($level, $message, $context);
        }
    }
}

@ -0,0 +1,18 @@
* This file is part of Mustache.php.
* (c) 2010-2017 Justin Hileman
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
* A Mustache Exception interface.
/**
 * Marker interface implemented by every exception class in this library.
 */
interface Mustache_Exception
{
    // This space intentionally left blank.
}

@ -0,0 +1,18 @@
* This file is part of Mustache.php.
* (c) 2010-2017 Justin Hileman
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
* Invalid argument exception.
/**
 * InvalidArgumentException that also carries the Mustache_Exception marker.
 */
class Mustache_Exception_InvalidArgumentException extends InvalidArgumentException implements Mustache_Exception
{
    // This space intentionally left blank.
}

@ -0,0 +1,18 @@
* This file is part of Mustache.php.
* (c) 2010-2017 Justin Hileman
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
* Logic exception.
/**
 * LogicException that also carries the Mustache_Exception marker.
 */
class Mustache_Exception_LogicException extends LogicException implements Mustache_Exception
{
    // This space intentionally left blank.
}

@ -0,0 +1,18 @@
* This file is part of Mustache.php.
* (c) 2010-2017 Justin Hileman
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
* Runtime exception.
/**
 * RuntimeException that also carries the Mustache_Exception marker.
 */
class Mustache_Exception_RuntimeException extends RuntimeException implements Mustache_Exception
{
    // This space intentionally left blank.
}

@ -0,0 +1,41 @@
* This file is part of Mustache.php.
* (c) 2010-2017 Justin Hileman
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
* Mustache syntax exception.
class Mustache_Exception_SyntaxException extends LogicException implements Mustache_Exception
{
    /** @var array The token passed to the constructor. */
    protected $token;

    /**
     * @param string    $msg      Exception message
     * @param array     $token    Token to expose through getToken()
     * @param Exception $previous Previous exception, chained on PHP >= 5.3.0 only
     */
    public function __construct($msg, array $token, Exception $previous = null)
    {
        $this->token = $token;

        // Older runtimes get the message-only constructor call.
        if (version_compare(PHP_VERSION, '5.3.0', '<')) {
            parent::__construct($msg); // @codeCoverageIgnore

            return;
        }

        parent::__construct($msg, 0, $previous);
    }

    /**
     * @return array The token given at construction time
     */
    public function getToken()
    {
        return $this->token;
    }
}

@ -0,0 +1,38 @@
* This file is part of Mustache.php.
* (c) 2010-2017 Justin Hileman
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
* Unknown filter exception.
class Mustache_Exception_UnknownFilterException extends UnexpectedValueException implements Mustache_Exception
{
    /** @var string Name of the filter that could not be resolved. */
    protected $filterName;

    /**
     * @param string    $filterName Name of the unknown filter
     * @param Exception $previous   Previous exception, chained on PHP >= 5.3.0 only
     */
    public function __construct($filterName, Exception $previous = null)
    {
        $this->filterName = $filterName;
        $text = sprintf('Unknown filter: %s', $filterName);

        // Older runtimes get the message-only constructor call.
        if (version_compare(PHP_VERSION, '5.3.0', '<')) {
            parent::__construct($text); // @codeCoverageIgnore

            return;
        }

        parent::__construct($text, 0, $previous);
    }

    /**
     * @return string The filter name given at construction time
     */
    public function getFilterName()
    {
        return $this->filterName;
    }
}

@ -0,0 +1,38 @@
* This file is part of Mustache.php.
* (c) 2010-2017 Justin Hileman
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
* Unknown helper exception.
class Mustache_Exception_UnknownHelperException extends InvalidArgumentException implements Mustache_Exception
{
    /** @var string Name of the helper that could not be resolved. */
    protected $helperName;

    /**
     * @param string    $helperName Name of the unknown helper
     * @param Exception $previous   Previous exception, chained on PHP >= 5.3.0 only
     */
    public function __construct($helperName, Exception $previous = null)
    {
        $this->helperName = $helperName;
        $text = sprintf('Unknown helper: %s', $helperName);

        // Older runtimes get the message-only constructor call.
        if (version_compare(PHP_VERSION, '5.3.0', '<')) {
            parent::__construct($text); // @codeCoverageIgnore

            return;
        }

        parent::__construct($text, 0, $previous);
    }

    /**
     * @return string The helper name given at construction time
     */
    public function getHelperName()
    {
        return $this->helperName;
    }
}

@ -0,0 +1,38 @@
* This file is part of Mustache.php.
* (c) 2010-2017 Justin Hileman
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
* Unknown template exception.
class Mustache_Exception_UnknownTemplateException extends InvalidArgumentException implements Mustache_Exception
{
    /** @var string Name of the template that could not be resolved. */
    protected $templateName;

    /**
     * @param string    $templateName Name of the unknown template
     * @param Exception $previous     Previous exception, chained on PHP >= 5.3.0 only
     */
    public function __construct($templateName, Exception $previous = null)
    {
        $this->templateName = $templateName;
        $text = sprintf('Unknown template: %s', $templateName);

        // Older runtimes get the message-only constructor call.
        if (version_compare(PHP_VERSION, '5.3.0', '<')) {
            parent::__construct($text); // @codeCoverageIgnore

            return;
        }

        parent::__construct($text, 0, $previous);
    }

    /**
     * @return string The template name given at construction time
     */
    public function getTemplateName()
    {
        return $this->templateName;
    }
}

@ -0,0 +1,172 @@
* This file is part of Mustache.php.
* (c) 2010-2017 Justin Hileman
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
* A collection of helpers for a Mustache instance.
class Mustache_HelperCollection
{
    /** @var array Helpers keyed by name. */
    private $helpers = array();

    /**
     * Helper Collection constructor.
     *
     * Optionally accepts an array (or Traversable) of `$name => $helper` pairs.
     *
     * @throws Mustache_Exception_InvalidArgumentException if the $helpers argument isn't an array or Traversable
     *
     * @param array|Traversable $helpers (default: null)
     */
    public function __construct($helpers = null)
    {
        // No argument means "start empty"; it must not reach the type check below.
        if ($helpers === null) {
            return;
        }

        if (!is_array($helpers) && !$helpers instanceof Traversable) {
            throw new Mustache_Exception_InvalidArgumentException('HelperCollection constructor expects an array of helpers');
        }

        foreach ($helpers as $name => $helper) {
            $this->add($name, $helper);
        }
    }

    /**
     * Magic mutator.
     *
     * @see Mustache_HelperCollection::add
     *
     * @param string $name
     * @param mixed  $helper
     */
    public function __set($name, $helper)
    {
        $this->add($name, $helper);
    }

    /**
     * Add a helper to this collection.
     *
     * An existing helper with the same name is overwritten.
     *
     * @param string $name
     * @param mixed  $helper
     */
    public function add($name, $helper)
    {
        $this->helpers[$name] = $helper;
    }

    /**
     * Magic accessor.
     *
     * @see Mustache_HelperCollection::get
     *
     * @param string $name
     *
     * @return mixed Helper
     */
    public function __get($name)
    {
        return $this->get($name);
    }

    /**
     * Get a helper by name.
     *
     * @throws Mustache_Exception_UnknownHelperException If helper does not exist
     *
     * @param string $name
     *
     * @return mixed Helper
     */
    public function get($name)
    {
        if (!$this->has($name)) {
            throw new Mustache_Exception_UnknownHelperException($name);
        }

        return $this->helpers[$name];
    }

    /**
     * Magic isset().
     *
     * @see Mustache_HelperCollection::has
     *
     * @param string $name
     *
     * @return bool True if helper is present
     */
    public function __isset($name)
    {
        return $this->has($name);
    }

    /**
     * Check whether a given helper is present in the collection.
     *
     * Uses array_key_exists, so a helper whose value is null still counts as present.
     *
     * @param string $name
     *
     * @return bool True if helper is present
     */
    public function has($name)
    {
        return array_key_exists($name, $this->helpers);
    }

    /**
     * Magic unset().
     *
     * @see Mustache_HelperCollection::remove
     *
     * @param string $name
     */
    public function __unset($name)
    {
        $this->remove($name);
    }

    /**
     * Remove a helper from the collection.
     *
     * @throws Mustache_Exception_UnknownHelperException if the requested helper is not present
     *
     * @param string $name
     */
    public function remove($name)
    {
        if (!$this->has($name)) {
            throw new Mustache_Exception_UnknownHelperException($name);
        }

        unset($this->helpers[$name]);
    }

    /**
     * Clear the helper collection.
     *
     * Removes all helpers from this collection
     */
    public function clear()
    {
        $this->helpers = array();
    }

    /**
     * Check whether the helper collection is empty.
     *
     * @return bool True if the collection is empty
     */
    public function isEmpty()
    {
        return empty($this->helpers);
    }
}

View file

The MIT License (MIT)
Copyright (c) 2010-2015 Justin Hileman
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

@ -0,0 +1,76 @@
* This file is part of Mustache.php.
* (c) 2010-2017 Justin Hileman
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
* Mustache Lambda Helper.
* Passed as the second argument to section lambdas (higher order sections),
* giving them access to a `render` method for rendering a string with the
* current context.
class Mustache_LambdaHelper
{
    private $mustache;
    private $context;
    private $delims;

    /**
     * Mustache Lambda Helper constructor.
     *
     * @param Mustache_Engine  $mustache Mustache engine instance
     * @param Mustache_Context $context  Rendering context
     * @param string           $delims   Optional custom delimiters, in the format `{{= <% %> =}}`. (default: null)
     */
    public function __construct(Mustache_Engine $mustache, Mustache_Context $context, $delims = null)
    {
        $this->mustache = $mustache;
        $this->context  = $context;
        $this->delims   = $delims;
    }

    /**
     * Render a string as a Mustache template with the current rendering context.
     *
     * The string is loaded as a lambda template (honouring any custom delimiters
     * held by this helper) and then rendered against the stored context.
     *
     * @param string $string
     *
     * @return string Rendered template
     */
    public function render($string)
    {
        return $this->mustache
            ->loadLambda((string) $string, $this->delims)
            ->render($this->context);
    }

    /**
     * Invokable shortcut for render(), so the helper itself can be called like a function.
     *
     * @param string $string
     *
     * @return string Rendered template
     */
    public function __invoke($string)
    {
        return $this->render($string);
    }

    /**
     * Get a Lambda Helper with custom delimiters.
     *
     * Returns a new helper; this instance is left unchanged.
     *
     * @param string $delims Custom delimiters, in the format `{{= <% %> =}}`
     *
     * @return Mustache_LambdaHelper
     */
    public function withDelimiters($delims)
    {
        return new self($this->mustache, $this->context, $delims);
    }
}

View file

* This file is part of Mustache.php.
* (c) 2010-2017 Justin Hileman
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
* Mustache Template Loader interface.
interface Mustache_Loader
{
    /**
     * Resolve a template name to its source.
     *
     * @throws Mustache_Exception_UnknownTemplateException If a template file is not found
     *
     * @param string $name
     *
     * @return string|Mustache_Source Mustache Template source
     */
    public function load($name);
}

@ -0,0 +1,79 @@
* This file is part of Mustache.php.
* (c) 2010-2017 Justin Hileman
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
* Mustache Template array Loader implementation.
* An ArrayLoader instance loads Mustache Template source by name from an initial array:
* $loader = new Mustache_Loader_ArrayLoader(array(
* 'foo' => '{{ bar }}',
* 'baz' => 'Hey {{ qux }}!'
* ));
* $tpl = $loader->load('foo'); // '{{ bar }}'
* The ArrayLoader is used internally as a partials loader by Mustache_Engine instance when an array of partials
* is set. It can also be used as a quick-and-dirty Template loader.
class Mustache_Loader_ArrayLoader implements Mustache_Loader, Mustache_Loader_MutableLoader
{
    /** @var array Template sources keyed by template name. */
    private $templates;

    /**
     * ArrayLoader constructor.
     *
     * @param array $templates Associative array of Template source (default: array())
     */
    public function __construct(array $templates = array())
    {
        $this->templates = $templates;
    }

    /**
     * Look up a Template source by name.
     *
     * @throws Mustache_Exception_UnknownTemplateException If no template is registered under $name
     *
     * @param string $name
     *
     * @return string Mustache Template source
     */
    public function load($name)
    {
        if (isset($this->templates[$name])) {
            return $this->templates[$name];
        }

        throw new Mustache_Exception_UnknownTemplateException($name);
    }

    /**
     * Replace the whole set of Template sources held by this loader.
     *
     * @param array $templates
     */
    public function setTemplates(array $templates)
    {
        $this->templates = $templates;
    }

    /**
     * Register (or overwrite) a single Template source.
     *
     * @param string $name
     * @param string $template Mustache Template source
     */
    public function setTemplate($name, $template)
    {
        $this->templates[$name] = $template;
    }
}

@ -0,0 +1,69 @@
* This file is part of Mustache.php.
* (c) 2010-2017 Justin Hileman
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
* A Mustache Template cascading loader implementation, which delegates to other
* Loader instances.
class Mustache_Loader_CascadingLoader implements Mustache_Loader
{
    /** @var Mustache_Loader[] Loaders, in the order they are consulted. */
    private $loaders;

    /**
     * Construct a CascadingLoader with an array of loaders.
     *
     *     $loader = new Mustache_Loader_CascadingLoader(array(
     *         new Mustache_Loader_InlineLoader(__FILE__, __COMPILER_HALT_OFFSET__),
     *         new Mustache_Loader_FilesystemLoader(__DIR__.'/templates')
     *     ));
     *
     * @param Mustache_Loader[] $loaders
     */
    public function __construct(array $loaders = array())
    {
        $this->loaders = array();

        // Go through addLoader() so each element is checked against its type hint.
        foreach ($loaders as $loader) {
            $this->addLoader($loader);
        }
    }

    /**
     * Add a Loader instance.
     *
     * The loader is appended, so it is consulted after all previously added loaders.
     *
     * @param Mustache_Loader $loader
     */
    public function addLoader(Mustache_Loader $loader)
    {
        $this->loaders[] = $loader;
    }

    /**
     * Load a Template by name.
     *
     * Each loader is tried in order; the first one that does not throw wins.
     *
     * @throws Mustache_Exception_UnknownTemplateException If no loader can provide the template
     *
     * @param string $name
     *
     * @return string Mustache Template source
     */
    public function load($name)
    {
        foreach ($this->loaders as $loader) {
            try {
                return $loader->load($name);
            } catch (Mustache_Exception_UnknownTemplateException $e) {
                // do nothing, check the next loader.
            }
        }

        throw new Mustache_Exception_UnknownTemplateException($name);
    }
}

@ -0,0 +1,135 @@
* This file is part of Mustache.php.
* (c) 2010-2017 Justin Hileman
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
* Mustache Template filesystem Loader implementation.
* A FilesystemLoader instance loads Mustache Template source from the filesystem by name:
* $loader = new Mustache_Loader_FilesystemLoader(dirname(__FILE__).'/views');
* $tpl = $loader->load('foo'); // equivalent to `file_get_contents(dirname(__FILE__).'/views/foo.mustache');`
* This is probably the most useful Mustache Loader implementation. It can be used for partials and normal Templates:
* $m = new Mustache_Engine(array(
* 'loader' => new Mustache_Loader_FilesystemLoader(dirname(__FILE__).'/views'),
* 'partials_loader' => new Mustache_Loader_FilesystemLoader(dirname(__FILE__).'/views/partials'),
* ));
class Mustache_Loader_FilesystemLoader implements Mustache_Loader
{
    /** @var string|false Resolved base directory (or the raw stream URI) */
    private $baseDir;

    /** @var string Filename suffix appended to template names */
    private $extension = '.mustache';

    /** @var array Template sources already loaded, keyed by template name */
    private $templates = array();

    /**
     * Mustache filesystem Loader constructor.
     *
     * Loader options may be overridden through the $options array:
     *
     *     $options = array(
     *         // The filename extension used for Mustache templates. Defaults to '.mustache'
     *         'extension' => '.ms',
     *     );
     *
     * @throws Mustache_Exception_RuntimeException if $baseDir does not exist
     *
     * @param string $baseDir Base directory containing Mustache template files
     * @param array  $options Array of Loader options (default: array())
     */
    public function __construct($baseDir, array $options = array())
    {
        // Plain paths are canonicalised; anything with a scheme is kept verbatim.
        $isPlainPath   = strpos($baseDir, '://') === false;
        $this->baseDir = $isPlainPath ? realpath($baseDir) : $baseDir;

        if ($this->shouldCheckPath() && !is_dir($this->baseDir)) {
            throw new Mustache_Exception_RuntimeException(sprintf('FilesystemLoader baseDir must be a directory: %s', $baseDir));
        }

        if (array_key_exists('extension', $options)) {
            $ext             = $options['extension'];
            $this->extension = empty($ext) ? '' : '.' . ltrim($ext, '.');
        }
    }

    /**
     * Load a Template by name.
     *
     *     $loader = new Mustache_Loader_FilesystemLoader(dirname(__FILE__).'/views');
     *     $loader->load('admin/dashboard'); // loads "./views/admin/dashboard.mustache";
     *
     * @param string $name
     *
     * @return string Mustache Template source
     */
    public function load($name)
    {
        if (isset($this->templates[$name])) {
            return $this->templates[$name];
        }

        $source                 = $this->loadFile($name);
        $this->templates[$name] = $source;

        return $source;
    }

    /**
     * Read the source of a named template from disk.
     *
     * @throws Mustache_Exception_UnknownTemplateException If a template file is not found
     *
     * @param string $name
     *
     * @return string Mustache Template source
     */
    protected function loadFile($name)
    {
        $path    = $this->getFileName($name);
        $missing = $this->shouldCheckPath() && !file_exists($path);

        if ($missing) {
            throw new Mustache_Exception_UnknownTemplateException($name);
        }

        return file_get_contents($path);
    }

    /**
     * Build the file name for a template, appending the extension unless the
     * name already ends with it.
     *
     * @param string $name
     *
     * @return string Template file name
     */
    protected function getFileName($name)
    {
        $path   = $this->baseDir . '/' . $name;
        $suffix = $this->extension;

        if (substr($path, -strlen($suffix)) === $suffix) {
            return $path;
        }

        return $path . $suffix;
    }

    /**
     * Existence checks (`is_dir`, `file_exists`) are only performed when
     * baseDir has no scheme or uses the `file://` stream wrapper.
     *
     * @return bool Whether to check `is_dir` and `file_exists`
     */
    protected function shouldCheckPath()
    {
        if (strpos($this->baseDir, '://') === false) {
            return true;
        }

        return strpos($this->baseDir, 'file://') === 0;
    }
}

@ -0,0 +1,123 @@
* This file is part of Mustache.php.
* (c) 2010-2017 Justin Hileman
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
* A Mustache Template loader for inline templates.
* With the InlineLoader, templates can be defined at the end of any PHP source
* file:
* $loader = new Mustache_Loader_InlineLoader(__FILE__, __COMPILER_HALT_OFFSET__);
* $hello = $loader->load('hello');
* $goodbye = $loader->load('goodbye');
* __halt_compiler();
* @@ hello
* Hello, {{ planet }}!
* @@ goodbye
* Goodbye, cruel {{ planet }}
* Templates are delineated by lines containing only `@@ name`.
* The InlineLoader is well-suited to micro-frameworks such as Silex:
* $app->register(new MustacheServiceProvider, array(
* 'mustache.loader' => new Mustache_Loader_InlineLoader(__FILE__, __COMPILER_HALT_OFFSET__)
* ));
* $app->get('/{name}', function ($name) use ($app) {
* return $app['mustache']->render('hello', compact('name'));
* })
* ->value('name', 'world');
* // ...
* __halt_compiler();
* @@ hello
* Hello, {{ name }}!
class Mustache_Loader_InlineLoader implements Mustache_Loader
{
    /** @var string File that holds the inline templates */
    protected $fileName;

    /** @var int Byte offset in $fileName where the templates start */
    protected $offset;

    /** @var array|null Parsed templates keyed by name; null until first load */
    protected $templates;

    /**
     * The InlineLoader requires a filename and offset to process templates.
     *
     * The magic constants `__FILE__` and `__COMPILER_HALT_OFFSET__` are usually
     * perfectly suited to the job:
     *
     *     $loader = new Mustache_Loader_InlineLoader(__FILE__, __COMPILER_HALT_OFFSET__);
     *
     * Note that this only works if the loader is instantiated inside the same
     * file as the inline templates. If the templates are located in another
     * file, it would be necessary to manually specify the filename and offset.
     *
     * @throws Mustache_Exception_InvalidArgumentException if $fileName is not a file
     *                                                     or $offset is not a non-negative int
     *
     * @param string $fileName The file to parse for inline templates
     * @param int    $offset   A string offset for the start of the templates.
     *                         This usually coincides with the `__halt_compiler`
     *                         call, and the `__COMPILER_HALT_OFFSET__`
     */
    public function __construct($fileName, $offset)
    {
        if (!is_file($fileName)) {
            throw new Mustache_Exception_InvalidArgumentException('InlineLoader expects a valid filename.');
        }

        if (!is_int($offset) || $offset < 0) {
            throw new Mustache_Exception_InvalidArgumentException('InlineLoader expects a valid file offset.');
        }

        $this->fileName = $fileName;
        $this->offset   = $offset;
    }

    /**
     * Load a Template by name.
     *
     * @throws Mustache_Exception_UnknownTemplateException If the template is not defined in the file
     *
     * @param string $name
     *
     * @return string Mustache Template source
     */
    public function load($name)
    {
        // Templates are parsed lazily; without this call $this->templates is still null.
        $this->loadTemplates();

        if (!array_key_exists($name, $this->templates)) {
            throw new Mustache_Exception_UnknownTemplateException($name);
        }

        return $this->templates[$name];
    }

    /**
     * Parse and load templates from the end of a source file.
     *
     * Runs only once; later calls are no-ops because $this->templates is set.
     */
    protected function loadTemplates()
    {
        if ($this->templates === null) {
            $this->templates = array();
            $data = file_get_contents($this->fileName, false, null, $this->offset);
            foreach (preg_split("/^@@(?= [\w\d\.]+$)/m", $data, -1) as $chunk) {
                if (trim($chunk)) {
                    // A name with no following newline (e.g. at EOF) yields a single
                    // piece; pad so it is treated as an empty template.
                    list($name, $content) = array_pad(explode("\n", $chunk, 2), 2, '');
                    $this->templates[trim($name)] = trim($content);
                }
            }
        }
    }
}

@ -0,0 +1,31 @@
* This file is part of Mustache.php.
* (c) 2010-2017 Justin Hileman
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
* Mustache Template mutable Loader interface.
interface Mustache_Loader_MutableLoader
{
    /**
     * Provide template sources in bulk, as a name => source map.
     *
     * @param array $templates
     */
    public function setTemplates(array $templates);

    /**
     * Provide the source for a single named template.
     *
     * @param string $name
     * @param string $template Mustache Template source
     */
    public function setTemplate($name, $template);
}

@ -0,0 +1,86 @@
* This file is part of Mustache.php.
* (c) 2010-2017 Justin Hileman
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
* Mustache Template production filesystem Loader implementation.
* A production-ready FilesystemLoader, which doesn't require reading a file if it already exists in the template cache.
* {@inheritdoc}
class Mustache_Loader_ProductionFilesystemLoader extends Mustache_Loader_FilesystemLoader
{
    /** @var array stat() property names handed to each FilesystemSource */
    private $statProps;

    /**
     * Mustache production filesystem Loader constructor.
     *
     * Passing an $options array allows overriding certain Loader options during instantiation:
     *
     *     $options = array(
     *         // The filename extension used for Mustache templates. Defaults to '.mustache'
     *         'extension' => '.ms',
     *         'stat_props' => array('size', 'mtime'),
     *     );
     *
     * Specifying 'stat_props' overrides the stat properties used to invalidate the template cache. By default, this
     * uses 'mtime' and 'size', but this can be set to any of the properties supported by stat() (see the PHP manual).
     *
     * You can also disable filesystem stat entirely:
     *
     *     $options = array('stat_props' => null);
     *
     * But with great power comes great responsibility. Namely, if you disable stat-based cache invalidation,
     * YOU MUST CLEAR THE TEMPLATE CACHE YOURSELF when your templates change. Make it part of your build or deploy
     * process so you don't forget!
     *
     * @throws Mustache_Exception_RuntimeException if $baseDir does not exist.
     *
     * @param string $baseDir Base directory containing Mustache template files.
     * @param array  $options Array of Loader options (default: array())
     */
    public function __construct($baseDir, array $options = array())
    {
        parent::__construct($baseDir, $options);

        if (array_key_exists('stat_props', $options)) {
            if (empty($options['stat_props'])) {
                // Any empty value (null, array(), ...) disables stat-based keys.
                $this->statProps = array();
            } else {
                $this->statProps = $options['stat_props'];
            }
        } else {
            $this->statProps = array('size', 'mtime');
        }
    }

    /**
     * Helper function for loading a Mustache file by name.
     *
     * Unlike the parent implementation this does not read the file; it wraps
     * the path and the configured stat properties in a Source object.
     *
     * @throws Mustache_Exception_UnknownTemplateException If a template file is not found.
     *
     * @param string $name
     *
     * @return Mustache_Source Mustache Template source
     */
    protected function loadFile($name)
    {
        $fileName = $this->getFileName($name);

        if (!file_exists($fileName)) {
            throw new Mustache_Exception_UnknownTemplateException($name);
        }

        return new Mustache_Source_FilesystemSource($fileName, $this->statProps);
    }
}
View file

@ -0,0 +1,39 @@
* This file is part of Mustache.php.
* (c) 2010-2017 Justin Hileman
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
* Mustache Template string Loader implementation.
* A StringLoader instance is essentially a noop. It simply passes the 'name' argument straight through:
* $loader = new StringLoader;
* $tpl = $loader->load('{{ foo }}'); // '{{ foo }}'
* This is the default Template Loader instance used by Mustache:
* $m = new Mustache;
* $tpl = $m->loadTemplate('{{ foo }}');
* echo $tpl->render(array('foo' => 'bar')); // "bar"
class Mustache_Loader_StringLoader implements Mustache_Loader
{
    /**
     * Load a Template by source.
     *
     * The "name" handed to this loader already is the template source, so it
     * is returned unchanged.
     *
     * @param string $name Mustache Template source
     *
     * @return string Mustache Template source
     */
    public function load($name)
    {
        return $name;
    }
}

View file

* This file is part of Mustache.php.
* (c) 2010-2017 Justin Hileman
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
* Describes a Mustache logger instance.
* This is identical to the Psr\Log\LoggerInterface.
* The message MUST be a string or object implementing __toString().
* The message MAY contain placeholders in the form: {foo} where foo
* will be replaced by the context data in key "foo".
* The context array can contain arbitrary data, the only assumption that
* can be made by implementors is that if an Exception instance is given
* to produce a stack trace, it MUST be in a key named "exception".
* See https://github.com/php-fig/fig-standards/blob/master/accepted/PSR-3-logger-interface.md
* for the full interface specification.
interface Mustache_Logger
{
    /**
     * Psr\Log compatible log levels.
     */
    const EMERGENCY = 'emergency';
    const ALERT     = 'alert';
    const CRITICAL  = 'critical';
    const ERROR     = 'error';
    const WARNING   = 'warning';
    const NOTICE    = 'notice';
    const INFO      = 'info';
    const DEBUG     = 'debug';

    /**
     * Log at EMERGENCY level: the system is unusable.
     *
     * @param string $message
     * @param array  $context
     */
    public function emergency($message, array $context = array());

    /**
     * Log at ALERT level: action must be taken immediately.
     *
     * Example: Entire website down, database unavailable, etc. This should
     * trigger the SMS alerts and wake you up.
     *
     * @param string $message
     * @param array  $context
     */
    public function alert($message, array $context = array());

    /**
     * Log at CRITICAL level: critical conditions.
     *
     * Example: Application component unavailable, unexpected exception.
     *
     * @param string $message
     * @param array  $context
     */
    public function critical($message, array $context = array());

    /**
     * Log at ERROR level: runtime errors that do not require immediate action
     * but should typically be logged and monitored.
     *
     * @param string $message
     * @param array  $context
     */
    public function error($message, array $context = array());

    /**
     * Log at WARNING level: exceptional occurrences that are not errors.
     *
     * Example: Use of deprecated APIs, poor use of an API, undesirable things
     * that are not necessarily wrong.
     *
     * @param string $message
     * @param array  $context
     */
    public function warning($message, array $context = array());

    /**
     * Log at NOTICE level: normal but significant events.
     *
     * @param string $message
     * @param array  $context
     */
    public function notice($message, array $context = array());

    /**
     * Log at INFO level: interesting events.
     *
     * Example: User logs in, SQL logs.
     *
     * @param string $message
     * @param array  $context
     */
    public function info($message, array $context = array());

    /**
     * Log at DEBUG level: detailed debug information.
     *
     * @param string $message
     * @param array  $context
     */
    public function debug($message, array $context = array());

    /**
     * Logs with an arbitrary level.
     *
     * @param mixed  $level
     * @param string $message
     * @param array  $context
     */
    public function log($level, $message, array $context = array());
}

@ -0,0 +1,121 @@
* This file is part of Mustache.php.
* (c) 2010-2017 Justin Hileman
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
* This is a simple Logger implementation that other Loggers can inherit from.
* This is identical to the Psr\Log\AbstractLogger.
* It simply delegates all log-level-specific methods to the `log` method to
* reduce boilerplate code that a simple Logger that does the same thing with
* messages regardless of the error level has to implement.
abstract class Mustache_Logger_AbstractLogger implements Mustache_Logger
{
    /**
     * Forward to log() at EMERGENCY level (system is unusable).
     *
     * @param string $message
     * @param array  $context
     */
    public function emergency($message, array $context = array())
    {
        $this->log(self::EMERGENCY, $message, $context);
    }

    /**
     * Forward to log() at ALERT level (action must be taken immediately).
     *
     * Example: Entire website down, database unavailable, etc. This should
     * trigger the SMS alerts and wake you up.
     *
     * @param string $message
     * @param array  $context
     */
    public function alert($message, array $context = array())
    {
        $this->log(self::ALERT, $message, $context);
    }

    /**
     * Forward to log() at CRITICAL level.
     *
     * Example: Application component unavailable, unexpected exception.
     *
     * @param string $message
     * @param array  $context
     */
    public function critical($message, array $context = array())
    {
        $this->log(self::CRITICAL, $message, $context);
    }

    /**
     * Forward to log() at ERROR level: runtime errors that do not require
     * immediate action but should typically be logged and monitored.
     *
     * @param string $message
     * @param array  $context
     */
    public function error($message, array $context = array())
    {
        $this->log(self::ERROR, $message, $context);
    }

    /**
     * Forward to log() at WARNING level: exceptional occurrences that are not errors.
     *
     * Example: Use of deprecated APIs, poor use of an API, undesirable things
     * that are not necessarily wrong.
     *
     * @param string $message
     * @param array  $context
     */
    public function warning($message, array $context = array())
    {
        $this->log(self::WARNING, $message, $context);
    }

    /**
     * Forward to log() at NOTICE level (normal but significant events).
     *
     * @param string $message
     * @param array  $context
     */
    public function notice($message, array $context = array())
    {
        $this->log(self::NOTICE, $message, $context);
    }

    /**
     * Forward to log() at INFO level (interesting events).
     *
     * Example: User logs in, SQL logs.
     *
     * @param string $message
     * @param array  $context
     */
    public function info($message, array $context = array())
    {
        $this->log(self::INFO, $message, $context);
    }

    /**
     * Forward to log() at DEBUG level (detailed debug information).
     *
     * @param string $message
     * @param array  $context
     */
    public function debug($message, array $context = array())
    {
        $this->log(self::DEBUG, $message, $context);
    }
}

@ -0,0 +1,194 @@
* This file is part of Mustache.php.
* (c) 2010-2017 Justin Hileman
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
* A Mustache Stream Logger.
* The Stream Logger wraps a file resource instance (such as a stream) or a
* stream URL. All log messages over the threshold level will be appended to
* this stream.
* Hint: Try `php://stderr` for your stream URL.
class Mustache_Logger_StreamLogger extends Mustache_Logger_AbstractLogger
{
    /**
     * Numeric severity for each level name; higher is more severe.
     *
     * @var array
     */
    protected static $levels = array(
        self::DEBUG     => 100,
        self::INFO      => 200,
        self::NOTICE    => 250,
        self::WARNING   => 300,
        self::ERROR     => 400,
        self::CRITICAL  => 500,
        self::ALERT     => 550,
        self::EMERGENCY => 600,
    );

    /** @var string Minimum level name that gets written */
    protected $level;

    /** @var resource|null Open stream, once available */
    protected $stream = null;

    /** @var string|null Stream URL to open lazily on first write */
    protected $url = null;

    /**
     * @throws Mustache_Exception_InvalidArgumentException if the logging level is unknown
     *
     * @param resource|string $stream Resource instance or URL
     * @param string          $level  The minimum logging level at which this handler will be triggered
     *                                (one of the Mustache_Logger level constants)
     */
    public function __construct($stream, $level = Mustache_Logger::ERROR)
    {
        // Validate and store the threshold up front; log() indexes $levels with it.
        $this->setLevel($level);

        if (is_resource($stream)) {
            $this->stream = $stream;
        } else {
            $this->url = $stream;
        }
    }

    /**
     * Close stream resources.
     */
    public function __destruct()
    {
        if (is_resource($this->stream)) {
            fclose($this->stream);
        }
    }

    /**
     * Set the minimum logging level.
     *
     * @throws Mustache_Exception_InvalidArgumentException if the logging level is unknown
     *
     * @param string $level The minimum logging level which will be written
     */
    public function setLevel($level)
    {
        if (!array_key_exists($level, self::$levels)) {
            throw new Mustache_Exception_InvalidArgumentException(sprintf('Unexpected logging level: %s', $level));
        }

        $this->level = $level;
    }

    /**
     * Get the current minimum logging level.
     *
     * @return string
     */
    public function getLevel()
    {
        return $this->level;
    }

    /**
     * Logs with an arbitrary level.
     *
     * Records below the configured minimum level are silently dropped.
     *
     * @throws Mustache_Exception_InvalidArgumentException if the logging level is unknown
     *
     * @param mixed  $level
     * @param string $message
     * @param array  $context
     */
    public function log($level, $message, array $context = array())
    {
        if (!array_key_exists($level, self::$levels)) {
            throw new Mustache_Exception_InvalidArgumentException(sprintf('Unexpected logging level: %s', $level));
        }

        if (self::$levels[$level] >= self::$levels[$this->level]) {
            $this->writeLog($level, $message, $context);
        }
    }

    /**
     * Write a record to the log.
     *
     * The stream is opened in append mode on first use when only a URL was given.
     *
     * @throws Mustache_Exception_LogicException   If neither a stream resource nor url is present
     * @throws Mustache_Exception_RuntimeException If the stream url cannot be opened
     *
     * @param string $level   The logging level
     * @param string $message The log message
     * @param array  $context The log context
     */
    protected function writeLog($level, $message, array $context = array())
    {
        if (!is_resource($this->stream)) {
            if (!isset($this->url)) {
                throw new Mustache_Exception_LogicException('Missing stream url, the stream can not be opened. This may be caused by a premature call to close().');
            }

            $this->stream = fopen($this->url, 'a');
            if (!is_resource($this->stream)) {
                // @codeCoverageIgnoreStart
                throw new Mustache_Exception_RuntimeException(sprintf('The stream or file "%s" could not be opened.', $this->url));
                // @codeCoverageIgnoreEnd
            }
        }

        fwrite($this->stream, self::formatLine($level, $message, $context));
    }

    /**
     * Gets the name of the logging level (the level string, upper-cased).
     *
     * @param string $level
     *
     * @return string
     */
    protected static function getLevelName($level)
    {
        return strtoupper($level);
    }

    /**
     * Format a log line for output as "LEVEL: message\n".
     *
     * @param string $level   The logging level
     * @param string $message The log message
     * @param array  $context The log context
     *
     * @return string
     */
    protected static function formatLine($level, $message, array $context = array())
    {
        return sprintf(
            "%s: %s\n",
            self::getLevelName($level),
            self::interpolateMessage($message, $context)
        );
    }

    /**
     * Interpolate context values into the message placeholders.
     *
     * Context values that cannot be cast to string (arrays, objects without
     * __toString) are skipped, leaving their placeholder untouched.
     *
     * @param string $message
     * @param array  $context
     *
     * @return string
     */
    protected static function interpolateMessage($message, array $context = array())
    {
        if (strpos($message, '{') === false) {
            return $message;
        }

        // build a replacement array with braces around the context keys
        $replace = array();
        foreach ($context as $key => $val) {
            if (is_array($val) || (is_object($val) && !method_exists($val, '__toString'))) {
                continue;
            }

            $replace['{' . $key . '}'] = (string) $val;
        }

        // interpolate replacement values into the message and return
        return strtr($message, $replace);
    }
}

View file

* This file is part of Mustache.php.
* (c) 2010-2017 Justin Hileman
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
* Mustache Parser class.
* This class is responsible for turning a set of Mustache tokens into a parse tree.
class Mustache_Parser
{
    /** @var int Line number of the token most recently taken from the stream */
    private $lineNum;

    /** @var int Zero-based position of the current token on its line */
    private $lineTokens;

    /** @var array Pragmas enabled for the template being parsed (name => true) */
    private $pragmas;

    /** @var array Pragmas enabled for every template via setPragmas() */
    private $defaultPragmas = array();

    /** @var bool */
    private $pragmaFilters;

    /** @var bool */
    private $pragmaBlocks;

    /** @var bool */
    private $pragmaDynamicNames;

    /**
     * Process an array of Mustache tokens and convert them into a parse tree.
     *
     * @param array $tokens Set of Mustache tokens
     *
     * @return array Mustache token parse tree
     */
    public function parse(array $tokens = array())
    {
        $this->lineNum    = -1;
        $this->lineTokens = 0;
        $this->pragmas    = $this->defaultPragmas;

        $this->pragmaFilters      = isset($this->pragmas[Mustache_Engine::PRAGMA_FILTERS]);
        $this->pragmaBlocks       = isset($this->pragmas[Mustache_Engine::PRAGMA_BLOCKS]);
        $this->pragmaDynamicNames = isset($this->pragmas[Mustache_Engine::PRAGMA_DYNAMIC_NAMES]);

        return $this->buildTree($tokens);
    }

    /**
     * Enable pragmas across all templates, regardless of the presence of pragma
     * tags in the individual templates.
     *
     * @internal Users should set global pragmas in Mustache_Engine, not here :)
     *
     * @param string[] $pragmas
     */
    public function setPragmas(array $pragmas)
    {
        $this->pragmas = array();
        foreach ($pragmas as $pragma) {
            $this->enablePragma($pragma);
        }
        $this->defaultPragmas = $this->pragmas;
    }

    /**
     * Helper method for recursively building a parse tree.
     *
     * @throws Mustache_Exception_SyntaxException when nesting errors or mismatched section tags are encountered
     *
     * @param array &$tokens Set of Mustache tokens
     * @param array $parent  Parent token (default: null)
     *
     * @return array Mustache Token parse tree
     */
    private function buildTree(array &$tokens, array $parent = null)
    {
        $nodes = array();

        while (!empty($tokens)) {
            $token = array_shift($tokens);

            // Track how many tokens have been seen on the current line;
            // clearStandaloneLines() relies on this count.
            if ($token[Mustache_Tokenizer::LINE] === $this->lineNum) {
                $this->lineTokens++;
            } else {
                $this->lineNum    = $token[Mustache_Tokenizer::LINE];
                $this->lineTokens = 0;
            }

            if ($token[Mustache_Tokenizer::TYPE] !== Mustache_Tokenizer::T_COMMENT) {
                if ($this->pragmaDynamicNames && isset($token[Mustache_Tokenizer::NAME])) {
                    list($name, $isDynamic) = $this->getDynamicName($token);
                    if ($isDynamic) {
                        $token[Mustache_Tokenizer::NAME]    = $name;
                        $token[Mustache_Tokenizer::DYNAMIC] = true;
                    }
                }

                if ($this->pragmaFilters && isset($token[Mustache_Tokenizer::NAME])) {
                    list($name, $filters) = $this->getNameAndFilters($token[Mustache_Tokenizer::NAME]);
                    if (!empty($filters)) {
                        $token[Mustache_Tokenizer::NAME]    = $name;
                        $token[Mustache_Tokenizer::FILTERS] = $filters;
                    }
                }
            }

            switch ($token[Mustache_Tokenizer::TYPE]) {
                case Mustache_Tokenizer::T_DELIM_CHANGE:
                    $this->checkIfTokenIsAllowedInParent($parent, $token);
                    $this->clearStandaloneLines($nodes, $tokens);
                    break;

                case Mustache_Tokenizer::T_SECTION:
                case Mustache_Tokenizer::T_INVERTED:
                    $this->checkIfTokenIsAllowedInParent($parent, $token);
                    $this->clearStandaloneLines($nodes, $tokens);
                    $nodes[] = $this->buildTree($tokens, $token);
                    break;

                case Mustache_Tokenizer::T_END_SECTION:
                    if (!isset($parent)) {
                        $msg = sprintf(
                            'Unexpected closing tag: /%s on line %d',
                            $token[Mustache_Tokenizer::NAME],
                            $token[Mustache_Tokenizer::LINE]
                        );
                        throw new Mustache_Exception_SyntaxException($msg, $token);
                    }

                    // A closing tag must match its opener in both name and dynamic-ness.
                    $nameMismatch  = $token[Mustache_Tokenizer::NAME] !== $parent[Mustache_Tokenizer::NAME];
                    $tokenDynamic  = isset($token[Mustache_Tokenizer::DYNAMIC]) && $token[Mustache_Tokenizer::DYNAMIC];
                    $parentDynamic = isset($parent[Mustache_Tokenizer::DYNAMIC]) && $parent[Mustache_Tokenizer::DYNAMIC];

                    if ($nameMismatch || ($tokenDynamic !== $parentDynamic)) {
                        $msg = sprintf(
                            'Nesting error: %s (on line %d) vs. %s (on line %d)',
                            $parent[Mustache_Tokenizer::NAME],
                            $parent[Mustache_Tokenizer::LINE],
                            $token[Mustache_Tokenizer::NAME],
                            $token[Mustache_Tokenizer::LINE]
                        );
                        throw new Mustache_Exception_SyntaxException($msg, $token);
                    }

                    $this->clearStandaloneLines($nodes, $tokens);
                    $parent[Mustache_Tokenizer::END]   = $token[Mustache_Tokenizer::INDEX];
                    $parent[Mustache_Tokenizer::NODES] = $nodes;

                    return $parent;

                case Mustache_Tokenizer::T_PARTIAL:
                    $this->checkIfTokenIsAllowedInParent($parent, $token);
                    // store the whitespace prefix for later!
                    if ($indent = $this->clearStandaloneLines($nodes, $tokens)) {
                        $token[Mustache_Tokenizer::INDENT] = $indent[Mustache_Tokenizer::VALUE];
                    }
                    $nodes[] = $token;
                    break;

                case Mustache_Tokenizer::T_PARENT:
                    $this->checkIfTokenIsAllowedInParent($parent, $token);
                    $nodes[] = $this->buildTree($tokens, $token);
                    break;

                case Mustache_Tokenizer::T_BLOCK_VAR:
                    if ($this->pragmaBlocks) {
                        // BLOCKS pragma is enabled, let's do this!
                        if (isset($parent) && $parent[Mustache_Tokenizer::TYPE] === Mustache_Tokenizer::T_PARENT) {
                            $token[Mustache_Tokenizer::TYPE] = Mustache_Tokenizer::T_BLOCK_ARG;
                        }
                        $this->clearStandaloneLines($nodes, $tokens);
                        $nodes[] = $this->buildTree($tokens, $token);
                    } else {
                        // pretend this was just a normal "escaped" token...
                        $token[Mustache_Tokenizer::TYPE] = Mustache_Tokenizer::T_ESCAPED;
                        // TODO: figure out how to figure out if there was a space after this dollar:
                        $token[Mustache_Tokenizer::NAME] = '$' . $token[Mustache_Tokenizer::NAME];
                        $nodes[] = $token;
                    }
                    break;

                case Mustache_Tokenizer::T_PRAGMA:
                    $this->enablePragma($token[Mustache_Tokenizer::NAME]);
                    // no break

                case Mustache_Tokenizer::T_COMMENT:
                    $this->clearStandaloneLines($nodes, $tokens);
                    $nodes[] = $token;
                    break;

                default:
                    $nodes[] = $token;
                    break;
            }
        }

        // Running out of tokens while still inside a section means it was never closed.
        if (isset($parent)) {
            $msg = sprintf(
                'Missing closing tag: %s opened on line %d',
                $parent[Mustache_Tokenizer::NAME],
                $parent[Mustache_Tokenizer::LINE]
            );
            throw new Mustache_Exception_SyntaxException($msg, $parent);
        }

        return $nodes;
    }

    /**
     * Clear standalone line tokens.
     *
     * Returns a whitespace token for indenting partials, if applicable.
     *
     * @param array $nodes  Parsed nodes
     * @param array $tokens Tokens to be parsed
     *
     * @return array|null Resulting indent token, if any
     */
    private function clearStandaloneLines(array &$nodes, array &$tokens)
    {
        if ($this->lineTokens > 1) {
            // this is the third or later node on this line, so it can't be standalone
            return;
        }

        $prev = null;
        if ($this->lineTokens === 1) {
            // this is the second node on this line, so it can't be standalone
            // unless the previous node is whitespace.
            if ($prev = end($nodes)) {
                if (!$this->tokenIsWhitespace($prev)) {
                    return;
                }
            }
        }

        if ($next = reset($tokens)) {
            // If we're on a new line, bail.
            if ($next[Mustache_Tokenizer::LINE] !== $this->lineNum) {
                return;
            }

            // If the next token isn't whitespace, bail.
            if (!$this->tokenIsWhitespace($next)) {
                return;
            }

            if (count($tokens) !== 1) {
                // Unless it's the last token in the template, the next token
                // must end in newline for this to be standalone.
                if (substr($next[Mustache_Tokenizer::VALUE], -1) !== "\n") {
                    return;
                }
            }

            // Discard the whitespace suffix
            array_shift($tokens);
        }

        if ($prev) {
            // Return the whitespace prefix, if any
            return array_pop($nodes);
        }
    }

    /**
     * Check whether token is a whitespace token.
     *
     * True if token type is T_TEXT and value is all whitespace characters.
     *
     * @param array $token
     *
     * @return bool True if token is a whitespace token
     */
    private function tokenIsWhitespace(array $token)
    {
        if ($token[Mustache_Tokenizer::TYPE] === Mustache_Tokenizer::T_TEXT) {
            return preg_match('/^\s*$/', $token[Mustache_Tokenizer::VALUE]);
        }

        return false;
    }

    /**
     * Check whether a token is allowed inside a parent tag.
     *
     * @throws Mustache_Exception_SyntaxException if an invalid token is found inside a parent tag
     *
     * @param array|null $parent
     * @param array      $token
     */
    private function checkIfTokenIsAllowedInParent($parent, array $token)
    {
        if (isset($parent) && $parent[Mustache_Tokenizer::TYPE] === Mustache_Tokenizer::T_PARENT) {
            throw new Mustache_Exception_SyntaxException('Illegal content in < parent tag', $token);
        }
    }

    /**
     * Parse dynamic names.
     *
     * A name with a leading `*` (optionally surrounded by whitespace) is
     * dynamic; the marker is stripped from the returned name.
     *
     * @throws Mustache_Exception_SyntaxException when a tag does not allow *
     *
     * @param array $token
     *
     * @return array [Tag name, bool whether the name is dynamic]
     */
    private function getDynamicName(array $token)
    {
        $name      = $token[Mustache_Tokenizer::NAME];
        $isDynamic = false;

        if (preg_match('/^\s*\*\s*/', $name)) {
            $this->ensureTagAllowsDynamicNames($token);
            $name      = preg_replace('/^\s*\*\s*/', '', $name);
            $isDynamic = true;
        }

        return array($name, $isDynamic);
    }

    /**
     * Check whether the given token supports dynamic tag names.
     *
     * Only partial, parent and end-section tags may carry a dynamic name.
     *
     * @throws Mustache_Exception_SyntaxException when a tag does not allow *
     *
     * @param array $token
     */
    private function ensureTagAllowsDynamicNames(array $token)
    {
        switch ($token[Mustache_Tokenizer::TYPE]) {
            case Mustache_Tokenizer::T_PARTIAL:
            case Mustache_Tokenizer::T_PARENT:
            case Mustache_Tokenizer::T_END_SECTION:
                return;
        }

        // NOTE(review): the tag is identified by its type sigil here; if the
        // tokenizer offers a human-readable tag-name lookup, prefer that.
        $msg = sprintf(
            'Invalid dynamic name: %s in %s tag',
            $token[Mustache_Tokenizer::NAME],
            $token[Mustache_Tokenizer::TYPE]
        );

        throw new Mustache_Exception_SyntaxException($msg, $token);
    }

    /**
     * Split a tag name into name and filters.
     *
     * @param string $name
     *
     * @return array [Tag name, Array of filters]
     */
    private function getNameAndFilters($name)
    {
        $filters = array_map('trim', explode('|', $name));
        $name    = array_shift($filters);

        return array($name, $filters);
    }

    /**
     * Enable a pragma.
     *
     * @param string $name
     */
    private function enablePragma($name)
    {
        $this->pragmas[$name] = true;

        switch ($name) {
            case Mustache_Engine::PRAGMA_BLOCKS:
                $this->pragmaBlocks = true;
                break;

            case Mustache_Engine::PRAGMA_FILTERS:
                $this->pragmaFilters = true;
                break;

            case Mustache_Engine::PRAGMA_DYNAMIC_NAMES:
                $this->pragmaDynamicNames = true;
                break;
        }
    }
}

* This file is part of Mustache.php.
* (c) 2010-2017 Justin Hileman
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
* Mustache template Source interface.
interface Mustache_Source
{
    /**
     * Get the Source key (used to generate the compiled class name).
     *
     * The key must be distinct for each template source. For example, an
     * MD5 hash of the template contents would probably do the trick. The
     * ProductionFilesystemLoader uses mtime and file path. If your production
     * source directory is under version control, you could use the current Git
     * rev and the file path...
     *
     * @throws RuntimeException when a source file cannot be read
     *
     * @return string
     */
    public function getKey();

    /**
     * Get the template Source.
     *
     * @throws RuntimeException when a source file cannot be read
     *
     * @return string
     */
    public function getSource();
}

* This file is part of Mustache.php.
* (c) 2010-2017 Justin Hileman
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
* Mustache template Filesystem Source.
* This template Source uses stat() to generate the Source key, so that using
* pre-compiled templates doesn't require hitting the disk to read the source.
* It is more suitable for production use, and is used by default in the
* ProductionFilesystemLoader.
class Mustache_Source_FilesystemSource implements Mustache_Source
{
    /** @var string Path of the template file */
    private $fileName;

    /** @var array stat() property names folded into the key */
    private $statProps;

    /** @var array|false|null Cached stat() result; null until first needed */
    private $stat;

    /**
     * Filesystem Source constructor.
     *
     * @param string $fileName
     * @param array  $statProps
     */
    public function __construct($fileName, array $statProps)
    {
        $this->fileName  = $fileName;
        $this->statProps = $statProps;
    }

    /**
     * Get the Source key (used to generate the compiled class name).
     *
     * The key is a JSON object holding the file name plus the value of each
     * configured stat property. With no stat properties the file is not touched.
     *
     * @throws Mustache_Exception_RuntimeException when a source file cannot be read
     *
     * @return string
     */
    public function getKey()
    {
        $chunks = array(
            'fileName' => $this->fileName,
        );

        if (!empty($this->statProps)) {
            if (!isset($this->stat)) {
                // Warning suppressed on purpose: failure is reported via the exception below.
                $this->stat = @stat($this->fileName);
            }

            if ($this->stat === false) {
                throw new Mustache_Exception_RuntimeException(sprintf('Failed to read source file "%s".', $this->fileName));
            }

            foreach ($this->statProps as $prop) {
                $chunks[$prop] = $this->stat[$prop];
            }
        }

        return json_encode($chunks);
    }

    /**
     * Get the template Source.
     *
     * @throws Mustache_Exception_RuntimeException when a source file cannot be read
     *
     * @return string
     */
    public function getSource()
    {
        $source = file_get_contents($this->fileName);

        // Honour the documented Mustache_Source contract instead of leaking `false`.
        if ($source === false) {
            throw new Mustache_Exception_RuntimeException(sprintf('Failed to read source file "%s".', $this->fileName));
        }

        return $source;
    }
}

* This file is part of Mustache.php.
* (c) 2010-2017 Justin Hileman
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
* Abstract Mustache Template class.
* @abstract
abstract class Mustache_Template
{
    /**
     * @var Mustache_Engine
     */
    protected $mustache;

    /**
     * @var bool
     */
    protected $strictCallables = false;

    /**
     * Mustache Template constructor.
     *
     * @param Mustache_Engine $mustache
     */
    public function __construct(Mustache_Engine $mustache)
    {
        $this->mustache = $mustache;
    }

    /**
     * Mustache Template instances can be treated as a function and rendered by simply calling them.
     *
     *     $m = new Mustache_Engine;
     *     $tpl = $m->loadTemplate('Hello, {{ name }}!');
     *     echo $tpl(array('name' => 'World')); // "Hello, World!"
     *
     * @see Mustache_Template::render
     *
     * @param mixed $context Array or object rendering context (default: array())
     *
     * @return string Rendered template
     */
    public function __invoke($context = array())
    {
        return $this->render($context);
    }

    /**
     * Render this template given the rendering context.
     *
     * @param mixed $context Array or object rendering context (default: array())
     *
     * @return string Rendered template
     */
    public function render($context = array())
    {
        return $this->renderInternal(
            $this->prepareContextStack($context)
        );
    }

    /**
     * Internal rendering method implemented by Mustache Template concrete subclasses.
     *
     * This is where the magic happens :)
     *
     * NOTE: This method is not part of the Mustache.php public API.
     *
     * @param Mustache_Context $context
     * @param string           $indent  (default: '')
     *
     * @return string Rendered template
     */
    abstract public function renderInternal(Mustache_Context $context, $indent = '');

    /**
     * Tests whether a value should be iterated over (e.g. in a section context).
     *
     * In most languages there are two distinct array types: list and hash (or whatever you want to call them). Lists
     * should be iterated, hashes should be treated as objects. Mustache follows this paradigm for Ruby, Javascript,
     * Java, Python, etc.
     *
     * PHP, however, treats lists and hashes as one primitive type: array. So Mustache.php needs a way to distinguish
     * between a list of things (numeric, normalized array) and a set of variables to be used as section context
     * (associative array). In other words, this will be iterated over:
     *
     *     $items = array(
     *         array('name' => 'foo'),
     *         array('name' => 'bar'),
     *         array('name' => 'baz'),
     *     );
     *
     * ... but this will be used as a section context block:
     *
     *     $items = array(
     *         1        => array('name' => 'foo'),
     *         'banana' => array('name' => 'bar'),
     *         42       => array('name' => 'baz'),
     *     );
     *
     * @param mixed $value
     *
     * @return bool True if the value is 'iterable'
     */
    protected function isIterable($value)
    {
        switch (gettype($value)) {
            case 'object':
                return $value instanceof Traversable;

            case 'array':
                // Iterable only when keys are exactly 0, 1, 2, ... in order.
                $i = 0;
                foreach ($value as $k => $v) {
                    if ($k !== $i++) {
                        return false;
                    }
                }

                return true;

            default:
                return false;
        }
    }

    /**
     * Helper method to prepare the Context stack.
     *
     * Adds the Mustache HelperCollection to the stack's top context frame if helpers are present.
     *
     * @param mixed $context Optional first context frame (default: null)
     *
     * @return Mustache_Context
     */
    protected function prepareContextStack($context = null)
    {
        $stack = new Mustache_Context();

        // NOTE(review): assumes Mustache_Context exposes push() for adding a frame - confirm.
        $helpers = $this->mustache->getHelpers();
        if (!$helpers->isEmpty()) {
            $stack->push($helpers);
        }

        if (!empty($context)) {
            $stack->push($context);
        }

        return $stack;
    }

    /**
     * Resolve a context value.
     *
     * Invoke the value if it is callable, otherwise return the value. With
     * strict callables enabled only objects are invoked; otherwise any
     * non-string callable is.
     *
     * @param mixed            $value
     * @param Mustache_Context $context
     *
     * @return string
     */
    protected function resolveValue($value, Mustache_Context $context)
    {
        if (($this->strictCallables ? is_object($value) : !is_string($value)) && is_callable($value)) {
            // The callable's result is treated as template source and rendered
            // against the current context stack.
            return $this->mustache
                ->loadLambda((string) call_user_func($value))
                ->renderInternal($context);
        }

        return $value;
    }
}

* This file is part of Mustache.php.
* (c) 2010-2017 Justin Hileman
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
* Mustache Tokenizer class.
* This class is responsible for turning raw template source into a set of Mustache tokens.
class Mustache_Tokenizer
// Finite state machine states
const IN_TEXT = 0;
const IN_TAG_TYPE = 1;
const IN_TAG = 2;
// Token types
const T_SECTION = '#';
const T_INVERTED = '^';
const T_END_SECTION = '/';
const T_COMMENT = '!';
const T_PARTIAL = '>';
const T_PARENT = '<';
const T_DELIM_CHANGE = '=';
const T_ESCAPED = '_v';
const T_UNESCAPED = '{';
const T_UNESCAPED_2 = '&';
const T_TEXT = '_t';
const T_PRAGMA = '%';
const T_BLOCK_VAR = '$';
const T_BLOCK_ARG = '$arg';
// Valid token types
private static $tagTypes = array(
self::T_SECTION => true,
self::T_INVERTED => true,
self::T_END_SECTION => true,
self::T_COMMENT => true,
self::T_PARTIAL => true,
self::T_PARENT => true,
self::T_DELIM_CHANGE => true,
self::T_ESCAPED => true,
self::T_UNESCAPED => true,
self::T_UNESCAPED_2 => true,
self::T_PRAGMA => true,
self::T_BLOCK_VAR => true,
private static $tagNames = array(
self::T_SECTION => 'section',
self::T_INVERTED => 'inverted section',
self::T_END_SECTION => 'section end',
self::T_COMMENT => 'comment',
self::T_PARTIAL => 'partial',
self::T_PARENT => 'parent',
self::T_DELIM_CHANGE => 'set delimiter',
self::T_ESCAPED => 'variable',
self::T_UNESCAPED => 'unescaped variable',
self::T_UNESCAPED_2 => 'unescaped variable',
self::T_PRAGMA => 'pragma',
self::T_BLOCK_VAR => 'block variable',
self::T_BLOCK_ARG => 'block variable',
// Token properties
const TYPE = 'type';
const NAME = 'name';
const DYNAMIC = 'dynamic';
const OTAG = 'otag';
const CTAG = 'ctag';
const LINE = 'line';
const INDEX = 'index';
const END = 'end';
const INDENT = 'indent';
const NODES = 'nodes';
const VALUE = 'value';
const FILTERS = 'filters';
private $state;
private $tagType;
private $buffer;
private $tokens;
private $seenTag;
private $line;
private $otag;
private $otagChar;
private $otagLen;
private $ctag;
private $ctagChar;
private $ctagLen;
* Scan and tokenize template source.
* @throws Mustache_Exception_SyntaxException when mismatched section tags are encountered
* @throws Mustache_Exception_InvalidArgumentException when $delimiters string is invalid
* @param string $text Mustache template source to tokenize
* @param string $delimiters Optionally, pass initial opening and closing delimiters (default: empty string)
* @return array Set of Mustache tokens
public function scan($text, $delimiters = '')
// Setting mbstring.func_overload makes things *really* slow.
// Let's do everyone a favor and scan this string as ASCII instead.
// The INI directive was removed in PHP 8.0 so we don't need to check there (and can drop it
// when we remove support for older versions of PHP).
// @codeCoverageIgnoreStart
$encoding = null;
if (version_compare(PHP_VERSION, '8.0.0', '<')) {
if (function_exists('mb_internal_encoding') && ini_get('mbstring.func_overload') & 2) {
$encoding = mb_internal_encoding();
// @codeCoverageIgnoreEnd
if (is_string($delimiters) && $delimiters = trim($delimiters)) {
$len = strlen($text);
for ($i = 0; $i < $len; $i++) {
switch ($this->state) {
case self::IN_TEXT:
$char = $text[$i];
// Test whether it's time to change tags.
if ($char === $this->otagChar && substr($text, $i, $this->otagLen) === $this->otag) {
$this->state = self::IN_TAG_TYPE;
} else {
$this->buffer .= $char;
if ($char === "\n") {
case self::IN_TAG_TYPE:
$i += $this->otagLen - 1;
$char = $text[$i + 1];
if (isset(self::$tagTypes[$char])) {
$tag = $char;
$this->tagType = $tag;
} else {
$tag = null;
$this->tagType = self::T_ESCAPED;
if ($this->tagType === self::T_DELIM_CHANGE) {
$i = $this->changeDelimiters($text, $i);
$this->state = self::IN_TEXT;
} elseif ($this->tagType === self::T_PRAGMA) {
$i = $this->addPragma($text, $i);
$this->state = self::IN_TEXT;
} else {
if ($tag !== null) {
$this->state = self::IN_TAG;
$this->seenTag = $i;
$char = $text[$i];
// Test whether it's time to change tags.
if ($char === $this->ctagChar && substr($text, $i, $this->ctagLen) === $this->ctag) {
$token = array(
self::TYPE => $this->tagType,
self::NAME => trim($this->buffer),
self::OTAG => $this->otag,
self::CTAG => $this->ctag,
self::LINE => $this->line,
self::INDEX => ($this->tagType === self::T_END_SECTION) ? $this->seenTag - $this->otagLen : $i + $this->ctagLen,
if ($this->tagType === self::T_UNESCAPED) {
// Clean up `{{{ tripleStache }}}` style tokens.
if ($this->ctag === '}}') {
if (($i + 2 < $len) && $text[$i + 2] === '}') {
} else {
$msg = sprintf(
'Mismatched tag delimiters: %s on line %d',
throw new Mustache_Exception_SyntaxException($msg, $token);
} else {
$lastName = $token[self::NAME];
if (substr($lastName, -1) === '}') {
$token[self::NAME] = trim(substr($lastName, 0, -1));
} else {
$msg = sprintf(
'Mismatched tag delimiters: %s on line %d',
throw new Mustache_Exception_SyntaxException($msg, $token);
$this->buffer = '';
$i += $this->ctagLen - 1;
$this->state = self::IN_TEXT;
$this->tokens[] = $token;
} else {
$this->buffer .= $char;
if ($this->state !== self::IN_TEXT) {
// Restore the user's encoding...
// @codeCoverageIgnoreStart
if ($encoding) {
// @codeCoverageIgnoreEnd
return $this->tokens;
/**
 * Helper function to reset tokenizer internal state.
 *
 * Restores the default `{{` / `}}` delimiters and clears the buffer, token
 * list, line counter and state-machine position.
 */
private function reset()
{
    // Default opening delimiter.
    $this->otag     = '{{';
    $this->otagChar = '{';
    $this->otagLen  = 2;

    // Default closing delimiter.
    $this->ctag     = '}}';
    $this->ctagChar = '}';
    $this->ctagLen  = 2;

    // Scanner bookkeeping.
    $this->line    = 0;
    $this->seenTag = false;
    $this->tokens  = array();
    $this->buffer  = '';
    $this->tagType = null;
    $this->state   = self::IN_TEXT;
}
/**
 * Flush the current buffer to a token.
 *
 * When the buffer is non-empty, appends a T_TEXT token carrying the buffered
 * text and the current line, then empties the buffer. An empty buffer is a no-op.
 */
private function flushBuffer()
{
    if (strlen($this->buffer) === 0) {
        return;
    }

    $textToken = array(
        self::TYPE  => self::T_TEXT,
        self::LINE  => $this->line,
        self::VALUE => $this->buffer,
    );

    $this->tokens[] = $textToken;
    $this->buffer   = '';
}
* Change the current Mustache delimiters. Set new `otag` and `ctag` values.
* @throws Mustache_Exception_SyntaxException when delimiter string is invalid
* @param string $text Mustache template source
* @param int $index Current tokenizer index
* @return int New index value
private function changeDelimiters($text, $index)
$startIndex = strpos($text, '=', $index) + 1;
$close = '=' . $this->ctag;
$closeIndex = strpos($text, $close, $index);
if ($closeIndex === false) {
$token = array(
self::TYPE => self::T_DELIM_CHANGE,
self::LINE => $this->line,
try {
$this->setDelimiters(trim(substr($text, $startIndex, $closeIndex - $startIndex)));
} catch (Mustache_Exception_InvalidArgumentException $e) {
throw new Mustache_Exception_SyntaxException($e->getMessage(), $token);
$this->tokens[] = $token;
return $closeIndex + strlen($close) - 1;
/**
 * Set the current Mustache `otag` and `ctag` delimiters.
 *
 * The string must consist of two whitespace-separated, non-empty delimiter
 * sequences (opening first, closing second).
 *
 * @throws Mustache_Exception_InvalidArgumentException when delimiter string is invalid
 *
 * @param string $delimiters
 */
private function setDelimiters($delimiters)
{
    $parts = array();
    if (preg_match('/^\s*(\S+)\s+(\S+)\s*$/', $delimiters, $parts) !== 1) {
        throw new Mustache_Exception_InvalidArgumentException(sprintf('Invalid delimiters: %s', $delimiters));
    }

    $open  = $parts[1];
    $close = $parts[2];

    // Cache first character and length of each tag alongside the tag itself.
    $this->otag     = $open;
    $this->otagChar = $open[0];
    $this->otagLen  = strlen($open);

    $this->ctag     = $close;
    $this->ctagChar = $close[0];
    $this->ctagLen  = strlen($close);
}
* Add pragma token.
* Pragmas are hoisted to the front of the template, so all pragma tokens
* will appear at the front of the token list.
* @param string $text
* @param int $index
* @return int New index value
private function addPragma($text, $index)
$end = strpos($text, $this->ctag, $index);
if ($end === false) {
$pragma = trim(substr($text, $index + 2, $end - $index - 2));
// Pragmas are hoisted to the front of the template.
array_unshift($this->tokens, array(
self::TYPE => self::T_PRAGMA,
self::NAME => $pragma,
self::LINE => 0,
return $end + $this->ctagLen - 1;
/**
 * Throw a syntax exception for the tag currently being scanned.
 *
 * The message includes the trimmed buffer contents as the tag name when
 * there are any; the exception also receives a token describing the tag.
 *
 * @throws Mustache_Exception_SyntaxException always
 */
private function throwUnclosedTagException()
{
    $tagName = trim($this->buffer);

    $message = ($tagName === '')
        ? sprintf('Unclosed tag on line %d', $this->line)
        : sprintf('Unclosed tag: %s on line %d', $tagName, $this->line);

    $token = array(
        self::TYPE  => $this->tagType,
        self::NAME  => $tagName,
        self::OTAG  => $this->otag,
        self::CTAG  => $this->ctag,
        self::LINE  => $this->line,
        self::INDEX => $this->seenTag - $this->otagLen,
    );

    throw new Mustache_Exception_SyntaxException($message, $token);
}
/**
 * Get the human readable name for a tag type.
 *
 * Looks the type up in the static tag-name table and falls back to
 * 'unknown' when there is no entry for it.
 *
 * @param string $tagType One of the tokenizer T_* constants
 *
 * @return string
 */
public static function getTagName($tagType)
{
    // Visibility is now explicit; a method without a modifier is public, so callers are unaffected.
    return isset(self::$tagNames[$tagType]) ? self::$tagNames[$tagType] : 'unknown';
}

@ -0,0 +1,40 @@
* PHPMailer Exception class.
* PHP Version 5.5.
* @see The PHPMailer GitHub project
* @author Marcus Bointon (Synchro/coolbru) <>
* @author Jim Jagielski (jimjag) <>
* @author Andy Prevost (codeworxtech) <>
* @author Brent R. Matzelle (original founder)
* @copyright 2012 - 2020 Marcus Bointon
* @copyright 2010 - 2012 Jim Jagielski
* @copyright 2004 - 2009 Andy Prevost
* @license GNU Lesser General Public License
* @note This program is distributed in the hope that it will be useful - WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
namespace PHPMailer\PHPMailer;

/**
 * PHPMailer exception handler.
 *
 * @author Marcus Bointon <>
 */
class Exception extends \Exception
{
    /**
     * Prettify error message output.
     *
     * The exception message is HTML-escaped, wrapped in a <strong> element and
     * followed by a line break.
     *
     * @return string
     */
    public function errorMessage()
    {
        $escaped = htmlspecialchars($this->getMessage(), ENT_COMPAT | ENT_HTML401);

        return '<strong>' . $escaped . "</strong><br />\n";
    }
}

That's all there is to it!

os3/PdfParser/Config.php Normal file
View file

@ -0,0 +1,154 @@
* @file
* This file is part of the PdfParser library.
* @author Konrad Abicht <>
* @date 2020-11-22
* @license LGPLv3
* @url <>
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <>
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* GNU Lesser General Public License for more details.
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <>.
namespace Smalot\PdfParser;
/**
 * This class contains configurations used in various classes. You can override them
 * manually, in case default values aren't working.
 */
class Config
{
    /**
     * Font space limit; defaults to -50.
     * NOTE(review): units and exact meaning are defined by the consumers of this value — confirm there.
     */
    private $fontSpaceLimit = -50;

    /**
     * @var string
     */
    private $horizontalOffset = ' ';

    /**
     * Represents: (NUL, HT, LF, FF, CR, SP)
     *
     * @var string
     */
    private $pdfWhitespaces = "\0\t\n\f\r ";

    /**
     * Represents: (NUL, HT, LF, FF, CR, SP)
     *
     * @var string
     */
    private $pdfWhitespacesRegex = '[\0\t\n\f\r ]';

    /**
     * Whether to retain raw image data as content or discard it to save memory
     *
     * @var bool
     */
    private $retainImageContent = true;

    /**
     * Memory limit to use when de-compressing files, in bytes.
     *
     * @var int
     */
    private $decodeMemoryLimit = 0;

    /**
     * Whether to include font id and size in dataTm array
     *
     * @var bool
     */
    private $dataTmFontInfoHasToBeIncluded = false;

    // ---------------------------------------------------------------------
    // Accessors: each setting has a plain getter/setter pair.
    // ---------------------------------------------------------------------

    public function getFontSpaceLimit()
    {
        return $this->fontSpaceLimit;
    }

    public function setFontSpaceLimit($value)
    {
        $this->fontSpaceLimit = $value;
    }

    public function getHorizontalOffset(): string
    {
        return $this->horizontalOffset;
    }

    public function setHorizontalOffset($value): void
    {
        $this->horizontalOffset = $value;
    }

    public function getPdfWhitespaces(): string
    {
        return $this->pdfWhitespaces;
    }

    public function setPdfWhitespaces(string $pdfWhitespaces): void
    {
        $this->pdfWhitespaces = $pdfWhitespaces;
    }

    public function getPdfWhitespacesRegex(): string
    {
        return $this->pdfWhitespacesRegex;
    }

    public function setPdfWhitespacesRegex(string $pdfWhitespacesRegex): void
    {
        $this->pdfWhitespacesRegex = $pdfWhitespacesRegex;
    }

    public function getRetainImageContent(): bool
    {
        return $this->retainImageContent;
    }

    public function setRetainImageContent(bool $retainImageContent): void
    {
        $this->retainImageContent = $retainImageContent;
    }

    public function getDecodeMemoryLimit(): int
    {
        return $this->decodeMemoryLimit;
    }

    public function setDecodeMemoryLimit(int $decodeMemoryLimit): void
    {
        $this->decodeMemoryLimit = $decodeMemoryLimit;
    }

    public function getDataTmFontInfoHasToBeIncluded(): bool
    {
        return $this->dataTmFontInfoHasToBeIncluded;
    }

    public function setDataTmFontInfoHasToBeIncluded(bool $dataTmFontInfoHasToBeIncluded): void
    {
        $this->dataTmFontInfoHasToBeIncluded = $dataTmFontInfoHasToBeIncluded;
    }
}

@ -0,0 +1,306 @@
* @file
* This file is part of the PdfParser library.
* @author Sébastien MALOT <>
* @date 2017-01-03
* @license LGPLv3
* @url <>
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <>
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* GNU Lesser General Public License for more details.
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <>.
namespace Smalot\PdfParser;
* Technical references :
* -
* -
* -
* -
* -
* -
* -
* Class Document
class Document
* @var PDFObject[]
protected $objects = [];
* @var array
protected $dictionary = [];
* @var Header
protected $trailer = null;
* @var array
protected $details = null;
public function __construct()
$this->trailer = new Header([], $this);
public function init()
// Propagate init to objects.
foreach ($this->objects as $object) {
* Build dictionary based on type header field.
protected function buildDictionary()
// Build dictionary.
$this->dictionary = [];
foreach ($this->objects as $id => $object) {
// Cache objects by type and subtype
$type = $object->getHeader()->get('Type')->getContent();
if (null != $type) {
if (!isset($this->dictionary[$type])) {
$this->dictionary[$type] = [
'all' => [],
'subtype' => [],
$this->dictionary[$type]['all'][$id] = $object;
$subtype = $object->getHeader()->get('Subtype')->getContent();
if (null != $subtype) {
if (!isset($this->dictionary[$type]['subtype'][$subtype])) {
$this->dictionary[$type]['subtype'][$subtype] = [];
$this->dictionary[$type]['subtype'][$subtype][$id] = $object;
/**
 * Build details array.
 *
 * Starts from the details of the trailer's 'Info' entry (when present and
 * exposing getHeader()), then adds a 'Pages' count. The result is stored in
 * $this->details.
 */
protected function buildDetails()
{
    $details = [];

    // Extract document info.
    /** @var PDFObject|null $info */
    $info = $this->trailer->has('Info') ? $this->trailer->get('Info') : null;

    // This could be an ElementMissing object, so check for getHeader() first.
    if (null !== $info && method_exists($info, 'getHeader')) {
        $details = $info->getHeader()->getDetails();
    }

    // Retrieve the page count; a failing page lookup is reported as zero pages.
    try {
        $pageCount = \count($this->getPages());
    } catch (\Exception $e) {
        $pageCount = 0;
    }
    $details['Pages'] = $pageCount;

    $this->details = $details;
}
public function getDictionary(): array
return $this->dictionary;
* @param PDFObject[] $objects
public function setObjects($objects = [])
$this->objects = (array) $objects;
* @return PDFObject[]
public function getObjects()
return $this->objects;
* @return PDFObject|Font|Page|Element|null
public function getObjectById(string $id)
if (isset($this->objects[$id])) {
return $this->objects[$id];
return null;
public function hasObjectsByType(string $type, ?string $subtype = null): bool
return 0 < \count($this->getObjectsByType($type, $subtype));
/**
 * Return the cached objects of a given type, optionally narrowed to a subtype.
 *
 * Reads from the dictionary; an unknown type or subtype yields an empty
 * array. A subtype that loosely equals null is ignored.
 *
 * @return array objects keyed by the ids they were registered under
 */
public function getObjectsByType(string $type, ?string $subtype = null): array
{
    if (!isset($this->dictionary[$type])) {
        return [];
    }

    $entry = $this->dictionary[$type];

    if (null == $subtype) {
        return $entry['all'];
    }

    return $entry['subtype'][$subtype] ?? [];
}
* @return Font[]
public function getFonts()
return $this->getObjectsByType('Font');
/**
 * Return the first font returned by getFonts(), or null when there are none.
 */
public function getFirstFont(): ?Font
{
    foreach ($this->getFonts() as $font) {
        // The first element reached is the first font.
        return $font;
    }

    return null;
}
* @return Page[]
* @throws \Exception
public function getPages()
if ($this->hasObjectsByType('Catalog')) {
// Search for catalog to list pages.
$catalogues = $this->getObjectsByType('Catalog');
$catalogue = reset($catalogues);
/** @var Pages $object */
$object = $catalogue->get('Pages');
if (method_exists($object, 'getPages')) {
return $object->getPages(true);
if ($this->hasObjectsByType('Pages')) {
// Search for pages to list kids.
$pages = [];
/** @var Pages[] $objects */
$objects = $this->getObjectsByType('Pages');
foreach ($objects as $object) {
$pages = array_merge($pages, $object->getPages(true));
return $pages;
if ($this->hasObjectsByType('Page')) {
// Search for 'page' (unordered pages).
$pages = $this->getObjectsByType('Page');
return array_values($pages);
throw new \Exception('Missing catalog.');
public function getText(?int $pageLimit = null): string
$texts = [];
$pages = $this->getPages();
// Only use the first X number of pages if $pageLimit is set and numeric.
if (\is_int($pageLimit) && 0 < $pageLimit) {
$pages = \array_slice($pages, 0, $pageLimit);
foreach ($pages as $index => $page) {
* In some cases, the $page variable may be null.
if (null === $page) {
if ($text = trim($page->getText())) {
$texts[] = $text;
return implode("\n\n", $texts);
public function getTrailer(): Header
return $this->trailer;
public function setTrailer(Header $trailer)
$this->trailer = $trailer;
/**
 * Return the document details array.
 *
 * $details starts out as null and is only populated by buildDetails().
 * Returning null here would violate the declared array return type, so the
 * details are built on first access if they are not available yet.
 *
 * @return array
 */
public function getDetails(): array
{
    if (null === $this->details) {
        $this->buildDetails();
    }

    return $this->details;
}

os3/PdfParser/Element.php Normal file
* @file
* This file is part of the PdfParser library.
* @author Sébastien MALOT <>
* @date 2017-01-03
* @license LGPLv3
* @url <>
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <>
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* GNU Lesser General Public License for more details.
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <>.
namespace Smalot\PdfParser;
use Smalot\PdfParser\Element\ElementArray;
use Smalot\PdfParser\Element\ElementBoolean;
use Smalot\PdfParser\Element\ElementDate;
use Smalot\PdfParser\Element\ElementHexa;
use Smalot\PdfParser\Element\ElementName;
use Smalot\PdfParser\Element\ElementNull;
use Smalot\PdfParser\Element\ElementNumeric;
use Smalot\PdfParser\Element\ElementString;
use Smalot\PdfParser\Element\ElementStruct;
use Smalot\PdfParser\Element\ElementXRef;
* Class Element
class Element
* @var Document
protected $document = null;
protected $value = null;
public function __construct($value, ?Document $document = null)
$this->value = $value;
$this->document = $document;
public function init()
public function equals($value): bool
return $value == $this->value;
/**
 * Check whether this element matches, or holds an element that matches, $value.
 *
 * For an array value each contained element is asked via equals(); for any
 * other value the check is delegated to this element's own equals().
 */
public function contains($value): bool
{
    if (!\is_array($this->value)) {
        return $this->equals($value);
    }

    /** @var Element $item */
    foreach ($this->value as $item) {
        if ($item->equals($value)) {
            return true;
        }
    }

    return false;
}
public function getContent()
return $this->value;
public function __toString(): string
return (string) $this->value;
public static function parse(string $content, ?Document $document = null, int &$position = 0)
$args = \func_get_args();
$only_values = isset($args[3]) ? $args[3] : false;
$content = trim($content);
$values = [];
do {
$old_position = $position;
if (!$only_values) {
if (!preg_match('/^\s*(?P<name>\/[A-Z0-9\._]+)(?P<value>.*)/si', substr($content, $position), $match)) {
} else {
$name = ltrim($match['name'], '/');
$value = $match['value'];
$position = strpos($content, $value, $position + \strlen($match['name']));
} else {
$name = \count($values);
$value = substr($content, $position);
if ($element = ElementName::parse($value, $document, $position)) {
$values[$name] = $element;
} elseif ($element = ElementXRef::parse($value, $document, $position)) {
$values[$name] = $element;
} elseif ($element = ElementNumeric::parse($value, $document, $position)) {
$values[$name] = $element;
} elseif ($element = ElementStruct::parse($value, $document, $position)) {
$values[$name] = $element;
} elseif ($element = ElementBoolean::parse($value, $document, $position)) {
$values[$name] = $element;
} elseif ($element = ElementNull::parse($value, $document, $position)) {
$values[$name] = $element;
} elseif ($element = ElementDate::parse($value, $document, $position)) {
$values[$name] = $element;
} elseif ($element = ElementString::parse($value, $document, $position)) {
$values[$name] = $element;
} elseif ($element = ElementHexa::parse($value, $document, $position)) {
$values[$name] = $element;
} elseif ($element = ElementArray::parse($value, $document, $position)) {
$values[$name] = $element;
} else {
$position = $old_position;
} while ($position < \strlen($content));
return $values;

* @file
* This file is part of the PdfParser library.
* @author Sébastien MALOT <>
* @date 2017-01-03
* @license LGPLv3
* @url <>
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <>
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* GNU Lesser General Public License for more details.
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <>.
namespace Smalot\PdfParser\Element;
use Smalot\PdfParser\Document;
use Smalot\PdfParser\Element;
use Smalot\PdfParser\Header;
use Smalot\PdfParser\PDFObject;
* Class ElementArray
class ElementArray extends Element
public function __construct($value, ?Document $document = null)
parent::__construct($value, $document);
public function getContent()
foreach ($this->value as $name => $element) {
return parent::getContent();
public function getRawContent(): array
return $this->value;
/**
 * Convert the array content into a plain PHP array.
 *
 * With $deep enabled, Header, PDFObject and nested ElementArray entries are
 * expanded through their own getDetails(); with it disabled they are left out.
 * Any other Element contributes its getContent() value.
 *
 * @param bool $deep whether to expand nested structures
 */
public function getDetails(bool $deep = true): array
{
    $details = [];

    foreach ($this->getContent() as $key => $element) {
        if ($deep && $element instanceof Header) {
            $details[$key] = $element->getDetails($deep);
        } elseif ($deep && $element instanceof PDFObject) {
            $details[$key] = $element->getDetails(false);
        } elseif ($element instanceof self) {
            // Nested arrays are only expanded in deep mode; otherwise skipped.
            if ($deep) {
                $details[$key] = $element->getDetails();
            }
        } elseif ($element instanceof Element) {
            // ElementArray instances were handled by the previous branch.
            $details[$key] = $element->getContent();
        }
    }

    return $details;
}
public function __toString(): string
return implode(',', $this->value);
* @return Element|PDFObject
protected function resolveXRef(string $name)
if (($obj = $this->value[$name]) instanceof ElementXRef) {
/** @var ElementXRef $obj */
$obj = $this->document->getObjectById($obj->getId());
$this->value[$name] = $obj;
return $this->value[$name];
* @todo: These methods return mixed and mismatched types throughout the hierarchy
* @return bool|ElementArray
public static function parse(string $content, ?Document $document = null, int &$offset = 0)
if (preg_match('/^\s*\[(?P<array>.*)/is', $content, $match)) {
preg_match_all('/(.*?)(\[|\])/s', trim($content), $matches);
$level = 0;
$sub = '';
foreach ($matches[0] as $part) {
$sub .= $part;
$level += (false !== strpos($part, '[') ? 1 : -1);
if ($level <= 0) {
// Removes 1 level [ and ].
$sub = substr(trim($sub), 1, -1);
$sub_offset = 0;
$values = Element::parse($sub, $document, $sub_offset, true);
$offset += strpos($content, '[') + 1;
// Find next ']' position
$offset += \strlen($sub) + 1;
return new self($values, $document);
* @file
* This file is part of the PdfParser library.
* @author Sébastien MALOT <>
* @date 2017-01-03
* @license LGPLv3
* @url <>
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <>
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* GNU Lesser General Public License for more details.
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <>.
namespace Smalot\PdfParser\Element;
use Smalot\PdfParser\Document;
use Smalot\PdfParser\Element;
/**
 * Class ElementBoolean
 *
 * Element wrapping a boolean value parsed from a `true` / `false` keyword.
 */
class ElementBoolean extends Element
{
    /**
     * The stored value is true for boolean true or the string "true"
     * (case-insensitive); every other input is stored as false.
     *
     * @param string|bool $value
     */
    public function __construct($value)
    {
        // Only lowercase real strings: passing a bool/null to strtolower() relies on
        // implicit coercion (a TypeError under strict_types, deprecated for null).
        $isTrue = true === $value || (\is_string($value) && 'true' === strtolower($value));

        parent::__construct($isTrue, null);
    }

    public function __toString(): string
    {
        return $this->value ? 'true' : 'false';
    }

    /**
     * Strict comparison against the stored boolean.
     */
    public function equals($value): bool
    {
        return $this->getContent() === $value;
    }

    /**
     * Parse a leading `true`/`false` keyword (case-insensitive, optional leading
     * whitespace) and advance $offset past it.
     *
     * @return bool|ElementBoolean the parsed element, or false when $content does not start with a boolean
     */
    public static function parse(string $content, ?Document $document = null, int &$offset = 0)
    {
        if (preg_match('/^\s*(?P<value>true|false)/is', $content, $match)) {
            $value = $match['value'];
            $offset += strpos($content, $value) + \strlen($value);

            return new self($value);
        }

        // Explicit failure value, as documented, instead of an implicit null.
        return false;
    }
}
* @file
* This file is part of the PdfParser library.
* @author Sébastien MALOT <>
* @date 2017-01-03
* @license LGPLv3
* @url <>
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <>
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* GNU Lesser General Public License for more details.
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <>.
namespace Smalot\PdfParser\Element;
use Smalot\PdfParser\Document;
* Class ElementDate
class ElementDate extends ElementString
* @var array
protected static $formats = [
4 => 'Y',
6 => 'Ym',
8 => 'Ymd',
10 => 'YmdH',
12 => 'YmdHi',
14 => 'YmdHis',
15 => 'YmdHise',
17 => 'YmdHisO',
18 => 'YmdHisO',
19 => 'YmdHisO',
* @var string
protected $format = 'c';
* @var \DateTime
protected $value;
public function __construct($value)
if (!($value instanceof \DateTime)) {
throw new \Exception('DateTime required.'); // FIXME: Sometimes strings are passed to this function
public function setFormat(string $format)
$this->format = $format;
public function equals($value): bool
if ($value instanceof \DateTime) {
$timestamp = $value->getTimeStamp();
} else {
$timestamp = strtotime($value);
return $timestamp == $this->value->getTimeStamp();
public function __toString(): string
return (string) $this->value->format($this->format);
* @return bool|ElementDate
public static function parse(string $content, ?Document $document = null, int &$offset = 0)
if (preg_match('/^\s*\(D\:(?P<name>.*?)\)/s', $content, $match)) {
$name = $match['name'];
$name = str_replace("'", '', $name);
$date = false;
// Smallest format : Y
// Full format : YmdHisP
if (preg_match('/^\d{4}(\d{2}(\d{2}(\d{2}(\d{2}(\d{2}(Z(\d{2,4})?|[\+-]?\d{2}(\d{2})?)?)?)?)?)?)?$/', $name)) {
if ($pos = strpos($name, 'Z')) {
$name = substr($name, 0, $pos + 1);
} elseif (18 == \strlen($name) && preg_match('/[^\+-]0000$/', $name)) {
$name = substr($name, 0, -4).'+0000';
$format = self::$formats[\strlen($name)];
$date = \DateTime::createFromFormat($format, $name, new \DateTimeZone('UTC'));
} else {
// special cases
if (preg_match('/^\d{1,2}-\d{1,2}-\d{4},?\s+\d{2}:\d{2}:\d{2}[\+-]\d{4}$/', $name)) {
$name = str_replace(',', '', $name);
$format = 'n-j-Y H:i:sO';
$date = \DateTime::createFromFormat($format, $name, new \DateTimeZone('UTC'));
if (!$date) {
return false;
$offset += strpos($content, '(D:') + \strlen($match['name']) + 4; // 1 for '(D:' and ')'
return new self($date);
* @file
* This file is part of the PdfParser library.
* @author Sébastien MALOT <>
* @date 2017-01-03
* @license LGPLv3
* @url <>
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <>
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* GNU Lesser General Public License for more details.
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <>.
namespace Smalot\PdfParser\Element;
use Smalot\PdfParser\Document;
/**
 * Class ElementHexa
 *
 * Parses hexadecimal strings written as `<...>` and hands the decoded text to
 * the date / string element parsers.
 */
class ElementHexa extends ElementString
{
    /**
     * Parse a leading `<HEX>` token and advance $offset past it.
     *
     * The decoded text is re-wrapped in parentheses and offered to
     * ElementDate::parse() first, then to ElementString::parse().
     *
     * @return bool|ElementHexa|ElementDate
     */
    public static function parse(string $content, ?Document $document = null, int &$offset = 0)
    {
        if (preg_match('/^\s*\<(?P<name>[A-F0-9]+)\>/is', $content, $match)) {
            $name = $match['name'];
            $offset += strpos($content, '<'.$name) + \strlen($name) + 2; // 2 for '<' and '>'
            // repackage string as standard
            $name = '('.self::decode($name).')';
            $element = ElementDate::parse($name, $document);

            if (!$element) {
                $element = ElementString::parse($name, $document);
            }

            return $element;
        }

        return false;
    }

    /**
     * Decode a string of hexadecimal digits into text.
     *
     * When the input starts with "00" it is read in groups of four hex digits,
     * each turned into a numeric HTML entity; otherwise it is read in groups
     * of two hex digits, each turned into a single byte. The result is then
     * passed through html_entity_decode() with UTF-8 as target charset.
     *
     * @param string $value hexadecimal digits without the surrounding `<` `>`
     *
     * @return string the decoded text
     */
    public static function decode(string $value): string
    {
        $text = '';
        $length = \strlen($value);

        if ('00' === substr($value, 0, 2)) {
            for ($i = 0; $i < $length; $i += 4) {
                $hex = substr($value, $i, 4);
                $text .= '&#'.str_pad((string) hexdec($hex), 4, '0', \STR_PAD_LEFT).';';
            }
        } else {
            for ($i = 0; $i < $length; $i += 2) {
                $hex = substr($value, $i, 2);
                $text .= \chr(hexdec($hex));
            }
        }

        // The method is declared to return a string, so the decoded text must be returned.
        return html_entity_decode($text, \ENT_NOQUOTES, 'UTF-8');
    }
}
* @file
* This file is part of the PdfParser library.
* @author Sébastien MALOT <>
* @date 2017-01-03
* @license LGPLv3
* @url <>
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <>
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* GNU Lesser General Public License for more details.
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <>.
namespace Smalot\PdfParser\Element;
use Smalot\PdfParser\Element;
* Class ElementMissing
/**
 * Placeholder element standing in for a value that is not present.
 *
 * Every query answers negatively and the string form is empty.
 */
class ElementMissing extends Element
{
    public function __construct()
    {
        parent::__construct(null, null);
    }

    /**
     * A missing element never equals anything.
     */
    public function equals($value): bool
    {
        return false;
    }

    /**
     * A missing element never contains anything.
     */
    public function contains($value): bool
    {
        return false;
    }

    /**
     * @return bool always false, signalling that there is no content
     */
    public function getContent(): bool
    {
        return false;
    }

    /**
     * The method is declared `: string`, so it must return a string; a missing
     * element renders as the empty string.
     */
    public function __toString(): string
    {
        return '';
    }
}
* @file
* This file is part of the PdfParser library.
* @author Sébastien MALOT <>
* @date 2017-01-03
* @license LGPLv3
* @url <>
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <>
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <>.
namespace Smalot\PdfParser\Element;
use Smalot\PdfParser\Document;
use Smalot\PdfParser\Element;
use Smalot\PdfParser\Font;
* Class ElementName
/**
 * A PDF name object such as `/Type`.
 */
class ElementName extends Element
{
    public function __construct(string $value)
    {
        parent::__construct($value, null);
    }

    /**
     * Loose (==) comparison against the stored name.
     */
    public function equals($value): bool
    {
        return $value == $this->value;
    }

    /**
     * Parses a name object at the start of $content.
     *
     * @param string        $content  raw content to read from
     * @param Document|null $document unused here; kept for signature parity with sibling parsers
     * @param int           $offset   advanced past the name when one is consumed
     *
     * @return bool|ElementName false when $content does not start with a name
     */
    public static function parse(string $content, ?Document $document = null, int &$offset = 0)
    {
        if (preg_match('/^\s*\/([A-Z0-9\-\+,#\.]+)/is', $content, $match)) {
            $name = $match[1];
            $offset += strpos($content, $name) + \strlen($name);
            $name = Font::decodeEntities($name);

            return new self($name);
        }

        // Explicit failure value, as promised by the @return contract.
        return false;
    }
}
* @file
* This file is part of the PdfParser library.
* @author Sébastien MALOT <>
* @date 2017-01-03
* @license LGPLv3
* @url <>
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <>
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <>.
namespace Smalot\PdfParser\Element;
use Smalot\PdfParser\Document;
use Smalot\PdfParser\Element;
* Class ElementNull
/**
 * The PDF `null` object.
 */
class ElementNull extends Element
{
    public function __construct()
    {
        parent::__construct(null, null);
    }

    public function __toString(): string
    {
        return 'null';
    }

    /**
     * Strict (===) comparison against the stored content.
     */
    public function equals($value): bool
    {
        return $this->getContent() === $value;
    }

    /**
     * Parses the keyword `null` at the start of $content.
     *
     * @param string        $content  raw content to read from
     * @param Document|null $document unused here; kept for signature parity with sibling parsers
     * @param int           $offset   advanced past the keyword when it is consumed
     *
     * @return bool|ElementNull false when $content does not start with `null`
     */
    public static function parse(string $content, ?Document $document = null, int &$offset = 0)
    {
        if (preg_match('/^\s*(null)/s', $content, $match)) {
            $offset += strpos($content, 'null') + \strlen('null');

            return new self();
        }

        // Explicit failure value, as promised by the @return contract.
        return false;
    }
}
* @file
* This file is part of the PdfParser library.
* @author Sébastien MALOT <>
* @date 2017-01-03
* @license LGPLv3
* @url <>
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <>
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <>.
namespace Smalot\PdfParser\Element;
use Smalot\PdfParser\Document;
use Smalot\PdfParser\Element;
* Class ElementNumeric
/**
 * A PDF numeric object; the value is stored as a float.
 */
class ElementNumeric extends Element
{
    public function __construct(string $value)
    {
        parent::__construct((float) $value, null);
    }

    /**
     * Parses a number (optional leading '-', then digits and dots) at the start of $content.
     *
     * @param string        $content  raw content to read from
     * @param Document|null $document unused here; kept for signature parity with sibling parsers
     * @param int           $offset   advanced past the number when one is consumed
     *
     * @return bool|ElementNumeric false when $content does not start with a number
     */
    public static function parse(string $content, ?Document $document = null, int &$offset = 0)
    {
        if (preg_match('/^\s*(?P<value>\-?[0-9\.]+)/s', $content, $match)) {
            $value = $match['value'];
            $offset += strpos($content, $value) + \strlen($value);

            return new self($value);
        }

        // Explicit failure value, as promised by the @return contract.
        return false;
    }
}
* @file
* This file is part of the PdfParser library.
* @author Sébastien MALOT <>
* @date 2017-01-03
* @license LGPLv3
* @url <>
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <>
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <>.
namespace Smalot\PdfParser\Element;
use Smalot\PdfParser\Document;
use Smalot\PdfParser\Element;
use Smalot\PdfParser\Font;
* Class ElementString
/**
 * A PDF literal string object, written as `(...)`.
 */
class ElementString extends Element
{
    public function __construct($value)
    {
        parent::__construct($value, null);
    }

    /**
     * Loose (==) comparison against the stored string.
     */
    public function equals($value): bool
    {
        return $value == $this->value;
    }

    /**
     * Parses a literal string at the start of $content.
     *
     * The string ends at the first ')' that is not escaped, i.e. not preceded by an odd
     * number of backslashes. Backslash escapes are then resolved and the result is
     * passed through the Font decoding helpers.
     *
     * @param string        $content  raw content to read from
     * @param Document|null $document unused here; kept for signature parity with sibling parsers
     * @param int           $offset   advanced past the closing ')' when a string is consumed
     *
     * @return bool|ElementString false when $content does not start with '('
     */
    public static function parse(string $content, ?Document $document = null, int &$offset = 0)
    {
        if (preg_match('/^\s*\((?P<name>.*)/s', $content, $match)) {
            $name = $match['name'];

            // Find next ')' not escaped.
            $cur_start_text = $start_search_end = 0;
            while (false !== ($cur_start_pos = strpos($name, ')', $start_search_end))) {
                $cur_extract = substr($name, $cur_start_text, $cur_start_pos - $cur_start_text);
                preg_match('/(?P<escape>[\\\]*)$/s', $cur_extract, $match);
                // An even number of preceding backslashes means this ')' is not escaped,
                // so it terminates the string.
                if (!(\strlen($match['escape']) % 2)) {
                    break;
                }
                $start_search_end = $cur_start_pos + 1;
            }

            // Extract string. When no ')' was found strpos() returned false, which counts as 0.
            $end = (int) $cur_start_pos;
            $name = substr($name, 0, $end);
            $offset += strpos($content, '(') + $end + 2; // 2 for '(' and ')'

            $name = str_replace(
                ['\\\\', '\\ ', '\\/', '\(', '\)', '\n', '\r', '\t'],
                ['\\', ' ', '/', '(', ')', "\n", "\r", "\t"],
                $name
            );

            // Decode string.
            $name = Font::decodeOctal($name);
            $name = Font::decodeEntities($name);
            $name = Font::decodeHexadecimal($name, false);
            $name = Font::decodeUnicode($name);

            return new self($name);
        }

        // Explicit failure value, as promised by the @return contract.
        return false;
    }
}
* @file
* This file is part of the PdfParser library.
* @author Sébastien MALOT <>
* @date 2017-01-03
* @license LGPLv3
* @url <>
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <>
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <>.
namespace Smalot\PdfParser\Element;
use Smalot\PdfParser\Document;
use Smalot\PdfParser\Element;
use Smalot\PdfParser\Header;
* Class ElementStruct
/**
 * Parser for PDF dictionary structures delimited by `<<` and `>>`.
 */
class ElementStruct extends Element
{
    /**
     * Parses a (possibly nested) `<< ... >>` structure at the start of $content.
     *
     * The content is split at every `<<` / `>>` token; the pieces are accumulated until
     * the nesting level returns to zero, which marks the end of the outermost structure.
     * The inner text is then parsed into elements and wrapped in a Header.
     *
     * @param string        $content  raw content to read from
     * @param Document|null $document forwarded to Element::parse() and the resulting Header
     * @param int           $offset   advanced past the closing `>>` when a structure is consumed
     *
     * @return false|Header false when $content does not start with `<<`
     */
    public static function parse(string $content, ?Document $document = null, int &$offset = 0)
    {
        if (preg_match('/^\s*<<(?P<struct>.*)/is', $content)) {
            preg_match_all('/(.*?)(<<|>>)/s', trim($content), $matches);

            $level = 0;
            $sub = '';
            foreach ($matches[0] as $part) {
                $sub .= $part;
                $level += (false !== strpos($part, '<<') ? 1 : -1);
                // Stop as soon as the outermost '<<' has been balanced.
                if ($level <= 0) {
                    break;
                }
            }

            $offset += strpos($content, '<<') + \strlen(rtrim($sub));

            // Removes '<<' and '>>'.
            $sub = trim((string) preg_replace('/^\s*<<(.*)>>\s*$/s', '\\1', $sub));

            $position = 0;
            $elements = Element::parse($sub, $document, $position);

            return new Header($elements, $document);
        }

        // Explicit failure value, as promised by the @return contract.
        return false;
    }
}
* @file
* This file is part of the PdfParser library.
* @author Sébastien MALOT <>
* @date 2017-01-03
* @license LGPLv3
* @url <>
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <>
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <>.
namespace Smalot\PdfParser\Element;
use Smalot\PdfParser\Document;
use Smalot\PdfParser\Element;
* Class ElementXRef
/**
 * An indirect reference (`12 0 R`), stored as the id string `12_0`.
 */
class ElementXRef extends Element
{
    /**
     * @return string the reference id, as stored by parse() in the form "<number>_<number>"
     */
    public function getId(): string
    {
        return $this->getContent();
    }

    /**
     * Looks the referenced object up in the owning document.
     */
    public function getObject()
    {
        return $this->document->getObjectById($this->getId());
    }

    /**
     * Compares this reference with another reference, an id string or a number.
     */
    public function equals($value): bool
    {
        /*
         * In case $value is a number and $this->value is a string like 5_0
         *
         * Without this if-clause code like:
         *
         *      $element = new ElementXRef('5_0');
         *      $this->assertTrue($element->equals(5));
         *
         * would fail (= 5_0 and 5 are not equal in PHP 8.0+).
         */
        if (
            true === is_numeric($value)
            && true === \is_string($this->getContent())
            && 1 === preg_match('/[0-9]+\_[0-9]+/', $this->getContent())
        ) {
            return (float) $this->getContent() == $value;
        }

        $id = ($value instanceof self) ? $value->getId() : $value;

        return $this->getId() == $id;
    }

    public function __toString(): string
    {
        return '#Obj#'.$this->getId();
    }

    /**
     * Parses an indirect reference (`<digits> <digits> R`) at the start of $content.
     *
     * @param string        $content  raw content to read from
     * @param Document|null $document document the reference resolves against
     * @param int           $offset   advanced past the reference when one is consumed
     *
     * @return bool|ElementXRef false when $content does not start with a reference
     */
    public static function parse(string $content, ?Document $document = null, int &$offset = 0)
    {
        if (preg_match('/^\s*(?P<id>[0-9]+\s+[0-9]+\s+R)/s', $content, $match)) {
            $id = $match['id'];
            $offset += strpos($content, $id) + \strlen($id);
            // "12 0 R" -> "12_0"
            $id = str_replace(' ', '_', rtrim($id, ' R'));

            return new self($id, $document);
        }

        // Explicit failure value, as promised by the @return contract.
        return false;
    }
}
* @file
* This file is part of the PdfParser library.
* @author Sébastien MALOT <>
* @date 2017-01-03
* @license LGPLv3
* @url <>
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <>
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <>.
namespace Smalot\PdfParser;
use Exception;
use Smalot\PdfParser\Element\ElementNumeric;
use Smalot\PdfParser\Encoding\EncodingLocator;
use Smalot\PdfParser\Encoding\PostScriptGlyphs;
use Smalot\PdfParser\Exception\EncodingNotFoundException;
* Class Encoding
/**
 * A font encoding object: a base encoding table plus optional per-code overrides
 * taken from the `Differences` entry.
 */
class Encoding extends PDFObject
{
    /**
     * @var array glyph names of the base encoding, indexed by character code
     */
    protected $encoding;

    /**
     * @var array glyph names taken from `Differences`, indexed by character code
     */
    protected $differences;

    /**
     * @var array base encoding with the differences applied on top
     */
    protected $mapping;

    /**
     * Builds the lookup tables from `BaseEncoding` and `Differences`.
     *
     * `Differences` is read as a flat list in which a numeric element sets the current
     * character code and every following non-numeric element is assigned to consecutive codes.
     */
    public function init()
    {
        $this->mapping = [];
        $this->differences = [];
        $this->encoding = [];

        if ($this->has('BaseEncoding')) {
            $this->encoding = EncodingLocator::getEncoding($this->getEncodingClass())->getTranslations();

            // Build table including differences.
            $differences = $this->get('Differences')->getContent();
            $code = 0;

            // Nothing to merge when there is no usable list of differences.
            // NOTE(review): in this case $this->mapping stays empty (the base table is
            // not copied into it) - confirm that this is intended.
            if (!\is_array($differences)) {
                return;
            }

            foreach ($differences as $difference) {
                /** @var ElementNumeric $difference */
                if ($difference instanceof ElementNumeric) {
                    // A number only sets the code for the names that follow; it is not a glyph.
                    $code = $difference->getContent();
                    continue;
                }

                // ElementName
                $this->differences[$code] = $difference;
                if (\is_object($difference)) {
                    $this->differences[$code] = $difference->getContent();
                }

                // For the next char.
                ++$code;
            }

            $this->mapping = $this->encoding;
            foreach ($this->differences as $code => $difference) {
                /* @var string $difference */
                $this->mapping[$code] = $difference;
            }
        }
    }

    /**
     * Adds `BaseEncoding` (default 'Ansi') and `Differences` (default '') to the parent details.
     */
    public function getDetails(bool $deep = true): array
    {
        $details = [];

        $details['BaseEncoding'] = ($this->has('BaseEncoding') ? (string) $this->get('BaseEncoding') : 'Ansi');
        $details['Differences'] = ($this->has('Differences') ? (string) $this->get('Differences') : '');

        $details += parent::getDetails($deep);

        return $details;
    }

    /**
     * Translates a character code: the code is replaced by its mapped glyph name when
     * one exists, and the result is resolved through PostScriptGlyphs::getCodePoint().
     *
     * @param mixed $dec character code
     */
    public function translateChar($dec): ?int
    {
        if (isset($this->mapping[$dec])) {
            $dec = $this->mapping[$dec];
        }

        return PostScriptGlyphs::getCodePoint($dec);
    }

    /**
     * Returns encoding class name if available or empty string (only prior PHP 7.4).
     *
     * @throws \Exception On PHP 7.4+ an exception is thrown if encoding class doesn't exist.
     */
    public function __toString(): string
    {
        try {
            return $this->getEncodingClass();
        } catch (\Exception $e) {
            // prior to PHP 7.4 toString has to return an empty string.
            if (version_compare(\PHP_VERSION, '7.4.0', '<')) {
                return '';
            }
            throw $e;
        }
    }

    /**
     * Resolves `BaseEncoding` to a fully qualified class name (with leading backslash)
     * in the Smalot\PdfParser\Encoding namespace.
     *
     * @throws EncodingNotFoundException when no such class exists
     */
    protected function getEncodingClass(): string
    {
        // Load reference table charset.
        $baseEncoding = preg_replace('/[^A-Z0-9]/is', '', $this->get('BaseEncoding')->getContent());
        $className = '\\Smalot\\PdfParser\\Encoding\\'.$baseEncoding;

        if (!class_exists($className)) {
            throw new EncodingNotFoundException('Missing encoding data for: "'.$baseEncoding.'".');
        }

        return $className;
    }
}

/**
 * Base type for the encoding tables handed out by EncodingLocator.
 */
abstract class AbstractEncoding
{
    /**
     * Returns the encoding table of the concrete encoding.
     *
     * @return array
     */
    abstract public function getTranslations(): array;
}

View file

@ -0,0 +1,17 @@
namespace Smalot\PdfParser\Encoding;
/**
 * Hands out encoding instances, creating each class at most once.
 */
class EncodingLocator
{
    /**
     * @var array|null instances created so far, keyed by the class name they were requested with
     */
    protected static $encodings;

    /**
     * Returns the instance for $encodingClassName, instantiating it on first request.
     *
     * @param string $encodingClassName name of a class that can be constructed without arguments
     */
    public static function getEncoding(string $encodingClassName): AbstractEncoding
    {
        $instance = self::$encodings[$encodingClassName] ?? null;

        if (null === $instance) {
            $instance = new $encodingClassName();
            self::$encodings[$encodingClassName] = $instance;
        }

        return $instance;
    }
}

* @file
* This file is part of the PdfParser library.
* @author Sébastien MALOT <>
* @date 2017-01-03
* @license LGPLv3
* @url <>
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <>
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <>.
// Source :
namespace Smalot\PdfParser\Encoding;
* Class ISOLatin1Encoding
class ISOLatin1Encoding extends AbstractEncoding
{
    /**
     * Returns the glyph names of the ISO Latin 1 encoding.
     *
     * The table is kept as one space-separated string and split on spaces, so the
     * position of a name in the returned list is the character code it stands for.
     *
     * @return array list of glyph names
     */
    public function getTranslations(): array
    {
        $encoding =
            '.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef '.
            '.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef '.
            '.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef '.
            '.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef '.
            'space exclam quotedbl numbersign dollar percent ampersand quoteright '.
            'parenleft parenright asterisk plus comma minus period slash zero one '.
            'two three four five six seven eight nine colon semicolon less equal '.
            'greater question at A B C D E F G H I J K L M N O P Q R S T U V W X '.
            'Y Z bracketleft backslash bracketright asciicircum underscore '.
            'quoteleft a b c d e f g h i j k l m n o p q r s t u v w x y z '.
            'braceleft bar braceright asciitilde .notdef .notdef .notdef .notdef '.
            '.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef '.
            '.notdef .notdef .notdef .notdef .notdef dotlessi grave acute '.
            'circumflex tilde macron breve dotaccent dieresis .notdef ring '.
            'cedilla .notdef hungarumlaut ogonek caron space exclamdown cent '.
            'sterling currency yen brokenbar section dieresis copyright '.
            'ordfeminine guillemotleft logicalnot hyphen registered macron degree '.
            'plusminus twosuperior threesuperior acute mu paragraph '.
            'periodcentered cedilla onesuperior ordmasculine guillemotright '.
            'onequarter onehalf threequarters questiondown Agrave Aacute '.
            'Acircumflex Atilde Adieresis Aring AE Ccedilla Egrave Eacute '.
            'Ecircumflex Edieresis Igrave Iacute Icircumflex Idieresis Eth Ntilde '.
            'Ograve Oacute Ocircumflex Otilde Odieresis multiply Oslash Ugrave '.
            'Uacute Ucircumflex Udieresis Yacute Thorn germandbls agrave aacute '.
            'acircumflex atilde adieresis aring ae ccedilla egrave eacute '.
            'ecircumflex edieresis igrave iacute icircumflex idieresis eth ntilde '.
            'ograve oacute ocircumflex otilde odieresis divide oslash ugrave '.
            'uacute ucircumflex udieresis yacute thorn ydieresis';

        // The method is declared `: array`; hand the table back as a list.
        return explode(' ', $encoding);
    }
}
* @file
* This file is part of the PdfParser library.
* @author Sébastien MALOT <>
* @date 2017-01-03
* @license LGPLv3
* @url <>
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <>
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <>.
// Source :
namespace Smalot\PdfParser\Encoding;
* Class ISOLatin9Encoding
class ISOLatin9Encoding extends AbstractEncoding
{
    /**
     * Returns the glyph names of the ISO Latin 9 encoding.
     *
     * The table is kept as one space-separated string and split on spaces, so the
     * position of a name in the returned list is the character code it stands for.
     *
     * @return array list of glyph names
     */
    public function getTranslations(): array
    {
        $encoding =
            '.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef '.
            '.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef '.
            '.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef '.
            '.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef '.
            'space exclam quotedbl numbersign dollar percent ampersand quoteright '.
            'parenleft parenright asterisk plus comma minus period slash zero one '.
            'two three four five six seven eight nine colon semicolon less equal '.
            'greater question at A B C D E F G H I J K L M N O P Q R S T U V W X '.
            'Y Z bracketleft backslash bracketright asciicircum underscore '.
            'quoteleft a b c d e f g h i j k l m n o p q r s t u v w x y z '.
            'braceleft bar braceright asciitilde .notdef .notdef .notdef .notdef '.
            '.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef '.
            '.notdef .notdef .notdef .notdef .notdef dotlessi grave acute '.
            'circumflex tilde macron breve dotaccent dieresis .notdef ring '.
            'cedilla .notdef hungarumlaut ogonek caron space exclamdown cent '.
            'sterling Euro yen Scaron section scaron copyright '.
            'ordfeminine guillemotleft logicalnot hyphen registered macron degree '.
            'plusminus twosuperior threesuperior Zcaron mu paragraph '.
            'periodcentered zcaron onesuperior ordmasculine guillemotright '.
            'OE oe Ydieresis questiondown Agrave Aacute '.
            'Acircumflex Atilde Adieresis Aring AE Ccedilla Egrave Eacute '.
            'Ecircumflex Edieresis Igrave Iacute Icircumflex Idieresis Eth Ntilde '.
            'Ograve Oacute Ocircumflex Otilde Odieresis multiply Oslash Ugrave '.
            'Uacute Ucircumflex Udieresis Yacute Thorn germandbls agrave aacute '.
            'acircumflex atilde adieresis aring ae ccedilla egrave eacute '.
            'ecircumflex edieresis igrave iacute icircumflex idieresis eth ntilde '.
            'ograve oacute ocircumflex otilde odieresis divide oslash ugrave '.
            'uacute ucircumflex udieresis yacute thorn ydieresis';

        // The method is declared `: array`; hand the table back as a list.
        return explode(' ', $encoding);
    }
}
* @file
* This file is part of the PdfParser library.
* @author Sébastien MALOT <>
* @date 2017-01-03
* @license LGPLv3
* @url <>
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <>
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <>.
// Source :
namespace Smalot\PdfParser\Encoding;
* Class MacRomanEncoding
class MacRomanEncoding extends AbstractEncoding
{
    /**
     * Returns the glyph names of the Mac Roman encoding.
     *
     * The table is kept as one space-separated string and split on spaces, so the
     * position of a name in the returned list is the character code it stands for.
     *
     * @return array list of glyph names
     */
    public function getTranslations(): array
    {
        $encoding =
            '.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef '.
            '.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef '.
            '.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef '.
            '.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef '.
            'space exclam quotedbl numbersign dollar percent ampersand quotesingle '.
            'parenleft parenright asterisk plus comma minus period slash '.
            'zero one two three four five six seven '.
            'eight nine colon semicolon less equal greater question '.
            'at A B C D E F G '.
            'H I J K L M N O '.
            'P Q R S T U V W '.
            'X Y Z bracketleft backslash bracketright asciicircum underscore '.
            'grave a b c d e f g '.
            'h i j k l m n o '.
            'p q r s t u v w '.
            'x y z braceleft bar braceright asciitilde .notdef '.
            'Adieresis Aring Ccedilla Eacute Ntilde Odieresis Udieresis aacute '.
            'agrave acircumflex adieresis atilde aring ccedilla eacute egrave '.
            'ecircumflex edieresis iacute igrave icircumflex idieresis ntilde oacute '.
            'ograve ocircumflex odieresis otilde uacute ugrave ucircumflex udieresis '.
            'dagger degree cent sterling section bullet paragraph germandbls '.
            'registered copyright trademark acute dieresis notequal AE Oslash '.
            'infinity plusminus lessequal greaterequal yen mu partialdiff summation '.
            'Pi pi integral ordfeminine ordmasculine Omega ae oslash '.
            'questiondown exclamdown logicalnot radical florin approxequal delta guillemotleft '.
            'guillemotright ellipsis space Agrave Atilde Otilde OE oe '.
            'endash emdash quotedblleft quotedblright quoteleft quoteright divide lozenge '.
            'ydieresis Ydieresis fraction currency guilsinglleft guilsinglright fi fl '.
            'daggerdbl periodcentered quotesinglbase quotedblbase perthousand Acircumflex Ecircumflex Aacute '.
            'Edieresis Egrave Iacute Icircumflex Idieresis Igrave Oacute Ocircumflex '.
            'heart Ograve Uacute Ucircumflex Ugrave dotlessi circumflex tilde '.
            'macron breve dotaccent ring cedilla hungarumlaut ogonek caron';

        // The method is declared `: array`; hand the table back as a list.
        return explode(' ', $encoding);
    }
}
View file

@ -0,0 +1,76 @@
* @file
* This file is part of the PdfParser library.
* @author Sébastien MALOT <>
* @date 2017-01-03
* @license LGPLv3
* @url <>
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <>
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <>.
// Source :
namespace Smalot\PdfParser\Encoding;
* Class StandardEncoding
class StandardEncoding extends AbstractEncoding
{
    /**
     * Returns the glyph names of the Standard encoding.
     *
     * The table is kept as one space-separated string and split on spaces, so the
     * position of a name in the returned list is the character code it stands for.
     *
     * @return array list of glyph names
     */
    public function getTranslations(): array
    {
        $encoding =
            '.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef '.
            '.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef '.
            '.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef '.
            '.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef '.
            'space exclam quotedbl numbersign dollar percent ampersand quoteright '.
            'parenleft parenright asterisk plus comma hyphen period slash zero '.
            'one two three four five six seven eight nine colon semicolon less '.
            'equal greater question at A B C D E F G H I J K L M N O P Q R S T U '.
            'V W X Y Z bracketleft backslash bracketright asciicircum underscore '.
            'quoteleft a b c d e f g h i j k l m n o p q r s t u v w x y z '.
            'braceleft bar braceright asciitilde .notdef .notdef .notdef .notdef '.
            '.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef '.
            '.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef '.
            '.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef '.
            '.notdef .notdef .notdef .notdef .notdef .notdef exclamdown cent '.
            'sterling fraction yen florin section currency quotesingle '.
            'quotedblleft guillemotleft guilsinglleft guilsinglright fi fl '.
            '.notdef endash dagger daggerdbl periodcentered .notdef paragraph '.
            'bullet quotesinglbase quotedblbase quotedblright guillemotright '.
            'ellipsis perthousand .notdef questiondown .notdef grave acute '.
            'circumflex tilde macron breve dotaccent dieresis .notdef ring '.
            'cedilla .notdef hungarumlaut ogonek caron emdash .notdef .notdef '.
            '.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef '.
            '.notdef .notdef .notdef .notdef .notdef .notdef AE .notdef '.
            'ordfeminine .notdef .notdef .notdef .notdef Lslash Oslash OE '.
            'ordmasculine .notdef .notdef .notdef .notdef .notdef ae .notdef '.
            '.notdef .notdef dotlessi .notdef .notdef lslash oslash oe germandbls '.
            '.notdef .notdef .notdef .notdef';

        // The method is declared `: array`; hand the table back as a list.
        return explode(' ', $encoding);
    }
}
* @file
* This file is part of the PdfParser library.
* @author Sébastien MALOT <>
* @date 2017-01-03
* @license LGPLv3
* @url <>
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <>
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <>.
// Source :
namespace Smalot\PdfParser\Encoding;
* Class WinAnsiEncoding
class WinAnsiEncoding extends AbstractEncoding
{
    /**
     * Returns the glyph names of the WinAnsi encoding.
     *
     * The table is kept as one space-separated string and split on spaces, so the
     * position of a name in the returned list is the character code it stands for.
     *
     * @return array list of glyph names
     */
    public function getTranslations(): array
    {
        $encoding =
            '.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef '.
            '.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef '.
            '.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef '.
            '.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef '.
            'space exclam quotedbl numbersign dollar percent ampersand quotesingle '.
            'parenleft parenright asterisk plus comma hyphen period slash zero one '.
            'two three four five six seven eight nine colon semicolon less equal '.
            'greater question at A B C D E F G H I J K L M N O P Q R S T U V W X '.
            'Y Z bracketleft backslash bracketright asciicircum underscore '.
            'grave a b c d e f g h i j k l m n o p q r s t u v w x y z '.
            'braceleft bar braceright asciitilde bullet Euro bullet quotesinglbase '.
            'florin quotedblbase ellipsis dagger daggerdbl circumflex perthousand '.
            'Scaron guilsinglleft OE bullet Zcaron bullet bullet quoteleft quoteright '.
            'quotedblleft quotedblright bullet endash emdash tilde trademark scaron '.
            'guilsinglright oe bullet zcaron Ydieresis space exclamdown cent '.
            'sterling currency yen brokenbar section dieresis copyright '.
            'ordfeminine guillemotleft logicalnot hyphen registered macron degree '.
            'plusminus twosuperior threesuperior acute mu paragraph '.
            'periodcentered cedilla onesuperior ordmasculine guillemotright '.
            'onequarter onehalf threequarters questiondown Agrave Aacute '.
            'Acircumflex Atilde Adieresis Aring AE Ccedilla Egrave Eacute '.
            'Ecircumflex Edieresis Igrave Iacute Icircumflex Idieresis Eth Ntilde '.
            'Ograve Oacute Ocircumflex Otilde Odieresis multiply Oslash Ugrave '.
            'Uacute Ucircumflex Udieresis Yacute Thorn germandbls agrave aacute '.
            'acircumflex atilde adieresis aring ae ccedilla egrave eacute '.
            'ecircumflex edieresis igrave iacute icircumflex idieresis eth ntilde '.
            'ograve oacute ocircumflex otilde odieresis divide oslash ugrave '.
            'uacute ucircumflex udieresis yacute thorn ydieresis';

        // The method is declared `: array`; hand the table back as a list.
        return explode(' ', $encoding);
    }
}
namespace Smalot\PdfParser\Exception;
* @file
* This file is part of the PdfParser library.
* @author Sébastien MALOT <>
* @date 2017-01-03
* @license LGPLv3
* @url <>
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <>
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <>.
namespace Smalot\PdfParser;
use Smalot\PdfParser\Encoding\WinAnsiEncoding;
use Smalot\PdfParser\Exception\EncodingNotFoundException;
* Class Font
class Font extends PDFObject
public const MISSING = '?';
* @var array
protected $table = null;
* @var array
protected $tableSizes = null;
* Caches results from uchr.
* @var array
private static $uchrCache = [];
* In some PDF-files encoding could be referenced by object id but object itself does not contain
* `/Type /Encoding` in its dictionary. These objects wouldn't be initialized as Encoding in
* \Smalot\PdfParser\PDFObject::factory() during file parsing (they would be just PDFObject).
* Therefore, we create an instance of Encoding from them during decoding and cache this value in this property.
* @var Encoding
* @see
private $initializedEncodingByPdfObject;
/**
 * Prepares the font for decoding by building its character translation table.
 */
public function init()
{
    // Load translate table.
    $this->loadTranslateTable();
}
/**
 * Returns the `BaseFont` entry as a string, or '[Unknown]' when the font has none.
 */
public function getName(): string
{
    if ($this->has('BaseFont')) {
        return (string) $this->get('BaseFont');
    }

    return '[Unknown]';
}
/**
 * Returns the `Subtype` entry of the font header, cast to string.
 */
public function getType(): string
{
    $subtype = $this->header->get('Subtype');

    return (string) $subtype;
}
/**
 * Describes the font: `Name`, `Type` and `Encoding` (default 'Ansi'), followed by
 * whatever the parent reports. The font's own three keys win over parent keys of the same name.
 *
 * @param bool $deep forwarded unchanged to the parent implementation
 */
public function getDetails(bool $deep = true): array
{
    $own = [
        'Name' => $this->getName(),
        'Type' => $this->getType(),
        'Encoding' => ($this->has('Encoding') ? (string) $this->get('Encoding') : 'Ansi'),
    ];

    return $own + parent::getDetails($deep);
}
/**
 * Translates one encoded character through the font's translation table.
 *
 * When the character is not in the table, the result is self::MISSING if $use_default
 * is true. Otherwise the raw character is returned, except for single-byte characters
 * of a WinAnsi-encoded font, which are converted with uchr().
 *
 * @param string $char        raw character bytes as found in the content stream
 * @param bool   $use_default return self::MISSING instead of the fallback for unknown characters
 *
 * @return string|bool
 */
public function translateChar(string $char, bool $use_default = true)
{
    $dec = hexdec(bin2hex($char));

    // loadTranslateTable() returns the cached table once it has been built, so the
    // lookup also works when init() has not run yet.
    $table = $this->loadTranslateTable();

    if (\array_key_exists($dec, $table)) {
        return $table[$dec];
    }

    // fallback for decoding single-byte ANSI characters that are not in the lookup table
    $fallbackDecoded = $char;
    if (
        \strlen($char) < 2
        && $this->has('Encoding')
        && $this->get('Encoding') instanceof Encoding
    ) {
        try {
            // Encoding::__toString() yields the class name with a leading backslash,
            // whereas ::class has none; normalise before comparing.
            $encodingClass = ltrim((string) $this->get('Encoding'), '\\');
            if (WinAnsiEncoding::class === $encodingClass) {
                $fallbackDecoded = self::uchr($dec);
            }
        } catch (EncodingNotFoundException $e) {
            // Encoding->getEncodingClass() throws EncodingNotFoundException when BaseEncoding doesn't exists
            // See table 5.11 on PDF 1.5 specs for more info
        }
    }

    return $use_default ? self::MISSING : $fallbackDecoded;
}
/**
 * Convert unicode character code to "utf-8" encoded string.
 *
 * Results are memoised per code in self::$uchrCache.
 */
public static function uchr(int $code): string
{
    if (isset(self::$uchrCache[$code])) {
        return self::$uchrCache[$code];
    }

    // html_entity_decode() will not work with UTF-16 or UTF-32 char entities,
    // therefore, we use mb_convert_encoding() instead
    $encoded = mb_convert_encoding("&#{$code};", 'UTF-8', 'HTML-ENTITIES');
    self::$uchrCache[$code] = $encoded;

    return self::$uchrCache[$code];
}
* Init internal chars translation table by ToUnicode CMap.
public function loadTranslateTable(): array
// Builds the code => UTF-8 string table from the font's 'ToUnicode' stream (when present),
// stores it in $this->table and returns it.
// A table that is already set (by an earlier call or by setTable()) is returned unchanged.
if (null !== $this->table) {
return $this->table;
$this->table = [];
// Default widths of source ('from') and destination ('to') codes; they are derived below as
// hex-digit count / 2, and 'from' is later used as a byte length when slicing text.
$this->tableSizes = [
'from' => 1,
'to' => 1,
if ($this->has('ToUnicode')) {
$content = $this->get('ToUnicode')->getContent();
$matches = [];
// Support for multiple spacerange sections
if (preg_match_all('/begincodespacerange(?P<sections>.*?)endcodespacerange/s', $content, $matches)) {
foreach ($matches['sections'] as $section) {
$regexp = '/<(?P<from>[0-9A-F]+)> *<(?P<to>[0-9A-F]+)>[ \r\n]+/is';
// $matches is reused for the inner match; the sizes come from the first range of the section.
preg_match_all($regexp, $section, $matches);
$this->tableSizes = [
'from' => max(1, \strlen(current($matches['from'])) / 2),
'to' => max(1, \strlen(current($matches['to'])) / 2),
// Support for multiple bfchar sections
if (preg_match_all('/beginbfchar(?P<sections>.*?)endbfchar/s', $content, $matches)) {
foreach ($matches['sections'] as $section) {
$regexp = '/<(?P<from>[0-9A-F]+)> +<(?P<to>[0-9A-F]+)>[ \r\n]+/is';
preg_match_all($regexp, $section, $matches);
$this->tableSizes['from'] = max(1, \strlen(current($matches['from'])) / 2);
foreach ($matches['from'] as $key => $from) {
// NOTE(review): the arguments of this preg_split() call are not visible in this copy of the file.
$parts = preg_split(
$text = '';
// Each part is read as a hexadecimal code point and appended as UTF-8.
foreach ($parts as $part) {
$text .= self::uchr(hexdec($part));
$this->table[hexdec($from)] = $text;
// Support for multiple bfrange sections
if (preg_match_all('/beginbfrange(?P<sections>.*?)endbfrange/s', $content, $matches)) {
foreach ($matches['sections'] as $section) {
// Support for : <srcCode1> <srcCode2> <dstString>
$regexp = '/<(?P<from>[0-9A-F]+)> *<(?P<to>[0-9A-F]+)> *<(?P<offset>[0-9A-F]+)>[ \r\n]+/is';
preg_match_all($regexp, $section, $matches);
foreach ($matches['from'] as $key => $from) {
$char_from = hexdec($from);
$char_to = hexdec($matches['to'][$key]);
$offset = hexdec($matches['offset'][$key]);
// Consecutive source codes map to consecutive code points starting at $offset.
for ($char = $char_from; $char <= $char_to; ++$char) {
$this->table[$char] = self::uchr($char - $char_from + $offset);
// Support for : <srcCode1> <srcCodeN> [<dstString1> <dstString2> ... <dstStringN>]
// Some PDF file has 2-byte Unicode values on new lines > added \r\n
$regexp = '/<(?P<from>[0-9A-F]+)> *<(?P<to>[0-9A-F]+)> *\[(?P<strings>[\r\n<>0-9A-F ]+)\][ \r\n]+/is';
preg_match_all($regexp, $section, $matches);
foreach ($matches['from'] as $key => $from) {
$char_from = hexdec($from);
$strings = [];
preg_match_all('/<(?P<string>[0-9A-F]+)> */is', $matches['strings'][$key], $strings);
// The n-th listed string is the translation of source code $char_from + n.
foreach ($strings['string'] as $position => $string) {
// NOTE(review): the arguments of this preg_split() call are not visible in this copy of the file.
$parts = preg_split(
$text = '';
foreach ($parts as $part) {
$text .= self::uchr(hexdec($part));
$this->table[$char_from + $position] = $text;
return $this->table;
/**
 * Replace the char translation table with a caller-supplied one where:
 * - key   - integer character code;
 * - value - "utf-8" encoded value.
 *
 * @param array $table the new translation table
 *
 * @return void
 */
public function setTable(array $table)
{
    $this->table = $table;
}
/**
 * Calculate text width with data from header 'Widths'.
 *
 * Each character of $text is mapped back to its code through the translation
 * table, then to an index into 'Widths' through 'FirstChar'/'LastChar'.
 * Characters for which any of these lookups fails are collected in $missing
 * and do not contribute to the width.
 *
 * @param string     $text    text whose width is wanted
 * @param array|null $missing receives the list of characters without a known width
 *
 * @return float|null sum of the widths found, or null when no character had a width
 */
public function calculateTextWidth(string $text, ?array &$missing = null): ?float
{
    $index_map = array_flip($this->table);
    $details = $this->getDetails();

    // A font without a Widths entry simply has no known widths.
    $widths = $details['Widths'] ?? [];

    // Widths array is zero indexed but table is not. We must map them based on FirstChar and LastChar
    $width_map = array_flip(range((int) ($details['FirstChar'] ?? 0), (int) ($details['LastChar'] ?? 0)));

    $width = null;
    $missing = [];
    $textLength = mb_strlen($text);

    for ($i = 0; $i < $textLength; ++$i) {
        $char = mb_substr($text, $i, 1);

        if (
            !\array_key_exists($char, $index_map)
            || !\array_key_exists($index_map[$char], $width_map)
            || !\array_key_exists($width_map[$index_map[$char]], $widths)
        ) {
            $missing[] = $char;
            continue;
        }

        $width_index = $width_map[$index_map[$char]];
        $width += $widths[$width_index];
    }

    return $width;
}
/**
 * Decode hexadecimal encoded string. If $add_braces is true result value would be wrapped by parentheses.
 *
 * Every "<hex digits>" group in $hexa is replaced by the bytes it encodes;
 * text outside such groups is copied through. Input containing "<?xml" is
 * returned untouched.
 *
 * @param string $hexa       text containing "<...>" hexadecimal strings
 * @param bool   $add_braces wrap each decoded group in "(" ")" and escape its backslashes
 */
public static function decodeHexadecimal(string $hexa, bool $add_braces = false): string
{
    // Special shortcut for XML content.
    if (false !== stripos($hexa, '<?xml')) {
        return $hexa;
    }

    $chunks = preg_split('/(<[a-f0-9]+>)/si', $hexa, -1, \PREG_SPLIT_NO_EMPTY | \PREG_SPLIT_DELIM_CAPTURE);
    $decoded = '';

    foreach ($chunks as $chunk) {
        $isHexGroup = preg_match('/^<.*>$/s', $chunk) && false === stripos($chunk, '<?xml');

        if (!$isHexGroup) {
            $decoded .= $chunk;
            continue;
        }

        // strip line breaks
        $hex = trim(preg_replace("/[\r\n]/", '', $chunk), '<>');
        $bytes = pack('H*', $hex);

        if ($add_braces) {
            $decoded .= '('.preg_replace('/\\\/s', '\\\\\\', $bytes).')';
        } else {
            $decoded .= $bytes;
        }
    }

    return $decoded;
}
/**
 * Decode string with octal-decoded chunks.
 *
 * Every backslash followed by exactly three octal digits is replaced by the
 * byte with that octal value; everything else is kept as is.
 */
public static function decodeOctal(string $text): string
{
    $chunks = preg_split('/(\\\\[0-7]{3})/s', $text, -1, \PREG_SPLIT_NO_EMPTY | \PREG_SPLIT_DELIM_CAPTURE);

    $decoded = array_map(
        static function (string $chunk): string {
            if (!preg_match('/^\\\\[0-7]{3}$/', $chunk)) {
                return $chunk;
            }

            return \chr(octdec(trim($chunk, '\\')));
        },
        $chunks
    );

    return implode('', $decoded);
}
/**
 * Decode "#xx" escapes, where xx are two hexadecimal digits, to the byte they encode.
 *
 * The previous pattern only accepted decimal digits ("#20") although the
 * value was always interpreted as hexadecimal, so escapes containing a-f
 * (for example "#2F" or "#C3") were left undecoded.
 *
 * @param string $text text possibly containing "#xx" escapes
 *
 * @return string the text with every escape replaced by its byte
 */
public static function decodeEntities(string $text): string
{
    $decoded = preg_replace_callback(
        '/#([0-9A-Fa-f]{2})/',
        static function (array $match): string {
            return \chr(hexdec($match[1]));
        },
        $text
    );

    // preg_replace_callback() yields null on a PCRE error; keep the input in that case.
    return $decoded ?? $text;
}
/**
 * Check if given string is Unicode text (by BOM);
 * If true - decode to "utf-8" encoded string.
 * Otherwise - return text as is.
 *
 * The payload after the "\xFE\xFF" marker is converted as UTF-16BE, so
 * characters stored as surrogate pairs are decoded as one code point
 * instead of two separate 16-bit units.
 *
 * @todo Rename in next major release to make the name correspond to reality (for ex. decodeIfUnicode())
 */
public static function decodeUnicode(string $text): string
{
    // Exact byte comparison: the marker must not be matched case-insensitively.
    if (0 !== strncmp($text, "\xFE\xFF", 2)) {
        return $text;
    }

    // Strip U+FEFF byte order marker.
    $payload = (string) substr($text, 2);

    return mb_convert_encoding($payload, 'UTF-8', 'UTF-16BE');
}
/**
 * Font space limit as configured on the config object.
 *
 * @todo Deprecated, use $this->config->getFontSpaceLimit() instead.
 */
protected function getFontSpaceLimit(): int
{
    $limit = $this->config->getFontSpaceLimit();

    return $limit;
}
/**
 * Decode text by commands array.
 *
 * Commands of type 'n' are numbers: one smaller than the font space limit
 * starts a new word, and numbers never add text themselves. Commands of
 * type '<' are hexadecimal strings; anything else is treated as a literal
 * string that may contain octal escapes. Words are finally decoded with
 * decodeContent() and joined with single spaces.
 *
 * @param array $commands list of commands, each indexed by PDFObject::TYPE and PDFObject::COMMAND
 */
public function decodeText(array $commands): string
{
    $word_position = 0;
    $words = [];
    $font_space = $this->getFontSpaceLimit();

    foreach ($commands as $command) {
        switch ($command[PDFObject::TYPE]) {
            case 'n':
                if ((float) trim($command[PDFObject::COMMAND]) < $font_space) {
                    $word_position = \count($words);
                }
                continue 2;

            case '<':
                // Decode hexadecimal.
                $text = self::decodeHexadecimal('<'.$command[PDFObject::COMMAND].'>');
                break;

            default:
                // Decode octal (if necessary).
                $text = self::decodeOctal($command[PDFObject::COMMAND]);
        }

        // replace escaped chars
        $text = str_replace(
            ['\\\\', '\(', '\)', '\n', '\r', '\t', '\f', '\ '],
            ['\\', '(', ')', "\n", "\r", "\t", "\f", ' '],
            $text
        );

        // add content to result string
        $words[$word_position] = ($words[$word_position] ?? '').$text;
    }

    // Map instead of foreach-by-reference, which would leave a dangling reference behind.
    $decoded = array_map(
        function (string $word): string {
            return $this->decodeContent($word);
        },
        $words
    );

    return implode(' ', $decoded);
}
/**
 * Decode given $text to "utf-8" encoded string.
 *
 * Strategy, in order: the ToUnicode CMap when the font has one; the font's
 * Encoding entry when it yields a result; otherwise autodetection.
 *
 * @param bool $unicode This parameter is deprecated and might be removed in a future release
 */
public function decodeContent(string $text, ?bool &$unicode = null): string
{
    if ($this->has('ToUnicode')) {
        return $this->decodeContentByToUnicodeCMapOrDescendantFonts($text);
    }

    $byEncoding = $this->has('Encoding') ? $this->decodeContentByEncoding($text) : null;

    if (null !== $byEncoding) {
        return $byEncoding;
    }

    return $this->decodeContentByAutodetectIfNecessary($text);
}
/**
 * First try to decode $text by ToUnicode CMap.
 * If char translation not found in ToUnicode CMap tries:
 *  - If DescendantFonts exists tries to decode char by one of that fonts.
 *      - If have no success to decode by DescendantFonts interpret $text as a string with "Windows-1252" encoding.
 *  - If DescendantFonts does not exist just return "?" as decoded char.
 *
 * The text is consumed in chunks of $this->tableSizes['from'] bytes.
 *
 * @todo Seems this is invalid algorithm that do not follow pdf-format specification. Must be rewritten.
 */
private function decodeContentByToUnicodeCMapOrDescendantFonts(string $text): string
{
    $step = $this->tableSizes['from'];

    if (!$step) {
        return $text;
    }

    $output = '';
    $total = \strlen($text);

    for ($pos = 0; $pos < $total; $pos += $step) {
        $chunk = substr($text, $pos, $step);
        $translated = $this->translateChar($chunk, false);

        if (false !== $translated) {
            $output .= $translated;
            continue;
        }

        if (!$this->has('DescendantFonts')) {
            $output .= self::MISSING;
            continue;
        }

        if ($this->get('DescendantFonts') instanceof PDFObject) {
            $fonts = $this->get('DescendantFonts')->getHeader()->getElements();
        } else {
            $fonts = $this->get('DescendantFonts')->getContent();
        }

        // The first descendant font that knows the chunk wins.
        $translated = false;
        foreach ($fonts as $font) {
            if (!$font instanceof self) {
                continue;
            }

            $translated = $font->translateChar($chunk, false);
            if (false !== $translated) {
                $translated = mb_convert_encoding($translated, 'UTF-8', 'Windows-1252');
                break;
            }
        }

        $output .= false !== $translated
            ? $translated
            : mb_convert_encoding($chunk, 'UTF-8', 'Windows-1252');
    }

    return $output;
}
/**
 * Decode content by any type of Encoding (dictionary's item) instance.
 *
 * @return string|null decoded text; null only when the named encoding is not one the element decoder knows
 */
private function decodeContentByEncoding(string $text): ?string
{
    $encoding = $this->get('Encoding');

    // When Encoding referenced by object id (/Encoding 520 0 R) but object itself does not contain `/Type /Encoding` in it's dictionary.
    if ($encoding instanceof PDFObject) {
        $encoding = $this->getInitializedEncodingByPdfObject($encoding);
    }

    // When Encoding referenced by object id (/Encoding 520 0 R) but object itself contains `/Type /Encoding` in it's dictionary.
    if ($encoding instanceof Encoding) {
        return $this->decodeContentByEncodingEncoding($text, $encoding);
    }

    // When Encoding is just string (/Encoding /WinAnsiEncoding)
    if ($encoding instanceof Element) { // todo: ElementString class must by used?
        return $this->decodeContentByEncodingElement($text, $encoding);
    }

    // don't double-encode strings already in UTF-8
    $alreadyUtf8 = mb_check_encoding($text, 'UTF-8');

    return $alreadyUtf8 ? $text : mb_convert_encoding($text, 'UTF-8', 'Windows-1252');
}
/**
 * Returns already created or create a new one if not created before Encoding instance by PDFObject instance.
 *
 * The instance is cached on the font, so later calls return the first one
 * created regardless of the argument.
 */
private function getInitializedEncodingByPdfObject(PDFObject $PDFObject): Encoding
{
    if ($this->initializedEncodingByPdfObject) {
        return $this->initializedEncodingByPdfObject;
    }

    $created = $this->createInitializedEncodingByPdfObject($PDFObject);
    $this->initializedEncodingByPdfObject = $created;

    return $created;
}
/**
 * Decode content when $encoding (given by $this->get('Encoding')) is instance of Encoding.
 *
 * Each byte is translated through the Encoding; a byte the Encoding has no
 * translation for (null) keeps its own value as code point.
 */
private function decodeContentByEncodingEncoding(string $text, Encoding $encoding): string
{
    $decoded = '';
    $total = \strlen($text);
    $pos = 0;

    while ($pos < $total) {
        $byteValue = hexdec(bin2hex($text[$pos]));
        $mapped = $encoding->translateChar($byteValue);
        $decoded .= self::uchr(null !== $mapped ? $mapped : $byteValue);
        ++$pos;
    }

    return $decoded;
}
/**
 * Decode content when $encoding (given by $this->get('Encoding')) is instance of Element.
 *
 * @return string|null converted text, or null when the encoding name has no iconv equivalent
 */
private function decodeContentByEncodingElement(string $text, Element $encoding): ?string
{
    // mb_convert_encoding does not support MacRoman/macintosh,
    // so we use iconv() here
    $iconvName = $this->getIconvEncodingNameOrNullByPdfEncodingName($encoding->getContent());

    if (!$iconvName) {
        return null;
    }

    return iconv($iconvName, 'UTF-8', $text);
}
/**
 * Convert PDF encoding name to iconv-known encoding name.
 *
 * @return string|null the iconv name, or null for a PDF encoding name that is not listed
 */
private function getIconvEncodingNameOrNullByPdfEncodingName(string $pdfEncodingName): ?string
{
    switch ($pdfEncodingName) {
        case 'StandardEncoding':
            return 'ISO-8859-1';
        case 'MacRomanEncoding':
            return 'MACINTOSH';
        case 'WinAnsiEncoding':
            return 'CP1252';
        default:
            return null;
    }
}
/**
 * If string seems like "utf-8" encoded string do nothing and just return given string as is.
 * Otherwise, interpret string as "Window-1252" encoded string.
 *
 * @return string|false
 */
private function decodeContentByAutodetectIfNecessary(string $text)
{
    // todo: Why exactly `Windows-1252` used?
    return mb_check_encoding($text, 'UTF-8')
        ? $text
        : mb_convert_encoding($text, 'UTF-8', 'Windows-1252');
}
/**
 * Create Encoding instance by PDFObject instance and init it.
 *
 * @param PDFObject $PDFObject object whose document, header, content and config the Encoding is built from
 *
 * @return Encoding the new instance, after its init() has run
 */
private function createInitializedEncodingByPdfObject(PDFObject $PDFObject): Encoding
{
    $encoding = $this->createEncodingByPdfObject($PDFObject);
    // Without this call the method would return the same uninitialised
    // instance as createEncodingByPdfObject().
    $encoding->init();

    return $encoding;
}
/**
 * Create Encoding instance by PDFObject instance (without init).
 *
 * The new Encoding shares the document, header, content and config of the given object.
 */
private function createEncodingByPdfObject(PDFObject $PDFObject): Encoding
{
    return new Encoding(
        $PDFObject->getDocument(),
        $PDFObject->getHeader(),
        $PDFObject->getContent(),
        $PDFObject->getConfig()
    );
}

namespace Smalot\PdfParser\Font;
use Smalot\PdfParser\Font;
/**
 * Class FontCIDFontType0
 *
 * Font variant without behaviour of its own; everything is inherited from Font.
 */
class FontCIDFontType0 extends Font
{
}

namespace Smalot\PdfParser\Font;
use Smalot\PdfParser\Font;
/**
 * Class FontCIDFontType2
 *
 * Font variant without behaviour of its own; everything is inherited from Font.
 */
class FontCIDFontType2 extends Font
{
}

namespace Smalot\PdfParser\Font;
use Smalot\PdfParser\Font;
/**
 * Class FontTrueType
 *
 * Font variant without behaviour of its own; everything is inherited from Font.
 */
class FontTrueType extends Font
{
}

namespace Smalot\PdfParser\Font;
use Smalot\PdfParser\Font;
/**
 * Class FontType0
 *
 * Font variant without behaviour of its own; everything is inherited from Font.
 */
class FontType0 extends Font
{
}

namespace Smalot\PdfParser\Font;
use Smalot\PdfParser\Font;
/**
 * Class FontType1
 *
 * Font variant without behaviour of its own; everything is inherited from Font.
 */
class FontType1 extends Font
{
}

namespace Smalot\PdfParser\Font;
use Smalot\PdfParser\Font;
/**
 * Class FontType3
 *
 * Font variant without behaviour of its own; everything is inherited from Font.
 */
class FontType3 extends Font
{
}

namespace Smalot\PdfParser;
use Smalot\PdfParser\Element\ElementArray;
use Smalot\PdfParser\Element\ElementMissing;
use Smalot\PdfParser\Element\ElementStruct;
use Smalot\PdfParser\Element\ElementXRef;
/**
 * Class Header
 *
 * Named collection of the elements of a PDF dictionary. Elements that are
 * cross references are replaced by the object they point to the first time
 * they are resolved.
 */
class Header
{
    /**
     * @var Document
     */
    protected $document = null;

    /**
     * @var Element[]
     */
    protected $elements = null;

    /**
     * @param Element[] $elements list of elements
     * @param Document  $document document
     */
    public function __construct(array $elements = [], ?Document $document = null)
    {
        $this->elements = $elements;
        $this->document = $document;
    }

    /**
     * Initialise every element that is an Element instance.
     */
    public function init()
    {
        foreach ($this->elements as $element) {
            if ($element instanceof Element) {
                $element->init();
            }
        }
    }

    /**
     * Returns all elements.
     *
     * Cross references are resolved first, so the returned list holds the
     * referenced objects where they could be found.
     */
    public function getElements()
    {
        foreach (array_keys($this->elements) as $name) {
            $this->resolveXRef($name);
        }

        return $this->elements;
    }

    /**
     * Used only for debug.
     *
     * @return array class name of each element, indexed like the elements
     */
    public function getElementTypes(): array
    {
        $types = [];

        foreach ($this->elements as $key => $element) {
            $types[$key] = \get_class($element);
        }

        return $types;
    }

    /**
     * Export the elements as plain values.
     *
     * Nested headers and objects are only expanded when $deep is true (objects
     * one level only); arrays are exported only when $deep is true; any other
     * Element is exported as its string form.
     */
    public function getDetails(bool $deep = true): array
    {
        $values = [];
        $elements = $this->getElements();

        foreach ($elements as $key => $element) {
            if ($element instanceof self && $deep) {
                $values[$key] = $element->getDetails($deep);
            } elseif ($element instanceof PDFObject && $deep) {
                $values[$key] = $element->getDetails(false);
            } elseif ($element instanceof ElementArray) {
                if ($deep) {
                    $values[$key] = $element->getDetails();
                }
            } elseif ($element instanceof Element) {
                $values[$key] = (string) $element;
            }
        }

        return $values;
    }

    /**
     * Indicate if an element name is available in header.
     *
     * @param string $name the name of the element
     */
    public function has(string $name): bool
    {
        return \array_key_exists($name, $this->elements);
    }

    /**
     * Fetch an element by name, resolving a cross reference if needed.
     *
     * @return Element|PDFObject the element, or an ElementMissing when the name is unknown
     */
    public function get(string $name)
    {
        if (\array_key_exists($name, $this->elements) && $element = $this->resolveXRef($name)) {
            return $element;
        }

        return new ElementMissing();
    }

    /**
     * Resolve XRef to object.
     *
     * @return Element|PDFObject
     *
     * @throws \Exception
     */
    protected function resolveXRef(string $name)
    {
        if (($obj = $this->elements[$name]) instanceof ElementXRef && null !== $this->document) {
            /** @var ElementXRef $obj */
            $object = $this->document->getObjectById($obj->getId());

            if (null === $object) {
                return new ElementMissing();
            }

            // Update elements list for future calls.
            $this->elements[$name] = $object;
        }

        return $this->elements[$name];
    }

    /**
     * Parse a header from raw content.
     *
     * @param string   $content  The content to parse
     * @param Document $document The document
     * @param int      $position The new position of the cursor after parsing
     */
    public static function parse(string $content, Document $document, int &$position = 0): self
    {
        /* @var Header $header */
        if ('<<' === substr(trim($content), 0, 2)) {
            $header = ElementStruct::parse($content, $document, $position);
        } else {
            $elements = ElementArray::parse($content, $document, $position);
            $header = new self([], $document);

            if ($elements) {
                $header = new self($elements->getRawContent(), null);
            }
        }

        if ($header) {
            return $header;
        }

        // Build an empty header.
        return new self([], $document);
    }
}

namespace Smalot\PdfParser;
use Smalot\PdfParser\XObject\Form;
use Smalot\PdfParser\XObject\Image;
* Class PDFObject
class PDFObject
public const TYPE = 't';
public const OPERATOR = 'o';
public const COMMAND = 'c';
* The recursion stack.
* @var array
public static $recursionStack = [];
* @var Document
protected $document = null;
* @var Header
protected $header = null;
* @var string
protected $content = null;
* @var Config
protected $config;
/**
 * @param Document    $document document the object belongs to
 * @param Header|null $header   object dictionary; an empty Header is used when omitted
 * @param string|null $content  raw stream content
 * @param Config|null $config   parser configuration
 */
public function __construct(
    Document $document,
    ?Header $header = null,
    ?string $content = null,
    ?Config $config = null
) {
    if (null === $header) {
        $header = new Header();
    }

    $this->document = $document;
    $this->header = $header;
    $this->content = $content;
    $this->config = $config;
}
/**
 * Hook called after construction; the base object has nothing to initialise.
 */
public function init()
{
}

/**
 * Document this object belongs to.
 */
public function getDocument(): Document
{
    return $this->document;
}

/**
 * Header (dictionary) of this object.
 */
public function getHeader(): ?Header
{
    return $this->header;
}

/**
 * Configuration this object was created with.
 */
public function getConfig(): ?Config
{
    return $this->config;
}

/**
 * Shortcut for getHeader()->get($name).
 *
 * @return Element|PDFObject|Header
 */
public function get(string $name)
{
    return $this->header->get($name);
}

/**
 * Shortcut for getHeader()->has($name).
 */
public function has(string $name): bool
{
    return $this->header->has($name);
}

/**
 * Shortcut for getHeader()->getDetails($deep).
 */
public function getDetails(bool $deep = true): array
{
    return $this->header->getDetails($deep);
}

/**
 * Raw content of this object.
 */
public function getContent(): ?string
{
    return $this->content;
}
public function cleanContent(string $content, string $char = 'X')
// Returns a copy of $content in which the parts matched below (escaped characters, inline image
// blocks, bracketed and parenthesised content, EMC markers) are overwritten with $char.
// Every replacement has the same length as the text it replaces, so offsets stay valid.
// Only the first character of $char is used as filler.
$char = $char[0];
// Two-character escape sequences become two filler characters.
$content = str_replace(['\\\\', '\\)', '\\('], $char.$char, $content);
// Remove image bloc with binary content
preg_match_all('/\s(BI\s.*?(\sID\s).*?(\sEI))\s/s', $content, $matches, \PREG_OFFSET_CAPTURE);
foreach ($matches[0] as $part) {
$content = substr_replace($content, str_repeat($char, \strlen($part[0])), $part[1], \strlen($part[0]));
// Clean content in square brackets [.....]
preg_match_all('/\[((\(.*?\)|[0-9\.\-\s]*)*)\]/s', $content, $matches, \PREG_OFFSET_CAPTURE);
foreach ($matches[1] as $part) {
$content = substr_replace($content, str_repeat($char, \strlen($part[0])), $part[1], \strlen($part[0]));
// Clean content in round brackets (.....)
preg_match_all('/\((.*?)\)/s', $content, $matches, \PREG_OFFSET_CAPTURE);
foreach ($matches[1] as $part) {
$content = substr_replace($content, str_repeat($char, \strlen($part[0])), $part[1], \strlen($part[0]));
// Clean structure
if ($parts = preg_split('/(<|>)/s', $content, -1, \PREG_SPLIT_NO_EMPTY | \PREG_SPLIT_DELIM_CAPTURE)) {
$content = '';
$level = 0;
// NOTE(review): no statement changing $level is visible in this copy of the file; presumably it is
// raised on '<' and lowered on '>' so that only top-level text is kept - confirm against the original.
foreach ($parts as $part) {
if ('<' == $part) {
$content .= (0 == $level ? $part : str_repeat($char, \strlen($part)));
if ('>' == $part) {
// Clean BDC and EMC markup
// NOTE(review): the preg_match_all() call that should fill $matches for the BDC markup is not visible
// in this copy of the file; as shown, $matches still holds the result of the round-bracket search.
foreach ($matches[1] as $part) {
$content = substr_replace($content, str_repeat($char, \strlen($part[0])), $part[1], \strlen($part[0]));
preg_match_all('/\s(EMC)\s/s', $content, $matches, \PREG_OFFSET_CAPTURE);
foreach ($matches[1] as $part) {
$content = substr_replace($content, str_repeat($char, \strlen($part[0])), $part[1], \strlen($part[0]));
return $content;
/**
 * Split a content stream into the sections that matter for text extraction:
 * the inside of every BT ... ET block and every "/Name Do" command.
 *
 * Positions are searched in a masked copy of the content (see cleanContent())
 * and the sections are then cut from the unmasked content at the same offsets.
 *
 * @return string[] text blocks first, then Do commands, each group in order of appearance
 */
public function getSectionsText(?string $content): array
{
    $content = ' '.$content.' ';
    $masked = $this->cleanContent($content, '_');
    $sections = [];

    // Extract text blocks.
    $found = preg_match_all('/(\sQ)?\s+BT[\s|\(|\[]+(.*?)\s*ET(\sq)?/s', $masked, $matches, \PREG_OFFSET_CAPTURE);
    if ($found) {
        foreach ($matches[2] as $pos => $part) {
            [$maskedText, $offset] = $part;
            if ('' === $maskedText) {
                continue;
            }

            $section = substr($content, $offset, \strlen($maskedText));

            // Removes BDC and EMC markup.
            $section = preg_replace('/(\/[A-Za-z0-9]+\s*<<.*?)(>>\s*BDC)(.*?)(EMC\s+)/s', '${3}', $section.' ');

            // Add Q and q flags if detected around BT/ET.
            $prefix = !empty($matches[1][$pos][0]) ? "Q\n" : '';
            $suffix = !empty($matches[3][$pos][0]) ? "\nq" : '';

            $sections[] = trim($prefix.$section).$suffix;
        }
    }

    // Extract 'do' commands.
    $found = preg_match_all('/(\/[A-Za-z0-9\.\-_]+\s+Do)\s/s', $masked, $matches, \PREG_OFFSET_CAPTURE);
    if ($found) {
        foreach ($matches[1] as $part) {
            [$maskedText, $offset] = $part;
            $sections[] = substr($content, $offset, \strlen($maskedText));
        }
    }

    return $sections;
}
/**
 * Pick the font to start text decoding with.
 *
 * Candidates are the fonts of the page (when a page is given) followed by the
 * document's first font; the first candidate is returned. Without any
 * candidate a blank Font bound to this document and config is created.
 *
 * @param Page|null $page page whose fonts take precedence
 */
private function getDefaultFont(?Page $page = null): Font
{
    // Explicit ?Page: implicitly nullable parameter types are deprecated in PHP 8.4.
    $fonts = null !== $page ? $page->getFonts() : [];

    $firstFont = $this->document->getFirstFont();
    if (null !== $firstFont) {
        $fonts[] = $firstFont;
    }

    if (\count($fonts) > 0) {
        return reset($fonts);
    }

    return new Font($this->document, null, null, $this->config);
}
/**
 * Extract the text of this object's content stream.
 *
 * The stream is split into sections (see getSectionsText()), each section
 * into commands (see getCommandsText()), and the commands are interpreted:
 * positioning operators add line breaks, tabs or spaces, Tf switches the
 * current font, q/Q save and restore it, the show operators decode text with
 * the current font and Do recurses into the referenced XObject. Operators not
 * handled below (Tc, Ts, Tw, Da, rg, RG, re, co, cs, gs, en, sc, SC, g, G,
 * V, vo, Vo, ...) do not influence the output.
 *
 * @param Page|null $page page providing fonts and XObjects
 *
 * @throws \Exception
 */
public function getText(?Page $page = null): string
{
    $result = '';
    $sections = $this->getSectionsText($this->content);
    $current_font = $this->getDefaultFont($page);
    $clipped_font = $current_font;

    $current_position_td = ['x' => false, 'y' => false];
    $current_position_tm = ['x' => false, 'y' => false];

    self::$recursionStack[] = $this->getUniqueId();

    foreach ($sections as $section) {
        $commands = $this->getCommandsText($section);
        $reverse_text = false;
        $text = '';

        foreach ($commands as $command) {
            switch ($command[self::OPERATOR]) {
                case 'BMC':
                    if ('ReversedChars' == $command[self::COMMAND]) {
                        $reverse_text = true;
                    }
                    break;

                // move text current point
                case 'Td':
                    $args = preg_split('/\s/s', $command[self::COMMAND]);
                    $y = array_pop($args);
                    $x = array_pop($args);
                    if (((float) $x <= 0)
                        || (false !== $current_position_td['y'] && (float) $y < (float) $current_position_td['y'])
                    ) {
                        // vertical offset
                        $text .= "\n";
                    } elseif (false !== $current_position_td['x'] && (float) $x > (float) $current_position_td['x']) {
                        $text .= $this->config->getHorizontalOffset();
                    }
                    $current_position_td = ['x' => $x, 'y' => $y];
                    break;

                // move text current point and set leading
                case 'TD':
                    $args = preg_split('/\s/s', $command[self::COMMAND]);
                    $y = array_pop($args);
                    $x = array_pop($args);
                    if ((float) $y < 0) {
                        $text .= "\n";
                    } elseif ((float) $x <= 0) {
                        $text .= ' ';
                    }
                    break;

                case 'Tf':
                    list($id) = preg_split('/\s/s', $command[self::COMMAND]);
                    $id = trim($id, '/');
                    if (null !== $page) {
                        $new_font = $page->getFont($id);
                        // If an invalid font ID is given, do not update the font.
                        // This should theoretically never happen, as the PDF spec states for the Tf operator:
                        // "The specified font value shall match a resource name in the Font entry of the default resource dictionary"
                        // But we want to make sure that malformed PDFs do not simply crash.
                        if (null !== $new_font) {
                            $current_font = $new_font;
                        }
                    }
                    break;

                case 'Q':
                    // Use clip: restore font.
                    $current_font = $clipped_font;
                    break;

                case 'q':
                    // Use clip: save font.
                    $clipped_font = $current_font;
                    break;

                case "'":
                case 'Tj':
                    $command[self::COMMAND] = [$command];
                    // no break
                case 'TJ':
                    $sub_text = $current_font->decodeText($command[self::COMMAND]);
                    $text .= $sub_text;
                    break;

                // set leading
                case 'TL':
                    $text .= ' ';
                    break;

                case 'Tm':
                    $args = preg_split('/\s/s', $command[self::COMMAND]);
                    $y = array_pop($args);
                    $x = array_pop($args);
                    if (false !== $current_position_tm['x']) {
                        $delta = abs((float) $x - (float) $current_position_tm['x']);
                        if ($delta > 10) {
                            $text .= "\t";
                        }
                    }
                    if (false !== $current_position_tm['y']) {
                        $delta = abs((float) $y - (float) $current_position_tm['y']);
                        if ($delta > 10) {
                            $text .= "\n";
                        }
                    }
                    $current_position_tm = ['x' => $x, 'y' => $y];
                    break;

                // set horizontal scaling
                case 'Tz':
                // move to start of next line
                case 'T*':
                    $text .= "\n";
                    break;

                case 'Do':
                    if (null !== $page) {
                        $args = preg_split('/\s/s', $command[self::COMMAND]);
                        $id = trim(array_pop($args), '/ ');
                        $xobject = $page->getXObject($id);

                        // @todo $xobject could be a ElementXRef object, which would then throw an error
                        // Strict search: the ids are hash strings, and a loose comparison treats
                        // numeric-looking hashes (such as "...0e0...") of different objects as equal.
                        if (\is_object($xobject) && $xobject instanceof self && !\in_array($xobject->getUniqueId(), self::$recursionStack, true)) {
                            // Not a circular reference.
                            $text .= $xobject->getText($page);
                        }
                    }
                    break;

                default:
            }
        }

        // Fix Hebrew and other reverse text oriented languages.
        if ($reverse_text) {
            $chars = mb_str_split($text, 1, mb_internal_encoding());
            $text = implode('', array_reverse($chars));
        }

        $result .= $text;
    }

    return $result.' ';
}
/**
 * Extract the text of this object's content stream as a list of fragments.
 *
 * One entry is produced per text-showing command (', Tj, TJ), decoded with the
 * font selected by the latest Tf, and one per Do command whose XObject could
 * be found on the page (that entry is the XObject's whole text). All other
 * operators are ignored.
 *
 * @param Page|null $page page providing fonts and XObjects
 *
 * @return string[]
 *
 * @throws \Exception
 */
public function getTextArray(?Page $page = null): array
{
    $text = [];
    $sections = $this->getSectionsText($this->content);
    $current_font = new Font($this->document, null, null, $this->config);

    foreach ($sections as $section) {
        $commands = $this->getCommandsText($section);

        foreach ($commands as $command) {
            switch ($command[self::OPERATOR]) {
                case 'Tf':
                    if (null !== $page) {
                        list($id) = preg_split('/\s/s', $command[self::COMMAND]);
                        $id = trim($id, '/');
                        // Page::getFont() returns null for an unknown id; keep the current font
                        // in that case instead of failing on the next text command.
                        $new_font = $page->getFont($id);
                        if (null !== $new_font) {
                            $current_font = $new_font;
                        }
                    }
                    break;

                case "'":
                case 'Tj':
                    $command[self::COMMAND] = [$command];
                    // no break
                case 'TJ':
                    $sub_text = $current_font->decodeText($command[self::COMMAND]);
                    $text[] = $sub_text;
                    break;

                case 'Do':
                    if (null !== $page) {
                        $args = preg_split('/\s/s', $command[self::COMMAND]);
                        $id = trim(array_pop($args), '/ ');
                        if ($xobject = $page->getXObject($id)) {
                            $text[] = $xobject->getText($page);
                        }
                    }
                    break;

                default:
            }
        }
    }

    return $text;
}
public function getCommandsText(string $text_part, int &$offset = 0): array
// Tokenises $text_part, starting at $offset, into a list of commands. Each command is an array with
// the keys self::TYPE (leading character of the operand, 'n' for numbers, '' for a bare operator),
// self::OPERATOR and self::COMMAND (the operand; for '[' a nested command list).
// $offset is passed by reference and advanced past what was consumed, which is how the recursive
// call for '[' hands its end position back to the caller.
$commands = $matches = [];
while ($offset < \strlen($text_part)) {
// Skip leading whitespace (NUL, tab, LF, FF, CR, space).
$offset += strspn($text_part, "\x00\x09\x0a\x0c\x0d\x20", $offset);
$char = $text_part[$offset];
$operator = '';
$type = '';
// false means "nothing recognised at this position".
$command = false;
switch ($char) {
case '/':
$type = $char;
// NOTE(review): the pattern and the $matches argument of this preg_match() call are not visible in this copy of the file.
if (preg_match(
substr($text_part, $offset),
) {
$operator = $matches[2];
$command = $matches[1];
$offset += \strlen($matches[0]);
// NOTE(review): the pattern and the $matches argument of this preg_match() call are not visible in this copy of the file.
} elseif (preg_match(
substr($text_part, $offset),
) {
$operator = $matches[2];
$command = $matches[1];
$offset += \strlen($matches[0]);
case '[':
case ']':
// array object
$type = $char;
if ('[' == $char) {
// get elements
$command = $this->getCommandsText($text_part, $offset);
// An operator of one or two letters may follow the array.
if (preg_match('/^\s*[A-Z]{1,2}\s*/si', substr($text_part, $offset), $matches)) {
$operator = trim($matches[0]);
$offset += \strlen($matches[0]);
} else {
case '<':
case '>':
// array object
$type = $char;
if ('<' == $char) {
// The operand runs up to the next '>'.
$strpos = strpos($text_part, '>', $offset);
$command = substr($text_part, $offset, $strpos - $offset);
$offset = $strpos + 1;
if (preg_match('/^\s*[A-Z]{1,2}\s*/si', substr($text_part, $offset), $matches)) {
$operator = trim($matches[0]);
$offset += \strlen($matches[0]);
case '(':
case ')':
$type = $char;
$strpos = $offset;
if ('(' == $char) {
// Scan for the matching closing parenthesis.
// NOTE(review): the statements that advance $strpos and update $open_bracket are not visible in this copy of the file.
$open_bracket = 1;
while ($open_bracket > 0) {
if (!isset($text_part[$strpos])) {
$ch = $text_part[$strpos];
switch ($ch) {
case '\\':
// REVERSE SOLIDUS (5Ch) (Backslash)
// skip next character
case '(':
case ')':
$command = substr($text_part, $offset, $strpos - $offset - 1);
$offset = $strpos;
if (preg_match('/^\s*([A-Z\']{1,2})\s*/si', substr($text_part, $offset), $matches)) {
$operator = $matches[1];
$offset += \strlen($matches[0]);
if ('ET' == substr($text_part, $offset, 2)) {
// NOTE(review): the pattern and the $matches argument of this preg_match() call are not visible in this copy of the file;
// it provides the named groups 'id' and 'data' used below.
} elseif (preg_match(
substr($text_part, $offset),
) {
$operator = trim($matches['id']);
$command = trim($matches['data']);
$offset += \strlen($matches[0]);
// One or more numbers: stored as a command of type 'n' without operator.
} elseif (preg_match('/^\s*([0-9\.\-]+\s*?)+\s*/si', substr($text_part, $offset), $matches)) {
$type = 'n';
$command = trim($matches[0]);
$offset += \strlen($matches[0]);
// A bare operator without operand.
} elseif (preg_match('/^\s*([A-Z\*]+)\s*/si', substr($text_part, $offset), $matches)) {
$type = '';
$operator = $matches[1];
$command = '';
$offset += \strlen($matches[0]);
if (false !== $command) {
$commands[] = [
self::TYPE => $type,
self::OPERATOR => $operator,
self::COMMAND => $command,
} else {
return $commands;
/**
 * Create the most specific object class for a parsed PDF object.
 *
 * The class is chosen from the header's 'Type' entry (and 'Subtype' for
 * XObjects and fonts); anything unrecognised becomes a plain PDFObject.
 *
 * @param Document    $document document the object belongs to
 * @param Header      $header   dictionary of the object
 * @param string|null $content  raw stream content
 * @param Config|null $config   parser configuration
 */
public static function factory(
    Document $document,
    Header $header,
    ?string $content,
    ?Config $config = null
): self {
    switch ($header->get('Type')->getContent()) {
        case 'XObject':
            switch ($header->get('Subtype')->getContent()) {
                case 'Image':
                    // $config is optional; dereferencing it unconditionally is a fatal error when it is null.
                    // NOTE(review): without a config the image content is kept - confirm this is the wanted default.
                    $retainContent = null === $config || $config->getRetainImageContent();

                    return new Image($document, $header, $retainContent ? $content : null, $config);

                case 'Form':
                    return new Form($document, $header, $content, $config);
            }

            return new self($document, $header, $content, $config);

        case 'Pages':
            return new Pages($document, $header, $content, $config);

        case 'Page':
            return new Page($document, $header, $content, $config);

        case 'Encoding':
            return new Encoding($document, $header, $content, $config);

        case 'Font':
            $subtype = $header->get('Subtype')->getContent();
            $classname = '\Smalot\PdfParser\Font\Font'.$subtype;

            // The subtype comes from the PDF file: only accept a class that really is a Font.
            if (class_exists($classname) && is_a($classname, Font::class, true)) {
                return new $classname($document, $header, $content, $config);
            }

            return new Font($document, $header, $content, $config);

        default:
            return new self($document, $header, $content, $config);
    }
}
/**
 * Returns unique id identifying the object.
 *
 * The id is the object's spl_object_hash().
 */
protected function getUniqueId(): string
{
    $hash = spl_object_hash($this);

    return $hash;
}

os3/PdfParser/Page.php
@ -0,0 +1,953 @@
* @file
* This file is part of the PdfParser library.
* @author Sébastien MALOT <>
* @date 2017-01-03
* @license LGPLv3
* @url <>
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <>
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <>.
namespace Smalot\PdfParser;
use Smalot\PdfParser\Element\ElementArray;
use Smalot\PdfParser\Element\ElementMissing;
use Smalot\PdfParser\Element\ElementNull;
use Smalot\PdfParser\Element\ElementXRef;
class Page extends PDFObject
* @var Font[]
protected $fonts = null;
* @var PDFObject[]
protected $xobjects = null;
* @var array
protected $dataTm = null;
/**
 * Fonts declared in the page's 'Resources', indexed by resource name and,
 * additionally, by the name reduced to digits, dots, dashes and underscores.
 *
 * The table is cached once it has been built from a 'Font' entry.
 *
 * @return Font[]
 */
public function getFonts()
{
    if (null !== $this->fonts) {
        return $this->fonts;
    }

    $resources = $this->get('Resources');
    $hasFontEntry = method_exists($resources, 'has') && $resources->has('Font');

    if (!$hasFontEntry) {
        return [];
    }

    if ($resources->get('Font') instanceof ElementMissing) {
        return [];
    }

    $fonts = $resources->get('Font') instanceof Header
        ? $resources->get('Font')->getElements()
        : $resources->get('Font')->getHeader()->getElements();

    $table = [];

    foreach ($fonts as $id => $font) {
        if (!$font instanceof Font) {
            continue;
        }

        $table[$id] = $font;

        // Store too on cleaned id value (only numeric)
        $id = preg_replace('/[^0-9\.\-_]/', '', $id);
        if ('' != $id) {
            $table[$id] = $font;
        }
    }

    $this->fonts = $table;

    return $table;
}
/**
 * Look up a font of the page by resource name.
 *
 * The exact name is tried first. Font resource names are arbitrary, so the
 * name reduced to digits, dots, dashes and underscores is only a fallback.
 *
 * @param string $id resource name as given to the Tf operator
 *
 * @return Font|null the font, or null when neither form of the name is known
 */
public function getFont(string $id): ?Font
{
    $fonts = $this->getFonts();

    if (isset($fonts[$id])) {
        return $fonts[$id];
    }

    // The exact name was looked up a second time here before; that repeat could never succeed.
    $cleanedId = (string) preg_replace('/[^0-9\.\-_]/', '', $id);

    return $fonts[$cleanedId] ?? null;
}
/**
 * Support for XObject
 *
 * XObjects declared in the page's 'Resources', indexed by resource name and,
 * additionally, by the name reduced to digits, dots, dashes and underscores.
 * The table is cached once it has been built from an 'XObject' entry.
 *
 * @return PDFObject[]
 */
public function getXObjects()
{
    if (null !== $this->xobjects) {
        return $this->xobjects;
    }

    $resources = $this->get('Resources');
    $hasXObjectEntry = method_exists($resources, 'has') && $resources->has('XObject');

    if (!$hasXObjectEntry) {
        return [];
    }

    $xobjects = $resources->get('XObject') instanceof Header
        ? $resources->get('XObject')->getElements()
        : $resources->get('XObject')->getHeader()->getElements();

    $table = [];

    foreach ($xobjects as $id => $xobject) {
        $table[$id] = $xobject;

        // Store too on cleaned id value (only numeric)
        $id = preg_replace('/[^0-9\.\-_]/', '', $id);
        if ('' != $id) {
            $table[$id] = $xobject;
        }
    }

    $this->xobjects = $table;

    return $table;
}
/**
 * Looks up one of this page's XObjects by its exact resource name.
 *
 * @param string $id XObject resource name
 *
 * @return PDFObject|null the XObject, or null when the name is unknown
 */
public function getXObject(string $id): ?PDFObject
{
    $xobjects = $this->getXObjects();

    // The commented-out cleaned-name fallback that followed the return was dead
    // code and has been removed.
    return $xobjects[$id] ?? null;
}
/**
 * Extracts the text of this page from its Contents entry.
 *
 * A Contents entry made of several streams (a numerically keyed header, or an
 * ElementArray) is concatenated into one virtual PDFObject before extraction.
 *
 * @param self|null $page not read by this implementation; kept for signature compatibility
 *
 * @return string the page text, or '' when the page has no usable Contents
 */
public function getText(?self $page = null): string
{
    $contents = $this->get('Contents');
    if (!$contents) {
        return '';
    }

    if ($contents instanceof ElementMissing || $contents instanceof ElementNull) {
        return '';
    }

    if ($contents instanceof PDFObject) {
        $elements = $contents->getHeader()->getElements();

        if (is_numeric(key($elements))) {
            $new_content = '';

            foreach ($elements as $element) {
                if ($element instanceof ElementXRef) {
                    $new_content .= $element->getObject()->getContent();
                } else {
                    $new_content .= $element->getContent();
                }
            }

            $header = new Header([], $this->document);
            $contents = new PDFObject($this->document, $header, $new_content, $this->config);
        }
    } elseif ($contents instanceof ElementArray) {
        // Create a virtual global content.
        $new_content = '';

        foreach ($contents->getContent() as $content) {
            $new_content .= $content->getContent()."\n";
        }

        $header = new Header([], $this->document);
        $contents = new PDFObject($this->document, $header, $new_content, $this->config);
    }

    /*
     * Elements referencing each other on the same page can cause endless loops during text parsing.
     * To combat this we keep a recursionStack containing already parsed elements on the page.
     * The stack is emptied here after getting text from a page; the finally block makes sure
     * that also happens when extraction throws, so a failed page cannot leak its stack into
     * the next one.
     */
    try {
        return $contents->getText($this);
    } finally {
        PDFObject::$recursionStack = [];
    }
}
* Return true if the current page is a (setasign\Fpdi\Fpdi) FPDI/FPDF document
* The metadata 'Producer' should have the value of "FPDF" . FPDF_VERSION if the
* pdf file was generated by FPDF/FPDI.
* @return bool true if the current page is a FPDI/FPDF document
/**
 * Tells whether the document this page belongs to was produced by FPDF/FPDI.
 *
 * @return bool true when the document's 'Producer' detail is a string starting with "FPDF"
 */
public function isFpdf(): bool
{
    // Fetch the details once instead of once per condition.
    $details = $this->document->getDetails();

    return \array_key_exists('Producer', $details)
        && \is_string($details['Producer'])
        && 0 === strncmp($details['Producer'], 'FPDF', 4);
}
* Return the page number of the PDF document of the page object
* @return int the page number
/**
 * Returns the zero-based position of this page in the document's page list.
 *
 * @return int the index of this page; the number of pages when this page is
 *             not part of the list (the loop then runs to completion)
 */
public function getPageNumber(): int
{
    $pages = $this->document->getPages();
    $numOfPages = \count($pages);

    for ($pageNum = 0; $pageNum < $numOfPages; ++$pageNum) {
        if ($pages[$pageNum] === $this) {
            break;
        }
    }

    // Always return an int: falling off the end of an int-typed method is a TypeError.
    return $pageNum;
}
* Return the Object of the page if the document is a FPDF/FPDI document
* If the document was generated by FPDF/FPDI it returns the
* PDFObject of the given page
* @return PDFObject The PDFObject for the page
/**
 * Picks, from this page's XObjects, the entry keyed by this page's number.
 *
 * @return PDFObject the XObject stored under the page number
 */
public function getPDFObjectForFpdf(): PDFObject
{
    $index = $this->getPageNumber();
    $candidates = $this->getXObjects();

    return $candidates[$index];
}
* Return a new PDFObject of the document created with FPDF/FPDI
* For a document generated by FPDF/FPDI, it generates a
* new PDFObject for that document
* @return PDFObject The PDFObject
/**
 * Builds a new PDFObject from the document, header, content and config of
 * the object returned by getPDFObjectForFpdf().
 *
 * @return PDFObject the newly created object
 */
public function createPDFObjectForFpdf(): PDFObject
{
    $source = $this->getPDFObjectForFpdf();
    $body = $source->getContent();
    $sourceHeader = $source->getHeader();

    return new PDFObject($source->document, $sourceHeader, $body, $source->config);
}
* Return page if document is a FPDF/FPDI document
* @return Page The page
/**
 * Builds a new Page from the document, header, content and config of the
 * object returned by getPDFObjectForFpdf().
 *
 * @return Page the newly created page
 */
public function createPageForFpdf(): self
{
    $source = $this->getPDFObjectForFpdf();
    $body = $source->getContent();
    $sourceHeader = $source->getHeader();

    return new self($source->document, $sourceHeader, $body, $source->config);
}
// Returns the page text as an array of strings, or [] when the page has no usable Contents.
// The $page argument is not read anywhere in the visible body.
public function getTextArray(self $page = null): array
// FPDF/FPDI documents: extract from a copy of the page's XObject, passing the XObject itself as context.
if ($this->isFpdf()) {
$pdfObject = $this->getPDFObjectForFpdf();
$newPdfObject = $this->createPDFObjectForFpdf();
return $newPdfObject->getTextArray($pdfObject);
} else {
if ($contents = $this->get('Contents')) {
// A missing or null Contents entry yields no text.
if ($contents instanceof ElementMissing) {
return [];
} elseif ($contents instanceof ElementNull) {
return [];
} elseif ($contents instanceof PDFObject) {
$elements = $contents->getHeader()->getElements();
// A numeric first key is taken to mean the header is a list of content parts to concatenate.
if (is_numeric(key($elements))) {
$new_content = '';
/** @var PDFObject $element */
foreach ($elements as $element) {
// References are resolved to their target object before reading the content.
if ($element instanceof ElementXRef) {
$new_content .= $element->getObject()->getContent();
} else {
$new_content .= $element->getContent();
// Wrap the concatenated content in a fresh object with an empty header.
$header = new Header([], $this->document);
$contents = new PDFObject($this->document, $header, $new_content, $this->config);
} else {
// NOTE(review): the try body is empty in this excerpt, so the catch below cannot be reached as shown;
// a statement appears to be missing — confirm against the full file.
try {
} catch (\Throwable $e) {
return $contents->getTextArray();
} elseif ($contents instanceof ElementArray) {
// Create a virtual global content.
$new_content = '';
/** @var PDFObject $content */
foreach ($contents->getContent() as $content) {
$new_content .= $content->getContent()."\n";
$header = new Header([], $this->document);
$contents = new PDFObject($this->document, $header, $new_content, $this->config);
// Extraction is delegated to the (possibly virtual) content object, with this page as context.
return $contents->getTextArray($this);
return [];
* Gets all the text data with its internal representation of the page.
* Returns an array with the data and the internal representation
// Collects the commands of every text section found in the page's Contents, in order of appearance.
public function extractRawData(): array
* Now you can get the complete content of the object with the text on it
$extractedData = [];
$content = $this->get('Contents');
$values = $content->getContent();
// Array content: concatenate the content of every part, then split into sections and commands.
if (isset($values) && \is_array($values)) {
$text = '';
foreach ($values as $section) {
$text .= $section->getContent();
$sectionsText = $this->getSectionsText($text);
foreach ($sectionsText as $sectionText) {
$commandsText = $this->getCommandsText($sectionText);
foreach ($commandsText as $command) {
$extractedData[] = $command;
} else {
// FPDF/FPDI documents read from the page's XObject instead of the Contents entry.
// NOTE(review): block delimiters are not visible in this excerpt, so it is unclear whether the
// statements below run only for FPDF documents or for every non-array content — confirm.
if ($this->isFpdf()) {
$content = $this->getPDFObjectForFpdf();
$sectionsText = $content->getSectionsText($content->getContent());
foreach ($sectionsText as $sectionText) {
// On this path a synthetic empty BT command is emitted ahead of each section's commands.
$extractedData[] = ['t' => '', 'o' => 'BT', 'c' => ''];
$commandsText = $content->getCommandsText($sectionText);
foreach ($commandsText as $command) {
$extractedData[] = $command;
return $extractedData;
* Gets all the decoded text data with it internal representation from a page.
* @param array $extractedRawData the extracted data return by extractRawData or
* null if extractRawData should be called
* @return array An array with the data and the internal representation
// Decodes the operands of the Tj/TJ commands in $extractedRawData with the font selected by the
// most recent Tf/TF command, and returns the command list with the decoded text written back.
public function extractDecodedRawData(array $extractedRawData = null): array
// A null or empty argument means "extract the raw commands from this page first".
if (!isset($extractedRawData) || !$extractedRawData) {
$extractedRawData = $this->extractRawData();
$currentFont = null; /** @var Font $currentFont */
$clippedFont = null;
$fpdfPage = null;
// For FPDF/FPDI documents, fonts are looked up on a page rebuilt from the page's XObject.
if ($this->isFpdf()) {
$fpdfPage = $this->createPageForFpdf();
// $command is bound by reference so the decoded text lands in $extractedRawData itself.
// NOTE(review): no unset($command) is visible after the loop — confirm the reference is released.
foreach ($extractedRawData as &$command) {
if ('Tj' == $command['o'] || 'TJ' == $command['o']) {
$data = $command['c'];
// Scalar operand: a single string to decode.
if (!\is_array($data)) {
$tmpText = '';
if (isset($currentFont)) {
$tmpText = $currentFont->decodeOctal($data);
// $tmpText = $currentFont->decodeHexadecimal($tmpText, false);
// Replace backslash escape sequences with the characters they stand for.
// NOTE(review): the subject argument and closing parenthesis of str_replace() are not visible
// in this excerpt — confirm against the full file.
$tmpText = str_replace(
['\\\\', '\(', '\)', '\n', '\r', '\t', '\ '],
['\\', '(', ')', "\n", "\r", "\t", ' '],
$tmpText = mb_convert_encoding($tmpText, 'UTF-8', 'ISO-8859-1');
if (isset($currentFont)) {
$tmpText = $currentFont->decodeContent($tmpText);
$command['c'] = $tmpText;
// Array operand: each entry carries its text under the 'c' key.
$numText = \count($data);
for ($i = 0; $i < $numText; ++$i) {
// NOTE(review): the body of this parity check is not visible here, so which entries (odd or even)
// are decoded cannot be told from this excerpt — confirm.
if (0 != ($i % 2)) {
$tmpText = $data[$i]['c'];
$decodedText = isset($currentFont) ? $currentFont->decodeOctal($tmpText) : $tmpText;
$decodedText = str_replace(
['\\\\', '\(', '\)', '\n', '\r', '\t', '\ '],
['\\', '(', ')', "\n", "\r", "\t", ' '],
$decodedText = mb_convert_encoding($decodedText, 'UTF-8', 'ISO-8859-1');
if (isset($currentFont)) {
$decodedText = $currentFont->decodeContent($decodedText);
$command['c'][$i]['c'] = $decodedText;
} elseif ('Tf' == $command['o'] || 'TF' == $command['o']) {
// The first space-separated operand is used as the font resource name.
$fontId = explode(' ', $command['c'])[0];
// If document is a FPDI/FPDF the $page has the correct font
$currentFont = isset($fpdfPage) ? $fpdfPage->getFont($fontId) : $this->getFont($fontId);
// q remembers the current font and Q switches back to the remembered one (a single slot, not a stack).
} elseif ('Q' == $command['o']) {
$currentFont = $clippedFont;
} elseif ('q' == $command['o']) {
$clippedFont = $currentFont;
return $extractedRawData;
* Gets just the Text commands that are involved in text positions and
* Text Matrix (Tm)
* It extract just the PDF commands that are involved with text positions, and
* the Text Matrix (Tm). These are: BT, ET, TL, Td, TD, Tm, T*, Tj, ', ", and TJ
* @param array $extractedDecodedRawData The data extracted by extractDecodeRawData.
* If it is null, the method extractDecodeRawData is called.
* @return array An array with the text command of the page
/**
 * Filters the decoded commands of the page down to those involved in text
 * positioning and text showing.
 *
 * @param array|null $extractedDecodedRawData the data returned by extractDecodedRawData();
 *                                            when null or empty, extractDecodedRawData() is called
 *
 * @return array the retained commands, in their original order, each appearing once
 */
public function getDataCommands(?array $extractedDecodedRawData = null): array
{
    if (!isset($extractedDecodedRawData) || !$extractedDecodedRawData) {
        $extractedDecodedRawData = $this->extractDecodedRawData();
    }

    // Every listed operator is handled the same way (the command is kept), so a
    // single membership test replaces one switch case per operator and cannot
    // append the same command more than once.
    $textOperators = [
        'BT', // begin a text object, initializing Tm and Tlm to the identity matrix
        'ET', // end a text object, discarding the text matrix
        'TL', // leading TL: set the text leading used by T*, ' and "
        'Td', // tx ty Td: move to the start of the next line, offset by tx, ty
        'TD', // tx ty TD: same as "-ty TL" followed by "tx ty Td"
        'Tm', // a b c d e f Tm: set the text matrix and the text line matrix
        'T*', // move to the start of the next line (same as "0 Tl Td")
        'Tj', // string Tj: show a text string
        "'",  // string ': move to the next line and show a text string
        '"',  // aw ac string ": set word/character spacing, then behave like '
        'Tf', // fontname size Tf: set font and size
        'TF',
        'TJ', // array TJ: show strings with individual glyph positioning
    ];

    $extractedData = [];
    foreach ($extractedDecodedRawData as $command) {
        if (\in_array($command['o'], $textOperators, true)) {
            $extractedData[] = $command;
        }
    }

    return $extractedData;
}
* Gets the Text Matrix of the text in the page
* Return an array where every item is an array where the first item is the
* Text Matrix (Tm) and the second is a string with the text data. The Text matrix
* is an array of 6 numbers. The last 2 numbers are the coordinates X and Y of the
* text. The first 4 numbers has to be with Scalation, Rotation and Skew of the text.
* @param array $dataCommands the data extracted by getDataCommands
* if null getDataCommands is called
* @return array an array with the data of the page including the Tm information
* of any text in the page
/**
 * Pairs every text-showing command of the page with the text matrix (Tm) in
 * effect when it is shown.
 *
 * Each returned item is [Tm, text], where Tm is an array of 6 values whose last
 * two are the X and Y coordinates. For Tj and TJ, the font id and font size are
 * appended when the config asks for font info. The result is also stored in
 * $this->dataTm.
 *
 * @param array|null $dataCommands the data returned by getDataCommands();
 *                                 when null or empty, getDataCommands() is called
 *
 * @return array the Tm/text items in order of appearance
 */
public function getDataTm(?array $dataCommands = null): array
{
    if (!isset($dataCommands) || !$dataCommands) {
        $dataCommands = $this->getDataCommands();
    }

    // At the beginning of a text object Tm is the identity matrix.
    $defaultTm = ['1', '0', '0', '1', '0', '0'];

    // Text leading used by the T*, ' and " operators.
    $defaultTl = 0;

    // Font data in effect until a Tf command sets it.
    $defaultFontId = -1;
    $defaultFontSize = 1;

    // Indexes of horizontal/vertical scaling and X,Y-coordinates in the matrix (Tm).
    $hSc = 0;
    $vSc = 3;
    $x = 4;
    $y = 5;

    // x,y-coordinates of the text space origin in user units.
    $Tx = 0;
    $Ty = 0;

    $Tm = $defaultTm;
    $Tl = $defaultTl;
    $fontId = $defaultFontId;
    $fontSize = $defaultFontSize; // reflects fontSize set by Tf

    $extractedTexts = $this->getTextArray();
    $extractedData = [];

    foreach ($dataCommands as $command) {
        // Each emitted item consumes the next text of getTextArray(). Once they
        // are used up there is nothing left to pair, and indexing further would
        // read a non-existent array key.
        if (\count($extractedTexts) <= \count($extractedData)) {
            break;
        }
        $currentText = $extractedTexts[\count($extractedData)];

        switch ($command['o']) {
            // BT / ET: a text object begins or ends; all text state goes back to defaults.
            case 'BT':
            case 'ET':
                $Tm = $defaultTm;
                $Tl = $defaultTl;
                $Tx = 0;
                $Ty = 0;
                $fontId = $defaultFontId;
                $fontSize = $defaultFontSize;
                break;

            // leading TL: set the (scaled) text leading.
            case 'TL':
                $Tl = (float) $command['c'] * (float) $Tm[$vSc];
                break;

            // tx ty Td: move to the start of the next line, offset by tx, ty.
            case 'Td':
                $coord = explode(' ', $command['c']);
                $Tx += (float) $coord[0] * (float) $Tm[$hSc];
                $Ty += (float) $coord[1] * (float) $Tm[$vSc];
                $Tm[$x] = (string) $Tx;
                $Tm[$y] = (string) $Ty;
                break;

            // tx ty TD: same as "-ty TL" followed by "tx ty Td".
            case 'TD':
                $coord = explode(' ', $command['c']);
                $Tl = -((float) $coord[1] * (float) $Tm[$vSc]);
                $Tx += (float) $coord[0] * (float) $Tm[$hSc];
                $Ty += (float) $coord[1] * (float) $Tm[$vSc];
                $Tm[$x] = (string) $Tx;
                $Tm[$y] = (string) $Ty;
                break;

            // a b c d e f Tm: replace the text matrix.
            case 'Tm':
                $Tm = explode(' ', $command['c']);
                $Tx = (float) $Tm[$x];
                $Ty = (float) $Tm[$y];
                break;

            // T*: move to the start of the next line using the current leading.
            case 'T*':
                $Ty -= $Tl;
                $Tm[$y] = (string) $Ty;
                break;

            // string Tj / array TJ: show text at the current matrix.
            case 'Tj':
            case 'TJ':
                $data = [$Tm, $currentText];
                if ($this->config->getDataTmFontInfoHasToBeIncluded()) {
                    $data[] = $fontId;
                    $data[] = $fontSize;
                }
                $extractedData[] = $data;
                break;

            // string ': move to the next line, then show the text.
            case "'":
                $Ty -= $Tl;
                $Tm[$y] = (string) $Ty;
                $extractedData[] = [$Tm, $currentText];
                break;

            // aw ac string ": like ', with the string expected as the third space-separated part.
            case '"':
                $data = explode(' ', $currentText);
                $Ty -= $Tl;
                $Tm[$y] = (string) $Ty;
                $extractedData[] = [$Tm, $data[2] ?? null]; // Verify
                break;

            // fontname size Tf: there is no default for either value; both come from the command.
            case 'Tf':
                list($fontId, $fontSize) = explode(' ', $command['c'], 2);
                break;
        }
    }

    $this->dataTm = $extractedData;

    return $extractedData;
}
* Gets text data that are around the given coordinates (X,Y)
* If the text is in near the given coordinates (X,Y) (or the TM info),
* the text is returned. The extractedData return by getDataTm, could be use to see
* where is the coordinates of a given text, using the TM info for it.
* @param float $x The X value of the coordinate to search for. if null
* just the Y value is considered (same Row)
* @param float $y The Y value of the coordinate to search for. if null
* just the X value is considered (same column)
* @param float $xError The value less or more to consider an X to be "near"
* @param float $yError The value less or more to consider an Y to be "near"
* @return array An array of text that are near the given coordinates. If no text
* "near" the x,y coordinate, an empty array is returned. If Both, x
* and y coordinates are null, an empty array is returned.
/**
 * Returns the [Tm, text] items of the page whose coordinates are near (x, y).
 *
 * When only one coordinate is given, only that axis is compared. An item is
 * returned at most once.
 *
 * @param float|null $x      X coordinate to search for; null to match on Y only
 * @param float|null $y      Y coordinate to search for; null to match on X only
 * @param float      $xError tolerance, either side, for X to count as "near"
 * @param float      $yError tolerance, either side, for Y to count as "near"
 *
 * @return array the matching [Tm, text] items; empty when nothing matches or
 *               when both coordinates are null
 */
public function getTextXY(?float $x = null, ?float $y = null, float $xError = 0, float $yError = 0): array
{
    // Build the Tm data on first use; getDataTm() stores its result in $this->dataTm.
    if (!isset($this->dataTm) || !$this->dataTm) {
        $this->getDataTm();
    }

    if (null === $x && null === $y) {
        return [];
    }

    $extractedData = [];

    foreach ($this->dataTm as $item) {
        $tm = $item[0];
        $xTm = (float) $tm[4];
        $yTm = (float) $tm[5];
        $text = $item[1];

        $xNear = null !== $x && $xTm >= ($x - $xError) && $xTm <= ($x + $xError);
        $yNear = null !== $y && $yTm >= ($y - $yError) && $yTm <= ($y + $yError);

        // Exactly one rule applies per item, so it cannot be appended twice.
        if (null === $y) {
            $isNear = $xNear;
        } elseif (null === $x) {
            $isNear = $yNear;
        } else {
            $isNear = $xNear && $yNear;
        }

        if ($isNear) {
            $extractedData[] = [$tm, $text];
        }
    }

    return $extractedData;
}

os3/PdfParser/Pages.php
@ -0,0 +1,73 @@
* @file
* This file is part of the PdfParser library.
* @author Sébastien MALOT <>
* @date 2017-01-03
* @license LGPLv3
* @url <>
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <>
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <>.
namespace Smalot\PdfParser;
use Smalot\PdfParser\Element\ElementArray;
* Class Pages
class Pages extends PDFObject
{
    /**
     * Returns the children listed under this node's 'Kids' entry.
     *
     * @todo Objects other than Pages or Page might need to be treated specifically in order to get Page objects out of them,
     *
     * @param bool $deep false returns the Kids content as-is; true descends into
     *                   nested Pages nodes and collects only Page objects
     *
     * @return array the direct kids, or the flattened list of Page objects
     */
    public function getPages(bool $deep = false): array
    {
        if (!$this->has('Kids')) {
            return [];
        }

        /** @var ElementArray $kidsElement */
        $kidsElement = $this->get('Kids');
        $kids = $kidsElement->getContent();

        if (!$deep) {
            return $kids;
        }

        $collected = [];
        foreach ($kids as $kid) {
            if ($kid instanceof self) {
                // Nested node: append its pages in order.
                foreach ($kid->getPages(true) as $nestedPage) {
                    $collected[] = $nestedPage;
                }
            } elseif ($kid instanceof Page) {
                $collected[] = $kid;
            }
        }

        return $collected;
    }
}

os3/PdfParser/Parser.php
@ -0,0 +1,327 @@
* @file
* This file is part of the PdfParser library.
* @author Sébastien MALOT <>
* @date 2017-01-03
* @license LGPLv3
* @url <>
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <>
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <>.
namespace Smalot\PdfParser;
use Smalot\PdfParser\Element\ElementArray;
use Smalot\PdfParser\Element\ElementBoolean;
use Smalot\PdfParser\Element\ElementDate;
use Smalot\PdfParser\Element\ElementHexa;
use Smalot\PdfParser\Element\ElementName;
use Smalot\PdfParser\Element\ElementNull;
use Smalot\PdfParser\Element\ElementNumeric;
use Smalot\PdfParser\Element\ElementString;
use Smalot\PdfParser\Element\ElementXRef;
use Smalot\PdfParser\RawData\RawDataParser;
* Class Parser
class Parser
* @var Config
private $config;
* @var PDFObject[]
protected $objects = [];
protected $rawDataParser;
/**
 * @param array       $cfg    options handed to the RawDataParser
 * @param Config|null $config configuration to use; a default Config is created when omitted
 */
public function __construct($cfg = [], ?Config $config = null)
{
    if (null === $config) {
        $config = new Config();
    }

    $this->config = $config;
    $this->rawDataParser = new RawDataParser($cfg, $this->config);
}
// Returns the Config set in the constructor (the instance passed in, or the default one created there).
public function getConfig(): Config
return $this->config;
* @throws \Exception
/**
 * Reads a PDF file from disk and parses it.
 *
 * @param string $filename path of the PDF file
 *
 * @throws \Exception if the file cannot be read, or if parsing its content fails
 */
public function parseFile(string $filename): Document
{
    /*
     * 2018/06/20 @doganoo: users complained several times that parseFile()
     * dies silently, so the error control operator (@) is deliberately not
     * used here and failures are reported to the caller.
     */
    $content = file_get_contents($filename);

    // file_get_contents() returns false on failure; report that explicitly
    // instead of handing a non-string on to parseContent().
    if (false === $content) {
        throw new \Exception('Unable to read file "'.$filename.'".');
    }

    return $this->parseContent($content);
}
* @param string $content PDF content to parse
* @throws \Exception if secured PDF file was detected
* @throws \Exception if no object list was found
// Parses raw PDF bytes into a Document: the raw-data parser produces the xref table and the object
// structures, each structure is turned into an object, and the trailer is attached to the document.
public function parseContent(string $content): Document
// Create structure from raw data.
list($xref, $data) = $this->rawDataParser->parseData($content);
// An 'encrypt' entry in the trailer marks a secured file, which is rejected.
if (isset($xref['trailer']['encrypt'])) {
throw new \Exception('Secured pdf file are currently not supported.');
if (empty($data)) {
throw new \Exception('Object list not found. Possible secured file.');
// Create destination object.
$document = new Document();
// Start from an empty object table so nothing from a previous parse is carried over.
$this->objects = [];
foreach ($data as $id => $structure) {
$this->parseObject($id, $structure, $document);
// NOTE(review): nothing visible in this excerpt hands $this->objects over to $document — confirm
// against the full file that the parsed objects reach the document.
$document->setTrailer($this->parseTrailer($xref['trailer'], $document));
return $document;
/**
 * Converts a raw trailer structure into a Header of typed elements.
 *
 * Keys get their first letter upper-cased. Numeric values become
 * ElementNumeric, arrays are converted recursively and wrapped in an
 * ElementArray, values containing an underscore become ElementXRef, and
 * anything else is parsed as a '(' (string) header element.
 *
 * @return Header
 */
protected function parseTrailer(array $structure, ?Document $document)
{
    $entries = [];

    foreach ($structure as $rawName => $rawValue) {
        $key = ucfirst($rawName);

        if (is_numeric($rawValue)) {
            $entries[$key] = new ElementNumeric($rawValue);
            continue;
        }

        if (\is_array($rawValue)) {
            $nested = $this->parseTrailer($rawValue, null);
            $entries[$key] = new ElementArray($nested, null);
            continue;
        }

        $entries[$key] = false !== strpos($rawValue, '_')
            ? new ElementXRef($rawValue, $document)
            : $this->parseHeaderElement('(', $rawValue, $document);
    }

    return new Header($entries, $document);
}
// Turns the raw structure of one object into a PDFObject and stores it in $this->objects under $id.
// NOTE(review): this excerpt shows no terminators between the switch cases, no default label, and
// no arguments for the preg_split( call below — confirm the control flow against the full file.
protected function parseObject(string $id, array $structure, ?Document $document)
// Start with an empty header and no content; the parts below fill them in.
$header = new Header([], $document);
$content = '';
foreach ($structure as $position => $part) {
// Integer parts carry no type/value pair, so they are replaced by a [null, null] placeholder.
if (\is_int($part)) {
$part = [null, null];
switch ($part[0]) {
// '[': the object is an array; each sub-element is parsed and the list becomes the header.
case '[':
$elements = [];
foreach ($part[1] as $sub_element) {
$sub_type = $sub_element[0];
$sub_value = $sub_element[1];
$elements[] = $this->parseHeaderElement($sub_type, $sub_value, $document);
$header = new Header($elements, $document);
// '<<': the object has a dictionary, parsed into the header.
case '<<':
$header = $this->parseHeader($part[1], $document);
case 'stream':
// The content is $part[3][0] when that is set, otherwise $part[1].
$content = isset($part[3][0]) ? $part[3][0] : $part[1];
// Streams whose header Type equals 'ObjStm' are split into the objects they contain.
if ($header->get('Type')->equals('ObjStm')) {
$match = [];
// Split xrefs and contents.
preg_match('/^((\d+\s+\d+\s*)*)(.*)$/s', $content, $match);
$content = $match[3];
// Extract xrefs.
$xrefs = preg_split(
// Maps each position to the id found next to it in the xref pairs.
$table = [];
foreach ($xrefs as $xref) {
list($id, $position) = preg_split("/\s+/", trim($xref));
$table[$position] = $id;
$ids = array_values($table);
$positions = array_keys($table);
foreach ($positions as $index => $position) {
// Embedded objects are registered under "<id>_0".
$id = $ids[$index].'_0';
// An embedded object runs up to the next listed position, or to the end of the content for the last one.
$next_position = isset($positions[$index + 1]) ? $positions[$index + 1] : \strlen($content);
$sub_content = substr($content, $position, (int) $next_position - (int) $position);
$sub_header = Header::parse($sub_content, $document);
$object = PDFObject::factory($document, $sub_header, '', $this->config);
$this->objects[$id] = $object;
// It is not necessary to store this content.
// Any other part that parses to a truthy element becomes a one-element header.
if ('null' != $part) {
$element = $this->parseHeaderElement($part[0], $part[1], $document);
if ($element) {
$header = new Header([$element], $document);
// Register the object only when no entry exists yet under this id.
if (!isset($this->objects[$id])) {
$this->objects[$id] = PDFObject::factory($document, $header, $content, $this->config);
* @throws \Exception
/**
 * Builds a Header from a flat list of alternating name and value nodes.
 *
 * Entries are read in pairs: index 1 of the first node is the element name,
 * and indexes 0 and 1 of the second node are the type and raw value of the
 * element.
 *
 * @throws \Exception
 */
protected function parseHeader(array $structure, ?Document $document): Header
{
    $parsed = [];
    $total = \count($structure);
    $index = 0;

    while ($index < $total) {
        $key = $structure[$index][1];
        $valueType = $structure[$index + 1][0];
        $rawValue = $structure[$index + 1][1];

        $parsed[$key] = $this->parseHeaderElement($valueType, $rawValue, $document);
        $index += 2;
    }

    return new Header($parsed, $document);
}
* @param string|array $value
* @return Element|Header|null
* @throws \Exception
// Converts one raw (type, value) pair into the matching element object, chosen by $type.
// NOTE(review): no default case is visible in this excerpt — confirm how unknown types are handled.
protected function parseHeaderElement(?string $type, $value, ?Document $document)
// Loose comparisons: null, '', false and other values loosely equal to them all count as empty.
$valueIsEmpty = null == $value || '' == $value || false == $value;
// An empty dictionary value is replaced by an empty array before it is parsed as a header.
if (('<<' === $type || '>>' === $type) && $valueIsEmpty) {
$value = [];
switch ($type) {
case '<<':
case '>>':
$header = $this->parseHeader($value, $document);
// The factory result is discarded here; the parsed header is what gets returned.
PDFObject::factory($document, $header, null, $this->config);
return $header;
case 'numeric':
return new ElementNumeric($value);
case 'boolean':
return new ElementBoolean($value);
case 'null':
return new ElementNull();
// '(': try the value as a date first, and fall back to a plain string.
case '(':
if ($date = ElementDate::parse('('.$value.')', $document)) {
return $date;
return ElementString::parse('('.$value.')', $document);
// '<': the value is decoded by ElementHexa, then parsed again as a '(' string.
case '<':
return $this->parseHeaderElement('(', ElementHexa::decode($value), $document);
case '/':
return ElementName::parse('/'.$value, $document);
case 'ojbref': // old mistake in tcpdf parser
case 'objref':
return new ElementXRef($value, $document);
// '[': each sub-element is parsed recursively; a non-array value yields an empty ElementArray.
case '[':
$values = [];
if (\is_array($value)) {
foreach ($value as $sub_element) {
$sub_type = $sub_element[0];
$sub_value = $sub_element[1];
$values[] = $this->parseHeaderElement($sub_type, $sub_value, $document);
return new ElementArray($values, $document);
case 'endstream':
case 'obj': // I don't know what it means but got my project fixed.
case '':
// Nothing to do with.
return null;
os3/PdfParser/RawData/FilterHelper.php

@ -0,0 +1,396 @@
* This file is based on code of tecnickcom/TCPDF PDF library.
* Original author Nicola Asuni ( and
* contributors (
* @see
* Original code was licensed on the terms of the LGPL v3.
* ------------------------------------------------------------------------------
* @file This file is part of the PdfParser library.
* @author Konrad Abicht <>
* @date 2020-01-06
* @license LGPLv3
* @url <>
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <>
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <>.
namespace Smalot\PdfParser\RawData;
class FilterHelper
protected $availableFilters = ['ASCIIHexDecode', 'ASCII85Decode', 'LZWDecode', 'FlateDecode', 'RunLengthDecode'];
* Decode data using the specified filter type.
* @param string $filter Filter name
* @param string $data Data to decode
* @return string Decoded data string
* @throws \Exception if a certain decode function is not implemented yet
/**
 * Decodes data with the decoder matching the given filter name.
 *
 * @param string $filter            filter name
 * @param string $data              data to decode
 * @param int    $decodeMemoryLimit limit passed on to the FlateDecode decoder
 *
 * @return string the decoded data; the input unchanged when the filter name is not recognised
 *
 * @throws \Exception if the filter is known but its decoder is not implemented yet
 */
public function decodeFilter(string $filter, string $data, int $decodeMemoryLimit = 0): string
{
    switch ($filter) {
        case 'ASCIIHexDecode':
            return $this->decodeFilterASCIIHexDecode($data);

        case 'ASCII85Decode':
            return $this->decodeFilterASCII85Decode($data);

        case 'LZWDecode':
            return $this->decodeFilterLZWDecode($data);

        case 'FlateDecode':
            return $this->decodeFilterFlateDecode($data, $decodeMemoryLimit);

        case 'RunLengthDecode':
            return $this->decodeFilterRunLengthDecode($data);

        // Known filters without a decoder share one message template.
        case 'CCITTFaxDecode':
        case 'JBIG2Decode':
        case 'DCTDecode':
        case 'JPXDecode':
        case 'Crypt':
            throw new \Exception('Decode '.$filter.' not implemented yet.');
    }

    return $data;
}
* ASCIIHexDecode
* Decodes data encoded in an ASCII hexadecimal representation, reproducing the original binary data.
* @param string $data Data to decode
* @return string data string
* @throws \Exception
/**
 * Decodes data encoded in an ASCII hexadecimal representation.
 *
 * @param string $data data to decode
 *
 * @return string the decoded binary data
 *
 * @throws \Exception if the data holds a non-hexadecimal character, or has an
 *                    odd number of digits without an EOD marker
 */
protected function decodeFilterASCIIHexDecode(string $data): string
{
    // all white-space characters shall be ignored
    $data = preg_replace('/[\s]/', '', $data);

    // check for EOD character: GREATER-THAN SIGN (3Eh)
    $eodPosition = strpos($data, '>');
    $hasEod = false !== $eodPosition;
    if ($hasEod) {
        // remove EOD and extra data (if any)
        $data = substr($data, 0, $eodPosition);
    }

    if (0 !== \strlen($data) % 2) {
        // odd number of hexadecimal digits
        if (!$hasEod) {
            throw new \Exception('decodeFilterASCIIHexDecode: invalid code');
        }

        // EOD shall behave as if a 0 (zero) followed the last digit, so the
        // padding goes AFTER it ("ABC" decodes as AB C0). The original inserted
        // the zero before the last digit and produced AB 0C.
        $data .= '0';
    }

    // check for invalid characters
    if (preg_match('/[^a-fA-F\d]/', $data) > 0) {
        throw new \Exception('decodeFilterASCIIHexDecode: invalid code');
    }

    // get one byte of binary data for each pair of ASCII hexadecimal digits
    return pack('H*', $data);
}
* ASCII85Decode
* Decodes data encoded in an ASCII base-85 representation, reproducing the original binary data.
* @param string $data Data to decode
* @return string data string
* @throws \Exception
/**
 * Decodes data encoded in an ASCII base-85 representation.
 *
 * @param string $data data to decode
 *
 * @return string the decoded binary data
 *
 * @throws \Exception if the data holds a character outside the base-85
 *                    alphabet, a 'z' inside a group, or a final group of one character
 */
protected function decodeFilterASCII85Decode(string $data): string
{
    // initialize string to return
    $decoded = '';

    // all white-space characters shall be ignored
    $data = preg_replace('/[\s]/', '', $data);

    // remove the start sequence <~ (3Ch)(7Eh), only when it really leads the data
    if (0 === strpos($data, '<~')) {
        $data = substr($data, 2);
    }

    // check for EOD: 2-character sequence ~> (7Eh)(3Eh)
    $eod = strpos($data, '~>');
    if (false !== $eod) {
        // remove EOD and extra data (if any)
        $data = substr($data, 0, $eod);
    }

    $data_length = \strlen($data);

    // check for invalid characters: '!'..'u' are digits, 'z' is the all-zero shortcut.
    // The original class omitted 'z', so every z-compressed stream was rejected here.
    if (preg_match('/[^\x21-\x75\x7a]/', $data) > 0) {
        throw new \Exception('decodeFilterASCII85Decode: invalid code');
    }

    // z sequence
    $zseq = \chr(0).\chr(0).\chr(0).\chr(0);

    // position inside a group of 5 characters (0-4)
    $group_pos = 0;
    $tuple = 0;
    $pow85 = [85 * 85 * 85 * 85, 85 * 85 * 85, 85 * 85, 85, 1];

    for ($i = 0; $i < $data_length; ++$i) {
        $char = \ord($data[$i]);

        if (122 == $char) { // 'z'
            if (0 != $group_pos) {
                throw new \Exception('decodeFilterASCII85Decode: invalid code');
            }
            $decoded .= $zseq;
            continue;
        }

        // the value represented by a group of 5 characters should never be greater than 2^32 - 1
        $tuple += (($char - 33) * $pow85[$group_pos]);

        if (4 == $group_pos) {
            // Group complete: emit its four bytes, most significant first.
            $decoded .= \chr(($tuple >> 24) & 0xFF)
                .\chr(($tuple >> 16) & 0xFF)
                .\chr(($tuple >> 8) & 0xFF)
                .\chr($tuple & 0xFF);
            $tuple = 0;
            $group_pos = 0;
        } else {
            ++$group_pos;
        }
    }

    // last tuple (if any): a partial group of n characters yields n - 1 bytes
    if ($group_pos > 1) {
        $tuple += $pow85[$group_pos - 1];
    }

    switch ($group_pos) {
        case 4:
            $decoded .= \chr(($tuple >> 24) & 0xFF).\chr(($tuple >> 16) & 0xFF).\chr(($tuple >> 8) & 0xFF);
            break;

        case 3:
            $decoded .= \chr(($tuple >> 24) & 0xFF).\chr(($tuple >> 16) & 0xFF);
            break;

        case 2:
            $decoded .= \chr(($tuple >> 24) & 0xFF);
            break;

        case 1:
            throw new \Exception('decodeFilterASCII85Decode: invalid code');
    }

    return $decoded;
}
/**
 * FlateDecode
 *
 * Decompresses data encoded using the zlib/deflate compression method, reproducing the original text or binary data.
 *
 * @param string $data              Data to decode
 * @param int    $decodeMemoryLimit Memory limit on deflation (passed to gzuncompress as maximum length)
 *
 * @return string data string
 *
 * @throws \Exception if gzuncompress emits a warning or returns false
 */
protected function decodeFilterFlateDecode(string $data, int $decodeMemoryLimit): ?string
{
    /*
     * gzuncompress may raise an E_WARNING in case of an error (like $data is empty),
     * which cannot be caught. The temporary handler below turns that warning
     * into an exception; every other error level falls through to PHP's default handling.
     */
    set_error_handler(static function ($errNo, $errStr) {
        if (\E_WARNING === $errNo) {
            throw new \Exception($errStr);
        }

        // fallback to default php error handler
        return false;
    });

    try {
        $decoded = gzuncompress($data, $decodeMemoryLimit);
        if (false === $decoded) {
            throw new \Exception('decodeFilterFlateDecode: invalid code');
        }
    } finally {
        // Always restore the previous handler, on success and on failure,
        // in case it was customized outside of PDFParser.
        restore_error_handler();
    }

    return $decoded;
}
/**
 * LZWDecode
 *
 * Decompresses data encoded using the LZW (Lempel-Ziv-Welch) adaptive compression method,
 * reproducing the original text or binary data.
 *
 * Codes start at 9 bits and grow to 10, 11 and 12 bits when the next free dictionary
 * slot reaches 511, 1023 and 2047. Code 256 clears the table, code 257 ends the data.
 *
 * @param string $data Data to decode
 *
 * @return string Data string
 */
protected function decodeFilterLZWDecode(string $data): string
{
    // initialize string to return
    $decoded = '';

    // convert the input into a string of '0'/'1' characters
    $bitstring = '';
    $byteCount = \strlen($data);
    for ($i = 0; $i < $byteCount; ++$i) {
        $bitstring .= sprintf('%08b', \ord($data[$i]));
    }
    // number of unread bits and read position; a moving offset avoids
    // re-copying the whole remaining bit string for every code
    $bitsLeft = \strlen($bitstring);
    $bitPos = 0;

    // the first 256 dictionary entries are the single bytes
    $baseDictionary = [];
    for ($i = 0; $i < 256; ++$i) {
        $baseDictionary[$i] = \chr($i);
    }

    // initialize code length in bits
    $bitlen = 9;
    // initialize dictionary index (next free slot)
    $dix = 258;
    $dictionary = $baseDictionary;
    // 256 means "table was just cleared": the next code is emitted as-is and adds no entry.
    // Starting in that state also handles streams that omit the leading clear-table code.
    $prev_index = 256;

    // until we encounter the EOD marker (257), read $bitlen bits
    while (($bitsLeft > 0) && (257 != ($index = bindec(substr($bitstring, $bitPos, $bitlen))))) {
        // consume the bits just read
        $bitPos += $bitlen;
        $bitsLeft -= $bitlen;

        if (256 == $index) { // clear-table marker
            $bitlen = 9;
            $dix = 258;
            $prev_index = 256;
            $dictionary = $baseDictionary;
        } elseif (256 == $prev_index) {
            // first entry after a clear
            $decoded .= $dictionary[$index];
            $prev_index = $index;
        } else {
            if ($index < $dix) {
                // code is already in the dictionary
                $decoded .= $dictionary[$index];
                $dic_val = $dictionary[$prev_index].$dictionary[$index][0];
                $prev_index = $index;
            } else {
                // code is not yet in the dictionary: it denotes the entry being created now
                $dic_val = $dictionary[$prev_index].$dictionary[$prev_index][0];
                $decoded .= $dic_val;
                // the code just read becomes the previous code for the next step
                $prev_index = $dix;
            }
            // update dictionary
            $dictionary[$dix] = $dic_val;
            ++$dix;
            // change bit length by case
            if (2047 == $dix) {
                $bitlen = 12;
            } elseif (1023 == $dix) {
                $bitlen = 11;
            } elseif (511 == $dix) {
                $bitlen = 10;
            }
        }
    }

    return $decoded;
}
/**
 * RunLengthDecode
 *
 * Decompresses data encoded using a byte-oriented run-length encoding algorithm.
 *
 * @param string $data Data to decode
 *
 * @return string decoded data; decoding stops at the EOD byte (128) or at the end of input
 */
protected function decodeFilterRunLengthDecode(string $data): string
{
    // initialize string to return
    $decoded = '';
    // data length
    $data_length = \strlen($data);
    $i = 0;
    while ($i < $data_length) {
        // get current byte value
        $byte = \ord($data[$i]);
        if (128 == $byte) {
            // a length value of 128 denotes EOD
            break;
        }
        if ($byte < 128) {
            // if the length byte is in the range 0 to 127
            // the following length + 1 (1 to 128) bytes shall be copied literally during decompression
            $decoded .= (string) substr($data, $i + 1, $byte + 1);
            // move to next block
            $i += ($byte + 2);
        } else {
            if (!isset($data[$i + 1])) {
                // truncated input: the byte to repeat is missing
                break;
            }
            // if length is in the range 129 to 255,
            // the following single byte shall be copied 257 - length (2 to 128) times during decompression
            $decoded .= str_repeat($data[$i + 1], 257 - $byte);
            // move to next block
            $i += 2;
        }
    }

    return $decoded;
}
* @return array list of available filters
public function getAvailableFilters(): array
os3/PdfParser/RawData/RawDataParser.php

@ -0,0 +1,902 @@
* This file is based on code of tecnickcom/TCPDF PDF library.
* Original author Nicola Asuni ( and
* contributors (
* @see
* Original code was licensed on the terms of the LGPL v3.
* ------------------------------------------------------------------------------
* @file This file is part of the PdfParser library.
* @author Konrad Abicht <>
* @date 2020-01-06
* @license LGPLv3
* @url <>
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <>
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <>.
namespace Smalot\PdfParser\RawData;
use Smalot\PdfParser\Config;
class RawDataParser
* @var \Smalot\PdfParser\Config
private $config;
* Configuration array.
protected $cfg = [
// if `true` ignore filter decoding errors
'ignore_filter_decoding_errors' => true,
// if `true` ignore missing filter decoding errors
'ignore_missing_filter_decoders' => true,
protected $filterHelper;
protected $objects;
/**
 * @param array       $cfg    Configuration array, default is []; merged over the class defaults
 * @param Config|null $config Parser configuration; a default Config is created when omitted
 */
public function __construct($cfg = [], ?Config $config = null)
{
    // merge given array with default values
    $this->cfg = array_merge($this->cfg, $cfg);
    $this->filterHelper = new FilterHelper();
    // explicit nullable type: implicit "Config $config = null" is deprecated as of PHP 8.4
    $this->config = $config ?? new Config();
}
/**
 * Decode the specified stream.
 *
 * Reads /Length and /Filter from the stream dictionary, truncates the stream to the
 * declared length if that is shorter, then applies every filter known to the filter helper.
 *
 * @param string $pdfData PDF data
 * @param array  $xref    Cross-reference data, forwarded to getObjectVal() when resolving /Filter
 * @param array  $sdic    Stream's dictionary array
 * @param string $stream  Stream to decode
 *
 * @return array containing decoded stream data and remaining filters
 *
 * @throws \Exception if a filter fails and the matching "ignore" option in $this->cfg is disabled
 */
protected function decodeStream(string $pdfData, array $xref, array $sdic, string $stream): array
{
    // get stream length and filters
    $slength = \strlen($stream);
    if ($slength <= 0) {
        return ['', []];
    }

    $filters = [];
    foreach ($sdic as $k => $v) {
        if ('/' != $v[0] || !isset($sdic[$k + 1])) {
            continue;
        }
        $next = $sdic[$k + 1];

        if (('Length' == $v[1]) && ('numeric' == $next[0])) {
            // get declared stream length
            $declength = (int) $next[1];
            if ($declength < $slength) {
                $stream = substr($stream, 0, $declength);
                $slength = $declength;
            }
        } elseif ('Filter' == $v[1]) {
            // resolve indirect object
            $objval = $this->getObjectVal($pdfData, $xref, $next);
            if ('/' == $objval[0]) {
                // single filter
                $filters[] = $objval[1];
            } elseif ('[' == $objval[0]) {
                // array of filters
                foreach ($objval[1] as $flt) {
                    if ('/' == $flt[0]) {
                        $filters[] = $flt[1];
                    }
                }
            }
        }
    }

    // decode the stream
    $remaining_filters = [];
    // the list of supported filters does not change while decoding: fetch it once
    $availableFilters = $this->filterHelper->getAvailableFilters();
    $memoryLimit = $this->config->getDecodeMemoryLimit();
    foreach ($filters as $filter) {
        if (!\in_array($filter, $availableFilters, true)) {
            // add missing filter to array
            $remaining_filters[] = $filter;
            continue;
        }

        try {
            $stream = $this->filterHelper->decodeFilter($filter, $stream, $memoryLimit);
        } catch (\Exception $e) {
            $emsg = $e->getMessage();
            // messages starting with '~' are treated as "decoder missing", anything else
            // as "decoding failed"; an empty message is never indexed
            $decoderMissing = ('' !== $emsg) && ('~' == $emsg[0]);
            $ignored = $decoderMissing
                ? $this->cfg['ignore_missing_filter_decoders']
                : $this->cfg['ignore_filter_decoding_errors'];
            if (!$ignored) {
                // keep the original exception attached for debugging
                throw new \Exception($emsg, 0, $e);
            }
        }
    }

    return [$stream, $remaining_filters];
}
/**
 * Decode the Cross-Reference section
 *
 * Walks the classic "xref" table that starts at $startxref, records the offset of every
 * in-use ('n') entry that is not yet known, then parses the trailer dictionary.
 * When the trailer has a /Prev entry the previous xref section is loaded as well.
 *
 * @param string $pdfData   PDF data
 * @param int    $startxref Offset at which the xref section starts (position of the 'xref' keyword)
 * @param array  $xref      Previous xref array (if any)
 *
 * @return array containing xref and trailer data
 *
 * @throws \Exception if no trailer follows the table
 */
protected function decodeXref(string $pdfData, int $startxref, array $xref = []): array
{
    $startxref += 4; // 4 is the length of the word 'xref'
    // skip initial white space chars
    $offset = $startxref + strspn($pdfData, $this->config->getPdfWhitespaces(), $startxref);
    // initialize object number
    $obj_num = 0;
    // search for cross-reference entries or subsection
    while (preg_match('/([0-9]+)[\x20]([0-9]+)[\x20]?([nf]?)(\r\n|[\x20]?[\r\n])/', $pdfData, $matches, \PREG_OFFSET_CAPTURE, $offset) > 0) {
        if ($matches[0][1] != $offset) {
            // the match does not start right here: we are on another section
            break;
        }
        $offset += \strlen($matches[0][0]);

        if ('n' == $matches[3][0]) {
            // create unique object index: [object number]_[generation number]
            $index = $obj_num.'_'.(int) $matches[2][0];
            // keep the first offset seen for an object (newer sections are read first)
            if (!isset($xref['xref'][$index])) {
                // store object offset position
                $xref['xref'][$index] = (int) $matches[1][0];
            }
            ++$obj_num;
        } elseif ('f' == $matches[3][0]) {
            // free entry: nothing to store, but it still occupies an object number
            ++$obj_num;
        } else {
            // subsection header "first count": object number (index) of the next entry
            $obj_num = (int) $matches[1][0];
        }
    }

    // get trailer data
    if (preg_match('/trailer[\s]*<<(.*)>>/isU', $pdfData, $matches, \PREG_OFFSET_CAPTURE, $offset) <= 0) {
        throw new \Exception('Unable to find trailer');
    }
    $trailer_data = $matches[1][0];

    if (empty($xref['trailer'])) {
        // get only the last updated version
        $xref['trailer'] = [];
        // parse trailer_data
        if (preg_match('/Size[\s]+([0-9]+)/i', $trailer_data, $matches) > 0) {
            $xref['trailer']['size'] = (int) $matches[1];
        }
        // indirect references are stored as "[object number]_[generation number]"
        foreach (['Root' => 'root', 'Encrypt' => 'encrypt', 'Info' => 'info'] as $key => $field) {
            if (preg_match('/'.$key.'[\s]+([0-9]+)[\s]+([0-9]+)[\s]+R/i', $trailer_data, $matches) > 0) {
                $xref['trailer'][$field] = (int) $matches[1].'_'.(int) $matches[2];
            }
        }
        if (preg_match('/ID[\s]*[\[][\s]*[<]([^>]*)[>][\s]*[<]([^>]*)[>]/i', $trailer_data, $matches) > 0) {
            $xref['trailer']['id'] = [$matches[1], $matches[2]];
        }
    }

    if (preg_match('/Prev[\s]+([0-9]+)/i', $trailer_data, $matches) > 0) {
        // get previous xref
        $xref = $this->getXrefData($pdfData, (int) $matches[1], $xref);
    }

    return $xref;
}
/**
 * Decode the Cross-Reference Stream section
 *
 * Reads the cross-reference stream object at $startxref, interprets its dictionary
 * (/Type, /Index, /Prev, /W, /DecodeParms and, for the newest section only, the trailer keys),
 * undoes the PNG predictor if one is declared, splits the decoded bytes into
 * (type, field 2, field 3) rows according to /W and fills $xref['xref'].
 *
 * @param string $pdfData   PDF data
 * @param int    $startxref Offset at which the xref section starts
 * @param array  $xref      Previous xref array (if any)
 *
 * @return array containing xref and trailer data
 *
 * @throws \Exception if unknown PNG predictor detected
 */
protected function decodeXrefStream(string $pdfData, int $startxref, array $xref = []): array
{
    // try to read Cross-Reference Stream
    $xrefobj = $this->getRawObject($pdfData, $startxref);
    $xrefcrs = $this->getIndirectObject($pdfData, $xref, $xrefobj[1], $startxref, true);

    // only the newest section (the first one read) provides the trailer
    $filltrailer = empty($xref['trailer']);
    if ($filltrailer) {
        $xref['trailer'] = [];
    }
    if (!isset($xref['xref'])) {
        $xref['xref'] = [];
    }

    $valid_crs = false;
    $columns = 0;
    $predictor = null;
    $sarr = $xrefcrs[0][1];
    if (!\is_array($sarr)) {
        $sarr = [];
    }

    $wb = [];

    foreach ($sarr as $k => $v) {
        // every entry of interest is a name followed by its value
        if ('/' != $v[0] || !isset($sarr[$k + 1])) {
            continue;
        }
        $name = $v[1];
        $next = $sarr[$k + 1];

        if ('Type' == $name) {
            if ('/' == $next[0] && 'XRef' == $next[1]) {
                $valid_crs = true;
            }
        } elseif ('Index' == $name) {
            // list of: first object number in the subsection / number of objects
            $blocks = [];
            $entries = \is_array($next[1]) ? $next[1] : [];
            for ($m = 0; $m + 1 < \count($entries); $m += 2) {
                $blocks[] = [(int) $entries[$m][1], (int) $entries[$m + 1][1]];
            }
            if ([] !== $blocks) {
                $index_blocks = $blocks;
            }
        } elseif ('Prev' == $name) {
            if ('numeric' == $next[0]) {
                // get previous xref offset
                $prevxref = (int) $next[1];
            }
        } elseif ('W' == $name) {
            // number of bytes (in the decoded stream) of the corresponding field
            $wb[0] = (int) $next[1][0][1];
            $wb[1] = (int) $next[1][1][1];
            $wb[2] = (int) $next[1][2][1];
        } elseif ('DecodeParms' == $name && isset($next[1])) {
            $decpar = \is_array($next[1]) ? $next[1] : [];
            foreach ($decpar as $kdc => $vdc) {
                if ('/' != $vdc[0] || !isset($decpar[$kdc + 1]) || 'numeric' != $decpar[$kdc + 1][0]) {
                    continue;
                }
                if ('Columns' == $vdc[1]) {
                    $columns = (int) $decpar[$kdc + 1][1];
                } elseif ('Predictor' == $vdc[1]) {
                    $predictor = (int) $decpar[$kdc + 1][1];
                }
            }
        } elseif ($filltrailer) {
            if (('Size' == $name) && ('numeric' == $next[0])) {
                $xref['trailer']['size'] = $next[1];
            } elseif (('Root' == $name) && ('objref' == $next[0])) {
                $xref['trailer']['root'] = $next[1];
            } elseif (('Info' == $name) && ('objref' == $next[0])) {
                $xref['trailer']['info'] = $next[1];
            } elseif (('Encrypt' == $name) && ('objref' == $next[0])) {
                $xref['trailer']['encrypt'] = $next[1];
            } elseif ('ID' == $name) {
                $xref['trailer']['id'] = [];
                $xref['trailer']['id'][0] = $next[1][0][1];
                $xref['trailer']['id'][1] = $next[1][1][1];
            }
        }
    }

    // decode data
    if ($valid_crs && isset($xrefcrs[1][3][0])) {
        // convert the stream into an array of integers (unpack may return false)
        $bytes = unpack('C*', $xrefcrs[1][3][0]) ?: [];

        if (null !== $predictor) {
            // number of bytes in a row: one predictor-type byte plus $columns data bytes
            $rowlen = ($columns + 1);
            // split the rows
            $sdata = array_chunk($bytes, $rowlen);
            // initialize decoded array
            $ddata = [];
            // initialize first row with zeros
            $prev_row = array_fill(0, $rowlen, 0);
            // for each row apply PNG unpredictor
            foreach ($sdata as $k => $row) {
                // initialize new row
                $ddata[$k] = [];
                // get PNG predictor value for this row
                $rowPredictor = (10 + $row[0]);
                // for each byte on the row
                for ($i = 1; $i <= $columns; ++$i) {
                    // new index
                    $j = ($i - 1);
                    $row_up = $prev_row[$j];
                    if (1 == $i) {
                        $row_left = 0;
                        $row_upleft = 0;
                    } else {
                        // PNG reconstruction works on the already reconstructed left byte,
                        // not on the still-filtered input byte
                        $row_left = $ddata[$k][$j - 1];
                        $row_upleft = $prev_row[$j - 1];
                    }
                    switch ($rowPredictor) {
                        case 10: // PNG prediction (on encoding, PNG None on all rows)
                            $ddata[$k][$j] = $row[$i];
                            break;

                        case 11: // PNG prediction (on encoding, PNG Sub on all rows)
                            $ddata[$k][$j] = (($row[$i] + $row_left) & 0xFF);
                            break;

                        case 12: // PNG prediction (on encoding, PNG Up on all rows)
                            $ddata[$k][$j] = (($row[$i] + $row_up) & 0xFF);
                            break;

                        case 13: // PNG prediction (on encoding, PNG Average on all rows)
                            // integer (floor) average; a float operand to "&" is deprecated
                            $ddata[$k][$j] = (($row[$i] + intdiv($row_left + $row_up, 2)) & 0xFF);
                            break;

                        case 14: // PNG prediction (on encoding, PNG Paeth on all rows)
                            // initial estimate
                            $p = ($row_left + $row_up - $row_upleft);
                            // distances
                            $pa = abs($p - $row_left);
                            $pb = abs($p - $row_up);
                            $pc = abs($p - $row_upleft);
                            // pick the nearest neighbour; ties resolve left, then up, then up-left
                            if ($pa <= $pb && $pa <= $pc) {
                                $nearest = $row_left;
                            } elseif ($pb <= $pc) {
                                $nearest = $row_up;
                            } else {
                                $nearest = $row_upleft;
                            }
                            $ddata[$k][$j] = (($row[$i] + $nearest) & 0xFF);
                            break;

                        default: // PNG prediction (on encoding, PNG optimum)
                            throw new \Exception('Unknown PNG predictor: '.$rowPredictor);
                    }
                }
                $prev_row = $ddata[$k];
            } // end for each row
            // complete decoding
        } else {
            // number of bytes in a row
            $rowlen = array_sum($wb);
            // split the rows (array_chunk rejects a size below 1)
            $ddata = $rowlen > 0 ? array_chunk($bytes, $rowlen) : [];
        }

        $sdata = [];
        // for every row
        foreach ($ddata as $k => $row) {
            // initialize new row
            $sdata[$k] = [0, 0, 0];
            if (0 == $wb[0]) {
                // default type field
                $sdata[$k][0] = 1;
            }
            $i = 0; // count bytes in the row
            // for every column
            for ($c = 0; $c < 3; ++$c) {
                // for every byte on the column (big-endian)
                for ($b = 0; $b < $wb[$c]; ++$b) {
                    if (isset($row[$i])) {
                        $sdata[$k][$c] += ($row[$i] << (($wb[$c] - 1 - $b) * 8));
                    }
                    ++$i;
                }
            }
        }

        // fill xref
        if (isset($index_blocks)) {
            // load the first object number of the first /Index entry
            $obj_num = $index_blocks[0][0];
        } else {
            $obj_num = 0;
        }
        foreach ($sdata as $k => $row) {
            switch ($row[0]) {
                case 0: // (f) linked list of free objects
                    break;

                case 1: // (n) objects that are in use but are not compressed
                    // create unique object index: [object number]_[generation number]
                    $index = $obj_num.'_'.$row[2];
                    // check if object already exist
                    if (!isset($xref['xref'][$index])) {
                        // store object offset position
                        $xref['xref'][$index] = $row[1];
                    }
                    break;

                case 2: // compressed objects
                    // $row[1] = object number of the object stream in which this object is stored
                    // $row[2] = index of this object within the object stream
                    $index = $row[1].'_0_'.$row[2];
                    $xref['xref'][$index] = -1;
                    break;

                default: // null objects
                    break;
            }
            ++$obj_num;
            if (isset($index_blocks)) {
                // reduce the number of remaining objects
                --$index_blocks[0][1];
                if (0 == $index_blocks[0][1]) {
                    // remove the actual used /Index entry
                    array_shift($index_blocks);
                    if (0 < \count($index_blocks)) {
                        // load the first object number of the following /Index entry
                        $obj_num = $index_blocks[0][0];
                    } else {
                        // if there are no more entries, remove $index_blocks to avoid actions on an empty array
                        unset($index_blocks);
                    }
                }
            }
        }
    } // end decoding data

    if (isset($prevxref)) {
        // get previous xref
        $xref = $this->getXrefData($pdfData, $prevxref, $xref);
    }

    return $xref;
}
/**
 * Build the regular expression matching the header "[object number] [generation number] obj".
 *
 * @param array $objRefs object number at index 0, generation number at index 1
 *
 * @return string regular expression including delimiters
 */
protected function getObjectHeaderPattern(array $objRefs): string
{
    // the separators may be any whitespace character allowed by the PDF specification
    return sprintf(
        '/%s%s%s%sobj/',
        $objRefs[0],
        $this->config->getPdfWhitespacesRegex(),
        $objRefs[1],
        $this->config->getPdfWhitespacesRegex()
    );
}
/**
 * Length of an object header such as "4 0 obj".
 *
 * @param array $objRefs object number at index 0, generation number at index 1
 *
 * @return int number of characters of the header
 */
protected function getObjectHeaderLen(array $objRefs): int
{
    // 2 whitespaces + strlen("obj") = 5 fixed characters
    $fixedLen = 5;
    $numberLen = \strlen($objRefs[0]);
    $generationLen = \strlen($objRefs[1]);

    return $fixedLen + $numberLen + $generationLen;
}
/**
 * Get content of indirect object.
 *
 * @param string $pdfData  PDF data
 * @param array  $xref     Cross-reference data, forwarded to decodeStream()
 * @param string $objRef   Object number and generation number separated by underscore character
 * @param int    $offset   Object offset
 * @param bool   $decoding If true decode streams
 *
 * @return array containing object data; ['null', 'null', $offset] when no matching
 *               object header is found at $offset
 *
 * @throws \Exception if invalid object reference found
 */
protected function getIndirectObject(string $pdfData, array $xref, string $objRef, int $offset = 0, bool $decoding = true): array
{
    /*
     * build indirect object header
     */
    // $objHeader = "[object number] [generation number] obj"
    $objRefArr = explode('_', $objRef);
    if (2 !== \count($objRefArr)) {
        throw new \Exception('Invalid object reference for $obj.');
    }

    $objHeaderLen = $this->getObjectHeaderLen($objRefArr);

    /*
     * check if we are in position
     */
    // ignore whitespace characters at offset
    $offset += strspn($pdfData, $this->config->getPdfWhitespaces(), $offset);
    // ignore leading zeros for object number
    $offset += strspn($pdfData, '0', $offset);
    if (0 == preg_match($this->getObjectHeaderPattern($objRefArr), substr($pdfData, $offset, $objHeaderLen))) {
        // an indirect reference to an undefined object shall be considered a reference to the null object
        return ['null', 'null', $offset];
    }

    /*
     * get content
     */
    // starting position of object content
    $offset += $objHeaderLen;
    $objContentArr = [];
    $i = 0; // object main index
    do {
        $oldOffset = $offset;
        // get element
        $element = $this->getRawObject($pdfData, $offset);
        $offset = $element[2];
        // decode stream using stream's dictionary information (the element just before the stream)
        if ($decoding && ('stream' === $element[0]) && (isset($objContentArr[$i - 1][0])) && ('<<' === $objContentArr[$i - 1][0])) {
            $element[3] = $this->decodeStream($pdfData, $xref, $objContentArr[$i - 1][1], $element[1]);
        }
        $objContentArr[$i] = $element;
        ++$i;
        // stop at 'endobj' or when the parser makes no progress
    } while (('endobj' !== $element[0]) && ($offset !== $oldOffset));
    // remove closing delimiter
    array_pop($objContentArr);

    /*
     * return raw object content
     */
    return $objContentArr;
}
/**
 * Get the content of object, resolving indirect object reference if necessary.
 *
 * Anything that is not an 'objref' is handed back unchanged. References are served from
 * the $this->objects cache when present; otherwise they are parsed (without stream
 * decoding) if $xref has an entry keyed by the reference, and cached.
 *
 * @param string $pdfData PDF data
 * @param array  $xref    Looked up directly by object reference
 * @param array  $obj     Object value
 *
 * @return array containing object data
 *
 * @throws \Exception
 */
protected function getObjectVal(string $pdfData, $xref, array $obj): array
{
    // direct objects need no resolution
    if ('objref' != $obj[0]) {
        return $obj;
    }

    $reference = $obj[1];

    // this object has been already parsed
    if (isset($this->objects[$reference])) {
        return $this->objects[$reference];
    }

    // unknown reference: return it as it is
    if (!isset($xref[$reference])) {
        return $obj;
    }

    // parse new object and remember it
    $this->objects[$reference] = $this->getIndirectObject($pdfData, $xref, $reference, $xref[$reference], false);

    return $this->objects[$reference];
}
* Get object type, raw value and offset to next object
* @param int $offset Object offset
* @return array containing object type, raw value and offset to next object
// Tokenizer: reads one raw PDF object starting at $offset and returns
// [type, value, offset just after the object].
// NOTE(review): this excerpt has lost braces and several statements (for example the
// arguments of the preg_match call in the 'stream' branch); the comments below only
// describe what is still visible.
protected function getRawObject(string $pdfData, int $offset = 0): array
$objtype = ''; // object type to be returned
$objval = ''; // object value to be returned
// skip initial white space chars
$offset += strspn($pdfData, $this->config->getPdfWhitespaces(), $offset);
// get first char
$char = $pdfData[$offset];
// get object type
switch ($char) {
case '%': // \x25 PERCENT SIGN
// skip comment and search for next token
// a comment runs up to the next CR or LF
$next = strcspn($pdfData, "\r\n", $offset);
if ($next > 0) {
$offset += $next;
// continue with whatever follows the comment
return $this->getRawObject($pdfData, $offset);
case '/': // \x2F SOLIDUS
// name object
$objtype = $char;
// a name ends at the first whitespace or delimiter character; at most 256 bytes are scanned
// NOTE(review): '/' itself is in the stop set, so the offset presumably has to be moved
// past the leading solidus before this call — that step is not visible here
$span = strcspn($pdfData, "\x00\x09\x0a\x0c\x0d\x20\n\t\r\v\f\x28\x29\x3c\x3e\x5b\x5d\x7b\x7d\x2f\x25", $offset, 256);
if ($span > 0) {
$objval = substr($pdfData, $offset, $span); // unescaped value
$offset += $span;
case '(': // \x28 LEFT PARENTHESIS
case ')': // \x29 RIGHT PARENTHESIS
// literal string object
$objtype = $char;
$strpos = $offset;
if ('(' == $char) {
// literal strings may contain nested parentheses: track the nesting depth
$open_bracket = 1;
while ($open_bracket > 0) {
// guard against running past the end of the data
if (!isset($pdfData[$strpos])) {
$ch = $pdfData[$strpos];
switch ($ch) {
case '\\': // REVERSE SOLIDUS (5Ch) (Backslash)
// skip next character
case '(': // LEFT PARENHESIS (28h)
case ')': // RIGHT PARENTHESIS (29h)
// value is the text between the outer parentheses
$objval = substr($pdfData, $offset, $strpos - $offset - 1);
$offset = $strpos;
case '[': // \x5B LEFT SQUARE BRACKET
case ']': // \x5D RIGHT SQUARE BRACKET
// array object
$objtype = $char;
if ('[' == $char) {
// get array content
$objval = [];
// read elements until the closing bracket, or until no progress is made
do {
$oldOffset = $offset;
// get element
$element = $this->getRawObject($pdfData, $offset);
$offset = $element[2];
$objval[] = $element;
} while ((']' != $element[0]) && ($offset != $oldOffset));
// remove closing delimiter
case '<': // \x3C LESS-THAN SIGN
case '>': // \x3E GREATER-THAN SIGN
// a doubled character ("<<" or ">>") delimits a dictionary
if (isset($pdfData[$offset + 1]) && ($pdfData[$offset + 1] == $char)) {
// dictionary object
$objtype = $char.$char;
$offset += 2;
if ('<' == $char) {
// get array content
$objval = [];
// read elements until ">>", or until no progress is made
do {
$oldOffset = $offset;
// get element
$element = $this->getRawObject($pdfData, $offset);
$offset = $element[2];
$objval[] = $element;
} while (('>>' != $element[0]) && ($offset != $oldOffset));
// remove closing delimiter
} else {
// hexadecimal string object
$objtype = $char;
// length of the run of hex digits and whitespace
$span = strspn($pdfData, "0123456789abcdefABCDEF\x09\x0a\x0c\x0d\x20", $offset);
// character right after that run (null at end of data)
$dataToCheck = $pdfData[$offset + $span] ?? null;
if ('<' == $char && $span > 0 && '>' == $dataToCheck) {
// remove white space characters
$objval = strtr(substr($pdfData, $offset, $span), $this->config->getPdfWhitespaces(), '');
$offset += $span + 1;
} elseif (false !== ($endpos = strpos($pdfData, '>', $offset))) {
// not a clean hex string: skip to just after the next '>'
$offset = $endpos + 1;
// keyword and number tokens, tested in this fixed order
if ('endobj' == substr($pdfData, $offset, 6)) {
// indirect object
$objtype = 'endobj';
$offset += 6;
} elseif ('null' == substr($pdfData, $offset, 4)) {
// null object
$objtype = 'null';
$offset += 4;
$objval = 'null';
} elseif ('true' == substr($pdfData, $offset, 4)) {
// boolean true object
$objtype = 'boolean';
$offset += 4;
$objval = 'true';
} elseif ('false' == substr($pdfData, $offset, 5)) {
// boolean false object
$objtype = 'boolean';
$offset += 5;
$objval = 'false';
} elseif ('stream' == substr($pdfData, $offset, 6)) {
// start stream object
$objtype = 'stream';
$offset += 6;
// skip the single end-of-line (LF or CRLF) that follows the keyword
if (1 == preg_match('/^([\r]?[\n])/isU', substr($pdfData, $offset, 4), $matches)) {
$offset += \strlen($matches[0]);
// NOTE(review): pattern and arguments of this call are not visible in this excerpt
$pregResult = preg_match(
if (1 == $pregResult) {
// stream data runs from $offset up to the start of the match
$objval = substr($pdfData, $offset, $matches[0][1] - $offset);
$offset = $matches[1][1];
} elseif ('endstream' == substr($pdfData, $offset, 9)) {
// end stream object
$objtype = 'endstream';
$offset += 9;
} elseif (1 == preg_match('/^([0-9]+)[\s]+([0-9]+)[\s]+R/iU', substr($pdfData, $offset, 33), $matches)) {
// indirect object reference
$objtype = 'objref';
$offset += \strlen($matches[0]);
// stored as "[object number]_[generation number]"
$objval = (int) $matches[1].'_'.(int) $matches[2];
} elseif (1 == preg_match('/^([0-9]+)[\s]+([0-9]+)[\s]+obj/iU', substr($pdfData, $offset, 33), $matches)) {
// object start
$objtype = 'obj';
$objval = (int) $matches[1].'_'.(int) $matches[2];
$offset += \strlen($matches[0]);
} elseif (($numlen = strspn($pdfData, '+-.0123456789', $offset)) > 0) {
// numeric object
$objtype = 'numeric';
// kept as the raw string, not converted to a number
$objval = substr($pdfData, $offset, $numlen);
$offset += $numlen;
return [$objtype, $objval, $offset];
* Get Cross-Reference (xref) table and trailer data from PDF document data.
* @param int $offset xref offset (if known)
* @param array $xref previous xref array (if any)
* @return array containing xref and trailer data
* @throws \Exception if it was unable to find startxref
* @throws \Exception if it was unable to find xref
// Locates the cross-reference data (classic table or cross-reference stream) and
// delegates to decodeXref() or decodeXrefStream(); returns the merged xref/trailer array.
// NOTE(review): the arguments of the first preg_match and of preg_match_all are not
// visible in this excerpt.
protected function getXrefData(string $pdfData, int $offset = 0, array $xref = []): array
$startxrefPreg = preg_match(
// no offset given: search the document for the last startxref
if (0 == $offset) {
// find last startxref
$pregResult = preg_match_all(
$pdfData, $matches,
if (0 == $pregResult) {
throw new \Exception('Unable to find startxref');
// keep only the last match
$matches = array_pop($matches);
$startxref = $matches[1];
} elseif (strpos($pdfData, 'xref', $offset) == $offset) {
// Already pointing at the xref table
$startxref = $offset;
} elseif (preg_match('/([0-9]+[\s][0-9]+[\s]obj)/i', $pdfData, $matches, \PREG_OFFSET_CAPTURE, $offset)) {
// Cross-Reference Stream object
$startxref = $offset;
} elseif ($startxrefPreg) {
// startxref found
$startxref = $matches[1][0];
} else {
throw new \Exception('Unable to find startxref');
// an offset beyond the end of the data cannot be valid
if ($startxref > \strlen($pdfData)) {
throw new \Exception('Unable to find xref (PDF corrupted?)');
// check xref position
// NOTE(review): loose comparison — strpos() returns false when 'xref' is absent,
// and false == 0 holds, so a $startxref of 0 would select this branch; confirm intended
if (strpos($pdfData, 'xref', $startxref) == $startxref) {
// Cross-Reference
$xref = $this->decodeXref($pdfData, $startxref, $xref);
} else {
// Cross-Reference Stream
$xref = $this->decodeXrefStream($pdfData, $startxref, $xref);
if (empty($xref)) {
throw new \Exception('Unable to find xref');
return $xref;
* Parses PDF data and returns extracted data as array.
* @param string $data PDF data to parse
* @return array array of parsed PDF document objects
* @throws \Exception if empty PDF data given
* @throws \Exception if PDF data missing %PDF header
// Entry point: validates the input, reads the cross-reference data and parses every
// object that has a positive offset in it.
// NOTE(review): the end of this method, including its return statement, is not visible
// in this excerpt.
public function parseData(string $data): array
if (empty($data)) {
throw new \Exception('Empty PDF data given.');
// find the pdf header starting position
if (false === ($trimpos = strpos($data, '%PDF-'))) {
throw new \Exception('Invalid PDF data: missing %PDF header.');
// get PDF content string
// drop anything that precedes the "%PDF-" header
$pdfData = $trimpos > 0 ? substr($data, $trimpos) : $data;
// get xref and trailer data
$xref = $this->getXrefData($pdfData);
// parse all document objects
$objects = [];
foreach ($xref['xref'] as $obj => $offset) {
// entries with a non-positive offset (decodeXrefStream stores -1 for compressed objects) are skipped
if (!isset($objects[$obj]) && ($offset > 0)) {
// decode objects with positive offset
$objects[$obj] = $this->getIndirectObject($pdfData, $xref, $obj, $offset, true);
os3/PdfParser/XObject/Form.php

@ -0,0 +1,51 @@
* @file
* This file is part of the PdfParser library.
* @author Sébastien MALOT <>
* @date 2017-01-03
* @license LGPLv3
* @url <>
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <>
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <>.
namespace Smalot\PdfParser\XObject;
use Smalot\PdfParser\Header;
use Smalot\PdfParser\Page;
use Smalot\PdfParser\PDFObject;
* Class Form
// Form XObject, modelled as a Page.
// NOTE(review): the rest of getText(), including its return statement, is not visible
// in this excerpt.
class Form extends Page
public function getText(Page $page = null): string
// build an empty header for this document
$header = new Header([], $this->document);
// wrap this form's content in a PDFObject that shares the document and config
$contents = new PDFObject($this->document, $header, $this->content, $this->config);
os3/PdfParser/XObject/Image.php

@ -0,0 +1,47 @@
* @file
* This file is part of the PdfParser library.
* @author Sébastien MALOT <>
* @date 2017-01-03
* @license LGPLv3
* @url <>
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <>
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* GNU Lesser General Public License for more details.
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <>.
namespace Smalot\PdfParser\XObject;
use Smalot\PdfParser\Page;
use Smalot\PdfParser\PDFObject;
/**
 * Class Image
 *
 * Image XObject. It never yields any text.
 */
class Image extends PDFObject
{
    /**
     * @param Page|null $page Unused
     *
     * @return string always the empty string
     */
    public function getText(?Page $page = null): string
    {
        // explicit nullable type: implicit "Page $page = null" is deprecated as of PHP 8.4
        return '';
    }
}

@ -0,0 +1,75 @@
* @file This file is part of the PdfParser library.
* @author Konrad Abicht <>
* @date 2021-02-09
* @license LGPLv3
* @url <>
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <>
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <>.
* --------------------------------------------------------------------------------------
* About:
* This file provides an alternative to the Composer-approach.
* Include it into your project and all required files of PDFParser will be loaded automatically.
* Please use it only, if Composer is not available.
* How to use:
* 1. include this file as it is OR copy and rename it as you like (and then include it)
* 2. afterwards you can use PDFParser classes
* Done.
/**
 * Loads all PHP files found in a given folder.
 * Calls itself recursively for all sub folders.
 *
 * Files are loaded with require_once, so a file that was already included is skipped.
 * Entries without a ".php" extension are ignored so that stray files
 * (licenses, readmes) are not executed or echoed.
 *
 * @param string $dir
 */
function requireFilesOfFolder($dir)
{
    foreach (new DirectoryIterator($dir) as $fileInfo) {
        if ($fileInfo->isDot()) {
            continue;
        }

        if ($fileInfo->isDir()) {
            // descend into the sub folder
            requireFilesOfFolder($fileInfo->getPathname());
        } elseif ('php' === strtolower($fileInfo->getExtension())) {
            require_once $fileInfo->getPathname();
        }
    }
}
$rootFolder = __DIR__;
// Manually require files, which can't be loaded automatically that easily.
require_once $rootFolder.'/Element.php';
require_once $rootFolder.'/PDFObject.php';
require_once $rootFolder.'/Font.php';
require_once $rootFolder.'/Page.php';
require_once $rootFolder.'/Element/ElementString.php';
require_once $rootFolder.'/Encoding/AbstractEncoding.php';
* Load the rest of PDFParser files from /src/Smalot/PDFParser
* Don't worry, it won't load files multiple times.

os3/admin.php

File diff suppressed because it is too large Load diff

os3/config.ini.php
View file

@ -0,0 +1,18 @@
<?php /* **************************************************************
* Orca Search - User Configuration
/* ***** MySQL ***************************************************** */
$_DDATA['hostname'] = 'localhost';
$_DDATA['username'] = 'username';
$_DDATA['password'] = 'password';
$_DDATA['database'] = 'database';
$_DDATA['tbprefix'] = 'os_';
/* ***** Administration ******************************************** */
$_RDATA['admin_username'] = 'admin';
$_RDATA['admin_password'] = 'password';

os3/config.php
View file

@ -0,0 +1,740 @@
<?php /* **************************************************************
* Orca PHP Search - Global Configuration
$_DDATA = array();
$_RDATA = array();
require __DIR__.'/config.ini.php';
// ***** Connect to the database
$_DDATA['pdo'] = new PDO(
$err = $_DDATA['pdo']->errorInfo();
if ($err[0]) die('Fatal database connection error: '.$err[0]);
$_DDATA['pdo']->setAttribute(PDO::ATTR_EMULATE_PREPARES, false);
$_DDATA['tables'] = $_DDATA['pdo']->query(
'SHOW TABLES FROM `'.$_DDATA['database'].'` LIKE \''.$_DDATA['tbprefix'].'%\';'
$err = $_DDATA['tables']->errorInfo();
if ($err[0] == '00000') {
$_DDATA['tables'] = $_DDATA['tables']->fetchAll(PDO::FETCH_NUM);
foreach($_DDATA['tables'] as $key => $value)
$_DDATA['tables'][$key] = $value[0];
} else die('Fatal database read error: '.$err[2]);
// ***** Create the configuration table if it doesn't exist
if (!in_array($_DDATA['tbprefix'].'config', $_DDATA['tables'])) {
$create = $_DDATA['pdo']->query(
'CREATE TABLE `'.$_DDATA['tbprefix'].'config` (
`version` VARCHAR(8) NOT NULL,
`admin_email` TEXT NOT NULL,
`admin_install_domain` TINYTEXT NOT NULL,
`admin_index_pagination` SMALLINT UNSIGNED NOT NULL,
`sp_starting` TEXT NOT NULL,
`sp_limit_filesize` SMALLINT UNSIGNED NOT NULL,
`sp_ignore_ext` TEXT NOT NULL,
`sp_ignore_url` TEXT NOT NULL,
`sp_ignore_css` TEXT NOT NULL,
`sp_require_url` TEXT NOT NULL,
`sp_title_strip` TEXT NOT NULL,
`sp_timeout_crawl` SMALLINT UNSIGNED NOT NULL,
`sp_interval_start` TIME NOT NULL,
`sp_interval_stop` TIME NOT NULL,
`sp_timezone` TINYTEXT NOT NULL,
`sp_time_start` INT UNSIGNED NOT NULL,
`sp_time_end` INT UNSIGNED NOT NULL,
`sp_time_end_success` INT UNSIGNED NOT NULL,
`sp_data_transferred` INT UNSIGNED NOT NULL,
`sp_data_stored` INT UNSIGNED NOT NULL,
`sp_links_crawled` SMALLINT UNSIGNED NOT NULL,
`sp_autodelete` BOOLEAN NOT NULL,
`sp_ifmodifiedsince` BOOLEAN NOT NULL,
`sp_cookies` BOOLEAN NOT NULL,
`sp_sitemap_file` TINYTEXT NOT NULL,
`sp_sitemap_hostname` TINYTEXT NOT NULL,
`sp_useragent` TINYTEXT NOT NULL,
`sp_crawling` BOOLEAN NOT NULL,
`sp_cancel` BOOLEAN NOT NULL,
`sp_progress` TINYTEXT NOT NULL,
`sp_email_success` BOOLEAN NOT NULL,
`sp_email_failure` BOOLEAN NOT NULL,
`s_limit_term_length` TINYINT UNSIGNED NOT NULL,
`s_limit_results` TINYINT UNSIGNED NOT NULL,
`s_results_pagination` TINYINT UNSIGNED NOT NULL,
`s_limit_matchtext` SMALLINT UNSIGNED NOT NULL,
`s_weights` TINYTEXT NOT NULL,
`s_weight_css` TEXT NOT NULL,
`s_show_orphans` BOOLEAN NOT NULL,
`s_show_filetype_html` BOOLEAN NOT NULL,
`s_charset` TINYTEXT NOT NULL,
`s_result_template` TEXT NOT NULL,
`s_limit_query_log` TINYINT UNSIGNED NOT NULL,
`jw_hostname` TINYTEXT NOT NULL,
PRIMARY KEY (`version`)
) ENGINE = MyISAM, COLLATE = utf8_general_ci;'
$testConf = $_DDATA['pdo']->query(
'SELECT `version` FROM `'.$_DDATA['tbprefix'].'config`;'
$err = $testConf->errorInfo();
if ($err[0] == '00000') {
$testConf = $testConf->fetchAll();
} else die('Fatal configuration table read error: '.$err[2]);
// ***** Set default configuration table values
if (!count($testConf)) {
$insert = $_DDATA['pdo']->query(
'INSERT INTO `'.$_DDATA['tbprefix'].'config` SET
`sp_ignore_ext`=\'7z au aiff avi bin bz bz2 cab cda cdr class com css csv doc docx dll dtd dwg dxf eps exe gif hqx ico image jar jav java jfif jpeg jpg js kbd mid mkv moov mov movie mp3 mp4 mpeg mpg ocx ogg png pps ppt ps psd qt ra ram rar rm rpm rtf scr sea sit svg swf sys tar.gz tga tgz tif tiff ttf uu uue vob wav woff woff2 xls xlsx z zip\',
`sp_ignore_css`=\'.noindex footer form head nav noscript select style svg textarea\',
`sp_useragent`=\'OrcaPHPSearch/3.0 (\',
`s_weight_css`=\'.important dt h1 h2 h3\',
// ***** Create the crawldata table if it doesn't exist
if (!in_array($_DDATA['tbprefix'].'crawldata', $_DDATA['tables'])) {
$create = $_DDATA['pdo']->query(
'CREATE TABLE `'.$_DDATA['tbprefix'].'crawldata` (
`title` TEXT NOT NULL,
`description` TEXT NOT NULL,
`keywords` TEXT NOT NULL,
`weighted` TEXT NOT NULL,
`links` TEXT NOT NULL,
`content_mime` TINYTEXT NOT NULL,
`content_charset` TINYTEXT NOT NULL,
`content_checksum` BINARY(20) NOT NULL,
`status_noindex` TINYTEXT NOT NULL,
`flag_unlisted` BOOLEAN NOT NULL,
`flag_updated` BOOLEAN NOT NULL,
`last_modified` INT NOT NULL,
`priority` DECIMAL(2,1) NOT NULL,
UNIQUE `content_checksum` (`content_checksum`)
) ENGINE = MyISAM, COLLATE = utf8_general_ci;'
// ***** Create the query log table if it doesn't exist
if (!in_array($_DDATA['tbprefix'].'query', $_DDATA['tables'])) {
$create = $_DDATA['pdo']->query(
'CREATE TABLE `'.$_DDATA['tbprefix'].'query` (
) ENGINE = MyISAM, COLLATE = utf8_general_ci;'
* Generates a readable filesize string from an integer byte-count
* $abbr => Optional <abbr> tag with title attribute added
function OS_readSize($bytes, $abbr = false) {
  // Formats a byte count as a short human-readable size string.
  //   $bytes => Byte count; cast to int before formatting
  //   $abbr  => When true, wrap the unit symbol in an <abbr> tag whose
  //             title attribute spells the unit out
  // Returns e.g. '1.5 KiB' or '0 B'; an empty string for negative counts.
  $bytes = (int)$bytes;
  if ($bytes < 0) return '';

  // Each tier: minimum byte count, divisor, unit symbol, spelled-out unit.
  // The minimums sit at about 95% of the unit size, so values just short
  // of a full unit are already displayed in the larger unit.
  $tiers = array(
    array(1020054733, 1073741824, 'GiB', 'gibibytes'),
    array(996148, 1048576, 'MiB', 'mebibytes'),
    array(973, 1024, 'KiB', 'kibibytes'),
    array(0, 1, 'B', 'bytes')
  );

  foreach ($tiers as $tier) {
    list($minimum, $divisor, $symbol, $title) = $tier;
    if ($bytes < $minimum) continue;

    // Plain bytes stay an integer; larger units are rounded to one decimal
    $amount = ($divisor > 1) ? round(($bytes / $divisor), 1) : $bytes;
    $unit = ($abbr) ? '<abbr title="'.$title.'">'.$symbol.'</abbr>' : $symbol;
    return $amount.' '.$unit;
  }

  return '';
}
* Set an $_ODATA value by updating it in the config database
function OS_setValue($columnName, $value) {
  // Writes $value into the given column of the config table and, when the
  // database reports a changed row, mirrors it into $_ODATA.
  // Returns the statement's row count; 0 when the column is not a known
  // $_ODATA key or when the statement reports an error.
  global $_ODATA, $_DDATA;

  // Only keys already present in $_ODATA are accepted as column names
  if (!isset($_ODATA[$columnName]))
    return 0;

  $sql = 'UPDATE `'.$_DDATA['tbprefix'].'config` SET `'.$columnName.'`=:value;';
  $statement = $_DDATA['pdo']->prepare($sql);
  $statement->execute(array('value' => $value));

  $state = $statement->errorInfo();
  if ($state[0] != '00000') {
    // Errors are only queued when an error list exists in the session
    if (isset($_SESSION['error']))
      $_SESSION['error'][] = 'Could not set value \''.$columnName.'\' in config database.';
    return 0;
  }

  if ($statement->rowCount())
    $_ODATA[$columnName] = $value;

  return $statement->rowCount();
}
* Get a single live $_ODATA value from the database
function OS_getValue($columnName) {
  // Re-reads one config column from the database, refreshes the cached
  // copy in $_ODATA and returns it.
  // Returns null for a name that is not an $_ODATA key; otherwise the
  // refreshed value, or the previously cached one if the read failed or
  // the table returned no row.
  global $_ODATA, $_DDATA;

  // Unknown names are rejected up front, so no SQL is built from them and
  // the return value for this case is explicit
  if (!isset($_ODATA[$columnName]))
    return null;

  $select = $_DDATA['pdo']->query(
    'SELECT `'.$columnName.'` FROM `'.$_DDATA['tbprefix'].'config`;'
  );
  $err = $select->errorInfo();
  if ($err[0] == '00000') {
    $rows = $select->fetchAll();
    if (count($rows))
      $_ODATA[$columnName] = $rows[0][$columnName];

  // Errors are only queued when an error list exists in the session
  } else if (isset($_SESSION['error']))
    $_SESSION['error'][] = 'Could not get live value of \''.$columnName.'\' from config database.';

  return $_ODATA[$columnName];
}
* Initialize a generic cURL connection
* - If creating a cURL connection fails, we should try some fallbacks
function OS_getConnection() {
  // Builds a cURL handle preconfigured from $_ODATA (user agent and
  // timeouts). Returns the handle, or false when the cURL extension is
  // not available.
  global $_ODATA;

  if (!function_exists('curl_init')) {
    // Fall back to file_get_contents with stream context?
    // Fall back to socket connection?
    return false;
  }

  $handle = curl_init();

  // Applied one at a time, in this order
  $settings = array(
    CURLOPT_USERAGENT => $_ODATA['sp_useragent'],
    CURLOPT_RETURNTRANSFER => true,
    CURLOPT_CONNECTTIMEOUT => $_ODATA['sp_timeout_url'],
    CURLOPT_TIMEOUT => $_ODATA['sp_timeout_url'],
    CURLOPT_ENCODING => 'gzip',
    CURLOPT_FILETIME => true
  );
  foreach ($settings as $option => $setting)
    curl_setopt($handle, $option, $setting);

  return $handle;
}
// ***** Pull the configuration data from the database
$_ODATA = $_DDATA['pdo']->query(
'SELECT * FROM `'.$_DDATA['tbprefix'].'config`;'
ini_set('mbstring.substitute_character', 'none');
// ***** Determine the install domain from run location
if (!$_ODATA['admin_install_domain']) {
isset($_SERVER['HTTP_HOST']) && $_SERVER['HTTP_HOST']) {
if (isset($_SERVER['SCRIPT_URI']) && $_SERVER['SCRIPT_URI']) {
$psuri = parse_url($_SERVER['SCRIPT_URI']);
if ($psuri && isset($psuri['port']) && !is_null($psuri['port']))
$base .= ':'.$psuri['port'];
} else if (isset($_SERVER['SERVER_PORT'])) {
if ($_SERVER['SERVER_PORT'] == '80') {
if ($_SERVER['REQUEST_SCHEME'] != 'http')
$base .= ':'.$_SERVER['SERVER_PORT'];
} else if ($_SERVER['SERVER_PORT'] == '443') {
if ($_SERVER['REQUEST_SCHEME'] != 'https')
$base .= ':'.$_SERVER['SERVER_PORT'];
} else $base .= ':'.$_SERVER['SERVER_PORT'];
OS_setValue('admin_install_domain', $base);
if (!$_ODATA['sp_starting']) {
if (!$_ODATA['admin_install_domain']) {
die('Fatal error, could not determine install domain. Please run this script from a web browser.');
} else OS_setValue('sp_starting', $_ODATA['admin_install_domain'].'/');
// ***** Load and Initialize PHPMailer
if (!class_exists('PHPMailer\PHPMailer\PHPMailer')) {
if (file_exists(__DIR__.'/PHPMailer/src/PHPMailer.php')) {
include __DIR__.'/PHPMailer/src/PHPMailer.php';
include __DIR__.'/PHPMailer/src/Exception.php';
include __DIR__.'/PHPMailer/src/SMTP.php';
if (class_exists('PHPMailer\PHPMailer\PHPMailer')) {
$_MAIL = new PHPMailer\PHPMailer\PHPMailer();
$_MAIL->FromName = "Orca PHP Search Crawler";
$_MAIL->CharSet = $_ODATA['s_charset'];
if (count($ad = $_MAIL->parseAddresses($_ODATA['admin_email'])))
foreach ($ad as $a) $_MAIL->AddAddress($a['address'], $a['name']);
} else $_MAIL = false;
// ***** Load the default Search Result Template
if (!$_ODATA['s_result_template']) {
OS_setValue('s_result_template', <<<ORCAPHP
<section id="os_results">
<!-- Orca PHP Search {{version}} - HTML Template -->
Searching within category:
Showing results
<var>{{from}}</var> &ndash; <var>{{to}}</var>
of <var>{{of}}</var>
in <var>{{in}}</var> seconds
<ol start="{{from}}">
<span title="File type">{{filetype}}</span>
<a href="{{url}}" title="{{description}}">{{{title_highlight}}}</a>
<small title="Category">{{category}}</small>
<small title="Relevance">({{relevance}})</small>
<a href="?q={{request_q}}&page={{page_minus1}}">Previous</a>
<a href="?q={{request_q}}&page={{index}}">{{index}}</a>
<a href="?q={{request_q}}&page={{page_plus1}}">Next</a>
Sorry, no results were found.
Try this search in
<a href="?q={{request_q}}">all categories?</a>
Please enter your search terms below.
<li>Search terms with fewer than {{limit_term_length}} characters are ignored</li>
<li>Enclose groups of terms in quotes ("") to search for phrases</li>
<li>Prefix terms with a plus-sign (+) to make them important</li>
<li>Prefix terms with a minus-sign (-) or exclamation point (!) to exclude terms</li>
<form action="{{form_action}}" method="get">
<input type="text" name="q" value="{{request_q}}" placeholder="Search...">
<select name="c">
<option value="{{name}}"{{#selected}} selected="selected"{{/selected}}>
<button type="submit">
There are no searchable pages in the database.
Please try again later.
<a href="" target="_blank">
Orca PHP Script
// {{{{{ Initialize the Mustache templating engine
// View object for the search result template: render() passes this
// instance to the Mustache engine as the template's data.
class OS_Mustache {
// Unset until addError() is first called; then an object holding an
// `error_list` array of messages
public $errors;
public function __construct() {}
// Appends $text to the error list, creating the container on first use
public function addError($text) {
if (!$this->errors) {
$this->errors = new stdClass();
$this->errors->error_list = array();
$this->errors->error_list[] = $text;
// We'll only autoload the Mustache engine if we need it
// Renders $_ODATA['s_result_template'] with this object as its data and
// echoes the result.
// NOTE(review): the autoloader file is pulled in with a plain `require`
// on every call - confirm render() is only ever called once per request.
public function render() {
global $_ODATA;
require __DIR__.'/Mustache/Autoloader.php';
$output = new Mustache_Engine(array('entity_flags' => ENT_QUOTES));
echo $output->render($_ODATA['s_result_template'], $this);
// Purge entries from the search query log older than
// 's_limit_query_log' ago
$deleteold = $_DDATA['pdo']->prepare(
'DELETE FROM `'.$_DDATA['tbprefix'].'query` WHERE `stamp`<:cutoff;'
$deleteold->execute(array('cutoff' => time() - $_ODATA['s_limit_query_log'] * 86400));
$err = $deleteold->errorInfo();
if ($err[0] != '00000') {
if (isset($_SESSION['error']))
$_SESSION['error'][] = 'Database error purging old records from the query log.';
// Reduce search result cache size to within limits
$_RDATA['s_cache_size'] = 0;
$_RDATA['s_cached_searches'] = 0;
$cachesize = $_DDATA['pdo']->query(
'SELECT COUNT(`cache`) AS `count`, SUM(LENGTH(`cache`)) AS `size` FROM `'.$_DDATA['tbprefix'].'query` WHERE `cache`<>\'\';'
$err = $cachesize->errorInfo();
if ($err[0] == '00000') {
$cachesize = $cachesize->fetchAll();
$_RDATA['s_cached_searches'] = $cachesize[0]['count'];
// If search result cache is over the size limit
if ($cachesize[0]['size'] > $_ODATA['s_limit_cache'] * 1024) {
$select = $_DDATA['pdo']->query(
'SELECT `query`, `cache` FROM `'.$_DDATA['tbprefix'].'query` WHERE `cache`<>\'\' ORDER BY `stamp` ASC;'
$err = $select->errorInfo();
if ($err[0] == '00000') {
// Find out how many cache entries we need to delete, sorted by
// the oldest cached search queries first
$toDel = array();
$select = $select->fetchAll();
do {
$first = array_shift($select);
$toDel[$first['query']] = strlen($first['cache']);
} while ($cachesize[0]['size'] - array_sum($toDel) > $_ODATA['s_limit_cache'] * 1024);
// Delete cache entries with the oldest `cache` values until we
// are below the cache size limit
foreach ($toDel as $del => $size) {
$update = $_DDATA['pdo']->prepare(
'UPDATE `'.$_DDATA['tbprefix'].'query` SET `cache`=\'\' WHERE `query`=:query;'
$update->execute(array('query' => $del));
if (!$update->rowCount()) {
if (isset($_SESSION['error']))
$_SESSION['error'][] = 'Database error while limiting the search result cache size.';
} else {
$cachesize[0]['size'] -= $size;
} else if (isset($_SESSION['error']))
$_SESSION['error'][] = 'Could not read from search result cache.';
$_RDATA['s_cache_size'] = $cachesize[0]['size'];
} else if (isset($_SESSION['error']))
$_SESSION['error'][] = 'Could not read search result cache size.';
// Get a list of all categories in the search database
$_RDATA['s_category_list'] = array('<none>' => 0);
$_RDATA['s_pages_stored'] = 0;
$categories = $_DDATA['pdo']->query(
'SELECT `category`, COUNT(`category`) AS `count`
FROM `'.$_DDATA['tbprefix'].'crawldata`
GROUP BY `category` ORDER BY `category`;'
$err = $categories->errorInfo();
if ($err[0] == '00000') {
$categories = $categories->fetchAll();
foreach ($categories as $category) {
$_RDATA['s_category_list'][$category['category']] = $category['count'];
$_RDATA['s_pages_stored'] += $category['count'];
} else if (isset($_SESSION['error']))
$_SESSION['error'][] = 'Could not read categories from the search database.';
// Count base URLs / domains from the crawldata: if there is only one
// in the search database then we don't have to show it in a number of
// places
// Map of base URL => number of crawldata rows, most frequent first
$_RDATA['s_crawldata_domains'] = array();
$domains = $_DDATA['pdo']->query(
  'SELECT `url_base`, COUNT(`url_base`) as `count`
     FROM `'.$_DDATA['tbprefix'].'crawldata`
       GROUP BY `url_base` ORDER BY `count` DESC;'
);
$err = $domains->errorInfo();
if ($err[0] == '00000') {
  $domains = $domains->fetchAll();
  foreach ($domains as $domain)
    $_RDATA['s_crawldata_domains'][$domain['url_base']] = $domain['count'];

// Only queue the message when an error list exists in the session, as
// every other error path in this file does
} else if (isset($_SESSION['error']))
  $_SESSION['error'][] = 'Could not read domain count data from search database.';

// With exactly one base URL in the crawldata, store it as `jw_hostname`
if (count($_RDATA['s_crawldata_domains']) == 1)
  OS_setValue('jw_hostname', key($_RDATA['s_crawldata_domains']));
// Count searchable pages
// Number of crawldata rows that may appear in search results: status
// 'OK' (plus 'Orphan' when `s_show_orphans` is set) and not unlisted
$_RDATA['s_searchable_pages'] = 0;

// `OR` rather than `||`: MySQL deprecates `||` as a logical operator and
// treats it as string concatenation when PIPES_AS_CONCAT is enabled
$query_status = ($_ODATA['s_show_orphans']) ? '(`status`=\'OK\' OR `status`=\'Orphan\')' : '`status`=\'OK\'';
$searchable = $_DDATA['pdo']->query(
  'SELECT COUNT(`status`) as `count`
     FROM `'.$_DDATA['tbprefix'].'crawldata`
       WHERE '.$query_status.' AND `flag_unlisted`=0;'
);
$err = $searchable->errorInfo();
if ($err[0] == '00000') {
  $searchable = $searchable->fetchAll();
  $_RDATA['s_searchable_pages'] = $searchable[0]['count'];

// Only queue the message when an error list exists in the session, as
// every other error path in this file does
} else if (isset($_SESSION['error']))
  $_SESSION['error'][] = 'Could not read status data from search database.';
// Match Weighting Values
$weights = explode('%', $_ODATA['s_weights']);
$_RDATA['s_weights'] = array(
'title' => $weights[0],
'body' => $weights[1],
'keywords' => $weights[2],
'description' => $weights[3],
'css_value' => $weights[4],
'url' => $weights[5],
'multi' => $weights[6],
'important' => $weights[7]
$_RDATA['sp_smart'] = array(
'' => '\'',
'' => '\'',
'“' => '"',
'”' => '"',
'‟' => '"',
'„' => '"',
'…' => '...',
'·' => '•',
'' => '>',
'‖' => '|'
$_RDATA['s_latin'] = array(
'a' => array('á', 'Á', 'à', 'À', 'â', 'Â', 'ä', 'Ä', 'ã', 'Ã', 'å', 'Å', 'ą', 'Ą', 'ă', 'Ă'),
'ae' => array('æ', 'Æ'),
'c' => array('ç', 'Ç', 'ć', 'Ć', 'č', 'Č'),
'd' => array('ð', 'Ð', 'ď', 'Ď', 'đ', 'Đ'),
'e' => array('é', 'É', 'è', 'È', 'ê', 'Ê', 'ë', 'Ë', 'ę', 'Ę', 'ě', 'Ě'),
'g' => array('ğ', 'Ğ'),
'i' => array('í', 'Í', 'ì', 'Ì', 'î', 'Î', 'ï', 'Ï', 'ı', 'İ'),
'l' => array('ł', 'Ł', 'ľ', 'Ľ', 'ĺ', 'Ĺ'),
'n' => array('ñ', 'Ñ', 'ń', 'Ń', 'ň', 'Ň'),
'o' => array('ó', 'Ó', 'ò', 'Ò', 'ô', 'Ô', 'ö', 'Ö', 'õ', 'Õ', 'ø', 'Ø', 'ő', 'Ő'),
'oe' => array('œ', 'Œ'),
'r' => array('ŕ', 'Ŕ', 'ř', 'Ř'),
's' => array('ş', 'Ş', 'ś', 'Ś', 'š', 'Š'),
'sz' => array('ß'),
't' => array('ť', 'Ť', 'ţ', 'Ţ'),
'th' => array('þ', 'Þ'),
'u' => array('ú', 'Ú', 'ù', 'Ù', 'û', 'Û', 'ü', 'Ü', 'ů', 'Ů', 'ű', 'Ű'),
'x' => array('×'),
'y' => array('ý', 'Ý', 'ÿ', 'Ÿ'),
'z' => array('ź', 'Ź', 'ž', 'Ž', 'ż', 'Ż'),
'?' => array('¿')
$_RDATA['s_filetypes'] = array(
'PDF' => array('application/pdf'),
'JPG' => array('image/jpeg'),
'HTML' => array('text/html', 'application/xhtml+xml'),
'XML' => array('text/xml', 'application/xml'),
'TXT' => array('text/plain')
$_SERVER['REQUEST_URI'] = preg_replace('/\?.*$/', '', $_SERVER['REQUEST_URI']);
$_RDATA['x_generated_by'] = 'X-Generated-By: Orca PHP Search/'.$_ODATA['version'];
// ***** Prevent caching of these pages
header('Expires: Mon, 26 Jul 1997 05:00:00 GMT');
header('Last-Modified: '.gmdate('D, d M Y H:i:s').' GMT');
header('Cache-Control: no-store, no-cache, must-revalidate');
header('Cache-Control: post-check=0, pre-check=0', false);
header('Pragma: no-cache'); ?>

os3/crawler.php

File diff suppressed because it is too large Load diff

os3/css/admin.css
View file

@ -0,0 +1,78 @@
progress {
progress::-webkit-progress-bar {
progress::-moz-progress-bar {
background-color:lightblue !important;
progress::-webkit-progress-value {
background-color:lightblue !important;
progress::after {
#crawlerModal .crawl-log-prev,
#crawlerModal .modal-footer button,
#crawlerModal.crawler-log .crawl-controls,
#crawlerModal.crawler-log .modal-footer p,
#crawlerModal.crawler-log .crawl-progress,
#crawlerModal.crawler-log .crawl-log {
#crawlerModal.crawler-log .modal-footer button,
#crawlerModal.crawler-log .crawl-log-prev {
input[type="number"] {
.mw-90 {
.mw-10em {
.table-fixed {
.text-ellipsis {
img.svg-icon {
img.svg-icon-sm {
img.svg-icon-flag {
#os_queries_thead > tr > th > img {
#os_queries_thead > tr > th.os_sorting > img {
#os_queries_thead > tr > th.os_asc > img {

os3/css/bootstrap.min.css vendored

os3/css/search.css
View file

@ -0,0 +1,36 @@
#os_results ol > li {
#os_results ol > li > header > span,
#os_results ol > li > header > a {
#os_results ol > li > blockquote,
#os_results ol > li > blockquote > p{
margin:0.2em 0;
#os_results ol > li > footer > cite {
#os_results ol > li *:empty {
#os_results nav > ul {
#os_results nav > ul li {
#os_results > form {
#os_results > footer {

os3/geoip2/README.txt
View file

@ -0,0 +1,14 @@
To enable the Geo-Location service, follow the steps below:
1) Download the latest MaxMind GeoIP2 .phar file from GitHub, and
place it in the same directory as this README.txt file:
2) Log in at the MaxMind website; account registration is free:
3) Navigate to the "Downloads" area of your MaxMind account, and
download the GeoLite2 Country (not CSV) GZIP package.
4) Unzip the 'GeoLite2-Country.mmdb' file and place it in the same
directory as this README.txt file.

os3/img/arrow-down.svg
View file

@ -0,0 +1,5 @@
<?xml version="1.0" encoding="UTF-8"?>
<svg xmlns="" viewBox="0 0 16 16">
<defs><style>.cls-1 { fill: none; stroke: #000; stroke-linecap: round; stroke-linejoin: round; stroke-width: 2px; }</style></defs>
<path class="cls-1" d="m2,5l6,6,6-6"/>


Width:  |  Height:  |  Size: 280 B

os3/img/clock.svg
View file

@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<svg xmlns="" viewBox="0 0 800 800">
<defs><style>.cls-1 { fill: none; stroke: #000; stroke-linecap: round; stroke-linejoin: round; stroke-width: 66.67px; }</style></defs>
<polyline class="cls-1" points="396.67 196.67 396.67 396.67 400 400 470 470"/>
<circle id="Circle-2" class="cls-1" cx="400" cy="400" r="333.33"/>


Width:  |  Height:  |  Size: 395 B

os3/img/flags/README.txt
View file

@ -0,0 +1,3 @@
Flag images from

os3/img/flags/ad.png

Binary file not shown.


Width:  |  Height:  |  Size: 384 B

os3/img/flags/ae.png

Binary file not shown.


Width:  |  Height:  |  Size: 122 B

os3/img/flags/af.png

Binary file not shown.


Width:  |  Height:  |  Size: 516 B

Some files were not shown because too many files have changed in this diff Show more