Delete all included repos for reupload

This commit is contained in:
Brian Huisman 2023-04-20 10:20:38 -04:00
parent 358fa42aee
commit 4f459b61d2
83 changed files with 0 additions and 20089 deletions

View file

@ -1,8 +0,0 @@
# Maxmind GeoIP2 Geolocation
To enable the Geolocation service for items in the Query Log, follow the steps below:
1. Download the latest Maxmind GeoIP2 .phar file from GitHub and place it in the same directory as this README file: https://github.com/maxmind/GeoIP2-php/releases
2. Log in at the Maxmind website; account registration is free: https://www.maxmind.com/en/account/login
3. Navigate to the "Downloads" area of your Maxmind account, and download the GeoLite2 Country (not CSV) GZIP package.
4. Unzip the 'GeoLite2-Country.mmdb' file and place it in the same directory as this README file.

View file

@ -1,88 +0,0 @@
<?php
/*
* This file is part of Mustache.php.
*
* (c) 2010-2017 Justin Hileman
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
/**
 * Mustache class autoloader.
 *
 * Resolves class names that start with "Mustache" to files below a base
 * directory by replacing every underscore in the class name with a "/".
 */
class Mustache_Autoloader
{
    private $baseDir;

    /**
     * An array where the key is the baseDir and the value is an instance of
     * this class.
     *
     * @var array
     */
    private static $instances = array();

    /**
     * Autoloader constructor.
     *
     * @param string $baseDir Mustache library base directory (default: dirname(__FILE__).'/..')
     */
    public function __construct($baseDir = null)
    {
        if ($baseDir === null) {
            $baseDir = dirname(__FILE__) . '/..';
        }

        // realpath doesn't always work, for example, with stream URIs; it
        // returns false in that case, so fall back to the path as given.
        $realDir = realpath($baseDir);
        if ($realDir !== false && is_dir($realDir)) {
            $this->baseDir = $realDir;
        } else {
            $this->baseDir = $baseDir;
        }
    }

    /**
     * Register a new instance as an SPL autoloader.
     *
     * One instance is kept per distinct $baseDir, so repeated calls with the
     * same argument return the same loader.
     *
     * @param string $baseDir Mustache library base directory (default: dirname(__FILE__).'/..')
     *
     * @return Mustache_Autoloader Registered Autoloader instance
     */
    public static function register($baseDir = null)
    {
        // A falsy $baseDir (null, '') shares the single "default" slot 0.
        $key = $baseDir ? $baseDir : 0;

        if (!isset(self::$instances[$key])) {
            self::$instances[$key] = new self($baseDir);
        }

        $loader = self::$instances[$key];
        spl_autoload_register(array($loader, 'autoload'));

        return $loader;
    }

    /**
     * Autoload Mustache classes.
     *
     * Class names that do not begin with "Mustache" are ignored, as are
     * classes whose file does not exist below the base directory.
     *
     * @param string $class
     */
    public function autoload($class)
    {
        // strncmp() is safe on an empty string, whereas indexing $class[0]
        // raises an "Uninitialized string offset" error for ''.
        if (strncmp($class, '\\', 1) === 0) {
            $class = substr($class, 1);
        }

        if (strpos($class, 'Mustache') !== 0) {
            return;
        }

        $file = sprintf('%s/%s.php', $this->baseDir, str_replace('_', '/', $class));
        if (is_file($file)) {
            require $file;
        }
    }
}

View file

@ -1,43 +0,0 @@
<?php
/*
* This file is part of Mustache.php.
*
* (c) 2010-2017 Justin Hileman
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
/**
* Mustache Cache interface.
*
* Interface for caching and loading Mustache_Template classes
* generated by the Mustache_Compiler.
*
* Implementations expose three operations: load a previously cached class,
* cache (and load) freshly compiled source, and accept an optional logger.
*/
interface Mustache_Cache
{
/**
* Load a compiled Mustache_Template class from cache.
*
* @param string $key
*
* @return bool indicates successful class load
*/
public function load($key);
/**
* Cache and load a compiled Mustache_Template class.
*
* @param string $key
* @param string $value
*/
public function cache($key, $value);
/**
* Set a logger instance.
*
* The parameter defaults to null, so callers may omit it.
*
* @param Mustache_Logger|Psr\Log\LoggerInterface $logger
*/
public function setLogger($logger = null);
}

View file

@ -1,60 +0,0 @@
<?php
/*
* This file is part of Mustache.php.
*
* (c) 2010-2017 Justin Hileman
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
/**
 * Abstract Mustache Cache class.
 *
 * Holds an optional logger and gives child implementations a log() helper
 * that silently does nothing while no logger is set.
 *
 * @abstract
 */
abstract class Mustache_Cache_AbstractCache implements Mustache_Cache
{
    private $logger = null;

    /**
     * Get the current logger instance.
     *
     * @return Mustache_Logger|Psr\Log\LoggerInterface
     */
    public function getLogger()
    {
        return $this->logger;
    }

    /**
     * Set a logger instance.
     *
     * Passing null removes the current logger.
     *
     * @throws Mustache_Exception_InvalidArgumentException If $logger is neither null nor a supported logger
     *
     * @param Mustache_Logger|Psr\Log\LoggerInterface $logger
     */
    public function setLogger($logger = null)
    {
        if ($logger !== null) {
            $supported = $logger instanceof Mustache_Logger || is_a($logger, 'Psr\\Log\\LoggerInterface');
            if (!$supported) {
                throw new Mustache_Exception_InvalidArgumentException('Expected an instance of Mustache_Logger or Psr\\Log\\LoggerInterface.');
            }
        }

        $this->logger = $logger;
    }

    /**
     * Add a log record if logging is enabled.
     *
     * @param string $level   The logging level
     * @param string $message The log message
     * @param array  $context The log context
     */
    protected function log($level, $message, array $context = array())
    {
        // No logger configured: logging is a no-op.
        if ($this->logger === null) {
            return;
        }

        $this->logger->log($level, $message, $context);
    }
}

View file

@ -1,161 +0,0 @@
<?php
/*
* This file is part of Mustache.php.
*
* (c) 2010-2017 Justin Hileman
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
/**
 * Mustache Cache filesystem implementation.
 *
 * A FilesystemCache instance caches Mustache Template classes from the filesystem by name:
 *
 *     $cache = new Mustache_Cache_FilesystemCache(dirname(__FILE__).'/cache');
 *     $cache->cache($className, $compiledSource);
 *
 * The FilesystemCache benefits from any opcode caching that may be setup in your environment. So do that, k?
 */
class Mustache_Cache_FilesystemCache extends Mustache_Cache_AbstractCache
{
    private $baseDir;
    private $fileMode;

    /**
     * Filesystem cache constructor.
     *
     * @param string $baseDir  Directory for compiled templates
     * @param int    $fileMode Override default permissions for cache files. Defaults to using the system-defined umask
     */
    public function __construct($baseDir, $fileMode = null)
    {
        $this->baseDir  = $baseDir;
        $this->fileMode = $fileMode;
    }

    /**
     * Load the class from cache using `require_once`.
     *
     * @param string $key
     *
     * @return bool False when no cache file exists for $key, true once the file has been required
     */
    public function load($key)
    {
        $fileName = $this->getCacheFilename($key);
        if (!is_file($fileName)) {
            return false;
        }

        require_once $fileName;

        return true;
    }

    /**
     * Cache and load the compiled class.
     *
     * @throws Mustache_Exception_RuntimeException If the cache directory or file cannot be written
     *
     * @param string $key
     * @param string $value
     */
    public function cache($key, $value)
    {
        $fileName = $this->getCacheFilename($key);

        $this->log(
            Mustache_Logger::DEBUG,
            'Writing to template cache: "{fileName}"',
            array('fileName' => $fileName)
        );

        $this->writeFile($fileName, $value);
        $this->load($key);
    }

    /**
     * Build the cache filename.
     * Subclasses should override for custom cache directory structures.
     *
     * @param string $name
     *
     * @return string
     */
    protected function getCacheFilename($name)
    {
        return sprintf('%s/%s.php', $this->baseDir, $name);
    }

    /**
     * Create cache directory.
     *
     * @throws Mustache_Exception_RuntimeException If unable to create directory
     *
     * @param string $fileName
     *
     * @return string Directory that contains $fileName
     */
    private function buildDirectoryForFilename($fileName)
    {
        $dirName = dirname($fileName);
        if (!is_dir($dirName)) {
            $this->log(
                Mustache_Logger::INFO,
                'Creating Mustache template cache directory: "{dirName}"',
                array('dirName' => $dirName)
            );

            @mkdir($dirName, 0777, true);
            // Re-check instead of trusting mkdir()'s return value: the
            // directory may have been created by someone else in between.
            // @codeCoverageIgnoreStart
            if (!is_dir($dirName)) {
                throw new Mustache_Exception_RuntimeException(sprintf('Failed to create cache directory "%s".', $dirName));
            }
            // @codeCoverageIgnoreEnd
        }

        return $dirName;
    }

    /**
     * Write cache file.
     *
     * The value is written to a temporary file in the target directory and
     * then renamed over the final name. If any step fails, the temporary
     * file is removed again before the exception is thrown.
     *
     * @throws Mustache_Exception_RuntimeException If unable to write file
     *
     * @param string $fileName
     * @param string $value
     */
    private function writeFile($fileName, $value)
    {
        $dirName = $this->buildDirectoryForFilename($fileName);

        $this->log(
            Mustache_Logger::DEBUG,
            'Caching compiled template to "{fileName}"',
            array('fileName' => $fileName)
        );

        $tempFile = tempnam($dirName, basename($fileName));

        // tempnam() returns false on failure; never hand that to
        // file_put_contents(), which rejects an empty path outright.
        if ($tempFile !== false) {
            if (false !== @file_put_contents($tempFile, $value)) {
                if (@rename($tempFile, $fileName)) {
                    $mode = isset($this->fileMode) ? $this->fileMode : (0666 & ~umask());
                    @chmod($fileName, $mode);

                    return;
                }

                // @codeCoverageIgnoreStart
                $this->log(
                    Mustache_Logger::ERROR,
                    'Unable to rename Mustache temp cache file: "{tempName}" -> "{fileName}"',
                    array('tempName' => $tempFile, 'fileName' => $fileName)
                );
                // @codeCoverageIgnoreEnd
            }

            // Writing or renaming failed: do not leave a stray temp file
            // behind in the cache directory.
            // @codeCoverageIgnoreStart
            if (is_file($tempFile)) {
                @unlink($tempFile);
            }
            // @codeCoverageIgnoreEnd
        }

        // @codeCoverageIgnoreStart
        throw new Mustache_Exception_RuntimeException(sprintf('Failed to write cache file "%s".', $fileName));
        // @codeCoverageIgnoreEnd
    }
}

View file

@ -1,47 +0,0 @@
<?php
/*
* This file is part of Mustache.php.
*
* (c) 2010-2017 Justin Hileman
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
/**
 * Mustache Cache in-memory implementation.
 *
 * The in-memory cache is used for uncached lambda section templates. It's also useful during development, but is not
 * recommended for production use.
 */
class Mustache_Cache_NoopCache extends Mustache_Cache_AbstractCache
{
    /**
     * Never finds anything: this cache stores nothing, so there is nothing to load.
     *
     * @param string $key
     *
     * @return bool Always false
     */
    public function load($key)
    {
        return false;
    }

    /**
     * Evaluates the compiled Mustache Template class immediately instead of storing it.
     *
     * @param string $key
     * @param string $value
     */
    public function cache($key, $value)
    {
        $context = array('className' => $key);
        $this->log(
            Mustache_Logger::WARNING,
            'Template cache disabled, evaluating "{className}" class at runtime',
            $context
        );

        // NOTE(review): $value is executed as PHP. It is expected to be
        // compiler-generated source; never pass externally supplied text here.
        $php = '?>' . $value;
        eval($php);
    }
}

View file

@ -1,718 +0,0 @@
<?php
/*
* This file is part of Mustache.php.
*
* (c) 2010-2017 Justin Hileman
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
/**
* Mustache Compiler class.
*
* This class is responsible for turning a Mustache token parse tree into normal PHP source code.
*/
class Mustache_Compiler
{
private $pragmas;
private $defaultPragmas = array();
private $sections;
private $blocks;
private $source;
private $indentNextLine;
private $customEscape;
private $entityFlags;
private $charset;
private $strictCallables;
/**
 * Compile a Mustache token parse tree into PHP source code.
 *
 * Every call first resets the per-template state (pragmas, collected
 * sections and blocks, indent tracking) and stores the given options, then
 * delegates code generation to writeCode().
 *
 * @param string $source          Mustache Template source code
 * @param array  $tree            Parse tree of Mustache tokens
 * @param string $name            Mustache Template class name
 * @param bool   $customEscape    (default: false)
 * @param string $charset         (default: 'UTF-8')
 * @param bool   $strictCallables (default: false)
 * @param int    $entityFlags     (default: ENT_COMPAT)
 *
 * @return string Generated PHP source code
 */
public function compile($source, array $tree, $name, $customEscape = false, $charset = 'UTF-8', $strictCallables = false, $entityFlags = ENT_COMPAT)
{
    // Options supplied by the caller for this template.
    $this->customEscape    = $customEscape;
    $this->charset         = $charset;
    $this->strictCallables = $strictCallables;
    $this->entityFlags     = $entityFlags;

    // Fresh per-template working state.
    $this->source         = $source;
    $this->pragmas        = $this->defaultPragmas;
    $this->sections       = array();
    $this->blocks         = array();
    $this->indentNextLine = true;

    $generated = $this->writeCode($tree, $name);

    return $generated;
}
/**
 * Enable pragmas across all templates, regardless of the presence of pragma
 * tags in the individual templates.
 *
 * The given names replace any previously configured pragmas and also become
 * the defaults that compile() starts from.
 *
 * @internal Users should set global pragmas in Mustache_Engine, not here :)
 *
 * @param string[] $pragmas
 */
public function setPragmas(array $pragmas)
{
    $this->pragmas = array();

    foreach ($pragmas as $pragmaName) {
        // Pragmas are stored as a name => true lookup table.
        $this->pragmas[$pragmaName] = true;
    }

    $this->defaultPragmas = $this->pragmas;
}
/**
* Helper function for walking the Mustache token parse tree.
*
* Dispatches each node to the generator method matching its token type and
* concatenates the generated PHP source. Pragma nodes only record the pragma
* as enabled; comment nodes produce no output.
*
* @throws Mustache_Exception_SyntaxException upon encountering unknown token types
*
* @param array $tree Parse tree of Mustache tokens
* @param int $level (default: 0)
*
* @return string Generated PHP source code
*/
private function walk(array $tree, $level = 0)
{
$code = '';
// Child generators receive a level one deeper than the one passed in.
$level++;
foreach ($tree as $node) {
switch ($node[Mustache_Tokenizer::TYPE]) {
case Mustache_Tokenizer::T_PRAGMA:
// Remember the pragma; it emits no code of its own.
$this->pragmas[$node[Mustache_Tokenizer::NAME]] = true;
break;
case Mustache_Tokenizer::T_SECTION:
// FILTERS is optional on the node; default to no filters.
$code .= $this->section(
$node[Mustache_Tokenizer::NODES],
$node[Mustache_Tokenizer::NAME],
isset($node[Mustache_Tokenizer::FILTERS]) ? $node[Mustache_Tokenizer::FILTERS] : array(),
$node[Mustache_Tokenizer::INDEX],
$node[Mustache_Tokenizer::END],
$node[Mustache_Tokenizer::OTAG],
$node[Mustache_Tokenizer::CTAG],
$level
);
break;
case Mustache_Tokenizer::T_INVERTED:
$code .= $this->invertedSection(
$node[Mustache_Tokenizer::NODES],
$node[Mustache_Tokenizer::NAME],
isset($node[Mustache_Tokenizer::FILTERS]) ? $node[Mustache_Tokenizer::FILTERS] : array(),
$level
);
break;
case Mustache_Tokenizer::T_PARTIAL:
// DYNAMIC and INDENT are optional; default to a static name and no indent.
$code .= $this->partial(
$node[Mustache_Tokenizer::NAME],
isset($node[Mustache_Tokenizer::DYNAMIC]) ? $node[Mustache_Tokenizer::DYNAMIC] : false,
isset($node[Mustache_Tokenizer::INDENT]) ? $node[Mustache_Tokenizer::INDENT] : '',
$level
);
break;
case Mustache_Tokenizer::T_PARENT:
$code .= $this->parent(
$node[Mustache_Tokenizer::NAME],
isset($node[Mustache_Tokenizer::DYNAMIC]) ? $node[Mustache_Tokenizer::DYNAMIC] : false,
isset($node[Mustache_Tokenizer::INDENT]) ? $node[Mustache_Tokenizer::INDENT] : '',
$node[Mustache_Tokenizer::NODES],
$level
);
break;
case Mustache_Tokenizer::T_BLOCK_ARG:
$code .= $this->blockArg(
$node[Mustache_Tokenizer::NODES],
$node[Mustache_Tokenizer::NAME],
$node[Mustache_Tokenizer::INDEX],
$node[Mustache_Tokenizer::END],
$node[Mustache_Tokenizer::OTAG],
$node[Mustache_Tokenizer::CTAG],
$level
);
break;
case Mustache_Tokenizer::T_BLOCK_VAR:
$code .= $this->blockVar(
$node[Mustache_Tokenizer::NODES],
$node[Mustache_Tokenizer::NAME],
$node[Mustache_Tokenizer::INDEX],
$node[Mustache_Tokenizer::END],
$node[Mustache_Tokenizer::OTAG],
$node[Mustache_Tokenizer::CTAG],
$level
);
break;
case Mustache_Tokenizer::T_COMMENT:
// Comments are dropped from the generated code.
break;
case Mustache_Tokenizer::T_ESCAPED:
case Mustache_Tokenizer::T_UNESCAPED:
case Mustache_Tokenizer::T_UNESCAPED_2:
// All three variable token types share one generator; only T_ESCAPED
// turns escaping on (third argument).
$code .= $this->variable(
$node[Mustache_Tokenizer::NAME],
isset($node[Mustache_Tokenizer::FILTERS]) ? $node[Mustache_Tokenizer::FILTERS] : array(),
$node[Mustache_Tokenizer::TYPE] === Mustache_Tokenizer::T_ESCAPED,
$level
);
break;
case Mustache_Tokenizer::T_TEXT:
$code .= $this->text($node[Mustache_Tokenizer::VALUE], $level);
break;
default:
throw new Mustache_Exception_SyntaxException(sprintf('Unknown token type: %s', $node[Mustache_Tokenizer::TYPE]), $node);
}
}
return $code;
}
const KLASS = '<?php
class %s extends Mustache_Template
{
private $lambdaHelper;%s
public function renderInternal(Mustache_Context $context, $indent = \'\')
{
$this->lambdaHelper = new Mustache_LambdaHelper($this->mustache, $context);
$buffer = \'\';
%s
return $buffer;
}
%s
%s
}';
const KLASS_NO_LAMBDAS = '<?php
class %s extends Mustache_Template
{%s
public function renderInternal(Mustache_Context $context, $indent = \'\')
{
$buffer = \'\';
%s
return $buffer;
}
}';
const STRICT_CALLABLE = 'protected $strictCallables = true;';
/**
 * Generate Mustache Template class PHP source.
 *
 * Walks the tree first (which fills the section and block collections),
 * then picks the class template: the lambda-free variant is used only when
 * neither sections nor blocks were generated.
 *
 * @param array  $tree Parse tree of Mustache tokens
 * @param string $name Mustache Template class name
 *
 * @return string Generated PHP source code
 */
private function writeCode($tree, $name)
{
    $body         = $this->walk($tree);
    $sectionCode  = implode("\n", $this->sections);
    $blockCode    = implode("\n", $this->blocks);

    if (empty($this->sections) && empty($this->blocks)) {
        $template = self::KLASS_NO_LAMBDAS;
    } else {
        $template = self::KLASS;
    }

    $strictProperty = '';
    if ($this->strictCallables) {
        $strictProperty = $this->prepare(self::STRICT_CALLABLE);
    }

    // The lambda-free template has fewer placeholders; sprintf() simply
    // ignores the surplus section/block arguments in that case.
    return sprintf($this->prepare($template, 0, false, true), $name, $strictProperty, $body, $sectionCode, $blockCode);
}
const BLOCK_VAR = '
$blockFunction = $context->findInBlock(%s);
if (is_callable($blockFunction)) {
$buffer .= call_user_func($blockFunction, $context);
%s}
';
const BLOCK_VAR_ELSE = '} else {%s';
/**
 * Generate Mustache Template inheritance block variable PHP source.
 *
 * The child nodes become the fallback ("else") branch; when they generate
 * no code, no else branch is emitted at all.
 *
 * @param array  $nodes Array of child tokens
 * @param string $id    Section name
 * @param int    $start Section start offset
 * @param int    $end   Section end offset
 * @param string $otag  Current Mustache opening tag
 * @param string $ctag  Current Mustache closing tag
 * @param int    $level
 *
 * @return string Generated PHP source code
 */
private function blockVar($nodes, $id, $start, $end, $otag, $ctag, $level)
{
    $exportedId = var_export($id, true);
    $fallback   = $this->walk($nodes, $level);

    if ($fallback !== '') {
        $elseTemplate = $this->prepare(self::BLOCK_VAR_ELSE, $level + 1, false, true);
        $fallback     = sprintf($elseTemplate, $fallback);
    }

    $template = $this->prepare(self::BLOCK_VAR, $level);

    return sprintf($template, $exportedId, $fallback);
}
const BLOCK_ARG = '%s => array($this, \'block%s\'),';
/**
 * Generate Mustache Template inheritance block argument PHP source.
 *
 * Registers the child nodes as a block function via block() and emits an
 * array entry mapping the block name to that function.
 *
 * @param array  $nodes Array of child tokens
 * @param string $id    Section name
 * @param int    $start Section start offset
 * @param int    $end   Section end offset
 * @param string $otag  Current Mustache opening tag
 * @param string $ctag  Current Mustache closing tag
 * @param int    $level
 *
 * @return string Generated PHP source code
 */
private function blockArg($nodes, $id, $start, $end, $otag, $ctag, $level)
{
    $blockKey   = $this->block($nodes);
    $exportedId = var_export($id, true);
    $template   = $this->prepare(self::BLOCK_ARG, $level);

    return sprintf($template, $exportedId, $blockKey);
}
const BLOCK_FUNCTION = '
public function block%s($context)
{
$indent = $buffer = \'\';%s
return $buffer;
}
';
/**
 * Generate Mustache Template inheritance block function PHP source.
 *
 * The function is keyed by the md5 hash of its generated body, so blocks
 * that generate identical code share a single function.
 *
 * @param array $nodes Array of child tokens
 *
 * @return string key of new block function
 */
private function block($nodes)
{
    $body = $this->walk($nodes, 0);
    $hash = ucfirst(md5($body));

    if (isset($this->blocks[$hash])) {
        return $hash;
    }

    $this->blocks[$hash] = sprintf($this->prepare(self::BLOCK_FUNCTION, 0), $hash, $body);

    return $hash;
}
const SECTION_CALL = '
$value = $context->%s(%s);%s
$buffer .= $this->section%s($context, $indent, $value);
';
const SECTION = '
private function section%s(Mustache_Context $context, $indent, $value)
{
$buffer = \'\';
if (%s) {
$source = %s;
$result = (string) call_user_func($value, $source, %s);
if (strpos($result, \'{{\') === false) {
$buffer .= $result;
} else {
$buffer .= $this->mustache
->loadLambda($result%s)
->renderInternal($context);
}
} elseif (!empty($value)) {
$values = $this->isIterable($value) ? $value : array($value);
foreach ($values as $value) {
$context->push($value);
%s
$context->pop();
}
}
return $buffer;
}
';
/**
 * Generate Mustache Template section PHP source.
 *
 * Emits a call to a per-section method and, the first time a given
 * section source/delimiter combination is seen, the method itself. The
 * method is keyed by an md5 hash of the delimiters and the raw section
 * source.
 *
 * @param array    $nodes   Array of child tokens
 * @param string   $id      Section name
 * @param string[] $filters Array of filters
 * @param int      $start   Section start offset
 * @param int      $end     Section end offset
 * @param string   $otag    Current Mustache opening tag
 * @param string   $ctag    Current Mustache closing tag
 * @param int      $level
 *
 * @return string Generated section PHP source code
 */
private function section($nodes, $id, $filters, $start, $end, $otag, $ctag, $level)
{
    $rawSource = substr($this->source, $start, $end - $start);
    $source    = var_export($rawSource, true);
    $callable  = $this->getCallable();

    // Default delimiters need no extra arguments in the generated code.
    $helper = '$this->lambdaHelper';
    $delims = '';
    if ($otag !== '{{' || $ctag !== '}}') {
        $delimTag = var_export(sprintf('{{= %s %s =}}', $otag, $ctag), true);
        $helper   = sprintf('$this->lambdaHelper->withDelimiters(%s)', $delimTag);
        $delims   = ', ' . $delimTag;
    }

    $key = ucfirst(md5($delims . "\n" . $source));

    if (!isset($this->sections[$key])) {
        // Walk the children before storing, so nested sections are
        // registered ahead of this one.
        $template = $this->prepare(self::SECTION);
        $body     = $this->walk($nodes, 2);
        $this->sections[$key] = sprintf($template, $key, $callable, $source, $helper, $delims, $body);
    }

    // Resolved after walking the children, matching the order in which
    // pragma state is read.
    $method      = $this->getFindMethod($id);
    $exportedId  = var_export($id, true);
    $filterCode  = $this->getFilters($filters, $level);
    $callTemplate = $this->prepare(self::SECTION_CALL, $level);

    return sprintf($callTemplate, $method, $exportedId, $filterCode, $key);
}
const INVERTED_SECTION = '
$value = $context->%s(%s);%s
if (empty($value)) {
%s
}
';
/**
 * Generate Mustache Template inverted section PHP source.
 *
 * The generated code renders the child nodes only when the looked-up value
 * is empty.
 *
 * @param array    $nodes   Array of child tokens
 * @param string   $id      Section name
 * @param string[] $filters Array of filters
 * @param int      $level
 *
 * @return string Generated inverted section PHP source code
 */
private function invertedSection($nodes, $id, $filters, $level)
{
    // Lookup method and filters are resolved before the children are walked.
    $method     = $this->getFindMethod($id);
    $exportedId = var_export($id, true);
    $filterCode = $this->getFilters($filters, $level);

    $template = $this->prepare(self::INVERTED_SECTION, $level);
    $body     = $this->walk($nodes, $level);

    return sprintf($template, $method, $exportedId, $filterCode, $body);
}
const DYNAMIC_NAME = '$this->resolveValue($context->%s(%s), $context)';
/**
 * Generate Mustache Template dynamic name resolution PHP source.
 *
 * A static name is emitted as an exported string literal; a dynamic name is
 * emitted as a context lookup that is resolved when the template renders.
 *
 * @param string $id      Tag name
 * @param bool   $dynamic True if the name is dynamic
 *
 * @return string Dynamic name resolution PHP source code
 */
private function resolveDynamicName($id, $dynamic)
{
    if ($dynamic) {
        $method = $this->getFindMethod($id);
        // `last` takes no argument, so nothing is exported for it.
        $argument = ($method === 'last') ? '' : var_export($id, true);

        // TODO: filters?
        return sprintf(self::DYNAMIC_NAME, $method, $argument);
    }

    return var_export($id, true);
}
const PARTIAL_INDENT = ', $indent . %s';
const PARTIAL = '
if ($partial = $this->mustache->loadPartial(%s)) {
$buffer .= $partial->renderInternal($context%s);
}
';
/**
 * Generate Mustache Template partial call PHP source.
 *
 * An indent argument is only added to the generated render call when
 * $indent is not the empty string.
 *
 * @param string $id      Partial name
 * @param bool   $dynamic Partial name is dynamic
 * @param string $indent  Whitespace indent to apply to partial
 * @param int    $level
 *
 * @return string Generated partial call PHP source code
 */
private function partial($id, $dynamic, $indent, $level)
{
    $indentParam = '';
    if ($indent !== '') {
        $indentParam = sprintf(self::PARTIAL_INDENT, var_export($indent, true));
    }

    $template    = $this->prepare(self::PARTIAL, $level);
    $partialName = $this->resolveDynamicName($id, $dynamic);

    return sprintf($template, $partialName, $indentParam);
}
const PARENT = '
if ($parent = $this->mustache->loadPartial(%s)) {
$context->pushBlockContext(array(%s
));
$buffer .= $parent->renderInternal($context, $indent);
$context->popBlockContext();
}
';
const PARENT_NO_CONTEXT = '
if ($parent = $this->mustache->loadPartial(%s)) {
$buffer .= $parent->renderInternal($context, $indent);
}
';
/**
 * Generate Mustache Template inheritance parent call PHP source.
 *
 * Only block-argument children are passed on to the parent; when there are
 * none, the simpler template without a block context is used.
 *
 * @param string $id       Parent tag name
 * @param bool   $dynamic  Tag name is dynamic
 * @param string $indent   Whitespace indent to apply to parent
 * @param array  $children Child nodes
 * @param int    $level
 *
 * @return string Generated PHP source code
 */
private function parent($id, $dynamic, $indent, array $children, $level)
{
    $blockArgs   = array_filter($children, array(__CLASS__, 'onlyBlockArgs'));
    $partialName = $this->resolveDynamicName($id, $dynamic);

    if (!empty($blockArgs)) {
        $template = $this->prepare(self::PARENT, $level);
        $argsCode = $this->walk($blockArgs, $level + 1);

        return sprintf($template, $partialName, $argsCode);
    }

    return sprintf($this->prepare(self::PARENT_NO_CONTEXT, $level), $partialName);
}
/**
 * Filter callback that keeps only block-argument tokens.
 *
 * @param array $node
 *
 * @return bool True if $node is a block arg token
 */
private static function onlyBlockArgs(array $node)
{
    $type = $node[Mustache_Tokenizer::TYPE];

    return $type === Mustache_Tokenizer::T_BLOCK_ARG;
}
const VARIABLE = '
$value = $this->resolveValue($context->%s(%s), $context);%s
$buffer .= %s($value === null ? \'\' : %s);
';
/**
 * Generate Mustache Template variable interpolation PHP source.
 *
 * @param string   $id      Variable name
 * @param string[] $filters Array of filters
 * @param bool     $escape  Escape the variable value for output?
 * @param int      $level
 *
 * @return string Generated variable interpolation PHP source
 */
private function variable($id, $filters, $escape, $level)
{
    $method = $this->getFindMethod($id);
    // `last` takes no argument, so nothing is exported for it.
    $argument   = ($method === 'last') ? '' : var_export($id, true);
    $filterCode = $this->getFilters($filters, $level);

    if ($escape) {
        $output = $this->getEscape();
    } else {
        $output = '$value';
    }

    $template = $this->prepare(self::VARIABLE, $level);

    return sprintf($template, $method, $argument, $filterCode, $this->flushIndent(), $output);
}
const FILTER = '
$filter = $context->%s(%s);
if (!(%s)) {
throw new Mustache_Exception_UnknownFilterException(%s);
}
$value = call_user_func($filter, $value);%s
';
/**
 * Generate Mustache Template variable filtering PHP source.
 *
 * Handles the first filter and recurses for the remaining ones, so the
 * code for each later filter is appended after the code for the earlier one.
 *
 * @param string[] $filters Array of filters
 * @param int      $level
 *
 * @return string Generated filter PHP source
 */
private function getFilters(array $filters, $level)
{
    if (empty($filters)) {
        return '';
    }

    $filterName = array_shift($filters);
    $method     = $this->getFindMethod($filterName);
    // `last` takes no argument, so nothing is exported for it.
    $argument   = ($method === 'last') ? '' : var_export($filterName, true);
    $check      = $this->getCallable('$filter');
    $message    = var_export($filterName, true);

    $template  = $this->prepare(self::FILTER, $level);
    $remaining = $this->getFilters($filters, $level);

    return sprintf($template, $method, $argument, $check, $message, $remaining);
}
const LINE = '$buffer .= "\n";';
const TEXT = '$buffer .= %s%s;';
/**
 * Generate Mustache Template output Buffer call PHP source.
 *
 * After emitting the text, the "indent next line" flag is set when the
 * text ends with a newline and cleared otherwise.
 *
 * @param string $text
 * @param int    $level
 *
 * @return string Generated output Buffer call PHP source
 */
private function text($text, $level)
{
    $endsWithNewline = (substr($text, -1) === "\n");

    $template = $this->prepare(self::TEXT, $level);
    $code     = sprintf($template, $this->flushIndent(), var_export($text, true));

    // Must happen after flushIndent(), which clears the flag itself.
    $this->indentNextLine = $endsWithNewline;

    return $code;
}
/**
 * Prepare PHP source code snippet for output.
 *
 * Trims the snippet, optionally wraps it in newlines, and re-indents every
 * line break to `$bonus * 4` spaces (one level deeper when a newline is
 * prepended).
 *
 * @param string $text
 * @param int    $bonus          Additional indent level (default: 0)
 * @param bool   $prependNewline Prepend a newline to the snippet? (default: true)
 * @param bool   $appendNewline  Append a newline to the snippet? (default: false)
 *
 * @return string PHP source code snippet
 */
private function prepare($text, $bonus = 0, $prependNewline = true, $appendNewline = false)
{
    $snippet = trim($text);

    if ($prependNewline) {
        $snippet = "\n" . $snippet;
        $bonus++;
    }

    if ($appendNewline) {
        $snippet .= "\n";
    }

    $indentation = str_repeat(' ', $bonus * 4);

    return preg_replace("/\n( {8})?/", "\n" . $indentation, $snippet);
}
const DEFAULT_ESCAPE = 'htmlspecialchars(%s, %s, %s)';
const CUSTOM_ESCAPE = 'call_user_func($this->mustache->getEscape(), %s)';
/**
 * Get the current escaper.
 *
 * @param string $value (default: '$value')
 *
 * @return string Either a custom callback, or an inline call to `htmlspecialchars`
 */
private function getEscape($value = '$value')
{
    if (!$this->customEscape) {
        $flags   = var_export($this->entityFlags, true);
        $charset = var_export($this->charset, true);

        return sprintf(self::DEFAULT_ESCAPE, $value, $flags, $charset);
    }

    return sprintf(self::CUSTOM_ESCAPE, $value);
}
/**
 * Select the appropriate Context `find` method for a given $id.
 *
 * The return value will be one of `find`, `findDot`, `findAnchoredDot` or `last`.
 *
 * @see Mustache_Context::find
 * @see Mustache_Context::findDot
 * @see Mustache_Context::last
 *
 * @param string $id Variable name
 *
 * @return string `find` method name
 */
private function getFindMethod($id)
{
    if ($id === '.') {
        return 'last';
    }

    // A leading dot is only special while the ANCHORED-DOT pragma is enabled.
    $anchoredDotEnabled = !empty($this->pragmas[Mustache_Engine::PRAGMA_ANCHORED_DOT]);
    if ($anchoredDotEnabled && substr($id, 0, 1) === '.') {
        return 'findAnchoredDot';
    }

    return (strpos($id, '.') === false) ? 'find' : 'findDot';
}
const IS_CALLABLE = '!is_string(%s) && is_callable(%s)';
const STRICT_IS_CALLABLE = 'is_object(%s) && is_callable(%s)';
/**
 * Helper function to compile strict vs lax "is callable" logic.
 *
 * @param string $variable (default: '$value')
 *
 * @return string "is callable" logic
 */
private function getCallable($variable = '$value')
{
    if ($this->strictCallables) {
        $template = self::STRICT_IS_CALLABLE;
    } else {
        $template = self::IS_CALLABLE;
    }

    return sprintf($template, $variable, $variable);
}
const LINE_INDENT = '$indent . ';
/**
 * Get the current $indent prefix to write to the buffer.
 *
 * Returns the prefix at most once per pending indent: the flag is cleared
 * as soon as the prefix has been handed out.
 *
 * @return string "$indent . " or ""
 */
private function flushIndent()
{
    if ($this->indentNextLine) {
        $this->indentNextLine = false;

        return self::LINE_INDENT;
    }

    return '';
}
}

View file

@ -1,242 +0,0 @@
<?php
/*
* This file is part of Mustache.php.
*
* (c) 2010-2017 Justin Hileman
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
/**
 * Mustache Template rendering Context.
 *
 * Keeps two independent stacks: the regular context stack used for variable
 * lookup, and a block context stack used for template inheritance arguments.
 */
class Mustache_Context
{
    private $stack      = array();
    private $blockStack = array();

    /**
     * Mustache rendering Context constructor.
     *
     * @param mixed $context Default rendering context (default: null)
     */
    public function __construct($context = null)
    {
        if ($context === null) {
            return;
        }

        $this->stack = array($context);
    }

    /**
     * Push a new Context frame onto the stack.
     *
     * @param mixed $value Object or array to use for context
     */
    public function push($value)
    {
        $this->stack[] = $value;
    }

    /**
     * Push a new Context frame onto the block context stack.
     *
     * @param mixed $value Object or array to use for block context
     */
    public function pushBlockContext($value)
    {
        $this->blockStack[] = $value;
    }

    /**
     * Pop the last Context frame from the stack.
     *
     * @return mixed Last Context frame (object or array)
     */
    public function pop()
    {
        $frame = array_pop($this->stack);

        return $frame;
    }

    /**
     * Pop the last block Context frame from the stack.
     *
     * @return mixed Last block Context frame (object or array)
     */
    public function popBlockContext()
    {
        $frame = array_pop($this->blockStack);

        return $frame;
    }

    /**
     * Get the last Context frame.
     *
     * @return mixed Last Context frame (object or array)
     */
    public function last()
    {
        $frame = end($this->stack);

        return $frame;
    }

    /**
     * Find a variable in the Context stack.
     *
     * Frames are searched from the most recently pushed one back to the first:
     *
     *  * For an array frame, the element with key $id is returned if that key exists.
     *  * For an object frame (other than a Closure), a method named $id is called if it exists;
     *    otherwise a property for which isset() succeeds is returned; otherwise, for ArrayAccess
     *    objects, the offset $id is returned when it is set.
     *  * If no frame yields a value, an empty string is returned.
     *
     * @param string $id Variable name
     *
     * @return mixed Variable value, or '' if not found
     */
    public function find($id)
    {
        return $this->findVariableInStack($id, $this->stack);
    }

    /**
     * Find a 'dot notation' variable in the Context stack.
     *
     * Only the first chunk of the dotted name is searched for in the whole stack. Every following
     * chunk is looked up solely inside the value found for the previous chunk. For example, given:
     *
     *     $data = array(
     *         'name'  => 'Fred',
     *         'child' => array(
     *             'name' => 'Bob'
     *         ),
     *     );
     *
     * ... the template `{{ child.name }}` resolves `name` within `child` only, never within parent
     * Context frames.
     *
     * @param string $id Dotted variable selector
     *
     * @return mixed Variable value, or '' if not found
     */
    public function findDot($id)
    {
        $segments = explode('.', $id);
        $head     = array_shift($segments);
        $start    = $this->findVariableInStack($head, $this->stack);

        return $this->descend($start, $segments);
    }

    /**
     * Find an 'anchored dot notation' variable in the Context stack.
     *
     * This is the same as findDot(), except it looks in the top of the context
     * stack for the first value, rather than searching the whole context stack
     * and starting from there.
     *
     * @see Mustache_Context::findDot
     *
     * @throws Mustache_Exception_InvalidArgumentException if given an invalid anchored dot $id
     *
     * @param string $id Dotted variable selector
     *
     * @return mixed Variable value, or '' if not found
     */
    public function findAnchoredDot($id)
    {
        $segments = explode('.', $id);
        $head     = array_shift($segments);

        // An anchored id starts with a dot, so its first chunk must be empty.
        if ($head !== '') {
            throw new Mustache_Exception_InvalidArgumentException(sprintf('Unexpected id for findAnchoredDot: %s', $id));
        }

        return $this->descend($this->last(), $segments);
    }

    /**
     * Find an argument in the block context stack.
     *
     * Block frames are checked in the order they were pushed; the first one
     * containing the key wins.
     *
     * @param string $id
     *
     * @return mixed Variable value, or '' if not found
     */
    public function findInBlock($id)
    {
        foreach ($this->blockStack as $blockContext) {
            if (!array_key_exists($id, $blockContext)) {
                continue;
            }

            return $blockContext[$id];
        }

        return '';
    }

    /**
     * Resolve the remaining chunks of a dotted name, each inside the previous result.
     *
     * Stops as soon as an intermediate value is the empty string.
     *
     * @param mixed    $value    Value the first remaining chunk is looked up in
     * @param string[] $segments Remaining name chunks
     *
     * @return mixed Variable value, or '' if not found
     */
    private function descend($value, array $segments)
    {
        foreach ($segments as $segment) {
            if ($value === '') {
                break;
            }

            $value = $this->findVariableInStack($segment, array($value));
        }

        return $value;
    }

    /**
     * Helper function to find a variable in the Context stack.
     *
     * @see Mustache_Context::find
     *
     * @param string $id    Variable name
     * @param array  $stack Context stack
     *
     * @return mixed Variable value, or '' if not found
     */
    private function findVariableInStack($id, array $stack)
    {
        $index = count($stack);

        while ($index-- > 0) {
            $frame = $stack[$index];
            $type  = gettype($frame);

            if ($type === 'array') {
                if (array_key_exists($id, $frame)) {
                    return $frame[$id];
                }

                continue;
            }

            // Scalars, null and Closures can never provide a value.
            if ($type !== 'object' || $frame instanceof Closure) {
                continue;
            }

            // Note that is_callable() *will not work here*
            // See https://github.com/bobthecow/mustache.php/wiki/Magic-Methods
            if (method_exists($frame, $id)) {
                return $frame->$id();
            }

            if (isset($frame->$id)) {
                return $frame->$id;
            }

            if ($frame instanceof ArrayAccess && isset($frame[$id])) {
                return $frame[$id];
            }
        }

        return '';
    }
}

View file

@ -1,831 +0,0 @@
<?php
/*
* This file is part of Mustache.php.
*
* (c) 2010-2017 Justin Hileman
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
/**
* A Mustache implementation in PHP.
*
* {@link http://defunkt.github.com/mustache}
*
* Mustache is a framework-agnostic logic-less templating language. It enforces separation of view
* logic from template files. In fact, it is not even possible to embed logic in the template.
*
* This is very, very rad.
*
* @author Justin Hileman {@link http://justinhileman.com}
*/
class Mustache_Engine
{
// Library version; also folded into generated template class names
// (see getTemplateClassName).
const VERSION = '2.14.2';
const SPEC_VERSION = '1.3.0';
// Pragma names accepted by the 'pragmas' constructor option.
const PRAGMA_FILTERS = 'FILTERS';
const PRAGMA_BLOCKS = 'BLOCKS';
const PRAGMA_ANCHORED_DOT = 'ANCHORED-DOT';
const PRAGMA_DYNAMIC_NAMES = 'DYNAMIC-NAMES';
// Known pragmas
// Used as a lookup set: the constructor rejects any pragma that is not a key here.
private static $knownPragmas = array(
self::PRAGMA_FILTERS => true,
self::PRAGMA_BLOCKS => true,
self::PRAGMA_ANCHORED_DOT => true,
self::PRAGMA_DYNAMIC_NAMES => true,
);
// Template cache
// Instantiated templates, keyed by their generated class name (see loadSource).
private $templates = array();
// Environment
private $templateClassPrefix = '__Mustache_';
// Lazily defaults to a NoopCache (see getCache).
private $cache;
// NoopCache used for lambda templates unless 'cache_lambda_templates' is on.
private $lambdaCache;
private $cacheLambdaTemplates = false;
// Lazily defaults to a StringLoader (see getLoader).
private $loader;
// Lazily defaults to an ArrayLoader (see getPartialsLoader).
private $partialsLoader;
// Lazily created Mustache_HelperCollection (see getHelpers).
private $helpers;
// Custom escape callback; stays null when the 'escape' option is not given.
private $escape;
private $entityFlags = ENT_COMPAT;
private $charset = 'UTF-8';
private $logger;
private $strictCallables = false;
// Globally enabled pragmas, stored as pragma name => true.
private $pragmas = array();
// Custom tag delimiters handed to the tokenizer; null unless configured.
private $delimiters;
// Services
// Each of these is created on first use by its getter.
private $tokenizer;
private $parser;
private $compiler;
/**
* Mustache class constructor.
*
* Passing an $options array allows overriding certain Mustache options during instantiation:
*
* $options = array(
* // The class prefix for compiled templates. Defaults to '__Mustache_'.
* 'template_class_prefix' => '__MyTemplates_',
*
* // A Mustache cache instance or a cache directory string for compiled templates.
* // Mustache will not cache templates unless this is set.
* 'cache' => dirname(__FILE__).'/tmp/cache/mustache',
*
* // Override default permissions for cache files. Defaults to using the system-defined umask. It is
* // *strongly* recommended that you configure your umask properly rather than overriding permissions here.
* 'cache_file_mode' => 0666,
*
* // Optionally, enable caching for lambda section templates. This is generally not recommended, as lambda
* // sections are often too dynamic to benefit from caching.
* 'cache_lambda_templates' => true,
*
* // Customize the tag delimiters used by this engine instance. Note that overriding here changes the
* // delimiters used to parse all templates and partials loaded by this instance. To override just for a
* // single template, use an inline "change delimiters" tag at the start of the template file:
* //
* // {{=<% %>=}}
* //
* 'delimiters' => '<% %>',
*
* // A Mustache template loader instance. Uses a StringLoader if not specified.
* 'loader' => new Mustache_Loader_FilesystemLoader(dirname(__FILE__).'/views'),
*
* // A Mustache loader instance for partials.
* 'partials_loader' => new Mustache_Loader_FilesystemLoader(dirname(__FILE__).'/views/partials'),
*
* // An array of Mustache partials. Useful for quick-and-dirty string template loading, but not as
* // efficient or lazy as a Filesystem (or database) loader.
* 'partials' => array('foo' => file_get_contents(dirname(__FILE__).'/views/partials/foo.mustache')),
*
* // An array of 'helpers'. Helpers can be global variables or objects, closures (e.g. for higher order
* // sections), or any other valid Mustache context value. They will be prepended to the context stack,
* // so they will be available in any template loaded by this Mustache instance.
* 'helpers' => array('i18n' => function ($text) {
* // do something translatey here...
* }),
*
* // An 'escape' callback, responsible for escaping double-mustache variables.
* 'escape' => function ($value) {
* return htmlspecialchars($value, ENT_COMPAT, 'UTF-8');
* },
*
* // Type argument for `htmlspecialchars`. Defaults to ENT_COMPAT. You may prefer ENT_QUOTES.
* 'entity_flags' => ENT_QUOTES,
*
* // Character set for `htmlspecialchars`. Defaults to 'UTF-8'. Use 'UTF-8'.
* 'charset' => 'ISO-8859-1',
*
* // A Mustache Logger instance. No logging will occur unless this is set. Using a PSR-3 compatible
* // logging library -- such as Monolog -- is highly recommended. A simple stream logger implementation is
* // available as well:
* 'logger' => new Mustache_Logger_StreamLogger('php://stderr'),
*
* // Only treat Closure instances and invokable classes as callable. If true, values like
* // `array('ClassName', 'methodName')` and `array($classInstance, 'methodName')`, which are traditionally
* // "callable" in PHP, are not called to resolve variables for interpolation or section contexts. This
* // helps protect against arbitrary code execution when user input is passed directly into the template.
* // This currently defaults to false, but will default to true in v3.0.
* 'strict_callables' => true,
*
* // Enable pragmas across all templates, regardless of the presence of pragma tags in the individual
* // templates.
* 'pragmas' => [Mustache_Engine::PRAGMA_FILTERS],
* );
*
* @throws Mustache_Exception_InvalidArgumentException If `escape` option is not callable
*
* @param array $options (default: array())
*/
public function __construct(array $options = array())
{
    if (isset($options['template_class_prefix'])) {
        if ((string) $options['template_class_prefix'] === '') {
            throw new Mustache_Exception_InvalidArgumentException('Mustache Constructor "template_class_prefix" must not be empty');
        }

        $this->templateClassPrefix = $options['template_class_prefix'];
    }

    if (isset($options['cache'])) {
        $cache = $options['cache'];

        // A string is treated as a cache directory for a filesystem cache;
        // 'cache_file_mode' is only consulted in that case.
        if (is_string($cache)) {
            $mode  = isset($options['cache_file_mode']) ? $options['cache_file_mode'] : null;
            $cache = new Mustache_Cache_FilesystemCache($cache, $mode);
        }

        $this->setCache($cache);
    }

    if (isset($options['cache_lambda_templates'])) {
        $this->cacheLambdaTemplates = (bool) $options['cache_lambda_templates'];
    }

    if (isset($options['loader'])) {
        $this->setLoader($options['loader']);
    }

    if (isset($options['partials_loader'])) {
        $this->setPartialsLoader($options['partials_loader']);
    }

    // Handled after 'partials_loader' so the partials are set on the
    // configured loader (setPartials throws if that loader is immutable).
    if (isset($options['partials'])) {
        $this->setPartials($options['partials']);
    }

    if (isset($options['helpers'])) {
        $this->setHelpers($options['helpers']);
    }

    if (isset($options['escape'])) {
        if (!is_callable($options['escape'])) {
            throw new Mustache_Exception_InvalidArgumentException('Mustache Constructor "escape" option must be callable');
        }

        $this->escape = $options['escape'];
    }

    if (isset($options['entity_flags'])) {
        $this->entityFlags = $options['entity_flags'];
    }

    if (isset($options['charset'])) {
        $this->charset = $options['charset'];
    }

    // Handled after 'cache': setLogger() also hands the logger to the cache
    // when the cache has none of its own.
    if (isset($options['logger'])) {
        $this->setLogger($options['logger']);
    }

    if (isset($options['strict_callables'])) {
        // Normalised to a boolean, like 'cache_lambda_templates'. The flag is
        // part of the template class name hash, so `1` and `true` must not
        // produce different class names for identically compiled templates.
        $this->strictCallables = (bool) $options['strict_callables'];
    }

    if (isset($options['delimiters'])) {
        $this->delimiters = $options['delimiters'];
    }

    if (isset($options['pragmas'])) {
        foreach ($options['pragmas'] as $pragma) {
            if (!isset(self::$knownPragmas[$pragma])) {
                throw new Mustache_Exception_InvalidArgumentException(sprintf('Unknown pragma: "%s".', $pragma));
            }

            // Stored as keys so that repeated pragmas collapse to one entry.
            $this->pragmas[$pragma] = true;
        }
    }
}
/**
* Shortcut 'render' invocation.
*
* Equivalent to calling `$mustache->loadTemplate($template)->render($context);`
*
* @see Mustache_Engine::loadTemplate
* @see Mustache_Template::render
*
* @param string $template
* @param mixed $context (default: array())
*
* @return string Rendered template
*/
public function render($template, $context = array())
{
    // Resolve the template through the configured loader, then render it.
    $compiled = $this->loadTemplate($template);

    return $compiled->render($context);
}
/**
* Get the current Mustache escape callback.
*
* @return callable|null
*/
public function getEscape()
{
// Stays null unless an 'escape' callback was passed to the constructor.
return $this->escape;
}
/**
* Get the current Mustache entity type to escape.
*
* @return int
*/
public function getEntityFlags()
{
// ENT_COMPAT unless overridden by the 'entity_flags' constructor option.
return $this->entityFlags;
}
/**
* Get the current Mustache character set.
*
* @return string
*/
public function getCharset()
{
// 'UTF-8' unless overridden by the 'charset' constructor option.
return $this->charset;
}
/**
* Get the current globally enabled pragmas.
*
* @return array
*/
public function getPragmas()
{
// Pragmas are stored as `name => true`, so the names are the array keys.
return array_keys($this->pragmas);
}
/**
* Set the Mustache template Loader instance.
*
* @param Mustache_Loader $loader
*/
public function setLoader(Mustache_Loader $loader)
{
// Replaces whatever loader was configured before; no validation beyond the type hint.
$this->loader = $loader;
}
/**
* Get the current Mustache template Loader instance.
*
* If no Loader instance has been explicitly specified, this method will instantiate and return
* a StringLoader instance.
*
* @return Mustache_Loader
*/
public function getLoader()
{
    if (isset($this->loader)) {
        return $this->loader;
    }

    // No loader configured yet: fall back to a StringLoader and remember it.
    $this->loader = new Mustache_Loader_StringLoader();

    return $this->loader;
}
/**
* Set the Mustache partials Loader instance.
*
* @param Mustache_Loader $partialsLoader
*/
public function setPartialsLoader(Mustache_Loader $partialsLoader)
{
// Replaces whatever partials loader was configured before.
$this->partialsLoader = $partialsLoader;
}
/**
* Get the current Mustache partials Loader instance.
*
* If no Loader instance has been explicitly specified, this method will instantiate and return
* an ArrayLoader instance.
*
* @return Mustache_Loader
*/
public function getPartialsLoader()
{
    if (isset($this->partialsLoader)) {
        return $this->partialsLoader;
    }

    // No partials loader configured yet: fall back to an empty ArrayLoader.
    $this->partialsLoader = new Mustache_Loader_ArrayLoader();

    return $this->partialsLoader;
}
/**
* Set partials for the current partials Loader instance.
*
* @throws Mustache_Exception_RuntimeException If the current Loader instance is immutable
*
* @param array $partials (default: array())
*/
public function setPartials(array $partials = array())
{
    // Make sure there is a partials loader to receive the templates.
    if (!isset($this->partialsLoader)) {
        $this->partialsLoader = new Mustache_Loader_ArrayLoader();
    }

    $target = $this->partialsLoader;

    // Only mutable loaders can have their templates replaced.
    if ($target instanceof Mustache_Loader_MutableLoader) {
        $target->setTemplates($partials);

        return;
    }

    throw new Mustache_Exception_RuntimeException('Unable to set partials on an immutable Mustache Loader instance');
}
/**
* Set an array of Mustache helpers.
*
* An array of 'helpers'. Helpers can be global variables or objects, closures (e.g. for higher order sections), or
* any other valid Mustache context value. They will be prepended to the context stack, so they will be available in
* any template loaded by this Mustache instance.
*
* @throws Mustache_Exception_InvalidArgumentException if $helpers is not an array or Traversable
*
* @param array|Traversable $helpers
*/
public function setHelpers($helpers)
{
    $iterable = is_array($helpers) || $helpers instanceof Traversable;
    if (!$iterable) {
        throw new Mustache_Exception_InvalidArgumentException('setHelpers expects an array of helpers');
    }

    // Start from an empty collection: this replaces, rather than merges, helpers.
    $collection = $this->getHelpers();
    $collection->clear();

    foreach ($helpers as $name => $helper) {
        $this->addHelper($name, $helper);
    }
}
/**
* Get the current set of Mustache helpers.
*
* @see Mustache_Engine::setHelpers
*
* @return Mustache_HelperCollection
*/
public function getHelpers()
{
    if (isset($this->helpers)) {
        return $this->helpers;
    }

    // Created on first use so engines without helpers never allocate one.
    $this->helpers = new Mustache_HelperCollection();

    return $this->helpers;
}
/**
* Add a new Mustache helper.
*
* @see Mustache_Engine::setHelpers
*
* @param string $name
* @param mixed $helper
*/
public function addHelper($name, $helper)
{
    // Delegates to the (lazily created) helper collection.
    $collection = $this->getHelpers();
    $collection->add($name, $helper);
}
/**
* Get a Mustache helper by name.
*
* @see Mustache_Engine::setHelpers
*
* @param string $name
*
* @return mixed Helper
*/
public function getHelper($name)
{
    // Delegates to the (lazily created) helper collection.
    $collection = $this->getHelpers();

    return $collection->get($name);
}
/**
* Check whether this Mustache instance has a helper.
*
* @see Mustache_Engine::setHelpers
*
* @param string $name
*
* @return bool True if the helper is present
*/
public function hasHelper($name)
{
    // Delegates to the (lazily created) helper collection.
    $collection = $this->getHelpers();

    return $collection->has($name);
}
/**
* Remove a helper by name.
*
* @see Mustache_Engine::setHelpers
*
* @param string $name
*/
public function removeHelper($name)
{
    // Delegates to the (lazily created) helper collection.
    $collection = $this->getHelpers();
    $collection->remove($name);
}
/**
* Set the Mustache Logger instance.
*
* @throws Mustache_Exception_InvalidArgumentException If logger is not an instance of Mustache_Logger or Psr\Log\LoggerInterface
*
* @param Mustache_Logger|Psr\Log\LoggerInterface $logger
*/
public function setLogger($logger = null)
{
    // null is allowed (it clears the logger); anything else must be a known logger type.
    if ($logger !== null) {
        $supported = $logger instanceof Mustache_Logger || is_a($logger, 'Psr\\Log\\LoggerInterface');
        if (!$supported) {
            throw new Mustache_Exception_InvalidArgumentException('Expected an instance of Mustache_Logger or Psr\\Log\\LoggerInterface.');
        }
    }

    // Pass the logger on to the cache, but never overwrite one it already has.
    $cache = $this->getCache();
    if ($cache->getLogger() === null) {
        $cache->setLogger($logger);
    }

    $this->logger = $logger;
}
/**
* Get the current Mustache Logger instance.
*
* @return Mustache_Logger|Psr\Log\LoggerInterface
*/
public function getLogger()
{
// null when no logger has been configured.
return $this->logger;
}
/**
* Set the Mustache Tokenizer instance.
*
* @param Mustache_Tokenizer $tokenizer
*/
public function setTokenizer(Mustache_Tokenizer $tokenizer)
{
// Replaces the tokenizer returned by getTokenizer().
$this->tokenizer = $tokenizer;
}
/**
* Get the current Mustache Tokenizer instance.
*
* If no Tokenizer instance has been explicitly specified, this method will instantiate and return a new one.
*
* @return Mustache_Tokenizer
*/
public function getTokenizer()
{
    if (isset($this->tokenizer)) {
        return $this->tokenizer;
    }

    // Created on first use and reused afterwards.
    $this->tokenizer = new Mustache_Tokenizer();

    return $this->tokenizer;
}
/**
* Set the Mustache Parser instance.
*
* @param Mustache_Parser $parser
*/
public function setParser(Mustache_Parser $parser)
{
// Replaces the parser returned by getParser().
$this->parser = $parser;
}
/**
* Get the current Mustache Parser instance.
*
* If no Parser instance has been explicitly specified, this method will instantiate and return a new one.
*
* @return Mustache_Parser
*/
public function getParser()
{
    if (isset($this->parser)) {
        return $this->parser;
    }

    // Created on first use and reused afterwards.
    $this->parser = new Mustache_Parser();

    return $this->parser;
}
/**
* Set the Mustache Compiler instance.
*
* @param Mustache_Compiler $compiler
*/
public function setCompiler(Mustache_Compiler $compiler)
{
// Replaces the compiler returned by getCompiler().
$this->compiler = $compiler;
}
/**
* Get the current Mustache Compiler instance.
*
* If no Compiler instance has been explicitly specified, this method will instantiate and return a new one.
*
* @return Mustache_Compiler
*/
public function getCompiler()
{
    if (isset($this->compiler)) {
        return $this->compiler;
    }

    // Created on first use and reused afterwards.
    $this->compiler = new Mustache_Compiler();

    return $this->compiler;
}
/**
* Set the Mustache Cache instance.
*
* @param Mustache_Cache $cache
*/
public function setCache(Mustache_Cache $cache)
{
    // Share the engine's logger with a cache that has none of its own.
    $inheritLogger = isset($this->logger) && $cache->getLogger() === null;
    if ($inheritLogger) {
        $cache->setLogger($this->getLogger());
    }

    $this->cache = $cache;
}
/**
* Get the current Mustache Cache instance.
*
* If no Cache instance has been explicitly specified, this method will instantiate and return a new one.
*
* @return Mustache_Cache
*/
public function getCache()
{
    if (isset($this->cache)) {
        return $this->cache;
    }

    // Go through setCache() so the default NoopCache also receives the logger.
    $this->setCache(new Mustache_Cache_NoopCache());

    return $this->cache;
}
/**
* Get the current Lambda Cache instance.
*
* If 'cache_lambda_templates' is enabled, this is the default cache instance. Otherwise, it is a NoopCache.
*
* @see Mustache_Engine::getCache
*
* @return Mustache_Cache
*/
protected function getLambdaCache()
{
    if (!$this->cacheLambdaTemplates) {
        // Lambda caching is off: use a dedicated, lazily created NoopCache.
        if (!isset($this->lambdaCache)) {
            $this->lambdaCache = new Mustache_Cache_NoopCache();
        }

        return $this->lambdaCache;
    }

    // Lambda caching is on: lambda templates share the regular cache.
    return $this->getCache();
}
/**
* Helper method to generate a Mustache template class name.
*
* This method must be updated any time options are added which make it so
* the same template could be parsed and compiled multiple different ways.
*
* @param string|Mustache_Source $source
*
* @return string Mustache Template class name
*/
public function getTemplateClassName($source)
{
    $isSourceObject = $source instanceof Mustache_Source;

    // For the most part, adding a new option here should do the trick.
    //
    // Pick a value here which is unique for each possible way the template
    // could be compiled... but not necessarily unique per option value. See
    // escape below, which only needs to differentiate between 'custom' and
    // 'default' escapes.
    //
    // Keep this list in alphabetical order :)
    $chunks = array(
        'charset'         => $this->charset,
        'delimiters'      => $this->delimiters ? $this->delimiters : '{{ }}',
        'entityFlags'     => $this->entityFlags,
        'escape'          => isset($this->escape) ? 'custom' : 'default',
        'key'             => $isSourceObject ? $source->getKey() : 'source',
        'pragmas'         => $this->getPragmas(),
        'strictCallables' => $this->strictCallables,
        'version'         => self::VERSION,
    );

    $key = json_encode($chunks);

    // json_encode() returns false when a chunk cannot be encoded (for example
    // delimiters that are not valid UTF-8). Left unhandled, every engine
    // setting would drop out of the hash and all Mustache_Source templates
    // would share one class name. serialize() has no such restriction, and
    // names produced by the normal json_encode() path are unchanged.
    if ($key === false) {
        $key = serialize($chunks);
    }

    // Template Source instances have already provided their own source key. For strings, just include the whole
    // source string in the md5 hash.
    if (!$isSourceObject) {
        $key .= "\n" . $source;
    }

    // md5 is only used to derive a compact, valid class name suffix.
    return $this->templateClassPrefix . md5($key);
}
/**
* Load a Mustache Template by name.
*
* @param string $name
*
* @return Mustache_Template
*/
public function loadTemplate($name)
{
    // Fetch the template source from the main loader, then build the template from it.
    $source = $this->getLoader()->load($name);

    return $this->loadSource($source);
}
/**
* Load a Mustache partial Template by name.
*
* This is a helper method used internally by Template instances for loading partial templates. You can most likely
* ignore it completely.
*
* @param string $name
*
* @return Mustache_Template
*/
public function loadPartial($name)
{
    try {
        // Prefer the dedicated partials loader; otherwise reuse the main
        // loader, unless it is a StringLoader.
        $loader = null;
        if (isset($this->partialsLoader)) {
            $loader = $this->partialsLoader;
        } elseif (isset($this->loader) && !($this->loader instanceof Mustache_Loader_StringLoader)) {
            $loader = $this->loader;
        }

        if ($loader === null) {
            throw new Mustache_Exception_UnknownTemplateException($name);
        }

        return $this->loadSource($loader->load($name));
    } catch (Mustache_Exception_UnknownTemplateException $e) {
        // If the named partial cannot be found, log then return null.
        $this->log(
            Mustache_Logger::WARNING,
            'Partial not found: "{name}"',
            array('name' => $e->getTemplateName())
        );
    }

    return null;
}
/**
* Load a Mustache lambda Template by source.
*
* This is a helper method used by Template instances to generate subtemplates for Lambda sections. You can most
* likely ignore it completely.
*
* @param string $source
* @param string $delims (default: null)
*
* @return Mustache_Template
*/
public function loadLambda($source, $delims = null)
{
    // Custom delimiters, when given, are prepended on a line of their own.
    $template = ($delims === null) ? $source : $delims . "\n" . $source;

    return $this->loadSource($template, $this->getLambdaCache());
}
/**
* Instantiate and return a Mustache Template instance by source.
*
* Optionally provide a Mustache_Cache instance. This is used internally by Mustache_Engine::loadLambda to respect
* the 'cache_lambda_templates' configuration option.
*
* @see Mustache_Engine::loadTemplate
* @see Mustache_Engine::loadPartial
* @see Mustache_Engine::loadLambda
*
* @param string|Mustache_Source $source
* @param Mustache_Cache $cache (default: null)
*
* @return Mustache_Template
*/
private function loadSource($source, Mustache_Cache $cache = null)
{
    $className = $this->getTemplateClassName($source);

    // Reuse the instance when this engine has already built the template.
    if (isset($this->templates[$className])) {
        return $this->templates[$className];
    }

    if ($cache === null) {
        $cache = $this->getCache();
    }

    // Compile only when the class is neither defined already nor loadable
    // from the cache. The `false` argument keeps class_exists() from
    // triggering autoloading.
    if (!class_exists($className, false) && !$cache->load($className)) {
        $cache->cache($className, $this->compile($source));
    }

    $this->log(
        Mustache_Logger::DEBUG,
        'Instantiating template: "{className}"',
        array('className' => $className)
    );

    $template = new $className($this);
    $this->templates[$className] = $template;

    return $template;
}
/**
* Helper method to tokenize a Mustache template.
*
* @see Mustache_Tokenizer::scan
*
* @param string $source
*
* @return array Tokens
*/
private function tokenize($source)
{
    // The engine-wide delimiters (null unless configured) go to the tokenizer.
    $tokenizer = $this->getTokenizer();

    return $tokenizer->scan($source, $this->delimiters);
}
/**
* Helper method to parse a Mustache template.
*
* @see Mustache_Parser::parse
*
* @param string $source
*
* @return array Token tree
*/
private function parse($source)
{
    // Give the parser the globally enabled pragmas before every parse.
    $parser = $this->getParser();
    $parser->setPragmas($this->getPragmas());

    $tokens = $this->tokenize($source);

    return $parser->parse($tokens);
}
/**
* Helper method to compile a Mustache template.
*
* @see Mustache_Compiler::compile
*
* @param string|Mustache_Source $source
*
* @return string generated Mustache template class code
*/
private function compile($source)
{
    // The class name is derived from $source before a Mustache_Source is unwrapped.
    $className = $this->getTemplateClassName($source);

    $this->log(
        Mustache_Logger::INFO,
        'Compiling template to "{className}" class',
        array('className' => $className)
    );

    // Mustache_Source objects carry the template text; plain strings are the text.
    $code = ($source instanceof Mustache_Source) ? $source->getSource() : $source;
    $tree = $this->parse($code);

    $compiler = $this->getCompiler();
    $compiler->setPragmas($this->getPragmas());

    // The compiler is only told whether an escape callback is set, not which one.
    $customEscape = isset($this->escape);

    return $compiler->compile($code, $tree, $className, $customEscape, $this->charset, $this->strictCallables, $this->entityFlags);
}
/**
* Add a log record if logging is enabled.
*
* @param int $level The logging level
* @param string $message The log message
* @param array $context The log context
*/
private function log($level, $message, array $context = array())
{
    // Logging is optional: do nothing when no logger is configured.
    if (!isset($this->logger)) {
        return;
    }

    $this->logger->log($level, $message, $context);
}
}

View file

@ -1,18 +0,0 @@
<?php
/*
* This file is part of Mustache.php.
*
* (c) 2010-2017 Justin Hileman
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
/**
* A Mustache Exception interface.
*/
interface Mustache_Exception
{
// Declares no methods: it only gives the Mustache exception classes a
// common type to catch.
// This space intentionally left blank.
}

View file

@ -1,18 +0,0 @@
<?php
/*
* This file is part of Mustache.php.
*
* (c) 2010-2017 Justin Hileman
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
/**
* Invalid argument exception.
*/
class Mustache_Exception_InvalidArgumentException extends InvalidArgumentException implements Mustache_Exception
{
// Adds no behaviour: it can be caught as an SPL InvalidArgumentException
// or as a Mustache_Exception.
// This space intentionally left blank.
}

View file

@ -1,18 +0,0 @@
<?php
/*
* This file is part of Mustache.php.
*
* (c) 2010-2017 Justin Hileman
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
/**
* Logic exception.
*/
class Mustache_Exception_LogicException extends LogicException implements Mustache_Exception
{
// Adds no behaviour: it can be caught as an SPL LogicException or as a
// Mustache_Exception.
// This space intentionally left blank.
}

View file

@ -1,18 +0,0 @@
<?php
/*
* This file is part of Mustache.php.
*
* (c) 2010-2017 Justin Hileman
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
/**
* Runtime exception.
*/
class Mustache_Exception_RuntimeException extends RuntimeException implements Mustache_Exception
{
// Adds no behaviour: it can be caught as an SPL RuntimeException or as a
// Mustache_Exception.
// This space intentionally left blank.
}

View file

@ -1,41 +0,0 @@
<?php
/*
* This file is part of Mustache.php.
*
* (c) 2010-2017 Justin Hileman
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
/**
* Mustache syntax exception.
*/
class Mustache_Exception_SyntaxException extends LogicException implements Mustache_Exception
{
    protected $token;

    /**
     * Create a syntax exception that remembers the offending token.
     *
     * @param string    $msg      Exception message
     * @param array     $token    Token the error was raised for
     * @param Exception $previous Previous exception, if any (default: null)
     */
    public function __construct($msg, array $token, Exception $previous = null)
    {
        $this->token = $token;

        // The three-argument parent call (with $previous) is only made on PHP 5.3+.
        if (version_compare(PHP_VERSION, '5.3.0', '<')) {
            parent::__construct($msg); // @codeCoverageIgnore
        } else {
            parent::__construct($msg, 0, $previous);
        }
    }

    /**
     * Get the token passed to the constructor.
     *
     * @return array
     */
    public function getToken()
    {
        return $this->token;
    }
}

View file

@ -1,38 +0,0 @@
<?php
/*
* This file is part of Mustache.php.
*
* (c) 2010-2017 Justin Hileman
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
/**
* Unknown filter exception.
*/
class Mustache_Exception_UnknownFilterException extends UnexpectedValueException implements Mustache_Exception
{
    protected $filterName;

    /**
     * Create an exception for a filter name that could not be resolved.
     *
     * @param string    $filterName Name of the unknown filter
     * @param Exception $previous   Previous exception, if any (default: null)
     */
    public function __construct($filterName, Exception $previous = null)
    {
        $this->filterName = $filterName;
        $text = sprintf('Unknown filter: %s', $filterName);

        // The three-argument parent call (with $previous) is only made on PHP 5.3+.
        if (version_compare(PHP_VERSION, '5.3.0', '<')) {
            parent::__construct($text); // @codeCoverageIgnore
        } else {
            parent::__construct($text, 0, $previous);
        }
    }

    /**
     * Get the filter name passed to the constructor.
     *
     * @return string
     */
    public function getFilterName()
    {
        return $this->filterName;
    }
}

View file

@ -1,38 +0,0 @@
<?php
/*
* This file is part of Mustache.php.
*
* (c) 2010-2017 Justin Hileman
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
/**
* Unknown helper exception.
*/
class Mustache_Exception_UnknownHelperException extends InvalidArgumentException implements Mustache_Exception
{
    protected $helperName;

    /**
     * Create an exception for a helper name that could not be resolved.
     *
     * @param string    $helperName Name of the unknown helper
     * @param Exception $previous   Previous exception, if any (default: null)
     */
    public function __construct($helperName, Exception $previous = null)
    {
        $this->helperName = $helperName;
        $text = sprintf('Unknown helper: %s', $helperName);

        // The three-argument parent call (with $previous) is only made on PHP 5.3+.
        if (version_compare(PHP_VERSION, '5.3.0', '<')) {
            parent::__construct($text); // @codeCoverageIgnore
        } else {
            parent::__construct($text, 0, $previous);
        }
    }

    /**
     * Get the helper name passed to the constructor.
     *
     * @return string
     */
    public function getHelperName()
    {
        return $this->helperName;
    }
}

View file

@ -1,38 +0,0 @@
<?php
/*
* This file is part of Mustache.php.
*
* (c) 2010-2017 Justin Hileman
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
/**
* Unknown template exception.
*/
class Mustache_Exception_UnknownTemplateException extends InvalidArgumentException implements Mustache_Exception
{
    protected $templateName;

    /**
     * Create an exception for a template name that could not be resolved.
     *
     * @param string    $templateName Name of the unknown template
     * @param Exception $previous     Previous exception, if any (default: null)
     */
    public function __construct($templateName, Exception $previous = null)
    {
        $this->templateName = $templateName;
        $text = sprintf('Unknown template: %s', $templateName);

        // The three-argument parent call (with $previous) is only made on PHP 5.3+.
        if (version_compare(PHP_VERSION, '5.3.0', '<')) {
            parent::__construct($text); // @codeCoverageIgnore
        } else {
            parent::__construct($text, 0, $previous);
        }
    }

    /**
     * Get the template name passed to the constructor.
     *
     * @return string
     */
    public function getTemplateName()
    {
        return $this->templateName;
    }
}

View file

@ -1,172 +0,0 @@
<?php
/*
* This file is part of Mustache.php.
*
* (c) 2010-2017 Justin Hileman
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
/**
* A collection of helpers for a Mustache instance.
*/
class Mustache_HelperCollection
{
    private $helpers = array();

    /**
     * Build a collection, optionally pre-populated with helpers.
     *
     * Accepts an array (or Traversable) of `$name => $helper` pairs.
     *
     * @throws Mustache_Exception_InvalidArgumentException if $helpers is neither null, an array nor a Traversable
     *
     * @param array|Traversable $helpers (default: null)
     */
    public function __construct($helpers = null)
    {
        if ($helpers !== null) {
            $iterable = is_array($helpers) || $helpers instanceof Traversable;
            if (!$iterable) {
                throw new Mustache_Exception_InvalidArgumentException('HelperCollection constructor expects an array of helpers');
            }

            foreach ($helpers as $name => $helper) {
                $this->add($name, $helper);
            }
        }
    }

    /**
     * Magic mutator: `$collection->name = $helper`.
     *
     * @see Mustache_HelperCollection::add
     *
     * @param string $name
     * @param mixed  $helper
     */
    public function __set($name, $helper)
    {
        $this->add($name, $helper);
    }

    /**
     * Store a helper under the given name, replacing any existing one.
     *
     * @param string $name
     * @param mixed  $helper
     */
    public function add($name, $helper)
    {
        $this->helpers[$name] = $helper;
    }

    /**
     * Magic accessor: `$collection->name`.
     *
     * @see Mustache_HelperCollection::get
     *
     * @param string $name
     *
     * @return mixed Helper
     */
    public function __get($name)
    {
        return $this->get($name);
    }

    /**
     * Fetch a helper by name.
     *
     * @throws Mustache_Exception_UnknownHelperException If helper does not exist
     *
     * @param string $name
     *
     * @return mixed Helper
     */
    public function get($name)
    {
        if ($this->has($name)) {
            return $this->helpers[$name];
        }

        throw new Mustache_Exception_UnknownHelperException($name);
    }

    /**
     * Magic isset(): `isset($collection->name)`.
     *
     * @see Mustache_HelperCollection::has
     *
     * @param string $name
     *
     * @return bool True if helper is present
     */
    public function __isset($name)
    {
        return $this->has($name);
    }

    /**
     * Check whether a helper is registered under the given name.
     *
     * A helper whose value is null still counts as present.
     *
     * @param string $name
     *
     * @return bool True if helper is present
     */
    public function has($name)
    {
        return array_key_exists($name, $this->helpers);
    }

    /**
     * Magic unset(): `unset($collection->name)`.
     *
     * @see Mustache_HelperCollection::remove
     *
     * @param string $name
     */
    public function __unset($name)
    {
        $this->remove($name);
    }

    /**
     * Remove a helper from the collection.
     *
     * @throws Mustache_Exception_UnknownHelperException if the requested helper is not present
     *
     * @param string $name
     */
    public function remove($name)
    {
        if ($this->has($name)) {
            unset($this->helpers[$name]);

            return;
        }

        throw new Mustache_Exception_UnknownHelperException($name);
    }

    /**
     * Remove every helper from this collection.
     */
    public function clear()
    {
        $this->helpers = array();
    }

    /**
     * Check whether the collection holds no helpers.
     *
     * @return bool True if the collection is empty
     */
    public function isEmpty()
    {
        return count($this->helpers) === 0;
    }
}

View file

@ -1,21 +0,0 @@
The MIT License (MIT)
Copyright (c) 2010-2015 Justin Hileman
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
OR OTHER DEALINGS IN THE SOFTWARE.

View file

@ -1,76 +0,0 @@
<?php
/*
* This file is part of Mustache.php.
*
* (c) 2010-2017 Justin Hileman
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
/**
* Mustache Lambda Helper.
*
* Passed as the second argument to section lambdas (higher order sections),
* giving them access to a `render` method for rendering a string with the
* current context.
*/
class Mustache_LambdaHelper
{
    private $mustache;
    private $context;
    private $delims;

    /**
     * Mustache Lambda Helper constructor.
     *
     * @param Mustache_Engine  $mustache Engine used to load lambda templates
     * @param Mustache_Context $context  Context the lambda output is rendered with
     * @param string           $delims   Optional custom delimiters, in the format `{{= <% %> =}}`. (default: null)
     */
    public function __construct(Mustache_Engine $mustache, Mustache_Context $context, $delims = null)
    {
        $this->delims   = $delims;
        $this->context  = $context;
        $this->mustache = $mustache;
    }

    /**
     * Render a string as a Mustache template with the current rendering context.
     *
     * The argument is cast to string before it is handed to the engine.
     *
     * @param string $string
     *
     * @return string Rendered template
     */
    public function render($string)
    {
        $template = $this->mustache->loadLambda((string) $string, $this->delims);

        return $template->renderInternal($this->context);
    }

    /**
     * Invoking the helper as a function is the same as calling render().
     *
     * @param string $string
     *
     * @return string Rendered template
     */
    public function __invoke($string)
    {
        return $this->render($string);
    }

    /**
     * Get a new Lambda Helper for the same engine and context, with custom delimiters.
     *
     * @param string $delims Custom delimiters, in the format `{{= <% %> =}}`
     *
     * @return Mustache_LambdaHelper
     */
    public function withDelimiters($delims)
    {
        $engine  = $this->mustache;
        $context = $this->context;

        return new self($engine, $context, $delims);
    }
}

View file

@ -1,27 +0,0 @@
<?php
/*
* This file is part of Mustache.php.
*
* (c) 2010-2017 Justin Hileman
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
/**
 * Contract implemented by every Mustache Template Loader.
 */
interface Mustache_Loader
{
    /**
     * Resolve a template name to its source.
     *
     * @param string $name Template name
     *
     * @return string|Mustache_Source Mustache Template source
     *
     * @throws Mustache_Exception_UnknownTemplateException If a template file is not found
     */
    public function load($name);
}

View file

@ -1,79 +0,0 @@
<?php
/*
* This file is part of Mustache.php.
*
* (c) 2010-2017 Justin Hileman
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
/**
 * Mustache Template array Loader implementation.
 *
 * Templates are looked up by name in an in-memory associative array:
 *
 *     $loader = new Mustache_Loader_ArrayLoader(array(
 *         'foo' => '{{ bar }}',
 *         'baz' => 'Hey {{ qux }}!',
 *     ));
 *
 *     $tpl = $loader->load('foo'); // '{{ bar }}'
 *
 * The ArrayLoader is used internally as a partials loader by Mustache_Engine instance when an array of partials
 * is set. It can also be used as a quick-and-dirty Template loader.
 */
class Mustache_Loader_ArrayLoader implements Mustache_Loader, Mustache_Loader_MutableLoader
{
    private $templates;

    /**
     * ArrayLoader constructor.
     *
     * @param array $templates Associative array of Template source, keyed by name (default: array())
     */
    public function __construct(array $templates = array())
    {
        $this->templates = $templates;
    }

    /**
     * Return the source registered under the given name.
     *
     * A name counts as registered only when `isset()` is true for it, so an
     * entry whose value is null is reported as unknown.
     *
     * @param string $name
     *
     * @return string Mustache Template source
     *
     * @throws Mustache_Exception_UnknownTemplateException If no template is registered under $name
     */
    public function load($name)
    {
        if (isset($this->templates[$name])) {
            return $this->templates[$name];
        }

        throw new Mustache_Exception_UnknownTemplateException($name);
    }

    /**
     * Replace the whole set of Template sources held by this loader.
     *
     * @param array $templates Associative array of Template source, keyed by name
     */
    public function setTemplates(array $templates)
    {
        $this->templates = $templates;
    }

    /**
     * Register (or overwrite) a single Template source.
     *
     * @param string $name
     * @param string $template Mustache Template source
     */
    public function setTemplate($name, $template)
    {
        $this->templates[$name] = $template;
    }
}

View file

@ -1,69 +0,0 @@
<?php
/*
* This file is part of Mustache.php.
*
* (c) 2010-2017 Justin Hileman
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
/**
 * A Mustache Template loader that delegates to a list of other Loader
 * instances, asking each one in turn until one of them knows the template.
 */
class Mustache_Loader_CascadingLoader implements Mustache_Loader
{
    private $loaders;

    /**
     * Construct a CascadingLoader with an array of loaders.
     *
     *     $loader = new Mustache_Loader_CascadingLoader(array(
     *         new Mustache_Loader_InlineLoader(__FILE__, __COMPILER_HALT_OFFSET__),
     *         new Mustache_Loader_FilesystemLoader(__DIR__.'/templates')
     *     ));
     *
     * Each element is registered through `addLoader()`, so it must be a
     * Mustache_Loader instance.
     *
     * @param Mustache_Loader[] $loaders
     */
    public function __construct(array $loaders = array())
    {
        $this->loaders = array();

        foreach ($loaders as $delegate) {
            $this->addLoader($delegate);
        }
    }

    /**
     * Append a Loader instance to the end of the delegation list.
     *
     * @param Mustache_Loader $loader
     */
    public function addLoader(Mustache_Loader $loader)
    {
        $this->loaders[] = $loader;
    }

    /**
     * Load a Template by name from the first delegate that knows it.
     *
     * Delegates are consulted in the order they were added. Only
     * Mustache_Exception_UnknownTemplateException is treated as "try the next
     * one"; any other exception propagates to the caller.
     *
     * @param string $name
     *
     * @return string Mustache Template source
     *
     * @throws Mustache_Exception_UnknownTemplateException If no delegate can load the template
     */
    public function load($name)
    {
        foreach ($this->loaders as $delegate) {
            try {
                $source = $delegate->load($name);

                return $source;
            } catch (Mustache_Exception_UnknownTemplateException $e) {
                // this delegate doesn't know the template; move on to the next.
                continue;
            }
        }

        throw new Mustache_Exception_UnknownTemplateException($name);
    }
}

View file

@ -1,135 +0,0 @@
<?php
/*
* This file is part of Mustache.php.
*
* (c) 2010-2017 Justin Hileman
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
/**
 * Mustache Template filesystem Loader implementation.
 *
 * A FilesystemLoader instance loads Mustache Template source from the filesystem by name:
 *
 *     $loader = new Mustache_Loader_FilesystemLoader(dirname(__FILE__).'/views');
 *     $tpl = $loader->load('foo'); // equivalent to `file_get_contents(dirname(__FILE__).'/views/foo.mustache');`
 *
 * This is probably the most useful Mustache Loader implementation. It can be used for partials and normal Templates:
 *
 *     $m = new Mustache_Engine(array(
 *         'loader'          => new Mustache_Loader_FilesystemLoader(dirname(__FILE__).'/views'),
 *         'partials_loader' => new Mustache_Loader_FilesystemLoader(dirname(__FILE__).'/views/partials'),
 *     ));
 */
class Mustache_Loader_FilesystemLoader implements Mustache_Loader
{
    private $baseDir;
    private $extension = '.mustache';
    private $templates = array();

    /**
     * Mustache filesystem Loader constructor.
     *
     * Passing an $options array allows overriding certain Loader options during instantiation:
     *
     *     $options = array(
     *         // The filename extension used for Mustache templates. Defaults to '.mustache'
     *         'extension' => '.ms',
     *     );
     *
     * An empty 'extension' option disables the extension entirely; otherwise
     * the value is normalised to start with exactly one dot.
     *
     * @throws Mustache_Exception_RuntimeException if $baseDir does not exist
     *
     * @param string $baseDir Base directory containing Mustache template files
     * @param array  $options Array of Loader options (default: array())
     */
    public function __construct($baseDir, array $options = array())
    {
        $this->baseDir = $baseDir;

        // realpath() is only applied to plain paths; anything containing
        // "://" is kept verbatim.
        if (strpos($this->baseDir, '://') === false) {
            $this->baseDir = realpath($this->baseDir);
        }

        if ($this->shouldCheckPath() && !is_dir($this->baseDir)) {
            throw new Mustache_Exception_RuntimeException(sprintf('FilesystemLoader baseDir must be a directory: %s', $baseDir));
        }

        if (array_key_exists('extension', $options)) {
            if (empty($options['extension'])) {
                $this->extension = '';
            } else {
                $this->extension = '.' . ltrim($options['extension'], '.');
            }
        }
    }

    /**
     * Load a Template by name.
     *
     *     $loader = new Mustache_Loader_FilesystemLoader(dirname(__FILE__).'/views');
     *     $loader->load('admin/dashboard'); // loads "./views/admin/dashboard.mustache";
     *
     * The result of `loadFile()` is memoised per name for the lifetime of
     * this loader instance.
     *
     * @throws Mustache_Exception_UnknownTemplateException If the template cannot be found or read
     *
     * @param string $name
     *
     * @return string Mustache Template source
     */
    public function load($name)
    {
        if (!isset($this->templates[$name])) {
            $this->templates[$name] = $this->loadFile($name);
        }

        return $this->templates[$name];
    }

    /**
     * Helper function for loading a Mustache file by name.
     *
     * @throws Mustache_Exception_UnknownTemplateException If a template file is not found or cannot be read
     *
     * @param string $name
     *
     * @return string Mustache Template source
     */
    protected function loadFile($name)
    {
        $fileName = $this->getFileName($name);

        if ($this->shouldCheckPath() && !file_exists($fileName)) {
            throw new Mustache_Exception_UnknownTemplateException($name);
        }

        $source = file_get_contents($fileName);

        // file_get_contents() signals failure with false (e.g. the path is a
        // directory, is unreadable, or lives behind a stream wrapper for which
        // the existence check above was skipped). Report that as an unknown
        // template instead of handing false back as if it were template source.
        if ($source === false) {
            throw new Mustache_Exception_UnknownTemplateException($name);
        }

        return $source;
    }

    /**
     * Helper function for getting a Mustache template file name.
     *
     * The configured extension is appended unless the name already ends
     * with it.
     *
     * @param string $name
     *
     * @return string Template file name
     */
    protected function getFileName($name)
    {
        $fileName = $this->baseDir . '/' . $name;

        if (substr($fileName, 0 - strlen($this->extension)) !== $this->extension) {
            $fileName .= $this->extension;
        }

        return $fileName;
    }

    /**
     * Only check if baseDir is a directory and requested templates are files if
     * baseDir is using the filesystem stream wrapper, i.e. it either contains
     * no "://" at all or starts with "file://".
     *
     * @return bool Whether to check `is_dir` and `file_exists`
     */
    protected function shouldCheckPath()
    {
        return strpos($this->baseDir, '://') === false || strpos($this->baseDir, 'file://') === 0;
    }
}

View file

@ -1,123 +0,0 @@
<?php
/*
* This file is part of Mustache.php.
*
* (c) 2010-2017 Justin Hileman
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
/**
 * A Mustache Template loader for inline templates.
 *
 * With the InlineLoader, templates can be defined at the end of any PHP source
 * file:
 *
 *     $loader  = new Mustache_Loader_InlineLoader(__FILE__, __COMPILER_HALT_OFFSET__);
 *     $hello   = $loader->load('hello');
 *     $goodbye = $loader->load('goodbye');
 *
 *     __halt_compiler();
 *
 *     @@ hello
 *     Hello, {{ planet }}!
 *
 *     @@ goodbye
 *     Goodbye, cruel {{ planet }}
 *
 * Templates are delineated by lines containing only `@@ name`.
 *
 * The InlineLoader is well-suited to micro-frameworks such as Silex:
 *
 *     $app->register(new MustacheServiceProvider, array(
 *         'mustache.loader' => new Mustache_Loader_InlineLoader(__FILE__, __COMPILER_HALT_OFFSET__)
 *     ));
 *
 *     $app->get('/{name}', function ($name) use ($app) {
 *         return $app['mustache']->render('hello', compact('name'));
 *     })
 *     ->value('name', 'world');
 *
 *     // ...
 *
 *     __halt_compiler();
 *
 *     @@ hello
 *     Hello, {{ name }}!
 */
class Mustache_Loader_InlineLoader implements Mustache_Loader
{
    protected $fileName;
    protected $offset;
    protected $templates;

    /**
     * The InlineLoader requires a filename and offset to process templates.
     *
     * The magic constants `__FILE__` and `__COMPILER_HALT_OFFSET__` are usually
     * perfectly suited to the job:
     *
     *     $loader = new Mustache_Loader_InlineLoader(__FILE__, __COMPILER_HALT_OFFSET__);
     *
     * Note that this only works if the loader is instantiated inside the same
     * file as the inline templates. If the templates are located in another
     * file, it would be necessary to manually specify the filename and offset.
     *
     * @throws Mustache_Exception_InvalidArgumentException If $fileName is not a file, or $offset is
     *                                                     not a non-negative integer
     *
     * @param string $fileName The file to parse for inline templates
     * @param int    $offset   A string offset for the start of the templates.
     *                         This usually coincides with the `__halt_compiler`
     *                         call, and the `__COMPILER_HALT_OFFSET__`
     */
    public function __construct($fileName, $offset)
    {
        if (!is_file($fileName)) {
            throw new Mustache_Exception_InvalidArgumentException('InlineLoader expects a valid filename.');
        }

        if (!is_int($offset) || $offset < 0) {
            throw new Mustache_Exception_InvalidArgumentException('InlineLoader expects a valid file offset.');
        }

        $this->fileName = $fileName;
        $this->offset   = $offset;
    }

    /**
     * Load a Template by name.
     *
     * The source file is parsed lazily on the first call.
     *
     * @throws Mustache_Exception_UnknownTemplateException If a template file is not found
     *
     * @param string $name
     *
     * @return string Mustache Template source
     */
    public function load($name)
    {
        $this->loadTemplates();

        if (!array_key_exists($name, $this->templates)) {
            throw new Mustache_Exception_UnknownTemplateException($name);
        }

        return $this->templates[$name];
    }

    /**
     * Parse and load templates from the end of a source file.
     *
     * Runs at most once per instance: `$templates` is null until the first
     * parse and an array afterwards. Everything from `$offset` onwards is
     * split on `@@ name` marker lines; each template name and body is trimmed.
     */
    protected function loadTemplates()
    {
        if ($this->templates === null) {
            $this->templates = array();
            $data = file_get_contents($this->fileName, false, null, $this->offset);
            foreach (preg_split("/^@@(?= [\w\d\.]+$)/m", $data, -1) as $chunk) {
                if (trim($chunk)) {
                    // A marker that is the very last line of the file (no
                    // trailing newline, no body) yields a single-element
                    // array; pad it so the template is registered with an
                    // empty body instead of triggering an undefined-offset
                    // warning and passing null to trim().
                    list($name, $content) = array_pad(explode("\n", $chunk, 2), 2, '');
                    $this->templates[trim($name)] = trim($content);
                }
            }
        }
    }
}

View file

@ -1,31 +0,0 @@
<?php
/*
* This file is part of Mustache.php.
*
* (c) 2010-2017 Justin Hileman
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
/**
 * Contract for Mustache Template Loaders whose set of templates can be
 * changed after construction.
 */
interface Mustache_Loader_MutableLoader
{
    /**
     * Provide the loader with an associative array of Template sources.
     *
     * @param array $templates Template sources, keyed by template name
     */
    public function setTemplates(array $templates);

    /**
     * Provide the loader with a single named Template source.
     *
     * @param string $name     Template name
     * @param string $template Mustache Template source
     */
    public function setTemplate($name, $template);
}

View file

@ -1,86 +0,0 @@
<?php
/*
* This file is part of Mustache.php.
*
* (c) 2010-2017 Justin Hileman
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
/**
 * Mustache Template production filesystem Loader implementation.
 *
 * A production-ready FilesystemLoader, which doesn't require reading a file if it already exists in the template cache.
 * Instead of returning the file contents, it returns a Mustache_Source_FilesystemSource for the file.
 *
 * {@inheritdoc}
 */
class Mustache_Loader_ProductionFilesystemLoader extends Mustache_Loader_FilesystemLoader
{
    private $statProps;

    /**
     * Mustache production filesystem Loader constructor.
     *
     * Passing an $options array allows overriding certain Loader options during instantiation:
     *
     *     $options = array(
     *         // The filename extension used for Mustache templates. Defaults to '.mustache'
     *         'extension' => '.ms',
     *         'stat_props' => array('size', 'mtime'),
     *     );
     *
     * Specifying 'stat_props' overrides the stat properties used to invalidate the template cache. By default, this
     * uses 'mtime' and 'size', but this can be set to any of the properties supported by stat():
     *
     *     http://php.net/manual/en/function.stat.php
     *
     * You can also disable filesystem stat entirely:
     *
     *     $options = array('stat_props' => null);
     *
     * But with great power comes great responsibility. Namely, if you disable stat-based cache invalidation,
     * YOU MUST CLEAR THE TEMPLATE CACHE YOURSELF when your templates change. Make it part of your build or deploy
     * process so you don't forget!
     *
     * @throws Mustache_Exception_RuntimeException if $baseDir does not exist.
     *
     * @param string $baseDir Base directory containing Mustache template files.
     * @param array  $options Array of Loader options (default: array())
     */
    public function __construct($baseDir, array $options = array())
    {
        parent::__construct($baseDir, $options);

        // Option absent: fall back to the default stat properties.
        if (!array_key_exists('stat_props', $options)) {
            $this->statProps = array('size', 'mtime');

            return;
        }

        // Option present: any empty value (null, false, array(), ...) means "no stat props".
        $this->statProps = empty($options['stat_props']) ? array() : $options['stat_props'];
    }

    /**
     * Helper function for loading a Mustache file by name.
     *
     * Wraps the resolved file name and the configured stat properties in a
     * Mustache_Source_FilesystemSource rather than reading the file here.
     *
     * @throws Mustache_Exception_UnknownTemplateException If a template file is not found.
     *
     * @param string $name
     *
     * @return Mustache_Source Mustache Template source
     */
    protected function loadFile($name)
    {
        $path = $this->getFileName($name);

        if (file_exists($path)) {
            return new Mustache_Source_FilesystemSource($path, $this->statProps);
        }

        throw new Mustache_Exception_UnknownTemplateException($name);
    }
}

View file

@ -1,39 +0,0 @@
<?php
/*
* This file is part of Mustache.php.
*
* (c) 2010-2017 Justin Hileman
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
/**
 * Mustache Template string Loader implementation.
 *
 * A StringLoader instance is essentially a noop: the "name" it is asked to
 * load is itself the template source, and is handed back untouched:
 *
 *     $loader = new Mustache_Loader_StringLoader;
 *     $tpl = $loader->load('{{ foo }}'); // '{{ foo }}'
 *
 * This is the default Template Loader instance used by Mustache:
 *
 *     $m = new Mustache_Engine;
 *     $tpl = $m->loadTemplate('{{ foo }}');
 *     echo $tpl->render(array('foo' => 'bar')); // "bar"
 */
class Mustache_Loader_StringLoader implements Mustache_Loader
{
    /**
     * Load a Template by source.
     *
     * @param string $name Mustache Template source
     *
     * @return string The very same Mustache Template source
     */
    public function load($name)
    {
        $source = $name;

        return $source;
    }
}

View file

@ -1,126 +0,0 @@
<?php
/*
* This file is part of Mustache.php.
*
* (c) 2010-2017 Justin Hileman
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
/**
 * Describes a Mustache logger instance.
 *
 * This is identical to the Psr\Log\LoggerInterface.
 *
 * The message MUST be a string or object implementing __toString().
 *
 * The message MAY contain placeholders in the form: {foo} where foo
 * will be replaced by the context data in key "foo".
 *
 * The context array can contain arbitrary data, the only assumption that
 * can be made by implementors is that if an Exception instance is given
 * to produce a stack trace, it MUST be in a key named "exception".
 *
 * See https://github.com/php-fig/fig-standards/blob/master/accepted/PSR-3-logger-interface.md
 * for the full interface specification.
 */
interface Mustache_Logger
{
    /**
     * Psr\Log compatible log levels, from most to least severe.
     */
    const EMERGENCY = 'emergency';
    const ALERT     = 'alert';
    const CRITICAL  = 'critical';
    const ERROR     = 'error';
    const WARNING   = 'warning';
    const NOTICE    = 'notice';
    const INFO      = 'info';
    const DEBUG     = 'debug';

    /**
     * Log at EMERGENCY level: the system is unusable.
     *
     * @param string $message
     * @param array  $context
     */
    public function emergency($message, array $context = array());

    /**
     * Log at ALERT level: action must be taken immediately.
     *
     * Example: Entire website down, database unavailable, etc. This should
     * trigger the SMS alerts and wake you up.
     *
     * @param string $message
     * @param array  $context
     */
    public function alert($message, array $context = array());

    /**
     * Log at CRITICAL level: critical conditions.
     *
     * Example: Application component unavailable, unexpected exception.
     *
     * @param string $message
     * @param array  $context
     */
    public function critical($message, array $context = array());

    /**
     * Log at ERROR level: runtime errors that do not require immediate action
     * but should typically be logged and monitored.
     *
     * @param string $message
     * @param array  $context
     */
    public function error($message, array $context = array());

    /**
     * Log at WARNING level: exceptional occurrences that are not errors.
     *
     * Example: Use of deprecated APIs, poor use of an API, undesirable things
     * that are not necessarily wrong.
     *
     * @param string $message
     * @param array  $context
     */
    public function warning($message, array $context = array());

    /**
     * Log at NOTICE level: normal but significant events.
     *
     * @param string $message
     * @param array  $context
     */
    public function notice($message, array $context = array());

    /**
     * Log at INFO level: interesting events.
     *
     * Example: User logs in, SQL logs.
     *
     * @param string $message
     * @param array  $context
     */
    public function info($message, array $context = array());

    /**
     * Log at DEBUG level: detailed debug information.
     *
     * @param string $message
     * @param array  $context
     */
    public function debug($message, array $context = array());

    /**
     * Logs with an arbitrary level.
     *
     * @param mixed  $level
     * @param string $message
     * @param array  $context
     */
    public function log($level, $message, array $context = array());
}

View file

@ -1,121 +0,0 @@
<?php
/*
* This file is part of Mustache.php.
*
* (c) 2010-2017 Justin Hileman
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
/**
 * This is a simple Logger implementation that other Loggers can inherit from.
 *
 * This is identical to the Psr\Log\AbstractLogger.
 *
 * Every level-specific method forwards to `log()` with the matching level
 * constant, so a concrete Logger that treats all levels alike only has to
 * implement `log()`.
 */
abstract class Mustache_Logger_AbstractLogger implements Mustache_Logger
{
    /**
     * Log at EMERGENCY level: the system is unusable.
     *
     * @param string $message
     * @param array  $context
     */
    public function emergency($message, array $context = array())
    {
        $this->log(self::EMERGENCY, $message, $context);
    }

    /**
     * Log at ALERT level: action must be taken immediately.
     *
     * Example: Entire website down, database unavailable, etc. This should
     * trigger the SMS alerts and wake you up.
     *
     * @param string $message
     * @param array  $context
     */
    public function alert($message, array $context = array())
    {
        $this->log(self::ALERT, $message, $context);
    }

    /**
     * Log at CRITICAL level: critical conditions.
     *
     * Example: Application component unavailable, unexpected exception.
     *
     * @param string $message
     * @param array  $context
     */
    public function critical($message, array $context = array())
    {
        $this->log(self::CRITICAL, $message, $context);
    }

    /**
     * Log at ERROR level: runtime errors that do not require immediate action
     * but should typically be logged and monitored.
     *
     * @param string $message
     * @param array  $context
     */
    public function error($message, array $context = array())
    {
        $this->log(self::ERROR, $message, $context);
    }

    /**
     * Log at WARNING level: exceptional occurrences that are not errors.
     *
     * Example: Use of deprecated APIs, poor use of an API, undesirable things
     * that are not necessarily wrong.
     *
     * @param string $message
     * @param array  $context
     */
    public function warning($message, array $context = array())
    {
        $this->log(self::WARNING, $message, $context);
    }

    /**
     * Log at NOTICE level: normal but significant events.
     *
     * @param string $message
     * @param array  $context
     */
    public function notice($message, array $context = array())
    {
        $this->log(self::NOTICE, $message, $context);
    }

    /**
     * Log at INFO level: interesting events.
     *
     * Example: User logs in, SQL logs.
     *
     * @param string $message
     * @param array  $context
     */
    public function info($message, array $context = array())
    {
        $this->log(self::INFO, $message, $context);
    }

    /**
     * Log at DEBUG level: detailed debug information.
     *
     * @param string $message
     * @param array  $context
     */
    public function debug($message, array $context = array())
    {
        $this->log(self::DEBUG, $message, $context);
    }
}

View file

@ -1,194 +0,0 @@
<?php
/*
* This file is part of Mustache.php.
*
* (c) 2010-2017 Justin Hileman
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
/**
 * A Mustache Stream Logger.
 *
 * The Stream Logger wraps a file resource instance (such as a stream) or a
 * stream URL. All log messages at or above the threshold level will be
 * appended to this stream.
 *
 * Hint: Try `php://stderr` for your stream URL.
 */
class Mustache_Logger_StreamLogger extends Mustache_Logger_AbstractLogger
{
    /**
     * Numeric severity of each known level name; higher means more severe.
     *
     * @var array
     */
    protected static $levels = array(
        self::DEBUG     => 100,
        self::INFO      => 200,
        self::NOTICE    => 250,
        self::WARNING   => 300,
        self::ERROR     => 400,
        self::CRITICAL  => 500,
        self::ALERT     => 550,
        self::EMERGENCY => 600,
    );

    protected $level;
    protected $stream = null;
    protected $url    = null;

    /**
     * @throws Mustache_Exception_InvalidArgumentException if the logging level is unknown
     *
     * @param resource|string $stream Resource instance or URL
     * @param string          $level  The minimum logging level at which this handler will be triggered
     *                                (one of the Mustache_Logger level constants)
     */
    public function __construct($stream, $level = Mustache_Logger::ERROR)
    {
        $this->setLevel($level);

        if (is_resource($stream)) {
            $this->stream = $stream;
        } else {
            // Not a resource: remember it as a URL and open it lazily on first write.
            $this->url = $stream;
        }
    }

    /**
     * Close stream resources.
     *
     * Note that this also closes a resource that was handed to the
     * constructor, not only one opened by this logger.
     */
    public function __destruct()
    {
        if (is_resource($this->stream)) {
            fclose($this->stream);
        }
    }

    /**
     * Set the minimum logging level.
     *
     * @throws Mustache_Exception_InvalidArgumentException if the logging level is unknown
     *
     * @param string $level The minimum logging level which will be written
     *                      (one of the Mustache_Logger level constants)
     */
    public function setLevel($level)
    {
        if (!array_key_exists($level, self::$levels)) {
            throw new Mustache_Exception_InvalidArgumentException(sprintf('Unexpected logging level: %s', $level));
        }

        $this->level = $level;
    }

    /**
     * Get the current minimum logging level.
     *
     * @return string The level name as passed to `setLevel()`
     */
    public function getLevel()
    {
        return $this->level;
    }

    /**
     * Logs with an arbitrary level.
     *
     * The record is written only when the severity of $level is at least the
     * severity of the configured minimum level.
     *
     * @throws Mustache_Exception_InvalidArgumentException if the logging level is unknown
     *
     * @param mixed  $level
     * @param string $message
     * @param array  $context
     */
    public function log($level, $message, array $context = array())
    {
        if (!array_key_exists($level, self::$levels)) {
            throw new Mustache_Exception_InvalidArgumentException(sprintf('Unexpected logging level: %s', $level));
        }

        if (self::$levels[$level] >= self::$levels[$this->level]) {
            $this->writeLog($level, $message, $context);
        }
    }

    /**
     * Write a record to the log.
     *
     * Opens the stream URL in append mode on first use if no open stream
     * resource is available yet.
     *
     * @throws Mustache_Exception_LogicException   If neither a stream resource nor url is present
     * @throws Mustache_Exception_RuntimeException If the stream url cannot be opened
     *
     * @param string $level   The logging level
     * @param string $message The log message
     * @param array  $context The log context
     */
    protected function writeLog($level, $message, array $context = array())
    {
        if (!is_resource($this->stream)) {
            if (!isset($this->url)) {
                throw new Mustache_Exception_LogicException('Missing stream url, the stream can not be opened. This may be caused by a premature call to close().');
            }

            $this->stream = fopen($this->url, 'a');
            if (!is_resource($this->stream)) {
                // @codeCoverageIgnoreStart
                throw new Mustache_Exception_RuntimeException(sprintf('The stream or file "%s" could not be opened.', $this->url));
                // @codeCoverageIgnoreEnd
            }
        }

        fwrite($this->stream, self::formatLine($level, $message, $context));
    }

    /**
     * Gets the name of the logging level, i.e. the level string upper-cased.
     *
     * @param string $level
     *
     * @return string
     */
    protected static function getLevelName($level)
    {
        return strtoupper($level);
    }

    /**
     * Format a log line for output, as "LEVEL: message\n".
     *
     * @param string $level   The logging level
     * @param string $message The log message
     * @param array  $context The log context
     *
     * @return string
     */
    protected static function formatLine($level, $message, array $context = array())
    {
        return sprintf(
            "%s: %s\n",
            self::getLevelName($level),
            self::interpolateMessage($message, $context)
        );
    }

    /**
     * Interpolate context values into the message placeholders.
     *
     * Each `{key}` placeholder is replaced by the context value stored under
     * `key`. Context values that cannot be converted to a string (arrays, and
     * objects without a `__toString()` method) are skipped, leaving their
     * placeholder untouched, so that logging never raises a conversion
     * warning or error because of what happens to be in the context.
     *
     * @param string $message
     * @param array  $context
     *
     * @return string
     */
    protected static function interpolateMessage($message, array $context = array())
    {
        // Fast path: no placeholder can be present without an opening brace.
        if (strpos($message, '{') === false) {
            return $message;
        }

        // build a replacement array with braces around the context keys
        $replace = array();
        foreach ($context as $key => $val) {
            if (is_array($val) || (is_object($val) && !method_exists($val, '__toString'))) {
                continue;
            }

            $replace['{' . $key . '}'] = $val;
        }

        // interpolate replacement values into the message and return
        return strtr($message, $replace);
    }
}

View file

@ -1,383 +0,0 @@
<?php
/*
* This file is part of Mustache.php.
*
* (c) 2010-2017 Justin Hileman
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
/**
* Mustache Parser class.
*
* This class is responsible for turning a set of Mustache tokens into a parse tree.
*/
class Mustache_Parser
{
private $lineNum;
private $lineTokens;
private $pragmas;
private $defaultPragmas = array();
private $pragmaFilters;
private $pragmaBlocks;
private $pragmaDynamicNames;
/**
 * Turn a flat list of Mustache tokens into a parse tree.
 *
 * Resets the per-parse line tracking, restores the active pragmas to the
 * configured defaults, caches which pragmas are switched on, and then hands
 * the tokens to `buildTree()`.
 *
 * @param array $tokens Set of Mustache tokens
 *
 * @return array Mustache token parse tree
 */
public function parse(array $tokens = array())
{
    // Start from a clean slate: no line seen yet, no tokens counted on it.
    $this->lineTokens = 0;
    $this->lineNum    = -1;

    $active        = $this->defaultPragmas;
    $this->pragmas = $active;

    $this->pragmaFilters      = isset($active[Mustache_Engine::PRAGMA_FILTERS]);
    $this->pragmaBlocks       = isset($active[Mustache_Engine::PRAGMA_BLOCKS]);
    $this->pragmaDynamicNames = isset($active[Mustache_Engine::PRAGMA_DYNAMIC_NAMES]);

    $tree = $this->buildTree($tokens);

    return $tree;
}
/**
 * Enable pragmas across all templates, regardless of the presence of pragma
 * tags in the individual templates.
 *
 * The current pragma set is emptied, every given pragma is passed to
 * `enablePragma()`, and the resulting set is stored as the default that each
 * subsequent `parse()` call starts from.
 *
 * @internal Users should set global pragmas in Mustache_Engine, not here :)
 *
 * @param string[] $pragmas
 */
public function setPragmas(array $pragmas)
{
    $this->pragmas = array();

    foreach ($pragmas as $pragmaName) {
        $this->enablePragma($pragmaName);
    }

    $this->defaultPragmas = $this->pragmas;
}
/**
* Helper method for recursively building a parse tree.
*
* Tokens are consumed from the front of $tokens (which is modified in place).
* Tags that open a nested scope (sections, inverted sections, parent tags and,
* with the BLOCKS pragma, block vars) recurse with the opening token as
* $parent; the matching closing tag ends that recursion and returns the
* opening token with its END index and child NODES filled in.
*
* @throws Mustache_Exception_SyntaxException when nesting errors or mismatched section tags are encountered,
*                                            when a closing tag has no opener, or when an opener is never closed
*
* @param array &$tokens Set of Mustache tokens
* @param array $parent Parent token (default: null)
*
* @return array Mustache Token parse tree (top level), or the completed parent token (nested call)
*/
private function buildTree(array &$tokens, array $parent = null)
{
$nodes = array();
while (!empty($tokens)) {
$token = array_shift($tokens);
// Track how many tokens precede this one on its line: lineTokens is 0 for
// the first token of a line and is incremented for each further token.
if ($token[Mustache_Tokenizer::LINE] === $this->lineNum) {
$this->lineTokens++;
} else {
$this->lineNum = $token[Mustache_Tokenizer::LINE];
$this->lineTokens = 0;
}
// Name post-processing (dynamic names, filters) is skipped for comments.
if ($token[Mustache_Tokenizer::TYPE] !== Mustache_Tokenizer::T_COMMENT) {
if ($this->pragmaDynamicNames && isset($token[Mustache_Tokenizer::NAME])) {
list($name, $isDynamic) = $this->getDynamicName($token);
if ($isDynamic) {
$token[Mustache_Tokenizer::NAME] = $name;
$token[Mustache_Tokenizer::DYNAMIC] = true;
}
}
if ($this->pragmaFilters && isset($token[Mustache_Tokenizer::NAME])) {
list($name, $filters) = $this->getNameAndFilters($token[Mustache_Tokenizer::NAME]);
if (!empty($filters)) {
$token[Mustache_Tokenizer::NAME] = $name;
$token[Mustache_Tokenizer::FILTERS] = $filters;
}
}
}
switch ($token[Mustache_Tokenizer::TYPE]) {
case Mustache_Tokenizer::T_DELIM_CHANGE:
// Delimiter changes produce no node; only standalone-line whitespace is cleared.
$this->checkIfTokenIsAllowedInParent($parent, $token);
$this->clearStandaloneLines($nodes, $tokens);
break;
case Mustache_Tokenizer::T_SECTION:
case Mustache_Tokenizer::T_INVERTED:
$this->checkIfTokenIsAllowedInParent($parent, $token);
$this->clearStandaloneLines($nodes, $tokens);
// Recurse: the nested call returns this token with its children attached.
$nodes[] = $this->buildTree($tokens, $token);
break;
case Mustache_Tokenizer::T_END_SECTION:
if (!isset($parent)) {
$msg = sprintf(
'Unexpected closing tag: /%s on line %d',
$token[Mustache_Tokenizer::NAME],
$token[Mustache_Tokenizer::LINE]
);
throw new Mustache_Exception_SyntaxException($msg, $token);
}
// NOTE: despite its name, $sameName is true when the closing tag's name
// DIFFERS from the opening tag's name (i.e. it flags a mismatch).
$sameName = $token[Mustache_Tokenizer::NAME] !== $parent[Mustache_Tokenizer::NAME];
$tokenDynamic = isset($token[Mustache_Tokenizer::DYNAMIC]) && $token[Mustache_Tokenizer::DYNAMIC];
$parentDynamic = isset($parent[Mustache_Tokenizer::DYNAMIC]) && $parent[Mustache_Tokenizer::DYNAMIC];
// A name mismatch, or a dynamic/non-dynamic mismatch, is a nesting error.
if ($sameName || ($tokenDynamic !== $parentDynamic)) {
$msg = sprintf(
'Nesting error: %s (on line %d) vs. %s (on line %d)',
$parent[Mustache_Tokenizer::NAME],
$parent[Mustache_Tokenizer::LINE],
$token[Mustache_Tokenizer::NAME],
$token[Mustache_Tokenizer::LINE]
);
throw new Mustache_Exception_SyntaxException($msg, $token);
}
$this->clearStandaloneLines($nodes, $tokens);
// Close the scope: record where it ends and hand the children back to the caller.
$parent[Mustache_Tokenizer::END] = $token[Mustache_Tokenizer::INDEX];
$parent[Mustache_Tokenizer::NODES] = $nodes;
return $parent;
case Mustache_Tokenizer::T_PARTIAL:
$this->checkIfTokenIsAllowedInParent($parent, $token);
// Store the whitespace prefix (if any) on the partial token as its indent.
if ($indent = $this->clearStandaloneLines($nodes, $tokens)) {
$token[Mustache_Tokenizer::INDENT] = $indent[Mustache_Tokenizer::VALUE];
}
$nodes[] = $token;
break;
case Mustache_Tokenizer::T_PARENT:
$this->checkIfTokenIsAllowedInParent($parent, $token);
$nodes[] = $this->buildTree($tokens, $token);
break;
case Mustache_Tokenizer::T_BLOCK_VAR:
if ($this->pragmaBlocks) {
// BLOCKS pragma is enabled, let's do this!
// Directly inside a parent tag, a block var is re-typed as a block argument.
if (isset($parent) && $parent[Mustache_Tokenizer::TYPE] === Mustache_Tokenizer::T_PARENT) {
$token[Mustache_Tokenizer::TYPE] = Mustache_Tokenizer::T_BLOCK_ARG;
}
$this->clearStandaloneLines($nodes, $tokens);
$nodes[] = $this->buildTree($tokens, $token);
} else {
// pretend this was just a normal "escaped" token...
$token[Mustache_Tokenizer::TYPE] = Mustache_Tokenizer::T_ESCAPED;
// TODO: figure out how to figure out if there was a space after this dollar:
$token[Mustache_Tokenizer::NAME] = '$' . $token[Mustache_Tokenizer::NAME];
$nodes[] = $token;
}
break;
case Mustache_Tokenizer::T_PRAGMA:
$this->enablePragma($token[Mustache_Tokenizer::NAME]);
// Deliberate fall-through: after being enabled, a pragma is handled like a comment.
// no break
case Mustache_Tokenizer::T_COMMENT:
$this->clearStandaloneLines($nodes, $tokens);
$nodes[] = $token;
break;
default:
$nodes[] = $token;
break;
}
}
// Ran out of tokens while still inside an opened scope: its closing tag is missing.
if (isset($parent)) {
$msg = sprintf(
'Missing closing tag: %s opened on line %d',
$parent[Mustache_Tokenizer::NAME],
$parent[Mustache_Tokenizer::LINE]
);
throw new Mustache_Exception_SyntaxException($msg, $parent);
}
return $nodes;
}
/**
 * Clear standalone line tokens.
 *
 * When the tag currently being processed sits alone on its line (optionally
 * surrounded by whitespace), the whitespace token following it is removed
 * from $tokens and the whitespace token preceding it is removed from $nodes.
 *
 * Returns a whitespace token for indenting partials, if applicable.
 *
 * @param array &$nodes  Parsed nodes (a trailing whitespace node may be popped)
 * @param array &$tokens Tokens to be parsed (a leading whitespace token may be shifted off)
 *
 * @return array|null Resulting indent token, if any
 */
private function clearStandaloneLines(array &$nodes, array &$tokens)
{
    // Third or later node on this line: cannot be standalone.
    if ($this->lineTokens > 1) {
        return;
    }

    // Second node on this line: standalone only if the one before it is whitespace.
    $prefix = ($this->lineTokens === 1) ? end($nodes) : null;
    if ($prefix && !$this->tokenIsWhitespace($prefix)) {
        return;
    }

    $suffix = reset($tokens);
    if ($suffix) {
        // The following token must be whitespace on the same line.
        $onSameLine = $suffix[Mustache_Tokenizer::LINE] === $this->lineNum;
        if (!$onSameLine || !$this->tokenIsWhitespace($suffix)) {
            return;
        }

        // Unless it's the last token in the template, that whitespace must
        // end in a newline for the line to be standalone.
        $isLastToken = count($tokens) === 1;
        if (!$isLastToken && substr($suffix[Mustache_Tokenizer::VALUE], -1) !== "\n") {
            return;
        }

        // Discard the whitespace suffix.
        array_shift($tokens);
    }

    // Hand back (and remove) the whitespace prefix, if there was one.
    return $prefix ? array_pop($nodes) : null;
}
/**
 * Check whether token is a whitespace token.
 *
 * True if token type is T_TEXT and value is all whitespace characters
 * (an empty value also counts).
 *
 * @param array $token
 *
 * @return bool True if token is a whitespace token
 */
private function tokenIsWhitespace(array $token)
{
    if ($token[Mustache_Tokenizer::TYPE] === Mustache_Tokenizer::T_TEXT) {
        // preg_match() yields 1, 0 or false; compare against 1 so the
        // method really returns the documented bool.
        return preg_match('/^\s*$/', $token[Mustache_Tokenizer::VALUE]) === 1;
    }

    return false;
}
/**
 * Reject a token that appears directly inside a parent (`<`) tag.
 *
 * Does nothing when there is no enclosing tag, or when the enclosing tag is
 * not of type T_PARENT.
 *
 * @throws Mustache_Exception_SyntaxException if an invalid token is found inside a parent tag
 *
 * @param array|null $parent Enclosing tag token, if any
 * @param array      $token  Token being checked (attached to the exception)
 */
private function checkIfTokenIsAllowedInParent($parent, array $token)
{
    if (!isset($parent)) {
        return;
    }

    if ($parent[Mustache_Tokenizer::TYPE] !== Mustache_Tokenizer::T_PARENT) {
        return;
    }

    throw new Mustache_Exception_SyntaxException('Illegal content in < parent tag', $token);
}
/**
 * Parse dynamic names.
 *
 * Detects a leading `*` marker (optionally surrounded by whitespace) on the
 * token name and strips it off.
 *
 * @throws Mustache_Exception_SyntaxException when a tag does not allow *
 *
 * @param array $token Token whose name is inspected
 *
 * @return array [Name without the marker, bool whether the marker was present]
 */
private function getDynamicName(array $token)
{
    $rawName = $token[Mustache_Tokenizer::NAME];

    // The pattern is anchored, so at most one replacement can happen.
    $stripped = preg_replace('/^\s*\*\s*/', '', $rawName, -1, $markerCount);
    if (!$markerCount) {
        return array($rawName, false);
    }

    $this->ensureTagAllowsDynamicNames($token);

    return array($stripped, true);
}
/**
 * Check whether the given token supports dynamic tag names.
 *
 * Partial, parent and section-end tags are accepted; every other tag type
 * is rejected.
 *
 * @throws Mustache_Exception_SyntaxException when a tag does not allow *
 *
 * @param array $token
 */
private function ensureTagAllowsDynamicNames(array $token)
{
    $tagType = $token[Mustache_Tokenizer::TYPE];
    $dynamicCapable = array(
        Mustache_Tokenizer::T_PARTIAL,
        Mustache_Tokenizer::T_PARENT,
        Mustache_Tokenizer::T_END_SECTION,
    );

    // Non-strict lookup on purpose: it compares the same way a switch does.
    if (in_array($tagType, $dynamicCapable)) {
        return;
    }

    throw new Mustache_Exception_SyntaxException(
        sprintf(
            'Invalid dynamic name: %s in %s tag',
            $token[Mustache_Tokenizer::NAME],
            Mustache_Tokenizer::getTagName($tagType)
        ),
        $token
    );
}
/**
 * Split a tag name into name and filters.
 *
 * The input is cut on every `|`; the first piece is the tag name and the
 * remaining pieces are the filters. Every piece is trimmed.
 *
 * @param string $name
 *
 * @return array [Tag name, Array of filters]
 */
private function getNameAndFilters($name)
{
    $pieces  = explode('|', $name);
    $tagName = trim(array_shift($pieces));
    $filters = array_map('trim', $pieces);

    return array($tagName, $filters);
}
/**
 * Enable a pragma.
 *
 * Records the pragma in the pragmas map and, for the blocks, filters and
 * dynamic-names pragmas, raises the matching boolean flag on the parser.
 *
 * @param string $name
 */
private function enablePragma($name)
{
    $this->pragmas[$name] = true;

    // Loose comparisons on purpose: they match the way a switch compares.
    if ($name == Mustache_Engine::PRAGMA_BLOCKS) {
        $this->pragmaBlocks = true;
    } elseif ($name == Mustache_Engine::PRAGMA_FILTERS) {
        $this->pragmaFilters = true;
    } elseif ($name == Mustache_Engine::PRAGMA_DYNAMIC_NAMES) {
        $this->pragmaDynamicNames = true;
    }
}
}

View file

@ -1,40 +0,0 @@
<?php
/*
* This file is part of Mustache.php.
*
* (c) 2010-2017 Justin Hileman
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
/**
* Mustache template Source interface.
*
* Declares the two accessors every template source provides: a key that
* identifies the source, and the template source text itself.
*/
interface Mustache_Source
{
/**
* Get the Source key (used to generate the compiled class name).
*
* This must return a distinct key for each template source. For example, an
* MD5 hash of the template contents would probably do the trick. The
* ProductionFilesystemLoader uses mtime and file path. If your production
* source directory is under version control, you could use the current Git
* rev and the file path...
*
* @throws RuntimeException when a source file cannot be read
*
* @return string Key identifying this template source
*/
public function getKey();
/**
* Get the template Source.
*
* @throws RuntimeException when a source file cannot be read
*
* @return string Template source text
*/
public function getSource();
}

View file

@ -1,77 +0,0 @@
<?php
/*
* This file is part of Mustache.php.
*
* (c) 2010-2017 Justin Hileman
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
/**
 * Mustache template Filesystem Source.
 *
 * This template Source uses stat() to generate the Source key, so that using
 * pre-compiled templates doesn't require hitting the disk to read the source.
 * It is more suitable for production use, and is used by default in the
 * ProductionFilesystemLoader.
 */
class Mustache_Source_FilesystemSource implements Mustache_Source
{
    /** @var string Path of the template file */
    private $fileName;

    /** @var array Names of the stat() entries that take part in the key */
    private $statProps;

    /** @var array|false|null Cached stat() result; null until first requested */
    private $stat;

    /**
     * Filesystem Source constructor.
     *
     * @param string $fileName
     * @param array  $statProps
     */
    public function __construct($fileName, array $statProps)
    {
        $this->fileName  = $fileName;
        $this->statProps = $statProps;
    }

    /**
     * Get the Source key (used to generate the compiled class name).
     *
     * The key is a JSON object holding the file name plus the requested
     * stat() entries. stat() is only consulted when stat properties were
     * requested, and its result is cached for later calls.
     *
     * @throws Mustache_Exception_RuntimeException when a source file cannot be read
     *
     * @return string
     */
    public function getKey()
    {
        $chunks = array(
            'fileName' => $this->fileName,
        );

        if (!empty($this->statProps)) {
            if (!isset($this->stat)) {
                $this->stat = @stat($this->fileName);
            }

            if ($this->stat === false) {
                throw new Mustache_Exception_RuntimeException(sprintf('Failed to read source file "%s".', $this->fileName));
            }

            foreach ($this->statProps as $prop) {
                $chunks[$prop] = $this->stat[$prop];
            }
        }

        return json_encode($chunks);
    }

    /**
     * Get the template Source.
     *
     * @throws Mustache_Exception_RuntimeException when a source file cannot be read
     *
     * @return string
     */
    public function getSource()
    {
        // file_get_contents() reports failure by returning false. Surface
        // that as the same exception getKey() raises instead of leaking a
        // non-string value to the caller.
        $source = @file_get_contents($this->fileName);
        if ($source === false) {
            throw new Mustache_Exception_RuntimeException(sprintf('Failed to read source file "%s".', $this->fileName));
        }

        return $source;
    }
}

View file

@ -1,180 +0,0 @@
<?php
/*
* This file is part of Mustache.php.
*
* (c) 2010-2017 Justin Hileman
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
/**
 * Abstract Mustache Template class.
 *
 * Base class for templates: it holds the engine instance, builds the context
 * stack for a render call and offers helpers to concrete subclasses, which
 * supply the actual rendering in renderInternal().
 *
 * @abstract
 */
abstract class Mustache_Template
{
    /**
     * @var Mustache_Engine
     */
    protected $mustache;

    /**
     * When true, only objects are considered as callables by resolveValue().
     *
     * @var bool
     */
    protected $strictCallables = false;

    /**
     * Mustache Template constructor.
     *
     * @param Mustache_Engine $mustache
     */
    public function __construct(Mustache_Engine $mustache)
    {
        $this->mustache = $mustache;
    }

    /**
     * Mustache Template instances can be treated as a function and rendered by simply calling them.
     *
     *     $m = new Mustache_Engine;
     *     $tpl = $m->loadTemplate('Hello, {{ name }}!');
     *     echo $tpl(array('name' => 'World')); // "Hello, World!"
     *
     * @see Mustache_Template::render
     *
     * @param mixed $context Array or object rendering context (default: array())
     *
     * @return string Rendered template
     */
    public function __invoke($context = array())
    {
        return $this->render($context);
    }

    /**
     * Render this template given the rendering context.
     *
     * @param mixed $context Array or object rendering context (default: array())
     *
     * @return string Rendered template
     */
    public function render($context = array())
    {
        $contextStack = $this->prepareContextStack($context);

        return $this->renderInternal($contextStack);
    }

    /**
     * Internal rendering method implemented by Mustache Template concrete subclasses.
     *
     * NOTE: This method is not part of the Mustache.php public API.
     *
     * @param Mustache_Context $context
     * @param string           $indent  (default: '')
     *
     * @return string Rendered template
     */
    abstract public function renderInternal(Mustache_Context $context, $indent = '');

    /**
     * Tests whether a value should be iterated over (e.g. in a section context).
     *
     * PHP has a single array type for both lists and hashes, so a list is
     * recognised by its keys: an array is iterable only when its keys are
     * exactly 0, 1, 2, ... in order. This will be iterated over:
     *
     *     $items = array(
     *         array('name' => 'foo'),
     *         array('name' => 'bar'),
     *         array('name' => 'baz'),
     *     );
     *
     * ... but this will be used as a section context block:
     *
     *     $items = array(
     *         1        => array('name' => 'foo'),
     *         'banana' => array('name' => 'bar'),
     *         42       => array('name' => 'baz'),
     *     );
     *
     * Objects are iterable when they implement Traversable; every other type
     * is not iterable.
     *
     * @param mixed $value
     *
     * @return bool True if the value is 'iterable'
     */
    protected function isIterable($value)
    {
        if (is_object($value)) {
            return $value instanceof Traversable;
        }

        if (!is_array($value)) {
            return false;
        }

        // A list has keys that count up from zero without gaps.
        $expectedKey = 0;
        foreach ($value as $key => $unused) {
            if ($key !== $expectedKey) {
                return false;
            }
            $expectedKey++;
        }

        return true;
    }

    /**
     * Helper method to prepare the Context stack.
     *
     * Adds the Mustache HelperCollection to the stack's top context frame if helpers are present,
     * followed by the given context when it is not empty.
     *
     * @param mixed $context Optional first context frame (default: null)
     *
     * @return Mustache_Context
     */
    protected function prepareContextStack($context = null)
    {
        $frames = new Mustache_Context();

        $helperCollection = $this->mustache->getHelpers();
        if (!$helperCollection->isEmpty()) {
            $frames->push($helperCollection);
        }

        if (!empty($context)) {
            $frames->push($context);
        }

        return $frames;
    }

    /**
     * Resolve a context value.
     *
     * Invoke the value if it is callable, otherwise return the value. The
     * string result of the call is loaded as a lambda template and rendered
     * against the given context.
     *
     * @param mixed            $value
     * @param Mustache_Context $context
     *
     * @return string
     */
    protected function resolveValue($value, Mustache_Context $context)
    {
        // Strict mode accepts objects only; otherwise anything but a plain
        // string may be a callable.
        if ($this->strictCallables) {
            $mayBeCallable = is_object($value);
        } else {
            $mayBeCallable = !is_string($value);
        }

        if (!$mayBeCallable || !is_callable($value)) {
            return $value;
        }

        $lambdaSource = (string) call_user_func($value);

        return $this->mustache->loadLambda($lambdaSource)->renderInternal($context);
    }
}

View file

@ -1,408 +0,0 @@
<?php
/*
* This file is part of Mustache.php.
*
* (c) 2010-2017 Justin Hileman
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
/**
 * Mustache Tokenizer class.
 *
 * This class is responsible for turning raw template source into a set of Mustache tokens.
 */
class Mustache_Tokenizer
{
    // Finite state machine states
    const IN_TEXT     = 0;
    const IN_TAG_TYPE = 1;
    const IN_TAG      = 2;

    // Token types
    const T_SECTION      = '#';
    const T_INVERTED     = '^';
    const T_END_SECTION  = '/';
    const T_COMMENT      = '!';
    const T_PARTIAL      = '>';
    const T_PARENT       = '<';
    const T_DELIM_CHANGE = '=';
    const T_ESCAPED      = '_v';
    const T_UNESCAPED    = '{';
    const T_UNESCAPED_2  = '&';
    const T_TEXT         = '_t';
    const T_PRAGMA       = '%';
    const T_BLOCK_VAR    = '$';
    const T_BLOCK_ARG    = '$arg';

    // Valid token types
    private static $tagTypes = array(
        self::T_SECTION      => true,
        self::T_INVERTED     => true,
        self::T_END_SECTION  => true,
        self::T_COMMENT      => true,
        self::T_PARTIAL      => true,
        self::T_PARENT       => true,
        self::T_DELIM_CHANGE => true,
        self::T_ESCAPED      => true,
        self::T_UNESCAPED    => true,
        self::T_UNESCAPED_2  => true,
        self::T_PRAGMA       => true,
        self::T_BLOCK_VAR    => true,
    );

    // Human readable names, as returned by getTagName()
    private static $tagNames = array(
        self::T_SECTION      => 'section',
        self::T_INVERTED     => 'inverted section',
        self::T_END_SECTION  => 'section end',
        self::T_COMMENT      => 'comment',
        self::T_PARTIAL      => 'partial',
        self::T_PARENT       => 'parent',
        self::T_DELIM_CHANGE => 'set delimiter',
        self::T_ESCAPED      => 'variable',
        self::T_UNESCAPED    => 'unescaped variable',
        self::T_UNESCAPED_2  => 'unescaped variable',
        self::T_PRAGMA       => 'pragma',
        self::T_BLOCK_VAR    => 'block variable',
        self::T_BLOCK_ARG    => 'block variable',
    );

    // Token properties
    const TYPE    = 'type';
    const NAME    = 'name';
    const DYNAMIC = 'dynamic';
    const OTAG    = 'otag';
    const CTAG    = 'ctag';
    const LINE    = 'line';
    const INDEX   = 'index';
    const END     = 'end';
    const INDENT  = 'indent';
    const NODES   = 'nodes';
    const VALUE   = 'value';
    const FILTERS = 'filters';

    private $state;
    private $tagType;
    private $buffer;
    private $tokens;
    private $seenTag;
    private $line;
    private $otag;
    private $otagChar;
    private $otagLen;
    private $ctag;
    private $ctagChar;
    private $ctagLen;

    /**
     * Scan and tokenize template source.
     *
     * @throws Mustache_Exception_SyntaxException          when mismatched section tags are encountered
     * @throws Mustache_Exception_InvalidArgumentException when $delimiters string is invalid
     *
     * @param string $text       Mustache template source to tokenize
     * @param string $delimiters Optionally, pass initial opening and closing delimiters (default: empty string)
     *
     * @return array Set of Mustache tokens
     */
    public function scan($text, $delimiters = '')
    {
        // Setting mbstring.func_overload makes things *really* slow.
        // Let's do everyone a favor and scan this string as ASCII instead.
        //
        // The INI directive was removed in PHP 8.0 so we don't need to check there (and can drop it
        // when we remove support for older versions of PHP).
        //
        // @codeCoverageIgnoreStart
        $encoding = null;
        if (version_compare(PHP_VERSION, '8.0.0', '<')) {
            if (function_exists('mb_internal_encoding') && ini_get('mbstring.func_overload') & 2) {
                $encoding = mb_internal_encoding();
                mb_internal_encoding('ASCII');
            }
        }
        // @codeCoverageIgnoreEnd

        try {
            $tokens = $this->tokenize($text, $delimiters);
        } catch (Exception $e) {
            // A syntax error must not leave the caller stuck with the
            // temporary ASCII encoding.
            // @codeCoverageIgnoreStart
            if ($encoding) {
                mb_internal_encoding($encoding);
            }
            // @codeCoverageIgnoreEnd

            throw $e;
        }

        // Restore the user's encoding...
        // @codeCoverageIgnoreStart
        if ($encoding) {
            mb_internal_encoding($encoding);
        }
        // @codeCoverageIgnoreEnd

        return $tokens;
    }

    /**
     * Run the tokenizer state machine over the template source.
     *
     * @throws Mustache_Exception_SyntaxException          when a tag is unclosed or its delimiters are mismatched
     * @throws Mustache_Exception_InvalidArgumentException when $delimiters string is invalid
     *
     * @param string $text       Mustache template source to tokenize
     * @param string $delimiters Initial opening and closing delimiters, or an empty string for the defaults
     *
     * @return array Set of Mustache tokens
     */
    private function tokenize($text, $delimiters)
    {
        $this->reset();

        if (is_string($delimiters) && $delimiters = trim($delimiters)) {
            $this->setDelimiters($delimiters);
        }

        $len = strlen($text);
        for ($i = 0; $i < $len; $i++) {
            switch ($this->state) {
                case self::IN_TEXT:
                    $char = $text[$i];
                    // Test whether it's time to change tags.
                    if ($char === $this->otagChar && substr($text, $i, $this->otagLen) === $this->otag) {
                        // Step back so the next iteration re-reads the opening delimiter.
                        $i--;
                        $this->flushBuffer();
                        $this->state = self::IN_TAG_TYPE;
                    } else {
                        $this->buffer .= $char;
                        if ($char === "\n") {
                            $this->flushBuffer();
                            $this->line++;
                        }
                    }
                    break;

                case self::IN_TAG_TYPE:
                    $i += $this->otagLen - 1;
                    // The template may end right after the opening delimiter;
                    // don't read past the end of the string in that case.
                    $char = isset($text[$i + 1]) ? $text[$i + 1] : '';
                    if (isset(self::$tagTypes[$char])) {
                        $tag = $char;
                        $this->tagType = $tag;
                    } else {
                        $tag = null;
                        $this->tagType = self::T_ESCAPED;
                    }

                    if ($this->tagType === self::T_DELIM_CHANGE) {
                        $i = $this->changeDelimiters($text, $i);
                        $this->state = self::IN_TEXT;
                    } elseif ($this->tagType === self::T_PRAGMA) {
                        $i = $this->addPragma($text, $i);
                        $this->state = self::IN_TEXT;
                    } else {
                        if ($tag !== null) {
                            // Skip over the tag type character.
                            $i++;
                        }
                        $this->state = self::IN_TAG;
                    }
                    $this->seenTag = $i;
                    break;

                default:
                    $char = $text[$i];
                    // Test whether it's time to change tags.
                    if ($char === $this->ctagChar && substr($text, $i, $this->ctagLen) === $this->ctag) {
                        $token = array(
                            self::TYPE  => $this->tagType,
                            self::NAME  => trim($this->buffer),
                            self::OTAG  => $this->otag,
                            self::CTAG  => $this->ctag,
                            self::LINE  => $this->line,
                            self::INDEX => ($this->tagType === self::T_END_SECTION) ? $this->seenTag - $this->otagLen : $i + $this->ctagLen,
                        );

                        if ($this->tagType === self::T_UNESCAPED) {
                            // Clean up `{{{ tripleStache }}}` style tokens.
                            if ($this->ctag === '}}') {
                                if (($i + 2 < $len) && $text[$i + 2] === '}') {
                                    $i++;
                                } else {
                                    $msg = sprintf(
                                        'Mismatched tag delimiters: %s on line %d',
                                        $token[self::NAME],
                                        $token[self::LINE]
                                    );

                                    throw new Mustache_Exception_SyntaxException($msg, $token);
                                }
                            } else {
                                $lastName = $token[self::NAME];
                                if (substr($lastName, -1) === '}') {
                                    $token[self::NAME] = trim(substr($lastName, 0, -1));
                                } else {
                                    $msg = sprintf(
                                        'Mismatched tag delimiters: %s on line %d',
                                        $token[self::NAME],
                                        $token[self::LINE]
                                    );

                                    throw new Mustache_Exception_SyntaxException($msg, $token);
                                }
                            }
                        }

                        $this->buffer = '';
                        $i += $this->ctagLen - 1;
                        $this->state = self::IN_TEXT;
                        $this->tokens[] = $token;
                    } else {
                        $this->buffer .= $char;
                    }
                    break;
            }
        }

        if ($this->state !== self::IN_TEXT) {
            $this->throwUnclosedTagException();
        }

        $this->flushBuffer();

        return $this->tokens;
    }

    /**
     * Helper function to reset tokenizer internal state.
     */
    private function reset()
    {
        $this->state    = self::IN_TEXT;
        $this->tagType  = null;
        $this->buffer   = '';
        $this->tokens   = array();
        $this->seenTag  = false;
        $this->line     = 0;

        $this->otag     = '{{';
        $this->otagChar = '{';
        $this->otagLen  = 2;

        $this->ctag     = '}}';
        $this->ctagChar = '}';
        $this->ctagLen  = 2;
    }

    /**
     * Flush the current buffer to a token.
     *
     * Nothing is emitted when the buffer is empty.
     */
    private function flushBuffer()
    {
        if (strlen($this->buffer) > 0) {
            $this->tokens[] = array(
                self::TYPE  => self::T_TEXT,
                self::LINE  => $this->line,
                self::VALUE => $this->buffer,
            );
            $this->buffer = '';
        }
    }

    /**
     * Change the current Mustache delimiters. Set new `otag` and `ctag` values.
     *
     * @throws Mustache_Exception_SyntaxException when delimiter string is invalid
     *
     * @param string $text  Mustache template source
     * @param int    $index Current tokenizer index
     *
     * @return int New index value
     */
    private function changeDelimiters($text, $index)
    {
        $startIndex = strpos($text, '=', $index) + 1;
        $close      = '=' . $this->ctag;
        $closeIndex = strpos($text, $close, $index);

        if ($closeIndex === false) {
            $this->throwUnclosedTagException();
        }

        $token = array(
            self::TYPE => self::T_DELIM_CHANGE,
            self::LINE => $this->line,
        );

        try {
            $this->setDelimiters(trim(substr($text, $startIndex, $closeIndex - $startIndex)));
        } catch (Mustache_Exception_InvalidArgumentException $e) {
            throw new Mustache_Exception_SyntaxException($e->getMessage(), $token);
        }

        $this->tokens[] = $token;

        return $closeIndex + strlen($close) - 1;
    }

    /**
     * Set the current Mustache `otag` and `ctag` delimiters.
     *
     * The string must consist of two whitespace-separated, non-empty delimiters.
     *
     * @throws Mustache_Exception_InvalidArgumentException when delimiter string is invalid
     *
     * @param string $delimiters
     */
    private function setDelimiters($delimiters)
    {
        if (!preg_match('/^\s*(\S+)\s+(\S+)\s*$/', $delimiters, $matches)) {
            throw new Mustache_Exception_InvalidArgumentException(sprintf('Invalid delimiters: %s', $delimiters));
        }

        list($_, $otag, $ctag) = $matches;

        $this->otag     = $otag;
        $this->otagChar = $otag[0];
        $this->otagLen  = strlen($otag);

        $this->ctag     = $ctag;
        $this->ctagChar = $ctag[0];
        $this->ctagLen  = strlen($ctag);
    }

    /**
     * Add pragma token.
     *
     * Pragmas are hoisted to the front of the template, so all pragma tokens
     * will appear at the front of the token list.
     *
     * @throws Mustache_Exception_SyntaxException when the pragma tag is never closed
     *
     * @param string $text
     * @param int    $index
     *
     * @return int New index value
     */
    private function addPragma($text, $index)
    {
        $end = strpos($text, $this->ctag, $index);
        if ($end === false) {
            $this->throwUnclosedTagException();
        }

        $pragma = trim(substr($text, $index + 2, $end - $index - 2));

        // Pragmas are hoisted to the front of the template.
        array_unshift($this->tokens, array(
            self::TYPE => self::T_PRAGMA,
            self::NAME => $pragma,
            self::LINE => 0,
        ));

        return $end + $this->ctagLen - 1;
    }

    /**
     * Raise a syntax exception describing the tag currently being scanned.
     *
     * The message includes the buffered tag name when there is one.
     *
     * @throws Mustache_Exception_SyntaxException always
     */
    private function throwUnclosedTagException()
    {
        $name = trim($this->buffer);
        if ($name !== '') {
            $msg = sprintf('Unclosed tag: %s on line %d', $name, $this->line);
        } else {
            $msg = sprintf('Unclosed tag on line %d', $this->line);
        }

        throw new Mustache_Exception_SyntaxException($msg, array(
            self::TYPE  => $this->tagType,
            self::NAME  => $name,
            self::OTAG  => $this->otag,
            self::CTAG  => $this->ctag,
            self::LINE  => $this->line,
            self::INDEX => $this->seenTag - $this->otagLen,
        ));
    }

    /**
     * Get the human readable name for a tag type.
     *
     * @param string $tagType One of the tokenizer T_* constants
     *
     * @return string Tag name, or 'unknown' for an unrecognised type
     */
    public static function getTagName($tagType)
    {
        return isset(self::$tagNames[$tagType]) ? self::$tagNames[$tagType] : 'unknown';
    }
}

View file

@ -1,40 +0,0 @@
<?php
/**
* PHPMailer Exception class.
* PHP Version 5.5.
*
* @see https://github.com/PHPMailer/PHPMailer/ The PHPMailer GitHub project
*
* @author Marcus Bointon (Synchro/coolbru) <phpmailer@synchromedia.co.uk>
* @author Jim Jagielski (jimjag) <jimjag@gmail.com>
* @author Andy Prevost (codeworxtech) <codeworxtech@users.sourceforge.net>
* @author Brent R. Matzelle (original founder)
* @copyright 2012 - 2020 Marcus Bointon
* @copyright 2010 - 2012 Jim Jagielski
* @copyright 2004 - 2009 Andy Prevost
* @license http://www.gnu.org/copyleft/lesser.html GNU Lesser General Public License
* @note This program is distributed in the hope that it will be useful - WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*/
namespace PHPMailer\PHPMailer;
/**
 * PHPMailer exception handler.
 *
 * @author Marcus Bointon <phpmailer@synchromedia.co.uk>
 */
class Exception extends \Exception
{
    /**
     * Format the exception message as an HTML fragment.
     *
     * @return string The HTML-escaped message wrapped in <strong> and followed by a line break
     */
    public function errorMessage()
    {
        $escapedMessage = htmlspecialchars($this->getMessage(), ENT_COMPAT | ENT_HTML401);

        return '<strong>' . $escapedMessage . "</strong><br />\n";
    }
}

View file

@ -1,502 +0,0 @@
GNU LESSER GENERAL PUBLIC LICENSE
Version 2.1, February 1999
Copyright (C) 1991, 1999 Free Software Foundation, Inc.
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
[This is the first released version of the Lesser GPL. It also counts
as the successor of the GNU Library Public License, version 2, hence
the version number 2.1.]
Preamble
The licenses for most software are designed to take away your
freedom to share and change it. By contrast, the GNU General Public
Licenses are intended to guarantee your freedom to share and change
free software--to make sure the software is free for all its users.
This license, the Lesser General Public License, applies to some
specially designated software packages--typically libraries--of the
Free Software Foundation and other authors who decide to use it. You
can use it too, but we suggest you first think carefully about whether
this license or the ordinary General Public License is the better
strategy to use in any particular case, based on the explanations below.
When we speak of free software, we are referring to freedom of use,
not price. Our General Public Licenses are designed to make sure that
you have the freedom to distribute copies of free software (and charge
for this service if you wish); that you receive source code or can get
it if you want it; that you can change the software and use pieces of
it in new free programs; and that you are informed that you can do
these things.
To protect your rights, we need to make restrictions that forbid
distributors to deny you these rights or to ask you to surrender these
rights. These restrictions translate to certain responsibilities for
you if you distribute copies of the library or if you modify it.
For example, if you distribute copies of the library, whether gratis
or for a fee, you must give the recipients all the rights that we gave
you. You must make sure that they, too, receive or can get the source
code. If you link other code with the library, you must provide
complete object files to the recipients, so that they can relink them
with the library after making changes to the library and recompiling
it. And you must show them these terms so they know their rights.
We protect your rights with a two-step method: (1) we copyright the
library, and (2) we offer you this license, which gives you legal
permission to copy, distribute and/or modify the library.
To protect each distributor, we want to make it very clear that
there is no warranty for the free library. Also, if the library is
modified by someone else and passed on, the recipients should know
that what they have is not the original version, so that the original
author's reputation will not be affected by problems that might be
introduced by others.
Finally, software patents pose a constant threat to the existence of
any free program. We wish to make sure that a company cannot
effectively restrict the users of a free program by obtaining a
restrictive license from a patent holder. Therefore, we insist that
any patent license obtained for a version of the library must be
consistent with the full freedom of use specified in this license.
Most GNU software, including some libraries, is covered by the
ordinary GNU General Public License. This license, the GNU Lesser
General Public License, applies to certain designated libraries, and
is quite different from the ordinary General Public License. We use
this license for certain libraries in order to permit linking those
libraries into non-free programs.
When a program is linked with a library, whether statically or using
a shared library, the combination of the two is legally speaking a
combined work, a derivative of the original library. The ordinary
General Public License therefore permits such linking only if the
entire combination fits its criteria of freedom. The Lesser General
Public License permits more lax criteria for linking other code with
the library.
We call this license the "Lesser" General Public License because it
does Less to protect the user's freedom than the ordinary General
Public License. It also provides other free software developers Less
of an advantage over competing non-free programs. These disadvantages
are the reason we use the ordinary General Public License for many
libraries. However, the Lesser license provides advantages in certain
special circumstances.
For example, on rare occasions, there may be a special need to
encourage the widest possible use of a certain library, so that it becomes
a de-facto standard. To achieve this, non-free programs must be
allowed to use the library. A more frequent case is that a free
library does the same job as widely used non-free libraries. In this
case, there is little to gain by limiting the free library to free
software only, so we use the Lesser General Public License.
In other cases, permission to use a particular library in non-free
programs enables a greater number of people to use a large body of
free software. For example, permission to use the GNU C Library in
non-free programs enables many more people to use the whole GNU
operating system, as well as its variant, the GNU/Linux operating
system.
Although the Lesser General Public License is Less protective of the
users' freedom, it does ensure that the user of a program that is
linked with the Library has the freedom and the wherewithal to run
that program using a modified version of the Library.
The precise terms and conditions for copying, distribution and
modification follow. Pay close attention to the difference between a
"work based on the library" and a "work that uses the library". The
former contains code derived from the library, whereas the latter must
be combined with the library in order to run.
GNU LESSER GENERAL PUBLIC LICENSE
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
0. This License Agreement applies to any software library or other
program which contains a notice placed by the copyright holder or
other authorized party saying it may be distributed under the terms of
this Lesser General Public License (also called "this License").
Each licensee is addressed as "you".
A "library" means a collection of software functions and/or data
prepared so as to be conveniently linked with application programs
(which use some of those functions and data) to form executables.
The "Library", below, refers to any such software library or work
which has been distributed under these terms. A "work based on the
Library" means either the Library or any derivative work under
copyright law: that is to say, a work containing the Library or a
portion of it, either verbatim or with modifications and/or translated
straightforwardly into another language. (Hereinafter, translation is
included without limitation in the term "modification".)
"Source code" for a work means the preferred form of the work for
making modifications to it. For a library, complete source code means
all the source code for all modules it contains, plus any associated
interface definition files, plus the scripts used to control compilation
and installation of the library.
Activities other than copying, distribution and modification are not
covered by this License; they are outside its scope. The act of
running a program using the Library is not restricted, and output from
such a program is covered only if its contents constitute a work based
on the Library (independent of the use of the Library in a tool for
writing it). Whether that is true depends on what the Library does
and what the program that uses the Library does.
1. You may copy and distribute verbatim copies of the Library's
complete source code as you receive it, in any medium, provided that
you conspicuously and appropriately publish on each copy an
appropriate copyright notice and disclaimer of warranty; keep intact
all the notices that refer to this License and to the absence of any
warranty; and distribute a copy of this License along with the
Library.
You may charge a fee for the physical act of transferring a copy,
and you may at your option offer warranty protection in exchange for a
fee.
2. You may modify your copy or copies of the Library or any portion
of it, thus forming a work based on the Library, and copy and
distribute such modifications or work under the terms of Section 1
above, provided that you also meet all of these conditions:
a) The modified work must itself be a software library.
b) You must cause the files modified to carry prominent notices
stating that you changed the files and the date of any change.
c) You must cause the whole of the work to be licensed at no
charge to all third parties under the terms of this License.
d) If a facility in the modified Library refers to a function or a
table of data to be supplied by an application program that uses
the facility, other than as an argument passed when the facility
is invoked, then you must make a good faith effort to ensure that,
in the event an application does not supply such function or
table, the facility still operates, and performs whatever part of
its purpose remains meaningful.
(For example, a function in a library to compute square roots has
a purpose that is entirely well-defined independent of the
application. Therefore, Subsection 2d requires that any
application-supplied function or table used by this function must
be optional: if the application does not supply it, the square
root function must still compute square roots.)
These requirements apply to the modified work as a whole. If
identifiable sections of that work are not derived from the Library,
and can be reasonably considered independent and separate works in
themselves, then this License, and its terms, do not apply to those
sections when you distribute them as separate works. But when you
distribute the same sections as part of a whole which is a work based
on the Library, the distribution of the whole must be on the terms of
this License, whose permissions for other licensees extend to the
entire whole, and thus to each and every part regardless of who wrote
it.
Thus, it is not the intent of this section to claim rights or contest
your rights to work written entirely by you; rather, the intent is to
exercise the right to control the distribution of derivative or
collective works based on the Library.
In addition, mere aggregation of another work not based on the Library
with the Library (or with a work based on the Library) on a volume of
a storage or distribution medium does not bring the other work under
the scope of this License.
3. You may opt to apply the terms of the ordinary GNU General Public
License instead of this License to a given copy of the Library. To do
this, you must alter all the notices that refer to this License, so
that they refer to the ordinary GNU General Public License, version 2,
instead of to this License. (If a newer version than version 2 of the
ordinary GNU General Public License has appeared, then you can specify
that version instead if you wish.) Do not make any other change in
these notices.
Once this change is made in a given copy, it is irreversible for
that copy, so the ordinary GNU General Public License applies to all
subsequent copies and derivative works made from that copy.
This option is useful when you wish to copy part of the code of
the Library into a program that is not a library.
4. You may copy and distribute the Library (or a portion or
derivative of it, under Section 2) in object code or executable form
under the terms of Sections 1 and 2 above provided that you accompany
it with the complete corresponding machine-readable source code, which
must be distributed under the terms of Sections 1 and 2 above on a
medium customarily used for software interchange.
If distribution of object code is made by offering access to copy
from a designated place, then offering equivalent access to copy the
source code from the same place satisfies the requirement to
distribute the source code, even though third parties are not
compelled to copy the source along with the object code.
5. A program that contains no derivative of any portion of the
Library, but is designed to work with the Library by being compiled or
linked with it, is called a "work that uses the Library". Such a
work, in isolation, is not a derivative work of the Library, and
therefore falls outside the scope of this License.
However, linking a "work that uses the Library" with the Library
creates an executable that is a derivative of the Library (because it
contains portions of the Library), rather than a "work that uses the
library". The executable is therefore covered by this License.
Section 6 states terms for distribution of such executables.
When a "work that uses the Library" uses material from a header file
that is part of the Library, the object code for the work may be a
derivative work of the Library even though the source code is not.
Whether this is true is especially significant if the work can be
linked without the Library, or if the work is itself a library. The
threshold for this to be true is not precisely defined by law.
If such an object file uses only numerical parameters, data
structure layouts and accessors, and small macros and small inline
functions (ten lines or less in length), then the use of the object
file is unrestricted, regardless of whether it is legally a derivative
work. (Executables containing this object code plus portions of the
Library will still fall under Section 6.)
Otherwise, if the work is a derivative of the Library, you may
distribute the object code for the work under the terms of Section 6.
Any executables containing that work also fall under Section 6,
whether or not they are linked directly with the Library itself.
6. As an exception to the Sections above, you may also combine or
link a "work that uses the Library" with the Library to produce a
work containing portions of the Library, and distribute that work
under terms of your choice, provided that the terms permit
modification of the work for the customer's own use and reverse
engineering for debugging such modifications.
You must give prominent notice with each copy of the work that the
Library is used in it and that the Library and its use are covered by
this License. You must supply a copy of this License. If the work
during execution displays copyright notices, you must include the
copyright notice for the Library among them, as well as a reference
directing the user to the copy of this License. Also, you must do one
of these things:
a) Accompany the work with the complete corresponding
machine-readable source code for the Library including whatever
changes were used in the work (which must be distributed under
Sections 1 and 2 above); and, if the work is an executable linked
with the Library, with the complete machine-readable "work that
uses the Library", as object code and/or source code, so that the
user can modify the Library and then relink to produce a modified
executable containing the modified Library. (It is understood
that the user who changes the contents of definitions files in the
Library will not necessarily be able to recompile the application
to use the modified definitions.)
b) Use a suitable shared library mechanism for linking with the
Library. A suitable mechanism is one that (1) uses at run time a
copy of the library already present on the user's computer system,
rather than copying library functions into the executable, and (2)
will operate properly with a modified version of the library, if
the user installs one, as long as the modified version is
interface-compatible with the version that the work was made with.
c) Accompany the work with a written offer, valid for at
least three years, to give the same user the materials
specified in Subsection 6a, above, for a charge no more
than the cost of performing this distribution.
d) If distribution of the work is made by offering access to copy
from a designated place, offer equivalent access to copy the above
specified materials from the same place.
e) Verify that the user has already received a copy of these
materials or that you have already sent this user a copy.
For an executable, the required form of the "work that uses the
Library" must include any data and utility programs needed for
reproducing the executable from it. However, as a special exception,
the materials to be distributed need not include anything that is
normally distributed (in either source or binary form) with the major
components (compiler, kernel, and so on) of the operating system on
which the executable runs, unless that component itself accompanies
the executable.
It may happen that this requirement contradicts the license
restrictions of other proprietary libraries that do not normally
accompany the operating system. Such a contradiction means you cannot
use both them and the Library together in an executable that you
distribute.
7. You may place library facilities that are a work based on the
Library side-by-side in a single library together with other library
facilities not covered by this License, and distribute such a combined
library, provided that the separate distribution of the work based on
the Library and of the other library facilities is otherwise
permitted, and provided that you do these two things:
a) Accompany the combined library with a copy of the same work
based on the Library, uncombined with any other library
facilities. This must be distributed under the terms of the
Sections above.
b) Give prominent notice with the combined library of the fact
that part of it is a work based on the Library, and explaining
where to find the accompanying uncombined form of the same work.
8. You may not copy, modify, sublicense, link with, or distribute
the Library except as expressly provided under this License. Any
attempt otherwise to copy, modify, sublicense, link with, or
distribute the Library is void, and will automatically terminate your
rights under this License. However, parties who have received copies,
or rights, from you under this License will not have their licenses
terminated so long as such parties remain in full compliance.
9. You are not required to accept this License, since you have not
signed it. However, nothing else grants you permission to modify or
distribute the Library or its derivative works. These actions are
prohibited by law if you do not accept this License. Therefore, by
modifying or distributing the Library (or any work based on the
Library), you indicate your acceptance of this License to do so, and
all its terms and conditions for copying, distributing or modifying
the Library or works based on it.
10. Each time you redistribute the Library (or any work based on the
Library), the recipient automatically receives a license from the
original licensor to copy, distribute, link with or modify the Library
subject to these terms and conditions. You may not impose any further
restrictions on the recipients' exercise of the rights granted herein.
You are not responsible for enforcing compliance by third parties with
this License.
11. If, as a consequence of a court judgment or allegation of patent
infringement or for any other reason (not limited to patent issues),
conditions are imposed on you (whether by court order, agreement or
otherwise) that contradict the conditions of this License, they do not
excuse you from the conditions of this License. If you cannot
distribute so as to satisfy simultaneously your obligations under this
License and any other pertinent obligations, then as a consequence you
may not distribute the Library at all. For example, if a patent
license would not permit royalty-free redistribution of the Library by
all those who receive copies directly or indirectly through you, then
the only way you could satisfy both it and this License would be to
refrain entirely from distribution of the Library.
If any portion of this section is held invalid or unenforceable under any
particular circumstance, the balance of the section is intended to apply,
and the section as a whole is intended to apply in other circumstances.
It is not the purpose of this section to induce you to infringe any
patents or other property right claims or to contest validity of any
such claims; this section has the sole purpose of protecting the
integrity of the free software distribution system which is
implemented by public license practices. Many people have made
generous contributions to the wide range of software distributed
through that system in reliance on consistent application of that
system; it is up to the author/donor to decide if he or she is willing
to distribute software through any other system and a licensee cannot
impose that choice.
This section is intended to make thoroughly clear what is believed to
be a consequence of the rest of this License.
12. If the distribution and/or use of the Library is restricted in
certain countries either by patents or by copyrighted interfaces, the
original copyright holder who places the Library under this License may add
an explicit geographical distribution limitation excluding those countries,
so that distribution is permitted only in or among countries not thus
excluded. In such case, this License incorporates the limitation as if
written in the body of this License.
13. The Free Software Foundation may publish revised and/or new
versions of the Lesser General Public License from time to time.
Such new versions will be similar in spirit to the present version,
but may differ in detail to address new problems or concerns.
Each version is given a distinguishing version number. If the Library
specifies a version number of this License which applies to it and
"any later version", you have the option of following the terms and
conditions either of that version or of any later version published by
the Free Software Foundation. If the Library does not specify a
license version number, you may choose any version ever published by
the Free Software Foundation.
14. If you wish to incorporate parts of the Library into other free
programs whose distribution conditions are incompatible with these,
write to the author to ask for permission. For software which is
copyrighted by the Free Software Foundation, write to the Free
Software Foundation; we sometimes make exceptions for this. Our
decision will be guided by the two goals of preserving the free status
of all derivatives of our free software and of promoting the sharing
and reuse of software generally.
NO WARRANTY
15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO
WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW.
EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR
OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY
KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE
LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME
THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN
WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY
AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU
FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR
CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE
LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING
RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A
FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF
SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
DAMAGES.
END OF TERMS AND CONDITIONS
How to Apply These Terms to Your New Libraries
If you develop a new library, and you want it to be of the greatest
possible use to the public, we recommend making it free software that
everyone can redistribute and change. You can do so by permitting
redistribution under these terms (or, alternatively, under the terms of the
ordinary General Public License).
To apply these terms, attach the following notices to the library. It is
safest to attach them to the start of each source file to most effectively
convey the exclusion of warranty; and each file should have at least the
"copyright" line and a pointer to where the full notice is found.
<one line to give the library's name and a brief idea of what it does.>
Copyright (C) <year> <name of author>
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Also add information on how to contact you by electronic and paper mail.
You should also get your employer (if you work as a programmer) or your
school, if any, to sign a "copyright disclaimer" for the library, if
necessary. Here is a sample; alter the names:
Yoyodyne, Inc., hereby disclaims all copyright interest in the
library `Frob' (a library for tweaking knobs) written by James Random Hacker.
<signature of Ty Coon>, 1 April 1990
Ty Coon, President of Vice
That's all there is to it!

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -1,154 +0,0 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Konrad Abicht <hi@inspirito.de>
*
* @date 2020-11-22
*
* @license LGPLv3
*
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
namespace Smalot\PdfParser;
/**
* This class contains configurations used in various classes. You can override them
* manually, in case default values aren't working.
*
* @see https://github.com/smalot/pdfparser/issues/305
*/
class Config
{
    /**
     * Value exposed through getFontSpaceLimit(); how it is interpreted is
     * decided by the classes that read it, not by this class.
     */
    private $fontSpaceLimit = -50;

    /**
     * String exposed through getHorizontalOffset(); a single space by default.
     *
     * @var string
     */
    private $horizontalOffset = ' ';

    /**
     * Characters treated as PDF whitespace: NUL, HT, LF, FF, CR, SP.
     *
     * @var string
     */
    private $pdfWhitespaces = "\0\t\n\f\r ";

    /**
     * Regex character class covering the same set: NUL, HT, LF, FF, CR, SP.
     *
     * @var string
     */
    private $pdfWhitespacesRegex = '[\0\t\n\f\r ]';

    /**
     * Keep raw image data as content (true) or discard it to save memory (false).
     *
     * @var bool
     */
    private $retainImageContent = true;

    /**
     * Memory limit, in bytes, to use when de-compressing files.
     *
     * @var int
     */
    private $decodeMemoryLimit = 0;

    /**
     * Whether font id and size are included in the dataTm array.
     *
     * @var bool
     */
    private $dataTmFontInfoHasToBeIncluded = false;

    /**
     * @return mixed the current font space limit (-50 unless overridden)
     */
    public function getFontSpaceLimit()
    {
        return $this->fontSpaceLimit;
    }

    /**
     * @param mixed $value new font space limit; stored as given
     */
    public function setFontSpaceLimit($value)
    {
        $this->fontSpaceLimit = $value;
    }

    /**
     * @return string the configured horizontal offset string
     */
    public function getHorizontalOffset(): string
    {
        return $this->horizontalOffset;
    }

    /**
     * @param mixed $value new horizontal offset; stored as given
     */
    public function setHorizontalOffset($value): void
    {
        $this->horizontalOffset = $value;
    }

    /**
     * @return string the characters currently treated as PDF whitespace
     */
    public function getPdfWhitespaces(): string
    {
        return $this->pdfWhitespaces;
    }

    /**
     * @param string $pdfWhitespaces replacement whitespace character list
     */
    public function setPdfWhitespaces(string $pdfWhitespaces): void
    {
        $this->pdfWhitespaces = $pdfWhitespaces;
    }

    /**
     * @return string the regex fragment matching PDF whitespace
     */
    public function getPdfWhitespacesRegex(): string
    {
        return $this->pdfWhitespacesRegex;
    }

    /**
     * @param string $pdfWhitespacesRegex replacement regex fragment
     */
    public function setPdfWhitespacesRegex(string $pdfWhitespacesRegex): void
    {
        $this->pdfWhitespacesRegex = $pdfWhitespacesRegex;
    }

    /**
     * @return bool true when raw image content is retained
     */
    public function getRetainImageContent(): bool
    {
        return $this->retainImageContent;
    }

    /**
     * @param bool $retainImageContent true to retain raw image data, false to discard it
     */
    public function setRetainImageContent(bool $retainImageContent): void
    {
        $this->retainImageContent = $retainImageContent;
    }

    /**
     * @return int the configured decode memory limit in bytes
     */
    public function getDecodeMemoryLimit(): int
    {
        return $this->decodeMemoryLimit;
    }

    /**
     * @param int $decodeMemoryLimit new decode memory limit in bytes
     */
    public function setDecodeMemoryLimit(int $decodeMemoryLimit): void
    {
        $this->decodeMemoryLimit = $decodeMemoryLimit;
    }

    /**
     * @return bool true when font info is included in the dataTm array
     */
    public function getDataTmFontInfoHasToBeIncluded(): bool
    {
        return $this->dataTmFontInfoHasToBeIncluded;
    }

    /**
     * @param bool $dataTmFontInfoHasToBeIncluded true to include font id and size
     */
    public function setDataTmFontInfoHasToBeIncluded(bool $dataTmFontInfoHasToBeIncluded): void
    {
        $this->dataTmFontInfoHasToBeIncluded = $dataTmFontInfoHasToBeIncluded;
    }
}

View file

@ -1,306 +0,0 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
*
* @date 2017-01-03
*
* @license LGPLv3
*
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
namespace Smalot\PdfParser;
/**
* Technical references :
* - http://www.mactech.com/articles/mactech/Vol.15/15.09/PDFIntro/index.html
* - http://framework.zend.com/issues/secure/attachment/12512/Pdf.php
* - http://www.php.net/manual/en/ref.pdf.php#74211
* - http://cpansearch.perl.org/src/JV/PostScript-Font-1.10.02/lib/PostScript/ISOLatin1Encoding.pm
* - http://cpansearch.perl.org/src/JV/PostScript-Font-1.10.02/lib/PostScript/ISOLatin9Encoding.pm
* - http://cpansearch.perl.org/src/JV/PostScript-Font-1.10.02/lib/PostScript/StandardEncoding.pm
* - http://cpansearch.perl.org/src/JV/PostScript-Font-1.10.02/lib/PostScript/WinAnsiEncoding.pm
*
* Class Document
*/
class Document
{
    /**
     * Objects of the document, keyed by object id.
     *
     * @var PDFObject[]
     */
    protected $objects = [];

    /**
     * Objects indexed by their 'Type' header value; each entry holds an
     * 'all' list and a 'subtype' map (see buildDictionary()).
     *
     * @var array
     */
    protected $dictionary = [];

    /**
     * @var Header
     */
    protected $trailer = null;

    /**
     * Details array built by buildDetails(); null until init() has run.
     *
     * @var array|null
     */
    protected $details = null;

    /**
     * Creates a document with an empty trailer header bound to this document.
     */
    public function __construct()
    {
        $this->trailer = new Header([], $this);
    }

    /**
     * Rebuilds the type dictionary and the details array, then initialises
     * the header of every object followed by the object itself.
     */
    public function init()
    {
        $this->buildDictionary();
        $this->buildDetails();

        // Propagate init to objects.
        foreach ($this->objects as $object) {
            $object->getHeader()->init();
            $object->init();
        }
    }

    /**
     * Build dictionary based on type header field.
     *
     * Objects without a 'Type' value are left out of the dictionary.
     */
    protected function buildDictionary()
    {
        // Build dictionary.
        $this->dictionary = [];

        foreach ($this->objects as $id => $object) {
            // Cache objects by type and subtype
            $type = $object->getHeader()->get('Type')->getContent();

            if (null != $type) {
                if (!isset($this->dictionary[$type])) {
                    $this->dictionary[$type] = [
                        'all' => [],
                        'subtype' => [],
                    ];
                }

                $this->dictionary[$type]['all'][$id] = $object;

                $subtype = $object->getHeader()->get('Subtype')->getContent();
                if (null != $subtype) {
                    if (!isset($this->dictionary[$type]['subtype'][$subtype])) {
                        $this->dictionary[$type]['subtype'][$subtype] = [];
                    }
                    $this->dictionary[$type]['subtype'][$subtype][$id] = $object;
                }
            }
        }
    }

    /**
     * Build details array.
     *
     * Takes the details of the trailer's 'Info' entry when it exposes a
     * header, then adds a 'Pages' count (0 when the pages cannot be listed).
     */
    protected function buildDetails()
    {
        // Build details array.
        $details = [];

        // Extract document info
        if ($this->trailer->has('Info')) {
            /** @var PDFObject $info */
            $info = $this->trailer->get('Info');
            // This could be an ElementMissing object, so we need to check for
            // the getHeader method first.
            if (null !== $info && method_exists($info, 'getHeader')) {
                $details = $info->getHeader()->getDetails();
            }
        }

        // Retrieve the page count
        try {
            $pages = $this->getPages();
            $details['Pages'] = \count($pages);
        } catch (\Exception $e) {
            $details['Pages'] = 0;
        }

        $this->details = $details;
    }

    /**
     * @return array the type dictionary built by buildDictionary()
     */
    public function getDictionary(): array
    {
        return $this->dictionary;
    }

    /**
     * Replaces the document's objects and re-runs init().
     *
     * @param PDFObject[] $objects
     */
    public function setObjects($objects = [])
    {
        $this->objects = (array) $objects;

        $this->init();
    }

    /**
     * @return PDFObject[]
     */
    public function getObjects()
    {
        return $this->objects;
    }

    /**
     * Looks an object up by id.
     *
     * @return PDFObject|Font|Page|Element|null null when no object has that id
     */
    public function getObjectById(string $id)
    {
        if (isset($this->objects[$id])) {
            return $this->objects[$id];
        }

        return null;
    }

    /**
     * @return bool true when at least one object matches the type (and subtype, if given)
     */
    public function hasObjectsByType(string $type, ?string $subtype = null): bool
    {
        return 0 < \count($this->getObjectsByType($type, $subtype));
    }

    /**
     * Returns the objects registered under a type, optionally narrowed to a
     * subtype. An unknown type or subtype yields an empty array.
     */
    public function getObjectsByType(string $type, ?string $subtype = null): array
    {
        if (!isset($this->dictionary[$type])) {
            return [];
        }

        if (null != $subtype) {
            if (!isset($this->dictionary[$type]['subtype'][$subtype])) {
                return [];
            }

            return $this->dictionary[$type]['subtype'][$subtype];
        }

        return $this->dictionary[$type]['all'];
    }

    /**
     * @return Font[]
     */
    public function getFonts()
    {
        return $this->getObjectsByType('Font');
    }

    /**
     * @return Font|null the first registered font, or null when there is none
     */
    public function getFirstFont(): ?Font
    {
        $fonts = $this->getFonts();
        if ([] === $fonts) {
            return null;
        }

        return reset($fonts);
    }

    /**
     * Lists the pages of the document. Tries, in order: the 'Pages' entry of
     * the first 'Catalog' object, every 'Pages' object, then every 'Page'
     * object (unordered).
     *
     * @return Page[]
     *
     * @throws \Exception when none of the three sources is available
     */
    public function getPages()
    {
        if ($this->hasObjectsByType('Catalog')) {
            // Search for catalog to list pages.
            $catalogues = $this->getObjectsByType('Catalog');
            $catalogue = reset($catalogues);

            /** @var Pages $object */
            $object = $catalogue->get('Pages');
            if (method_exists($object, 'getPages')) {
                return $object->getPages(true);
            }
        }

        if ($this->hasObjectsByType('Pages')) {
            // Search for pages to list kids.
            $pages = [];

            /** @var Pages[] $objects */
            $objects = $this->getObjectsByType('Pages');
            foreach ($objects as $object) {
                $pages = array_merge($pages, $object->getPages(true));
            }

            return $pages;
        }

        if ($this->hasObjectsByType('Page')) {
            // Search for 'page' (unordered pages).
            $pages = $this->getObjectsByType('Page');

            return array_values($pages);
        }

        throw new \Exception('Missing catalog.');
    }

    /**
     * Concatenates the trimmed text of each page, separated by a blank line.
     * Null pages and pages whose trimmed text is empty are skipped.
     *
     * @param int|null $pageLimit when a positive integer, only that many leading pages are read
     *
     * @throws \Exception propagated from getPages()
     */
    public function getText(?int $pageLimit = null): string
    {
        $texts = [];
        $pages = $this->getPages();

        // Only use the first X number of pages if $pageLimit is set and numeric.
        if (\is_int($pageLimit) && 0 < $pageLimit) {
            $pages = \array_slice($pages, 0, $pageLimit);
        }

        foreach ($pages as $page) {
            /**
             * In some cases, the $page variable may be null.
             */
            if (null === $page) {
                continue;
            }

            // Compare against '' explicitly: a truthiness test would also
            // discard a page whose whole text is the string "0".
            $text = trim($page->getText());
            if ('' !== $text) {
                $texts[] = $text;
            }
        }

        return implode("\n\n", $texts);
    }

    /**
     * @return Header the trailer header of the document
     */
    public function getTrailer(): Header
    {
        return $this->trailer;
    }

    /**
     * @param Header $trailer replacement trailer header
     */
    public function setTrailer(Header $trailer)
    {
        $this->trailer = $trailer;
    }

    /**
     * @return array the details built by buildDetails(), or an empty array
     *               when init() has not run yet
     */
    public function getDetails(): array
    {
        // $details stays null until buildDetails() runs; returning it as-is
        // would violate the declared array return type.
        return $this->details ?? [];
    }
}

View file

@ -1,150 +0,0 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
*
* @date 2017-01-03
*
* @license LGPLv3
*
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
namespace Smalot\PdfParser;
use Smalot\PdfParser\Element\ElementArray;
use Smalot\PdfParser\Element\ElementBoolean;
use Smalot\PdfParser\Element\ElementDate;
use Smalot\PdfParser\Element\ElementHexa;
use Smalot\PdfParser\Element\ElementName;
use Smalot\PdfParser\Element\ElementNull;
use Smalot\PdfParser\Element\ElementNumeric;
use Smalot\PdfParser\Element\ElementString;
use Smalot\PdfParser\Element\ElementStruct;
use Smalot\PdfParser\Element\ElementXRef;
/**
* Class Element
*/
class Element
{
    /**
     * Document this element belongs to, if any.
     *
     * @var Document
     */
    protected $document = null;

    /**
     * Raw value wrapped by this element.
     */
    protected $value = null;

    /**
     * @param mixed         $value    value to wrap
     * @param Document|null $document owning document, if any
     */
    public function __construct($value, ?Document $document = null)
    {
        $this->document = $document;
        $this->value = $value;
    }

    /**
     * Initialisation hook; intentionally empty in this class.
     */
    public function init()
    {
    }

    /**
     * Loosely (==) compares the given value with the wrapped value.
     */
    public function equals($value): bool
    {
        return $value == $this->value;
    }

    /**
     * When the wrapped value is an array, reports whether any of its items
     * equals() the given value; otherwise behaves like equals().
     */
    public function contains($value): bool
    {
        if (!\is_array($this->value)) {
            return $this->equals($value);
        }

        /** @var Element $item */
        foreach ($this->value as $item) {
            if ($item->equals($value)) {
                return true;
            }
        }

        return false;
    }

    /**
     * @return mixed the wrapped value, unchanged
     */
    public function getContent()
    {
        return $this->value;
    }

    /**
     * @return string the wrapped value cast to string
     */
    public function __toString(): string
    {
        return (string) $this->value;
    }

    /**
     * Parses a run of elements out of $content, starting at $position.
     *
     * By default each element must be introduced by a "/Name" token and the
     * result is keyed by that name (without the slash). When a truthy fourth
     * argument is passed, values are read back to back and keyed 0, 1, 2, ...
     * Parsing stops at the first position where no element parser succeeds;
     * $position is then restored to where that failed attempt began.
     *
     * @param string        $content  text to parse (trimmed before use)
     * @param Document|null $document document handed to the created elements
     * @param int           $position read offset, advanced past what was consumed
     *
     * @return array parsed elements
     */
    public static function parse(string $content, ?Document $document = null, int &$position = 0)
    {
        // The "values only" switch is an undeclared fourth argument, read here
        // because the declared signature stops at three parameters.
        $arguments = \func_get_args();
        $only_values = $arguments[3] ?? false;

        $content = trim($content);
        $values = [];

        // Tried in this order; the first parser returning a truthy result wins.
        $parsers = [
            ElementName::class,
            ElementXRef::class,
            ElementNumeric::class,
            ElementStruct::class,
            ElementBoolean::class,
            ElementNull::class,
            ElementDate::class,
            ElementString::class,
            ElementHexa::class,
            ElementArray::class,
        ];

        do {
            $old_position = $position;

            if ($only_values) {
                $name = \count($values);
                $value = substr($content, $position);
            } else {
                if (!preg_match('/^\s*(?P<name>\/[A-Z0-9\._]+)(?P<value>.*)/si', substr($content, $position), $match)) {
                    break;
                }

                $name = ltrim($match['name'], '/');
                $value = $match['value'];
                $position = strpos($content, $value, $position + \strlen($match['name']));
            }

            $element = false;
            foreach ($parsers as $parser) {
                $element = $parser::parse($value, $document, $position);
                if ($element) {
                    break;
                }
            }

            if (!$element) {
                // Nothing recognised the value: rewind and stop.
                $position = $old_position;
                break;
            }

            $values[$name] = $element;
        } while ($position < \strlen($content));

        return $values;
    }
}

View file

@ -1,139 +0,0 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
*
* @date 2017-01-03
*
* @license LGPLv3
*
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
namespace Smalot\PdfParser\Element;
use Smalot\PdfParser\Document;
use Smalot\PdfParser\Element;
use Smalot\PdfParser\Header;
use Smalot\PdfParser\PDFObject;
/**
* Class ElementArray
*/
class ElementArray extends Element
{
    /**
     * @param mixed         $value    the array's elements
     * @param Document|null $document document used to resolve cross references
     */
    public function __construct($value, ?Document $document = null)
    {
        parent::__construct($value, $document);
    }

    /**
     * Resolves every cross reference held by the array, then returns the
     * (now resolved) elements.
     */
    public function getContent()
    {
        foreach ($this->value as $key => $unused) {
            $this->resolveXRef($key);
        }

        return parent::getContent();
    }

    /**
     * @return array the elements as stored, without resolving cross references
     */
    public function getRawContent(): array
    {
        return $this->value;
    }

    /**
     * Flattens the array into plain details.
     *
     * Header, PDFObject and nested ElementArray entries are only expanded
     * when $deep is true (and are omitted otherwise); any other Element
     * contributes its content.
     *
     * @param bool $deep whether to descend into headers, objects and nested arrays
     */
    public function getDetails(bool $deep = true): array
    {
        $details = [];

        foreach ($this->getContent() as $key => $item) {
            if ($deep && $item instanceof Header) {
                $details[$key] = $item->getDetails($deep);
            } elseif ($deep && $item instanceof PDFObject) {
                $details[$key] = $item->getDetails(false);
            } elseif ($item instanceof self) {
                if ($deep) {
                    $details[$key] = $item->getDetails();
                }
            } elseif ($item instanceof Element) {
                // Nested arrays were handled by the branch above.
                $details[$key] = $item->getContent();
            }
        }

        return $details;
    }

    /**
     * @return string the stored elements joined with commas
     */
    public function __toString(): string
    {
        return implode(',', $this->value);
    }

    /**
     * Replaces the entry at $name with the document object it points to when
     * that entry is a cross reference, and returns the entry.
     *
     * @return Element|PDFObject
     */
    protected function resolveXRef(string $name)
    {
        $entry = $this->value[$name];

        if ($entry instanceof ElementXRef) {
            /** @var ElementXRef $entry */
            $this->value[$name] = $this->document->getObjectById($entry->getId());
        }

        return $this->value[$name];
    }

    /**
     * Parses a bracketed array at the start of $content and advances $offset
     * past it.
     *
     * @todo: These methods return mixed and mismatched types throughout the hierarchy
     *
     * @return bool|ElementArray false when $content does not start with '['
     */
    public static function parse(string $content, ?Document $document = null, int &$offset = 0)
    {
        if (!preg_match('/^\s*\[(?P<array>.*)/is', $content)) {
            return false;
        }

        // Split on every bracket and accumulate pieces until the opening
        // bracket is balanced again.
        preg_match_all('/(.*?)(\[|\])/s', trim($content), $pieces);

        $depth = 0;
        $inner = '';
        foreach ($pieces[0] as $piece) {
            $inner .= $piece;

            if (false !== strpos($piece, '[')) {
                ++$depth;
            } else {
                --$depth;
            }

            if ($depth <= 0) {
                break;
            }
        }

        // Removes 1 level [ and ].
        $inner = substr(trim($inner), 1, -1);

        $innerOffset = 0;
        $values = Element::parse($inner, $document, $innerOffset, true);

        // Skip up to and including '[', then the inner text and the closing ']'.
        $offset += strpos($content, '[') + \strlen($inner) + 2;

        return new self($values, $document);
    }
}

View file

@ -1,75 +0,0 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
*
* @date 2017-01-03
*
* @license LGPLv3
*
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
namespace Smalot\PdfParser\Element;
use Smalot\PdfParser\Document;
use Smalot\PdfParser\Element;
/**
* Class ElementBoolean
*/
class ElementBoolean extends Element
{
    /**
     * Stores true when $value is the boolean true or the word "true" in any
     * letter case; stores false otherwise. No document is attached.
     *
     * @param string|bool $value
     */
    public function __construct($value)
    {
        $isTrue = 'true' === strtolower($value) || true === $value;

        parent::__construct($isTrue, null);
    }

    /**
     * @return string 'true' or 'false'
     */
    public function __toString(): string
    {
        if ($this->value) {
            return 'true';
        }

        return 'false';
    }

    /**
     * Strictly (===) compares the stored boolean with the given value.
     */
    public function equals($value): bool
    {
        return $value === $this->getContent();
    }

    /**
     * Parses a leading "true"/"false" token (case-insensitive, optional
     * leading whitespace) and advances $offset past it.
     *
     * @return bool|ElementBoolean false when $content does not start with such a token
     */
    public static function parse(string $content, ?Document $document = null, int &$offset = 0)
    {
        if (!preg_match('/^\s*(?P<value>true|false)/is', $content, $found)) {
            return false;
        }

        $literal = $found['value'];
        $offset += strpos($content, $literal) + \strlen($literal);

        return new self($literal);
    }
}

View file

@ -1,139 +0,0 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
*
* @date 2017-01-03
*
* @license LGPLv3
*
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
namespace Smalot\PdfParser\Element;
use Smalot\PdfParser\Document;
/**
* Class ElementDate
*/
class ElementDate extends ElementString
{
    /**
     * Maps the length of a normalised PDF date string to the
     * DateTime::createFromFormat() pattern used to parse it.
     *
     * Lengths that have no entry (e.g. 16) cannot be parsed.
     *
     * @var array
     */
    protected static $formats = [
        4 => 'Y',
        6 => 'Ym',
        8 => 'Ymd',
        10 => 'YmdH',
        12 => 'YmdHi',
        14 => 'YmdHis',
        15 => 'YmdHise',
        17 => 'YmdHisO',
        18 => 'YmdHisO',
        19 => 'YmdHisO',
    ];

    /**
     * Format applied by __toString(); change it with setFormat().
     *
     * @var string
     */
    protected $format = 'c';

    /**
     * @var \DateTime
     */
    protected $value;

    /**
     * @param \DateTime $value
     *
     * @throws \Exception when $value is not a \DateTime instance
     */
    public function __construct($value)
    {
        if (!($value instanceof \DateTime)) {
            throw new \Exception('DateTime required.'); // FIXME: Sometimes strings are passed to this function
        }

        parent::__construct($value);
    }

    /**
     * Sets the date() format string used when the element is cast to string.
     */
    public function setFormat(string $format)
    {
        $this->format = $format;
    }

    /**
     * Compares by Unix timestamp.
     *
     * @param \DateTime|string $value a DateTime, or anything strtotime() can read
     */
    public function equals($value): bool
    {
        if ($value instanceof \DateTime) {
            $timestamp = $value->getTimeStamp();
        } else {
            $timestamp = strtotime($value);
        }

        return $timestamp == $this->value->getTimeStamp();
    }

    public function __toString(): string
    {
        return (string) $this->value->format($this->format);
    }

    /**
     * Parses a PDF date string of the form "(D:YYYYMMDDHHmmSS...)".
     *
     * On success $offset is advanced past the closing parenthesis. When the
     * text is not a date, or the date cannot be interpreted, false is
     * returned and $offset is left untouched.
     *
     * @return bool|ElementDate
     */
    public static function parse(string $content, ?Document $document = null, int &$offset = 0)
    {
        if (preg_match('/^\s*\(D\:(?P<name>.*?)\)/s', $content, $match)) {
            $name = $match['name'];
            // Apostrophes separate the timezone hour/minute parts; drop them.
            $name = str_replace("'", '', $name);
            $date = false;

            // Smallest format : Y
            // Full format     : YmdHisP
            if (preg_match('/^\d{4}(\d{2}(\d{2}(\d{2}(\d{2}(\d{2}(Z(\d{2,4})?|[\+-]?\d{2}(\d{2})?)?)?)?)?)?)?$/', $name)) {
                if ($pos = strpos($name, 'Z')) {
                    // Ignore anything that follows the 'Z' marker.
                    $name = substr($name, 0, $pos + 1);
                } elseif (18 == \strlen($name) && preg_match('/[^\+-]0000$/', $name)) {
                    // Unsigned "0000" offset: rewrite it as an explicit "+0000".
                    $name = substr($name, 0, -4).'+0000';
                }

                // The pattern above also accepts lengths without a known
                // format (14 digits plus an unsigned 2-digit offset gives 16).
                // Treat those as unparsable instead of reading an undefined
                // array key and handing null to createFromFormat().
                $length = \strlen($name);
                if (isset(self::$formats[$length])) {
                    $date = \DateTime::createFromFormat(self::$formats[$length], $name, new \DateTimeZone('UTC'));
                }
            } else {
                // special cases
                if (preg_match('/^\d{1,2}-\d{1,2}-\d{4},?\s+\d{2}:\d{2}:\d{2}[\+-]\d{4}$/', $name)) {
                    $name = str_replace(',', '', $name);
                    $format = 'n-j-Y H:i:sO';
                    $date = \DateTime::createFromFormat($format, $name, new \DateTimeZone('UTC'));
                }
            }

            if (!$date) {
                return false;
            }

            // 4 = 3 characters for '(D:' plus 1 for the closing ')'.
            $offset += strpos($content, '(D:') + \strlen($match['name']) + 4;

            return new self($date);
        }

        return false;
    }
}

View file

@ -1,85 +0,0 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
*
* @date 2017-01-03
*
* @license LGPLv3
*
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
namespace Smalot\PdfParser\Element;
use Smalot\PdfParser\Document;
/**
* Class ElementHexa
*/
class ElementHexa extends ElementString
{
    /**
     * Parses a hexadecimal string literal ("<48656C6C6F>").
     *
     * The decoded text is re-wrapped in parentheses and handed to
     * ElementDate::parse() first, then to ElementString::parse(), so the
     * result is whichever of those two element types accepts it.
     * On success $offset is advanced past the closing '>'.
     *
     * @return bool|ElementHexa|ElementDate
     */
    public static function parse(string $content, ?Document $document = null, int &$offset = 0)
    {
        if (preg_match('/^\s*\<(?P<name>[A-F0-9]+)\>/is', $content, $match)) {
            $name = $match['name'];
            $offset += strpos($content, '<'.$name) + \strlen($name) + 2; // 2 for '<' and '>'

            // repackage string as standard
            $name = '('.self::decode($name).')';
            $element = ElementDate::parse($name, $document);

            if (!$element) {
                $element = ElementString::parse($name, $document);
            }

            return $element;
        }

        return false;
    }

    /**
     * Converts a string of hexadecimal digits to text.
     *
     * Input starting with "00" is read in groups of four digits, each turned
     * into a numeric HTML entity; any other input is read as one byte per two
     * digits. The result is finally passed through html_entity_decode().
     *
     * @param string $value hexadecimal digits without the surrounding '<' and '>'
     */
    public static function decode(string $value): string
    {
        $text = '';
        $length = \strlen($value);

        if ('00' === substr($value, 0, 2)) {
            for ($i = 0; $i < $length; $i += 4) {
                $hex = substr($value, $i, 4);
                $text .= '&#'.str_pad(hexdec($hex), 4, '0', \STR_PAD_LEFT).';';
            }
        } else {
            for ($i = 0; $i < $length; $i += 2) {
                // With an odd number of digits the missing final digit is
                // assumed to be 0 (so "A" means 0xA0, not 0x0A).
                $hex = str_pad(substr($value, $i, 2), 2, '0');
                $text .= \chr(hexdec($hex));
            }
        }

        $text = html_entity_decode($text, \ENT_NOQUOTES, 'UTF-8');

        return $text;
    }
}

View file

@ -1,66 +0,0 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
*
* @date 2017-01-03
*
* @license LGPLv3
*
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
namespace Smalot\PdfParser\Element;
use Smalot\PdfParser\Element;
/**
* Class ElementMissing
*/
class ElementMissing extends Element
{
    /**
     * Creates the placeholder with neither a value nor a document.
     */
    public function __construct()
    {
        parent::__construct(null, null);
    }

    /**
     * A missing element has no content; always false.
     */
    public function getContent(): bool
    {
        return false;
    }

    /**
     * A missing element never contains anything.
     */
    public function contains($value): bool
    {
        return false;
    }

    /**
     * A missing element never equals anything.
     */
    public function equals($value): bool
    {
        return false;
    }

    /**
     * Renders as the empty string.
     */
    public function __toString(): string
    {
        return '';
    }
}

View file

@ -1,69 +0,0 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
*
* @date 2017-01-03
*
* @license LGPLv3
*
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
namespace Smalot\PdfParser\Element;
use Smalot\PdfParser\Document;
use Smalot\PdfParser\Element;
use Smalot\PdfParser\Font;
/**
* Class ElementName
*/
class ElementName extends Element
{
    /**
     * @param string $value the name, without its leading '/'
     */
    public function __construct(string $value)
    {
        parent::__construct($value, null);
    }

    /**
     * Loose (==) comparison against the stored name.
     */
    public function equals($value): bool
    {
        return $value == $this->value;
    }

    /**
     * Reads a leading name token ("/Something") from $content.
     *
     * On success $offset is advanced past the name and the name is passed
     * through Font::decodeEntities() before being stored.
     *
     * @return bool|ElementName the parsed element, or false when $content
     *                          does not start with a name
     */
    public static function parse(string $content, ?Document $document = null, int &$offset = 0)
    {
        if (!preg_match('/^\s*\/([A-Z0-9\-\+,#\.]+)/is', $content, $match)) {
            return false;
        }

        $rawName = $match[1];
        $offset += strpos($content, $rawName) + \strlen($rawName);

        return new self(Font::decodeEntities($rawName));
    }
}

View file

@ -1,71 +0,0 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
*
* @date 2017-01-03
*
* @license LGPLv3
*
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
namespace Smalot\PdfParser\Element;
use Smalot\PdfParser\Document;
use Smalot\PdfParser\Element;
/**
* Class ElementNull
*/
class ElementNull extends Element
{
    /**
     * Creates the element with a null value and no document.
     */
    public function __construct()
    {
        parent::__construct(null, null);
    }

    /**
     * Renders as the literal keyword "null".
     */
    public function __toString(): string
    {
        return 'null';
    }

    /**
     * Strict (type-sensitive) comparison against the stored content.
     */
    public function equals($value): bool
    {
        return $value === $this->getContent();
    }

    /**
     * Reads a leading lowercase "null" keyword from $content.
     *
     * On success $offset is advanced past the keyword.
     *
     * @return bool|ElementNull the parsed element, or false when $content
     *                          does not start with "null"
     */
    public static function parse(string $content, ?Document $document = null, int &$offset = 0)
    {
        if (!preg_match('/^\s*(null)/s', $content, $match)) {
            return false;
        }

        $keyword = 'null';
        $offset += strpos($content, $keyword) + \strlen($keyword);

        return new self();
    }
}

View file

@ -1,62 +0,0 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
*
* @date 2017-01-03
*
* @license LGPLv3
*
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
namespace Smalot\PdfParser\Element;
use Smalot\PdfParser\Document;
use Smalot\PdfParser\Element;
/**
* Class ElementNumeric
*/
class ElementNumeric extends Element
{
    /**
     * @param string $value textual number; it is stored as a float
     */
    public function __construct(string $value)
    {
        $number = (float) $value;

        parent::__construct($number, null);
    }

    /**
     * Reads a leading number (optional '-', then digits and dots) from $content.
     *
     * On success $offset is advanced past the matched text.
     *
     * @return bool|ElementNumeric the parsed element, or false when $content
     *                             does not start with a number
     */
    public static function parse(string $content, ?Document $document = null, int &$offset = 0)
    {
        if (!preg_match('/^\s*(?P<value>\-?[0-9\.]+)/s', $content, $match)) {
            return false;
        }

        $digits = $match['value'];
        $offset += strpos($content, $digits) + \strlen($digits);

        return new self($digits);
    }
}

View file

@ -1,93 +0,0 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
*
* @date 2017-01-03
*
* @license LGPLv3
*
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
namespace Smalot\PdfParser\Element;
use Smalot\PdfParser\Document;
use Smalot\PdfParser\Element;
use Smalot\PdfParser\Font;
/**
* Class ElementString
*/
class ElementString extends Element
{
    /**
     * @param mixed $value the decoded string content
     */
    public function __construct($value)
    {
        parent::__construct($value, null);
    }

    /**
     * Loose (==) comparison against the stored value.
     */
    public function equals($value): bool
    {
        return $value == $this->value;
    }

    /**
     * Reads a leading literal string ("(...)") from $content.
     *
     * The string ends at the first ')' that is not escaped by a backslash.
     * Escape sequences are then unfolded and the text is run through the
     * Font decoders. On success $offset is advanced past the closing ')'.
     *
     * @return bool|ElementString the parsed element, or false when $content
     *                            does not start with '('
     */
    public static function parse(string $content, ?Document $document = null, int &$offset = 0)
    {
        if (!preg_match('/^\s*\((?P<name>.*)/s', $content, $match)) {
            return false;
        }

        $remainder = $match['name'];

        // Find next ')' not escaped.
        $searchFrom = 0;
        while (true) {
            $closing = strpos($remainder, ')', $searchFrom);
            if (false === $closing) {
                // No further ')' at all; $closing stays false.
                break;
            }

            // An even number of backslashes directly before the ')' means
            // the parenthesis itself is not escaped.
            $before = substr($remainder, 0, $closing);
            preg_match('/(?P<escape>[\\\]*)$/s', $before, $slashes);
            if (0 === \strlen($slashes['escape']) % 2) {
                break;
            }

            $searchFrom = $closing + 1;
        }

        // Extract string.
        $text = substr($remainder, 0, (int) $closing);
        $offset += strpos($content, '(') + $closing + 2; // 2 for '(' and ')'

        $text = str_replace(
            ['\\\\', '\\ ', '\\/', '\(', '\)', '\n', '\r', '\t'],
            ['\\', ' ', '/', '(', ')', "\n", "\r", "\t"],
            $text
        );

        // Decode string.
        $text = Font::decodeOctal($text);
        $text = Font::decodeEntities($text);
        $text = Font::decodeHexadecimal($text, false);
        $text = Font::decodeUnicode($text);

        return new self($text);
    }
}

View file

@ -1,75 +0,0 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
*
* @date 2017-01-03
*
* @license LGPLv3
*
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
namespace Smalot\PdfParser\Element;
use Smalot\PdfParser\Document;
use Smalot\PdfParser\Element;
use Smalot\PdfParser\Header;
/**
* Class ElementStruct
*/
class ElementStruct extends Element
{
    /**
     * Reads a leading dictionary ("<< ... >>") from $content.
     *
     * Nested "<<"/">>" pairs are balanced to find the matching end. The text
     * between the outer delimiters is parsed with Element::parse() and the
     * result is wrapped in a Header. On success $offset is advanced past the
     * closing ">>".
     *
     * @return false|Header
     */
    public static function parse(string $content, ?Document $document = null, int &$offset = 0)
    {
        if (!preg_match('/^\s*<<(?P<struct>.*)/is', $content)) {
            return false;
        }

        // Split the content into tokens that each end with '<<' or '>>'.
        preg_match_all('/(.*?)(<<|>>)/s', trim($content), $tokens);

        // Accumulate tokens until the opening '<<' has been balanced.
        $depth = 0;
        $raw = '';
        foreach ($tokens[0] as $token) {
            $raw .= $token;
            if (false !== strpos($token, '<<')) {
                ++$depth;
            } else {
                --$depth;
            }
            if ($depth <= 0) {
                break;
            }
        }

        $offset += strpos($content, '<<') + \strlen(rtrim($raw));

        // Removes '<<' and '>>'.
        $body = trim((string) preg_replace('/^\s*<<(.*)>>\s*$/s', '\\1', $raw));

        $position = 0;
        $elements = Element::parse($body, $document, $position);

        return new Header($elements, $document);
    }
}

View file

@ -1,98 +0,0 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
*
* @date 2017-01-03
*
* @license LGPLv3
*
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
namespace Smalot\PdfParser\Element;
use Smalot\PdfParser\Document;
use Smalot\PdfParser\Element;
/**
* Class ElementXRef
*/
class ElementXRef extends Element
{
    /**
     * Returns the referenced object's id (the element content, e.g. "5_0").
     */
    public function getId(): string
    {
        return $this->getContent();
    }

    /**
     * Looks the referenced object up in the owning document by id.
     */
    public function getObject()
    {
        return $this->document->getObjectById($this->getId());
    }

    /**
     * Compares this reference with another reference, an id or a number.
     *
     * @param mixed $value an ElementXRef, an id string, or a number
     */
    public function equals($value): bool
    {
        /**
         * In case $value is a number and $this->value is a string like 5_0
         *
         * Without this if-clause code like:
         *
         *      $element = new ElementXRef('5_0');
         *      $this->assertTrue($element->equals(5));
         *
         * would fail (= 5_0 and 5 are not equal in PHP 8.0+).
         */
        if (
            true === is_numeric($value)
            && true === \is_string($this->getContent())
            && 1 === preg_match('/[0-9]+\_[0-9]+/', $this->getContent())
        ) {
            // The float cast keeps only the leading number of "5_0".
            return (float) $this->getContent() == $value;
        }

        $id = ($value instanceof self) ? $value->getId() : $value;

        return $this->getId() == $id;
    }

    public function __toString(): string
    {
        return '#Obj#'.$this->getId();
    }

    /**
     * Reads a leading indirect reference ("12 0 R") from $content.
     *
     * The two numbers are joined with a single underscore to form the id
     * ("12_0"), whatever kind or amount of whitespace separated them in the
     * source. On success $offset is advanced past the 'R'.
     *
     * @return bool|ElementXRef the parsed element, or false when $content
     *                          does not start with a reference
     */
    public static function parse(string $content, ?Document $document = null, int &$offset = 0)
    {
        if (preg_match('/^\s*(?P<id>(?P<number>[0-9]+)\s+(?P<generation>[0-9]+)\s+R)/s', $content, $match)) {
            $reference = $match['id'];
            $offset += strpos($content, $reference) + \strlen($reference);

            // Build the id from the captured numbers instead of replacing
            // spaces, so repeated spaces, tabs or newlines between them
            // cannot leak into the id (e.g. "5__0").
            $id = $match['number'].'_'.$match['generation'];

            return new self($id, $document);
        }

        return false;
    }
}

View file

@ -1,157 +0,0 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
*
* @date 2017-01-03
*
* @license LGPLv3
*
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
namespace Smalot\PdfParser;
use Exception;
use Smalot\PdfParser\Element\ElementNumeric;
use Smalot\PdfParser\Encoding\EncodingLocator;
use Smalot\PdfParser\Encoding\PostScriptGlyphs;
use Smalot\PdfParser\Exception\EncodingNotFoundException;
/**
* Class Encoding
*/
class Encoding extends PDFObject
{
    /**
     * Translations of the base encoding, as returned by getTranslations().
     *
     * @var array
     */
    protected $encoding;

    /**
     * Overrides taken from the 'Differences' entry, keyed by code.
     *
     * @var array
     */
    protected $differences;

    /**
     * Base encoding with the differences applied on top.
     *
     * @var array
     */
    protected $mapping;

    /**
     * Builds the lookup tables from the 'BaseEncoding' and 'Differences' entries.
     *
     * Nothing beyond resetting the tables happens without a 'BaseEncoding'.
     * When 'Differences' does not yield an array, only the base encoding is
     * loaded and the mapping stays empty.
     */
    public function init()
    {
        $this->mapping = [];
        $this->differences = [];
        $this->encoding = [];

        if (!$this->has('BaseEncoding')) {
            return;
        }

        $this->encoding = EncodingLocator::getEncoding($this->getEncodingClass())->getTranslations();

        // Build table including differences.
        $entries = $this->get('Differences')->getContent();
        if (!\is_array($entries)) {
            return;
        }

        $code = 0;
        foreach ($entries as $entry) {
            // A number sets the code assigned to the entries that follow it.
            if ($entry instanceof ElementNumeric) {
                $code = $entry->getContent();
                continue;
            }

            // ElementName (objects are unwrapped to their content).
            $this->differences[$code] = \is_object($entry) ? $entry->getContent() : $entry;

            // For the next char.
            ++$code;
        }

        $this->mapping = $this->encoding;
        foreach ($this->differences as $index => $glyph) {
            $this->mapping[$index] = $glyph;
        }
    }

    /**
     * Adds 'BaseEncoding' (default 'Ansi') and 'Differences' (default '')
     * to the details reported by the parent class.
     */
    public function getDetails(bool $deep = true): array
    {
        $details = [
            'BaseEncoding' => $this->has('BaseEncoding') ? (string) $this->get('BaseEncoding') : 'Ansi',
            'Differences' => $this->has('Differences') ? (string) $this->get('Differences') : '',
        ];

        return $details + parent::getDetails($deep);
    }

    /**
     * Resolves $dec through the mapping (when it has an entry) and passes
     * the result to PostScriptGlyphs::getCodePoint().
     */
    public function translateChar($dec): ?int
    {
        $glyph = isset($this->mapping[$dec]) ? $this->mapping[$dec] : $dec;

        return PostScriptGlyphs::getCodePoint($glyph);
    }

    /**
     * Returns encoding class name if available or empty string (only prior PHP 7.4).
     *
     * @throws \Exception On PHP 7.4+ an exception is thrown if encoding class doesn't exist.
     */
    public function __toString(): string
    {
        try {
            return $this->getEncodingClass();
        } catch (\Exception $e) {
            if (version_compare(\PHP_VERSION, '7.4.0', '>=')) {
                throw $e;
            }

            // prior to PHP 7.4 toString has to return an empty string.
            return '';
        }
    }

    /**
     * Derives the fully qualified encoding class name from 'BaseEncoding'.
     *
     * @throws EncodingNotFoundException when no such class exists
     */
    protected function getEncodingClass(): string
    {
        // Load reference table charset.
        $baseEncoding = preg_replace('/[^A-Z0-9]/is', '', $this->get('BaseEncoding')->getContent());
        $className = '\\Smalot\\PdfParser\\Encoding\\'.$baseEncoding;

        if (class_exists($className)) {
            return $className;
        }

        throw new EncodingNotFoundException('Missing encoding data for: "'.$baseEncoding.'".');
    }
}

View file

@ -1,8 +0,0 @@
<?php
namespace Smalot\PdfParser\Encoding;
/**
 * Common base type of the encoding table classes in this namespace.
 */
abstract class AbstractEncoding
{
/**
 * Returns the encoding's translation table.
 *
 * The implementations visible alongside this class return a list of glyph
 * names produced by splitting a space-separated string, so each name's
 * array index is its position in that list.
 */
abstract public function getTranslations(): array;
}

View file

@ -1,17 +0,0 @@
<?php
namespace Smalot\PdfParser\Encoding;
class EncodingLocator
{
    /**
     * Encoding instances created so far, keyed by class name.
     */
    protected static $encodings;

    /**
     * Returns the shared instance of the given encoding class, creating it
     * on first request.
     *
     * @param string $encodingClassName class name to instantiate (no constructor arguments)
     */
    public static function getEncoding(string $encodingClassName): AbstractEncoding
    {
        if (isset(self::$encodings[$encodingClassName])) {
            return self::$encodings[$encodingClassName];
        }

        $instance = new $encodingClassName();
        self::$encodings[$encodingClassName] = $instance;

        return $instance;
    }
}

View file

@ -1,76 +0,0 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
*
* @date 2017-01-03
*
* @license LGPLv3
*
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
// Source : http://cpansearch.perl.org/src/JV/PostScript-Font-1.10.02/lib/PostScript/ISOLatin1Encoding.pm
namespace Smalot\PdfParser\Encoding;
/**
* Class ISOLatin1Encoding
*/
class ISOLatin1Encoding extends AbstractEncoding
{
    /**
     * Returns the glyph names of this encoding as a list; a name's array
     * index is its position in the space-separated table below.
     */
    public function getTranslations(): array
    {
        return explode(
            ' ',
            '.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef '.
            '.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef '.
            '.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef '.
            '.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef '.
            'space exclam quotedbl numbersign dollar percent ampersand quoteright '.
            'parenleft parenright asterisk plus comma minus period slash zero one '.
            'two three four five six seven eight nine colon semicolon less equal '.
            'greater question at A B C D E F G H I J K L M N O P Q R S T U V W X '.
            'Y Z bracketleft backslash bracketright asciicircum underscore '.
            'quoteleft a b c d e f g h i j k l m n o p q r s t u v w x y z '.
            'braceleft bar braceright asciitilde .notdef .notdef .notdef .notdef '.
            '.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef '.
            '.notdef .notdef .notdef .notdef .notdef dotlessi grave acute '.
            'circumflex tilde macron breve dotaccent dieresis .notdef ring '.
            'cedilla .notdef hungarumlaut ogonek caron space exclamdown cent '.
            'sterling currency yen brokenbar section dieresis copyright '.
            'ordfeminine guillemotleft logicalnot hyphen registered macron degree '.
            'plusminus twosuperior threesuperior acute mu paragraph '.
            'periodcentered cedilla onesuperior ordmasculine guillemotright '.
            'onequarter onehalf threequarters questiondown Agrave Aacute '.
            'Acircumflex Atilde Adieresis Aring AE Ccedilla Egrave Eacute '.
            'Ecircumflex Edieresis Igrave Iacute Icircumflex Idieresis Eth Ntilde '.
            'Ograve Oacute Ocircumflex Otilde Odieresis multiply Oslash Ugrave '.
            'Uacute Ucircumflex Udieresis Yacute Thorn germandbls agrave aacute '.
            'acircumflex atilde adieresis aring ae ccedilla egrave eacute '.
            'ecircumflex edieresis igrave iacute icircumflex idieresis eth ntilde '.
            'ograve oacute ocircumflex otilde odieresis divide oslash ugrave '.
            'uacute ucircumflex udieresis yacute thorn ydieresis'
        );
    }
}

View file

@ -1,76 +0,0 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
*
* @date 2017-01-03
*
* @license LGPLv3
*
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
// Source : http://cpansearch.perl.org/src/JV/PostScript-Font-1.10.02/lib/PostScript/ISOLatin9Encoding.pm
namespace Smalot\PdfParser\Encoding;
/**
* Class ISOLatin9Encoding
*/
class ISOLatin9Encoding extends AbstractEncoding
{
    /**
     * Returns the glyph names of this encoding as a list; a name's array
     * index is its position in the space-separated table below.
     */
    public function getTranslations(): array
    {
        return explode(
            ' ',
            '.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef '.
            '.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef '.
            '.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef '.
            '.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef '.
            'space exclam quotedbl numbersign dollar percent ampersand quoteright '.
            'parenleft parenright asterisk plus comma minus period slash zero one '.
            'two three four five six seven eight nine colon semicolon less equal '.
            'greater question at A B C D E F G H I J K L M N O P Q R S T U V W X '.
            'Y Z bracketleft backslash bracketright asciicircum underscore '.
            'quoteleft a b c d e f g h i j k l m n o p q r s t u v w x y z '.
            'braceleft bar braceright asciitilde .notdef .notdef .notdef .notdef '.
            '.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef '.
            '.notdef .notdef .notdef .notdef .notdef dotlessi grave acute '.
            'circumflex tilde macron breve dotaccent dieresis .notdef ring '.
            'cedilla .notdef hungarumlaut ogonek caron space exclamdown cent '.
            'sterling Euro yen Scaron section scaron copyright '.
            'ordfeminine guillemotleft logicalnot hyphen registered macron degree '.
            'plusminus twosuperior threesuperior Zcaron mu paragraph '.
            'periodcentered zcaron onesuperior ordmasculine guillemotright '.
            'OE oe Ydieresis questiondown Agrave Aacute '.
            'Acircumflex Atilde Adieresis Aring AE Ccedilla Egrave Eacute '.
            'Ecircumflex Edieresis Igrave Iacute Icircumflex Idieresis Eth Ntilde '.
            'Ograve Oacute Ocircumflex Otilde Odieresis multiply Oslash Ugrave '.
            'Uacute Ucircumflex Udieresis Yacute Thorn germandbls agrave aacute '.
            'acircumflex atilde adieresis aring ae ccedilla egrave eacute '.
            'ecircumflex edieresis igrave iacute icircumflex idieresis eth ntilde '.
            'ograve oacute ocircumflex otilde odieresis divide oslash ugrave '.
            'uacute ucircumflex udieresis yacute thorn ydieresis'
        );
    }
}

View file

@ -1,80 +0,0 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
*
* @date 2017-01-03
*
* @license LGPLv3
*
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
// Source : http://www.opensource.apple.com/source/vim/vim-34/vim/runtime/print/mac-roman.ps
namespace Smalot\PdfParser\Encoding;
/**
* Glyph-name table for the "MacRomanEncoding" PDF base encoding.
*
* The table is transcribed from the mac-roman.ps file referenced in the
* "Source" comment above the namespace declaration.
*
* NOTE(review): how AbstractEncoding consumes getTranslations() is not
* visible in this file - confirm before relying on anything beyond the
* returned list itself.
*/
class MacRomanEncoding extends AbstractEncoding
{
/**
* Return the glyph names of this encoding as a zero-based list.
*
* The list has 256 entries (32 rows of 8 names); the position of a name is
* its character code, e.g. 'space' is entry 32 and 'A' is entry 65.
* Unassigned codes are filled with '.notdef'.
*
* NOTE(review): entry 45 is 'minus' here while the Standard and WinAnsi
* tables of this library use 'hyphen' at the same position - presumably
* inherited from the upstream source; confirm it is intended.
*
* @return array list of glyph names indexed by character code
*/
public function getTranslations(): array
{
// Names are kept in one space-separated string and split at the end, so
// every name must be separated by exactly one space (including the
// trailing space that ends each row).
$encoding =
'.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef '.
'.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef '.
'.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef '.
'.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef '.
'space exclam quotedbl numbersign dollar percent ampersand quotesingle '.
'parenleft parenright asterisk plus comma minus period slash '.
'zero one two three four five six seven '.
'eight nine colon semicolon less equal greater question '.
'at A B C D E F G '.
'H I J K L M N O '.
'P Q R S T U V W '.
'X Y Z bracketleft backslash bracketright asciicircum underscore '.
'grave a b c d e f g '.
'h i j k l m n o '.
'p q r s t u v w '.
'x y z braceleft bar braceright asciitilde .notdef '.
'Adieresis Aring Ccedilla Eacute Ntilde Odieresis Udieresis aacute '.
'agrave acircumflex adieresis atilde aring ccedilla eacute egrave '.
'ecircumflex edieresis iacute igrave icircumflex idieresis ntilde oacute '.
'ograve ocircumflex odieresis otilde uacute ugrave ucircumflex udieresis '.
'dagger degree cent sterling section bullet paragraph germandbls '.
'registered copyright trademark acute dieresis notequal AE Oslash '.
'infinity plusminus lessequal greaterequal yen mu partialdiff summation '.
'Pi pi integral ordfeminine ordmasculine Omega ae oslash '.
'questiondown exclamdown logicalnot radical florin approxequal delta guillemotleft '.
'guillemotright ellipsis space Agrave Atilde Otilde OE oe '.
'endash emdash quotedblleft quotedblright quoteleft quoteright divide lozenge '.
'ydieresis Ydieresis fraction currency guilsinglleft guilsinglright fi fl '.
'daggerdbl periodcentered quotesinglbase quotedblbase perthousand Acircumflex Ecircumflex Aacute '.
'Edieresis Egrave Iacute Icircumflex Idieresis Igrave Oacute Ocircumflex '.
'heart Ograve Uacute Ucircumflex Ugrave dotlessi circumflex tilde '.
'macron breve dotaccent ring cedilla hungarumlaut ogonek caron';
return explode(' ', $encoding);
}
}

File diff suppressed because it is too large Load diff

View file

@ -1,76 +0,0 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
*
* @date 2017-01-03
*
* @license LGPLv3
*
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
// Source : http://cpansearch.perl.org/src/JV/PostScript-Font-1.10.02/lib/PostScript/StandardEncoding.pm
namespace Smalot\PdfParser\Encoding;
/**
* Glyph-name table for the "StandardEncoding" PDF base encoding.
*
* The table is transcribed from the PostScript::StandardEncoding module
* referenced in the "Source" comment above the namespace declaration.
*
* NOTE(review): how AbstractEncoding consumes getTranslations() is not
* visible in this file - confirm before relying on anything beyond the
* returned list itself.
*/
class StandardEncoding extends AbstractEncoding
{
/**
* Return the glyph names of this encoding as a zero-based list.
*
* The list has 256 entries; the position of a name is its character code,
* e.g. 'space' is entry 32 and 'A' is entry 65. Unassigned codes are filled
* with '.notdef'. Rows are of uneven length, so do not count on a fixed
* number of names per source line when editing.
*
* @return array list of glyph names indexed by character code
*/
public function getTranslations(): array
{
// Names are kept in one space-separated string and split at the end, so
// every name must be separated by exactly one space (including the
// trailing space that ends each row).
$encoding =
'.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef '.
'.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef '.
'.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef '.
'.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef '.
'space exclam quotedbl numbersign dollar percent ampersand quoteright '.
'parenleft parenright asterisk plus comma hyphen period slash zero '.
'one two three four five six seven eight nine colon semicolon less '.
'equal greater question at A B C D E F G H I J K L M N O P Q R S T U '.
'V W X Y Z bracketleft backslash bracketright asciicircum underscore '.
'quoteleft a b c d e f g h i j k l m n o p q r s t u v w x y z '.
'braceleft bar braceright asciitilde .notdef .notdef .notdef .notdef '.
'.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef '.
'.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef '.
'.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef '.
'.notdef .notdef .notdef .notdef .notdef .notdef exclamdown cent '.
'sterling fraction yen florin section currency quotesingle '.
'quotedblleft guillemotleft guilsinglleft guilsinglright fi fl '.
'.notdef endash dagger daggerdbl periodcentered .notdef paragraph '.
'bullet quotesinglbase quotedblbase quotedblright guillemotright '.
'ellipsis perthousand .notdef questiondown .notdef grave acute '.
'circumflex tilde macron breve dotaccent dieresis .notdef ring '.
'cedilla .notdef hungarumlaut ogonek caron emdash .notdef .notdef '.
'.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef '.
'.notdef .notdef .notdef .notdef .notdef .notdef AE .notdef '.
'ordfeminine .notdef .notdef .notdef .notdef Lslash Oslash OE '.
'ordmasculine .notdef .notdef .notdef .notdef .notdef ae .notdef '.
'.notdef .notdef dotlessi .notdef .notdef lslash oslash oe germandbls '.
'.notdef .notdef .notdef .notdef';
return explode(' ', $encoding);
}
}

View file

@ -1,76 +0,0 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
*
* @date 2017-01-03
*
* @license LGPLv3
*
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
// Source : http://cpansearch.perl.org/src/JV/PostScript-Font-1.10.02/lib/PostScript/WinANSIEncoding.pm
namespace Smalot\PdfParser\Encoding;
/**
* Glyph-name table for the "WinAnsiEncoding" PDF base encoding.
*
* The table is transcribed from the PostScript::WinANSIEncoding module
* referenced in the "Source" comment above the namespace declaration.
*
* NOTE(review): how AbstractEncoding consumes getTranslations() is not
* visible in this file - confirm before relying on anything beyond the
* returned list itself.
*/
class WinAnsiEncoding extends AbstractEncoding
{
/**
* Return the glyph names of this encoding as a zero-based list.
*
* The list has 256 entries; the position of a name is its character code,
* e.g. 'space' is entry 32 and 'A' is entry 65. Codes 0-31 are '.notdef',
* while several codes in the 127-157 range are filled with 'bullet'.
* Rows are of uneven length, so do not count on a fixed number of names
* per source line when editing.
*
* @return array list of glyph names indexed by character code
*/
public function getTranslations(): array
{
// Names are kept in one space-separated string and split at the end, so
// every name must be separated by exactly one space (including the
// trailing space that ends each row).
$encoding =
'.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef '.
'.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef '.
'.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef '.
'.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef '.
'space exclam quotedbl numbersign dollar percent ampersand quotesingle '.
'parenleft parenright asterisk plus comma hyphen period slash zero one '.
'two three four five six seven eight nine colon semicolon less equal '.
'greater question at A B C D E F G H I J K L M N O P Q R S T U V W X '.
'Y Z bracketleft backslash bracketright asciicircum underscore '.
'grave a b c d e f g h i j k l m n o p q r s t u v w x y z '.
'braceleft bar braceright asciitilde bullet Euro bullet quotesinglbase '.
'florin quotedblbase ellipsis dagger daggerdbl circumflex perthousand '.
'Scaron guilsinglleft OE bullet Zcaron bullet bullet quoteleft quoteright '.
'quotedblleft quotedblright bullet endash emdash tilde trademark scaron '.
'guilsinglright oe bullet zcaron Ydieresis space exclamdown cent '.
'sterling currency yen brokenbar section dieresis copyright '.
'ordfeminine guillemotleft logicalnot hyphen registered macron degree '.
'plusminus twosuperior threesuperior acute mu paragraph '.
'periodcentered cedilla onesuperior ordmasculine guillemotright '.
'onequarter onehalf threequarters questiondown Agrave Aacute '.
'Acircumflex Atilde Adieresis Aring AE Ccedilla Egrave Eacute '.
'Ecircumflex Edieresis Igrave Iacute Icircumflex Idieresis Eth Ntilde '.
'Ograve Oacute Ocircumflex Otilde Odieresis multiply Oslash Ugrave '.
'Uacute Ucircumflex Udieresis Yacute Thorn germandbls agrave aacute '.
'acircumflex atilde adieresis aring ae ccedilla egrave eacute '.
'ecircumflex edieresis igrave iacute icircumflex idieresis eth ntilde '.
'ograve oacute ocircumflex otilde odieresis divide oslash ugrave '.
'uacute ucircumflex udieresis yacute thorn ydieresis';
return explode(' ', $encoding);
}
}

View file

@ -1,7 +0,0 @@
<?php
namespace Smalot\PdfParser\Exception;
/**
* Exception type used by the library for an encoding that cannot be resolved.
*
* It adds nothing to \Exception; it exists so that callers can catch this
* specific failure. NOTE(review): the throwing code is not part of this
* file - presumably raised when a font names an unknown base encoding; confirm.
*/
class EncodingNotFoundException extends \Exception
{
}

View file

@ -1,664 +0,0 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
*
* @date 2017-01-03
*
* @license LGPLv3
*
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
namespace Smalot\PdfParser;
use Smalot\PdfParser\Encoding\WinAnsiEncoding;
use Smalot\PdfParser\Exception\EncodingNotFoundException;
/**
* Class Font
*/
class Font extends PDFObject
{
/**
* Placeholder returned by translateChar() for a character that has no entry
* in the translation table (when the caller asks for the default).
*/
public const MISSING = '?';
/**
* Character translation table: integer character code => UTF-8 string.
* Stays null until loadTranslateTable() or setTable() has been called.
*
* @var array|null
*/
protected $table = null;
/**
* Byte widths of the codes used by the translation table, stored under the
* keys 'from' and 'to'. Stays null until loadTranslateTable() has run.
*
* @var array|null
*/
protected $tableSizes = null;
/**
* Caches results from uchr(), keyed by code point. Static, hence shared by
* every Font instance.
*
* @var array
*/
private static $uchrCache = [];
/**
* In some PDF-files encoding could be referenced by object id but object itself does not contain
* `/Type /Encoding` in its dictionary. These objects wouldn't be initialized as Encoding in
* \Smalot\PdfParser\PDFObject::factory() during file parsing (they would be just PDFObject).
*
* Therefore, we create an instance of Encoding from them during decoding and cache this value in this property.
*
* @var Encoding|null unset until getInitializedEncodingByPdfObject() is first called
*
* @see https://github.com/smalot/pdfparser/pull/500
*/
private $initializedEncodingByPdfObject;
/**
 * Initialise the font by building its character translation table.
 */
public function init()
{
    // The result is cached on the instance, so the return value is not needed here.
    $this->loadTranslateTable();
}
/**
 * Return the font's 'BaseFont' entry as a string, or '[Unknown]' when the
 * font has no such entry.
 */
public function getName(): string
{
    if (!$this->has('BaseFont')) {
        return '[Unknown]';
    }

    return (string) $this->get('BaseFont');
}
/**
 * Return the 'Subtype' entry of the font header, cast to string.
 */
public function getType(): string
{
    $subtype = $this->header->get('Subtype');

    return (string) $subtype;
}
/**
 * Return the font details: 'Name', 'Type' and 'Encoding' first, followed by
 * whatever the parent implementation reports.
 *
 * 'Encoding' falls back to the string 'Ansi' when the font has no Encoding
 * entry. The three keys set here win over same-named keys from the parent.
 *
 * @param bool $deep forwarded unchanged to the parent implementation
 */
public function getDetails(bool $deep = true): array
{
    $name = $this->getName();
    $type = $this->getType();
    $encoding = $this->has('Encoding') ? (string) $this->get('Encoding') : 'Ansi';

    // Array union keeps the left-hand value for keys present on both sides.
    return [
        'Name' => $name,
        'Type' => $type,
        'Encoding' => $encoding,
    ] + parent::getDetails($deep);
}
/**
 * Translate one raw character code through the font's translation table.
 *
 * Lookup order:
 * - a table hit is returned as is;
 * - otherwise, when $use_default is true, self::MISSING is returned;
 * - otherwise the raw $char is returned, except for a single-byte $char on a
 *   font whose Encoding element stringifies to the WinAnsiEncoding class
 *   name: then the code is converted with uchr().
 *
 * @param string $char        raw bytes of one character code
 * @param bool   $use_default return self::MISSING instead of the fallback on a table miss
 *
 * @return string|bool
 */
public function translateChar(string $char, bool $use_default = true)
{
    // Interpret the raw bytes as one big-endian integer code.
    $code = hexdec(bin2hex($char));

    if (\array_key_exists($code, $this->table)) {
        return $this->table[$code];
    }

    // fallback for decoding single-byte ANSI characters that are not in the lookup table
    $fallback = $char;
    $isSingleByte = \strlen($char) < 2;

    if ($isSingleByte && $this->has('Encoding') && $this->get('Encoding') instanceof Encoding) {
        try {
            $encodingName = $this->get('Encoding')->__toString();
            if (WinAnsiEncoding::class === $encodingName) {
                $fallback = self::uchr($code);
            }
        } catch (EncodingNotFoundException $e) {
            // Encoding->getEncodingClass() throws EncodingNotFoundException when BaseEncoding doesn't exists
            // See table 5.11 on PDF 1.5 specs for more info
        }
    }

    if ($use_default) {
        return self::MISSING;
    }

    return $fallback;
}
/**
 * Convert a unicode code point to its "utf-8" encoded string.
 *
 * Results are memoised in self::$uchrCache, so each code point is converted
 * at most once per process.
 *
 * NOTE(review): the 'HTML-ENTITIES' mbstring encoding is reported as
 * deprecated by recent PHP versions - confirm and plan a replacement.
 */
public static function uchr(int $code): string
{
    if (isset(self::$uchrCache[$code])) {
        return self::$uchrCache[$code];
    }

    // html_entity_decode() will not work with UTF-16 or UTF-32 char entities,
    // therefore, we use mb_convert_encoding() instead
    $utf8 = mb_convert_encoding("&#{$code};", 'UTF-8', 'HTML-ENTITIES');
    self::$uchrCache[$code] = $utf8;

    return self::$uchrCache[$code];
}
/**
 * Init internal chars translation table by ToUnicode CMap.
 *
 * The table is built once and cached in $this->table; later calls return
 * the cached array. Without a 'ToUnicode' entry the table stays empty and
 * both code widths stay at 1 byte.
 *
 * Three CMap constructs are read from the ToUnicode stream content:
 * - codespacerange: the first section defines the 'from'/'to' code widths;
 * - bfchar: single code => destination string mappings (each section also
 *   overrides the 'from' width);
 * - bfrange: either <from> <to> <offset> or <from> <to> [<dst1> <dst2> ...].
 *
 * @return array map of integer character code => "utf-8" encoded string
 */
public function loadTranslateTable(): array
{
    if (null !== $this->table) {
        return $this->table;
    }

    $this->table = [];
    $this->tableSizes = [
        'from' => 1,
        'to' => 1,
    ];

    if (!$this->has('ToUnicode')) {
        return $this->table;
    }

    $content = $this->get('ToUnicode')->getContent();

    // Support for multiple spacerange sections (only the first one is used).
    if (preg_match_all('/begincodespacerange(?P<sections>.*?)endcodespacerange/s', $content, $sections)) {
        $regexp = '/<(?P<from>[0-9A-F]+)> *<(?P<to>[0-9A-F]+)>[ \r\n]+/is';
        preg_match_all($regexp, $sections['sections'][0], $ranges);

        $this->tableSizes = [
            'from' => self::hexByteLength(current($ranges['from'])),
            'to' => self::hexByteLength(current($ranges['to'])),
        ];
    }

    // Support for multiple bfchar sections
    if (preg_match_all('/beginbfchar(?P<sections>.*?)endbfchar/s', $content, $sections)) {
        $regexp = '/<(?P<from>[0-9A-F]+)> +<(?P<to>[0-9A-F]+)>[ \r\n]+/is';

        foreach ($sections['sections'] as $section) {
            preg_match_all($regexp, $section, $pairs);

            $this->tableSizes['from'] = self::hexByteLength(current($pairs['from']));

            foreach ($pairs['from'] as $key => $from) {
                $this->table[hexdec($from)] = self::hexCodeUnitsToUtf8($pairs['to'][$key]);
            }
        }
    }

    // Support for multiple bfrange sections
    if (preg_match_all('/beginbfrange(?P<sections>.*?)endbfrange/s', $content, $sections)) {
        foreach ($sections['sections'] as $section) {
            // Support for : <srcCode1> <srcCode2> <dstString>
            $regexp = '/<(?P<from>[0-9A-F]+)> *<(?P<to>[0-9A-F]+)> *<(?P<offset>[0-9A-F]+)>[ \r\n]+/is';
            preg_match_all($regexp, $section, $ranges);

            foreach ($ranges['from'] as $key => $from) {
                $char_from = hexdec($from);
                $char_to = hexdec($ranges['to'][$key]);
                $offset = hexdec($ranges['offset'][$key]);

                for ($char = $char_from; $char <= $char_to; ++$char) {
                    $this->table[$char] = self::uchr($char - $char_from + $offset);
                }
            }

            // Support for : <srcCode1> <srcCodeN> [<dstString1> <dstString2> ... <dstStringN>]
            // Some PDF file has 2-byte Unicode values on new lines > added \r\n
            $regexp = '/<(?P<from>[0-9A-F]+)> *<(?P<to>[0-9A-F]+)> *\[(?P<strings>[\r\n<>0-9A-F ]+)\][ \r\n]+/is';
            preg_match_all($regexp, $section, $lists);

            foreach ($lists['from'] as $key => $from) {
                $char_from = hexdec($from);
                $strings = [];
                preg_match_all('/<(?P<string>[0-9A-F]+)> */is', $lists['strings'][$key], $strings);

                foreach ($strings['string'] as $position => $string) {
                    $this->table[$char_from + $position] = self::hexCodeUnitsToUtf8($string);
                }
            }
        }
    }

    return $this->table;
}

/**
 * Number of bytes needed for a hexadecimal code string, never less than 1.
 *
 * Always returns an integer: an odd number of hex digits is rounded up so
 * that the width can safely be used as a substr() length and loop step.
 *
 * @param string|false $hex hex digits, or false when no code was matched
 */
private static function hexByteLength($hex): int
{
    return (int) max(1, ceil(\strlen((string) $hex) / 2));
}

/**
 * Convert a hex string to "utf-8", treating every group of four hex digits
 * (and any shorter remainder) as one code passed to uchr().
 */
private static function hexCodeUnitsToUtf8(string $hex): string
{
    $parts = preg_split(
        '/([0-9A-F]{4})/i',
        $hex,
        0,
        \PREG_SPLIT_NO_EMPTY | \PREG_SPLIT_DELIM_CAPTURE
    );

    $text = '';
    foreach ($parts as $part) {
        $text .= self::uchr(hexdec($part));
    }

    return $text;
}
/**
 * Replace the character translation table with a caller-supplied one.
 *
 * Expected layout of $table:
 * - key - integer character code;
 * - value - "utf-8" encoded value.
 *
 * Once set, loadTranslateTable() returns this table instead of parsing the
 * ToUnicode CMap, because it only builds a table while none is stored.
 *
 * @return void
 */
public function setTable(array $table)
{
    $this->table = $table;
}
/**
 * Calculate text width with data from header 'Widths'.
 *
 * Each character of $text is mapped back to its character code through the
 * translation table, then to an index into 'Widths' through the
 * FirstChar..LastChar range. Characters that cannot be resolved at any of
 * these steps are collected in $missing and do not contribute to the sum.
 *
 * @param string     $text    text to measure
 * @param array|null $missing receives the list of characters without a known width
 *
 * @return float|null sum of the found widths, or null when no character had a width
 */
public function calculateTextWidth(string $text, ?array &$missing = null): ?float
{
    $index_map = array_flip($this->table);
    $details = $this->getDetails();

    // Fonts without these entries must not raise errors: fall back to "no widths known".
    $widths = $details['Widths'] ?? [];

    // Widths array is zero indexed but table is not. We must map them based on FirstChar and LastChar
    $width_map = array_flip(range($details['FirstChar'] ?? 0, $details['LastChar'] ?? 0));

    $width = null;
    $missing = [];
    $textLength = mb_strlen($text);

    for ($i = 0; $i < $textLength; ++$i) {
        $char = mb_substr($text, $i, 1);

        if (
            !\array_key_exists($char, $index_map)
            || !\array_key_exists($index_map[$char], $width_map)
            || !\array_key_exists($width_map[$index_map[$char]], $widths)
        ) {
            $missing[] = $char;
            continue;
        }

        $width_index = $width_map[$index_map[$char]];
        $width += $widths[$width_index];
    }

    return $width;
}
/**
 * Decode the hexadecimal strings (<...>) found in $hexa into raw bytes.
 *
 * Text outside of <hex> groups is copied unchanged. Input containing
 * '<?xml' anywhere is returned untouched.
 *
 * @param string $hexa       text that may contain <hex> groups
 * @param bool   $add_braces wrap every decoded group in parentheses and escape its backslashes
 */
public static function decodeHexadecimal(string $hexa, bool $add_braces = false): string
{
    // Special shortcut for XML content.
    if (false !== stripos($hexa, '<?xml')) {
        return $hexa;
    }

    $chunks = preg_split('/(<[a-f0-9]+>)/si', $hexa, -1, \PREG_SPLIT_NO_EMPTY | \PREG_SPLIT_DELIM_CAPTURE);
    $decoded = '';

    foreach ($chunks as $chunk) {
        $isBracketed = preg_match('/^<.*>$/s', $chunk) && false === stripos($chunk, '<?xml');

        if (!$isBracketed) {
            $decoded .= $chunk;
            continue;
        }

        // strip line breaks, then the surrounding angle brackets
        $hex = trim(preg_replace("/[\r\n]/", '', $chunk), '<>');
        $binary = pack('H*', $hex);

        if ($add_braces) {
            $decoded .= '('.preg_replace('/\\\/s', '\\\\\\', $binary).')';
        } else {
            $decoded .= $binary;
        }
    }

    return $decoded;
}
/**
 * Replace every backslash followed by exactly three octal digits with the
 * byte of that value; all other text is copied unchanged.
 */
public static function decodeOctal(string $text): string
{
    $chunks = preg_split('/(\\\\[0-7]{3})/s', $text, -1, \PREG_SPLIT_NO_EMPTY | \PREG_SPLIT_DELIM_CAPTURE);
    $decoded = '';

    foreach ($chunks as $chunk) {
        $isEscape = preg_match('/^\\\\[0-7]{3}$/', $chunk);
        $decoded .= $isEscape ? \chr(octdec(trim($chunk, '\\'))) : $chunk;
    }

    return $decoded;
}
/**
 * Decode "#xx" escapes, where xx are two hexadecimal digits, into the byte
 * of that value; all other text is copied unchanged.
 *
 * Despite the method name these are not HTML entities. Both upper and lower
 * case hex digits are accepted (e.g. "#2F" and "#2f" both yield "/").
 */
public static function decodeEntities(string $text): string
{
    $decoded = preg_replace_callback(
        '/#([0-9a-f]{2})/i',
        static function (array $match): string {
            return \chr(hexdec($match[1]));
        },
        $text
    );

    // preg_replace_callback() yields null on a PCRE error; keep the input in that case.
    return $decoded ?? $text;
}
/**
 * Check if given string is Unicode text (by its leading FE FF byte order mark);
 * if so, decode it to a "utf-8" encoded string, two bytes per character.
 * Otherwise return the text as is.
 *
 * @todo Rename in next major release to make the name correspond to reality (for ex. decodeIfUnicode())
 */
public static function decodeUnicode(string $text): string
{
    if (!preg_match('/^\xFE\xFF/i', $text)) {
        return $text;
    }

    // Strip U+FEFF byte order marker.
    $payload = substr($text, 2);
    $byteCount = \strlen($payload);
    $decoded = '';
    $offset = 0;

    while ($offset < $byteCount) {
        $unit = substr($payload, $offset, 2);
        $decoded .= self::uchr(hexdec(bin2hex($unit)));
        $offset += 2;
    }

    return $decoded;
}
/**
 * Return the font space limit taken from the config object.
 *
 * @todo Deprecated, use $this->config->getFontSpaceLimit() instead.
 */
protected function getFontSpaceLimit(): int
{
    $limit = $this->config->getFontSpaceLimit();

    return $limit;
}
/**
 * Decode text by commands array.
 *
 * Commands are processed in order:
 * - type 'n' (number): a value below the font space limit starts a new word;
 * - type '<': the command is decoded as a hexadecimal string;
 * - any other type: the command is decoded for octal escapes.
 * Escaped characters are then unescaped and the text is appended to the
 * current word. Finally every word is passed through decodeContent() and the
 * words are joined with single spaces.
 *
 * @param array $commands list of commands, each indexed by PDFObject::TYPE and PDFObject::COMMAND
 */
public function decodeText(array $commands): string
{
    $words = [];
    $current = 0;
    $spaceLimit = $this->getFontSpaceLimit();

    foreach ($commands as $command) {
        switch ($command[PDFObject::TYPE]) {
            case 'n':
                if ((float) trim($command[PDFObject::COMMAND]) < $spaceLimit) {
                    $current = \count($words);
                }
                continue 2;
            case '<':
                // Decode hexadecimal.
                $piece = self::decodeHexadecimal('<'.$command[PDFObject::COMMAND].'>');
                break;
            default:
                // Decode octal (if necessary).
                $piece = self::decodeOctal($command[PDFObject::COMMAND]);
        }

        // replace escaped chars
        $piece = str_replace(
            ['\\\\', '\(', '\)', '\n', '\r', '\t', '\f', '\ '],
            ['\\', '(', ')', "\n", "\r", "\t", "\f", ' '],
            $piece
        );

        // add content to result string
        $words[$current] = ($words[$current] ?? '').$piece;
    }

    $decodedWords = [];
    foreach ($words as $key => $word) {
        $decodedWords[$key] = $this->decodeContent($word);
    }

    return implode(' ', $decodedWords);
}
/**
 * Decode given $text to "utf-8" encoded string.
 *
 * Strategy, first match wins:
 * 1. the font has a ToUnicode entry: decode by CMap / descendant fonts;
 * 2. the font has an Encoding entry and decoding by it yields a non-null result;
 * 3. otherwise: autodetect.
 *
 * @param bool $unicode This parameter is deprecated and might be removed in a future release
 */
public function decodeContent(string $text, ?bool &$unicode = null): string
{
    if ($this->has('ToUnicode')) {
        return $this->decodeContentByToUnicodeCMapOrDescendantFonts($text);
    }

    $decoded = $this->has('Encoding') ? $this->decodeContentByEncoding($text) : null;

    return $decoded ?? $this->decodeContentByAutodetectIfNecessary($text);
}
/**
 * Decode $text code by code, each code being tableSizes['from'] bytes wide.
 *
 * For every code:
 * - translateChar() is tried first; any result other than false is used;
 * - otherwise, if DescendantFonts exists, each descendant Font is asked to
 *   translate the code and the first result is converted from
 *   "Windows-1252"; if none succeeds the raw code itself is converted from
 *   "Windows-1252";
 * - otherwise "?" (self::MISSING) is used.
 *
 * When the code width is falsy the text is returned unchanged.
 *
 * @todo Seems this is invalid algorithm that do not follow pdf-format specification. Must be rewritten.
 */
private function decodeContentByToUnicodeCMapOrDescendantFonts(string $text): string
{
    $width = $this->tableSizes['from'];

    if (!$width) {
        return $text;
    }

    $output = '';
    $total = \strlen($text);

    for ($pos = 0; $pos < $total; $pos += $width) {
        $char = substr($text, $pos, $width);
        $decoded = $this->translateChar($char, false);

        if (false !== $decoded) {
            $output .= $decoded;
            continue;
        }

        if (!$this->has('DescendantFonts')) {
            $output .= self::MISSING;
            continue;
        }

        $fonts = $this->get('DescendantFonts') instanceof PDFObject
            ? $this->get('DescendantFonts')->getHeader()->getElements()
            : $this->get('DescendantFonts')->getContent();

        $decoded = false;
        foreach ($fonts as $font) {
            if (!$font instanceof self) {
                continue;
            }

            $candidate = $font->translateChar($char, false);
            if (false !== $candidate) {
                $decoded = mb_convert_encoding($candidate, 'UTF-8', 'Windows-1252');
                break;
            }
        }

        $output .= false !== $decoded
            ? $decoded
            : mb_convert_encoding($char, 'UTF-8', 'Windows-1252');
    }

    return $output;
}
/**
 * Decode content by whatever kind of value the font's 'Encoding' entry holds.
 *
 * @return string|null the decoded text; null only when the Element branch cannot map the encoding name
 */
private function decodeContentByEncoding(string $text): ?string
{
    $encoding = $this->get('Encoding');

    // Encoding referenced by object id (/Encoding 520 0 R) whose object lacks
    // `/Type /Encoding` in its dictionary: build an Encoding instance from it.
    if ($encoding instanceof PDFObject) {
        $encoding = $this->getInitializedEncodingByPdfObject($encoding);
    }

    // A proper Encoding instance (either parsed as such or built just above).
    if ($encoding instanceof Encoding) {
        return $this->decodeContentByEncodingEncoding($text, $encoding);
    }

    // When Encoding is just string (/Encoding /WinAnsiEncoding)
    if ($encoding instanceof Element) { // todo: ElementString class must by used?
        return $this->decodeContentByEncodingElement($text, $encoding);
    }

    // don't double-encode strings already in UTF-8
    return mb_check_encoding($text, 'UTF-8')
        ? $text
        : mb_convert_encoding($text, 'UTF-8', 'Windows-1252');
}
/**
 * Return the Encoding instance built from a PDFObject, creating it on first use.
 *
 * The instance is cached on the font: once one exists it is returned for
 * every later call, whichever $PDFObject is passed.
 */
private function getInitializedEncodingByPdfObject(PDFObject $PDFObject): Encoding
{
    if ($this->initializedEncodingByPdfObject) {
        return $this->initializedEncodingByPdfObject;
    }

    $this->initializedEncodingByPdfObject = $this->createInitializedEncodingByPdfObject($PDFObject);

    return $this->initializedEncodingByPdfObject;
}
/**
 * Decode content when $encoding (given by $this->get('Encoding')) is instance of Encoding.
 *
 * Each byte of $text is translated through the encoding; when the encoding
 * returns null for a byte, the byte value itself is used as the code point.
 */
private function decodeContentByEncodingEncoding(string $text, Encoding $encoding): string
{
    $decoded = '';

    for ($i = 0, $byteCount = \strlen($text); $i < $byteCount; ++$i) {
        $byte = \ord($text[$i]);
        $mapped = $encoding->translateChar($byte);
        $decoded .= self::uchr(null === $mapped ? $byte : $mapped);
    }

    return $decoded;
}
/**
 * Decode content when $encoding (given by $this->get('Encoding')) is instance of Element.
 *
 * The element content is taken as a PDF encoding name and mapped to an iconv
 * encoding name, which is then used to convert $text to "utf-8".
 *
 * @return string|null the converted text, or null when the encoding name is
 *                     unknown or iconv() fails to convert the text
 */
private function decodeContentByEncodingElement(string $text, Element $encoding): ?string
{
    $pdfEncodingName = $encoding->getContent();

    // mb_convert_encoding does not support MacRoman/macintosh,
    // so we use iconv() here
    $iconvEncodingName = $this->getIconvEncodingNameOrNullByPdfEncodingName($pdfEncodingName);

    if (!$iconvEncodingName) {
        return null;
    }

    $converted = iconv($iconvEncodingName, 'UTF-8', $text);

    // iconv() returns false on failure; report "not decoded" explicitly
    // instead of letting false be coerced into an empty string.
    return false === $converted ? null : $converted;
}
/**
 * Map a PDF base encoding name to the encoding name passed to iconv().
 *
 * @return string|null the iconv name, or null for a PDF encoding name not listed here
 */
private function getIconvEncodingNameOrNullByPdfEncodingName(string $pdfEncodingName): ?string
{
    $iconvNames = [
        'StandardEncoding' => 'ISO-8859-1',
        'MacRomanEncoding' => 'MACINTOSH',
        'WinAnsiEncoding' => 'CP1252',
    ];

    return $iconvNames[$pdfEncodingName] ?? null;
}
/**
 * Return $text unchanged when it is valid "utf-8"; otherwise interpret it as
 * a "Windows-1252" encoded string and convert it to "utf-8".
 *
 * @return string|false
 */
private function decodeContentByAutodetectIfNecessary(string $text)
{
    // todo: Why exactly `Windows-1252` used?
    return mb_check_encoding($text, 'UTF-8')
        ? $text
        : mb_convert_encoding($text, 'UTF-8', 'Windows-1252');
}
/**
 * Build an Encoding instance from a PDFObject and call init() on it before returning it.
 */
private function createInitializedEncodingByPdfObject(PDFObject $PDFObject): Encoding
{
    $instance = $this->createEncodingByPdfObject($PDFObject);
    $instance->init();

    return $instance;
}
/**
 * Build an Encoding instance from the document, header, content and config
 * of the given PDFObject. The new instance is not initialised (no init() call).
 */
private function createEncodingByPdfObject(PDFObject $PDFObject): Encoding
{
    return new Encoding(
        $PDFObject->getDocument(),
        $PDFObject->getHeader(),
        $PDFObject->getContent(),
        $PDFObject->getConfig()
    );
}
}

View file

@ -1,42 +0,0 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
*
* @date 2017-01-03
*
* @license LGPLv3
*
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
namespace Smalot\PdfParser\Font;
use Smalot\PdfParser\Font;
/**
* Font subclass named after the "CIDFontType0" font subtype.
*
* Declares no members of its own: all behaviour is inherited from Font.
* NOTE(review): presumably instantiated for font objects with that
* /Subtype - the code selecting the class is not in this file; confirm.
*/
class FontCIDFontType0 extends Font
{
}

View file

@ -1,42 +0,0 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
*
* @date 2017-01-03
*
* @license LGPLv3
*
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
namespace Smalot\PdfParser\Font;
use Smalot\PdfParser\Font;
/**
* Font subclass named after the "CIDFontType2" font subtype.
*
* Declares no members of its own: all behaviour is inherited from Font.
* NOTE(review): presumably instantiated for font objects with that
* /Subtype - the code selecting the class is not in this file; confirm.
*/
class FontCIDFontType2 extends Font
{
}

View file

@ -1,42 +0,0 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
*
* @date 2017-01-03
*
* @license LGPLv3
*
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
namespace Smalot\PdfParser\Font;
use Smalot\PdfParser\Font;
/**
* Font subclass named after the "TrueType" font subtype.
*
* Declares no members of its own: all behaviour is inherited from Font.
* NOTE(review): presumably instantiated for font objects with that
* /Subtype - the code selecting the class is not in this file; confirm.
*/
class FontTrueType extends Font
{
}

View file

@ -1,42 +0,0 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
*
* @date 2017-01-03
*
* @license LGPLv3
*
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
namespace Smalot\PdfParser\Font;
use Smalot\PdfParser\Font;
/**
* Font subclass named after the "Type0" font subtype.
*
* Declares no members of its own: all behaviour is inherited from Font.
* NOTE(review): presumably instantiated for font objects with that
* /Subtype - the code selecting the class is not in this file; confirm.
*/
class FontType0 extends Font
{
}

View file

@ -1,42 +0,0 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
*
* @date 2017-01-03
*
* @license LGPLv3
*
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
namespace Smalot\PdfParser\Font;
use Smalot\PdfParser\Font;
/**
* Font subclass named after the "Type1" font subtype.
*
* Declares no members of its own: all behaviour is inherited from Font.
* NOTE(review): presumably instantiated for font objects with that
* /Subtype - the code selecting the class is not in this file; confirm.
*/
class FontType1 extends Font
{
}

View file

@ -1,42 +0,0 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
*
* @date 2017-01-03
*
* @license LGPLv3
*
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
namespace Smalot\PdfParser\Font;
use Smalot\PdfParser\Font;
/**
* Class FontType3
*
* Font subclass that declares no members of its own; everything is inherited
* from Font. PDFObject::factory() builds the class name from the font
* object's Subtype ('Font' followed by the subtype), which is how a font
* with Subtype "Type3" ends up as an instance of this class.
*/
class FontType3 extends Font
{
}

View file

@ -1,194 +0,0 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
*
* @date 2017-01-03
*
* @license LGPLv3
*
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
namespace Smalot\PdfParser;
use Smalot\PdfParser\Element\ElementArray;
use Smalot\PdfParser\Element\ElementMissing;
use Smalot\PdfParser\Element\ElementStruct;
use Smalot\PdfParser\Element\ElementXRef;
/**
 * Named collection of the elements that make up a PDF object header.
 *
 * Entries that are cross references (ElementXRef) are replaced by the object
 * they point to the first time they are read, as long as a document is
 * attached to the header.
 */
class Header
{
    /**
     * @var Document
     */
    protected $document = null;

    /**
     * @var Element[]
     */
    protected $elements = null;

    /**
     * @param Element[] $elements list of elements
     * @param Document  $document document used to resolve cross references
     */
    public function __construct(array $elements = [], ?Document $document = null)
    {
        $this->document = $document;
        $this->elements = $elements;
    }

    /**
     * Calls init() on every stored entry that is an Element.
     */
    public function init()
    {
        foreach ($this->elements as $entry) {
            if (!($entry instanceof Element)) {
                continue;
            }

            $entry->init();
        }
    }

    /**
     * Returns all elements, after trying to resolve every cross reference.
     */
    public function getElements()
    {
        foreach (\array_keys($this->elements) as $key) {
            $this->resolveXRef($key);
        }

        return $this->elements;
    }

    /**
     * Maps each element name to the class name of its value.
     *
     * Used only for debug.
     */
    public function getElementTypes(): array
    {
        return array_map(
            static function ($entry) {
                return \get_class($entry);
            },
            $this->elements
        );
    }

    /**
     * Builds a plain array describing the header.
     *
     * Nested headers and objects are expanded only when $deep is true; plain
     * elements are cast to string.
     *
     * @param bool $deep whether nested headers, objects and arrays are included
     */
    public function getDetails(bool $deep = true): array
    {
        $details = [];

        foreach ($this->getElements() as $name => $item) {
            if ($deep && $item instanceof self) {
                $details[$name] = $item->getDetails($deep);
                continue;
            }

            if ($deep && $item instanceof PDFObject) {
                // Objects are described by their own header only, not recursively.
                $details[$name] = $item->getDetails(false);
                continue;
            }

            if ($item instanceof ElementArray) {
                // Arrays are left out entirely of a shallow listing.
                if ($deep) {
                    $details[$name] = $item->getDetails();
                }
                continue;
            }

            if ($item instanceof Element) {
                $details[$name] = (string) $item;
            }
        }

        return $details;
    }

    /**
     * Indicate if an element name is available in header.
     *
     * @param string $name the name of the element
     */
    public function has(string $name): bool
    {
        return isset($this->elements[$name]) || \array_key_exists($name, $this->elements);
    }

    /**
     * Returns the element stored under $name, resolving a cross reference if
     * needed, or an ElementMissing when there is no usable entry.
     *
     * @return Element|PDFObject
     */
    public function get(string $name)
    {
        if (!\array_key_exists($name, $this->elements)) {
            return new ElementMissing();
        }

        $resolved = $this->resolveXRef($name);

        return $resolved ? $resolved : new ElementMissing();
    }

    /**
     * Resolve XRef to object.
     *
     * When the entry is an ElementXRef and a document is attached, the
     * referenced object is looked up and stored in place of the reference.
     *
     * @return Element|PDFObject
     *
     * @throws \Exception
     */
    protected function resolveXRef(string $name)
    {
        $current = $this->elements[$name];

        if (!($current instanceof ElementXRef) || null === $this->document) {
            return $this->elements[$name];
        }

        $target = $this->document->getObjectById($current->getId());
        if (null === $target) {
            // The reference stays in the list; only this call reports it as missing.
            return new ElementMissing();
        }

        // Update elements list for future calls.
        $this->elements[$name] = $target;

        return $this->elements[$name];
    }

    /**
     * Parses a header from raw content: a '<<' prefix is handed to
     * ElementStruct, anything else to ElementArray.
     *
     * @param string   $content  The content to parse
     * @param Document $document The document
     * @param int      $position The new position of the cursor after parsing
     */
    public static function parse(string $content, Document $document, int &$position = 0): self
    {
        $looksLikeStruct = '<<' == substr(trim($content), 0, 2);

        if ($looksLikeStruct) {
            $parsed = ElementStruct::parse($content, $document, $position);
        } else {
            $array = ElementArray::parse($content, $document, $position);
            // NOTE(review): the array-based header is built without a document,
            // so its cross references cannot be resolved - confirm this is intended.
            $parsed = $array
                ? new self($array->getRawContent(), null)
                : new self([], $document);
        }

        // Build an empty header when nothing could be parsed.
        return $parsed ? $parsed : new self([], $document);
    }
}

View file

@ -1,165 +0,0 @@
GNU LESSER GENERAL PUBLIC LICENSE
Version 3, 29 June 2007
Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
This version of the GNU Lesser General Public License incorporates
the terms and conditions of version 3 of the GNU General Public
License, supplemented by the additional permissions listed below.
0. Additional Definitions.
As used herein, "this License" refers to version 3 of the GNU Lesser
General Public License, and the "GNU GPL" refers to version 3 of the GNU
General Public License.
"The Library" refers to a covered work governed by this License,
other than an Application or a Combined Work as defined below.
An "Application" is any work that makes use of an interface provided
by the Library, but which is not otherwise based on the Library.
Defining a subclass of a class defined by the Library is deemed a mode
of using an interface provided by the Library.
A "Combined Work" is a work produced by combining or linking an
Application with the Library. The particular version of the Library
with which the Combined Work was made is also called the "Linked
Version".
The "Minimal Corresponding Source" for a Combined Work means the
Corresponding Source for the Combined Work, excluding any source code
for portions of the Combined Work that, considered in isolation, are
based on the Application, and not on the Linked Version.
The "Corresponding Application Code" for a Combined Work means the
object code and/or source code for the Application, including any data
and utility programs needed for reproducing the Combined Work from the
Application, but excluding the System Libraries of the Combined Work.
1. Exception to Section 3 of the GNU GPL.
You may convey a covered work under sections 3 and 4 of this License
without being bound by section 3 of the GNU GPL.
2. Conveying Modified Versions.
If you modify a copy of the Library, and, in your modifications, a
facility refers to a function or data to be supplied by an Application
that uses the facility (other than as an argument passed when the
facility is invoked), then you may convey a copy of the modified
version:
a) under this License, provided that you make a good faith effort to
ensure that, in the event an Application does not supply the
function or data, the facility still operates, and performs
whatever part of its purpose remains meaningful, or
b) under the GNU GPL, with none of the additional permissions of
this License applicable to that copy.
3. Object Code Incorporating Material from Library Header Files.
The object code form of an Application may incorporate material from
a header file that is part of the Library. You may convey such object
code under terms of your choice, provided that, if the incorporated
material is not limited to numerical parameters, data structure
layouts and accessors, or small macros, inline functions and templates
(ten or fewer lines in length), you do both of the following:
a) Give prominent notice with each copy of the object code that the
Library is used in it and that the Library and its use are
covered by this License.
b) Accompany the object code with a copy of the GNU GPL and this license
document.
4. Combined Works.
You may convey a Combined Work under terms of your choice that,
taken together, effectively do not restrict modification of the
portions of the Library contained in the Combined Work and reverse
engineering for debugging such modifications, if you also do each of
the following:
a) Give prominent notice with each copy of the Combined Work that
the Library is used in it and that the Library and its use are
covered by this License.
b) Accompany the Combined Work with a copy of the GNU GPL and this license
document.
c) For a Combined Work that displays copyright notices during
execution, include the copyright notice for the Library among
these notices, as well as a reference directing the user to the
copies of the GNU GPL and this license document.
d) Do one of the following:
0) Convey the Minimal Corresponding Source under the terms of this
License, and the Corresponding Application Code in a form
suitable for, and under terms that permit, the user to
recombine or relink the Application with a modified version of
the Linked Version to produce a modified Combined Work, in the
manner specified by section 6 of the GNU GPL for conveying
Corresponding Source.
1) Use a suitable shared library mechanism for linking with the
Library. A suitable mechanism is one that (a) uses at run time
a copy of the Library already present on the user's computer
system, and (b) will operate properly with a modified version
of the Library that is interface-compatible with the Linked
Version.
e) Provide Installation Information, but only if you would otherwise
be required to provide such information under section 6 of the
GNU GPL, and only to the extent that such information is
necessary to install and execute a modified version of the
Combined Work produced by recombining or relinking the
Application with a modified version of the Linked Version. (If
you use option 4d0, the Installation Information must accompany
the Minimal Corresponding Source and Corresponding Application
Code. If you use option 4d1, you must provide the Installation
Information in the manner specified by section 6 of the GNU GPL
for conveying Corresponding Source.)
5. Combined Libraries.
You may place library facilities that are a work based on the
Library side by side in a single library together with other library
facilities that are not Applications and are not covered by this
License, and convey such a combined library under terms of your
choice, if you do both of the following:
a) Accompany the combined library with a copy of the same work based
on the Library, uncombined with any other library facilities,
conveyed under the terms of this License.
b) Give prominent notice with the combined library that part of it
is a work based on the Library, and explaining where to find the
accompanying uncombined form of the same work.
6. Revised Versions of the GNU Lesser General Public License.
The Free Software Foundation may publish revised and/or new versions
of the GNU Lesser General Public License from time to time. Such new
versions will be similar in spirit to the present version, but may
differ in detail to address new problems or concerns.
Each version is given a distinguishing version number. If the
Library as you received it specifies that a certain numbered version
of the GNU Lesser General Public License "or any later version"
applies to it, you have the option of following the terms and
conditions either of that published version or of any later version
published by the Free Software Foundation. If the Library as you
received it does not specify a version number of the GNU Lesser
General Public License, you may choose any version of the GNU Lesser
General Public License ever published by the Free Software Foundation.
If the Library as you received it specifies that a proxy can decide
whether future versions of the GNU Lesser General Public License shall
apply, that proxy's public statement of acceptance of any version is
permanent authorization for you to choose that version for the
Library.

View file

@ -1,779 +0,0 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
*
* @date 2017-01-03
*
* @license LGPLv3
*
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
namespace Smalot\PdfParser;
use Smalot\PdfParser\XObject\Form;
use Smalot\PdfParser\XObject\Image;
/**
* Class PDFObject
*/
class PDFObject
{
public const TYPE = 't';
public const OPERATOR = 'o';
public const COMMAND = 'c';
/**
* The recursion stack.
*
* @var array
*/
public static $recursionStack = [];
/**
* @var Document
*/
protected $document = null;
/**
* @var Header
*/
protected $header = null;
/**
* @var string
*/
protected $content = null;
/**
* @var Config
*/
protected $config;
/**
 * @param Document    $document document this object belongs to
 * @param Header|null $header   object header; an empty Header is created when null
 * @param string|null $content  raw content of the object
 * @param Config|null $config   parser configuration
 */
public function __construct(
    Document $document,
    ?Header $header = null,
    ?string $content = null,
    ?Config $config = null
) {
    if (null === $header) {
        $header = new Header();
    }

    $this->config = $config;
    $this->content = $content;
    $this->header = $header;
    $this->document = $document;
}
/**
* Initialization hook. The body is empty in this class.
*/
public function init()
{
}
/**
* Returns the document passed to the constructor.
*/
public function getDocument(): Document
{
return $this->document;
}
/**
* Returns the header of this object.
*/
public function getHeader(): ?Header
{
return $this->header;
}
/**
* Returns the configuration passed to the constructor, which may be null.
*/
public function getConfig(): ?Config
{
return $this->config;
}
/**
* Looks up a header entry by name; delegates to Header::get().
*
* @return Element|PDFObject|Header
*/
public function get(string $name)
{
return $this->header->get($name);
}
/**
* Tells whether the header has an entry with this name; delegates to Header::has().
*/
public function has(string $name): bool
{
return $this->header->has($name);
}
/**
* Returns the header details; delegates to Header::getDetails().
*
* @param bool $deep forwarded unchanged to the header
*/
public function getDetails(bool $deep = true): array
{
return $this->header->getDetails($deep);
}
/**
* Returns the raw content passed to the constructor, which may be null.
*/
public function getContent(): ?string
{
return $this->content;
}
/**
 * Masks every part of a content stream that could be mistaken for an operator.
 *
 * Escaped backslashes and parentheses, inline images (BI ... ID ... EI), the
 * contents of [...] and (...), every <...> span including its delimiters, and
 * the BDC / EMC marked-content operators are overwritten with $char. Each
 * replacement has the same byte length as the text it replaces, so an offset
 * found in the returned string points at the same place in $content.
 *
 * @param string $content raw content stream
 * @param string $char    masking character; only its first byte is used and
 *                        an empty string falls back to 'X'
 *
 * @return string masked content with the same byte length as $content
 */
public function cleanContent(string $content, string $char = 'X')
{
    // The mask must be exactly one byte, otherwise replacements would shift offsets.
    $char = '' === $char ? 'X' : $char[0];

    // Overwrites every captured span ([text, offset] pairs) with the mask character.
    $mask = static function (string $subject, $captures) use ($char): string {
        foreach ($captures as $capture) {
            $length = \strlen($capture[0]);
            $subject = substr_replace($subject, str_repeat($char, $length), $capture[1], $length);
        }

        return $subject;
    };

    $content = str_replace(['\\\\', '\\)', '\\('], $char.$char, $content);

    // Remove image bloc with binary content
    preg_match_all('/\s(BI\s.*?(\sID\s).*?(\sEI))\s/s', $content, $matches, \PREG_OFFSET_CAPTURE);
    $content = $mask($content, $matches[0] ?? []);

    // Clean content in square brackets [.....]
    preg_match_all('/\[((\(.*?\)|[0-9\.\-\s]*)*)\]/s', $content, $matches, \PREG_OFFSET_CAPTURE);
    $content = $mask($content, $matches[1] ?? []);

    // Clean content in round brackets (.....)
    preg_match_all('/\((.*?)\)/s', $content, $matches, \PREG_OFFSET_CAPTURE);
    $content = $mask($content, $matches[1] ?? []);

    // Clean structure: everything from an opening '<' to its closing '>' is masked.
    if ($parts = preg_split('/(<|>)/s', $content, -1, \PREG_SPLIT_NO_EMPTY | \PREG_SPLIT_DELIM_CAPTURE)) {
        $content = '';
        $level = 0;
        foreach ($parts as $part) {
            if ('<' == $part) {
                ++$level;
            }

            $content .= (0 == $level ? $part : str_repeat($char, \strlen($part)));

            if ('>' == $part) {
                --$level;
            }
        }
    }

    // Clean BDC and EMC markup. The delimiter is passed to preg_quote() so that
    // a '/' mask character cannot terminate the pattern early.
    preg_match_all(
        '/(\/[A-Za-z0-9\_]*\s*'.preg_quote($char, '/').'*BDC)/s',
        $content,
        $matches,
        \PREG_OFFSET_CAPTURE
    );
    $content = $mask($content, $matches[1] ?? []);

    preg_match_all('/\s(EMC)\s/s', $content, $matches, \PREG_OFFSET_CAPTURE);
    $content = $mask($content, $matches[1] ?? []);

    return $content;
}
/**
 * Splits a content stream into the sections that matter for text extraction.
 *
 * Sections are located in a masked copy of the content (cleanContent() with
 * '_') and then cut out of the unmasked content at the same offsets. First
 * come the bodies of the BT ... ET text blocks, then the "/Name Do" commands.
 *
 * @param string|null $content raw content stream
 *
 * @return string[] text block bodies followed by Do commands
 */
public function getSectionsText(?string $content): array
{
    $found = [];
    $content = ' '.$content.' ';
    $masked = $this->cleanContent($content, '_');

    // Extract text blocks.
    $hasBlocks = preg_match_all(
        '/(\sQ)?\s+BT[\s|\(|\[]+(.*?)\s*ET(\sq)?/s',
        $masked,
        $matches,
        \PREG_OFFSET_CAPTURE
    );
    if ($hasBlocks) {
        foreach ($matches[2] as $pos => $capture) {
            if ('' === $capture[0]) {
                continue;
            }

            $body = substr($content, $capture[1], \strlen($capture[0]));

            // Removes BDC and EMC markup.
            $body = preg_replace('/(\/[A-Za-z0-9]+\s*<<.*?)(>>\s*BDC)(.*?)(EMC\s+)/s', '${3}', $body.' ');

            // Add Q and q flags if detected around BT/ET.
            // @see: https://github.com/smalot/pdfparser/issues/387
            $prefix = !empty($matches[1][$pos][0]) ? "Q\n" : '';
            $suffix = !empty($matches[3][$pos][0]) ? "\nq" : '';

            $found[] = trim($prefix.$body).$suffix;
        }
    }

    // Extract 'do' commands.
    $hasDo = preg_match_all('/(\/[A-Za-z0-9\.\-_]+\s+Do)\s/s', $masked, $matches, \PREG_OFFSET_CAPTURE);
    if ($hasDo) {
        foreach ($matches[1] as $capture) {
            $found[] = substr($content, $capture[1], \strlen($capture[0]));
        }
    }

    return $found;
}
/**
 * Picks the font used before any Tf operator has selected one.
 *
 * The candidates are the fonts of $page (when given) followed by the first
 * font of the document; the first candidate wins. When there is no candidate
 * at all a blank Font is created.
 *
 * @param Page|null $page page whose fonts take precedence
 */
private function getDefaultFont(?Page $page = null): Font
{
    $fonts = null !== $page ? $page->getFonts() : [];

    $firstFont = $this->document->getFirstFont();
    if (null !== $firstFont) {
        $fonts[] = $firstFont;
    }

    if (\count($fonts) > 0) {
        return reset($fonts);
    }

    return new Font($this->document, null, null, $this->config);
}
/**
 * Decodes the text shown by this object's content stream.
 *
 * The content is split into sections (getSectionsText()), every section into
 * commands (getCommandsText()), and the text-showing operators are decoded
 * with the font that is current at that point. Positioning operators only
 * contribute separators (newline, tab, space).
 *
 * @param Page|null $page page used to resolve font ids (Tf) and XObject ids
 *                        (Do); when null both operators are ignored
 *
 * @return string the extracted text followed by one trailing space
 *
 * @throws \Exception
 */
public function getText(?Page $page = null): string
{
    $result = '';
    $sections = $this->getSectionsText($this->content);
    $current_font = $this->getDefaultFont($page);
    $clipped_font = $current_font;

    $current_position_td = ['x' => false, 'y' => false];
    $current_position_tm = ['x' => false, 'y' => false];

    // Register this object so that a Do operator further down cannot re-enter it.
    self::$recursionStack[] = $this->getUniqueId();

    foreach ($sections as $section) {
        $commands = $this->getCommandsText($section);
        $reverse_text = false;
        $text = '';

        foreach ($commands as $command) {
            switch ($command[self::OPERATOR]) {
                case 'BMC':
                    if ('ReversedChars' == $command[self::COMMAND]) {
                        $reverse_text = true;
                    }
                    break;

                // move text current point
                case 'Td':
                    $args = preg_split('/\s/s', $command[self::COMMAND]);
                    $y = array_pop($args);
                    $x = array_pop($args);
                    if (((float) $x <= 0) ||
                        (false !== $current_position_td['y'] && (float) $y < (float) $current_position_td['y'])
                    ) {
                        // vertical offset
                        $text .= "\n";
                    } elseif (false !== $current_position_td['x'] && (float) $x > (float) $current_position_td['x']) {
                        // The config is optional; without one a single space separates the chunks.
                        $text .= null !== $this->config ? $this->config->getHorizontalOffset() : ' ';
                    }
                    $current_position_td = ['x' => $x, 'y' => $y];
                    break;

                // move text current point and set leading
                case 'TD':
                    $args = preg_split('/\s/s', $command[self::COMMAND]);
                    $y = array_pop($args);
                    $x = array_pop($args);
                    if ((float) $y < 0) {
                        $text .= "\n";
                    } elseif ((float) $x <= 0) {
                        $text .= ' ';
                    }
                    break;

                case 'Tf':
                    list($id) = preg_split('/\s/s', $command[self::COMMAND]);
                    $id = trim($id, '/');
                    if (null !== $page) {
                        $new_font = $page->getFont($id);
                        // If an invalid font ID is given, do not update the font.
                        // This should theoretically never happen, as the PDF spec states for the Tf operator:
                        // "The specified font value shall match a resource name in the Font entry of the default resource dictionary"
                        // (https://www.adobe.com/content/dam/acom/en/devnet/pdf/pdfs/PDF32000_2008.pdf, page 435)
                        // But we want to make sure that malformed PDFs do not simply crash.
                        if (null !== $new_font) {
                            $current_font = $new_font;
                        }
                    }
                    break;

                case 'Q':
                    // Use clip: restore font.
                    $current_font = $clipped_font;
                    break;

                case 'q':
                    // Use clip: save font.
                    $clipped_font = $current_font;
                    break;

                case "'":
                case 'Tj':
                    // Wrap the single string so it has the same shape as a TJ array.
                    $command[self::COMMAND] = [$command];
                    // no break
                case 'TJ':
                    $sub_text = $current_font->decodeText($command[self::COMMAND]);
                    $text .= $sub_text;
                    break;

                // set leading
                case 'TL':
                    $text .= ' ';
                    break;

                case 'Tm':
                    $args = preg_split('/\s/s', $command[self::COMMAND]);
                    $y = array_pop($args);
                    $x = array_pop($args);
                    if (false !== $current_position_tm['x']) {
                        $delta = abs((float) $x - (float) $current_position_tm['x']);
                        if ($delta > 10) {
                            $text .= "\t";
                        }
                    }
                    if (false !== $current_position_tm['y']) {
                        $delta = abs((float) $y - (float) $current_position_tm['y']);
                        if ($delta > 10) {
                            $text .= "\n";
                        }
                    }
                    $current_position_tm = ['x' => $x, 'y' => $y];
                    break;

                // set horizontal scaling
                case 'Tz':
                // move to start of next line
                case 'T*':
                    $text .= "\n";
                    break;

                case 'Do':
                    if (null !== $page) {
                        $args = preg_split('/\s/s', $command[self::COMMAND]);
                        $id = trim(array_pop($args), '/ ');
                        $xobject = $page->getXObject($id);

                        // @todo $xobject could be a ElementXRef object, which would then throw an error
                        if ($xobject instanceof self && !\in_array($xobject->getUniqueId(), self::$recursionStack, true)) {
                            // Not a circular reference.
                            $text .= $xobject->getText($page);
                        }
                    }
                    break;

                default:
                    // Tc, Ts, Tw, Da, rg/RG, re, co, cs, gs, en, sc/SC, g/G, V, vo/Vo
                    // and any unknown operator contribute no text.
                    break;
            }
        }

        // Fix Hebrew and other reverse text oriented languages.
        // @see: https://github.com/smalot/pdfparser/issues/398
        if ($reverse_text) {
            $chars = mb_str_split($text, 1, mb_internal_encoding());
            $text = implode('', array_reverse($chars));
        }

        $result .= $text;
    }

    return $result.' ';
}
/**
 * Decodes the text shown by this object's content stream, one array entry per
 * text-showing operator (Tj, ', TJ) or per referenced XObject (Do).
 *
 * @param Page|null $page page used to resolve font ids (Tf) and XObject ids
 *                        (Do); when null both operators are ignored
 *
 * @return string[] decoded chunks in content-stream order
 *
 * @throws \Exception
 */
public function getTextArray(?Page $page = null): array
{
    $text = [];
    $sections = $this->getSectionsText($this->content);
    $current_font = new Font($this->document, null, null, $this->config);

    foreach ($sections as $section) {
        $commands = $this->getCommandsText($section);

        foreach ($commands as $command) {
            switch ($command[self::OPERATOR]) {
                case 'Tf':
                    if (null !== $page) {
                        list($id) = preg_split('/\s/s', $command[self::COMMAND]);
                        $id = trim($id, '/');
                        $new_font = $page->getFont($id);
                        // Page::getFont() returns null for an unknown id; keep the
                        // previous font so the next decodeText() call does not crash.
                        if (null !== $new_font) {
                            $current_font = $new_font;
                        }
                    }
                    break;

                case "'":
                case 'Tj':
                    // Wrap the single string so it has the same shape as a TJ array.
                    $command[self::COMMAND] = [$command];
                    // no break
                case 'TJ':
                    $sub_text = $current_font->decodeText($command[self::COMMAND]);
                    $text[] = $sub_text;
                    break;

                case 'Do':
                    if (null !== $page) {
                        $args = preg_split('/\s/s', $command[self::COMMAND]);
                        $id = trim(array_pop($args), '/ ');
                        if ($xobject = $page->getXObject($id)) {
                            $text[] = $xobject->getText($page);
                        }
                    }
                    break;

                default:
                    // Tc, Td, TD, TL, Tm, Ts, Tw, Tz, T*, Da, rg/RG, re, co, cs, gs,
                    // en, sc/SC, g/G, V, vo/Vo and any unknown operator add no entry.
                    break;
            }
        }
    }

    return $text;
}
/**
 * Tokenizes a text section into a list of commands.
 *
 * Each command is an array with the keys self::TYPE (the leading character
 * for '/', '[', '<' and '(' operands, 'n' for bare numbers, '' otherwise),
 * self::OPERATOR (the operator that follows the operand, possibly '') and
 * self::COMMAND (the operand: a string, or a nested command list for arrays).
 *
 * Parsing stops at the end of the text, at "ET", at a closing ']', '>' or ')'
 * that is read as the start of a token, or at anything that cannot be parsed.
 * A '[' is parsed by a recursive call that shares $offset with its caller.
 *
 * @param string $text_part text to tokenize
 * @param int    $offset    position to start at; updated to the position where parsing stopped
 *
 * @return array[] list of commands
 */
public function getCommandsText(string $text_part, int &$offset = 0): array
{
    $commands = $matches = [];
    $length = \strlen($text_part);

    while ($offset < $length) {
        $offset += strspn($text_part, "\x00\x09\x0a\x0c\x0d\x20", $offset);
        if ($offset >= $length) {
            // Only whitespace was left: there is no character to read any more.
            break;
        }
        $char = $text_part[$offset];

        $operator = '';
        $type = '';
        $command = false;

        switch ($char) {
            case '/':
                $type = $char;
                if (preg_match(
                    '/^\/([A-Z0-9\._,\+]+\s+[0-9.\-]+)\s+([A-Z]+)\s*/si',
                    substr($text_part, $offset),
                    $matches
                )
                ) {
                    // name followed by a number, e.g. "/F1 12 Tf"
                    $operator = $matches[2];
                    $command = $matches[1];
                    $offset += \strlen($matches[0]);
                } elseif (preg_match(
                    '/^\/([A-Z0-9\._,\+]+)\s+([A-Z]+)\s*/si',
                    substr($text_part, $offset),
                    $matches
                )
                ) {
                    // name followed directly by the operator, e.g. "/Im1 Do"
                    $operator = $matches[2];
                    $command = $matches[1];
                    $offset += \strlen($matches[0]);
                }
                break;

            case '[':
            case ']':
                // array object
                $type = $char;
                if ('[' == $char) {
                    ++$offset;
                    // get elements
                    $command = $this->getCommandsText($text_part, $offset);

                    if (preg_match('/^\s*[A-Z]{1,2}\s*/si', substr($text_part, $offset), $matches)) {
                        $operator = trim($matches[0]);
                        $offset += \strlen($matches[0]);
                    }
                } else {
                    // Closing bracket: step over it; $command stays false, which
                    // ends this (nested) call.
                    ++$offset;
                    break;
                }
                break;

            case '<':
            case '>':
                // array object
                $type = $char;
                ++$offset;
                if ('<' == $char) {
                    $strpos = strpos($text_part, '>', $offset);
                    if (false === $strpos) {
                        // Unterminated string: take the remainder and stop at the end
                        // instead of letting the cursor jump back to the beginning.
                        $command = (string) substr($text_part, $offset);
                        $offset = $length;
                    } else {
                        $command = substr($text_part, $offset, $strpos - $offset);
                        $offset = $strpos + 1;
                    }
                }

                if (preg_match('/^\s*[A-Z]{1,2}\s*/si', substr($text_part, $offset), $matches)) {
                    $operator = trim($matches[0]);
                    $offset += \strlen($matches[0]);
                }
                break;

            case '(':
            case ')':
                ++$offset;
                $type = $char;
                $strpos = $offset;
                if ('(' == $char) {
                    $open_bracket = 1;
                    while ($open_bracket > 0) {
                        if (!isset($text_part[$strpos])) {
                            break;
                        }
                        $ch = $text_part[$strpos];
                        switch ($ch) {
                            case '\\':
                                // REVERSE SOLIDUS (5Ch) (Backslash)
                                // skip next character
                                ++$strpos;
                                break;

                            case '(':
                                // LEFT PARENTHESIS (28h)
                                ++$open_bracket;
                                break;

                            case ')':
                                // RIGHT PARENTHESIS (29h)
                                --$open_bracket;
                                break;
                        }
                        ++$strpos;
                    }
                    $command = substr($text_part, $offset, $strpos - $offset - 1);
                    $offset = $strpos;

                    if (preg_match('/^\s*([A-Z\']{1,2})\s*/si', substr($text_part, $offset), $matches)) {
                        $operator = $matches[1];
                        $offset += \strlen($matches[0]);
                    }
                }
                break;

            default:
                if ('ET' == substr($text_part, $offset, 2)) {
                    break;
                } elseif (preg_match(
                    '/^\s*(?P<data>([0-9\.\-]+\s*?)+)\s+(?P<id>[A-Z]{1,3})\s*/si',
                    substr($text_part, $offset),
                    $matches
                )
                ) {
                    // numbers followed by an operator, e.g. "1 0 Td"
                    $operator = trim($matches['id']);
                    $command = trim($matches['data']);
                    $offset += \strlen($matches[0]);
                } elseif (preg_match('/^\s*([0-9\.\-]+\s*?)+\s*/si', substr($text_part, $offset), $matches)) {
                    // bare numbers, e.g. the kerning values inside a TJ array
                    $type = 'n';
                    $command = trim($matches[0]);
                    $offset += \strlen($matches[0]);
                } elseif (preg_match('/^\s*([A-Z\*]+)\s*/si', substr($text_part, $offset), $matches)) {
                    // operator without operand, e.g. "T*"
                    $type = '';
                    $operator = $matches[1];
                    $command = '';
                    $offset += \strlen($matches[0]);
                }
        }

        if (false !== $command) {
            $commands[] = [
                self::TYPE => $type,
                self::OPERATOR => $operator,
                self::COMMAND => $command,
            ];
        } else {
            // Nothing could be parsed at this position.
            break;
        }
    }

    return $commands;
}
/**
 * Creates the object class that matches the header's Type and Subtype.
 *
 * XObject/Image becomes an Image, XObject/Form a Form, Pages / Page /
 * Encoding their respective classes, and Font the class named
 * "Font" + Subtype from the Font namespace when such a class exists (plain
 * Font otherwise). Everything else becomes a plain PDFObject.
 *
 * @param Document    $document document the object belongs to
 * @param Header      $header   header of the object
 * @param string|null $content  raw content of the object
 * @param Config|null $config   parser configuration
 */
public static function factory(
    Document $document,
    Header $header,
    ?string $content,
    ?Config $config = null
): self {
    switch ($header->get('Type')->getContent()) {
        case 'XObject':
            switch ($header->get('Subtype')->getContent()) {
                case 'Image':
                    // $config is optional. Without one nothing asked for the image
                    // data to be dropped, so it is kept.
                    $retainContent = null === $config || $config->getRetainImageContent();

                    return new Image($document, $header, $retainContent ? $content : null, $config);

                case 'Form':
                    return new Form($document, $header, $content, $config);
            }

            return new self($document, $header, $content, $config);

        case 'Pages':
            return new Pages($document, $header, $content, $config);

        case 'Page':
            return new Page($document, $header, $content, $config);

        case 'Encoding':
            return new Encoding($document, $header, $content, $config);

        case 'Font':
            $subtype = $header->get('Subtype')->getContent();
            $classname = '\Smalot\PdfParser\Font\Font'.$subtype;

            // The subtype comes from the PDF itself: only accept a class that
            // really is a Font before instantiating it.
            if (class_exists($classname) && is_subclass_of($classname, Font::class)) {
                return new $classname($document, $header, $content, $config);
            }

            return new Font($document, $header, $content, $config);

        default:
            return new self($document, $header, $content, $config);
    }
}
/**
* Returns unique id identifying the object.
*
* The id is the spl_object_hash() of this instance. getText() pushes it on
* the static recursion stack and compares it against the ids already there
* before following a Do operator.
*/
protected function getUniqueId(): string
{
return spl_object_hash($this);
}
}

View file

@ -1,953 +0,0 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
*
* @date 2017-01-03
*
* @license LGPLv3
*
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
namespace Smalot\PdfParser;
use Smalot\PdfParser\Element\ElementArray;
use Smalot\PdfParser\Element\ElementMissing;
use Smalot\PdfParser\Element\ElementNull;
use Smalot\PdfParser\Element\ElementXRef;
class Page extends PDFObject
{
/**
* @var Font[]
*/
protected $fonts = null;
/**
* @var PDFObject[]
*/
protected $xobjects = null;
/**
* @var array
*/
protected $dataTm = null;
/**
 * Returns the fonts listed under Resources/Font of this page.
 *
 * Every font is stored under its original id and, when anything is left of
 * it, also under the id stripped of all characters other than digits, '.',
 * '-' and '_'. The table is cached once it has been built; the empty results
 * returned when there is no usable Font entry are not cached.
 *
 * @return Font[]
 */
public function getFonts()
{
    if (null !== $this->fonts) {
        return $this->fonts;
    }

    $resources = $this->get('Resources');
    if (!method_exists($resources, 'has') || !$resources->has('Font')) {
        return [];
    }

    $fontEntry = $resources->get('Font');
    if ($fontEntry instanceof ElementMissing) {
        return [];
    }

    // The entry is either a header itself or an object carrying one.
    $candidates = $fontEntry instanceof Header
        ? $fontEntry->getElements()
        : $fontEntry->getHeader()->getElements();

    $indexed = [];
    foreach ($candidates as $id => $candidate) {
        if (!($candidate instanceof Font)) {
            continue;
        }

        $indexed[$id] = $candidate;

        // Store too on cleaned id value (only numeric)
        $cleanedId = preg_replace('/[^0-9\.\-_]/', '', $id);
        if ('' != $cleanedId) {
            $indexed[$cleanedId] = $candidate;
        }
    }

    $this->fonts = $indexed;

    return $indexed;
}
/**
 * Returns the font registered under $id, or null when there is none.
 *
 * According to the PDF specs (https://www.adobe.com/content/dam/acom/en/devnet/pdf/pdfs/PDF32000_2008.pdf, page 238)
 * "The font resource name presented to the Tf operator is arbitrary, as are the names for all kinds of resources".
 * The unfiltered name is therefore looked up first; the id reduced to
 * digits, '.', '-' and '_' is only tried as a fallback.
 *
 * @param string $id font resource name as given to the Tf operator
 */
public function getFont(string $id): ?Font
{
    $fonts = $this->getFonts();

    if (isset($fonts[$id])) {
        return $fonts[$id];
    }

    $cleanedId = (string) preg_replace('/[^0-9\.\-_]/', '', $id);

    return $fonts[$cleanedId] ?? null;
}
/**
 * Returns the entries listed under Resources/XObject of this page.
 *
 * Every entry is stored under its original id and, when anything is left of
 * it, also under the id stripped of all characters other than digits, '.',
 * '-' and '_'. The table is cached once it has been built.
 *
 * @return PDFObject[]
 */
public function getXObjects()
{
    if (null !== $this->xobjects) {
        return $this->xobjects;
    }

    $resources = $this->get('Resources');
    if (!method_exists($resources, 'has') || !$resources->has('XObject')) {
        return [];
    }

    $xobjectEntry = $resources->get('XObject');

    // The entry is either a header itself or an object carrying one.
    $candidates = $xobjectEntry instanceof Header
        ? $xobjectEntry->getElements()
        : $xobjectEntry->getHeader()->getElements();

    $indexed = [];
    foreach ($candidates as $id => $candidate) {
        $indexed[$id] = $candidate;

        // Store too on cleaned id value (only numeric)
        $cleanedId = preg_replace('/[^0-9\.\-_]/', '', $id);
        if ('' != $cleanedId) {
            $indexed[$cleanedId] = $candidate;
        }
    }

    $this->xobjects = $indexed;

    return $indexed;
}
/**
 * Returns the XObject registered under $id, or null when there is none.
 *
 * The XObject table can also hold entries that are not PDFObject instances
 * (for example a cross reference that was never resolved). Those are
 * reported as null, because returning them would violate the declared
 * return type.
 *
 * Only the id as given is looked up; there is no fallback to a cleaned,
 * numeric-only id.
 *
 * @param string $id XObject resource name as given to the Do operator
 */
public function getXObject(string $id): ?PDFObject
{
    $xobjects = $this->getXObjects();

    if (isset($xobjects[$id]) && $xobjects[$id] instanceof PDFObject) {
        return $xobjects[$id];
    }

    return null;
}
/**
 * Extracts the text of this page from its Contents entry.
 *
 * Contents made of several streams (an object whose header has numeric keys,
 * or an ElementArray) are concatenated into one virtual object first. A
 * missing or null Contents yields an empty string.
 *
 * @param self|null $page not read by this method; fonts and XObjects are
 *                        always resolved against this page
 */
public function getText(?self $page = null): string
{
    $contents = $this->get('Contents');
    if (!$contents) {
        return '';
    }

    if ($contents instanceof ElementMissing || $contents instanceof ElementNull) {
        return '';
    }

    if ($contents instanceof PDFObject) {
        $elements = $contents->getHeader()->getElements();

        if (is_numeric(key($elements))) {
            // The header is a list of streams: join their contents.
            $new_content = '';

            foreach ($elements as $element) {
                if ($element instanceof ElementXRef) {
                    $new_content .= $element->getObject()->getContent();
                } else {
                    $new_content .= $element->getContent();
                }
            }

            $header = new Header([], $this->document);
            $contents = new PDFObject($this->document, $header, $new_content, $this->config);
        }
    } elseif ($contents instanceof ElementArray) {
        // Create a virtual global content.
        $new_content = '';

        foreach ($contents->getContent() as $content) {
            $new_content .= $content->getContent()."\n";
        }

        $header = new Header([], $this->document);
        $contents = new PDFObject($this->document, $header, $new_content, $this->config);
    }

    /*
     * Elements referencing each other on the same page can cause endless loops during text parsing.
     * To combat this we keep a recursionStack containing already parsed elements on the page.
     * The stack is emptied here after getting text from a page, also when extraction fails,
     * so that entries of a failed page cannot hide objects on the next one.
     */
    try {
        return $contents->getText($this);
    } finally {
        PDFObject::$recursionStack = [];
    }
}
/**
* Return true if the current page is a (setasign\Fpdi\Fpdi) FPDI/FPDF document
*
* The metadata 'Producer' should have the value of "FPDF" . FPDF_VERSION if the
* pdf file was generated by FPDF/Fpfi.
*
* @return bool true is the current page is a FPDI/FPDF document
*/
public function isFpdf(): bool
{
    // A missing or non-string 'Producer' entry can never identify FPDF output.
    $producer = $this->document->getDetails()['Producer'] ?? null;

    // Only the first four bytes are compared, so any "FPDF..." producer string matches.
    return \is_string($producer) && 0 === strncmp($producer, 'FPDF', 4);
}
/**
* Return the zero-based index of this page within the document's page list
*
* @return int the page index; equals the number of pages when this page is not in the list
*/
public function getPageNumber(): int
{
    $pages = $this->document->getPages();
    $total = \count($pages);

    // Walk the list until this very instance is found; the index stops at $total when it is absent.
    $index = 0;
    while ($index < $total && $pages[$index] !== $this) {
        ++$index;
    }

    return $index;
}
/**
* Return the Object of the page if the document is a FPDF/FPDI document
*
* If the document was generated by FPDF/FPDI it returns the
* PDFObject of the given page
*
* @return PDFObject The PDFObject for the page
*/
public function getPDFObjectForFpdf(): PDFObject
{
    // The page index is used directly as the key into this page's XObject table.
    $index = $this->getPageNumber();

    return $this->getXObjects()[$index];
}
/**
* Return a new PDFObject of the document created with FPDF/FPDI
*
* For a document generated by FPDF/FPDI, it generates a
* new PDFObject for that document
*
* @return PDFObject The PDFObject
*/
public function createPDFObjectForFpdf(): PDFObject
{
    // Copy content, header, config and document of the page's FPDF object into a fresh PDFObject.
    $source = $this->getPDFObjectForFpdf();
    $body = $source->getContent();
    $head = $source->getHeader();
    $settings = $source->config;

    return new PDFObject($source->document, $head, $body, $settings);
}
/**
* Return page if document is a FPDF/FPDI document
*
* @return Page The page
*/
public function createPageForFpdf(): self
{
    // Same copy as createPDFObjectForFpdf(), but the result is a Page so page-level lookups work on it.
    $source = $this->getPDFObjectForFpdf();
    $body = $source->getContent();
    $head = $source->getHeader();
    $settings = $source->config;

    return new self($source->document, $head, $body, $settings);
}
/**
 * Extract the text of this page as an array of text chunks.
 *
 * FPDF/FPDI documents are read from the page's FPDF object; every other document is read from
 * the 'Contents' entry, merging split content streams into one virtual PDFObject first.
 *
 * @param self|null $page not read by this implementation
 *
 * @return array the text chunks, or an empty array when the page has no usable contents
 */
public function getTextArray(?self $page = null): array
{
    if ($this->isFpdf()) {
        $pdfObject = $this->getPDFObjectForFpdf();
        $newPdfObject = $this->createPDFObjectForFpdf();

        return $newPdfObject->getTextArray($pdfObject);
    }

    $contents = $this->get('Contents');
    if (!$contents) {
        return [];
    }

    if ($contents instanceof ElementMissing || $contents instanceof ElementNull) {
        return [];
    }

    if ($contents instanceof PDFObject) {
        $elements = $contents->getHeader()->getElements();

        // A numeric first key means the header is a plain list of content parts.
        if (is_numeric(key($elements))) {
            $new_content = '';

            /** @var PDFObject $element */
            foreach ($elements as $element) {
                if ($element instanceof ElementXRef) {
                    $new_content .= $element->getObject()->getContent();
                } else {
                    $new_content .= $element->getContent();
                }
            }

            $header = new Header([], $this->document);
            $contents = new PDFObject($this->document, $header, $new_content, $this->config);
        } else {
            // Probe the page-aware extraction; if it fails, fall back to extraction without a page.
            // NOTE(review): the probe result is discarded and the call is repeated below. It is kept
            // as-is because the first call may leave state behind that the second call sees — confirm
            // before collapsing the two calls into one.
            try {
                $contents->getTextArray($this);
            } catch (\Throwable $e) {
                return $contents->getTextArray();
            }
        }
    } elseif ($contents instanceof ElementArray) {
        // Create a virtual global content.
        $new_content = '';

        /** @var PDFObject $content */
        foreach ($contents->getContent() as $content) {
            $new_content .= $content->getContent()."\n";
        }

        $header = new Header([], $this->document);
        $contents = new PDFObject($this->document, $header, $new_content, $this->config);
    }

    return $contents->getTextArray($this);
}
/**
* Gets all the text data with its internal representation of the page.
*
* Returns an array with the data and the internal representation
*/
public function extractRawData(): array
{
    $commands = [];
    $contents = $this->get('Contents');
    $parts = $contents->getContent();

    // Contents split over several streams: join them and parse with this page's own helpers.
    if (\is_array($parts)) {
        $joined = '';
        foreach ($parts as $part) {
            $joined .= $part->getContent();
        }

        foreach ($this->getSectionsText($joined) as $section) {
            foreach ($this->getCommandsText($section) as $command) {
                $commands[] = $command;
            }
        }

        return $commands;
    }

    // Single content object; FPDF/FPDI documents are read from the page's FPDF object instead.
    if ($this->isFpdf()) {
        $contents = $this->getPDFObjectForFpdf();
    }

    foreach ($contents->getSectionsText($contents->getContent()) as $section) {
        // Each section is prefixed with a synthetic BT (begin text) command.
        $commands[] = ['t' => '', 'o' => 'BT', 'c' => ''];
        foreach ($contents->getCommandsText($section) as $command) {
            $commands[] = $command;
        }
    }

    return $commands;
}
/**
* Gets all the decoded text data with its internal representation from a page.
*
* @param array $extractedRawData the extracted data returned by extractRawData, or
* null if extractRawData should be called
*
* @return array An array with the data and the internal representation
*/
public function extractDecodedRawData(?array $extractedRawData = null): array
{
    // null and an empty array both mean "extract the raw data yourself".
    if (!$extractedRawData) {
        $extractedRawData = $this->extractRawData();
    }

    $currentFont = null; /** @var Font $currentFont */
    $clippedFont = null;
    // If document is a FPDI/FPDF the dedicated page has the correct fonts.
    $fpdfPage = $this->isFpdf() ? $this->createPageForFpdf() : null;

    foreach ($extractedRawData as &$command) {
        if ('Tj' == $command['o'] || 'TJ' == $command['o']) {
            $data = $command['c'];

            if (!\is_array($data)) {
                // NOTE(review): without a current font a plain string is replaced by '' here, while
                // the array form below keeps the raw text — preserved as found, confirm intent.
                $octalDecoded = isset($currentFont) ? $currentFont->decodeOctal($data) : '';
                $command['c'] = $this->decodeEscapedText($octalDecoded, $currentFont);
                continue;
            }

            // Only the even positions are decoded; odd positions are left untouched.
            $numText = \count($data);
            for ($i = 0; $i < $numText; $i += 2) {
                $rawText = $data[$i]['c'];
                $octalDecoded = isset($currentFont) ? $currentFont->decodeOctal($rawText) : $rawText;
                $command['c'][$i]['c'] = $this->decodeEscapedText($octalDecoded, $currentFont);
            }
        } elseif ('Tf' == $command['o'] || 'TF' == $command['o']) {
            // The font id is the first space-separated token of the operand.
            $fontId = explode(' ', $command['c'])[0];
            $currentFont = isset($fpdfPage) ? $fpdfPage->getFont($fontId) : $this->getFont($fontId);
        } elseif ('Q' == $command['o']) {
            // Restore the font remembered at the last 'q'.
            $currentFont = $clippedFont;
        } elseif ('q' == $command['o']) {
            // Remember the current font so a later 'Q' can restore it.
            $clippedFont = $currentFont;
        }
    }
    // Break the reference so later use of $command cannot overwrite the last element.
    unset($command);

    return $extractedRawData;
}

/**
 * Finish decoding a text operand: resolve backslash escapes, convert ISO-8859-1 to UTF-8 and,
 * when a font is given, run the font's content decoding.
 *
 * @param string    $text text that already went through octal decoding (or raw text)
 * @param Font|null $font font active for this text, if any
 *
 * @return string the decoded text
 */
private function decodeEscapedText($text, $font = null)
{
    $text = str_replace(
        ['\\\\', '\(', '\)', '\n', '\r', '\t', '\ '],
        ['\\', '(', ')', "\n", "\r", "\t", ' '],
        $text
    );
    $text = mb_convert_encoding($text, 'UTF-8', 'ISO-8859-1');

    if (isset($font)) {
        $text = $font->decodeContent($text);
    }

    return $text;
}
/**
* Gets just the Text commands that are involved in text positions and
* Text Matrix (Tm)
*
* It extracts just the PDF commands that are involved with text positions, and
* the Text Matrix (Tm). These are: BT, ET, TL, Td, TD, Tm, T*, Tj, ', ", and TJ
*
* @param array $extractedDecodedRawData The data extracted by extractDecodedRawData.
* If it is null, the method extractDecodedRawData is called.
*
* @return array An array with the text commands of the page
*/
public function getDataCommands(?array $extractedDecodedRawData = null): array
{
    // null and an empty array both mean "decode the page yourself".
    if (!$extractedDecodedRawData) {
        $extractedDecodedRawData = $this->extractDecodedRawData();
    }

    /*
     * Operators that are kept, all other commands are dropped:
     *   BT            begin a text object (Tm and Tlm start as the identity matrix)
     *   ET            end a text object, discarding the text matrix
     *   leading TL    set the text leading used by T*, ' and "
     *   tx ty Td      move to the start of the next line, offset by tx, ty
     *   tx ty TD      same as Td, additionally sets the leading to -ty
     *   a..f Tm       set the text matrix and the text line matrix
     *   T*            move to the start of the next line using the current leading
     *   string Tj     show a text string
     *   string '      move to the next line and show a text string
     *   aw ac string " set word/char spacing, move to the next line and show a text string
     *   Tf / TF       select font and size
     *   array TJ      show text strings with individual glyph positioning
     */
    $textOperators = ['BT', 'ET', 'TL', 'Td', 'TD', 'Tm', 'T*', 'Tj', "'", '"', 'Tf', 'TF', 'TJ'];

    $extractedData = [];
    foreach ($extractedDecodedRawData as $command) {
        if (\in_array($command['o'], $textOperators, true)) {
            $extractedData[] = $command;
        }
    }

    return $extractedData;
}
/**
* Gets the Text Matrix of the text in the page
*
* Returns an array where every item is an array whose first item is the
* Text Matrix (Tm) and whose second item is a string with the text data. The Text Matrix
* is an array of 6 numbers. The last 2 numbers are the X and Y coordinates of the
* text. The first 4 numbers describe the scaling, rotation and skew of the text.
*
* @param array $dataCommands the data extracted by getDataCommands
* if null getDataCommands is called
*
* @return array an array with the data of the page including the Tm information
* of any text in the page
*/
public function getDataTm(?array $dataCommands = null): array
{
    // null and an empty array both mean "collect the text commands yourself".
    if (!$dataCommands) {
        $dataCommands = $this->getDataCommands();
    }

    // At the beginning of a text object Tm is the identity matrix.
    $defaultTm = ['1', '0', '0', '1', '0', '0'];
    // Text leading used by the T*, ' and " operators.
    $defaultTl = 0;
    // Default values for font data.
    $defaultFontId = -1;
    $defaultFontSize = 1;

    /*
     * Indexes of horizontal/vertical scaling and X,Y-coordinates in the matrix (Tm).
     * For the vertical scaling index see
     * https://github.com/smalot/pdfparser/pull/559#discussion_r1053415500
     */
    $hSc = 0;
    $vSc = 3;
    $x = 4;
    $y = 5;

    // x,y-coordinates of the text space origin; they follow the currently printed string.
    $Tx = 0;
    $Ty = 0;

    $Tm = $defaultTm;
    $Tl = $defaultTl;
    $fontId = $defaultFontId;
    $fontSize = $defaultFontSize; // reflects fontSize set by Tf or Tfs

    $extractedTexts = $this->getTextArray();
    $extractedData = [];

    foreach ($dataCommands as $command) {
        // The next unused text chunk. Non-showing commands (e.g. the closing ET) are reached after
        // all chunks are consumed, so a missing entry is expected and must not raise a warning.
        $currentText = $extractedTexts[\count($extractedData)] ?? null;

        switch ($command['o']) {
            /*
             * BT / ET
             * Begin or end a text object: the text state falls back to its defaults.
             */
            case 'BT':
            case 'ET':
                $Tm = $defaultTm;
                $Tl = $defaultTl;
                $Tx = 0;
                $Ty = 0;
                $fontId = $defaultFontId;
                $fontSize = $defaultFontSize;
                break;

            /*
             * text leading TL
             * Set the text leading, Tl, to leading. Tl is used by the T*, ' and " operators.
             */
            case 'TL':
                // scaled text leading
                $Tl = (float) $command['c'] * (float) $Tm[$vSc];
                break;

            /*
             * tx ty Td
             * Move to the start of the next line, offset from the start of the
             * current line by tx, ty.
             */
            case 'Td':
                $coord = explode(' ', $command['c']);
                $Tx += (float) $coord[0] * (float) $Tm[$hSc];
                $Ty += (float) $coord[1] * (float) $Tm[$vSc];
                $Tm[$x] = (string) $Tx;
                $Tm[$y] = (string) $Ty;
                break;

            /*
             * tx ty TD
             * Same as Td, and as a side effect sets the leading to -ty.
             */
            case 'TD':
                $coord = explode(' ', $command['c']);
                $Tl = -((float) $coord[1] * (float) $Tm[$vSc]);
                $Tx += (float) $coord[0] * (float) $Tm[$hSc];
                $Ty += (float) $coord[1] * (float) $Tm[$vSc];
                $Tm[$x] = (string) $Tx;
                $Tm[$y] = (string) $Ty;
                break;

            /*
             * a b c d e f Tm
             * Set the text matrix, Tm, and the text line matrix, Tlm.
             */
            case 'Tm':
                $Tm = explode(' ', $command['c']);
                $Tx = (float) $Tm[$x];
                $Ty = (float) $Tm[$y];
                break;

            /*
             * T*
             * Move to the start of the next line using the current leading.
             */
            case 'T*':
                $Ty -= $Tl;
                $Tm[$y] = (string) $Ty;
                break;

            /*
             * string Tj / array TJ
             * Show a text string (TJ: with individual glyph positioning).
             */
            case 'Tj':
            case 'TJ':
                $data = [$Tm, $currentText];
                if ($this->config->getDataTmFontInfoHasToBeIncluded()) {
                    $data[] = $fontId;
                    $data[] = $fontSize;
                }
                $extractedData[] = $data;
                break;

            /*
             * string '
             * Move to the next line and show a text string (T* followed by Tj).
             */
            case "'":
                $Ty -= $Tl;
                $Tm[$y] = (string) $Ty;
                $extractedData[] = [$Tm, $currentText];
                break;

            /*
             * aw ac string "
             * Move to the next line and show a text string, using aw as the word
             * spacing and ac as the character spacing.
             */
            case '"':
                $data = explode(' ', $currentText);
                $Ty -= $Tl;
                $Tm[$y] = (string) $Ty;
                $extractedData[] = [$Tm, $data[2]]; // Verify
                break;

            /*
             * fontname size Tf
             * Sets the text font and text size in the graphics state. There is no default value
             * for either; they must be selected using Tf before drawing any text.
             *
             * Source: https://ia902503.us.archive.org/10/items/pdfy-0vt8s-egqFwDl7L2/PDF%20Reference%201.0.pdf
             * Introduced with https://github.com/smalot/pdfparser/pull/516
             */
            case 'Tf':
                list($fontId, $fontSize) = explode(' ', $command['c'], 2);
                break;

            default:
        }
    }

    // Cached for getTextXY().
    $this->dataTm = $extractedData;

    return $extractedData;
}
/**
* Gets text data that are around the given coordinates (X,Y)
*
* If the text is near the given coordinates (X,Y) (or the TM info),
* the text is returned. The extractedData returned by getDataTm can be used to see
* where the coordinates of a given text are, using the TM info for it.
*
* @param float $x The X value of the coordinate to search for. If null,
* just the Y value is considered (same row)
* @param float $y The Y value of the coordinate to search for. If null,
* just the X value is considered (same column)
* @param float $xError The value less or more to consider an X to be "near"
* @param float $yError The value less or more to consider a Y to be "near"
*
* @return array An array of text that are near the given coordinates. If no text is
* "near" the x,y coordinate, an empty array is returned. If both x
* and y coordinates are null, an empty array is returned as well.
*/
public function getTextXY(?float $x = null, ?float $y = null, float $xError = 0, float $yError = 0): array
{
    // Build the Tm cache on first use.
    if (empty($this->dataTm)) {
        $this->getDataTm();
    }

    // Without any coordinate there is nothing to search for.
    if (null === $x && null === $y) {
        return [];
    }

    $extractedData = [];
    foreach ($this->dataTm as $item) {
        $tm = $item[0];
        $xTm = (float) $tm[4];
        $yTm = (float) $tm[5];

        // A null coordinate places no constraint on its axis.
        $xMatches = null === $x || ($xTm >= $x - $xError && $xTm <= $x + $xError);
        $yMatches = null === $y || ($yTm >= $y - $yError && $yTm <= $y + $yError);

        if ($xMatches && $yMatches) {
            $extractedData[] = [$tm, $item[1]];
        }
    }

    return $extractedData;
}
}

View file

@ -1,73 +0,0 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
*
* @date 2017-01-03
*
* @license LGPLv3
*
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
namespace Smalot\PdfParser;
use Smalot\PdfParser\Element\ElementArray;
/**
* Class Pages
*/
class Pages extends PDFObject
{
    /**
     * List the entries of this node's 'Kids' array.
     *
     * With $deep = false the 'Kids' content is returned as stored. With $deep = true nested
     * Pages nodes are expanded recursively and only Page objects are collected.
     *
     * @todo Objects other than Pages or Page might need to be treated specifically in order to get Page objects out of them,
     *
     * @see https://github.com/smalot/pdfparser/issues/331
     */
    public function getPages(bool $deep = false): array
    {
        if (!$this->has('Kids')) {
            return [];
        }

        /** @var ElementArray $kidsElement */
        $kidsElement = $this->get('Kids');
        $children = $kidsElement->getContent();

        if (!$deep) {
            return $children;
        }

        $collected = [];
        foreach ($children as $child) {
            if ($child instanceof self) {
                // Nested page-tree node: splice in its pages.
                $collected = array_merge($collected, $child->getPages(true));
            } elseif ($child instanceof Page) {
                $collected[] = $child;
            }
            // Any other kind of child is ignored.
        }

        return $collected;
    }
}

View file

@ -1,327 +0,0 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
*
* @date 2017-01-03
*
* @license LGPLv3
*
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
namespace Smalot\PdfParser;
use Smalot\PdfParser\Element\ElementArray;
use Smalot\PdfParser\Element\ElementBoolean;
use Smalot\PdfParser\Element\ElementDate;
use Smalot\PdfParser\Element\ElementHexa;
use Smalot\PdfParser\Element\ElementName;
use Smalot\PdfParser\Element\ElementNull;
use Smalot\PdfParser\Element\ElementNumeric;
use Smalot\PdfParser\Element\ElementString;
use Smalot\PdfParser\Element\ElementXRef;
use Smalot\PdfParser\RawData\RawDataParser;
/**
* Class Parser
*/
class Parser
{
    /**
     * @var Config
     */
    private $config;

    /**
     * Objects collected while parsing the current content, keyed by object id.
     *
     * @var PDFObject[]
     */
    protected $objects = [];

    /**
     * @var RawDataParser
     */
    protected $rawDataParser;

    /**
     * @param array       $cfg    settings handed to the RawDataParser
     * @param Config|null $config parser configuration; a default Config is created when omitted
     */
    public function __construct($cfg = [], ?Config $config = null)
    {
        $this->config = $config ?: new Config();
        $this->rawDataParser = new RawDataParser($cfg, $this->config);
    }

    public function getConfig(): Config
    {
        return $this->config;
    }

    /**
     * Read a file and parse its content as a PDF.
     *
     * The error control operator (@) is intentionally not used on the read: users complained
     * that parseFile() died silently (https://github.com/smalot/pdfparser/issues/204).
     *
     * @throws \Exception if the file cannot be read or its content cannot be parsed
     */
    public function parseFile(string $filename): Document
    {
        $content = file_get_contents($filename);

        // file_get_contents() reports failure with false; fail here with a clear message instead
        // of passing the failed read on to the raw data parser.
        if (false === $content) {
            throw new \Exception('Unable to read file "'.$filename.'".');
        }

        return $this->parseContent($content);
    }

    /**
     * @param string $content PDF content to parse
     *
     * @throws \Exception if secured PDF file was detected
     * @throws \Exception if no object list was found
     */
    public function parseContent(string $content): Document
    {
        // Create structure from raw data.
        list($xref, $data) = $this->rawDataParser->parseData($content);

        if (isset($xref['trailer']['encrypt'])) {
            throw new \Exception('Secured pdf file are currently not supported.');
        }

        if (empty($data)) {
            throw new \Exception('Object list not found. Possible secured file.');
        }

        // Create destination object.
        $document = new Document();
        $this->objects = [];

        foreach ($data as $id => $structure) {
            $this->parseObject($id, $structure, $document);
            // Drop each raw structure as soon as it has been converted.
            unset($data[$id]);
        }

        $document->setTrailer($this->parseTrailer($xref['trailer'], $document));
        $document->setObjects($this->objects);

        return $document;
    }

    /**
     * Convert the raw trailer array into a Header of elements.
     *
     * Numeric values become ElementNumeric, arrays are converted recursively, strings containing
     * an underscore become ElementXRef and everything else is parsed as a '(' string element.
     *
     * @return Header
     */
    protected function parseTrailer(array $structure, ?Document $document)
    {
        $trailer = [];

        foreach ($structure as $name => $values) {
            $name = ucfirst($name);

            if (is_numeric($values)) {
                $trailer[$name] = new ElementNumeric($values);
            } elseif (\is_array($values)) {
                $value = $this->parseTrailer($values, null);
                $trailer[$name] = new ElementArray($value, null);
            } elseif (false !== strpos($values, '_')) {
                $trailer[$name] = new ElementXRef($values, $document);
            } else {
                $trailer[$name] = $this->parseHeaderElement('(', $values, $document);
            }
        }

        return new Header($trailer, $document);
    }

    /**
     * Convert one raw object structure into a PDFObject and store it in $this->objects.
     *
     * An 'ObjStm' stream is unpacked instead: every object embedded in it is stored under
     * '<number>_0' and the stream itself is not kept.
     */
    protected function parseObject(string $id, array $structure, ?Document $document)
    {
        $header = new Header([], $document);
        $content = '';

        foreach ($structure as $position => $part) {
            // Integer parts carry no [type, value] pair; treat them as an empty element.
            if (\is_int($part)) {
                $part = [null, null];
            }

            switch ($part[0]) {
                case '[':
                    $elements = [];

                    foreach ($part[1] as $sub_element) {
                        $sub_type = $sub_element[0];
                        $sub_value = $sub_element[1];
                        $elements[] = $this->parseHeaderElement($sub_type, $sub_value, $document);
                    }

                    $header = new Header($elements, $document);
                    break;

                case '<<':
                    $header = $this->parseHeader($part[1], $document);
                    break;

                case 'stream':
                    // Use the first entry of part[3] when present, otherwise part[1].
                    $content = isset($part[3][0]) ? $part[3][0] : $part[1];

                    if ($header->get('Type')->equals('ObjStm')) {
                        $match = [];

                        // Split xrefs and contents.
                        preg_match('/^((\d+\s+\d+\s*)*)(.*)$/s', $content, $match);
                        $content = $match[3];

                        // Extract xrefs.
                        $xrefs = preg_split(
                            '/(\d+\s+\d+\s*)/s',
                            $match[1],
                            -1,
                            \PREG_SPLIT_NO_EMPTY | \PREG_SPLIT_DELIM_CAPTURE
                        );
                        $table = [];

                        // Each xref is a "<object number> <offset>" pair; index the table by offset.
                        foreach ($xrefs as $xref) {
                            list($objectNumber, $offset) = preg_split("/\s+/", trim($xref));
                            $table[$offset] = $objectNumber;
                        }

                        ksort($table);

                        $ids = array_values($table);
                        $positions = array_keys($table);

                        foreach ($positions as $index => $offset) {
                            $objectId = $ids[$index].'_0';
                            // An embedded object ends where the next one starts (or at the end of the content).
                            $next_position = isset($positions[$index + 1]) ? $positions[$index + 1] : \strlen($content);
                            $sub_content = substr($content, $offset, (int) $next_position - (int) $offset);

                            $sub_header = Header::parse($sub_content, $document);
                            $object = PDFObject::factory($document, $sub_header, '', $this->config);
                            $this->objects[$objectId] = $object;
                        }

                        // It is not necessary to store this content.
                        return;
                    }
                    break;

                default:
                    if ('null' != $part) {
                        $element = $this->parseHeaderElement($part[0], $part[1], $document);

                        if ($element) {
                            $header = new Header([$element], $document);
                        }
                    }
                    break;
            }
        }

        // Do not overwrite an object already stored under this id.
        if (!isset($this->objects[$id])) {
            $this->objects[$id] = PDFObject::factory($document, $header, $content, $this->config);
        }
    }

    /**
     * Convert a flat [name, value, name, value, ...] structure into a Header.
     *
     * @throws \Exception
     */
    protected function parseHeader(array $structure, ?Document $document): Header
    {
        $elements = [];
        $count = \count($structure);

        // Entries come in pairs: the name at $position, its typed value right after it.
        for ($position = 0; $position < $count; $position += 2) {
            $name = $structure[$position][1];
            $type = $structure[$position + 1][0];
            $value = $structure[$position + 1][1];

            $elements[$name] = $this->parseHeaderElement($type, $value, $document);
        }

        return new Header($elements, $document);
    }

    /**
     * Convert one typed raw value into the matching element object.
     *
     * @param string|array $value
     *
     * @return Element|Header|null
     *
     * @throws \Exception if the type is not known
     */
    protected function parseHeaderElement(?string $type, $value, ?Document $document)
    {
        // An empty dictionary value is normalised to an empty array so parseHeader() can take it.
        $valueIsEmpty = null == $value || '' == $value || false == $value;
        if (('<<' === $type || '>>' === $type) && $valueIsEmpty) {
            $value = [];
        }

        switch ($type) {
            case '<<':
            case '>>':
                $header = $this->parseHeader($value, $document);
                PDFObject::factory($document, $header, null, $this->config);

                return $header;

            case 'numeric':
                return new ElementNumeric($value);

            case 'boolean':
                return new ElementBoolean($value);

            case 'null':
                return new ElementNull();

            case '(':
                // A string that parses as a date becomes a date element.
                if ($date = ElementDate::parse('('.$value.')', $document)) {
                    return $date;
                }

                return ElementString::parse('('.$value.')', $document);

            case '<':
                // Hexadecimal strings are decoded and then handled like literal strings.
                return $this->parseHeaderElement('(', ElementHexa::decode($value), $document);

            case '/':
                return ElementName::parse('/'.$value, $document);

            case 'ojbref': // old mistake in tcpdf parser
            case 'objref':
                return new ElementXRef($value, $document);

            case '[':
                $values = [];

                if (\is_array($value)) {
                    foreach ($value as $sub_element) {
                        $sub_type = $sub_element[0];
                        $sub_value = $sub_element[1];
                        $values[] = $this->parseHeaderElement($sub_type, $sub_value, $document);
                    }
                }

                return new ElementArray($values, $document);

            case 'endstream':
            case 'obj': // I don't know what it means but got my project fixed.
            case '':
                // Nothing to do with.
                return null;

            default:
                throw new \Exception('Invalid type: "'.$type.'".');
        }
    }
}

View file

@ -1,396 +0,0 @@
<?php
/**
* This file is based on code of tecnickcom/TCPDF PDF library.
*
* Original author Nicola Asuni (info@tecnick.com) and
* contributors (https://github.com/tecnickcom/TCPDF/graphs/contributors).
*
* @see https://github.com/tecnickcom/TCPDF
*
* Original code was licensed on the terms of the LGPL v3.
*
* ------------------------------------------------------------------------------
*
* @file This file is part of the PdfParser library.
*
* @author Konrad Abicht <k.abicht@gmail.com>
*
* @date 2020-01-06
*
* @license LGPLv3
*
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
namespace Smalot\PdfParser\RawData;
class FilterHelper
{
protected $availableFilters = ['ASCIIHexDecode', 'ASCII85Decode', 'LZWDecode', 'FlateDecode', 'RunLengthDecode'];
/**
* Decode data using the specified filter type.
*
* @param string $filter Filter name
* @param string $data Data to decode
*
* @return string Decoded data string
*
* @throws \Exception if a certain decode function is not implemented yet
*/
public function decodeFilter(string $filter, string $data, int $decodeMemoryLimit = 0): string
{
    // Filters with an implementation in this class.
    if ('ASCIIHexDecode' === $filter) {
        return $this->decodeFilterASCIIHexDecode($data);
    }
    if ('ASCII85Decode' === $filter) {
        return $this->decodeFilterASCII85Decode($data);
    }
    if ('LZWDecode' === $filter) {
        return $this->decodeFilterLZWDecode($data);
    }
    if ('FlateDecode' === $filter) {
        return $this->decodeFilterFlateDecode($data, $decodeMemoryLimit);
    }
    if ('RunLengthDecode' === $filter) {
        return $this->decodeFilterRunLengthDecode($data);
    }

    // Known filters without an implementation: fail loudly.
    $notImplemented = [
        'CCITTFaxDecode' => 'Decode CCITTFaxDecode not implemented yet.',
        'JBIG2Decode' => 'Decode JBIG2Decode not implemented yet.',
        'DCTDecode' => 'Decode DCTDecode not implemented yet.',
        'JPXDecode' => 'Decode JPXDecode not implemented yet.',
        'Crypt' => 'Decode Crypt not implemented yet.',
    ];
    if (isset($notImplemented[$filter])) {
        throw new \Exception($notImplemented[$filter]);
    }

    // Any other filter name: hand the data back untouched.
    return $data;
}
/**
* ASCIIHexDecode
*
* Decodes data encoded in an ASCII hexadecimal representation, reproducing the original binary data.
*
* @param string $data Data to decode
*
* @return string data string
*
* @throws \Exception
*/
protected function decodeFilterASCIIHexDecode(string $data): string
{
    // all white-space characters shall be ignored
    $data = preg_replace('/[\s]/', '', $data);

    // check for EOD character: GREATER-THAN SIGN (3Eh)
    $eodPosition = strpos($data, '>');
    $hasEod = false !== $eodPosition;
    if ($hasEod) {
        // remove EOD and extra data (if any)
        $data = substr($data, 0, $eodPosition);
    }

    if (0 !== \strlen($data) % 2) {
        // odd number of hexadecimal digits is only allowed right before the EOD marker
        if (!$hasEod) {
            throw new \Exception('decodeFilterASCIIHexDecode: invalid code');
        }

        // EOD shall behave as if a 0 (zero) followed the last digit, i.e. the final
        // digit is the HIGH nibble of the last byte ("ABC>" decodes to AB C0).
        $data .= '0';
    }

    // check for invalid characters
    if (preg_match('/[^a-fA-F\d]/', $data) > 0) {
        throw new \Exception('decodeFilterASCIIHexDecode: invalid code');
    }

    // get one byte of binary data for each pair of ASCII hexadecimal digits
    return pack('H*', $data);
}
/**
* ASCII85Decode
*
* Decodes data encoded in an ASCII base-85 representation, reproducing the original binary data.
*
* @param string $data Data to decode
*
* @return string data string
*
* @throws \Exception
*/
protected function decodeFilterASCII85Decode(string $data): string
{
    // initialize string to return
    $decoded = '';

    // all white-space characters shall be ignored
    $data = preg_replace('/[\s]/', '', $data);

    // remove the optional start sequence <~ (3Ch)(7Eh); it is only a marker at the very beginning
    if (0 === strpos($data, '<~')) {
        $data = substr($data, 2);
    }

    // check for EOD: 2-character sequence ~> (7Eh)(3Eh)
    $eod = strpos($data, '~>');
    if (false !== $eod) {
        // remove EOD and extra data (if any)
        $data = substr($data, 0, $eod);
    }

    // data length
    $data_length = \strlen($data);

    // check for invalid characters: only '!'..'u' (21h-75h) and the shortcut 'z' (7Ah) are allowed
    if (preg_match('/[^\x21-\x75\x7A]/', $data) > 0) {
        throw new \Exception('decodeFilterASCII85Decode: invalid code');
    }

    // z sequence: one 'z' stands for a group of four zero bytes
    $zseq = \chr(0).\chr(0).\chr(0).\chr(0);

    // position inside a group of 5 characters (0-4)
    $group_pos = 0;
    $tuple = 0;
    $pow85 = [85 * 85 * 85 * 85, 85 * 85 * 85, 85 * 85, 85, 1];

    // for each byte
    for ($i = 0; $i < $data_length; ++$i) {
        // get char value
        $char = \ord($data[$i]);

        if (122 == $char) { // 'z'
            // the shortcut is only valid between groups
            if (0 != $group_pos) {
                throw new \Exception('decodeFilterASCII85Decode: invalid code');
            }
            $decoded .= $zseq;
        } else {
            // the value represented by a group of 5 characters should never be greater than 2^32 - 1
            $tuple += (($char - 33) * $pow85[$group_pos]);

            if (4 == $group_pos) {
                // chr() only uses the low 8 bits of its argument
                $decoded .= \chr($tuple >> 24).\chr($tuple >> 16).\chr($tuple >> 8).\chr($tuple);
                $tuple = 0;
                $group_pos = 0;
            } else {
                ++$group_pos;
            }
        }
    }

    // compensate for the characters missing from a partial last group
    if ($group_pos > 1) {
        $tuple += $pow85[$group_pos - 1];
    }

    // last tuple (if any): n characters yield n - 1 bytes
    switch ($group_pos) {
        case 4:
            $decoded .= \chr($tuple >> 24).\chr($tuple >> 16).\chr($tuple >> 8);
            break;

        case 3:
            $decoded .= \chr($tuple >> 24).\chr($tuple >> 16);
            break;

        case 2:
            $decoded .= \chr($tuple >> 24);
            break;

        case 1:
            // a single leftover character cannot encode a byte
            throw new \Exception('decodeFilterASCII85Decode: invalid code');
    }

    return $decoded;
}
/**
 * FlateDecode
 *
 * Inflates zlib-compressed data with gzuncompress() and returns the original bytes.
 *
 * @param string $data              Data to decode
 * @param int    $decodeMemoryLimit Passed to gzuncompress() as the maximum length of the decoded data
 *
 * @return string data string
 *
 * @throws \Exception when gzuncompress() raises an E_WARNING or returns false
 */
protected function decodeFilterFlateDecode(string $data, int $decodeMemoryLimit): ?string
{
    /*
     * gzuncompress reports failures (for example empty $data) through an E_WARNING.
     * While it runs, warnings are turned into exceptions; every other error level is
     * handed back to the default PHP error handler by returning false.
     */
    $warningToException = static function ($level, $message) {
        if (\E_WARNING !== $level) {
            return false;
        }
        throw new \Exception($message);
    };

    set_error_handler($warningToException);
    try {
        $inflated = gzuncompress($data, $decodeMemoryLimit);
    } finally {
        // always put back whatever handler was active before, even when an exception escapes
        restore_error_handler();
    }

    if (false === $inflated) {
        throw new \Exception('decodeFilterFlateDecode: invalid code');
    }

    return $inflated;
}
/**
 * LZWDecode
 *
 * Decompresses data encoded using the LZW (Lempel-Ziv-Welch) adaptive compression method, reproducing the original text or binary data.
 *
 * Codes are read most-significant-bit first, starting at 9 bits wide. Code 256 clears the
 * table, code 257 marks end of data; decoding also stops when the input runs out.
 *
 * @param string $data Data to decode
 *
 * @return string Data string
 */
protected function decodeFilterLZWDecode(string $data): string
{
    // initialize string to return
    $decoded = '';
    // expand the input into a string of '0'/'1' characters so variable-width codes can be sliced out
    $bitstring = '';
    $byte_count = \strlen($data);
    for ($i = 0; $i < $byte_count; ++$i) {
        $bitstring .= sprintf('%08b', \ord($data[$i]));
    }
    // number of bits still unread, and read position inside $bitstring
    // (a moving offset avoids re-copying the whole remaining string for every code)
    $bits_left = \strlen($bitstring);
    $bitpos = 0;
    // initialize code length in bits
    $bitlen = 9;
    // initialize dictionary index (256 and 257 are reserved markers)
    $dix = 258;
    // initialize the dictionary (with the first 256 entries).
    $dictionary = [];
    for ($i = 0; $i < 256; ++$i) {
        $dictionary[$i] = \chr($i);
    }
    // previous val
    $prev_index = 0;
    // until we encounter the EOD marker (257), read $bitlen bits at a time
    while (($bits_left > 0) && (257 != ($index = bindec(substr($bitstring, $bitpos, $bitlen))))) {
        // consume the bits just read
        $bitpos += $bitlen;
        $bits_left -= $bitlen;
        if (256 == $index) { // clear-table marker
            // reset code length in bits
            $bitlen = 9;
            // reset dictionary index
            $dix = 258;
            $prev_index = 256;
            // reset the dictionary (with the first 256 entries).
            $dictionary = [];
            for ($i = 0; $i < 256; ++$i) {
                $dictionary[$i] = \chr($i);
            }
        } elseif (256 == $prev_index) {
            // first code after a clear: output it, nothing to add to the dictionary yet
            $decoded .= $dictionary[$index];
            $prev_index = $index;
        } else {
            if ($index < $dix) {
                // code already in the dictionary
                $decoded .= $dictionary[$index];
                $dic_val = $dictionary[$prev_index].$dictionary[$index][0];
                $next_prev = $index;
            } else {
                // code not in the dictionary yet: it can only be the entry being created
                // right now, i.e. previous string + its own first character
                $dic_val = $dictionary[$prev_index].$dictionary[$prev_index][0];
                $decoded .= $dic_val;
                // the previous code must advance to that new entry; otherwise the next
                // dictionary entry is built from a stale string
                $next_prev = $dix;
            }
            // update dictionary
            $dictionary[$dix] = $dic_val;
            $prev_index = $next_prev;
            ++$dix;
            // change bit length by case
            if (2047 == $dix) {
                $bitlen = 12;
            } elseif (1023 == $dix) {
                $bitlen = 11;
            } elseif (511 == $dix) {
                $bitlen = 10;
            }
        }
    }
    return $decoded;
}
/**
 * RunLengthDecode
 *
 * Decompresses data encoded using a byte-oriented run-length encoding algorithm.
 *
 * Each run starts with a length byte: 0-127 copies the next length + 1 bytes literally,
 * 129-255 repeats the next byte 257 - length times, 128 ends the data.
 * Decoding stops quietly when the input ends in the middle of a run.
 *
 * @param string $data Data to decode
 *
 * @return string decoded data
 */
protected function decodeFilterRunLengthDecode(string $data): string
{
    // initialize string to return
    $decoded = '';
    // data length
    $data_length = \strlen($data);
    $i = 0;
    while ($i < $data_length) {
        // get current length byte
        $byte = \ord($data[$i]);
        if (128 == $byte) {
            // a length value of 128 denotes EOD
            break;
        }
        if ($byte < 128) {
            // the following length + 1 (1 to 128) bytes shall be copied literally;
            // substr() simply returns fewer bytes when the input is truncated
            $decoded .= substr($data, $i + 1, $byte + 1);
            // move to next run
            $i += ($byte + 2);
            continue;
        }
        // length 129 to 255: the following single byte is repeated 257 - length (2 to 128) times
        if ($i + 1 >= $data_length) {
            // truncated input: the byte to repeat is missing, so there is nothing more to decode
            break;
        }
        $decoded .= str_repeat($data[$i + 1], 257 - $byte);
        // move to next run
        $i += 2;
    }
    return $decoded;
}
/**
 * Returns the value of the availableFilters property unchanged.
 *
 * @return array list of available filters
 */
public function getAvailableFilters(): array
{
return $this->availableFilters;
}
}

View file

@ -1,902 +0,0 @@
<?php
/**
* This file is based on code of tecnickcom/TCPDF PDF library.
*
* Original author Nicola Asuni (info@tecnick.com) and
* contributors (https://github.com/tecnickcom/TCPDF/graphs/contributors).
*
* @see https://github.com/tecnickcom/TCPDF
*
* Original code was licensed on the terms of the LGPL v3.
*
* ------------------------------------------------------------------------------
*
* @file This file is part of the PdfParser library.
*
* @author Konrad Abicht <k.abicht@gmail.com>
*
* @date 2020-01-06
*
* @license LGPLv3
*
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
namespace Smalot\PdfParser\RawData;
use Smalot\PdfParser\Config;
class RawDataParser
{
/**
* @var \Smalot\PdfParser\Config
*/
private $config;
/**
* Configuration array.
*/
protected $cfg = [
// if `true` ignore filter decoding errors
'ignore_filter_decoding_errors' => true,
// if `true` ignore missing filter decoding errors
'ignore_missing_filter_decoders' => true,
];
protected $filterHelper;
protected $objects;
/**
 * @param array       $cfg    Configuration array merged over the defaults in $this->cfg, default is []
 * @param Config|null $config Parser configuration; a new default Config is created when none is given
 */
public function __construct($cfg = [], ?Config $config = null)
{
    // merge given array with default values
    $this->cfg = array_merge($this->cfg, $cfg);
    $this->filterHelper = new FilterHelper();
    // explicit nullable type above: relying on "= null" to make the parameter nullable is deprecated
    $this->config = $config ?? new Config();
}
/**
 * Decode the specified stream.
 *
 * Reads /Length and /Filter from the stream dictionary, truncates the stream to the declared
 * length when that is shorter, and applies every filter the filter helper knows.
 *
 * @param string $pdfData PDF data
 * @param array  $xref    Cross-reference data, handed to getObjectVal() to resolve an indirect /Filter
 * @param array  $sdic    Stream's dictionary array
 * @param string $stream  Stream to decode
 *
 * @return array containing decoded stream data and remaining filters
 *
 * @throws \Exception when a filter fails and the matching "ignore_*" option in $this->cfg is off
 */
protected function decodeStream(string $pdfData, array $xref, array $sdic, string $stream): array
{
    // get stream length and filters
    $slength = \strlen($stream);
    if ($slength <= 0) {
        return ['', []];
    }
    $filters = [];
    foreach ($sdic as $k => $v) {
        if ('/' != $v[0]) {
            continue;
        }
        if (('Length' == $v[1]) && (isset($sdic[$k + 1])) && ('numeric' == $sdic[$k + 1][0])) {
            // get declared stream length
            $declength = (int) $sdic[$k + 1][1];
            if ($declength < $slength) {
                $stream = substr($stream, 0, $declength);
                $slength = $declength;
            }
        } elseif (('Filter' == $v[1]) && (isset($sdic[$k + 1]))) {
            // resolve indirect object
            $objval = $this->getObjectVal($pdfData, $xref, $sdic[$k + 1]);
            // a resolved indirect object comes back as a list of raw elements
            // (see getIndirectObject()); the filter value is then its first element
            if (isset($objval[0]) && \is_array($objval[0])) {
                $objval = $objval[0];
            }
            $objtype = $objval[0] ?? null;
            if ('/' === $objtype) {
                // single filter
                $filters[] = $objval[1];
            } elseif ('[' === $objtype) {
                // array of filters
                foreach ($objval[1] as $flt) {
                    if ('/' == $flt[0]) {
                        $filters[] = $flt[1];
                    }
                }
            }
        }
    }
    // decode the stream
    $remaining_filters = [];
    // the list of supported filters does not change while decoding, fetch it once
    $available_filters = $this->filterHelper->getAvailableFilters();
    foreach ($filters as $filter) {
        if (!\in_array($filter, $available_filters)) {
            // add missing filter to array
            $remaining_filters[] = $filter;
            continue;
        }
        try {
            $stream = $this->filterHelper->decodeFilter($filter, $stream, $this->config->getDecodeMemoryLimit());
        } catch (\Exception $e) {
            $emsg = $e->getMessage();
            // messages starting with "~" are treated as "missing decoder", everything else as a decoding error
            $missing_decoder = ('' !== $emsg) && ('~' === $emsg[0]);
            if (($missing_decoder && !$this->cfg['ignore_missing_filter_decoders'])
                || (!$missing_decoder && !$this->cfg['ignore_filter_decoding_errors'])
            ) {
                // keep the original exception attached so the real cause stays visible
                throw new \Exception($emsg, 0, $e);
            }
        }
    }
    return [$stream, $remaining_filters];
}
/**
 * Decode the Cross-Reference section
 *
 * Walks the entries that directly follow the 'xref' keyword, records the offset of every
 * in-use object under $xref['xref'], then reads the trailer dictionary and follows /Prev.
 *
 * @param string $pdfData PDF data
 * @param int $startxref Offset at which the xref section starts (position of the 'xref' keyword)
 * @param array $xref Previous xref array (if any)
 *
 * @return array containing xref and trailer data
 *
 * @throws \Exception if no trailer is found after the entries
 */
protected function decodeXref(string $pdfData, int $startxref, array $xref = []): array
{
$startxref += 4; // 4 is the length of the word 'xref'
// skip initial white space chars
$offset = $startxref + strspn($pdfData, $this->config->getPdfWhitespaces(), $startxref);
// initialize object number
$obj_num = 0;
// search for cross-reference entries or subsection
// group 1 and 2 are the two numbers on the line, group 3 is the optional 'n'/'f' flag:
// with a flag the line is an entry (offset, generation), without it a subsection header
while (preg_match('/([0-9]+)[\x20]([0-9]+)[\x20]?([nf]?)(\r\n|[\x20]?[\r\n])/', $pdfData, $matches, \PREG_OFFSET_CAPTURE, $offset) > 0) {
if ($matches[0][1] != $offset) {
// we are on another section
break;
}
$offset += \strlen($matches[0][0]);
if ('n' == $matches[3][0]) {
// create unique object index: [object number]_[generation number]
$index = $obj_num.'_'.(int) $matches[2][0];
// check if object already exist
// (an offset that is already stored is kept, it is never overwritten here)
if (!isset($xref['xref'][$index])) {
// store object offset position
$xref['xref'][$index] = (int) $matches[1][0];
}
++$obj_num;
} elseif ('f' == $matches[3][0]) {
// free entry: nothing is stored, only the object counter advances
++$obj_num;
} else {
// object number (index)
$obj_num = (int) $matches[1][0];
}
}
// get trailer data
// (ungreedy match: the captured text ends at the first '>>' after 'trailer <<')
if (preg_match('/trailer[\s]*<<(.*)>>/isU', $pdfData, $matches, \PREG_OFFSET_CAPTURE, $offset) > 0) {
$trailer_data = $matches[1][0];
if (!isset($xref['trailer']) || empty($xref['trailer'])) {
// get only the last updated version
// (trailer values are filled only when no trailer has been stored yet)
$xref['trailer'] = [];
// parse trailer_data
if (preg_match('/Size[\s]+([0-9]+)/i', $trailer_data, $matches) > 0) {
$xref['trailer']['size'] = (int) $matches[1];
}
// object references are stored as "[object number]_[generation number]"
if (preg_match('/Root[\s]+([0-9]+)[\s]+([0-9]+)[\s]+R/i', $trailer_data, $matches) > 0) {
$xref['trailer']['root'] = (int) $matches[1].'_'.(int) $matches[2];
}
if (preg_match('/Encrypt[\s]+([0-9]+)[\s]+([0-9]+)[\s]+R/i', $trailer_data, $matches) > 0) {
$xref['trailer']['encrypt'] = (int) $matches[1].'_'.(int) $matches[2];
}
if (preg_match('/Info[\s]+([0-9]+)[\s]+([0-9]+)[\s]+R/i', $trailer_data, $matches) > 0) {
$xref['trailer']['info'] = (int) $matches[1].'_'.(int) $matches[2];
}
// the two /ID entries are kept as the raw text found between '<' and '>'
if (preg_match('/ID[\s]*[\[][\s]*[<]([^>]*)[>][\s]*[<]([^>]*)[>]/i', $trailer_data, $matches) > 0) {
$xref['trailer']['id'] = [];
$xref['trailer']['id'][0] = $matches[1];
$xref['trailer']['id'][1] = $matches[2];
}
}
if (preg_match('/Prev[\s]+([0-9]+)/i', $trailer_data, $matches) > 0) {
// get previous xref
$xref = $this->getXrefData($pdfData, (int) $matches[1], $xref);
}
} else {
throw new \Exception('Unable to find trailer');
}
return $xref;
}
/**
 * Decode the Cross-Reference Stream section
 *
 * Reads the stream object at $startxref, collects /Index, /W, /Prev, /DecodeParms and
 * (when no trailer is stored yet) the trailer keys from its dictionary, undoes the PNG
 * predictor when one is declared, and turns every row into an entry of $xref['xref'].
 *
 * @param string $pdfData   PDF data
 * @param int    $startxref Offset at which the xref section starts
 * @param array  $xref      Previous xref array (if any)
 *
 * @return array containing xref and trailer data
 *
 * @throws \Exception if unknown PNG predictor detected
 */
protected function decodeXrefStream(string $pdfData, int $startxref, array $xref = []): array
{
    // try to read Cross-Reference Stream
    $xrefobj = $this->getRawObject($pdfData, $startxref);
    $xrefcrs = $this->getIndirectObject($pdfData, $xref, $xrefobj[1], $startxref, true);
    if (!isset($xref['trailer']) || empty($xref['trailer'])) {
        // get only the last updated version
        $xref['trailer'] = [];
        $filltrailer = true;
    } else {
        $filltrailer = false;
    }
    if (!isset($xref['xref'])) {
        $xref['xref'] = [];
    }
    $valid_crs = false;
    $columns = 0;
    $predictor = null;
    $sarr = $xrefcrs[0][1];
    if (!\is_array($sarr)) {
        $sarr = [];
    }
    $wb = [];
    // the dictionary is a flat list: a name element is followed by its value element
    foreach ($sarr as $k => $v) {
        if (
            ('/' == $v[0])
            && ('Type' == $v[1])
            && (
                isset($sarr[$k + 1])
                && '/' == $sarr[$k + 1][0]
                && 'XRef' == $sarr[$k + 1][1]
            )
        ) {
            $valid_crs = true;
        } elseif (('/' == $v[0]) && ('Index' == $v[1]) && (isset($sarr[$k + 1]))) {
            // initialize list for: first object number in the subsection / number of objects
            $index_blocks = [];
            for ($m = 0; $m < \count($sarr[$k + 1][1]); $m += 2) {
                $index_blocks[] = [$sarr[$k + 1][1][$m][1], $sarr[$k + 1][1][$m + 1][1]];
            }
        } elseif (('/' == $v[0]) && ('Prev' == $v[1]) && (isset($sarr[$k + 1]) && ('numeric' == $sarr[$k + 1][0]))) {
            // get previous xref offset
            $prevxref = (int) $sarr[$k + 1][1];
        } elseif (('/' == $v[0]) && ('W' == $v[1]) && (isset($sarr[$k + 1]))) {
            // number of bytes (in the decoded stream) of the corresponding field
            $wb[0] = (int) $sarr[$k + 1][1][0][1];
            $wb[1] = (int) $sarr[$k + 1][1][1][1];
            $wb[2] = (int) $sarr[$k + 1][1][2][1];
        } elseif (('/' == $v[0]) && ('DecodeParms' == $v[1]) && (isset($sarr[$k + 1][1]))) {
            $decpar = $sarr[$k + 1][1];
            foreach ($decpar as $kdc => $vdc) {
                if (
                    '/' == $vdc[0]
                    && 'Columns' == $vdc[1]
                    && (
                        isset($decpar[$kdc + 1])
                        && 'numeric' == $decpar[$kdc + 1][0]
                    )
                ) {
                    $columns = (int) $decpar[$kdc + 1][1];
                } elseif (
                    '/' == $vdc[0]
                    && 'Predictor' == $vdc[1]
                    && (
                        isset($decpar[$kdc + 1])
                        && 'numeric' == $decpar[$kdc + 1][0]
                    )
                ) {
                    $predictor = (int) $decpar[$kdc + 1][1];
                }
            }
        } elseif ($filltrailer) {
            if (('/' == $v[0]) && ('Size' == $v[1]) && (isset($sarr[$k + 1]) && ('numeric' == $sarr[$k + 1][0]))) {
                $xref['trailer']['size'] = $sarr[$k + 1][1];
            } elseif (('/' == $v[0]) && ('Root' == $v[1]) && (isset($sarr[$k + 1]) && ('objref' == $sarr[$k + 1][0]))) {
                $xref['trailer']['root'] = $sarr[$k + 1][1];
            } elseif (('/' == $v[0]) && ('Info' == $v[1]) && (isset($sarr[$k + 1]) && ('objref' == $sarr[$k + 1][0]))) {
                $xref['trailer']['info'] = $sarr[$k + 1][1];
            } elseif (('/' == $v[0]) && ('Encrypt' == $v[1]) && (isset($sarr[$k + 1]) && ('objref' == $sarr[$k + 1][0]))) {
                $xref['trailer']['encrypt'] = $sarr[$k + 1][1];
            } elseif (('/' == $v[0]) && ('ID' == $v[1]) && (isset($sarr[$k + 1]))) {
                $xref['trailer']['id'] = [];
                $xref['trailer']['id'][0] = $sarr[$k + 1][1][0][1];
                $xref['trailer']['id'][1] = $sarr[$k + 1][1][1][1];
            }
        }
    }
    // decode data
    if ($valid_crs && isset($xrefcrs[1][3][0])) {
        if (null !== $predictor) {
            // number of bytes in a row (one leading filter-type byte + the data columns)
            $rowlen = ($columns + 1);
            // convert the stream into an array of integers
            /** @var array<int> */
            $sdata = unpack('C*', $xrefcrs[1][3][0]);
            // TODO: Handle the case when unpack returns false
            // split the rows
            $sdata = array_chunk($sdata, $rowlen);
            // initialize decoded array
            $ddata = [];
            // initialize first row with zeros
            $prev_row = array_fill(0, $rowlen, 0);
            // for each row apply PNG unpredictor
            foreach ($sdata as $k => $row) {
                // initialize new row
                $ddata[$k] = [];
                // get PNG predictor value from the row's leading filter-type byte
                $predictor = (10 + $row[0]);
                // for each byte on the row
                for ($i = 1; $i <= $columns; ++$i) {
                    // new index
                    $j = ($i - 1);
                    $row_up = $prev_row[$j];
                    if (1 == $i) {
                        $row_left = 0;
                        $row_upleft = 0;
                    } else {
                        // PNG reconstruction works on already reconstructed neighbours:
                        // the left byte comes from the decoded row, not from the encoded input
                        $row_left = $ddata[$k][$j - 1];
                        $row_upleft = $prev_row[$j - 1];
                    }
                    switch ($predictor) {
                        case 10: // PNG prediction (on encoding, PNG None on all rows)
                            $ddata[$k][$j] = $row[$i];
                            break;
                        case 11: // PNG prediction (on encoding, PNG Sub on all rows)
                            $ddata[$k][$j] = (($row[$i] + $row_left) & 0xFF);
                            break;
                        case 12: // PNG prediction (on encoding, PNG Up on all rows)
                            $ddata[$k][$j] = (($row[$i] + $row_up) & 0xFF);
                            break;
                        case 13: // PNG prediction (on encoding, PNG Average on all rows)
                            // integer division: the average is rounded down and stays an int for the bit mask
                            $ddata[$k][$j] = (($row[$i] + intdiv($row_left + $row_up, 2)) & 0xFF);
                            break;
                        case 14: // PNG prediction (on encoding, PNG Paeth on all rows)
                            // initial estimate
                            $p = ($row_left + $row_up - $row_upleft);
                            // distances
                            $pa = abs($p - $row_left);
                            $pb = abs($p - $row_up);
                            $pc = abs($p - $row_upleft);
                            $pmin = min($pa, $pb, $pc);
                            // pick the neighbour with minimum distance (ties: left, then up, then up-left)
                            switch ($pmin) {
                                case $pa:
                                    $ddata[$k][$j] = (($row[$i] + $row_left) & 0xFF);
                                    break;
                                case $pb:
                                    $ddata[$k][$j] = (($row[$i] + $row_up) & 0xFF);
                                    break;
                                case $pc:
                                    $ddata[$k][$j] = (($row[$i] + $row_upleft) & 0xFF);
                                    break;
                            }
                            break;
                        default: // PNG prediction (on encoding, PNG optimum)
                            throw new \Exception('Unknown PNG predictor: '.$predictor);
                    }
                }
                $prev_row = $ddata[$k];
            } // end for each row
            // complete decoding
        } else {
            // number of bytes in a row
            $rowlen = array_sum($wb);
            // convert the stream into an array of integers
            $sdata = unpack('C*', $xrefcrs[1][3][0]);
            // split the rows
            $ddata = array_chunk($sdata, $rowlen);
        }
        $sdata = [];
        // for every row: combine the bytes of each of the three fields into one big-endian integer
        foreach ($ddata as $k => $row) {
            // initialize new row
            $sdata[$k] = [0, 0, 0];
            if (0 == $wb[0]) {
                // default type field
                $sdata[$k][0] = 1;
            }
            $i = 0; // count bytes in the row
            // for every column
            for ($c = 0; $c < 3; ++$c) {
                // for every byte on the column
                for ($b = 0; $b < $wb[$c]; ++$b) {
                    if (isset($row[$i])) {
                        $sdata[$k][$c] += ($row[$i] << (($wb[$c] - 1 - $b) * 8));
                    }
                    ++$i;
                }
            }
        }
        // fill xref
        if (isset($index_blocks)) {
            // load the first object number of the first /Index entry
            $obj_num = $index_blocks[0][0];
        } else {
            $obj_num = 0;
        }
        foreach ($sdata as $k => $row) {
            switch ($row[0]) {
                case 0: // (f) linked list of free objects
                    break;
                case 1: // (n) objects that are in use but are not compressed
                    // create unique object index: [object number]_[generation number]
                    $index = $obj_num.'_'.$row[2];
                    // check if object already exist
                    if (!isset($xref['xref'][$index])) {
                        // store object offset position
                        $xref['xref'][$index] = $row[1];
                    }
                    break;
                case 2: // compressed objects
                    // $row[1] = object number of the object stream in which this object is stored
                    // $row[2] = index of this object within the object stream
                    $index = $row[1].'_0_'.$row[2];
                    $xref['xref'][$index] = -1;
                    break;
                default: // null objects
                    break;
            }
            ++$obj_num;
            if (isset($index_blocks)) {
                // reduce the number of remaining objects
                --$index_blocks[0][1];
                if (0 == $index_blocks[0][1]) {
                    // remove the actual used /Index entry
                    array_shift($index_blocks);
                    if (0 < \count($index_blocks)) {
                        // load the first object number of the following /Index entry
                        $obj_num = $index_blocks[0][0];
                    } else {
                        // if there are no more entries, remove $index_blocks to avoid actions on an empty array
                        unset($index_blocks);
                    }
                }
            }
        }
    } // end decoding data
    if (isset($prevxref)) {
        // get previous xref
        $xref = $this->getXrefData($pdfData, $prevxref, $xref);
    }
    return $xref;
}
/**
 * Builds the regular expression for an indirect object header:
 * object number, separator, generation number, separator, "obj".
 *
 * @param array $objRefs object number at index 0, generation number at index 1
 */
protected function getObjectHeaderPattern(array $objRefs): string
{
    // the separator is the configured PDF white-space expression, not just a blank
    $separator = $this->config->getPdfWhitespacesRegex();

    return sprintf('/%s%s%s%sobj/', $objRefs[0], $separator, $objRefs[1], $separator);
}
/**
 * Length of an object header such as "4 0 obj", counting one character per separator.
 *
 * @param array $objRefs object number at index 0, generation number at index 1
 */
protected function getObjectHeaderLen(array $objRefs): int
{
    // fixed part: two separators plus the keyword "obj"
    $fixedLen = 2 + \strlen('obj');
    $objNumLen = \strlen($objRefs[0]);
    $genNumLen = \strlen($objRefs[1]);

    return $fixedLen + $objNumLen + $genNumLen;
}
/**
 * Get content of indirect object.
 *
 * Checks that the object header for $objRef is found at $offset, then reads raw elements
 * with getRawObject() until 'endobj' is reached or the read position stops advancing.
 *
 * @param string $pdfData PDF data
 * @param array $xref Cross-reference data, only handed on to decodeStream()
 * @param string $objRef Object number and generation number separated by underscore character
 * @param int $offset Object offset
 * @param bool $decoding If true decode streams
 *
 * @return array containing object data: a list of raw elements, or ['null', 'null', $offset]
 *               when the expected header is not found at $offset
 *
 * @throws \Exception if invalid object reference found
 */
protected function getIndirectObject(string $pdfData, array $xref, string $objRef, int $offset = 0, bool $decoding = true): array
{
/*
* build indirect object header
*/
// $objHeader = "[object number] [generation number] obj"
$objRefArr = explode('_', $objRef);
if (2 !== \count($objRefArr)) {
throw new \Exception('Invalid object reference for $obj.');
}
// NOTE(review): this length counts exactly one character per separator, so a header
// with wider gaps is cut short before the pattern is tested below - confirm
$objHeaderLen = $this->getObjectHeaderLen($objRefArr);
/*
* check if we are in position
*/
// ignore whitespace characters at offset
$offset += strspn($pdfData, $this->config->getPdfWhitespaces(), $offset);
// ignore leading zeros for object number
$offset += strspn($pdfData, '0', $offset);
if (0 == preg_match($this->getObjectHeaderPattern($objRefArr), substr($pdfData, $offset, $objHeaderLen))) {
// an indirect reference to an undefined object shall be considered a reference to the null object
return ['null', 'null', $offset];
}
/*
* get content
*/
// starting position of object content
$offset += $objHeaderLen;
$objContentArr = [];
$i = 0; // object main index
do {
$oldOffset = $offset;
// get element
$element = $this->getRawObject($pdfData, $offset);
// index 2 of a raw element is the offset right after it
$offset = $element[2];
// decode stream using stream's dictionary information
// (the dictionary is the element stored immediately before the stream)
if ($decoding && ('stream' === $element[0]) && (isset($objContentArr[$i - 1][0])) && ('<<' === $objContentArr[$i - 1][0])) {
// the [decoded data, remaining filters] pair is attached at index 3 of the stream element
$element[3] = $this->decodeStream($pdfData, $xref, $objContentArr[$i - 1][1], $element[1]);
}
$objContentArr[$i] = $element;
++$i;
// stop at 'endobj', or when nothing was consumed (guards against looping forever)
} while (('endobj' !== $element[0]) && ($offset !== $oldOffset));
// remove closing delimiter
// (the last element is dropped even when the loop ended because the offset stopped advancing)
array_pop($objContentArr);
/*
* return raw object content
*/
return $objContentArr;
}
/**
 * Get the content of object, resolving indirect object reference if necessary.
 *
 * Anything that is not an 'objref' element is returned unchanged. A reference is answered
 * from $this->objects when present there, otherwise it is parsed from the offset recorded
 * in the cross-reference data. A reference without a recorded offset is returned unchanged.
 *
 * @param string $pdfData PDF data
 * @param array  $xref    Cross-reference data; offsets are looked up under $xref['xref'],
 *                        and directly under $xref as a fallback
 * @param array  $obj     Object value
 *
 * @return array containing object data
 *
 * @throws \Exception
 */
protected function getObjectVal(string $pdfData, $xref, array $obj): array
{
    if ('objref' != $obj[0]) {
        return $obj;
    }
    $ref = $obj[1];
    if (isset($this->objects[$ref])) {
        // this object has been already parsed
        return $this->objects[$ref];
    }
    // decodeXref() and decodeXrefStream() store offsets as $xref['xref']["<num>_<gen>"];
    // the flat lookup is kept so callers passing a plain offset map keep working
    $offset = $xref['xref'][$ref] ?? $xref[$ref] ?? null;
    if (!is_numeric($offset) || $offset < 0) {
        // unknown reference (or a compressed-object marker): leave it unresolved
        return $obj;
    }
    // parse new object; the result is deliberately not stored in $this->objects, because
    // nothing in this class resets that property and object ids repeat across documents
    return $this->getIndirectObject($pdfData, $xref, $ref, (int) $offset, false);
}
/**
 * Get object type, raw value and offset to next object
 *
 * Reads exactly one token starting at $offset (after skipping white-space): comment, name,
 * literal or hexadecimal string, array, dictionary, keyword, reference or number.
 * Arrays and dictionaries are read recursively and returned as lists of such tokens.
 *
 * @param string $pdfData PDF data
 * @param int    $offset  Object offset
 *
 * @return array containing object type, raw value and offset to next object;
 *               type and value are empty strings when nothing could be read at $offset
 */
protected function getRawObject(string $pdfData, int $offset = 0): array
{
    $objtype = ''; // object type to be returned
    $objval = ''; // object value to be returned
    if ($offset >= \strlen($pdfData)) {
        // nothing left to read: report "no object" without touching an invalid offset
        return [$objtype, $objval, $offset];
    }
    // skip initial white space chars
    $offset += strspn($pdfData, $this->config->getPdfWhitespaces(), $offset);
    if (!isset($pdfData[$offset])) {
        // only white-space remained
        return [$objtype, $objval, $offset];
    }
    // get first char
    $char = $pdfData[$offset];
    // get object type
    switch ($char) {
        case '%': // \x25 PERCENT SIGN
            // skip comment and search for next token
            $next = strcspn($pdfData, "\r\n", $offset);
            if ($next > 0) {
                $offset += $next;
                return $this->getRawObject($pdfData, $offset);
            }
            break;
        case '/': // \x2F SOLIDUS
            // name object: runs until the next white-space or delimiter (at most 256 bytes are scanned)
            $objtype = $char;
            ++$offset;
            $span = strcspn($pdfData, "\x00\x09\x0a\x0c\x0d\x20\n\t\r\v\f\x28\x29\x3c\x3e\x5b\x5d\x7b\x7d\x2f\x25", $offset, 256);
            if ($span > 0) {
                $objval = substr($pdfData, $offset, $span); // unescaped value
                $offset += $span;
            }
            break;
        case '(': // \x28 LEFT PARENTHESIS
        case ')': // \x29 RIGHT PARENTHESIS
            // literal string object
            $objtype = $char;
            ++$offset;
            $strpos = $offset;
            if ('(' == $char) {
                // scan to the matching closing parenthesis, honouring nesting and backslash escapes
                $open_bracket = 1;
                while ($open_bracket > 0) {
                    if (!isset($pdfData[$strpos])) {
                        break;
                    }
                    $ch = $pdfData[$strpos];
                    switch ($ch) {
                        case '\\': // REVERSE SOLIDUS (5Ch) (Backslash)
                            // skip next character
                            ++$strpos;
                            break;
                        case '(': // LEFT PARENTHESIS (28h)
                            ++$open_bracket;
                            break;
                        case ')': // RIGHT PARENTHESIS (29h)
                            --$open_bracket;
                            break;
                    }
                    ++$strpos;
                }
                $objval = substr($pdfData, $offset, $strpos - $offset - 1);
                $offset = $strpos;
            }
            break;
        case '[': // \x5B LEFT SQUARE BRACKET
        case ']': // \x5D RIGHT SQUARE BRACKET
            // array object
            $objtype = $char;
            ++$offset;
            if ('[' == $char) {
                // get array content
                $objval = [];
                do {
                    $oldOffset = $offset;
                    // get element
                    $element = $this->getRawObject($pdfData, $offset);
                    $offset = $element[2];
                    $objval[] = $element;
                    // stop at ']' or when nothing was consumed
                } while ((']' != $element[0]) && ($offset != $oldOffset));
                // remove closing delimiter
                array_pop($objval);
            }
            break;
        case '<': // \x3C LESS-THAN SIGN
        case '>': // \x3E GREATER-THAN SIGN
            if (isset($pdfData[$offset + 1]) && ($pdfData[$offset + 1] == $char)) {
                // dictionary object
                $objtype = $char.$char;
                $offset += 2;
                if ('<' == $char) {
                    // get dictionary content (flat list: key element, value element, ...)
                    $objval = [];
                    do {
                        $oldOffset = $offset;
                        // get element
                        $element = $this->getRawObject($pdfData, $offset);
                        $offset = $element[2];
                        $objval[] = $element;
                        // stop at '>>' or when nothing was consumed
                    } while (('>>' != $element[0]) && ($offset != $oldOffset));
                    // remove closing delimiter
                    array_pop($objval);
                }
            } else {
                // hexadecimal string object
                $objtype = $char;
                ++$offset;
                $span = strspn($pdfData, "0123456789abcdefABCDEF\x09\x0a\x0c\x0d\x20", $offset);
                $dataToCheck = $pdfData[$offset + $span] ?? null;
                if ('<' == $char && $span > 0 && '>' == $dataToCheck) {
                    // remove white space characters; str_replace() with a list of single characters
                    // is used because strtr() with an empty replacement string changes nothing
                    $objval = str_replace(
                        str_split($this->config->getPdfWhitespaces()),
                        '',
                        substr($pdfData, $offset, $span)
                    );
                    $offset += $span + 1;
                } elseif (false !== ($endpos = strpos($pdfData, '>', $offset))) {
                    // not a clean hex string: skip to just after the next '>' and return an empty value
                    $offset = $endpos + 1;
                }
            }
            break;
        default:
            if ('endobj' == substr($pdfData, $offset, 6)) {
                // indirect object
                $objtype = 'endobj';
                $offset += 6;
            } elseif ('null' == substr($pdfData, $offset, 4)) {
                // null object
                $objtype = 'null';
                $offset += 4;
                $objval = 'null';
            } elseif ('true' == substr($pdfData, $offset, 4)) {
                // boolean true object
                $objtype = 'boolean';
                $offset += 4;
                $objval = 'true';
            } elseif ('false' == substr($pdfData, $offset, 5)) {
                // boolean false object
                $objtype = 'boolean';
                $offset += 5;
                $objval = 'false';
            } elseif ('stream' == substr($pdfData, $offset, 6)) {
                // start stream object
                $objtype = 'stream';
                $offset += 6;
                // the keyword must be followed by LF or CR LF; the data starts right after it
                if (1 == preg_match('/^([\r]?[\n])/isU', substr($pdfData, $offset, 4), $matches)) {
                    $offset += \strlen($matches[0]);
                    $pregResult = preg_match(
                        '/(endstream)[\x09\x0a\x0c\x0d\x20]/isU',
                        $pdfData,
                        $matches,
                        \PREG_OFFSET_CAPTURE,
                        $offset
                    );
                    if (1 == $pregResult) {
                        // value is everything up to 'endstream'; the next read starts at that keyword
                        $objval = substr($pdfData, $offset, $matches[0][1] - $offset);
                        $offset = $matches[1][1];
                    }
                }
            } elseif ('endstream' == substr($pdfData, $offset, 9)) {
                // end stream object
                $objtype = 'endstream';
                $offset += 9;
            } elseif (1 == preg_match('/^([0-9]+)[\s]+([0-9]+)[\s]+R/iU', substr($pdfData, $offset, 33), $matches)) {
                // indirect object reference, stored as "[object number]_[generation number]"
                $objtype = 'objref';
                $offset += \strlen($matches[0]);
                $objval = (int) $matches[1].'_'.(int) $matches[2];
            } elseif (1 == preg_match('/^([0-9]+)[\s]+([0-9]+)[\s]+obj/iU', substr($pdfData, $offset, 33), $matches)) {
                // object start
                $objtype = 'obj';
                $objval = (int) $matches[1].'_'.(int) $matches[2];
                $offset += \strlen($matches[0]);
            } elseif (($numlen = strspn($pdfData, '+-.0123456789', $offset)) > 0) {
                // numeric object (kept as the raw text)
                $objtype = 'numeric';
                $objval = substr($pdfData, $offset, $numlen);
                $offset += $numlen;
            }
            break;
    }
    return [$objtype, $objval, $offset];
}
/**
 * Get Cross-Reference (xref) table and trailer data from PDF document data.
 *
 * With $offset 0 the last "startxref ... %%EOF" block of the document decides where to start.
 * Otherwise $offset is used directly when an 'xref' keyword sits exactly there or when an
 * "N N obj" header is found from there on; failing that, the next "startxref" block is used.
 *
 * @param string $pdfData PDF data
 * @param int    $offset  xref offset (if known)
 * @param array  $xref    previous xref array (if any)
 *
 * @return array containing xref and trailer data
 *
 * @throws \Exception if it was unable to find startxref
 * @throws \Exception if it was unable to find xref
 */
protected function getXrefData(string $pdfData, int $offset = 0, array $xref = []): array
{
    $startxrefPattern = '/[\r\n]startxref[\s]*[\r\n]+([0-9]+)[\s]*[\r\n]+%%EOF/i';
    $pdfLength = \strlen($pdfData);
    if ($offset > $pdfLength) {
        // an offset beyond the data (e.g. a broken /Prev value) cannot be searched from
        throw new \Exception('Unable to find xref (PDF corrupted?)');
    }
    // each search keeps its own match array: a failed preg_match() empties the array
    // it is given, which must not wipe the result of another search
    if (0 === $offset) {
        // find last startxref
        $pregResult = preg_match_all($startxrefPattern, $pdfData, $allMatches, \PREG_SET_ORDER, $offset);
        if (0 == $pregResult) {
            throw new \Exception('Unable to find startxref');
        }
        $lastMatch = array_pop($allMatches);
        $startxref = (int) $lastMatch[1];
    } elseif (strpos($pdfData, 'xref', $offset) === $offset) {
        // Already pointing at the xref table
        $startxref = $offset;
    } elseif (preg_match('/([0-9]+[\s][0-9]+[\s]obj)/i', $pdfData, $objMatch, \PREG_OFFSET_CAPTURE, $offset)) {
        // Cross-Reference Stream object
        $startxref = $offset;
    } elseif (preg_match($startxrefPattern, $pdfData, $startxrefMatch, \PREG_OFFSET_CAPTURE, $offset)) {
        // startxref found
        $startxref = (int) $startxrefMatch[1][0];
    } else {
        throw new \Exception('Unable to find startxref');
    }
    if ($startxref > $pdfLength) {
        throw new \Exception('Unable to find xref (PDF corrupted?)');
    }
    // check xref position
    if (strpos($pdfData, 'xref', $startxref) === $startxref) {
        // Cross-Reference
        $xref = $this->decodeXref($pdfData, $startxref, $xref);
    } else {
        // Cross-Reference Stream
        $xref = $this->decodeXrefStream($pdfData, $startxref, $xref);
    }
    if (empty($xref)) {
        throw new \Exception('Unable to find xref');
    }
    return $xref;
}
/**
 * Parses PDF data and returns extracted data as array.
 *
 * Anything in front of the "%PDF-" header is dropped, the cross-reference data is read,
 * and every object recorded with a positive offset is parsed with its streams decoded.
 *
 * @param string $data PDF data to parse
 *
 * @return array two entries: the xref/trailer data, and the parsed objects keyed by
 *               "[object number]_[generation number]"
 *
 * @throws \Exception if empty PDF data given
 * @throws \Exception if PDF data missing %PDF header
 */
public function parseData(string $data): array
{
    if (empty($data)) {
        throw new \Exception('Empty PDF data given.');
    }
    // find the pdf header starting position
    $trimpos = strpos($data, '%PDF-');
    if (false === $trimpos) {
        throw new \Exception('Invalid PDF data: missing %PDF header.');
    }
    // get PDF content string
    $pdfData = $trimpos > 0 ? substr($data, $trimpos) : $data;
    // get xref and trailer data
    $xref = $this->getXrefData($pdfData);
    // parse all document objects
    $objects = [];
    // the 'xref' key is absent when the cross-reference table lists no in-use object
    foreach ($xref['xref'] ?? [] as $obj => $offset) {
        // entries with offset -1 (objects stored in object streams) are not parsed here
        if (!isset($objects[$obj]) && ($offset > 0)) {
            // decode objects with positive offset
            $objects[$obj] = $this->getIndirectObject($pdfData, $xref, $obj, $offset, true);
        }
    }
    return [$xref, $objects];
}
}

View file

@ -1,51 +0,0 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
*
* @date 2017-01-03
*
* @license LGPLv3
*
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
namespace Smalot\PdfParser\XObject;
use Smalot\PdfParser\Header;
use Smalot\PdfParser\Page;
use Smalot\PdfParser\PDFObject;
/**
 * Class Form
 *
 * A Page subclass whose text is extracted from its own content.
 */
class Form extends Page
{
    /**
     * Wraps this form's content in a PDFObject with an empty Header and returns its text.
     *
     * @param Page|null $page not used: the form itself is passed on as the page
     */
    public function getText(?Page $page = null): string
    {
        $header = new Header([], $this->document);
        $contents = new PDFObject($this->document, $header, $this->content, $this->config);
        return $contents->getText($this);
    }
}

View file

@ -1,47 +0,0 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
*
* @date 2017-01-03
*
* @license LGPLv3
*
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
namespace Smalot\PdfParser\XObject;
use Smalot\PdfParser\Page;
use Smalot\PdfParser\PDFObject;
/**
 * Class Image
 *
 * XObject that never contributes any text.
 */
class Image extends PDFObject
{
    /**
     * Always returns an empty string.
     *
     * @param Page|null $page not used by this implementation
     *
     * @return string
     */
    public function getText(?Page $page = null): string
    {
        // Explicit "?Page": a typed parameter that is only nullable through its
        // "= null" default is deprecated as of PHP 8.4.
        return '';
    }
}

View file

@ -1,75 +0,0 @@
<?php
/**
* @file This file is part of the PdfParser library.
*
* @author Konrad Abicht <k.abicht@gmail.com>
* @date 2021-02-09
*
* @license LGPLv3
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*
* --------------------------------------------------------------------------------------
*
* About:
* This file provides an alternative to the Composer-approach.
* Include it into your project and all required files of PDFParser will be loaded automatically.
* Please use it only, if Composer is not available.
*
* How to use:
* 1. include this file as it is OR copy and rename it as you like (and then include it)
* 2. afterwards you can use PDFParser classes
* Done.
*/
/**
 * Requires every PHP file located below the given directory.
 *
 * Dot entries ("." and "..") are skipped, sub directories are processed
 * recursively, and any other entry is loaded with require_once when its
 * extension is exactly "php".
 *
 * @param string $dir directory to scan
 */
function requireFilesOfFolder($dir)
{
    foreach (new DirectoryIterator($dir) as $entry) {
        if ($entry->isDot()) {
            continue;
        }

        $path = $entry->getPathname();

        if ($entry->isDir()) {
            // Descend first; nothing else to do for a directory entry.
            requireFilesOfFolder($path);
            continue;
        }

        if ('php' === $entry->getExtension()) {
            require_once $path;
        }
    }
}
// Directory containing this file; every path below is built relative to it.
$rootFolder = __DIR__;
// Manually require files, which can't be loaded automatically that easily.
// NOTE(review): presumably these declare base classes that other files extend, so they
// have to be defined before the directory scan below reaches their subclasses - confirm.
require_once $rootFolder.'/Element.php';
require_once $rootFolder.'/PDFObject.php';
require_once $rootFolder.'/Font.php';
require_once $rootFolder.'/Page.php';
require_once $rootFolder.'/Element/ElementString.php';
require_once $rootFolder.'/Encoding/AbstractEncoding.php';
/*
 * Load the rest of the PDFParser files found in $rootFolder and its sub folders.
 * Don't worry, it won't load files multiple times: the files required above are
 * skipped because requireFilesOfFolder() uses require_once as well.
 */
requireFilesOfFolder($rootFolder);

View file

@ -1,11 +0,0 @@
# libcurlemu cURL PHP function emulator
If your PHP installation was not compiled with cURL enabled, you may be able to get the crawler running by using the libcurlemu library. This library will attempt to emulate the normal cURL functions even if the cURL package isn't installed. I can't guarantee it will work, but the script will definitely try!
To install the libcurlemu library follow these steps:
1. Go to the libcurlemu GitHub repository and download the code as a ZIP package: https://github.com/m1k3lm/libcurlemu
2. Unzip the individual files (not the 'libcurlemu-master' folder) into this directory.
3. Try running the Crawler again via the administration UI.
If you're forced to use this method and still have issues, please let me know by submitting a GitHub issue for tracking. Thanks!