Re-upload 3rd party libraries

This commit is contained in:
Brian Huisman 2023-04-20 10:47:11 -04:00
parent 4f459b61d2
commit 84e38a5663
168 changed files with 26854 additions and 51 deletions

8
.gitignore vendored
View file

@ -4,10 +4,4 @@ orcinus/GeoIP2/GeoLite2-Country.mmdb
orcinus/GeoIP2/geoip2.phar
orcinus/GeoIP2/COPYRIGHT.txt
orcinus/config.ini.php
*.7z
orcinus/libcurlemu/class_HTTPRetriever.php
orcinus/libcurlemu/example.php
orcinus/libcurlemu/libcurlemu.inc.php
orcinus/libcurlemu/libcurlexternal.inc.php
orcinus/libcurlemu/libcurlnative.inc.php
orcinus/libcurlemu/README.txt
*.7z

View file

@ -50,13 +50,13 @@ function OS_countUp($time) {
// ***** Load Maxmind GeoIP2
if (!class_exists('GeoIp2\Database\Reader')) {
if (file_exists(__DIR__.'/GeoIP2/geoip2.phar')) {
include __DIR__.'/GeoIP2/geoip2.phar';
if (file_exists(__DIR__.'/geoip2/geoip2.phar')) {
include __DIR__.'/geoip2/geoip2.phar';
}
}
if (class_exists('GeoIp2\Database\Reader')) {
if (file_exists(__DIR__.'/GeoIP2/GeoLite2-Country.mmdb'))
$_GEOIP2 = new GeoIp2\Database\Reader(__DIR__.'/GeoIP2/GeoLite2-Country.mmdb');
if (file_exists(__DIR__.'/geoip2/GeoLite2-Country.mmdb'))
$_GEOIP2 = new GeoIp2\Database\Reader(__DIR__.'/geoip2/GeoLite2-Country.mmdb');
} else $_GEOIP2 = false;

View file

@ -395,10 +395,10 @@ if (!$_ODATA['admin_from']) {
// ***** Load and Initialize PHPMailer
if (!class_exists('PHPMailer\PHPMailer\PHPMailer')) {
if (file_exists(__DIR__.'/PHPMailer/PHPMailer.php')) {
include __DIR__.'/PHPMailer/PHPMailer.php';
include __DIR__.'/PHPMailer/Exception.php';
include __DIR__.'/PHPMailer/SMTP.php';
if (file_exists(__DIR__.'/phpmailer/src/PHPMailer.php')) {
include __DIR__.'/phpmailer/src/PHPMailer.php';
include __DIR__.'/phpmailer/src/Exception.php';
include __DIR__.'/phpmailer/src/SMTP.php';
}
}
if (class_exists('PHPMailer\PHPMailer\PHPMailer')) {
@ -571,38 +571,6 @@ if (!$_ODATA['s_result_template']) {
</section>');
}
// {{{{{ Initialize the Mustache templating engine
/**
 * Context object passed to the Mustache engine when rendering the
 * 's_result_template' template. Carries the application version and
 * any error messages accumulated before rendering.
 */
class OS_Mustache {
  public $errors;
  public $version;
  public $searchable;

  /**
   * Copy the application version out of the global $_ODATA settings.
   */
  public function __construct() {
    global $_ODATA;

    $this->version = $_ODATA['version'];
  }

  /**
   * Append a message to the error list, creating the container object
   * (with its 'error_list' array) the first time an error is recorded.
   *
   * @param mixed $text Value appended to $this->errors->error_list
   */
  public function addError($text) {
    // Container already present: just append
    if ($this->errors) {
      $this->errors->error_list[] = $text;
      return;
    }

    // First error: build the container with the message as its only entry
    $container = new stdClass();
    $container->error_list = array($text);
    $this->errors = $container;
  }

  /**
   * Render $_ODATA['s_result_template'] with this object as the
   * template context and echo the result.
   *
   * The Mustache autoloader is only pulled in here, so the engine is
   * never loaded unless something is actually rendered.
   */
  public function render() {
    global $_ODATA;

    require_once __DIR__.'/Mustache/Autoloader.php';
    Mustache_Autoloader::register();

    $engine = new Mustache_Engine(array('entity_flags' => ENT_QUOTES));
    echo $engine->render($_ODATA['s_result_template'], $this);
  }
}
// Purge entries from the search query log older than
// 's_limit_query_log' ago

View file

@ -658,15 +658,15 @@ foreach ($_RDATA['sp_entity'] as $key => $value)
// ***** Load PDF parser
if (!class_exists('\Smalot\PdfParser\Parser'))
if (file_exists(__DIR__.'/PdfParser/alt_autoload.php-dist'))
include __DIR__.'/PdfParser/alt_autoload.php-dist';
if (file_exists(__DIR__.'/pdfparser/alt_autoload.php-dist'))
include __DIR__.'/pdfparser/alt_autoload.php-dist';
if (class_exists('\Smalot\PdfParser\Parser')) {
$config = new \Smalot\PdfParser\Config();
$config->setRetainImageContent(false);
$config->setDecodeMemoryLimit(16777216);
$_PDF = new \Smalot\PdfParser\Parser([], $config);
} else {
OS_crawlLog('Could not include \'PdfParser\'; PDFs will not be indexed', 1);
OS_crawlLog('Could not include \'PDFParser\'; PDFs will not be indexed', 1);
$_PDF = false;
}
@ -1307,7 +1307,7 @@ while ($_cURL && count($_RDATA['sp_queue'])) {
// Discard the PDF text if it contains Unicode control
// characters; some of these might be simple PDF ligatures
// but PdfParser doesn't support them; any content that
// but PDFParser doesn't support them; any content that
// contains these is usually mostly gobbledegook
if (strpos($data['content'], "\u{3}") === false &&
strpos($data['content'], "\u{2}") === false &&

8
orcinus/geoip2/README.md Normal file
View file

@ -0,0 +1,8 @@
# Maxmind GeoIP2 Geolocation
To enable the Geolocation service for items in the Query Log, follow the steps below:
1. Download the latest Maxmind GeoIP2 .phar file from GitHub, and place it in the same directory as this README file: https://github.com/maxmind/GeoIP2-php/releases
2. Login at the Maxmind website; account registration is free: https://www.maxmind.com/en/account/login
3. Navigate to the "Downloads" area of your Maxmind account, and download the GeoLite2 Country (not CSV) GZIP package.
4. Unzip the 'GeoLite2-Country.mmdb' file and place it in the same directory as this README file.

View file

@ -0,0 +1,621 @@
# mustache.js - Logic-less {{mustache}} templates with JavaScript
> What could be more logical awesome than no logic at all?
[![Build Status](https://travis-ci.org/janl/mustache.js.svg?branch=master)](https://travis-ci.org/janl/mustache.js)
[mustache.js](http://github.com/janl/mustache.js) is a zero-dependency implementation of the [mustache](http://mustache.github.com/) template system in JavaScript.
[Mustache](http://mustache.github.com/) is a logic-less template syntax. It can be used for HTML, config files, source code - anything. It works by expanding tags in a template using values provided in a hash or object.
We call it "logic-less" because there are no if statements, else clauses, or for loops. Instead there are only tags. Some tags are replaced with a value, some nothing, and others a series of values.
For a language-agnostic overview of mustache's template syntax, see the `mustache(5)` [manpage](http://mustache.github.com/mustache.5.html).
## Where to use mustache.js?
You can use mustache.js to render mustache templates anywhere you can use JavaScript. This includes web browsers, server-side environments such as [Node.js](http://nodejs.org/), and [CouchDB](http://couchdb.apache.org/) views.
mustache.js ships with support for the [CommonJS](http://www.commonjs.org/) module API, the [Asynchronous Module Definition](https://github.com/amdjs/amdjs-api/wiki/AMD) API (AMD) and [ECMAScript modules](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Modules).
In addition to being a package to be used programmatically, you can use it as a [command line tool](#command-line-tool).
And this will be your templates after you use Mustache:
!['stache](https://cloud.githubusercontent.com/assets/288977/8779228/a3cf700e-2f02-11e5-869a-300312fb7a00.gif)
## Install
You can get Mustache via [npm](http://npmjs.com).
```bash
$ npm install mustache --save
```
## Usage
Below is a quick example of how to use mustache.js:
```js
var view = {
title: "Joe",
calc: function () {
return 2 + 4;
}
};
var output = Mustache.render("{{title}} spends {{calc}}", view);
```
In this example, the `Mustache.render` function takes two parameters: 1) the [mustache](http://mustache.github.com/) template and 2) a `view` object that contains the data and code needed to render the template.
## Templates
A [mustache](http://mustache.github.com/) template is a string that contains any number of mustache tags. Tags are indicated by the double mustaches that surround them. `{{person}}` is a tag, as is `{{#person}}`. In both examples we refer to `person` as the tag's key. There are several types of tags available in mustache.js, described below.
There are several techniques that can be used to load templates and hand them to mustache.js, here are two of them:
#### Include Templates
If you need a template for a dynamic part in a static website, you can consider including the template in the static HTML file to avoid loading templates separately. Here's a small example:
```js
// file: render.js
function renderHello() {
var template = document.getElementById('template').innerHTML;
var rendered = Mustache.render(template, { name: 'Luke' });
document.getElementById('target').innerHTML = rendered;
}
```
```html
<html>
<body onload="renderHello()">
<div id="target">Loading...</div>
<script id="template" type="x-tmpl-mustache">
Hello {{ name }}!
</script>
<script src="https://unpkg.com/mustache@latest"></script>
<script src="render.js"></script>
</body>
</html>
```
#### Load External Templates
If your templates reside in individual files, you can load them asynchronously and render them when they arrive. Another example using [fetch](https://developer.mozilla.org/en-US/docs/Web/API/Fetch_API/Using_Fetch):
```js
function renderHello() {
fetch('template.mustache')
.then((response) => response.text())
.then((template) => {
var rendered = Mustache.render(template, { name: 'Luke' });
document.getElementById('target').innerHTML = rendered;
});
}
```
### Variables
The most basic tag type is a simple variable. A `{{name}}` tag renders the value of the `name` key in the current context. If there is no such key, nothing is rendered.
All variables are HTML-escaped by default. If you want to render unescaped HTML, use the triple mustache: `{{{name}}}`. You can also use `&` to unescape a variable.
If you'd like to change HTML-escaping behavior globally (for example, to template non-HTML formats), you can override Mustache's escape function. For example, to disable all escaping: `Mustache.escape = function(text) {return text;};`.
If you want `{{name}}` _not_ to be interpreted as a mustache tag, but rather to appear exactly as `{{name}}` in the output, you must change and then restore the default delimiter. See the [Custom Delimiters](#custom-delimiters) section for more information.
View:
```json
{
"name": "Chris",
"company": "<b>GitHub</b>"
}
```
Template:
```
* {{name}}
* {{age}}
* {{company}}
* {{{company}}}
* {{&company}}
{{=<% %>=}}
* {{company}}
<%={{ }}=%>
```
Output:
```html
* Chris
*
* &lt;b&gt;GitHub&lt;/b&gt;
* <b>GitHub</b>
* <b>GitHub</b>
* {{company}}
```
JavaScript's dot notation may be used to access keys that are properties of objects in a view.
View:
```json
{
"name": {
"first": "Michael",
"last": "Jackson"
},
"age": "RIP"
}
```
Template:
```html
* {{name.first}} {{name.last}}
* {{age}}
```
Output:
```html
* Michael Jackson
* RIP
```
### Sections
Sections render blocks of text zero or more times, depending on the value of the key in the current context.
A section begins with a pound and ends with a slash. That is, `{{#person}}` begins a `person` section, while `{{/person}}` ends it. The text between the two tags is referred to as that section's "block".
The behavior of the section is determined by the value of the key.
#### False Values or Empty Lists
If the `person` key does not exist, or exists and has a value of `null`, `undefined`, `false`, `0`, or `NaN`, or is an empty string or an empty list, the block will not be rendered.
View:
```json
{
"person": false
}
```
Template:
```html
Shown.
{{#person}}
Never shown!
{{/person}}
```
Output:
```html
Shown.
```
#### Non-Empty Lists
If the `person` key exists and is not `null`, `undefined`, or `false`, and is not an empty list the block will be rendered one or more times.
When the value is a list, the block is rendered once for each item in the list. The context of the block is set to the current item in the list for each iteration. In this way we can loop over collections.
View:
```json
{
"stooges": [
{ "name": "Moe" },
{ "name": "Larry" },
{ "name": "Curly" }
]
}
```
Template:
```html
{{#stooges}}
<b>{{name}}</b>
{{/stooges}}
```
Output:
```html
<b>Moe</b>
<b>Larry</b>
<b>Curly</b>
```
When looping over an array of strings, a `.` can be used to refer to the current item in the list.
View:
```json
{
"musketeers": ["Athos", "Aramis", "Porthos", "D'Artagnan"]
}
```
Template:
```html
{{#musketeers}}
* {{.}}
{{/musketeers}}
```
Output:
```html
* Athos
* Aramis
* Porthos
* D'Artagnan
```
If the value of a section variable is a function, it will be called in the context of the current item in the list on each iteration.
View:
```js
{
"beatles": [
{ "firstName": "John", "lastName": "Lennon" },
{ "firstName": "Paul", "lastName": "McCartney" },
{ "firstName": "George", "lastName": "Harrison" },
{ "firstName": "Ringo", "lastName": "Starr" }
],
"name": function () {
return this.firstName + " " + this.lastName;
}
}
```
Template:
```html
{{#beatles}}
* {{name}}
{{/beatles}}
```
Output:
```html
* John Lennon
* Paul McCartney
* George Harrison
* Ringo Starr
```
#### Functions
If the value of a section key is a function, it is called with the section's literal block of text, un-rendered, as its first argument. The second argument is a special rendering function that uses the current view as its view argument. It is called in the context of the current view object.
View:
```js
{
"name": "Tater",
"bold": function () {
return function (text, render) {
return "<b>" + render(text) + "</b>";
}
}
}
```
Template:
```html
{{#bold}}Hi {{name}}.{{/bold}}
```
Output:
```html
<b>Hi Tater.</b>
```
### Inverted Sections
An inverted section opens with `{{^section}}` instead of `{{#section}}`. The block of an inverted section is rendered only if the value of that section's tag is `null`, `undefined`, `false`, *falsy* or an empty list.
View:
```json
{
"repos": []
}
```
Template:
```html
{{#repos}}<b>{{name}}</b>{{/repos}}
{{^repos}}No repos :({{/repos}}
```
Output:
```html
No repos :(
```
### Comments
Comments begin with a bang and are ignored. The following template:
```html
<h1>Today{{! ignore me }}.</h1>
```
Will render as follows:
```html
<h1>Today.</h1>
```
Comments may contain newlines.
### Partials
Partials begin with a greater than sign, like {{> box}}.
Partials are rendered at runtime (as opposed to compile time), so recursive partials are possible. Just avoid infinite loops.
They also inherit the calling context. Whereas in ERB you may have this:
```html+erb
<%= partial :next_more, :start => start, :size => size %>
```
Mustache requires only this:
```html
{{> next_more}}
```
Why? Because the `next_more.mustache` file will inherit the `size` and `start` variables from the calling context. In this way you may want to think of partials as includes, imports, template expansion, nested templates, or subtemplates, even though those aren't literally the case here.
For example, this template and partial:
base.mustache:
<h2>Names</h2>
{{#names}}
{{> user}}
{{/names}}
user.mustache:
<strong>{{name}}</strong>
Can be thought of as a single, expanded template:
```html
<h2>Names</h2>
{{#names}}
<strong>{{name}}</strong>
{{/names}}
```
In mustache.js an object of partials may be passed as the third argument to `Mustache.render`. The object should be keyed by the name of the partial, and its value should be the partial text.
```js
Mustache.render(template, view, {
user: userTemplate
});
```
### Custom Delimiters
Custom delimiters can be used in place of `{{` and `}}` by setting the new values in JavaScript or in templates.
#### Setting in JavaScript
The `Mustache.tags` property holds an array consisting of the opening and closing tag values. Set custom values by passing a new array of tags to `render()`, which gets honored over the default values, or by overriding the `Mustache.tags` property itself:
```js
var customTags = [ '<%', '%>' ];
```
##### Pass Value into Render Method
```js
Mustache.render(template, view, {}, customTags);
```
##### Override Tags Property
```js
Mustache.tags = customTags;
// Subsequent parse() and render() calls will use customTags
```
#### Setting in Templates
Set Delimiter tags start with an equals sign and change the tag delimiters from `{{` and `}}` to custom strings.
Consider the following contrived example:
```html+erb
* {{ default_tags }}
{{=<% %>=}}
* <% erb_style_tags %>
<%={{ }}=%>
* {{ default_tags_again }}
```
Here we have a list with three items. The first item uses the default tag style, the second uses ERB style as defined by the Set Delimiter tag, and the third returns to the default style after yet another Set Delimiter declaration.
According to [ctemplates](https://htmlpreview.github.io/?https://raw.githubusercontent.com/OlafvdSpek/ctemplate/master/doc/howto.html), this "is useful for languages like TeX, where double-braces may occur in the text and are awkward to use for markup."
Custom delimiters may not contain whitespace or the equals sign.
## Pre-parsing and Caching Templates
By default, when mustache.js first parses a template it keeps the full parsed token tree in a cache. The next time it sees that same template it skips the parsing step and renders the template much more quickly. If you'd like, you can do this ahead of time using `mustache.parse`.
```js
Mustache.parse(template);
// Then, sometime later.
Mustache.render(template, view);
```
## Command line tool
mustache.js is shipped with a Node.js based command line tool. It might be installed as a global tool on your computer to render a mustache template of some kind:
```bash
$ npm install -g mustache
$ mustache dataView.json myTemplate.mustache > output.html
```
It also supports stdin:
```bash
$ cat dataView.json | mustache - myTemplate.mustache > output.html
```
or as a package.json `devDependency` in a build process maybe?
```bash
$ npm install mustache --save-dev
```
```json
{
"scripts": {
"build": "mustache dataView.json myTemplate.mustache > public/output.html"
}
}
```
```bash
$ npm run build
```
The command line tool is basically a wrapper around `Mustache.render` so you get all the features.
If your templates use partials you should pass paths to partials using `-p` flag:
```bash
$ mustache -p path/to/partial1.mustache -p path/to/partial2.mustache dataView.json myTemplate.mustache
```
## Plugins for JavaScript Libraries
mustache.js may be built specifically for several different client libraries, including the following:
- [jQuery](http://jquery.com/)
- [MooTools](http://mootools.net/)
- [Dojo](http://www.dojotoolkit.org/)
- [YUI](http://developer.yahoo.com/yui/)
- [qooxdoo](http://qooxdoo.org/)
These may be built using [Rake](http://rake.rubyforge.org/) and one of the following commands:
```bash
$ rake jquery
$ rake mootools
$ rake dojo
$ rake yui3
$ rake qooxdoo
```
## TypeScript
Since the source code of this package is written in JavaScript, we follow the [TypeScript publishing docs](https://www.typescriptlang.org/docs/handbook/declaration-files/publishing.html) preferred approach
by having type definitions available via [@types/mustache](https://www.npmjs.com/package/@types/mustache).
## Testing
In order to run the tests you'll need to install [Node.js](http://nodejs.org/).
You also need to install the sub module containing [Mustache specifications](http://github.com/mustache/spec) in the project root.
```bash
$ git submodule init
$ git submodule update
```
Install dependencies.
```bash
$ npm install
```
Then run the tests.
```bash
$ npm test
```
The test suite consists of both unit and integration tests. If a template isn't rendering correctly for you, you can make a test for it by doing the following:
1. Create a template file named `mytest.mustache` in the `test/_files`
directory. Replace `mytest` with the name of your test.
2. Create a corresponding view file named `mytest.js` in the same directory.
This file should contain a JavaScript object literal enclosed in
parentheses. See any of the other view files for an example.
3. Create a file with the expected output in `mytest.txt` in the same
directory.
Then, you can run the test with:
```bash
$ TEST=mytest npm run test-render
```
### Browser tests
Browser tests are not included in `npm test` as they run for too long, although they are run automatically on Travis when merged into master. Run browser tests locally in any browser:
```bash
$ npm run test-browser-local
```
then point your browser to `http://localhost:8080/__zuul`
## Who uses mustache.js?
An updated list of mustache.js users is kept [on the Github wiki](https://github.com/janl/mustache.js/wiki/Beard-Competition). Add yourself or your company if you use mustache.js!
## Contributing
mustache.js is a mature project, but it continues to actively invite maintainers. You can help out a high-profile project that is used in a lot of places on the web. No big commitment required, if all you do is review a single [Pull Request](https://github.com/janl/mustache.js/pulls), you are a maintainer. And a hero.
### Your First Contribution
- review a [Pull Request](https://github.com/janl/mustache.js/pulls)
- fix an [Issue](https://github.com/janl/mustache.js/issues)
- update the [documentation](https://github.com/janl/mustache.js#usage)
- make a website
- write a tutorial
## Thanks
mustache.js wouldn't kick ass if it weren't for these fine souls:
* Chris Wanstrath / defunkt
* Alexander Lang / langalex
* Sebastian Cohnen / tisba
* J Chris Anderson / jchris
* Tom Robinson / tlrobinson
* Aaron Quint / quirkey
* Douglas Crockford
* Nikita Vasilyev / NV
* Elise Wood / glytch
* Damien Mathieu / dmathieu
* Jakub Kuźma / qoobaa
* Will Leinweber / will
* dpree
* Jason Smith / jhs
* Aaron Gibralter / agibralter
* Ross Boucher / boucher
* Matt Sanford / mzsanford
* Ben Cherry / bcherry
* Michael Jackson / mjackson
* Phillip Johnsen / phillipj
* David da Silva Contín / dasilvacontin

View file

@ -0,0 +1,99 @@
CURL Extension Emulation Library
Version 1.0.4
Copyright 2004-2007, Steve Blinch
http://code.blitzaffe.com
============================================================================
DESCRIPTION
Provides a pure-PHP implementation of the PHP CURL extension, for use on
systems which do not already have the CURL extension installed. It emulates
all of the curl_* functions normally provided by the CURL extension itself.
This will automatically detect and use the best CURL implementation available
on your server. It will attempt the following, in order:
1) Check for the existence of the "real" CURL PHP Extension. If it is
loaded, the library will do nothing (and it will not interfere with the
"real" extension).
2) Check for the existence of the CURL console binary (usually located in
/usr/bin/curl). If found, the library will emulate the CURL PHP
extension (including all curl_* functions) and use the console binary
to execute all requests.
3) If neither the "real" CURL PHP Extension nor the CURL console binary
are available, the library will emulate the CURL PHP extension (including
all curl_* functions) using a native, pure-PHP HTTP client implementation.
This implementation is somewhat limited, but it provides support for most
of the common CURL options. HTTPS (SSL) support is available in this
mode under PHP 4.3.0 if the OpenSSL Extension is loaded.
Thus, by including this library in your project, you can rely on having some
level of CURL support regardless of the configuration of the server on which
it is being used.
HISTORY
1.0.4 (not released)
- Fixed HTTPRetriever double-inclusion bug.
USAGE
Simply copy all of the libcurlemu files into your project directory, then:
require_once("libcurlemu.inc.php");
After this, you can use all of the curl_* functions documented in the PHP
Manual.
EXAMPLE
// CURL Extension Emulation Library Example
//
// Usage should be straightforward; you simply use this script exactly as you
// would normally use the PHP CURL extension functions.
// first, include libcurlemu.inc.php
require_once('libcurlemu.inc.php');
// at this point, libcurlemu has detected the best available CURL solution
// (either the CURL extension, if available, or the CURL commandline binary,
// if available, or as a last resort, HTTPRetriever, our native-PHP HTTP
// client implementation) and has implemented the curl_* functions if
// necessary, so you can use CURL normally and safely assume that all CURL
// functions are available.
// the rest of this example code is copied straight from the PHP manual's
// reference for the curl_init() function, and will work fine with libcurlemu
// create a new CURL resource
$ch = curl_init();
// set URL and other appropriate options
curl_setopt($ch, CURLOPT_URL, "http://www.example.com/");
curl_setopt($ch, CURLOPT_HEADER, false);
// grab URL and pass it to the browser
curl_exec($ch);
// close CURL resource, and free up system resources
curl_close($ch);
LICENSE
This script is free software; you can redistribute it and/or modify it under the
terms of the GNU General Public License as published by the Free Software
Foundation; either version 2 of the License, or (at your option) any later
version.
This script is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
details.
You should have received a copy of the GNU General Public License along
with this script; if not, write to the Free Software Foundation, Inc.,
59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,35 @@
<?php
// CURL Extension Emulation Library Example
//
// Usage should be straightforward; you simply use this script exactly as you
// would normally use the PHP CURL extension functions.

// first, include libcurlemu.inc.php; the path is anchored to this file's
// directory (the same way libcurlemu.inc.php locates its own siblings) so
// the example does not depend on include_path or the working directory
require_once(dirname(__FILE__).'/libcurlemu.inc.php');

// at this point, libcurlemu has detected the best available CURL solution
// (either the CURL extension, if available, or the CURL commandline binary,
// if available, or as a last resort, HTTPRetriever, our native-PHP HTTP
// client implementation) and has implemented the curl_* functions if
// necessary, so you can use CURL normally and safely assume that all CURL
// functions are available.

// the rest of this example code is copied straight from the PHP manual's
// reference for the curl_init() function, and will work fine with libcurlemu

// create a new CURL resource
$ch = curl_init();

// curl_init() reports failure by returning false; compare strictly so that
// any other handle value an implementation may return is still accepted
if ($ch === false) {
	exit("curl_init() failed; no CURL handle could be created\n");
}

// set URL and other appropriate options
curl_setopt($ch, CURLOPT_URL, "http://www.example.com/");
curl_setopt($ch, CURLOPT_HEADER, false);

// grab URL and pass it to the browser
curl_exec($ch);

// close CURL resource, and free up system resources
curl_close($ch);

View file

@ -0,0 +1,111 @@
<?php
/* CURL Extension Emulation Library
* Version 1.0.4
* Copyright 2004-2007, Steve Blinch
* http://code.blitzaffe.com
* ============================================================================
*
* DESCRIPTION
*
* Provides a pure-PHP implementation of the PHP CURL extension, for use on
* systems which do not already have the CURL extension installed. It emulates
* all of the curl_* functions normally provided by the CURL extension itself.
*
* This will automatically detect and use the best CURL implementation available
* on your server. It will attempt the following, in order:
*
* 1) Check for the existence of the "real" CURL PHP Extension. If it is
* loaded, the library will do nothing (and it will not interfere with the
* "real" extension).
* 2) Check for the existence of the CURL console binary (usually located in
* /usr/bin/curl). If found, the library will emulate the CURL PHP
* extension (including all curl_* functions) and use the console binary
* to execute all requests.
* 3) If neither the "real" CURL PHP Extension nor the CURL console binary
* are available, the library will emulate the CURL PHP extension (including
* all curl_* functions) using a native, pure-PHP HTTP client implementation.
* This implementation is somewhat limited, but it provides support for most
* of the common CURL options. HTTPS (SSL) support is available in this
* mode under PHP 4.3.0 if the OpenSSL Extension is loaded.
*
* Thus, by including this library in your project, you can rely on having some
* level of CURL support regardless of the configuration of the server on which
* it is being used.
*
*
* HISTORY
*
* 1.0.4 (not released)
* - Fixed HTTPRetriever double-inclusion bug.
*
*
* USAGE
*
* Simply copy all of the libcurlemu files into your project directory, then:
*
* require_once("libcurlemu.inc.php");
*
* After this, you can use all of the curl_* functions documented in the PHP
* Manual.
*
*
* EXAMPLE
*
* // CURL Extension Emulation Library Example
* //
* // Usage should be straightforward; you simply use this script exactly as you
* // would normally use the PHP CURL extension functions.
*
* // first, include libcurlemu.inc.php
* require_once('libcurlemu.inc.php');
*
* // at this point, libcurlemu has detected the best available CURL solution
* // (either the CURL extension, if available, or the CURL commandline binary,
* // if available, or as a last resort, HTTPRetriever, our native-PHP HTTP
* // client implementation) and has implemented the curl_* functions if
* // necessary, so you can use CURL normally and safely assume that all CURL
* // functions are available.
*
* // the rest of this example code is copied straight from the PHP manual's
* // reference for the curl_init() function, and will work fine with libcurlemu
*
* // create a new CURL resource
* $ch = curl_init();
*
* // set URL and other appropriate options
* curl_setopt($ch, CURLOPT_URL, "http://www.example.com/");
* curl_setopt($ch, CURLOPT_HEADER, false);
*
* // grab URL and pass it to the browser
* curl_exec($ch);
*
* // close CURL resource, and free up system resources
* curl_close($ch);
*
*
* LICENSE
*
* This script is free software; you can redistribute it and/or modify it under the
* terms of the GNU General Public License as published by the Free Software
* Foundation; either version 2 of the License, or (at your option) any later
* version.
*
* This script is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along
* with this script; if not, write to the Free Software Foundation, Inc.,
* 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
// Only install an emulation when nothing has provided curl_init() yet:
// neither the real CURL extension nor an earlier include of this library.
if (!extension_loaded('curl') && !function_exists('curl_init')) {
	// Ask libcurlexternal.inc.php to return quietly, instead of raising a
	// fatal error, when the CURL console binary is not available, so that
	// the native implementation below can take over. The define is guarded
	// because redefining an existing constant emits a notice/warning when
	// the host script has already set it.
	if (!defined('CURLEXT_MISSING_ABORT')) {
		define('CURLEXT_MISSING_ABORT',true);
	}

	// First choice: wrap the CURL console binary.
	require_once(dirname(__FILE__)."/libcurlexternal.inc.php");

	// Still no curl_init(): fall back to the pure-PHP HTTP client.
	if (!function_exists('curl_init')) {
		if (!class_exists('HTTPRetriever')) {
			require_once(dirname(__FILE__)."/class_HTTPRetriever.php");
		}
		require_once(dirname(__FILE__)."/libcurlnative.inc.php");
	}
}
?>

View file

@ -0,0 +1,664 @@
<?php
/* CURL Extension Emulation Library (Console Binary)
* Copyright 2004-2007, Steve Blinch
* http://code.blitzaffe.com
* ============================================================================
*
* DESCRIPTION
*
* Provides a pure-PHP implementation of the PHP CURL extension, for use on
* systems which do not already have the CURL extension installed. It emulates
* all of the curl_* functions normally provided by the CURL extension itself
* by wrapping the CURL console binary.
*
* This library will automatically detect whether or not the "real" CURL
* extension is installed, and if so, it will not interfere. Thus, it can be
* used to ensure that, one way or another, the CURL functions are available
* for use.
*
* This library is actually a wrapper for the CURL console application (usually
* found in /usr/bin/curl), so you must have the CURL binary installed in order
* to use this script.
*
*
* USAGE
*
* Please see the PHP documentation under the "CURL, Client URL Library
* Functions" section for information about using this library. Almost all of
* the documentation and examples in the PHP manual should work with this
* library.
*
*
* LICENSE
*
* This script is free software; you can redistribute it and/or modify it under the
* terms of the GNU General Public License as published by the Free Software
* Foundation; either version 2 of the License, or (at your option) any later
* version.
*
* This script is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along
* with this script; if not, write to the Free Software Foundation, Inc.,
* 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
// if the real CURL PHP extension is installed, exit without doing anything
if (!extension_loaded("curl")) {
// Location of the CURL console binary. The including script may define
// CURL_PATH before loading this file to override the default.
if (!defined("CURL_PATH")) define("CURL_PATH","/usr/bin/curl");
// if the CURL binary was not found, do one of the following:
// - if CURLEXT_MISSING_ABORT was defined, then exit without implementing the CURL functions
// - if CURLEXT_MISSING_IGNORE was defined, then implement the CURL functions anyway (even
// though they won't work without the CURL binary installed)
// - otherwise, raise a fatal error and halt the script
if (!is_executable(CURL_PATH)) {
    if (defined("CURLEXT_MISSING_ABORT") && CURLEXT_MISSING_ABORT) {
        return;
    } elseif (defined("CURLEXT_MISSING_IGNORE") && CURLEXT_MISSING_IGNORE) {
        // proceed and implement the CURL functions anyway, even though they won't work
    } else {
        trigger_error("CURL extension is not loaded, and the commandline version of CURL was not found at ".CURL_PATH,E_USER_ERROR);
    }
}
define("CURLEXT_VERSION","1.1.0");
// CURLOPT_* option identifiers accepted by curl_setopt()
define('CURLOPT_NOTHING',0);
define('CURLOPT_FILE',10001);
define('CURLOPT_URL',10002);
define('CURLOPT_PORT',3);
define('CURLOPT_PROXY',10004);
define('CURLOPT_USERPWD',10005);
define('CURLOPT_PROXYUSERPWD',10006);
define('CURLOPT_RANGE',10007);
define('CURLOPT_INFILE',10009);
define('CURLOPT_ERRORBUFFER',10010);
define('CURLOPT_WRITEFUNCTION',20011);
define('CURLOPT_READFUNCTION',20012);
define('CURLOPT_TIMEOUT',13);
define('CURLOPT_INFILESIZE',14);
define('CURLOPT_POSTFIELDS',10015);
define('CURLOPT_REFERER',10016);
define('CURLOPT_FTPPORT',10017);
define('CURLOPT_USERAGENT',10018);
define('CURLOPT_LOW_SPEED_LIMIT',19);
define('CURLOPT_LOW_SPEED_TIME',20);
define('CURLOPT_RESUME_FROM',21);
define('CURLOPT_COOKIE',10022);
define('CURLOPT_HTTPHEADER',10023);
define('CURLOPT_HTTPPOST',10024);
define('CURLOPT_SSLCERT',10025);
define('CURLOPT_SSLCERTPASSWD',10026);
define('CURLOPT_SSLKEYPASSWD',10026);
define('CURLOPT_CRLF',27);
define('CURLOPT_QUOTE',10028);
define('CURLOPT_WRITEHEADER',10029);
define('CURLOPT_COOKIEFILE',10031);
define('CURLOPT_SSLVERSION',32);
define('CURLOPT_TIMECONDITION',33);
define('CURLOPT_TIMEVALUE',34);
define('CURLOPT_HTTPREQUEST',10035);
define('CURLOPT_CUSTOMREQUEST',10036);
define('CURLOPT_STDERR',10037);
define('CURLOPT_POSTQUOTE',10039);
define('CURLOPT_WRITEINFO',10040);
define('CURLOPT_VERBOSE',41);
define('CURLOPT_HEADER',42);
define('CURLOPT_NOPROGRESS',43);
define('CURLOPT_NOBODY',44);
define('CURLOPT_FAILONERROR',45);
define('CURLOPT_UPLOAD',46);
define('CURLOPT_POST',47);
define('CURLOPT_FTPLISTONLY',48);
define('CURLOPT_FTPAPPEND',50);
define('CURLOPT_NETRC',51);
define('CURLOPT_FOLLOWLOCATION',52);
define('CURLOPT_FTPASCII',53);
define('CURLOPT_TRANSFERTEXT',53);
define('CURLOPT_PUT',54);
define('CURLOPT_MUTE',55);
define('CURLOPT_PROGRESSFUNCTION',20056);
define('CURLOPT_PROGRESSDATA',10057);
define('CURLOPT_AUTOREFERER',58);
define('CURLOPT_PROXYPORT',59);
define('CURLOPT_POSTFIELDSIZE',60);
define('CURLOPT_HTTPPROXYTUNNEL',61);
define('CURLOPT_INTERFACE',10062);
define('CURLOPT_KRB4LEVEL',10063);
define('CURLOPT_SSL_VERIFYPEER',64);
define('CURLOPT_CAINFO',10065);
define('CURLOPT_PASSWDFUNCTION',20066);
define('CURLOPT_PASSWDDATA',10067);
define('CURLOPT_MAXREDIRS',68);
define('CURLOPT_FILETIME',10069);
define('CURLOPT_TELNETOPTIONS',10070);
define('CURLOPT_MAXCONNECTS',71);
define('CURLOPT_CLOSEPOLICY',72);
define('CURLOPT_CLOSEFUNCTION',20073);
define('CURLOPT_FRESH_CONNECT',74);
define('CURLOPT_FORBID_REUSE',75);
define('CURLOPT_RANDOM_FILE',10076);
define('CURLOPT_EGDSOCKET',10077);
define('CURLOPT_CONNECTTIMEOUT',78);
define('CURLOPT_HEADERFUNCTION',20079);
define('CURLOPT_HTTPGET',80);
define('CURLOPT_SSL_VERIFYHOST',81);
define('CURLOPT_COOKIEJAR',10082);
define('CURLOPT_SSL_CIPHER_LIST',10083);
define('CURLOPT_HTTP_VERSION',84);
define('CURLOPT_FTP_USE_EPSV',85);
define('CURLOPT_SSLCERTTYPE',10086);
define('CURLOPT_SSLKEY',10087);
define('CURLOPT_SSLKEYTYPE',10088);
define('CURLOPT_SSLENGINE',10089);
define('CURLOPT_SSLENGINE_DEFAULT',90);
define('CURLOPT_DNS_USE_GLOBAL_CACHE',91);
define('CURLOPT_DNS_CACHE_TIMEOUT',92);
define('CURLOPT_PREQUOTE',10093);
// curl_setopt() has a `case CURLOPT_RETURNTRANSFER:` but the constant was
// missing from this list; on PHP 8 an undefined constant throws an Error as
// soon as the switch reaches that case. 19913 is the value PHP's own cURL
// extension uses. Guarded in case another part of the library defines it.
if (!defined('CURLOPT_RETURNTRANSFER')) define('CURLOPT_RETURNTRANSFER',19913);
// CURLINFO_* identifiers accepted by curl_getinfo()
define('CURLINFO_EFFECTIVE_URL',1);
define('CURLINFO_HTTP_CODE',2);
define('CURLINFO_FILETIME',14);
define('CURLINFO_TOTAL_TIME',3);
define('CURLINFO_NAMELOOKUP_TIME',4);
define('CURLINFO_CONNECT_TIME',5);
define('CURLINFO_PRETRANSFER_TIME',6);
define('CURLINFO_STARTTRANSFER_TIME',17);
define('CURLINFO_REDIRECT_TIME',19);
define('CURLINFO_REDIRECT_COUNT',20);
define('CURLINFO_SIZE_UPLOAD',7);
define('CURLINFO_SIZE_DOWNLOAD',8);
define('CURLINFO_SPEED_DOWNLOAD',9);
define('CURLINFO_SPEED_UPLOAD',10);
define('CURLINFO_HEADER_SIZE',11);
define('CURLINFO_REQUEST_SIZE',12);
define('CURLINFO_SSL_VERIFYRESULT',13);
define('CURLINFO_CONTENT_LENGTH_DOWNLOAD',15);
define('CURLINFO_CONTENT_LENGTH_UPLOAD',16);
define('CURLINFO_CONTENT_TYPE',18);
// values accepted by CURLOPT_TIMECONDITION
define("TIMECOND_ISUNMODSINCE",1);
define("TIMECOND_IFMODSINCE",2);
/**
 * Look up the name of a CURLOPT_* constant from its value.
 *
 * Scans every defined constant and returns the first one whose name starts
 * with "CURLOPT_" and whose value loosely equals $curlopt.
 *
 * @param mixed $curlopt Option value to look up.
 * @return string|false The constant name, or false when nothing matches.
 */
function _curlopt_name($curlopt) {
    $prefix = "CURLOPT_";
    foreach (get_defined_constants() as $name=>$constant) {
        // only option constants are candidates
        if (substr($name,0,8)!=$prefix) continue;
        if ($constant==$curlopt) return $name;
    }
    return false;
}
/**
 * Initialize a CURL emulation session.
 *
 * Allocates a new entry in the $GLOBALS["_CURLEXT_OPT"] registry and returns
 * its index; the other curl_* emulation functions take that index as $ch.
 *
 * @param string|false $url Optional URL to store for the session.
 * @return int Session handle (1 or greater, so it is never falsy).
 */
function curl_init($url=false) {
    // Post-incrementing an unset counter yields null, which made the first
    // handle falsy and broke `if (!$ch)` checks; start counting at 1 instead.
    if (empty($GLOBALS["_CURLEXT_OPT"]["index"])) $GLOBALS["_CURLEXT_OPT"]["index"] = 1;
    $i = $GLOBALS["_CURLEXT_OPT"]["index"]++;
    // pre-create the containers read by curl_exec(), so a session that never
    // calls curl_setopt() can still be executed without warnings
    $GLOBALS["_CURLEXT_OPT"][$i] = array("url"=>$url,"args"=>array(),"settings"=>array());
    return $i;
}
/**
 * Set an option for a CURL emulation transfer.
 *
 * Options are translated into entries of the session's "settings" array
 * (long option name => value), which curl_exec() later turns into command
 * line switches for the CURL binary, or into entries of the session array
 * itself for behaviour handled in PHP (URL, verbosity, output handles, ...).
 *
 * @param int   $ch     Session handle returned by curl_init().
 * @param int   $option One of the CURLOPT_* constants.
 * @param mixed $value  Value for the option.
 * @return bool True when the option was stored or deliberately ignored;
 *              false when it is unsupported or the value was rejected.
 */
function curl_setopt($ch,$option,$value) {
    $opt = &$GLOBALS["_CURLEXT_OPT"][$ch];
    if (empty($opt["args"])) $opt["args"] = array();
    $args = &$opt["args"];
    if (empty($opt["settings"])) $opt["settings"] = array();
    $settings = &$opt["settings"];
    $handled = true;
    switch($option) {
        case CURLOPT_URL:
            $opt["url"] = $value;
            break;
        case CURLOPT_VERBOSE:
            $opt["verbose"] = $value>0;
            break;
        // For the string options below an empty string is stored as false,
        // which curl_exec() treats as "do not emit this switch".
        case CURLOPT_USERPWD:
            if ($value==="") $value = false;
            $settings["user"] = $value;
            break;
        case CURLOPT_PROXYUSERPWD:
            if ($value==="") $value = false;
            $settings["proxy-user"] = $value;
            break;
        case CURLOPT_COOKIE:
            if ($value==="") $value = false;
            // only accept a real "name=value" cookie string (or a boolean)
            if ( is_bool($value) || (strpos($value,"=")!==false) ) $settings["cookie"] = $value;
            break;
        case CURLOPT_COOKIEFILE:
            if ($value==="") $value = false;
            $settings["cookie"] = $value;
            break;
        case CURLOPT_COOKIEJAR:
            if ($value==="") $value = false;
            $settings["cookie-jar"] = $value;
            break;
        case CURLOPT_CUSTOMREQUEST:
            if ($value==="") $value = false;
            $settings["request"] = $value;
            break;
        case CURLOPT_PROXY:
            if ($value==="") $value = false;
            $settings["proxy"] = $value;
            break;
        case CURLOPT_INTERFACE:
            if ($value==="") $value = false;
            $settings["interface"] = $value;
            break;
        case CURLOPT_KRB4LEVEL:
            if ($value==="") $value = false;
            $settings["krb4"] = $value;
            break;
        case CURLOPT_SSLCERT:
            // "cert" is stored as "filename[:password]"; keep a password that
            // was set earlier. Splitting with a limit of 2 preserves passwords
            // that themselves contain ":".
            $pass = "";
            if (isset($settings["cert"]) && is_string($settings["cert"])) {
                $parts = explode(":",$settings["cert"],2);
                if (isset($parts[1]) && strlen($parts[1])) $pass = ":".$parts[1];
            }
            $settings["cert"] = $value.$pass;
            break;
        case CURLOPT_SSLCERTPASSWD:
            // keep a certificate filename that was set earlier
            $filename = "";
            if (isset($settings["cert"]) && is_string($settings["cert"])) {
                $parts = explode(":",$settings["cert"],2);
                $filename = $parts[0];
            }
            $settings["cert"] = $filename.":".$value;
            break;
        case CURLOPT_RANGE:
            if ($value==="") $value = false;
            $settings["range"] = $value;
            break;
        case CURLOPT_REFERER:
            if ($value==="") $value = false;
            $settings["referer"] = $value;
            break;
        case CURLOPT_NOBODY:
            $settings["head"] = $value>0;
            break;
        case CURLOPT_FAILONERROR:
            $opt["fail_on_error"] = $value>0;
            break;
        case CURLOPT_USERAGENT:
            $settings["user-agent"] = $value;
            break;
        case CURLOPT_HEADER:
            $settings["include"] = $value>0;
            break;
        case CURLOPT_RETURNTRANSFER:
            $opt["return_transfer"] = $value>0;
            break;
        case CURLOPT_TIMEOUT:
            $settings["max-time"] = (int) $value;
            break;
        case CURLOPT_HTTPHEADER:
            // each header becomes its own "--header <value>" argument
            foreach ((array) $value as $header) $args[] = "header=".$header;
            break;
        case CURLOPT_POST:
            $settings["data"]["enabled"] = $value>0;
            break;
        case CURLOPT_POSTFIELDS:
            if ($value==="") $value = false;
            $settings["data"]["value"] = $value;
            break;
        case CURLOPT_SSL_VERIFYPEER:
            $settings["insecure"] = !$value;
            break;
        case CURLOPT_HTTP_VERSION:
            switch ($value){
                case 1:
                    $settings["http1.0"] = true;
                    break;
                case 2:
                    $settings["http1.1"] = true;
                    break;
                case 3:
                    $settings["http2"] = true;
                    break;
            }
            break;
        case CURLOPT_SSL_VERIFYHOST:
            // not supported by the commandline client
            break;
        case CURLOPT_FOLLOWLOCATION:
            $settings["location"] = $value>0;
            break;
        case CURLOPT_PUT:
            $settings["upload-file"]["enabled"] = $value>0;
            break;
        case CURLOPT_INFILE:
            if (is_resource($value)) {
                // The CURL extension accepts a file handle, but the CURL binary
                // wants a filename. Since a filename cannot be derived from a
                // handle, copy the handle's contents to a temporary file and
                // pass that file's name to the binary.
                $tmpfilename = tempnam(sys_get_temp_dir(),"cif");
                $fp = @fopen($tmpfilename,"w");
                if (!$fp) {
                    trigger_error("CURL emulation library could not create a temporary file for CURLOPT_INFILE; upload aborted",E_USER_WARNING);
                    $handled = false;
                } else {
                    while (!feof($value)) {
                        $contents = fread($value,8192);
                        fwrite($fp,$contents);
                    }
                    fclose($fp);
                    // if a temporary file was previously created, unlink it
                    if (!empty($settings["upload-file"]["value"]) && file_exists($settings["upload-file"]["value"])) unlink($settings["upload-file"]["value"]);
                    // set the new upload-file filename
                    $settings["upload-file"]["value"] = $tmpfilename;
                }
            } else {
                trigger_error("CURLOPT_INFILE must specify a valid file resource",E_USER_WARNING);
                $handled = false;
            }
            break;
        case CURLOPT_MUTE:
            // we're already mute, no?
            break;
        case CURLOPT_LOW_SPEED_LIMIT:
            $settings["speed-limit"] = (int) $value;
            break;
        case CURLOPT_LOW_SPEED_TIME:
            $settings["speed-time"] = (int) $value;
            break;
        case CURLOPT_RESUME_FROM:
            $settings["continue-at"] = (int) $value;
            break;
        case CURLOPT_CAINFO:
            if ($value==="") $value = false;
            $settings["cacert"] = $value;
            break;
        case CURLOPT_SSLVERSION:
            $value = (int) $value;
            switch($value) {
                case 2:
                case 3:
                    unset($settings["sslv2"]);
                    unset($settings["sslv3"]);
                    $settings["sslv".$value] = true;
                    break;
            }
            break;
        case CURLOPT_TIMECONDITION:
            // untested upstream. The sign of the stored time value encodes the
            // condition: negative for TIMECOND_ISUNMODSINCE, positive otherwise.
            // NOTE(review): "enabled" is only ever set to false here and
            // curl_exec() skips settings whose "enabled" is false, so after this
            // option is used the time condition is never emitted -- confirm intent.
            if (!isset($settings["time-cond"]["enabled"])) $settings["time-cond"]["enabled"] = false;
            if (empty($settings["time-cond"]["value"])) $settings["time-cond"]["value"] = 1;
            $settings["time-cond"]["value"] = abs($settings["time-cond"]["value"]);
            if ($value==TIMECOND_ISUNMODSINCE) {
                $settings["time-cond"]["value"] *= -1;
            }
            break;
        case CURLOPT_TIMEVALUE:
            // untested upstream; keep the sign chosen by CURLOPT_TIMECONDITION
            $sign = 1;
            if (!empty($settings["time-cond"]["value"]) && $settings["time-cond"]["value"]<0) $sign = -1;
            $settings["time-cond"]["value"] = (int) $value * $sign;
            break;
        case CURLOPT_FILE:
            if (is_resource($value)) {
                $opt["output_handle"] = $value;
            } else {
                trigger_error("CURLOPT_FILE must specify a valid file resource",E_USER_WARNING);
                $handled = false;
            }
            break;
        case CURLOPT_WRITEHEADER:
            if (is_resource($value)) {
                $opt["header_handle"] = $value;
            } else {
                trigger_error("CURLOPT_WRITEHEADER must specify a valid file resource",E_USER_WARNING);
                $handled = false;
            }
            break;
        case CURLOPT_HEADERFUNCTION:
            $opt["header_function"] = $value;
            break;
        case CURLOPT_STDERR:
            // not implemented for now - not really relevant
            break;
        case CURLOPT_CONNECTTIMEOUT:
            // must live in $settings (not $opt) so that curl_exec() emits it
            // as "--connect-timeout <seconds>"
            $settings["connect-timeout"] = (int) $value;
            break;
        // FTP stuff not implemented
        case CURLOPT_QUOTE:
        case CURLOPT_POSTQUOTE:
        case CURLOPT_UPLOAD:
        case CURLOPT_FTPLISTONLY:
        case CURLOPT_FTPAPPEND:
        case CURLOPT_FTPPORT:
        // Other stuff not implemented
        case CURLOPT_NETRC:
        default:
            trigger_error("CURL emulation does not implement CURL option "._curlopt_name($option),E_USER_WARNING);
            $handled = false;
            break;
    }
    return $handled;
}
/**
 * Perform a CURL emulation session.
 *
 * Builds a command line for the CURL binary from the session's stored
 * options, runs it with exec(), records the exit code and the transfer
 * statistics on the session, and then delivers the response: to the
 * CURLOPT_FILE handle if one was set, as the return value if
 * CURLOPT_RETURNTRANSFER was set, or echoed otherwise.
 *
 * Limitation carried over from the original implementation: exec() returns
 * the output split into lines with trailing whitespace removed, and the
 * lines are re-joined with CRLF here, so the response is not binary-safe.
 *
 * @param int $ch Session handle returned by curl_init().
 * @return string|bool The response when CURLOPT_RETURNTRANSFER is set, true
 *                     otherwise; false when CURLOPT_FAILONERROR is set and
 *                     the transfer failed or returned an HTTP status >= 400.
 */
function curl_exec($ch) {
    $opt = &$GLOBALS["_CURLEXT_OPT"][$ch];
    $url = isset($opt["url"]) ? $opt["url"] : false;
    $verbose = !empty($opt["verbose"]);
    if (!isset($opt["args"]) || !is_array($opt["args"])) $opt["args"] = array();
    if (!isset($opt["settings"]) || !is_array($opt["settings"])) $opt["settings"] = array();
    // both flags must exist even when the conditions below do not set them
    $strip_headers = false;
    $strip_body = false;
    // ask commandline CURL to return its statistics at the end of its output
    $opt["settings"]["write-out"] = "\n%{http_code}|%{time_total}|%{time_namelookup}|%{time_connect}|%{time_pretransfer}|%{time_starttransfer}|%{size_download}|%{size_upload}|%{size_header}|%{size_request}|%{speed_download}|%{speed_upload}|||||||%{content_type}|%{url_effective}";
    $writeout_order = array(
        CURLINFO_HTTP_CODE,
        CURLINFO_TOTAL_TIME,
        CURLINFO_NAMELOOKUP_TIME,
        CURLINFO_CONNECT_TIME,
        CURLINFO_PRETRANSFER_TIME,
        CURLINFO_STARTTRANSFER_TIME,
        CURLINFO_SIZE_DOWNLOAD,
        CURLINFO_SIZE_UPLOAD,
        CURLINFO_HEADER_SIZE,
        CURLINFO_REQUEST_SIZE,
        CURLINFO_SPEED_DOWNLOAD,
        CURLINFO_SPEED_UPLOAD,
        // the following 6 items are not requested from commandline CURL
        // (their write-out fields are empty), and thus are left empty
        CURLINFO_FILETIME,
        CURLINFO_REDIRECT_TIME,
        CURLINFO_SSL_VERIFYRESULT,
        CURLINFO_CONTENT_LENGTH_DOWNLOAD,
        CURLINFO_CONTENT_LENGTH_UPLOAD,
        CURLINFO_REDIRECT_COUNT,
        CURLINFO_CONTENT_TYPE,
        CURLINFO_EFFECTIVE_URL,
    );
    // if the CURLOPT_NOBODY option was specified (to remove the body from the output),
    // but an output file handle was set, we need to tell CURL to return the body so
    // that we can strip it from the output ourselves
    if (!empty($opt["settings"]["head"]) && !empty($opt["output_handle"])) {
        unset($opt["settings"]["head"]);
        $strip_body = true;
    }
    // if the CURLOPT_HEADER option was NOT specified, but a header file handle or
    // header callback was, we again need to tell CURL to return the headers so we
    // can pass them on, then strip them from the output
    if (!isset($opt["settings"]["include"]) && (isset($opt["header_handle"]) || isset($opt["header_function"]))) {
        $opt["settings"]["include"] = true;
        $strip_headers = true;
    }
    // build the CURL argument list
    $arguments = "";
    foreach ($opt["args"] as $arg) {
        list($argname,$argval) = explode('=',$arg,2);
        $arguments .= "--$argname ".escapeshellarg($argval)." ";
    }
    foreach ($opt["settings"] as $argname=>$argval) {
        if (is_array($argval)) {
            // array settings carry an on/off switch plus the actual value
            if (isset($argval["enabled"]) && !$argval["enabled"]) continue;
            $argval = isset($argval["value"]) ? $argval["value"] : "";
        }
        if ($argval===false) continue;
        // boolean true means a bare switch; anything else is a quoted value
        $arguments .= "--$argname ".(is_bool($argval)?"":escapeshellarg((string) $argval)." ");
    }
    // build the CURL commandline and execute it
    $cmd = CURL_PATH." ".$arguments." ".escapeshellarg((string) $url);
    if ($verbose) echo "libcurlemu: Executing: $cmd\n";
    $output = array();
    exec($cmd,$output,$ret);
    if ($verbose) {
        echo "libcurlemu: Result: ";
        var_dump($output);
        echo "libcurlemu: Exit code: $ret\n";
    }
    // check for errors
    $opt["errno"] = $ret;
    $opt["error"] = $ret ? "CURL error #$ret" : "";
    // pull the statistics out from the output; the limit keeps any "|" inside
    // the final field (the effective URL) intact
    $stats = explode('|',(string) array_pop($output),count($writeout_order));
    foreach ($writeout_order as $k=>$item) {
        $opt["stats"][$item] = isset($stats[$k]) ? $stats[$k] : null;
    }
    // CURLOPT_FAILONERROR: report failure to the caller instead of terminating
    // the whole script. The status code comes from the write-out statistics, so
    // this works whether or not headers were requested. libcurl treats HTTP
    // status >= 400 as failure and reports it as error 22.
    if (!empty($opt["fail_on_error"])) {
        $resultcode = (int) $opt["stats"][CURLINFO_HTTP_CODE];
        if ($ret || $resultcode>=400) {
            if (!$ret) {
                $opt["errno"] = 22;
                $opt["error"] = "The requested URL returned error: $resultcode";
            }
            return false;
        }
    }
    // build the response string
    $output = implode("\r\n",$output);
    // split headers from body if needed
    $header_text = "";
    $body_text = "";
    if ($strip_headers || $strip_body || isset($opt["header_handle"]) || isset($opt["header_function"])) {
        $headerpos = strpos($output,"\r\n\r\n");
        // Several header blocks may precede the body (e.g. "100 Continue" or
        // redirects): keep advancing while the text after the blank line
        // STARTS with another status line. The pattern is anchored and the loop
        // stops when no further blank line exists, so a body that merely
        // mentions a status line cannot cause an endless loop.
        while ($headerpos!==false && preg_match("/^HTTP\/[0-9.]+ [0-9]{3}\b/",(string) substr($output,$headerpos+4))) {
            $next = strpos($output,"\r\n\r\n",$headerpos+4);
            if ($next===false) break;
            $headerpos = $next;
        }
        if ($headerpos===false) {
            // no blank line at all: everything received is header text
            $header_text = $output;
        } else {
            $header_text = substr($output,0,$headerpos);
            $body_text = (string) substr($output,$headerpos+4);
        }
    }
    // if a file handle was provided for header output, write the headers to it
    if (isset($opt["header_handle"])) {
        fwrite($opt["header_handle"],$header_text);
    }
    // if a header callback was provided, hand it the header text
    if (isset($opt["header_function"])) {
        call_user_func($opt["header_function"],$ch,$header_text);
    }
    // if the caller did not request headers in the output, strip them
    if ($strip_headers) {
        $output = $body_text;
    }
    // if the caller did not request the response body in the output, strip it
    if ($strip_body) {
        $output = $strip_headers ? "" : $header_text;
    }
    // if a file handle was provided for output, write the output to it
    if (isset($opt["output_handle"])) {
        fwrite($opt["output_handle"],$output);
    // if the caller requested that the response be returned, return it
    } elseif (!empty($opt["return_transfer"])) {
        return $output;
    // otherwise, just echo the output to stdout
    } else {
        echo $output;
    }
    return true;
}
/**
 * Close a CURL emulation session.
 *
 * Deletes the temporary upload file that curl_setopt() creates for
 * CURLOPT_INFILE, if there is one, and removes the session from the
 * $GLOBALS["_CURLEXT_OPT"] registry.
 *
 * @param int $ch Session handle returned by curl_init().
 * @return void
 */
function curl_close($ch) {
    // isset() on the nested key avoids undefined-key warnings for sessions
    // that never configured an upload
    $tmpfile = false;
    if (isset($GLOBALS["_CURLEXT_OPT"][$ch]["settings"]["upload-file"]["value"])) {
        $tmpfile = $GLOBALS["_CURLEXT_OPT"][$ch]["settings"]["upload-file"]["value"];
    }
    if ($tmpfile && file_exists($tmpfile)) unlink($tmpfile);
    unset($GLOBALS["_CURLEXT_OPT"][$ch]);
}
/**
 * Return the error number recorded for a session.
 *
 * @param int $ch Session handle returned by curl_init().
 * @return int The session's stored "errno" value cast to an integer.
 */
function curl_errno($ch) {
    $session = $GLOBALS["_CURLEXT_OPT"][$ch];
    return intval($session["errno"]);
}
/**
 * Return the error message recorded for a session.
 *
 * @param int $ch Session handle returned by curl_init().
 * @return string The stored error message, or an empty string when no error
 *                has been recorded (matching the cURL extension's contract,
 *                instead of returning null with an undefined-key warning).
 */
function curl_error($ch) {
    if (isset($GLOBALS["_CURLEXT_OPT"][$ch]["error"])) {
        return $GLOBALS["_CURLEXT_OPT"][$ch]["error"];
    }
    return "";
}
/**
 * Return statistics recorded by curl_exec() for a session.
 *
 * @param int      $ch  Session handle returned by curl_init().
 * @param int|null $opt A CURLINFO_* constant to fetch a single value, or
 *                      null (or any falsy value) to fetch everything.
 * @return mixed The single requested statistic, or an array of all
 *               statistics keyed by name ("url", "http_code", ...).
 */
function curl_getinfo($ch,$opt=NULL) {
    $stats = $GLOBALS["_CURLEXT_OPT"][$ch]["stats"];
    // single statistic requested
    if ($opt) return $stats[$opt];
    // otherwise translate the CURLINFO_*-keyed statistics into the
    // name-keyed array
    $names = array(
        "url"=>CURLINFO_EFFECTIVE_URL,
        "content_type"=>CURLINFO_CONTENT_TYPE,
        "http_code"=>CURLINFO_HTTP_CODE,
        "header_size"=>CURLINFO_HEADER_SIZE,
        "request_size"=>CURLINFO_REQUEST_SIZE,
        "filetime"=>CURLINFO_FILETIME,
        "ssl_verify_result"=>CURLINFO_SSL_VERIFYRESULT,
        "redirect_count"=>CURLINFO_REDIRECT_COUNT,
        "total_time"=>CURLINFO_TOTAL_TIME,
        "namelookup_time"=>CURLINFO_NAMELOOKUP_TIME,
        "connect_time"=>CURLINFO_CONNECT_TIME,
        "pretransfer_time"=>CURLINFO_PRETRANSFER_TIME,
        "size_upload"=>CURLINFO_SIZE_UPLOAD,
        "size_download"=>CURLINFO_SIZE_DOWNLOAD,
        "speed_download"=>CURLINFO_SPEED_DOWNLOAD,
        "speed_upload"=>CURLINFO_SPEED_UPLOAD,
        "download_content_length"=>CURLINFO_CONTENT_LENGTH_DOWNLOAD,
        "upload_content_length"=>CURLINFO_CONTENT_LENGTH_UPLOAD,
        "starttransfer_time"=>CURLINFO_STARTTRANSFER_TIME,
        "redirect_time"=>CURLINFO_REDIRECT_TIME
    );
    $info = array();
    foreach (array_keys($names) as $name) {
        $info[$name] = $stats[$names[$name]];
    }
    return $info;
}
/**
 * Return the version string of this emulation library.
 *
 * @return string "libcurlemu/<CURLEXT_VERSION>-ext".
 */
function curl_version() {
    return sprintf("libcurlemu/%s-ext",CURLEXT_VERSION);
}
}
?>

View file

@ -0,0 +1,453 @@
<?php
/* CURL Extension Emulation Library (Native PHP)
* Copyright 2004-2007, Steve Blinch
* http://code.blitzaffe.com
* ============================================================================
*
* DESCRIPTION
*
* Provides a pure-PHP implementation of the PHP CURL extension, for use on
* systems which do not already have the CURL extension installed. It emulates
* all of the curl_* functions normally provided by the CURL extension itself,
* and uses an internal, native-PHP HTTP library to make requests.
*
* This library will automatically detect whether or not the "real" CURL
* extension is installed, and if so, it will not interfere. Thus, it can be
* used to ensure that, one way or another, the CURL functions are available
* for use.
*
* Note that this is only a *rough* emulation of CURL; it is not exact, and
* many of CURL's options are not implemented. For a more precise emulation of
* CURL, you may want to try our other libcurlexternal library which is based on
* the CURL console binary (and is virtually identical to the CURL extension).
*
*
* USAGE
*
* Please see the PHP documentation under the "CURL, Client URL Library
* Functions" section for information about using this library. Almost all of
* the documentation and examples in the PHP manual should work with this
* library.
*
*
* LICENSE
*
* This script is free software; you can redistribute it and/or modify it under the
* terms of the GNU General Public License as published by the Free Software
* Foundation; either version 2 of the License, or (at your option) any later
* version.
*
* This script is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along
* with this script; if not, write to the Free Software Foundation, Inc.,
* 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
// if the real CURL PHP extension is installed, exit without doing anything;
// if libcurlemu is installed and providing a wrapper for the CURL binary,
// exit without doing anything
if (!extension_loaded("curl") && !function_exists("curl_init")) {
// This emulation is built on the HTTPRetriever class. If the class is not
// loaded yet, load it from class_HTTPRetriever.php next to this file; if that
// file is not readable, do one of the following:
// - if CURLNAT_MISSING_ABORT was defined, then exit without
// implementing the CURL functions
// - otherwise, raise a fatal error and halt the script
if (!class_exists("HTTPRetriever")) {
    if (is_readable(dirname(__FILE__)."/class_HTTPRetriever.php")) {
        define("HTTPR_NO_REDECLARE_CURL",true);
        require_once(dirname(__FILE__)."/class_HTTPRetriever.php");
    } else {
        if (defined("CURLNAT_MISSING_ABORT") && CURLNAT_MISSING_ABORT) {
            return;
        } else {
            trigger_error("CURL extension is not loaded, libcurlemu is not loaded, and the HTTPRetriever class is unavailable",E_USER_ERROR);
        }
    }
}
define("CURLNAT_VERSION","1.0.0");
// CURLOPT_* option identifiers accepted by curl_setopt()
define('CURLOPT_NOTHING',0);
define('CURLOPT_FILE',10001);
define('CURLOPT_URL',10002);
define('CURLOPT_PORT',3);
define('CURLOPT_PROXY',10004);
define('CURLOPT_USERPWD',10005);
define('CURLOPT_PROXYUSERPWD',10006);
define('CURLOPT_RANGE',10007);
define('CURLOPT_INFILE',10009);
define('CURLOPT_ERRORBUFFER',10010);
define('CURLOPT_WRITEFUNCTION',20011);
define('CURLOPT_READFUNCTION',20012);
define('CURLOPT_TIMEOUT',13);
define('CURLOPT_INFILESIZE',14);
define('CURLOPT_POSTFIELDS',10015);
define('CURLOPT_REFERER',10016);
define('CURLOPT_FTPPORT',10017);
define('CURLOPT_USERAGENT',10018);
define('CURLOPT_LOW_SPEED_LIMIT',19);
define('CURLOPT_LOW_SPEED_TIME',20);
define('CURLOPT_RESUME_FROM',21);
define('CURLOPT_COOKIE',10022);
define('CURLOPT_HTTPHEADER',10023);
define('CURLOPT_HTTPPOST',10024);
define('CURLOPT_SSLCERT',10025);
define('CURLOPT_SSLCERTPASSWD',10026);
define('CURLOPT_SSLKEYPASSWD',10026);
define('CURLOPT_CRLF',27);
define('CURLOPT_QUOTE',10028);
define('CURLOPT_WRITEHEADER',10029);
define('CURLOPT_COOKIEFILE',10031);
define('CURLOPT_SSLVERSION',32);
define('CURLOPT_TIMECONDITION',33);
define('CURLOPT_TIMEVALUE',34);
define('CURLOPT_HTTPREQUEST',10035);
define('CURLOPT_CUSTOMREQUEST',10036);
define('CURLOPT_STDERR',10037);
define('CURLOPT_POSTQUOTE',10039);
define('CURLOPT_WRITEINFO',10040);
define('CURLOPT_VERBOSE',41);
define('CURLOPT_HEADER',42);
define('CURLOPT_NOPROGRESS',43);
define('CURLOPT_NOBODY',44);
define('CURLOPT_FAILONERROR',45);
define('CURLOPT_UPLOAD',46);
define('CURLOPT_POST',47);
define('CURLOPT_FTPLISTONLY',48);
define('CURLOPT_FTPAPPEND',50);
define('CURLOPT_NETRC',51);
define('CURLOPT_FOLLOWLOCATION',52);
define('CURLOPT_FTPASCII',53);
define('CURLOPT_TRANSFERTEXT',53);
define('CURLOPT_PUT',54);
define('CURLOPT_MUTE',55);
define('CURLOPT_PROGRESSFUNCTION',20056);
define('CURLOPT_PROGRESSDATA',10057);
define('CURLOPT_AUTOREFERER',58);
define('CURLOPT_PROXYPORT',59);
define('CURLOPT_POSTFIELDSIZE',60);
define('CURLOPT_HTTPPROXYTUNNEL',61);
define('CURLOPT_INTERFACE',10062);
define('CURLOPT_KRB4LEVEL',10063);
define('CURLOPT_SSL_VERIFYPEER',64);
define('CURLOPT_CAINFO',10065);
define('CURLOPT_PASSWDFUNCTION',20066);
define('CURLOPT_PASSWDDATA',10067);
define('CURLOPT_MAXREDIRS',68);
define('CURLOPT_FILETIME',10069);
define('CURLOPT_TELNETOPTIONS',10070);
define('CURLOPT_MAXCONNECTS',71);
define('CURLOPT_CLOSEPOLICY',72);
define('CURLOPT_CLOSEFUNCTION',20073);
define('CURLOPT_FRESH_CONNECT',74);
define('CURLOPT_FORBID_REUSE',75);
define('CURLOPT_RANDOM_FILE',10076);
define('CURLOPT_EGDSOCKET',10077);
define('CURLOPT_CONNECTTIMEOUT',78);
define('CURLOPT_HEADERFUNCTION',20079);
define('CURLOPT_HTTPGET',80);
define('CURLOPT_SSL_VERIFYHOST',81);
define('CURLOPT_COOKIEJAR',10082);
define('CURLOPT_SSL_CIPHER_LIST',10083);
define('CURLOPT_HTTP_VERSION',84);
define('CURLOPT_FTP_USE_EPSV',85);
define('CURLOPT_SSLCERTTYPE',10086);
define('CURLOPT_SSLKEY',10087);
define('CURLOPT_SSLKEYTYPE',10088);
define('CURLOPT_SSLENGINE',10089);
define('CURLOPT_SSLENGINE_DEFAULT',90);
define('CURLOPT_DNS_USE_GLOBAL_CACHE',91);
define('CURLOPT_DNS_CACHE_TIMEOUT',92);
define('CURLOPT_PREQUOTE',10093);
// curl_setopt() has a `case CURLOPT_RETURNTRANSFER:` but the constant was
// missing from this list; on PHP 8 an undefined constant throws an Error as
// soon as the switch reaches that case. 19913 is the value PHP's own cURL
// extension uses. Guarded in case another part of the library defines it.
if (!defined('CURLOPT_RETURNTRANSFER')) define('CURLOPT_RETURNTRANSFER',19913);
// CURLINFO_* identifiers accepted by curl_getinfo()
define('CURLINFO_EFFECTIVE_URL',1);
define('CURLINFO_HTTP_CODE',2);
define('CURLINFO_FILETIME',14);
define('CURLINFO_TOTAL_TIME',3);
define('CURLINFO_NAMELOOKUP_TIME',4);
define('CURLINFO_CONNECT_TIME',5);
define('CURLINFO_PRETRANSFER_TIME',6);
define('CURLINFO_STARTTRANSFER_TIME',17);
define('CURLINFO_REDIRECT_TIME',19);
define('CURLINFO_REDIRECT_COUNT',20);
define('CURLINFO_SIZE_UPLOAD',7);
define('CURLINFO_SIZE_DOWNLOAD',8);
define('CURLINFO_SPEED_DOWNLOAD',9);
define('CURLINFO_SPEED_UPLOAD',10);
define('CURLINFO_HEADER_SIZE',11);
define('CURLINFO_REQUEST_SIZE',12);
define('CURLINFO_SSL_VERIFYRESULT',13);
define('CURLINFO_CONTENT_LENGTH_DOWNLOAD',15);
define('CURLINFO_CONTENT_LENGTH_UPLOAD',16);
define('CURLINFO_CONTENT_TYPE',18);
// values accepted by CURLOPT_TIMECONDITION
define("TIMECOND_ISUNMODSINCE",1);
define("TIMECOND_IFMODSINCE",2);
/**
 * Look up the name of a CURLOPT_* constant from its value.
 *
 * Walks all defined constants in definition order and returns the first
 * whose name starts with "CURLOPT_" and whose value loosely equals $curlopt.
 *
 * @param mixed $curlopt Option value to look up.
 * @return string|false The constant name, or false when nothing matches.
 */
function _curlopt_name($curlopt) {
    $constants = get_defined_constants();
    $names = array_keys($constants);
    $total = count($names);
    for ($i=0; $i<$total; $i++) {
        $name = $names[$i];
        $is_option = substr($name,0,8)=="CURLOPT_";
        if ($is_option && $constants[$name]==$curlopt) return $name;
    }
    return false;
}
/**
 * Initialize a CURL emulation session.
 *
 * Allocates a new entry in the $GLOBALS["_CURLNAT_OPT"] registry holding a
 * fresh HTTPRetriever instance and returns its index; the other curl_*
 * emulation functions take that index as $ch.
 *
 * @param string|false $url Optional URL to store for the session (same
 *                          signature as the console-binary variant).
 * @return int Session handle (1 or greater, so it is never falsy).
 */
function curl_init($url=false) {
    // Post-incrementing an unset counter yields null, which made the first
    // handle falsy and broke `if (!$ch)` checks; start counting at 1 instead.
    if (empty($GLOBALS["_CURLNAT_OPT"]["index"])) $GLOBALS["_CURLNAT_OPT"]["index"] = 1;
    $i = $GLOBALS["_CURLNAT_OPT"]["index"]++;
    $GLOBALS["_CURLNAT_OPT"][$i] = array("url"=>$url);
    // plain assignment: "= &new" is a parse error since PHP 7, and objects
    // are handled by reference-like handles anyway
    $GLOBALS["_CURLNAT_OPT"][$i]["http"] = new HTTPRetriever();
    $GLOBALS["_CURLNAT_OPT"][$i]["include_body"] = true;
    return $i;
}
/**
 * Set an option for a CURL emulation transfer.
 *
 * Supported options are stored either on the session array (URL, method,
 * POST data, output handles, ...) or directly on the session's
 * HTTPRetriever object (request headers, credentials). Unsupported options
 * raise an E_USER_WARNING.
 *
 * @param int   $ch     Session handle returned by curl_init().
 * @param int   $option One of the CURLOPT_* constants.
 * @param mixed $value  Value for the option.
 * @return bool True when the option was stored or deliberately ignored;
 *              false when it is unsupported or the value was rejected.
 */
function curl_setopt($ch,$option,$value) {
    $opt = &$GLOBALS["_CURLNAT_OPT"][$ch];
    if (empty($opt["args"])) $opt["args"] = array();
    $args = &$opt["args"];
    if (empty($opt["settings"])) $opt["settings"] = array();
    $settings = &$opt["settings"];
    $http = &$opt["http"];
    $handled = true;
    switch($option) {
        case CURLOPT_URL:
            $opt["url"] = $value;
            break;
        case CURLOPT_CUSTOMREQUEST:
            $opt["method"] = $value;
            break;
        case CURLOPT_REFERER:
            $http->headers["Referer"] = $value;
            break;
        case CURLOPT_NOBODY:
            $opt["include_body"] = $value==0;
            break;
        case CURLOPT_FAILONERROR:
            $opt["fail_on_error"] = $value>0;
            break;
        case CURLOPT_USERAGENT:
            $http->headers["User-Agent"] = $value;
            break;
        case CURLOPT_HEADER:
            $opt["include_headers"] = $value>0;
            break;
        case CURLOPT_RETURNTRANSFER:
            $opt["return_transfer"] = $value>0;
            break;
        case CURLOPT_TIMEOUT:
            $opt["max-time"] = (int) $value;
            break;
        case CURLOPT_HTTPHEADER:
            foreach ((array) $value as $header) {
                // split on the FIRST colon only, so header values that contain
                // ":" themselves (URLs, timestamps, ...) are kept whole
                $parts = explode(":",$header,2);
                $http->headers[$parts[0]] = isset($parts[1]) ? ltrim($parts[1]) : "";
            }
            break;
        case CURLOPT_POST:
            $opt["post"] = $value>0;
            break;
        case CURLOPT_POSTFIELDS:
            $opt["postdata"] = $value;
            break;
        case CURLOPT_MUTE:
            // we're already mute, no?
            break;
        case CURLOPT_FILE:
            if (is_resource($value)) {
                $opt["output_handle"] = $value;
            } else {
                trigger_error("CURLOPT_FILE must specify a valid file resource",E_USER_WARNING);
                $handled = false;
            }
            break;
        case CURLOPT_WRITEHEADER:
            if (is_resource($value)) {
                $opt["header_handle"] = $value;
            } else {
                trigger_error("CURLOPT_WRITEHEADER must specify a valid file resource",E_USER_WARNING);
                $handled = false;
            }
            break;
        case CURLOPT_STDERR:
            // not implemented for now - not really relevant
            break;
        case CURLOPT_SSL_VERIFYPEER:
        case CURLOPT_SSL_VERIFYHOST:
            // these are automatically disabled using ssl:// anyway
            break;
        case CURLOPT_USERPWD:
            // "user:password"; a value without ":" means an empty password
            list($curl_user,$curl_pass) = array_pad(explode(':',$value,2),2,"");
            $http->auth_username = $curl_user;
            $http->auth_password = $curl_pass;
            break;
        // Important stuff not implemented (as it's not yet supported by HTTPRetriever)
        case CURLOPT_PUT:
        case CURLOPT_INFILE:
        case CURLOPT_FOLLOWLOCATION:
        case CURLOPT_PROXYUSERPWD:
        case CURLOPT_COOKIE:
        case CURLOPT_COOKIEFILE:
        case CURLOPT_PROXY:
        case CURLOPT_RANGE:
        case CURLOPT_RESUME_FROM:
        // Things that cannot (reasonably) be implemented here
        case CURLOPT_LOW_SPEED_LIMIT:
        case CURLOPT_LOW_SPEED_TIME:
        case CURLOPT_KRB4LEVEL:
        case CURLOPT_SSLCERT:
        case CURLOPT_SSLCERTPASSWD:
        case CURLOPT_SSLVERSION:
        case CURLOPT_INTERFACE:
        case CURLOPT_CAINFO:
        case CURLOPT_TIMECONDITION:
        case CURLOPT_TIMEVALUE:
        // FTP stuff not implemented
        case CURLOPT_QUOTE:
        case CURLOPT_POSTQUOTE:
        case CURLOPT_UPLOAD:
        case CURLOPT_FTPLISTONLY:
        case CURLOPT_FTPAPPEND:
        case CURLOPT_FTPPORT:
        // Other stuff not implemented
        case CURLOPT_VERBOSE:
        case CURLOPT_NETRC:
        default:
            trigger_error("CURL emulation does not implement CURL option "._curlopt_name($option),E_USER_WARNING);
            $handled = false;
            break;
    }
    return $handled;
}
/**
 * Perform a CURL emulation session.
 *
 * Issues the request through the session's HTTPRetriever object (POST,
 * custom method or GET depending on the stored options), records the error
 * state and statistics on the session, and then delivers the response: to
 * the CURLOPT_FILE handle if one was set, as the return value if
 * CURLOPT_RETURNTRANSFER was set, or echoed otherwise.
 *
 * @param int $ch Session handle returned by curl_init().
 * @return string|bool The response when CURLOPT_RETURNTRANSFER is set, true
 *                     otherwise; false when CURLOPT_FAILONERROR is set and
 *                     the request failed or returned an HTTP status >= 400.
 */
function curl_exec($ch) {
    $opt = &$GLOBALS["_CURLNAT_OPT"][$ch];
    $url = isset($opt["url"]) ? $opt["url"] : false;
    $postdata = isset($opt["postdata"]) ? $opt["postdata"] : null;
    $http = &$opt["http"];
    $http->disable_curl = true; // avoid problems with recursion, since we *ARE* CURL
    // set time limits if requested
    if (!empty($opt["max-time"])) {
        $http->connect_timeout = $opt["max-time"];
        $http->max_time = $opt["max-time"];
    }
    if (!empty($opt["post"])) {
        $res = $http->post($url,$postdata);
    } elseif (!empty($opt["method"])) {
        $res = $http->custom($opt["method"],$url,$postdata);
    } else {
        $res = $http->get($url);
    }
    // check for errors
    $opt["errno"] = (!$res && $http->error) ? 1 : 0;
    $opt["error"] = $opt["errno"] ? $http->error : "";
    $opt["stats"] = $http->stats;
    // CURLOPT_FAILONERROR: report failure to the caller instead of terminating
    // the whole script. libcurl treats HTTP status >= 400 as failure and
    // reports it as error 22.
    if (!empty($opt["fail_on_error"])) {
        $resultcode = (int) $http->result_code;
        if ($opt["errno"] || $resultcode>=400) {
            if (!$opt["errno"]) {
                $opt["errno"] = 22;
                $opt["error"] = "The requested URL returned error: $resultcode";
            }
            return false;
        }
    }
    // rebuild a header block from the parsed response headers
    $headers = "";
    if (is_array($http->response_headers)) {
        foreach ($http->response_headers as $k=>$v) {
            $headers .= "$k: $v\r\n";
        }
    }
    // if a file handle was provided for header output, write the headers to it
    if (isset($opt["header_handle"])) {
        fwrite($opt["header_handle"],$headers);
    }
    $output = (!empty($opt["include_headers"]) ? $headers."\r\n" : "") . (!empty($opt["include_body"]) ? $http->response : "");
    // if a file handle was provided for output, write the output to it
    if (isset($opt["output_handle"])) {
        fwrite($opt["output_handle"],$output);
    // if the caller requested that the response be returned, return it
    } elseif (!empty($opt["return_transfer"])) {
        return $output;
    // otherwise, just echo the output to stdout
    } else {
        echo $output;
    }
    return true;
}
/**
 * Close a CURL emulation session.
 *
 * Deletes the temporary upload file recorded in the session's settings, if
 * there is one, and removes the session from the $GLOBALS["_CURLNAT_OPT"]
 * registry.
 *
 * @param int $ch Session handle returned by curl_init().
 * @return void
 */
function curl_close($ch) {
    // isset() on the nested key avoids undefined-key warnings for sessions
    // that have no upload file recorded
    $tmpfile = false;
    if (isset($GLOBALS["_CURLNAT_OPT"][$ch]["settings"]["upload-file"]["value"])) {
        $tmpfile = $GLOBALS["_CURLNAT_OPT"][$ch]["settings"]["upload-file"]["value"];
    }
    if ($tmpfile && file_exists($tmpfile)) unlink($tmpfile);
    unset($GLOBALS["_CURLNAT_OPT"][$ch]);
}
/**
 * Return the error number recorded for a session.
 *
 * @param int $ch Session handle returned by curl_init().
 * @return int The session's stored "errno" value cast to an integer.
 */
function curl_errno($ch) {
    $errno = $GLOBALS["_CURLNAT_OPT"][$ch]["errno"];
    return intval($errno);
}
/**
 * Return the error message recorded for a session.
 *
 * @param int $ch Session handle returned by curl_init().
 * @return string The stored error message, or an empty string when no error
 *                has been recorded (matching the cURL extension's contract,
 *                instead of returning null with an undefined-key warning).
 */
function curl_error($ch) {
    if (isset($GLOBALS["_CURLNAT_OPT"][$ch]["error"])) {
        return $GLOBALS["_CURLNAT_OPT"][$ch]["error"];
    }
    return "";
}
/**
 * Return statistics recorded by curl_exec() for a session.
 *
 * The session's statistics are stored keyed by name ("url", "http_code",
 * ...); a CURLINFO_* constant is translated to its name before lookup.
 *
 * @param int      $ch  Session handle returned by curl_init().
 * @param int|null $opt A CURLINFO_* constant to fetch a single value, or
 *                      null (or any falsy value) to fetch everything.
 * @return mixed The single requested statistic, or the whole statistics array.
 */
function curl_getinfo($ch,$opt=NULL) {
    // no specific item requested: hand back everything we recorded
    if (!$opt) return $GLOBALS["_CURLNAT_OPT"][$ch]["stats"];
    // statistic name => CURLINFO_* constant, flipped below for the lookup
    $constant_for = array(
        "url"=>CURLINFO_EFFECTIVE_URL,
        "content_type"=>CURLINFO_CONTENT_TYPE,
        "http_code"=>CURLINFO_HTTP_CODE,
        "header_size"=>CURLINFO_HEADER_SIZE,
        "request_size"=>CURLINFO_REQUEST_SIZE,
        "filetime"=>CURLINFO_FILETIME,
        "ssl_verify_result"=>CURLINFO_SSL_VERIFYRESULT,
        "redirect_count"=>CURLINFO_REDIRECT_COUNT,
        "total_time"=>CURLINFO_TOTAL_TIME,
        "namelookup_time"=>CURLINFO_NAMELOOKUP_TIME,
        "connect_time"=>CURLINFO_CONNECT_TIME,
        "pretransfer_time"=>CURLINFO_PRETRANSFER_TIME,
        "size_upload"=>CURLINFO_SIZE_UPLOAD,
        "size_download"=>CURLINFO_SIZE_DOWNLOAD,
        "speed_download"=>CURLINFO_SPEED_DOWNLOAD,
        "speed_upload"=>CURLINFO_SPEED_UPLOAD,
        "download_content_length"=>CURLINFO_CONTENT_LENGTH_DOWNLOAD,
        "upload_content_length"=>CURLINFO_CONTENT_LENGTH_UPLOAD,
        "starttransfer_time"=>CURLINFO_STARTTRANSFER_TIME,
        "redirect_time"=>CURLINFO_REDIRECT_TIME
    );
    $name_for = array_flip($constant_for);
    $name = $name_for[$opt];
    return $GLOBALS["_CURLNAT_OPT"][$ch]["stats"][$name];
}
/**
 * Return the version string of this emulation library.
 *
 * @return string "libcurlemu/<CURLNAT_VERSION>-nat".
 */
function curl_version() {
    $version = CURLNAT_VERSION;
    return "libcurlemu/{$version}-nat";
}
}
?>

8
orcinus/mustache/.gitattributes vendored Normal file
View file

@ -0,0 +1,8 @@
/test export-ignore
/CONTRIBUTING.md export-ignore
/.php_cs export-ignore
/phpunit.xml.dist export-ignore
/.travis.yml export-ignore
/.styleci.yml export-ignore
/.gitmodules export-ignore
/.gitignore export-ignore

21
orcinus/mustache/LICENSE Normal file
View file

@ -0,0 +1,21 @@
The MIT License (MIT)
Copyright (c) 2010-2015 Justin Hileman
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
OR OTHER DEALINGS IN THE SOFTWARE.

View file

@ -0,0 +1,73 @@
Mustache.php
============
A [Mustache](https://mustache.github.io/) implementation in PHP.
[![Package version](http://img.shields.io/packagist/v/mustache/mustache.svg?style=flat-square)](https://packagist.org/packages/mustache/mustache)
[![Build status](http://img.shields.io/travis/bobthecow/mustache.php/dev.svg?style=flat-square)](http://travis-ci.org/bobthecow/mustache.php)
[![StyleCI](https://styleci.io/repos/569670/shield)](https://styleci.io/repos/569670)
[![Monthly downloads](http://img.shields.io/packagist/dm/mustache/mustache.svg?style=flat-square)](https://packagist.org/packages/mustache/mustache)
Usage
-----
A quick example:
```php
<?php
$m = new Mustache_Engine(array('entity_flags' => ENT_QUOTES));
echo $m->render('Hello {{planet}}', array('planet' => 'World!')); // "Hello World!"
```
And a more in-depth example -- this is the canonical Mustache template:
```html+jinja
Hello {{name}}
You have just won {{value}} dollars!
{{#in_ca}}
Well, {{taxed_value}} dollars, after taxes.
{{/in_ca}}
```
Create a view "context" object -- which could also be an associative array, but those don't do functions quite as well:
```php
<?php
class Chris {
public $name = "Chris";
public $value = 10000;
public function taxed_value() {
return $this->value - ($this->value * 0.4);
}
public $in_ca = true;
}
```
And render it:
```php
<?php
$m = new Mustache_Engine(array('entity_flags' => ENT_QUOTES));
$chris = new Chris;
echo $m->render($template, $chris);
```
*Note:* we recommend using `ENT_QUOTES` as the default for [entity_flags](https://github.com/bobthecow/mustache.php/wiki#entity_flags) to reduce the risk of cross-site scripting (XSS) vulnerabilities.
And That's Not All!
-------------------
Read [the Mustache.php documentation](https://github.com/bobthecow/mustache.php/wiki/Home) for more information.
See Also
--------
* [mustache(5)](http://mustache.github.io/mustache.5.html) man page.
* [Readme for the Ruby Mustache implementation](http://github.com/defunkt/mustache/blob/master/README.md).

View file

@ -0,0 +1,178 @@
#!/usr/bin/env php
<?php
/*
* This file is part of Mustache.php.
*
* (c) 2010-2015 Justin Hileman
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
/**
* A shell script to create a single-file class cache of the entire Mustache
* library.
*
* $ bin/build_bootstrap.php
*
* ... will create a `mustache.php` bootstrap file in the project directory,
* containing all Mustache library classes. This file can then be included in
* your project, rather than requiring the Mustache Autoloader.
*/
// project root is one level above this bin/ directory
$projectRoot = realpath(__DIR__ . '/..');

require $projectRoot . '/src/Mustache/Autoloader.php';
Mustache_Autoloader::register();

// start from a clean slate: drop any previously generated bootstrap file
$bootstrapFile = $projectRoot . '/mustache.php';
if (file_exists($bootstrapFile)) {
    unlink($bootstrapFile);
}

// every library class that goes into the single-file bootstrap, in load order
$bootstrapClasses = array(
    'Mustache_Engine',
    'Mustache_Cache',
    'Mustache_Cache_AbstractCache',
    'Mustache_Cache_FilesystemCache',
    'Mustache_Cache_NoopCache',
    'Mustache_Compiler',
    'Mustache_Context',
    'Mustache_Exception',
    'Mustache_Exception_InvalidArgumentException',
    'Mustache_Exception_LogicException',
    'Mustache_Exception_RuntimeException',
    'Mustache_Exception_SyntaxException',
    'Mustache_Exception_UnknownFilterException',
    'Mustache_Exception_UnknownHelperException',
    'Mustache_Exception_UnknownTemplateException',
    'Mustache_HelperCollection',
    'Mustache_LambdaHelper',
    'Mustache_Loader',
    'Mustache_Loader_ArrayLoader',
    'Mustache_Loader_CascadingLoader',
    'Mustache_Loader_FilesystemLoader',
    'Mustache_Loader_InlineLoader',
    'Mustache_Loader_MutableLoader',
    'Mustache_Loader_StringLoader',
    'Mustache_Logger',
    'Mustache_Logger_AbstractLogger',
    'Mustache_Logger_StreamLogger',
    'Mustache_Parser',
    'Mustache_Template',
    'Mustache_Tokenizer',
);

// write the combined file next to the project sources as "mustache.php"
SymfonyClassCollectionLoader::load(
    $bootstrapClasses,
    dirname($bootstrapFile),
    basename($bootstrapFile, '.php')
);
/**
 * SymfonyClassCollectionLoader.
 *
 * Based heavily on the Symfony ClassCollectionLoader component, with all
 * the unnecessary bits removed.
 *
 * Concatenates the source files of a list of classes, strips their comments
 * and writes the result, prefixed with a license header, to a single file.
 *
 * @license http://www.opensource.org/licenses/MIT
 * @author Fabien Potencier <fabien@symfony.com>
 */
class SymfonyClassCollectionLoader
{
    /**
     * Cache names already processed in this PHP process (name => true).
     *
     * @var array
     */
    private static $loaded = array();

    // License header for the generated file; %d receives the current year.
    const HEADER = <<<'EOS'
<?php
/*
* This file is part of Mustache.php.
*
* (c) 2010-%d Justin Hileman
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
EOS;

    /**
     * Loads a list of classes and caches them in one big file.
     *
     * @param array  $classes   An array of classes to load
     * @param string $cacheDir  A cache directory
     * @param string $name      The cache name prefix
     * @param string $extension File extension of the resulting file
     *
     * @throws InvalidArgumentException When class can't be loaded
     * @throws RuntimeException         When the cache file cannot be written
     */
    public static function load(array $classes, $cacheDir, $name, $extension = '.php')
    {
        // each $name can only be loaded once per PHP process
        if (isset(self::$loaded[$name])) {
            return;
        }
        self::$loaded[$name] = true;

        $content = '';
        foreach ($classes as $class) {
            if (!class_exists($class) && !interface_exists($class) && (!function_exists('trait_exists') || !trait_exists($class))) {
                throw new InvalidArgumentException(sprintf('Unable to load class "%s"', $class));
            }

            // append the class's source file without its opening/closing PHP tags
            $r = new ReflectionClass($class);
            $content .= preg_replace(array('/^\s*<\?php/', '/\?>\s*$/'), '', file_get_contents($r->getFileName()));
        }

        $cache = $cacheDir . '/' . $name . $extension;

        // date('Y') replaces strftime('%Y'), which is deprecated as of PHP 8.1
        $header = sprintf(self::HEADER, date('Y'));

        // the temporary '<?php ' prefix lets the tokenizer parse the source;
        // substr(..., 5) removes it again because HEADER already opens the file
        self::writeCacheFile($cache, $header . substr(self::stripComments('<?php ' . $content), 5));
    }

    /**
     * Writes a cache file.
     *
     * The content is written to a temporary file in the target directory and
     * then renamed into place; the temporary file is removed if that fails.
     *
     * @param string $file    Filename
     * @param string $content Temporary file content
     *
     * @throws RuntimeException when a cache file cannot be written
     */
    private static function writeCacheFile($file, $content)
    {
        $tmpFile = tempnam(dirname($file), basename($file));

        // tempnam() returns false when no temporary file could be created
        if ($tmpFile !== false) {
            if (false !== @file_put_contents($tmpFile, $content) && @rename($tmpFile, $file)) {
                chmod($file, 0666 & ~umask());

                return;
            }

            // don't leave an orphaned temporary file behind on failure
            if (is_file($tmpFile)) {
                @unlink($tmpFile);
            }
        }

        throw new RuntimeException(sprintf('Failed to write cache file "%s".', $file));
    }

    /**
     * Removes comments from a PHP source string.
     *
     * We don't use the PHP php_strip_whitespace() function
     * as we want the content to be readable and well-formatted.
     *
     * @param string $source A PHP string
     *
     * @return string The PHP string with the comments removed
     */
    private static function stripComments($source)
    {
        // without the tokenizer extension the source is returned untouched
        if (!function_exists('token_get_all')) {
            return $source;
        }

        $output = '';
        foreach (token_get_all($source) as $token) {
            if (is_string($token)) {
                // single-character tokens are returned as plain strings
                $output .= $token;
            } elseif (!in_array($token[0], array(T_COMMENT, T_DOC_COMMENT), true)) {
                $output .= $token[1];
            }
        }

        // replace multiple new lines with a single newline
        $output = preg_replace(array('/\s+$/Sm', '/\n+/S'), "\n", $output);

        return $output;
    }
}

View file

@ -0,0 +1,25 @@
{
"name": "mustache/mustache",
"description": "A Mustache implementation in PHP.",
"keywords": ["templating", "mustache"],
"homepage": "https://github.com/bobthecow/mustache.php",
"type": "library",
"license": "MIT",
"authors": [
{
"name": "Justin Hileman",
"email": "justin@justinhileman.info",
"homepage": "http://justinhileman.com"
}
],
"require": {
"php": ">=5.2.4"
},
"require-dev": {
"phpunit/phpunit": "~3.7|~4.0|~5.0",
"friendsofphp/php-cs-fixer": "~1.11"
},
"autoload": {
"psr-0": { "Mustache": "src/" }
}
}

View file

@ -0,0 +1,88 @@
<?php
/*
* This file is part of Mustache.php.
*
* (c) 2010-2017 Justin Hileman
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
/**
 * Mustache class autoloader.
 *
 * Maps class names starting with "Mustache" onto files below a base
 * directory by turning underscores into directory separators.
 */
class Mustache_Autoloader
{
    private $baseDir;

    /**
     * An array where the key is the baseDir (or 0 for the default base
     * directory) and the value is an instance of this class.
     *
     * @var array
     */
    private static $instances = array();

    /**
     * Autoloader constructor.
     *
     * @param string $baseDir Mustache library base directory (default: dirname(__FILE__).'/..')
     */
    public function __construct($baseDir = null)
    {
        if ($baseDir === null) {
            $baseDir = dirname(__FILE__) . '/..';
        }

        // realpath doesn't always work, for example, with stream URIs; it
        // returns false in that case, so fall back to the path as given
        $realDir = realpath($baseDir);
        if ($realDir !== false && is_dir($realDir)) {
            $this->baseDir = $realDir;
        } else {
            $this->baseDir = $baseDir;
        }
    }

    /**
     * Register a new instance as an SPL autoloader.
     *
     * One instance is kept per distinct $baseDir, so repeated calls with the
     * same argument return the same autoloader.
     *
     * @param string $baseDir Mustache library base directory (default: dirname(__FILE__).'/..')
     *
     * @return Mustache_Autoloader Registered Autoloader instance
     */
    public static function register($baseDir = null)
    {
        $key = $baseDir ? $baseDir : 0;

        if (!isset(self::$instances[$key])) {
            self::$instances[$key] = new self($baseDir);
        }

        $loader = self::$instances[$key];
        spl_autoload_register(array($loader, 'autoload'));

        return $loader;
    }

    /**
     * Autoload Mustache classes.
     *
     * Class names that do not start with "Mustache", and files that do not
     * exist, are silently ignored so other autoloaders can handle them.
     *
     * @param string $class
     */
    public function autoload($class)
    {
        // an empty (or non-string) name has no first character to inspect;
        // bail out instead of triggering an "uninitialized string offset"
        if (!is_string($class) || $class === '') {
            return;
        }

        // tolerate a fully-qualified name with a leading backslash
        if ($class[0] === '\\') {
            $class = substr($class, 1);
        }

        if (strpos($class, 'Mustache') !== 0) {
            return;
        }

        $file = sprintf('%s/%s.php', $this->baseDir, str_replace('_', '/', $class));
        if (is_file($file)) {
            require $file;
        }
    }
}

View file

@ -0,0 +1,43 @@
<?php
/*
* This file is part of Mustache.php.
*
* (c) 2010-2017 Justin Hileman
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
/**
* Mustache Cache interface.
*
* Interface for caching and loading Mustache_Template classes
* generated by the Mustache_Compiler.
*/
interface Mustache_Cache
{
/**
* Load a compiled Mustache_Template class from cache.
*
* @param string $key
*
* @return bool True if the class was successfully loaded
*/
public function load($key);
/**
* Cache and load a compiled Mustache_Template class.
*
* @param string $key
* @param string $value
*/
public function cache($key, $value);
/**
* Set a logger instance.
*
* @param Mustache_Logger|Psr\Log\LoggerInterface|null $logger Logger to use; defaults to null
*/
public function setLogger($logger = null);
}

View file

@ -0,0 +1,60 @@
<?php
/*
* This file is part of Mustache.php.
*
* (c) 2010-2017 Justin Hileman
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
/**
 * Base class for Mustache caches.
 *
 * Holds an optional logger and offers subclasses a log() helper that is a
 * no-op while no logger is set.
 *
 * @abstract
 */
abstract class Mustache_Cache_AbstractCache implements Mustache_Cache
{
    private $logger = null;

    /**
     * Return the logger currently in use, if any.
     *
     * @return Mustache_Logger|Psr\Log\LoggerInterface
     */
    public function getLogger()
    {
        return $this->logger;
    }

    /**
     * Replace the logger; pass null to remove it.
     *
     * @param Mustache_Logger|Psr\Log\LoggerInterface $logger
     *
     * @throws Mustache_Exception_InvalidArgumentException If $logger is neither null nor a supported logger
     */
    public function setLogger($logger = null)
    {
        $accepted = $logger === null
            || $logger instanceof Mustache_Logger
            || is_a($logger, 'Psr\\Log\\LoggerInterface');

        if (!$accepted) {
            throw new Mustache_Exception_InvalidArgumentException('Expected an instance of Mustache_Logger or Psr\\Log\\LoggerInterface.');
        }

        $this->logger = $logger;
    }

    /**
     * Forward a record to the logger; does nothing when no logger is set.
     *
     * @param string $level   The logging level
     * @param string $message The log message
     * @param array  $context The log context
     */
    protected function log($level, $message, array $context = array())
    {
        if ($this->logger === null) {
            return;
        }

        $this->logger->log($level, $message, $context);
    }
}

View file

@ -0,0 +1,161 @@
<?php
/*
* This file is part of Mustache.php.
*
* (c) 2010-2017 Justin Hileman
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
/**
 * Mustache Cache filesystem implementation.
 *
 * A FilesystemCache instance caches Mustache Template classes from the filesystem by name:
 *
 *     $cache = new Mustache_Cache_FilesystemCache(dirname(__FILE__).'/cache');
 *     $cache->cache($className, $compiledSource);
 *
 * The FilesystemCache benefits from any opcode caching that may be setup in your environment. So do that, k?
 */
class Mustache_Cache_FilesystemCache extends Mustache_Cache_AbstractCache
{
    private $baseDir;
    private $fileMode;

    /**
     * Filesystem cache constructor.
     *
     * @param string $baseDir  Directory for compiled templates
     * @param int    $fileMode Override default permissions for cache files. Defaults to using the system-defined umask
     */
    public function __construct($baseDir, $fileMode = null)
    {
        $this->baseDir  = $baseDir;
        $this->fileMode = $fileMode;
    }

    /**
     * Load the class from cache using `require_once`.
     *
     * @param string $key
     *
     * @return bool False if no cache file exists for $key, true once it has been required
     */
    public function load($key)
    {
        $fileName = $this->getCacheFilename($key);
        if (!is_file($fileName)) {
            return false;
        }

        require_once $fileName;

        return true;
    }

    /**
     * Cache and load the compiled class.
     *
     * @param string $key
     * @param string $value
     *
     * @throws Mustache_Exception_RuntimeException If the cache directory or file cannot be written
     */
    public function cache($key, $value)
    {
        $fileName = $this->getCacheFilename($key);

        $this->log(
            Mustache_Logger::DEBUG,
            'Writing to template cache: "{fileName}"',
            array('fileName' => $fileName)
        );

        $this->writeFile($fileName, $value);
        $this->load($key);
    }

    /**
     * Build the cache filename.
     * Subclasses should override for custom cache directory structures.
     *
     * @param string $name
     *
     * @return string
     */
    protected function getCacheFilename($name)
    {
        return sprintf('%s/%s.php', $this->baseDir, $name);
    }

    /**
     * Create cache directory.
     *
     * @throws Mustache_Exception_RuntimeException If unable to create directory
     *
     * @param string $fileName
     *
     * @return string Directory that will contain $fileName
     */
    private function buildDirectoryForFilename($fileName)
    {
        $dirName = dirname($fileName);
        if (!is_dir($dirName)) {
            $this->log(
                Mustache_Logger::INFO,
                'Creating Mustache template cache directory: "{dirName}"',
                array('dirName' => $dirName)
            );

            // errors are suppressed and the outcome is verified with is_dir()
            // below, so a directory created concurrently is not a failure
            @mkdir($dirName, 0777, true);
            // @codeCoverageIgnoreStart
            if (!is_dir($dirName)) {
                throw new Mustache_Exception_RuntimeException(sprintf('Failed to create cache directory "%s".', $dirName));
            }
            // @codeCoverageIgnoreEnd
        }

        return $dirName;
    }

    /**
     * Write cache file.
     *
     * The value is written to a temporary file in the target directory and
     * renamed into place. If writing or renaming fails, the temporary file is
     * removed again before the exception is thrown.
     *
     * @throws Mustache_Exception_RuntimeException If unable to write file
     *
     * @param string $fileName
     * @param string $value
     */
    private function writeFile($fileName, $value)
    {
        $dirName = $this->buildDirectoryForFilename($fileName);

        $this->log(
            Mustache_Logger::DEBUG,
            'Caching compiled template to "{fileName}"',
            array('fileName' => $fileName)
        );

        $tempFile = tempnam($dirName, basename($fileName));

        // tempnam() returns false when no temporary file could be created
        if ($tempFile !== false) {
            if (false !== @file_put_contents($tempFile, $value)) {
                if (@rename($tempFile, $fileName)) {
                    $mode = isset($this->fileMode) ? $this->fileMode : (0666 & ~umask());
                    @chmod($fileName, $mode);

                    return;
                }

                // @codeCoverageIgnoreStart
                $this->log(
                    Mustache_Logger::ERROR,
                    'Unable to rename Mustache temp cache file: "{tempName}" -> "{fileName}"',
                    array('tempName' => $tempFile, 'fileName' => $fileName)
                );
                // @codeCoverageIgnoreEnd
            }

            // @codeCoverageIgnoreStart
            // don't leave an orphaned temporary file in the cache directory
            if (is_file($tempFile)) {
                @unlink($tempFile);
            }
            // @codeCoverageIgnoreEnd
        }

        // @codeCoverageIgnoreStart
        throw new Mustache_Exception_RuntimeException(sprintf('Failed to write cache file "%s".', $fileName));
        // @codeCoverageIgnoreEnd
    }
}

View file

@ -0,0 +1,47 @@
<?php
/*
* This file is part of Mustache.php.
*
* (c) 2010-2017 Justin Hileman
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
/**
* Mustache Cache in-memory implementation.
*
* The in-memory cache is used for uncached lambda section templates. It's also useful during development, but is not
* recommended for production use.
*
* Nothing is ever stored: load() always reports a miss and cache() evaluates
* the compiled source immediately.
*/
class Mustache_Cache_NoopCache extends Mustache_Cache_AbstractCache
{
/**
* Loads nothing. Move along.
*
* @param string $key
*
* @return bool Always false
*/
public function load($key)
{
return false;
}
/**
* Loads the compiled Mustache Template class without caching.
*
* @param string $key   Name used in the log message (logged as "className")
* @param string $value PHP source to evaluate
*/
public function cache($key, $value)
{
$this->log(
Mustache_Logger::WARNING,
'Template cache disabled, evaluating "{className}" class at runtime',
array('className' => $key)
);
// NOTE(review): eval() executes $value as PHP. The leading '?>' leaves PHP
// mode first, so $value is expected to carry its own opening "<?php" tag.
// $value must only ever be compiler output, never untrusted input.
eval('?>' . $value);
}
}

View file

@ -0,0 +1,689 @@
<?php
/*
* This file is part of Mustache.php.
*
* (c) 2010-2017 Justin Hileman
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
/**
* Mustache Compiler class.
*
* This class is responsible for turning a Mustache token parse tree into normal PHP source code.
*/
class Mustache_Compiler
{
private $pragmas;
private $defaultPragmas = array();
private $sections;
private $blocks;
private $source;
private $indentNextLine;
private $customEscape;
private $entityFlags;
private $charset;
private $strictCallables;
/**
* Compile a Mustache token parse tree into PHP source code.
*
* @param string $source Mustache Template source code
* @param string $tree Parse tree of Mustache tokens
* @param string $name Mustache Template class name
* @param bool $customEscape (default: false)
* @param string $charset (default: 'UTF-8')
* @param bool $strictCallables (default: false)
* @param int $entityFlags (default: ENT_COMPAT)
*
* @return string Generated PHP source code
*/
public function compile($source, array $tree, $name, $customEscape = false, $charset = 'UTF-8', $strictCallables = false, $entityFlags = ENT_COMPAT)
{
$this->pragmas = $this->defaultPragmas;
$this->sections = array();
$this->blocks = array();
$this->source = $source;
$this->indentNextLine = true;
$this->customEscape = $customEscape;
$this->entityFlags = $entityFlags;
$this->charset = $charset;
$this->strictCallables = $strictCallables;
return $this->writeCode($tree, $name);
}
/**
* Enable pragmas across all templates, regardless of the presence of pragma
* tags in the individual templates.
*
* @internal Users should set global pragmas in Mustache_Engine, not here :)
*
* @param string[] $pragmas
*/
public function setPragmas(array $pragmas)
{
$this->pragmas = array();
foreach ($pragmas as $pragma) {
$this->pragmas[$pragma] = true;
}
$this->defaultPragmas = $this->pragmas;
}
/**
* Helper function for walking the Mustache token parse tree.
*
* @throws Mustache_Exception_SyntaxException upon encountering unknown token types
*
* @param array $tree Parse tree of Mustache tokens
* @param int $level (default: 0)
*
* @return string Generated PHP source code
*/
private function walk(array $tree, $level = 0)
{
$code = '';
$level++;
foreach ($tree as $node) {
switch ($node[Mustache_Tokenizer::TYPE]) {
case Mustache_Tokenizer::T_PRAGMA:
$this->pragmas[$node[Mustache_Tokenizer::NAME]] = true;
break;
case Mustache_Tokenizer::T_SECTION:
$code .= $this->section(
$node[Mustache_Tokenizer::NODES],
$node[Mustache_Tokenizer::NAME],
isset($node[Mustache_Tokenizer::FILTERS]) ? $node[Mustache_Tokenizer::FILTERS] : array(),
$node[Mustache_Tokenizer::INDEX],
$node[Mustache_Tokenizer::END],
$node[Mustache_Tokenizer::OTAG],
$node[Mustache_Tokenizer::CTAG],
$level
);
break;
case Mustache_Tokenizer::T_INVERTED:
$code .= $this->invertedSection(
$node[Mustache_Tokenizer::NODES],
$node[Mustache_Tokenizer::NAME],
isset($node[Mustache_Tokenizer::FILTERS]) ? $node[Mustache_Tokenizer::FILTERS] : array(),
$level
);
break;
case Mustache_Tokenizer::T_PARTIAL:
$code .= $this->partial(
$node[Mustache_Tokenizer::NAME],
isset($node[Mustache_Tokenizer::INDENT]) ? $node[Mustache_Tokenizer::INDENT] : '',
$level
);
break;
case Mustache_Tokenizer::T_PARENT:
$code .= $this->parent(
$node[Mustache_Tokenizer::NAME],
isset($node[Mustache_Tokenizer::INDENT]) ? $node[Mustache_Tokenizer::INDENT] : '',
$node[Mustache_Tokenizer::NODES],
$level
);
break;
case Mustache_Tokenizer::T_BLOCK_ARG:
$code .= $this->blockArg(
$node[Mustache_Tokenizer::NODES],
$node[Mustache_Tokenizer::NAME],
$node[Mustache_Tokenizer::INDEX],
$node[Mustache_Tokenizer::END],
$node[Mustache_Tokenizer::OTAG],
$node[Mustache_Tokenizer::CTAG],
$level
);
break;
case Mustache_Tokenizer::T_BLOCK_VAR:
$code .= $this->blockVar(
$node[Mustache_Tokenizer::NODES],
$node[Mustache_Tokenizer::NAME],
$node[Mustache_Tokenizer::INDEX],
$node[Mustache_Tokenizer::END],
$node[Mustache_Tokenizer::OTAG],
$node[Mustache_Tokenizer::CTAG],
$level
);
break;
case Mustache_Tokenizer::T_COMMENT:
break;
case Mustache_Tokenizer::T_ESCAPED:
case Mustache_Tokenizer::T_UNESCAPED:
case Mustache_Tokenizer::T_UNESCAPED_2:
$code .= $this->variable(
$node[Mustache_Tokenizer::NAME],
isset($node[Mustache_Tokenizer::FILTERS]) ? $node[Mustache_Tokenizer::FILTERS] : array(),
$node[Mustache_Tokenizer::TYPE] === Mustache_Tokenizer::T_ESCAPED,
$level
);
break;
case Mustache_Tokenizer::T_TEXT:
$code .= $this->text($node[Mustache_Tokenizer::VALUE], $level);
break;
default:
throw new Mustache_Exception_SyntaxException(sprintf('Unknown token type: %s', $node[Mustache_Tokenizer::TYPE]), $node);
}
}
return $code;
}
const KLASS = '<?php
class %s extends Mustache_Template
{
private $lambdaHelper;%s
public function renderInternal(Mustache_Context $context, $indent = \'\')
{
$this->lambdaHelper = new Mustache_LambdaHelper($this->mustache, $context);
$buffer = \'\';
%s
return $buffer;
}
%s
%s
}';
const KLASS_NO_LAMBDAS = '<?php
class %s extends Mustache_Template
{%s
public function renderInternal(Mustache_Context $context, $indent = \'\')
{
$buffer = \'\';
%s
return $buffer;
}
}';
const STRICT_CALLABLE = 'protected $strictCallables = true;';
/**
* Generate Mustache Template class PHP source.
*
* @param array $tree Parse tree of Mustache tokens
* @param string $name Mustache Template class name
*
* @return string Generated PHP source code
*/
private function writeCode($tree, $name)
{
$code = $this->walk($tree);
$sections = implode("\n", $this->sections);
$blocks = implode("\n", $this->blocks);
$klass = empty($this->sections) && empty($this->blocks) ? self::KLASS_NO_LAMBDAS : self::KLASS;
$callable = $this->strictCallables ? $this->prepare(self::STRICT_CALLABLE) : '';
return sprintf($this->prepare($klass, 0, false, true), $name, $callable, $code, $sections, $blocks);
}
const BLOCK_VAR = '
$blockFunction = $context->findInBlock(%s);
if (is_callable($blockFunction)) {
$buffer .= call_user_func($blockFunction, $context);
%s}
';
const BLOCK_VAR_ELSE = '} else {%s';
/**
* Generate Mustache Template inheritance block variable PHP source.
*
* @param array $nodes Array of child tokens
* @param string $id Section name
* @param int $start Section start offset
* @param int $end Section end offset
* @param string $otag Current Mustache opening tag
* @param string $ctag Current Mustache closing tag
* @param int $level
*
* @return string Generated PHP source code
*/
private function blockVar($nodes, $id, $start, $end, $otag, $ctag, $level)
{
$id = var_export($id, true);
$else = $this->walk($nodes, $level);
if ($else !== '') {
$else = sprintf($this->prepare(self::BLOCK_VAR_ELSE, $level + 1, false, true), $else);
}
return sprintf($this->prepare(self::BLOCK_VAR, $level), $id, $else);
}
const BLOCK_ARG = '%s => array($this, \'block%s\'),';
/**
* Generate Mustache Template inheritance block argument PHP source.
*
* @param array $nodes Array of child tokens
* @param string $id Section name
* @param int $start Section start offset
* @param int $end Section end offset
* @param string $otag Current Mustache opening tag
* @param string $ctag Current Mustache closing tag
* @param int $level
*
* @return string Generated PHP source code
*/
private function blockArg($nodes, $id, $start, $end, $otag, $ctag, $level)
{
$key = $this->block($nodes);
$id = var_export($id, true);
return sprintf($this->prepare(self::BLOCK_ARG, $level), $id, $key);
}
const BLOCK_FUNCTION = '
public function block%s($context)
{
$indent = $buffer = \'\';%s
return $buffer;
}
';
/**
* Generate Mustache Template inheritance block function PHP source.
*
* @param array $nodes Array of child tokens
*
* @return string key of new block function
*/
private function block($nodes)
{
$code = $this->walk($nodes, 0);
$key = ucfirst(md5($code));
if (!isset($this->blocks[$key])) {
$this->blocks[$key] = sprintf($this->prepare(self::BLOCK_FUNCTION, 0), $key, $code);
}
return $key;
}
const SECTION_CALL = '
$value = $context->%s(%s);%s
$buffer .= $this->section%s($context, $indent, $value);
';
const SECTION = '
private function section%s(Mustache_Context $context, $indent, $value)
{
$buffer = \'\';
if (%s) {
$source = %s;
$result = (string) call_user_func($value, $source, %s);
if (strpos($result, \'{{\') === false) {
$buffer .= $result;
} else {
$buffer .= $this->mustache
->loadLambda($result%s)
->renderInternal($context);
}
} elseif (!empty($value)) {
$values = $this->isIterable($value) ? $value : array($value);
foreach ($values as $value) {
$context->push($value);
%s
$context->pop();
}
}
return $buffer;
}
';
/**
* Generate Mustache Template section PHP source.
*
* @param array $nodes Array of child tokens
* @param string $id Section name
* @param string[] $filters Array of filters
* @param int $start Section start offset
* @param int $end Section end offset
* @param string $otag Current Mustache opening tag
* @param string $ctag Current Mustache closing tag
* @param int $level
*
* @return string Generated section PHP source code
*/
private function section($nodes, $id, $filters, $start, $end, $otag, $ctag, $level)
{
$source = var_export(substr($this->source, $start, $end - $start), true);
$callable = $this->getCallable();
if ($otag !== '{{' || $ctag !== '}}') {
$delimTag = var_export(sprintf('{{= %s %s =}}', $otag, $ctag), true);
$helper = sprintf('$this->lambdaHelper->withDelimiters(%s)', $delimTag);
$delims = ', ' . $delimTag;
} else {
$helper = '$this->lambdaHelper';
$delims = '';
}
$key = ucfirst(md5($delims . "\n" . $source));
if (!isset($this->sections[$key])) {
$this->sections[$key] = sprintf($this->prepare(self::SECTION), $key, $callable, $source, $helper, $delims, $this->walk($nodes, 2));
}
$method = $this->getFindMethod($id);
$id = var_export($id, true);
$filters = $this->getFilters($filters, $level);
return sprintf($this->prepare(self::SECTION_CALL, $level), $method, $id, $filters, $key);
}
const INVERTED_SECTION = '
$value = $context->%s(%s);%s
if (empty($value)) {
%s
}
';
/**
* Generate Mustache Template inverted section PHP source.
*
* @param array $nodes Array of child tokens
* @param string $id Section name
* @param string[] $filters Array of filters
* @param int $level
*
* @return string Generated inverted section PHP source code
*/
private function invertedSection($nodes, $id, $filters, $level)
{
$method = $this->getFindMethod($id);
$id = var_export($id, true);
$filters = $this->getFilters($filters, $level);
return sprintf($this->prepare(self::INVERTED_SECTION, $level), $method, $id, $filters, $this->walk($nodes, $level));
}
const PARTIAL_INDENT = ', $indent . %s';
const PARTIAL = '
if ($partial = $this->mustache->loadPartial(%s)) {
$buffer .= $partial->renderInternal($context%s);
}
';
/**
* Generate Mustache Template partial call PHP source.
*
* @param string $id Partial name
* @param string $indent Whitespace indent to apply to partial
* @param int $level
*
* @return string Generated partial call PHP source code
*/
private function partial($id, $indent, $level)
{
if ($indent !== '') {
$indentParam = sprintf(self::PARTIAL_INDENT, var_export($indent, true));
} else {
$indentParam = '';
}
return sprintf(
$this->prepare(self::PARTIAL, $level),
var_export($id, true),
$indentParam
);
}
const PARENT = '
if ($parent = $this->mustache->loadPartial(%s)) {
$context->pushBlockContext(array(%s
));
$buffer .= $parent->renderInternal($context, $indent);
$context->popBlockContext();
}
';
const PARENT_NO_CONTEXT = '
if ($parent = $this->mustache->loadPartial(%s)) {
$buffer .= $parent->renderInternal($context, $indent);
}
';
/**
* Generate Mustache Template inheritance parent call PHP source.
*
* @param string $id Parent tag name
* @param string $indent Whitespace indent to apply to parent
* @param array $children Child nodes
* @param int $level
*
* @return string Generated PHP source code
*/
private function parent($id, $indent, array $children, $level)
{
$realChildren = array_filter($children, array(__CLASS__, 'onlyBlockArgs'));
if (empty($realChildren)) {
return sprintf($this->prepare(self::PARENT_NO_CONTEXT, $level), var_export($id, true));
}
return sprintf(
$this->prepare(self::PARENT, $level),
var_export($id, true),
$this->walk($realChildren, $level + 1)
);
}
/**
* Helper method for filtering out non-block-arg tokens.
*
* @param array $node
*
* @return bool True if $node is a block arg token
*/
private static function onlyBlockArgs(array $node)
{
return $node[Mustache_Tokenizer::TYPE] === Mustache_Tokenizer::T_BLOCK_ARG;
}
const VARIABLE = '
$value = $this->resolveValue($context->%s(%s), $context);%s
$buffer .= %s($value === null ? \'\' : %s);
';
/**
* Generate Mustache Template variable interpolation PHP source.
*
* @param string $id Variable name
* @param string[] $filters Array of filters
* @param bool $escape Escape the variable value for output?
* @param int $level
*
* @return string Generated variable interpolation PHP source
*/
private function variable($id, $filters, $escape, $level)
{
$method = $this->getFindMethod($id);
$id = ($method !== 'last') ? var_export($id, true) : '';
$filters = $this->getFilters($filters, $level);
$value = $escape ? $this->getEscape() : '$value';
return sprintf($this->prepare(self::VARIABLE, $level), $method, $id, $filters, $this->flushIndent(), $value);
}
const FILTER = '
$filter = $context->%s(%s);
if (!(%s)) {
throw new Mustache_Exception_UnknownFilterException(%s);
}
$value = call_user_func($filter, $value);%s
';
/**
* Generate Mustache Template variable filtering PHP source.
*
* @param string[] $filters Array of filters
* @param int $level
*
* @return string Generated filter PHP source
*/
private function getFilters(array $filters, $level)
{
if (empty($filters)) {
return '';
}
$name = array_shift($filters);
$method = $this->getFindMethod($name);
$filter = ($method !== 'last') ? var_export($name, true) : '';
$callable = $this->getCallable('$filter');
$msg = var_export($name, true);
return sprintf($this->prepare(self::FILTER, $level), $method, $filter, $callable, $msg, $this->getFilters($filters, $level));
}
const LINE = '$buffer .= "\n";';
const TEXT = '$buffer .= %s%s;';
/**
* Generate Mustache Template output Buffer call PHP source.
*
* @param string $text
* @param int $level
*
* @return string Generated output Buffer call PHP source
*/
private function text($text, $level)
{
$indentNextLine = (substr($text, -1) === "\n");
$code = sprintf($this->prepare(self::TEXT, $level), $this->flushIndent(), var_export($text, true));
$this->indentNextLine = $indentNextLine;
return $code;
}
/**
* Prepare PHP source code snippet for output.
*
* @param string $text
* @param int $bonus Additional indent level (default: 0)
* @param bool $prependNewline Prepend a newline to the snippet? (default: true)
* @param bool $appendNewline Append a newline to the snippet? (default: false)
*
* @return string PHP source code snippet
*/
private function prepare($text, $bonus = 0, $prependNewline = true, $appendNewline = false)
{
$text = ($prependNewline ? "\n" : '') . trim($text);
if ($prependNewline) {
$bonus++;
}
if ($appendNewline) {
$text .= "\n";
}
return preg_replace("/\n( {8})?/", "\n" . str_repeat(' ', $bonus * 4), $text);
}
const DEFAULT_ESCAPE = 'htmlspecialchars(%s, %s, %s)';
const CUSTOM_ESCAPE = 'call_user_func($this->mustache->getEscape(), %s)';
/**
* Get the current escaper.
*
* @param string $value (default: '$value')
*
* @return string Either a custom callback, or an inline call to `htmlspecialchars`
*/
private function getEscape($value = '$value')
{
    if (!$this->customEscape) {
        // Default: inline htmlspecialchars() call with the configured flags and charset.
        $flags   = var_export($this->entityFlags, true);
        $charset = var_export($this->charset, true);

        return sprintf(self::DEFAULT_ESCAPE, $value, $flags, $charset);
    }

    // Custom: defer to the engine's escape callback at render time.
    return sprintf(self::CUSTOM_ESCAPE, $value);
}
/**
* Select the appropriate Context `find` method for a given $id.
*
* The return value will be one of `find`, `findDot`, `findAnchoredDot` or `last`.
*
* @see Mustache_Context::find
* @see Mustache_Context::findDot
* @see Mustache_Context::last
*
* @param string $id Variable name
*
* @return string `find` method name
*/
private function getFindMethod($id)
{
    // A bare dot refers to the current context frame.
    if ($id === '.') {
        return 'last';
    }

    // A leading dot is only special while the ANCHORED-DOT pragma is enabled.
    $anchoredDotEnabled = !empty($this->pragmas[Mustache_Engine::PRAGMA_ANCHORED_DOT]);
    if ($anchoredDotEnabled && substr($id, 0, 1) === '.') {
        return 'findAnchoredDot';
    }

    // Plain names use `find`; anything containing a dot is a dotted selector.
    return (strpos($id, '.') === false) ? 'find' : 'findDot';
}
const IS_CALLABLE = '!is_string(%s) && is_callable(%s)';
const STRICT_IS_CALLABLE = 'is_object(%s) && is_callable(%s)';
/**
* Helper function to compile strict vs lax "is callable" logic.
*
* @param string $variable (default: '$value')
*
* @return string "is callable" logic
*/
private function getCallable($variable = '$value')
{
    // Both templates take the variable expression twice.
    if ($this->strictCallables) {
        $template = self::STRICT_IS_CALLABLE;
    } else {
        $template = self::IS_CALLABLE;
    }

    return sprintf($template, $variable, $variable);
}
const LINE_INDENT = '$indent . ';
/**
* Get the current $indent prefix to write to the buffer.
*
* @return string "$indent . " or ""
*/
private function flushIndent()
{
    if ($this->indentNextLine) {
        // The flag is one-shot: emitting the indent prefix clears it.
        $this->indentNextLine = false;

        return self::LINE_INDENT;
    }

    return '';
}
}

View file

@ -0,0 +1,242 @@
<?php
/*
* This file is part of Mustache.php.
*
* (c) 2010-2017 Justin Hileman
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
/**
 * Rendering context for a Mustache template.
 *
 * Holds two independent stacks: the regular context stack that variable lookups walk from the
 * innermost frame outwards, and a block context stack used for block arguments.
 */
class Mustache_Context
{
    private $stack      = array();
    private $blockStack = array();

    /**
     * Create a rendering Context.
     *
     * @param mixed $context Initial context frame; no frame is added when null (default: null)
     */
    public function __construct($context = null)
    {
        if ($context === null) {
            return;
        }

        $this->stack = array($context);
    }

    /**
     * Add a frame on top of the context stack.
     *
     * @param mixed $value Object or array to use for context
     */
    public function push($value)
    {
        $this->stack[] = $value;
    }

    /**
     * Add a frame on top of the block context stack.
     *
     * @param mixed $value Object or array to use for block context
     */
    public function pushBlockContext($value)
    {
        $this->blockStack[] = $value;
    }

    /**
     * Remove and return the top frame of the context stack.
     *
     * @return mixed The removed Context frame (object or array)
     */
    public function pop()
    {
        return array_pop($this->stack);
    }

    /**
     * Remove and return the top frame of the block context stack.
     *
     * @return mixed The removed block Context frame (object or array)
     */
    public function popBlockContext()
    {
        return array_pop($this->blockStack);
    }

    /**
     * Return the top frame of the context stack without removing it.
     *
     * @return mixed Top Context frame (object or array)
     */
    public function last()
    {
        return end($this->stack);
    }

    /**
     * Look up a variable by name in the context stack.
     *
     * Frames are examined from the top of the stack (innermost section) down to the first frame:
     *
     *  * An array frame matches when it contains the key $id.
     *  * An object frame (other than a Closure) is checked for a method named $id, then for a
     *    property for which `isset` succeeds, then for an `ArrayAccess` offset.
     *  * If no frame matches, an empty string is returned.
     *
     * @param string $id Variable name
     *
     * @return mixed Variable value, or '' if not found
     */
    public function find($id)
    {
        return $this->findVariableInStack($id, $this->stack);
    }

    /**
     * Look up a 'dot notation' variable in the context stack.
     *
     * Only the first segment of the dotted name is searched for through the whole stack. Every
     * following segment is looked up solely inside the value found for the previous segment.
     * For example, given the context:
     *
     *     $data = array(
     *         'name'  => 'Fred',
     *         'child' => array(
     *             'name' => 'Bob'
     *         ),
     *     );
     *
     * ... and the following Mustache template:
     *
     *     {{ child.name }}
     *
     * ... `name` is looked up only within the `child` value, never in the parent frames.
     *
     * @param string $id Dotted variable selector
     *
     * @return mixed Variable value, or '' if not found
     */
    public function findDot($id)
    {
        $segments = explode('.', $id);
        $head     = array_shift($segments);
        $start    = $this->findVariableInStack($head, $this->stack);

        return $this->descend($start, $segments);
    }

    /**
     * Look up an 'anchored dot notation' variable.
     *
     * Works like findDot(), except that traversal starts from the top frame of the context
     * stack instead of from a value searched for across the whole stack.
     *
     * @see Mustache_Context::findDot
     *
     * @throws Mustache_Exception_InvalidArgumentException if $id does not start with a dot
     *
     * @param string $id Dotted variable selector
     *
     * @return mixed Variable value, or '' if not found
     */
    public function findAnchoredDot($id)
    {
        $segments = explode('.', $id);

        // An anchored selector begins with '.', so the segment before the first dot is empty.
        if (array_shift($segments) !== '') {
            throw new Mustache_Exception_InvalidArgumentException(sprintf('Unexpected id for findAnchoredDot: %s', $id));
        }

        return $this->descend($this->last(), $segments);
    }

    /**
     * Look up an argument in the block context stack.
     *
     * Frames are examined in the order they were pushed; the first frame containing $id wins.
     *
     * @param string $id
     *
     * @return mixed Variable value, or '' if not found
     */
    public function findInBlock($id)
    {
        foreach ($this->blockStack as $blockFrame) {
            if (!array_key_exists($id, $blockFrame)) {
                continue;
            }

            return $blockFrame[$id];
        }

        return '';
    }

    /**
     * Resolve the remaining segments of a dotted selector, one nested value at a time.
     *
     * Traversal stops as soon as a lookup yields an empty string.
     *
     * @param mixed    $value    Value to start from
     * @param string[] $segments Remaining selector segments
     *
     * @return mixed Resolved value, or '' if any segment was not found
     */
    private function descend($value, array $segments)
    {
        foreach ($segments as $segment) {
            if ($value === '') {
                break;
            }

            $value = $this->findVariableInStack($segment, array($value));
        }

        return $value;
    }

    /**
     * Search a stack of frames, top first, for a variable.
     *
     * @see Mustache_Context::find
     *
     * @param string $id    Variable name
     * @param array  $stack Context stack
     *
     * @return mixed Variable value, or '' if not found
     */
    private function findVariableInStack($id, array $stack)
    {
        $index = count($stack);

        while ($index-- > 0) {
            $frame = &$stack[$index];
            $type  = gettype($frame);

            if ($type === 'array') {
                if (array_key_exists($id, $frame)) {
                    return $frame[$id];
                }
                continue;
            }

            // Only non-Closure objects can provide values; every other type is skipped.
            if ($type !== 'object' || $frame instanceof Closure) {
                continue;
            }

            // Note that is_callable() *will not work here*
            // See https://github.com/bobthecow/mustache.php/wiki/Magic-Methods
            if (method_exists($frame, $id)) {
                return $frame->$id();
            }

            if (isset($frame->$id)) {
                return $frame->$id;
            }

            if ($frame instanceof ArrayAccess && isset($frame[$id])) {
                return $frame[$id];
            }
        }

        return '';
    }
}

View file

@ -0,0 +1,829 @@
<?php
/*
* This file is part of Mustache.php.
*
* (c) 2010-2017 Justin Hileman
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
/**
* A Mustache implementation in PHP.
*
* {@link http://defunkt.github.com/mustache}
*
* Mustache is a framework-agnostic logic-less templating language. It enforces separation of view
* logic from template files. In fact, it is not even possible to embed logic in the template.
*
* This is very, very rad.
*
* @author Justin Hileman {@link http://justinhileman.com}
*/
class Mustache_Engine
{
const VERSION = '2.14.2';
const SPEC_VERSION = '1.2.2';
const PRAGMA_FILTERS = 'FILTERS';
const PRAGMA_BLOCKS = 'BLOCKS';
const PRAGMA_ANCHORED_DOT = 'ANCHORED-DOT';
// Known pragmas
private static $knownPragmas = array(
self::PRAGMA_FILTERS => true,
self::PRAGMA_BLOCKS => true,
self::PRAGMA_ANCHORED_DOT => true,
);
// Template cache
private $templates = array();
// Environment
private $templateClassPrefix = '__Mustache_';
private $cache;
private $lambdaCache;
private $cacheLambdaTemplates = false;
private $loader;
private $partialsLoader;
private $helpers;
private $escape;
private $entityFlags = ENT_COMPAT;
private $charset = 'UTF-8';
private $logger;
private $strictCallables = false;
private $pragmas = array();
private $delimiters;
// Services
private $tokenizer;
private $parser;
private $compiler;
/**
* Mustache class constructor.
*
* Passing an $options array allows overriding certain Mustache options during instantiation:
*
* $options = array(
* // The class prefix for compiled templates. Defaults to '__Mustache_'.
* 'template_class_prefix' => '__MyTemplates_',
*
* // A Mustache cache instance or a cache directory string for compiled templates.
* // Mustache will not cache templates unless this is set.
* 'cache' => dirname(__FILE__).'/tmp/cache/mustache',
*
* // Override default permissions for cache files. Defaults to using the system-defined umask. It is
* // *strongly* recommended that you configure your umask properly rather than overriding permissions here.
* 'cache_file_mode' => 0666,
*
* // Optionally, enable caching for lambda section templates. This is generally not recommended, as lambda
* // sections are often too dynamic to benefit from caching.
* 'cache_lambda_templates' => true,
*
* // Customize the tag delimiters used by this engine instance. Note that overriding here changes the
* // delimiters used to parse all templates and partials loaded by this instance. To override just for a
* // single template, use an inline "change delimiters" tag at the start of the template file:
* //
* // {{=<% %>=}}
* //
* 'delimiters' => '<% %>',
*
* // A Mustache template loader instance. Uses a StringLoader if not specified.
* 'loader' => new Mustache_Loader_FilesystemLoader(dirname(__FILE__).'/views'),
*
* // A Mustache loader instance for partials.
* 'partials_loader' => new Mustache_Loader_FilesystemLoader(dirname(__FILE__).'/views/partials'),
*
* // An array of Mustache partials. Useful for quick-and-dirty string template loading, but not as
* // efficient or lazy as a Filesystem (or database) loader.
* 'partials' => array('foo' => file_get_contents(dirname(__FILE__).'/views/partials/foo.mustache')),
*
* // An array of 'helpers'. Helpers can be global variables or objects, closures (e.g. for higher order
* // sections), or any other valid Mustache context value. They will be prepended to the context stack,
* // so they will be available in any template loaded by this Mustache instance.
* 'helpers' => array('i18n' => function ($text) {
* // do something translatey here...
* }),
*
* // An 'escape' callback, responsible for escaping double-mustache variables.
* 'escape' => function ($value) {
* return htmlspecialchars($value, ENT_COMPAT, 'UTF-8');
* },
*
* // Type argument for `htmlspecialchars`. Defaults to ENT_COMPAT. You may prefer ENT_QUOTES.
* 'entity_flags' => ENT_QUOTES,
*
* // Character set for `htmlspecialchars`. Defaults to 'UTF-8'. Use 'UTF-8'.
* 'charset' => 'ISO-8859-1',
*
* // A Mustache Logger instance. No logging will occur unless this is set. Using a PSR-3 compatible
* // logging library -- such as Monolog -- is highly recommended. A simple stream logger implementation is
* // available as well:
* 'logger' => new Mustache_Logger_StreamLogger('php://stderr'),
*
* // Only treat Closure instances and invokable classes as callable. If true, values like
* // `array('ClassName', 'methodName')` and `array($classInstance, 'methodName')`, which are traditionally
* // "callable" in PHP, are not called to resolve variables for interpolation or section contexts. This
* // helps protect against arbitrary code execution when user input is passed directly into the template.
* // This currently defaults to false, but will default to true in v3.0.
* 'strict_callables' => true,
*
* // Enable pragmas across all templates, regardless of the presence of pragma tags in the individual
* // templates.
* 'pragmas' => [Mustache_Engine::PRAGMA_FILTERS],
* );
*
* @throws Mustache_Exception_InvalidArgumentException If `escape` option is not callable
*
* @param array $options (default: array())
*/
public function __construct(array $options = array())
{
    if (isset($options['template_class_prefix'])) {
        $prefix = $options['template_class_prefix'];
        if ((string) $prefix === '') {
            throw new Mustache_Exception_InvalidArgumentException('Mustache Constructor "template_class_prefix" must not be empty');
        }
        $this->templateClassPrefix = $prefix;
    }

    // The cache is applied before the logger because setLogger() consults the cache.
    if (isset($options['cache'])) {
        $cache = $options['cache'];
        if (is_string($cache)) {
            // A string is used as the directory of a filesystem cache.
            $fileMode = isset($options['cache_file_mode']) ? $options['cache_file_mode'] : null;
            $cache    = new Mustache_Cache_FilesystemCache($cache, $fileMode);
        }
        $this->setCache($cache);
    }

    if (isset($options['cache_lambda_templates'])) {
        $this->cacheLambdaTemplates = (bool) $options['cache_lambda_templates'];
    }

    if (isset($options['loader'])) {
        $this->setLoader($options['loader']);
    }

    // The partials loader is applied before 'partials', which are stored on that loader.
    if (isset($options['partials_loader'])) {
        $this->setPartialsLoader($options['partials_loader']);
    }

    if (isset($options['partials'])) {
        $this->setPartials($options['partials']);
    }

    if (isset($options['helpers'])) {
        $this->setHelpers($options['helpers']);
    }

    if (isset($options['escape'])) {
        $escape = $options['escape'];
        if (!is_callable($escape)) {
            throw new Mustache_Exception_InvalidArgumentException('Mustache Constructor "escape" option must be callable');
        }
        $this->escape = $escape;
    }

    if (isset($options['logger'])) {
        $this->setLogger($options['logger']);
    }

    // Options that are copied verbatim onto the property of the matching name.
    $plainOptions = array(
        'entity_flags'     => 'entityFlags',
        'charset'          => 'charset',
        'strict_callables' => 'strictCallables',
        'delimiters'       => 'delimiters',
    );
    foreach ($plainOptions as $option => $property) {
        if (isset($options[$option])) {
            $this->$property = $options[$option];
        }
    }

    if (isset($options['pragmas'])) {
        foreach ($options['pragmas'] as $pragma) {
            if (!isset(self::$knownPragmas[$pragma])) {
                throw new Mustache_Exception_InvalidArgumentException(sprintf('Unknown pragma: "%s".', $pragma));
            }
            // Stored as name => true; getPragmas() returns the keys.
            $this->pragmas[$pragma] = true;
        }
    }
}
/**
* Shortcut 'render' invocation.
*
* Equivalent to calling `$mustache->loadTemplate($template)->render($context);`
*
* @see Mustache_Engine::loadTemplate
* @see Mustache_Template::render
*
* @param string $template
* @param mixed $context (default: array())
*
* @return string Rendered template
*/
public function render($template, $context = array())
{
    // Resolve (and, if needed, compile) the template first, then render it with the context.
    $compiled = $this->loadTemplate($template);

    return $compiled->render($context);
}
/**
* Get the current Mustache escape callback.
*
* @return callable|null
*/
public function getEscape()
{
// Stays null unless an 'escape' callable was passed to the constructor.
return $this->escape;
}
/**
* Get the current Mustache entity flags used when escaping.
*
* @return int
*/
public function getEntityFlags()
{
// ENT_COMPAT unless overridden by the 'entity_flags' constructor option.
return $this->entityFlags;
}
/**
* Get the current Mustache character set.
*
* @return string
*/
public function getCharset()
{
// 'UTF-8' unless overridden by the 'charset' constructor option.
return $this->charset;
}
/**
* Get the current globally enabled pragmas.
*
* @return array
*/
public function getPragmas()
{
// Pragmas are stored as name => true, so the keys are the enabled pragma names.
return array_keys($this->pragmas);
}
/**
* Set the Mustache template Loader instance.
*
* @param Mustache_Loader $loader
*/
public function setLoader(Mustache_Loader $loader)
{
// Once set, getLoader() returns this instance instead of creating a StringLoader.
$this->loader = $loader;
}
/**
* Get the current Mustache template Loader instance.
*
* If no Loader instance has been explicitly specified, this method will instantiate and return
* a StringLoader instance.
*
* @return Mustache_Loader
*/
public function getLoader()
{
    if (isset($this->loader)) {
        return $this->loader;
    }

    // No loader configured: create the default string loader and keep it for later calls.
    $this->loader = new Mustache_Loader_StringLoader();

    return $this->loader;
}
/**
* Set the Mustache partials Loader instance.
*
* @param Mustache_Loader $partialsLoader
*/
public function setPartialsLoader(Mustache_Loader $partialsLoader)
{
// Once set, getPartialsLoader() returns this instance instead of creating an ArrayLoader.
$this->partialsLoader = $partialsLoader;
}
/**
* Get the current Mustache partials Loader instance.
*
* If no Loader instance has been explicitly specified, this method will instantiate and return
* an ArrayLoader instance.
*
* @return Mustache_Loader
*/
public function getPartialsLoader()
{
    if (isset($this->partialsLoader)) {
        return $this->partialsLoader;
    }

    // No partials loader configured: create the default array loader and keep it.
    $this->partialsLoader = new Mustache_Loader_ArrayLoader();

    return $this->partialsLoader;
}
/**
* Set partials for the current partials Loader instance.
*
* @throws Mustache_Exception_RuntimeException If the current Loader instance is immutable
*
* @param array $partials (default: array())
*/
public function setPartials(array $partials = array())
{
    if (!isset($this->partialsLoader)) {
        $this->partialsLoader = new Mustache_Loader_ArrayLoader();
    }

    $loader = $this->partialsLoader;

    // Only loaders implementing the mutable interface can receive templates.
    if ($loader instanceof Mustache_Loader_MutableLoader) {
        $loader->setTemplates($partials);

        return;
    }

    throw new Mustache_Exception_RuntimeException('Unable to set partials on an immutable Mustache Loader instance');
}
/**
* Set an array of Mustache helpers.
*
* An array of 'helpers'. Helpers can be global variables or objects, closures (e.g. for higher order sections), or
* any other valid Mustache context value. They will be prepended to the context stack, so they will be available in
* any template loaded by this Mustache instance.
*
* @throws Mustache_Exception_InvalidArgumentException if $helpers is not an array or Traversable
*
* @param array|Traversable $helpers
*/
public function setHelpers($helpers)
{
    $isIterable = is_array($helpers) || $helpers instanceof Traversable;
    if (!$isIterable) {
        throw new Mustache_Exception_InvalidArgumentException('setHelpers expects an array of helpers');
    }

    // Replace, rather than extend, the current helper set.
    $this->getHelpers()->clear();

    foreach ($helpers as $helperName => $helperValue) {
        $this->addHelper($helperName, $helperValue);
    }
}
/**
* Get the current set of Mustache helpers.
*
* @see Mustache_Engine::setHelpers
*
* @return Mustache_HelperCollection
*/
public function getHelpers()
{
    if (isset($this->helpers)) {
        return $this->helpers;
    }

    // The helper collection is created on first use.
    $this->helpers = new Mustache_HelperCollection();

    return $this->helpers;
}
/**
* Add a new Mustache helper.
*
* @see Mustache_Engine::setHelpers
*
* @param string $name
* @param mixed $helper
*/
public function addHelper($name, $helper)
{
    $collection = $this->getHelpers();
    $collection->add($name, $helper);
}
/**
* Get a Mustache helper by name.
*
* @see Mustache_Engine::setHelpers
*
* @param string $name
*
* @return mixed Helper
*/
public function getHelper($name)
{
    $collection = $this->getHelpers();

    return $collection->get($name);
}
/**
* Check whether this Mustache instance has a helper.
*
* @see Mustache_Engine::setHelpers
*
* @param string $name
*
* @return bool True if the helper is present
*/
public function hasHelper($name)
{
    $collection = $this->getHelpers();

    return $collection->has($name);
}
/**
* Remove a helper by name.
*
* @see Mustache_Engine::setHelpers
*
* @param string $name
*/
public function removeHelper($name)
{
    $collection = $this->getHelpers();
    $collection->remove($name);
}
/**
* Set the Mustache Logger instance.
*
* @throws Mustache_Exception_InvalidArgumentException If logger is not an instance of Mustache_Logger or Psr\Log\LoggerInterface
*
* @param Mustache_Logger|Psr\Log\LoggerInterface $logger
*/
public function setLogger($logger = null)
{
    // null clears the logger; otherwise a Mustache_Logger or a PSR logger is required.
    $acceptable = $logger === null
        || $logger instanceof Mustache_Logger
        || is_a($logger, 'Psr\\Log\\LoggerInterface');

    if (!$acceptable) {
        throw new Mustache_Exception_InvalidArgumentException('Expected an instance of Mustache_Logger or Psr\\Log\\LoggerInterface.');
    }

    // A cache that has no logger of its own receives this one.
    $cache = $this->getCache();
    if ($cache->getLogger() === null) {
        $cache->setLogger($logger);
    }

    $this->logger = $logger;
}
/**
* Get the current Mustache Logger instance.
*
* @return Mustache_Logger|Psr\Log\LoggerInterface
*/
public function getLogger()
{
// Null when no logger has been set on this engine.
return $this->logger;
}
/**
* Set the Mustache Tokenizer instance.
*
* @param Mustache_Tokenizer $tokenizer
*/
public function setTokenizer(Mustache_Tokenizer $tokenizer)
{
// Once set, getTokenizer() returns this instance instead of creating a new one.
$this->tokenizer = $tokenizer;
}
/**
* Get the current Mustache Tokenizer instance.
*
* If no Tokenizer instance has been explicitly specified, this method will instantiate and return a new one.
*
* @return Mustache_Tokenizer
*/
public function getTokenizer()
{
    if (isset($this->tokenizer)) {
        return $this->tokenizer;
    }

    // Created on first use and reused afterwards.
    $this->tokenizer = new Mustache_Tokenizer();

    return $this->tokenizer;
}
/**
* Set the Mustache Parser instance.
*
* @param Mustache_Parser $parser
*/
public function setParser(Mustache_Parser $parser)
{
// Once set, getParser() returns this instance instead of creating a new one.
$this->parser = $parser;
}
/**
* Get the current Mustache Parser instance.
*
* If no Parser instance has been explicitly specified, this method will instantiate and return a new one.
*
* @return Mustache_Parser
*/
public function getParser()
{
    if (isset($this->parser)) {
        return $this->parser;
    }

    // Created on first use and reused afterwards.
    $this->parser = new Mustache_Parser();

    return $this->parser;
}
/**
* Set the Mustache Compiler instance.
*
* @param Mustache_Compiler $compiler
*/
public function setCompiler(Mustache_Compiler $compiler)
{
// Once set, getCompiler() returns this instance instead of creating a new one.
$this->compiler = $compiler;
}
/**
* Get the current Mustache Compiler instance.
*
* If no Compiler instance has been explicitly specified, this method will instantiate and return a new one.
*
* @return Mustache_Compiler
*/
public function getCompiler()
{
    if (isset($this->compiler)) {
        return $this->compiler;
    }

    // Created on first use and reused afterwards.
    $this->compiler = new Mustache_Compiler();

    return $this->compiler;
}
/**
* Set the Mustache Cache instance.
*
* @param Mustache_Cache $cache
*/
public function setCache(Mustache_Cache $cache)
{
    // Share the engine's logger with a cache that does not have one yet.
    if (isset($this->logger)) {
        if ($cache->getLogger() === null) {
            $cache->setLogger($this->getLogger());
        }
    }

    $this->cache = $cache;
}
/**
* Get the current Mustache Cache instance.
*
* If no Cache instance has been explicitly specified, this method will instantiate and return a new one.
*
* @return Mustache_Cache
*/
public function getCache()
{
    if (isset($this->cache)) {
        return $this->cache;
    }

    // Default to a no-op cache; it goes through setCache() so it also gets the logger.
    $this->setCache(new Mustache_Cache_NoopCache());

    return $this->cache;
}
/**
* Get the current Lambda Cache instance.
*
* If 'cache_lambda_templates' is enabled, this is the default cache instance. Otherwise, it is a NoopCache.
*
* @see Mustache_Engine::getCache
*
* @return Mustache_Cache
*/
protected function getLambdaCache()
{
    if (!$this->cacheLambdaTemplates) {
        // Lambda caching disabled: use a dedicated no-op cache, created on first use.
        if (!isset($this->lambdaCache)) {
            $this->lambdaCache = new Mustache_Cache_NoopCache();
        }

        return $this->lambdaCache;
    }

    // Lambda caching enabled: lambdas share the regular template cache.
    return $this->getCache();
}
/**
* Helper method to generate a Mustache template class.
*
* This method must be updated any time options are added which make it so
* the same template could be parsed and compiled multiple different ways.
*
* @param string|Mustache_Source $source
*
* @return string Mustache Template class name
*/
public function getTemplateClassName($source)
{
    $isSourceObject = $source instanceof Mustache_Source;

    // For the most part, adding a new option here should do the trick.
    //
    // Pick a value here which is unique for each possible way the template
    // could be compiled... but not necessarily unique per option value. See
    // escape below, which only needs to differentiate between 'custom' and
    // 'default' escapes.
    //
    // Keep this list in alphabetical order :)
    $chunks = array(
        'charset'         => $this->charset,
        'delimiters'      => $this->delimiters ? $this->delimiters : '{{ }}',
        'entityFlags'     => $this->entityFlags,
        'escape'          => isset($this->escape) ? 'custom' : 'default',
        'key'             => $isSourceObject ? $source->getKey() : 'source',
        'pragmas'         => $this->getPragmas(),
        'strictCallables' => $this->strictCallables,
        'version'         => self::VERSION,
    );

    $key = json_encode($chunks);
    if ($key === false) {
        // json_encode() returns false when a value cannot be encoded (for example a delimiter
        // or source key that is not valid UTF-8). Hashing that result would drop every option
        // from the key and give all Mustache_Source templates the same class name, so fall
        // back to serialize(), which accepts arbitrary byte strings.
        $key = serialize($chunks);
    }

    // Template Source instances have already provided their own source key. For strings, just include the whole
    // source string in the md5 hash.
    if (!$isSourceObject) {
        $key .= "\n" . $source;
    }

    return $this->templateClassPrefix . md5($key);
}
/**
* Load a Mustache Template by name.
*
* @param string $name
*
* @return Mustache_Template
*/
public function loadTemplate($name)
{
    // The loader turns the name into template source; loadSource() turns that into a Template.
    $source = $this->getLoader()->load($name);

    return $this->loadSource($source);
}
/**
* Load a Mustache partial Template by name.
*
* This is a helper method used internally by Template instances for loading partial templates. You can most likely
* ignore it completely.
*
* @param string $name
*
* @return Mustache_Template
*/
public function loadPartial($name)
{
    try {
        // Prefer the partials loader; otherwise reuse the template loader, unless it is a
        // StringLoader.
        $loader = null;
        if (isset($this->partialsLoader)) {
            $loader = $this->partialsLoader;
        } elseif (isset($this->loader) && !($this->loader instanceof Mustache_Loader_StringLoader)) {
            $loader = $this->loader;
        }

        if ($loader === null) {
            throw new Mustache_Exception_UnknownTemplateException($name);
        }

        return $this->loadSource($loader->load($name));
    } catch (Mustache_Exception_UnknownTemplateException $notFound) {
        // If the named partial cannot be found, log then return null.
        $this->log(
            Mustache_Logger::WARNING,
            'Partial not found: "{name}"',
            array('name' => $notFound->getTemplateName())
        );
    }

    return null;
}
/**
* Load a Mustache lambda Template by source.
*
* This is a helper method used by Template instances to generate subtemplates for Lambda sections. You can most
* likely ignore it completely.
*
* @param string $source
* @param string $delims (default: null)
*
* @return Mustache_Template
*/
public function loadLambda($source, $delims = null)
{
    // When delimiters are given they are placed on their own line ahead of the source.
    $template = ($delims === null) ? $source : $delims . "\n" . $source;

    return $this->loadSource($template, $this->getLambdaCache());
}
/**
* Instantiate and return a Mustache Template instance by source.
*
* Optionally provide a Mustache_Cache instance. This is used internally by Mustache_Engine::loadLambda to respect
* the 'cache_lambda_templates' configuration option.
*
* @see Mustache_Engine::loadTemplate
* @see Mustache_Engine::loadPartial
* @see Mustache_Engine::loadLambda
*
* @param string|Mustache_Source $source
* @param Mustache_Cache $cache (default: null)
*
* @return Mustache_Template
*/
private function loadSource($source, Mustache_Cache $cache = null)
{
    // NOTE(review): the implicitly nullable `Mustache_Cache $cache = null` form is deprecated
    // as of PHP 8.4; the explicit `?Mustache_Cache` spelling requires PHP >= 7.1.
    $className = $this->getTemplateClassName($source);

    // Templates already instantiated by this engine are reused.
    if (isset($this->templates[$className])) {
        return $this->templates[$className];
    }

    if ($cache === null) {
        $cache = $this->getCache();
    }

    // Compile only when the class is neither declared already nor loadable from the cache.
    if (!class_exists($className, false) && !$cache->load($className)) {
        $cache->cache($className, $this->compile($source));
    }

    $this->log(
        Mustache_Logger::DEBUG,
        'Instantiating template: "{className}"',
        array('className' => $className)
    );

    $template = new $className($this);
    $this->templates[$className] = $template;

    return $template;
}
/**
* Helper method to tokenize a Mustache template.
*
* @see Mustache_Tokenizer::scan
*
* @param string $source
*
* @return array Tokens
*/
private function tokenize($source)
{
    $tokenizer = $this->getTokenizer();

    // The engine-level delimiters (possibly null) are handed to the tokenizer.
    return $tokenizer->scan($source, $this->delimiters);
}
/**
* Helper method to parse a Mustache template.
*
* @see Mustache_Parser::parse
*
* @param string $source
*
* @return array Token tree
*/
private function parse($source)
{
    // The parser is configured with the engine-wide pragmas before tokenizing begins.
    $parser = $this->getParser();
    $parser->setPragmas($this->getPragmas());

    $tokens = $this->tokenize($source);

    return $parser->parse($tokens);
}
/**
* Helper method to compile a Mustache template.
*
* @see Mustache_Compiler::compile
*
* @param string|Mustache_Source $source
*
* @return string generated Mustache template class code
*/
private function compile($source)
{
    // The class name is derived from the original $source, before it is unwrapped below.
    $className = $this->getTemplateClassName($source);

    $this->log(
        Mustache_Logger::INFO,
        'Compiling template to "{className}" class',
        array('className' => $className)
    );

    // Mustache_Source objects wrap the actual template text.
    $template = ($source instanceof Mustache_Source) ? $source->getSource() : $source;

    $tree = $this->parse($template);

    $compiler = $this->getCompiler();
    $compiler->setPragmas($this->getPragmas());

    return $compiler->compile(
        $template,
        $tree,
        $className,
        isset($this->escape),
        $this->charset,
        $this->strictCallables,
        $this->entityFlags
    );
}
/**
* Add a log record if logging is enabled.
*
* @param int $level The logging level
* @param string $message The log message
* @param array $context The log context
*/
private function log($level, $message, array $context = array())
{
    // Logging is a no-op until a logger has been set.
    if (!isset($this->logger)) {
        return;
    }

    $this->logger->log($level, $message, $context);
}
}

View file

@ -0,0 +1,18 @@
<?php
/*
* This file is part of Mustache.php.
*
* (c) 2010-2017 Justin Hileman
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
/**
* A Mustache Exception interface.
*
* Declares no methods; it is implemented by the Mustache_Exception_* classes so that they
* share a common type.
*/
interface Mustache_Exception
{
// This space intentionally left blank.
}

View file

@ -0,0 +1,18 @@
<?php
/*
* This file is part of Mustache.php.
*
* (c) 2010-2017 Justin Hileman
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
/**
* Invalid argument exception.
*
* Extends the SPL InvalidArgumentException and adds the Mustache_Exception interface;
* it defines no members of its own.
*/
class Mustache_Exception_InvalidArgumentException extends InvalidArgumentException implements Mustache_Exception
{
// This space intentionally left blank.
}

View file

@ -0,0 +1,18 @@
<?php
/*
* This file is part of Mustache.php.
*
* (c) 2010-2017 Justin Hileman
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
/**
* Logic exception.
*
* Extends the SPL LogicException and adds the Mustache_Exception interface;
* it defines no members of its own.
*/
class Mustache_Exception_LogicException extends LogicException implements Mustache_Exception
{
// This space intentionally left blank.
}

View file

@ -0,0 +1,18 @@
<?php
/*
* This file is part of Mustache.php.
*
* (c) 2010-2017 Justin Hileman
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
/**
* Runtime exception.
*
* Extends the SPL RuntimeException and adds the Mustache_Exception interface;
* it defines no members of its own.
*/
class Mustache_Exception_RuntimeException extends RuntimeException implements Mustache_Exception
{
// This space intentionally left blank.
}

View file

@ -0,0 +1,41 @@
<?php
/*
* This file is part of Mustache.php.
*
* (c) 2010-2017 Justin Hileman
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
/**
 * Mustache syntax exception.
 *
 * Carries the token that was passed to the constructor alongside the message.
 */
class Mustache_Exception_SyntaxException extends LogicException implements Mustache_Exception
{
    protected $token;

    /**
     * @param string    $msg      Exception message
     * @param array     $token    Token to attach to the exception
     * @param Exception $previous Previous exception; only forwarded on PHP >= 5.3
     */
    public function __construct($msg, array $token, Exception $previous = null)
    {
        $this->token = $token;

        // Exception chaining is unavailable before PHP 5.3.
        if (version_compare(PHP_VERSION, '5.3.0', '<')) {
            parent::__construct($msg); // @codeCoverageIgnore

            return;
        }

        parent::__construct($msg, 0, $previous);
    }

    /**
     * @return array The token given at construction
     */
    public function getToken()
    {
        return $this->token;
    }
}

View file

@ -0,0 +1,38 @@
<?php
/*
* This file is part of Mustache.php.
*
* (c) 2010-2017 Justin Hileman
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
/**
 * Unknown filter exception.
 *
 * Remembers the filter name and builds the message "Unknown filter: <name>" from it.
 */
class Mustache_Exception_UnknownFilterException extends UnexpectedValueException implements Mustache_Exception
{
    protected $filterName;

    /**
     * @param string    $filterName Name of the unknown filter
     * @param Exception $previous   Previous exception; only forwarded on PHP >= 5.3
     */
    public function __construct($filterName, Exception $previous = null)
    {
        $this->filterName = $filterName;
        $text = sprintf('Unknown filter: %s', $filterName);

        // Exception chaining is unavailable before PHP 5.3.
        if (version_compare(PHP_VERSION, '5.3.0', '<')) {
            parent::__construct($text); // @codeCoverageIgnore

            return;
        }

        parent::__construct($text, 0, $previous);
    }

    /**
     * @return string The filter name given at construction
     */
    public function getFilterName()
    {
        return $this->filterName;
    }
}

View file

@ -0,0 +1,38 @@
<?php
/*
* This file is part of Mustache.php.
*
* (c) 2010-2017 Justin Hileman
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
/**
 * Unknown helper exception.
 *
 * Remembers the helper name and builds the message "Unknown helper: <name>" from it.
 */
class Mustache_Exception_UnknownHelperException extends InvalidArgumentException implements Mustache_Exception
{
    protected $helperName;

    /**
     * @param string    $helperName Name of the unknown helper
     * @param Exception $previous   Previous exception; only forwarded on PHP >= 5.3
     */
    public function __construct($helperName, Exception $previous = null)
    {
        $this->helperName = $helperName;
        $text = sprintf('Unknown helper: %s', $helperName);

        // Exception chaining is unavailable before PHP 5.3.
        if (version_compare(PHP_VERSION, '5.3.0', '<')) {
            parent::__construct($text); // @codeCoverageIgnore

            return;
        }

        parent::__construct($text, 0, $previous);
    }

    /**
     * @return string The helper name given at construction
     */
    public function getHelperName()
    {
        return $this->helperName;
    }
}

View file

@ -0,0 +1,38 @@
<?php
/*
* This file is part of Mustache.php.
*
* (c) 2010-2017 Justin Hileman
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
/**
* Unknown template exception.
*/
class Mustache_Exception_UnknownTemplateException extends InvalidArgumentException implements Mustache_Exception
{
    /**
     * Name of the template that could not be resolved.
     *
     * @var string
     */
    protected $templateName;

    /**
     * Build the exception with the message "Unknown template: <name>".
     *
     * $previous intentionally carries no native type declaration. The former
     * `Exception $previous = null` form is an implicitly nullable type, which
     * is deprecated as of PHP 8.4, and it rejected Error instances on PHP 7+.
     * The value is handed unchanged to the parent constructor.
     *
     * @param string         $templateName Name of the unknown template
     * @param Exception|null $previous     Previous exception in the chain, if any
     */
    public function __construct($templateName, $previous = null)
    {
        $this->templateName = $templateName;
        $message = sprintf('Unknown template: %s', $templateName);

        // The "previous exception" constructor argument only exists from PHP 5.3 on.
        if (version_compare(PHP_VERSION, '5.3.0', '>=')) {
            parent::__construct($message, 0, $previous);
        } else {
            parent::__construct($message); // @codeCoverageIgnore
        }
    }

    /**
     * Get the name of the template that could not be found.
     *
     * @return string
     */
    public function getTemplateName()
    {
        return $this->templateName;
    }
}

View file

@ -0,0 +1,172 @@
<?php
/*
* This file is part of Mustache.php.
*
* (c) 2010-2017 Justin Hileman
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
/**
* A collection of helpers for a Mustache instance.
*/
class Mustache_HelperCollection
{
    /**
     * Registered helpers, keyed by helper name.
     *
     * @var array
     */
    private $helpers = array();

    /**
     * Create a collection, optionally seeded with `$name => $helper` pairs.
     *
     * Every pair is registered through add().
     *
     * @throws Mustache_Exception_InvalidArgumentException if $helpers is neither null, an array nor a Traversable
     *
     * @param array|Traversable $helpers Initial helpers (default: null)
     */
    public function __construct($helpers = null)
    {
        if ($helpers !== null) {
            $isIterable = is_array($helpers) || $helpers instanceof Traversable;
            if (!$isIterable) {
                throw new Mustache_Exception_InvalidArgumentException('HelperCollection constructor expects an array of helpers');
            }

            foreach ($helpers as $helperName => $helperValue) {
                $this->add($helperName, $helperValue);
            }
        }
    }

    /**
     * Property-style registration: `$collection->name = $helper`.
     *
     * @see Mustache_HelperCollection::add
     *
     * @param string $name
     * @param mixed  $helper
     */
    public function __set($name, $helper)
    {
        $this->add($name, $helper);
    }

    /**
     * Register a helper under the given name, replacing any existing entry.
     *
     * @param string $name
     * @param mixed  $helper
     */
    public function add($name, $helper)
    {
        $this->helpers[$name] = $helper;
    }

    /**
     * Property-style lookup: `$collection->name`.
     *
     * @see Mustache_HelperCollection::get
     *
     * @param string $name
     *
     * @return mixed Helper
     */
    public function __get($name)
    {
        return $this->get($name);
    }

    /**
     * Look up a helper by name.
     *
     * @throws Mustache_Exception_UnknownHelperException if no helper is registered under $name
     *
     * @param string $name
     *
     * @return mixed Helper
     */
    public function get($name)
    {
        if ($this->has($name)) {
            return $this->helpers[$name];
        }

        throw new Mustache_Exception_UnknownHelperException($name);
    }

    /**
     * Property-style existence check: `isset($collection->name)`.
     *
     * @see Mustache_HelperCollection::has
     *
     * @param string $name
     *
     * @return bool True if helper is present
     */
    public function __isset($name)
    {
        return $this->has($name);
    }

    /**
     * Tell whether a helper is registered under the given name.
     *
     * Uses a key-existence check, so a helper whose value is null still counts.
     *
     * @param string $name
     *
     * @return bool True if helper is present
     */
    public function has($name)
    {
        return array_key_exists($name, $this->helpers);
    }

    /**
     * Property-style removal: `unset($collection->name)`.
     *
     * @see Mustache_HelperCollection::remove
     *
     * @param string $name
     */
    public function __unset($name)
    {
        $this->remove($name);
    }

    /**
     * Remove the helper registered under the given name.
     *
     * @throws Mustache_Exception_UnknownHelperException if the requested helper is not present
     *
     * @param string $name
     */
    public function remove($name)
    {
        if ($this->has($name)) {
            unset($this->helpers[$name]);

            return;
        }

        throw new Mustache_Exception_UnknownHelperException($name);
    }

    /**
     * Drop every helper from this collection.
     */
    public function clear()
    {
        $this->helpers = array();
    }

    /**
     * Tell whether the collection holds no helpers at all.
     *
     * @return bool True if the collection is empty
     */
    public function isEmpty()
    {
        return count($this->helpers) === 0;
    }
}

View file

@ -0,0 +1,76 @@
<?php
/*
* This file is part of Mustache.php.
*
* (c) 2010-2017 Justin Hileman
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
/**
* Mustache Lambda Helper.
*
* Passed as the second argument to section lambdas (higher order sections),
* giving them access to a `render` method for rendering a string with the
* current context.
*/
class Mustache_LambdaHelper
{
    /** @var Mustache_Engine Engine used to compile lambda strings */
    private $mustache;

    /** @var Mustache_Context Context the lambda strings are rendered against */
    private $context;

    /** @var string|null Custom delimiters passed along when compiling */
    private $delims;

    /**
     * Bind the helper to an engine, a rendering context and optional delimiters.
     *
     * @param Mustache_Engine  $mustache Mustache engine instance
     * @param Mustache_Context $context  Rendering context
     * @param string           $delims   Optional custom delimiters, in the format `{{= <% %> =}}`. (default: null)
     */
    public function __construct(Mustache_Engine $mustache, Mustache_Context $context, $delims = null)
    {
        $this->delims   = $delims;
        $this->context  = $context;
        $this->mustache = $mustache;
    }

    /**
     * Compile the given string as a lambda template and render it with the bound context.
     *
     * The argument is cast to string before it is handed to the engine.
     *
     * @param string $string
     *
     * @return string Rendered template
     */
    public function render($string)
    {
        $template = $this->mustache->loadLambda((string) $string, $this->delims);

        return $template->renderInternal($this->context);
    }

    /**
     * Callable shortcut for render(), so the helper itself can be invoked.
     *
     * @param string $string
     *
     * @return string Rendered template
     */
    public function __invoke($string)
    {
        return $this->render($string);
    }

    /**
     * Create a new helper sharing this engine and context but using other delimiters.
     *
     * @param string $delims Custom delimiters, in the format `{{= <% %> =}}`
     *
     * @return Mustache_LambdaHelper
     */
    public function withDelimiters($delims)
    {
        return new self($this->mustache, $this->context, $delims);
    }
}

View file

@ -0,0 +1,27 @@
<?php
/*
* This file is part of Mustache.php.
*
* (c) 2010-2017 Justin Hileman
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
/**
* Mustache Template Loader interface.
*/
interface Mustache_Loader
{
    /**
     * Resolve a template name to its Mustache source.
     *
     * @throws Mustache_Exception_UnknownTemplateException If a template file is not found
     *
     * @param string $name Name identifying the template to load
     *
     * @return string|Mustache_Source Mustache Template source
     */
    public function load($name);
}

View file

@ -0,0 +1,79 @@
<?php
/*
* This file is part of Mustache.php.
*
* (c) 2010-2017 Justin Hileman
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
/**
* Mustache Template array Loader implementation.
*
* An ArrayLoader instance loads Mustache Template source by name from an initial array:
*
* $loader = new Mustache_Loader_ArrayLoader(array(
* 'foo' => '{{ bar }}',
* 'baz' => 'Hey {{ qux }}!'
* ));
*
* $tpl = $loader->load('foo'); // '{{ bar }}'
*
* The ArrayLoader is used internally as a partials loader by Mustache_Engine instance when an array of partials
* is set. It can also be used as a quick-and-dirty Template loader.
*/
class Mustache_Loader_ArrayLoader implements Mustache_Loader, Mustache_Loader_MutableLoader
{
    /**
     * Template sources keyed by template name.
     *
     * @var array
     */
    private $templates;

    /**
     * Create the loader from a map of template name to template source.
     *
     * @param array $templates Associative array of Template source (default: array())
     */
    public function __construct(array $templates = array())
    {
        $this->templates = $templates;
    }

    /**
     * Return the source stored under the given name.
     *
     * The lookup uses isset(), so an entry whose value is null is reported as unknown.
     *
     * @throws Mustache_Exception_UnknownTemplateException If no template is stored under $name
     *
     * @param string $name
     *
     * @return string Mustache Template source
     */
    public function load($name)
    {
        if (isset($this->templates[$name])) {
            return $this->templates[$name];
        }

        throw new Mustache_Exception_UnknownTemplateException($name);
    }

    /**
     * Replace the whole template map.
     *
     * @param array $templates
     */
    public function setTemplates(array $templates)
    {
        $this->templates = $templates;
    }

    /**
     * Store (or overwrite) a single template source.
     *
     * @param string $name
     * @param string $template Mustache Template source
     */
    public function setTemplate($name, $template)
    {
        $this->templates[$name] = $template;
    }
}

View file

@ -0,0 +1,69 @@
<?php
/*
* This file is part of Mustache.php.
*
* (c) 2010-2017 Justin Hileman
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
/**
* A Mustache Template cascading loader implementation, which delegates to other
* Loader instances.
*/
class Mustache_Loader_CascadingLoader implements Mustache_Loader
{
    /**
     * Delegate loaders, in the order they are consulted.
     *
     * @var Mustache_Loader[]
     */
    private $loaders;

    /**
     * Build a cascading loader from an ordered list of delegate loaders.
     *
     *     $loader = new Mustache_Loader_CascadingLoader(array(
     *         new Mustache_Loader_InlineLoader(__FILE__, __COMPILER_HALT_OFFSET__),
     *         new Mustache_Loader_FilesystemLoader(__DIR__.'/templates')
     *     ));
     *
     * Each entry is registered through addLoader(), which enforces the
     * Mustache_Loader type.
     *
     * @param Mustache_Loader[] $loaders
     */
    public function __construct(array $loaders = array())
    {
        $this->loaders = array();

        foreach ($loaders as $delegate) {
            $this->addLoader($delegate);
        }
    }

    /**
     * Append a loader to the end of the delegate chain.
     *
     * @param Mustache_Loader $loader
     */
    public function addLoader(Mustache_Loader $loader)
    {
        $this->loaders[] = $loader;
    }

    /**
     * Ask each delegate in turn and return the first source found.
     *
     * @throws Mustache_Exception_UnknownTemplateException If no delegate knows the template
     *
     * @param string $name
     *
     * @return string Mustache Template source
     */
    public function load($name)
    {
        foreach ($this->loaders as $delegate) {
            try {
                $source = $delegate->load($name);
            } catch (Mustache_Exception_UnknownTemplateException $notFound) {
                // This delegate does not know the template; try the next one.
                continue;
            }

            return $source;
        }

        throw new Mustache_Exception_UnknownTemplateException($name);
    }
}

View file

@ -0,0 +1,135 @@
<?php
/*
* This file is part of Mustache.php.
*
* (c) 2010-2017 Justin Hileman
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
/**
* Mustache Template filesystem Loader implementation.
*
* A FilesystemLoader instance loads Mustache Template source from the filesystem by name:
*
* $loader = new Mustache_Loader_FilesystemLoader(dirname(__FILE__).'/views');
* $tpl = $loader->load('foo'); // equivalent to `file_get_contents(dirname(__FILE__).'/views/foo.mustache');`
*
* This is probably the most useful Mustache Loader implementation. It can be used for partials and normal Templates:
*
* $m = new Mustache_Engine(array(
* 'loader' => new Mustache_Loader_FilesystemLoader(dirname(__FILE__).'/views'),
* 'partials_loader' => new Mustache_Loader_FilesystemLoader(dirname(__FILE__).'/views/partials'),
* ));
*/
class Mustache_Loader_FilesystemLoader implements Mustache_Loader
{
    /** @var string|false Base directory (result of realpath() for plain paths, untouched for stream URLs) */
    private $baseDir;

    /** @var string Extension appended to template names, including the leading dot */
    private $extension = '.mustache';

    /** @var array Sources already loaded, keyed by template name */
    private $templates = array();

    /**
     * Create a loader rooted at a base directory.
     *
     * Supported entries of the $options array:
     *
     *     $options = array(
     *         // The filename extension used for Mustache templates. Defaults to '.mustache'
     *         'extension' => '.ms',
     *     );
     *
     * A base directory without a `://` scheme is resolved through realpath().
     * An empty 'extension' option disables the extension; otherwise a single
     * leading dot is enforced.
     *
     * @throws Mustache_Exception_RuntimeException if $baseDir does not exist
     *
     * @param string $baseDir Base directory containing Mustache template files
     * @param array  $options Array of Loader options (default: array())
     */
    public function __construct($baseDir, array $options = array())
    {
        $isPlainPath   = strpos($baseDir, '://') === false;
        $this->baseDir = $isPlainPath ? realpath($baseDir) : $baseDir;

        if ($this->shouldCheckPath() && !is_dir($this->baseDir)) {
            throw new Mustache_Exception_RuntimeException(sprintf('FilesystemLoader baseDir must be a directory: %s', $baseDir));
        }

        if (array_key_exists('extension', $options)) {
            $this->extension = empty($options['extension'])
                ? ''
                : '.' . ltrim($options['extension'], '.');
        }
    }

    /**
     * Load a template by name, reading each name from disk at most once.
     *
     *     $loader = new Mustache_Loader_FilesystemLoader(dirname(__FILE__).'/views');
     *     $loader->load('admin/dashboard'); // loads "./views/admin/dashboard.mustache";
     *
     * @param string $name
     *
     * @return string Mustache Template source
     */
    public function load($name)
    {
        $alreadyLoaded = isset($this->templates[$name]);
        if (!$alreadyLoaded) {
            $this->templates[$name] = $this->loadFile($name);
        }

        return $this->templates[$name];
    }

    /**
     * Read the file backing the given template name.
     *
     * @throws Mustache_Exception_UnknownTemplateException If a template file is not found
     *
     * @param string $name
     *
     * @return string Mustache Template source
     */
    protected function loadFile($name)
    {
        $path = $this->getFileName($name);

        $missing = $this->shouldCheckPath() && !file_exists($path);
        if ($missing) {
            throw new Mustache_Exception_UnknownTemplateException($name);
        }

        return file_get_contents($path);
    }

    /**
     * Turn a template name into a path below the base directory.
     *
     * The configured extension is appended unless the path already ends with it.
     *
     * @param string $name
     *
     * @return string Template file name
     */
    protected function getFileName($name)
    {
        $path      = $this->baseDir . '/' . $name;
        $extLength = strlen($this->extension);

        if (substr($path, -$extLength) === $this->extension) {
            return $path;
        }

        return $path . $this->extension;
    }

    /**
     * Decide whether `is_dir` / `file_exists` checks apply to the base directory.
     *
     * They apply when the base directory has no `://` scheme or starts with `file://`.
     *
     * @return bool Whether to check `is_dir` and `file_exists`
     */
    protected function shouldCheckPath()
    {
        if (strpos($this->baseDir, '://') === false) {
            return true;
        }

        return strpos($this->baseDir, 'file://') === 0;
    }
}

View file

@ -0,0 +1,123 @@
<?php
/*
* This file is part of Mustache.php.
*
* (c) 2010-2017 Justin Hileman
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
/**
* A Mustache Template loader for inline templates.
*
* With the InlineLoader, templates can be defined at the end of any PHP source
* file:
*
* $loader = new Mustache_Loader_InlineLoader(__FILE__, __COMPILER_HALT_OFFSET__);
* $hello = $loader->load('hello');
* $goodbye = $loader->load('goodbye');
*
* __halt_compiler();
*
* @@ hello
* Hello, {{ planet }}!
*
* @@ goodbye
* Goodbye, cruel {{ planet }}
*
* Templates are delineated by lines containing only `@@ name`.
*
* The InlineLoader is well-suited to micro-frameworks such as Silex:
*
* $app->register(new MustacheServiceProvider, array(
* 'mustache.loader' => new Mustache_Loader_InlineLoader(__FILE__, __COMPILER_HALT_OFFSET__)
* ));
*
* $app->get('/{name}', function ($name) use ($app) {
* return $app['mustache']->render('hello', compact('name'));
* })
* ->value('name', 'world');
*
* // ...
*
* __halt_compiler();
*
* @@ hello
* Hello, {{ name }}!
*/
class Mustache_Loader_InlineLoader implements Mustache_Loader
{
    /** @var string File that contains the inline templates */
    protected $fileName;

    /** @var int Byte offset in the file at which the template section starts */
    protected $offset;

    /** @var array|null Parsed templates keyed by name; null until first load */
    protected $templates;

    /**
     * The InlineLoader requires a filename and offset to process templates.
     *
     * The magic constants `__FILE__` and `__COMPILER_HALT_OFFSET__` are usually
     * perfectly suited to the job:
     *
     *     $loader = new Mustache_Loader_InlineLoader(__FILE__, __COMPILER_HALT_OFFSET__);
     *
     * Note that this only works if the loader is instantiated inside the same
     * file as the inline templates. If the templates are located in another
     * file, it would be necessary to manually specify the filename and offset.
     *
     * @throws Mustache_Exception_InvalidArgumentException if $fileName is not a file or $offset is not a non-negative int
     *
     * @param string $fileName The file to parse for inline templates
     * @param int    $offset   A string offset for the start of the templates.
     *                         This usually coincides with the `__halt_compiler`
     *                         call, and the `__COMPILER_HALT_OFFSET__`
     */
    public function __construct($fileName, $offset)
    {
        if (!is_file($fileName)) {
            throw new Mustache_Exception_InvalidArgumentException('InlineLoader expects a valid filename.');
        }

        if (!is_int($offset) || $offset < 0) {
            throw new Mustache_Exception_InvalidArgumentException('InlineLoader expects a valid file offset.');
        }

        $this->fileName = $fileName;
        $this->offset   = $offset;
    }

    /**
     * Load a Template by name.
     *
     * The source file is parsed on the first call; later calls reuse the result.
     *
     * @throws Mustache_Exception_UnknownTemplateException If no inline template has that name
     *
     * @param string $name
     *
     * @return string Mustache Template source
     */
    public function load($name)
    {
        $this->loadTemplates();

        if (!array_key_exists($name, $this->templates)) {
            throw new Mustache_Exception_UnknownTemplateException($name);
        }

        return $this->templates[$name];
    }

    /**
     * Parse and load templates from the end of a source file.
     *
     * The data after the offset is split at every line of the form `@@ name`.
     * The first line of each chunk is the template name and the remainder is
     * its body; both are trimmed.
     */
    protected function loadTemplates()
    {
        if ($this->templates !== null) {
            return;
        }

        $this->templates = array();
        $data = file_get_contents($this->fileName, false, null, $this->offset);

        foreach (preg_split("/^@@(?= [\w\d\.]+$)/m", $data, -1) as $chunk) {
            if (!trim($chunk)) {
                continue;
            }

            // A trailing `@@ name` line with no newline after it yields a single
            // part; treat that as an empty template rather than reading a
            // missing offset.
            $parts   = explode("\n", $chunk, 2);
            $content = isset($parts[1]) ? trim($parts[1]) : '';

            $this->templates[trim($parts[0])] = $content;
        }
    }
}

View file

@ -0,0 +1,31 @@
<?php
/*
* This file is part of Mustache.php.
*
* (c) 2010-2017 Justin Hileman
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
/**
* Mustache Template mutable Loader interface.
*/
interface Mustache_Loader_MutableLoader
{
    /**
     * Provide the loader with a complete map of template name to source.
     *
     * @param array $templates Associative array of Template sources
     */
    public function setTemplates(array $templates);

    /**
     * Provide the loader with the source for one named template.
     *
     * @param string $name
     * @param string $template Mustache Template source
     */
    public function setTemplate($name, $template);
}

View file

@ -0,0 +1,86 @@
<?php
/*
* This file is part of Mustache.php.
*
* (c) 2010-2017 Justin Hileman
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
/**
* Mustache Template production filesystem Loader implementation.
*
* A production-ready FilesystemLoader, which doesn't require reading a file if it already exists in the template cache.
*
* {@inheritdoc}
*/
class Mustache_Loader_ProductionFilesystemLoader extends Mustache_Loader_FilesystemLoader
{
    /**
     * stat() property names handed to each Mustache_Source_FilesystemSource.
     *
     * @var array
     */
    private $statProps;

    /**
     * Mustache production filesystem Loader constructor.
     *
     * Supported entries of the $options array:
     *
     *     $options = array(
     *         // The filename extension used for Mustache templates. Defaults to '.mustache'
     *         'extension' => '.ms',
     *         'stat_props' => array('size', 'mtime'),
     *     );
     *
     * Specifying 'stat_props' overrides the stat properties used to invalidate the template cache. By default, this
     * uses 'mtime' and 'size', but this can be set to any of the properties supported by stat():
     *
     *     http://php.net/manual/en/function.stat.php
     *
     * You can also disable filesystem stat entirely:
     *
     *     $options = array('stat_props' => null);
     *
     * But with great power comes great responsibility. Namely, if you disable stat-based cache invalidation,
     * YOU MUST CLEAR THE TEMPLATE CACHE YOURSELF when your templates change. Make it part of your build or deploy
     * process so you don't forget!
     *
     * @throws Mustache_Exception_RuntimeException if $baseDir does not exist.
     *
     * @param string $baseDir Base directory containing Mustache template files.
     * @param array  $options Array of Loader options (default: array())
     */
    public function __construct($baseDir, array $options = array())
    {
        parent::__construct($baseDir, $options);

        if (!array_key_exists('stat_props', $options)) {
            $this->statProps = array('size', 'mtime');

            return;
        }

        // An explicitly empty value (null, empty array, ...) disables stat checks.
        $this->statProps = empty($options['stat_props']) ? array() : $options['stat_props'];
    }

    /**
     * Wrap the file backing the given template name in a filesystem source object.
     *
     * @throws Mustache_Exception_UnknownTemplateException If a template file is not found.
     *
     * @param string $name
     *
     * @return Mustache_Source Mustache Template source
     */
    protected function loadFile($name)
    {
        $path = $this->getFileName($name);

        if (file_exists($path)) {
            return new Mustache_Source_FilesystemSource($path, $this->statProps);
        }

        throw new Mustache_Exception_UnknownTemplateException($name);
    }
}

View file

@ -0,0 +1,39 @@
<?php
/*
* This file is part of Mustache.php.
*
* (c) 2010-2017 Justin Hileman
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
/**
* Mustache Template string Loader implementation.
*
* A StringLoader instance is essentially a noop. It simply passes the 'name' argument straight through:
*
* $loader = new StringLoader;
* $tpl = $loader->load('{{ foo }}'); // '{{ foo }}'
*
* This is the default Template Loader instance used by Mustache:
*
* $m = new Mustache_Engine;
* $tpl = $m->loadTemplate('{{ foo }}');
* echo $tpl->render(array('foo' => 'bar')); // "bar"
*/
class Mustache_Loader_StringLoader implements Mustache_Loader
{
    /**
     * Return the argument untouched: for this loader the "name" is the template source itself.
     *
     * @param string $name Mustache Template source
     *
     * @return string Mustache Template source
     */
    public function load($name)
    {
        return $name;
    }
}

View file

@ -0,0 +1,126 @@
<?php
/*
* This file is part of Mustache.php.
*
* (c) 2010-2017 Justin Hileman
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
/**
* Describes a Mustache logger instance.
*
* This is identical to the Psr\Log\LoggerInterface.
*
* The message MUST be a string or object implementing __toString().
*
* The message MAY contain placeholders in the form: {foo} where foo
* will be replaced by the context data in key "foo".
*
* The context array can contain arbitrary data, the only assumption that
* can be made by implementors is that if an Exception instance is given
* to produce a stack trace, it MUST be in a key named "exception".
*
* See https://github.com/php-fig/fig-standards/blob/master/accepted/PSR-3-logger-interface.md
* for the full interface specification.
*/
interface Mustache_Logger
{
    /**
     * Psr\Log compatible log levels.
     */
    const EMERGENCY = 'emergency';
    const ALERT     = 'alert';
    const CRITICAL  = 'critical';
    const ERROR     = 'error';
    const WARNING   = 'warning';
    const NOTICE    = 'notice';
    const INFO      = 'info';
    const DEBUG     = 'debug';

    /**
     * Log at "emergency" level: the system is unusable.
     *
     * @param string $message
     * @param array  $context
     */
    public function emergency($message, array $context = array());

    /**
     * Log at "alert" level: action must be taken immediately.
     *
     * Example: Entire website down, database unavailable, etc. This should
     * trigger the SMS alerts and wake you up.
     *
     * @param string $message
     * @param array  $context
     */
    public function alert($message, array $context = array());

    /**
     * Log at "critical" level: critical conditions.
     *
     * Example: Application component unavailable, unexpected exception.
     *
     * @param string $message
     * @param array  $context
     */
    public function critical($message, array $context = array());

    /**
     * Log at "error" level: runtime errors that do not require immediate
     * action but should typically be logged and monitored.
     *
     * @param string $message
     * @param array  $context
     */
    public function error($message, array $context = array());

    /**
     * Log at "warning" level: exceptional occurrences that are not errors.
     *
     * Example: Use of deprecated APIs, poor use of an API, undesirable things
     * that are not necessarily wrong.
     *
     * @param string $message
     * @param array  $context
     */
    public function warning($message, array $context = array());

    /**
     * Log at "notice" level: normal but significant events.
     *
     * @param string $message
     * @param array  $context
     */
    public function notice($message, array $context = array());

    /**
     * Log at "info" level: interesting events.
     *
     * Example: User logs in, SQL logs.
     *
     * @param string $message
     * @param array  $context
     */
    public function info($message, array $context = array());

    /**
     * Log at "debug" level: detailed debug information.
     *
     * @param string $message
     * @param array  $context
     */
    public function debug($message, array $context = array());

    /**
     * Log a message at an arbitrary level.
     *
     * @param mixed  $level
     * @param string $message
     * @param array  $context
     */
    public function log($level, $message, array $context = array());
}

View file

@ -0,0 +1,121 @@
<?php
/*
* This file is part of Mustache.php.
*
* (c) 2010-2017 Justin Hileman
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
/**
* This is a simple Logger implementation that other Loggers can inherit from.
*
* This is identical to the Psr\Log\AbstractLogger.
*
* It simply delegates all log-level-specific methods to the `log` method to
* reduce boilerplate code that a simple Logger that does the same thing with
* messages regardless of the error level has to implement.
*/
abstract class Mustache_Logger_AbstractLogger implements Mustache_Logger
{
    /**
     * Forward to log() at the EMERGENCY level (system is unusable).
     *
     * @param string $message
     * @param array  $context
     */
    public function emergency($message, array $context = array())
    {
        $this->log(self::EMERGENCY, $message, $context);
    }

    /**
     * Forward to log() at the ALERT level (action must be taken immediately).
     *
     * Example: Entire website down, database unavailable, etc. This should
     * trigger the SMS alerts and wake you up.
     *
     * @param string $message
     * @param array  $context
     */
    public function alert($message, array $context = array())
    {
        $this->log(self::ALERT, $message, $context);
    }

    /**
     * Forward to log() at the CRITICAL level (critical conditions).
     *
     * Example: Application component unavailable, unexpected exception.
     *
     * @param string $message
     * @param array  $context
     */
    public function critical($message, array $context = array())
    {
        $this->log(self::CRITICAL, $message, $context);
    }

    /**
     * Forward to log() at the ERROR level (runtime errors that do not require
     * immediate action but should typically be logged and monitored).
     *
     * @param string $message
     * @param array  $context
     */
    public function error($message, array $context = array())
    {
        $this->log(self::ERROR, $message, $context);
    }

    /**
     * Forward to log() at the WARNING level (exceptional occurrences that are
     * not errors).
     *
     * Example: Use of deprecated APIs, poor use of an API, undesirable things
     * that are not necessarily wrong.
     *
     * @param string $message
     * @param array  $context
     */
    public function warning($message, array $context = array())
    {
        $this->log(self::WARNING, $message, $context);
    }

    /**
     * Forward to log() at the NOTICE level (normal but significant events).
     *
     * @param string $message
     * @param array  $context
     */
    public function notice($message, array $context = array())
    {
        $this->log(self::NOTICE, $message, $context);
    }

    /**
     * Forward to log() at the INFO level (interesting events).
     *
     * Example: User logs in, SQL logs.
     *
     * @param string $message
     * @param array  $context
     */
    public function info($message, array $context = array())
    {
        $this->log(self::INFO, $message, $context);
    }

    /**
     * Forward to log() at the DEBUG level (detailed debug information).
     *
     * @param string $message
     * @param array  $context
     */
    public function debug($message, array $context = array())
    {
        $this->log(self::DEBUG, $message, $context);
    }
}

View file

@ -0,0 +1,194 @@
<?php
/*
* This file is part of Mustache.php.
*
* (c) 2010-2017 Justin Hileman
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
/**
* A Mustache Stream Logger.
*
* The Stream Logger wraps a file resource instance (such as a stream) or a
* stream URL. All log messages over the threshold level will be appended to
* this stream.
*
* Hint: Try `php://stderr` for your stream URL.
*/
class Mustache_Logger_StreamLogger extends Mustache_Logger_AbstractLogger
{
    /**
     * Numeric severity of each level name, used to compare a record's level
     * against the configured threshold.
     *
     * @var array
     */
    protected static $levels = array(
        self::DEBUG     => 100,
        self::INFO      => 200,
        self::NOTICE    => 250,
        self::WARNING   => 300,
        self::ERROR     => 400,
        self::CRITICAL  => 500,
        self::ALERT     => 550,
        self::EMERGENCY => 600,
    );

    /** @var string Minimum level name that gets written */
    protected $level;

    /** @var resource|null Open stream, or null until the URL is opened on first write */
    protected $stream = null;

    /** @var string|null Stream URL to open lazily, when no resource was supplied */
    protected $url = null;

    /**
     * Create a logger writing to a stream resource or a stream URL.
     *
     * A URL is not opened here; it is opened on the first record written.
     *
     * @throws Mustache_Exception_InvalidArgumentException if the logging level is unknown
     *
     * @param resource|string $stream Resource instance or URL
     * @param string          $level  The minimum logging level at which this handler will be triggered
     */
    public function __construct($stream, $level = Mustache_Logger::ERROR)
    {
        $this->setLevel($level);

        if (is_resource($stream)) {
            $this->stream = $stream;
        } else {
            $this->url = $stream;
        }
    }

    /**
     * Close stream resources.
     *
     * Note that this closes the stream whether it was opened by this logger
     * or handed in as a resource by the caller.
     */
    public function __destruct()
    {
        if (is_resource($this->stream)) {
            fclose($this->stream);
        }
    }

    /**
     * Set the minimum logging level.
     *
     * @throws Mustache_Exception_InvalidArgumentException if the logging level is unknown
     *
     * @param string $level The minimum logging level which will be written (one of the Mustache_Logger level names)
     */
    public function setLevel($level)
    {
        if (!array_key_exists($level, self::$levels)) {
            throw new Mustache_Exception_InvalidArgumentException(sprintf('Unexpected logging level: %s', $level));
        }

        $this->level = $level;
    }

    /**
     * Get the current minimum logging level.
     *
     * @return string Level name as passed to setLevel()
     */
    public function getLevel()
    {
        return $this->level;
    }

    /**
     * Logs with an arbitrary level.
     *
     * The record is written only when its severity is at least the configured
     * minimum level.
     *
     * @throws Mustache_Exception_InvalidArgumentException if the logging level is unknown
     *
     * @param mixed  $level
     * @param string $message
     * @param array  $context
     */
    public function log($level, $message, array $context = array())
    {
        if (!array_key_exists($level, self::$levels)) {
            throw new Mustache_Exception_InvalidArgumentException(sprintf('Unexpected logging level: %s', $level));
        }

        if (self::$levels[$level] >= self::$levels[$this->level]) {
            $this->writeLog($level, $message, $context);
        }
    }

    /**
     * Write a record to the log.
     *
     * Opens the stream URL in append mode first if no stream is open yet.
     *
     * @throws Mustache_Exception_LogicException   If neither a stream resource nor url is present
     * @throws Mustache_Exception_RuntimeException If the stream url cannot be opened
     *
     * @param string $level   The logging level
     * @param string $message The log message
     * @param array  $context The log context
     */
    protected function writeLog($level, $message, array $context = array())
    {
        if (!is_resource($this->stream)) {
            if (!isset($this->url)) {
                throw new Mustache_Exception_LogicException('Missing stream url, the stream can not be opened. This may be caused by a premature call to close().');
            }

            $this->stream = fopen($this->url, 'a');
            if (!is_resource($this->stream)) {
                // @codeCoverageIgnoreStart
                throw new Mustache_Exception_RuntimeException(sprintf('The stream or file "%s" could not be opened.', $this->url));
                // @codeCoverageIgnoreEnd
            }
        }

        fwrite($this->stream, self::formatLine($level, $message, $context));
    }

    /**
     * Gets the display name of the logging level (the level name upper-cased).
     *
     * @param string $level
     *
     * @return string
     */
    protected static function getLevelName($level)
    {
        return strtoupper($level);
    }

    /**
     * Format a log line for output, as "LEVEL: message" followed by a newline.
     *
     * @param string $level   The logging level
     * @param string $message The log message
     * @param array  $context The log context
     *
     * @return string
     */
    protected static function formatLine($level, $message, array $context = array())
    {
        return sprintf(
            "%s: %s\n",
            self::getLevelName($level),
            self::interpolateMessage($message, $context)
        );
    }

    /**
     * Interpolate context values into the message placeholders.
     *
     * Each `{key}` placeholder is replaced by the matching context value.
     * Context values that cannot be cast to a string (arrays, and objects
     * without a __toString() method) are skipped, so their placeholders are
     * left untouched instead of triggering a conversion warning or error.
     *
     * @param string $message
     * @param array  $context
     *
     * @return string
     */
    protected static function interpolateMessage($message, array $context = array())
    {
        if (strpos($message, '{') === false) {
            return $message;
        }

        // build a replacement array with braces around the context keys
        $replace = array();
        foreach ($context as $key => $val) {
            if (is_array($val) || (is_object($val) && !method_exists($val, '__toString'))) {
                continue;
            }
            $replace['{' . $key . '}'] = $val;
        }

        // interpolate replacement values into the message and return
        return strtr($message, $replace);
    }
}

View file

@ -0,0 +1,317 @@
<?php
/*
* This file is part of Mustache.php.
*
* (c) 2010-2017 Justin Hileman
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
/**
 * Mustache Parser class.
 *
 * This class is responsible for turning a flat set of Mustache tokens into a
 * parse tree: section-like tokens absorb every token up to their closing tag
 * and carry them as child nodes.
 */
class Mustache_Parser
{
    // Line number of the token currently being processed (-1 before the first token).
    private $lineNum;
    // Number of tokens already seen on the current line, not counting the current one.
    private $lineTokens;
    // Map of pragma name => true for every pragma enabled for the current parse.
    private $pragmas;
    // Pragmas enabled for all templates via setPragmas().
    private $defaultPragmas = array();
    // Whether the FILTERS pragma is currently enabled.
    private $pragmaFilters;
    // Whether the BLOCKS pragma is currently enabled.
    private $pragmaBlocks;

    /**
     * Process an array of Mustache tokens and convert them into a parse tree.
     *
     * Resets the per-parse state (line tracking and enabled pragmas) before building the tree.
     *
     * @throws Mustache_Exception_SyntaxException when nesting errors or mismatched section tags are encountered
     *
     * @param array $tokens Set of Mustache tokens
     *
     * @return array Mustache token parse tree
     */
    public function parse(array $tokens = array())
    {
        $this->lineNum    = -1;
        $this->lineTokens = 0;
        $this->pragmas    = $this->defaultPragmas;

        $this->pragmaFilters = isset($this->pragmas[Mustache_Engine::PRAGMA_FILTERS]);
        $this->pragmaBlocks  = isset($this->pragmas[Mustache_Engine::PRAGMA_BLOCKS]);

        return $this->buildTree($tokens);
    }

    /**
     * Enable pragmas across all templates, regardless of the presence of pragma
     * tags in the individual templates.
     *
     * @internal Users should set global pragmas in Mustache_Engine, not here :)
     *
     * @param string[] $pragmas
     */
    public function setPragmas(array $pragmas)
    {
        $this->pragmas = array();
        foreach ($pragmas as $pragma) {
            $this->enablePragma($pragma);
        }
        $this->defaultPragmas = $this->pragmas;
    }

    /**
     * Helper method for recursively building a parse tree.
     *
     * Consumes tokens from the front of $tokens. When called for a section-like
     * parent token it returns that parent (with END and NODES filled in) as soon
     * as the matching closing tag is found; at the top level it returns the list
     * of nodes once all tokens are consumed.
     *
     * The $parent parameter is intentionally untyped: an `array` type with a null
     * default is an implicitly nullable type, which is deprecated as of PHP 8.4.
     *
     * @throws Mustache_Exception_SyntaxException when nesting errors or mismatched section tags are encountered
     *
     * @param array      &$tokens Set of Mustache tokens
     * @param array|null $parent  Parent token (default: null)
     *
     * @return array Mustache Token parse tree
     */
    private function buildTree(array &$tokens, $parent = null)
    {
        $nodes = array();

        while (!empty($tokens)) {
            $token = array_shift($tokens);

            // Track how many tokens have been seen on the current line; the
            // standalone-line detection below depends on this count.
            if ($token[Mustache_Tokenizer::LINE] === $this->lineNum) {
                $this->lineTokens++;
            } else {
                $this->lineNum    = $token[Mustache_Tokenizer::LINE];
                $this->lineTokens = 0;
            }

            // With the FILTERS pragma enabled, "name | filter | ..." is split into name and filters.
            if ($this->pragmaFilters && isset($token[Mustache_Tokenizer::NAME])) {
                list($name, $filters) = $this->getNameAndFilters($token[Mustache_Tokenizer::NAME]);
                if (!empty($filters)) {
                    $token[Mustache_Tokenizer::NAME]    = $name;
                    $token[Mustache_Tokenizer::FILTERS] = $filters;
                }
            }

            switch ($token[Mustache_Tokenizer::TYPE]) {
                case Mustache_Tokenizer::T_DELIM_CHANGE:
                    $this->checkIfTokenIsAllowedInParent($parent, $token);
                    $this->clearStandaloneLines($nodes, $tokens);
                    break;

                case Mustache_Tokenizer::T_SECTION:
                case Mustache_Tokenizer::T_INVERTED:
                    $this->checkIfTokenIsAllowedInParent($parent, $token);
                    $this->clearStandaloneLines($nodes, $tokens);
                    $nodes[] = $this->buildTree($tokens, $token);
                    break;

                case Mustache_Tokenizer::T_END_SECTION:
                    if (!isset($parent)) {
                        $msg = sprintf(
                            'Unexpected closing tag: /%s on line %d',
                            $token[Mustache_Tokenizer::NAME],
                            $token[Mustache_Tokenizer::LINE]
                        );
                        throw new Mustache_Exception_SyntaxException($msg, $token);
                    }

                    if ($token[Mustache_Tokenizer::NAME] !== $parent[Mustache_Tokenizer::NAME]) {
                        $msg = sprintf(
                            'Nesting error: %s (on line %d) vs. %s (on line %d)',
                            $parent[Mustache_Tokenizer::NAME],
                            $parent[Mustache_Tokenizer::LINE],
                            $token[Mustache_Tokenizer::NAME],
                            $token[Mustache_Tokenizer::LINE]
                        );
                        throw new Mustache_Exception_SyntaxException($msg, $token);
                    }

                    // Matching close tag: finish the parent node and hand it back to the caller.
                    $this->clearStandaloneLines($nodes, $tokens);
                    $parent[Mustache_Tokenizer::END]   = $token[Mustache_Tokenizer::INDEX];
                    $parent[Mustache_Tokenizer::NODES] = $nodes;

                    return $parent;

                case Mustache_Tokenizer::T_PARTIAL:
                    $this->checkIfTokenIsAllowedInParent($parent, $token);
                    //store the whitespace prefix for laters!
                    if ($indent = $this->clearStandaloneLines($nodes, $tokens)) {
                        $token[Mustache_Tokenizer::INDENT] = $indent[Mustache_Tokenizer::VALUE];
                    }
                    $nodes[] = $token;
                    break;

                case Mustache_Tokenizer::T_PARENT:
                    $this->checkIfTokenIsAllowedInParent($parent, $token);
                    $nodes[] = $this->buildTree($tokens, $token);
                    break;

                case Mustache_Tokenizer::T_BLOCK_VAR:
                    if ($this->pragmaBlocks) {
                        // BLOCKS pragma is enabled, let's do this!
                        if (isset($parent) && $parent[Mustache_Tokenizer::TYPE] === Mustache_Tokenizer::T_PARENT) {
                            $token[Mustache_Tokenizer::TYPE] = Mustache_Tokenizer::T_BLOCK_ARG;
                        }
                        $this->clearStandaloneLines($nodes, $tokens);
                        $nodes[] = $this->buildTree($tokens, $token);
                    } else {
                        // pretend this was just a normal "escaped" token...
                        $token[Mustache_Tokenizer::TYPE] = Mustache_Tokenizer::T_ESCAPED;
                        // TODO: figure out how to figure out if there was a space after this dollar:
                        $token[Mustache_Tokenizer::NAME] = '$' . $token[Mustache_Tokenizer::NAME];
                        $nodes[] = $token;
                    }
                    break;

                case Mustache_Tokenizer::T_PRAGMA:
                    $this->enablePragma($token[Mustache_Tokenizer::NAME]);
                    // no break

                case Mustache_Tokenizer::T_COMMENT:
                    $this->clearStandaloneLines($nodes, $tokens);
                    $nodes[] = $token;
                    break;

                default:
                    $nodes[] = $token;
                    break;
            }
        }

        // Ran out of tokens while still inside a section-like tag.
        if (isset($parent)) {
            $msg = sprintf(
                'Missing closing tag: %s opened on line %d',
                $parent[Mustache_Tokenizer::NAME],
                $parent[Mustache_Tokenizer::LINE]
            );
            throw new Mustache_Exception_SyntaxException($msg, $parent);
        }

        return $nodes;
    }

    /**
     * Clear standalone line tokens.
     *
     * Returns a whitespace token for indenting partials, if applicable.
     *
     * @param array $nodes  Parsed nodes
     * @param array $tokens Tokens to be parsed
     *
     * @return array|null Resulting indent token, if any
     */
    private function clearStandaloneLines(array &$nodes, array &$tokens)
    {
        if ($this->lineTokens > 1) {
            // this is the third or later node on this line, so it can't be standalone
            return;
        }

        $prev = null;
        if ($this->lineTokens === 1) {
            // this is the second node on this line, so it can't be standalone
            // unless the previous node is whitespace.
            if ($prev = end($nodes)) {
                if (!$this->tokenIsWhitespace($prev)) {
                    return;
                }
            }
        }

        if ($next = reset($tokens)) {
            // If we're on a new line, bail.
            if ($next[Mustache_Tokenizer::LINE] !== $this->lineNum) {
                return;
            }

            // If the next token isn't whitespace, bail.
            if (!$this->tokenIsWhitespace($next)) {
                return;
            }

            if (count($tokens) !== 1) {
                // Unless it's the last token in the template, the next token
                // must end in newline for this to be standalone.
                if (substr($next[Mustache_Tokenizer::VALUE], -1) !== "\n") {
                    return;
                }
            }

            // Discard the whitespace suffix
            array_shift($tokens);
        }

        if ($prev) {
            // Return the whitespace prefix, if any
            return array_pop($nodes);
        }
    }

    /**
     * Check whether token is a whitespace token.
     *
     * True if token type is T_TEXT and value is all whitespace characters.
     *
     * @param array $token
     *
     * @return bool True if token is a whitespace token
     */
    private function tokenIsWhitespace(array $token)
    {
        if ($token[Mustache_Tokenizer::TYPE] === Mustache_Tokenizer::T_TEXT) {
            // preg_match() returns int|false; compare so the documented bool is what callers get.
            return preg_match('/^\s*$/', $token[Mustache_Tokenizer::VALUE]) === 1;
        }

        return false;
    }

    /**
     * Check whether a token is allowed inside a parent tag.
     *
     * @throws Mustache_Exception_SyntaxException if an invalid token is found inside a parent tag
     *
     * @param array|null $parent
     * @param array      $token
     */
    private function checkIfTokenIsAllowedInParent($parent, array $token)
    {
        if (isset($parent) && $parent[Mustache_Tokenizer::TYPE] === Mustache_Tokenizer::T_PARENT) {
            throw new Mustache_Exception_SyntaxException('Illegal content in < parent tag', $token);
        }
    }

    /**
     * Split a tag name into name and filters.
     *
     * The name is split on "|" and every part is trimmed; the first part is the
     * tag name, the remaining parts are the filters.
     *
     * @param string $name
     *
     * @return array [Tag name, Array of filters]
     */
    private function getNameAndFilters($name)
    {
        $filters = array_map('trim', explode('|', $name));
        $name    = array_shift($filters);

        return array($name, $filters);
    }

    /**
     * Enable a pragma.
     *
     * Records the pragma as enabled and, for the BLOCKS and FILTERS pragmas,
     * also flips the corresponding fast-path flag.
     *
     * @param string $name
     */
    private function enablePragma($name)
    {
        $this->pragmas[$name] = true;

        switch ($name) {
            case Mustache_Engine::PRAGMA_BLOCKS:
                $this->pragmaBlocks = true;
                break;

            case Mustache_Engine::PRAGMA_FILTERS:
                $this->pragmaFilters = true;
                break;
        }
    }
}

View file

@ -0,0 +1,40 @@
<?php
/*
* This file is part of Mustache.php.
*
* (c) 2010-2017 Justin Hileman
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
/**
 * Contract for a Mustache template source.
 *
 * A source exposes two things: a key identifying the template and the raw
 * template text itself.
 */
interface Mustache_Source
{
    /**
     * Return the key for this source; it is used to generate the compiled class name.
     *
     * Each template source has to yield a distinct key. An MD5 hash of the
     * template contents would probably be sufficient, for example. The
     * ProductionFilesystemLoader uses mtime and file path. When the production
     * source directory is under version control, the current Git rev combined
     * with the file path is another option.
     *
     * @throws RuntimeException when a source file cannot be read
     *
     * @return string
     */
    public function getKey();

    /**
     * Return the template source text.
     *
     * @throws RuntimeException when a source file cannot be read
     *
     * @return string
     */
    public function getSource();
}

View file

@ -0,0 +1,77 @@
<?php
/*
* This file is part of Mustache.php.
*
* (c) 2010-2017 Justin Hileman
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
/**
 * Mustache template Filesystem Source.
 *
 * This template Source uses stat() to generate the Source key, so that using
 * pre-compiled templates doesn't require hitting the disk to read the source.
 * It is more suitable for production use, and is used by default in the
 * ProductionFilesystemLoader.
 */
class Mustache_Source_FilesystemSource implements Mustache_Source
{
    // Path of the template file.
    private $fileName;
    // Names of the stat() entries that are mixed into the key.
    private $statProps;
    // Cached stat() result for $fileName (false when stat() failed).
    private $stat;

    /**
     * Filesystem Source constructor.
     *
     * @param string $fileName
     * @param array  $statProps
     */
    public function __construct($fileName, array $statProps)
    {
        $this->fileName  = $fileName;
        $this->statProps = $statProps;
    }

    /**
     * Get the Source key (used to generate the compiled class name).
     *
     * The key is the JSON encoding of the file name plus the requested stat()
     * properties. stat() is only called when at least one property was
     * requested, and its result is cached on the instance.
     *
     * @throws Mustache_Exception_RuntimeException when a source file cannot be read
     *
     * @return string
     */
    public function getKey()
    {
        $chunks = array(
            'fileName' => $this->fileName,
        );

        if (!empty($this->statProps)) {
            if (!isset($this->stat)) {
                $this->stat = @stat($this->fileName);
            }

            if ($this->stat === false) {
                throw new Mustache_Exception_RuntimeException(sprintf('Failed to read source file "%s".', $this->fileName));
            }

            foreach ($this->statProps as $prop) {
                $chunks[$prop] = $this->stat[$prop];
            }
        }

        return json_encode($chunks);
    }

    /**
     * Get the template Source.
     *
     * @throws Mustache_Exception_RuntimeException when the source file cannot be read
     *
     * @return string
     */
    public function getSource()
    {
        // file_get_contents() returns false on failure; surface that the same
        // way getKey() does instead of handing `false` back as template source.
        $source = @file_get_contents($this->fileName);
        if ($source === false) {
            throw new Mustache_Exception_RuntimeException(sprintf('Failed to read source file "%s".', $this->fileName));
        }

        return $source;
    }
}

View file

@ -0,0 +1,180 @@
<?php
/*
* This file is part of Mustache.php.
*
* (c) 2010-2017 Justin Hileman
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
/**
 * Base class for Mustache templates.
 *
 * Concrete subclasses supply renderInternal(); this class provides the public
 * rendering entry points and the helpers shared by all templates.
 *
 * @abstract
 */
abstract class Mustache_Template
{
    /**
     * @var Mustache_Engine
     */
    protected $mustache;

    /**
     * @var bool
     */
    protected $strictCallables = false;

    /**
     * Mustache Template constructor.
     *
     * @param Mustache_Engine $mustache
     */
    public function __construct(Mustache_Engine $mustache)
    {
        $this->mustache = $mustache;
    }

    /**
     * Allow a template instance to be called like a function; equivalent to render().
     *
     *     $m = new Mustache_Engine;
     *     $tpl = $m->loadTemplate('Hello, {{ name }}!');
     *     echo $tpl(array('name' => 'World')); // "Hello, World!"
     *
     * @see Mustache_Template::render
     *
     * @param mixed $context Array or object rendering context (default: array())
     *
     * @return string Rendered template
     */
    public function __invoke($context = array())
    {
        return $this->render($context);
    }

    /**
     * Render the template against the given context.
     *
     * @param mixed $context Array or object rendering context (default: array())
     *
     * @return string Rendered template
     */
    public function render($context = array())
    {
        $stack = $this->prepareContextStack($context);

        return $this->renderInternal($stack);
    }

    /**
     * Rendering hook implemented by concrete Mustache Template subclasses.
     *
     * NOTE: This method is not part of the Mustache.php public API.
     *
     * @param Mustache_Context $context
     * @param string           $indent  (default: '')
     *
     * @return string Rendered template
     */
    abstract public function renderInternal(Mustache_Context $context, $indent = '');

    /**
     * Decide whether a value should be iterated over (e.g. in a section context).
     *
     * PHP has a single array type for both lists and hashes, so a distinction
     * has to be made here: Traversable objects and arrays whose keys are exactly
     * 0, 1, 2, ... in order are iterable; any other array is treated as a set of
     * variables (a section context). So this is iterated over:
     *
     *     $items = array(
     *         array('name' => 'foo'),
     *         array('name' => 'bar'),
     *         array('name' => 'baz'),
     *     );
     *
     * ... while this is used as a section context block:
     *
     *     $items = array(
     *         1        => array('name' => 'foo'),
     *         'banana' => array('name' => 'bar'),
     *         42       => array('name' => 'baz'),
     *     );
     *
     * @param mixed $value
     *
     * @return bool True if the value is 'iterable'
     */
    protected function isIterable($value)
    {
        if (is_object($value)) {
            return $value instanceof Traversable;
        }

        if (!is_array($value)) {
            return false;
        }

        // A list has keys 0..n-1 in order; the first key that deviates makes it a hash.
        $expectedKey = 0;
        foreach ($value as $key => $unused) {
            if ($key !== $expectedKey) {
                return false;
            }
            $expectedKey++;
        }

        return true;
    }

    /**
     * Build the Context stack used for rendering.
     *
     * The engine's helper collection is pushed first when it is not empty,
     * followed by the given context when that is not empty.
     *
     * @param mixed $context Optional first context frame (default: null)
     *
     * @return Mustache_Context
     */
    protected function prepareContextStack($context = null)
    {
        $contextStack = new Mustache_Context();

        $helperCollection = $this->mustache->getHelpers();
        if (!$helperCollection->isEmpty()) {
            $contextStack->push($helperCollection);
        }

        if (!empty($context)) {
            $contextStack->push($context);
        }

        return $contextStack;
    }

    /**
     * Resolve a context value.
     *
     * A callable value is invoked and its result, cast to string, is loaded as
     * a lambda template and rendered against $context; any other value is
     * returned untouched. With strict callables only objects are candidates
     * for invocation, otherwise everything except strings is.
     *
     * @param mixed            $value
     * @param Mustache_Context $context
     *
     * @return string
     */
    protected function resolveValue($value, Mustache_Context $context)
    {
        if ($this->strictCallables) {
            $mayInvoke = is_object($value);
        } else {
            $mayInvoke = !is_string($value);
        }

        if (!$mayInvoke || !is_callable($value)) {
            return $value;
        }

        $lambdaSource = (string) call_user_func($value);

        return $this->mustache
            ->loadLambda($lambdaSource)
            ->renderInternal($context);
    }
}

View file

@ -0,0 +1,378 @@
<?php
/*
* This file is part of Mustache.php.
*
* (c) 2010-2017 Justin Hileman
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
/**
 * Mustache Tokenizer class.
 *
 * This class is responsible for turning raw template source into a set of Mustache tokens.
 * It is a small state machine that walks the source one byte at a time.
 */
class Mustache_Tokenizer
{
    // Finite state machine states
    const IN_TEXT     = 0;
    const IN_TAG_TYPE = 1;
    const IN_TAG      = 2;

    // Token types
    const T_SECTION      = '#';
    const T_INVERTED     = '^';
    const T_END_SECTION  = '/';
    const T_COMMENT      = '!';
    const T_PARTIAL      = '>';
    const T_PARENT       = '<';
    const T_DELIM_CHANGE = '=';
    const T_ESCAPED      = '_v';
    const T_UNESCAPED    = '{';
    const T_UNESCAPED_2  = '&';
    const T_TEXT         = '_t';
    const T_PRAGMA       = '%';
    const T_BLOCK_VAR    = '$';
    const T_BLOCK_ARG    = '$arg';

    // Valid token types
    private static $tagTypes = array(
        self::T_SECTION      => true,
        self::T_INVERTED     => true,
        self::T_END_SECTION  => true,
        self::T_COMMENT      => true,
        self::T_PARTIAL      => true,
        self::T_PARENT       => true,
        self::T_DELIM_CHANGE => true,
        self::T_ESCAPED      => true,
        self::T_UNESCAPED    => true,
        self::T_UNESCAPED_2  => true,
        self::T_PRAGMA       => true,
        self::T_BLOCK_VAR    => true,
    );

    // Token properties
    const TYPE    = 'type';
    const NAME    = 'name';
    const OTAG    = 'otag';
    const CTAG    = 'ctag';
    const LINE    = 'line';
    const INDEX   = 'index';
    const END     = 'end';
    const INDENT  = 'indent';
    const NODES   = 'nodes';
    const VALUE   = 'value';
    const FILTERS = 'filters';

    private $state;
    private $tagType;
    private $buffer;
    private $tokens;
    private $seenTag;
    private $line;
    private $otag;
    private $otagChar;
    private $otagLen;
    private $ctag;
    private $ctagChar;
    private $ctagLen;

    /**
     * Scan and tokenize template source.
     *
     * @throws Mustache_Exception_SyntaxException          when mismatched section tags are encountered
     * @throws Mustache_Exception_InvalidArgumentException when $delimiters string is invalid
     *
     * @param string $text       Mustache template source to tokenize
     * @param string $delimiters Optionally, pass initial opening and closing delimiters (default: empty string)
     *
     * @return array Set of Mustache tokens
     */
    public function scan($text, $delimiters = '')
    {
        // Setting mbstring.func_overload makes things *really* slow.
        // Let's do everyone a favor and scan this string as ASCII instead.
        //
        // The INI directive was removed in PHP 8.0 so we don't need to check there (and can drop it
        // when we remove support for older versions of PHP).
        //
        // @codeCoverageIgnoreStart
        $encoding = null;
        if (version_compare(PHP_VERSION, '8.0.0', '<')) {
            if (function_exists('mb_internal_encoding') && ini_get('mbstring.func_overload') & 2) {
                $encoding = mb_internal_encoding();
                mb_internal_encoding('ASCII');
            }
        }
        // @codeCoverageIgnoreEnd

        try {
            $tokens = $this->tokenize($text, $delimiters);
        } catch (Exception $e) {
            // Don't leave the process stuck in ASCII when the template is invalid.
            // @codeCoverageIgnoreStart
            if ($encoding) {
                mb_internal_encoding($encoding);
            }
            // @codeCoverageIgnoreEnd
            throw $e;
        }

        // Restore the user's encoding...
        // @codeCoverageIgnoreStart
        if ($encoding) {
            mb_internal_encoding($encoding);
        }
        // @codeCoverageIgnoreEnd

        return $tokens;
    }

    /**
     * Run the tokenizer state machine over the template source.
     *
     * @throws Mustache_Exception_SyntaxException          when a tag is unclosed or its delimiters are mismatched
     * @throws Mustache_Exception_InvalidArgumentException when $delimiters string is invalid
     *
     * @param string $text       Mustache template source to tokenize
     * @param string $delimiters Initial opening and closing delimiters, or an empty string for the defaults
     *
     * @return array Set of Mustache tokens
     */
    private function tokenize($text, $delimiters)
    {
        $this->reset();

        if (is_string($delimiters) && $delimiters = trim($delimiters)) {
            $this->setDelimiters($delimiters);
        }

        $len = strlen($text);
        for ($i = 0; $i < $len; $i++) {
            switch ($this->state) {
                case self::IN_TEXT:
                    $char = $text[$i];
                    // Test whether it's time to change tags.
                    if ($char === $this->otagChar && substr($text, $i, $this->otagLen) === $this->otag) {
                        // Step back so the next iteration re-visits the opening delimiter in IN_TAG_TYPE.
                        $i--;
                        $this->flushBuffer();
                        $this->state = self::IN_TAG_TYPE;
                    } else {
                        $this->buffer .= $char;
                        if ($char === "\n") {
                            $this->flushBuffer();
                            $this->line++;
                        }
                    }
                    break;

                case self::IN_TAG_TYPE:
                    // Move to the last character of the opening delimiter.
                    $i += $this->otagLen - 1;
                    // The template may end right after the opening delimiter; reading
                    // past the end of the string would raise "Uninitialized string offset".
                    $char = ($i + 1 < $len) ? $text[$i + 1] : '';
                    if (isset(self::$tagTypes[$char])) {
                        $tag = $char;
                        $this->tagType = $tag;
                    } else {
                        $tag = null;
                        $this->tagType = self::T_ESCAPED;
                    }

                    if ($this->tagType === self::T_DELIM_CHANGE) {
                        $i = $this->changeDelimiters($text, $i);
                        $this->state = self::IN_TEXT;
                    } elseif ($this->tagType === self::T_PRAGMA) {
                        $i = $this->addPragma($text, $i);
                        $this->state = self::IN_TEXT;
                    } else {
                        if ($tag !== null) {
                            // Skip over the tag type character.
                            $i++;
                        }
                        $this->state = self::IN_TAG;
                    }
                    $this->seenTag = $i;
                    break;

                default:
                    $char = $text[$i];
                    // Test whether it's time to change tags.
                    if ($char === $this->ctagChar && substr($text, $i, $this->ctagLen) === $this->ctag) {
                        $token = array(
                            self::TYPE  => $this->tagType,
                            self::NAME  => trim($this->buffer),
                            self::OTAG  => $this->otag,
                            self::CTAG  => $this->ctag,
                            self::LINE  => $this->line,
                            self::INDEX => ($this->tagType === self::T_END_SECTION) ? $this->seenTag - $this->otagLen : $i + $this->ctagLen,
                        );

                        if ($this->tagType === self::T_UNESCAPED) {
                            // Clean up `{{{ tripleStache }}}` style tokens.
                            if ($this->ctag === '}}') {
                                if (($i + 2 < $len) && $text[$i + 2] === '}') {
                                    $i++;
                                } else {
                                    $msg = sprintf(
                                        'Mismatched tag delimiters: %s on line %d',
                                        $token[self::NAME],
                                        $token[self::LINE]
                                    );

                                    throw new Mustache_Exception_SyntaxException($msg, $token);
                                }
                            } else {
                                $lastName = $token[self::NAME];
                                if (substr($lastName, -1) === '}') {
                                    $token[self::NAME] = trim(substr($lastName, 0, -1));
                                } else {
                                    $msg = sprintf(
                                        'Mismatched tag delimiters: %s on line %d',
                                        $token[self::NAME],
                                        $token[self::LINE]
                                    );

                                    throw new Mustache_Exception_SyntaxException($msg, $token);
                                }
                            }
                        }

                        $this->buffer = '';
                        $i += $this->ctagLen - 1;
                        $this->state = self::IN_TEXT;
                        $this->tokens[] = $token;
                    } else {
                        $this->buffer .= $char;
                    }
                    break;
            }
        }

        if ($this->state !== self::IN_TEXT) {
            $this->throwUnclosedTagException();
        }

        $this->flushBuffer();

        return $this->tokens;
    }

    /**
     * Helper function to reset tokenizer internal state.
     */
    private function reset()
    {
        $this->state    = self::IN_TEXT;
        $this->tagType  = null;
        $this->buffer   = '';
        $this->tokens   = array();
        $this->seenTag  = false;
        $this->line     = 0;

        $this->otag     = '{{';
        $this->otagChar = '{';
        $this->otagLen  = 2;

        $this->ctag     = '}}';
        $this->ctagChar = '}';
        $this->ctagLen  = 2;
    }

    /**
     * Flush the current buffer to a token.
     *
     * Nothing is emitted when the buffer is empty.
     */
    private function flushBuffer()
    {
        if (strlen($this->buffer) > 0) {
            $this->tokens[] = array(
                self::TYPE  => self::T_TEXT,
                self::LINE  => $this->line,
                self::VALUE => $this->buffer,
            );
            $this->buffer = '';
        }
    }

    /**
     * Change the current Mustache delimiters. Set new `otag` and `ctag` values.
     *
     * @throws Mustache_Exception_SyntaxException when delimiter string is invalid
     *
     * @param string $text  Mustache template source
     * @param int    $index Current tokenizer index
     *
     * @return int New index value
     */
    private function changeDelimiters($text, $index)
    {
        $startIndex = strpos($text, '=', $index) + 1;
        $close      = '=' . $this->ctag;
        $closeIndex = strpos($text, $close, $index);

        if ($closeIndex === false) {
            $this->throwUnclosedTagException();
        }

        $token = array(
            self::TYPE => self::T_DELIM_CHANGE,
            self::LINE => $this->line,
        );

        try {
            $this->setDelimiters(trim(substr($text, $startIndex, $closeIndex - $startIndex)));
        } catch (Mustache_Exception_InvalidArgumentException $e) {
            throw new Mustache_Exception_SyntaxException($e->getMessage(), $token);
        }

        $this->tokens[] = $token;

        return $closeIndex + strlen($close) - 1;
    }

    /**
     * Set the current Mustache `otag` and `ctag` delimiters.
     *
     * The string must consist of exactly two whitespace-separated, non-empty parts.
     *
     * @throws Mustache_Exception_InvalidArgumentException when delimiter string is invalid
     *
     * @param string $delimiters
     */
    private function setDelimiters($delimiters)
    {
        if (!preg_match('/^\s*(\S+)\s+(\S+)\s*$/', $delimiters, $matches)) {
            throw new Mustache_Exception_InvalidArgumentException(sprintf('Invalid delimiters: %s', $delimiters));
        }

        list($_, $otag, $ctag) = $matches;

        $this->otag     = $otag;
        $this->otagChar = $otag[0];
        $this->otagLen  = strlen($otag);

        $this->ctag     = $ctag;
        $this->ctagChar = $ctag[0];
        $this->ctagLen  = strlen($ctag);
    }

    /**
     * Add pragma token.
     *
     * Pragmas are hoisted to the front of the template, so all pragma tokens
     * will appear at the front of the token list.
     *
     * @throws Mustache_Exception_SyntaxException when the pragma tag is never closed
     *
     * @param string $text
     * @param int    $index
     *
     * @return int New index value
     */
    private function addPragma($text, $index)
    {
        $end = strpos($text, $this->ctag, $index);
        if ($end === false) {
            $this->throwUnclosedTagException();
        }

        // $index is the last character of the opening delimiter and $index + 1 is
        // the pragma marker, so the pragma name starts at $index + 2.
        $pragma = trim(substr($text, $index + 2, $end - $index - 2));

        // Pragmas are hoisted to the front of the template.
        array_unshift($this->tokens, array(
            self::TYPE => self::T_PRAGMA,
            self::NAME => $pragma,
            self::LINE => 0,
        ));

        return $end + $this->ctagLen - 1;
    }

    /**
     * Throw a SyntaxException describing the tag that was left open.
     *
     * @throws Mustache_Exception_SyntaxException always
     */
    private function throwUnclosedTagException()
    {
        $name = trim($this->buffer);
        if ($name !== '') {
            $msg = sprintf('Unclosed tag: %s on line %d', $name, $this->line);
        } else {
            $msg = sprintf('Unclosed tag on line %d', $this->line);
        }

        throw new Mustache_Exception_SyntaxException($msg, array(
            self::TYPE  => $this->tagType,
            self::NAME  => $name,
            self::OTAG  => $this->otag,
            self::CTAG  => $this->ctag,
            self::LINE  => $this->line,
            self::INDEX => $this->seenTag - $this->otagLen,
        ));
    }
}

View file

@ -0,0 +1,31 @@
<?php
use PhpCsFixer\Config;
use PhpCsFixer\Finder;
// Directories whose *.php files are checked.
$sourceDirs = [
    __DIR__.'/src',
    __DIR__.'/tests',
];

$finder = Finder::create()
    ->in($sourceDirs)
    ->name('*.php');

// Rule sets and individual fixers applied on top of them.
$rules = [
    '@Symfony' => true,
    '@Symfony:risky' => true,
    'array_syntax' => ['syntax' => 'short'],
    'no_empty_phpdoc' => true,
    'no_unused_imports' => true,
    'no_superfluous_phpdoc_tags' => true,
    'ordered_imports' => true,
    'phpdoc_summary' => false,
    'protected_to_private' => false,
];

$config = new Config();
$config->setFinder($finder);
// Risky rules have to be allowed explicitly because '@Symfony:risky' is enabled.
$config->setRiskyAllowed(true);
$config->setRules($rules);

return $config;

View file

@ -0,0 +1,165 @@
GNU LESSER GENERAL PUBLIC LICENSE
Version 3, 29 June 2007
Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
This version of the GNU Lesser General Public License incorporates
the terms and conditions of version 3 of the GNU General Public
License, supplemented by the additional permissions listed below.
0. Additional Definitions.
As used herein, "this License" refers to version 3 of the GNU Lesser
General Public License, and the "GNU GPL" refers to version 3 of the GNU
General Public License.
"The Library" refers to a covered work governed by this License,
other than an Application or a Combined Work as defined below.
An "Application" is any work that makes use of an interface provided
by the Library, but which is not otherwise based on the Library.
Defining a subclass of a class defined by the Library is deemed a mode
of using an interface provided by the Library.
A "Combined Work" is a work produced by combining or linking an
Application with the Library. The particular version of the Library
with which the Combined Work was made is also called the "Linked
Version".
The "Minimal Corresponding Source" for a Combined Work means the
Corresponding Source for the Combined Work, excluding any source code
for portions of the Combined Work that, considered in isolation, are
based on the Application, and not on the Linked Version.
The "Corresponding Application Code" for a Combined Work means the
object code and/or source code for the Application, including any data
and utility programs needed for reproducing the Combined Work from the
Application, but excluding the System Libraries of the Combined Work.
1. Exception to Section 3 of the GNU GPL.
You may convey a covered work under sections 3 and 4 of this License
without being bound by section 3 of the GNU GPL.
2. Conveying Modified Versions.
If you modify a copy of the Library, and, in your modifications, a
facility refers to a function or data to be supplied by an Application
that uses the facility (other than as an argument passed when the
facility is invoked), then you may convey a copy of the modified
version:
a) under this License, provided that you make a good faith effort to
ensure that, in the event an Application does not supply the
function or data, the facility still operates, and performs
whatever part of its purpose remains meaningful, or
b) under the GNU GPL, with none of the additional permissions of
this License applicable to that copy.
3. Object Code Incorporating Material from Library Header Files.
The object code form of an Application may incorporate material from
a header file that is part of the Library. You may convey such object
code under terms of your choice, provided that, if the incorporated
material is not limited to numerical parameters, data structure
layouts and accessors, or small macros, inline functions and templates
(ten or fewer lines in length), you do both of the following:
a) Give prominent notice with each copy of the object code that the
Library is used in it and that the Library and its use are
covered by this License.
b) Accompany the object code with a copy of the GNU GPL and this license
document.
4. Combined Works.
You may convey a Combined Work under terms of your choice that,
taken together, effectively do not restrict modification of the
portions of the Library contained in the Combined Work and reverse
engineering for debugging such modifications, if you also do each of
the following:
a) Give prominent notice with each copy of the Combined Work that
the Library is used in it and that the Library and its use are
covered by this License.
b) Accompany the Combined Work with a copy of the GNU GPL and this license
document.
c) For a Combined Work that displays copyright notices during
execution, include the copyright notice for the Library among
these notices, as well as a reference directing the user to the
copies of the GNU GPL and this license document.
d) Do one of the following:
0) Convey the Minimal Corresponding Source under the terms of this
License, and the Corresponding Application Code in a form
suitable for, and under terms that permit, the user to
recombine or relink the Application with a modified version of
the Linked Version to produce a modified Combined Work, in the
manner specified by section 6 of the GNU GPL for conveying
Corresponding Source.
1) Use a suitable shared library mechanism for linking with the
Library. A suitable mechanism is one that (a) uses at run time
a copy of the Library already present on the user's computer
system, and (b) will operate properly with a modified version
of the Library that is interface-compatible with the Linked
Version.
e) Provide Installation Information, but only if you would otherwise
be required to provide such information under section 6 of the
GNU GPL, and only to the extent that such information is
necessary to install and execute a modified version of the
Combined Work produced by recombining or relinking the
Application with a modified version of the Linked Version. (If
you use option 4d0, the Installation Information must accompany
the Minimal Corresponding Source and Corresponding Application
Code. If you use option 4d1, you must provide the Installation
Information in the manner specified by section 6 of the GNU GPL
for conveying Corresponding Source.)
5. Combined Libraries.
You may place library facilities that are a work based on the
Library side by side in a single library together with other library
facilities that are not Applications and are not covered by this
License, and convey such a combined library under terms of your
choice, if you do both of the following:
a) Accompany the combined library with a copy of the same work based
on the Library, uncombined with any other library facilities,
conveyed under the terms of this License.
b) Give prominent notice with the combined library that part of it
is a work based on the Library, and explaining where to find the
accompanying uncombined form of the same work.
6. Revised Versions of the GNU Lesser General Public License.
The Free Software Foundation may publish revised and/or new versions
of the GNU Lesser General Public License from time to time. Such new
versions will be similar in spirit to the present version, but may
differ in detail to address new problems or concerns.
Each version is given a distinguishing version number. If the
Library as you received it specifies that a certain numbered version
of the GNU Lesser General Public License "or any later version"
applies to it, you have the option of following the terms and
conditions either of that published version or of any later version
published by the Free Software Foundation. If the Library as you
received it does not specify a version number of the GNU Lesser
General Public License, you may choose any version of the GNU Lesser
General Public License ever published by the Free Software Foundation.
If the Library as you received it specifies that a proxy can decide
whether future versions of the GNU Lesser General Public License shall
apply, that proxy's public statement of acceptance of any version is
permanent authorization for you to choose that version for the
Library.

View file

@ -0,0 +1,18 @@
# None of these targets produces a file of the same name; declare them phony so
# that a stray file or directory with a target's name cannot stop the recipe from running.
.PHONY: install-dev-tools prepare-for-scrutinizer run-php-cs-fixer run-phpstan run-phpunit

# Install/update the development tools listed in dev-tools/composer.json.
install-dev-tools:
	composer update --working-dir=dev-tools

# Workaround to force PHPUnit 7.5.x when running Scrutinizer.
# Scrutinizer fails due to not enough memory when using a newer PHPUnit version (tested with 9.5).
# @see: https://github.com/smalot/pdfparser/issues/410
# @see: https://github.com/smalot/pdfparser/pull/412
prepare-for-scrutinizer:
	cd dev-tools && sed -e 's/>=7.5/^7.5/g' composer.json > composer.json2 && rm composer.json && mv composer.json2 composer.json

# Extra command-line arguments can be passed to each tool via ARGS, e.g. `make run-phpunit ARGS="--filter Foo"`.
run-php-cs-fixer:
	dev-tools/vendor/bin/php-cs-fixer fix $(ARGS)

run-phpstan:
	dev-tools/vendor/bin/phpstan analyze $(ARGS)

run-phpunit:
	dev-tools/vendor/bin/phpunit $(ARGS)

View file

@ -0,0 +1,58 @@
# PDF parser
[![Version](https://poser.pugx.org/smalot/pdfparser/v)](//packagist.org/packages/smalot/pdfparser)
![CI](https://github.com/smalot/pdfparser/workflows/CI/badge.svg)
![CS](https://github.com/smalot/pdfparser/workflows/CS/badge.svg)
[![Scrutinizer Code Quality](https://scrutinizer-ci.com/g/smalot/pdfparser/badges/quality-score.png?b=master)](https://scrutinizer-ci.com/g/smalot/pdfparser/?branch=master)
[![Downloads](https://poser.pugx.org/smalot/pdfparser/downloads)](//packagist.org/packages/smalot/pdfparser)
The `smalot/pdfparser` is a standalone PHP package that provides various tools to extract data from PDF files.
This library is under **active maintenance**.
There is no active development by the author of this library (at the moment), but we welcome any pull request adding/extending functionality!
## Features
- Load/parse objects and headers
- Extract metadata (author, description, ...)
- Extract text from ordered pages
- Support of compressed PDFs
- Support of Mac OS Roman charset encoding
- Handling of hexadecimal and octal encoding in text sections
- Create custom configurations (see [CustomConfig.md](/doc/CustomConfig.md)).
Currently, secured documents and extracting form data are not supported.
## License
This library is under the [LGPLv3 license](https://github.com/smalot/pdfparser/blob/master/LICENSE.txt).
## Install
This library requires PHP 7.1+ since [v1](https://github.com/smalot/pdfparser/releases/tag/v1.0.0).
You can install it via [Composer](https://getcomposer.org/):
```bash
composer require smalot/pdfparser
```
In case you can't use Composer, you can include `alt_autoload.php-dist`. It will include all required files automatically.
## Quick example
```php
<?php
// Parse PDF file and build necessary objects.
$parser = new \Smalot\PdfParser\Parser();
$pdf = $parser->parseFile('/path/to/document.pdf');
$text = $pdf->getText();
echo $text;
```
Further usage information can be found [here](/doc/Usage.md).
## Documentation
Documentation can be found in the [doc](/doc) folder.

View file

@ -0,0 +1,75 @@
<?php
/**
* @file This file is part of the PdfParser library.
*
* @author Konrad Abicht <k.abicht@gmail.com>
* @date 2021-02-09
*
* @license LGPLv3
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*
* --------------------------------------------------------------------------------------
*
* About:
* This file provides an alternative to the Composer-approach.
* Include it into your project and all required files of PDFParser will be loaded automatically.
* Please use it only, if Composer is not available.
*
* How to use:
* 1. include this file as it is OR copy and rename it as you like (and then include it)
* 2. afterwards you can use PDFParser classes
* Done.
*/
/**
 * Recursively loads every PHP file found below a given folder.
 *
 * Sub folders are traversed by calling this function again. Only entries
 * with a ".php" extension are required, so stray files (editor backups,
 * OS metadata, notes) are neither echoed into the output nor executed.
 * Because require_once is used, a file that was already loaded is skipped.
 *
 * @param string $dir Path of the folder to scan.
 */
function requireFilesOfFolder($dir)
{
    foreach (new DirectoryIterator($dir) as $fileInfo) {
        // "." and ".." would otherwise cause endless recursion.
        if ($fileInfo->isDot()) {
            continue;
        }

        if ($fileInfo->isDir()) {
            requireFilesOfFolder($fileInfo->getPathname());
            continue;
        }

        // Requiring a non-PHP file would dump its raw content to the output.
        if ('php' === strtolower($fileInfo->getExtension())) {
            require_once $fileInfo->getPathname();
        }
    }
}
// Folder that contains all PdfParser source files, relative to this file.
$rootFolder = __DIR__.'/src/Smalot/PdfParser';
// These files are required explicitly, and in this order, before the folder
// scan below (presumably because other classes extend them - TODO confirm).
require_once $rootFolder.'/Element.php';
require_once $rootFolder.'/PDFObject.php';
require_once $rootFolder.'/Font.php';
require_once $rootFolder.'/Page.php';
require_once $rootFolder.'/Element/ElementString.php';
require_once $rootFolder.'/Encoding/AbstractEncoding.php';
/*
* Load the rest of the PdfParser files from /src/Smalot/PdfParser.
* Don't worry, it won't load files multiple times (require_once is used throughout).
*/
requireFilesOfFolder($rootFolder);

View file

@ -0,0 +1,37 @@
{
"name": "smalot/pdfparser",
"description": "Pdf parser library. Can read and extract information from pdf file.",
"keywords": ["PDF", "text", "parser", "parse", "extract"],
"type": "library",
"license": "LGPL-3.0",
"authors": [
{
"name": "Sebastien MALOT",
"email": "sebastien@malot.fr"
}
],
"support": {
"issues": "https://github.com/smalot/pdfparser/issues"
},
"homepage": "https://www.pdfparser.org",
"require": {
"php": ">=7.1",
"symfony/polyfill-mbstring": "^1.18",
"ext-zlib": "*",
"ext-iconv": "*"
},
"autoload": {
"psr-0": {
"Smalot\\PdfParser\\": "src/"
}
},
"autoload-dev": {
"psr-4": {
"PerformanceTests\\": "tests/Performance/",
"PHPUnitTests\\": "tests/PHPUnit/"
}
},
"config": {
"process-timeout": 1200
}
}

View file

@ -0,0 +1,65 @@
# Configuring the behavior of the parser
To change the behavior of the parser, create a `Config` object and pass it to the parser.
In this case, we're setting the font space limit.
Changing this value can be helpful when `getText()` returns a text with too many spaces.
```php
$config = new \Smalot\PdfParser\Config();
$config->setFontSpaceLimit(-60);
$parser = new \Smalot\PdfParser\Parser([], $config);
$pdf = $parser->parseFile('document.pdf');
// output extracted text
// echo $pdf->getText();
```
## Config options overview
The `Config` class has the following options:
| Option | Type | Default | Description |
|--------------------------|---------|-----------------|------------------------------------------------------------------------------------------------------------------------------------------------------|
| `setDecodeMemoryLimit` | Integer | `0` | If parsing fails because of memory exhaustion, you can set a lower memory limit for decoding operations. |
| `setFontSpaceLimit` | Integer | `-50` | Changing font space limit can be helpful when `Parser::getText()` returns a text with too many spaces. |
| `setHorizontalOffset` | String | ` ` | When words are broken up or when the structure of a table is not preserved, you may get better results when adapting `setHorizontalOffset`. |
| `setPdfWhitespaces` | String | `\0\t\n\f\r ` | |
| `setPdfWhitespacesRegex` | String | `[\0\t\n\f\r ]` | |
| `setRetainImageContent`  | Boolean | `true`          | If parsing fails because of memory exhaustion, you can set the value to `false`. It won't retain image content anymore, but will use less memory too. |
## option setDecodeMemoryLimit + setRetainImageContent (manage memory usage)
If parsing fails because of memory exhaustion, you can use the following options.
```php
$config = new \Smalot\PdfParser\Config();
// Whether to retain raw image data as content or discard it to save memory
$config->setRetainImageContent(false);
// Memory limit to use when de-compressing files, in bytes
$config->setDecodeMemoryLimit(1000000);
$parser = new \Smalot\PdfParser\Parser([], $config);
```
## option setHorizontalOffset
When words are broken up or when the structure of a table is not preserved, you can use `setHorizontalOffset`.
```php
$config = new \Smalot\PdfParser\Config();
// An empty string can prevent words from breaking up
$config->setHorizontalOffset('');
// A tab can help preserve the structure of your document
$config->setHorizontalOffset("\t");
$parser = new \Smalot\PdfParser\Parser([], $config);
```
## option setFontSpaceLimit
Changing font space limit can be helpful when `getText()` returns a text with too many spaces.
```php
$config = new \Smalot\PdfParser\Config();
$config->setFontSpaceLimit(-60);
$parser = new \Smalot\PdfParser\Parser([], $config);
$pdf = $parser->parseFile('document.pdf');
```

View file

@ -0,0 +1,57 @@
# Developers
Here you will find information about our development tools and how to use them.
## .editorconfig
Please make sure your editor uses our `.editorconfig` file. It contains rules about our coding styles.
## GitHub Action Workflows
We use GitHub Actions to run our continuous integration as well as other tasks after pushing changes.
You will find related files in `.github/workflows/`.
## Development Tools and Tests
Our test related files are located in `tests` folder.
Tests are written using PHPUnit.
To install (and update) development tools like PHPUnit or PHP-CS-Fixer run:
```bash
make install-dev-tools
```
Development tools are getting installed in `dev-tools/vendor`.
Please check `dev-tools/composer.json` for more information about versions etc.
To run a tool manually, you use `dev-tools/vendor/bin`, for instance:
```bash
dev-tools/vendor/bin/php-cs-fixer fix --verbose --dry-run
```
Below are a few shortcuts to improve your developer experience.
### PHPUnit
To run all tests run:
```bash
make run-phpunit
```
### PHP-CS-Fixer
To check coding styles, run:
```bash
make run-php-cs-fixer
```
### PHPStan
To run a static code analysis, use:
```bash
make run-phpstan
```

View file

@ -0,0 +1,173 @@
# Usage
First create a parser object and point it to a file.
```php
$parser = new \Smalot\PdfParser\Parser();
$pdf = $parser->parseFile('document.pdf');
// .. or ...
$pdf = $parser->parseContent(file_get_contents('document.pdf'));
```
## Extract text
A common scenario is to extract text.
```php
// extract text of the whole PDF
$text = $pdf->getText();
// or extract the text of a specific page (in this case the first page)
$text = $pdf->getPages()[0]->getText();
// you can also extract text of a limited amount of pages. here, it will only use the first five pages.
$text = $pdf->getText(5);
```
## Extract text positions
You can extract transformation matrix (indexes 0-3) and x,y position of text objects (indexes 4,5).
```php
$data = $pdf->getPages()[0]->getDataTm();
Array
(
[0] => Array
(
[0] => Array
(
[0] => 0.999429
[1] => 0
[2] => 0
[3] => 1
[4] => 201.96
[5] => 720.68
)
[1] => Document title
)
[1] => Array
(
[0] => Array
(
[0] => 0.999402
[1] => 0
[2] => 0
[3] => 1
[4] => 70.8
[5] => 673.64
)
[1] => Calibri : Lorem ipsum dolor sit amet, consectetur a
)
)
```
When activated via Config setting (`Config::setDataTmFontInfoHasToBeIncluded(true)`) font identifier (index 2) and font size (index 3) are added to dataTm.
```php
// create config
$config = new Smalot\PdfParser\Config();
$config->setDataTmFontInfoHasToBeIncluded(true);
// use config and parse file
$parser = new Smalot\PdfParser\Parser([], $config);
$pdf = $parser->parseFile('document.pdf');
$data = $pdf->getPages()[0]->getDataTm();
Array
(
[0] => Array
(
[0] => Array
(
[0] => 0.999429
[1] => 0
[2] => 0
[3] => 1
[4] => 201.96
[5] => 720.68
)
[1] => Document title
[2] => R7
[3] => 27.96
)
[1] => Array
(
[0] => Array
(
[0] => 0.999402
[1] => 0
[2] => 0
[3] => 1
[4] => 70.8
[5] => 673.64
)
[1] => Calibri : Lorem ipsum dolor sit amet, consectetur a
[2] => R9
[3] => 11.04
)
)
```
Text width should be calculated on text from dataTm to make sure all character widths are available.
In the next example we use the data from above.
```php
$fonts = $pdf->getFonts();
$font_id = $data[0][2]; //R7
$font = $fonts[$font_id];
$text = $data[0][1];
$width = $font->calculateTextWidth($text, $missing);
```
## Extract metadata
You can also extract metadata. The available data varies from PDF to PDF.
```php
$metaData = $pdf->getDetails();
Array
(
[Producer] => Adobe Acrobat
[CreatedOn] => 2022-01-28T16:36:11+00:00
[Pages] => 35
)
```
## Read Base64 encoded PDFs
If working with [Base64](https://en.wikipedia.org/wiki/Base64) encoded PDFs, you might want to parse the PDF without saving the file to disk.
This sample will parse the Base64 encoded PDF and extract text from each page.
```php
<?php
// Parse Base64 encoded PDF string and build necessary objects.
$parser = new \Smalot\PdfParser\Parser();
$pdf = $parser->parseContent(base64_decode($base64PDF));
$text = $pdf->getText();
echo $text;
```
## Calculate text width
Try to calculate text width for given font.
Characters without width are added to `$missing` array in second parameter.
```php
$parser = new \Smalot\PdfParser\Parser();
$pdf = $parser->parseFile('document.pdf');
// get first font (we assume here there is at least one)
$fonts = $pdf->getFonts();
$font = reset($fonts);
// get width
$width = $font->calculateTextWidth('Some text', $missing);
```

View file

@ -0,0 +1,21 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- https://phpunit.de/manual/current/en/appendixes.configuration.html -->
<phpunit xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="https://schema.phpunit.de/10.0/phpunit.xsd" backupGlobals="false" bootstrap="vendor\autoload.php" colors="true" processIsolation="false" stopOnFailure="false" cacheDirectory=".phpunit.cache" backupStaticProperties="false">
<coverage>
<include>
<directory>src</directory>
</include>
</coverage>
<php>
<ini name="error_reporting" value="-1"/>
<ini name="zend.enable_gc" value="0"/>
<ini name="error_reporting" value="-1"/>
<ini name="intl.error_level" value="0"/>
<ini name="display_errors" value="On"/>
</php>
<testsuites>
<testsuite name="all">
<directory>tests/PHPUnit</directory>
</testsuite>
</testsuites>
</phpunit>

View file

@ -0,0 +1,154 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Konrad Abicht <hi@inspirito.de>
*
* @date 2020-11-22
*
* @license LGPLv3
*
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
namespace Smalot\PdfParser;
/**
 * Holds the tunable settings that are consulted by various parser classes.
 * Create an instance, adjust it through the setters and hand it to the
 * parser whenever the default values do not give good results.
 *
 * @see https://github.com/smalot/pdfparser/issues/305
 */
class Config
{
    /**
     * Font space limit; changing it can help when extracted text contains
     * too many spaces. Defaults to -50.
     */
    private $fontSpaceLimit = -50;

    /**
     * Horizontal offset string; defaults to a single space.
     *
     * @var string
     */
    private $horizontalOffset = ' ';

    /**
     * Characters treated as PDF whitespace: NUL, HT, LF, FF, CR, SP.
     *
     * @var string
     */
    private $pdfWhitespaces = "\0\t\n\f\r ";

    /**
     * Regex character class matching the same set: NUL, HT, LF, FF, CR, SP.
     *
     * @var string
     */
    private $pdfWhitespacesRegex = '[\0\t\n\f\r ]';

    /**
     * True keeps raw image data as content, false discards it to save memory.
     *
     * @var bool
     */
    private $retainImageContent = true;

    /**
     * Memory limit, in bytes, used when de-compressing files.
     *
     * @var int
     */
    private $decodeMemoryLimit = 0;

    /**
     * True adds font id and font size to the dataTm array.
     *
     * @var bool
     */
    private $dataTmFontInfoHasToBeIncluded = false;

    /**
     * @return mixed the current font space limit
     */
    public function getFontSpaceLimit()
    {
        return $this->fontSpaceLimit;
    }

    /**
     * @param mixed $value new font space limit (stored as given, no validation)
     */
    public function setFontSpaceLimit($value)
    {
        $this->fontSpaceLimit = $value;
    }

    /**
     * @return string the current horizontal offset
     */
    public function getHorizontalOffset(): string
    {
        return $this->horizontalOffset;
    }

    /**
     * @param mixed $value new horizontal offset (stored as given)
     */
    public function setHorizontalOffset($value): void
    {
        $this->horizontalOffset = $value;
    }

    /**
     * @return string the characters regarded as PDF whitespace
     */
    public function getPdfWhitespaces(): string
    {
        return $this->pdfWhitespaces;
    }

    /**
     * @param string $pdfWhitespaces characters to regard as PDF whitespace
     */
    public function setPdfWhitespaces(string $pdfWhitespaces): void
    {
        $this->pdfWhitespaces = $pdfWhitespaces;
    }

    /**
     * @return string the regex character class for PDF whitespace
     */
    public function getPdfWhitespacesRegex(): string
    {
        return $this->pdfWhitespacesRegex;
    }

    /**
     * @param string $pdfWhitespacesRegex regex character class for PDF whitespace
     */
    public function setPdfWhitespacesRegex(string $pdfWhitespacesRegex): void
    {
        $this->pdfWhitespacesRegex = $pdfWhitespacesRegex;
    }

    /**
     * @return bool whether raw image data is retained
     */
    public function getRetainImageContent(): bool
    {
        return $this->retainImageContent;
    }

    /**
     * @param bool $retainImageContent false discards raw image data to save memory
     */
    public function setRetainImageContent(bool $retainImageContent): void
    {
        $this->retainImageContent = $retainImageContent;
    }

    /**
     * @return int the memory limit for de-compression, in bytes
     */
    public function getDecodeMemoryLimit(): int
    {
        return $this->decodeMemoryLimit;
    }

    /**
     * @param int $decodeMemoryLimit memory limit for de-compression, in bytes
     */
    public function setDecodeMemoryLimit(int $decodeMemoryLimit): void
    {
        $this->decodeMemoryLimit = $decodeMemoryLimit;
    }

    /**
     * @return bool whether font id and size are added to dataTm
     */
    public function getDataTmFontInfoHasToBeIncluded(): bool
    {
        return $this->dataTmFontInfoHasToBeIncluded;
    }

    /**
     * @param bool $dataTmFontInfoHasToBeIncluded true adds font id and size to dataTm
     */
    public function setDataTmFontInfoHasToBeIncluded(bool $dataTmFontInfoHasToBeIncluded): void
    {
        $this->dataTmFontInfoHasToBeIncluded = $dataTmFontInfoHasToBeIncluded;
    }
}

View file

@ -0,0 +1,306 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
*
* @date 2017-01-03
*
* @license LGPLv3
*
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
namespace Smalot\PdfParser;
/**
 * Technical references :
 * - http://www.mactech.com/articles/mactech/Vol.15/15.09/PDFIntro/index.html
 * - http://framework.zend.com/issues/secure/attachment/12512/Pdf.php
 * - http://www.php.net/manual/en/ref.pdf.php#74211
 * - http://cpansearch.perl.org/src/JV/PostScript-Font-1.10.02/lib/PostScript/ISOLatin1Encoding.pm
 * - http://cpansearch.perl.org/src/JV/PostScript-Font-1.10.02/lib/PostScript/ISOLatin9Encoding.pm
 * - http://cpansearch.perl.org/src/JV/PostScript-Font-1.10.02/lib/PostScript/StandardEncoding.pm
 * - http://cpansearch.perl.org/src/JV/PostScript-Font-1.10.02/lib/PostScript/WinAnsiEncoding.pm
 *
 * Class Document
 *
 * Container for the objects of a parsed PDF. It keeps the objects keyed by
 * id, a lookup dictionary keyed by the objects' "Type"/"Subtype" header
 * fields, the trailer header and a details array (document info plus the
 * page count).
 */
class Document
{
    /**
     * Objects of the document, keyed by object id.
     *
     * @var PDFObject[]
     */
    protected $objects = [];

    /**
     * Lookup table: type => ['all' => [id => object], 'subtype' => [subtype => [id => object]]].
     *
     * @var array
     */
    protected $dictionary = [];

    /**
     * @var Header
     */
    protected $trailer = null;

    /**
     * Starts as an empty array so that getDetails() honours its "array"
     * return type even when it is called before init()/setObjects().
     *
     * @var array
     */
    protected $details = [];

    public function __construct()
    {
        $this->trailer = new Header([], $this);
    }

    /**
     * Rebuilds the dictionary and the details, then initializes the header
     * of every object followed by the object itself.
     */
    public function init()
    {
        $this->buildDictionary();

        $this->buildDetails();

        // Propagate init to objects.
        foreach ($this->objects as $object) {
            $object->getHeader()->init();
            $object->init();
        }
    }

    /**
     * Build dictionary based on type header field.
     */
    protected function buildDictionary()
    {
        // Build dictionary.
        $this->dictionary = [];

        foreach ($this->objects as $id => $object) {
            // Cache objects by type and subtype
            $type = $object->getHeader()->get('Type')->getContent();

            // NOTE(review): the loose comparison is kept on purpose; it also
            // skips other falsy contents, which a missing header entry
            // presumably yields - confirm before tightening.
            if (null != $type) {
                if (!isset($this->dictionary[$type])) {
                    $this->dictionary[$type] = [
                        'all' => [],
                        'subtype' => [],
                    ];
                }

                $this->dictionary[$type]['all'][$id] = $object;

                $subtype = $object->getHeader()->get('Subtype')->getContent();
                if (null != $subtype) {
                    if (!isset($this->dictionary[$type]['subtype'][$subtype])) {
                        $this->dictionary[$type]['subtype'][$subtype] = [];
                    }
                    $this->dictionary[$type]['subtype'][$subtype][$id] = $object;
                }
            }
        }
    }

    /**
     * Build details array.
     *
     * Takes the details of the trailer's "Info" entry (when available) and
     * adds the page count under the "Pages" key; the count is 0 when the
     * pages cannot be determined.
     */
    protected function buildDetails()
    {
        // Build details array.
        $details = [];

        // Extract document info
        if ($this->trailer->has('Info')) {
            /** @var PDFObject $info */
            $info = $this->trailer->get('Info');
            // This could be an ElementMissing object, so we need to check for
            // the getHeader method first.
            if (null !== $info && method_exists($info, 'getHeader')) {
                $details = $info->getHeader()->getDetails();
            }
        }

        // Retrieve the page count
        try {
            $pages = $this->getPages();
            $details['Pages'] = \count($pages);
        } catch (\Exception $e) {
            $details['Pages'] = 0;
        }

        $this->details = $details;
    }

    /**
     * @return array the type/subtype lookup table built by buildDictionary()
     */
    public function getDictionary(): array
    {
        return $this->dictionary;
    }

    /**
     * Replaces the document's objects and re-runs init().
     *
     * @param PDFObject[] $objects
     */
    public function setObjects($objects = [])
    {
        $this->objects = (array) $objects;

        $this->init();
    }

    /**
     * @return PDFObject[]
     */
    public function getObjects()
    {
        return $this->objects;
    }

    /**
     * @return PDFObject|Font|Page|Element|null the object stored under $id, or null if there is none
     */
    public function getObjectById(string $id)
    {
        if (isset($this->objects[$id])) {
            return $this->objects[$id];
        }

        return null;
    }

    /**
     * Tells whether at least one object of the given type (and subtype) exists.
     */
    public function hasObjectsByType(string $type, ?string $subtype = null): bool
    {
        return 0 < \count($this->getObjectsByType($type, $subtype));
    }

    /**
     * Returns the objects of the given type, keyed by id. When a non-empty
     * subtype is given, only objects of that subtype are returned.
     */
    public function getObjectsByType(string $type, ?string $subtype = null): array
    {
        if (!isset($this->dictionary[$type])) {
            return [];
        }

        if (null != $subtype) {
            if (!isset($this->dictionary[$type]['subtype'][$subtype])) {
                return [];
            }

            return $this->dictionary[$type]['subtype'][$subtype];
        }

        return $this->dictionary[$type]['all'];
    }

    /**
     * @return Font[]
     */
    public function getFonts()
    {
        return $this->getObjectsByType('Font');
    }

    /**
     * @return Font|null the first font of the document, or null if it has none
     */
    public function getFirstFont(): ?Font
    {
        $fonts = $this->getFonts();
        if ([] === $fonts) {
            return null;
        }

        return reset($fonts);
    }

    /**
     * Collects the pages, trying three sources in turn: the first "Catalog"
     * object, all "Pages" objects, and finally the plain "Page" objects
     * (unordered).
     *
     * @return Page[]
     *
     * @throws \Exception when none of the three sources is available
     */
    public function getPages()
    {
        if ($this->hasObjectsByType('Catalog')) {
            // Search for catalog to list pages.
            $catalogues = $this->getObjectsByType('Catalog');
            $catalogue = reset($catalogues);

            /** @var Pages $object */
            $object = $catalogue->get('Pages');
            if (method_exists($object, 'getPages')) {
                return $object->getPages(true);
            }
        }

        if ($this->hasObjectsByType('Pages')) {
            // Search for pages to list kids.
            $pages = [];

            /** @var Pages[] $objects */
            $objects = $this->getObjectsByType('Pages');
            foreach ($objects as $object) {
                $pages = array_merge($pages, $object->getPages(true));
            }

            return $pages;
        }

        if ($this->hasObjectsByType('Page')) {
            // Search for 'page' (unordered pages).
            $pages = $this->getObjectsByType('Page');

            return array_values($pages);
        }

        throw new \Exception('Missing catalog.');
    }

    /**
     * Returns the trimmed text of all pages, joined by a blank line.
     * Pages without any text are left out.
     *
     * @param int|null $pageLimit when a positive integer, only that many leading pages are used
     *
     * @throws \Exception when the pages cannot be determined (see getPages())
     */
    public function getText(?int $pageLimit = null): string
    {
        $texts = [];
        $pages = $this->getPages();

        // Only use the first X number of pages if $pageLimit is set and numeric.
        if (\is_int($pageLimit) && 0 < $pageLimit) {
            $pages = \array_slice($pages, 0, $pageLimit);
        }

        foreach ($pages as $page) {
            /**
             * In some cases, the $page variable may be null.
             */
            if (null === $page) {
                continue;
            }

            // Compare against the empty string explicitly: a truthiness test
            // would also drop a page whose whole text is "0".
            $text = trim($page->getText());
            if ('' !== $text) {
                $texts[] = $text;
            }
        }

        return implode("\n\n", $texts);
    }

    public function getTrailer(): Header
    {
        return $this->trailer;
    }

    public function setTrailer(Header $trailer)
    {
        $this->trailer = $trailer;
    }

    /**
     * @return array the details built by buildDetails(); empty until init() has run
     */
    public function getDetails(): array
    {
        return $this->details;
    }
}

View file

@ -0,0 +1,150 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
*
* @date 2017-01-03
*
* @license LGPLv3
*
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
namespace Smalot\PdfParser;
use Smalot\PdfParser\Element\ElementArray;
use Smalot\PdfParser\Element\ElementBoolean;
use Smalot\PdfParser\Element\ElementDate;
use Smalot\PdfParser\Element\ElementHexa;
use Smalot\PdfParser\Element\ElementName;
use Smalot\PdfParser\Element\ElementNull;
use Smalot\PdfParser\Element\ElementNumeric;
use Smalot\PdfParser\Element\ElementString;
use Smalot\PdfParser\Element\ElementStruct;
use Smalot\PdfParser\Element\ElementXRef;
/**
 * Class Element
 *
 * Base wrapper around a single parsed value, optionally linked to the
 * document it belongs to. Also offers the static parse() entry point that
 * delegates to the specialised element classes.
 */
class Element
{
    /**
     * @var Document
     */
    protected $document = null;

    protected $value = null;

    /**
     * @param mixed $value raw value held by this element
     */
    public function __construct($value, ?Document $document = null)
    {
        $this->value = $value;
        $this->document = $document;
    }

    /**
     * Intentionally empty hook.
     */
    public function init()
    {
    }

    /**
     * Loosely (==) compares the given value with the held value.
     */
    public function equals($value): bool
    {
        return $value == $this->value;
    }

    /**
     * For an array value: true when any contained element equals $value.
     * Otherwise identical to equals().
     */
    public function contains($value): bool
    {
        if (!\is_array($this->value)) {
            return $this->equals($value);
        }

        /** @var Element $item */
        foreach ($this->value as $item) {
            if ($item->equals($value)) {
                return true;
            }
        }

        return false;
    }

    /**
     * @return mixed the held value
     */
    public function getContent()
    {
        return $this->value;
    }

    public function __toString(): string
    {
        return (string) $this->value;
    }

    /**
     * Parses consecutive entries out of $content, starting at $position.
     *
     * By default every entry is a "/Name value" pair and the result is keyed
     * by name (without the leading slash). When a truthy, undeclared fourth
     * argument is passed, entries are bare values keyed 0, 1, 2, ...
     * (presumably it is not part of the signature so that the
     * three-parameter parse() overrides in sub classes stay compatible -
     * TODO confirm).
     *
     * @param int $position read/write cursor; advanced by the element parsers
     *
     * @return array the parsed elements
     */
    public static function parse(string $content, ?Document $document = null, int &$position = 0)
    {
        $arguments = \func_get_args();
        $only_values = $arguments[3] ?? false;

        $content = trim($content);
        $values = [];

        // Tried in exactly this order; the first parser that yields an element wins.
        $elementTypes = [
            ElementName::class,
            ElementXRef::class,
            ElementNumeric::class,
            ElementStruct::class,
            ElementBoolean::class,
            ElementNull::class,
            ElementDate::class,
            ElementString::class,
            ElementHexa::class,
            ElementArray::class,
        ];

        do {
            $old_position = $position;

            if ($only_values) {
                $name = \count($values);
                $value = substr($content, $position);
            } else {
                if (!preg_match('/^\s*(?P<name>\/[A-Z0-9\._]+)(?P<value>.*)/si', substr($content, $position), $match)) {
                    break;
                }

                $name = ltrim($match['name'], '/');
                $value = $match['value'];
                $position = strpos($content, $value, $position + \strlen($match['name']));
            }

            $element = false;
            foreach ($elementTypes as $elementType) {
                $element = $elementType::parse($value, $document, $position);
                if ($element) {
                    break;
                }
            }

            if (!$element) {
                // Nothing matched: undo the cursor movement and stop.
                $position = $old_position;
                break;
            }

            $values[$name] = $element;
        } while ($position < \strlen($content));

        return $values;
    }
}

View file

@ -0,0 +1,139 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
*
* @date 2017-01-03
*
* @license LGPLv3
*
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
namespace Smalot\PdfParser\Element;
use Smalot\PdfParser\Document;
use Smalot\PdfParser\Element;
use Smalot\PdfParser\Header;
use Smalot\PdfParser\PDFObject;
/**
 * Class ElementArray
 *
 * Element whose value is a list of other elements. Cross references inside
 * the list are replaced by the referenced document objects when the content
 * is read.
 */
class ElementArray extends Element
{
    public function __construct($value, ?Document $document = null)
    {
        parent::__construct($value, $document);
    }

    /**
     * Resolves every entry that is still a cross reference, then returns
     * the (now resolved) value.
     */
    public function getContent()
    {
        foreach ($this->value as $key => $unused) {
            $this->resolveXRef($key);
        }

        return parent::getContent();
    }

    /**
     * @return array the value as stored, without resolving cross references
     */
    public function getRawContent(): array
    {
        return $this->value;
    }

    /**
     * Flattens the entries into plain data.
     *
     * Headers, PDF objects and nested arrays are only expanded when $deep is
     * true; any other element contributes its content. Entries that match
     * none of these cases are omitted.
     */
    public function getDetails(bool $deep = true): array
    {
        $details = [];

        foreach ($this->getContent() as $key => $entry) {
            if ($deep && $entry instanceof Header) {
                $details[$key] = $entry->getDetails($deep);
            } elseif ($deep && $entry instanceof PDFObject) {
                $details[$key] = $entry->getDetails(false);
            } elseif ($entry instanceof self) {
                // Nested arrays are dropped entirely in shallow mode.
                if ($deep) {
                    $details[$key] = $entry->getDetails();
                }
            } elseif ($entry instanceof Element) {
                $details[$key] = $entry->getContent();
            }
        }

        return $details;
    }

    public function __toString(): string
    {
        return implode(',', $this->value);
    }

    /**
     * Replaces the entry stored under $name by the referenced document
     * object if it is a cross reference, and returns the entry.
     *
     * @return Element|PDFObject
     */
    protected function resolveXRef(string $name)
    {
        $entry = $this->value[$name];

        if ($entry instanceof ElementXRef) {
            /** @var ElementXRef $entry */
            $this->value[$name] = $this->document->getObjectById($entry->getId());
        }

        return $this->value[$name];
    }

    /**
     * Parses a "[ ... ]" array that starts (after optional whitespace) at
     * the beginning of $content. Nested brackets are balanced by counting.
     *
     * @todo: These methods return mixed and mismatched types throughout the hierarchy
     *
     * @param int $offset advanced past the parsed array on success
     *
     * @return bool|ElementArray false when $content does not start with an array
     */
    public static function parse(string $content, ?Document $document = null, int &$offset = 0)
    {
        if (!preg_match('/^\s*\[(?P<array>.*)/is', $content, $match)) {
            return false;
        }

        // Split into chunks that each end with one bracket.
        preg_match_all('/(.*?)(\[|\])/s', trim($content), $matches);

        $depth = 0;
        $sub = '';
        foreach ($matches[0] as $chunk) {
            $sub .= $chunk;

            if (false !== strpos($chunk, '[')) {
                ++$depth;
            } else {
                --$depth;
            }

            if ($depth <= 0) {
                break;
            }
        }

        // Removes 1 level [ and ].
        $sub = substr(trim($sub), 1, -1);
        $sub_offset = 0;
        $values = Element::parse($sub, $document, $sub_offset, true);

        // Skip up to and including '[', then the inner text and the closing ']'.
        $offset += strpos($content, '[') + 1 + \strlen($sub) + 1;

        return new self($values, $document);
    }
}

View file

@ -0,0 +1,75 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
*
* @date 2017-01-03
*
* @license LGPLv3
*
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
namespace Smalot\PdfParser\Element;
use Smalot\PdfParser\Document;
use Smalot\PdfParser\Element;
/**
 * Class ElementBoolean
 *
 * Element holding a boolean; it is never attached to a document.
 */
class ElementBoolean extends Element
{
    /**
     * Stores true for the boolean true or the string "true" (any letter
     * case); every other input is stored as false.
     *
     * @param string|bool $value
     */
    public function __construct($value)
    {
        $isTrue = 'true' == strtolower($value) || true === $value;

        parent::__construct($isTrue, null);
    }

    public function __toString(): string
    {
        if ($this->value) {
            return 'true';
        }

        return 'false';
    }

    /**
     * Strict (===) comparison with the held boolean.
     */
    public function equals($value): bool
    {
        return $value === $this->getContent();
    }

    /**
     * Parses a leading "true"/"false" literal (case-insensitive, optional
     * leading whitespace).
     *
     * @param int $offset advanced past the literal on success
     *
     * @return bool|ElementBoolean false when $content does not start with a boolean literal
     */
    public static function parse(string $content, ?Document $document = null, int &$offset = 0)
    {
        if (!preg_match('/^\s*(?P<value>true|false)/is', $content, $match)) {
            return false;
        }

        $literal = $match['value'];
        $offset += strpos($content, $literal) + \strlen($literal);

        return new self($literal);
    }
}

View file

@ -0,0 +1,139 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
*
* @date 2017-01-03
*
* @license LGPLv3
*
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
namespace Smalot\PdfParser\Element;
use Smalot\PdfParser\Document;
/**
 * Class ElementDate
 *
 * Element holding a \DateTime that was parsed from a PDF date string of the
 * form "(D:...)".
 */
class ElementDate extends ElementString
{
    /**
     * Maps the length of a normalized date string to the format used to
     * parse it. Lengths without an entry are not parseable.
     *
     * @var array
     */
    protected static $formats = [
        4 => 'Y',
        6 => 'Ym',
        8 => 'Ymd',
        10 => 'YmdH',
        12 => 'YmdHi',
        14 => 'YmdHis',
        15 => 'YmdHise',
        17 => 'YmdHisO',
        18 => 'YmdHisO',
        19 => 'YmdHisO',
    ];

    /**
     * Output format used by __toString().
     *
     * @var string
     */
    protected $format = 'c';

    /**
     * @var \DateTime
     */
    protected $value;

    /**
     * @param \DateTime $value
     *
     * @throws \Exception when $value is not a \DateTime
     */
    public function __construct($value)
    {
        if (!($value instanceof \DateTime)) {
            throw new \Exception('DateTime required.'); // FIXME: Sometimes strings are passed to this function
        }

        parent::__construct($value);
    }

    public function setFormat(string $format)
    {
        $this->format = $format;
    }

    /**
     * Compares by timestamp; anything that is not a \DateTime is converted
     * with strtotime() first.
     */
    public function equals($value): bool
    {
        if ($value instanceof \DateTime) {
            $timestamp = $value->getTimeStamp();
        } else {
            $timestamp = strtotime($value);
        }

        return $timestamp == $this->value->getTimeStamp();
    }

    public function __toString(): string
    {
        return (string) $this->value->format($this->format);
    }

    /**
     * Parses a leading "(D:...)" date (optional leading whitespace).
     *
     * @param int $offset advanced past the closing ")" on success
     *
     * @return bool|ElementDate false when $content holds no parseable date
     */
    public static function parse(string $content, ?Document $document = null, int &$offset = 0)
    {
        if (preg_match('/^\s*\(D\:(?P<name>.*?)\)/s', $content, $match)) {
            $name = $match['name'];
            $name = str_replace("'", '', $name);
            $date = false;

            // Smallest format : Y
            // Full format     : YmdHisP
            if (preg_match('/^\d{4}(\d{2}(\d{2}(\d{2}(\d{2}(\d{2}(Z(\d{2,4})?|[\+-]?\d{2}(\d{2})?)?)?)?)?)?)?$/', $name)) {
                if ($pos = strpos($name, 'Z')) {
                    // Drop everything that follows the 'Z'.
                    $name = substr($name, 0, $pos + 1);
                } elseif (18 == \strlen($name) && preg_match('/[^\+-]0000$/', $name)) {
                    // Unsigned "0000" offset: make the sign explicit.
                    $name = substr($name, 0, -4).'+0000';
                }

                // The pattern above also accepts lengths that have no format
                // entry (e.g. 16 characters). Leave $date as false for those
                // instead of reading an undefined key and passing null on.
                $length = \strlen($name);
                if (isset(self::$formats[$length])) {
                    $date = \DateTime::createFromFormat(self::$formats[$length], $name, new \DateTimeZone('UTC'));
                }
            } else {
                // special cases
                if (preg_match('/^\d{1,2}-\d{1,2}-\d{4},?\s+\d{2}:\d{2}:\d{2}[\+-]\d{4}$/', $name)) {
                    $name = str_replace(',', '', $name);
                    $format = 'n-j-Y H:i:sO';
                    $date = \DateTime::createFromFormat($format, $name, new \DateTimeZone('UTC'));
                }
            }

            if (!$date) {
                return false;
            }

            $offset += strpos($content, '(D:') + \strlen($match['name']) + 4; // 3 for '(D:' and 1 for ')'

            return new self($date);
        }

        return false;
    }
}

View file

@ -0,0 +1,85 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
*
* @date 2017-01-03
*
* @license LGPLv3
*
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
namespace Smalot\PdfParser\Element;
use Smalot\PdfParser\Document;
/**
* Class ElementHexa
*/
class ElementHexa extends ElementString
{
    /**
     * Parses a hexadecimal string literal "<...>".
     *
     * The decoded text is re-wrapped as a "(...)" literal and handed first to
     * ElementDate::parse() and then, if that fails, to ElementString::parse().
     * On a match $offset is advanced past the closing '>'.
     *
     * @return false|ElementString|ElementDate
     */
    public static function parse(string $content, ?Document $document = null, int &$offset = 0)
    {
        if (preg_match('/^\s*\<(?P<name>[A-F0-9]+)\>/is', $content, $match)) {
            $name = $match['name'];
            $offset += strpos($content, '<'.$name) + \strlen($name) + 2; // 2 for '<' and '>'
            // repackage string as standard
            $name = '('.self::decode($name).')';
            $element = ElementDate::parse($name, $document);
            if (!$element) {
                $element = ElementString::parse($name, $document);
            }

            return $element;
        }

        return false;
    }

    /**
     * Decodes the digits of a hexadecimal string into text.
     *
     * Values starting with "00" are read as 4-digit code units and converted
     * through numeric HTML entities; anything else is read as one byte per
     * two digits.
     *
     * @param string $value hexadecimal digits without the surrounding '<' '>'
     */
    public static function decode(string $value): string
    {
        // Per the PDF specification, when a hexadecimal string has an odd
        // number of digits the missing final digit is assumed to be 0.
        if (1 === \strlen($value) % 2) {
            $value .= '0';
        }

        $text = '';
        $length = \strlen($value);
        if ('00' === substr($value, 0, 2)) {
            for ($i = 0; $i < $length; $i += 4) {
                $hex = substr($value, $i, 4);
                $text .= '&#'.str_pad(hexdec($hex), 4, '0', \STR_PAD_LEFT).';';
            }
        } else {
            for ($i = 0; $i < $length; $i += 2) {
                $hex = substr($value, $i, 2);
                $text .= \chr(hexdec($hex));
            }
        }
        $text = html_entity_decode($text, \ENT_NOQUOTES, 'UTF-8');

        return $text;
    }
}

View file

@ -0,0 +1,66 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
*
* @date 2017-01-03
*
* @license LGPLv3
*
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
namespace Smalot\PdfParser\Element;
use Smalot\PdfParser\Element;
/**
* Class ElementMissing
*/
class ElementMissing extends Element
{
    /**
     * Builds a placeholder element with neither a value nor a document.
     */
    public function __construct()
    {
        parent::__construct(null, null);
    }

    /**
     * A missing element never equals anything.
     */
    public function equals($value): bool
    {
        return false;
    }

    /**
     * A missing element never contains anything.
     */
    public function contains($value): bool
    {
        return false;
    }

    /**
     * Always reports false as its content.
     */
    public function getContent(): bool
    {
        return false;
    }

    /**
     * Renders as the empty string.
     */
    public function __toString(): string
    {
        return '';
    }
}

View file

@ -0,0 +1,69 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
*
* @date 2017-01-03
*
* @license LGPLv3
*
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
namespace Smalot\PdfParser\Element;
use Smalot\PdfParser\Document;
use Smalot\PdfParser\Element;
use Smalot\PdfParser\Font;
/**
* Class ElementName
*/
class ElementName extends Element
{
    /**
     * @param string $value the name, without its leading slash
     */
    public function __construct(string $value)
    {
        parent::__construct($value, null);
    }

    /**
     * Loose (==) comparison against the stored name.
     */
    public function equals($value): bool
    {
        return $value == $this->value;
    }

    /**
     * Reads a "/Name" token from the start of $content.
     *
     * On a match $offset is advanced to the end of the name and the name is
     * passed through Font::decodeEntities() before being stored.
     *
     * @return bool|ElementName
     */
    public static function parse(string $content, ?Document $document = null, int &$offset = 0)
    {
        if (!preg_match('/^\s*\/([A-Z0-9\-\+,#\.]+)/is', $content, $found)) {
            return false;
        }

        $raw = $found[1];
        $offset += strpos($content, $raw) + \strlen($raw);

        return new self(Font::decodeEntities($raw));
    }
}

View file

@ -0,0 +1,71 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
*
* @date 2017-01-03
*
* @license LGPLv3
*
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
namespace Smalot\PdfParser\Element;
use Smalot\PdfParser\Document;
use Smalot\PdfParser\Element;
/**
* Class ElementNull
*/
class ElementNull extends Element
{
    /**
     * Builds an element with neither a value nor a document.
     */
    public function __construct()
    {
        parent::__construct(null, null);
    }

    /**
     * Renders as the literal keyword "null".
     */
    public function __toString(): string
    {
        return 'null';
    }

    /**
     * Strict (===) comparison against this element's content.
     */
    public function equals($value): bool
    {
        return $this->getContent() === $value;
    }

    /**
     * Recognises the keyword "null" at the start of $content.
     *
     * On a match $offset is advanced to just after the keyword.
     *
     * @return bool|ElementNull
     */
    public static function parse(string $content, ?Document $document = null, int &$offset = 0)
    {
        if (!preg_match('/^\s*(null)/s', $content)) {
            return false;
        }

        $keyword = 'null';
        $offset += strpos($content, $keyword) + \strlen($keyword);

        return new self();
    }
}

View file

@ -0,0 +1,62 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
*
* @date 2017-01-03
*
* @license LGPLv3
*
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
namespace Smalot\PdfParser\Element;
use Smalot\PdfParser\Document;
use Smalot\PdfParser\Element;
/**
* Class ElementNumeric
*/
class ElementNumeric extends Element
{
    /**
     * Stores the numeric token as a float.
     */
    public function __construct(string $value)
    {
        parent::__construct((float) $value, null);
    }

    /**
     * Reads an optionally negative run of digits and dots from the start of
     * $content.
     *
     * On a match $offset is advanced to the end of the matched token.
     *
     * @return bool|ElementNumeric
     */
    public static function parse(string $content, ?Document $document = null, int &$offset = 0)
    {
        if (!preg_match('/^\s*(?P<value>\-?[0-9\.]+)/s', $content, $found)) {
            return false;
        }

        $token = $found['value'];
        $offset += strpos($content, $token) + \strlen($token);

        return new self($token);
    }
}

View file

@ -0,0 +1,93 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
*
* @date 2017-01-03
*
* @license LGPLv3
*
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
namespace Smalot\PdfParser\Element;
use Smalot\PdfParser\Document;
use Smalot\PdfParser\Element;
use Smalot\PdfParser\Font;
/**
* Class ElementString
*/
class ElementString extends Element
{
    /**
     * @param mixed $value the decoded string content
     */
    public function __construct($value)
    {
        parent::__construct($value, null);
    }

    /**
     * Loose (==) comparison against the stored value.
     */
    public function equals($value): bool
    {
        return $value == $this->value;
    }

    /**
     * Reads a "(...)" literal string from the start of $content.
     *
     * The literal ends at the first ')' preceded by an even number of
     * backslashes. Escape sequences are then expanded and the text is run
     * through the Font decoders. $offset is advanced past the closing ')'.
     *
     * @return bool|ElementString
     */
    public static function parse(string $content, ?Document $document = null, int &$offset = 0)
    {
        if (!preg_match('/^\s*\((?P<name>.*)/s', $content, $match)) {
            return false;
        }

        $body = $match['name'];

        // Walk the candidate ')' positions until one is not escaped.
        $searchFrom = 0;
        $closing = strpos($body, ')', $searchFrom);
        while (false !== $closing) {
            $before = substr($body, 0, $closing);
            preg_match('/(?P<escape>[\\\]*)$/s', $before, $trailing);
            if (0 === \strlen($trailing['escape']) % 2) {
                break;
            }
            $searchFrom = $closing + 1;
            $closing = strpos($body, ')', $searchFrom);
        }

        // Keep everything before the closing parenthesis.
        $text = substr($body, 0, (int) $closing);
        $offset += strpos($content, '(') + $closing + 2; // 2 for '(' and ')'

        // Expand backslash escapes.
        $text = str_replace(
            ['\\\\', '\\ ', '\\/', '\(', '\)', '\n', '\r', '\t'],
            ['\\', ' ', '/', '(', ')', "\n", "\r", "\t"],
            $text
        );

        // Decode: octal, entities, hexadecimal, then unicode.
        $text = Font::decodeUnicode(
            Font::decodeHexadecimal(
                Font::decodeEntities(Font::decodeOctal($text)),
                false
            )
        );

        return new self($text);
    }
}

View file

@ -0,0 +1,75 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
*
* @date 2017-01-03
*
* @license LGPLv3
*
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
namespace Smalot\PdfParser\Element;
use Smalot\PdfParser\Document;
use Smalot\PdfParser\Element;
use Smalot\PdfParser\Header;
/**
* Class ElementStruct
*/
class ElementStruct extends Element
{
    /**
     * Reads a "<< ... >>" dictionary from the start of $content.
     *
     * Nested "<<"/">>" pairs are balanced to find the end of the outermost
     * dictionary; its inner text is then parsed with Element::parse() and
     * wrapped in a Header. $offset is advanced past the closing ">>".
     *
     * @return false|Header
     */
    public static function parse(string $content, ?Document $document = null, int &$offset = 0)
    {
        if (!preg_match('/^\s*<<(?P<struct>.*)/is', $content)) {
            return false;
        }

        // Split into chunks that each end with one '<<' or '>>' delimiter.
        preg_match_all('/(.*?)(<<|>>)/s', trim($content), $chunks);

        $depth = 0;
        $sub = '';
        foreach ($chunks[0] as $chunk) {
            $sub .= $chunk;
            if (false !== strpos($chunk, '<<')) {
                ++$depth;
            } else {
                --$depth;
            }
            if ($depth <= 0) {
                break;
            }
        }

        $offset += strpos($content, '<<') + \strlen(rtrim($sub));

        // Removes '<<' and '>>'.
        $inner = trim((string) preg_replace('/^\s*<<(.*)>>\s*$/s', '\\1', $sub));

        $position = 0;
        $elements = Element::parse($inner, $document, $position);

        return new Header($elements, $document);
    }
}

View file

@ -0,0 +1,98 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
*
* @date 2017-01-03
*
* @license LGPLv3
*
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
namespace Smalot\PdfParser\Element;
use Smalot\PdfParser\Document;
use Smalot\PdfParser\Element;
/**
* Class ElementXRef
*/
class ElementXRef extends Element
{
    /**
     * Returns the referenced object id in "<object>_<generation>" form.
     */
    public function getId(): string
    {
        return $this->getContent();
    }

    /**
     * Looks the referenced object up in the owning document.
     */
    public function getObject()
    {
        return $this->document->getObjectById($this->getId());
    }

    /**
     * Compares this reference with another reference, an id string or a number.
     */
    public function equals($value): bool
    {
        /**
         * In case $value is a number and $this->value is a string like 5_0
         *
         * Without this if-clause code like:
         *
         *      $element = new ElementXRef('5_0');
         *      $this->assertTrue($element->equals(5));
         *
         * would fail (= 5_0 and 5 are not equal in PHP 8.0+).
         */
        if (
            true === is_numeric($value)
            && true === \is_string($this->getContent())
            && 1 === preg_match('/[0-9]+\_[0-9]+/', $this->getContent())
        ) {
            return (float) $this->getContent() == $value;
        }
        $id = ($value instanceof self) ? $value->getId() : $value;

        return $this->getId() == $id;
    }

    public function __toString(): string
    {
        return '#Obj#'.$this->getId();
    }

    /**
     * Reads an indirect reference "<object> <generation> R" from the start of
     * $content.
     *
     * On a match $offset is advanced to just after the 'R'.
     *
     * @return bool|ElementXRef
     */
    public static function parse(string $content, ?Document $document = null, int &$offset = 0)
    {
        if (preg_match('/^\s*(?P<id>(?P<obj>[0-9]+)\s+(?P<gen>[0-9]+)\s+R)/s', $content, $match)) {
            $id = $match['id'];
            $offset += strpos($content, $id) + \strlen($id);

            // Build the id from the captured numbers so that any whitespace
            // accepted by \s+ (newlines, tabs, repeated spaces) still yields
            // the canonical "<object>_<generation>" form.
            return new self($match['obj'].'_'.$match['gen'], $document);
        }

        return false;
    }
}

View file

@ -0,0 +1,157 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
*
* @date 2017-01-03
*
* @license LGPLv3
*
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
namespace Smalot\PdfParser;
use Exception;
use Smalot\PdfParser\Element\ElementNumeric;
use Smalot\PdfParser\Encoding\EncodingLocator;
use Smalot\PdfParser\Encoding\PostScriptGlyphs;
use Smalot\PdfParser\Exception\EncodingNotFoundException;
/**
* Class Encoding
*/
class Encoding extends PDFObject
{
    /**
     * Translations of the base encoding, as returned by getTranslations().
     *
     * @var array
     */
    protected $encoding;

    /**
     * Overrides read from the 'Differences' entry, keyed by character code.
     *
     * @var array
     */
    protected $differences;

    /**
     * Base encoding with the differences applied on top.
     *
     * @var array
     */
    protected $mapping;

    /**
     * Builds the encoding, differences and mapping tables.
     */
    public function init()
    {
        $this->mapping = [];
        $this->differences = [];
        $this->encoding = [];

        if (!$this->has('BaseEncoding')) {
            return;
        }

        $this->encoding = EncodingLocator::getEncoding($this->getEncodingClass())->getTranslations();

        // Build table including differences.
        $differences = $this->get('Differences')->getContent();
        if (!\is_array($differences)) {
            // NOTE(review): returning here leaves $this->mapping empty even
            // though a base encoding was loaded - confirm this is intended.
            return;
        }

        $code = 0;
        foreach ($differences as $difference) {
            // A numeric entry restarts the running character code.
            if ($difference instanceof ElementNumeric) {
                $code = $difference->getContent();
                continue;
            }

            // Any other entry is stored at the current code; objects
            // contribute their content.
            $this->differences[$code] = \is_object($difference) ? $difference->getContent() : $difference;

            // For the next char.
            ++$code;
        }

        $this->mapping = $this->encoding;
        foreach ($this->differences as $position => $glyph) {
            $this->mapping[$position] = $glyph;
        }
    }

    /**
     * Adds 'BaseEncoding' and 'Differences' to the parent's details.
     */
    public function getDetails(bool $deep = true): array
    {
        $details = [
            'BaseEncoding' => $this->has('BaseEncoding') ? (string) $this->get('BaseEncoding') : 'Ansi',
            'Differences' => $this->has('Differences') ? (string) $this->get('Differences') : '',
        ];

        return $details + parent::getDetails($deep);
    }

    /**
     * Maps $dec through the mapping table when it has an entry, then resolves
     * the result with PostScriptGlyphs::getCodePoint().
     */
    public function translateChar($dec): ?int
    {
        $glyph = $this->mapping[$dec] ?? $dec;

        return PostScriptGlyphs::getCodePoint($glyph);
    }

    /**
     * Returns encoding class name if available or empty string (only prior PHP 7.4).
     *
     * @throws \Exception On PHP 7.4+ an exception is thrown if encoding class doesn't exist.
     */
    public function __toString(): string
    {
        try {
            return $this->getEncodingClass();
        } catch (\Exception $e) {
            // prior to PHP 7.4 toString has to return an empty string.
            if (version_compare(\PHP_VERSION, '7.4.0', '<')) {
                return '';
            }

            throw $e;
        }
    }

    /**
     * Resolves the 'BaseEncoding' entry to a class name in the
     * Smalot\PdfParser\Encoding namespace.
     *
     * @throws EncodingNotFoundException when no such class exists
     */
    protected function getEncodingClass(): string
    {
        // Load reference table charset.
        $baseEncoding = preg_replace('/[^A-Z0-9]/is', '', $this->get('BaseEncoding')->getContent());
        $className = '\\Smalot\\PdfParser\\Encoding\\'.$baseEncoding;

        if (class_exists($className)) {
            return $className;
        }

        throw new EncodingNotFoundException('Missing encoding data for: "'.$baseEncoding.'".');
    }
}

View file

@ -0,0 +1,8 @@
<?php
namespace Smalot\PdfParser\Encoding;
/**
 * Base class for the encoding tables in this namespace.
 */
abstract class AbstractEncoding
{
    /**
     * Returns the translation table of the encoding.
     */
    abstract public function getTranslations(): array;
}

View file

@ -0,0 +1,17 @@
<?php
namespace Smalot\PdfParser\Encoding;
class EncodingLocator
{
    /**
     * Encoding instances already created, keyed by class name.
     */
    protected static $encodings;

    /**
     * Returns the shared instance of the given encoding class, creating it on
     * first use.
     */
    public static function getEncoding(string $encodingClassName): AbstractEncoding
    {
        if (isset(self::$encodings[$encodingClassName])) {
            return self::$encodings[$encodingClassName];
        }

        $instance = new $encodingClassName();
        self::$encodings[$encodingClassName] = $instance;

        return $instance;
    }
}

View file

@ -0,0 +1,76 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
*
* @date 2017-01-03
*
* @license LGPLv3
*
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
// Source : http://cpansearch.perl.org/src/JV/PostScript-Font-1.10.02/lib/PostScript/ISOLatin1Encoding.pm
namespace Smalot\PdfParser\Encoding;
/**
* Class ISOLatin1Encoding
*/
class ISOLatin1Encoding extends AbstractEncoding
{
    /**
     * Returns the glyph names of this encoding as a list; the position of a
     * name in the list is its character code.
     */
    public function getTranslations(): array
    {
        $rows = [
            // Codes 0-31 carry no glyph.
            str_repeat('.notdef ', 32),
            'space exclam quotedbl numbersign dollar percent ampersand quoteright ',
            'parenleft parenright asterisk plus comma minus period slash zero one ',
            'two three four five six seven eight nine colon semicolon less equal ',
            'greater question at A B C D E F G H I J K L M N O P Q R S T U V W X ',
            'Y Z bracketleft backslash bracketright asciicircum underscore ',
            'quoteleft a b c d e f g h i j k l m n o p q r s t u v w x y z ',
            'braceleft bar braceright asciitilde .notdef .notdef .notdef .notdef ',
            '.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef ',
            '.notdef .notdef .notdef .notdef .notdef dotlessi grave acute ',
            'circumflex tilde macron breve dotaccent dieresis .notdef ring ',
            'cedilla .notdef hungarumlaut ogonek caron space exclamdown cent ',
            'sterling currency yen brokenbar section dieresis copyright ',
            'ordfeminine guillemotleft logicalnot hyphen registered macron degree ',
            'plusminus twosuperior threesuperior acute mu paragraph ',
            'periodcentered cedilla onesuperior ordmasculine guillemotright ',
            'onequarter onehalf threequarters questiondown Agrave Aacute ',
            'Acircumflex Atilde Adieresis Aring AE Ccedilla Egrave Eacute ',
            'Ecircumflex Edieresis Igrave Iacute Icircumflex Idieresis Eth Ntilde ',
            'Ograve Oacute Ocircumflex Otilde Odieresis multiply Oslash Ugrave ',
            'Uacute Ucircumflex Udieresis Yacute Thorn germandbls agrave aacute ',
            'acircumflex atilde adieresis aring ae ccedilla egrave eacute ',
            'ecircumflex edieresis igrave iacute icircumflex idieresis eth ntilde ',
            'ograve oacute ocircumflex otilde odieresis divide oslash ugrave ',
            'uacute ucircumflex udieresis yacute thorn ydieresis',
        ];

        return explode(' ', implode('', $rows));
    }
}

View file

@ -0,0 +1,76 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
*
* @date 2017-01-03
*
* @license LGPLv3
*
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
// Source : http://cpansearch.perl.org/src/JV/PostScript-Font-1.10.02/lib/PostScript/ISOLatin9Encoding.pm
namespace Smalot\PdfParser\Encoding;
/**
* Class ISOLatin9Encoding
*/
class ISOLatin9Encoding extends AbstractEncoding
{
    /**
     * Returns the glyph names of this encoding as a list; the position of a
     * name in the list is its character code.
     */
    public function getTranslations(): array
    {
        $rows = [
            // Codes 0-31 carry no glyph.
            str_repeat('.notdef ', 32),
            'space exclam quotedbl numbersign dollar percent ampersand quoteright ',
            'parenleft parenright asterisk plus comma minus period slash zero one ',
            'two three four five six seven eight nine colon semicolon less equal ',
            'greater question at A B C D E F G H I J K L M N O P Q R S T U V W X ',
            'Y Z bracketleft backslash bracketright asciicircum underscore ',
            'quoteleft a b c d e f g h i j k l m n o p q r s t u v w x y z ',
            'braceleft bar braceright asciitilde .notdef .notdef .notdef .notdef ',
            '.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef ',
            '.notdef .notdef .notdef .notdef .notdef dotlessi grave acute ',
            'circumflex tilde macron breve dotaccent dieresis .notdef ring ',
            'cedilla .notdef hungarumlaut ogonek caron space exclamdown cent ',
            'sterling Euro yen Scaron section scaron copyright ',
            'ordfeminine guillemotleft logicalnot hyphen registered macron degree ',
            'plusminus twosuperior threesuperior Zcaron mu paragraph ',
            'periodcentered zcaron onesuperior ordmasculine guillemotright ',
            'OE oe Ydieresis questiondown Agrave Aacute ',
            'Acircumflex Atilde Adieresis Aring AE Ccedilla Egrave Eacute ',
            'Ecircumflex Edieresis Igrave Iacute Icircumflex Idieresis Eth Ntilde ',
            'Ograve Oacute Ocircumflex Otilde Odieresis multiply Oslash Ugrave ',
            'Uacute Ucircumflex Udieresis Yacute Thorn germandbls agrave aacute ',
            'acircumflex atilde adieresis aring ae ccedilla egrave eacute ',
            'ecircumflex edieresis igrave iacute icircumflex idieresis eth ntilde ',
            'ograve oacute ocircumflex otilde odieresis divide oslash ugrave ',
            'uacute ucircumflex udieresis yacute thorn ydieresis',
        ];

        return explode(' ', implode('', $rows));
    }
}

View file

@ -0,0 +1,80 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
*
* @date 2017-01-03
*
* @license LGPLv3
*
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
// Source : http://www.opensource.apple.com/source/vim/vim-34/vim/runtime/print/mac-roman.ps
namespace Smalot\PdfParser\Encoding;
/**
* Class MacRomanEncoding
*/
class MacRomanEncoding extends AbstractEncoding
{
    /**
     * Returns the glyph names of this encoding as a list; the position of a
     * name in the list is its character code. Rows hold eight names each.
     */
    public function getTranslations(): array
    {
        $rows = [
            // Codes 0-31 carry no glyph.
            str_repeat('.notdef ', 32),
            'space exclam quotedbl numbersign dollar percent ampersand quotesingle ',
            'parenleft parenright asterisk plus comma minus period slash ',
            'zero one two three four five six seven ',
            'eight nine colon semicolon less equal greater question ',
            'at A B C D E F G ',
            'H I J K L M N O ',
            'P Q R S T U V W ',
            'X Y Z bracketleft backslash bracketright asciicircum underscore ',
            'grave a b c d e f g ',
            'h i j k l m n o ',
            'p q r s t u v w ',
            'x y z braceleft bar braceright asciitilde .notdef ',
            'Adieresis Aring Ccedilla Eacute Ntilde Odieresis Udieresis aacute ',
            'agrave acircumflex adieresis atilde aring ccedilla eacute egrave ',
            'ecircumflex edieresis iacute igrave icircumflex idieresis ntilde oacute ',
            'ograve ocircumflex odieresis otilde uacute ugrave ucircumflex udieresis ',
            'dagger degree cent sterling section bullet paragraph germandbls ',
            'registered copyright trademark acute dieresis notequal AE Oslash ',
            'infinity plusminus lessequal greaterequal yen mu partialdiff summation ',
            'Pi pi integral ordfeminine ordmasculine Omega ae oslash ',
            'questiondown exclamdown logicalnot radical florin approxequal delta guillemotleft ',
            'guillemotright ellipsis space Agrave Atilde Otilde OE oe ',
            'endash emdash quotedblleft quotedblright quoteleft quoteright divide lozenge ',
            'ydieresis Ydieresis fraction currency guilsinglleft guilsinglright fi fl ',
            'daggerdbl periodcentered quotesinglbase quotedblbase perthousand Acircumflex Ecircumflex Aacute ',
            'Edieresis Egrave Iacute Icircumflex Idieresis Igrave Oacute Ocircumflex ',
            'heart Ograve Uacute Ucircumflex Ugrave dotlessi circumflex tilde ',
            'macron breve dotaccent ring cedilla hungarumlaut ogonek caron',
        ];

        return explode(' ', implode('', $rows));
    }
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,76 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
*
* @date 2017-01-03
*
* @license LGPLv3
*
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
// Source : http://cpansearch.perl.org/src/JV/PostScript-Font-1.10.02/lib/PostScript/StandardEncoding.pm
namespace Smalot\PdfParser\Encoding;
/**
* Class StandardEncoding
*/
class StandardEncoding extends AbstractEncoding
{
    /**
     * Returns the glyph names of this encoding as a list; the position of a
     * name in the list is its character code.
     */
    public function getTranslations(): array
    {
        $rows = [
            // Codes 0-31 carry no glyph.
            str_repeat('.notdef ', 32),
            'space exclam quotedbl numbersign dollar percent ampersand quoteright ',
            'parenleft parenright asterisk plus comma hyphen period slash zero ',
            'one two three four five six seven eight nine colon semicolon less ',
            'equal greater question at A B C D E F G H I J K L M N O P Q R S T U ',
            'V W X Y Z bracketleft backslash bracketright asciicircum underscore ',
            'quoteleft a b c d e f g h i j k l m n o p q r s t u v w x y z ',
            'braceleft bar braceright asciitilde .notdef .notdef .notdef .notdef ',
            '.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef ',
            '.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef ',
            '.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef ',
            '.notdef .notdef .notdef .notdef .notdef .notdef exclamdown cent ',
            'sterling fraction yen florin section currency quotesingle ',
            'quotedblleft guillemotleft guilsinglleft guilsinglright fi fl ',
            '.notdef endash dagger daggerdbl periodcentered .notdef paragraph ',
            'bullet quotesinglbase quotedblbase quotedblright guillemotright ',
            'ellipsis perthousand .notdef questiondown .notdef grave acute ',
            'circumflex tilde macron breve dotaccent dieresis .notdef ring ',
            'cedilla .notdef hungarumlaut ogonek caron emdash .notdef .notdef ',
            '.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef ',
            '.notdef .notdef .notdef .notdef .notdef .notdef AE .notdef ',
            'ordfeminine .notdef .notdef .notdef .notdef Lslash Oslash OE ',
            'ordmasculine .notdef .notdef .notdef .notdef .notdef ae .notdef ',
            '.notdef .notdef dotlessi .notdef .notdef lslash oslash oe germandbls ',
            '.notdef .notdef .notdef .notdef',
        ];

        return explode(' ', implode('', $rows));
    }
}

View file

@ -0,0 +1,76 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
*
* @date 2017-01-03
*
* @license LGPLv3
*
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
// Source : http://cpansearch.perl.org/src/JV/PostScript-Font-1.10.02/lib/PostScript/WinANSIEncoding.pm
namespace Smalot\PdfParser\Encoding;
/**
 * Glyph-name table for the "WinAnsiEncoding" character set.
 */
class WinAnsiEncoding extends AbstractEncoding
{
    /**
     * Returns the glyph names in code order: the position in the returned list is the
     * character code (for example index 65 is "A"); codes 0-31 are ".notdef".
     */
    public function getTranslations(): array
    {
        // Every row except the last ends with the separating space, so gluing the rows
        // together without a separator yields one space-delimited list.
        $rows = [
            '.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef ',
            '.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef ',
            '.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef ',
            '.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef ',
            'space exclam quotedbl numbersign dollar percent ampersand quotesingle ',
            'parenleft parenright asterisk plus comma hyphen period slash zero one ',
            'two three four five six seven eight nine colon semicolon less equal ',
            'greater question at A B C D E F G H I J K L M N O P Q R S T U V W X ',
            'Y Z bracketleft backslash bracketright asciicircum underscore ',
            'grave a b c d e f g h i j k l m n o p q r s t u v w x y z ',
            'braceleft bar braceright asciitilde bullet Euro bullet quotesinglbase ',
            'florin quotedblbase ellipsis dagger daggerdbl circumflex perthousand ',
            'Scaron guilsinglleft OE bullet Zcaron bullet bullet quoteleft quoteright ',
            'quotedblleft quotedblright bullet endash emdash tilde trademark scaron ',
            'guilsinglright oe bullet zcaron Ydieresis space exclamdown cent ',
            'sterling currency yen brokenbar section dieresis copyright ',
            'ordfeminine guillemotleft logicalnot hyphen registered macron degree ',
            'plusminus twosuperior threesuperior acute mu paragraph ',
            'periodcentered cedilla onesuperior ordmasculine guillemotright ',
            'onequarter onehalf threequarters questiondown Agrave Aacute ',
            'Acircumflex Atilde Adieresis Aring AE Ccedilla Egrave Eacute ',
            'Ecircumflex Edieresis Igrave Iacute Icircumflex Idieresis Eth Ntilde ',
            'Ograve Oacute Ocircumflex Otilde Odieresis multiply Oslash Ugrave ',
            'Uacute Ucircumflex Udieresis Yacute Thorn germandbls agrave aacute ',
            'acircumflex atilde adieresis aring ae ccedilla egrave eacute ',
            'ecircumflex edieresis igrave iacute icircumflex idieresis eth ntilde ',
            'ograve oacute ocircumflex otilde odieresis divide oslash ugrave ',
            'uacute ucircumflex udieresis yacute thorn ydieresis',
        ];

        return explode(' ', implode('', $rows));
    }
}

View file

@ -0,0 +1,7 @@
<?php
namespace Smalot\PdfParser\Exception;
/**
 * Exception type signalling that an encoding could not be found.
 *
 * It adds nothing to \Exception; it exists so callers can catch this specific failure.
 */
class EncodingNotFoundException extends \Exception
{
}

View file

@ -0,0 +1,664 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
*
* @date 2017-01-03
*
* @license LGPLv3
*
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
namespace Smalot\PdfParser;
use Smalot\PdfParser\Encoding\WinAnsiEncoding;
use Smalot\PdfParser\Exception\EncodingNotFoundException;
/**
* Class Font
*/
class Font extends PDFObject
{
/**
* Replacement string returned for a character that cannot be translated.
*/
public const MISSING = '?';
/**
* Translation table (integer character code => "utf-8" encoded string).
* Stays null until loadTranslateTable() or setTable() assigns it.
*
* @var array|null
*/
protected $table = null;
/**
* Sizes stored under the keys 'from' and 'to' by loadTranslateTable();
* null until that method has run.
*
* @var array|null
*/
protected $tableSizes = null;
/**
* Caches results from uchr(), indexed by the code that was converted.
*
* @var array
*/
private static $uchrCache = [];
/**
* In some PDF-files encoding could be referenced by object id but object itself does not contain
* `/Type /Encoding` in its dictionary. These objects wouldn't be initialized as Encoding in
* \Smalot\PdfParser\PDFObject::factory() during file parsing (they would be just PDFObject).
*
* Therefore, we create an instance of Encoding from them during decoding and cache this value in this property.
* The property is unset (null) until the first such instance has been created.
*
* @var Encoding|null
*
* @see https://github.com/smalot/pdfparser/pull/500
*/
private $initializedEncodingByPdfObject;
/**
 * Prepares the font for decoding by building its character translation table.
 */
public function init()
{
    $this->loadTranslateTable();
}
/**
 * Returns the 'BaseFont' entry as a string, or "[Unknown]" when the font has no such entry.
 */
public function getName(): string
{
    if (!$this->has('BaseFont')) {
        return '[Unknown]';
    }

    return (string) $this->get('BaseFont');
}
/**
 * Returns the header's 'Subtype' entry converted to a string.
 */
public function getType(): string
{
    $subtype = $this->header->get('Subtype');

    return (string) $subtype;
}
/**
 * Returns the font details: 'Name', 'Type' and 'Encoding' ("Ansi" when the font has no
 * 'Encoding' entry), completed with the parent's details.
 *
 * The three keys computed here take precedence over same-named keys from the parent.
 */
public function getDetails(bool $deep = true): array
{
    $own = [
        'Name' => $this->getName(),
        'Type' => $this->getType(),
        'Encoding' => $this->has('Encoding') ? (string) $this->get('Encoding') : 'Ansi',
    ];

    // Array union keeps the left-hand value whenever a key exists on both sides.
    return $own + parent::getDetails($deep);
}
/**
 * Translates one raw character (one or more bytes) through the translation table.
 *
 * When the table has no entry for the character, the result is self::MISSING if
 * $use_default is true, otherwise a fallback: the character itself, or - for a
 * single byte whose 'Encoding' is an Encoding object naming WinAnsiEncoding - the
 * character whose code point equals the byte value.
 *
 * @return string|bool
 */
public function translateChar(string $char, bool $use_default = true)
{
    $code = hexdec(bin2hex($char));
    if (\array_key_exists($code, $this->table)) {
        return $this->table[$code];
    }

    // fallback for decoding single-byte ANSI characters that are not in the lookup table
    $fallback = $char;
    $hasEncodingObject = \strlen($char) < 2
        && $this->has('Encoding')
        && $this->get('Encoding') instanceof Encoding;

    if ($hasEncodingObject) {
        try {
            $encodingName = $this->get('Encoding')->__toString();
            if (WinAnsiEncoding::class === $encodingName) {
                $fallback = self::uchr($code);
            }
        } catch (EncodingNotFoundException $e) {
            // Encoding->getEncodingClass() throws EncodingNotFoundException when BaseEncoding doesn't exist
            // See table 5.11 on PDF 1.5 specs for more info
        }
    }

    if ($use_default) {
        return self::MISSING;
    }

    return $fallback;
}
/**
 * Convert a Unicode code point to its "utf-8" encoded string.
 *
 * Results are memoised in self::$uchrCache, so each code is converted only once.
 *
 * @param int $code Unicode code point
 *
 * @return string the "utf-8" encoded character
 */
public static function uchr(int $code): string
{
    if (isset(self::$uchrCache[$code])) {
        return self::$uchrCache[$code];
    }

    // mb_chr() converts directly and avoids the "HTML-ENTITIES" pseudo-encoding, whose use
    // with mbstring functions is deprecated since PHP 8.2.
    $char = \function_exists('mb_chr') ? mb_chr($code, 'UTF-8') : false;

    if (false === $char) {
        // Either mb_chr() is not available (PHP < 7.2) or it rejected the code point
        // (for example a lone surrogate). Keep the historical conversion for those inputs
        // so that they yield exactly what they always did.
        $char = mb_convert_encoding("&#{$code};", 'UTF-8', 'HTML-ENTITIES');
    }

    self::$uchrCache[$code] = $char;

    return $char;
}
/**
* Init internal chars translation table by ToUnicode CMap.
*
* The table is built only once: when $this->table is already set (by a previous call or
* by setTable()) it is returned untouched. Without a 'ToUnicode' entry the table stays
* empty and both table sizes stay at 1.
*
* @return array the translation table (integer character code => "utf-8" encoded string)
*/
public function loadTranslateTable(): array
{
if (null !== $this->table) {
return $this->table;
}
$this->table = [];
// Defaults used when the CMap does not say otherwise.
$this->tableSizes = [
'from' => 1,
'to' => 1,
];
if ($this->has('ToUnicode')) {
$content = $this->get('ToUnicode')->getContent();
$matches = [];
// Code space ranges: although every section is matched, only the first one is used
// (note the unconditional break), and within it only the first <from> <to> pair.
if (preg_match_all('/begincodespacerange(?P<sections>.*?)endcodespacerange/s', $content, $matches)) {
foreach ($matches['sections'] as $section) {
$regexp = '/<(?P<from>[0-9A-F]+)> *<(?P<to>[0-9A-F]+)>[ \r\n]+/is';
// $matches is reused here for the inner result.
preg_match_all($regexp, $section, $matches);
// Two hex digits per byte, with a minimum of 1.
$this->tableSizes = [
'from' => max(1, \strlen(current($matches['from'])) / 2),
'to' => max(1, \strlen(current($matches['to'])) / 2),
];
break;
}
}
// Support for multiple bfchar sections
if (preg_match_all('/beginbfchar(?P<sections>.*?)endbfchar/s', $content, $matches)) {
foreach ($matches['sections'] as $section) {
$regexp = '/<(?P<from>[0-9A-F]+)> +<(?P<to>[0-9A-F]+)>[ \r\n]+/is';
// $matches is overwritten; the enclosing foreach keeps iterating the sections
// it was given before the overwrite.
preg_match_all($regexp, $section, $matches);
// The source size is re-derived from the first <from> code of this section,
// replacing the value taken from the code space range.
$this->tableSizes['from'] = max(1, \strlen(current($matches['from'])) / 2);
foreach ($matches['from'] as $key => $from) {
// Split the destination into groups of 4 hex digits; a shorter remainder is
// kept as its own part.
$parts = preg_split(
'/([0-9A-F]{4})/i',
$matches['to'][$key],
0,
\PREG_SPLIT_NO_EMPTY | \PREG_SPLIT_DELIM_CAPTURE
);
$text = '';
// Each part is converted with uchr() and the results are concatenated.
foreach ($parts as $part) {
$text .= self::uchr(hexdec($part));
}
$this->table[hexdec($from)] = $text;
}
}
}
// Support for multiple bfrange sections
if (preg_match_all('/beginbfrange(?P<sections>.*?)endbfrange/s', $content, $matches)) {
foreach ($matches['sections'] as $section) {
// Support for : <srcCode1> <srcCode2> <dstString>
$regexp = '/<(?P<from>[0-9A-F]+)> *<(?P<to>[0-9A-F]+)> *<(?P<offset>[0-9A-F]+)>[ \r\n]+/is';
preg_match_all($regexp, $section, $matches);
foreach ($matches['from'] as $key => $from) {
$char_from = hexdec($from);
$char_to = hexdec($matches['to'][$key]);
$offset = hexdec($matches['offset'][$key]);
// Consecutive source codes map to consecutive values starting at $offset;
// the whole destination is treated as a single number.
for ($char = $char_from; $char <= $char_to; ++$char) {
$this->table[$char] = self::uchr($char - $char_from + $offset);
}
}
// Support for : <srcCode1> <srcCodeN> [<dstString1> <dstString2> ... <dstStringN>]
// Some PDF files put the 2-byte Unicode values on new lines, hence \r\n in the character class.
$regexp = '/<(?P<from>[0-9A-F]+)> *<(?P<to>[0-9A-F]+)> *\[(?P<strings>[\r\n<>0-9A-F ]+)\][ \r\n]+/is';
preg_match_all($regexp, $section, $matches);
foreach ($matches['from'] as $key => $from) {
$char_from = hexdec($from);
$strings = [];
preg_match_all('/<(?P<string>[0-9A-F]+)> */is', $matches['strings'][$key], $strings);
// The n-th destination string belongs to source code $char_from + n;
// the <to> bound captured above is not consulted.
foreach ($strings['string'] as $position => $string) {
$parts = preg_split(
'/([0-9A-F]{4})/i',
$string,
0,
\PREG_SPLIT_NO_EMPTY | \PREG_SPLIT_DELIM_CAPTURE
);
$text = '';
foreach ($parts as $part) {
$text .= self::uchr(hexdec($part));
}
$this->table[$char_from + $position] = $text;
}
}
}
}
}
return $this->table;
}
/**
 * Replaces the character translation table with a custom one.
 *
 * Expected layout of $table:
 * - key: integer character code;
 * - value: "utf-8" encoded string for that code.
 *
 * @return void
 */
public function setTable(array $table)
{
    $this->table = $table;
}
/**
 * Calculate text width with data from header 'Widths'.
 *
 * Every character of $text is looked up in the translation table to get its character
 * code, which is then mapped to a position in 'Widths' through 'FirstChar'/'LastChar'.
 * Characters for which no width is found are collected in $missing.
 *
 * @param string     $text    text whose characters are values of the translation table
 * @param array|null $missing receives the list of characters without a known width
 *
 * @return float|null the summed width, or null when no character had a known width
 */
public function calculateTextWidth(string $text, ?array &$missing = null): ?float
{
    // The table is null until loadTranslateTable()/setTable() ran; treat that as empty.
    $index_map = array_flip($this->table ?? []);
    $details = $this->getDetails();

    // The details do not always carry these entries; without them no width can be
    // resolved and every character ends up in $missing instead of raising warnings.
    $widths = $details['Widths'] ?? [];
    $first_char = (int) ($details['FirstChar'] ?? 0);
    $last_char = (int) ($details['LastChar'] ?? 0);

    // Widths array is zero indexed but table is not. We must map them based on FirstChar and LastChar
    $width_map = array_flip(range($first_char, $last_char));

    $width = null;
    $missing = [];
    $textLength = mb_strlen($text);
    for ($i = 0; $i < $textLength; ++$i) {
        $char = mb_substr($text, $i, 1);
        if (
            !\array_key_exists($char, $index_map)
            || !\array_key_exists($index_map[$char], $width_map)
            || !\array_key_exists($width_map[$index_map[$char]], $widths)
        ) {
            $missing[] = $char;
            continue;
        }
        $width_index = $width_map[$index_map[$char]];
        $width += $widths[$width_index];
    }

    return $width;
}
/**
 * Decodes the "<hex digits>" chunks found in $hexa into raw bytes and leaves the rest untouched.
 *
 * A string containing "<?xml" is returned unchanged. When $add_braces is true every decoded
 * chunk is wrapped in parentheses and its backslashes are doubled.
 */
public static function decodeHexadecimal(string $hexa, bool $add_braces = false): string
{
    // Special shortcut for XML content.
    if (false !== stripos($hexa, '<?xml')) {
        return $hexa;
    }

    $chunks = preg_split('/(<[a-f0-9]+>)/si', $hexa, -1, \PREG_SPLIT_NO_EMPTY | \PREG_SPLIT_DELIM_CAPTURE);
    $decoded = '';

    foreach ($chunks as $chunk) {
        $isBracketed = preg_match('/^<.*>$/s', $chunk) && false === stripos($chunk, '<?xml');
        if (!$isBracketed) {
            $decoded .= $chunk;
            continue;
        }

        // strip line breaks, then the surrounding angle brackets
        $digits = trim(preg_replace("/[\r\n]/", '', $chunk), '<>');
        $bytes = pack('H*', $digits);

        if ($add_braces) {
            $decoded .= '('.preg_replace('/\\\/s', '\\\\\\', $bytes).')';
        } else {
            $decoded .= $bytes;
        }
    }

    return $decoded;
}
/**
 * Replaces every backslash followed by exactly three octal digits with the byte it denotes;
 * all other text is copied through unchanged.
 */
public static function decodeOctal(string $text): string
{
    $segments = preg_split('/(\\\\[0-7]{3})/s', $text, -1, \PREG_SPLIT_NO_EMPTY | \PREG_SPLIT_DELIM_CAPTURE);
    $decoded = '';

    foreach ($segments as $segment) {
        $isOctalEscape = 1 === preg_match('/^\\\\[0-7]{3}$/', $segment);
        $decoded .= $isOctalEscape
            ? \chr(octdec(trim($segment, '\\')))
            : $segment;
    }

    return $decoded;
}
/**
 * Decode "#xx" escapes, where xx is a two-digit hexadecimal byte value
 * (the escape form used inside PDF name objects), into the bytes they denote.
 *
 * Both digits may be any hexadecimal digit in either case, so "#2F" and "#c3" are decoded
 * as well as "#20". Everything else is copied through unchanged.
 *
 * @param string $text text possibly containing "#xx" escapes
 *
 * @return string the text with every escape replaced by its byte
 */
public static function decodeEntities(string $text): string
{
    $parts = preg_split('/(#[0-9A-Fa-f]{2})/s', $text, -1, \PREG_SPLIT_NO_EMPTY | \PREG_SPLIT_DELIM_CAPTURE);
    $text = '';

    foreach ($parts as $part) {
        if (preg_match('/^#[0-9A-Fa-f]{2}$/', $part)) {
            $text .= \chr(hexdec(trim($part, '#')));
        } else {
            $text .= $part;
        }
    }

    return $text;
}
/**
 * Check if given string is Unicode text (by its leading FE FF byte order mark);
 * if so, decode it from big-endian 16-bit units to a "utf-8" encoded string.
 * Otherwise return the text as is.
 *
 * A high surrogate followed by a low surrogate is combined into the single code point
 * the pair stands for; an unpaired surrogate is passed to uchr() on its own, as before.
 *
 * @todo Rename in next major release to make the name correspond to reality (for ex. decodeIfUnicode())
 */
public static function decodeUnicode(string $text): string
{
    // Exact byte comparison: the marker has no letter case to ignore.
    if (0 !== strncmp($text, "\xFE\xFF", 2)) {
        return $text;
    }

    // Strip U+FEFF byte order marker.
    $payload = substr($text, 2);
    $length = \strlen($payload);
    $decoded = '';

    for ($i = 0; $i < $length; $i += 2) {
        $unit = hexdec(bin2hex(substr($payload, $i, 2)));

        // Combine a surrogate pair, provided a complete second unit follows.
        if ($unit >= 0xD800 && $unit <= 0xDBFF && $i + 3 < $length) {
            $next = hexdec(bin2hex(substr($payload, $i + 2, 2)));
            if ($next >= 0xDC00 && $next <= 0xDFFF) {
                $unit = 0x10000 + (($unit - 0xD800) << 10) + ($next - 0xDC00);
                $i += 2;
            }
        }

        $decoded .= self::uchr($unit);
    }

    return $decoded;
}
/**
 * Returns the font space limit taken from the configuration object.
 *
 * @todo Deprecated, use $this->config->getFontSpaceLimit() instead.
 */
protected function getFontSpaceLimit(): int
{
    $limit = $this->config->getFontSpaceLimit();

    return $limit;
}
/**
 * Decode text by commands array.
 *
 * Each command contributes a text fragment: '<' commands are hex-decoded, 'n' commands
 * carry a number that starts a new word when it is below the font space limit, and every
 * other command is octal-decoded. Fragments are unescaped, gathered into words, each word
 * is decoded with decodeContent() and the words are joined with single spaces.
 *
 * @param array $commands commands indexed by PDFObject::TYPE and PDFObject::COMMAND
 *
 * @return string the decoded text
 */
public function decodeText(array $commands): string
{
    // strtr() scans the text once and never re-examines what it has replaced, so an
    // escaped backslash followed by "n", "(" ... stays a backslash plus that character
    // instead of being unescaped a second time.
    $escapes = [
        '\\\\' => '\\',
        '\(' => '(',
        '\)' => ')',
        '\n' => "\n",
        '\r' => "\r",
        '\t' => "\t",
        '\f' => "\f",
        '\ ' => ' ',
    ];

    $word_position = 0;
    $words = [];
    $font_space = $this->getFontSpaceLimit();

    foreach ($commands as $command) {
        switch ($command[PDFObject::TYPE]) {
            case 'n':
                if ((float) trim($command[PDFObject::COMMAND]) < $font_space) {
                    $word_position = \count($words);
                }
                // A numeric command carries no text: go on with the next command.
                continue 2;
            case '<':
                // Decode hexadecimal.
                $text = self::decodeHexadecimal('<'.$command[PDFObject::COMMAND].'>');
                break;
            default:
                // Decode octal (if necessary).
                $text = self::decodeOctal($command[PDFObject::COMMAND]);
        }

        // replace escaped chars
        $text = strtr($text, $escapes);

        // add content to result string
        $words[$word_position] = ($words[$word_position] ?? '').$text;
    }

    // Index-based update: no by-reference loop variable is left behind.
    foreach ($words as $index => $word) {
        $words[$index] = $this->decodeContent($word);
    }

    return implode(' ', $words);
}
/**
 * Decode given $text to "utf-8" encoded string.
 *
 * Strategy, in order: the 'ToUnicode' entry when the font has one; otherwise the 'Encoding'
 * entry when it has one and that yields a result; otherwise autodetection.
 *
 * @param bool $unicode This parameter is deprecated and might be removed in a future release
 */
public function decodeContent(string $text, ?bool &$unicode = null): string
{
    if ($this->has('ToUnicode')) {
        return $this->decodeContentByToUnicodeCMapOrDescendantFonts($text);
    }

    $byEncoding = $this->has('Encoding') ? $this->decodeContentByEncoding($text) : null;
    if (null !== $byEncoding) {
        return $byEncoding;
    }

    return $this->decodeContentByAutodetectIfNecessary($text);
}
/**
* First try to decode $text by ToUnicode CMap.
* If char translation not found in ToUnicode CMap tries:
* - If DescendantFonts exists tries to decode char by one of that fonts.
* - If have no success to decode by DescendantFonts interpret $text as a string with "Windows-1252" encoding.
* - If DescendantFonts does not exist just return "?" as decoded char.
*
* The text is consumed in chunks of $this->tableSizes['from'] bytes; when that size is
* falsy the text is returned unchanged.
*
* NOTE(review): translateChar() as defined in this class returns a table entry, the raw
* character or MISSING, so it looks like it only yields false when the table itself stores
* false. If so, the DescendantFonts and "?" branches below are never reached - confirm.
*
* @todo Seems this is invalid algorithm that do not follow pdf-format specification. Must be rewritten.
*/
private function decodeContentByToUnicodeCMapOrDescendantFonts(string $text): string
{
$bytes = $this->tableSizes['from'];
if ($bytes) {
$result = '';
$length = \strlen($text);
for ($i = 0; $i < $length; $i += $bytes) {
$char = substr($text, $i, $bytes);
// $use_default is false: an untranslated chunk comes back as its fallback, not as "?".
if (false !== ($decoded = $this->translateChar($char, false))) {
$char = $decoded;
} elseif ($this->has('DescendantFonts')) {
// The descendant fonts are read from the header elements when the entry is a
// PDFObject, and from the entry's content otherwise.
if ($this->get('DescendantFonts') instanceof PDFObject) {
$fonts = $this->get('DescendantFonts')->getHeader()->getElements();
} else {
$fonts = $this->get('DescendantFonts')->getContent();
}
$decoded = false;
// The first descendant font that translates the chunk wins.
foreach ($fonts as $font) {
if ($font instanceof self) {
if (false !== ($decoded = $font->translateChar($char, false))) {
$decoded = mb_convert_encoding($decoded, 'UTF-8', 'Windows-1252');
break;
}
}
}
if (false !== $decoded) {
$char = $decoded;
} else {
// No descendant font helped: read the raw chunk as Windows-1252.
$char = mb_convert_encoding($char, 'UTF-8', 'Windows-1252');
}
} else {
$char = self::MISSING;
}
$result .= $char;
}
$text = $result;
}
return $text;
}
/**
 * Decodes $text according to the font's 'Encoding' entry, whatever kind of value that entry holds.
 *
 * Returns null only when the delegated Element-based decoding returns null.
 */
private function decodeContentByEncoding(string $text): ?string
{
    $entry = $this->get('Encoding');

    // When Encoding referenced by object id (/Encoding 520 0 R) but object itself does not contain `/Type /Encoding` in its dictionary.
    if ($entry instanceof PDFObject) {
        $entry = $this->getInitializedEncodingByPdfObject($entry);
    }

    // When Encoding referenced by object id (/Encoding 520 0 R) but object itself contains `/Type /Encoding` in its dictionary.
    if ($entry instanceof Encoding) {
        return $this->decodeContentByEncodingEncoding($text, $entry);
    }

    // When Encoding is just string (/Encoding /WinAnsiEncoding)
    if ($entry instanceof Element) { // todo: ElementString class must be used?
        return $this->decodeContentByEncodingElement($text, $entry);
    }

    // don't double-encode strings already in UTF-8
    return mb_check_encoding($text, 'UTF-8')
        ? $text
        : mb_convert_encoding($text, 'UTF-8', 'Windows-1252');
}
/**
 * Returns the cached Encoding instance, creating and caching it from $PDFObject on first use.
 *
 * Once an instance is cached it is returned for every later call, whatever object is passed.
 */
private function getInitializedEncodingByPdfObject(PDFObject $PDFObject): Encoding
{
    if ($this->initializedEncodingByPdfObject) {
        return $this->initializedEncodingByPdfObject;
    }

    $this->initializedEncodingByPdfObject = $this->createInitializedEncodingByPdfObject($PDFObject);

    return $this->initializedEncodingByPdfObject;
}
/**
 * Decodes $text byte by byte with an Encoding instance (the value of $this->get('Encoding')).
 *
 * Each byte value is translated by the encoding; when the translation is null the byte
 * value itself is used. The resulting code is converted with uchr().
 */
private function decodeContentByEncodingEncoding(string $text, Encoding $encoding): string
{
    $decoded = '';
    $byteCount = \strlen($text);

    for ($offset = 0; $offset < $byteCount; ++$offset) {
        $byteValue = \ord($text[$offset]);
        $translated = $encoding->translateChar($byteValue);
        $decoded .= self::uchr($translated ?? $byteValue);
    }

    return $decoded;
}
/**
 * Decode content when $encoding (given by $this->get('Encoding')) is instance of Element.
 *
 * @param string  $text     raw text to convert
 * @param Element $encoding element whose content is the PDF encoding name
 *
 * @return string|null the "utf-8" encoded text, or null when the encoding name is not
 *                     supported or the conversion failed
 */
private function decodeContentByEncodingElement(string $text, Element $encoding): ?string
{
    $pdfEncodingName = $encoding->getContent();

    // mb_convert_encoding does not support MacRoman/macintosh,
    // so we use iconv() here
    $iconvEncodingName = $this->getIconvEncodingNameOrNullByPdfEncodingName($pdfEncodingName);
    if (!$iconvEncodingName) {
        return null;
    }

    $converted = iconv($iconvEncodingName, 'UTF-8', $text);

    // iconv() returns false when it cannot convert the input. Report that as "no result"
    // rather than letting false be coerced into an empty string, which would silently
    // drop the text.
    return false === $converted ? null : $converted;
}
/**
 * Maps a PDF encoding name to the encoding name understood by iconv.
 *
 * @return string|null the iconv name, or null for a PDF encoding name that is not in the map
 */
private function getIconvEncodingNameOrNullByPdfEncodingName(string $pdfEncodingName): ?string
{
    $iconvNames = [
        'StandardEncoding' => 'ISO-8859-1',
        'MacRomanEncoding' => 'MACINTOSH',
        'WinAnsiEncoding' => 'CP1252',
    ];

    return $iconvNames[$pdfEncodingName] ?? null;
}
/**
 * Returns $text unchanged when it is valid "utf-8"; otherwise converts it to "utf-8",
 * reading it as "Windows-1252".
 *
 * @return string|false
 */
private function decodeContentByAutodetectIfNecessary(string $text)
{
    $alreadyUtf8 = mb_check_encoding($text, 'UTF-8');

    // todo: Why exactly `Windows-1252` used?
    return $alreadyUtf8 ? $text : mb_convert_encoding($text, 'UTF-8', 'Windows-1252');
}
/**
 * Builds an Encoding instance from a PDFObject instance and calls init() on it before returning it.
 */
private function createInitializedEncodingByPdfObject(PDFObject $PDFObject): Encoding
{
    $created = $this->createEncodingByPdfObject($PDFObject);
    $created->init();

    return $created;
}
/**
 * Builds an Encoding instance from the document, header, content and config of a PDFObject
 * instance. init() is not called on the new instance.
 */
private function createEncodingByPdfObject(PDFObject $PDFObject): Encoding
{
    return new Encoding(
        $PDFObject->getDocument(),
        $PDFObject->getHeader(),
        $PDFObject->getContent(),
        $PDFObject->getConfig()
    );
}
}

View file

@ -0,0 +1,42 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
*
* @date 2017-01-03
*
* @license LGPLv3
*
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
namespace Smalot\PdfParser\Font;
use Smalot\PdfParser\Font;
/**
* Class FontCIDFontType0
*
* Subclass of Font that adds no members of its own; all behaviour is inherited.
* Presumably used for fonts whose subtype is "CIDFontType0" - confirm where fonts are instantiated.
*/
class FontCIDFontType0 extends Font
{
}

View file

@ -0,0 +1,42 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
*
* @date 2017-01-03
*
* @license LGPLv3
*
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
namespace Smalot\PdfParser\Font;
use Smalot\PdfParser\Font;
/**
* Class FontCIDFontType2
*
* Subclass of Font that adds no members of its own; all behaviour is inherited.
* Presumably used for fonts whose subtype is "CIDFontType2" - confirm where fonts are instantiated.
*/
class FontCIDFontType2 extends Font
{
}

View file

@ -0,0 +1,42 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
*
* @date 2017-01-03
*
* @license LGPLv3
*
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
namespace Smalot\PdfParser\Font;
use Smalot\PdfParser\Font;
/**
* Class FontTrueType
*
* Subclass of Font that adds no members of its own; all behaviour is inherited.
* Presumably used for fonts whose subtype is "TrueType" - confirm where fonts are instantiated.
*/
class FontTrueType extends Font
{
}

View file

@ -0,0 +1,42 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
*
* @date 2017-01-03
*
* @license LGPLv3
*
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
namespace Smalot\PdfParser\Font;
use Smalot\PdfParser\Font;
/**
* Class FontType0
*
* Subclass of Font that adds no members of its own; all behaviour is inherited.
* Presumably used for fonts whose subtype is "Type0" - confirm where fonts are instantiated.
*/
class FontType0 extends Font
{
}

View file

@ -0,0 +1,42 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
*
* @date 2017-01-03
*
* @license LGPLv3
*
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
namespace Smalot\PdfParser\Font;
use Smalot\PdfParser\Font;
/**
* Class FontType1
*
* Subclass of Font that adds no members of its own; all behaviour is inherited.
* Presumably used for fonts whose subtype is "Type1" - confirm where fonts are instantiated.
*/
class FontType1 extends Font
{
}

View file

@ -0,0 +1,42 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
*
* @date 2017-01-03
*
* @license LGPLv3
*
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
namespace Smalot\PdfParser\Font;
use Smalot\PdfParser\Font;
/**
* Class FontType3
*
* Subclass of Font that adds no members of its own; all behaviour is inherited.
* Presumably used for fonts whose subtype is "Type3" - confirm where fonts are instantiated.
*/
class FontType3 extends Font
{
}

View file

@ -0,0 +1,194 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
*
* @date 2017-01-03
*
* @license LGPLv3
*
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
namespace Smalot\PdfParser;
use Smalot\PdfParser\Element\ElementArray;
use Smalot\PdfParser\Element\ElementMissing;
use Smalot\PdfParser\Element\ElementStruct;
use Smalot\PdfParser\Element\ElementXRef;
/**
 * Container for the named elements of a header, with on-demand resolution of
 * cross references through the owning document.
 */
class Header
{
    /**
     * Document used to resolve cross references; null when none was given.
     *
     * @var Document|null
     */
    protected $document = null;

    /**
     * Elements indexed by name.
     *
     * @var Element[]
     */
    protected $elements = null;

    /**
     * @param Element[]     $elements list of elements, indexed by name
     * @param Document|null $document document used to resolve cross references
     */
    public function __construct(array $elements = [], ?Document $document = null)
    {
        $this->document = $document;
        $this->elements = $elements;
    }

    /**
     * Calls init() on every entry that is an Element.
     */
    public function init()
    {
        foreach ($this->elements as $entry) {
            if (!($entry instanceof Element)) {
                continue;
            }
            $entry->init();
        }
    }

    /**
     * Returns all elements, after resolving each of them through resolveXRef().
     */
    public function getElements()
    {
        foreach (array_keys($this->elements) as $name) {
            $this->resolveXRef($name);
        }

        return $this->elements;
    }

    /**
     * Returns the class name of every element, indexed like the elements. Used only for debug.
     */
    public function getElementTypes(): array
    {
        return array_map('get_class', $this->elements);
    }

    /**
     * Returns a plain representation of the elements, indexed by element name.
     *
     * Nested headers, PDF objects and element arrays are expanded only when $deep is true
     * (PDF objects are then asked for their non-deep details); other elements are cast to string.
     */
    public function getDetails(bool $deep = true): array
    {
        $details = [];

        foreach ($this->getElements() as $name => $entry) {
            if ($deep && $entry instanceof self) {
                $details[$name] = $entry->getDetails($deep);
                continue;
            }

            if ($deep && $entry instanceof PDFObject) {
                $details[$name] = $entry->getDetails(false);
                continue;
            }

            if ($entry instanceof ElementArray) {
                // Arrays are left out entirely when not going deep.
                if ($deep) {
                    $details[$name] = $entry->getDetails();
                }
                continue;
            }

            if ($entry instanceof Element) {
                $details[$name] = (string) $entry;
            }
        }

        return $details;
    }

    /**
     * Indicate if an element name is available in header.
     *
     * @param string $name the name of the element
     */
    public function has(string $name): bool
    {
        return \array_key_exists($name, $this->elements);
    }

    /**
     * Returns the (resolved) element stored under $name, or a new ElementMissing when
     * the name is unknown or the resolved value is falsy.
     *
     * @return Element|PDFObject
     */
    public function get(string $name)
    {
        if (!\array_key_exists($name, $this->elements)) {
            return new ElementMissing();
        }

        $resolved = $this->resolveXRef($name);

        return $resolved ? $resolved : new ElementMissing();
    }

    /**
     * Resolve XRef to object.
     *
     * An entry that is an ElementXRef is looked up in the document (when there is one) and,
     * if found, replaced in the element list so later calls get the object directly.
     *
     * @return Element|PDFObject
     *
     * @throws \Exception
     */
    protected function resolveXRef(string $name)
    {
        $current = $this->elements[$name];

        if (!($current instanceof ElementXRef) || null === $this->document) {
            return $current;
        }

        /** @var ElementXRef $current */
        $target = $this->document->getObjectById($current->getId());
        if (null === $target) {
            return new ElementMissing();
        }

        // Update elements list for future calls.
        $this->elements[$name] = $target;

        return $target;
    }

    /**
     * Builds a header from raw content: parsed as a struct when the trimmed content starts
     * with "<<", as an array otherwise. An empty header is returned when nothing could be parsed.
     *
     * @param string   $content  The content to parse
     * @param Document $document The document
     * @param int      $position The new position of the cursor after parsing
     */
    public static function parse(string $content, Document $document, int &$position = 0): self
    {
        $isStruct = '<<' == substr(trim($content), 0, 2);

        if ($isStruct) {
            /* @var Header $parsed */
            $parsed = ElementStruct::parse($content, $document, $position);

            // Build an empty header when the struct could not be parsed.
            return $parsed ? $parsed : new self([], $document);
        }

        $array = ElementArray::parse($content, $document, $position);
        if ($array) {
            return new self($array->getRawContent(), null);
        }

        return new self([], $document);
    }
}

View file

@ -0,0 +1,779 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
*
* @date 2017-01-03
*
* @license LGPLv3
*
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
namespace Smalot\PdfParser;
use Smalot\PdfParser\XObject\Form;
use Smalot\PdfParser\XObject\Image;
/**
 * Generic PDF object: a header plus an optional content stream.
 *
 * Besides plain accessors, this class contains the text-extraction logic:
 * it splits a content stream into text sections, tokenizes each section
 * into commands and decodes the shown strings with the current font.
 */
class PDFObject
{
    /** Key of the token type in a command array. */
    public const TYPE = 't';
    /** Key of the operator in a command array. */
    public const OPERATOR = 'o';
    /** Key of the operand(s) in a command array. */
    public const COMMAND = 'c';

    /**
     * The recursion stack.
     *
     * Holds the unique ids of the objects getText() has already visited, so
     * that circular XObject references are not followed again.
     *
     * @var array
     */
    public static $recursionStack = [];

    /**
     * @var Document
     */
    protected $document = null;

    /**
     * @var Header
     */
    protected $header = null;

    /**
     * @var string
     */
    protected $content = null;

    /**
     * @var Config
     */
    protected $config;

    /**
     * @param Document    $document owning document
     * @param Header|null $header   object header; an empty Header is created when omitted
     * @param string|null $content  raw content stream
     * @param Config|null $config   parser configuration
     */
    public function __construct(
        Document $document,
        ?Header $header = null,
        ?string $content = null,
        ?Config $config = null
    ) {
        $this->document = $document;
        $this->header = $header ?? new Header();
        $this->content = $content;
        $this->config = $config;
    }

    /**
     * Initialization hook; does nothing in this base class.
     */
    public function init()
    {
    }

    public function getDocument(): Document
    {
        return $this->document;
    }

    public function getHeader(): ?Header
    {
        return $this->header;
    }

    public function getConfig(): ?Config
    {
        return $this->config;
    }

    /**
     * Returns the header entry with the given name (delegates to the header).
     *
     * @return Element|PDFObject|Header
     */
    public function get(string $name)
    {
        return $this->header->get($name);
    }

    /**
     * Tells whether the header contains an entry with the given name.
     */
    public function has(string $name): bool
    {
        return $this->header->has($name);
    }

    /**
     * Returns the header details (delegates to the header).
     */
    public function getDetails(bool $deep = true): array
    {
        return $this->header->getDetails($deep);
    }

    public function getContent(): ?string
    {
        return $this->content;
    }

    /**
     * Masks every part of a content stream that must not be mistaken for an
     * operator: escaped characters, inline images, string/array operands,
     * "<...>" structures and BDC/EMC markers.
     *
     * Every masked byte is replaced by exactly one $char, so the result has
     * the same length as the input and offsets found in it can be applied
     * to the original content.
     *
     * @param string $content content stream to mask
     * @param string $char    masking character (only its first byte is used)
     *
     * @return string the masked content
     */
    public function cleanContent(string $content, string $char = 'X')
    {
        $char = $char[0];
        // Escaped backslashes and parentheses are two bytes long: mask both.
        $content = str_replace(['\\\\', '\\)', '\\('], $char.$char, $content);

        // Remove image bloc with binary content
        preg_match_all('/\s(BI\s.*?(\sID\s).*?(\sEI))\s/s', $content, $matches, \PREG_OFFSET_CAPTURE);
        foreach ($matches[0] as $part) {
            $content = substr_replace($content, str_repeat($char, \strlen($part[0])), $part[1], \strlen($part[0]));
        }

        // Clean content in square brackets [.....]
        preg_match_all('/\[((\(.*?\)|[0-9\.\-\s]*)*)\]/s', $content, $matches, \PREG_OFFSET_CAPTURE);
        foreach ($matches[1] as $part) {
            $content = substr_replace($content, str_repeat($char, \strlen($part[0])), $part[1], \strlen($part[0]));
        }

        // Clean content in round brackets (.....)
        preg_match_all('/\((.*?)\)/s', $content, $matches, \PREG_OFFSET_CAPTURE);
        foreach ($matches[1] as $part) {
            $content = substr_replace($content, str_repeat($char, \strlen($part[0])), $part[1], \strlen($part[0]));
        }

        // Clean structure: everything between '<' and the matching '>' is masked.
        if ($parts = preg_split('/(<|>)/s', $content, -1, \PREG_SPLIT_NO_EMPTY | \PREG_SPLIT_DELIM_CAPTURE)) {
            $content = '';
            $level = 0;
            foreach ($parts as $part) {
                if ('<' == $part) {
                    ++$level;
                }

                $content .= (0 == $level ? $part : str_repeat($char, \strlen($part)));

                if ('>' == $part) {
                    --$level;
                }
            }
        }

        // Clean BDC and EMC markup. The masking character is quoted together
        // with the pattern delimiter so that any character is safe here.
        preg_match_all(
            '/(\/[A-Za-z0-9\_]*\s*'.preg_quote($char, '/').'*BDC)/s',
            $content,
            $matches,
            \PREG_OFFSET_CAPTURE
        );
        foreach ($matches[1] as $part) {
            $content = substr_replace($content, str_repeat($char, \strlen($part[0])), $part[1], \strlen($part[0]));
        }

        preg_match_all('/\s(EMC)\s/s', $content, $matches, \PREG_OFFSET_CAPTURE);
        foreach ($matches[1] as $part) {
            $content = substr_replace($content, str_repeat($char, \strlen($part[0])), $part[1], \strlen($part[0]));
        }

        return $content;
    }

    /**
     * Extracts the text sections (BT ... ET) and the "Do" commands of a
     * content stream.
     *
     * Sections are located in a masked copy of the content (see
     * cleanContent()) and then cut out of the original content at the same
     * offsets. All text sections are returned first, followed by all "Do"
     * commands.
     *
     * @return string[] the raw sections
     */
    public function getSectionsText(?string $content): array
    {
        $sections = [];
        $content = ' '.$content.' ';
        $textCleaned = $this->cleanContent($content, '_');

        // Extract text blocks.
        if (preg_match_all('/(\sQ)?\s+BT[\s|\(|\[]+(.*?)\s*ET(\sq)?/s', $textCleaned, $matches, \PREG_OFFSET_CAPTURE)) {
            foreach ($matches[2] as $pos => $part) {
                $text = $part[0];
                if ('' === $text) {
                    continue;
                }
                $offset = $part[1];
                $section = substr($content, $offset, \strlen($text));

                // Removes BDC and EMC markup.
                $section = preg_replace('/(\/[A-Za-z0-9]+\s*<<.*?)(>>\s*BDC)(.*?)(EMC\s+)/s', '${3}', $section.' ');

                // Add Q and q flags if detected around BT/ET.
                // @see: https://github.com/smalot/pdfparser/issues/387
                $section = trim((!empty($matches[1][$pos][0]) ? "Q\n" : '').$section).(!empty($matches[3][$pos][0]) ? "\nq" : '');

                $sections[] = $section;
            }
        }

        // Extract 'do' commands.
        if (preg_match_all('/(\/[A-Za-z0-9\.\-_]+\s+Do)\s/s', $textCleaned, $matches, \PREG_OFFSET_CAPTURE)) {
            foreach ($matches[1] as $part) {
                $text = $part[0];
                $offset = $part[1];
                $section = substr($content, $offset, \strlen($text));

                $sections[] = $section;
            }
        }

        return $sections;
    }

    /**
     * Picks the font used until a Tf operator selects another one: the first
     * font of the page, else the first font of the document, else a new
     * empty Font.
     */
    private function getDefaultFont(?Page $page = null): Font
    {
        $fonts = [];
        if (null !== $page) {
            $fonts = $page->getFonts();
        }

        $firstFont = $this->document->getFirstFont();
        if (null !== $firstFont) {
            $fonts[] = $firstFont;
        }

        if (\count($fonts) > 0) {
            return reset($fonts);
        }

        return new Font($this->document, null, null, $this->config);
    }

    /**
     * Extracts the text of the content stream as a single string.
     *
     * Text-showing operators are decoded with the current font; positioning
     * operators are turned into line breaks, tabs or spaces; "Do" operators
     * pull in the text of the referenced XObject of $page.
     *
     * @param Page|null $page page providing the fonts and XObjects
     *
     * @return string the extracted text followed by a single space
     *
     * @throws \Exception
     */
    public function getText(?Page $page = null): string
    {
        $result = '';
        $sections = $this->getSectionsText($this->content);
        $current_font = $this->getDefaultFont($page);
        $clipped_font = $current_font;

        $current_position_td = ['x' => false, 'y' => false];
        $current_position_tm = ['x' => false, 'y' => false];

        self::$recursionStack[] = $this->getUniqueId();

        foreach ($sections as $section) {
            $commands = $this->getCommandsText($section);
            $reverse_text = false;
            $text = '';

            foreach ($commands as $command) {
                switch ($command[self::OPERATOR]) {
                    case 'BMC':
                        if ('ReversedChars' == $command[self::COMMAND]) {
                            $reverse_text = true;
                        }
                        break;

                    // move text current point
                    case 'Td':
                        $args = preg_split('/\s/s', $command[self::COMMAND]);
                        $y = array_pop($args);
                        $x = array_pop($args);
                        if (((float) $x <= 0) ||
                            (false !== $current_position_td['y'] && (float) $y < (float) $current_position_td['y'])
                        ) {
                            // vertical offset
                            $text .= "\n";
                        } elseif (false !== $current_position_td['x']
                            && (float) $x > (float) $current_position_td['x']
                        ) {
                            // NOTE(review): requires a non-null config here — confirm callers always pass one.
                            $text .= $this->config->getHorizontalOffset();
                        }
                        $current_position_td = ['x' => $x, 'y' => $y];
                        break;

                    // move text current point and set leading
                    case 'TD':
                        $args = preg_split('/\s/s', $command[self::COMMAND]);
                        $y = array_pop($args);
                        $x = array_pop($args);
                        if ((float) $y < 0) {
                            $text .= "\n";
                        } elseif ((float) $x <= 0) {
                            $text .= ' ';
                        }
                        break;

                    case 'Tf':
                        list($id) = preg_split('/\s/s', $command[self::COMMAND]);
                        $id = trim($id, '/');
                        if (null !== $page) {
                            $new_font = $page->getFont($id);
                            // If an invalid font ID is given, do not update the font.
                            // This should theoretically never happen, as the PDF spec states for the Tf operator:
                            // "The specified font value shall match a resource name in the Font entry of the default resource dictionary"
                            // (https://www.adobe.com/content/dam/acom/en/devnet/pdf/pdfs/PDF32000_2008.pdf, page 435)
                            // But we want to make sure that malformed PDFs do not simply crash.
                            if (null !== $new_font) {
                                $current_font = $new_font;
                            }
                        }
                        break;

                    case 'Q':
                        // Use clip: restore font.
                        $current_font = $clipped_font;
                        break;

                    case 'q':
                        // Use clip: save font.
                        $clipped_font = $current_font;
                        break;

                    case "'":
                    case 'Tj':
                        // Wrap the single string so it can be decoded like a TJ array.
                        $command[self::COMMAND] = [$command];
                        // no break
                    case 'TJ':
                        $sub_text = $current_font->decodeText($command[self::COMMAND]);
                        $text .= $sub_text;
                        break;

                    // set leading
                    case 'TL':
                        $text .= ' ';
                        break;

                    case 'Tm':
                        $args = preg_split('/\s/s', $command[self::COMMAND]);
                        $y = array_pop($args);
                        $x = array_pop($args);
                        if (false !== $current_position_tm['x']) {
                            $delta = abs((float) $x - (float) $current_position_tm['x']);
                            if ($delta > 10) {
                                $text .= "\t";
                            }
                        }
                        if (false !== $current_position_tm['y']) {
                            $delta = abs((float) $y - (float) $current_position_tm['y']);
                            if ($delta > 10) {
                                $text .= "\n";
                            }
                        }
                        $current_position_tm = ['x' => $x, 'y' => $y];
                        break;

                    // set horizontal scaling
                    case 'Tz':
                    // move to start of next line
                    case 'T*':
                        $text .= "\n";
                        break;

                    case 'Do':
                        if (null !== $page) {
                            $args = preg_split('/\s/s', $command[self::COMMAND]);
                            $id = trim(array_pop($args), '/ ');
                            $xobject = $page->getXObject($id);

                            // Strict comparison: the ids are hexadecimal strings and a
                            // loose comparison would treat numeric-looking ids
                            // (e.g. "...1e0..." and "...1e00...") as equal.
                            if (\is_object($xobject)
                                && $xobject instanceof self
                                && !\in_array($xobject->getUniqueId(), self::$recursionStack, true)
                            ) {
                                // Not a circular reference.
                                $text .= $xobject->getText($page);
                            }
                        }
                        break;

                    // Known operators that do not contribute to the text.
                    case 'Tc': // set character spacing
                    case 'Ts': // set super/subscripting text rise
                    case 'Tw': // set word spacing
                    case 'Da':
                    case 'rg':
                    case 'RG':
                    case 're':
                    case 'co':
                    case 'cs':
                    case 'gs':
                    case 'en':
                    case 'sc':
                    case 'SC':
                    case 'g':
                    case 'G':
                    case 'V':
                    case 'vo':
                    case 'Vo':
                        break;

                    default:
                }
            }

            // Fix Hebrew and other reverse text oriented languages.
            // @see: https://github.com/smalot/pdfparser/issues/398
            if ($reverse_text) {
                $chars = mb_str_split($text, 1, mb_internal_encoding());
                $text = implode('', array_reverse($chars));
            }

            $result .= $text;
        }

        return $result.' ';
    }

    /**
     * Extracts the text of the content stream as a list of decoded strings,
     * one entry per text-showing operator (or per XObject pulled in by "Do").
     *
     * @param Page|null $page page providing the fonts and XObjects
     *
     * @throws \Exception
     */
    public function getTextArray(?Page $page = null): array
    {
        $text = [];
        $sections = $this->getSectionsText($this->content);
        $current_font = new Font($this->document, null, null, $this->config);

        foreach ($sections as $section) {
            $commands = $this->getCommandsText($section);

            foreach ($commands as $command) {
                switch ($command[self::OPERATOR]) {
                    case 'Tf':
                        if (null !== $page) {
                            list($id) = preg_split('/\s/s', $command[self::COMMAND]);
                            $id = trim($id, '/');
                            $new_font = $page->getFont($id);
                            // Same rule as in getText(): an unknown font id keeps the
                            // current font instead of replacing it with null, which
                            // would make the next text operator fail.
                            if (null !== $new_font) {
                                $current_font = $new_font;
                            }
                        }
                        break;

                    case "'":
                    case 'Tj':
                        // Wrap the single string so it can be decoded like a TJ array.
                        $command[self::COMMAND] = [$command];
                        // no break
                    case 'TJ':
                        $sub_text = $current_font->decodeText($command[self::COMMAND]);
                        $text[] = $sub_text;
                        break;

                    case 'Do':
                        if (null !== $page) {
                            $args = preg_split('/\s/s', $command[self::COMMAND]);
                            $id = trim(array_pop($args), '/ ');
                            if ($xobject = $page->getXObject($id)) {
                                $text[] = $xobject->getText($page);
                            }
                        }
                        break;

                    // Known operators that do not contribute to the text array.
                    case 'Tc': // set character spacing
                    case 'Td': // move text current point
                    case 'TD': // move text current point and set leading
                    case 'TL': // set leading
                    case 'Tm':
                    case 'Ts': // set super/subscripting text rise
                    case 'Tw': // set word spacing
                    case 'Tz': // set horizontal scaling
                    case 'T*': // move to start of next line
                    case 'Da':
                    case 'rg':
                    case 'RG':
                    case 're':
                    case 'co':
                    case 'cs':
                    case 'gs':
                    case 'en':
                    case 'sc':
                    case 'SC':
                    case 'g':
                    case 'G':
                    case 'V':
                    case 'vo':
                    case 'Vo':
                        break;

                    default:
                }
            }
        }

        return $text;
    }

    /**
     * Tokenizes a text section into commands.
     *
     * Each command is an array with three keys: self::TYPE (the leading
     * character of the operand, 'n' for a bare number or ''),
     * self::OPERATOR (the operator name, possibly '') and self::COMMAND (the
     * operand: a string, or a list of commands for "[...]" arrays).
     * Parsing stops at the first token that cannot be interpreted.
     *
     * @param string $text_part the section to tokenize
     * @param int    $offset    cursor position; updated while parsing (used
     *                          for the recursive parsing of arrays)
     *
     * @return array the list of commands
     */
    public function getCommandsText(string $text_part, int &$offset = 0): array
    {
        $commands = $matches = [];
        $length = \strlen($text_part);

        while ($offset < $length) {
            $offset += strspn($text_part, "\x00\x09\x0a\x0c\x0d\x20", $offset);
            if ($offset >= $length) {
                // Only whitespace was left: reading $text_part[$offset] would
                // be out of range.
                break;
            }
            $char = $text_part[$offset];

            $operator = '';
            $type = '';
            $command = false;

            switch ($char) {
                case '/':
                    $type = $char;
                    if (preg_match(
                        '/^\/([A-Z0-9\._,\+]+\s+[0-9.\-]+)\s+([A-Z]+)\s*/si',
                        substr($text_part, $offset),
                        $matches
                    )
                    ) {
                        $operator = $matches[2];
                        $command = $matches[1];
                        $offset += \strlen($matches[0]);
                    } elseif (preg_match(
                        '/^\/([A-Z0-9\._,\+]+)\s+([A-Z]+)\s*/si',
                        substr($text_part, $offset),
                        $matches
                    )
                    ) {
                        $operator = $matches[2];
                        $command = $matches[1];
                        $offset += \strlen($matches[0]);
                    }
                    break;

                case '[':
                case ']':
                    // array object
                    $type = $char;
                    ++$offset;
                    if ('[' == $char) {
                        // get elements
                        $command = $this->getCommandsText($text_part, $offset);

                        if (preg_match('/^\s*[A-Z]{1,2}\s*/si', substr($text_part, $offset), $matches)) {
                            $operator = trim($matches[0]);
                            $offset += \strlen($matches[0]);
                        }
                    }
                    // For ']' the command stays false, which ends the
                    // (recursive) parsing of the current array.
                    break;

                case '<':
                case '>':
                    // hexadecimal string object
                    $type = $char;
                    ++$offset;
                    if ('<' == $char) {
                        $strpos = strpos($text_part, '>', $offset);
                        if (false === $strpos) {
                            // Unterminated string: take the remainder instead of
                            // letting "false + 1" move the cursor back to 1,
                            // which could parse the same bytes forever.
                            $command = (string) substr($text_part, $offset);
                            $offset = $length;
                        } else {
                            $command = substr($text_part, $offset, $strpos - $offset);
                            $offset = $strpos + 1;
                        }
                    }

                    if (preg_match('/^\s*[A-Z]{1,2}\s*/si', substr($text_part, $offset), $matches)) {
                        $operator = trim($matches[0]);
                        $offset += \strlen($matches[0]);
                    }
                    break;

                case '(':
                case ')':
                    ++$offset;
                    $type = $char;
                    $strpos = $offset;
                    if ('(' == $char) {
                        $open_bracket = 1;
                        while ($open_bracket > 0) {
                            if (!isset($text_part[$strpos])) {
                                break;
                            }
                            $ch = $text_part[$strpos];
                            switch ($ch) {
                                case '\\':
                                    // REVERSE SOLIDUS (5Ch) (Backslash)
                                    // skip next character
                                    ++$strpos;
                                    break;

                                case '(':
                                    // LEFT PARENTHESIS (28h)
                                    ++$open_bracket;
                                    break;

                                case ')':
                                    // RIGHT PARENTHESIS (29h)
                                    --$open_bracket;
                                    break;
                            }
                            ++$strpos;
                        }
                        $command = substr($text_part, $offset, $strpos - $offset - 1);
                        $offset = $strpos;

                        if (preg_match('/^\s*([A-Z\']{1,2})\s*/si', substr($text_part, $offset), $matches)) {
                            $operator = $matches[1];
                            $offset += \strlen($matches[0]);
                        }
                    }
                    break;

                default:
                    if ('ET' == substr($text_part, $offset, 2)) {
                        break;
                    } elseif (preg_match(
                        '/^\s*(?P<data>([0-9\.\-]+\s*?)+)\s+(?P<id>[A-Z]{1,3})\s*/si',
                        substr($text_part, $offset),
                        $matches
                    )
                    ) {
                        // numeric operands followed by an operator
                        $operator = trim($matches['id']);
                        $command = trim($matches['data']);
                        $offset += \strlen($matches[0]);
                    } elseif (preg_match('/^\s*([0-9\.\-]+\s*?)+\s*/si', substr($text_part, $offset), $matches)) {
                        // bare number(s) without operator
                        $type = 'n';
                        $command = trim($matches[0]);
                        $offset += \strlen($matches[0]);
                    } elseif (preg_match('/^\s*([A-Z\*]+)\s*/si', substr($text_part, $offset), $matches)) {
                        // operator without operand
                        $type = '';
                        $operator = $matches[1];
                        $command = '';
                        $offset += \strlen($matches[0]);
                    }
            }

            if (false !== $command) {
                $commands[] = [
                    self::TYPE => $type,
                    self::OPERATOR => $operator,
                    self::COMMAND => $command,
                ];
            } else {
                break;
            }
        }

        return $commands;
    }

    /**
     * Creates the object class matching the "Type" (and "Subtype") entries of
     * the header; falls back to a plain PDFObject.
     *
     * Image content is dropped only when a config is given and it disables
     * image-content retention; without a config the content is kept.
     */
    public static function factory(
        Document $document,
        Header $header,
        ?string $content,
        ?Config $config = null
    ): self {
        switch ($header->get('Type')->getContent()) {
            case 'XObject':
                switch ($header->get('Subtype')->getContent()) {
                    case 'Image':
                        // $config is nullable: do not dereference it blindly.
                        $retainContent = null === $config || $config->getRetainImageContent();

                        return new Image($document, $header, $retainContent ? $content : null, $config);

                    case 'Form':
                        return new Form($document, $header, $content, $config);
                }

                return new self($document, $header, $content, $config);

            case 'Pages':
                return new Pages($document, $header, $content, $config);

            case 'Page':
                return new Page($document, $header, $content, $config);

            case 'Encoding':
                return new Encoding($document, $header, $content, $config);

            case 'Font':
                $subtype = $header->get('Subtype')->getContent();
                $classname = '\Smalot\PdfParser\Font\Font'.$subtype;

                // Use the subtype-specific font class when one exists.
                if (class_exists($classname)) {
                    return new $classname($document, $header, $content, $config);
                }

                return new Font($document, $header, $content, $config);

            default:
                return new self($document, $header, $content, $config);
        }
    }

    /**
     * Returns unique id identifying the object.
     */
    protected function getUniqueId(): string
    {
        return spl_object_hash($this);
    }
}

View file

@ -0,0 +1,953 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
*
* @date 2017-01-03
*
* @license LGPLv3
*
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
namespace Smalot\PdfParser;
use Smalot\PdfParser\Element\ElementArray;
use Smalot\PdfParser\Element\ElementMissing;
use Smalot\PdfParser\Element\ElementNull;
use Smalot\PdfParser\Element\ElementXRef;
class Page extends PDFObject
{
/**
* @var Font[]
*/
protected $fonts = null;
/**
* @var PDFObject[]
*/
protected $xobjects = null;
/**
* @var array
*/
protected $dataTm = null;
/**
 * Returns the fonts declared in the "Font" entry of the page resources.
 *
 * Every font is registered under its original id and, when the id contains
 * digits, dots, dashes or underscores, also under the id reduced to those
 * characters. The table is cached once it has been built.
 *
 * @return Font[]
 */
public function getFonts()
{
    if (null !== $this->fonts) {
        return $this->fonts;
    }

    $resources = $this->get('Resources');
    if (!method_exists($resources, 'has') || !$resources->has('Font')) {
        return [];
    }

    $fontResource = $resources->get('Font');
    if ($fontResource instanceof ElementMissing) {
        return [];
    }

    $candidates = $fontResource instanceof Header
        ? $fontResource->getElements()
        : $fontResource->getHeader()->getElements();

    $byId = [];
    foreach ($candidates as $fontId => $candidate) {
        if (!($candidate instanceof Font)) {
            continue;
        }

        $byId[$fontId] = $candidate;

        // Store too on cleaned id value (only numeric)
        $cleanedId = preg_replace('/[^0-9\.\-_]/', '', $fontId);
        if ('' != $cleanedId) {
            $byId[$cleanedId] = $candidate;
        }
    }

    $this->fonts = $byId;

    return $byId;
}
/**
 * Returns the page font registered under the given id, or null.
 *
 * According to the PDF specs (https://www.adobe.com/content/dam/acom/en/devnet/pdf/pdfs/PDF32000_2008.pdf, page 238)
 * "The font resource name presented to the Tf operator is arbitrary, as are the names for all kinds of resources".
 * The unfiltered name is therefore looked up first; the id reduced to
 * digits, dots, dashes and underscores is only used as a fallback.
 */
public function getFont(string $id): ?Font
{
    $fonts = $this->getFonts();

    if (isset($fonts[$id])) {
        return $fonts[$id];
    }

    // Fallback on the cleaned id (the exact id is known to be absent here).
    $cleanedId = preg_replace('/[^0-9\.\-_]/', '', $id);

    return $fonts[$cleanedId] ?? null;
}
/**
 * Support for XObject
 *
 * Returns the entries of the "XObject" resource of the page. Every entry is
 * registered under its original id and, when the id contains digits, dots,
 * dashes or underscores, also under the id reduced to those characters.
 * The table is cached once it has been built.
 *
 * @return PDFObject[]
 */
public function getXObjects()
{
    if (null !== $this->xobjects) {
        return $this->xobjects;
    }

    $resources = $this->get('Resources');

    if (method_exists($resources, 'has') && $resources->has('XObject')) {
        $xobjectResource = $resources->get('XObject');

        // Same guard as in getFonts(): the entry may exist but be unresolvable.
        if ($xobjectResource instanceof ElementMissing) {
            return [];
        }

        if ($xobjectResource instanceof Header) {
            $xobjects = $xobjectResource->getElements();
        } else {
            $xobjects = $xobjectResource->getHeader()->getElements();
        }

        $table = [];

        foreach ($xobjects as $id => $xobject) {
            $table[$id] = $xobject;

            // Store too on cleaned id value (only numeric)
            $id = preg_replace('/[^0-9\.\-_]/', '', $id);
            if ('' != $id) {
                $table[$id] = $xobject;
            }
        }

        return $this->xobjects = $table;
    }

    return [];
}
/**
 * Returns the XObject registered under the exact given id.
 *
 * Null is returned when there is no such entry or when the entry is not a
 * PDFObject (for instance a reference that could not be resolved), so that
 * the declared return type always holds.
 */
public function getXObject(string $id): ?PDFObject
{
    $xobjects = $this->getXObjects();

    if (isset($xobjects[$id]) && $xobjects[$id] instanceof PDFObject) {
        return $xobjects[$id];
    }

    return null;
}
/**
 * Returns the text of the page.
 *
 * The "Contents" entry may be a single object, an object whose header is a
 * numerically indexed list of streams, or an array of streams; in the last
 * two cases the streams are concatenated into one virtual object first.
 *
 * @param self|null $page unused here; the text is always extracted with
 *                        this page as context
 */
public function getText(?self $page = null): string
{
    if ($contents = $this->get('Contents')) {
        if ($contents instanceof ElementMissing) {
            return '';
        } elseif ($contents instanceof ElementNull) {
            return '';
        } elseif ($contents instanceof PDFObject) {
            $elements = $contents->getHeader()->getElements();

            if (is_numeric(key($elements))) {
                $new_content = '';

                foreach ($elements as $element) {
                    if ($element instanceof ElementXRef) {
                        $new_content .= $element->getObject()->getContent();
                    } else {
                        $new_content .= $element->getContent();
                    }
                }

                $header = new Header([], $this->document);
                $contents = new PDFObject($this->document, $header, $new_content, $this->config);
            }
        } elseif ($contents instanceof ElementArray) {
            // Create a virtual global content.
            $new_content = '';

            foreach ($contents->getContent() as $content) {
                $new_content .= $content->getContent()."\n";
            }

            $header = new Header([], $this->document);
            $contents = new PDFObject($this->document, $header, $new_content, $this->config);
        }

        /*
         * Elements referencing each other on the same page can cause endless loops during text parsing.
         * To combat this we keep a recursionStack containing already parsed elements on the page.
         * The stack is emptied here after getting text from a page, even when
         * the extraction throws, so that a failure cannot leak stale entries
         * into the extraction of the next page.
         */
        try {
            return $contents->getText($this);
        } finally {
            PDFObject::$recursionStack = [];
        }
    }

    return '';
}
/**
 * Return true if the current page is a (setasign\Fpdi\Fpdi) FPDI/FPDF document
 *
 * The check looks at the 'Producer' entry of the document details: it must
 * be a string starting with "FPDF" (FPDF/FPDI writes "FPDF" followed by its
 * version there).
 *
 * @return bool true if the current page is a FPDI/FPDF document
 */
public function isFpdf(): bool
{
    // Fetch the details once instead of once per condition.
    $details = $this->document->getDetails();

    return \array_key_exists('Producer', $details)
        && \is_string($details['Producer'])
        && 0 === strncmp($details['Producer'], 'FPDF', 4);
}
/**
 * Return the zero-based position of this page in the list of pages of the
 * document.
 *
 * When the page is not part of that list, the number of pages is returned.
 *
 * @return int the page number
 */
public function getPageNumber(): int
{
    $allPages = $this->document->getPages();
    $total = \count($allPages);

    $index = 0;
    while ($index < $total && $allPages[$index] !== $this) {
        ++$index;
    }

    return $index;
}
/**
 * Return the Object of the page if the document is a FPDF/FPDI document
 *
 * The object is the XObject of this page registered under the page number
 * (see getPageNumber()).
 *
 * @return PDFObject The PDFObject for the page
 */
public function getPDFObjectForFpdf(): PDFObject
{
    $index = $this->getPageNumber();
    $candidates = $this->getXObjects();

    return $candidates[$index];
}
/**
 * Return a new PDFObject of the document created with FPDF/FPDI
 *
 * The new object shares the document, header, content and config of the
 * object returned by getPDFObjectForFpdf().
 *
 * @return PDFObject The PDFObject
 */
public function createPDFObjectForFpdf(): PDFObject
{
    $source = $this->getPDFObjectForFpdf();

    return new PDFObject(
        $source->document,
        $source->getHeader(),
        $source->getContent(),
        $source->config
    );
}
/**
 * Return page if document is a FPDF/FPDI document
 *
 * The new page shares the document, header, content and config of the
 * object returned by getPDFObjectForFpdf().
 *
 * @return Page The page
 */
public function createPageForFpdf(): self
{
    $source = $this->getPDFObjectForFpdf();

    return new self(
        $source->document,
        $source->getHeader(),
        $source->getContent(),
        $source->config
    );
}
/**
 * Returns the text of the page as a list of decoded strings.
 *
 * For FPDF/FPDI documents the text is taken from the XObject of the page.
 * Otherwise the "Contents" entry is used; it may be a single object, an
 * object whose header is a numerically indexed list of streams, or an array
 * of streams (the streams are then concatenated into one virtual object).
 *
 * @param self|null $page unused here; the text is always extracted with
 *                        this page as context
 */
public function getTextArray(?self $page = null): array
{
    if ($this->isFpdf()) {
        $pdfObject = $this->getPDFObjectForFpdf();
        $newPdfObject = $this->createPDFObjectForFpdf();

        return $newPdfObject->getTextArray($pdfObject);
    }

    $contents = $this->get('Contents');
    if (!$contents) {
        return [];
    }

    if ($contents instanceof ElementMissing) {
        return [];
    } elseif ($contents instanceof ElementNull) {
        return [];
    } elseif ($contents instanceof PDFObject) {
        $elements = $contents->getHeader()->getElements();

        if (is_numeric(key($elements))) {
            $new_content = '';

            /** @var PDFObject $element */
            foreach ($elements as $element) {
                if ($element instanceof ElementXRef) {
                    $new_content .= $element->getObject()->getContent();
                } else {
                    $new_content .= $element->getContent();
                }
            }

            $header = new Header([], $this->document);
            $contents = new PDFObject($this->document, $header, $new_content, $this->config);
        } else {
            // Extract with the page as context; when that fails, retry
            // without it. The first result is returned directly instead of
            // being thrown away and computed a second time.
            try {
                return $contents->getTextArray($this);
            } catch (\Throwable $e) {
                return $contents->getTextArray();
            }
        }
    } elseif ($contents instanceof ElementArray) {
        // Create a virtual global content.
        $new_content = '';

        /** @var PDFObject $content */
        foreach ($contents->getContent() as $content) {
            $new_content .= $content->getContent()."\n";
        }

        $header = new Header([], $this->document);
        $contents = new PDFObject($this->document, $header, $new_content, $this->config);
    }

    return $contents->getTextArray($this);
}
/**
 * Gets all the text data with its internal representation of the page.
 *
 * When the content of the "Contents" entry is an array, its parts are
 * concatenated and tokenized as one stream. Otherwise the content object
 * itself (or, for FPDF/FPDI documents, the XObject of the page) is
 * tokenized, and a synthetic "BT" command is emitted before the commands of
 * every text section.
 *
 * @return array the list of commands (see getCommandsText())
 */
public function extractRawData(): array
{
    $rawData = [];
    $source = $this->get('Contents');
    $parts = $source->getContent();

    if (\is_array($parts)) {
        // Now you can get the complete content of the object with the text on it
        $joined = '';
        foreach ($parts as $part) {
            $joined .= $part->getContent();
        }

        foreach ($this->getSectionsText($joined) as $block) {
            $rawData = array_merge($rawData, array_values($this->getCommandsText($block)));
        }

        return $rawData;
    }

    if ($this->isFpdf()) {
        $source = $this->getPDFObjectForFpdf();
    }

    foreach ($source->getSectionsText($source->getContent()) as $block) {
        $rawData[] = ['t' => '', 'o' => 'BT', 'c' => ''];
        $rawData = array_merge($rawData, array_values($source->getCommandsText($block)));
    }

    return $rawData;
}
/**
 * Gets all the decoded text data with its internal representation from a page.
 *
 * The operands of the Tj and TJ commands are decoded with the font selected
 * by the preceding Tf command; q and Q save and restore that font. All
 * other commands are returned unchanged.
 *
 * @param array|null $extractedRawData the extracted data returned by extractRawData, or
 *                                     null (or empty) if extractRawData should be called
 *
 * @return array An array with the data and the internal representation
 */
public function extractDecodedRawData(?array $extractedRawData = null): array
{
    if (!isset($extractedRawData) || !$extractedRawData) {
        $extractedRawData = $this->extractRawData();
    }
    $currentFont = null; /** @var Font $currentFont */
    $clippedFont = null;
    $fpdfPage = null;
    if ($this->isFpdf()) {
        $fpdfPage = $this->createPageForFpdf();
    }
    foreach ($extractedRawData as &$command) {
        if ('Tj' == $command['o'] || 'TJ' == $command['o']) {
            $data = $command['c'];
            if (!\is_array($data)) {
                // Single string operand. Without a current font the decoded
                // text is left empty.
                $tmpText = '';
                if (isset($currentFont)) {
                    $tmpText = $currentFont->decodeOctal($data);
                    // $tmpText = $currentFont->decodeHexadecimal($tmpText, false);
                }
                $tmpText = str_replace(
                    ['\\\\', '\(', '\)', '\n', '\r', '\t', '\ '],
                    ['\\', '(', ')', "\n", "\r", "\t", ' '],
                    $tmpText
                );
                $tmpText = mb_convert_encoding($tmpText, 'UTF-8', 'ISO-8859-1');
                if (isset($currentFont)) {
                    $tmpText = $currentFont->decodeContent($tmpText);
                }
                $command['c'] = $tmpText;
                continue;
            }

            // Array operand: only the entries at even positions are decoded.
            $numText = \count($data);
            for ($i = 0; $i < $numText; $i += 2) {
                $tmpText = $data[$i]['c'];
                $decodedText = isset($currentFont) ? $currentFont->decodeOctal($tmpText) : $tmpText;
                $decodedText = str_replace(
                    ['\\\\', '\(', '\)', '\n', '\r', '\t', '\ '],
                    ['\\', '(', ')', "\n", "\r", "\t", ' '],
                    $decodedText
                );
                $decodedText = mb_convert_encoding($decodedText, 'UTF-8', 'ISO-8859-1');
                if (isset($currentFont)) {
                    $decodedText = $currentFont->decodeContent($decodedText);
                }
                $command['c'][$i]['c'] = $decodedText;
            }
        } elseif ('Tf' == $command['o'] || 'TF' == $command['o']) {
            $fontId = explode(' ', $command['c'])[0];
            // If document is a FPDI/FPDF the $page has the correct font
            $currentFont = isset($fpdfPage) ? $fpdfPage->getFont($fontId) : $this->getFont($fontId);
        } elseif ('Q' == $command['o']) {
            $currentFont = $clippedFont;
        } elseif ('q' == $command['o']) {
            $clippedFont = $currentFont;
        }
    }
    // Break the reference left behind by the by-reference loop.
    unset($command);

    return $extractedRawData;
}
/**
 * Gets just the text commands that are involved in text positions and
 * Text Matrix (Tm)
 *
 * The commands that are kept, in their original order, are:
 *  - BT: begin a text object, initializing Tm and Tlm to the identity matrix
 *  - ET: end a text object, discarding the text matrix
 *  - TL: set the text leading, which is used by the T*, ' and " operators
 *  - Td: move to the start of the next line, offset from the start of the
 *        current line by (tx, ty)
 *  - TD: same as Td, and also set the leading to -ty
 *  - Tm: set the text matrix and the text line matrix (six numbers; the
 *        initial value is the identity matrix [1 0 0 1 0 0])
 *  - T*: move to the start of the next line
 *  - Tj: show a text string
 *  - ':  move to the next line and show a text string
 *  - ":  move to the next line and show a text string, using the given word
 *        and character spacing
 *  - Tf / TF: font selection
 *  - TJ: show one or more text strings, allowing individual glyph
 *        positioning (numbers in the array adjust the text position)
 *
 * @param array|null $extractedDecodedRawData The data extracted by extractDecodedRawData.
 *                                            If it is null (or empty), extractDecodedRawData is called.
 *
 * @return array An array with the text command of the page
 */
public function getDataCommands(?array $extractedDecodedRawData = null): array
{
    if (!isset($extractedDecodedRawData) || !$extractedDecodedRawData) {
        $extractedDecodedRawData = $this->extractDecodedRawData();
    }
    $extractedData = [];
    foreach ($extractedDecodedRawData as $command) {
        switch ($command['o']) {
            case 'BT':
            case 'ET':
            case 'TL':
            case 'Td':
            case 'TD':
            case 'Tm':
            case 'T*':
            case 'Tj':
            case "'":
            case '"':
            case 'Tf':
            case 'TF':
            case 'TJ':
                $extractedData[] = $command;
                break;

            default:
        }
    }

    return $extractedData;
}
/**
 * Gets the Text Matrix of the text in the page
 *
 * Returns an array where every item is an array whose first entry is the
 * Text Matrix (Tm) and whose second entry is the text shown at that position.
 * The Text Matrix is an array of 6 numbers (as strings). The last 2 numbers
 * are the X and Y coordinates of the text; the first 4 numbers describe
 * scaling, rotation and skew of the text.
 *
 * When $this->config->getDataTmFontInfoHasToBeIncluded() is truthy, every item
 * additionally carries the current font id and font size as third and fourth
 * entries. This applies to all text-showing operators (Tj, TJ, ' and ").
 *
 * The result is also stored in $this->dataTm.
 *
 * @param array|null $dataCommands the data extracted by getDataCommands;
 *                                 if null or empty, getDataCommands is called
 *
 * @return array an array with the data of the page including the Tm information
 *               of any text in the page
 */
public function getDataTm(?array $dataCommands = null): array
{
    if (!$dataCommands) {
        $dataCommands = $this->getDataCommands();
    }

    // At the beginning of a text object Tm is the identity matrix.
    $defaultTm = ['1', '0', '0', '1', '0', '0'];

    // Text leading used by the T*, ' and " operators.
    $defaultTl = 0;

    // Default values for font data.
    $defaultFontId = -1;
    $defaultFontSize = 1;

    // Indexes of horizontal/vertical scaling and X,Y-coordinates in the matrix (Tm).
    $hSc = 0; // horizontal scaling
    // Index of vertical scaling in the array that encodes the text matrix.
    // For more information: https://github.com/smalot/pdfparser/pull/559#discussion_r1053415500
    $vSc = 3;
    $x = 4;
    $y = 5;

    // x,y-coordinates of the text space origin in user units.
    // These are updated to the position of the currently printed string.
    $Tx = 0;
    $Ty = 0;

    $Tm = $defaultTm;
    $Tl = $defaultTl;
    $fontId = $defaultFontId;
    $fontSize = $defaultFontSize; // reflects fontSize set by Tf or Tfs

    $extractedTexts = $this->getTextArray();
    $extractedData = [];
    foreach ($dataCommands as $command) {
        // The lookup happens for every command, including those that follow the
        // last shown text (e.g. a closing ET); fall back to an empty string
        // instead of reading an undefined offset.
        $currentText = $extractedTexts[\count($extractedData)] ?? '';

        switch ($command['o']) {
            /*
             * BT
             * Begin a text object, initializing the Tm and Tlm to identity matrix
             *
             * ET
             * End a text object, discarding the text matrix
             */
            case 'BT':
            case 'ET':
                $Tm = $defaultTm;
                $Tl = $defaultTl;
                $Tx = 0;
                $Ty = 0;
                $fontId = $defaultFontId;
                $fontSize = $defaultFontSize;
                break;

            /*
             * text leading TL
             * Set the text leading, Tl, to leading. Tl is used by the T*, ' and " operators.
             * Initial value: 0
             */
            case 'TL':
                // scaled text leading
                $Tl = (float) $command['c'] * (float) $Tm[$vSc];
                break;

            /*
             * tx ty Td
             * Move to the start of the next line, offset from the start of the
             * current line by tx, ty.
             */
            case 'Td':
                $coord = explode(' ', $command['c']);
                $Tx += (float) $coord[0] * (float) $Tm[$hSc];
                $Ty += (float) $coord[1] * (float) $Tm[$vSc];
                $Tm[$x] = (string) $Tx;
                $Tm[$y] = (string) $Ty;
                break;

            /*
             * tx ty TD
             * Move to the start of the next line, offset from the start of the
             * current line by tx, ty. As a side effect, this operator sets the leading
             * parameter in the text state. This operator has the same effect as the
             * code:
             * -ty TL
             * tx ty Td
             */
            case 'TD':
                $coord = explode(' ', $command['c']);
                $Tl = -((float) $coord[1] * (float) $Tm[$vSc]);
                $Tx += (float) $coord[0] * (float) $Tm[$hSc];
                $Ty += (float) $coord[1] * (float) $Tm[$vSc];
                $Tm[$x] = (string) $Tx;
                $Tm[$y] = (string) $Ty;
                break;

            /*
             * a b c d e f Tm
             * Set the text matrix, Tm, and the text line matrix, Tlm. The operands are
             * all numbers, and the initial value for Tm and Tlm is the identity matrix
             * [1 0 0 1 0 0]
             */
            case 'Tm':
                $Tm = explode(' ', $command['c']);
                $Tx = (float) $Tm[$x];
                $Ty = (float) $Tm[$y];
                break;

            /*
             * T*
             * Move to the start of the next line. This operator has the same effect
             * as the code:
             * 0 Tl Td
             * Where Tl is the current leading parameter in the text state.
             */
            case 'T*':
                $Ty -= $Tl;
                $Tm[$y] = (string) $Ty;
                break;

            /*
             * fontname size Tf
             * From PDF 1.0 specification, page 106:
             * Sets the text font and text size in the graphics state. There is no default value for
             * either fontname or size; they must be selected using Tf before drawing any text.
             * fontname is a resource name. size is a number expressed in text space units.
             *
             * Source: https://ia902503.us.archive.org/10/items/pdfy-0vt8s-egqFwDl7L2/PDF%20Reference%201.0.pdf
             * Introduced with https://github.com/smalot/pdfparser/pull/516
             */
            case 'Tf':
                list($fontId, $fontSize) = explode(' ', $command['c'], 2);
                break;

            /*
             * string Tj
             * Show a text string.
             *
             * array TJ
             * Show one or more text strings, allowing individual glyph positioning.
             *
             * string '
             * Move to the next line and show a text string (same as: T*, string Tj).
             *
             * aw ac string "
             * Move to the next line and show a text string, using aw as the word
             * spacing and ac as the character spacing (same as: aw Tw, ac Tc, string ').
             */
            case 'Tj':
            case 'TJ':
            case "'":
            case '"':
                $operator = (string) $command['o'];
                $text = $currentText;

                if ("'" === $operator || '"' === $operator) {
                    // Both operators first move to the next line, like T*.
                    $Ty -= $Tl;
                    $Tm[$y] = (string) $Ty;
                }
                if ('"' === $operator) {
                    // NOTE(review): the original marks this as "Verify" - it assumes the
                    // text is "aw ac string" and keeps the third space-separated part.
                    $parts = explode(' ', $currentText);
                    $text = $parts[2] ?? null;
                }

                $data = [$Tm, $text];
                if ($this->config->getDataTmFontInfoHasToBeIncluded()) {
                    $data[] = $fontId;
                    $data[] = $fontSize;
                }
                $extractedData[] = $data;
                break;

            default:
        }
    }

    $this->dataTm = $extractedData;

    return $extractedData;
}
/**
 * Gets text data that are around the given coordinates (X,Y)
 *
 * Every item of the data produced by getDataTm whose text matrix position lies
 * within the given tolerance of the requested coordinates is returned. The data
 * returned by getDataTm can be used to find out where a given text is located.
 * getDataTm is called first if $this->dataTm is not populated yet.
 *
 * @param float|null $x      The X value of the coordinate to search for. If null,
 *                           just the Y value is considered (same row)
 * @param float|null $y      The Y value of the coordinate to search for. If null,
 *                           just the X value is considered (same column)
 * @param float      $xError The value less or more to consider an X to be "near"
 * @param float      $yError The value less or more to consider a Y to be "near"
 *
 * @return array An array of [text matrix, text] pairs that are near the given
 *               coordinates. If no text is "near" the x,y coordinate, or if both
 *               x and y are null, an empty array is returned.
 */
public function getTextXY(?float $x = null, ?float $y = null, float $xError = 0, float $yError = 0): array
{
    if (!isset($this->dataTm) || !$this->dataTm) {
        $this->getDataTm();
    }

    if (null === $x && null === $y) {
        return [];
    }

    $extractedData = [];
    foreach ($this->dataTm as $item) {
        $tm = $item[0];
        $xTm = (float) $tm[4];
        $yTm = (float) $tm[5];

        // A null coordinate means "do not filter on this axis".
        $xMatches = null === $x || ($xTm >= ($x - $xError) && $xTm <= ($x + $xError));
        $yMatches = null === $y || ($yTm >= ($y - $yError) && $yTm <= ($y + $yError));

        if ($xMatches && $yMatches) {
            $extractedData[] = [$tm, $item[1]];
        }
    }

    return $extractedData;
}
}

View file

@ -0,0 +1,73 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
*
* @date 2017-01-03
*
* @license LGPLv3
*
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
namespace Smalot\PdfParser;
use Smalot\PdfParser\Element\ElementArray;
/**
 * Class Pages
 *
 * A node of the page tree whose children are listed in its "Kids" entry.
 */
class Pages extends PDFObject
{
    /**
     * Returns the children of this node.
     *
     * Without $deep the content of the "Kids" entry is returned as is. With
     * $deep the tree is walked recursively: nested Pages nodes are expanded and
     * only Page objects end up in the result; any other kind of child is ignored.
     *
     * @todo Objects other than Pages or Page might need to be treated specifically in order to get Page objects out of them,
     *
     * @see https://github.com/smalot/pdfparser/issues/331
     */
    public function getPages(bool $deep = false): array
    {
        if (!$this->has('Kids')) {
            return [];
        }

        /** @var ElementArray $kidsElement */
        $kidsElement = $this->get('Kids');
        $children = $kidsElement->getContent();

        if (!$deep) {
            return $children;
        }

        // Gather one list per child, then flatten them in a single merge.
        $chunks = [];
        foreach ($children as $child) {
            if ($child instanceof self) {
                $chunks[] = $child->getPages(true);
            } elseif ($child instanceof Page) {
                $chunks[] = [$child];
            }
        }

        return $chunks ? array_merge(...$chunks) : [];
    }
}

View file

@ -0,0 +1,327 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
*
* @date 2017-01-03
*
* @license LGPLv3
*
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
namespace Smalot\PdfParser;
use Smalot\PdfParser\Element\ElementArray;
use Smalot\PdfParser\Element\ElementBoolean;
use Smalot\PdfParser\Element\ElementDate;
use Smalot\PdfParser\Element\ElementHexa;
use Smalot\PdfParser\Element\ElementName;
use Smalot\PdfParser\Element\ElementNull;
use Smalot\PdfParser\Element\ElementNumeric;
use Smalot\PdfParser\Element\ElementString;
use Smalot\PdfParser\Element\ElementXRef;
use Smalot\PdfParser\RawData\RawDataParser;
/**
 * Class Parser
 *
 * Builds a Document out of the structures produced by RawDataParser.
 */
class Parser
{
    /**
     * @var Config
     */
    private $config;

    /**
     * Objects collected while parsing, keyed by object id.
     *
     * @var PDFObject[]
     */
    protected $objects = [];

    /**
     * @var RawDataParser
     */
    protected $rawDataParser;

    /**
     * @param array       $cfg    configuration array handed to RawDataParser
     * @param Config|null $config configuration object; a default Config is created when omitted
     */
    public function __construct($cfg = [], ?Config $config = null)
    {
        $this->config = $config ?: new Config();
        $this->rawDataParser = new RawDataParser($cfg, $this->config);
    }

    public function getConfig(): Config
    {
        return $this->config;
    }

    /**
     * Reads the given file and parses its content.
     *
     * 2018/06/20 @doganoo: as users have complained multiple times that
     * parseFile() dies silently, the error control operator (@) was removed
     * so that read problems become visible.
     * See here for an example: https://github.com/smalot/pdfparser/issues/204
     *
     * @throws \Exception if the file cannot be read or its content cannot be parsed
     */
    public function parseFile(string $filename): Document
    {
        $content = file_get_contents($filename);
        if (false === $content) {
            // file_get_contents() only emits a warning; without this check the
            // failure would be passed on as if it were PDF content.
            throw new \Exception('Unable to read file "'.$filename.'".');
        }

        return $this->parseContent($content);
    }

    /**
     * @param string $content PDF content to parse
     *
     * @throws \Exception if secured PDF file was detected
     * @throws \Exception if no object list was found
     */
    public function parseContent(string $content): Document
    {
        // Create structure from raw data.
        list($xref, $data) = $this->rawDataParser->parseData($content);

        if (isset($xref['trailer']['encrypt'])) {
            throw new \Exception('Secured pdf file are currently not supported.');
        }

        if (empty($data)) {
            throw new \Exception('Object list not found. Possible secured file.');
        }

        // Create destination object.
        $document = new Document();
        $this->objects = [];

        foreach ($data as $id => $structure) {
            $this->parseObject($id, $structure, $document);
            // Drop the raw structure as soon as it has been converted.
            unset($data[$id]);
        }

        $document->setTrailer($this->parseTrailer($xref['trailer'], $document));
        $document->setObjects($this->objects);

        return $document;
    }

    /**
     * Converts the trailer array into a Header.
     *
     * Numeric values become ElementNumeric, arrays are converted recursively,
     * strings containing "_" are treated as object references and everything
     * else is parsed like a literal string.
     */
    protected function parseTrailer(array $structure, ?Document $document)
    {
        $trailer = [];

        foreach ($structure as $name => $values) {
            $name = ucfirst($name);

            if (is_numeric($values)) {
                $trailer[$name] = new ElementNumeric($values);
            } elseif (\is_array($values)) {
                $value = $this->parseTrailer($values, null);
                $trailer[$name] = new ElementArray($value, null);
            } elseif (false !== strpos($values, '_')) {
                $trailer[$name] = new ElementXRef($values, $document);
            } else {
                $trailer[$name] = $this->parseHeaderElement('(', $values, $document);
            }
        }

        return new Header($trailer, $document);
    }

    /**
     * Converts one raw object structure into a PDFObject and stores it in
     * $this->objects under $id (unless an object with that id already exists).
     *
     * A stream whose header type is "ObjStm" is unpacked instead: every object
     * it contains is stored under "<object number>_0" and nothing is stored for
     * the stream itself.
     */
    protected function parseObject(string $id, array $structure, ?Document $document)
    {
        $header = new Header([], $document);
        $content = '';

        foreach ($structure as $part) {
            if (\is_int($part)) {
                $part = [null, null];
            }
            switch ($part[0]) {
                case '[':
                    $elements = [];

                    foreach ($part[1] as $sub_element) {
                        $sub_type = $sub_element[0];
                        $sub_value = $sub_element[1];
                        $elements[] = $this->parseHeaderElement($sub_type, $sub_value, $document);
                    }

                    $header = new Header($elements, $document);
                    break;

                case '<<':
                    $header = $this->parseHeader($part[1], $document);
                    break;

                case 'stream':
                    $content = isset($part[3][0]) ? $part[3][0] : $part[1];

                    if ($header->get('Type')->equals('ObjStm')) {
                        $match = [];

                        // Split xrefs and contents.
                        preg_match('/^((\d+\s+\d+\s*)*)(.*)$/s', $content, $match);
                        $content = $match[3];

                        // Extract xrefs.
                        $xrefs = preg_split(
                            '/(\d+\s+\d+\s*)/s',
                            $match[1],
                            -1,
                            \PREG_SPLIT_NO_EMPTY | \PREG_SPLIT_DELIM_CAPTURE
                        );
                        $table = [];

                        // Each entry is "<object number> <offset>"; index the table by offset.
                        foreach ($xrefs as $xref) {
                            list($objectNumber, $objectOffset) = preg_split("/\s+/", trim($xref));
                            $table[$objectOffset] = $objectNumber;
                        }

                        ksort($table);

                        $ids = array_values($table);
                        $positions = array_keys($table);

                        foreach ($positions as $index => $position) {
                            $objectId = $ids[$index].'_0';
                            // An object ends where the next one starts, the last one at the end of the content.
                            $next_position = isset($positions[$index + 1]) ? $positions[$index + 1] : \strlen($content);
                            $sub_content = substr($content, $position, (int) $next_position - (int) $position);

                            $sub_header = Header::parse($sub_content, $document);
                            $object = PDFObject::factory($document, $sub_header, '', $this->config);
                            $this->objects[$objectId] = $object;
                        }

                        // It is not necessary to store this content.

                        return;
                    }
                    break;

                default:
                    if ('null' != $part) {
                        $element = $this->parseHeaderElement($part[0], $part[1], $document);

                        if ($element) {
                            $header = new Header([$element], $document);
                        }
                    }
                    break;
            }
        }

        if (!isset($this->objects[$id])) {
            $this->objects[$id] = PDFObject::factory($document, $header, $content, $this->config);
        }
    }

    /**
     * Converts a flat list of alternating name/value entries into a Header.
     *
     * @throws \Exception
     */
    protected function parseHeader(array $structure, ?Document $document): Header
    {
        $elements = [];
        $count = \count($structure);

        // Entries come in pairs: the name at $position, its value right after.
        for ($position = 0; $position < $count; $position += 2) {
            $name = $structure[$position][1];
            $type = $structure[$position + 1][0];
            $value = $structure[$position + 1][1];

            $elements[$name] = $this->parseHeaderElement($type, $value, $document);
        }

        return new Header($elements, $document);
    }

    /**
     * Converts a single raw (type, value) pair into the matching element.
     *
     * @param string|array $value
     *
     * @return Element|Header|null null for types that carry no element
     *                             ("endstream", "obj" and the empty type)
     *
     * @throws \Exception if the type is not known
     */
    protected function parseHeaderElement(?string $type, $value, ?Document $document)
    {
        $valueIsEmpty = null == $value || '' == $value || false == $value;
        if (('<<' === $type || '>>' === $type) && $valueIsEmpty) {
            // parseHeader() requires an array.
            $value = [];
        }

        switch ($type) {
            case '<<':
            case '>>':
                $header = $this->parseHeader($value, $document);
                PDFObject::factory($document, $header, null, $this->config);

                return $header;

            case 'numeric':
                return new ElementNumeric($value);

            case 'boolean':
                return new ElementBoolean($value);

            case 'null':
                return new ElementNull();

            case '(':
                // A literal string may hold a date; try that first.
                if ($date = ElementDate::parse('('.$value.')', $document)) {
                    return $date;
                }

                return ElementString::parse('('.$value.')', $document);

            case '<':
                // Hexadecimal strings are decoded and then handled like literal strings.
                return $this->parseHeaderElement('(', ElementHexa::decode($value), $document);

            case '/':
                return ElementName::parse('/'.$value, $document);

            case 'ojbref': // old mistake in tcpdf parser
            case 'objref':
                return new ElementXRef($value, $document);

            case '[':
                $values = [];

                if (\is_array($value)) {
                    foreach ($value as $sub_element) {
                        $sub_type = $sub_element[0];
                        $sub_value = $sub_element[1];
                        $values[] = $this->parseHeaderElement($sub_type, $sub_value, $document);
                    }
                }

                return new ElementArray($values, $document);

            case 'endstream':
            case 'obj': // I don't know what it means but got my project fixed.
            case '':
                // Nothing to do with.
                return null;

            default:
                throw new \Exception('Invalid type: "'.$type.'".');
        }
    }
}

View file

@ -0,0 +1,396 @@
<?php
/**
* This file is based on code of tecnickcom/TCPDF PDF library.
*
* Original author Nicola Asuni (info@tecnick.com) and
* contributors (https://github.com/tecnickcom/TCPDF/graphs/contributors).
*
* @see https://github.com/tecnickcom/TCPDF
*
* Original code was licensed on the terms of the LGPL v3.
*
* ------------------------------------------------------------------------------
*
* @file This file is part of the PdfParser library.
*
* @author Konrad Abicht <k.abicht@gmail.com>
*
* @date 2020-01-06
*
* @license LGPLv3
*
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
namespace Smalot\PdfParser\RawData;
/**
 * Decoders for the stream filters supported by the parser.
 */
class FilterHelper
{
    /**
     * Names of the filters decodeFilter() can actually decode.
     */
    protected $availableFilters = ['ASCIIHexDecode', 'ASCII85Decode', 'LZWDecode', 'FlateDecode', 'RunLengthDecode'];

    /**
     * Decode data using the specified filter type.
     *
     * Unknown filter names leave the data untouched.
     *
     * @param string $filter            Filter name
     * @param string $data              Data to decode
     * @param int    $decodeMemoryLimit Maximum length of the decoded data, only used by FlateDecode
     *
     * @return string Decoded data string
     *
     * @throws \Exception if a certain decode function is not implemented yet
     *                    or if the data cannot be decoded
     */
    public function decodeFilter(string $filter, string $data, int $decodeMemoryLimit = 0): string
    {
        switch ($filter) {
            case 'ASCIIHexDecode':
                return $this->decodeFilterASCIIHexDecode($data);

            case 'ASCII85Decode':
                return $this->decodeFilterASCII85Decode($data);

            case 'LZWDecode':
                return $this->decodeFilterLZWDecode($data);

            case 'FlateDecode':
                return $this->decodeFilterFlateDecode($data, $decodeMemoryLimit);

            case 'RunLengthDecode':
                return $this->decodeFilterRunLengthDecode($data);

            case 'CCITTFaxDecode':
                throw new \Exception('Decode CCITTFaxDecode not implemented yet.');
            case 'JBIG2Decode':
                throw new \Exception('Decode JBIG2Decode not implemented yet.');
            case 'DCTDecode':
                throw new \Exception('Decode DCTDecode not implemented yet.');
            case 'JPXDecode':
                throw new \Exception('Decode JPXDecode not implemented yet.');
            case 'Crypt':
                throw new \Exception('Decode Crypt not implemented yet.');
            default:
                return $data;
        }
    }

    /**
     * ASCIIHexDecode
     *
     * Decodes data encoded in an ASCII hexadecimal representation, reproducing the original binary data.
     *
     * @param string $data Data to decode
     *
     * @return string data string
     *
     * @throws \Exception if the data holds non-hexadecimal characters, or an odd
     *                    number of digits without an EOD marker
     */
    protected function decodeFilterASCIIHexDecode(string $data): string
    {
        // all white-space characters shall be ignored
        $data = preg_replace('/[\s]/', '', $data);

        // check for EOD character: GREATER-THAN SIGN (3Eh)
        $eodPosition = strpos($data, '>');
        $hasEod = false !== $eodPosition;
        if ($hasEod) {
            // remove EOD and extra data (if any)
            $data = substr($data, 0, $eodPosition);
        }

        if (0 !== \strlen($data) % 2) {
            // odd number of hexadecimal digits
            if (!$hasEod) {
                throw new \Exception('decodeFilterASCIIHexDecode: invalid code');
            }
            // EOD shall behave as if a 0 (zero) followed the last digit
            $data .= '0';
        }

        // check for invalid characters
        if (preg_match('/[^a-fA-F\d]/', $data) > 0) {
            throw new \Exception('decodeFilterASCIIHexDecode: invalid code');
        }

        // get one byte of binary data for each pair of ASCII hexadecimal digits
        return pack('H*', $data);
    }

    /**
     * ASCII85Decode
     *
     * Decodes data encoded in an ASCII base-85 representation, reproducing the original binary data.
     *
     * @param string $data Data to decode
     *
     * @return string data string
     *
     * @throws \Exception if the data holds characters outside the base-85 alphabet,
     *                    a "z" inside a group, or a final group of a single character
     */
    protected function decodeFilterASCII85Decode(string $data): string
    {
        // initialize string to return
        $decoded = '';

        // all white-space characters shall be ignored
        $data = preg_replace('/[\s]/', '', $data);

        // remove the optional start sequence <~ (3Ch)(7Eh), but only when it really is a prefix
        if (0 === strpos($data, '<~')) {
            $data = substr($data, 2);
        }

        // check for EOD: 2-character sequence ~> (7Eh)(3Eh)
        $eod = strpos($data, '~>');
        if (false !== $eod) {
            // remove EOD and extra data (if any)
            $data = substr($data, 0, $eod);
        }

        // data length
        $data_length = \strlen($data);

        // check for invalid characters: valid are "!" to "u" plus "z" (short form of four zero bytes)
        if (preg_match('/[^\x21-\x75z]/', $data) > 0) {
            throw new \Exception('decodeFilterASCII85Decode: invalid code');
        }

        // z sequence
        $zseq = \chr(0).\chr(0).\chr(0).\chr(0);

        // position inside a group of 5 characters (0-4)
        $group_pos = 0;
        $tuple = 0;
        $pow85 = [85 * 85 * 85 * 85, 85 * 85 * 85, 85 * 85, 85, 1];

        // for each byte
        for ($i = 0; $i < $data_length; ++$i) {
            // get char value
            $char = \ord($data[$i]);

            if (122 == $char) { // 'z'
                if (0 != $group_pos) {
                    throw new \Exception('decodeFilterASCII85Decode: invalid code');
                }
                $decoded .= $zseq;
                continue;
            }

            // the value represented by a group of 5 characters should never be greater than 2^32 - 1
            $tuple += (($char - 33) * $pow85[$group_pos]);
            if (4 == $group_pos) {
                // pack('N') keeps the low 32 bits in big-endian order
                $decoded .= pack('N', $tuple);
                $tuple = 0;
                $group_pos = 0;
            } else {
                ++$group_pos;
            }
        }

        if (1 == $group_pos) {
            // a single trailing character cannot encode a byte
            throw new \Exception('decodeFilterASCII85Decode: invalid code');
        }

        if ($group_pos > 1) {
            // last, incomplete tuple: n characters encode n - 1 bytes; adding the
            // weight of the last read digit compensates for the missing digits
            $tuple += $pow85[$group_pos - 1];
            $decoded .= substr(pack('N', $tuple), 0, $group_pos - 1);
        }

        return $decoded;
    }

    /**
     * FlateDecode
     *
     * Decompresses data encoded using the zlib/deflate compression method, reproducing the original text or binary data.
     *
     * @param string $data              Data to decode
     * @param int    $decodeMemoryLimit Memory limit on deflation
     *
     * @return string data string
     *
     * @throws \Exception if the data cannot be uncompressed
     */
    protected function decodeFilterFlateDecode(string $data, int $decodeMemoryLimit): ?string
    {
        /*
         * gzuncompress emits an E_WARNING in case of an error (like $data is empty).
         * The following error handler turns such a warning into an exception, which is catchable.
         */
        set_error_handler(function ($errNo, $errStr) {
            if (\E_WARNING === $errNo) {
                throw new \Exception($errStr);
            }

            // fallback to default php error handler
            return false;
        });

        try {
            $decoded = gzuncompress($data, $decodeMemoryLimit);
            if (false === $decoded) {
                throw new \Exception('decodeFilterFlateDecode: invalid code');
            }
        } finally {
            // Restore old handler just in case it was customized outside of PDFParser.
            restore_error_handler();
        }

        return $decoded;
    }

    /**
     * LZWDecode
     *
     * Decompresses data encoded using the LZW (Lempel-Ziv-Welch) adaptive compression method, reproducing the original text or binary data.
     *
     * @param string $data Data to decode
     *
     * @return string Data string
     */
    protected function decodeFilterLZWDecode(string $data): string
    {
        // initialize string to return
        $decoded = '';

        // convert string to binary string
        $byteCount = \strlen($data);
        $bitstring = '';
        for ($i = 0; $i < $byteCount; ++$i) {
            $bitstring .= sprintf('%08b', \ord($data[$i]));
        }

        // number of bits and read position; reading by offset avoids copying
        // the remaining bit string for every code
        $bitCount = \strlen($bitstring);
        $offset = 0;

        // initialize code length in bits
        $bitlen = 9;
        // initialize dictionary index
        $dix = 258;
        // initialize the dictionary (with the first 256 entries).
        $dictionary = [];
        for ($i = 0; $i < 256; ++$i) {
            $dictionary[$i] = \chr($i);
        }

        // previous val
        $prev_index = 0;

        // read codes of $bitlen bits until the data ends or the EOD marker (257) shows up
        while ($offset < $bitCount) {
            $index = bindec(substr($bitstring, $offset, $bitlen));
            if (257 == $index) {
                break;
            }
            $offset += $bitlen;

            if (256 == $index) { // clear-table marker
                // reset code length in bits
                $bitlen = 9;
                // reset dictionary index
                $dix = 258;
                $prev_index = 256;
                // reset the dictionary (with the first 256 entries).
                $dictionary = [];
                for ($i = 0; $i < 256; ++$i) {
                    $dictionary[$i] = \chr($i);
                }
            } elseif (256 == $prev_index) {
                // first entry
                $decoded .= $dictionary[$index];
                $prev_index = $index;
            } else {
                // check if index exist in the dictionary
                if ($index < $dix) {
                    // index exist on dictionary
                    $decoded .= $dictionary[$index];
                    $dic_val = $dictionary[$prev_index].$dictionary[$index][0];
                    // store current index
                    $prev_index = $index;
                } else {
                    // index does not exist yet: it denotes the entry that is about to
                    // be created (previous string plus its own first character)
                    $dic_val = $dictionary[$prev_index].$dictionary[$prev_index][0];
                    $decoded .= $dic_val;
                    // the entry stored below is the new "previous" string
                    $prev_index = $dix;
                }

                // update dictionary
                $dictionary[$dix] = $dic_val;
                ++$dix;

                // change bit length by case
                if (2047 == $dix) {
                    $bitlen = 12;
                } elseif (1023 == $dix) {
                    $bitlen = 11;
                } elseif (511 == $dix) {
                    $bitlen = 10;
                }
            }
        }

        return $decoded;
    }

    /**
     * RunLengthDecode
     *
     * Decompresses data encoded using a byte-oriented run-length encoding algorithm.
     *
     * @param string $data Data to decode
     *
     * @return string Data string
     */
    protected function decodeFilterRunLengthDecode(string $data): string
    {
        // initialize string to return
        $decoded = '';
        // data length
        $data_length = \strlen($data);
        $i = 0;

        while ($i < $data_length) {
            // get current byte value
            $byte = \ord($data[$i]);

            if (128 == $byte) {
                // a length value of 128 denote EOD
                break;
            }

            if ($byte < 128) {
                // if the length byte is in the range 0 to 127
                // the following length + 1 (1 to 128) bytes shall be copied literally during decompression
                $decoded .= substr($data, $i + 1, $byte + 1);
                // move to next block
                $i += ($byte + 2);
                continue;
            }

            // if length is in the range 129 to 255,
            // the following single byte shall be copied 257 - length (2 to 128) times during decompression
            if (!isset($data[$i + 1])) {
                // truncated data: the byte to repeat is missing
                break;
            }
            $decoded .= str_repeat($data[$i + 1], 257 - $byte);
            // move to next block
            $i += 2;
        }

        return $decoded;
    }

    /**
     * @return array list of available filters
     */
    public function getAvailableFilters(): array
    {
        return $this->availableFilters;
    }
}

View file

@ -0,0 +1,902 @@
<?php
/**
* This file is based on code of tecnickcom/TCPDF PDF library.
*
* Original author Nicola Asuni (info@tecnick.com) and
* contributors (https://github.com/tecnickcom/TCPDF/graphs/contributors).
*
* @see https://github.com/tecnickcom/TCPDF
*
* Original code was licensed on the terms of the LGPL v3.
*
* ------------------------------------------------------------------------------
*
* @file This file is part of the PdfParser library.
*
* @author Konrad Abicht <k.abicht@gmail.com>
*
* @date 2020-01-06
*
* @license LGPLv3
*
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
namespace Smalot\PdfParser\RawData;
use Smalot\PdfParser\Config;
class RawDataParser
{
/**
* @var \Smalot\PdfParser\Config
*/
private $config;
/**
* Configuration array.
*/
protected $cfg = [
// if `true` ignore filter decoding errors
'ignore_filter_decoding_errors' => true,
// if `true` ignore missing filter decoding errors
'ignore_missing_filter_decoders' => true,
];
protected $filterHelper;
protected $objects;
/**
 * @param array       $cfg    Configuration array, default is []; merged over the defaults in $this->cfg
 * @param Config|null $config Configuration object; a default Config is created when omitted
 */
public function __construct($cfg = [], ?Config $config = null)
{
    // merge given array with default values
    $this->cfg = array_merge($this->cfg, $cfg);
    $this->filterHelper = new FilterHelper();
    $this->config = $config ?: new Config();
}
/**
 * Decode the specified stream.
 *
 * The stream is first cut down to the length declared in its dictionary (if
 * that is shorter), then every filter named in the dictionary is applied in
 * order. Filters the FilterHelper does not list as available are not applied
 * but returned to the caller.
 *
 * @param string $pdfData PDF data
 * @param array  $xref    Cross-reference data, used to resolve an indirect Filter entry
 * @param array  $sdic    Stream's dictionary array
 * @param string $stream  Stream to decode
 *
 * @return array containing decoded stream data and remaining filters
 *
 * @throws \Exception if a filter fails and the matching "ignore_*" option in $this->cfg is off
 */
protected function decodeStream(string $pdfData, array $xref, array $sdic, string $stream): array
{
    // get stream length and filters
    $slength = \strlen($stream);
    if ($slength <= 0) {
        return ['', []];
    }

    $filters = [];
    foreach ($sdic as $k => $v) {
        // only name entries that are followed by a value are of interest
        if ('/' != $v[0] || !isset($sdic[$k + 1])) {
            continue;
        }
        $next = $sdic[$k + 1];

        if ('Length' == $v[1] && 'numeric' == $next[0]) {
            // get declared stream length
            $declength = (int) $next[1];
            if ($declength < $slength) {
                $stream = substr($stream, 0, $declength);
                $slength = $declength;
            }
        } elseif ('Filter' == $v[1]) {
            // resolve indirect object
            $objval = $this->getObjectVal($pdfData, $xref, $next);
            if ('/' == $objval[0]) {
                // single filter
                $filters[] = $objval[1];
            } elseif ('[' == $objval[0]) {
                // array of filters
                foreach ($objval[1] as $flt) {
                    if ('/' == $flt[0]) {
                        $filters[] = $flt[1];
                    }
                }
            }
        }
    }

    // decode the stream
    $availableFilters = $this->filterHelper->getAvailableFilters();
    $remaining_filters = [];
    foreach ($filters as $filter) {
        if (!\in_array($filter, $availableFilters, true)) {
            // add missing filter to array
            $remaining_filters[] = $filter;
            continue;
        }

        try {
            $stream = $this->filterHelper->decodeFilter($filter, $stream, $this->config->getDecodeMemoryLimit());
        } catch (\Exception $e) {
            $emsg = $e->getMessage();
            // A message starting with "~" is treated as "decoder missing", anything
            // else (including an empty message) as a decoding error.
            $isMissingDecoder = '' !== $emsg && '~' == $emsg[0];
            if (($isMissingDecoder && !$this->cfg['ignore_missing_filter_decoders'])
                || (!$isMissingDecoder && !$this->cfg['ignore_filter_decoding_errors'])
            ) {
                // keep the original exception reachable for debugging
                throw new \Exception($emsg, 0, $e);
            }
        }
    }

    return [$stream, $remaining_filters];
}
/**
 * Decode the Cross-Reference section
 *
 * Reads the entries of the table that follows the 'xref' keyword, then the
 * trailer dictionary behind it. Offsets and trailer values that are already
 * present in $xref are kept. If the trailer names a previous section (Prev),
 * that section is read through getXrefData as well.
 *
 * @param string $pdfData   PDF data
 * @param int    $startxref Offset at which the xref section starts (position of the 'xref' keyword)
 * @param array  $xref      Previous xref array (if any)
 *
 * @return array containing xref and trailer data
 *
 * @throws \Exception if no trailer is found
 */
protected function decodeXref(string $pdfData, int $startxref, array $xref = []): array
{
    // step over the word 'xref' (4 characters) and the white space behind it
    $startxref += 4;
    $offset = $startxref + strspn($pdfData, $this->config->getPdfWhitespaces(), $startxref);

    // number of the object the next table entry belongs to
    $objectNumber = 0;

    // walk over the cross-reference entries and subsection headers
    while (preg_match('/([0-9]+)[\x20]([0-9]+)[\x20]?([nf]?)(\r\n|[\x20]?[\r\n])/', $pdfData, $entry, \PREG_OFFSET_CAPTURE, $offset) > 0) {
        if ($entry[0][1] != $offset) {
            // the match is not adjacent anymore: we are on another section
            break;
        }
        $offset += \strlen($entry[0][0]);

        switch ($entry[3][0]) {
            case 'n':
                // in-use entry, keyed by [object number]_[generation number]
                $key = $objectNumber.'_'.(int) $entry[2][0];
                if (!isset($xref['xref'][$key])) {
                    // first offset seen for an object wins
                    $xref['xref'][$key] = (int) $entry[1][0];
                }
                ++$objectNumber;
                break;

            case 'f':
                // free entry: nothing to store
                ++$objectNumber;
                break;

            default:
                // subsection header: first number is the starting object number
                $objectNumber = (int) $entry[1][0];
        }
    }

    // get trailer data
    $trailerFound = preg_match('/trailer[\s]*<<(.*)>>/isU', $pdfData, $trailerMatch, \PREG_OFFSET_CAPTURE, $offset) > 0;
    if (!$trailerFound) {
        throw new \Exception('Unable to find trailer');
    }
    $trailerData = $trailerMatch[1][0];

    if (empty($xref['trailer'])) {
        // get only the last updated version
        $xref['trailer'] = [];

        if (preg_match('/Size[\s]+([0-9]+)/i', $trailerData, $found) > 0) {
            $xref['trailer']['size'] = (int) $found[1];
        }

        // indirect references, stored as [object number]_[generation number]
        $references = ['Root' => 'root', 'Encrypt' => 'encrypt', 'Info' => 'info'];
        foreach ($references as $pdfKey => $trailerKey) {
            if (preg_match('/'.$pdfKey.'[\s]+([0-9]+)[\s]+([0-9]+)[\s]+R/i', $trailerData, $found) > 0) {
                $xref['trailer'][$trailerKey] = sprintf('%d_%d', (int) $found[1], (int) $found[2]);
            }
        }

        if (preg_match('/ID[\s]*[\[][\s]*[<]([^>]*)[>][\s]*[<]([^>]*)[>]/i', $trailerData, $found) > 0) {
            $xref['trailer']['id'] = [$found[1], $found[2]];
        }
    }

    if (preg_match('/Prev[\s]+([0-9]+)/i', $trailerData, $found) > 0) {
        // get previous xref
        $xref = $this->getXrefData($pdfData, (int) $found[1], $xref);
    }

    return $xref;
}
/**
 * Decode the Cross-Reference Stream section
 *
 * Reads the stream object located at $startxref, collects the entries of its dictionary
 * (/Type, /Index, /W, /Prev, /DecodeParms and, if $xref has no trailer yet, the trailer
 * entries), reverses the PNG predictor when one is declared and turns every row of the
 * decoded stream into an entry of $xref['xref']:
 *  - type 1 rows are stored as "[object number]_[generation number]" => byte offset
 *  - type 2 rows are stored as "[object stream number]_0_[index in stream]" => -1
 * Entries that already exist for a type 1 row are kept.
 * When the dictionary names a /Prev offset, that section is decoded as well.
 *
 * @param string $pdfData   PDF data
 * @param int    $startxref Offset at which the xref section starts
 * @param array  $xref      Previous xref array (if any)
 *
 * @return array containing xref and trailer data
 *
 * @throws \Exception if unknown PNG predictor detected
 */
protected function decodeXrefStream(string $pdfData, int $startxref, array $xref = []): array
{
    // try to read Cross-Reference Stream
    $xrefobj = $this->getRawObject($pdfData, $startxref);
    $xrefcrs = $this->getIndirectObject($pdfData, $xref, $xrefobj[1], $startxref, true);

    // get only the last updated version of the trailer
    $filltrailer = empty($xref['trailer']);
    if ($filltrailer) {
        $xref['trailer'] = [];
    }
    if (!isset($xref['xref'])) {
        $xref['xref'] = [];
    }

    $valid_crs = false;
    $columns = 0;
    $predictor = null;
    $prevxref = null;
    // list of [first object number, number of objects] pairs taken from /Index
    $index_blocks = [];
    // number of bytes (in the decoded stream) of each of the three fields, taken from /W
    $wb = [0, 0, 0];

    // the first element of the object is expected to be the stream dictionary
    $sarr = $xrefcrs[0][1] ?? null;
    if (!\is_array($sarr)) {
        $sarr = [];
    }

    foreach ($sarr as $k => $v) {
        if ('/' !== $v[0]) {
            // only name objects can be dictionary keys
            continue;
        }
        // value that follows the key (null when the key is the last element)
        $next = $sarr[$k + 1] ?? null;

        switch ($v[1]) {
            case 'Type':
                if (isset($next[0], $next[1]) && '/' === $next[0] && 'XRef' === $next[1]) {
                    $valid_crs = true;
                }
                break;
            case 'Index':
                if (isset($next[1]) && \is_array($next[1])) {
                    $index_blocks = [];
                    $pairs = $next[1];
                    // a dangling value without its partner is ignored
                    for ($m = 0; ($m + 1) < \count($pairs); $m += 2) {
                        $first = (int) ($pairs[$m][1] ?? 0);
                        $count = (int) ($pairs[$m + 1][1] ?? 0);
                        if ($count > 0) {
                            // subsections without objects do not consume any row
                            $index_blocks[] = [$first, $count];
                        }
                    }
                }
                break;
            case 'Prev':
                if (isset($next[0], $next[1]) && 'numeric' === $next[0]) {
                    // get previous xref offset
                    $prevxref = (int) $next[1];
                }
                break;
            case 'W':
                if (isset($next[1]) && \is_array($next[1])) {
                    for ($c = 0; $c < 3; ++$c) {
                        $wb[$c] = max(0, (int) ($next[1][$c][1] ?? 0));
                    }
                }
                break;
            case 'DecodeParms':
                if (isset($next[1]) && \is_array($next[1])) {
                    $decpar = $next[1];
                    foreach ($decpar as $kdc => $vdc) {
                        $param = $decpar[$kdc + 1] ?? null;
                        if ('/' !== $vdc[0] || !isset($param[0], $param[1]) || 'numeric' !== $param[0]) {
                            continue;
                        }
                        if ('Columns' === $vdc[1]) {
                            $columns = (int) $param[1];
                        } elseif ('Predictor' === $vdc[1]) {
                            $predictor = (int) $param[1];
                        }
                    }
                }
                break;
            case 'Size':
                if ($filltrailer && isset($next[0], $next[1]) && 'numeric' === $next[0]) {
                    $xref['trailer']['size'] = $next[1];
                }
                break;
            case 'Root':
            case 'Info':
            case 'Encrypt':
                if ($filltrailer && isset($next[0], $next[1]) && 'objref' === $next[0]) {
                    // stored under 'root', 'info' or 'encrypt'
                    $xref['trailer'][strtolower($v[1])] = $next[1];
                }
                break;
            case 'ID':
                if ($filltrailer && null !== $next) {
                    $xref['trailer']['id'] = [
                        $next[1][0][1] ?? null,
                        $next[1][1][1] ?? null,
                    ];
                }
                break;
            default:
                break;
        }
    }

    // decode data (a stream without usable field widths cannot hold any entry)
    $stream = $xrefcrs[1][3][0] ?? null;
    if ($valid_crs && \is_string($stream) && array_sum($wb) > 0) {
        // convert the stream into a zero-based list of byte values
        $bytes = unpack('C*', $stream);
        $bytes = (false === $bytes) ? [] : array_values($bytes);

        if (null !== $predictor && $predictor >= 10) {
            // predictor values from 10 upwards announce PNG row filters
            $ddata = $this->decodePngPredictorRows($bytes, $columns);
        } else {
            // no (PNG) prediction: rows are plain field triplets
            $ddata = array_chunk($bytes, array_sum($wb));
        }

        $sdata = [];
        // for every row
        foreach ($ddata as $k => $row) {
            // initialize new row
            $sdata[$k] = [0, 0, 0];
            if (0 === $wb[0]) {
                // the type field is absent: default type field
                $sdata[$k][0] = 1;
            }
            $i = 0; // count bytes in the row
            // for every column
            for ($c = 0; $c < 3; ++$c) {
                // for every byte on the column (most significant byte first)
                for ($b = 0; $b < $wb[$c]; ++$b) {
                    if (isset($row[$i])) {
                        $sdata[$k][$c] += ($row[$i] << (($wb[$c] - 1 - $b) * 8));
                    }
                    ++$i;
                }
            }
        }

        // fill xref, starting from the first object number of the first /Index entry
        $obj_num = empty($index_blocks) ? 0 : $index_blocks[0][0];
        foreach ($sdata as $row) {
            switch ($row[0]) {
                case 0: // (f) linked list of free objects
                    break;
                case 1: // (n) objects that are in use but are not compressed
                    // create unique object index: [object number]_[generation number]
                    $index = $obj_num.'_'.$row[2];
                    // check if object already exist
                    if (!isset($xref['xref'][$index])) {
                        // store object offset position
                        $xref['xref'][$index] = $row[1];
                    }
                    break;
                case 2: // compressed objects
                    // $row[1] = object number of the object stream in which this object is stored
                    // $row[2] = index of this object within the object stream
                    $index = $row[1].'_0_'.$row[2];
                    $xref['xref'][$index] = -1;
                    break;
                default: // null objects
                    break;
            }
            ++$obj_num;
            if (!empty($index_blocks)) {
                // reduce the number of remaining objects
                --$index_blocks[0][1];
                if ($index_blocks[0][1] <= 0) {
                    // remove the actual used /Index entry
                    array_shift($index_blocks);
                    if (!empty($index_blocks)) {
                        // load the first object number of the following /Index entry
                        $obj_num = $index_blocks[0][0];
                    }
                    // once all entries are used up, numbering simply keeps counting
                }
            }
        }
    } // end decoding data

    if (null !== $prevxref) {
        // get previous xref
        $xref = $this->getXrefData($pdfData, $prevxref, $xref);
    }

    return $xref;
}
/**
 * Reverse the PNG row filters applied to a predicted stream.
 *
 * Every row consists of one filter-type byte followed by $columns data bytes. Filters
 * are undone with the already reconstructed bytes to the left and above, treating
 * bytes outside the row (and the row above the first one) as zero. One byte per pixel
 * is assumed.
 *
 * @param array<int> $bytes   Zero-based list of byte values of the predicted stream
 * @param int        $columns Number of data bytes in a row
 *
 * @return array<int, array<int>> reconstructed rows, without the filter-type byte
 *
 * @throws \Exception if unknown PNG predictor detected
 */
private function decodePngPredictorRows(array $bytes, int $columns): array
{
    // number of bytes in a row (filter-type byte included)
    $rowlen = max(1, $columns + 1);
    $ddata = [];
    // the row above the first one counts as zeros
    $prev_row = array_fill(0, $rowlen, 0);

    foreach (array_chunk($bytes, $rowlen) as $k => $row) {
        $ddata[$k] = [];
        // get PNG predictor value of this row
        $filter = 10 + $row[0];
        for ($i = 1; $i <= $columns; ++$i) {
            // index in the reconstructed row
            $j = $i - 1;
            // a truncated last row is padded with zeros
            $value = $row[$i] ?? 0;
            $row_up = $prev_row[$j] ?? 0;
            if (1 === $i) {
                $row_left = 0;
                $row_upleft = 0;
            } else {
                // neighbours must be reconstructed values, not the filtered input
                $row_left = $ddata[$k][$j - 1];
                $row_upleft = $prev_row[$j - 1] ?? 0;
            }
            switch ($filter) {
                case 10: // PNG prediction (on encoding, PNG None on all rows)
                    $ddata[$k][$j] = $value;
                    break;
                case 11: // PNG prediction (on encoding, PNG Sub on all rows)
                    $ddata[$k][$j] = (($value + $row_left) & 0xFF);
                    break;
                case 12: // PNG prediction (on encoding, PNG Up on all rows)
                    $ddata[$k][$j] = (($value + $row_up) & 0xFF);
                    break;
                case 13: // PNG prediction (on encoding, PNG Average on all rows)
                    // integer division keeps the operand of the bit mask an integer
                    $ddata[$k][$j] = (($value + intdiv($row_left + $row_up, 2)) & 0xFF);
                    break;
                case 14: // PNG prediction (on encoding, PNG Paeth on all rows)
                    // initial estimate
                    $p = ($row_left + $row_up - $row_upleft);
                    // distances
                    $pa = abs($p - $row_left);
                    $pb = abs($p - $row_up);
                    $pc = abs($p - $row_upleft);
                    // nearest neighbour wins; ties are resolved in the order left, up, upper left
                    if ($pa <= $pb && $pa <= $pc) {
                        $ddata[$k][$j] = (($value + $row_left) & 0xFF);
                    } elseif ($pb <= $pc) {
                        $ddata[$k][$j] = (($value + $row_up) & 0xFF);
                    } else {
                        $ddata[$k][$j] = (($value + $row_upleft) & 0xFF);
                    }
                    break;
                default: // PNG prediction (on encoding, PNG optimum)
                    throw new \Exception('Unknown PNG predictor: '.$filter);
            }
        }
        $prev_row = $ddata[$k];
    }

    return $ddata;
}
/**
 * Build the regular expression matching an indirect object header
 * ("[object number] [generation number] obj").
 *
 * @param array $objRefs Object number at index 0, generation number at index 1
 *
 * @return string delimited pattern in which the parts are joined by the configured PDF whitespace expression
 */
protected function getObjectHeaderPattern(array $objRefs): string
{
    // consider all whitespace character (PDF specifications)
    return implode('', [
        '/',
        $objRefs[0],
        $this->config->getPdfWhitespacesRegex(),
        $objRefs[1],
        $this->config->getPdfWhitespacesRegex(),
        'obj/',
    ]);
}
/**
 * Length in bytes of an indirect object header such as "4 0 obj".
 *
 * @param array $objRefs Object number at index 0, generation number at index 1
 *
 * @return int length of both numbers plus two separators and the "obj" keyword
 */
protected function getObjectHeaderLen(array $objRefs): int
{
    // 2 whitespaces + strlen("obj") = 5
    $fixedPart = 5;
    $objectNumberLen = \strlen($objRefs[0]);
    $generationLen = \strlen($objRefs[1]);

    return $fixedPart + $objectNumberLen + $generationLen;
}
/**
 * Get content of indirect object.
 *
 * The object header is expected at $offset (after optional white space and leading
 * zeros). Elements are then read one after another until 'endobj' is met or the
 * reader stops advancing; the last element read is dropped from the result.
 *
 * @param string $pdfData  PDF data
 * @param array  $xref     Cross-reference data, handed over to decodeStream() when streams are decoded
 * @param string $objRef   Object number and generation number separated by underscore character
 * @param int    $offset   Object offset
 * @param bool   $decoding If true decode streams
 *
 * @return array list of the object's elements, or ['null', 'null', offset] when no matching header is found at $offset
 *
 * @throws \Exception if invalid object reference found
 */
protected function getIndirectObject(string $pdfData, array $xref, string $objRef, int $offset = 0, bool $decoding = true): array
{
    // "[object number]_[generation number]" => [object number, generation number]
    $refParts = explode('_', $objRef);
    if (2 !== \count($refParts)) {
        throw new \Exception('Invalid object reference for $obj.');
    }
    $headerLen = $this->getObjectHeaderLen($refParts);

    // move to the header: skip white space, then the leading zeros of the object number
    $offset += strspn($pdfData, $this->config->getPdfWhitespaces(), $offset);
    $offset += strspn($pdfData, '0', $offset);

    $headerCandidate = substr($pdfData, $offset, $headerLen);
    if (!preg_match($this->getObjectHeaderPattern($refParts), $headerCandidate)) {
        // an indirect reference to an undefined object shall be considered a reference to the null object
        return ['null', 'null', $offset];
    }

    // the content starts right after the header
    $offset += $headerLen;
    $elements = [];
    while (true) {
        $startedAt = $offset;
        $element = $this->getRawObject($pdfData, $offset);
        $offset = $element[2];

        // a stream is decoded with the dictionary that immediately precedes it
        $previous = $elements[\count($elements) - 1] ?? null;
        if ($decoding && 'stream' === $element[0] && isset($previous[0]) && '<<' === $previous[0]) {
            $element[3] = $this->decodeStream($pdfData, $xref, $previous[1], $element[1]);
        }
        $elements[] = $element;

        if ('endobj' === $element[0] || $offset === $startedAt) {
            // end of the object reached, or nothing more could be read
            break;
        }
    }
    // remove closing delimiter
    array_pop($elements);

    return $elements;
}
/**
 * Get the content of object, resolving indirect object reference if necessary.
 *
 * Objects that are not of type 'objref' are returned untouched. A reference is answered
 * from the $this->objects cache when possible; otherwise the object is read (without
 * decoding its streams) from the offset found in $xref and cached. A reference that is
 * neither cached nor listed in $xref is returned untouched as well.
 *
 * NOTE(review): the offset is looked up directly under the reference key of $xref, while
 * the callers visible here forward the structure built by getXrefData(), which keeps the
 * offsets under its 'xref' key - confirm which shape is expected.
 *
 * @param string $pdfData PDF data
 * @param array  $xref    Offsets keyed by object reference
 * @param array  $obj     Object value
 *
 * @return array containing object data
 *
 * @throws \Exception
 */
protected function getObjectVal(string $pdfData, $xref, array $obj): array
{
    if ('objref' != $obj[0]) {
        // direct object: nothing to resolve
        return $obj;
    }

    $reference = $obj[1];
    if (isset($this->objects[$reference])) {
        // this object has been already parsed
        return $this->objects[$reference];
    }
    if (!isset($xref[$reference])) {
        // unknown reference: hand it back as it came
        return $obj;
    }

    // parse new object and remember it
    $this->objects[$reference] = $this->getIndirectObject($pdfData, $xref, $reference, $xref[$reference], false);

    return $this->objects[$reference];
}
/**
 * Get object type, raw value and offset to next object
 *
 * Reads exactly one token starting at $offset (leading white space and comments are
 * skipped). The reported type is one of: '/', '(', ')', '[', ']', '<<', '>>', '<', '>',
 * 'endobj', 'null', 'boolean', 'stream', 'endstream', 'objref', 'obj', 'numeric',
 * or '' when nothing could be recognised (including when $offset is at or past the end
 * of the data). Arrays and dictionaries are read recursively: their value is the list
 * of the tokens they contain.
 *
 * @param string $pdfData PDF data
 * @param int    $offset  Object offset
 *
 * @return array containing object type, raw value and offset to next object
 */
protected function getRawObject(string $pdfData, int $offset = 0): array
{
    $objtype = ''; // object type to be returned
    $objval = ''; // object value to be returned
    if ($offset >= \strlen($pdfData)) {
        // nothing left to read: report an empty token instead of reading past the end
        return [$objtype, $objval, $offset];
    }
    // skip initial white space chars
    $offset += strspn($pdfData, $this->config->getPdfWhitespaces(), $offset);
    // get first char (empty when only white space was left)
    $char = $pdfData[$offset] ?? '';
    // get object type
    switch ($char) {
        case '%': // \x25 PERCENT SIGN
            // skip comment and search for next token
            $next = strcspn($pdfData, "\r\n", $offset);
            if ($next > 0) {
                $offset += $next;
                return $this->getRawObject($pdfData, $offset);
            }
            break;
        case '/': // \x2F SOLIDUS
            // name object
            $objtype = $char;
            ++$offset;
            // the name ends at the first white space or delimiter (at most 256 bytes are inspected)
            $span = strcspn($pdfData, "\x00\x09\x0a\x0c\x0d\x20\n\t\r\v\f\x28\x29\x3c\x3e\x5b\x5d\x7b\x7d\x2f\x25", $offset, 256);
            if ($span > 0) {
                $objval = substr($pdfData, $offset, $span); // unescaped value
                $offset += $span;
            }
            break;
        case '(': // \x28 LEFT PARENTHESIS
        case ')': // \x29 RIGHT PARENTHESIS
            // literal string object
            $objtype = $char;
            ++$offset;
            $strpos = $offset;
            if ('(' == $char) {
                // walk to the matching parenthesis, honouring nesting and escapes
                $open_bracket = 1;
                while ($open_bracket > 0) {
                    if (!isset($pdfData[$strpos])) {
                        break;
                    }
                    $ch = $pdfData[$strpos];
                    switch ($ch) {
                        case '\\': // REVERSE SOLIDUS (5Ch) (Backslash)
                            // skip next character
                            ++$strpos;
                            break;
                        case '(': // LEFT PARENHESIS (28h)
                            ++$open_bracket;
                            break;
                        case ')': // RIGHT PARENTHESIS (29h)
                            --$open_bracket;
                            break;
                    }
                    ++$strpos;
                }
                // value without the closing parenthesis
                $objval = substr($pdfData, $offset, $strpos - $offset - 1);
                $offset = $strpos;
            }
            break;
        case '[': // \x5B LEFT SQUARE BRACKET
        case ']': // \x5D RIGHT SQUARE BRACKET
            // array object
            $objtype = $char;
            ++$offset;
            if ('[' == $char) {
                // get array content
                $objval = [];
                do {
                    $oldOffset = $offset;
                    // get element
                    $element = $this->getRawObject($pdfData, $offset);
                    $offset = $element[2];
                    $objval[] = $element;
                    // stop on the closing bracket or when no progress is made
                } while ((']' != $element[0]) && ($offset != $oldOffset));
                // remove closing delimiter
                array_pop($objval);
            }
            break;
        case '<': // \x3C LESS-THAN SIGN
        case '>': // \x3E GREATER-THAN SIGN
            if (isset($pdfData[$offset + 1]) && ($pdfData[$offset + 1] == $char)) {
                // dictionary object
                $objtype = $char.$char;
                $offset += 2;
                if ('<' == $char) {
                    // get array content
                    $objval = [];
                    do {
                        $oldOffset = $offset;
                        // get element
                        $element = $this->getRawObject($pdfData, $offset);
                        $offset = $element[2];
                        $objval[] = $element;
                        // stop on the closing delimiter or when no progress is made
                    } while (('>>' != $element[0]) && ($offset != $oldOffset));
                    // remove closing delimiter
                    array_pop($objval);
                }
            } else {
                // hexadecimal string object
                $objtype = $char;
                ++$offset;
                $span = strspn($pdfData, "0123456789abcdefABCDEF\x09\x0a\x0c\x0d\x20", $offset);
                $dataToCheck = $pdfData[$offset + $span] ?? null;
                if ('<' == $char && $span > 0 && '>' == $dataToCheck) {
                    // remove white space characters
                    // (strtr() with an empty replacement leaves the string unchanged, hence str_replace())
                    $objval = str_replace(
                        str_split($this->config->getPdfWhitespaces()),
                        '',
                        substr($pdfData, $offset, $span)
                    );
                    $offset += $span + 1;
                } elseif (false !== ($endpos = strpos($pdfData, '>', $offset))) {
                    // not a well-formed hexadecimal string: skip past the next '>'
                    $offset = $endpos + 1;
                }
            }
            break;
        default:
            if ('endobj' == substr($pdfData, $offset, 6)) {
                // indirect object
                $objtype = 'endobj';
                $offset += 6;
            } elseif ('null' == substr($pdfData, $offset, 4)) {
                // null object
                $objtype = 'null';
                $offset += 4;
                $objval = 'null';
            } elseif ('true' == substr($pdfData, $offset, 4)) {
                // boolean true object
                $objtype = 'boolean';
                $offset += 4;
                $objval = 'true';
            } elseif ('false' == substr($pdfData, $offset, 5)) {
                // boolean false object
                $objtype = 'boolean';
                $offset += 5;
                $objval = 'false';
            } elseif ('stream' == substr($pdfData, $offset, 6)) {
                // start stream object
                $objtype = 'stream';
                $offset += 6;
                // the keyword must be followed by an end-of-line marker
                if (1 == preg_match('/^([\r]?[\n])/isU', substr($pdfData, $offset, 4), $matches)) {
                    $offset += \strlen($matches[0]);
                    $pregResult = preg_match(
                        '/(endstream)[\x09\x0a\x0c\x0d\x20]/isU',
                        $pdfData,
                        $matches,
                        \PREG_OFFSET_CAPTURE,
                        $offset
                    );
                    if (1 == $pregResult) {
                        // raw stream bytes; the next token read will be 'endstream'
                        $objval = substr($pdfData, $offset, $matches[0][1] - $offset);
                        $offset = $matches[1][1];
                    }
                }
            } elseif ('endstream' == substr($pdfData, $offset, 9)) {
                // end stream object
                $objtype = 'endstream';
                $offset += 9;
            } elseif (1 == preg_match('/^([0-9]+)[\s]+([0-9]+)[\s]+R/iU', substr($pdfData, $offset, 33), $matches)) {
                // indirect object reference
                $objtype = 'objref';
                $offset += \strlen($matches[0]);
                $objval = (int) $matches[1].'_'.(int) $matches[2];
            } elseif (1 == preg_match('/^([0-9]+)[\s]+([0-9]+)[\s]+obj/iU', substr($pdfData, $offset, 33), $matches)) {
                // object start
                $objtype = 'obj';
                $objval = (int) $matches[1].'_'.(int) $matches[2];
                $offset += \strlen($matches[0]);
            } elseif (($numlen = strspn($pdfData, '+-.0123456789', $offset)) > 0) {
                // numeric object
                $objtype = 'numeric';
                $objval = substr($pdfData, $offset, $numlen);
                $offset += $numlen;
            }
            break;
    }

    return [$objtype, $objval, $offset];
}
/**
 * Get Cross-Reference (xref) table and trailer data from PDF document data.
 *
 * With an $offset of 0 the last "startxref ... %%EOF" marker of the document gives the
 * position of the cross-reference data. Otherwise $offset itself is used when the 'xref'
 * keyword sits there or when an object header can be found from there on; as a last
 * resort the next startxref marker after $offset is followed. Depending on whether the
 * resulting position holds the 'xref' keyword, the section is decoded as a
 * cross-reference table or as a cross-reference stream.
 *
 * @param string $pdfData PDF data
 * @param int    $offset  xref offset (if known)
 * @param array  $xref    previous xref array (if any)
 *
 * @return array containing xref and trailer data
 *
 * @throws \Exception if it was unable to find startxref
 * @throws \Exception if it was unable to find xref
 */
protected function getXrefData(string $pdfData, int $offset = 0, array $xref = []): array
{
    $startxrefPattern = '/[\r\n]startxref[\s]*[\r\n]+([0-9]+)[\s]*[\r\n]+%%EOF/i';
    $dataLen = \strlen($pdfData);

    if ($offset > $dataLen) {
        // an offset beyond the data cannot be searched from
        throw new \Exception('Unable to find xref (PDF corrupted?)');
    }

    // every search below uses its own match variable so that a failed
    // attempt cannot wipe out the result of another one
    if (0 === $offset) {
        // find last startxref
        if (!preg_match_all($startxrefPattern, $pdfData, $allMarkers, \PREG_SET_ORDER)) {
            throw new \Exception('Unable to find startxref');
        }
        $lastMarker = array_pop($allMarkers);
        $startxref = (int) $lastMarker[1];
    } elseif ('xref' === substr($pdfData, $offset, 4)) {
        // Already pointing at the xref table
        $startxref = $offset;
    } elseif (preg_match('/([0-9]+[\s][0-9]+[\s]obj)/i', $pdfData, $objectHeader, \PREG_OFFSET_CAPTURE, $offset)) {
        // Cross-Reference Stream object
        $startxref = $offset;
    } elseif (preg_match($startxrefPattern, $pdfData, $marker, \PREG_OFFSET_CAPTURE, $offset)) {
        // startxref found
        $startxref = (int) $marker[1][0];
    } else {
        throw new \Exception('Unable to find startxref');
    }

    if ($startxref > $dataLen) {
        throw new \Exception('Unable to find xref (PDF corrupted?)');
    }

    // check xref position
    if ('xref' === substr($pdfData, $startxref, 4)) {
        // Cross-Reference
        $xref = $this->decodeXref($pdfData, $startxref, $xref);
    } else {
        // Cross-Reference Stream
        $xref = $this->decodeXrefStream($pdfData, $startxref, $xref);
    }

    if (empty($xref)) {
        throw new \Exception('Unable to find xref');
    }

    return $xref;
}
/**
 * Parses PDF data and returns extracted data as array.
 *
 * Anything in front of the "%PDF-" header is dropped, the cross-reference data is read
 * and every object listed with a positive offset is parsed with its streams decoded.
 * Entries with a non-positive offset (such as the -1 recorded for objects kept in
 * object streams) are not parsed here.
 *
 * @param string $data PDF data to parse
 *
 * @return array two elements: the xref/trailer data, then the parsed objects keyed by object reference
 *
 * @throws \Exception if empty PDF data given
 * @throws \Exception if PDF data missing %PDF header
 */
public function parseData(string $data): array
{
    if ('' === $data) {
        throw new \Exception('Empty PDF data given.');
    }

    // find the pdf header starting position
    $trimpos = strpos($data, '%PDF-');
    if (false === $trimpos) {
        throw new \Exception('Invalid PDF data: missing %PDF header.');
    }

    // get PDF content string
    $pdfData = $trimpos > 0 ? substr($data, $trimpos) : $data;

    // get xref and trailer data
    $xref = $this->getXrefData($pdfData);

    // parse all document objects (a document without any in-use entry has no 'xref' list)
    $objects = [];
    foreach ($xref['xref'] ?? [] as $obj => $offset) {
        if ($offset > 0) {
            // decode objects with positive offset
            $objects[$obj] = $this->getIndirectObject($pdfData, $xref, $obj, $offset, true);
        }
    }

    return [$xref, $objects];
}
}

View file

@ -0,0 +1,51 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
*
* @date 2017-01-03
*
* @license LGPLv3
*
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
namespace Smalot\PdfParser\XObject;
use Smalot\PdfParser\Header;
use Smalot\PdfParser\Page;
use Smalot\PdfParser\PDFObject;
/**
 * Class Form
 *
 * Form XObject, handled as a Page whose text is extracted from its own content.
 */
class Form extends Page
{
    /**
     * Extract the text of this form.
     *
     * The form's content is wrapped in a PDFObject that gets an empty header, and the
     * form itself is handed over as the page to extract the text for.
     *
     * @param Page|null $page Not read by this implementation
     */
    public function getText(Page $page = null): string
    {
        return (new PDFObject(
            $this->document,
            new Header([], $this->document),
            $this->content,
            $this->config
        ))->getText($this);
    }
}

Some files were not shown because too many files have changed in this diff Show more