From 5362508a9968d3754ed22ac2524246e321babbe9 Mon Sep 17 00:00:00 2001 From: n1474335 Date: Wed, 16 May 2018 17:10:50 +0000 Subject: [PATCH] ESM: Ported HTML, Unicode, Quoted Printable and Endian operations --- .../operations/EscapeUnicodeCharacters.mjs | 79 ++++ src/core/operations/FromHTMLEntity.mjs | 335 +++++++++++++++++ src/core/operations/FromQuotedPrintable.mjs | 61 ++++ src/core/operations/ParseColourCode.mjs | 195 ++++++++++ src/core/operations/StripHTMLTags.mjs | 65 ++++ src/core/operations/SwapEndianness.mjs | 137 +++++++ src/core/operations/ToHTMLEntity.mjs | 345 ++++++++++++++++++ src/core/operations/ToQuotedPrintable.mjs | 244 +++++++++++++ .../operations/UnescapeUnicodeCharacters.mjs | 75 ++++ 9 files changed, 1536 insertions(+) create mode 100644 src/core/operations/EscapeUnicodeCharacters.mjs create mode 100644 src/core/operations/FromHTMLEntity.mjs create mode 100644 src/core/operations/FromQuotedPrintable.mjs create mode 100644 src/core/operations/ParseColourCode.mjs create mode 100644 src/core/operations/StripHTMLTags.mjs create mode 100644 src/core/operations/SwapEndianness.mjs create mode 100644 src/core/operations/ToHTMLEntity.mjs create mode 100644 src/core/operations/ToQuotedPrintable.mjs create mode 100644 src/core/operations/UnescapeUnicodeCharacters.mjs diff --git a/src/core/operations/EscapeUnicodeCharacters.mjs b/src/core/operations/EscapeUnicodeCharacters.mjs new file mode 100644 index 00000000..0496784c --- /dev/null +++ b/src/core/operations/EscapeUnicodeCharacters.mjs @@ -0,0 +1,79 @@ +/** + * @author n1474335 [n1474335@gmail.com] + * @copyright Crown Copyright 2016 + * @license Apache-2.0 + */ + +import Operation from "../Operation"; + +/** + * Escape Unicode Characters operation + */ +class EscapeUnicodeCharacters extends Operation { + + /** + * EscapeUnicodeCharacters constructor + */ + constructor() { + super(); + + this.name = "Escape Unicode Characters"; + this.module = "Default"; + this.description = "Converts characters to their unicode-escaped notations.

Supports the prefixes:e.g. σου becomes \\u03C3\\u03BF\\u03C5"; + this.inputType = "string"; + this.outputType = "string"; + this.args = [ + { + "name": "Prefix", + "type": "option", + "value": ["\\u", "%u", "U+"] + }, + { + "name": "Encode all chars", + "type": "boolean", + "value": false + }, + { + "name": "Padding", + "type": "number", + "value": 4 + }, + { + "name": "Uppercase hex", + "type": "boolean", + "value": true + } + ]; + } + + /** + * @param {string} input + * @param {Object[]} args + * @returns {string} + */ + run(input, args) { + const regexWhitelist = /[ -~]/i, + [prefix, encodeAll, padding, uppercaseHex] = args; + + let output = "", + character = ""; + + for (let i = 0; i < input.length; i++) { + character = input[i]; + if (!encodeAll && regexWhitelist.test(character)) { + // It’s a printable ASCII character so don’t escape it. + output += character; + continue; + } + + let cp = character.codePointAt(0).toString(16); + if (uppercaseHex) cp = cp.toUpperCase(); + output += prefix + cp.padStart(padding, "0"); + } + + return output; + } + +} + +export default EscapeUnicodeCharacters; diff --git a/src/core/operations/FromHTMLEntity.mjs b/src/core/operations/FromHTMLEntity.mjs new file mode 100644 index 00000000..7dbb1afe --- /dev/null +++ b/src/core/operations/FromHTMLEntity.mjs @@ -0,0 +1,335 @@ +/** + * @author n1474335 [n1474335@gmail.com] + * @copyright Crown Copyright 2016 + * @license Apache-2.0 + */ + +import Operation from "../Operation"; +import Utils from "../Utils"; + +/** + * From HTML Entity operation + */ +class FromHTMLEntity extends Operation { + + /** + * FromHTMLEntity constructor + */ + constructor() { + super(); + + this.name = "From HTML Entity"; + this.module = "Default"; + this.description = "Converts HTML entities back to characters

e.g. &amp; becomes &"; + this.inputType = "string"; + this.outputType = "string"; + this.args = []; + } + + /** + * @param {string} input + * @param {Object[]} args + * @returns {string} + */ + run(input, args) { + const regex = /&(#?x?[a-zA-Z0-9]{1,8});/g; + let output = "", + m, + i = 0; + + while ((m = regex.exec(input))) { + // Add up to match + for (; i < m.index;) + output += input[i++]; + + // Add match + const bite = entityToByte[m[1]]; + if (bite) { + output += Utils.chr(bite); + } else if (!bite && m[1][0] === "#" && m[1].length > 1 && /^#\d{1,6}$/.test(m[1])) { + // Numeric entity (e.g. ) + const num = m[1].slice(1, m[1].length); + output += Utils.chr(parseInt(num, 10)); + } else if (!bite && m[1][0] === "#" && m[1].length > 3 && /^#x[\dA-F]{2,8}$/i.test(m[1])) { + // Hex entity (e.g. :) + const hex = m[1].slice(2, m[1].length); + output += Utils.chr(parseInt(hex, 16)); + } else { + // Not a valid entity, print as normal + for (; i < regex.lastIndex;) + output += input[i++]; + } + + i = regex.lastIndex; + } + // Add all after final match + for (; i < input.length;) + output += input[i++]; + + return output; + } + +} + + +/** + * Lookup table to translate HTML entity codes to their byte values. + */ +const entityToByte = { + "quot": 34, + "amp": 38, + "apos": 39, + "lt": 60, + "gt": 62, + "nbsp": 160, + "iexcl": 161, + "cent": 162, + "pound": 163, + "curren": 164, + "yen": 165, + "brvbar": 166, + "sect": 167, + "uml": 168, + "copy": 169, + "ordf": 170, + "laquo": 171, + "not": 172, + "shy": 173, + "reg": 174, + "macr": 175, + "deg": 176, + "plusmn": 177, + "sup2": 178, + "sup3": 179, + "acute": 180, + "micro": 181, + "para": 182, + "middot": 183, + "cedil": 184, + "sup1": 185, + "ordm": 186, + "raquo": 187, + "frac14": 188, + "frac12": 189, + "frac34": 190, + "iquest": 191, + "Agrave": 192, + "Aacute": 193, + "Acirc": 194, + "Atilde": 195, + "Auml": 196, + "Aring": 197, + "AElig": 198, + "Ccedil": 199, + "Egrave": 200, + "Eacute": 201, + "Ecirc": 202, + "Euml": 203, + "Igrave": 204, + "Iacute": 205, + "Icirc": 206, + "Iuml": 207, + "ETH": 208, + "Ntilde": 209, + "Ograve": 210, + "Oacute": 211, + "Ocirc": 212, + "Otilde": 213, + "Ouml": 214, + "times": 215, + "Oslash": 216, + "Ugrave": 217, + "Uacute": 218, + "Ucirc": 219, + "Uuml": 220, + "Yacute": 221, + "THORN": 222, + "szlig": 223, + "agrave": 224, + "aacute": 225, + "acirc": 226, + "atilde": 227, + "auml": 228, + "aring": 229, + "aelig": 230, + "ccedil": 231, + "egrave": 232, + "eacute": 233, + "ecirc": 234, + "euml": 235, + "igrave": 236, + "iacute": 237, + "icirc": 238, + "iuml": 239, + "eth": 240, + "ntilde": 241, + "ograve": 242, + "oacute": 243, + "ocirc": 244, + "otilde": 245, + "ouml": 246, + "divide": 247, + "oslash": 248, + "ugrave": 249, + "uacute": 250, + "ucirc": 251, + "uuml": 252, + "yacute": 253, + "thorn": 254, + "yuml": 255, + "OElig": 338, + "oelig": 339, + "Scaron": 352, + "scaron": 353, + "Yuml": 376, + "fnof": 402, + "circ": 710, + "tilde": 732, + "Alpha": 913, + "Beta": 914, + "Gamma": 915, + "Delta": 916, + "Epsilon": 917, + "Zeta": 918, + "Eta": 919, + "Theta": 920, + "Iota": 921, + "Kappa": 922, + "Lambda": 923, + "Mu": 924, + "Nu": 925, + "Xi": 926, + "Omicron": 927, + "Pi": 928, + "Rho": 929, + "Sigma": 931, + "Tau": 932, + "Upsilon": 933, + "Phi": 934, + "Chi": 935, + "Psi": 936, + "Omega": 937, + "alpha": 945, + "beta": 946, + "gamma": 947, + "delta": 948, + "epsilon": 949, + "zeta": 950, + "eta": 951, + "theta": 952, + "iota": 953, + "kappa": 954, + "lambda": 955, + "mu": 956, + "nu": 957, + "xi": 958, + "omicron": 959, + "pi": 960, + "rho": 961, + "sigmaf": 962, + "sigma": 963, + "tau": 964, + "upsilon": 965, + "phi": 966, + "chi": 967, + "psi": 968, + "omega": 969, + "thetasym": 977, + "upsih": 978, + "piv": 982, + "ensp": 8194, + "emsp": 8195, + "thinsp": 8201, + "zwnj": 8204, + "zwj": 8205, + "lrm": 8206, + "rlm": 8207, + "ndash": 8211, + "mdash": 8212, + "lsquo": 8216, + "rsquo": 8217, + "sbquo": 8218, + "ldquo": 8220, + "rdquo": 8221, + "bdquo": 8222, + "dagger": 8224, + "Dagger": 8225, + "bull": 8226, + "hellip": 8230, + "permil": 8240, + "prime": 8242, + "Prime": 8243, + "lsaquo": 8249, + "rsaquo": 8250, + "oline": 8254, + "frasl": 8260, + "euro": 8364, + "image": 8465, + "weierp": 8472, + "real": 8476, + "trade": 8482, + "alefsym": 8501, + "larr": 8592, + "uarr": 8593, + "rarr": 8594, + "darr": 8595, + "harr": 8596, + "crarr": 8629, + "lArr": 8656, + "uArr": 8657, + "rArr": 8658, + "dArr": 8659, + "hArr": 8660, + "forall": 8704, + "part": 8706, + "exist": 8707, + "empty": 8709, + "nabla": 8711, + "isin": 8712, + "notin": 8713, + "ni": 8715, + "prod": 8719, + "sum": 8721, + "minus": 8722, + "lowast": 8727, + "radic": 8730, + "prop": 8733, + "infin": 8734, + "ang": 8736, + "and": 8743, + "or": 8744, + "cap": 8745, + "cup": 8746, + "int": 8747, + "there4": 8756, + "sim": 8764, + "cong": 8773, + "asymp": 8776, + "ne": 8800, + "equiv": 8801, + "le": 8804, + "ge": 8805, + "sub": 8834, + "sup": 8835, + "nsub": 8836, + "sube": 8838, + "supe": 8839, + "oplus": 8853, + "otimes": 8855, + "perp": 8869, + "sdot": 8901, + "vellip": 8942, + "lceil": 8968, + "rceil": 8969, + "lfloor": 8970, + "rfloor": 8971, + "lang": 9001, + "rang": 9002, + "loz": 9674, + "spades": 9824, + "clubs": 9827, + "hearts": 9829, + "diams": 9830, +}; + +export default FromHTMLEntity; diff --git a/src/core/operations/FromQuotedPrintable.mjs b/src/core/operations/FromQuotedPrintable.mjs new file mode 100644 index 00000000..4fb098fe --- /dev/null +++ b/src/core/operations/FromQuotedPrintable.mjs @@ -0,0 +1,61 @@ +/** + * Some parts taken from mimelib (http://github.com/andris9/mimelib) + * @author Andris Reinman + * @license MIT + * + * @author n1474335 [n1474335@gmail.com] + * @copyright Crown Copyright 2016 + * @license Apache-2.0 + */ + +import Operation from "../Operation"; + +/** + * From Quoted Printable operation + */ +class FromQuotedPrintable extends Operation { + + /** + * FromQuotedPrintable constructor + */ + constructor() { + super(); + + this.name = "From Quoted Printable"; + this.module = "Default"; + this.description = "Converts QP-encoded text back to standard text."; + this.inputType = "string"; + this.outputType = "byteArray"; + this.args = []; + } + + /** + * @param {string} input + * @param {Object[]} args + * @returns {byteArray} + */ + run(input, args) { + const str = input.replace(/=(?:\r?\n|$)/g, ""); + + const encodedBytesCount = (str.match(/=[\da-fA-F]{2}/g) || []).length, + bufferLength = str.length - encodedBytesCount * 2, + buffer = new Array(bufferLength); + let chr, hex, + bufferPos = 0; + + for (let i = 0, len = str.length; i < len; i++) { + chr = str.charAt(i); + if (chr === "=" && (hex = str.substr(i + 1, 2)) && /[\da-fA-F]{2}/.test(hex)) { + buffer[bufferPos++] = parseInt(hex, 16); + i += 2; + continue; + } + buffer[bufferPos++] = chr.charCodeAt(0); + } + + return buffer; + } + +} + +export default FromQuotedPrintable; diff --git a/src/core/operations/ParseColourCode.mjs b/src/core/operations/ParseColourCode.mjs new file mode 100644 index 00000000..a80e3b3a --- /dev/null +++ b/src/core/operations/ParseColourCode.mjs @@ -0,0 +1,195 @@ +/** + * @author n1474335 [n1474335@gmail.com] + * @copyright Crown Copyright 2016 + * @license Apache-2.0 + */ + +import Operation from "../Operation"; + +/** + * Parse colour code operation + */ +class ParseColourCode extends Operation { + + /** + * ParseColourCode constructor + */ + constructor() { + super(); + + this.name = "Parse colour code"; + this.module = "Default"; + this.description = "Converts a colour code in a standard format to other standard formats and displays the colour itself.

Example inputs"; + this.inputType = "string"; + this.outputType = "html"; + this.args = []; + } + + /** + * @param {string} input + * @param {Object[]} args + * @returns {html} + */ + run(input, args) { + let m = null, + r = 0, g = 0, b = 0, a = 1; + + // Read in the input + if ((m = input.match(/#([a-f0-9]{2})([a-f0-9]{2})([a-f0-9]{2})/i))) { + // Hex - #d9edf7 + r = parseInt(m[1], 16); + g = parseInt(m[2], 16); + b = parseInt(m[3], 16); + } else if ((m = input.match(/rgba?\((\d{1,3}(?:\.\d+)?),\s?(\d{1,3}(?:\.\d+)?),\s?(\d{1,3}(?:\.\d+)?)(?:,\s?(\d(?:\.\d+)?))?\)/i))) { + // RGB or RGBA - rgb(217,237,247) or rgba(217,237,247,1) + r = parseFloat(m[1]); + g = parseFloat(m[2]); + b = parseFloat(m[3]); + a = m[4] ? parseFloat(m[4]) : 1; + } else if ((m = input.match(/hsla?\((\d{1,3}(?:\.\d+)?),\s?(\d{1,3}(?:\.\d+)?)%,\s?(\d{1,3}(?:\.\d+)?)%(?:,\s?(\d(?:\.\d+)?))?\)/i))) { + // HSL or HSLA - hsl(200, 65%, 91%) or hsla(200, 65%, 91%, 1) + const h_ = parseFloat(m[1]) / 360, + s_ = parseFloat(m[2]) / 100, + l_ = parseFloat(m[3]) / 100, + rgb_ = ParseColourCode._hslToRgb(h_, s_, l_); + + r = rgb_[0]; + g = rgb_[1]; + b = rgb_[2]; + a = m[4] ? parseFloat(m[4]) : 1; + } else if ((m = input.match(/cmyk\((\d(?:\.\d+)?),\s?(\d(?:\.\d+)?),\s?(\d(?:\.\d+)?),\s?(\d(?:\.\d+)?)\)/i))) { + // CMYK - cmyk(0.12, 0.04, 0.00, 0.03) + const c_ = parseFloat(m[1]), + m_ = parseFloat(m[2]), + y_ = parseFloat(m[3]), + k_ = parseFloat(m[4]); + + r = Math.round(255 * (1 - c_) * (1 - k_)); + g = Math.round(255 * (1 - m_) * (1 - k_)); + b = Math.round(255 * (1 - y_) * (1 - k_)); + } + + const hsl_ = ParseColourCode._rgbToHsl(r, g, b), + h = Math.round(hsl_[0] * 360), + s = Math.round(hsl_[1] * 100), + l = Math.round(hsl_[2] * 100); + let k = 1 - Math.max(r/255, g/255, b/255), + c = (1 - r/255 - k) / (1 - k), + y = (1 - b/255 - k) / (1 - k); + + m = (1 - g/255 - k) / (1 - k); + + c = isNaN(c) ? "0" : c.toFixed(2); + m = isNaN(m) ? "0" : m.toFixed(2); + y = isNaN(y) ? "0" : y.toFixed(2); + k = k.toFixed(2); + + const hex = "#" + + Math.round(r).toString(16).padStart(2, "0") + + Math.round(g).toString(16).padStart(2, "0") + + Math.round(b).toString(16).padStart(2, "0"), + rgb = "rgb(" + r + ", " + g + ", " + b + ")", + rgba = "rgba(" + r + ", " + g + ", " + b + ", " + a + ")", + hsl = "hsl(" + h + ", " + s + "%, " + l + "%)", + hsla = "hsla(" + h + ", " + s + "%, " + l + "%, " + a + ")", + cmyk = "cmyk(" + c + ", " + m + ", " + y + ", " + k + ")"; + + // Generate output + return `
+Hex: ${hex} +RGB: ${rgb} +RGBA: ${rgba} +HSL: ${hsl} +HSLA: ${hsla} +CMYK: ${cmyk} +`; + } + + /** + * Converts an HSL color value to RGB. Conversion formula + * adapted from http://en.wikipedia.org/wiki/HSL_colorSpace. + * Assumes h, s, and l are contained in the set [0, 1] and + * returns r, g, and b in the set [0, 255]. + * + * @author Mohsen (http://stackoverflow.com/a/9493060) + * + * @param {number} h - The hue + * @param {number} s - The saturation + * @param {number} l - The lightness + * @return {Array} The RGB representation + */ + static _hslToRgb(h, s, l) { + let r, g, b; + + if (s === 0){ + r = g = b = l; // achromatic + } else { + const hue2rgb = function hue2rgb(p, q, t) { + if (t < 0) t += 1; + if (t > 1) t -= 1; + if (t < 1/6) return p + (q - p) * 6 * t; + if (t < 1/2) return q; + if (t < 2/3) return p + (q - p) * (2/3 - t) * 6; + return p; + }; + + const q = l < 0.5 ? l * (1 + s) : l + s - l * s; + const p = 2 * l - q; + r = hue2rgb(p, q, h + 1/3); + g = hue2rgb(p, q, h); + b = hue2rgb(p, q, h - 1/3); + } + + return [Math.round(r * 255), Math.round(g * 255), Math.round(b * 255)]; + } + + /** + * Converts an RGB color value to HSL. Conversion formula + * adapted from http://en.wikipedia.org/wiki/HSL_colorSpace. + * Assumes r, g, and b are contained in the set [0, 255] and + * returns h, s, and l in the set [0, 1]. + * + * @author Mohsen (http://stackoverflow.com/a/9493060) + * + * @param {number} r - The red color value + * @param {number} g - The green color value + * @param {number} b - The blue color value + * @return {Array} The HSL representation + */ + static _rgbToHsl(r, g, b) { + r /= 255; g /= 255; b /= 255; + const max = Math.max(r, g, b), + min = Math.min(r, g, b), + l = (max + min) / 2; + let h, s; + + if (max === min) { + h = s = 0; // achromatic + } else { + const d = max - min; + s = l > 0.5 ? d / (2 - max - min) : d / (max + min); + switch (max) { + case r: h = (g - b) / d + (g < b ? 6 : 0); break; + case g: h = (b - r) / d + 2; break; + case b: h = (r - g) / d + 4; break; + } + h /= 6; + } + + return [h, s, l]; + } +} + +export default ParseColourCode; diff --git a/src/core/operations/StripHTMLTags.mjs b/src/core/operations/StripHTMLTags.mjs new file mode 100644 index 00000000..f1b7b08e --- /dev/null +++ b/src/core/operations/StripHTMLTags.mjs @@ -0,0 +1,65 @@ +/** + * @author n1474335 [n1474335@gmail.com] + * @copyright Crown Copyright 2016 + * @license Apache-2.0 + */ + +import Operation from "../Operation"; +import Utils from "../Utils"; + +/** + * Strip HTML tags operation + */ +class StripHTMLTags extends Operation { + + /** + * StripHTMLTags constructor + */ + constructor() { + super(); + + this.name = "Strip HTML tags"; + this.module = "Default"; + this.description = "Removes all HTML tags from the input."; + this.inputType = "string"; + this.outputType = "string"; + this.args = [ + { + "name": "Remove indentation", + "type": "boolean", + "value": true + }, + { + "name": "Remove excess line breaks", + "type": "boolean", + "value": true + } + ]; + } + + /** + * @param {string} input + * @param {Object[]} args + * @returns {string} + */ + run(input, args) { + const [removeIndentation, removeLineBreaks] = args; + + input = Utils.stripHtmlTags(input); + + if (removeIndentation) { + input = input.replace(/\n[ \f\t]+/g, "\n"); + } + + if (removeLineBreaks) { + input = input + .replace(/^\s*\n/, "") // first line + .replace(/(\n\s*){2,}/g, "\n"); // all others + } + + return input; + } + +} + +export default StripHTMLTags; diff --git a/src/core/operations/SwapEndianness.mjs b/src/core/operations/SwapEndianness.mjs new file mode 100644 index 00000000..21cd4e9c --- /dev/null +++ b/src/core/operations/SwapEndianness.mjs @@ -0,0 +1,137 @@ +/** + * @author n1474335 [n1474335@gmail.com] + * @copyright Crown Copyright 2016 + * @license Apache-2.0 + */ + +import Operation from "../Operation"; +import Utils from "../Utils"; +import {toHex, fromHex} from "../lib/Hex"; +import OperationError from "../errors/OperationError"; + +/** + * Swap endianness operation + */ +class SwapEndianness extends Operation { + + /** + * SwapEndianness constructor + */ + constructor() { + super(); + + this.name = "Swap endianness"; + this.module = "Default"; + this.description = "Switches the data from big-endian to little-endian or vice-versa. Data can be read in as hexadecimal or raw bytes. It will be returned in the same format as it is entered."; + this.inputType = "string"; + this.outputType = "string"; + this.args = [ + { + "name": "Data format", + "type": "option", + "value": ["Hex", "Raw"] + }, + { + "name": "Word length (bytes)", + "type": "number", + "value": 4 + }, + { + "name": "Pad incomplete words", + "type": "boolean", + "value": true + } + ]; + } + + /** + * @param {string} input + * @param {Object[]} args + * @returns {string} + */ + run(input, args) { + const [dataFormat, wordLength, padIncompleteWords] = args, + result = [], + words = []; + let i = 0, + j = 0, + data = []; + + if (wordLength <= 0) { + throw new OperationError("Word length must be greater than 0"); + } + + // Convert input to raw data based on specified data format + switch (dataFormat) { + case "Hex": + data = fromHex(input); + break; + case "Raw": + data = Utils.strToByteArray(input); + break; + default: + data = input; + } + + // Split up into words + for (i = 0; i < data.length; i += wordLength) { + const word = data.slice(i, i + wordLength); + + // Pad word if too short + if (padIncompleteWords && word.length < wordLength){ + for (j = word.length; j < wordLength; j++) { + word.push(0); + } + } + + words.push(word); + } + + // Swap endianness and flatten + for (i = 0; i < words.length; i++) { + j = words[i].length; + while (j--) { + result.push(words[i][j]); + } + } + + // Convert data back to specified data format + switch (dataFormat) { + case "Hex": + return toHex(result); + case "Raw": + return Utils.byteArrayToUtf8(result); + default: + return result; + } + } + + /** + * Highlight Swap endianness + * + * @param {Object[]} pos + * @param {number} pos[].start + * @param {number} pos[].end + * @param {Object[]} args + * @returns {Object[]} pos + */ + highlight(pos, args) { + return pos; + } + + /** + * Highlight Swap endianness in reverse + * + * @param {Object[]} pos + * @param {number} pos[].start + * @param {number} pos[].end + * @param {Object[]} args + * @returns {Object[]} pos + */ + highlightReverse(pos, args) { + return pos; + } + +} + +export default SwapEndianness; diff --git a/src/core/operations/ToHTMLEntity.mjs b/src/core/operations/ToHTMLEntity.mjs new file mode 100644 index 00000000..06b5ff25 --- /dev/null +++ b/src/core/operations/ToHTMLEntity.mjs @@ -0,0 +1,345 @@ +/** + * @author n1474335 [n1474335@gmail.com] + * @copyright Crown Copyright 2016 + * @license Apache-2.0 + */ + +import Operation from "../Operation"; +import Utils from "../Utils"; + +/** + * To HTML Entity operation + */ +class ToHTMLEntity extends Operation { + + /** + * ToHTMLEntity constructor + */ + constructor() { + super(); + + this.name = "To HTML Entity"; + this.module = "Default"; + this.description = "Converts characters to HTML entities

e.g. & becomes &amp;"; + this.inputType = "string"; + this.outputType = "string"; + this.args = [ + { + "name": "Convert all characters", + "type": "boolean", + "value": false + }, + { + "name": "Convert to", + "type": "option", + "value": ["Named entities where possible", "Numeric entities", "Hex entities"] + } + ]; + } + + /** + * @param {string} input + * @param {Object[]} args + * @returns {string} + */ + run(input, args) { + const convertAll = args[0], + numeric = args[1] === "Numeric entities", + hexa = args[1] === "Hex entities"; + + const charcodes = Utils.strToCharcode(input); + let output = ""; + + for (let i = 0; i < charcodes.length; i++) { + if (convertAll && numeric) { + output += "&#" + charcodes[i] + ";"; + } else if (convertAll && hexa) { + output += "&#x" + Utils.hex(charcodes[i]) + ";"; + } else if (convertAll) { + output += byteToEntity[charcodes[i]] || "&#" + charcodes[i] + ";"; + } else if (numeric) { + if (charcodes[i] > 255 || byteToEntity.hasOwnProperty(charcodes[i])) { + output += "&#" + charcodes[i] + ";"; + } else { + output += Utils.chr(charcodes[i]); + } + } else if (hexa) { + if (charcodes[i] > 255 || byteToEntity.hasOwnProperty(charcodes[i])) { + output += "&#x" + Utils.hex(charcodes[i]) + ";"; + } else { + output += Utils.chr(charcodes[i]); + } + } else { + output += byteToEntity[charcodes[i]] || ( + charcodes[i] > 255 ? + "&#" + charcodes[i] + ";" : + Utils.chr(charcodes[i]) + ); + } + } + return output; + } + +} + +/** + * Lookup table to translate byte values to their HTML entity codes. + */ +const byteToEntity = { + 34: """, + 38: "&", + 39: "'", + 60: "<", + 62: ">", + 160: " ", + 161: "¡", + 162: "¢", + 163: "£", + 164: "¤", + 165: "¥", + 166: "¦", + 167: "§", + 168: "¨", + 169: "©", + 170: "ª", + 171: "«", + 172: "¬", + 173: "­", + 174: "®", + 175: "¯", + 176: "°", + 177: "±", + 178: "²", + 179: "³", + 180: "´", + 181: "µ", + 182: "¶", + 183: "·", + 184: "¸", + 185: "¹", + 186: "º", + 187: "»", + 188: "¼", + 189: "½", + 190: "¾", + 191: "¿", + 192: "À", + 193: "Á", + 194: "Â", + 195: "Ã", + 196: "Ä", + 197: "Å", + 198: "Æ", + 199: "Ç", + 200: "È", + 201: "É", + 202: "Ê", + 203: "Ë", + 204: "Ì", + 205: "Í", + 206: "Î", + 207: "Ï", + 208: "Ð", + 209: "Ñ", + 210: "Ò", + 211: "Ó", + 212: "Ô", + 213: "Õ", + 214: "Ö", + 215: "×", + 216: "Ø", + 217: "Ù", + 218: "Ú", + 219: "Û", + 220: "Ü", + 221: "Ý", + 222: "Þ", + 223: "ß", + 224: "à", + 225: "á", + 226: "â", + 227: "ã", + 228: "ä", + 229: "å", + 230: "æ", + 231: "ç", + 232: "è", + 233: "é", + 234: "ê", + 235: "ë", + 236: "ì", + 237: "í", + 238: "î", + 239: "ï", + 240: "ð", + 241: "ñ", + 242: "ò", + 243: "ó", + 244: "ô", + 245: "õ", + 246: "ö", + 247: "÷", + 248: "ø", + 249: "ù", + 250: "ú", + 251: "û", + 252: "ü", + 253: "ý", + 254: "þ", + 255: "ÿ", + 338: "Œ", + 339: "œ", + 352: "Š", + 353: "š", + 376: "Ÿ", + 402: "ƒ", + 710: "ˆ", + 732: "˜", + 913: "Α", + 914: "Β", + 915: "Γ", + 916: "Δ", + 917: "Ε", + 918: "Ζ", + 919: "Η", + 920: "Θ", + 921: "Ι", + 922: "Κ", + 923: "Λ", + 924: "Μ", + 925: "Ν", + 926: "Ξ", + 927: "Ο", + 928: "Π", + 929: "Ρ", + 931: "Σ", + 932: "Τ", + 933: "Υ", + 934: "Φ", + 935: "Χ", + 936: "Ψ", + 937: "Ω", + 945: "α", + 946: "β", + 947: "γ", + 948: "δ", + 949: "ε", + 950: "ζ", + 951: "η", + 952: "θ", + 953: "ι", + 954: "κ", + 955: "λ", + 956: "μ", + 957: "ν", + 958: "ξ", + 959: "ο", + 960: "π", + 961: "ρ", + 962: "ς", + 963: "σ", + 964: "τ", + 965: "υ", + 966: "φ", + 967: "χ", + 968: "ψ", + 969: "ω", + 977: "ϑ", + 978: "ϒ", + 982: "ϖ", + 8194: " ", + 8195: " ", + 8201: " ", + 8204: "‌", + 8205: "‍", + 8206: "‎", + 8207: "‏", + 8211: "–", + 8212: "—", + 8216: "‘", + 8217: "’", + 8218: "‚", + 8220: "“", + 8221: "”", + 8222: "„", + 8224: "†", + 8225: "‡", + 8226: "•", + 8230: "…", + 8240: "‰", + 8242: "′", + 8243: "″", + 8249: "‹", + 8250: "›", + 8254: "‾", + 8260: "⁄", + 8364: "€", + 8465: "ℑ", + 8472: "℘", + 8476: "ℜ", + 8482: "™", + 8501: "ℵ", + 8592: "←", + 8593: "↑", + 8594: "→", + 8595: "↓", + 8596: "↔", + 8629: "↵", + 8656: "⇐", + 8657: "⇑", + 8658: "⇒", + 8659: "⇓", + 8660: "⇔", + 8704: "∀", + 8706: "∂", + 8707: "∃", + 8709: "∅", + 8711: "∇", + 8712: "∈", + 8713: "∉", + 8715: "∋", + 8719: "∏", + 8721: "∑", + 8722: "−", + 8727: "∗", + 8730: "√", + 8733: "∝", + 8734: "∞", + 8736: "∠", + 8743: "∧", + 8744: "∨", + 8745: "∩", + 8746: "∪", + 8747: "∫", + 8756: "∴", + 8764: "∼", + 8773: "≅", + 8776: "≈", + 8800: "≠", + 8801: "≡", + 8804: "≤", + 8805: "≥", + 8834: "⊂", + 8835: "⊃", + 8836: "⊄", + 8838: "⊆", + 8839: "⊇", + 8853: "⊕", + 8855: "⊗", + 8869: "⊥", + 8901: "⋅", + 8942: "⋮", + 8968: "⌈", + 8969: "⌉", + 8970: "⌊", + 8971: "⌋", + 9001: "⟨", + 9002: "⟩", + 9674: "◊", + 9824: "♠", + 9827: "♣", + 9829: "♥", + 9830: "♦", +}; + +export default ToHTMLEntity; diff --git a/src/core/operations/ToQuotedPrintable.mjs b/src/core/operations/ToQuotedPrintable.mjs new file mode 100644 index 00000000..aa1025d7 --- /dev/null +++ b/src/core/operations/ToQuotedPrintable.mjs @@ -0,0 +1,244 @@ +/** + * Some parts taken from mimelib (http://github.com/andris9/mimelib) + * @author Andris Reinman + * @license MIT + * + * @author n1474335 [n1474335@gmail.com] + * @copyright Crown Copyright 2016 + * @license Apache-2.0 + */ + +import Operation from "../Operation"; + +/** + * To Quoted Printable operation + */ +class ToQuotedPrintable extends Operation { + + /** + * ToQuotedPrintable constructor + */ + constructor() { + super(); + + this.name = "To Quoted Printable"; + this.module = "Default"; + this.description = "Quoted-Printable, or QP encoding, is an encoding using printable ASCII characters (alphanumeric and the equals sign '=') to transmit 8-bit data over a 7-bit data path or, generally, over a medium which is not 8-bit clean. It is defined as a MIME content transfer encoding for use in e-mail.

QP works by using the equals sign '=' as an escape character. It also limits line length to 76, as some software has limits on line length."; + this.inputType = "byteArray"; + this.outputType = "string"; + this.args = []; + } + + /** + * @param {byteArray} input + * @param {Object[]} args + * @returns {string} + */ + run(input, args) { + let mimeEncodedStr = this.mimeEncode(input); + + // fix line breaks + mimeEncodedStr = mimeEncodedStr.replace(/\r?\n|\r/g, function() { + return "\r\n"; + }).replace(/[\t ]+$/gm, function(spaces) { + return spaces.replace(/ /g, "=20").replace(/\t/g, "=09"); + }); + + return this._addSoftLinebreaks(mimeEncodedStr, "qp"); + } + + + /** @license + ======================================================================== + mimelib: http://github.com/andris9/mimelib + Copyright (c) 2011-2012 Andris Reinman + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. + */ + + /** + * Encodes mime data. + * + * @param {byteArray} buffer + * @returns {string} + */ + mimeEncode(buffer) { + const ranges = [ + [0x09], + [0x0A], + [0x0D], + [0x20], + [0x21], + [0x23, 0x3C], + [0x3E], + [0x40, 0x5E], + [0x60, 0x7E] + ]; + let result = ""; + + for (let i = 0, len = buffer.length; i < len; i++) { + if (this._checkRanges(buffer[i], ranges)) { + result += String.fromCharCode(buffer[i]); + continue; + } + result += "=" + (buffer[i] < 0x10 ? "0" : "") + buffer[i].toString(16).toUpperCase(); + } + + return result; + } + + /** + * Checks if a given number falls within a given set of ranges. + * + * @private + * @param {number} nr + * @param {byteArray[]} ranges + * @returns {bolean} + */ + _checkRanges(nr, ranges) { + for (let i = ranges.length - 1; i >= 0; i--) { + if (!ranges[i].length) + continue; + if (ranges[i].length === 1 && nr === ranges[i][0]) + return true; + if (ranges[i].length === 2 && nr >= ranges[i][0] && nr <= ranges[i][1]) + return true; + } + return false; + } + + /** + * Adds soft line breaks to a string. + * Lines can't be longer that 76 + = 78 bytes + * http://tools.ietf.org/html/rfc2045#section-6.7 + * + * @private + * @param {string} str + * @param {string} encoding + * @returns {string} + */ + _addSoftLinebreaks(str, encoding) { + const lineLengthMax = 76; + + encoding = (encoding || "base64").toString().toLowerCase().trim(); + + if (encoding === "qp") { + return this._addQPSoftLinebreaks(str, lineLengthMax); + } else { + return this._addBase64SoftLinebreaks(str, lineLengthMax); + } + } + + /** + * Adds soft line breaks to a base64 string. + * + * @private + * @param {string} base64EncodedStr + * @param {number} lineLengthMax + * @returns {string} + */ + _addBase64SoftLinebreaks(base64EncodedStr, lineLengthMax) { + base64EncodedStr = (base64EncodedStr || "").toString().trim(); + return base64EncodedStr.replace(new RegExp(".{" + lineLengthMax + "}", "g"), "$&\r\n").trim(); + } + + /** + * Adds soft line breaks to a quoted printable string. + * + * @private + * @param {string} mimeEncodedStr + * @param {number} lineLengthMax + * @returns {string} + */ + _addQPSoftLinebreaks(mimeEncodedStr, lineLengthMax) { + const len = mimeEncodedStr.length, + lineMargin = Math.floor(lineLengthMax / 3); + let pos = 0, + match, code, line, + result = ""; + + // insert soft linebreaks where needed + while (pos < len) { + line = mimeEncodedStr.substr(pos, lineLengthMax); + if ((match = line.match(/\r\n/))) { + line = line.substr(0, match.index + match[0].length); + result += line; + pos += line.length; + continue; + } + + if (line.substr(-1) === "\n") { + // nothing to change here + result += line; + pos += line.length; + continue; + } else if ((match = line.substr(-lineMargin).match(/\n.*?$/))) { + // truncate to nearest line break + line = line.substr(0, line.length - (match[0].length - 1)); + result += line; + pos += line.length; + continue; + } else if (line.length > lineLengthMax - lineMargin && (match = line.substr(-lineMargin).match(/[ \t.,!?][^ \t.,!?]*$/))) { + // truncate to nearest space + line = line.substr(0, line.length - (match[0].length - 1)); + } else if (line.substr(-1) === "\r") { + line = line.substr(0, line.length - 1); + } else { + if (line.match(/=[\da-f]{0,2}$/i)) { + + // push incomplete encoding sequences to the next line + if ((match = line.match(/=[\da-f]{0,1}$/i))) { + line = line.substr(0, line.length - match[0].length); + } + + // ensure that utf-8 sequences are not split + while (line.length > 3 && line.length < len - pos && !line.match(/^(?:=[\da-f]{2}){1,4}$/i) && (match = line.match(/=[\da-f]{2}$/ig))) { + code = parseInt(match[0].substr(1, 2), 16); + if (code < 128) { + break; + } + + line = line.substr(0, line.length - 3); + + if (code >= 0xC0) { + break; + } + } + + } + } + + if (pos + line.length < len && line.substr(-1) !== "\n") { + if (line.length === 76 && line.match(/=[\da-f]{2}$/i)) { + line = line.substr(0, line.length - 3); + } else if (line.length === 76) { + line = line.substr(0, line.length - 1); + } + pos += line.length; + line += "=\r\n"; + } else { + pos += line.length; + } + + result += line; + } + + return result; + } + +} + +export default ToQuotedPrintable; diff --git a/src/core/operations/UnescapeUnicodeCharacters.mjs b/src/core/operations/UnescapeUnicodeCharacters.mjs new file mode 100644 index 00000000..ab8af2a8 --- /dev/null +++ b/src/core/operations/UnescapeUnicodeCharacters.mjs @@ -0,0 +1,75 @@ +/** + * @author n1474335 [n1474335@gmail.com] + * @copyright Crown Copyright 2016 + * @license Apache-2.0 + */ + +import Operation from "../Operation"; +import Utils from "../Utils"; + +/** + * Unescape Unicode Characters operation + */ +class UnescapeUnicodeCharacters extends Operation { + + /** + * UnescapeUnicodeCharacters constructor + */ + constructor() { + super(); + + this.name = "Unescape Unicode Characters"; + this.module = "Default"; + this.description = "Converts unicode-escaped character notation back into raw characters.

Supports the prefixes:
  • \\u
  • %u
  • U+
e.g. \\u03c3\\u03bf\\u03c5 becomes σου"; + this.inputType = "string"; + this.outputType = "string"; + this.args = [ + { + "name": "Prefix", + "type": "option", + "value": ["\\u", "%u", "U+"] + } + ]; + } + + /** + * @param {string} input + * @param {Object[]} args + * @returns {string} + */ + run(input, args) { + const prefix = prefixToRegex[args[0]], + regex = new RegExp(prefix+"([a-f\\d]{4})", "ig"); + let output = "", + m, + i = 0; + + while ((m = regex.exec(input))) { + // Add up to match + output += input.slice(i, m.index); + i = m.index; + + // Add match + output += Utils.chr(parseInt(m[1], 16)); + + i = regex.lastIndex; + } + + // Add all after final match + output += input.slice(i, input.length); + + return output; + } + +} + +/** + * Lookup table to add prefixes to unicode delimiters so that they can be used in a regex. + */ +const prefixToRegex = { + "\\u": "\\\\u", + "%u": "%u", + "U+": "U\\+" +}; + +export default UnescapeUnicodeCharacters;