Merge branch 'tlwr-feature-ebcdic'

2017-05-19 13:44:18 +00:00 · 2017-05-19 13:44:18 +00:00 · 7b18a2f46f
parent 9a6e4b1e85 19103a64e5
commit 7b18a2f46f
6 changed files with 2695 additions and 32 deletions
--- a/src/core/config/Categories.js
+++ b/src/core/config/Categories.js
@ -61,7 +61,8 @@ const Categories = [
            "Hex to PEM",
            "Parse ASN.1 hex string",
            "Change IP format",
-            "Text encoding",
+            "Encode text",
+            "Decode text",
            "Swap endianness",
        ]
    },
@ -143,7 +144,8 @@ const Categories = [
    {
        name: "Language",
        ops: [
-            "Text encoding",
+            "Encode text",
+            "Decode text",
            "Unescape Unicode Characters",
        ]
    },
--- a/src/core/config/OperationConfig.js
+++ b/src/core/config/OperationConfig.js
@ -887,21 +887,43 @@ const OperationConfig = {
            }
        ]
    },
-    "Text encoding": {
-        description: "Translates the data between different character encodings.<br><br>Supported charsets are:<ul><li>UTF8</li><li>UTF16</li><li>UTF16LE (little-endian)</li><li>UTF16BE (big-endian)</li><li>Hex</li><li>Base64</li><li>Latin1 (ISO-8859-1)</li><li>Windows-1251</li></ul>",
-        run: CharEnc.run,
+    "Encode text": {
+        description: [
+            "Encodes text into the chosen character encoding.",
+            "<br><br>",
+            "Supported charsets are:",
+            "<ul>",
+            Object.keys(CharEnc.IO_FORMAT).map(e => `<li>${e}</li>`).join("\n"),
+            "</ul>",
+        ].join("\n"),
+        run: CharEnc.runEncode,
        inputType: "string",
+        outputType: "byteArray",
+        args: [
+            {
+                name: "Encoding",
+                type: "option",
+                value: Object.keys(CharEnc.IO_FORMAT),
+            },
+        ]
+    },
+    "Decode text": {
+        description: [
+            "Decodes text from the chosen character encoding.",
+            "<br><br>",
+            "Supported charsets are:",
+            "<ul>",
+            Object.keys(CharEnc.IO_FORMAT).map(e => `<li>${e}</li>`).join("\n"),
+            "</ul>",
+        ].join("\n"),
+        run: CharEnc.runDecode,
+        inputType: "byteArray",
        outputType: "string",
        args: [
            {
-                name: "Input type",
+                name: "Encoding",
                type: "option",
-                value: CharEnc.IO_FORMAT
-            },
-            {
-                name: "Output type",
-                type: "option",
-                value: CharEnc.IO_FORMAT
+                value: Object.keys(CharEnc.IO_FORMAT),
            },
        ]
    },
--- a/src/core/lib/codepage.js
+++ b/src/core/lib/codepage.js
--- a/src/core/operations/CharEnc.js
+++ b/src/core/operations/CharEnc.js
@ -1,3 +1,4 @@
+import cptable from "../lib/codepage.js";
 import Utils from "../Utils.js";
 import CryptoJS from "crypto-js";

@ -17,34 +18,82 @@ const CharEnc = {
     * @constant
     * @default
     */
-    IO_FORMAT: ["UTF8", "UTF16", "UTF16LE", "UTF16BE", "Latin1", "Windows-1251", "Hex", "Base64"],
+    IO_FORMAT: { // encoding label -> numeric code page value; runEncode/runDecode look up args[0] here
+        "UTF-8 (65001)": 65001,
+        "UTF-7 (65000)": 65000,
+        "UTF16LE (1200)": 1200,
+        "UTF16BE (1201)": 1201,
+        "UTF16 (1201)": 1201, // same code page value as "UTF16BE (1201)"
+        "IBM EBCDIC International (500)": 500,
+        "IBM EBCDIC US-Canada (37)": 37,
+        "Windows-874 Thai (874)": 874,
+        "Japanese Shift-JIS (932)": 932,
+        "Simplified Chinese GBK (936)": 936,
+        "Korean (949)": 949,
+        "Traditional Chinese Big5 (950)": 950,
+        "Windows-1250 Central European (1250)": 1250,
+        "Windows-1251 Cyrillic (1251)": 1251,
+        "Windows-1252 Latin (1252)": 1252,
+        "Windows-1253 Greek (1253)": 1253,
+        "Windows-1254 Turkish (1254)": 1254,
+        "Windows-1255 Hebrew (1255)": 1255,
+        "Windows-1256 Arabic (1256)": 1256,
+        "Windows-1257 Baltic (1257)": 1257,
+        "Windows-1258 Vietnam (1258)": 1258,
+        "US-ASCII (20127)": 20127,
+        "Simplified Chinese GB2312 (20936)": 20936,
+        "KOI8-R Russian Cyrillic (20866)": 20866,
+        "KOI8-U Ukrainian Cyrillic (21866)": 21866,
+        "ISO-8859-1 Latin 1 Western European (28591)": 28591,
+        "ISO-8859-2 Latin 2 Central European (28592)": 28592,
+        "ISO-8859-3 Latin 3 South European (28593)": 28593,
+        "ISO-8859-4 Latin 4 North European (28594)": 28594,
+        "ISO-8859-5 Latin/Cyrillic (28595)": 28595,
+        "ISO-8859-6 Latin/Arabic (28596)": 28596,
+        "ISO-8859-7 Latin/Greek (28597)": 28597,
+        "ISO-8859-8 Latin/Hebrew (28598)": 28598,
+        "ISO-8859-9 Latin 5 Turkish (28599)": 28599,
+        "ISO-8859-10 Latin 6 Nordic (28600)": 28600,
+        "ISO-8859-11 Latin/Thai (28601)": 28601,
+        "ISO-8859-13 Latin 7 Baltic Rim (28603)": 28603,
+        "ISO-8859-14 Latin 8 Celtic (28604)": 28604,
+        "ISO-8859-15 Latin 9 (28605)": 28605,
+        "ISO-8859-16 Latin 10 (28606)": 28606,
+        "ISO-2022 JIS Japanese (50222)": 50222,
+        "EUC Japanese (51932)": 51932,
+        "EUC Korean (51949)": 51949,
+        "Simplified Chinese GB18030 (54936)": 54936,
+    },

    /**
-     * Text encoding operation.
+     * Encode text operation.
+     * @author tlwr [toby@toby.codes]
     *
     * @param {string} input
     * @param {Object[]} args
-     * @returns {string}
+     * @returns {byteArray}
     */
-    run: function(input, args) {
-        let inputFormat = args[0],
-            outputFormat = args[1];
-
-        if (inputFormat === "Windows-1251") {
-            input = Utils.win1251ToUnicode(input);
-            input = CryptoJS.enc.Utf8.parse(input);
-        } else {
-            input = Utils.format[inputFormat].parse(input);
-        }
-
-        if (outputFormat === "Windows-1251") {
-            input = CryptoJS.enc.Utf8.stringify(input);
-            return Utils.unicodeToWin1251(input);
-        } else {
-            return Utils.format[outputFormat].stringify(input);
-        }
+    runEncode: function(input, args) {
+        // Resolve the selected encoding label to its numeric code page value.
+        const codePage = CharEnc.IO_FORMAT[args[0]];
+        // Copy the encoder's output into a plain Array before returning it.
+        return Array.from(cptable.utils.encode(codePage, input));
+    },

+
+    /**
+     * Decode text operation.
+     *
+     * Looks up the code page value for the selected encoding label and hands
+     * it, together with the input bytes, to the code page library's decoder.
+     *
+     * @author tlwr [toby@toby.codes]
+     *
+     * @param {byteArray} input - The bytes to decode.
+     * @param {Object[]} args - args[0] is the encoding label, a key of CharEnc.IO_FORMAT.
+     * @returns {string}
+     */
+    runDecode: function(input, args) {
+        const codePage = CharEnc.IO_FORMAT[args[0]];
+        return cptable.utils.decode(codePage, input);
+    },
 };

 export default CharEnc;
--- a/test/index.js
+++ b/test/index.js
@ -13,6 +13,7 @@ import "babel-polyfill";
 import TestRegister from "./TestRegister.js";
 import "./tests/operations/Base58.js";
 import "./tests/operations/ByteRepr.js";
+import "./tests/operations/CharEnc.js";
 import "./tests/operations/Code.js";
 import "./tests/operations/Compress.js";
 import "./tests/operations/FlowControl.js";
--- a/test/tests/operations/CharEnc.js
+++ b/test/tests/operations/CharEnc.js
@ -0,0 +1,71 @@
+/**
+ * CharEnc tests.
+ *
+ * @author tlwr [toby@toby.codes]
+ * @copyright Crown Copyright 2017
+ * @license Apache-2.0
+ */
+import TestRegister from "../../TestRegister.js";
+
+TestRegister.addTests([ // hands the test definitions below to TestRegister
+    { // round trip: empty input encoded, then decoded with the same charset
+        name: "Encode text, Decode text: nothing",
+        input: "",
+        expectedOutput: "",
+        recipeConfig: [
+            {
+                "op": "Encode text",
+                "args": ["UTF-8 (65001)"]
+            },
+            {
+                "op": "Decode text",
+                "args": ["UTF-8 (65001)"]
+            },
+        ],
+    },
+    { // round trip: "hello" encoded, then decoded with the same charset
+        name: "Encode text, Decode text: hello",
+        input: "hello",
+        expectedOutput: "hello",
+        recipeConfig: [
+            {
+                "op": "Encode text",
+                "args": ["UTF-8 (65001)"]
+            },
+            {
+                "op": "Decode text",
+                "args": ["UTF-8 (65001)"]
+            },
+        ],
+    },
+    { // encode only: the result is piped through "To Hex" so it can be compared as text
+        name: "Encode text (EBCDIC): hello",
+        input: "hello",
+        expectedOutput: "88 85 93 93 96",
+        recipeConfig: [
+            {
+                "op": "Encode text",
+                "args": ["IBM EBCDIC International (500)"]
+            },
+            {
+                "op": "To Hex",
+                "args": ["Space"]
+            },
+        ],
+    },
+    { // inverse of the previous test: hex text in, decoded string out
+        name: "Decode text (EBCDIC): 88 85 93 93 96",
+        input: "88 85 93 93 96",
+        expectedOutput: "hello",
+        recipeConfig: [
+            {
+                "op": "From Hex",
+                "args": ["Space"]
+            },
+            {
+                "op": "Decode text",
+                "args": ["IBM EBCDIC International (500)"]
+            },
+        ],
+    },
+]);