diff --git a/src/core/config/Categories.js b/src/core/config/Categories.js
index 2038ad50..187cb405 100755
--- a/src/core/config/Categories.js
+++ b/src/core/config/Categories.js
@@ -189,6 +189,7 @@ const Categories = [
             "Find / Replace",
             "Regular expression",
             "Offset checker",
+            "Hamming Distance",
             "Convert distance",
             "Convert area",
             "Convert mass",
diff --git a/src/core/config/OperationConfig.js b/src/core/config/OperationConfig.js
index 8b3b61ff..41fbfe07 100755
--- a/src/core/config/OperationConfig.js
+++ b/src/core/config/OperationConfig.js
@@ -3913,6 +3913,29 @@ const OperationConfig = {
             }
         ]
     },
+    "Hamming Distance": {
+        module: "Default",
+        description: "In information theory, the Hamming distance between two strings of equal length is the number of positions at which the corresponding symbols are different. In other words, it measures the minimum number of substitutions required to change one string into the other, or the minimum number of errors that could have transformed one string into the other. In a more general context, the Hamming distance is one of several string metrics for measuring the edit distance between two sequences.",
+        inputType: "string",
+        outputType: "string",
+        args: [
+            {
+                name: "Delimiter",
+                type: "binaryShortString",
+                value: StrUtils.HAMMING_DELIM
+            },
+            {
+                name: "Unit",
+                type: "option",
+                value: StrUtils.HAMMING_UNIT
+            },
+            {
+                name: "Input type",
+                type: "option",
+                value: StrUtils.HAMMING_INPUT_TYPE
+            }
+        ]
+    }
 };


diff --git a/src/core/config/modules/Default.js b/src/core/config/modules/Default.js
index b36e00aa..3963d943 100644
--- a/src/core/config/modules/Default.js
+++ b/src/core/config/modules/Default.js
@@ -110,6 +110,7 @@ OpModules.Default = {
     "Unescape string": StrUtils.runUnescape,
     "Head": StrUtils.runHead,
     "Tail": StrUtils.runTail,
+    "Hamming Distance": StrUtils.runHamming,
     "Remove whitespace": Tidy.runRemoveWhitespace,
     "Remove null bytes": Tidy.runRemoveNulls,
     "Drop bytes": Tidy.runDropBytes,
diff --git a/src/core/operations/StrUtils.js b/src/core/operations/StrUtils.js
index 23b5eb26..dd15b327 100755
--- a/src/core/operations/StrUtils.js
+++ b/src/core/operations/StrUtils.js
@@ -509,6 +509,94 @@ const StrUtils = {
     },


+    /**
+     * Default value of the "Delimiter" argument that separates the two samples.
+     * @constant
+     * @default
+     */
+    HAMMING_DELIM: "\\n\\n",
+    /**
+     * Choices offered for the "Input type" argument.
+     * @constant
+     * @default
+     */
+    HAMMING_INPUT_TYPE: ["Raw string", "Hex"],
+    /**
+     * Choices offered for the "Unit" argument.
+     * @constant
+     * @default
+     */
+    HAMMING_UNIT: ["Byte", "Bit"],
+
+    /**
+     * Hamming Distance operation.
+     *
+     * Splits the input into exactly two samples on the delimiter, converts each
+     * sample to bytes and counts the positions at which they differ, either per
+     * byte or per bit depending on the Unit argument.
+     *
+     * @author GCHQ Contributor [2]
+     *
+     * @param {string} input - The two samples joined by the delimiter.
+     * @param {Object[]} args - [delimiter, unit ("Byte" or "Bit"), input type ("Raw string" or "Hex")].
+     * @returns {string} The distance as a decimal string, or an "Error: ..." message
+     *     if there are not exactly two samples or their byte lengths differ.
+     */
+    runHamming: function(input, args) {
+        const delim = args[0],
+            byByte = args[1] === "Byte",
+            inputType = args[2],
+            samples = input.split(delim);
+
+        if (samples.length !== 2) {
+            return "Error: You can only calculate the edit distance between 2 strings. Please ensure exactly two inputs are provided, separated by the specified delimiter.";
+        }
+
+        // Utils.fromHex and Utils.strToByteArray live outside this patch; they are
+        // assumed to return indexable sequences of byte values - confirm in Utils.
+        let lhsBytes, rhsBytes;
+
+        if (inputType === "Hex") {
+            lhsBytes = Utils.fromHex(samples[0]);
+            rhsBytes = Utils.fromHex(samples[1]);
+        } else {
+            lhsBytes = Utils.strToByteArray(samples[0]);
+            rhsBytes = Utils.strToByteArray(samples[1]);
+        }
+
+        // Validate the decoded lengths rather than the text lengths: the loop
+        // below pairs decoded elements, and decoding may not preserve length.
+        if (lhsBytes.length !== rhsBytes.length) {
+            return "Error: Both inputs must be of the same length.";
+        }
+
+        let dist = 0;
+
+        for (let i = 0; i < lhsBytes.length; i++) {
+            const lhs = lhsBytes[i],
+                rhs = rhsBytes[i];
+
+            if (byByte) {
+                if (lhs !== rhs) {
+                    dist++;
+                }
+                continue;
+            }
+
+            // Bit mode: count the set bits of the XOR. Each pass of the loop
+            // clears the lowest set bit, so it runs once per differing bit.
+            let xord = lhs ^ rhs;
+
+            while (xord) {
+                dist++;
+                xord &= xord - 1;
+            }
+        }
+
+        return dist.toString();
+    },
+
+
     /**
      * Adds HTML highlights to matches within a string.
      *