[web] ML cleanup - Part 4/x (#1761)
This commit is contained in: commit 1edafd3568
@@ -1,88 +0,0 @@
import { Matrix } from "ml-matrix";
import { Point } from "services/face/geom";
import { FaceAlignment, FaceDetection } from "services/face/types";
import { getSimilarityTransformation } from "similarity-transformation";

const ARCFACE_LANDMARKS = [
    [38.2946, 51.6963],
    [73.5318, 51.5014],
    [56.0252, 71.7366],
    [56.1396, 92.2848],
] as Array<[number, number]>;

const ARCFACE_LANDMARKS_FACE_SIZE = 112;

const ARC_FACE_5_LANDMARKS = [
    [38.2946, 51.6963],
    [73.5318, 51.5014],
    [56.0252, 71.7366],
    [41.5493, 92.3655],
    [70.7299, 92.2041],
] as Array<[number, number]>;

/**
 * Compute and return an {@link FaceAlignment} for the given face detection.
 *
 * @param faceDetection A geometry indicating a face detected in an image.
 */
export const faceAlignment = (faceDetection: FaceDetection): FaceAlignment => {
    const landmarkCount = faceDetection.landmarks.length;
    return getFaceAlignmentUsingSimilarityTransform(
        faceDetection,
        normalizeLandmarks(
            landmarkCount === 5 ? ARC_FACE_5_LANDMARKS : ARCFACE_LANDMARKS,
            ARCFACE_LANDMARKS_FACE_SIZE,
        ),
    );
};

function getFaceAlignmentUsingSimilarityTransform(
    faceDetection: FaceDetection,
    alignedLandmarks: Array<[number, number]>,
): FaceAlignment {
    const landmarksMat = new Matrix(
        faceDetection.landmarks
            .map((p) => [p.x, p.y])
            .slice(0, alignedLandmarks.length),
    ).transpose();
    const alignedLandmarksMat = new Matrix(alignedLandmarks).transpose();

    const simTransform = getSimilarityTransformation(
        landmarksMat,
        alignedLandmarksMat,
    );

    const RS = Matrix.mul(simTransform.rotation, simTransform.scale);
    const TR = simTransform.translation;

    const affineMatrix = [
        [RS.get(0, 0), RS.get(0, 1), TR.get(0, 0)],
        [RS.get(1, 0), RS.get(1, 1), TR.get(1, 0)],
        [0, 0, 1],
    ];

    const size = 1 / simTransform.scale;
    const meanTranslation = simTransform.toMean.sub(0.5).mul(size);
    const centerMat = simTransform.fromMean.sub(meanTranslation);
    const center = new Point(centerMat.get(0, 0), centerMat.get(1, 0));
    const rotation = -Math.atan2(
        simTransform.rotation.get(0, 1),
        simTransform.rotation.get(0, 0),
    );

    return {
        affineMatrix,
        center,
        size,
        rotation,
    };
}

function normalizeLandmarks(
    landmarks: Array<[number, number]>,
    faceSize: number,
): Array<[number, number]> {
    return landmarks.map((landmark) =>
        landmark.map((p) => p / faceSize),
    ) as Array<[number, number]>;
}
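(Example, not part of the diff.) The ArcFace landmark tables in the deleted alignment module above are expressed in the pixel space of a 112×112 face crop; normalizeLandmarks just rescales them to the unit square. A standalone worked check:

// Standalone sketch: normalizing the canonical 5-point ArcFace landmarks
// from 112x112 pixel space to [0, 1], as normalizeLandmarks does above.
const canonical: Array<[number, number]> = [
    [38.2946, 51.6963], // left eye
    [73.5318, 51.5014], // right eye
    [56.0252, 71.7366], // nose tip
    [41.5493, 92.3655], // left mouth corner
    [70.7299, 92.2041], // right mouth corner
];
const normalized = canonical.map(
    ([x, y]): [number, number] => [x / 112, y / 112],
);
// normalized[0] ≈ [0.3419, 0.4616]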
@@ -1,187 +0,0 @@
import { Face } from "services/face/types";
import { createGrayscaleIntMatrixFromNormalized2List } from "utils/image";
import { mobileFaceNetFaceSize } from "./embed";

/**
 * Laplacian blur detection.
 */
export const detectBlur = (
    alignedFaces: Float32Array,
    faces: Face[],
): number[] => {
    const numFaces = Math.round(
        alignedFaces.length /
            (mobileFaceNetFaceSize * mobileFaceNetFaceSize * 3),
    );
    const blurValues: number[] = [];
    for (let i = 0; i < numFaces; i++) {
        const face = faces[i];
        const direction = faceDirection(face);
        const faceImage = createGrayscaleIntMatrixFromNormalized2List(
            alignedFaces,
            i,
        );
        const laplacian = applyLaplacian(faceImage, direction);
        blurValues.push(matrixVariance(laplacian));
    }
    return blurValues;
};

type FaceDirection = "left" | "right" | "straight";

const faceDirection = (face: Face): FaceDirection => {
    const landmarks = face.detection.landmarks;
    const leftEye = landmarks[0];
    const rightEye = landmarks[1];
    const nose = landmarks[2];
    const leftMouth = landmarks[3];
    const rightMouth = landmarks[4];

    const eyeDistanceX = Math.abs(rightEye.x - leftEye.x);
    const eyeDistanceY = Math.abs(rightEye.y - leftEye.y);
    const mouthDistanceY = Math.abs(rightMouth.y - leftMouth.y);

    const faceIsUpright =
        Math.max(leftEye.y, rightEye.y) + 0.5 * eyeDistanceY < nose.y &&
        nose.y + 0.5 * mouthDistanceY < Math.min(leftMouth.y, rightMouth.y);

    const noseStickingOutLeft =
        nose.x < Math.min(leftEye.x, rightEye.x) &&
        nose.x < Math.min(leftMouth.x, rightMouth.x);

    const noseStickingOutRight =
        nose.x > Math.max(leftEye.x, rightEye.x) &&
        nose.x > Math.max(leftMouth.x, rightMouth.x);

    const noseCloseToLeftEye =
        Math.abs(nose.x - leftEye.x) < 0.2 * eyeDistanceX;
    const noseCloseToRightEye =
        Math.abs(nose.x - rightEye.x) < 0.2 * eyeDistanceX;

    if (noseStickingOutLeft || (faceIsUpright && noseCloseToLeftEye)) {
        return "left";
    } else if (noseStickingOutRight || (faceIsUpright && noseCloseToRightEye)) {
        return "right";
    }

    return "straight";
};

/**
 * Return a new image by applying a Laplacian blur kernel to each pixel.
 */
const applyLaplacian = (
    image: number[][],
    direction: FaceDirection,
): number[][] => {
    const paddedImage: number[][] = padImage(image, direction);
    const numRows = paddedImage.length - 2;
    const numCols = paddedImage[0].length - 2;

    // Create an output image initialized to 0.
    const outputImage: number[][] = Array.from({ length: numRows }, () =>
        new Array(numCols).fill(0),
    );

    // Define the Laplacian kernel.
    const kernel: number[][] = [
        [0, 1, 0],
        [1, -4, 1],
        [0, 1, 0],
    ];

    // Apply the kernel to each pixel
    for (let i = 0; i < numRows; i++) {
        for (let j = 0; j < numCols; j++) {
            let sum = 0;
            for (let ki = 0; ki < 3; ki++) {
                for (let kj = 0; kj < 3; kj++) {
                    sum += paddedImage[i + ki][j + kj] * kernel[ki][kj];
                }
            }
            // Adjust the output value if necessary (e.g., clipping).
            outputImage[i][j] = sum;
        }
    }

    return outputImage;
};

const padImage = (image: number[][], direction: FaceDirection): number[][] => {
    const removeSideColumns = 56; /* must be even */

    const numRows = image.length;
    const numCols = image[0].length;
    const paddedNumCols = numCols + 2 - removeSideColumns;
    const paddedNumRows = numRows + 2;

    // Create a new matrix with extra padding.
    const paddedImage: number[][] = Array.from({ length: paddedNumRows }, () =>
        new Array(paddedNumCols).fill(0),
    );

    if (direction === "straight") {
        // Copy original image into the center of the padded image.
        for (let i = 0; i < numRows; i++) {
            for (let j = 0; j < paddedNumCols - 2; j++) {
                paddedImage[i + 1][j + 1] =
                    image[i][j + Math.round(removeSideColumns / 2)];
            }
        }
    } else if (direction === "left") {
        // If the face is facing left, we only take the right side of the face image.
        for (let i = 0; i < numRows; i++) {
            for (let j = 0; j < paddedNumCols - 2; j++) {
                paddedImage[i + 1][j + 1] = image[i][j + removeSideColumns];
            }
        }
    } else if (direction === "right") {
        // If the face is facing right, we only take the left side of the face image.
        for (let i = 0; i < numRows; i++) {
            for (let j = 0; j < paddedNumCols - 2; j++) {
                paddedImage[i + 1][j + 1] = image[i][j];
            }
        }
    }

    // Reflect padding
    // Top and bottom rows
    for (let j = 1; j <= paddedNumCols - 2; j++) {
        paddedImage[0][j] = paddedImage[2][j]; // Top row
        paddedImage[numRows + 1][j] = paddedImage[numRows - 1][j]; // Bottom row
    }
    // Left and right columns
    for (let i = 0; i < numRows + 2; i++) {
        paddedImage[i][0] = paddedImage[i][2]; // Left column
        paddedImage[i][paddedNumCols - 1] = paddedImage[i][paddedNumCols - 3]; // Right column
    }

    return paddedImage;
};

const matrixVariance = (matrix: number[][]): number => {
    const numRows = matrix.length;
    const numCols = matrix[0].length;
    const totalElements = numRows * numCols;

    // Calculate the mean.
    let mean: number = 0;
    matrix.forEach((row) => {
        row.forEach((value) => {
            mean += value;
        });
    });
    mean /= totalElements;

    // Calculate the variance.
    let variance: number = 0;
    matrix.forEach((row) => {
        row.forEach((value) => {
            const diff: number = value - mean;
            variance += diff * diff;
        });
    });
    variance /= totalElements;

    return variance;
};
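(Example, not part of the diff.) The blur score above is the classic variance-of-Laplacian measure: sharp images have strong second-derivative responses, so high variance; blurred ones are flat. A self-contained sketch of the core computation, without the padding and face-direction handling used above:

// Minimal variance-of-Laplacian sketch (interior pixels only, no padding).
const laplacianVariance = (img: number[][]): number => {
    const responses: number[] = [];
    for (let i = 1; i < img.length - 1; i++) {
        for (let j = 1; j < img[0].length - 1; j++) {
            // The 3x3 kernel [[0,1,0],[1,-4,1],[0,1,0]] applied at (i, j).
            responses.push(
                img[i - 1][j] + img[i + 1][j] + img[i][j - 1] + img[i][j + 1] -
                    4 * img[i][j],
            );
        }
    }
    const mean = responses.reduce((a, b) => a + b, 0) / responses.length;
    return (
        responses.reduce((a, v) => a + (v - mean) * (v - mean), 0) /
        responses.length
    );
};

// A hard vertical edge scores high; a constant patch scores 0.
laplacianVariance([[0, 0, 255, 255], [0, 0, 255, 255], [0, 0, 255, 255]]); // 65025
laplacianVariance([[9, 9, 9, 9], [9, 9, 9, 9], [9, 9, 9, 9]]); // 0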
@@ -1,32 +0,0 @@
import { Box, enlargeBox } from "services/face/geom";
import { FaceCrop, FaceDetection } from "services/face/types";
import { cropWithRotation } from "utils/image";
import { faceAlignment } from "./align";

export const getFaceCrop = (
    imageBitmap: ImageBitmap,
    faceDetection: FaceDetection,
): FaceCrop => {
    const alignment = faceAlignment(faceDetection);

    const padding = 0.25;
    const maxSize = 256;

    const alignmentBox = new Box({
        x: alignment.center.x - alignment.size / 2,
        y: alignment.center.y - alignment.size / 2,
        width: alignment.size,
        height: alignment.size,
    }).round();
    const scaleForPadding = 1 + padding * 2;
    const paddedBox = enlargeBox(alignmentBox, scaleForPadding).round();
    const faceImageBitmap = cropWithRotation(imageBitmap, paddedBox, 0, {
        width: maxSize,
        height: maxSize,
    });

    return {
        image: faceImageBitmap,
        imageBox: paddedBox,
    };
};
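(Example, not part of the diff.) With padding = 0.25 the crop box is enlarged by 1 + 2 × 0.25 = 1.5. Illustrative numbers, assuming enlargeBox scales a box about its center:

// Illustrative crop-box arithmetic for an alignment centered at (200, 150)
// with size 100; assumes enlargeBox scales the box about its center.
const center = { x: 200, y: 150 };
const size = 100;
const box = { x: center.x - size / 2, y: center.y - size / 2, width: size, height: size };
// box = { x: 150, y: 100, width: 100, height: 100 }
const scale = 1 + 0.25 * 2; // 1.5
const padded = {
    x: box.x - (box.width * (scale - 1)) / 2, // 125
    y: box.y - (box.height * (scale - 1)) / 2, // 75
    width: box.width * scale, // 150
    height: box.height * scale, // 150
};
// The padded region is then rendered via cropWithRotation with a
// 256x256 output size.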
@@ -9,7 +9,7 @@ import {
    openDB,
} from "idb";
import isElectron from "is-electron";
import { Face, MLLibraryData, MlFileData, Person } from "services/face/types";
import { Face, MlFileData, Person } from "services/face/types";
import {
    DEFAULT_ML_SEARCH_CONFIG,
    MAX_ML_SYNC_ERROR_COUNT,

@@ -50,7 +50,7 @@ interface MLDb extends DBSchema {
    };
    library: {
        key: string;
        value: MLLibraryData;
        value: unknown;
    };
    configs: {
        key: string;

@@ -177,6 +177,7 @@ class MLIDbStorage {
                ML_SEARCH_CONFIG_NAME,
            );

            db.deleteObjectStore("library");
            db.deleteObjectStore("things");
        } catch {
            // TODO: ignore for now as we finalize the new version

@@ -400,16 +401,6 @@ class MLIDbStorage {
        return db.put("versions", version, index);
    }

    public async getLibraryData() {
        const db = await this.db;
        return db.get("library", "data");
    }

    public async putLibraryData(data: MLLibraryData) {
        const db = await this.db;
        return db.put("library", data, "data");
    }

    public async getConfig<T extends Config>(name: string, def: T) {
        const db = await this.db;
        const tx = db.transaction("configs", "readwrite");
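(Example, not part of the diff.) The deleteObjectStore calls run inside an idb schema upgrade; this commit additionally drops the now-unused "library" store (its schema value type is loosened to unknown) and removes the getLibraryData/putLibraryData accessors. A minimal sketch of that upgrade pattern with the idb package; the database name and version here are placeholders, not the ones MLIDbStorage actually uses:

import { openDB } from "idb";

// Sketch only: "ml" and the version number are illustrative placeholders.
const db = await openDB("ml", 4, {
    upgrade(db) {
        // Deleting a store discards its data; this is what the cleanup
        // relies on for the legacy "library" and "things" stores.
        if (db.objectStoreNames.contains("library"))
            db.deleteObjectStore("library");
        if (db.objectStoreNames.contains("things"))
            db.deleteObjectStore("things");
    },
});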
@@ -1,316 +0,0 @@
import { workerBridge } from "@/next/worker/worker-bridge";
import { euclidean } from "hdbscan";
import {
    Box,
    Dimensions,
    Point,
    boxFromBoundingBox,
    newBox,
} from "services/face/geom";
import { FaceDetection } from "services/face/types";
import {
    Matrix,
    applyToPoint,
    compose,
    scale,
    translate,
} from "transformation-matrix";
import {
    clamp,
    getPixelBilinear,
    normalizePixelBetween0And1,
} from "utils/image";

/**
 * Detect faces in the given {@link imageBitmap}.
 *
 * The model used is YOLO, running in an ONNX runtime.
 */
export const detectFaces = async (
    imageBitmap: ImageBitmap,
): Promise<Array<FaceDetection>> => {
    const maxFaceDistancePercent = Math.sqrt(2) / 100;
    const maxFaceDistance = imageBitmap.width * maxFaceDistancePercent;
    const preprocessResult = preprocessImageBitmapToFloat32ChannelsFirst(
        imageBitmap,
        640,
        640,
    );
    const data = preprocessResult.data;
    const resized = preprocessResult.newSize;
    const outputData = await workerBridge.detectFaces(data);
    const faces = getFacesFromYOLOOutput(outputData as Float32Array, 0.7);
    const inBox = newBox(0, 0, resized.width, resized.height);
    const toBox = newBox(0, 0, imageBitmap.width, imageBitmap.height);
    const transform = computeTransformToBox(inBox, toBox);
    const faceDetections: Array<FaceDetection> = faces?.map((f) => {
        const box = transformBox(f.box, transform);
        const normLandmarks = f.landmarks;
        const landmarks = transformPoints(normLandmarks, transform);
        return {
            box,
            landmarks,
            probability: f.probability as number,
        } as FaceDetection;
    });
    return removeDuplicateDetections(faceDetections, maxFaceDistance);
};

const preprocessImageBitmapToFloat32ChannelsFirst = (
    imageBitmap: ImageBitmap,
    requiredWidth: number,
    requiredHeight: number,
    maintainAspectRatio: boolean = true,
    normFunction: (pixelValue: number) => number = normalizePixelBetween0And1,
) => {
    // Create an OffscreenCanvas and set its size.
    const offscreenCanvas = new OffscreenCanvas(
        imageBitmap.width,
        imageBitmap.height,
    );
    const ctx = offscreenCanvas.getContext("2d");
    ctx.drawImage(imageBitmap, 0, 0, imageBitmap.width, imageBitmap.height);
    const imageData = ctx.getImageData(
        0,
        0,
        imageBitmap.width,
        imageBitmap.height,
    );
    const pixelData = imageData.data;

    let scaleW = requiredWidth / imageBitmap.width;
    let scaleH = requiredHeight / imageBitmap.height;
    if (maintainAspectRatio) {
        const scale = Math.min(
            requiredWidth / imageBitmap.width,
            requiredHeight / imageBitmap.height,
        );
        scaleW = scale;
        scaleH = scale;
    }
    const scaledWidth = clamp(
        Math.round(imageBitmap.width * scaleW),
        0,
        requiredWidth,
    );
    const scaledHeight = clamp(
        Math.round(imageBitmap.height * scaleH),
        0,
        requiredHeight,
    );

    const processedImage = new Float32Array(
        1 * 3 * requiredWidth * requiredHeight,
    );

    // Populate the Float32Array with normalized pixel values
    let pixelIndex = 0;
    const channelOffsetGreen = requiredHeight * requiredWidth;
    const channelOffsetBlue = 2 * requiredHeight * requiredWidth;
    for (let h = 0; h < requiredHeight; h++) {
        for (let w = 0; w < requiredWidth; w++) {
            let pixel: {
                r: number;
                g: number;
                b: number;
            };
            if (w >= scaledWidth || h >= scaledHeight) {
                pixel = { r: 114, g: 114, b: 114 };
            } else {
                pixel = getPixelBilinear(
                    w / scaleW,
                    h / scaleH,
                    pixelData,
                    imageBitmap.width,
                    imageBitmap.height,
                );
            }
            processedImage[pixelIndex] = normFunction(pixel.r);
            processedImage[pixelIndex + channelOffsetGreen] = normFunction(
                pixel.g,
            );
            processedImage[pixelIndex + channelOffsetBlue] = normFunction(
                pixel.b,
            );
            pixelIndex++;
        }
    }

    return {
        data: processedImage,
        originalSize: {
            width: imageBitmap.width,
            height: imageBitmap.height,
        },
        newSize: { width: scaledWidth, height: scaledHeight },
    };
};

/**
 * @param rowOutput A Float32Array of shape [25200, 16], where each row
 * represents a bounding box.
 */
const getFacesFromYOLOOutput = (
    rowOutput: Float32Array,
    minScore: number,
): Array<FaceDetection> => {
    const faces: Array<FaceDetection> = [];
    // Iterate over each row.
    for (let i = 0; i < rowOutput.length; i += 16) {
        const score = rowOutput[i + 4];
        if (score < minScore) {
            continue;
        }
        // The first 4 values represent the bounding box in center form:
        //
        // (xCenter, yCenter, width, height)
        //
        const xCenter = rowOutput[i];
        const yCenter = rowOutput[i + 1];
        const width = rowOutput[i + 2];
        const height = rowOutput[i + 3];
        const xMin = xCenter - width / 2.0; // topLeft
        const yMin = yCenter - height / 2.0; // topLeft

        const leftEyeX = rowOutput[i + 5];
        const leftEyeY = rowOutput[i + 6];
        const rightEyeX = rowOutput[i + 7];
        const rightEyeY = rowOutput[i + 8];
        const noseX = rowOutput[i + 9];
        const noseY = rowOutput[i + 10];
        const leftMouthX = rowOutput[i + 11];
        const leftMouthY = rowOutput[i + 12];
        const rightMouthX = rowOutput[i + 13];
        const rightMouthY = rowOutput[i + 14];

        const box = new Box({
            x: xMin,
            y: yMin,
            width: width,
            height: height,
        });
        const probability = score as number;
        const landmarks = [
            new Point(leftEyeX, leftEyeY),
            new Point(rightEyeX, rightEyeY),
            new Point(noseX, noseY),
            new Point(leftMouthX, leftMouthY),
            new Point(rightMouthX, rightMouthY),
        ];
        faces.push({ box, landmarks, probability });
    }
    return faces;
};

export const getRelativeDetection = (
    faceDetection: FaceDetection,
    dimensions: Dimensions,
): FaceDetection => {
    const oldBox: Box = faceDetection.box;
    const box = new Box({
        x: oldBox.x / dimensions.width,
        y: oldBox.y / dimensions.height,
        width: oldBox.width / dimensions.width,
        height: oldBox.height / dimensions.height,
    });
    const oldLandmarks: Point[] = faceDetection.landmarks;
    const landmarks = oldLandmarks.map((l) => {
        return new Point(l.x / dimensions.width, l.y / dimensions.height);
    });
    const probability = faceDetection.probability;
    return { box, landmarks, probability };
};

/**
 * Removes duplicate face detections from an array of detections.
 *
 * This function sorts the detections by their probability in descending order,
 * then iterates over them.
 *
 * For each detection, it calculates the Euclidean distance to all other
 * detections.
 *
 * If the distance is less than or equal to the specified threshold
 * (`withinDistance`), the other detection is considered a duplicate and is
 * removed.
 *
 * @param detections - An array of face detections to remove duplicates from.
 *
 * @param withinDistance - The maximum Euclidean distance between two detections
 * for them to be considered duplicates.
 *
 * @returns An array of face detections with duplicates removed.
 */
const removeDuplicateDetections = (
    detections: Array<FaceDetection>,
    withinDistance: number,
) => {
    detections.sort((a, b) => b.probability - a.probability);
    const isSelected = new Map<number, boolean>();
    for (let i = 0; i < detections.length; i++) {
        if (isSelected.get(i) === false) {
            continue;
        }
        isSelected.set(i, true);
        for (let j = i + 1; j < detections.length; j++) {
            if (isSelected.get(j) === false) {
                continue;
            }
            const centeri = getDetectionCenter(detections[i]);
            const centerj = getDetectionCenter(detections[j]);
            const dist = euclidean(
                [centeri.x, centeri.y],
                [centerj.x, centerj.y],
            );
            if (dist <= withinDistance) {
                isSelected.set(j, false);
            }
        }
    }

    const uniques: Array<FaceDetection> = [];
    for (let i = 0; i < detections.length; i++) {
        isSelected.get(i) && uniques.push(detections[i]);
    }
    return uniques;
};

function getDetectionCenter(detection: FaceDetection) {
    const center = new Point(0, 0);
    // TODO: The first 4 landmarks are applicable to blazeface only;
    // this needs to consider eyes, nose and mouth landmarks to take the center.
    detection.landmarks?.slice(0, 4).forEach((p) => {
        center.x += p.x;
        center.y += p.y;
    });

    return new Point(center.x / 4, center.y / 4);
}

function computeTransformToBox(inBox: Box, toBox: Box): Matrix {
    return compose(
        translate(toBox.x, toBox.y),
        scale(toBox.width / inBox.width, toBox.height / inBox.height),
    );
}

function transformPoint(point: Point, transform: Matrix) {
    const txdPoint = applyToPoint(transform, point);
    return new Point(txdPoint.x, txdPoint.y);
}

function transformPoints(points: Point[], transform: Matrix) {
    return points?.map((p) => transformPoint(p, transform));
}

function transformBox(box: Box, transform: Matrix) {
    const topLeft = transformPoint(box.topLeft, transform);
    const bottomRight = transformPoint(box.bottomRight, transform);

    return boxFromBoundingBox({
        left: topLeft.x,
        top: topLeft.y,
        right: bottomRight.x,
        bottom: bottomRight.y,
    });
}
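(Example, not part of the diff.) Each of the 25200 rows of the YOLO output is 16 floats: box center and size, an objectness score, five (x, y) landmarks, and a final value that the code above does not read. A standalone sketch of decoding one row:

// Decode one 16-float YOLO-face row (standalone sketch; layout as in
// getFacesFromYOLOOutput above).
type RawFace = {
    box: { x: number; y: number; width: number; height: number };
    landmarks: Array<{ x: number; y: number }>;
    probability: number;
};

const decodeRow = (row: Float32Array): RawFace => {
    const [xCenter, yCenter, width, height, score] = row;
    const landmarks = [];
    for (let k = 5; k < 15; k += 2)
        landmarks.push({ x: row[k], y: row[k + 1] });
    return {
        box: { x: xCenter - width / 2, y: yCenter - height / 2, width, height },
        landmarks, // left eye, right eye, nose, left mouth, right mouth
        probability: score,
    };
};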
@@ -1,26 +0,0 @@
import { workerBridge } from "@/next/worker/worker-bridge";
import { FaceEmbedding } from "services/face/types";

export const mobileFaceNetFaceSize = 112;

/**
 * Compute embeddings for the given {@link faceData}.
 *
 * The model used is MobileFaceNet, running in an ONNX runtime.
 */
export const faceEmbeddings = async (
    faceData: Float32Array,
): Promise<Array<FaceEmbedding>> => {
    const outputData = await workerBridge.faceEmbeddings(faceData);

    const embeddingSize = 192;
    const embeddings = new Array<FaceEmbedding>(
        outputData.length / embeddingSize,
    );
    for (let i = 0; i < embeddings.length; i++) {
        embeddings[i] = new Float32Array(
            outputData.slice(i * embeddingSize, (i + 1) * embeddingSize),
        );
    }
    return embeddings;
};
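(Example, not part of the diff.) The model returns one flat Float32Array for the whole batch, which is why it is sliced into 192-float chunks. How these embeddings are compared downstream is not part of this commit; purely as an illustration, a common choice for face embeddings is cosine similarity:

// Illustrative only: cosine similarity between two 192-d face embeddings.
// (How ente actually compares embeddings is not shown in this diff.)
const cosineSimilarity = (a: Float32Array, b: Float32Array): number => {
    let dot = 0, na = 0, nb = 0;
    for (let i = 0; i < a.length; i++) {
        dot += a[i] * b[i];
        na += a[i] * a[i];
        nb += b[i] * b[i];
    }
    return dot / (Math.sqrt(na) * Math.sqrt(nb));
};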
@@ -1,26 +1,76 @@
import { openCache } from "@/next/blob-cache";
import log from "@/next/log";
import { faceAlignment } from "services/face/align";
import mlIDbStorage from "services/face/db";
import { detectFaces, getRelativeDetection } from "services/face/detect";
import { faceEmbeddings, mobileFaceNetFaceSize } from "services/face/embed";
import { workerBridge } from "@/next/worker/worker-bridge";
import { euclidean } from "hdbscan";
import { Matrix } from "ml-matrix";
import { Box, Dimensions, Point, enlargeBox, newBox } from "services/face/geom";
import {
    DetectedFace,
    Face,
    FaceAlignment,
    FaceCrop,
    FaceDetection,
    FaceEmbedding,
    MLSyncFileContext,
    type FaceAlignment,
    type MlFileData,
} from "services/face/types";
import { imageBitmapToBlob, warpAffineFloat32List } from "utils/image";
import { detectBlur } from "./blur";
import { getFaceCrop } from "./crop";
import { defaultMLVersion } from "services/machineLearning/machineLearningService";
import { getSimilarityTransformation } from "similarity-transformation";
import type { EnteFile } from "types/file";
import {
    fetchImageBitmap,
    clamp,
    createGrayscaleIntMatrixFromNormalized2List,
    cropWithRotation,
    fetchImageBitmapForContext,
    getFaceId,
    getLocalFile,
    getPixelBilinear,
    imageBitmapToBlob,
    normalizePixelBetween0And1,
    warpAffineFloat32List,
} from "./image";
import { transformFaceDetections } from "./transform-box";

export const syncFileAnalyzeFaces = async (fileContext: MLSyncFileContext) => {
/**
 * Index faces in the given file.
 *
 * This function is the entry point to the indexing pipeline. The file goes
 * through various stages:
 *
 * 1. Downloading the original if needed.
 * 2. Detecting faces using ONNX/YOLO.
 * 3. Aligning the face rectangles and computing blur.
 * 4. Computing embeddings for the detected face (crops).
 *
 * Once all of it is done, it returns the face rectangles and embeddings to the
 * higher layer (which saves them locally for offline use, and encrypts and
 * uploads them to the user's remote storage so that their other devices can
 * download them instead of needing to reindex).
 */
export const indexFaces = async (
    enteFile: EnteFile,
    localFile?: globalThis.File,
) => {
    log.debug(() => ({ a: "Indexing faces in file", enteFile }));
    const fileContext: MLSyncFileContext = { enteFile, localFile };

    const newMlFile = (fileContext.newMlFile = {
        fileId: enteFile.id,
        mlVersion: defaultMLVersion,
        errorCount: 0,
    } as MlFileData);

    try {
        await fetchImageBitmapForContext(fileContext);
        await syncFileAnalyzeFaces(fileContext);
        newMlFile.errorCount = 0;
    } finally {
        fileContext.imageBitmap && fileContext.imageBitmap.close();
    }

    return newMlFile;
};

const syncFileAnalyzeFaces = async (fileContext: MLSyncFileContext) => {
    const { newMlFile } = fileContext;
    const startTime = Date.now();
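(Example, not part of the diff.) The new indexFaces entry point resolves to the MlFileData for a single file, and the try/finally above guarantees the decoded ImageBitmap is closed even when a stage throws. A hypothetical call site; the module path and caller behavior here are assumptions for illustration:

// Hypothetical usage of indexFaces; path and caller behavior are assumed.
import { indexFaces } from "services/face/face"; // assumed module path
import type { EnteFile } from "types/file";

const tryIndex = async (enteFile: EnteFile) => {
    try {
        const mlFile = await indexFaces(enteFile);
        console.log(`indexed ${mlFile.faces?.length ?? 0} faces`);
        return mlFile;
    } catch (e) {
        // indexFaces re-throws; a caller could bump the errorCount field
        // (initialized above) before persisting. This is an assumption.
        console.error("face indexing failed", e);
        throw e;
    }
};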
@@ -43,10 +93,6 @@ export const syncFileAnalyzeFaces = async (fileContext: MLSyncFileContext) => {

const syncFileFaceDetections = async (fileContext: MLSyncFileContext) => {
    const { newMlFile } = fileContext;
    newMlFile.faceDetectionMethod = {
        value: "YoloFace",
        version: 1,
    };
    fileContext.newDetection = true;
    const imageBitmap = await fetchImageBitmapForContext(fileContext);
    const faceDetections = await detectFaces(imageBitmap);
@@ -67,14 +113,265 @@ const syncFileFaceDetections = async (fileContext: MLSyncFileContext) => {
    log.info("[MLService] Detected Faces: ", newMlFile.faces?.length);
};

/**
 * Detect faces in the given {@link imageBitmap}.
 *
 * The model used is YOLO, running in an ONNX runtime.
 */
const detectFaces = async (
    imageBitmap: ImageBitmap,
): Promise<Array<FaceDetection>> => {
    const maxFaceDistancePercent = Math.sqrt(2) / 100;
    const maxFaceDistance = imageBitmap.width * maxFaceDistancePercent;
    const preprocessResult = preprocessImageBitmapToFloat32ChannelsFirst(
        imageBitmap,
        640,
        640,
    );
    const data = preprocessResult.data;
    const resized = preprocessResult.newSize;
    const outputData = await workerBridge.detectFaces(data);
    const faces = getFacesFromYOLOOutput(outputData as Float32Array, 0.7);
    const inBox = newBox(0, 0, resized.width, resized.height);
    const toBox = newBox(0, 0, imageBitmap.width, imageBitmap.height);
    const faceDetections = transformFaceDetections(faces, inBox, toBox);
    return removeDuplicateDetections(faceDetections, maxFaceDistance);
};

const preprocessImageBitmapToFloat32ChannelsFirst = (
    imageBitmap: ImageBitmap,
    requiredWidth: number,
    requiredHeight: number,
    maintainAspectRatio: boolean = true,
    normFunction: (pixelValue: number) => number = normalizePixelBetween0And1,
) => {
    // Create an OffscreenCanvas and set its size.
    const offscreenCanvas = new OffscreenCanvas(
        imageBitmap.width,
        imageBitmap.height,
    );
    const ctx = offscreenCanvas.getContext("2d");
    ctx.drawImage(imageBitmap, 0, 0, imageBitmap.width, imageBitmap.height);
    const imageData = ctx.getImageData(
        0,
        0,
        imageBitmap.width,
        imageBitmap.height,
    );
    const pixelData = imageData.data;

    let scaleW = requiredWidth / imageBitmap.width;
    let scaleH = requiredHeight / imageBitmap.height;
    if (maintainAspectRatio) {
        const scale = Math.min(
            requiredWidth / imageBitmap.width,
            requiredHeight / imageBitmap.height,
        );
        scaleW = scale;
        scaleH = scale;
    }
    const scaledWidth = clamp(
        Math.round(imageBitmap.width * scaleW),
        0,
        requiredWidth,
    );
    const scaledHeight = clamp(
        Math.round(imageBitmap.height * scaleH),
        0,
        requiredHeight,
    );

    const processedImage = new Float32Array(
        1 * 3 * requiredWidth * requiredHeight,
    );

    // Populate the Float32Array with normalized pixel values
    let pixelIndex = 0;
    const channelOffsetGreen = requiredHeight * requiredWidth;
    const channelOffsetBlue = 2 * requiredHeight * requiredWidth;
    for (let h = 0; h < requiredHeight; h++) {
        for (let w = 0; w < requiredWidth; w++) {
            let pixel: {
                r: number;
                g: number;
                b: number;
            };
            if (w >= scaledWidth || h >= scaledHeight) {
                pixel = { r: 114, g: 114, b: 114 };
            } else {
                pixel = getPixelBilinear(
                    w / scaleW,
                    h / scaleH,
                    pixelData,
                    imageBitmap.width,
                    imageBitmap.height,
                );
            }
            processedImage[pixelIndex] = normFunction(pixel.r);
            processedImage[pixelIndex + channelOffsetGreen] = normFunction(
                pixel.g,
            );
            processedImage[pixelIndex + channelOffsetBlue] = normFunction(
                pixel.b,
            );
            pixelIndex++;
        }
    }

    return {
        data: processedImage,
        originalSize: {
            width: imageBitmap.width,
            height: imageBitmap.height,
        },
        newSize: { width: scaledWidth, height: scaledHeight },
    };
};

/**
 * @param rowOutput A Float32Array of shape [25200, 16], where each row
 * represents a bounding box.
 */
const getFacesFromYOLOOutput = (
    rowOutput: Float32Array,
    minScore: number,
): Array<FaceDetection> => {
    const faces: Array<FaceDetection> = [];
    // Iterate over each row.
    for (let i = 0; i < rowOutput.length; i += 16) {
        const score = rowOutput[i + 4];
        if (score < minScore) {
            continue;
        }
        // The first 4 values represent the bounding box in center form:
        //
        // (xCenter, yCenter, width, height)
        //
        const xCenter = rowOutput[i];
        const yCenter = rowOutput[i + 1];
        const width = rowOutput[i + 2];
        const height = rowOutput[i + 3];
        const xMin = xCenter - width / 2.0; // topLeft
        const yMin = yCenter - height / 2.0; // topLeft

        const leftEyeX = rowOutput[i + 5];
        const leftEyeY = rowOutput[i + 6];
        const rightEyeX = rowOutput[i + 7];
        const rightEyeY = rowOutput[i + 8];
        const noseX = rowOutput[i + 9];
        const noseY = rowOutput[i + 10];
        const leftMouthX = rowOutput[i + 11];
        const leftMouthY = rowOutput[i + 12];
        const rightMouthX = rowOutput[i + 13];
        const rightMouthY = rowOutput[i + 14];

        const box = new Box({
            x: xMin,
            y: yMin,
            width: width,
            height: height,
        });
        const probability = score as number;
        const landmarks = [
            new Point(leftEyeX, leftEyeY),
            new Point(rightEyeX, rightEyeY),
            new Point(noseX, noseY),
            new Point(leftMouthX, leftMouthY),
            new Point(rightMouthX, rightMouthY),
        ];
        faces.push({ box, landmarks, probability });
    }
    return faces;
};

const getRelativeDetection = (
    faceDetection: FaceDetection,
    dimensions: Dimensions,
): FaceDetection => {
    const oldBox: Box = faceDetection.box;
    const box = new Box({
        x: oldBox.x / dimensions.width,
        y: oldBox.y / dimensions.height,
        width: oldBox.width / dimensions.width,
        height: oldBox.height / dimensions.height,
    });
    const oldLandmarks: Point[] = faceDetection.landmarks;
    const landmarks = oldLandmarks.map((l) => {
        return new Point(l.x / dimensions.width, l.y / dimensions.height);
    });
    const probability = faceDetection.probability;
    return { box, landmarks, probability };
};

/**
 * Removes duplicate face detections from an array of detections.
 *
 * This function sorts the detections by their probability in descending order,
 * then iterates over them.
 *
 * For each detection, it calculates the Euclidean distance to all other
 * detections.
 *
 * If the distance is less than or equal to the specified threshold
 * (`withinDistance`), the other detection is considered a duplicate and is
 * removed.
 *
 * @param detections - An array of face detections to remove duplicates from.
 *
 * @param withinDistance - The maximum Euclidean distance between two detections
 * for them to be considered duplicates.
 *
 * @returns An array of face detections with duplicates removed.
 */
const removeDuplicateDetections = (
    detections: Array<FaceDetection>,
    withinDistance: number,
) => {
    detections.sort((a, b) => b.probability - a.probability);
    const isSelected = new Map<number, boolean>();
    for (let i = 0; i < detections.length; i++) {
        if (isSelected.get(i) === false) {
            continue;
        }
        isSelected.set(i, true);
        for (let j = i + 1; j < detections.length; j++) {
            if (isSelected.get(j) === false) {
                continue;
            }
            const centeri = getDetectionCenter(detections[i]);
            const centerj = getDetectionCenter(detections[j]);
            const dist = euclidean(
                [centeri.x, centeri.y],
                [centerj.x, centerj.y],
            );
            if (dist <= withinDistance) {
                isSelected.set(j, false);
            }
        }
    }

    const uniques: Array<FaceDetection> = [];
    for (let i = 0; i < detections.length; i++) {
        isSelected.get(i) && uniques.push(detections[i]);
    }
    return uniques;
};

function getDetectionCenter(detection: FaceDetection) {
    const center = new Point(0, 0);
    // TODO: The first 4 landmarks are applicable to blazeface only;
    // this needs to consider eyes, nose and mouth landmarks to take the center.
    detection.landmarks?.slice(0, 4).forEach((p) => {
        center.x += p.x;
        center.y += p.y;
    });

    return new Point(center.x / 4, center.y / 4);
}

const syncFileFaceCrops = async (fileContext: MLSyncFileContext) => {
    const { newMlFile } = fileContext;
    const imageBitmap = await fetchImageBitmapForContext(fileContext);
    newMlFile.faceCropMethod = {
        value: "ArcFace",
        version: 1,
    };

    for (const face of newMlFile.faces) {
        await saveFaceCrop(imageBitmap, face);
    }
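(Example, not part of the diff.) The preprocessing above performs YOLO-style letterboxing: scale by min(640/w, 640/h) to preserve the aspect ratio, place the scaled image at the top-left, and fill the remainder with the neutral gray value 114. A worked instance with an illustrative input size:

// Worked letterbox example for a hypothetical 1024x768 input.
const [requiredWidth, requiredHeight] = [640, 640];
const [w, h] = [1024, 768];

const scale = Math.min(requiredWidth / w, requiredHeight / h); // 0.625
const scaledWidth = Math.round(w * scale); // 640
const scaledHeight = Math.round(h * scale); // 480
// Pixels with x >= 640 or y >= 480 get { r: 114, g: 114, b: 114 };
// the rest are sampled bilinearly from the source at (x / scale, y / scale).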
@@ -84,10 +381,6 @@ const syncFileFaceAlignments = async (
    fileContext: MLSyncFileContext,
): Promise<Float32Array> => {
    const { newMlFile } = fileContext;
    newMlFile.faceAlignmentMethod = {
        value: "ArcFace",
        version: 1,
    };
    fileContext.newAlignment = true;
    const imageBitmap =
        fileContext.imageBitmap ||
@@ -113,15 +406,277 @@
    return faceImages;
};

// TODO-ML(MR): When is this used, or is it a Blazeface leftover?
const ARCFACE_LANDMARKS = [
    [38.2946, 51.6963],
    [73.5318, 51.5014],
    [56.0252, 71.7366],
    [56.1396, 92.2848],
] as Array<[number, number]>;

const ARCFACE_LANDMARKS_FACE_SIZE = 112;

const ARC_FACE_5_LANDMARKS = [
    [38.2946, 51.6963],
    [73.5318, 51.5014],
    [56.0252, 71.7366],
    [41.5493, 92.3655],
    [70.7299, 92.2041],
] as Array<[number, number]>;

/**
 * Compute and return an {@link FaceAlignment} for the given face detection.
 *
 * @param faceDetection A geometry indicating a face detected in an image.
 */
const faceAlignment = (faceDetection: FaceDetection): FaceAlignment => {
    const landmarkCount = faceDetection.landmarks.length;
    return getFaceAlignmentUsingSimilarityTransform(
        faceDetection,
        normalizeLandmarks(
            landmarkCount === 5 ? ARC_FACE_5_LANDMARKS : ARCFACE_LANDMARKS,
            ARCFACE_LANDMARKS_FACE_SIZE,
        ),
    );
};

function getFaceAlignmentUsingSimilarityTransform(
    faceDetection: FaceDetection,
    alignedLandmarks: Array<[number, number]>,
): FaceAlignment {
    const landmarksMat = new Matrix(
        faceDetection.landmarks
            .map((p) => [p.x, p.y])
            .slice(0, alignedLandmarks.length),
    ).transpose();
    const alignedLandmarksMat = new Matrix(alignedLandmarks).transpose();

    const simTransform = getSimilarityTransformation(
        landmarksMat,
        alignedLandmarksMat,
    );

    const RS = Matrix.mul(simTransform.rotation, simTransform.scale);
    const TR = simTransform.translation;

    const affineMatrix = [
        [RS.get(0, 0), RS.get(0, 1), TR.get(0, 0)],
        [RS.get(1, 0), RS.get(1, 1), TR.get(1, 0)],
        [0, 0, 1],
    ];

    const size = 1 / simTransform.scale;
    const meanTranslation = simTransform.toMean.sub(0.5).mul(size);
    const centerMat = simTransform.fromMean.sub(meanTranslation);
    const center = new Point(centerMat.get(0, 0), centerMat.get(1, 0));
    const rotation = -Math.atan2(
        simTransform.rotation.get(0, 1),
        simTransform.rotation.get(0, 0),
    );

    return {
        affineMatrix,
        center,
        size,
        rotation,
    };
}

function normalizeLandmarks(
    landmarks: Array<[number, number]>,
    faceSize: number,
): Array<[number, number]> {
    return landmarks.map((landmark) =>
        landmark.map((p) => p / faceSize),
    ) as Array<[number, number]>;
}

/**
 * Laplacian blur detection.
 */
const detectBlur = (alignedFaces: Float32Array, faces: Face[]): number[] => {
    const numFaces = Math.round(
        alignedFaces.length /
            (mobileFaceNetFaceSize * mobileFaceNetFaceSize * 3),
    );
    const blurValues: number[] = [];
    for (let i = 0; i < numFaces; i++) {
        const face = faces[i];
        const direction = faceDirection(face);
        const faceImage = createGrayscaleIntMatrixFromNormalized2List(
            alignedFaces,
            i,
        );
        const laplacian = applyLaplacian(faceImage, direction);
        blurValues.push(matrixVariance(laplacian));
    }
    return blurValues;
};

type FaceDirection = "left" | "right" | "straight";

const faceDirection = (face: Face): FaceDirection => {
    const landmarks = face.detection.landmarks;
    const leftEye = landmarks[0];
    const rightEye = landmarks[1];
    const nose = landmarks[2];
    const leftMouth = landmarks[3];
    const rightMouth = landmarks[4];

    const eyeDistanceX = Math.abs(rightEye.x - leftEye.x);
    const eyeDistanceY = Math.abs(rightEye.y - leftEye.y);
    const mouthDistanceY = Math.abs(rightMouth.y - leftMouth.y);

    const faceIsUpright =
        Math.max(leftEye.y, rightEye.y) + 0.5 * eyeDistanceY < nose.y &&
        nose.y + 0.5 * mouthDistanceY < Math.min(leftMouth.y, rightMouth.y);

    const noseStickingOutLeft =
        nose.x < Math.min(leftEye.x, rightEye.x) &&
        nose.x < Math.min(leftMouth.x, rightMouth.x);

    const noseStickingOutRight =
        nose.x > Math.max(leftEye.x, rightEye.x) &&
        nose.x > Math.max(leftMouth.x, rightMouth.x);

    const noseCloseToLeftEye =
        Math.abs(nose.x - leftEye.x) < 0.2 * eyeDistanceX;
    const noseCloseToRightEye =
        Math.abs(nose.x - rightEye.x) < 0.2 * eyeDistanceX;

    if (noseStickingOutLeft || (faceIsUpright && noseCloseToLeftEye)) {
        return "left";
    } else if (noseStickingOutRight || (faceIsUpright && noseCloseToRightEye)) {
        return "right";
    }

    return "straight";
};

/**
 * Return a new image by applying a Laplacian blur kernel to each pixel.
 */
const applyLaplacian = (
    image: number[][],
    direction: FaceDirection,
): number[][] => {
    const paddedImage: number[][] = padImage(image, direction);
    const numRows = paddedImage.length - 2;
    const numCols = paddedImage[0].length - 2;

    // Create an output image initialized to 0.
    const outputImage: number[][] = Array.from({ length: numRows }, () =>
        new Array(numCols).fill(0),
    );

    // Define the Laplacian kernel.
    const kernel: number[][] = [
        [0, 1, 0],
        [1, -4, 1],
        [0, 1, 0],
    ];

    // Apply the kernel to each pixel
    for (let i = 0; i < numRows; i++) {
        for (let j = 0; j < numCols; j++) {
            let sum = 0;
            for (let ki = 0; ki < 3; ki++) {
                for (let kj = 0; kj < 3; kj++) {
                    sum += paddedImage[i + ki][j + kj] * kernel[ki][kj];
                }
            }
            // Adjust the output value if necessary (e.g., clipping).
            outputImage[i][j] = sum;
        }
    }

    return outputImage;
};

const padImage = (image: number[][], direction: FaceDirection): number[][] => {
    const removeSideColumns = 56; /* must be even */

    const numRows = image.length;
    const numCols = image[0].length;
    const paddedNumCols = numCols + 2 - removeSideColumns;
    const paddedNumRows = numRows + 2;

    // Create a new matrix with extra padding.
    const paddedImage: number[][] = Array.from({ length: paddedNumRows }, () =>
        new Array(paddedNumCols).fill(0),
    );

    if (direction === "straight") {
        // Copy original image into the center of the padded image.
        for (let i = 0; i < numRows; i++) {
            for (let j = 0; j < paddedNumCols - 2; j++) {
                paddedImage[i + 1][j + 1] =
                    image[i][j + Math.round(removeSideColumns / 2)];
            }
        }
    } else if (direction === "left") {
        // If the face is facing left, we only take the right side of the face image.
        for (let i = 0; i < numRows; i++) {
            for (let j = 0; j < paddedNumCols - 2; j++) {
                paddedImage[i + 1][j + 1] = image[i][j + removeSideColumns];
            }
        }
    } else if (direction === "right") {
        // If the face is facing right, we only take the left side of the face image.
        for (let i = 0; i < numRows; i++) {
            for (let j = 0; j < paddedNumCols - 2; j++) {
                paddedImage[i + 1][j + 1] = image[i][j];
            }
        }
    }

    // Reflect padding
    // Top and bottom rows
    for (let j = 1; j <= paddedNumCols - 2; j++) {
        paddedImage[0][j] = paddedImage[2][j]; // Top row
        paddedImage[numRows + 1][j] = paddedImage[numRows - 1][j]; // Bottom row
    }
    // Left and right columns
    for (let i = 0; i < numRows + 2; i++) {
        paddedImage[i][0] = paddedImage[i][2]; // Left column
        paddedImage[i][paddedNumCols - 1] = paddedImage[i][paddedNumCols - 3]; // Right column
    }

    return paddedImage;
};

const matrixVariance = (matrix: number[][]): number => {
    const numRows = matrix.length;
    const numCols = matrix[0].length;
    const totalElements = numRows * numCols;

    // Calculate the mean.
    let mean: number = 0;
    matrix.forEach((row) => {
        row.forEach((value) => {
            mean += value;
        });
    });
    mean /= totalElements;

    // Calculate the variance.
    let variance: number = 0;
    matrix.forEach((row) => {
        row.forEach((value) => {
            const diff: number = value - mean;
            variance += diff * diff;
        });
    });
    variance /= totalElements;

    return variance;
};

const syncFileFaceEmbeddings = async (
    fileContext: MLSyncFileContext,
    alignedFacesInput: Float32Array,
) => {
    const { newMlFile } = fileContext;
    newMlFile.faceEmbeddingMethod = {
        value: "MobileFaceNet",
        version: 2,
    };
    // TODO: when not storing face crops, image will be needed to extract faces
    // fileContext.imageBitmap ||
    // (await this.getImageBitmap(fileContext));
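(Example, not part of the diff.) The faceDirection heuristic above steers the blur check toward the visible half of a turned face. A worked instance with hypothetical landmark coordinates:

// Hypothetical landmarks for a face turned to the left: the nose tip sits
// left of both eyes and both mouth corners.
const leftEye = { x: 40, y: 45 };
const rightEye = { x: 75, y: 46 };
const nose = { x: 35, y: 70 };
const leftMouth = { x: 45, y: 92 };
const rightMouth = { x: 72, y: 91 };

const noseStickingOutLeft =
    nose.x < Math.min(leftEye.x, rightEye.x) && // 35 < 40
    nose.x < Math.min(leftMouth.x, rightMouth.x); // 35 < 45
// => true, so faceDirection returns "left", and padImage keeps only the
// right side of the crop (the visible half of the face).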
@@ -132,6 +687,30 @@ const syncFileFaceEmbeddings = async (
    log.info("[MLService] facesWithEmbeddings: ", newMlFile.faces.length);
};

const mobileFaceNetFaceSize = 112;

/**
 * Compute embeddings for the given {@link faceData}.
 *
 * The model used is MobileFaceNet, running in an ONNX runtime.
 */
const faceEmbeddings = async (
    faceData: Float32Array,
): Promise<Array<FaceEmbedding>> => {
    const outputData = await workerBridge.faceEmbeddings(faceData);

    const embeddingSize = 192;
    const embeddings = new Array<FaceEmbedding>(
        outputData.length / embeddingSize,
    );
    for (let i = 0; i < embeddings.length; i++) {
        embeddings[i] = new Float32Array(
            outputData.slice(i * embeddingSize, (i + 1) * embeddingSize),
        );
    }
    return embeddings;
};

const syncFileFaceMakeRelativeDetections = async (
    fileContext: MLSyncFileContext,
) => {
@@ -159,16 +738,32 @@ export const saveFaceCrop = async (imageBitmap: ImageBitmap, face: Face) => {
    return blob;
};

export const regenerateFaceCrop = async (faceID: string) => {
    const fileID = Number(faceID.split("-")[0]);
    const personFace = await mlIDbStorage.getFace(fileID, faceID);
    if (!personFace) {
        throw Error("Face not found");
    }

    const file = await getLocalFile(personFace.fileId);
    const imageBitmap = await fetchImageBitmap(file);
    return await saveFaceCrop(imageBitmap, personFace);
};

const getFaceCrop = (
    imageBitmap: ImageBitmap,
    faceDetection: FaceDetection,
): FaceCrop => {
    const alignment = faceAlignment(faceDetection);

    const padding = 0.25;
    const maxSize = 256;

    const alignmentBox = new Box({
        x: alignment.center.x - alignment.size / 2,
        y: alignment.center.y - alignment.size / 2,
        width: alignment.size,
        height: alignment.size,
    }).round();
    const scaleForPadding = 1 + padding * 2;
    const paddedBox = enlargeBox(alignmentBox, scaleForPadding).round();
    const faceImageBitmap = cropWithRotation(imageBitmap, paddedBox, 0, {
        width: maxSize,
        height: maxSize,
    });

    return {
        image: faceImageBitmap,
        imageBox: paddedBox,
    };
};

async function extractFaceImagesToFloat32(
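(Example, not part of the diff.) regenerateFaceCrop relies on the face ID convention visible above: the owning file's numeric ID is the segment before the first "-". A tiny sketch of that parsing:

// Face IDs embed the owning file's ID as the prefix before the first "-",
// e.g. a hypothetical "12345-A" parses to file ID 12345.
const fileIDOfFace = (faceID: string) => Number(faceID.split("-")[0]);
console.log(fileIDOfFace("12345-A")); // 12345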
@@ -1,13 +1,17 @@
import { FILE_TYPE } from "@/media/file-type";
import { decodeLivePhoto } from "@/media/live-photo";
import log from "@/next/log";
import { Matrix, inverse } from "ml-matrix";
import DownloadManager from "services/download";
import { Dimensions } from "services/face/geom";
import { DetectedFace, MLSyncFileContext } from "services/face/types";
import { Box, Dimensions, enlargeBox } from "services/face/geom";
import {
    DetectedFace,
    FaceAlignment,
    MLSyncFileContext,
} from "services/face/types";
import { getLocalFiles } from "services/fileService";
import { EnteFile } from "types/file";
import { getRenderableImage } from "utils/file";
import { clamp } from "utils/image";

export const fetchImageBitmapForContext = async (
    fileContext: MLSyncFileContext,
@@ -37,7 +41,6 @@ export const fetchImageBitmapForContext = async (
        );
    }

    fileContext.newMlFile.imageSource = "Original";
    const { width, height } = fileContext.imageBitmap;
    fileContext.newMlFile.imageDimensions = { width, height };
@@ -119,3 +122,468 @@ export async function getLocalFileImageBitmap(
    fileBlob = await getRenderableImage(enteFile.metadata.title, fileBlob);
    return createImageBitmap(fileBlob);
}

export function normalizePixelBetween0And1(pixelValue: number) {
    return pixelValue / 255.0;
}

export function normalizePixelBetweenMinus1And1(pixelValue: number) {
    return pixelValue / 127.5 - 1.0;
}

export function unnormalizePixelFromBetweenMinus1And1(pixelValue: number) {
    return clamp(Math.round((pixelValue + 1.0) * 127.5), 0, 255);
}

export function readPixelColor(
    imageData: Uint8ClampedArray,
    width: number,
    height: number,
    x: number,
    y: number,
) {
    if (x < 0 || x >= width || y < 0 || y >= height) {
        return { r: 0, g: 0, b: 0, a: 0 };
    }
    const index = (y * width + x) * 4;
    return {
        r: imageData[index],
        g: imageData[index + 1],
        b: imageData[index + 2],
        a: imageData[index + 3],
    };
}

export function clamp(value: number, min: number, max: number) {
    return Math.min(max, Math.max(min, value));
}
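(Example, not part of the diff.) These helpers convert between the canvas byte range [0, 255] and the model ranges [0, 1] or [-1, 1]; readPixelColor indexes into the flat RGBA layout of ImageData, 4 bytes per pixel, row-major. Quick worked checks:

// Round-trip and indexing checks for the helpers above.
normalizePixelBetweenMinus1And1(255); // 255 / 127.5 - 1.0 === 1
normalizePixelBetweenMinus1And1(0); // -1
unnormalizePixelFromBetweenMinus1And1(1); // round((1 + 1) * 127.5) === 255

// For a 640-wide image, the red byte of pixel (x=2, y=1) lives at
// (1 * 640 + 2) * 4 === 2568; green/blue/alpha follow at +1, +2, +3.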

export function getPixelBicubic(
    fx: number,
    fy: number,
    imageData: Uint8ClampedArray,
    imageWidth: number,
    imageHeight: number,
) {
    // Clamp to image boundaries
    fx = clamp(fx, 0, imageWidth - 1);
    fy = clamp(fy, 0, imageHeight - 1);

    const x = Math.trunc(fx) - (fx >= 0.0 ? 0 : 1);
    const px = x - 1;
    const nx = x + 1;
    const ax = x + 2;
    const y = Math.trunc(fy) - (fy >= 0.0 ? 0 : 1);
    const py = y - 1;
    const ny = y + 1;
    const ay = y + 2;
    const dx = fx - x;
    const dy = fy - y;

    function cubic(
        dx: number,
        ipp: number,
        icp: number,
        inp: number,
        iap: number,
    ) {
        return (
            icp +
            0.5 *
                (dx * (-ipp + inp) +
                    dx * dx * (2 * ipp - 5 * icp + 4 * inp - iap) +
                    dx * dx * dx * (-ipp + 3 * icp - 3 * inp + iap))
        );
    }

    const icc = readPixelColor(imageData, imageWidth, imageHeight, x, y);

    const ipp =
        px < 0 || py < 0
            ? icc
            : readPixelColor(imageData, imageWidth, imageHeight, px, py);
    const icp =
        px < 0
            ? icc
            : readPixelColor(imageData, imageWidth, imageHeight, x, py);
    const inp =
        py < 0 || nx >= imageWidth
            ? icc
            : readPixelColor(imageData, imageWidth, imageHeight, nx, py);
    const iap =
        ax >= imageWidth || py < 0
            ? icc
            : readPixelColor(imageData, imageWidth, imageHeight, ax, py);

    const ip0 = cubic(dx, ipp.r, icp.r, inp.r, iap.r);
    const ip1 = cubic(dx, ipp.g, icp.g, inp.g, iap.g);
    const ip2 = cubic(dx, ipp.b, icp.b, inp.b, iap.b);
    // const ip3 = cubic(dx, ipp.a, icp.a, inp.a, iap.a);

    const ipc =
        px < 0
            ? icc
            : readPixelColor(imageData, imageWidth, imageHeight, px, y);
    const inc =
        nx >= imageWidth
            ? icc
            : readPixelColor(imageData, imageWidth, imageHeight, nx, y);
    const iac =
        ax >= imageWidth
            ? icc
            : readPixelColor(imageData, imageWidth, imageHeight, ax, y);

    const ic0 = cubic(dx, ipc.r, icc.r, inc.r, iac.r);
    const ic1 = cubic(dx, ipc.g, icc.g, inc.g, iac.g);
    const ic2 = cubic(dx, ipc.b, icc.b, inc.b, iac.b);
    // const ic3 = cubic(dx, ipc.a, icc.a, inc.a, iac.a);

    const ipn =
        px < 0 || ny >= imageHeight
            ? icc
            : readPixelColor(imageData, imageWidth, imageHeight, px, ny);
    const icn =
        ny >= imageHeight
            ? icc
            : readPixelColor(imageData, imageWidth, imageHeight, x, ny);
    const inn =
        nx >= imageWidth || ny >= imageHeight
            ? icc
            : readPixelColor(imageData, imageWidth, imageHeight, nx, ny);
    const ian =
        ax >= imageWidth || ny >= imageHeight
            ? icc
            : readPixelColor(imageData, imageWidth, imageHeight, ax, ny);

    const in0 = cubic(dx, ipn.r, icn.r, inn.r, ian.r);
    const in1 = cubic(dx, ipn.g, icn.g, inn.g, ian.g);
    const in2 = cubic(dx, ipn.b, icn.b, inn.b, ian.b);
    // const in3 = cubic(dx, ipn.a, icn.a, inn.a, ian.a);

    const ipa =
        px < 0 || ay >= imageHeight
            ? icc
            : readPixelColor(imageData, imageWidth, imageHeight, px, ay);
    const ica =
        ay >= imageHeight
            ? icc
            : readPixelColor(imageData, imageWidth, imageHeight, x, ay);
    const ina =
        nx >= imageWidth || ay >= imageHeight
            ? icc
            : readPixelColor(imageData, imageWidth, imageHeight, nx, ay);
    const iaa =
        ax >= imageWidth || ay >= imageHeight
            ? icc
            : readPixelColor(imageData, imageWidth, imageHeight, ax, ay);

    const ia0 = cubic(dx, ipa.r, ica.r, ina.r, iaa.r);
    const ia1 = cubic(dx, ipa.g, ica.g, ina.g, iaa.g);
    const ia2 = cubic(dx, ipa.b, ica.b, ina.b, iaa.b);
    // const ia3 = cubic(dx, ipa.a, ica.a, ina.a, iaa.a);

    const c0 = Math.trunc(clamp(cubic(dy, ip0, ic0, in0, ia0), 0, 255));
    const c1 = Math.trunc(clamp(cubic(dy, ip1, ic1, in1, ia1), 0, 255));
    const c2 = Math.trunc(clamp(cubic(dy, ip2, ic2, in2, ia2), 0, 255));
    // const c3 = cubic(dy, ip3, ic3, in3, ia3);

    return { r: c0, g: c1, b: c2 };
}

/// Returns the pixel value (RGB) at the given coordinates using bilinear interpolation.
export function getPixelBilinear(
    fx: number,
    fy: number,
    imageData: Uint8ClampedArray,
    imageWidth: number,
    imageHeight: number,
) {
    // Clamp to image boundaries
    fx = clamp(fx, 0, imageWidth - 1);
    fy = clamp(fy, 0, imageHeight - 1);

    // Get the surrounding coordinates and their weights
    const x0 = Math.floor(fx);
    const x1 = Math.ceil(fx);
    const y0 = Math.floor(fy);
    const y1 = Math.ceil(fy);
    const dx = fx - x0;
    const dy = fy - y0;
    const dx1 = 1.0 - dx;
    const dy1 = 1.0 - dy;

    // Get the original pixels
    const pixel1 = readPixelColor(imageData, imageWidth, imageHeight, x0, y0);
    const pixel2 = readPixelColor(imageData, imageWidth, imageHeight, x1, y0);
    const pixel3 = readPixelColor(imageData, imageWidth, imageHeight, x0, y1);
    const pixel4 = readPixelColor(imageData, imageWidth, imageHeight, x1, y1);

    function bilinear(val1: number, val2: number, val3: number, val4: number) {
        return Math.round(
            val1 * dx1 * dy1 +
                val2 * dx * dy1 +
                val3 * dx1 * dy +
                val4 * dx * dy,
        );
    }

    // Interpolate the pixel values
    const red = bilinear(pixel1.r, pixel2.r, pixel3.r, pixel4.r);
    const green = bilinear(pixel1.g, pixel2.g, pixel3.g, pixel4.g);
    const blue = bilinear(pixel1.b, pixel2.b, pixel3.b, pixel4.b);

    return { r: red, g: green, b: blue };
}
|
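
A quick illustrative check of the bilinear weights: at the midpoint dx = dy = 0.5 all four weights are 0.25, so the result is the plain average of the four neighbouring samples.

const midpoint = (v1: number, v2: number, v3: number, v4: number) =>
    Math.round(v1 * 0.25 + v2 * 0.25 + v3 * 0.25 + v4 * 0.25);
console.assert(midpoint(10, 20, 30, 40) === 25);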

export function warpAffineFloat32List(
    imageBitmap: ImageBitmap,
    faceAlignment: FaceAlignment,
    faceSize: number,
    inputData: Float32Array,
    inputStartIndex: number,
): void {
    // Get the pixel data
    const offscreenCanvas = new OffscreenCanvas(
        imageBitmap.width,
        imageBitmap.height,
    );
    const ctx = offscreenCanvas.getContext("2d");
    ctx.drawImage(imageBitmap, 0, 0, imageBitmap.width, imageBitmap.height);
    const imageData = ctx.getImageData(
        0,
        0,
        imageBitmap.width,
        imageBitmap.height,
    );
    const pixelData = imageData.data;

    // The alignment matrix is in normalized (0 to 1) coordinates; scale it to
    // pixel coordinates of the faceSize output, leaving the homogeneous 1s
    // untouched (a hack: a coefficient exactly equal to 1 would also be
    // skipped).
    const transformationMatrix = faceAlignment.affineMatrix.map((row) =>
        row.map((val) => (val != 1.0 ? val * faceSize : 1.0)),
    ); // 3x3

    const A: Matrix = new Matrix([
        [transformationMatrix[0][0], transformationMatrix[0][1]],
        [transformationMatrix[1][0], transformationMatrix[1][1]],
    ]);
    const Ainverse = inverse(A);

    const b00 = transformationMatrix[0][2];
    const b10 = transformationMatrix[1][2];
    const a00Prime = Ainverse.get(0, 0);
    const a01Prime = Ainverse.get(0, 1);
    const a10Prime = Ainverse.get(1, 0);
    const a11Prime = Ainverse.get(1, 1);

    for (let yTrans = 0; yTrans < faceSize; ++yTrans) {
        for (let xTrans = 0; xTrans < faceSize; ++xTrans) {
            // Perform inverse affine transformation
            const xOrigin =
                a00Prime * (xTrans - b00) + a01Prime * (yTrans - b10);
            const yOrigin =
                a10Prime * (xTrans - b00) + a11Prime * (yTrans - b10);

            // Get the pixel from interpolation
            const pixel = getPixelBicubic(
                xOrigin,
                yOrigin,
                pixelData,
                imageBitmap.width,
                imageBitmap.height,
            );

            // Set the pixel in the input data
            const index = (yTrans * faceSize + xTrans) * 3;
            inputData[inputStartIndex + index] =
                normalizePixelBetweenMinus1And1(pixel.r);
            inputData[inputStartIndex + index + 1] =
                normalizePixelBetweenMinus1And1(pixel.g);
            inputData[inputStartIndex + index + 2] =
                normalizePixelBetweenMinus1And1(pixel.b);
        }
    }
}
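
A note on the structure of the warp above: it iterates over destination pixels and maps each one back into the source, since forward-mapping source pixels would leave holes and overlaps in the destination. A reduced sketch of the same idea, assuming a pure scale s and translation (tx, ty):

const inverseMap = (xTrans: number, yTrans: number, s: number, tx: number, ty: number) => ({
    xOrigin: (xTrans - tx) / s,
    yOrigin: (yTrans - ty) / s,
});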

export function createGrayscaleIntMatrixFromNormalized2List(
    imageList: Float32Array,
    faceNumber: number,
    width: number = 112,
    height: number = 112,
): number[][] {
    const startIndex = faceNumber * width * height * 3;
    return Array.from({ length: height }, (_, y) =>
        Array.from({ length: width }, (_, x) => {
            // Luma: 0.299 ∙ Red + 0.587 ∙ Green + 0.114 ∙ Blue
            const pixelIndex = startIndex + 3 * (y * width + x);
            const r = unnormalizePixelFromBetweenMinus1And1(imageList[pixelIndex]);
            const g = unnormalizePixelFromBetweenMinus1And1(imageList[pixelIndex + 1]);
            const b = unnormalizePixelFromBetweenMinus1And1(imageList[pixelIndex + 2]);
            return clamp(Math.round(0.299 * r + 0.587 * g + 0.114 * b), 0, 255);
        }),
    );
}
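
The weights in the grayscale conversion are the Rec. 601 luma coefficients. They sum to 1, so a uniform grey input maps to itself; an illustrative check:

console.assert(Math.round(0.299 * 128 + 0.587 * 128 + 0.114 * 128) === 128);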

export function resizeToSquare(img: ImageBitmap, size: number) {
    const scale = size / Math.max(img.height, img.width);
    const width = scale * img.width;
    const height = scale * img.height;
    const offscreen = new OffscreenCanvas(size, size);
    const ctx = offscreen.getContext("2d");
    ctx.imageSmoothingQuality = "high";
    ctx.drawImage(img, 0, 0, width, height);
    const resizedImage = offscreen.transferToImageBitmap();
    return { image: resizedImage, width, height };
}
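
A usage sketch: fixed-input detectors expect a square canvas, and the returned content dimensions are what later lets detections be mapped back onto the original image (see transformFaceDetections further below; `bitmap` is a hypothetical ImageBitmap):

const { image: squared, width: scaledWidth, height: scaledHeight } = resizeToSquare(bitmap, 640);
// The scaled image sits in the top-left; the rest of the square canvas stays blank.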

export function transform(
    imageBitmap: ImageBitmap,
    affineMat: number[][],
    outputWidth: number,
    outputHeight: number,
) {
    const offscreen = new OffscreenCanvas(outputWidth, outputHeight);
    const context = offscreen.getContext("2d");
    context.imageSmoothingQuality = "high";

    // Note the argument order: canvas transform() takes the 2x3 matrix
    // column by column, as (a, b, c, d, e, f).
    context.transform(
        affineMat[0][0],
        affineMat[1][0],
        affineMat[0][1],
        affineMat[1][1],
        affineMat[0][2],
        affineMat[1][2],
    );

    context.drawImage(imageBitmap, 0, 0);
    return offscreen.transferToImageBitmap();
}

export function crop(imageBitmap: ImageBitmap, cropBox: Box, size: number) {
    const dimensions: Dimensions = {
        width: size,
        height: size,
    };

    return cropWithRotation(imageBitmap, cropBox, 0, dimensions, dimensions);
}

// These utils only work in environments where OffscreenCanvas is available.

export function cropWithRotation(
    imageBitmap: ImageBitmap,
    cropBox: Box,
    rotation?: number,
    maxSize?: Dimensions,
    minSize?: Dimensions,
) {
    const box = cropBox.round();

    const outputSize = { width: box.width, height: box.height };
    if (maxSize) {
        const minScale = Math.min(
            maxSize.width / box.width,
            maxSize.height / box.height,
        );
        if (minScale < 1) {
            outputSize.width = Math.round(minScale * box.width);
            outputSize.height = Math.round(minScale * box.height);
        }
    }

    if (minSize) {
        const maxScale = Math.max(
            minSize.width / box.width,
            minSize.height / box.height,
        );
        if (maxScale > 1) {
            outputSize.width = Math.round(maxScale * box.width);
            outputSize.height = Math.round(maxScale * box.height);
        }
    }

    // log.info({ imageBitmap, box, outputSize });

    const offscreen = new OffscreenCanvas(outputSize.width, outputSize.height);
    const offscreenCtx = offscreen.getContext("2d");
    offscreenCtx.imageSmoothingQuality = "high";

    offscreenCtx.translate(outputSize.width / 2, outputSize.height / 2);
    rotation && offscreenCtx.rotate(rotation);

    const outputBox = new Box({
        x: -outputSize.width / 2,
        y: -outputSize.height / 2,
        width: outputSize.width,
        height: outputSize.height,
    });

    const enlargedBox = enlargeBox(box, 1.5);
    const enlargedOutputBox = enlargeBox(outputBox, 1.5);

    offscreenCtx.drawImage(
        imageBitmap,
        enlargedBox.x,
        enlargedBox.y,
        enlargedBox.width,
        enlargedBox.height,
        enlargedOutputBox.x,
        enlargedOutputBox.y,
        enlargedOutputBox.width,
        enlargedOutputBox.height,
    );

    return offscreen.transferToImageBitmap();
}
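
On the 1.5x enlargement above: scaling the source box and the destination box by the same factor leaves the pixel mapping between them unchanged, while drawing in the surrounding context so that a rotation does not expose blank corners. An illustrative statement of the invariant:

const mappingScale = (src: { width: number }, dst: { width: number }) => dst.width / src.width;
// mappingScale(enlargeBox(box, 1.5), enlargeBox(outputBox, 1.5)) === mappingScale(box, outputBox)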

export function addPadding(image: ImageBitmap, padding: number) {
    const scale = 1 + padding * 2;
    const width = scale * image.width;
    const height = scale * image.height;
    const offscreen = new OffscreenCanvas(width, height);
    const ctx = offscreen.getContext("2d");
    ctx.imageSmoothingEnabled = false;
    ctx.drawImage(
        image,
        width / 2 - image.width / 2,
        height / 2 - image.height / 2,
        image.width,
        image.height,
    );

    return offscreen.transferToImageBitmap();
}

export interface BlobOptions {
    type?: string;
    quality?: number;
}

export async function imageBitmapToBlob(imageBitmap: ImageBitmap) {
    const offscreen = new OffscreenCanvas(
        imageBitmap.width,
        imageBitmap.height,
    );
    offscreen.getContext("2d").drawImage(imageBitmap, 0, 0);

    return offscreen.convertToBlob({
        type: "image/jpeg",
        quality: 0.8,
    });
}

export async function imageBitmapFromBlob(blob: Blob) {
    return createImageBitmap(blob);
}

@@ -1,37 +1,53 @@
import log from "@/next/log";
import mlIDbStorage from "services/face/db";
import { Face, Person } from "services/face/types";
import { type MLSyncContext } from "services/machineLearning/machineLearningService";
import { Person } from "services/face/types";
import { clusterFaces } from "./cluster";
import { saveFaceCrop } from "./f-index";
import { fetchImageBitmap, getLocalFile } from "./image";

export const syncPeopleIndex = async (syncContext: MLSyncContext) => {
export const syncPeopleIndex = async () => {
    // TODO-ML(MR): Forced disable clustering. It doesn't currently work,
    // need to finalize it before we move out of beta.
    //
    // > Error: Failed to execute 'transferToImageBitmap' on
    // > 'OffscreenCanvas': ImageBitmap construction failed
    /*
    if (
        syncContext.outOfSyncFiles.length <= 0 ||
        (syncContext.nSyncedFiles === batchSize && Math.random() < 0)
    ) {
        await this.syncIndex(syncContext);
    }

    public async syncIndex(syncContext: MLSyncContext) {
        await this.getMLLibraryData(syncContext);

        // TODO-ML(MR): Ensure this doesn't run until fixed.
        await syncPeopleIndex(syncContext);

        await this.persistMLLibraryData(syncContext);
    }

    const filesVersion = await mlIDbStorage.getIndexVersion("files");
    if (filesVersion <= (await mlIDbStorage.getIndexVersion("people"))) {
        return;
    }
    */

    // TODO: have faces addressable through fileId + faceId
    // to avoid index-based addressing, which is prone to wrong results;
    // one way could be to match the nearest face within a threshold in the file
    /*
    const allFacesMap =
        syncContext.allSyncedFacesMap ??
        (syncContext.allSyncedFacesMap = await mlIDbStorage.getAllFacesMap());
    const allFaces = [...allFacesMap.values()].flat();
    */

    await runFaceClustering(syncContext, allFaces);
    await syncPeopleFromClusters(syncContext, allFacesMap, allFaces);

    await mlIDbStorage.setIndexVersion("people", filesVersion);
};

const runFaceClustering = async (
    syncContext: MLSyncContext,
    allFaces: Array<Face>,
) => {
    // await this.init();

    const allFacesMap = await mlIDbStorage.getAllFacesMap();
    const allFaces = [...allFacesMap.values()].flat();

    if (!allFaces || allFaces.length < 50) {
        log.info(
            `Skipping clustering since number of faces (${allFaces.length}) is less than the clustering threshold (50)`,
@@ -40,34 +56,15 @@ const runFaceClustering = async (
    }

    log.info("Running clustering allFaces: ", allFaces.length);
    syncContext.mlLibraryData.faceClusteringResults = await clusterFaces(
    const faceClusteringResults = await clusterFaces(
        allFaces.map((f) => Array.from(f.embedding)),
    );
    syncContext.mlLibraryData.faceClusteringMethod = {
        value: "Hdbscan",
        version: 1,
    };
    log.info(
        "[MLService] Got face clustering results: ",
        JSON.stringify(syncContext.mlLibraryData.faceClusteringResults),
        JSON.stringify(faceClusteringResults),
    );

    // syncContext.faceClustersWithNoise = {
    //     clusters: syncContext.faceClusteringResults.clusters.map(
    //         (faces) => ({
    //             faces,
    //         })
    //     ),
    //     noise: syncContext.faceClusteringResults.noise,
    // };
};

const syncPeopleFromClusters = async (
    syncContext: MLSyncContext,
    allFacesMap: Map<number, Array<Face>>,
    allFaces: Array<Face>,
) => {
    const clusters = syncContext.mlLibraryData.faceClusteringResults?.clusters;
    const clusters = faceClusteringResults?.clusters;
    if (!clusters || clusters.length < 1) {
        return;
    }
@@ -108,4 +105,6 @@ const syncPeopleFromClusters = async (
    }

    await mlIDbStorage.updateFaces(allFacesMap);

    // await mlIDbStorage.setIndexVersion("people", filesVersion);
};
|
|
64
web/apps/photos/src/services/face/transform-box.ts
Normal file
64
web/apps/photos/src/services/face/transform-box.ts
Normal file
|
@ -0,0 +1,64 @@
|
|||
import { Box, Point, boxFromBoundingBox } from "services/face/geom";
import { FaceDetection } from "services/face/types";
// TODO-ML(MR): Do we need two separate Matrix libraries?
//
// Keeping this in a separate file so that we can audit this. If these can be
// expressed using ml-matrix, then we can move the code to f-index.
import {
    Matrix,
    applyToPoint,
    compose,
    scale,
    translate,
} from "transformation-matrix";

/**
 * Transform the given {@link faces} from coordinates relative to
 * {@link inBox} into coordinates relative to {@link toBox}.
 */
export const transformFaceDetections = (
    faces: FaceDetection[],
    inBox: Box,
    toBox: Box,
): FaceDetection[] => {
    const transform = computeTransformToBox(inBox, toBox);
    return faces.map((f) => {
        const box = transformBox(f.box, transform);
        const normLandmarks = f.landmarks;
        const landmarks = transformPoints(normLandmarks, transform);
        return {
            box,
            landmarks,
            probability: f.probability as number,
        } as FaceDetection;
    });
};

function computeTransformToBox(inBox: Box, toBox: Box): Matrix {
    return compose(
        translate(toBox.x, toBox.y),
        scale(toBox.width / inBox.width, toBox.height / inBox.height),
    );
}

function transformPoint(point: Point, transform: Matrix) {
    const txdPoint = applyToPoint(transform, point);
    return new Point(txdPoint.x, txdPoint.y);
}

function transformPoints(points: Point[], transform: Matrix) {
    return points?.map((p) => transformPoint(p, transform));
}

function transformBox(box: Box, transform: Matrix) {
    const topLeft = transformPoint(box.topLeft, transform);
    const bottomRight = transformPoint(box.bottomRight, transform);

    return boxFromBoundingBox({
        left: topLeft.x,
        top: topLeft.y,
        right: bottomRight.x,
        bottom: bottomRight.y,
    });
}
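
A usage sketch of the new helper, tying it to resizeToSquare above: detections are produced in the model's square input coordinates and mapped back onto the original image (the Box construction here is assumed from services/face/geom; scaledWidth and scaledHeight are the content dimensions returned by resizeToSquare):

const modelBox = new Box({ x: 0, y: 0, width: scaledWidth, height: scaledHeight });
const imageBox = new Box({ x: 0, y: 0, width: imageBitmap.width, height: imageBitmap.height });
const mappedDetections = transformFaceDetections(detections, modelBox, imageBox);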

@@ -1,62 +1,10 @@
import type { ClusterFacesResult } from "services/face/cluster";
import { Dimensions } from "services/face/geom";
import { Box, Dimensions, Point } from "services/face/geom";
import { EnteFile } from "types/file";
import { Box, Point } from "./geom";

export interface MLSyncResult {
    nOutOfSyncFiles: number;
    nSyncedFiles: number;
    nSyncedFaces: number;
    nFaceClusters: number;
    nFaceNoise: number;
    error?: Error;
}

export declare type FaceDescriptor = Float32Array;

export declare type Cluster = Array<number>;

export interface FacesCluster {
    faces: Cluster;
    summary?: FaceDescriptor;
}

export interface FacesClustersWithNoise {
    clusters: Array<FacesCluster>;
    noise: Cluster;
}

export interface NearestCluster {
    cluster: FacesCluster;
    distance: number;
}

export declare type Landmark = Point;

export declare type ImageType = "Original" | "Preview";

export declare type FaceDetectionMethod = "YoloFace";

export declare type FaceCropMethod = "ArcFace";

export declare type FaceAlignmentMethod = "ArcFace";

export declare type FaceEmbeddingMethod = "MobileFaceNet";

export declare type BlurDetectionMethod = "Laplacian";

export declare type ClusteringMethod = "Hdbscan" | "Dbscan";

export class AlignedBox {
    box: Box;
    rotation: number;
}

export interface Versioned<T> {
    value: T;
    version: number;
}

export interface FaceDetection {
    // box and landmarks are relative to the image dimensions stored at mlFileData
    box: Box;
@@ -124,15 +72,9 @@ export interface Person {
export interface MlFileData {
    fileId: number;
    faces?: Face[];
    imageSource?: ImageType;
    imageDimensions?: Dimensions;
    faceDetectionMethod?: Versioned<FaceDetectionMethod>;
    faceCropMethod?: Versioned<FaceCropMethod>;
    faceAlignmentMethod?: Versioned<FaceAlignmentMethod>;
    faceEmbeddingMethod?: Versioned<FaceEmbeddingMethod>;
    mlVersion: number;
    errorCount: number;
    lastErrorMessage?: string;
}

export interface MLSearchConfig {
@@ -152,10 +94,4 @@ export interface MLSyncFileContext {
    newAlignment?: boolean;
}

export interface MLLibraryData {
    faceClusteringMethod?: Versioned<ClusteringMethod>;
    faceClusteringResults?: ClusterFacesResult;
    faceClustersWithNoise?: FacesClustersWithNoise;
}

export declare type MLIndex = "files" | "people";

@@ -9,22 +9,18 @@ import { CustomError, parseUploadErrorCodes } from "@ente/shared/error";
import PQueue from "p-queue";
import { putEmbedding } from "services/embeddingService";
import mlIDbStorage, { ML_SEARCH_CONFIG_NAME } from "services/face/db";
import { fetchImageBitmap, getLocalFile } from "services/face/image";
import {
    Face,
    FaceDetection,
    Landmark,
    MLLibraryData,
    MLSearchConfig,
    MLSyncFileContext,
    MLSyncResult,
    MlFileData,
} from "services/face/types";
import { getLocalFiles } from "services/fileService";
import { EnteFile } from "types/file";
import { isInternalUserForML } from "utils/user";
import { regenerateFaceCrop, syncFileAnalyzeFaces } from "../face/f-index";
import { fetchImageBitmapForContext } from "../face/image";
import { syncPeopleIndex } from "../face/people";
import { indexFaces, saveFaceCrop } from "../face/f-index";

/**
 * TODO-ML(MR): What and why.
@@ -56,41 +52,16 @@ export async function updateMLSearchConfig(newConfig: MLSearchConfig) {
    return mlIDbStorage.putConfig(ML_SEARCH_CONFIG_NAME, newConfig);
}

export interface MLSyncContext {
    token: string;
    userID: number;

    localFilesMap: Map<number, EnteFile>;
    outOfSyncFiles: EnteFile[];
    nSyncedFiles: number;
    nSyncedFaces: number;
    allSyncedFacesMap?: Map<number, Array<Face>>;

    error?: Error;

    // oldMLLibraryData: MLLibraryData;
    mlLibraryData: MLLibraryData;

    syncQueue: PQueue;

    getEnteWorker(id: number): Promise<any>;
    dispose(): Promise<void>;
}

export class LocalMLSyncContext implements MLSyncContext {
class MLSyncContext {
    public token: string;
    public userID: number;

    public localFilesMap: Map<number, EnteFile>;
    public outOfSyncFiles: EnteFile[];
    public nSyncedFiles: number;
    public nSyncedFaces: number;
    public allSyncedFacesMap?: Map<number, Array<Face>>;

    public error?: Error;

    public mlLibraryData: MLLibraryData;

    public syncQueue: PQueue;
    // TODO: whether to limit concurrent downloads
    // private downloadQueue: PQueue;
@@ -107,7 +78,6 @@ export class LocalMLSyncContext implements MLSyncContext {

        this.outOfSyncFiles = [];
        this.nSyncedFiles = 0;
        this.nSyncedFaces = 0;

        this.concurrency = concurrency ?? getConcurrency();

@@ -151,7 +121,7 @@ class MachineLearningService {
    private localSyncContext: Promise<MLSyncContext>;
    private syncContext: Promise<MLSyncContext>;

    public async sync(token: string, userID: number): Promise<MLSyncResult> {
    public async sync(token: string, userID: number): Promise<boolean> {
        if (!token) {
            throw Error("Token needed by ml service to sync file");
        }
@@ -166,34 +136,9 @@ class MachineLearningService {
            await this.syncFiles(syncContext);
        }

        // TODO-ML(MR): Forced disable clustering. It doesn't currently work,
        // need to finalize it before we move out of beta.
        //
        // > Error: Failed to execute 'transferToImageBitmap' on
        // > 'OffscreenCanvas': ImageBitmap construction failed
        /*
        if (
            syncContext.outOfSyncFiles.length <= 0 ||
            (syncContext.nSyncedFiles === batchSize && Math.random() < 0)
        ) {
            await this.syncIndex(syncContext);
        }
        */

        const mlSyncResult: MLSyncResult = {
            nOutOfSyncFiles: syncContext.outOfSyncFiles.length,
            nSyncedFiles: syncContext.nSyncedFiles,
            nSyncedFaces: syncContext.nSyncedFaces,
            nFaceClusters:
                syncContext.mlLibraryData?.faceClusteringResults?.clusters
                    .length,
            nFaceNoise:
                syncContext.mlLibraryData?.faceClusteringResults?.noise.length,
            error: syncContext.error,
        };
        // log.info('[MLService] sync results: ', mlSyncResult);

        return mlSyncResult;
        const error = syncContext.error;
        const nOutOfSyncFiles = syncContext.outOfSyncFiles.length;
        return !error && nOutOfSyncFiles > 0;
    }

    public async regenerateFaceCrop(faceID: string) {
@@ -309,7 +254,6 @@ class MachineLearningService {
            syncContext.error = error;
        }
        await syncContext.syncQueue.onIdle();
        log.info("allFaces: ", syncContext.nSyncedFaces);

        // TODO: In case syncJob has to use multiple ml workers
        // do in same transaction with each file update
@@ -324,7 +268,7 @@ class MachineLearningService {

            // TODO-ML(MR): Keep as promise for now.
            this.syncContext = new Promise((resolve) => {
                resolve(new LocalMLSyncContext(token, userID));
                resolve(new MLSyncContext(token, userID));
            });
        } else {
            log.info("reusing existing syncContext");
@@ -338,7 +282,7 @@ class MachineLearningService {
            log.info("Creating localSyncContext");
            // TODO-ML(MR):
            this.localSyncContext = new Promise((resolve) => {
                resolve(new LocalMLSyncContext(token, userID));
                resolve(new MLSyncContext(token, userID));
            });
        } else {
            log.info("reusing existing localSyncContext");
@@ -389,7 +333,6 @@ class MachineLearningService {
                `Indexing ${enteFile.title ?? "<untitled>"} ${enteFile.id}`,
            );
            const mlFileData = await this.syncFile(enteFile, localFile);
            syncContext.nSyncedFaces += mlFileData.faces?.length || 0;
            syncContext.nSyncedFiles += 1;
            return mlFileData;
        } catch (e) {
@@ -422,31 +365,14 @@ class MachineLearningService {
    }

    private async syncFile(enteFile: EnteFile, localFile?: globalThis.File) {
        log.debug(() => ({ a: "Syncing file", enteFile }));
        const fileContext: MLSyncFileContext = { enteFile, localFile };
        const oldMlFile = await this.getMLFileData(enteFile.id);
        if (oldMlFile && oldMlFile.mlVersion) {
            return oldMlFile;
        }

        const newMlFile = (fileContext.newMlFile = this.newMlData(enteFile.id));
        newMlFile.mlVersion = defaultMLVersion;

        try {
            await fetchImageBitmapForContext(fileContext);
            await syncFileAnalyzeFaces(fileContext);
            newMlFile.errorCount = 0;
            newMlFile.lastErrorMessage = undefined;
            await this.persistOnServer(newMlFile, enteFile);
            await mlIDbStorage.putFile(newMlFile);
        } catch (e) {
            log.error("ml detection failed", e);
            newMlFile.mlVersion = oldMlFile.mlVersion;
            throw e;
        } finally {
            fileContext.imageBitmap && fileContext.imageBitmap.close();
        }

        const newMlFile = await indexFaces(enteFile, localFile);
        await this.persistOnServer(newMlFile, enteFile);
        await mlIDbStorage.putFile(newMlFile);
        return newMlFile;
    }

@@ -484,7 +410,7 @@ class MachineLearningService {
                mlFileData = this.newMlData(enteFile.id);
            }
            mlFileData.errorCount = (mlFileData.errorCount || 0) + 1;
            mlFileData.lastErrorMessage = e.message;
            console.error(`lastError for ${enteFile.id}`, e);

            return mlFileData;
        });
@@ -493,26 +419,6 @@ class MachineLearningService {
            console.error("Error while storing ml sync error", e);
        }
    }

    private async getMLLibraryData(syncContext: MLSyncContext) {
        syncContext.mlLibraryData = await mlIDbStorage.getLibraryData();
        if (!syncContext.mlLibraryData) {
            syncContext.mlLibraryData = {};
        }
    }

    private async persistMLLibraryData(syncContext: MLSyncContext) {
        return mlIDbStorage.putLibraryData(syncContext.mlLibraryData);
    }

    public async syncIndex(syncContext: MLSyncContext) {
        await this.getMLLibraryData(syncContext);

        // TODO-ML(MR): Ensure this doesn't run until fixed.
        await syncPeopleIndex(syncContext);

        await this.persistMLLibraryData(syncContext);
    }
}

export default new MachineLearningService();
@@ -543,19 +449,14 @@ class ServerFileMl {
class ServerFaceEmbeddings {
    public faces: ServerFace[];
    public version: number;
    /* TODO
    public client?: string;
    public error?: boolean;
    */

    public constructor(
        faces: ServerFace[],
        version: number,
        client?: string,
        error?: boolean,
    ) {
    public constructor(faces: ServerFace[], version: number) {
        this.faces = faces;
        this.version = version;
        this.client = client;
        this.error = error;
    }
}

@@ -613,10 +514,7 @@ class ServerFaceBox {
function LocalFileMlDataToServerFileMl(
    localFileMlData: MlFileData,
): ServerFileMl {
    if (
        localFileMlData.errorCount > 0 &&
        localFileMlData.lastErrorMessage !== undefined
    ) {
    if (localFileMlData.errorCount > 0) {
        return null;
    }
    const imageDimensions = localFileMlData.imageDimensions;
@@ -640,6 +538,7 @@ function LocalFileMlDataToServerFileMl(
        } as Landmark);
    }

    // TODO: Add client UA and version
    const newFaceObject = new ServerFace(
        faceID,
        Array.from(embedding),
@@ -649,11 +548,7 @@ function LocalFileMlDataToServerFileMl(
        );
        faces.push(newFaceObject);
    }
    const faceEmbeddings = new ServerFaceEmbeddings(
        faces,
        1,
        localFileMlData.lastErrorMessage,
    );
    const faceEmbeddings = new ServerFaceEmbeddings(faces, 1);
    return new ServerFileMl(
        localFileMlData.fileId,
        faceEmbeddings,
@@ -673,3 +568,15 @@ export function logQueueStats(queue: PQueue, name: string) {
            console.error(`queuestats: ${name}: Error, `, error),
    );
}

export const regenerateFaceCrop = async (faceID: string) => {
    const fileID = Number(faceID.split("-")[0]);
    const personFace = await mlIDbStorage.getFace(fileID, faceID);
    if (!personFace) {
        throw Error("Face not found");
    }

    const file = await getLocalFile(personFace.fileId);
    const imageBitmap = await fetchImageBitmap(file);
    return await saveFaceCrop(imageBitmap, personFace);
};

@ -8,25 +8,19 @@ import PQueue from "p-queue";
|
|||
import { createFaceComlinkWorker } from "services/face";
|
||||
import mlIDbStorage from "services/face/db";
|
||||
import type { DedicatedMLWorker } from "services/face/face.worker";
|
||||
import { MLSyncResult } from "services/face/types";
|
||||
import { EnteFile } from "types/file";
|
||||
import { logQueueStats } from "./machineLearningService";
|
||||
|
||||
export type JobState = "Scheduled" | "Running" | "NotScheduled";
|
||||
|
||||
export interface MLSyncJobResult {
|
||||
shouldBackoff: boolean;
|
||||
mlSyncResult: MLSyncResult;
|
||||
}
|
||||
|
||||
export class MLSyncJob {
|
||||
private runCallback: () => Promise<MLSyncJobResult>;
|
||||
private runCallback: () => Promise<boolean>;
|
||||
private state: JobState;
|
||||
private stopped: boolean;
|
||||
private intervalSec: number;
|
||||
private nextTimeoutId: ReturnType<typeof setTimeout>;
|
||||
|
||||
constructor(runCallback: () => Promise<MLSyncJobResult>) {
|
||||
constructor(runCallback: () => Promise<boolean>) {
|
||||
this.runCallback = runCallback;
|
||||
this.state = "NotScheduled";
|
||||
this.stopped = true;
|
||||
|
@@ -65,13 +59,11 @@ export class MLSyncJob {
        this.state = "Running";

        try {
            const jobResult = await this.runCallback();
            if (jobResult && jobResult.shouldBackoff) {
                this.intervalSec = Math.min(960, this.intervalSec * 2);
            } else {
            if (await this.runCallback()) {
                this.resetInterval();
            } else {
                this.intervalSec = Math.min(960, this.intervalSec * 2);
            }
            log.info("Job completed");
        } catch (e) {
            console.error("Error while running Job: ", e);
        } finally {
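
The rewritten branch encodes a standard exponential backoff: a truthy callback result resets the interval, a falsy one doubles it up to a 960 second cap. Starting from a hypothetical 30 second base, the idle schedule would be 30, 60, 120, 240, 480, 960, 960, and so on (only the cap and the doubling appear in the code above; the base value is assumed):

const nextIntervalSec = (intervalSec: number) => Math.min(960, intervalSec * 2);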
@@ -255,7 +247,14 @@ class MLWorkManager {
        this.syncJobWorker = undefined;
    }

    private async runMLSyncJob(): Promise<MLSyncJobResult> {
    /**
     * Returns `false` to indicate that either an error occurred, or there
     * are no more files to process, or that we cannot currently process
     * files.
     *
     * Which means that when it returns `true`, all is well and there are
     * more things pending to process, so we should chug along at full speed.
     */
    private async runMLSyncJob(): Promise<boolean> {
        try {
            // TODO: skipping is not required if we are caching chunks through a service worker;
            // currently the worker chunk itself is not loaded when the network is not there

@@ -263,29 +262,16 @@ class MLWorkManager {
            log.info(
                "Skipping ml-sync job run as not connected to internet.",
            );
            return {
                shouldBackoff: true,
                mlSyncResult: undefined,
            };
            return false;
        }

        const token = getToken();
        const userID = getUserID();
        const jobWorkerProxy = await this.getSyncJobWorker();

        const mlSyncResult = await jobWorkerProxy.sync(token, userID);

        return await jobWorkerProxy.sync(token, userID);
        // this.terminateSyncJobWorker();
        const jobResult: MLSyncJobResult = {
            shouldBackoff:
                !!mlSyncResult.error || mlSyncResult.nOutOfSyncFiles < 1,
            mlSyncResult,
        };
        log.info("ML Sync Job result: ", JSON.stringify(jobResult));

        // TODO: redirect/refresh to gallery in case of session_expired, stop ml sync job

        return jobResult;
    } catch (e) {
        log.error("Failed to run MLSync Job", e);
    }
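
Taken together, the job and the callback now share a single boolean contract: true means no error occurred and more files are pending, so run again soon; false means back off. A sketch of how the pieces connect (`runSomeSync` is a hypothetical stand-in for the worker's sync call):

const job = new MLSyncJob(async () => {
    const morePending = await runSomeSync();
    return morePending;
});

Note also that if the callback throws, the catch block above logs the error and the function implicitly returns undefined, which the job treats the same as an explicit false.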

@@ -1,468 +0,0 @@
// These utils only work in environments where OffscreenCanvas is available.

import { Matrix, inverse } from "ml-matrix";
import { Box, Dimensions, enlargeBox } from "services/face/geom";
import { FaceAlignment } from "services/face/types";

export function normalizePixelBetween0And1(pixelValue: number) {
    return pixelValue / 255.0;
}

export function normalizePixelBetweenMinus1And1(pixelValue: number) {
    return pixelValue / 127.5 - 1.0;
}

export function unnormalizePixelFromBetweenMinus1And1(pixelValue: number) {
    return clamp(Math.round((pixelValue + 1.0) * 127.5), 0, 255);
}
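
An illustrative round-trip check for the [-1, 1] normalization pair above:

console.assert(normalizePixelBetweenMinus1And1(0) === -1);
console.assert(normalizePixelBetweenMinus1And1(255) === 1);
console.assert(unnormalizePixelFromBetweenMinus1And1(-1) === 0);
console.assert(unnormalizePixelFromBetweenMinus1And1(1) === 255);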

export function readPixelColor(
    imageData: Uint8ClampedArray,
    width: number,
    height: number,
    x: number,
    y: number,
) {
    if (x < 0 || x >= width || y < 0 || y >= height) {
        return { r: 0, g: 0, b: 0, a: 0 };
    }
    // RGBA layout: 4 bytes per pixel, rows are `width` pixels long.
    const index = (y * width + x) * 4;
    return {
        r: imageData[index],
        g: imageData[index + 1],
        b: imageData[index + 2],
        a: imageData[index + 3],
    };
}

export function clamp(value: number, min: number, max: number) {
    return Math.min(max, Math.max(min, value));
}

export function getPixelBicubic(
    fx: number,
    fy: number,
    imageData: Uint8ClampedArray,
    imageWidth: number,
    imageHeight: number,
) {
    // Clamp to image boundaries
    fx = clamp(fx, 0, imageWidth - 1);
    fy = clamp(fy, 0, imageHeight - 1);

    const x = Math.trunc(fx) - (fx >= 0.0 ? 0 : 1);
    const px = x - 1;
    const nx = x + 1;
    const ax = x + 2;
    const y = Math.trunc(fy) - (fy >= 0.0 ? 0 : 1);
    const py = y - 1;
    const ny = y + 1;
    const ay = y + 2;
    const dx = fx - x;
    const dy = fy - y;

    function cubic(
        dx: number,
        ipp: number,
        icp: number,
        inp: number,
        iap: number,
    ) {
        return (
            icp +
            0.5 *
                (dx * (-ipp + inp) +
                    dx * dx * (2 * ipp - 5 * icp + 4 * inp - iap) +
                    dx * dx * dx * (-ipp + 3 * icp - 3 * inp + iap))
        );
    }
    const icc = readPixelColor(imageData, imageWidth, imageHeight, x, y);

    const ipp =
        px < 0 || py < 0
            ? icc
            : readPixelColor(imageData, imageWidth, imageHeight, px, py);
    const icp =
        px < 0
            ? icc
            : readPixelColor(imageData, imageWidth, imageHeight, x, py);
    const inp =
        py < 0 || nx >= imageWidth
            ? icc
            : readPixelColor(imageData, imageWidth, imageHeight, nx, py);
    const iap =
        ax >= imageWidth || py < 0
            ? icc
            : readPixelColor(imageData, imageWidth, imageHeight, ax, py);

    const ip0 = cubic(dx, ipp.r, icp.r, inp.r, iap.r);
    const ip1 = cubic(dx, ipp.g, icp.g, inp.g, iap.g);
    const ip2 = cubic(dx, ipp.b, icp.b, inp.b, iap.b);
    // const ip3 = cubic(dx, ipp.a, icp.a, inp.a, iap.a);

    const ipc =
        px < 0
            ? icc
            : readPixelColor(imageData, imageWidth, imageHeight, px, y);
    const inc =
        nx >= imageWidth
            ? icc
            : readPixelColor(imageData, imageWidth, imageHeight, nx, y);
    const iac =
        ax >= imageWidth
            ? icc
            : readPixelColor(imageData, imageWidth, imageHeight, ax, y);

    const ic0 = cubic(dx, ipc.r, icc.r, inc.r, iac.r);
    const ic1 = cubic(dx, ipc.g, icc.g, inc.g, iac.g);
    const ic2 = cubic(dx, ipc.b, icc.b, inc.b, iac.b);
    // const ic3 = cubic(dx, ipc.a, icc.a, inc.a, iac.a);

    const ipn =
        px < 0 || ny >= imageHeight
            ? icc
            : readPixelColor(imageData, imageWidth, imageHeight, px, ny);
    const icn =
        ny >= imageHeight
            ? icc
            : readPixelColor(imageData, imageWidth, imageHeight, x, ny);
    const inn =
        nx >= imageWidth || ny >= imageHeight
            ? icc
            : readPixelColor(imageData, imageWidth, imageHeight, nx, ny);
    const ian =
        ax >= imageWidth || ny >= imageHeight
            ? icc
            : readPixelColor(imageData, imageWidth, imageHeight, ax, ny);

    const in0 = cubic(dx, ipn.r, icn.r, inn.r, ian.r);
    const in1 = cubic(dx, ipn.g, icn.g, inn.g, ian.g);
    const in2 = cubic(dx, ipn.b, icn.b, inn.b, ian.b);
    // const in3 = cubic(dx, ipn.a, icn.a, inn.a, ian.a);

    const ipa =
        px < 0 || ay >= imageHeight
            ? icc
            : readPixelColor(imageData, imageWidth, imageHeight, px, ay);
    const ica =
        ay >= imageHeight
            ? icc
            : readPixelColor(imageData, imageWidth, imageHeight, x, ay);
    const ina =
        nx >= imageWidth || ay >= imageHeight
            ? icc
            : readPixelColor(imageData, imageWidth, imageHeight, nx, ay);
    const iaa =
        ax >= imageWidth || ay >= imageHeight
            ? icc
            : readPixelColor(imageData, imageWidth, imageHeight, ax, ay);

    const ia0 = cubic(dx, ipa.r, ica.r, ina.r, iaa.r);
    const ia1 = cubic(dx, ipa.g, ica.g, ina.g, iaa.g);
    const ia2 = cubic(dx, ipa.b, ica.b, ina.b, iaa.b);
    // const ia3 = cubic(dx, ipa.a, ica.a, ina.a, iaa.a);

    const c0 = Math.trunc(clamp(cubic(dy, ip0, ic0, in0, ia0), 0, 255));
    const c1 = Math.trunc(clamp(cubic(dy, ip1, ic1, in1, ia1), 0, 255));
    const c2 = Math.trunc(clamp(cubic(dy, ip2, ic2, in2, ia2), 0, 255));
    // const c3 = cubic(dy, ip3, ic3, in3, ia3);

    return { r: c0, g: c1, b: c2 };
}
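
The cubic() helper above matches the Catmull-Rom interpolation kernel: it passes through its two inner samples (dx = 0 yields icp, dx = 1 yields inp), while the outer samples only shape the tangents. A quick illustrative check:

const catmullRom = (t: number, a: number, b: number, c: number, d: number) =>
    b + 0.5 * (t * (-a + c) + t * t * (2 * a - 5 * b + 4 * c - d) + t * t * t * (-a + 3 * b - 3 * c + d));
console.assert(catmullRom(0, 10, 20, 30, 40) === 20);
console.assert(catmullRom(1, 10, 20, 30, 40) === 30);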

/**
 * Returns the pixel value (RGB) at the given coordinates using bilinear
 * interpolation.
 */
export function getPixelBilinear(
    fx: number,
    fy: number,
    imageData: Uint8ClampedArray,
    imageWidth: number,
    imageHeight: number,
) {
    // Clamp to image boundaries
    fx = clamp(fx, 0, imageWidth - 1);
    fy = clamp(fy, 0, imageHeight - 1);

    // Get the surrounding coordinates and their weights
    const x0 = Math.floor(fx);
    const x1 = Math.ceil(fx);
    const y0 = Math.floor(fy);
    const y1 = Math.ceil(fy);
    const dx = fx - x0;
    const dy = fy - y0;
    const dx1 = 1.0 - dx;
    const dy1 = 1.0 - dy;

    // Get the original pixels
    const pixel1 = readPixelColor(imageData, imageWidth, imageHeight, x0, y0);
    const pixel2 = readPixelColor(imageData, imageWidth, imageHeight, x1, y0);
    const pixel3 = readPixelColor(imageData, imageWidth, imageHeight, x0, y1);
    const pixel4 = readPixelColor(imageData, imageWidth, imageHeight, x1, y1);

    function bilinear(val1: number, val2: number, val3: number, val4: number) {
        return Math.round(
            val1 * dx1 * dy1 +
                val2 * dx * dy1 +
                val3 * dx1 * dy +
                val4 * dx * dy,
        );
    }

    // Interpolate the pixel values
    const red = bilinear(pixel1.r, pixel2.r, pixel3.r, pixel4.r);
    const green = bilinear(pixel1.g, pixel2.g, pixel3.g, pixel4.g);
    const blue = bilinear(pixel1.b, pixel2.b, pixel3.b, pixel4.b);

    return { r: red, g: green, b: blue };
}

export function warpAffineFloat32List(
    imageBitmap: ImageBitmap,
    faceAlignment: FaceAlignment,
    faceSize: number,
    inputData: Float32Array,
    inputStartIndex: number,
): void {
    // Get the pixel data
    const offscreenCanvas = new OffscreenCanvas(
        imageBitmap.width,
        imageBitmap.height,
    );
    const ctx = offscreenCanvas.getContext("2d");
    ctx.drawImage(imageBitmap, 0, 0, imageBitmap.width, imageBitmap.height);
    const imageData = ctx.getImageData(
        0,
        0,
        imageBitmap.width,
        imageBitmap.height,
    );
    const pixelData = imageData.data;

    const transformationMatrix = faceAlignment.affineMatrix.map((row) =>
        row.map((val) => (val != 1.0 ? val * faceSize : 1.0)),
    ); // 3x3

    const A: Matrix = new Matrix([
        [transformationMatrix[0][0], transformationMatrix[0][1]],
        [transformationMatrix[1][0], transformationMatrix[1][1]],
    ]);
    const Ainverse = inverse(A);

    const b00 = transformationMatrix[0][2];
    const b10 = transformationMatrix[1][2];
    const a00Prime = Ainverse.get(0, 0);
    const a01Prime = Ainverse.get(0, 1);
    const a10Prime = Ainverse.get(1, 0);
    const a11Prime = Ainverse.get(1, 1);

    for (let yTrans = 0; yTrans < faceSize; ++yTrans) {
        for (let xTrans = 0; xTrans < faceSize; ++xTrans) {
            // Perform inverse affine transformation
            const xOrigin =
                a00Prime * (xTrans - b00) + a01Prime * (yTrans - b10);
            const yOrigin =
                a10Prime * (xTrans - b00) + a11Prime * (yTrans - b10);

            // Get the pixel from interpolation
            const pixel = getPixelBicubic(
                xOrigin,
                yOrigin,
                pixelData,
                imageBitmap.width,
                imageBitmap.height,
            );

            // Set the pixel in the input data
            const index = (yTrans * faceSize + xTrans) * 3;
            inputData[inputStartIndex + index] =
                normalizePixelBetweenMinus1And1(pixel.r);
            inputData[inputStartIndex + index + 1] =
                normalizePixelBetweenMinus1And1(pixel.g);
            inputData[inputStartIndex + index + 2] =
                normalizePixelBetweenMinus1And1(pixel.b);
        }
    }
}
export function createGrayscaleIntMatrixFromNormalized2List(
    imageList: Float32Array,
    faceNumber: number,
    width: number = 112,
    height: number = 112,
): number[][] {
    const startIndex = faceNumber * width * height * 3;
    return Array.from({ length: height }, (_, y) =>
        Array.from({ length: width }, (_, x) => {
            // Luma: 0.299 ∙ Red + 0.587 ∙ Green + 0.114 ∙ Blue
            const pixelIndex = startIndex + 3 * (y * width + x);
            const r = unnormalizePixelFromBetweenMinus1And1(imageList[pixelIndex]);
            const g = unnormalizePixelFromBetweenMinus1And1(imageList[pixelIndex + 1]);
            const b = unnormalizePixelFromBetweenMinus1And1(imageList[pixelIndex + 2]);
            return clamp(Math.round(0.299 * r + 0.587 * g + 0.114 * b), 0, 255);
        }),
    );
}
export function resizeToSquare(img: ImageBitmap, size: number) {
    const scale = size / Math.max(img.height, img.width);
    const width = scale * img.width;
    const height = scale * img.height;
    const offscreen = new OffscreenCanvas(size, size);
    const ctx = offscreen.getContext("2d");
    ctx.imageSmoothingQuality = "high";
    ctx.drawImage(img, 0, 0, width, height);
    const resizedImage = offscreen.transferToImageBitmap();
    return { image: resizedImage, width, height };
}

export function transform(
    imageBitmap: ImageBitmap,
    affineMat: number[][],
    outputWidth: number,
    outputHeight: number,
) {
    const offscreen = new OffscreenCanvas(outputWidth, outputHeight);
    const context = offscreen.getContext("2d");
    context.imageSmoothingQuality = "high";

    context.transform(
        affineMat[0][0],
        affineMat[1][0],
        affineMat[0][1],
        affineMat[1][1],
        affineMat[0][2],
        affineMat[1][2],
    );

    context.drawImage(imageBitmap, 0, 0);
    return offscreen.transferToImageBitmap();
}

export function crop(imageBitmap: ImageBitmap, cropBox: Box, size: number) {
    const dimensions: Dimensions = {
        width: size,
        height: size,
    };

    return cropWithRotation(imageBitmap, cropBox, 0, dimensions, dimensions);
}

export function cropWithRotation(
    imageBitmap: ImageBitmap,
    cropBox: Box,
    rotation?: number,
    maxSize?: Dimensions,
    minSize?: Dimensions,
) {
    const box = cropBox.round();

    const outputSize = { width: box.width, height: box.height };
    if (maxSize) {
        const minScale = Math.min(
            maxSize.width / box.width,
            maxSize.height / box.height,
        );
        if (minScale < 1) {
            outputSize.width = Math.round(minScale * box.width);
            outputSize.height = Math.round(minScale * box.height);
        }
    }

    if (minSize) {
        const maxScale = Math.max(
            minSize.width / box.width,
            minSize.height / box.height,
        );
        if (maxScale > 1) {
            outputSize.width = Math.round(maxScale * box.width);
            outputSize.height = Math.round(maxScale * box.height);
        }
    }

    // log.info({ imageBitmap, box, outputSize });

    const offscreen = new OffscreenCanvas(outputSize.width, outputSize.height);
    const offscreenCtx = offscreen.getContext("2d");
    offscreenCtx.imageSmoothingQuality = "high";

    offscreenCtx.translate(outputSize.width / 2, outputSize.height / 2);
    rotation && offscreenCtx.rotate(rotation);

    const outputBox = new Box({
        x: -outputSize.width / 2,
        y: -outputSize.height / 2,
        width: outputSize.width,
        height: outputSize.height,
    });

    const enlargedBox = enlargeBox(box, 1.5);
    const enlargedOutputBox = enlargeBox(outputBox, 1.5);

    offscreenCtx.drawImage(
        imageBitmap,
        enlargedBox.x,
        enlargedBox.y,
        enlargedBox.width,
        enlargedBox.height,
        enlargedOutputBox.x,
        enlargedOutputBox.y,
        enlargedOutputBox.width,
        enlargedOutputBox.height,
    );

    return offscreen.transferToImageBitmap();
}

export function addPadding(image: ImageBitmap, padding: number) {
    const scale = 1 + padding * 2;
    const width = scale * image.width;
    const height = scale * image.height;
    const offscreen = new OffscreenCanvas(width, height);
    const ctx = offscreen.getContext("2d");
    ctx.imageSmoothingEnabled = false;
    ctx.drawImage(
        image,
        width / 2 - image.width / 2,
        height / 2 - image.height / 2,
        image.width,
        image.height,
    );

    return offscreen.transferToImageBitmap();
}

export interface BlobOptions {
    type?: string;
    quality?: number;
}

export async function imageBitmapToBlob(imageBitmap: ImageBitmap) {
    const offscreen = new OffscreenCanvas(
        imageBitmap.width,
        imageBitmap.height,
    );
    offscreen.getContext("2d").drawImage(imageBitmap, 0, 0);

    return offscreen.convertToBlob({
        type: "image/jpeg",
        quality: 0.8,
    });
}

export async function imageBitmapFromBlob(blob: Blob) {
    return createImageBitmap(blob);
}