[web] ML cleanup - Part 4/x (#1761)

Manav Rathi 2024-05-18 09:28:02 +05:30 committed by GitHub
commit 1edafd3568
14 changed files with 1253 additions and 1424 deletions


@ -1,88 +0,0 @@
import { Matrix } from "ml-matrix";
import { Point } from "services/face/geom";
import { FaceAlignment, FaceDetection } from "services/face/types";
import { getSimilarityTransformation } from "similarity-transformation";
const ARCFACE_LANDMARKS = [
[38.2946, 51.6963],
[73.5318, 51.5014],
[56.0252, 71.7366],
[56.1396, 92.2848],
] as Array<[number, number]>;
const ARCFACE_LANDMARKS_FACE_SIZE = 112;
const ARC_FACE_5_LANDMARKS = [
[38.2946, 51.6963],
[73.5318, 51.5014],
[56.0252, 71.7366],
[41.5493, 92.3655],
[70.7299, 92.2041],
] as Array<[number, number]>;
/**
* Compute and return an {@link FaceAlignment} for the given face detection.
*
* @param faceDetection A geometry indicating a face detected in an image.
*/
export const faceAlignment = (faceDetection: FaceDetection): FaceAlignment => {
const landmarkCount = faceDetection.landmarks.length;
return getFaceAlignmentUsingSimilarityTransform(
faceDetection,
normalizeLandmarks(
landmarkCount === 5 ? ARC_FACE_5_LANDMARKS : ARCFACE_LANDMARKS,
ARCFACE_LANDMARKS_FACE_SIZE,
),
);
};
function getFaceAlignmentUsingSimilarityTransform(
faceDetection: FaceDetection,
alignedLandmarks: Array<[number, number]>,
): FaceAlignment {
const landmarksMat = new Matrix(
faceDetection.landmarks
.map((p) => [p.x, p.y])
.slice(0, alignedLandmarks.length),
).transpose();
const alignedLandmarksMat = new Matrix(alignedLandmarks).transpose();
const simTransform = getSimilarityTransformation(
landmarksMat,
alignedLandmarksMat,
);
const RS = Matrix.mul(simTransform.rotation, simTransform.scale);
const TR = simTransform.translation;
const affineMatrix = [
[RS.get(0, 0), RS.get(0, 1), TR.get(0, 0)],
[RS.get(1, 0), RS.get(1, 1), TR.get(1, 0)],
[0, 0, 1],
];
const size = 1 / simTransform.scale;
const meanTranslation = simTransform.toMean.sub(0.5).mul(size);
const centerMat = simTransform.fromMean.sub(meanTranslation);
const center = new Point(centerMat.get(0, 0), centerMat.get(1, 0));
const rotation = -Math.atan2(
simTransform.rotation.get(0, 1),
simTransform.rotation.get(0, 0),
);
return {
affineMatrix,
center,
size,
rotation,
};
}
function normalizeLandmarks(
landmarks: Array<[number, number]>,
faceSize: number,
): Array<[number, number]> {
return landmarks.map((landmark) =>
landmark.map((p) => p / faceSize),
) as Array<[number, number]>;
}


@ -1,187 +0,0 @@
import { Face } from "services/face/types";
import { createGrayscaleIntMatrixFromNormalized2List } from "utils/image";
import { mobileFaceNetFaceSize } from "./embed";
/**
* Laplacian blur detection.
*/
export const detectBlur = (
alignedFaces: Float32Array,
faces: Face[],
): number[] => {
const numFaces = Math.round(
alignedFaces.length /
(mobileFaceNetFaceSize * mobileFaceNetFaceSize * 3),
);
const blurValues: number[] = [];
for (let i = 0; i < numFaces; i++) {
const face = faces[i];
const direction = faceDirection(face);
const faceImage = createGrayscaleIntMatrixFromNormalized2List(
alignedFaces,
i,
);
const laplacian = applyLaplacian(faceImage, direction);
blurValues.push(matrixVariance(laplacian));
}
return blurValues;
};
type FaceDirection = "left" | "right" | "straight";
const faceDirection = (face: Face): FaceDirection => {
const landmarks = face.detection.landmarks;
const leftEye = landmarks[0];
const rightEye = landmarks[1];
const nose = landmarks[2];
const leftMouth = landmarks[3];
const rightMouth = landmarks[4];
const eyeDistanceX = Math.abs(rightEye.x - leftEye.x);
const eyeDistanceY = Math.abs(rightEye.y - leftEye.y);
const mouthDistanceY = Math.abs(rightMouth.y - leftMouth.y);
const faceIsUpright =
Math.max(leftEye.y, rightEye.y) + 0.5 * eyeDistanceY < nose.y &&
nose.y + 0.5 * mouthDistanceY < Math.min(leftMouth.y, rightMouth.y);
const noseStickingOutLeft =
nose.x < Math.min(leftEye.x, rightEye.x) &&
nose.x < Math.min(leftMouth.x, rightMouth.x);
const noseStickingOutRight =
nose.x > Math.max(leftEye.x, rightEye.x) &&
nose.x > Math.max(leftMouth.x, rightMouth.x);
const noseCloseToLeftEye =
Math.abs(nose.x - leftEye.x) < 0.2 * eyeDistanceX;
const noseCloseToRightEye =
Math.abs(nose.x - rightEye.x) < 0.2 * eyeDistanceX;
if (noseStickingOutLeft || (faceIsUpright && noseCloseToLeftEye)) {
return "left";
} else if (noseStickingOutRight || (faceIsUpright && noseCloseToRightEye)) {
return "right";
}
return "straight";
};
/**
* Return a new image by applying a Laplacian blur kernel to each pixel.
*/
const applyLaplacian = (
image: number[][],
direction: FaceDirection,
): number[][] => {
const paddedImage: number[][] = padImage(image, direction);
const numRows = paddedImage.length - 2;
const numCols = paddedImage[0].length - 2;
// Create an output image initialized to 0.
const outputImage: number[][] = Array.from({ length: numRows }, () =>
new Array(numCols).fill(0),
);
// Define the Laplacian kernel.
const kernel: number[][] = [
[0, 1, 0],
[1, -4, 1],
[0, 1, 0],
];
// Apply the kernel to each pixel
for (let i = 0; i < numRows; i++) {
for (let j = 0; j < numCols; j++) {
let sum = 0;
for (let ki = 0; ki < 3; ki++) {
for (let kj = 0; kj < 3; kj++) {
sum += paddedImage[i + ki][j + kj] * kernel[ki][kj];
}
}
// Adjust the output value if necessary (e.g., clipping).
outputImage[i][j] = sum;
}
}
return outputImage;
};
const padImage = (image: number[][], direction: FaceDirection): number[][] => {
const removeSideColumns = 56; /* must be even */
const numRows = image.length;
const numCols = image[0].length;
const paddedNumCols = numCols + 2 - removeSideColumns;
const paddedNumRows = numRows + 2;
// Create a new matrix with extra padding.
const paddedImage: number[][] = Array.from({ length: paddedNumRows }, () =>
new Array(paddedNumCols).fill(0),
);
if (direction === "straight") {
// Copy original image into the center of the padded image.
for (let i = 0; i < numRows; i++) {
for (let j = 0; j < paddedNumCols - 2; j++) {
paddedImage[i + 1][j + 1] =
image[i][j + Math.round(removeSideColumns / 2)];
}
}
} else if (direction === "left") {
// If the face is facing left, we only take the right side of the face image.
for (let i = 0; i < numRows; i++) {
for (let j = 0; j < paddedNumCols - 2; j++) {
paddedImage[i + 1][j + 1] = image[i][j + removeSideColumns];
}
}
} else if (direction === "right") {
// If the face is facing right, we only take the left side of the face image.
for (let i = 0; i < numRows; i++) {
for (let j = 0; j < paddedNumCols - 2; j++) {
paddedImage[i + 1][j + 1] = image[i][j];
}
}
}
// Reflect padding
// Top and bottom rows
for (let j = 1; j <= paddedNumCols - 2; j++) {
paddedImage[0][j] = paddedImage[2][j]; // Top row
paddedImage[numRows + 1][j] = paddedImage[numRows - 1][j]; // Bottom row
}
// Left and right columns
for (let i = 0; i < numRows + 2; i++) {
paddedImage[i][0] = paddedImage[i][2]; // Left column
paddedImage[i][paddedNumCols - 1] = paddedImage[i][paddedNumCols - 3]; // Right column
}
return paddedImage;
};
const matrixVariance = (matrix: number[][]): number => {
const numRows = matrix.length;
const numCols = matrix[0].length;
const totalElements = numRows * numCols;
// Calculate the mean.
let mean: number = 0;
matrix.forEach((row) => {
row.forEach((value) => {
mean += value;
});
});
mean /= totalElements;
// Calculate the variance.
let variance: number = 0;
matrix.forEach((row) => {
row.forEach((value) => {
const diff: number = value - mean;
variance += diff * diff;
});
});
variance /= totalElements;
return variance;
};


@ -1,32 +0,0 @@
import { Box, enlargeBox } from "services/face/geom";
import { FaceCrop, FaceDetection } from "services/face/types";
import { cropWithRotation } from "utils/image";
import { faceAlignment } from "./align";
export const getFaceCrop = (
imageBitmap: ImageBitmap,
faceDetection: FaceDetection,
): FaceCrop => {
const alignment = faceAlignment(faceDetection);
const padding = 0.25;
const maxSize = 256;
const alignmentBox = new Box({
x: alignment.center.x - alignment.size / 2,
y: alignment.center.y - alignment.size / 2,
width: alignment.size,
height: alignment.size,
}).round();
const scaleForPadding = 1 + padding * 2;
const paddedBox = enlargeBox(alignmentBox, scaleForPadding).round();
const faceImageBitmap = cropWithRotation(imageBitmap, paddedBox, 0, {
width: maxSize,
height: maxSize,
});
return {
image: faceImageBitmap,
imageBox: paddedBox,
};
};


@ -9,7 +9,7 @@ import {
openDB,
} from "idb";
import isElectron from "is-electron";
import { Face, MLLibraryData, MlFileData, Person } from "services/face/types";
import { Face, MlFileData, Person } from "services/face/types";
import {
DEFAULT_ML_SEARCH_CONFIG,
MAX_ML_SYNC_ERROR_COUNT,
@ -50,7 +50,7 @@ interface MLDb extends DBSchema {
};
library: {
key: string;
value: MLLibraryData;
value: unknown;
};
configs: {
key: string;
@ -177,6 +177,7 @@ class MLIDbStorage {
ML_SEARCH_CONFIG_NAME,
);
db.deleteObjectStore("library");
db.deleteObjectStore("things");
} catch {
// TODO: ignore for now as we finalize the new version
@ -400,16 +401,6 @@ class MLIDbStorage {
return db.put("versions", version, index);
}
public async getLibraryData() {
const db = await this.db;
return db.get("library", "data");
}
public async putLibraryData(data: MLLibraryData) {
const db = await this.db;
return db.put("library", data, "data");
}
public async getConfig<T extends Config>(name: string, def: T) {
const db = await this.db;
const tx = db.transaction("configs", "readwrite");


@ -1,316 +0,0 @@
import { workerBridge } from "@/next/worker/worker-bridge";
import { euclidean } from "hdbscan";
import {
Box,
Dimensions,
Point,
boxFromBoundingBox,
newBox,
} from "services/face/geom";
import { FaceDetection } from "services/face/types";
import {
Matrix,
applyToPoint,
compose,
scale,
translate,
} from "transformation-matrix";
import {
clamp,
getPixelBilinear,
normalizePixelBetween0And1,
} from "utils/image";
/**
* Detect faces in the given {@link imageBitmap}.
*
* The model used is YOLO, running in an ONNX runtime.
*/
export const detectFaces = async (
imageBitmap: ImageBitmap,
): Promise<Array<FaceDetection>> => {
const maxFaceDistancePercent = Math.sqrt(2) / 100;
const maxFaceDistance = imageBitmap.width * maxFaceDistancePercent;
const preprocessResult = preprocessImageBitmapToFloat32ChannelsFirst(
imageBitmap,
640,
640,
);
const data = preprocessResult.data;
const resized = preprocessResult.newSize;
const outputData = await workerBridge.detectFaces(data);
const faces = getFacesFromYOLOOutput(outputData as Float32Array, 0.7);
const inBox = newBox(0, 0, resized.width, resized.height);
const toBox = newBox(0, 0, imageBitmap.width, imageBitmap.height);
const transform = computeTransformToBox(inBox, toBox);
const faceDetections: Array<FaceDetection> = faces?.map((f) => {
const box = transformBox(f.box, transform);
const normLandmarks = f.landmarks;
const landmarks = transformPoints(normLandmarks, transform);
return {
box,
landmarks,
probability: f.probability as number,
} as FaceDetection;
});
return removeDuplicateDetections(faceDetections, maxFaceDistance);
};
const preprocessImageBitmapToFloat32ChannelsFirst = (
imageBitmap: ImageBitmap,
requiredWidth: number,
requiredHeight: number,
maintainAspectRatio: boolean = true,
normFunction: (pixelValue: number) => number = normalizePixelBetween0And1,
) => {
// Create an OffscreenCanvas and set its size.
const offscreenCanvas = new OffscreenCanvas(
imageBitmap.width,
imageBitmap.height,
);
const ctx = offscreenCanvas.getContext("2d");
ctx.drawImage(imageBitmap, 0, 0, imageBitmap.width, imageBitmap.height);
const imageData = ctx.getImageData(
0,
0,
imageBitmap.width,
imageBitmap.height,
);
const pixelData = imageData.data;
let scaleW = requiredWidth / imageBitmap.width;
let scaleH = requiredHeight / imageBitmap.height;
if (maintainAspectRatio) {
const scale = Math.min(
requiredWidth / imageBitmap.width,
requiredHeight / imageBitmap.height,
);
scaleW = scale;
scaleH = scale;
}
const scaledWidth = clamp(
Math.round(imageBitmap.width * scaleW),
0,
requiredWidth,
);
const scaledHeight = clamp(
Math.round(imageBitmap.height * scaleH),
0,
requiredHeight,
);
const processedImage = new Float32Array(
1 * 3 * requiredWidth * requiredHeight,
);
// Populate the Float32Array with normalized pixel values
let pixelIndex = 0;
const channelOffsetGreen = requiredHeight * requiredWidth;
const channelOffsetBlue = 2 * requiredHeight * requiredWidth;
for (let h = 0; h < requiredHeight; h++) {
for (let w = 0; w < requiredWidth; w++) {
let pixel: {
r: number;
g: number;
b: number;
};
if (w >= scaledWidth || h >= scaledHeight) {
pixel = { r: 114, g: 114, b: 114 };
} else {
pixel = getPixelBilinear(
w / scaleW,
h / scaleH,
pixelData,
imageBitmap.width,
imageBitmap.height,
);
}
processedImage[pixelIndex] = normFunction(pixel.r);
processedImage[pixelIndex + channelOffsetGreen] = normFunction(
pixel.g,
);
processedImage[pixelIndex + channelOffsetBlue] = normFunction(
pixel.b,
);
pixelIndex++;
}
}
return {
data: processedImage,
originalSize: {
width: imageBitmap.width,
height: imageBitmap.height,
},
newSize: { width: scaledWidth, height: scaledHeight },
};
};
/**
* @param rowOutput A Float32Array of shape [25200, 16], where each row
* represents a bounding box.
*/
const getFacesFromYOLOOutput = (
rowOutput: Float32Array,
minScore: number,
): Array<FaceDetection> => {
const faces: Array<FaceDetection> = [];
// Iterate over each row.
for (let i = 0; i < rowOutput.length; i += 16) {
const score = rowOutput[i + 4];
if (score < minScore) {
continue;
}
// The first 4 values represent the bounding box's coordinates:
//
// (x1, y1, x2, y2)
//
const xCenter = rowOutput[i];
const yCenter = rowOutput[i + 1];
const width = rowOutput[i + 2];
const height = rowOutput[i + 3];
const xMin = xCenter - width / 2.0; // topLeft
const yMin = yCenter - height / 2.0; // topLeft
const leftEyeX = rowOutput[i + 5];
const leftEyeY = rowOutput[i + 6];
const rightEyeX = rowOutput[i + 7];
const rightEyeY = rowOutput[i + 8];
const noseX = rowOutput[i + 9];
const noseY = rowOutput[i + 10];
const leftMouthX = rowOutput[i + 11];
const leftMouthY = rowOutput[i + 12];
const rightMouthX = rowOutput[i + 13];
const rightMouthY = rowOutput[i + 14];
const box = new Box({
x: xMin,
y: yMin,
width: width,
height: height,
});
const probability = score as number;
const landmarks = [
new Point(leftEyeX, leftEyeY),
new Point(rightEyeX, rightEyeY),
new Point(noseX, noseY),
new Point(leftMouthX, leftMouthY),
new Point(rightMouthX, rightMouthY),
];
faces.push({ box, landmarks, probability });
}
return faces;
};
export const getRelativeDetection = (
faceDetection: FaceDetection,
dimensions: Dimensions,
): FaceDetection => {
const oldBox: Box = faceDetection.box;
const box = new Box({
x: oldBox.x / dimensions.width,
y: oldBox.y / dimensions.height,
width: oldBox.width / dimensions.width,
height: oldBox.height / dimensions.height,
});
const oldLandmarks: Point[] = faceDetection.landmarks;
const landmarks = oldLandmarks.map((l) => {
return new Point(l.x / dimensions.width, l.y / dimensions.height);
});
const probability = faceDetection.probability;
return { box, landmarks, probability };
};
/**
* Removes duplicate face detections from an array of detections.
*
* This function sorts the detections by their probability in descending order,
* then iterates over them.
*
* For each detection, it calculates the Euclidean distance to all other
* detections.
*
* If the distance is less than or equal to the specified threshold
* (`withinDistance`), the other detection is considered a duplicate and is
* removed.
*
* @param detections - An array of face detections to remove duplicates from.
*
* @param withinDistance - The maximum Euclidean distance between two detections
* for them to be considered duplicates.
*
* @returns An array of face detections with duplicates removed.
*/
const removeDuplicateDetections = (
detections: Array<FaceDetection>,
withinDistance: number,
) => {
detections.sort((a, b) => b.probability - a.probability);
const isSelected = new Map<number, boolean>();
for (let i = 0; i < detections.length; i++) {
if (isSelected.get(i) === false) {
continue;
}
isSelected.set(i, true);
for (let j = i + 1; j < detections.length; j++) {
if (isSelected.get(j) === false) {
continue;
}
const centeri = getDetectionCenter(detections[i]);
const centerj = getDetectionCenter(detections[j]);
const dist = euclidean(
[centeri.x, centeri.y],
[centerj.x, centerj.y],
);
if (dist <= withinDistance) {
isSelected.set(j, false);
}
}
}
const uniques: Array<FaceDetection> = [];
for (let i = 0; i < detections.length; i++) {
isSelected.get(i) && uniques.push(detections[i]);
}
return uniques;
};
function getDetectionCenter(detection: FaceDetection) {
const center = new Point(0, 0);
// TODO: The first 4 landmarks are applicable to blazeface only;
// this needs to consider the eye, nose and mouth landmarks to compute the center.
detection.landmarks?.slice(0, 4).forEach((p) => {
center.x += p.x;
center.y += p.y;
});
return new Point(center.x / 4, center.y / 4);
}
function computeTransformToBox(inBox: Box, toBox: Box): Matrix {
return compose(
translate(toBox.x, toBox.y),
scale(toBox.width / inBox.width, toBox.height / inBox.height),
);
}
function transformPoint(point: Point, transform: Matrix) {
const txdPoint = applyToPoint(transform, point);
return new Point(txdPoint.x, txdPoint.y);
}
function transformPoints(points: Point[], transform: Matrix) {
return points?.map((p) => transformPoint(p, transform));
}
function transformBox(box: Box, transform: Matrix) {
const topLeft = transformPoint(box.topLeft, transform);
const bottomRight = transformPoint(box.bottomRight, transform);
return boxFromBoundingBox({
left: topLeft.x,
top: topLeft.y,
right: bottomRight.x,
bottom: bottomRight.y,
});
}


@ -1,26 +0,0 @@
import { workerBridge } from "@/next/worker/worker-bridge";
import { FaceEmbedding } from "services/face/types";
export const mobileFaceNetFaceSize = 112;
/**
* Compute embeddings for the given {@link faceData}.
*
* The model used is MobileFaceNet, running in an ONNX runtime.
*/
export const faceEmbeddings = async (
faceData: Float32Array,
): Promise<Array<FaceEmbedding>> => {
const outputData = await workerBridge.faceEmbeddings(faceData);
const embeddingSize = 192;
const embeddings = new Array<FaceEmbedding>(
outputData.length / embeddingSize,
);
for (let i = 0; i < embeddings.length; i++) {
embeddings[i] = new Float32Array(
outputData.slice(i * embeddingSize, (i + 1) * embeddingSize),
);
}
return embeddings;
};


@ -1,26 +1,76 @@
import { openCache } from "@/next/blob-cache";
import log from "@/next/log";
import { faceAlignment } from "services/face/align";
import mlIDbStorage from "services/face/db";
import { detectFaces, getRelativeDetection } from "services/face/detect";
import { faceEmbeddings, mobileFaceNetFaceSize } from "services/face/embed";
import { workerBridge } from "@/next/worker/worker-bridge";
import { euclidean } from "hdbscan";
import { Matrix } from "ml-matrix";
import { Box, Dimensions, Point, enlargeBox, newBox } from "services/face/geom";
import {
DetectedFace,
Face,
FaceAlignment,
FaceCrop,
FaceDetection,
FaceEmbedding,
MLSyncFileContext,
type FaceAlignment,
type MlFileData,
} from "services/face/types";
import { imageBitmapToBlob, warpAffineFloat32List } from "utils/image";
import { detectBlur } from "./blur";
import { getFaceCrop } from "./crop";
import { defaultMLVersion } from "services/machineLearning/machineLearningService";
import { getSimilarityTransformation } from "similarity-transformation";
import type { EnteFile } from "types/file";
import {
fetchImageBitmap,
clamp,
createGrayscaleIntMatrixFromNormalized2List,
cropWithRotation,
fetchImageBitmapForContext,
getFaceId,
getLocalFile,
getPixelBilinear,
imageBitmapToBlob,
normalizePixelBetween0And1,
warpAffineFloat32List,
} from "./image";
import { transformFaceDetections } from "./transform-box";
export const syncFileAnalyzeFaces = async (fileContext: MLSyncFileContext) => {
/**
* Index faces in the given file.
*
* This function is the entry point to the indexing pipeline. The file goes
* through various stages:
*
* 1. Download the original image if needed.
* 2. Detect faces using ONNX/YOLO.
* 3. Align the face rectangles and compute blur.
* 4. Compute embeddings for the detected face crops.
*
* Once all of this is done, it returns the face rectangles and embeddings to
* the higher layer (which saves them locally for offline use, and encrypts and
* uploads them to the user's remote storage so that their other devices can
* download them instead of needing to reindex).
*/
export const indexFaces = async (
enteFile: EnteFile,
localFile?: globalThis.File,
) => {
log.debug(() => ({ a: "Indexing faces in file", enteFile }));
const fileContext: MLSyncFileContext = { enteFile, localFile };
const newMlFile = (fileContext.newMlFile = {
fileId: enteFile.id,
mlVersion: defaultMLVersion,
errorCount: 0,
} as MlFileData);
try {
await fetchImageBitmapForContext(fileContext);
await syncFileAnalyzeFaces(fileContext);
newMlFile.errorCount = 0;
} finally {
fileContext.imageBitmap && fileContext.imageBitmap.close();
}
return newMlFile;
};
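// Editor's note: an illustrative sketch, not part of this diff, showing how a
// caller might drive the pipeline documented above. It only uses names that
// are already in scope in this module (indexFaces, log, EnteFile).
const exampleIndexAndLog = async (
    enteFile: EnteFile,
    localFile?: globalThis.File,
) => {
    // Runs download → detect → align/blur → embed, and returns the per-file
    // ML data (faces with boxes, landmarks, blur values and embeddings).
    const mlFileData = await indexFaces(enteFile, localFile);
    log.info("Indexed faces:", mlFileData.faces?.length ?? 0);
    return mlFileData;
};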
const syncFileAnalyzeFaces = async (fileContext: MLSyncFileContext) => {
const { newMlFile } = fileContext;
const startTime = Date.now();
@ -43,10 +93,6 @@ export const syncFileAnalyzeFaces = async (fileContext: MLSyncFileContext) => {
const syncFileFaceDetections = async (fileContext: MLSyncFileContext) => {
const { newMlFile } = fileContext;
newMlFile.faceDetectionMethod = {
value: "YoloFace",
version: 1,
};
fileContext.newDetection = true;
const imageBitmap = await fetchImageBitmapForContext(fileContext);
const faceDetections = await detectFaces(imageBitmap);
@ -67,14 +113,265 @@ const syncFileFaceDetections = async (fileContext: MLSyncFileContext) => {
log.info("[MLService] Detected Faces: ", newMlFile.faces?.length);
};
/**
* Detect faces in the given {@link imageBitmap}.
*
* The model used is YOLO, running in an ONNX runtime.
*/
const detectFaces = async (
imageBitmap: ImageBitmap,
): Promise<Array<FaceDetection>> => {
const maxFaceDistancePercent = Math.sqrt(2) / 100;
const maxFaceDistance = imageBitmap.width * maxFaceDistancePercent;
const preprocessResult = preprocessImageBitmapToFloat32ChannelsFirst(
imageBitmap,
640,
640,
);
const data = preprocessResult.data;
const resized = preprocessResult.newSize;
const outputData = await workerBridge.detectFaces(data);
const faces = getFacesFromYOLOOutput(outputData as Float32Array, 0.7);
const inBox = newBox(0, 0, resized.width, resized.height);
const toBox = newBox(0, 0, imageBitmap.width, imageBitmap.height);
const faceDetections = transformFaceDetections(faces, inBox, toBox);
return removeDuplicateDetections(faceDetections, maxFaceDistance);
};
const preprocessImageBitmapToFloat32ChannelsFirst = (
imageBitmap: ImageBitmap,
requiredWidth: number,
requiredHeight: number,
maintainAspectRatio: boolean = true,
normFunction: (pixelValue: number) => number = normalizePixelBetween0And1,
) => {
// Create an OffscreenCanvas and set its size.
const offscreenCanvas = new OffscreenCanvas(
imageBitmap.width,
imageBitmap.height,
);
const ctx = offscreenCanvas.getContext("2d");
ctx.drawImage(imageBitmap, 0, 0, imageBitmap.width, imageBitmap.height);
const imageData = ctx.getImageData(
0,
0,
imageBitmap.width,
imageBitmap.height,
);
const pixelData = imageData.data;
let scaleW = requiredWidth / imageBitmap.width;
let scaleH = requiredHeight / imageBitmap.height;
if (maintainAspectRatio) {
const scale = Math.min(
requiredWidth / imageBitmap.width,
requiredHeight / imageBitmap.height,
);
scaleW = scale;
scaleH = scale;
}
const scaledWidth = clamp(
Math.round(imageBitmap.width * scaleW),
0,
requiredWidth,
);
const scaledHeight = clamp(
Math.round(imageBitmap.height * scaleH),
0,
requiredHeight,
);
const processedImage = new Float32Array(
1 * 3 * requiredWidth * requiredHeight,
);
// Populate the Float32Array with normalized pixel values
let pixelIndex = 0;
const channelOffsetGreen = requiredHeight * requiredWidth;
const channelOffsetBlue = 2 * requiredHeight * requiredWidth;
for (let h = 0; h < requiredHeight; h++) {
for (let w = 0; w < requiredWidth; w++) {
let pixel: {
r: number;
g: number;
b: number;
};
if (w >= scaledWidth || h >= scaledHeight) {
pixel = { r: 114, g: 114, b: 114 };
} else {
pixel = getPixelBilinear(
w / scaleW,
h / scaleH,
pixelData,
imageBitmap.width,
imageBitmap.height,
);
}
processedImage[pixelIndex] = normFunction(pixel.r);
processedImage[pixelIndex + channelOffsetGreen] = normFunction(
pixel.g,
);
processedImage[pixelIndex + channelOffsetBlue] = normFunction(
pixel.b,
);
pixelIndex++;
}
}
return {
data: processedImage,
originalSize: {
width: imageBitmap.width,
height: imageBitmap.height,
},
newSize: { width: scaledWidth, height: scaledHeight },
};
};
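// Editor's note: an illustrative sketch, not part of this diff. The
// Float32Array produced above is laid out channels-first (1 × 3 × H × W):
// the full red plane first, then green, then blue, matching the
// channelOffsetGreen / channelOffsetBlue offsets used in the loop. The value
// of channel c (0 = R, 1 = G, 2 = B) at pixel (x, y) would thus live at:
const chwIndex = (c: number, x: number, y: number, width: number, height: number) =>
    c * height * width + y * width + x;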
/**
* @param rowOutput A Float32Array of shape [25200, 16], where each row
* represents a bounding box.
*/
const getFacesFromYOLOOutput = (
rowOutput: Float32Array,
minScore: number,
): Array<FaceDetection> => {
const faces: Array<FaceDetection> = [];
// Iterate over each row.
for (let i = 0; i < rowOutput.length; i += 16) {
const score = rowOutput[i + 4];
if (score < minScore) {
continue;
}
// The first 4 values represent the bounding box's coordinates:
//
// (x1, y1, x2, y2)
//
const xCenter = rowOutput[i];
const yCenter = rowOutput[i + 1];
const width = rowOutput[i + 2];
const height = rowOutput[i + 3];
const xMin = xCenter - width / 2.0; // topLeft
const yMin = yCenter - height / 2.0; // topLeft
const leftEyeX = rowOutput[i + 5];
const leftEyeY = rowOutput[i + 6];
const rightEyeX = rowOutput[i + 7];
const rightEyeY = rowOutput[i + 8];
const noseX = rowOutput[i + 9];
const noseY = rowOutput[i + 10];
const leftMouthX = rowOutput[i + 11];
const leftMouthY = rowOutput[i + 12];
const rightMouthX = rowOutput[i + 13];
const rightMouthY = rowOutput[i + 14];
const box = new Box({
x: xMin,
y: yMin,
width: width,
height: height,
});
const probability = score as number;
const landmarks = [
new Point(leftEyeX, leftEyeY),
new Point(rightEyeX, rightEyeY),
new Point(noseX, noseY),
new Point(leftMouthX, leftMouthY),
new Point(rightMouthX, rightMouthY),
];
faces.push({ box, landmarks, probability });
}
return faces;
};
const getRelativeDetection = (
faceDetection: FaceDetection,
dimensions: Dimensions,
): FaceDetection => {
const oldBox: Box = faceDetection.box;
const box = new Box({
x: oldBox.x / dimensions.width,
y: oldBox.y / dimensions.height,
width: oldBox.width / dimensions.width,
height: oldBox.height / dimensions.height,
});
const oldLandmarks: Point[] = faceDetection.landmarks;
const landmarks = oldLandmarks.map((l) => {
return new Point(l.x / dimensions.width, l.y / dimensions.height);
});
const probability = faceDetection.probability;
return { box, landmarks, probability };
};
/**
* Removes duplicate face detections from an array of detections.
*
* This function sorts the detections by their probability in descending order,
* then iterates over them.
*
* For each detection, it calculates the Euclidean distance to all other
* detections.
*
* If the distance is less than or equal to the specified threshold
* (`withinDistance`), the other detection is considered a duplicate and is
* removed.
*
* @param detections - An array of face detections to remove duplicates from.
*
* @param withinDistance - The maximum Euclidean distance between two detections
* for them to be considered duplicates.
*
* @returns An array of face detections with duplicates removed.
*/
const removeDuplicateDetections = (
detections: Array<FaceDetection>,
withinDistance: number,
) => {
detections.sort((a, b) => b.probability - a.probability);
const isSelected = new Map<number, boolean>();
for (let i = 0; i < detections.length; i++) {
if (isSelected.get(i) === false) {
continue;
}
isSelected.set(i, true);
for (let j = i + 1; j < detections.length; j++) {
if (isSelected.get(j) === false) {
continue;
}
const centeri = getDetectionCenter(detections[i]);
const centerj = getDetectionCenter(detections[j]);
const dist = euclidean(
[centeri.x, centeri.y],
[centerj.x, centerj.y],
);
if (dist <= withinDistance) {
isSelected.set(j, false);
}
}
}
const uniques: Array<FaceDetection> = [];
for (let i = 0; i < detections.length; i++) {
isSelected.get(i) && uniques.push(detections[i]);
}
return uniques;
};
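// Editor's note: an illustrative sketch, not part of this diff, of the
// deduplication behaviour documented above: of two detections whose landmark
// centers are within `withinDistance`, only the higher probability one
// survives. All values below are assumed, for illustration only.
const exampleDedup = () => {
    const landmarksAt = (x: number, y: number) =>
        [0, 1, 2, 3, 4].map(() => new Point(x, y));
    const a: FaceDetection = {
        box: new Box({ x: 100, y: 100, width: 50, height: 50 }),
        landmarks: landmarksAt(125, 125),
        probability: 0.9,
    };
    const b: FaceDetection = {
        box: new Box({ x: 102, y: 101, width: 50, height: 50 }),
        landmarks: landmarksAt(127, 126),
        probability: 0.8,
    };
    // Centers are ~2.2 px apart, so with withinDistance = 10 only `a` remains.
    return removeDuplicateDetections([a, b], 10);
};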
function getDetectionCenter(detection: FaceDetection) {
const center = new Point(0, 0);
// TODO: The first 4 landmarks are applicable to blazeface only;
// this needs to consider the eye, nose and mouth landmarks to compute the center.
detection.landmarks?.slice(0, 4).forEach((p) => {
center.x += p.x;
center.y += p.y;
});
return new Point(center.x / 4, center.y / 4);
}
const syncFileFaceCrops = async (fileContext: MLSyncFileContext) => {
const { newMlFile } = fileContext;
const imageBitmap = await fetchImageBitmapForContext(fileContext);
newMlFile.faceCropMethod = {
value: "ArcFace",
version: 1,
};
for (const face of newMlFile.faces) {
await saveFaceCrop(imageBitmap, face);
}
@ -84,10 +381,6 @@ const syncFileFaceAlignments = async (
fileContext: MLSyncFileContext,
): Promise<Float32Array> => {
const { newMlFile } = fileContext;
newMlFile.faceAlignmentMethod = {
value: "ArcFace",
version: 1,
};
fileContext.newAlignment = true;
const imageBitmap =
fileContext.imageBitmap ||
@ -113,15 +406,277 @@ const syncFileFaceAlignments = async (
return faceImages;
};
// TODO-ML(MR): When is this used, or is it a Blazeface leftover?
const ARCFACE_LANDMARKS = [
[38.2946, 51.6963],
[73.5318, 51.5014],
[56.0252, 71.7366],
[56.1396, 92.2848],
] as Array<[number, number]>;
const ARCFACE_LANDMARKS_FACE_SIZE = 112;
const ARC_FACE_5_LANDMARKS = [
[38.2946, 51.6963],
[73.5318, 51.5014],
[56.0252, 71.7366],
[41.5493, 92.3655],
[70.7299, 92.2041],
] as Array<[number, number]>;
/**
* Compute and return an {@link FaceAlignment} for the given face detection.
*
* @param faceDetection A geometry indicating a face detected in an image.
*/
const faceAlignment = (faceDetection: FaceDetection): FaceAlignment => {
const landmarkCount = faceDetection.landmarks.length;
return getFaceAlignmentUsingSimilarityTransform(
faceDetection,
normalizeLandmarks(
landmarkCount === 5 ? ARC_FACE_5_LANDMARKS : ARCFACE_LANDMARKS,
ARCFACE_LANDMARKS_FACE_SIZE,
),
);
};
function getFaceAlignmentUsingSimilarityTransform(
faceDetection: FaceDetection,
alignedLandmarks: Array<[number, number]>,
): FaceAlignment {
const landmarksMat = new Matrix(
faceDetection.landmarks
.map((p) => [p.x, p.y])
.slice(0, alignedLandmarks.length),
).transpose();
const alignedLandmarksMat = new Matrix(alignedLandmarks).transpose();
const simTransform = getSimilarityTransformation(
landmarksMat,
alignedLandmarksMat,
);
const RS = Matrix.mul(simTransform.rotation, simTransform.scale);
const TR = simTransform.translation;
const affineMatrix = [
[RS.get(0, 0), RS.get(0, 1), TR.get(0, 0)],
[RS.get(1, 0), RS.get(1, 1), TR.get(1, 0)],
[0, 0, 1],
];
const size = 1 / simTransform.scale;
const meanTranslation = simTransform.toMean.sub(0.5).mul(size);
const centerMat = simTransform.fromMean.sub(meanTranslation);
const center = new Point(centerMat.get(0, 0), centerMat.get(1, 0));
const rotation = -Math.atan2(
simTransform.rotation.get(0, 1),
simTransform.rotation.get(0, 0),
);
return {
affineMatrix,
center,
size,
rotation,
};
}
function normalizeLandmarks(
landmarks: Array<[number, number]>,
faceSize: number,
): Array<[number, number]> {
return landmarks.map((landmark) =>
landmark.map((p) => p / faceSize),
) as Array<[number, number]>;
}
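// Editor's note: an illustrative note, not part of this diff. The affine
// matrix computed above has the usual similarity-transform structure
//
//     [ s·R  t ]          p' = s·R·p + t
//     [  0   1 ]
//
// expressed in normalized coordinates (the canonical ArcFace landmarks were
// divided by the face size above). Applying it to a detected landmark maps
// that landmark onto its canonical position:
const applyAlignmentToPoint = (alignment: FaceAlignment, p: Point): Point => {
    const m = alignment.affineMatrix;
    return new Point(
        m[0][0] * p.x + m[0][1] * p.y + m[0][2],
        m[1][0] * p.x + m[1][1] * p.y + m[1][2],
    );
};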
/**
* Laplacian blur detection.
*/
const detectBlur = (alignedFaces: Float32Array, faces: Face[]): number[] => {
const numFaces = Math.round(
alignedFaces.length /
(mobileFaceNetFaceSize * mobileFaceNetFaceSize * 3),
);
const blurValues: number[] = [];
for (let i = 0; i < numFaces; i++) {
const face = faces[i];
const direction = faceDirection(face);
const faceImage = createGrayscaleIntMatrixFromNormalized2List(
alignedFaces,
i,
);
const laplacian = applyLaplacian(faceImage, direction);
blurValues.push(matrixVariance(laplacian));
}
return blurValues;
};
type FaceDirection = "left" | "right" | "straight";
const faceDirection = (face: Face): FaceDirection => {
const landmarks = face.detection.landmarks;
const leftEye = landmarks[0];
const rightEye = landmarks[1];
const nose = landmarks[2];
const leftMouth = landmarks[3];
const rightMouth = landmarks[4];
const eyeDistanceX = Math.abs(rightEye.x - leftEye.x);
const eyeDistanceY = Math.abs(rightEye.y - leftEye.y);
const mouthDistanceY = Math.abs(rightMouth.y - leftMouth.y);
const faceIsUpright =
Math.max(leftEye.y, rightEye.y) + 0.5 * eyeDistanceY < nose.y &&
nose.y + 0.5 * mouthDistanceY < Math.min(leftMouth.y, rightMouth.y);
const noseStickingOutLeft =
nose.x < Math.min(leftEye.x, rightEye.x) &&
nose.x < Math.min(leftMouth.x, rightMouth.x);
const noseStickingOutRight =
nose.x > Math.max(leftEye.x, rightEye.x) &&
nose.x > Math.max(leftMouth.x, rightMouth.x);
const noseCloseToLeftEye =
Math.abs(nose.x - leftEye.x) < 0.2 * eyeDistanceX;
const noseCloseToRightEye =
Math.abs(nose.x - rightEye.x) < 0.2 * eyeDistanceX;
if (noseStickingOutLeft || (faceIsUpright && noseCloseToLeftEye)) {
return "left";
} else if (noseStickingOutRight || (faceIsUpright && noseCloseToRightEye)) {
return "right";
}
return "straight";
};
/**
* Return a new image by applying a Laplacian blur kernel to each pixel.
*/
const applyLaplacian = (
image: number[][],
direction: FaceDirection,
): number[][] => {
const paddedImage: number[][] = padImage(image, direction);
const numRows = paddedImage.length - 2;
const numCols = paddedImage[0].length - 2;
// Create an output image initialized to 0.
const outputImage: number[][] = Array.from({ length: numRows }, () =>
new Array(numCols).fill(0),
);
// Define the Laplacian kernel.
const kernel: number[][] = [
[0, 1, 0],
[1, -4, 1],
[0, 1, 0],
];
// Apply the kernel to each pixel
for (let i = 0; i < numRows; i++) {
for (let j = 0; j < numCols; j++) {
let sum = 0;
for (let ki = 0; ki < 3; ki++) {
for (let kj = 0; kj < 3; kj++) {
sum += paddedImage[i + ki][j + kj] * kernel[ki][kj];
}
}
// Adjust the output value if necessary (e.g., clipping).
outputImage[i][j] = sum;
}
}
return outputImage;
};
const padImage = (image: number[][], direction: FaceDirection): number[][] => {
const removeSideColumns = 56; /* must be even */
const numRows = image.length;
const numCols = image[0].length;
const paddedNumCols = numCols + 2 - removeSideColumns;
const paddedNumRows = numRows + 2;
// Create a new matrix with extra padding.
const paddedImage: number[][] = Array.from({ length: paddedNumRows }, () =>
new Array(paddedNumCols).fill(0),
);
if (direction === "straight") {
// Copy original image into the center of the padded image.
for (let i = 0; i < numRows; i++) {
for (let j = 0; j < paddedNumCols - 2; j++) {
paddedImage[i + 1][j + 1] =
image[i][j + Math.round(removeSideColumns / 2)];
}
}
} else if (direction === "left") {
// If the face is facing left, we only take the right side of the face image.
for (let i = 0; i < numRows; i++) {
for (let j = 0; j < paddedNumCols - 2; j++) {
paddedImage[i + 1][j + 1] = image[i][j + removeSideColumns];
}
}
} else if (direction === "right") {
// If the face is facing right, we only take the left side of the face image.
for (let i = 0; i < numRows; i++) {
for (let j = 0; j < paddedNumCols - 2; j++) {
paddedImage[i + 1][j + 1] = image[i][j];
}
}
}
// Reflect padding
// Top and bottom rows
for (let j = 1; j <= paddedNumCols - 2; j++) {
paddedImage[0][j] = paddedImage[2][j]; // Top row
paddedImage[numRows + 1][j] = paddedImage[numRows - 1][j]; // Bottom row
}
// Left and right columns
for (let i = 0; i < numRows + 2; i++) {
paddedImage[i][0] = paddedImage[i][2]; // Left column
paddedImage[i][paddedNumCols - 1] = paddedImage[i][paddedNumCols - 3]; // Right column
}
return paddedImage;
};
const matrixVariance = (matrix: number[][]): number => {
const numRows = matrix.length;
const numCols = matrix[0].length;
const totalElements = numRows * numCols;
// Calculate the mean.
let mean: number = 0;
matrix.forEach((row) => {
row.forEach((value) => {
mean += value;
});
});
mean /= totalElements;
// Calculate the variance.
let variance: number = 0;
matrix.forEach((row) => {
row.forEach((value) => {
const diff: number = value - mean;
variance += diff * diff;
});
});
variance /= totalElements;
return variance;
};
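// Editor's note: an illustrative sketch, not part of this diff. The blur
// score computed by detectBlur above is the classic "variance of the
// Laplacian": sharp images have strong edges and hence a widely varying
// Laplacian response, while blurry images give a nearly flat response. A
// self-contained example on an assumed 112 × 112 synthetic patch with a
// vertical edge whose contrast is controlled by `contrast`:
const exampleBlurScore = (contrast: number) => {
    const size = 112;
    const patch = Array.from({ length: size }, () =>
        Array.from({ length: size }, (_, x) => (x < size / 2 ? 0 : contrast)),
    );
    // Higher contrast (a sharper edge) yields a larger Laplacian variance.
    return matrixVariance(applyLaplacian(patch, "straight"));
};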
const syncFileFaceEmbeddings = async (
fileContext: MLSyncFileContext,
alignedFacesInput: Float32Array,
) => {
const { newMlFile } = fileContext;
newMlFile.faceEmbeddingMethod = {
value: "MobileFaceNet",
version: 2,
};
// TODO: when not storing face crops, image will be needed to extract faces
// fileContext.imageBitmap ||
// (await this.getImageBitmap(fileContext));
@ -132,6 +687,30 @@ const syncFileFaceEmbeddings = async (
log.info("[MLService] facesWithEmbeddings: ", newMlFile.faces.length);
};
const mobileFaceNetFaceSize = 112;
/**
* Compute embeddings for the given {@link faceData}.
*
* The model used is MobileFaceNet, running in an ONNX runtime.
*/
const faceEmbeddings = async (
faceData: Float32Array,
): Promise<Array<FaceEmbedding>> => {
const outputData = await workerBridge.faceEmbeddings(faceData);
const embeddingSize = 192;
const embeddings = new Array<FaceEmbedding>(
outputData.length / embeddingSize,
);
for (let i = 0; i < embeddings.length; i++) {
embeddings[i] = new Float32Array(
outputData.slice(i * embeddingSize, (i + 1) * embeddingSize),
);
}
return embeddings;
};
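// Editor's note: an illustrative sketch, not part of this diff. Each
// embedding is a 192-dimensional Float32Array; crops of the same person are
// expected to lie closer together than crops of different people. The
// comparison below reuses the euclidean metric already imported for
// clustering; the 0.8 threshold is an assumed, illustrative value only.
const exampleLooksLikeSamePerson = (a: FaceEmbedding, b: FaceEmbedding) =>
    euclidean(Array.from(a), Array.from(b)) < 0.8;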
const syncFileFaceMakeRelativeDetections = async (
fileContext: MLSyncFileContext,
) => {
@ -159,16 +738,32 @@ export const saveFaceCrop = async (imageBitmap: ImageBitmap, face: Face) => {
return blob;
};
export const regenerateFaceCrop = async (faceID: string) => {
const fileID = Number(faceID.split("-")[0]);
const personFace = await mlIDbStorage.getFace(fileID, faceID);
if (!personFace) {
throw Error("Face not found");
}
const getFaceCrop = (
imageBitmap: ImageBitmap,
faceDetection: FaceDetection,
): FaceCrop => {
const alignment = faceAlignment(faceDetection);
const file = await getLocalFile(personFace.fileId);
const imageBitmap = await fetchImageBitmap(file);
return await saveFaceCrop(imageBitmap, personFace);
const padding = 0.25;
const maxSize = 256;
const alignmentBox = new Box({
x: alignment.center.x - alignment.size / 2,
y: alignment.center.y - alignment.size / 2,
width: alignment.size,
height: alignment.size,
}).round();
const scaleForPadding = 1 + padding * 2;
const paddedBox = enlargeBox(alignmentBox, scaleForPadding).round();
const faceImageBitmap = cropWithRotation(imageBitmap, paddedBox, 0, {
width: maxSize,
height: maxSize,
});
return {
image: faceImageBitmap,
imageBox: paddedBox,
};
};
async function extractFaceImagesToFloat32(


@ -1,13 +1,17 @@
import { FILE_TYPE } from "@/media/file-type";
import { decodeLivePhoto } from "@/media/live-photo";
import log from "@/next/log";
import { Matrix, inverse } from "ml-matrix";
import DownloadManager from "services/download";
import { Dimensions } from "services/face/geom";
import { DetectedFace, MLSyncFileContext } from "services/face/types";
import { Box, Dimensions, enlargeBox } from "services/face/geom";
import {
DetectedFace,
FaceAlignment,
MLSyncFileContext,
} from "services/face/types";
import { getLocalFiles } from "services/fileService";
import { EnteFile } from "types/file";
import { getRenderableImage } from "utils/file";
import { clamp } from "utils/image";
export const fetchImageBitmapForContext = async (
fileContext: MLSyncFileContext,
@ -37,7 +41,6 @@ export const fetchImageBitmapForContext = async (
);
}
fileContext.newMlFile.imageSource = "Original";
const { width, height } = fileContext.imageBitmap;
fileContext.newMlFile.imageDimensions = { width, height };
@ -119,3 +122,468 @@ export async function getLocalFileImageBitmap(
fileBlob = await getRenderableImage(enteFile.metadata.title, fileBlob);
return createImageBitmap(fileBlob);
}
export function normalizePixelBetween0And1(pixelValue: number) {
return pixelValue / 255.0;
}
export function normalizePixelBetweenMinus1And1(pixelValue: number) {
return pixelValue / 127.5 - 1.0;
}
export function unnormalizePixelFromBetweenMinus1And1(pixelValue: number) {
return clamp(Math.round((pixelValue + 1.0) * 127.5), 0, 255);
}
export function readPixelColor(
imageData: Uint8ClampedArray,
width: number,
height: number,
x: number,
y: number,
) {
if (x < 0 || x >= width || y < 0 || y >= height) {
return { r: 0, g: 0, b: 0, a: 0 };
}
const index = (y * width + x) * 4;
return {
r: imageData[index],
g: imageData[index + 1],
b: imageData[index + 2],
a: imageData[index + 3],
};
}
export function clamp(value: number, min: number, max: number) {
return Math.min(max, Math.max(min, value));
}
export function getPixelBicubic(
fx: number,
fy: number,
imageData: Uint8ClampedArray,
imageWidth: number,
imageHeight: number,
) {
// Clamp to image boundaries
fx = clamp(fx, 0, imageWidth - 1);
fy = clamp(fy, 0, imageHeight - 1);
const x = Math.trunc(fx) - (fx >= 0.0 ? 0 : 1);
const px = x - 1;
const nx = x + 1;
const ax = x + 2;
const y = Math.trunc(fy) - (fy >= 0.0 ? 0 : 1);
const py = y - 1;
const ny = y + 1;
const ay = y + 2;
const dx = fx - x;
const dy = fy - y;
function cubic(
dx: number,
ipp: number,
icp: number,
inp: number,
iap: number,
) {
return (
icp +
0.5 *
(dx * (-ipp + inp) +
dx * dx * (2 * ipp - 5 * icp + 4 * inp - iap) +
dx * dx * dx * (-ipp + 3 * icp - 3 * inp + iap))
);
}
const icc = readPixelColor(imageData, imageWidth, imageHeight, x, y);
const ipp =
px < 0 || py < 0
? icc
: readPixelColor(imageData, imageWidth, imageHeight, px, py);
const icp =
px < 0
? icc
: readPixelColor(imageData, imageWidth, imageHeight, x, py);
const inp =
py < 0 || nx >= imageWidth
? icc
: readPixelColor(imageData, imageWidth, imageHeight, nx, py);
const iap =
ax >= imageWidth || py < 0
? icc
: readPixelColor(imageData, imageWidth, imageHeight, ax, py);
const ip0 = cubic(dx, ipp.r, icp.r, inp.r, iap.r);
const ip1 = cubic(dx, ipp.g, icp.g, inp.g, iap.g);
const ip2 = cubic(dx, ipp.b, icp.b, inp.b, iap.b);
// const ip3 = cubic(dx, ipp.a, icp.a, inp.a, iap.a);
const ipc =
px < 0
? icc
: readPixelColor(imageData, imageWidth, imageHeight, px, y);
const inc =
nx >= imageWidth
? icc
: readPixelColor(imageData, imageWidth, imageHeight, nx, y);
const iac =
ax >= imageWidth
? icc
: readPixelColor(imageData, imageWidth, imageHeight, ax, y);
const ic0 = cubic(dx, ipc.r, icc.r, inc.r, iac.r);
const ic1 = cubic(dx, ipc.g, icc.g, inc.g, iac.g);
const ic2 = cubic(dx, ipc.b, icc.b, inc.b, iac.b);
// const ic3 = cubic(dx, ipc.a, icc.a, inc.a, iac.a);
const ipn =
px < 0 || ny >= imageHeight
? icc
: readPixelColor(imageData, imageWidth, imageHeight, px, ny);
const icn =
ny >= imageHeight
? icc
: readPixelColor(imageData, imageWidth, imageHeight, x, ny);
const inn =
nx >= imageWidth || ny >= imageHeight
? icc
: readPixelColor(imageData, imageWidth, imageHeight, nx, ny);
const ian =
ax >= imageWidth || ny >= imageHeight
? icc
: readPixelColor(imageData, imageWidth, imageHeight, ax, ny);
const in0 = cubic(dx, ipn.r, icn.r, inn.r, ian.r);
const in1 = cubic(dx, ipn.g, icn.g, inn.g, ian.g);
const in2 = cubic(dx, ipn.b, icn.b, inn.b, ian.b);
// const in3 = cubic(dx, ipn.a, icn.a, inn.a, ian.a);
const ipa =
px < 0 || ay >= imageHeight
? icc
: readPixelColor(imageData, imageWidth, imageHeight, px, ay);
const ica =
ay >= imageHeight
? icc
: readPixelColor(imageData, imageWidth, imageHeight, x, ay);
const ina =
nx >= imageWidth || ay >= imageHeight
? icc
: readPixelColor(imageData, imageWidth, imageHeight, nx, ay);
const iaa =
ax >= imageWidth || ay >= imageHeight
? icc
: readPixelColor(imageData, imageWidth, imageHeight, ax, ay);
const ia0 = cubic(dx, ipa.r, ica.r, ina.r, iaa.r);
const ia1 = cubic(dx, ipa.g, ica.g, ina.g, iaa.g);
const ia2 = cubic(dx, ipa.b, ica.b, ina.b, iaa.b);
// const ia3 = cubic(dx, ipa.a, ica.a, ina.a, iaa.a);
const c0 = Math.trunc(clamp(cubic(dy, ip0, ic0, in0, ia0), 0, 255));
const c1 = Math.trunc(clamp(cubic(dy, ip1, ic1, in1, ia1), 0, 255));
const c2 = Math.trunc(clamp(cubic(dy, ip2, ic2, in2, ia2), 0, 255));
// const c3 = cubic(dy, ip3, ic3, in3, ia3);
return { r: c0, g: c1, b: c2 };
}
/// Returns the pixel value (RGB) at the given coordinates using bilinear interpolation.
export function getPixelBilinear(
fx: number,
fy: number,
imageData: Uint8ClampedArray,
imageWidth: number,
imageHeight: number,
) {
// Clamp to image boundaries
fx = clamp(fx, 0, imageWidth - 1);
fy = clamp(fy, 0, imageHeight - 1);
// Get the surrounding coordinates and their weights
const x0 = Math.floor(fx);
const x1 = Math.ceil(fx);
const y0 = Math.floor(fy);
const y1 = Math.ceil(fy);
const dx = fx - x0;
const dy = fy - y0;
const dx1 = 1.0 - dx;
const dy1 = 1.0 - dy;
// Get the original pixels
const pixel1 = readPixelColor(imageData, imageWidth, imageHeight, x0, y0);
const pixel2 = readPixelColor(imageData, imageWidth, imageHeight, x1, y0);
const pixel3 = readPixelColor(imageData, imageWidth, imageHeight, x0, y1);
const pixel4 = readPixelColor(imageData, imageWidth, imageHeight, x1, y1);
function bilinear(val1: number, val2: number, val3: number, val4: number) {
return Math.round(
val1 * dx1 * dy1 +
val2 * dx * dy1 +
val3 * dx1 * dy +
val4 * dx * dy,
);
}
// Interpolate the pixel values
const red = bilinear(pixel1.r, pixel2.r, pixel3.r, pixel4.r);
const green = bilinear(pixel1.g, pixel2.g, pixel3.g, pixel4.g);
const blue = bilinear(pixel1.b, pixel2.b, pixel3.b, pixel4.b);
return { r: red, g: green, b: blue };
}
export function warpAffineFloat32List(
imageBitmap: ImageBitmap,
faceAlignment: FaceAlignment,
faceSize: number,
inputData: Float32Array,
inputStartIndex: number,
): void {
// Get the pixel data
const offscreenCanvas = new OffscreenCanvas(
imageBitmap.width,
imageBitmap.height,
);
const ctx = offscreenCanvas.getContext("2d");
ctx.drawImage(imageBitmap, 0, 0, imageBitmap.width, imageBitmap.height);
const imageData = ctx.getImageData(
0,
0,
imageBitmap.width,
imageBitmap.height,
);
const pixelData = imageData.data;
const transformationMatrix = faceAlignment.affineMatrix.map((row) =>
row.map((val) => (val != 1.0 ? val * faceSize : 1.0)),
); // 3x3
const A: Matrix = new Matrix([
[transformationMatrix[0][0], transformationMatrix[0][1]],
[transformationMatrix[1][0], transformationMatrix[1][1]],
]);
const Ainverse = inverse(A);
const b00 = transformationMatrix[0][2];
const b10 = transformationMatrix[1][2];
const a00Prime = Ainverse.get(0, 0);
const a01Prime = Ainverse.get(0, 1);
const a10Prime = Ainverse.get(1, 0);
const a11Prime = Ainverse.get(1, 1);
for (let yTrans = 0; yTrans < faceSize; ++yTrans) {
for (let xTrans = 0; xTrans < faceSize; ++xTrans) {
// Perform inverse affine transformation
const xOrigin =
a00Prime * (xTrans - b00) + a01Prime * (yTrans - b10);
const yOrigin =
a10Prime * (xTrans - b00) + a11Prime * (yTrans - b10);
// Get the pixel from interpolation
const pixel = getPixelBicubic(
xOrigin,
yOrigin,
pixelData,
imageBitmap.width,
imageBitmap.height,
);
// Set the pixel in the input data
const index = (yTrans * faceSize + xTrans) * 3;
inputData[inputStartIndex + index] =
normalizePixelBetweenMinus1And1(pixel.r);
inputData[inputStartIndex + index + 1] =
normalizePixelBetweenMinus1And1(pixel.g);
inputData[inputStartIndex + index + 2] =
normalizePixelBetweenMinus1And1(pixel.b);
}
}
}
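// Editor's note: an illustrative note, not part of this diff. The warp above
// uses inverse mapping: it iterates over destination pixels and maps each one
// back into the source image, which avoids the holes a forward mapping would
// leave. Writing the (face-size scaled) affine transform as dst = A·src + b,
// the loop solves
//
//     src = A⁻¹ · (dst − b)
//
// which is what the a00Prime … a11Prime entries of A⁻¹ compute, before the
// source pixel is sampled with bicubic interpolation.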
export function createGrayscaleIntMatrixFromNormalized2List(
imageList: Float32Array,
faceNumber: number,
width: number = 112,
height: number = 112,
): number[][] {
const startIndex = faceNumber * width * height * 3;
return Array.from({ length: height }, (_, y) =>
Array.from({ length: width }, (_, x) => {
// 0.299 ∙ Red + 0.587 ∙ Green + 0.114 ∙ Blue
const pixelIndex = startIndex + 3 * (y * width + x);
return clamp(
Math.round(
0.299 *
unnormalizePixelFromBetweenMinus1And1(
imageList[pixelIndex],
) +
0.587 *
unnormalizePixelFromBetweenMinus1And1(
imageList[pixelIndex + 1],
) +
0.114 *
unnormalizePixelFromBetweenMinus1And1(
imageList[pixelIndex + 2],
),
),
0,
255,
);
}),
);
}
export function resizeToSquare(img: ImageBitmap, size: number) {
const scale = size / Math.max(img.height, img.width);
const width = scale * img.width;
const height = scale * img.height;
const offscreen = new OffscreenCanvas(size, size);
const ctx = offscreen.getContext("2d");
ctx.imageSmoothingQuality = "high";
ctx.drawImage(img, 0, 0, width, height);
const resizedImage = offscreen.transferToImageBitmap();
return { image: resizedImage, width, height };
}
export function transform(
imageBitmap: ImageBitmap,
affineMat: number[][],
outputWidth: number,
outputHeight: number,
) {
const offscreen = new OffscreenCanvas(outputWidth, outputHeight);
const context = offscreen.getContext("2d");
context.imageSmoothingQuality = "high";
context.transform(
affineMat[0][0],
affineMat[1][0],
affineMat[0][1],
affineMat[1][1],
affineMat[0][2],
affineMat[1][2],
);
context.drawImage(imageBitmap, 0, 0);
return offscreen.transferToImageBitmap();
}
export function crop(imageBitmap: ImageBitmap, cropBox: Box, size: number) {
const dimensions: Dimensions = {
width: size,
height: size,
};
return cropWithRotation(imageBitmap, cropBox, 0, dimensions, dimensions);
}
// These utilities only work in environments where OffscreenCanvas is available.
export function cropWithRotation(
imageBitmap: ImageBitmap,
cropBox: Box,
rotation?: number,
maxSize?: Dimensions,
minSize?: Dimensions,
) {
const box = cropBox.round();
const outputSize = { width: box.width, height: box.height };
if (maxSize) {
const minScale = Math.min(
maxSize.width / box.width,
maxSize.height / box.height,
);
if (minScale < 1) {
outputSize.width = Math.round(minScale * box.width);
outputSize.height = Math.round(minScale * box.height);
}
}
if (minSize) {
const maxScale = Math.max(
minSize.width / box.width,
minSize.height / box.height,
);
if (maxScale > 1) {
outputSize.width = Math.round(maxScale * box.width);
outputSize.height = Math.round(maxScale * box.height);
}
}
// log.info({ imageBitmap, box, outputSize });
const offscreen = new OffscreenCanvas(outputSize.width, outputSize.height);
const offscreenCtx = offscreen.getContext("2d");
offscreenCtx.imageSmoothingQuality = "high";
offscreenCtx.translate(outputSize.width / 2, outputSize.height / 2);
rotation && offscreenCtx.rotate(rotation);
const outputBox = new Box({
x: -outputSize.width / 2,
y: -outputSize.height / 2,
width: outputSize.width,
height: outputSize.height,
});
const enlargedBox = enlargeBox(box, 1.5);
const enlargedOutputBox = enlargeBox(outputBox, 1.5);
offscreenCtx.drawImage(
imageBitmap,
enlargedBox.x,
enlargedBox.y,
enlargedBox.width,
enlargedBox.height,
enlargedOutputBox.x,
enlargedOutputBox.y,
enlargedOutputBox.width,
enlargedOutputBox.height,
);
return offscreen.transferToImageBitmap();
}
export function addPadding(image: ImageBitmap, padding: number) {
const scale = 1 + padding * 2;
const width = scale * image.width;
const height = scale * image.height;
const offscreen = new OffscreenCanvas(width, height);
const ctx = offscreen.getContext("2d");
ctx.imageSmoothingEnabled = false;
ctx.drawImage(
image,
width / 2 - image.width / 2,
height / 2 - image.height / 2,
image.width,
image.height,
);
return offscreen.transferToImageBitmap();
}
export interface BlobOptions {
type?: string;
quality?: number;
}
export async function imageBitmapToBlob(imageBitmap: ImageBitmap) {
const offscreen = new OffscreenCanvas(
imageBitmap.width,
imageBitmap.height,
);
offscreen.getContext("2d").drawImage(imageBitmap, 0, 0);
return offscreen.convertToBlob({
type: "image/jpeg",
quality: 0.8,
});
}
export async function imageBitmapFromBlob(blob: Blob) {
return createImageBitmap(blob);
}


@ -1,37 +1,53 @@
import log from "@/next/log";
import mlIDbStorage from "services/face/db";
import { Face, Person } from "services/face/types";
import { type MLSyncContext } from "services/machineLearning/machineLearningService";
import { Person } from "services/face/types";
import { clusterFaces } from "./cluster";
import { saveFaceCrop } from "./f-index";
import { fetchImageBitmap, getLocalFile } from "./image";
export const syncPeopleIndex = async (syncContext: MLSyncContext) => {
export const syncPeopleIndex = async () => {
// TODO-ML(MR): Clustering is forcibly disabled. It doesn't currently work,
// and needs to be finalized before we move out of beta.
//
// > Error: Failed to execute 'transferToImageBitmap' on
// > 'OffscreenCanvas': ImageBitmap construction failed
/*
if (
syncContext.outOfSyncFiles.length <= 0 ||
(syncContext.nSyncedFiles === batchSize && Math.random() < 0)
) {
await this.syncIndex(syncContext);
}
public async syncIndex(syncContext: MLSyncContext) {
await this.getMLLibraryData(syncContext);
// TODO-ML(MR): Ensure this doesn't run until fixed.
await syncPeopleIndex(syncContext);
await this.persistMLLibraryData(syncContext);
}
const filesVersion = await mlIDbStorage.getIndexVersion("files");
if (filesVersion <= (await mlIDbStorage.getIndexVersion("people"))) {
return;
}
*/
// TODO: have faces addressable through fileId + faceId
// to avoid index based addressing, which is prone to wrong results
// one way could be to match nearest face within threshold in the file
/*
const allFacesMap =
syncContext.allSyncedFacesMap ??
(syncContext.allSyncedFacesMap = await mlIDbStorage.getAllFacesMap());
const allFaces = [...allFacesMap.values()].flat();
*/
await runFaceClustering(syncContext, allFaces);
await syncPeopleFromClusters(syncContext, allFacesMap, allFaces);
await mlIDbStorage.setIndexVersion("people", filesVersion);
};
const runFaceClustering = async (
syncContext: MLSyncContext,
allFaces: Array<Face>,
) => {
// await this.init();
const allFacesMap = await mlIDbStorage.getAllFacesMap();
const allFaces = [...allFacesMap.values()].flat();
if (!allFaces || allFaces.length < 50) {
log.info(
`Skipping clustering since number of faces (${allFaces.length}) is less than the clustering threshold (50)`,
@ -40,34 +56,15 @@ const runFaceClustering = async (
}
log.info("Running clustering allFaces: ", allFaces.length);
syncContext.mlLibraryData.faceClusteringResults = await clusterFaces(
const faceClusteringResults = await clusterFaces(
allFaces.map((f) => Array.from(f.embedding)),
);
syncContext.mlLibraryData.faceClusteringMethod = {
value: "Hdbscan",
version: 1,
};
log.info(
"[MLService] Got face clustering results: ",
JSON.stringify(syncContext.mlLibraryData.faceClusteringResults),
JSON.stringify(faceClusteringResults),
);
// syncContext.faceClustersWithNoise = {
// clusters: syncContext.faceClusteringResults.clusters.map(
// (faces) => ({
// faces,
// })
// ),
// noise: syncContext.faceClusteringResults.noise,
// };
};
const syncPeopleFromClusters = async (
syncContext: MLSyncContext,
allFacesMap: Map<number, Array<Face>>,
allFaces: Array<Face>,
) => {
const clusters = syncContext.mlLibraryData.faceClusteringResults?.clusters;
const clusters = faceClusteringResults?.clusters;
if (!clusters || clusters.length < 1) {
return;
}
@ -108,4 +105,6 @@ const syncPeopleFromClusters = async (
}
await mlIDbStorage.updateFaces(allFacesMap);
// await mlIDbStorage.setIndexVersion("people", filesVersion);
};


@ -0,0 +1,64 @@
import { Box, Point, boxFromBoundingBox } from "services/face/geom";
import { FaceDetection } from "services/face/types";
// TODO-ML(MR): Do we need two separate Matrix libraries?
//
// Keeping this in a separate file so that we can audit this. If these can be
// expressed using ml-matrix, then we can move the code to f-index.
import {
Matrix,
applyToPoint,
compose,
scale,
translate,
} from "transformation-matrix";
/**
 * Transform the given face detections from the coordinate space of
 * {@link inBox} to that of {@link toBox}.
 */
export const transformFaceDetections = (
faces: FaceDetection[],
inBox: Box,
toBox: Box,
): FaceDetection[] => {
const transform = computeTransformToBox(inBox, toBox);
return faces.map((f) => {
const box = transformBox(f.box, transform);
const normLandmarks = f.landmarks;
const landmarks = transformPoints(normLandmarks, transform);
return {
box,
landmarks,
probability: f.probability as number,
} as FaceDetection;
});
};
function computeTransformToBox(inBox: Box, toBox: Box): Matrix {
return compose(
translate(toBox.x, toBox.y),
scale(toBox.width / inBox.width, toBox.height / inBox.height),
);
}
function transformPoint(point: Point, transform: Matrix) {
const txdPoint = applyToPoint(transform, point);
return new Point(txdPoint.x, txdPoint.y);
}
function transformPoints(points: Point[], transform: Matrix) {
return points?.map((p) => transformPoint(p, transform));
}
function transformBox(box: Box, transform: Matrix) {
const topLeft = transformPoint(box.topLeft, transform);
const bottomRight = transformPoint(box.bottomRight, transform);
return boxFromBoundingBox({
left: topLeft.x,
top: topLeft.y,
right: bottomRight.x,
bottom: bottomRight.y,
});
}
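For illustration, a minimal sketch of how transformFaceDetections might be used: detections computed on a resized, square model input are mapped back into the original image's coordinate space. The 640×640 input size, rawDetections, and imageBitmap are assumptions for this example, not part of the change.
// Sketch only; the sizes and variables here are illustrative assumptions.
const modelInputBox = new Box({ x: 0, y: 0, width: 640, height: 640 });
const imageBox = new Box({
    x: 0,
    y: 0,
    width: imageBitmap.width,
    height: imageBitmap.height,
});
// Re-express detections made in model-input coordinates in image coordinates.
const detections = transformFaceDetections(rawDetections, modelInputBox, imageBox);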

View file

@ -1,62 +1,10 @@
import type { ClusterFacesResult } from "services/face/cluster";
import { Dimensions } from "services/face/geom";
import { Box, Dimensions, Point } from "services/face/geom";
import { EnteFile } from "types/file";
import { Box, Point } from "./geom";
export interface MLSyncResult {
nOutOfSyncFiles: number;
nSyncedFiles: number;
nSyncedFaces: number;
nFaceClusters: number;
nFaceNoise: number;
error?: Error;
}
export declare type FaceDescriptor = Float32Array;
export declare type Cluster = Array<number>;
export interface FacesCluster {
faces: Cluster;
summary?: FaceDescriptor;
}
export interface FacesClustersWithNoise {
clusters: Array<FacesCluster>;
noise: Cluster;
}
export interface NearestCluster {
cluster: FacesCluster;
distance: number;
}
export declare type Landmark = Point;
export declare type ImageType = "Original" | "Preview";
export declare type FaceDetectionMethod = "YoloFace";
export declare type FaceCropMethod = "ArcFace";
export declare type FaceAlignmentMethod = "ArcFace";
export declare type FaceEmbeddingMethod = "MobileFaceNet";
export declare type BlurDetectionMethod = "Laplacian";
export declare type ClusteringMethod = "Hdbscan" | "Dbscan";
export class AlignedBox {
box: Box;
rotation: number;
}
export interface Versioned<T> {
value: T;
version: number;
}
export interface FaceDetection {
    // box and landmarks are relative to the image dimensions stored at mlFileData
box: Box;
@ -124,15 +72,9 @@ export interface Person {
export interface MlFileData {
fileId: number;
faces?: Face[];
imageSource?: ImageType;
imageDimensions?: Dimensions;
faceDetectionMethod?: Versioned<FaceDetectionMethod>;
faceCropMethod?: Versioned<FaceCropMethod>;
faceAlignmentMethod?: Versioned<FaceAlignmentMethod>;
faceEmbeddingMethod?: Versioned<FaceEmbeddingMethod>;
mlVersion: number;
errorCount: number;
lastErrorMessage?: string;
}
export interface MLSearchConfig {
@ -152,10 +94,4 @@ export interface MLSyncFileContext {
newAlignment?: boolean;
}
export interface MLLibraryData {
faceClusteringMethod?: Versioned<ClusteringMethod>;
faceClusteringResults?: ClusterFacesResult;
faceClustersWithNoise?: FacesClustersWithNoise;
}
export declare type MLIndex = "files" | "people";

View file

@ -9,22 +9,18 @@ import { CustomError, parseUploadErrorCodes } from "@ente/shared/error";
import PQueue from "p-queue";
import { putEmbedding } from "services/embeddingService";
import mlIDbStorage, { ML_SEARCH_CONFIG_NAME } from "services/face/db";
import { fetchImageBitmap, getLocalFile } from "services/face/image";
import {
Face,
FaceDetection,
Landmark,
MLLibraryData,
MLSearchConfig,
MLSyncFileContext,
MLSyncResult,
MlFileData,
} from "services/face/types";
import { getLocalFiles } from "services/fileService";
import { EnteFile } from "types/file";
import { isInternalUserForML } from "utils/user";
import { regenerateFaceCrop, syncFileAnalyzeFaces } from "../face/f-index";
import { fetchImageBitmapForContext } from "../face/image";
import { syncPeopleIndex } from "../face/people";
import { indexFaces, saveFaceCrop } from "../face/f-index";
/**
* TODO-ML(MR): What and why.
@ -56,41 +52,16 @@ export async function updateMLSearchConfig(newConfig: MLSearchConfig) {
return mlIDbStorage.putConfig(ML_SEARCH_CONFIG_NAME, newConfig);
}
export interface MLSyncContext {
token: string;
userID: number;
localFilesMap: Map<number, EnteFile>;
outOfSyncFiles: EnteFile[];
nSyncedFiles: number;
nSyncedFaces: number;
allSyncedFacesMap?: Map<number, Array<Face>>;
error?: Error;
// oldMLLibraryData: MLLibraryData;
mlLibraryData: MLLibraryData;
syncQueue: PQueue;
getEnteWorker(id: number): Promise<any>;
dispose(): Promise<void>;
}
export class LocalMLSyncContext implements MLSyncContext {
class MLSyncContext {
public token: string;
public userID: number;
public localFilesMap: Map<number, EnteFile>;
public outOfSyncFiles: EnteFile[];
public nSyncedFiles: number;
public nSyncedFaces: number;
public allSyncedFacesMap?: Map<number, Array<Face>>;
public error?: Error;
public mlLibraryData: MLLibraryData;
public syncQueue: PQueue;
    // TODO: whether to limit concurrent downloads
// private downloadQueue: PQueue;
@ -107,7 +78,6 @@ export class LocalMLSyncContext implements MLSyncContext {
this.outOfSyncFiles = [];
this.nSyncedFiles = 0;
this.nSyncedFaces = 0;
this.concurrency = concurrency ?? getConcurrency();
@ -151,7 +121,7 @@ class MachineLearningService {
private localSyncContext: Promise<MLSyncContext>;
private syncContext: Promise<MLSyncContext>;
public async sync(token: string, userID: number): Promise<MLSyncResult> {
public async sync(token: string, userID: number): Promise<boolean> {
if (!token) {
throw Error("Token needed by ml service to sync file");
}
@ -166,34 +136,9 @@ class MachineLearningService {
await this.syncFiles(syncContext);
}
// TODO-ML(MR): Forced disable clustering. It doesn't currently work,
// need to finalize it before we move out of beta.
//
// > Error: Failed to execute 'transferToImageBitmap' on
// > 'OffscreenCanvas': ImageBitmap construction failed
/*
if (
syncContext.outOfSyncFiles.length <= 0 ||
(syncContext.nSyncedFiles === batchSize && Math.random() < 0)
) {
await this.syncIndex(syncContext);
}
*/
const mlSyncResult: MLSyncResult = {
nOutOfSyncFiles: syncContext.outOfSyncFiles.length,
nSyncedFiles: syncContext.nSyncedFiles,
nSyncedFaces: syncContext.nSyncedFaces,
nFaceClusters:
syncContext.mlLibraryData?.faceClusteringResults?.clusters
.length,
nFaceNoise:
syncContext.mlLibraryData?.faceClusteringResults?.noise.length,
error: syncContext.error,
};
// log.info('[MLService] sync results: ', mlSyncResult);
return mlSyncResult;
const error = syncContext.error;
const nOutOfSyncFiles = syncContext.outOfSyncFiles.length;
return !error && nOutOfSyncFiles > 0;
}
public async regenerateFaceCrop(faceID: string) {
@ -309,7 +254,6 @@ class MachineLearningService {
syncContext.error = error;
}
await syncContext.syncQueue.onIdle();
log.info("allFaces: ", syncContext.nSyncedFaces);
// TODO: In case syncJob has to use multiple ml workers
// do in same transaction with each file update
@ -324,7 +268,7 @@ class MachineLearningService {
// TODO-ML(MR): Keep as promise for now.
this.syncContext = new Promise((resolve) => {
resolve(new LocalMLSyncContext(token, userID));
resolve(new MLSyncContext(token, userID));
});
} else {
log.info("reusing existing syncContext");
@ -338,7 +282,7 @@ class MachineLearningService {
log.info("Creating localSyncContext");
// TODO-ML(MR):
this.localSyncContext = new Promise((resolve) => {
resolve(new LocalMLSyncContext(token, userID));
resolve(new MLSyncContext(token, userID));
});
} else {
log.info("reusing existing localSyncContext");
@ -389,7 +333,6 @@ class MachineLearningService {
`Indexing ${enteFile.title ?? "<untitled>"} ${enteFile.id}`,
);
const mlFileData = await this.syncFile(enteFile, localFile);
syncContext.nSyncedFaces += mlFileData.faces?.length || 0;
syncContext.nSyncedFiles += 1;
return mlFileData;
} catch (e) {
@ -422,31 +365,14 @@ class MachineLearningService {
}
private async syncFile(enteFile: EnteFile, localFile?: globalThis.File) {
log.debug(() => ({ a: "Syncing file", enteFile }));
const fileContext: MLSyncFileContext = { enteFile, localFile };
const oldMlFile = await this.getMLFileData(enteFile.id);
if (oldMlFile && oldMlFile.mlVersion) {
return oldMlFile;
}
const newMlFile = (fileContext.newMlFile = this.newMlData(enteFile.id));
newMlFile.mlVersion = defaultMLVersion;
try {
await fetchImageBitmapForContext(fileContext);
await syncFileAnalyzeFaces(fileContext);
newMlFile.errorCount = 0;
newMlFile.lastErrorMessage = undefined;
await this.persistOnServer(newMlFile, enteFile);
await mlIDbStorage.putFile(newMlFile);
} catch (e) {
log.error("ml detection failed", e);
newMlFile.mlVersion = oldMlFile.mlVersion;
throw e;
} finally {
fileContext.imageBitmap && fileContext.imageBitmap.close();
}
const newMlFile = await indexFaces(enteFile, localFile);
await this.persistOnServer(newMlFile, enteFile);
await mlIDbStorage.putFile(newMlFile);
return newMlFile;
}
@ -484,7 +410,7 @@ class MachineLearningService {
mlFileData = this.newMlData(enteFile.id);
}
mlFileData.errorCount = (mlFileData.errorCount || 0) + 1;
mlFileData.lastErrorMessage = e.message;
console.error(`lastError for ${enteFile.id}`, e);
return mlFileData;
});
@ -493,26 +419,6 @@ class MachineLearningService {
console.error("Error while storing ml sync error", e);
}
}
private async getMLLibraryData(syncContext: MLSyncContext) {
syncContext.mlLibraryData = await mlIDbStorage.getLibraryData();
if (!syncContext.mlLibraryData) {
syncContext.mlLibraryData = {};
}
}
private async persistMLLibraryData(syncContext: MLSyncContext) {
return mlIDbStorage.putLibraryData(syncContext.mlLibraryData);
}
public async syncIndex(syncContext: MLSyncContext) {
await this.getMLLibraryData(syncContext);
// TODO-ML(MR): Ensure this doesn't run until fixed.
await syncPeopleIndex(syncContext);
await this.persistMLLibraryData(syncContext);
}
}
export default new MachineLearningService();
@ -543,19 +449,14 @@ class ServerFileMl {
class ServerFaceEmbeddings {
public faces: ServerFace[];
public version: number;
/* TODO
public client?: string;
public error?: boolean;
*/
public constructor(
faces: ServerFace[],
version: number,
client?: string,
error?: boolean,
) {
public constructor(faces: ServerFace[], version: number) {
this.faces = faces;
this.version = version;
this.client = client;
this.error = error;
}
}
@ -613,10 +514,7 @@ class ServerFaceBox {
function LocalFileMlDataToServerFileMl(
localFileMlData: MlFileData,
): ServerFileMl {
if (
localFileMlData.errorCount > 0 &&
localFileMlData.lastErrorMessage !== undefined
) {
if (localFileMlData.errorCount > 0) {
return null;
}
const imageDimensions = localFileMlData.imageDimensions;
@ -640,6 +538,7 @@ function LocalFileMlDataToServerFileMl(
} as Landmark);
}
// TODO: Add client UA and version
const newFaceObject = new ServerFace(
faceID,
Array.from(embedding),
@ -649,11 +548,7 @@ function LocalFileMlDataToServerFileMl(
);
faces.push(newFaceObject);
}
const faceEmbeddings = new ServerFaceEmbeddings(
faces,
1,
localFileMlData.lastErrorMessage,
);
const faceEmbeddings = new ServerFaceEmbeddings(faces, 1);
return new ServerFileMl(
localFileMlData.fileId,
faceEmbeddings,
@ -673,3 +568,15 @@ export function logQueueStats(queue: PQueue, name: string) {
console.error(`queuestats: ${name}: Error, `, error),
);
}
export const regenerateFaceCrop = async (faceID: string) => {
const fileID = Number(faceID.split("-")[0]);
const personFace = await mlIDbStorage.getFace(fileID, faceID);
if (!personFace) {
throw Error("Face not found");
}
const file = await getLocalFile(personFace.fileId);
const imageBitmap = await fetchImageBitmap(file);
return await saveFaceCrop(imageBitmap, personFace);
};
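As a usage sketch (assuming face IDs keep the "<fileID>-<suffix>" shape that the split above relies on):
// "12345-0" → file ID 12345; the face is looked up by its full ID.
const faceCrop = await regenerateFaceCrop("12345-0");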

View file

@ -8,25 +8,19 @@ import PQueue from "p-queue";
import { createFaceComlinkWorker } from "services/face";
import mlIDbStorage from "services/face/db";
import type { DedicatedMLWorker } from "services/face/face.worker";
import { MLSyncResult } from "services/face/types";
import { EnteFile } from "types/file";
import { logQueueStats } from "./machineLearningService";
export type JobState = "Scheduled" | "Running" | "NotScheduled";
export interface MLSyncJobResult {
shouldBackoff: boolean;
mlSyncResult: MLSyncResult;
}
export class MLSyncJob {
private runCallback: () => Promise<MLSyncJobResult>;
private runCallback: () => Promise<boolean>;
private state: JobState;
private stopped: boolean;
private intervalSec: number;
private nextTimeoutId: ReturnType<typeof setTimeout>;
constructor(runCallback: () => Promise<MLSyncJobResult>) {
constructor(runCallback: () => Promise<boolean>) {
this.runCallback = runCallback;
this.state = "NotScheduled";
this.stopped = true;
@ -65,13 +59,11 @@ export class MLSyncJob {
this.state = "Running";
try {
const jobResult = await this.runCallback();
if (jobResult && jobResult.shouldBackoff) {
this.intervalSec = Math.min(960, this.intervalSec * 2);
} else {
if (await this.runCallback()) {
this.resetInterval();
} else {
this.intervalSec = Math.min(960, this.intervalSec * 2);
}
log.info("Job completed");
} catch (e) {
console.error("Error while running Job: ", e);
} finally {
@ -255,7 +247,14 @@ class MLWorkManager {
this.syncJobWorker = undefined;
}
private async runMLSyncJob(): Promise<MLSyncJobResult> {
/**
     * Returns `false` to indicate that either an error occurred, there are no
     * more files to process, or we cannot currently process files.
     *
     * Conversely, when it returns `true` all is well and there is more work
     * pending, so we should keep processing at full speed.
*/
private async runMLSyncJob(): Promise<boolean> {
try {
            // TODO: skipping is not required if we are caching chunks through the service worker;
            // currently the worker chunk itself is not loaded when the network is unavailable.
@ -263,29 +262,16 @@ class MLWorkManager {
log.info(
"Skipping ml-sync job run as not connected to internet.",
);
return {
shouldBackoff: true,
mlSyncResult: undefined,
};
return false;
}
const token = getToken();
const userID = getUserID();
const jobWorkerProxy = await this.getSyncJobWorker();
const mlSyncResult = await jobWorkerProxy.sync(token, userID);
return await jobWorkerProxy.sync(token, userID);
// this.terminateSyncJobWorker();
const jobResult: MLSyncJobResult = {
shouldBackoff:
!!mlSyncResult.error || mlSyncResult.nOutOfSyncFiles < 1,
mlSyncResult,
};
log.info("ML Sync Job result: ", JSON.stringify(jobResult));
// TODO: redirect/refresh to gallery in case of session_expired, stop ml sync job
return jobResult;
} catch (e) {
log.error("Failed to run MLSync Job", e);
}
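To make the backoff semantics concrete, here is a small sketch of the interval schedule implied by the MLSyncJob loop above: a `true` result resets the interval, while repeated `false` results double it up to the 960 second cap. The 5 second starting interval is an assumption for illustration only.
// Sketch of the backoff schedule (starting interval assumed to be 5s).
let intervalSec = 5;
const onJobDone = (hasMoreWork: boolean) => {
    if (hasMoreWork) intervalSec = 5; // more work pending: reset, keep going
    else intervalSec = Math.min(960, intervalSec * 2); // 10, 20, 40, ... capped at 960
};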

View file

@ -1,468 +0,0 @@
// These utils only work in environments where OffscreenCanvas is available.
import { Matrix, inverse } from "ml-matrix";
import { Box, Dimensions, enlargeBox } from "services/face/geom";
import { FaceAlignment } from "services/face/types";
export function normalizePixelBetween0And1(pixelValue: number) {
return pixelValue / 255.0;
}
export function normalizePixelBetweenMinus1And1(pixelValue: number) {
return pixelValue / 127.5 - 1.0;
}
export function unnormalizePixelFromBetweenMinus1And1(pixelValue: number) {
return clamp(Math.round((pixelValue + 1.0) * 127.5), 0, 255);
}
export function readPixelColor(
imageData: Uint8ClampedArray,
width: number,
height: number,
x: number,
y: number,
) {
if (x < 0 || x >= width || y < 0 || y >= height) {
return { r: 0, g: 0, b: 0, a: 0 };
}
const index = (y * width + x) * 4;
return {
r: imageData[index],
g: imageData[index + 1],
b: imageData[index + 2],
a: imageData[index + 3],
};
}
export function clamp(value: number, min: number, max: number) {
return Math.min(max, Math.max(min, value));
}
export function getPixelBicubic(
fx: number,
fy: number,
imageData: Uint8ClampedArray,
imageWidth: number,
imageHeight: number,
) {
// Clamp to image boundaries
fx = clamp(fx, 0, imageWidth - 1);
fy = clamp(fy, 0, imageHeight - 1);
const x = Math.trunc(fx) - (fx >= 0.0 ? 0 : 1);
const px = x - 1;
const nx = x + 1;
const ax = x + 2;
const y = Math.trunc(fy) - (fy >= 0.0 ? 0 : 1);
const py = y - 1;
const ny = y + 1;
const ay = y + 2;
const dx = fx - x;
const dy = fy - y;
function cubic(
dx: number,
ipp: number,
icp: number,
inp: number,
iap: number,
) {
return (
icp +
0.5 *
(dx * (-ipp + inp) +
dx * dx * (2 * ipp - 5 * icp + 4 * inp - iap) +
dx * dx * dx * (-ipp + 3 * icp - 3 * inp + iap))
);
}
const icc = readPixelColor(imageData, imageWidth, imageHeight, x, y);
const ipp =
px < 0 || py < 0
? icc
: readPixelColor(imageData, imageWidth, imageHeight, px, py);
const icp =
px < 0
? icc
: readPixelColor(imageData, imageWidth, imageHeight, x, py);
const inp =
py < 0 || nx >= imageWidth
? icc
: readPixelColor(imageData, imageWidth, imageHeight, nx, py);
const iap =
ax >= imageWidth || py < 0
? icc
: readPixelColor(imageData, imageWidth, imageHeight, ax, py);
const ip0 = cubic(dx, ipp.r, icp.r, inp.r, iap.r);
const ip1 = cubic(dx, ipp.g, icp.g, inp.g, iap.g);
const ip2 = cubic(dx, ipp.b, icp.b, inp.b, iap.b);
// const ip3 = cubic(dx, ipp.a, icp.a, inp.a, iap.a);
const ipc =
px < 0
? icc
: readPixelColor(imageData, imageWidth, imageHeight, px, y);
const inc =
nx >= imageWidth
? icc
: readPixelColor(imageData, imageWidth, imageHeight, nx, y);
const iac =
ax >= imageWidth
? icc
: readPixelColor(imageData, imageWidth, imageHeight, ax, y);
const ic0 = cubic(dx, ipc.r, icc.r, inc.r, iac.r);
const ic1 = cubic(dx, ipc.g, icc.g, inc.g, iac.g);
const ic2 = cubic(dx, ipc.b, icc.b, inc.b, iac.b);
// const ic3 = cubic(dx, ipc.a, icc.a, inc.a, iac.a);
const ipn =
px < 0 || ny >= imageHeight
? icc
: readPixelColor(imageData, imageWidth, imageHeight, px, ny);
const icn =
ny >= imageHeight
? icc
: readPixelColor(imageData, imageWidth, imageHeight, x, ny);
const inn =
nx >= imageWidth || ny >= imageHeight
? icc
: readPixelColor(imageData, imageWidth, imageHeight, nx, ny);
const ian =
ax >= imageWidth || ny >= imageHeight
? icc
: readPixelColor(imageData, imageWidth, imageHeight, ax, ny);
const in0 = cubic(dx, ipn.r, icn.r, inn.r, ian.r);
const in1 = cubic(dx, ipn.g, icn.g, inn.g, ian.g);
const in2 = cubic(dx, ipn.b, icn.b, inn.b, ian.b);
// const in3 = cubic(dx, ipn.a, icn.a, inn.a, ian.a);
const ipa =
px < 0 || ay >= imageHeight
? icc
: readPixelColor(imageData, imageWidth, imageHeight, px, ay);
const ica =
ay >= imageHeight
? icc
: readPixelColor(imageData, imageWidth, imageHeight, x, ay);
const ina =
nx >= imageWidth || ay >= imageHeight
? icc
: readPixelColor(imageData, imageWidth, imageHeight, nx, ay);
const iaa =
ax >= imageWidth || ay >= imageHeight
? icc
: readPixelColor(imageData, imageWidth, imageHeight, ax, ay);
const ia0 = cubic(dx, ipa.r, ica.r, ina.r, iaa.r);
const ia1 = cubic(dx, ipa.g, ica.g, ina.g, iaa.g);
const ia2 = cubic(dx, ipa.b, ica.b, ina.b, iaa.b);
// const ia3 = cubic(dx, ipa.a, ica.a, ina.a, iaa.a);
const c0 = Math.trunc(clamp(cubic(dy, ip0, ic0, in0, ia0), 0, 255));
const c1 = Math.trunc(clamp(cubic(dy, ip1, ic1, in1, ia1), 0, 255));
const c2 = Math.trunc(clamp(cubic(dy, ip2, ic2, in2, ia2), 0, 255));
// const c3 = cubic(dy, ip3, ic3, in3, ia3);
return { r: c0, g: c1, b: c2 };
}
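The cubic() helper above is the Catmull-Rom kernel. As a quick sanity check (a worked example, not part of the original file), sampling a linear ramp of neighbours halfway between the two centre samples reproduces the linear midpoint exactly:
// Standalone restatement of the kernel, for the worked example below.
const catmullRom = (dx: number, p0: number, p1: number, p2: number, p3: number) =>
    p1 +
    0.5 *
        (dx * (-p0 + p2) +
            dx * dx * (2 * p0 - 5 * p1 + 4 * p2 - p3) +
            dx * dx * dx * (-p0 + 3 * p1 - 3 * p2 + p3));
// 20 + 0.5 * (0.5 * (-10 + 30)) = 25; the quadratic and cubic terms vanish here.
console.assert(catmullRom(0.5, 10, 20, 30, 40) === 25);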
/// Returns the pixel value (RGB) at the given coordinates using bilinear interpolation.
export function getPixelBilinear(
fx: number,
fy: number,
imageData: Uint8ClampedArray,
imageWidth: number,
imageHeight: number,
) {
// Clamp to image boundaries
fx = clamp(fx, 0, imageWidth - 1);
fy = clamp(fy, 0, imageHeight - 1);
// Get the surrounding coordinates and their weights
const x0 = Math.floor(fx);
const x1 = Math.ceil(fx);
const y0 = Math.floor(fy);
const y1 = Math.ceil(fy);
const dx = fx - x0;
const dy = fy - y0;
const dx1 = 1.0 - dx;
const dy1 = 1.0 - dy;
// Get the original pixels
const pixel1 = readPixelColor(imageData, imageWidth, imageHeight, x0, y0);
const pixel2 = readPixelColor(imageData, imageWidth, imageHeight, x1, y0);
const pixel3 = readPixelColor(imageData, imageWidth, imageHeight, x0, y1);
const pixel4 = readPixelColor(imageData, imageWidth, imageHeight, x1, y1);
function bilinear(val1: number, val2: number, val3: number, val4: number) {
return Math.round(
val1 * dx1 * dy1 +
val2 * dx * dy1 +
val3 * dx1 * dy +
val4 * dx * dy,
);
}
// Interpolate the pixel values
const red = bilinear(pixel1.r, pixel2.r, pixel3.r, pixel4.r);
const green = bilinear(pixel1.g, pixel2.g, pixel3.g, pixel4.g);
const blue = bilinear(pixel1.b, pixel2.b, pixel3.b, pixel4.b);
return { r: red, g: green, b: blue };
}
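For instance (a worked example, not part of the original file): at the exact centre of the four neighbouring pixels, dx = dy = 0.5, so each neighbour contributes a weight of 0.25:
const bilinear = (dx: number, dy: number, v1: number, v2: number, v3: number, v4: number) =>
    Math.round(
        v1 * (1 - dx) * (1 - dy) +
            v2 * dx * (1 - dy) +
            v3 * (1 - dx) * dy +
            v4 * dx * dy,
    );
// (0 + 100 + 100 + 200) / 4 = 100
console.assert(bilinear(0.5, 0.5, 0, 100, 100, 200) === 100);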
export function warpAffineFloat32List(
imageBitmap: ImageBitmap,
faceAlignment: FaceAlignment,
faceSize: number,
inputData: Float32Array,
inputStartIndex: number,
): void {
// Get the pixel data
const offscreenCanvas = new OffscreenCanvas(
imageBitmap.width,
imageBitmap.height,
);
const ctx = offscreenCanvas.getContext("2d");
ctx.drawImage(imageBitmap, 0, 0, imageBitmap.width, imageBitmap.height);
const imageData = ctx.getImageData(
0,
0,
imageBitmap.width,
imageBitmap.height,
);
const pixelData = imageData.data;
const transformationMatrix = faceAlignment.affineMatrix.map((row) =>
row.map((val) => (val != 1.0 ? val * faceSize : 1.0)),
); // 3x3
const A: Matrix = new Matrix([
[transformationMatrix[0][0], transformationMatrix[0][1]],
[transformationMatrix[1][0], transformationMatrix[1][1]],
]);
const Ainverse = inverse(A);
const b00 = transformationMatrix[0][2];
const b10 = transformationMatrix[1][2];
const a00Prime = Ainverse.get(0, 0);
const a01Prime = Ainverse.get(0, 1);
const a10Prime = Ainverse.get(1, 0);
const a11Prime = Ainverse.get(1, 1);
for (let yTrans = 0; yTrans < faceSize; ++yTrans) {
for (let xTrans = 0; xTrans < faceSize; ++xTrans) {
// Perform inverse affine transformation
const xOrigin =
a00Prime * (xTrans - b00) + a01Prime * (yTrans - b10);
const yOrigin =
a10Prime * (xTrans - b00) + a11Prime * (yTrans - b10);
// Get the pixel from interpolation
const pixel = getPixelBicubic(
xOrigin,
yOrigin,
pixelData,
imageBitmap.width,
imageBitmap.height,
);
// Set the pixel in the input data
const index = (yTrans * faceSize + xTrans) * 3;
inputData[inputStartIndex + index] =
normalizePixelBetweenMinus1And1(pixel.r);
inputData[inputStartIndex + index + 1] =
normalizePixelBetweenMinus1And1(pixel.g);
inputData[inputStartIndex + index + 2] =
normalizePixelBetweenMinus1And1(pixel.b);
}
}
}
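A minimal sketch of how warpAffineFloat32List might be driven for a batch of faces, writing each aligned crop into one preallocated buffer. The 112 face size matches the 112×112 defaults used further down in this file; alignments and imageBitmap are assumed inputs for the example.
const faceSize = 112; // assumed, matching the 112×112 defaults below
const input = new Float32Array(alignments.length * faceSize * faceSize * 3);
alignments.forEach((alignment, i) =>
    warpAffineFloat32List(
        imageBitmap,
        alignment,
        faceSize,
        input,
        i * faceSize * faceSize * 3,
    ),
);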
export function createGrayscaleIntMatrixFromNormalized2List(
imageList: Float32Array,
faceNumber: number,
width: number = 112,
height: number = 112,
): number[][] {
const startIndex = faceNumber * width * height * 3;
return Array.from({ length: height }, (_, y) =>
Array.from({ length: width }, (_, x) => {
// 0.299 ∙ Red + 0.587 ∙ Green + 0.114 ∙ Blue
const pixelIndex = startIndex + 3 * (y * width + x);
return clamp(
Math.round(
0.299 *
unnormalizePixelFromBetweenMinus1And1(
imageList[pixelIndex],
) +
0.587 *
unnormalizePixelFromBetweenMinus1And1(
imageList[pixelIndex + 1],
) +
0.114 *
unnormalizePixelFromBetweenMinus1And1(
imageList[pixelIndex + 2],
),
),
0,
255,
);
}),
);
}
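Continuing the sketch above, the packed buffer can then be converted back, one face at a time, into an 8-bit grayscale matrix using the Rec. 601 luma weights from the comment:
// Grayscale 112×112 matrix for the first face in the packed batch.
const gray = createGrayscaleIntMatrixFromNormalized2List(input, 0);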
export function resizeToSquare(img: ImageBitmap, size: number) {
const scale = size / Math.max(img.height, img.width);
const width = scale * img.width;
const height = scale * img.height;
const offscreen = new OffscreenCanvas(size, size);
const ctx = offscreen.getContext("2d");
ctx.imageSmoothingQuality = "high";
ctx.drawImage(img, 0, 0, width, height);
const resizedImage = offscreen.transferToImageBitmap();
return { image: resizedImage, width, height };
}
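As a worked example (the 640 target size is illustrative, not taken from this file): the longer side is scaled down to `size` while preserving aspect ratio, so a 4000×3000 bitmap requested at 640 is drawn as 640×480 inside a 640×640 canvas, with the remainder left transparent.
const { image, width, height } = resizeToSquare(imageBitmap, 640);
// For a 4000×3000 input: scale = 640 / 4000 = 0.16, so width === 640 and height === 480.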
export function transform(
imageBitmap: ImageBitmap,
affineMat: number[][],
outputWidth: number,
outputHeight: number,
) {
const offscreen = new OffscreenCanvas(outputWidth, outputHeight);
const context = offscreen.getContext("2d");
context.imageSmoothingQuality = "high";
context.transform(
affineMat[0][0],
affineMat[1][0],
affineMat[0][1],
affineMat[1][1],
affineMat[0][2],
affineMat[1][2],
);
context.drawImage(imageBitmap, 0, 0);
return offscreen.transferToImageBitmap();
}
export function crop(imageBitmap: ImageBitmap, cropBox: Box, size: number) {
const dimensions: Dimensions = {
width: size,
height: size,
};
return cropWithRotation(imageBitmap, cropBox, 0, dimensions, dimensions);
}
export function cropWithRotation(
imageBitmap: ImageBitmap,
cropBox: Box,
rotation?: number,
maxSize?: Dimensions,
minSize?: Dimensions,
) {
const box = cropBox.round();
const outputSize = { width: box.width, height: box.height };
if (maxSize) {
const minScale = Math.min(
maxSize.width / box.width,
maxSize.height / box.height,
);
if (minScale < 1) {
outputSize.width = Math.round(minScale * box.width);
outputSize.height = Math.round(minScale * box.height);
}
}
if (minSize) {
const maxScale = Math.max(
minSize.width / box.width,
minSize.height / box.height,
);
if (maxScale > 1) {
outputSize.width = Math.round(maxScale * box.width);
outputSize.height = Math.round(maxScale * box.height);
}
}
// log.info({ imageBitmap, box, outputSize });
const offscreen = new OffscreenCanvas(outputSize.width, outputSize.height);
const offscreenCtx = offscreen.getContext("2d");
offscreenCtx.imageSmoothingQuality = "high";
offscreenCtx.translate(outputSize.width / 2, outputSize.height / 2);
rotation && offscreenCtx.rotate(rotation);
const outputBox = new Box({
x: -outputSize.width / 2,
y: -outputSize.height / 2,
width: outputSize.width,
height: outputSize.height,
});
const enlargedBox = enlargeBox(box, 1.5);
const enlargedOutputBox = enlargeBox(outputBox, 1.5);
offscreenCtx.drawImage(
imageBitmap,
enlargedBox.x,
enlargedBox.y,
enlargedBox.width,
enlargedBox.height,
enlargedOutputBox.x,
enlargedOutputBox.y,
enlargedOutputBox.width,
enlargedOutputBox.height,
);
return offscreen.transferToImageBitmap();
}
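As a numeric sketch of the maxSize clamp above: a 1000×500 crop box constrained to a 512×512 maximum is scaled by min(512/1000, 512/500) = 0.512, producing a 512×256 output bitmap.
const cropped = cropWithRotation(
    imageBitmap,
    new Box({ x: 0, y: 0, width: 1000, height: 500 }),
    0,
    { width: 512, height: 512 },
);
// cropped.width === 512, cropped.height === 256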
export function addPadding(image: ImageBitmap, padding: number) {
const scale = 1 + padding * 2;
const width = scale * image.width;
const height = scale * image.height;
const offscreen = new OffscreenCanvas(width, height);
const ctx = offscreen.getContext("2d");
ctx.imageSmoothingEnabled = false;
ctx.drawImage(
image,
width / 2 - image.width / 2,
height / 2 - image.height / 2,
image.width,
image.height,
);
return offscreen.transferToImageBitmap();
}
export interface BlobOptions {
type?: string;
quality?: number;
}
export async function imageBitmapToBlob(imageBitmap: ImageBitmap) {
const offscreen = new OffscreenCanvas(
imageBitmap.width,
imageBitmap.height,
);
offscreen.getContext("2d").drawImage(imageBitmap, 0, 0);
return offscreen.convertToBlob({
type: "image/jpeg",
quality: 0.8,
});
}
export async function imageBitmapFromBlob(blob: Blob) {
return createImageBitmap(blob);
}