[web] ML cleanup - Part 3/x (#1737)

Manav Rathi 2024-05-16 15:11:10 +05:30 committed by GitHub
commit a0cb8b850e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
41 changed files with 1236 additions and 2063 deletions


@ -46,7 +46,7 @@ import {
clipImageEmbedding,
clipTextEmbeddingIfAvailable,
} from "./services/ml-clip";
import { detectFaces, faceEmbedding } from "./services/ml-face";
import { detectFaces, faceEmbeddings } from "./services/ml-face";
import { encryptionKey, saveEncryptionKey } from "./services/store";
import {
clearPendingUploads,
@ -182,8 +182,8 @@ export const attachIPCHandlers = () => {
detectFaces(input),
);
ipcMain.handle("faceEmbedding", (_, input: Float32Array) =>
faceEmbedding(input),
ipcMain.handle("faceEmbeddings", (_, input: Float32Array) =>
faceEmbeddings(input),
);
ipcMain.handle("legacyFaceCrop", (_, faceID: string) =>


@ -32,7 +32,7 @@ const cachedFaceEmbeddingSession = makeCachedInferenceSession(
5286998 /* 5 MB */,
);
export const faceEmbedding = async (input: Float32Array) => {
export const faceEmbeddings = async (input: Float32Array) => {
// Dimension of each face (alias)
const mobileFaceNetFaceSize = 112;
// Smaller alias


@ -162,8 +162,8 @@ const clipTextEmbeddingIfAvailable = (text: string) =>
const detectFaces = (input: Float32Array) =>
ipcRenderer.invoke("detectFaces", input);
const faceEmbedding = (input: Float32Array) =>
ipcRenderer.invoke("faceEmbedding", input);
const faceEmbeddings = (input: Float32Array) =>
ipcRenderer.invoke("faceEmbeddings", input);
const legacyFaceCrop = (faceID: string) =>
ipcRenderer.invoke("legacyFaceCrop", faceID);
@ -343,7 +343,7 @@ contextBridge.exposeInMainWorld("electron", {
clipImageEmbedding,
clipTextEmbeddingIfAvailable,
detectFaces,
faceEmbedding,
faceEmbeddings,
legacyFaceCrop,
// - Watch


@ -16,7 +16,6 @@
"chrono-node": "^2.2.6",
"date-fns": "^2",
"debounce": "^2.0.0",
"density-clustering": "^1.3.0",
"eventemitter3": "^4.0.7",
"exifr": "^7.1.3",
"fast-srp-hap": "^2.0.4",


@ -5,7 +5,7 @@ import { t } from "i18next";
import { AppContext } from "pages/_app";
import { useContext } from "react";
import { components } from "react-select";
import { IndexStatus } from "services/ml/db";
import { IndexStatus } from "services/face/db";
import { Suggestion, SuggestionType } from "types/search";
const { Menu } = components;


@ -9,8 +9,8 @@ import { useCallback, useContext, useEffect, useRef, useState } from "react";
import { components } from "react-select";
import AsyncSelect from "react-select/async";
import { InputActionMeta } from "react-select/src/types";
import { Person } from "services/face/types";
import { City } from "services/locationSearchService";
import { Person } from "services/ml/types";
import {
getAutoCompleteSuggestions,
getDefaultOptions,


@ -3,8 +3,8 @@ import { Skeleton, styled } from "@mui/material";
import { Legend } from "components/PhotoViewer/styledComponents/Legend";
import { t } from "i18next";
import React, { useEffect, useState } from "react";
import mlIDbStorage from "services/ml/db";
import { Face, Person, type MlFileData } from "services/ml/types";
import mlIDbStorage from "services/face/db";
import { Face, Person, type MlFileData } from "services/face/types";
import { EnteFile } from "types/file";
const FaceChipContainer = styled("div")`
@ -167,10 +167,7 @@ const FaceCropImageView: React.FC<FaceCropImageViewProps> = ({
.legacyFaceCrop(faceID)
/*
cachedOrNew("face-crops", cacheKey, async () => {
const user = await ensureLocalUser();
return machineLearningService.regenerateFaceCrop(
user.token,
user.id,
faceId,
);
})*/


@ -102,6 +102,10 @@ export const syncCLIPEmbeddings = async () => {
if (!response.diff?.length) {
return;
}
// Note: in rare cases we might get a diff entry for an embedding
// corresponding to a file which has been deleted (but whose
// embedding is enqueued for deletion). Clients should expect this
// scenario and can simply ignore such entries.
const newEmbeddings = await Promise.all(
response.diff.map(async (embedding) => {
try {


@ -1,31 +1,8 @@
import { Matrix } from "ml-matrix";
import { Point } from "services/ml/geom";
import {
FaceAlignment,
FaceAlignmentMethod,
FaceAlignmentService,
FaceDetection,
Versioned,
} from "services/ml/types";
import { Point } from "services/face/geom";
import { FaceAlignment, FaceDetection } from "services/face/types";
import { getSimilarityTransformation } from "similarity-transformation";
class ArcfaceAlignmentService implements FaceAlignmentService {
public method: Versioned<FaceAlignmentMethod>;
constructor() {
this.method = {
value: "ArcFace",
version: 1,
};
}
public getFaceAlignment(faceDetection: FaceDetection): FaceAlignment {
return getArcfaceAlignment(faceDetection);
}
}
export default new ArcfaceAlignmentService();
const ARCFACE_LANDMARKS = [
[38.2946, 51.6963],
[73.5318, 51.5014],
@ -43,9 +20,12 @@ const ARC_FACE_5_LANDMARKS = [
[70.7299, 92.2041],
] as Array<[number, number]>;
export function getArcfaceAlignment(
faceDetection: FaceDetection,
): FaceAlignment {
/**
* Compute and return an {@link FaceAlignment} for the given face detection.
*
* @param faceDetection A geometry indicating a face detected in an image.
*/
export const faceAlignment = (faceDetection: FaceDetection): FaceAlignment => {
const landmarkCount = faceDetection.landmarks.length;
return getFaceAlignmentUsingSimilarityTransform(
faceDetection,
@ -54,12 +34,11 @@ export function getArcfaceAlignment(
ARCFACE_LANDMARKS_FACE_SIZE,
),
);
}
};
function getFaceAlignmentUsingSimilarityTransform(
faceDetection: FaceDetection,
alignedLandmarks: Array<[number, number]>,
// alignmentMethod: Versioned<FaceAlignmentMethod>
): FaceAlignment {
const landmarksMat = new Matrix(
faceDetection.landmarks
@ -90,7 +69,6 @@ function getFaceAlignmentUsingSimilarityTransform(
simTransform.rotation.get(0, 1),
simTransform.rotation.get(0, 0),
);
// log.info({ affineMatrix, meanTranslation, centerMat, center, toMean: simTransform.toMean, fromMean: simTransform.fromMean, size });
return {
affineMatrix,


@ -0,0 +1,187 @@
import { Face } from "services/face/types";
import { createGrayscaleIntMatrixFromNormalized2List } from "utils/image";
import { mobileFaceNetFaceSize } from "./embed";
/**
* Laplacian blur detection.
*/
export const detectBlur = (
alignedFaces: Float32Array,
faces: Face[],
): number[] => {
const numFaces = Math.round(
alignedFaces.length /
(mobileFaceNetFaceSize * mobileFaceNetFaceSize * 3),
);
const blurValues: number[] = [];
for (let i = 0; i < numFaces; i++) {
const face = faces[i];
const direction = faceDirection(face);
const faceImage = createGrayscaleIntMatrixFromNormalized2List(
alignedFaces,
i,
);
const laplacian = applyLaplacian(faceImage, direction);
blurValues.push(matrixVariance(laplacian));
}
return blurValues;
};
type FaceDirection = "left" | "right" | "straight";
const faceDirection = (face: Face): FaceDirection => {
const landmarks = face.detection.landmarks;
const leftEye = landmarks[0];
const rightEye = landmarks[1];
const nose = landmarks[2];
const leftMouth = landmarks[3];
const rightMouth = landmarks[4];
const eyeDistanceX = Math.abs(rightEye.x - leftEye.x);
const eyeDistanceY = Math.abs(rightEye.y - leftEye.y);
const mouthDistanceY = Math.abs(rightMouth.y - leftMouth.y);
const faceIsUpright =
Math.max(leftEye.y, rightEye.y) + 0.5 * eyeDistanceY < nose.y &&
nose.y + 0.5 * mouthDistanceY < Math.min(leftMouth.y, rightMouth.y);
const noseStickingOutLeft =
nose.x < Math.min(leftEye.x, rightEye.x) &&
nose.x < Math.min(leftMouth.x, rightMouth.x);
const noseStickingOutRight =
nose.x > Math.max(leftEye.x, rightEye.x) &&
nose.x > Math.max(leftMouth.x, rightMouth.x);
const noseCloseToLeftEye =
Math.abs(nose.x - leftEye.x) < 0.2 * eyeDistanceX;
const noseCloseToRightEye =
Math.abs(nose.x - rightEye.x) < 0.2 * eyeDistanceX;
if (noseStickingOutLeft || (faceIsUpright && noseCloseToLeftEye)) {
return "left";
} else if (noseStickingOutRight || (faceIsUpright && noseCloseToRightEye)) {
return "right";
}
return "straight";
};
/**
* Return a new image by applying a Laplacian blur kernel to each pixel.
*/
const applyLaplacian = (
image: number[][],
direction: FaceDirection,
): number[][] => {
const paddedImage: number[][] = padImage(image, direction);
const numRows = paddedImage.length - 2;
const numCols = paddedImage[0].length - 2;
// Create an output image initialized to 0.
const outputImage: number[][] = Array.from({ length: numRows }, () =>
new Array(numCols).fill(0),
);
// Define the Laplacian kernel.
const kernel: number[][] = [
[0, 1, 0],
[1, -4, 1],
[0, 1, 0],
];
// Apply the kernel to each pixel
for (let i = 0; i < numRows; i++) {
for (let j = 0; j < numCols; j++) {
let sum = 0;
for (let ki = 0; ki < 3; ki++) {
for (let kj = 0; kj < 3; kj++) {
sum += paddedImage[i + ki][j + kj] * kernel[ki][kj];
}
}
// Adjust the output value if necessary (e.g., clipping).
outputImage[i][j] = sum;
}
}
return outputImage;
};
const padImage = (image: number[][], direction: FaceDirection): number[][] => {
const removeSideColumns = 56; /* must be even */
const numRows = image.length;
const numCols = image[0].length;
const paddedNumCols = numCols + 2 - removeSideColumns;
const paddedNumRows = numRows + 2;
// Create a new matrix with extra padding.
const paddedImage: number[][] = Array.from({ length: paddedNumRows }, () =>
new Array(paddedNumCols).fill(0),
);
if (direction === "straight") {
// Copy original image into the center of the padded image.
for (let i = 0; i < numRows; i++) {
for (let j = 0; j < paddedNumCols - 2; j++) {
paddedImage[i + 1][j + 1] =
image[i][j + Math.round(removeSideColumns / 2)];
}
}
} else if (direction === "left") {
// If the face is facing left, we only take the right side of the face image.
for (let i = 0; i < numRows; i++) {
for (let j = 0; j < paddedNumCols - 2; j++) {
paddedImage[i + 1][j + 1] = image[i][j + removeSideColumns];
}
}
} else if (direction === "right") {
// If the face is facing right, we only take the left side of the face image.
for (let i = 0; i < numRows; i++) {
for (let j = 0; j < paddedNumCols - 2; j++) {
paddedImage[i + 1][j + 1] = image[i][j];
}
}
}
// Reflect padding
// Top and bottom rows
for (let j = 1; j <= paddedNumCols - 2; j++) {
paddedImage[0][j] = paddedImage[2][j]; // Top row
paddedImage[numRows + 1][j] = paddedImage[numRows - 1][j]; // Bottom row
}
// Left and right columns
for (let i = 0; i < numRows + 2; i++) {
paddedImage[i][0] = paddedImage[i][2]; // Left column
paddedImage[i][paddedNumCols - 1] = paddedImage[i][paddedNumCols - 3]; // Right column
}
return paddedImage;
};
const matrixVariance = (matrix: number[][]): number => {
const numRows = matrix.length;
const numCols = matrix[0].length;
const totalElements = numRows * numCols;
// Calculate the mean.
let mean: number = 0;
matrix.forEach((row) => {
row.forEach((value) => {
mean += value;
});
});
mean /= totalElements;
// Calculate the variance.
let variance: number = 0;
matrix.forEach((row) => {
row.forEach((value) => {
const diff: number = value - mean;
variance += diff * diff;
});
});
variance /= totalElements;
return variance;
};
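
The blur score above is just the variance of the Laplacian response over the (padded, grayscale) face crop. A minimal illustration of why that works, using made-up pixel values rather than anything from the pipeline:

// Laplacian response at an interior pixel (same kernel as above).
const laplacianAt = (img: number[][], i: number, j: number): number =>
    img[i - 1][j] + img[i + 1][j] + img[i][j - 1] + img[i][j + 1] - 4 * img[i][j];

// A flat (blurred) patch gives a zero response everywhere, hence zero
// variance; a patch containing an edge gives a strong response, hence a
// higher variance and thus a higher sharpness score.
const flat = [
    [5, 5, 5],
    [5, 5, 5],
    [5, 5, 5],
];
const edged = [
    [0, 0, 0],
    [0, 0, 0],
    [9, 9, 9],
];
console.log(laplacianAt(flat, 1, 1)); // 0
console.log(laplacianAt(edged, 1, 1)); // 9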


@ -0,0 +1,34 @@
import { Hdbscan, type DebugInfo } from "hdbscan";
import { type Cluster } from "services/face/types";
export interface ClusterFacesResult {
clusters: Array<Cluster>;
noise: Cluster;
debugInfo?: DebugInfo;
}
/**
* Cluster the given {@link faceEmbeddings}.
*
* @param faceEmbeddings An array of embeddings produced by our face indexing
* pipeline. Each embedding is for a face detected in an image (a single image
* may have multiple faces detected within it).
*/
export const clusterFaces = async (
faceEmbeddings: Array<Array<number>>,
): Promise<ClusterFacesResult> => {
const hdbscan = new Hdbscan({
input: faceEmbeddings,
minClusterSize: 3,
minSamples: 5,
clusterSelectionEpsilon: 0.6,
clusterSelectionMethod: "leaf",
debug: true,
});
return {
clusters: hdbscan.getClusters(),
noise: hdbscan.getNoise(),
debugInfo: hdbscan.getDebugInfo(),
};
};
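
A hypothetical usage sketch (not part of this diff; collecting the embeddings is assumed to happen elsewhere): run HDBSCAN over every stored face embedding and log the result.

const logFaceClusters = async (embeddings: number[][]) => {
    const { clusters, noise } = await clusterFaces(embeddings);
    // Each cluster is an array of indices into the input `embeddings`.
    console.log(`${clusters.length} clusters, ${noise.length} noise faces`);
};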


@ -0,0 +1,32 @@
import { Box, enlargeBox } from "services/face/geom";
import { FaceCrop, FaceDetection } from "services/face/types";
import { cropWithRotation } from "utils/image";
import { faceAlignment } from "./align";
export const getFaceCrop = (
imageBitmap: ImageBitmap,
faceDetection: FaceDetection,
): FaceCrop => {
const alignment = faceAlignment(faceDetection);
const padding = 0.25;
const maxSize = 256;
const alignmentBox = new Box({
x: alignment.center.x - alignment.size / 2,
y: alignment.center.y - alignment.size / 2,
width: alignment.size,
height: alignment.size,
}).round();
const scaleForPadding = 1 + padding * 2;
const paddedBox = enlargeBox(alignmentBox, scaleForPadding).round();
const faceImageBitmap = cropWithRotation(imageBitmap, paddedBox, 0, {
width: maxSize,
height: maxSize,
});
return {
image: faceImageBitmap,
imageBox: paddedBox,
};
};
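
For intuition, the padding arithmetic used here (hypothetical numbers, not from the codebase): with padding = 0.25 the alignment box is enlarged by a factor of 1 + 2 * 0.25 = 1.5 before cropping, and the crop is rendered into an at-most 256x256 bitmap.

// E.g. an alignment of size 100 produces a ~150px-wide padded crop region.
const padding = 0.25;
const alignmentSize = 100;
const paddedSize = alignmentSize * (1 + padding * 2);
console.log(paddedSize); // 150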


@ -9,12 +9,11 @@ import {
openDB,
} from "idb";
import isElectron from "is-electron";
import { Face, MLLibraryData, MlFileData, Person } from "services/face/types";
import {
DEFAULT_ML_SEARCH_CONFIG,
DEFAULT_ML_SYNC_CONFIG,
MAX_ML_SYNC_ERROR_COUNT,
} from "services/machineLearning/machineLearningService";
import { Face, MLLibraryData, MlFileData, Person } from "services/ml/types";
export interface IndexStatus {
outOfSyncFilesExists: boolean;
@ -26,7 +25,6 @@ export interface IndexStatus {
interface Config {}
export const ML_SYNC_CONFIG_NAME = "ml-sync";
export const ML_SEARCH_CONFIG_NAME = "ml-search";
const MLDATA_DB_NAME = "mldata";
@ -141,10 +139,11 @@ class MLIDbStorage {
DEFAULT_ML_SYNC_JOB_CONFIG,
"ml-sync-job",
);
*/
await tx
.objectStore("configs")
.add(DEFAULT_ML_SYNC_CONFIG, ML_SYNC_CONFIG_NAME);
*/
}
if (oldVersion < 3) {
await tx
@ -163,6 +162,10 @@ class MLIDbStorage {
.objectStore("configs")
.delete(ML_SEARCH_CONFIG_NAME);
await tx
.objectStore("configs")
.delete(""ml-sync"");
await tx
.objectStore("configs")
.delete("ml-sync-job");


@ -0,0 +1,316 @@
import { workerBridge } from "@/next/worker/worker-bridge";
import { euclidean } from "hdbscan";
import {
Box,
Dimensions,
Point,
boxFromBoundingBox,
newBox,
} from "services/face/geom";
import { FaceDetection } from "services/face/types";
import {
Matrix,
applyToPoint,
compose,
scale,
translate,
} from "transformation-matrix";
import {
clamp,
getPixelBilinear,
normalizePixelBetween0And1,
} from "utils/image";
/**
* Detect faces in the given {@link imageBitmap}.
*
* The model used is YOLO, running in an ONNX runtime.
*/
export const detectFaces = async (
imageBitmap: ImageBitmap,
): Promise<Array<FaceDetection>> => {
const maxFaceDistancePercent = Math.sqrt(2) / 100;
const maxFaceDistance = imageBitmap.width * maxFaceDistancePercent;
const preprocessResult = preprocessImageBitmapToFloat32ChannelsFirst(
imageBitmap,
640,
640,
);
const data = preprocessResult.data;
const resized = preprocessResult.newSize;
const outputData = await workerBridge.detectFaces(data);
const faces = getFacesFromYOLOOutput(outputData as Float32Array, 0.7);
const inBox = newBox(0, 0, resized.width, resized.height);
const toBox = newBox(0, 0, imageBitmap.width, imageBitmap.height);
const transform = computeTransformToBox(inBox, toBox);
const faceDetections: Array<FaceDetection> = faces?.map((f) => {
const box = transformBox(f.box, transform);
const normLandmarks = f.landmarks;
const landmarks = transformPoints(normLandmarks, transform);
return {
box,
landmarks,
probability: f.probability as number,
} as FaceDetection;
});
return removeDuplicateDetections(faceDetections, maxFaceDistance);
};
const preprocessImageBitmapToFloat32ChannelsFirst = (
imageBitmap: ImageBitmap,
requiredWidth: number,
requiredHeight: number,
maintainAspectRatio: boolean = true,
normFunction: (pixelValue: number) => number = normalizePixelBetween0And1,
) => {
// Create an OffscreenCanvas and set its size.
const offscreenCanvas = new OffscreenCanvas(
imageBitmap.width,
imageBitmap.height,
);
const ctx = offscreenCanvas.getContext("2d");
ctx.drawImage(imageBitmap, 0, 0, imageBitmap.width, imageBitmap.height);
const imageData = ctx.getImageData(
0,
0,
imageBitmap.width,
imageBitmap.height,
);
const pixelData = imageData.data;
let scaleW = requiredWidth / imageBitmap.width;
let scaleH = requiredHeight / imageBitmap.height;
if (maintainAspectRatio) {
const scale = Math.min(
requiredWidth / imageBitmap.width,
requiredHeight / imageBitmap.height,
);
scaleW = scale;
scaleH = scale;
}
const scaledWidth = clamp(
Math.round(imageBitmap.width * scaleW),
0,
requiredWidth,
);
const scaledHeight = clamp(
Math.round(imageBitmap.height * scaleH),
0,
requiredHeight,
);
const processedImage = new Float32Array(
1 * 3 * requiredWidth * requiredHeight,
);
// Populate the Float32Array with normalized pixel values
let pixelIndex = 0;
const channelOffsetGreen = requiredHeight * requiredWidth;
const channelOffsetBlue = 2 * requiredHeight * requiredWidth;
for (let h = 0; h < requiredHeight; h++) {
for (let w = 0; w < requiredWidth; w++) {
let pixel: {
r: number;
g: number;
b: number;
};
if (w >= scaledWidth || h >= scaledHeight) {
pixel = { r: 114, g: 114, b: 114 };
} else {
pixel = getPixelBilinear(
w / scaleW,
h / scaleH,
pixelData,
imageBitmap.width,
imageBitmap.height,
);
}
processedImage[pixelIndex] = normFunction(pixel.r);
processedImage[pixelIndex + channelOffsetGreen] = normFunction(
pixel.g,
);
processedImage[pixelIndex + channelOffsetBlue] = normFunction(
pixel.b,
);
pixelIndex++;
}
}
return {
data: processedImage,
originalSize: {
width: imageBitmap.width,
height: imageBitmap.height,
},
newSize: { width: scaledWidth, height: scaledHeight },
};
};
/**
* @param rowOutput A Float32Array of shape [25200, 16], where each row
* represents a bounding box.
*/
const getFacesFromYOLOOutput = (
rowOutput: Float32Array,
minScore: number,
): Array<FaceDetection> => {
const faces: Array<FaceDetection> = [];
// Iterate over each row.
for (let i = 0; i < rowOutput.length; i += 16) {
const score = rowOutput[i + 4];
if (score < minScore) {
continue;
}
// The first 4 values represent the bounding box's coordinates:
//
// (x1, y1, x2, y2)
//
const xCenter = rowOutput[i];
const yCenter = rowOutput[i + 1];
const width = rowOutput[i + 2];
const height = rowOutput[i + 3];
const xMin = xCenter - width / 2.0; // topLeft
const yMin = yCenter - height / 2.0; // topLeft
const leftEyeX = rowOutput[i + 5];
const leftEyeY = rowOutput[i + 6];
const rightEyeX = rowOutput[i + 7];
const rightEyeY = rowOutput[i + 8];
const noseX = rowOutput[i + 9];
const noseY = rowOutput[i + 10];
const leftMouthX = rowOutput[i + 11];
const leftMouthY = rowOutput[i + 12];
const rightMouthX = rowOutput[i + 13];
const rightMouthY = rowOutput[i + 14];
const box = new Box({
x: xMin,
y: yMin,
width: width,
height: height,
});
const probability = score as number;
const landmarks = [
new Point(leftEyeX, leftEyeY),
new Point(rightEyeX, rightEyeY),
new Point(noseX, noseY),
new Point(leftMouthX, leftMouthY),
new Point(rightMouthX, rightMouthY),
];
faces.push({ box, landmarks, probability });
}
return faces;
};
export const getRelativeDetection = (
faceDetection: FaceDetection,
dimensions: Dimensions,
): FaceDetection => {
const oldBox: Box = faceDetection.box;
const box = new Box({
x: oldBox.x / dimensions.width,
y: oldBox.y / dimensions.height,
width: oldBox.width / dimensions.width,
height: oldBox.height / dimensions.height,
});
const oldLandmarks: Point[] = faceDetection.landmarks;
const landmarks = oldLandmarks.map((l) => {
return new Point(l.x / dimensions.width, l.y / dimensions.height);
});
const probability = faceDetection.probability;
return { box, landmarks, probability };
};
/**
* Removes duplicate face detections from an array of detections.
*
* This function sorts the detections by their probability in descending order,
* then iterates over them.
*
* For each detection, it calculates the Euclidean distance to all other
* detections.
*
* If the distance is less than or equal to the specified threshold
* (`withinDistance`), the other detection is considered a duplicate and is
* removed.
*
* @param detections - An array of face detections to remove duplicates from.
*
* @param withinDistance - The maximum Euclidean distance between two detections
* for them to be considered duplicates.
*
* @returns An array of face detections with duplicates removed.
*/
const removeDuplicateDetections = (
detections: Array<FaceDetection>,
withinDistance: number,
) => {
detections.sort((a, b) => b.probability - a.probability);
const isSelected = new Map<number, boolean>();
for (let i = 0; i < detections.length; i++) {
if (isSelected.get(i) === false) {
continue;
}
isSelected.set(i, true);
for (let j = i + 1; j < detections.length; j++) {
if (isSelected.get(j) === false) {
continue;
}
const centeri = getDetectionCenter(detections[i]);
const centerj = getDetectionCenter(detections[j]);
const dist = euclidean(
[centeri.x, centeri.y],
[centerj.x, centerj.y],
);
if (dist <= withinDistance) {
isSelected.set(j, false);
}
}
}
const uniques: Array<FaceDetection> = [];
for (let i = 0; i < detections.length; i++) {
isSelected.get(i) && uniques.push(detections[i]);
}
return uniques;
};
function getDetectionCenter(detection: FaceDetection) {
const center = new Point(0, 0);
// TODO: using the first 4 landmarks is applicable to BlazeFace only;
// this needs to consider the eye, nose and mouth landmarks to compute the center
detection.landmarks?.slice(0, 4).forEach((p) => {
center.x += p.x;
center.y += p.y;
});
return new Point(center.x / 4, center.y / 4);
}
function computeTransformToBox(inBox: Box, toBox: Box): Matrix {
return compose(
translate(toBox.x, toBox.y),
scale(toBox.width / inBox.width, toBox.height / inBox.height),
);
}
function transformPoint(point: Point, transform: Matrix) {
const txdPoint = applyToPoint(transform, point);
return new Point(txdPoint.x, txdPoint.y);
}
function transformPoints(points: Point[], transform: Matrix) {
return points?.map((p) => transformPoint(p, transform));
}
function transformBox(box: Box, transform: Matrix) {
const topLeft = transformPoint(box.topLeft, transform);
const bottomRight = transformPoint(box.bottomRight, transform);
return boxFromBoundingBox({
left: topLeft.x,
top: topLeft.y,
right: bottomRight.x,
bottom: bottomRight.y,
});
}
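
A small sketch (hypothetical numbers, using the helpers defined in this module) of how detections are mapped back from the model's 640x640 input space to the original image coordinates:

// Assume a 4000x3000 image that was resized (aspect preserved) to 640x480
// for the model; the transform scales detections back up by 4000/640 = 6.25.
const resized = newBox(0, 0, 640, 480);
const original = newBox(0, 0, 4000, 3000);
const toOriginal = computeTransformToBox(resized, original);
console.log(applyToPoint(toOriginal, { x: 320, y: 240 })); // { x: 2000, y: 1500 }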


@ -0,0 +1,26 @@
import { workerBridge } from "@/next/worker/worker-bridge";
import { FaceEmbedding } from "services/face/types";
export const mobileFaceNetFaceSize = 112;
/**
* Compute embeddings for the given {@link faceData}.
*
* The model used is MobileFaceNet, running in an ONNX runtime.
*/
export const faceEmbeddings = async (
faceData: Float32Array,
): Promise<Array<FaceEmbedding>> => {
const outputData = await workerBridge.faceEmbeddings(faceData);
const embeddingSize = 192;
const embeddings = new Array<FaceEmbedding>(
outputData.length / embeddingSize,
);
for (let i = 0; i < embeddings.length; i++) {
embeddings[i] = new Float32Array(
outputData.slice(i * embeddingSize, (i + 1) * embeddingSize),
);
}
return embeddings;
};
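
A rough shape check (hypothetical, just to spell out the layout assumed above): the input packs N aligned faces as N * 112 * 112 * 3 floats, and the flat output is sliced into N embeddings of 192 floats each.

const embeddingsForTwoFaces = async () => {
    const n = 2;
    const input = new Float32Array(
        n * mobileFaceNetFaceSize * mobileFaceNetFaceSize * 3,
    );
    const embeddings = await faceEmbeddings(input);
    console.log(embeddings.length, embeddings[0].length); // expected: 2 192
};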


@ -0,0 +1,194 @@
import { openCache } from "@/next/blob-cache";
import log from "@/next/log";
import { faceAlignment } from "services/face/align";
import mlIDbStorage from "services/face/db";
import { detectFaces, getRelativeDetection } from "services/face/detect";
import { faceEmbeddings, mobileFaceNetFaceSize } from "services/face/embed";
import {
DetectedFace,
Face,
MLSyncFileContext,
type FaceAlignment,
} from "services/face/types";
import { imageBitmapToBlob, warpAffineFloat32List } from "utils/image";
import { detectBlur } from "./blur";
import { getFaceCrop } from "./crop";
import {
fetchImageBitmap,
fetchImageBitmapForContext,
getFaceId,
getLocalFile,
} from "./image";
export const syncFileAnalyzeFaces = async (fileContext: MLSyncFileContext) => {
const { newMlFile } = fileContext;
const startTime = Date.now();
await syncFileFaceDetections(fileContext);
if (newMlFile.faces && newMlFile.faces.length > 0) {
await syncFileFaceCrops(fileContext);
const alignedFacesData = await syncFileFaceAlignments(fileContext);
await syncFileFaceEmbeddings(fileContext, alignedFacesData);
await syncFileFaceMakeRelativeDetections(fileContext);
}
log.debug(
() =>
`Face detection for file ${fileContext.enteFile.id} took ${Math.round(Date.now() - startTime)} ms`,
);
};
const syncFileFaceDetections = async (fileContext: MLSyncFileContext) => {
const { newMlFile } = fileContext;
newMlFile.faceDetectionMethod = {
value: "YoloFace",
version: 1,
};
fileContext.newDetection = true;
const imageBitmap = await fetchImageBitmapForContext(fileContext);
const faceDetections = await detectFaces(imageBitmap);
// TODO: reenable faces filtering based on width
const detectedFaces = faceDetections?.map((detection) => {
return {
fileId: fileContext.enteFile.id,
detection,
} as DetectedFace;
});
newMlFile.faces = detectedFaces?.map((detectedFace) => ({
...detectedFace,
id: getFaceId(detectedFace, newMlFile.imageDimensions),
}));
// ?.filter((f) =>
// f.box.width > syncContext.config.faceDetection.minFaceSize
// );
log.info("[MLService] Detected Faces: ", newMlFile.faces?.length);
};
const syncFileFaceCrops = async (fileContext: MLSyncFileContext) => {
const { newMlFile } = fileContext;
const imageBitmap = await fetchImageBitmapForContext(fileContext);
newMlFile.faceCropMethod = {
value: "ArcFace",
version: 1,
};
for (const face of newMlFile.faces) {
await saveFaceCrop(imageBitmap, face);
}
};
const syncFileFaceAlignments = async (
fileContext: MLSyncFileContext,
): Promise<Float32Array> => {
const { newMlFile } = fileContext;
newMlFile.faceAlignmentMethod = {
value: "ArcFace",
version: 1,
};
fileContext.newAlignment = true;
const imageBitmap =
fileContext.imageBitmap ||
(await fetchImageBitmapForContext(fileContext));
// Execute the face alignment calculations
for (const face of newMlFile.faces) {
face.alignment = faceAlignment(face.detection);
}
// Extract face images and convert to Float32Array
const faceAlignments = newMlFile.faces.map((f) => f.alignment);
const faceImages = await extractFaceImagesToFloat32(
faceAlignments,
mobileFaceNetFaceSize,
imageBitmap,
);
const blurValues = detectBlur(faceImages, newMlFile.faces);
newMlFile.faces.forEach((f, i) => (f.blurValue = blurValues[i]));
imageBitmap.close();
log.info("[MLService] alignedFaces: ", newMlFile.faces?.length);
return faceImages;
};
const syncFileFaceEmbeddings = async (
fileContext: MLSyncFileContext,
alignedFacesInput: Float32Array,
) => {
const { newMlFile } = fileContext;
newMlFile.faceEmbeddingMethod = {
value: "MobileFaceNet",
version: 2,
};
// TODO: when not storing face crops, image will be needed to extract faces
// fileContext.imageBitmap ||
// (await this.getImageBitmap(fileContext));
const embeddings = await faceEmbeddings(alignedFacesInput);
newMlFile.faces.forEach((f, i) => (f.embedding = embeddings[i]));
log.info("[MLService] facesWithEmbeddings: ", newMlFile.faces.length);
};
const syncFileFaceMakeRelativeDetections = async (
fileContext: MLSyncFileContext,
) => {
const { newMlFile } = fileContext;
for (let i = 0; i < newMlFile.faces.length; i++) {
const face = newMlFile.faces[i];
if (face.detection.box.x + face.detection.box.width < 2) continue; // Skip if somehow already relative
face.detection = getRelativeDetection(
face.detection,
newMlFile.imageDimensions,
);
}
};
export const saveFaceCrop = async (imageBitmap: ImageBitmap, face: Face) => {
const faceCrop = getFaceCrop(imageBitmap, face.detection);
const blob = await imageBitmapToBlob(faceCrop.image);
const cache = await openCache("face-crops");
await cache.put(face.id, blob);
faceCrop.image.close();
return blob;
};
export const regenerateFaceCrop = async (faceID: string) => {
const fileID = Number(faceID.split("-")[0]);
const personFace = await mlIDbStorage.getFace(fileID, faceID);
if (!personFace) {
throw Error("Face not found");
}
const file = await getLocalFile(personFace.fileId);
const imageBitmap = await fetchImageBitmap(file);
return await saveFaceCrop(imageBitmap, personFace);
};
async function extractFaceImagesToFloat32(
faceAlignments: Array<FaceAlignment>,
faceSize: number,
image: ImageBitmap,
): Promise<Float32Array> {
const faceData = new Float32Array(
faceAlignments.length * faceSize * faceSize * 3,
);
for (let i = 0; i < faceAlignments.length; i++) {
const alignedFace = faceAlignments[i];
const faceDataOffset = i * faceSize * faceSize * 3;
warpAffineFloat32List(
image,
alignedFace,
faceSize,
faceData,
faceDataOffset,
);
}
return faceData;
}
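
Note that regenerateFaceCrop above recovers the file ID from the prefix of the face ID, i.e. it relies on face IDs having the form "<fileID>-<suffix>" (the suffix being whatever getFaceId produces). A made-up value, just to show the parsing:

const faceID = "12345-abc"; // hypothetical example of "<fileID>-<suffix>"
const fileID = Number(faceID.split("-")[0]);
console.log(fileID); // 12345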


@ -1,14 +1,10 @@
import log from "@/next/log";
import { APPS } from "@ente/shared/apps/constants";
import { expose } from "comlink";
import downloadManager from "services/download";
import mlService from "services/machineLearning/machineLearningService";
import { MachineLearningWorker } from "services/ml/types";
import { EnteFile } from "types/file";
export class DedicatedMLWorker implements MachineLearningWorker {
constructor() {
log.info("DedicatedMLWorker constructor called");
}
export class DedicatedMLWorker {
public async closeLocalSyncContext() {
return mlService.closeLocalSyncContext();
}
@ -19,23 +15,17 @@ export class DedicatedMLWorker implements MachineLearningWorker {
enteFile: EnteFile,
localFile: globalThis.File,
) {
return mlService.syncLocalFile(token, userID, enteFile, localFile);
mlService.syncLocalFile(token, userID, enteFile, localFile);
}
public async sync(token: string, userID: number) {
await downloadManager.init(APPS.PHOTOS, { token });
return mlService.sync(token, userID);
}
public async regenerateFaceCrop(
token: string,
userID: number,
faceID: string,
) {
return mlService.regenerateFaceCrop(token, userID, faceID);
}
public close() {
self.close();
public async regenerateFaceCrop(token: string, faceID: string) {
await downloadManager.init(APPS.PHOTOS, { token });
return mlService.regenerateFaceCrop(faceID);
}
}


@ -2,65 +2,47 @@ import { FILE_TYPE } from "@/media/file-type";
import { decodeLivePhoto } from "@/media/live-photo";
import log from "@/next/log";
import DownloadManager from "services/download";
import { Dimensions } from "services/face/geom";
import { DetectedFace, MLSyncFileContext } from "services/face/types";
import { getLocalFiles } from "services/fileService";
import { Dimensions } from "services/ml/geom";
import {
DetectedFace,
MLSyncContext,
MLSyncFileContext,
} from "services/ml/types";
import { EnteFile } from "types/file";
import { getRenderableImage } from "utils/file";
import { clamp } from "utils/image";
class ReaderService {
async getImageBitmap(
syncContext: MLSyncContext,
fileContext: MLSyncFileContext,
) {
try {
if (fileContext.imageBitmap) {
return fileContext.imageBitmap;
}
if (fileContext.localFile) {
if (
fileContext.enteFile.metadata.fileType !== FILE_TYPE.IMAGE
) {
throw new Error(
"Local file of only image type is supported",
);
}
fileContext.imageBitmap = await getLocalFileImageBitmap(
fileContext.enteFile,
fileContext.localFile,
);
} else if (
syncContext.config.imageSource === "Original" &&
[FILE_TYPE.IMAGE, FILE_TYPE.LIVE_PHOTO].includes(
fileContext.enteFile.metadata.fileType,
)
) {
fileContext.imageBitmap = await fetchImageBitmap(
fileContext.enteFile,
);
} else {
fileContext.imageBitmap = await getThumbnailImageBitmap(
fileContext.enteFile,
);
}
fileContext.newMlFile.imageSource = syncContext.config.imageSource;
const { width, height } = fileContext.imageBitmap;
fileContext.newMlFile.imageDimensions = { width, height };
return fileContext.imageBitmap;
} catch (e) {
log.error("failed to create image bitmap", e);
throw e;
}
export const fetchImageBitmapForContext = async (
fileContext: MLSyncFileContext,
) => {
if (fileContext.imageBitmap) {
return fileContext.imageBitmap;
}
}
export default new ReaderService();
if (fileContext.localFile) {
if (fileContext.enteFile.metadata.fileType !== FILE_TYPE.IMAGE) {
throw new Error("Local file of only image type is supported");
}
fileContext.imageBitmap = await getLocalFileImageBitmap(
fileContext.enteFile,
fileContext.localFile,
);
} else if (
[FILE_TYPE.IMAGE, FILE_TYPE.LIVE_PHOTO].includes(
fileContext.enteFile.metadata.fileType,
)
) {
fileContext.imageBitmap = await fetchImageBitmap(fileContext.enteFile);
} else {
// TODO-ML(MR): We don't do this for videos, so when would we ever come
// here?
fileContext.imageBitmap = await getThumbnailImageBitmap(
fileContext.enteFile,
);
}
fileContext.newMlFile.imageSource = "Original";
const { width, height } = fileContext.imageBitmap;
fileContext.newMlFile.imageDimensions = { width, height };
return fileContext.imageBitmap;
};
export async function getLocalFile(fileId: number) {
const localFiles = await getLocalFiles();


@ -1,5 +1,5 @@
import { ComlinkWorker } from "@/next/worker/comlink-worker";
import type { DedicatedMLWorker } from "services/ml/face.worker";
import type { DedicatedMLWorker } from "services/face/face.worker";
const createFaceWebWorker = () =>
new Worker(new URL("face.worker.ts", import.meta.url));


@ -0,0 +1,111 @@
import log from "@/next/log";
import mlIDbStorage from "services/face/db";
import { Face, Person } from "services/face/types";
import { type MLSyncContext } from "services/machineLearning/machineLearningService";
import { clusterFaces } from "./cluster";
import { saveFaceCrop } from "./f-index";
import { fetchImageBitmap, getLocalFile } from "./image";
export const syncPeopleIndex = async (syncContext: MLSyncContext) => {
const filesVersion = await mlIDbStorage.getIndexVersion("files");
if (filesVersion <= (await mlIDbStorage.getIndexVersion("people"))) {
return;
}
// TODO: have faces addressable through fileId + faceId
// to avoid index based addressing, which is prone to wrong results
// one way could be to match nearest face within threshold in the file
const allFacesMap =
syncContext.allSyncedFacesMap ??
(syncContext.allSyncedFacesMap = await mlIDbStorage.getAllFacesMap());
const allFaces = [...allFacesMap.values()].flat();
await runFaceClustering(syncContext, allFaces);
await syncPeopleFromClusters(syncContext, allFacesMap, allFaces);
await mlIDbStorage.setIndexVersion("people", filesVersion);
};
const runFaceClustering = async (
syncContext: MLSyncContext,
allFaces: Array<Face>,
) => {
// await this.init();
if (!allFaces || allFaces.length < 50) {
log.info(
`Skipping clustering since number of faces (${allFaces.length}) is less than the clustering threshold (50)`,
);
return;
}
log.info("Running clustering allFaces: ", allFaces.length);
syncContext.mlLibraryData.faceClusteringResults = await clusterFaces(
allFaces.map((f) => Array.from(f.embedding)),
);
syncContext.mlLibraryData.faceClusteringMethod = {
value: "Hdbscan",
version: 1,
};
log.info(
"[MLService] Got face clustering results: ",
JSON.stringify(syncContext.mlLibraryData.faceClusteringResults),
);
// syncContext.faceClustersWithNoise = {
// clusters: syncContext.faceClusteringResults.clusters.map(
// (faces) => ({
// faces,
// })
// ),
// noise: syncContext.faceClusteringResults.noise,
// };
};
const syncPeopleFromClusters = async (
syncContext: MLSyncContext,
allFacesMap: Map<number, Array<Face>>,
allFaces: Array<Face>,
) => {
const clusters = syncContext.mlLibraryData.faceClusteringResults?.clusters;
if (!clusters || clusters.length < 1) {
return;
}
for (const face of allFaces) {
face.personId = undefined;
}
await mlIDbStorage.clearAllPeople();
for (const [index, cluster] of clusters.entries()) {
const faces = cluster.map((f) => allFaces[f]).filter((f) => f);
// TODO: take default display face from last leaves of hdbscan clusters
const personFace = faces.reduce((best, face) =>
face.detection.probability > best.detection.probability
? face
: best,
);
if (personFace && !personFace.crop?.cacheKey) {
const file = await getLocalFile(personFace.fileId);
const imageBitmap = await fetchImageBitmap(file);
await saveFaceCrop(imageBitmap, personFace);
}
const person: Person = {
id: index,
files: faces.map((f) => f.fileId),
displayFaceId: personFace?.id,
faceCropCacheKey: personFace?.crop?.cacheKey,
};
await mlIDbStorage.putPerson(person);
faces.forEach((face) => {
face.personId = person.id;
});
// log.info("Creating person: ", person, faces);
}
await mlIDbStorage.updateFaces(allFacesMap);
};
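
Since a Cluster is just an array of indices into the flattened face list (see services/face/types), the mapping done above can be illustrated with made-up values (assuming allFaces is the array built earlier in syncPeopleIndex):

// Hypothetical: cluster [3, 7, 19] groups the 4th, 8th and 20th detected
// faces into one person; the display face is the member with the highest
// detection probability.
const cluster = [3, 7, 19];
const members = cluster.map((i) => allFaces[i]).filter((f) => f);
const displayFace = members.reduce((best, face) =>
    face.detection.probability > best.detection.probability ? face : best,
);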


@ -0,0 +1,161 @@
import type { ClusterFacesResult } from "services/face/cluster";
import { Dimensions } from "services/face/geom";
import { EnteFile } from "types/file";
import { Box, Point } from "./geom";
export interface MLSyncResult {
nOutOfSyncFiles: number;
nSyncedFiles: number;
nSyncedFaces: number;
nFaceClusters: number;
nFaceNoise: number;
error?: Error;
}
export declare type FaceDescriptor = Float32Array;
export declare type Cluster = Array<number>;
export interface FacesCluster {
faces: Cluster;
summary?: FaceDescriptor;
}
export interface FacesClustersWithNoise {
clusters: Array<FacesCluster>;
noise: Cluster;
}
export interface NearestCluster {
cluster: FacesCluster;
distance: number;
}
export declare type Landmark = Point;
export declare type ImageType = "Original" | "Preview";
export declare type FaceDetectionMethod = "YoloFace";
export declare type FaceCropMethod = "ArcFace";
export declare type FaceAlignmentMethod = "ArcFace";
export declare type FaceEmbeddingMethod = "MobileFaceNet";
export declare type BlurDetectionMethod = "Laplacian";
export declare type ClusteringMethod = "Hdbscan" | "Dbscan";
export class AlignedBox {
box: Box;
rotation: number;
}
export interface Versioned<T> {
value: T;
version: number;
}
export interface FaceDetection {
// box and landmarks are relative to the image dimensions stored at mlFileData
box: Box;
landmarks?: Array<Landmark>;
probability?: number;
}
export interface DetectedFace {
fileId: number;
detection: FaceDetection;
}
export interface DetectedFaceWithId extends DetectedFace {
id: string;
}
export interface FaceCrop {
image: ImageBitmap;
// imageBox is relative to the image dimensions stored at mlFileData
imageBox: Box;
}
export interface StoredFaceCrop {
cacheKey: string;
imageBox: Box;
}
export interface CroppedFace extends DetectedFaceWithId {
crop?: StoredFaceCrop;
}
export interface FaceAlignment {
// TODO: remove the affine matrix; rotation, size and center are simpler
// to store and use, while the affine matrix adds complexity when computing the crop
affineMatrix: Array<Array<number>>;
rotation: number;
// size and center are relative to the image dimensions stored at mlFileData
size: number;
center: Point;
}
export interface AlignedFace extends CroppedFace {
alignment?: FaceAlignment;
blurValue?: number;
}
export declare type FaceEmbedding = Float32Array;
export interface FaceWithEmbedding extends AlignedFace {
embedding?: FaceEmbedding;
}
export interface Face extends FaceWithEmbedding {
personId?: number;
}
export interface Person {
id: number;
name?: string;
files: Array<number>;
displayFaceId?: string;
faceCropCacheKey?: string;
}
export interface MlFileData {
fileId: number;
faces?: Face[];
imageSource?: ImageType;
imageDimensions?: Dimensions;
faceDetectionMethod?: Versioned<FaceDetectionMethod>;
faceCropMethod?: Versioned<FaceCropMethod>;
faceAlignmentMethod?: Versioned<FaceAlignmentMethod>;
faceEmbeddingMethod?: Versioned<FaceEmbeddingMethod>;
mlVersion: number;
errorCount: number;
lastErrorMessage?: string;
}
export interface MLSearchConfig {
enabled: boolean;
}
export interface MLSyncFileContext {
enteFile: EnteFile;
localFile?: globalThis.File;
oldMlFile?: MlFileData;
newMlFile?: MlFileData;
imageBitmap?: ImageBitmap;
newDetection?: boolean;
newAlignment?: boolean;
}
export interface MLLibraryData {
faceClusteringMethod?: Versioned<ClusteringMethod>;
faceClusteringResults?: ClusterFacesResult;
faceClustersWithNoise?: FacesClustersWithNoise;
}
export declare type MLIndex = "files" | "people";
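
A minimal value conforming to these types (all values made up), to make the required vs. optional fields concrete: only id, fileId and detection are mandatory on a Face; crop, alignment, blurValue, embedding and personId are filled in by later pipeline stages.

import { Box } from "services/face/geom";

const face: Face = {
    id: "12345-abc", // hypothetical "<fileID>-<suffix>" face ID
    fileId: 12345,
    detection: {
        box: new Box({ x: 0.1, y: 0.2, width: 0.3, height: 0.3 }),
        landmarks: [],
        probability: 0.9,
    },
};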


@ -1,60 +0,0 @@
import { Box, enlargeBox } from "services/ml/geom";
import {
FaceAlignment,
FaceCrop,
FaceCropConfig,
FaceCropMethod,
FaceCropService,
FaceDetection,
Versioned,
} from "services/ml/types";
import { cropWithRotation } from "utils/image";
import { getArcfaceAlignment } from "./arcfaceAlignmentService";
class ArcFaceCropService implements FaceCropService {
public method: Versioned<FaceCropMethod>;
constructor() {
this.method = {
value: "ArcFace",
version: 1,
};
}
public async getFaceCrop(
imageBitmap: ImageBitmap,
faceDetection: FaceDetection,
config: FaceCropConfig,
): Promise<FaceCrop> {
const alignedFace = getArcfaceAlignment(faceDetection);
const faceCrop = getFaceCrop(imageBitmap, alignedFace, config);
return faceCrop;
}
}
export default new ArcFaceCropService();
export function getFaceCrop(
imageBitmap: ImageBitmap,
alignment: FaceAlignment,
config: FaceCropConfig,
): FaceCrop {
const alignmentBox = new Box({
x: alignment.center.x - alignment.size / 2,
y: alignment.center.y - alignment.size / 2,
width: alignment.size,
height: alignment.size,
}).round();
const scaleForPadding = 1 + config.padding * 2;
const paddedBox = enlargeBox(alignmentBox, scaleForPadding).round();
const faceImageBitmap = cropWithRotation(imageBitmap, paddedBox, 0, {
width: config.maxSize,
height: config.maxSize,
});
return {
image: faceImageBitmap,
imageBox: paddedBox,
};
}


@ -1,88 +0,0 @@
import { DBSCAN, KMEANS, OPTICS } from "density-clustering";
import { Hdbscan } from "hdbscan";
import { HdbscanInput } from "hdbscan/dist/types";
import {
ClusteringConfig,
ClusteringInput,
ClusteringMethod,
ClusteringResults,
HdbscanResults,
Versioned,
} from "services/ml/types";
class ClusteringService {
private dbscan: DBSCAN;
private optics: OPTICS;
private kmeans: KMEANS;
constructor() {
this.dbscan = new DBSCAN();
this.optics = new OPTICS();
this.kmeans = new KMEANS();
}
public clusterUsingDBSCAN(
dataset: Array<Array<number>>,
epsilon: number = 1.0,
minPts: number = 2,
): ClusteringResults {
// log.info("distanceFunction", DBSCAN._);
const clusters = this.dbscan.run(dataset, epsilon, minPts);
const noise = this.dbscan.noise;
return { clusters, noise };
}
public clusterUsingOPTICS(
dataset: Array<Array<number>>,
epsilon: number = 1.0,
minPts: number = 2,
) {
const clusters = this.optics.run(dataset, epsilon, minPts);
return { clusters, noise: [] };
}
public clusterUsingKMEANS(
dataset: Array<Array<number>>,
numClusters: number = 5,
) {
const clusters = this.kmeans.run(dataset, numClusters);
return { clusters, noise: [] };
}
public clusterUsingHdbscan(hdbscanInput: HdbscanInput): HdbscanResults {
if (hdbscanInput.input.length < 10) {
throw Error("too few samples to run Hdbscan");
}
const hdbscan = new Hdbscan(hdbscanInput);
const clusters = hdbscan.getClusters();
const noise = hdbscan.getNoise();
const debugInfo = hdbscan.getDebugInfo();
return { clusters, noise, debugInfo };
}
public cluster(
method: Versioned<ClusteringMethod>,
input: ClusteringInput,
config: ClusteringConfig,
) {
if (method.value === "Hdbscan") {
return this.clusterUsingHdbscan({
input,
minClusterSize: config.minClusterSize,
debug: config.generateDebugInfo,
});
} else if (method.value === "Dbscan") {
return this.clusterUsingDBSCAN(
input,
config.maxDistanceInsideCluster,
config.minClusterSize,
);
} else {
throw Error("Unknown clustering method: " + method.value);
}
}
}
export default ClusteringService;


@ -1,37 +0,0 @@
import { DBSCAN } from "density-clustering";
import {
ClusteringConfig,
ClusteringInput,
ClusteringMethod,
ClusteringService,
HdbscanResults,
Versioned,
} from "services/ml/types";
class DbscanClusteringService implements ClusteringService {
public method: Versioned<ClusteringMethod>;
constructor() {
this.method = {
value: "Dbscan",
version: 1,
};
}
public async cluster(
input: ClusteringInput,
config: ClusteringConfig,
): Promise<HdbscanResults> {
// log.info('Clustering input: ', input);
const dbscan = new DBSCAN();
const clusters = dbscan.run(
input,
config.clusterSelectionEpsilon,
config.minClusterSize,
);
const noise = dbscan.noise;
return { clusters, noise };
}
}
export default new DbscanClusteringService();


@ -1,359 +0,0 @@
import { openCache } from "@/next/blob-cache";
import log from "@/next/log";
import mlIDbStorage from "services/ml/db";
import {
DetectedFace,
Face,
MLSyncContext,
MLSyncFileContext,
type FaceAlignment,
type Versioned,
} from "services/ml/types";
import { imageBitmapToBlob, warpAffineFloat32List } from "utils/image";
import ReaderService, {
fetchImageBitmap,
getFaceId,
getLocalFile,
} from "./readerService";
class FaceService {
async syncFileFaceDetections(
syncContext: MLSyncContext,
fileContext: MLSyncFileContext,
) {
const { oldMlFile, newMlFile } = fileContext;
if (
!isDifferentOrOld(
oldMlFile?.faceDetectionMethod,
syncContext.faceDetectionService.method,
) &&
oldMlFile?.imageSource === syncContext.config.imageSource
) {
newMlFile.faces = oldMlFile?.faces?.map((existingFace) => ({
id: existingFace.id,
fileId: existingFace.fileId,
detection: existingFace.detection,
}));
newMlFile.imageSource = oldMlFile.imageSource;
newMlFile.imageDimensions = oldMlFile.imageDimensions;
newMlFile.faceDetectionMethod = oldMlFile.faceDetectionMethod;
return;
}
newMlFile.faceDetectionMethod = syncContext.faceDetectionService.method;
fileContext.newDetection = true;
const imageBitmap = await ReaderService.getImageBitmap(
syncContext,
fileContext,
);
const timerId = `faceDetection-${fileContext.enteFile.id}`;
console.time(timerId);
const faceDetections =
await syncContext.faceDetectionService.detectFaces(imageBitmap);
console.timeEnd(timerId);
console.log("faceDetections: ", faceDetections?.length);
// TODO: reenable faces filtering based on width
const detectedFaces = faceDetections?.map((detection) => {
return {
fileId: fileContext.enteFile.id,
detection,
} as DetectedFace;
});
newMlFile.faces = detectedFaces?.map((detectedFace) => ({
...detectedFace,
id: getFaceId(detectedFace, newMlFile.imageDimensions),
}));
// ?.filter((f) =>
// f.box.width > syncContext.config.faceDetection.minFaceSize
// );
log.info("[MLService] Detected Faces: ", newMlFile.faces?.length);
}
async syncFileFaceCrops(
syncContext: MLSyncContext,
fileContext: MLSyncFileContext,
) {
const { oldMlFile, newMlFile } = fileContext;
if (
// !syncContext.config.faceCrop.enabled ||
!fileContext.newDetection &&
!isDifferentOrOld(
oldMlFile?.faceCropMethod,
syncContext.faceCropService.method,
) &&
areFaceIdsSame(newMlFile.faces, oldMlFile?.faces)
) {
for (const [index, face] of newMlFile.faces.entries()) {
face.crop = oldMlFile.faces[index].crop;
}
newMlFile.faceCropMethod = oldMlFile.faceCropMethod;
return;
}
const imageBitmap = await ReaderService.getImageBitmap(
syncContext,
fileContext,
);
newMlFile.faceCropMethod = syncContext.faceCropService.method;
for (const face of newMlFile.faces) {
await this.saveFaceCrop(imageBitmap, face, syncContext);
}
}
async syncFileFaceAlignments(
syncContext: MLSyncContext,
fileContext: MLSyncFileContext,
): Promise<Float32Array> {
const { oldMlFile, newMlFile } = fileContext;
if (
!fileContext.newDetection &&
!isDifferentOrOld(
oldMlFile?.faceAlignmentMethod,
syncContext.faceAlignmentService.method,
) &&
areFaceIdsSame(newMlFile.faces, oldMlFile?.faces)
) {
for (const [index, face] of newMlFile.faces.entries()) {
face.alignment = oldMlFile.faces[index].alignment;
}
newMlFile.faceAlignmentMethod = oldMlFile.faceAlignmentMethod;
return;
}
newMlFile.faceAlignmentMethod = syncContext.faceAlignmentService.method;
fileContext.newAlignment = true;
const imageBitmap =
fileContext.imageBitmap ||
(await ReaderService.getImageBitmap(syncContext, fileContext));
// Execute the face alignment calculations
for (const face of newMlFile.faces) {
face.alignment = syncContext.faceAlignmentService.getFaceAlignment(
face.detection,
);
}
// Extract face images and convert to Float32Array
const faceAlignments = newMlFile.faces.map((f) => f.alignment);
const faceImages = await extractFaceImagesToFloat32(
faceAlignments,
syncContext.faceEmbeddingService.faceSize,
imageBitmap,
);
const blurValues = syncContext.blurDetectionService.detectBlur(
faceImages,
newMlFile.faces,
);
newMlFile.faces.forEach((f, i) => (f.blurValue = blurValues[i]));
imageBitmap.close();
log.info("[MLService] alignedFaces: ", newMlFile.faces?.length);
return faceImages;
}
async syncFileFaceEmbeddings(
syncContext: MLSyncContext,
fileContext: MLSyncFileContext,
alignedFacesInput: Float32Array,
) {
const { oldMlFile, newMlFile } = fileContext;
if (
!fileContext.newAlignment &&
!isDifferentOrOld(
oldMlFile?.faceEmbeddingMethod,
syncContext.faceEmbeddingService.method,
) &&
areFaceIdsSame(newMlFile.faces, oldMlFile?.faces)
) {
for (const [index, face] of newMlFile.faces.entries()) {
face.embedding = oldMlFile.faces[index].embedding;
}
newMlFile.faceEmbeddingMethod = oldMlFile.faceEmbeddingMethod;
return;
}
newMlFile.faceEmbeddingMethod = syncContext.faceEmbeddingService.method;
// TODO: when not storing face crops, image will be needed to extract faces
// fileContext.imageBitmap ||
// (await this.getImageBitmap(syncContext, fileContext));
const embeddings =
await syncContext.faceEmbeddingService.getFaceEmbeddings(
alignedFacesInput,
);
newMlFile.faces.forEach((f, i) => (f.embedding = embeddings[i]));
log.info("[MLService] facesWithEmbeddings: ", newMlFile.faces.length);
}
async syncFileFaceMakeRelativeDetections(
syncContext: MLSyncContext,
fileContext: MLSyncFileContext,
) {
const { oldMlFile, newMlFile } = fileContext;
if (
!fileContext.newAlignment &&
!isDifferentOrOld(
oldMlFile?.faceEmbeddingMethod,
syncContext.faceEmbeddingService.method,
) &&
areFaceIdsSame(newMlFile.faces, oldMlFile?.faces)
) {
return;
}
for (let i = 0; i < newMlFile.faces.length; i++) {
const face = newMlFile.faces[i];
if (face.detection.box.x + face.detection.box.width < 2) continue; // Skip if somehow already relative
face.detection =
syncContext.faceDetectionService.getRelativeDetection(
face.detection,
newMlFile.imageDimensions,
);
}
}
async saveFaceCrop(
imageBitmap: ImageBitmap,
face: Face,
syncContext: MLSyncContext,
) {
const faceCrop = await syncContext.faceCropService.getFaceCrop(
imageBitmap,
face.detection,
syncContext.config.faceCrop,
);
const blobOptions = syncContext.config.faceCrop.blobOptions;
const blob = await imageBitmapToBlob(faceCrop.image, blobOptions);
const cache = await openCache("face-crops");
await cache.put(face.id, blob);
faceCrop.image.close();
return blob;
}
async getAllSyncedFacesMap(syncContext: MLSyncContext) {
if (syncContext.allSyncedFacesMap) {
return syncContext.allSyncedFacesMap;
}
syncContext.allSyncedFacesMap = await mlIDbStorage.getAllFacesMap();
return syncContext.allSyncedFacesMap;
}
public async runFaceClustering(
syncContext: MLSyncContext,
allFaces: Array<Face>,
) {
// await this.init();
const clusteringConfig = syncContext.config.faceClustering;
if (!allFaces || allFaces.length < clusteringConfig.minInputSize) {
log.info(
"[MLService] Too few faces to cluster, not running clustering: ",
allFaces.length,
);
return;
}
log.info("Running clustering allFaces: ", allFaces.length);
syncContext.mlLibraryData.faceClusteringResults =
await syncContext.faceClusteringService.cluster(
allFaces.map((f) => Array.from(f.embedding)),
syncContext.config.faceClustering,
);
syncContext.mlLibraryData.faceClusteringMethod =
syncContext.faceClusteringService.method;
log.info(
"[MLService] Got face clustering results: ",
JSON.stringify(syncContext.mlLibraryData.faceClusteringResults),
);
// syncContext.faceClustersWithNoise = {
// clusters: syncContext.faceClusteringResults.clusters.map(
// (faces) => ({
// faces,
// })
// ),
// noise: syncContext.faceClusteringResults.noise,
// };
}
public async regenerateFaceCrop(
syncContext: MLSyncContext,
faceID: string,
) {
const fileID = Number(faceID.split("-")[0]);
const personFace = await mlIDbStorage.getFace(fileID, faceID);
if (!personFace) {
throw Error("Face not found");
}
const file = await getLocalFile(personFace.fileId);
const imageBitmap = await fetchImageBitmap(file);
return await this.saveFaceCrop(imageBitmap, personFace, syncContext);
}
}
export default new FaceService();
export function areFaceIdsSame(ofFaces: Array<Face>, toFaces: Array<Face>) {
if (
(ofFaces === null || ofFaces === undefined) &&
(toFaces === null || toFaces === undefined)
) {
return true;
}
return primitiveArrayEquals(
ofFaces?.map((f) => f.id),
toFaces?.map((f) => f.id),
);
}
function primitiveArrayEquals(a, b) {
return (
Array.isArray(a) &&
Array.isArray(b) &&
a.length === b.length &&
a.every((val, index) => val === b[index])
);
}
export function isDifferentOrOld(
method: Versioned<string>,
thanMethod: Versioned<string>,
) {
return (
!method ||
method.value !== thanMethod.value ||
method.version < thanMethod.version
);
}
async function extractFaceImagesToFloat32(
faceAlignments: Array<FaceAlignment>,
faceSize: number,
image: ImageBitmap,
): Promise<Float32Array> {
const faceData = new Float32Array(
faceAlignments.length * faceSize * faceSize * 3,
);
for (let i = 0; i < faceAlignments.length; i++) {
const alignedFace = faceAlignments[i];
const faceDataOffset = i * faceSize * faceSize * 3;
warpAffineFloat32List(
image,
alignedFace,
faceSize,
faceData,
faceDataOffset,
);
}
return faceData;
}


@ -1,44 +0,0 @@
import { Hdbscan } from "hdbscan";
import {
ClusteringConfig,
ClusteringInput,
ClusteringMethod,
ClusteringService,
HdbscanResults,
Versioned,
} from "services/ml/types";
class HdbscanClusteringService implements ClusteringService {
public method: Versioned<ClusteringMethod>;
constructor() {
this.method = {
value: "Hdbscan",
version: 1,
};
}
public async cluster(
input: ClusteringInput,
config: ClusteringConfig,
): Promise<HdbscanResults> {
// log.info('Clustering input: ', input);
const hdbscan = new Hdbscan({
input,
minClusterSize: config.minClusterSize,
minSamples: config.minSamples,
clusterSelectionEpsilon: config.clusterSelectionEpsilon,
clusterSelectionMethod: config.clusterSelectionMethod,
debug: config.generateDebugInfo,
});
return {
clusters: hdbscan.getClusters(),
noise: hdbscan.getNoise(),
debugInfo: hdbscan.getDebugInfo(),
};
}
}
export default new HdbscanClusteringService();


@ -1,211 +0,0 @@
import {
BlurDetectionMethod,
BlurDetectionService,
Face,
Versioned,
} from "services/ml/types";
import { createGrayscaleIntMatrixFromNormalized2List } from "utils/image";
import { mobileFaceNetFaceSize } from "./mobileFaceNetEmbeddingService";
class LaplacianBlurDetectionService implements BlurDetectionService {
public method: Versioned<BlurDetectionMethod>;
public constructor() {
this.method = {
value: "Laplacian",
version: 1,
};
}
public detectBlur(alignedFaces: Float32Array, faces: Face[]): number[] {
const numFaces = Math.round(
alignedFaces.length /
(mobileFaceNetFaceSize * mobileFaceNetFaceSize * 3),
);
const blurValues: number[] = [];
for (let i = 0; i < numFaces; i++) {
const face = faces[i];
const direction = getFaceDirection(face);
const faceImage = createGrayscaleIntMatrixFromNormalized2List(
alignedFaces,
i,
);
const laplacian = this.applyLaplacian(faceImage, direction);
const variance = this.calculateVariance(laplacian);
blurValues.push(variance);
}
return blurValues;
}
private calculateVariance(matrix: number[][]): number {
const numRows = matrix.length;
const numCols = matrix[0].length;
const totalElements = numRows * numCols;
// Calculate the mean
let mean: number = 0;
matrix.forEach((row) => {
row.forEach((value) => {
mean += value;
});
});
mean /= totalElements;
// Calculate the variance
let variance: number = 0;
matrix.forEach((row) => {
row.forEach((value) => {
const diff: number = value - mean;
variance += diff * diff;
});
});
variance /= totalElements;
return variance;
}
private padImage(
image: number[][],
removeSideColumns: number = 56,
direction: FaceDirection = "straight",
): number[][] {
// Throws if removeSideColumns is not even
if (removeSideColumns % 2 != 0) {
throw new Error("removeSideColumns must be even");
}
const numRows = image.length;
const numCols = image[0].length;
const paddedNumCols = numCols + 2 - removeSideColumns;
const paddedNumRows = numRows + 2;
// Create a new matrix with extra padding
const paddedImage: number[][] = Array.from(
{ length: paddedNumRows },
() => new Array(paddedNumCols).fill(0),
);
// Copy original image into the center of the padded image
if (direction === "straight") {
for (let i = 0; i < numRows; i++) {
for (let j = 0; j < paddedNumCols - 2; j++) {
paddedImage[i + 1][j + 1] =
image[i][j + Math.round(removeSideColumns / 2)];
}
}
} // If the face is facing left, we only take the right side of the face image
else if (direction === "left") {
for (let i = 0; i < numRows; i++) {
for (let j = 0; j < paddedNumCols - 2; j++) {
paddedImage[i + 1][j + 1] = image[i][j + removeSideColumns];
}
}
} // If the face is facing right, we only take the left side of the face image
else if (direction === "right") {
for (let i = 0; i < numRows; i++) {
for (let j = 0; j < paddedNumCols - 2; j++) {
paddedImage[i + 1][j + 1] = image[i][j];
}
}
}
// Reflect padding
// Top and bottom rows
for (let j = 1; j <= paddedNumCols - 2; j++) {
paddedImage[0][j] = paddedImage[2][j]; // Top row
paddedImage[numRows + 1][j] = paddedImage[numRows - 1][j]; // Bottom row
}
// Left and right columns
for (let i = 0; i < numRows + 2; i++) {
paddedImage[i][0] = paddedImage[i][2]; // Left column
paddedImage[i][paddedNumCols - 1] =
paddedImage[i][paddedNumCols - 3]; // Right column
}
return paddedImage;
}
private applyLaplacian(
image: number[][],
direction: FaceDirection = "straight",
): number[][] {
const paddedImage: number[][] = this.padImage(
image,
undefined,
direction,
);
const numRows = paddedImage.length - 2;
const numCols = paddedImage[0].length - 2;
// Create an output image initialized to 0
const outputImage: number[][] = Array.from({ length: numRows }, () =>
new Array(numCols).fill(0),
);
// Define the Laplacian kernel
const kernel: number[][] = [
[0, 1, 0],
[1, -4, 1],
[0, 1, 0],
];
// Apply the kernel to each pixel
for (let i = 0; i < numRows; i++) {
for (let j = 0; j < numCols; j++) {
let sum = 0;
for (let ki = 0; ki < 3; ki++) {
for (let kj = 0; kj < 3; kj++) {
sum += paddedImage[i + ki][j + kj] * kernel[ki][kj];
}
}
// Adjust the output value if necessary (e.g., clipping)
outputImage[i][j] = sum;
}
}
return outputImage;
}
}
export default new LaplacianBlurDetectionService();
type FaceDirection = "left" | "right" | "straight";
const getFaceDirection = (face: Face): FaceDirection => {
const landmarks = face.detection.landmarks;
const leftEye = landmarks[0];
const rightEye = landmarks[1];
const nose = landmarks[2];
const leftMouth = landmarks[3];
const rightMouth = landmarks[4];
const eyeDistanceX = Math.abs(rightEye.x - leftEye.x);
const eyeDistanceY = Math.abs(rightEye.y - leftEye.y);
const mouthDistanceY = Math.abs(rightMouth.y - leftMouth.y);
const faceIsUpright =
Math.max(leftEye.y, rightEye.y) + 0.5 * eyeDistanceY < nose.y &&
nose.y + 0.5 * mouthDistanceY < Math.min(leftMouth.y, rightMouth.y);
const noseStickingOutLeft =
nose.x < Math.min(leftEye.x, rightEye.x) &&
nose.x < Math.min(leftMouth.x, rightMouth.x);
const noseStickingOutRight =
nose.x > Math.max(leftEye.x, rightEye.x) &&
nose.x > Math.max(leftMouth.x, rightMouth.x);
const noseCloseToLeftEye =
Math.abs(nose.x - leftEye.x) < 0.2 * eyeDistanceX;
const noseCloseToRightEye =
Math.abs(nose.x - rightEye.x) < 0.2 * eyeDistanceX;
// if (faceIsUpright && (noseStickingOutLeft || noseCloseToLeftEye)) {
if (noseStickingOutLeft || (faceIsUpright && noseCloseToLeftEye)) {
return "left";
// } else if (faceIsUpright && (noseStickingOutRight || noseCloseToRightEye)) {
} else if (noseStickingOutRight || (faceIsUpright && noseCloseToRightEye)) {
return "right";
}
return "straight";
};
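
To show how these pieces might be consumed together, here is a hedged sketch that filters out blurry faces using the variance-of-Laplacian values computed above; the threshold of 15 echoes the blurDetection.threshold used elsewhere in this diff, and the inputs are assumed to come from the alignment step.

import { Face } from "services/ml/types";
import laplacianBlurDetectionService from "./laplacianBlurDetectionService";

// A minimal sketch: keep only faces that look sharp enough.
// `alignedFaces` is assumed to pack each face as 112 x 112 x 3 floats.
const keepSharpFaces = (alignedFaces: Float32Array, faces: Face[]): Face[] => {
    const blurThreshold = 15; // mirrors blurDetection.threshold in the sync config
    const blurValues = laplacianBlurDetectionService.detectBlur(alignedFaces, faces);
    faces.forEach((face, i) => (face.blurValue = blurValues[i]));
    // A low variance of the Laplacian means few sharp edges, i.e. a blurry face.
    return faces.filter((face) => (face.blurValue ?? 0) >= blurThreshold);
};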

View file

@@ -1,108 +1,45 @@
import { haveWindow } from "@/next/env";
import log from "@/next/log";
import { ComlinkWorker } from "@/next/worker/comlink-worker";
import { APPS } from "@ente/shared/apps/constants";
import ComlinkCryptoWorker, {
getDedicatedCryptoWorker,
} from "@ente/shared/crypto";
import { DedicatedCryptoWorker } from "@ente/shared/crypto/internal/crypto.worker";
import { CustomError, parseUploadErrorCodes } from "@ente/shared/error";
import PQueue from "p-queue";
import downloadManager from "services/download";
import { putEmbedding } from "services/embeddingService";
import { getLocalFiles } from "services/fileService";
import mlIDbStorage, {
ML_SEARCH_CONFIG_NAME,
ML_SYNC_CONFIG_NAME,
} from "services/ml/db";
import mlIDbStorage, { ML_SEARCH_CONFIG_NAME } from "services/face/db";
import {
BlurDetectionMethod,
BlurDetectionService,
ClusteringMethod,
ClusteringService,
Face,
FaceAlignmentMethod,
FaceAlignmentService,
FaceCropMethod,
FaceCropService,
FaceDetection,
FaceDetectionMethod,
FaceDetectionService,
FaceEmbeddingMethod,
FaceEmbeddingService,
Landmark,
MLLibraryData,
MLSearchConfig,
MLSyncConfig,
MLSyncContext,
MLSyncFileContext,
MLSyncResult,
MlFileData,
} from "services/ml/types";
} from "services/face/types";
import { getLocalFiles } from "services/fileService";
import { EnteFile } from "types/file";
import { isInternalUserForML } from "utils/user";
import arcfaceAlignmentService from "./arcfaceAlignmentService";
import arcfaceCropService from "./arcfaceCropService";
import dbscanClusteringService from "./dbscanClusteringService";
import FaceService from "./faceService";
import hdbscanClusteringService from "./hdbscanClusteringService";
import laplacianBlurDetectionService from "./laplacianBlurDetectionService";
import mobileFaceNetEmbeddingService from "./mobileFaceNetEmbeddingService";
import PeopleService from "./peopleService";
import ReaderService from "./readerService";
import yoloFaceDetectionService from "./yoloFaceDetectionService";
import { regenerateFaceCrop, syncFileAnalyzeFaces } from "../face/f-index";
import { fetchImageBitmapForContext } from "../face/image";
import { syncPeopleIndex } from "../face/people";
export const DEFAULT_ML_SYNC_CONFIG: MLSyncConfig = {
batchSize: 200,
imageSource: "Original",
faceDetection: {
method: "YoloFace",
},
faceCrop: {
enabled: true,
method: "ArcFace",
padding: 0.25,
maxSize: 256,
blobOptions: {
type: "image/jpeg",
quality: 0.8,
},
},
faceAlignment: {
method: "ArcFace",
},
blurDetection: {
method: "Laplacian",
threshold: 15,
},
faceEmbedding: {
method: "MobileFaceNet",
faceSize: 112,
generateTsne: true,
},
faceClustering: {
method: "Hdbscan",
minClusterSize: 3,
minSamples: 5,
clusterSelectionEpsilon: 0.6,
clusterSelectionMethod: "leaf",
minInputSize: 50,
// maxDistanceInsideCluster: 0.4,
generateDebugInfo: true,
},
mlVersion: 3,
};
/**
* TODO-ML(MR): What and why.
* Also, needs to be 1 (in sync with mobile) when we move out of beta.
*/
export const defaultMLVersion = 3;
const batchSize = 200;
export const MAX_ML_SYNC_ERROR_COUNT = 1;
export const DEFAULT_ML_SEARCH_CONFIG: MLSearchConfig = {
enabled: false,
};
export const MAX_ML_SYNC_ERROR_COUNT = 1;
export async function getMLSyncConfig() {
return mlIDbStorage.getConfig(ML_SYNC_CONFIG_NAME, DEFAULT_ML_SYNC_CONFIG);
}
export async function getMLSearchConfig() {
if (isInternalUserForML()) {
return mlIDbStorage.getConfig(
@@ -119,95 +56,30 @@ export async function updateMLSearchConfig(newConfig: MLSearchConfig) {
return mlIDbStorage.putConfig(ML_SEARCH_CONFIG_NAME, newConfig);
}
export class MLFactory {
public static getFaceDetectionService(
method: FaceDetectionMethod,
): FaceDetectionService {
if (method === "YoloFace") {
return yoloFaceDetectionService;
}
export interface MLSyncContext {
token: string;
userID: number;
throw Error("Unknon face detection method: " + method);
}
localFilesMap: Map<number, EnteFile>;
outOfSyncFiles: EnteFile[];
nSyncedFiles: number;
nSyncedFaces: number;
allSyncedFacesMap?: Map<number, Array<Face>>;
public static getFaceCropService(method: FaceCropMethod) {
if (method === "ArcFace") {
return arcfaceCropService;
}
error?: Error;
throw Error("Unknon face crop method: " + method);
}
// oldMLLibraryData: MLLibraryData;
mlLibraryData: MLLibraryData;
public static getFaceAlignmentService(
method: FaceAlignmentMethod,
): FaceAlignmentService {
if (method === "ArcFace") {
return arcfaceAlignmentService;
}
syncQueue: PQueue;
throw Error("Unknon face alignment method: " + method);
}
public static getBlurDetectionService(
method: BlurDetectionMethod,
): BlurDetectionService {
if (method === "Laplacian") {
return laplacianBlurDetectionService;
}
throw Error("Unknon blur detection method: " + method);
}
public static getFaceEmbeddingService(
method: FaceEmbeddingMethod,
): FaceEmbeddingService {
if (method === "MobileFaceNet") {
return mobileFaceNetEmbeddingService;
}
throw Error("Unknon face embedding method: " + method);
}
public static getClusteringService(
method: ClusteringMethod,
): ClusteringService {
if (method === "Hdbscan") {
return hdbscanClusteringService;
}
if (method === "Dbscan") {
return dbscanClusteringService;
}
throw Error("Unknon clustering method: " + method);
}
public static getMLSyncContext(
token: string,
userID: number,
config: MLSyncConfig,
shouldUpdateMLVersion: boolean = true,
) {
return new LocalMLSyncContext(
token,
userID,
config,
shouldUpdateMLVersion,
);
}
getEnteWorker(id: number): Promise<any>;
dispose(): Promise<void>;
}
export class LocalMLSyncContext implements MLSyncContext {
public token: string;
public userID: number;
public config: MLSyncConfig;
public shouldUpdateMLVersion: boolean;
public faceDetectionService: FaceDetectionService;
public faceCropService: FaceCropService;
public faceAlignmentService: FaceAlignmentService;
public blurDetectionService: BlurDetectionService;
public faceEmbeddingService: FaceEmbeddingService;
public faceClusteringService: ClusteringService;
public localFilesMap: Map<number, EnteFile>;
public outOfSyncFiles: EnteFile[];
@ -229,36 +101,9 @@ export class LocalMLSyncContext implements MLSyncContext {
>;
private enteWorkers: Array<any>;
constructor(
token: string,
userID: number,
config: MLSyncConfig,
shouldUpdateMLVersion: boolean = true,
concurrency?: number,
) {
constructor(token: string, userID: number, concurrency?: number) {
this.token = token;
this.userID = userID;
this.config = config;
this.shouldUpdateMLVersion = shouldUpdateMLVersion;
this.faceDetectionService = MLFactory.getFaceDetectionService(
this.config.faceDetection.method,
);
this.faceCropService = MLFactory.getFaceCropService(
this.config.faceCrop.method,
);
this.faceAlignmentService = MLFactory.getFaceAlignmentService(
this.config.faceAlignment.method,
);
this.blurDetectionService = MLFactory.getBlurDetectionService(
this.config.blurDetection.method,
);
this.faceEmbeddingService = MLFactory.getFaceEmbeddingService(
this.config.faceEmbedding.method,
);
this.faceClusteringService = MLFactory.getClusteringService(
this.config.faceClustering.method,
);
this.outOfSyncFiles = [];
this.nSyncedFiles = 0;
@@ -311,8 +156,6 @@ class MachineLearningService {
throw Error("Token needed by ml service to sync file");
}
await downloadManager.init(APPS.PHOTOS, { token });
const syncContext = await this.getSyncContext(token, userID);
await this.syncLocalFiles(syncContext);
@@ -323,12 +166,10 @@ class MachineLearningService {
await this.syncFiles(syncContext);
}
// TODO: running index before all files are on the latest ml version;
// maybe we need to just take synced files on the latest ml version for indexing
if (
syncContext.outOfSyncFiles.length <= 0 ||
(syncContext.nSyncedFiles === syncContext.config.batchSize &&
Math.random() < 0.2)
// TODO-ML(MR): Forced disable.
(syncContext.nSyncedFiles === batchSize && Math.random() < 0)
) {
await this.syncIndex(syncContext);
}
@@ -349,14 +190,8 @@ class MachineLearningService {
return mlSyncResult;
}
public async regenerateFaceCrop(
token: string,
userID: number,
faceID: string,
) {
await downloadManager.init(APPS.PHOTOS, { token });
const syncContext = await this.getSyncContext(token, userID);
return FaceService.regenerateFaceCrop(syncContext, faceID);
public async regenerateFaceCrop(faceID: string) {
return regenerateFaceCrop(faceID);
}
private newMlData(fileId: number) {
@@ -434,8 +269,8 @@ class MachineLearningService {
private async getOutOfSyncFiles(syncContext: MLSyncContext) {
const startTime = Date.now();
const fileIds = await mlIDbStorage.getFileIds(
syncContext.config.batchSize,
syncContext.config.mlVersion,
batchSize,
defaultMLVersion,
MAX_ML_SYNC_ERROR_COUNT,
);
@ -481,9 +316,10 @@ class MachineLearningService {
if (!this.syncContext) {
log.info("Creating syncContext");
this.syncContext = getMLSyncConfig().then((mlSyncConfig) =>
MLFactory.getMLSyncContext(token, userID, mlSyncConfig, true),
);
// TODO-ML(MR): Keep as promise for now.
this.syncContext = new Promise((resolve) => {
resolve(new LocalMLSyncContext(token, userID));
});
} else {
log.info("reusing existing syncContext");
}
@ -491,11 +327,13 @@ class MachineLearningService {
}
private async getLocalSyncContext(token: string, userID: number) {
// TODO-ML(MR): This is updating the file ML version. verify.
if (!this.localSyncContext) {
log.info("Creating localSyncContext");
this.localSyncContext = getMLSyncConfig().then((mlSyncConfig) =>
MLFactory.getMLSyncContext(token, userID, mlSyncConfig, false),
);
// TODO-ML(MR):
this.localSyncContext = new Promise((resolve) => {
resolve(new LocalMLSyncContext(token, userID));
});
} else {
log.info("reusing existing localSyncContext");
}
@ -516,24 +354,22 @@ class MachineLearningService {
userID: number,
enteFile: EnteFile,
localFile?: globalThis.File,
): Promise<MlFileData | Error> {
) {
const syncContext = await this.getLocalSyncContext(token, userID);
try {
const mlFileData = await this.syncFileWithErrorHandler(
await this.syncFileWithErrorHandler(
syncContext,
enteFile,
localFile,
);
if (syncContext.nSyncedFiles >= syncContext.config.batchSize) {
if (syncContext.nSyncedFiles >= batchSize) {
await this.closeLocalSyncContext();
}
// await syncContext.dispose();
return mlFileData;
} catch (e) {
console.error("Error while syncing local file: ", enteFile.id, e);
return e;
}
}
@ -541,16 +377,12 @@ class MachineLearningService {
syncContext: MLSyncContext,
enteFile: EnteFile,
localFile?: globalThis.File,
): Promise<MlFileData> {
) {
try {
console.log(
`Indexing ${enteFile.title ?? "<untitled>"} ${enteFile.id}`,
);
const mlFileData = await this.syncFile(
syncContext,
enteFile,
localFile,
);
const mlFileData = await this.syncFile(enteFile, localFile);
syncContext.nSyncedFaces += mlFileData.faces?.length || 0;
syncContext.nSyncedFiles += 1;
return mlFileData;
@ -583,35 +415,20 @@ class MachineLearningService {
}
}
private async syncFile(
syncContext: MLSyncContext,
enteFile: EnteFile,
localFile?: globalThis.File,
) {
console.log("Syncing for file" + enteFile.title);
private async syncFile(enteFile: EnteFile, localFile?: globalThis.File) {
log.debug(() => ({ a: "Syncing file", enteFile }));
const fileContext: MLSyncFileContext = { enteFile, localFile };
const oldMlFile =
(fileContext.oldMlFile = await this.getMLFileData(enteFile.id)) ??
this.newMlData(enteFile.id);
if (
fileContext.oldMlFile?.mlVersion === syncContext.config.mlVersion
// TODO: reset mlversion of all files when user changes image source
) {
return fileContext.oldMlFile;
const oldMlFile = await this.getMLFileData(enteFile.id);
if (oldMlFile && oldMlFile.mlVersion) {
return oldMlFile;
}
const newMlFile = (fileContext.newMlFile = this.newMlData(enteFile.id));
if (syncContext.shouldUpdateMLVersion) {
newMlFile.mlVersion = syncContext.config.mlVersion;
} else if (fileContext.oldMlFile?.mlVersion) {
newMlFile.mlVersion = fileContext.oldMlFile.mlVersion;
}
const newMlFile = (fileContext.newMlFile = this.newMlData(enteFile.id));
newMlFile.mlVersion = defaultMLVersion;
try {
await ReaderService.getImageBitmap(syncContext, fileContext);
await Promise.all([
this.syncFileAnalyzeFaces(syncContext, fileContext),
]);
await fetchImageBitmapForContext(fileContext);
await syncFileAnalyzeFaces(fileContext);
newMlFile.errorCount = 0;
newMlFile.lastErrorMessage = undefined;
await this.persistOnServer(newMlFile, enteFile);
@ -685,44 +502,11 @@ class MachineLearningService {
public async syncIndex(syncContext: MLSyncContext) {
await this.getMLLibraryData(syncContext);
await PeopleService.syncPeopleIndex(syncContext);
// TODO-ML(MR): Ensure this doesn't run until fixed.
await syncPeopleIndex(syncContext);
await this.persistMLLibraryData(syncContext);
}
private async syncFileAnalyzeFaces(
syncContext: MLSyncContext,
fileContext: MLSyncFileContext,
) {
const { newMlFile } = fileContext;
const startTime = Date.now();
await FaceService.syncFileFaceDetections(syncContext, fileContext);
if (newMlFile.faces && newMlFile.faces.length > 0) {
await FaceService.syncFileFaceCrops(syncContext, fileContext);
const alignedFacesData = await FaceService.syncFileFaceAlignments(
syncContext,
fileContext,
);
await FaceService.syncFileFaceEmbeddings(
syncContext,
fileContext,
alignedFacesData,
);
await FaceService.syncFileFaceMakeRelativeDetections(
syncContext,
fileContext,
);
}
log.info(
`face detection time taken ${fileContext.enteFile.id}`,
Date.now() - startTime,
"ms",
);
}
}
export default new MachineLearningService();

View file

@@ -5,24 +5,15 @@ import { eventBus, Events } from "@ente/shared/events";
import { getToken, getUserID } from "@ente/shared/storage/localStorage/helpers";
import debounce from "debounce";
import PQueue from "p-queue";
import mlIDbStorage from "services/ml/db";
import { createFaceComlinkWorker } from "services/ml/face";
import type { DedicatedMLWorker } from "services/ml/face.worker";
import { MLSyncResult } from "services/ml/types";
import { createFaceComlinkWorker } from "services/face";
import mlIDbStorage from "services/face/db";
import type { DedicatedMLWorker } from "services/face/face.worker";
import { MLSyncResult } from "services/face/types";
import { EnteFile } from "types/file";
import { logQueueStats } from "./machineLearningService";
const LIVE_SYNC_IDLE_DEBOUNCE_SEC = 30;
const LIVE_SYNC_QUEUE_TIMEOUT_SEC = 300;
const LOCAL_FILES_UPDATED_DEBOUNCE_SEC = 30;
export type JobState = "Scheduled" | "Running" | "NotScheduled";
export interface JobConfig {
intervalSec: number;
backoffMultiplier: number;
}
export interface MLSyncJobResult {
shouldBackoff: boolean;
mlSyncResult: MLSyncResult;
@@ -118,18 +109,18 @@ class MLWorkManager {
this.liveSyncQueue = new PQueue({
concurrency: 1,
// TODO: temp, remove
timeout: LIVE_SYNC_QUEUE_TIMEOUT_SEC * 1000,
timeout: 300 * 1000,
throwOnTimeout: true,
});
this.mlSearchEnabled = false;
this.debouncedLiveSyncIdle = debounce(
() => this.onLiveSyncIdle(),
LIVE_SYNC_IDLE_DEBOUNCE_SEC * 1000,
30 * 1000,
);
this.debouncedFilesUpdated = debounce(
() => this.mlSearchEnabled && this.localFilesUpdatedHandler(),
LOCAL_FILES_UPDATED_DEBOUNCE_SEC * 1000,
30 * 1000,
);
}
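
For context, a hedged sketch of the debounce pattern used in this constructor: the wrapped callback only fires once the calls have been quiet for the full interval. The callback body here is illustrative.

import debounce from "debounce";

// A minimal sketch: the wrapped function runs 30 seconds after the *last* call,
// so bursts of events collapse into a single trailing invocation.
const onLiveSyncIdle = () => console.log("live sync queue idle for 30s");
const debouncedOnLiveSyncIdle = debounce(onLiveSyncIdle, 30 * 1000);

debouncedOnLiveSyncIdle();
debouncedOnLiveSyncIdle(); // restarts the 30s timer; only one call fires in the end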
@ -241,19 +232,13 @@ class MLWorkManager {
}
public async syncLocalFile(enteFile: EnteFile, localFile: globalThis.File) {
const result = await this.liveSyncQueue.add(async () => {
await this.liveSyncQueue.add(async () => {
this.stopSyncJob();
const token = getToken();
const userID = getUserID();
const mlWorker = await this.getLiveSyncWorker();
return mlWorker.syncLocalFile(token, userID, enteFile, localFile);
});
if (result instanceof Error) {
// TODO: redirect/refresh to gallery in case of session_expired
// may not be required as uploader should anyways take care of this
console.error("Error while syncing local file: ", result);
}
}
// Sync Job
@ -326,11 +311,11 @@ class MLWorkManager {
}
}
public stopSyncJob(terminateWorker: boolean = true) {
public stopSyncJob() {
try {
log.info("MLWorkManager.stopSyncJob");
this.mlSyncJob?.stop();
terminateWorker && this.terminateSyncJobWorker();
this.terminateSyncJobWorker();
} catch (e) {
log.error("Failed to stop MLSync Job", e);
}

View file

@@ -1,41 +0,0 @@
import { workerBridge } from "@/next/worker/worker-bridge";
import {
FaceEmbedding,
FaceEmbeddingMethod,
FaceEmbeddingService,
Versioned,
} from "services/ml/types";
export const mobileFaceNetFaceSize = 112;
class MobileFaceNetEmbeddingService implements FaceEmbeddingService {
public method: Versioned<FaceEmbeddingMethod>;
public faceSize: number;
public constructor() {
this.method = {
value: "MobileFaceNet",
version: 2,
};
this.faceSize = mobileFaceNetFaceSize;
}
public async getFaceEmbeddings(
faceData: Float32Array,
): Promise<Array<FaceEmbedding>> {
const outputData = await workerBridge.faceEmbedding(faceData);
const embeddingSize = 192;
const embeddings = new Array<FaceEmbedding>(
outputData.length / embeddingSize,
);
for (let i = 0; i < embeddings.length; i++) {
embeddings[i] = new Float32Array(
outputData.slice(i * embeddingSize, (i + 1) * embeddingSize),
);
}
return embeddings;
}
}
export default new MobileFaceNetEmbeddingService();
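
As an illustration of how the 192-dimensional embeddings returned above might be compared, here is a hedged sketch using the euclidean helper from the hdbscan package (the same helper the detection code below imports); the 0.8 distance threshold is invented for illustration, not a tuned value.

import { euclidean } from "hdbscan";
import { FaceEmbedding } from "services/ml/types";

// A minimal sketch: two embeddings that are close in euclidean distance are
// likely to belong to the same person. The threshold is illustrative only.
const areProbablySamePerson = (a: FaceEmbedding, b: FaceEmbedding): boolean =>
    euclidean(Array.from(a), Array.from(b)) < 0.8;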

View file

@@ -1,113 +0,0 @@
import log from "@/next/log";
import mlIDbStorage from "services/ml/db";
import { Face, MLSyncContext, Person } from "services/ml/types";
import FaceService, { isDifferentOrOld } from "./faceService";
import { fetchImageBitmap, getLocalFile } from "./readerService";
class PeopleService {
async syncPeopleIndex(syncContext: MLSyncContext) {
const filesVersion = await mlIDbStorage.getIndexVersion("files");
if (
filesVersion <= (await mlIDbStorage.getIndexVersion("people")) &&
!isDifferentOrOld(
syncContext.mlLibraryData?.faceClusteringMethod,
syncContext.faceClusteringService.method,
)
) {
log.info(
"[MLService] Skipping people index as already synced to latest version",
);
return;
}
// TODO: have faces addressable through fileId + faceId
// to avoid index-based addressing, which is prone to wrong results
// one way could be to match nearest face within threshold in the file
const allFacesMap = await FaceService.getAllSyncedFacesMap(syncContext);
const allFaces = getAllFacesFromMap(allFacesMap);
await FaceService.runFaceClustering(syncContext, allFaces);
await this.syncPeopleFromClusters(syncContext, allFacesMap, allFaces);
await mlIDbStorage.setIndexVersion("people", filesVersion);
}
private async syncPeopleFromClusters(
syncContext: MLSyncContext,
allFacesMap: Map<number, Array<Face>>,
allFaces: Array<Face>,
) {
const clusters =
syncContext.mlLibraryData.faceClusteringResults?.clusters;
if (!clusters || clusters.length < 1) {
return;
}
for (const face of allFaces) {
face.personId = undefined;
}
await mlIDbStorage.clearAllPeople();
for (const [index, cluster] of clusters.entries()) {
const faces = cluster.map((f) => allFaces[f]).filter((f) => f);
// TODO: take default display face from last leaves of hdbscan clusters
const personFace = findFirstIfSorted(
faces,
(a, b) => b.detection.probability - a.detection.probability,
);
if (personFace && !personFace.crop?.cacheKey) {
const file = await getLocalFile(personFace.fileId);
const imageBitmap = await fetchImageBitmap(file);
await FaceService.saveFaceCrop(
imageBitmap,
personFace,
syncContext,
);
}
const person: Person = {
id: index,
files: faces.map((f) => f.fileId),
displayFaceId: personFace?.id,
faceCropCacheKey: personFace?.crop?.cacheKey,
};
await mlIDbStorage.putPerson(person);
faces.forEach((face) => {
face.personId = person.id;
});
// log.info("Creating person: ", person, faces);
}
await mlIDbStorage.updateFaces(allFacesMap);
}
}
export default new PeopleService();
function findFirstIfSorted<T>(
elements: Array<T>,
comparator: (a: T, b: T) => number,
) {
if (!elements || elements.length < 1) {
return;
}
let first = elements[0];
for (let i = 1; i < elements.length; i++) {
const comp = comparator(elements[i], first);
if (comp < 0) {
first = elements[i];
}
}
return first;
}
function getAllFacesFromMap(allFacesMap: Map<number, Array<Face>>) {
const allFaces = [...allFacesMap.values()].flat();
return allFaces;
}
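
A short note on the helper above: despite its name, findFirstIfSorted does not require a sorted input; it scans once and returns the element that would come first under the comparator (here, the face with the highest detection probability). A tiny sketch:

// Returns 1: the element that would come first if the array were sorted
// ascending, found in a single pass without actually sorting.
const smallest = findFirstIfSorted([3, 1, 2], (a, b) => a - b);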

View file

@@ -1,332 +0,0 @@
import { workerBridge } from "@/next/worker/worker-bridge";
import { euclidean } from "hdbscan";
import {
Box,
Dimensions,
Point,
boxFromBoundingBox,
newBox,
} from "services/ml/geom";
import {
FaceDetection,
FaceDetectionMethod,
FaceDetectionService,
Versioned,
} from "services/ml/types";
import {
Matrix,
applyToPoint,
compose,
scale,
translate,
} from "transformation-matrix";
import {
clamp,
getPixelBilinear,
normalizePixelBetween0And1,
} from "utils/image";
class YoloFaceDetectionService implements FaceDetectionService {
public method: Versioned<FaceDetectionMethod>;
public constructor() {
this.method = {
value: "YoloFace",
version: 1,
};
}
public async detectFaces(
imageBitmap: ImageBitmap,
): Promise<Array<FaceDetection>> {
const maxFaceDistancePercent = Math.sqrt(2) / 100;
const maxFaceDistance = imageBitmap.width * maxFaceDistancePercent;
const preprocessResult =
this.preprocessImageBitmapToFloat32ChannelsFirst(
imageBitmap,
640,
640,
);
const data = preprocessResult.data;
const resized = preprocessResult.newSize;
const outputData = await workerBridge.detectFaces(data);
const faces = this.getFacesFromYoloOutput(
outputData as Float32Array,
0.7,
);
const inBox = newBox(0, 0, resized.width, resized.height);
const toBox = newBox(0, 0, imageBitmap.width, imageBitmap.height);
const transform = computeTransformToBox(inBox, toBox);
const faceDetections: Array<FaceDetection> = faces?.map((f) => {
const box = transformBox(f.box, transform);
const normLandmarks = f.landmarks;
const landmarks = transformPoints(normLandmarks, transform);
return {
box,
landmarks,
probability: f.probability as number,
} as FaceDetection;
});
return removeDuplicateDetections(faceDetections, maxFaceDistance);
}
private preprocessImageBitmapToFloat32ChannelsFirst(
imageBitmap: ImageBitmap,
requiredWidth: number,
requiredHeight: number,
maintainAspectRatio: boolean = true,
normFunction: (
pixelValue: number,
) => number = normalizePixelBetween0And1,
) {
// Create an OffscreenCanvas and set its size
const offscreenCanvas = new OffscreenCanvas(
imageBitmap.width,
imageBitmap.height,
);
const ctx = offscreenCanvas.getContext("2d");
ctx.drawImage(imageBitmap, 0, 0, imageBitmap.width, imageBitmap.height);
const imageData = ctx.getImageData(
0,
0,
imageBitmap.width,
imageBitmap.height,
);
const pixelData = imageData.data;
let scaleW = requiredWidth / imageBitmap.width;
let scaleH = requiredHeight / imageBitmap.height;
if (maintainAspectRatio) {
const scale = Math.min(
requiredWidth / imageBitmap.width,
requiredHeight / imageBitmap.height,
);
scaleW = scale;
scaleH = scale;
}
const scaledWidth = clamp(
Math.round(imageBitmap.width * scaleW),
0,
requiredWidth,
);
const scaledHeight = clamp(
Math.round(imageBitmap.height * scaleH),
0,
requiredHeight,
);
const processedImage = new Float32Array(
1 * 3 * requiredWidth * requiredHeight,
);
// Populate the Float32Array with normalized pixel values
let pixelIndex = 0;
const channelOffsetGreen = requiredHeight * requiredWidth;
const channelOffsetBlue = 2 * requiredHeight * requiredWidth;
for (let h = 0; h < requiredHeight; h++) {
for (let w = 0; w < requiredWidth; w++) {
let pixel: {
r: number;
g: number;
b: number;
};
if (w >= scaledWidth || h >= scaledHeight) {
pixel = { r: 114, g: 114, b: 114 };
} else {
pixel = getPixelBilinear(
w / scaleW,
h / scaleH,
pixelData,
imageBitmap.width,
imageBitmap.height,
);
}
processedImage[pixelIndex] = normFunction(pixel.r);
processedImage[pixelIndex + channelOffsetGreen] = normFunction(
pixel.g,
);
processedImage[pixelIndex + channelOffsetBlue] = normFunction(
pixel.b,
);
pixelIndex++;
}
}
return {
data: processedImage,
originalSize: {
width: imageBitmap.width,
height: imageBitmap.height,
},
newSize: { width: scaledWidth, height: scaledHeight },
};
}
// The rowOutput is a Float32Array of shape [25200, 16], where each row represents a bounding box.
private getFacesFromYoloOutput(
rowOutput: Float32Array,
minScore: number,
): Array<FaceDetection> {
const faces: Array<FaceDetection> = [];
// iterate over each row
for (let i = 0; i < rowOutput.length; i += 16) {
const score = rowOutput[i + 4];
if (score < minScore) {
continue;
}
// The first 4 values are the bounding box's center (xCenter, yCenter) and its width and height
const xCenter = rowOutput[i];
const yCenter = rowOutput[i + 1];
const width = rowOutput[i + 2];
const height = rowOutput[i + 3];
const xMin = xCenter - width / 2.0; // topLeft
const yMin = yCenter - height / 2.0; // topLeft
const leftEyeX = rowOutput[i + 5];
const leftEyeY = rowOutput[i + 6];
const rightEyeX = rowOutput[i + 7];
const rightEyeY = rowOutput[i + 8];
const noseX = rowOutput[i + 9];
const noseY = rowOutput[i + 10];
const leftMouthX = rowOutput[i + 11];
const leftMouthY = rowOutput[i + 12];
const rightMouthX = rowOutput[i + 13];
const rightMouthY = rowOutput[i + 14];
const box = new Box({
x: xMin,
y: yMin,
width: width,
height: height,
});
const probability = score as number;
const landmarks = [
new Point(leftEyeX, leftEyeY),
new Point(rightEyeX, rightEyeY),
new Point(noseX, noseY),
new Point(leftMouthX, leftMouthY),
new Point(rightMouthX, rightMouthY),
];
const face: FaceDetection = {
box,
landmarks,
probability,
// detectionMethod: this.method,
};
faces.push(face);
}
return faces;
}
public getRelativeDetection(
faceDetection: FaceDetection,
dimensions: Dimensions,
): FaceDetection {
const oldBox: Box = faceDetection.box;
const box = new Box({
x: oldBox.x / dimensions.width,
y: oldBox.y / dimensions.height,
width: oldBox.width / dimensions.width,
height: oldBox.height / dimensions.height,
});
const oldLandmarks: Point[] = faceDetection.landmarks;
const landmarks = oldLandmarks.map((l) => {
return new Point(l.x / dimensions.width, l.y / dimensions.height);
});
return {
box,
landmarks,
probability: faceDetection.probability,
};
}
}
export default new YoloFaceDetectionService();
/**
* Removes duplicate face detections from an array of detections.
*
* This function sorts the detections by their probability in descending order, then iterates over them.
* For each detection, it calculates the Euclidean distance to all other detections.
* If the distance is less than or equal to the specified threshold (`withinDistance`), the other detection is considered a duplicate and is removed.
*
* @param detections - An array of face detections to remove duplicates from.
* @param withinDistance - The maximum Euclidean distance between two detections for them to be considered duplicates.
*
* @returns An array of face detections with duplicates removed.
*/
function removeDuplicateDetections(
detections: Array<FaceDetection>,
withinDistance: number,
) {
// console.time('removeDuplicates');
detections.sort((a, b) => b.probability - a.probability);
const isSelected = new Map<number, boolean>();
for (let i = 0; i < detections.length; i++) {
if (isSelected.get(i) === false) {
continue;
}
isSelected.set(i, true);
for (let j = i + 1; j < detections.length; j++) {
if (isSelected.get(j) === false) {
continue;
}
const centeri = getDetectionCenter(detections[i]);
const centerj = getDetectionCenter(detections[j]);
const dist = euclidean(
[centeri.x, centeri.y],
[centerj.x, centerj.y],
);
if (dist <= withinDistance) {
isSelected.set(j, false);
}
}
}
const uniques: Array<FaceDetection> = [];
for (let i = 0; i < detections.length; i++) {
isSelected.get(i) && uniques.push(detections[i]);
}
// console.timeEnd('removeDuplicates');
return uniques;
}
function getDetectionCenter(detection: FaceDetection) {
const center = new Point(0, 0);
// TODO: using the first 4 landmarks is applicable to BlazeFace only;
// this needs to consider the eye, nose and mouth landmarks to compute the center
detection.landmarks?.slice(0, 4).forEach((p) => {
center.x += p.x;
center.y += p.y;
});
return new Point(center.x / 4, center.y / 4);
}
function computeTransformToBox(inBox: Box, toBox: Box): Matrix {
return compose(
translate(toBox.x, toBox.y),
scale(toBox.width / inBox.width, toBox.height / inBox.height),
);
}
function transformPoint(point: Point, transform: Matrix) {
const txdPoint = applyToPoint(transform, point);
return new Point(txdPoint.x, txdPoint.y);
}
function transformPoints(points: Point[], transform: Matrix) {
return points?.map((p) => transformPoint(p, transform));
}
function transformBox(box: Box, transform: Matrix) {
const topLeft = transformPoint(box.topLeft, transform);
const bottomRight = transformPoint(box.bottomRight, transform);
return boxFromBoundingBox({
left: topLeft.x,
top: topLeft.y,
right: bottomRight.x,
bottom: bottomRight.y,
});
}
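
To make the row layout parsed by getFacesFromYoloOutput concrete, here is a hedged sketch that decodes a single 16-value row; the index positions follow the code above, while the sample numbers are invented.

// A minimal sketch: decode one 16-value row of the YOLO output.
// Layout (per the parsing code above): [xCenter, yCenter, width, height, score,
// leftEyeX, leftEyeY, rightEyeX, rightEyeY, noseX, noseY,
// leftMouthX, leftMouthY, rightMouthX, rightMouthY, ...].
const row = new Float32Array([
    320, 240, 100, 120, 0.91, 300, 220, 340, 220, 320, 250, 305, 275, 335, 275, 0,
]);
const [xCenter, yCenter, width, height, score] = row;
const xMin = xCenter - width / 2; // 270: top-left x
const yMin = yCenter - height / 2; // 180: top-left y
// A row is kept only if `score` clears the minScore threshold (0.7 above); the
// resulting boxes are then mapped back to image coordinates via
// computeTransformToBox before duplicate removal.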

View file

@@ -1,331 +0,0 @@
import { DebugInfo } from "hdbscan";
import PQueue from "p-queue";
import { Dimensions } from "services/ml/geom";
import { EnteFile } from "types/file";
import { Box, Point } from "./geom";
export interface MLSyncResult {
nOutOfSyncFiles: number;
nSyncedFiles: number;
nSyncedFaces: number;
nFaceClusters: number;
nFaceNoise: number;
error?: Error;
}
export declare type FaceDescriptor = Float32Array;
export declare type Cluster = Array<number>;
export interface ClusteringResults {
clusters: Array<Cluster>;
noise: Cluster;
}
export interface HdbscanResults extends ClusteringResults {
debugInfo?: DebugInfo;
}
export interface FacesCluster {
faces: Cluster;
summary?: FaceDescriptor;
}
export interface FacesClustersWithNoise {
clusters: Array<FacesCluster>;
noise: Cluster;
}
export interface NearestCluster {
cluster: FacesCluster;
distance: number;
}
export declare type Landmark = Point;
export declare type ImageType = "Original" | "Preview";
export declare type FaceDetectionMethod = "YoloFace";
export declare type FaceCropMethod = "ArcFace";
export declare type FaceAlignmentMethod = "ArcFace";
export declare type FaceEmbeddingMethod = "MobileFaceNet";
export declare type BlurDetectionMethod = "Laplacian";
export declare type ClusteringMethod = "Hdbscan" | "Dbscan";
export class AlignedBox {
box: Box;
rotation: number;
}
export interface Versioned<T> {
value: T;
version: number;
}
export interface FaceDetection {
// box and landmarks are relative to the image dimensions stored at mlFileData
box: Box;
landmarks?: Array<Landmark>;
probability?: number;
}
export interface DetectedFace {
fileId: number;
detection: FaceDetection;
}
export interface DetectedFaceWithId extends DetectedFace {
id: string;
}
export interface FaceCrop {
image: ImageBitmap;
// imageBox is relative to the image dimensions stored at mlFileData
imageBox: Box;
}
export interface StoredFaceCrop {
cacheKey: string;
imageBox: Box;
}
export interface CroppedFace extends DetectedFaceWithId {
crop?: StoredFaceCrop;
}
export interface FaceAlignment {
// TODO: remove the affine matrix; rotation, size and center
// are simple to store and use, while the affine matrix adds complexity when getting the crop
affineMatrix: Array<Array<number>>;
rotation: number;
// size and center are relative to the image dimensions stored at mlFileData
size: number;
center: Point;
}
export interface AlignedFace extends CroppedFace {
alignment?: FaceAlignment;
blurValue?: number;
}
export declare type FaceEmbedding = Float32Array;
export interface FaceWithEmbedding extends AlignedFace {
embedding?: FaceEmbedding;
}
export interface Face extends FaceWithEmbedding {
personId?: number;
}
export interface Person {
id: number;
name?: string;
files: Array<number>;
displayFaceId?: string;
faceCropCacheKey?: string;
}
export interface MlFileData {
fileId: number;
faces?: Face[];
imageSource?: ImageType;
imageDimensions?: Dimensions;
faceDetectionMethod?: Versioned<FaceDetectionMethod>;
faceCropMethod?: Versioned<FaceCropMethod>;
faceAlignmentMethod?: Versioned<FaceAlignmentMethod>;
faceEmbeddingMethod?: Versioned<FaceEmbeddingMethod>;
mlVersion: number;
errorCount: number;
lastErrorMessage?: string;
}
export interface FaceDetectionConfig {
method: FaceDetectionMethod;
}
export interface FaceCropConfig {
enabled: boolean;
method: FaceCropMethod;
padding: number;
maxSize: number;
blobOptions: {
type: string;
quality: number;
};
}
export interface FaceAlignmentConfig {
method: FaceAlignmentMethod;
}
export interface BlurDetectionConfig {
method: BlurDetectionMethod;
threshold: number;
}
export interface FaceEmbeddingConfig {
method: FaceEmbeddingMethod;
faceSize: number;
generateTsne?: boolean;
}
export interface FaceClusteringConfig extends ClusteringConfig {}
export declare type TSNEMetric = "euclidean" | "manhattan";
export interface TSNEConfig {
samples: number;
dim: number;
perplexity?: number;
earlyExaggeration?: number;
learningRate?: number;
nIter?: number;
metric?: TSNEMetric;
}
export interface MLSyncConfig {
batchSize: number;
imageSource: ImageType;
faceDetection: FaceDetectionConfig;
faceCrop: FaceCropConfig;
faceAlignment: FaceAlignmentConfig;
blurDetection: BlurDetectionConfig;
faceEmbedding: FaceEmbeddingConfig;
faceClustering: FaceClusteringConfig;
mlVersion: number;
}
export interface MLSearchConfig {
enabled: boolean;
}
export interface MLSyncContext {
token: string;
userID: number;
config: MLSyncConfig;
shouldUpdateMLVersion: boolean;
faceDetectionService: FaceDetectionService;
faceCropService: FaceCropService;
faceAlignmentService: FaceAlignmentService;
faceEmbeddingService: FaceEmbeddingService;
blurDetectionService: BlurDetectionService;
faceClusteringService: ClusteringService;
localFilesMap: Map<number, EnteFile>;
outOfSyncFiles: EnteFile[];
nSyncedFiles: number;
nSyncedFaces: number;
allSyncedFacesMap?: Map<number, Array<Face>>;
error?: Error;
// oldMLLibraryData: MLLibraryData;
mlLibraryData: MLLibraryData;
syncQueue: PQueue;
getEnteWorker(id: number): Promise<any>;
dispose(): Promise<void>;
}
export interface MLSyncFileContext {
enteFile: EnteFile;
localFile?: globalThis.File;
oldMlFile?: MlFileData;
newMlFile?: MlFileData;
imageBitmap?: ImageBitmap;
newDetection?: boolean;
newAlignment?: boolean;
}
export interface MLLibraryData {
faceClusteringMethod?: Versioned<ClusteringMethod>;
faceClusteringResults?: ClusteringResults;
faceClustersWithNoise?: FacesClustersWithNoise;
}
export declare type MLIndex = "files" | "people";
export interface FaceDetectionService {
method: Versioned<FaceDetectionMethod>;
detectFaces(image: ImageBitmap): Promise<Array<FaceDetection>>;
getRelativeDetection(
faceDetection: FaceDetection,
imageDimensions: Dimensions,
): FaceDetection;
}
export interface FaceCropService {
method: Versioned<FaceCropMethod>;
getFaceCrop(
imageBitmap: ImageBitmap,
face: FaceDetection,
config: FaceCropConfig,
): Promise<FaceCrop>;
}
export interface FaceAlignmentService {
method: Versioned<FaceAlignmentMethod>;
getFaceAlignment(faceDetection: FaceDetection): FaceAlignment;
}
export interface FaceEmbeddingService {
method: Versioned<FaceEmbeddingMethod>;
faceSize: number;
getFaceEmbeddings(faceImages: Float32Array): Promise<Array<FaceEmbedding>>;
}
export interface BlurDetectionService {
method: Versioned<BlurDetectionMethod>;
detectBlur(alignedFaces: Float32Array, faces: Face[]): number[];
}
export interface ClusteringService {
method: Versioned<ClusteringMethod>;
cluster(
input: ClusteringInput,
config: ClusteringConfig,
): Promise<ClusteringResults>;
}
export interface ClusteringConfig {
method: ClusteringMethod;
minClusterSize: number;
minSamples?: number;
clusterSelectionEpsilon?: number;
clusterSelectionMethod?: "eom" | "leaf";
maxDistanceInsideCluster?: number;
minInputSize?: number;
generateDebugInfo?: boolean;
}
export declare type ClusteringInput = Array<Array<number>>;
export interface MachineLearningWorker {
closeLocalSyncContext(): Promise<void>;
syncLocalFile(
token: string,
userID: number,
enteFile: EnteFile,
localFile: globalThis.File,
): Promise<MlFileData | Error>;
sync(token: string, userID: number): Promise<MLSyncResult>;
close(): void;
}
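
To show how these types fit together, here is a hedged sketch that builds one face, the per-file record that stores it, and the person derived from it; all values are hypothetical, and the imports use the services/face/* paths that this PR moves these modules to.

import { Box } from "services/face/geom";
import { Face, MlFileData, Person } from "services/face/types";

// A minimal sketch: one detected face inside one file, assigned to one person.
const face: Face = {
    id: "1234-0", // hypothetical face ID
    fileId: 1234,
    detection: {
        box: new Box({ x: 0.1, y: 0.2, width: 0.3, height: 0.3 }),
        probability: 0.92,
    },
    embedding: new Float32Array(192),
    personId: 7,
};

const mlFileData: MlFileData = {
    fileId: 1234,
    faces: [face],
    mlVersion: 3,
    errorCount: 0,
};

const person: Person = {
    id: 7,
    files: [face.fileId],
    displayFaceId: face.id,
};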

View file

@@ -2,9 +2,9 @@ import { FILE_TYPE } from "@/media/file-type";
import log from "@/next/log";
import * as chrono from "chrono-node";
import { t } from "i18next";
import { getMLSyncConfig } from "services/machineLearning/machineLearningService";
import mlIDbStorage from "services/ml/db";
import { Person } from "services/ml/types";
import mlIDbStorage from "services/face/db";
import { Person } from "services/face/types";
import { defaultMLVersion } from "services/machineLearning/machineLearningService";
import { Collection } from "types/collection";
import { EntityType, LocationTag, LocationTagData } from "types/entity";
import { EnteFile } from "types/file";
@@ -175,8 +175,7 @@ export async function getAllPeopleSuggestion(): Promise<Array<Suggestion>> {
export async function getIndexStatusSuggestion(): Promise<Suggestion> {
try {
const config = await getMLSyncConfig();
const indexStatus = await mlIDbStorage.getIndexStatus(config.mlVersion);
const indexStatus = await mlIDbStorage.getIndexStatus(defaultMLVersion);
let label;
if (!indexStatus.localFilesSynced) {

View file

@@ -1,7 +1,7 @@
import { FILE_TYPE } from "@/media/file-type";
import { IndexStatus } from "services/face/db";
import { Person } from "services/face/types";
import { City } from "services/locationSearchService";
import { IndexStatus } from "services/ml/db";
import { Person } from "services/ml/types";
import { LocationTagData } from "types/entity";
import { EnteFile } from "types/file";

View file

@@ -1,8 +1,8 @@
// these utils only work in env where OffscreenCanvas is available
import { Matrix, inverse } from "ml-matrix";
import { Box, Dimensions, enlargeBox } from "services/ml/geom";
import { FaceAlignment } from "services/ml/types";
import { Box, Dimensions, enlargeBox } from "services/face/geom";
import { FaceAlignment } from "services/face/types";
export function normalizePixelBetween0And1(pixelValue: number) {
return pixelValue / 255.0;
@@ -450,17 +450,17 @@ export interface BlobOptions {
quality?: number;
}
export async function imageBitmapToBlob(
imageBitmap: ImageBitmap,
options?: BlobOptions,
) {
export async function imageBitmapToBlob(imageBitmap: ImageBitmap) {
const offscreen = new OffscreenCanvas(
imageBitmap.width,
imageBitmap.height,
);
offscreen.getContext("2d").drawImage(imageBitmap, 0, 0);
return offscreen.convertToBlob(options);
return offscreen.convertToBlob({
type: "image/jpeg",
quality: 0.8,
});
}
export async function imageBitmapFromBlob(blob: Blob) {

View file

@@ -174,3 +174,15 @@ some cases.
- [sanitize-filename](https://github.com/parshap/node-sanitize-filename) is
for converting arbitrary strings into strings that are suitable for being
used as filenames.
## Face search
- [matrix](https://github.com/mljs/matrix) and
[similarity-transformation](https://github.com/shaileshpandit/similarity-transformation-js)
are used during face alignment.
- [transformation-matrix](https://github.com/chrvadala/transformation-matrix)
is used during face detection.
- [hdbscan](https://github.com/shaileshpandit/hdbscan-js) is used for face
clustering.
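
As a rough sketch of how the clustering dependency is used (option values are illustrative, not the app's tuned defaults, and the named Hdbscan export is assumed from the clustering code elsewhere in this PR):

// A minimal sketch: cluster a handful of toy embeddings with hdbscan.
import { Hdbscan } from "hdbscan"; // assumed named export, as instantiated by the clustering service

const embeddings = [
    [0.1, 0.2, 0.3],
    [0.11, 0.19, 0.31],
    [0.9, 0.8, 0.7],
];
const hdbscan = new Hdbscan({ input: embeddings, minClusterSize: 2, minSamples: 1 });
const clusters = hdbscan.getClusters(); // arrays of indices into `embeddings`
const noise = hdbscan.getNoise(); // indices that fit no cluster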

View file

@@ -332,12 +332,12 @@ export interface Electron {
detectFaces: (input: Float32Array) => Promise<Float32Array>;
/**
* Return a MobileFaceNet embedding for the given face data.
* Return MobileFaceNet embeddings for the given faces.
*
* Both the input and output are opaque binary data whose internal structure
* is specific to our implementation and the model (MobileFaceNet) we use.
*/
faceEmbedding: (input: Float32Array) => Promise<Float32Array>;
faceEmbeddings: (input: Float32Array) => Promise<Float32Array>;
/**
* Return a face crop stored by a previous version of ML.

View file

@@ -47,8 +47,8 @@ const workerBridge = {
convertToJPEG: (imageData: Uint8Array) =>
ensureElectron().convertToJPEG(imageData),
detectFaces: (input: Float32Array) => ensureElectron().detectFaces(input),
faceEmbedding: (input: Float32Array) =>
ensureElectron().faceEmbedding(input),
faceEmbeddings: (input: Float32Array) =>
ensureElectron().faceEmbeddings(input),
};
export type WorkerBridge = typeof workerBridge;
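
For context, a hedged sketch of a worker-side caller of the renamed bridge function; the 112×112×3 input packing and the 192-float-per-face output follow the conventions used elsewhere in this diff, and the helper name is made up.

import { workerBridge } from "@/next/worker/worker-bridge";

// A minimal sketch: run the face embedding model over a batch of aligned faces
// and split the flat output into one 192-dimensional embedding per face.
const embedFaces = async (alignedFacesData: Float32Array) => {
    const output = await workerBridge.faceEmbeddings(alignedFacesData);
    const embeddingSize = 192;
    const embeddings: Float32Array[] = [];
    for (let i = 0; i < output.length; i += embeddingSize) {
        embeddings.push(output.slice(i, i + embeddingSize));
    }
    return embeddings;
};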

View file

@@ -1896,11 +1896,6 @@ delayed-stream@~1.0.0:
resolved "https://registry.yarnpkg.com/delayed-stream/-/delayed-stream-1.0.0.tgz#df3ae199acadfb7d440aaae0b29e2272b24ec619"
integrity sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==
density-clustering@^1.3.0:
version "1.3.0"
resolved "https://registry.yarnpkg.com/density-clustering/-/density-clustering-1.3.0.tgz#dc9f59c8f0ab97e1624ac64930fd3194817dcac5"
integrity sha512-icpmBubVTwLnsaor9qH/4tG5+7+f61VcqMN3V3pm9sxxSCt2Jcs0zWOgwZW9ARJYaKD3FumIgHiMOcIMRRAzFQ==
dequal@^2.0.3:
version "2.0.3"
resolved "https://registry.yarnpkg.com/dequal/-/dequal-2.0.3.tgz#2644214f1997d39ed0ee0ece72335490a7ac67be"