From bef7574c297327166e161651ded1db476bd803db Mon Sep 17 00:00:00 2001 From: Manav Rathi Date: Sat, 18 May 2024 15:53:10 +0530 Subject: [PATCH 01/27] Trim --- web/apps/photos/src/services/face/f-index.ts | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/web/apps/photos/src/services/face/f-index.ts b/web/apps/photos/src/services/face/f-index.ts index 64eaa8d1a..f1785bfec 100644 --- a/web/apps/photos/src/services/face/f-index.ts +++ b/web/apps/photos/src/services/face/f-index.ts @@ -38,18 +38,18 @@ import { transformFaceDetections } from "./transform-box"; * 1. Downloading the original if needed. * 2. Detect faces using ONNX/YOLO * 3. Align the face rectangles, compute blur. - * 4. Compute embbeddings for the detected face (crops). + * 4. Compute embeddings for the detected face (crops). * - * Once all of it is done, it returns the face rectangles and embeddings to the - * higher layer (which saves them to locally for offline use, and encrypts and - * uploads them to the user's remote storage so that their other devices can - * download them instead of needing to reindex). + * Once all of it is done, it returns the face rectangles and embeddings so that + * they can be saved locally for offline use, and encrypts and uploads them to + * the user's remote storage so that their other devices can download them + * instead of needing to reindex. */ export const indexFaces = async ( enteFile: EnteFile, localFile?: globalThis.File, ) => { - log.debug(() => ({ a: "Indexing faces in file", enteFile })); + const startTime = Date.now(); const fileContext: MLSyncFileContext = { enteFile, localFile }; const newMlFile = (fileContext.newMlFile = { @@ -66,6 +66,8 @@ export const indexFaces = async ( fileContext.imageBitmap && fileContext.imageBitmap.close(); } + const ms = Math.round(Date.now() - startTime); + log.debug(() => `Indexing faces in file ${enteFile.id} took ${ms} ms`); return newMlFile; }; @@ -103,8 +105,6 @@ const fetchImageBitmapForContext = async (fileContext: MLSyncFileContext) => { const syncFileAnalyzeFaces = async (fileContext: MLSyncFileContext) => { const { newMlFile } = fileContext; - const startTime = Date.now(); - await syncFileFaceDetections(fileContext); if (newMlFile.faces && newMlFile.faces.length > 0) { @@ -116,10 +116,6 @@ const syncFileAnalyzeFaces = async (fileContext: MLSyncFileContext) => { await syncFileFaceMakeRelativeDetections(fileContext); } - log.debug( - () => - `Face detection for file ${fileContext.enteFile.id} took ${Math.round(Date.now() - startTime)} ms`, - ); }; const syncFileFaceDetections = async (fileContext: MLSyncFileContext) => { From 433d0e81fc4c9bbf1a3fb7bbe6f5f7db927bfeeb Mon Sep 17 00:00:00 2001 From: Manav Rathi Date: Sat, 18 May 2024 15:56:45 +0530 Subject: [PATCH 02/27] Prune --- web/apps/photos/src/services/face/f-index.ts | 23 ++++++-------------- web/apps/photos/src/services/face/image.ts | 8 ------- 2 files changed, 7 insertions(+), 24 deletions(-) diff --git a/web/apps/photos/src/services/face/f-index.ts b/web/apps/photos/src/services/face/f-index.ts index f1785bfec..dbc7162fa 100644 --- a/web/apps/photos/src/services/face/f-index.ts +++ b/web/apps/photos/src/services/face/f-index.ts @@ -23,7 +23,6 @@ import { createGrayscaleIntMatrixFromNormalized2List, fetchImageBitmap, getLocalFileImageBitmap, - getThumbnailImageBitmap, pixelRGBBilinear, warpAffineFloat32List, } from "./image"; @@ -72,29 +71,21 @@ export const indexFaces = async ( }; const fetchImageBitmapForContext = async (fileContext: 
MLSyncFileContext) => { - if (fileContext.imageBitmap) { - return fileContext.imageBitmap; - } + if (fileContext.imageBitmap) return fileContext.imageBitmap; + + const fileType = fileContext.enteFile.metadata.fileType; if (fileContext.localFile) { - if (fileContext.enteFile.metadata.fileType !== FILE_TYPE.IMAGE) { + if (fileType !== FILE_TYPE.IMAGE) throw new Error("Local file of only image type is supported"); - } + fileContext.imageBitmap = await getLocalFileImageBitmap( fileContext.enteFile, fileContext.localFile, ); - } else if ( - [FILE_TYPE.IMAGE, FILE_TYPE.LIVE_PHOTO].includes( - fileContext.enteFile.metadata.fileType, - ) - ) { + } else if ([FILE_TYPE.IMAGE, FILE_TYPE.LIVE_PHOTO].includes(fileType)) { fileContext.imageBitmap = await fetchImageBitmap(fileContext.enteFile); } else { - // TODO-ML(MR): We don't do it on videos, when will we ever come - // here? - fileContext.imageBitmap = await getThumbnailImageBitmap( - fileContext.enteFile, - ); + throw new Error(`Cannot index unsupported file type ${fileType}`); } const { width, height } = fileContext.imageBitmap; diff --git a/web/apps/photos/src/services/face/image.ts b/web/apps/photos/src/services/face/image.ts index 350b37589..09e20b40b 100644 --- a/web/apps/photos/src/services/face/image.ts +++ b/web/apps/photos/src/services/face/image.ts @@ -1,6 +1,5 @@ import { FILE_TYPE } from "@/media/file-type"; import { decodeLivePhoto } from "@/media/live-photo"; -import log from "@/next/log"; import { Matrix, inverse } from "ml-matrix"; import DownloadManager from "services/download"; import { FaceAlignment } from "services/face/types"; @@ -36,13 +35,6 @@ async function fetchRenderableBlob(file: EnteFile) { } } -export async function getThumbnailImageBitmap(file: EnteFile) { - const thumb = await DownloadManager.getThumbnail(file); - log.info("[MLService] Got thumbnail: ", file.id.toString()); - - return createImageBitmap(new Blob([thumb])); -} - export async function getLocalFileImageBitmap( enteFile: EnteFile, localFile: globalThis.File, From bcbd8054045567c86287724718b9ef5c1472bebf Mon Sep 17 00:00:00 2001 From: Manav Rathi Date: Sat, 18 May 2024 15:58:31 +0530 Subject: [PATCH 03/27] Inline --- web/apps/photos/src/services/face/f-index.ts | 14 ++++++++++++++ web/apps/photos/src/services/face/types.ts | 13 ------------- 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/web/apps/photos/src/services/face/f-index.ts b/web/apps/photos/src/services/face/f-index.ts index dbc7162fa..8cdf80f49 100644 --- a/web/apps/photos/src/services/face/f-index.ts +++ b/web/apps/photos/src/services/face/f-index.ts @@ -70,11 +70,25 @@ export const indexFaces = async ( return newMlFile; }; +export interface MLSyncFileContext { + enteFile: EnteFile; + localFile?: globalThis.File; + + oldMlFile?: MlFileData; + newMlFile?: MlFileData; + + imageBitmap?: ImageBitmap; + + newDetection?: boolean; + newAlignment?: boolean; +} + const fetchImageBitmapForContext = async (fileContext: MLSyncFileContext) => { if (fileContext.imageBitmap) return fileContext.imageBitmap; const fileType = fileContext.enteFile.metadata.fileType; if (fileContext.localFile) { + // TODO-ML(MR): Could also be image part of live photo? 
if (fileType !== FILE_TYPE.IMAGE) throw new Error("Local file of only image type is supported"); diff --git a/web/apps/photos/src/services/face/types.ts b/web/apps/photos/src/services/face/types.ts index 80f44c2f1..e893dcc70 100644 --- a/web/apps/photos/src/services/face/types.ts +++ b/web/apps/photos/src/services/face/types.ts @@ -81,17 +81,4 @@ export interface MLSearchConfig { enabled: boolean; } -export interface MLSyncFileContext { - enteFile: EnteFile; - localFile?: globalThis.File; - - oldMlFile?: MlFileData; - newMlFile?: MlFileData; - - imageBitmap?: ImageBitmap; - - newDetection?: boolean; - newAlignment?: boolean; -} - export declare type MLIndex = "files" | "people"; From b29436e160a9efcb47e92c5ad1216c06d9008c6d Mon Sep 17 00:00:00 2001 From: Manav Rathi Date: Sat, 18 May 2024 15:59:31 +0530 Subject: [PATCH 04/27] Prune --- web/apps/photos/src/services/face/f-index.ts | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/web/apps/photos/src/services/face/f-index.ts b/web/apps/photos/src/services/face/f-index.ts index 8cdf80f49..f63f3e9bb 100644 --- a/web/apps/photos/src/services/face/f-index.ts +++ b/web/apps/photos/src/services/face/f-index.ts @@ -70,7 +70,7 @@ export const indexFaces = async ( return newMlFile; }; -export interface MLSyncFileContext { +interface MLSyncFileContext { enteFile: EnteFile; localFile?: globalThis.File; @@ -79,7 +79,6 @@ export interface MLSyncFileContext { imageBitmap?: ImageBitmap; - newDetection?: boolean; newAlignment?: boolean; } @@ -125,7 +124,6 @@ const syncFileAnalyzeFaces = async (fileContext: MLSyncFileContext) => { const syncFileFaceDetections = async (fileContext: MLSyncFileContext) => { const { newMlFile } = fileContext; - fileContext.newDetection = true; const imageBitmap = await fetchImageBitmapForContext(fileContext); const faceDetections = await detectFaces(imageBitmap); // TODO-ML(MR): reenable faces filtering based on width From 76cfae12a58d5070a78c1c127306b3e6646b2919 Mon Sep 17 00:00:00 2001 From: Manav Rathi Date: Sat, 18 May 2024 16:02:27 +0530 Subject: [PATCH 05/27] Point of use --- web/apps/photos/src/services/face/f-index.ts | 4 ---- web/apps/photos/src/services/face/types.ts | 7 ------- .../machineLearning/machineLearningService.ts | 12 +++++------- 3 files changed, 5 insertions(+), 18 deletions(-) diff --git a/web/apps/photos/src/services/face/f-index.ts b/web/apps/photos/src/services/face/f-index.ts index f63f3e9bb..f2da8ee99 100644 --- a/web/apps/photos/src/services/face/f-index.ts +++ b/web/apps/photos/src/services/face/f-index.ts @@ -12,7 +12,6 @@ import { FaceCrop, FaceDetection, FaceEmbedding, - MLSyncFileContext, type MlFileData, } from "services/face/types"; import { defaultMLVersion } from "services/machineLearning/machineLearningService"; @@ -78,8 +77,6 @@ interface MLSyncFileContext { newMlFile?: MlFileData; imageBitmap?: ImageBitmap; - - newAlignment?: boolean; } const fetchImageBitmapForContext = async (fileContext: MLSyncFileContext) => { @@ -366,7 +363,6 @@ const syncFileFaceAlignments = async ( fileContext: MLSyncFileContext, ): Promise => { const { newMlFile } = fileContext; - fileContext.newAlignment = true; const imageBitmap = fileContext.imageBitmap || (await fetchImageBitmapForContext(fileContext)); diff --git a/web/apps/photos/src/services/face/types.ts b/web/apps/photos/src/services/face/types.ts index e893dcc70..815592771 100644 --- a/web/apps/photos/src/services/face/types.ts +++ b/web/apps/photos/src/services/face/types.ts @@ -1,5 +1,4 @@ import { Box, Dimensions, Point } from 
"services/face/geom"; -import { EnteFile } from "types/file"; export declare type Cluster = Array; @@ -76,9 +75,3 @@ export interface MlFileData { mlVersion: number; errorCount: number; } - -export interface MLSearchConfig { - enabled: boolean; -} - -export declare type MLIndex = "files" | "people"; diff --git a/web/apps/photos/src/services/machineLearning/machineLearningService.ts b/web/apps/photos/src/services/machineLearning/machineLearningService.ts index 33e6457af..56257299f 100644 --- a/web/apps/photos/src/services/machineLearning/machineLearningService.ts +++ b/web/apps/photos/src/services/machineLearning/machineLearningService.ts @@ -10,13 +10,7 @@ import PQueue from "p-queue"; import { putEmbedding } from "services/embeddingService"; import mlIDbStorage, { ML_SEARCH_CONFIG_NAME } from "services/face/db"; import { fetchImageBitmap, getLocalFile } from "services/face/image"; -import { - Face, - FaceDetection, - Landmark, - MLSearchConfig, - MlFileData, -} from "services/face/types"; +import { Face, FaceDetection, Landmark, MlFileData } from "services/face/types"; import { getLocalFiles } from "services/fileService"; import { EnteFile } from "types/file"; import { isInternalUserForML } from "utils/user"; @@ -32,6 +26,10 @@ const batchSize = 200; export const MAX_ML_SYNC_ERROR_COUNT = 1; +export interface MLSearchConfig { + enabled: boolean; +} + export const DEFAULT_ML_SEARCH_CONFIG: MLSearchConfig = { enabled: false, }; From 8f43c3d71259f240fae68126de3a137aabb67bab Mon Sep 17 00:00:00 2001 From: Manav Rathi Date: Sat, 18 May 2024 16:19:55 +0530 Subject: [PATCH 06/27] Simplify --- web/apps/photos/src/services/face/f-index.ts | 44 +++++++++----------- 1 file changed, 20 insertions(+), 24 deletions(-) diff --git a/web/apps/photos/src/services/face/f-index.ts b/web/apps/photos/src/services/face/f-index.ts index f2da8ee99..b50332e62 100644 --- a/web/apps/photos/src/services/face/f-index.ts +++ b/web/apps/photos/src/services/face/f-index.ts @@ -57,7 +57,11 @@ export const indexFaces = async ( } as MlFileData); try { - await fetchImageBitmapForContext(fileContext); + const imageBitmap = await fetchOrCreateImageBitmap(enteFile, localFile); + fileContext.imageBitmap = imageBitmap; + const { width, height } = imageBitmap; + fileContext.newMlFile.imageDimensions = { width, height }; + await syncFileAnalyzeFaces(fileContext); newMlFile.errorCount = 0; } finally { @@ -79,29 +83,26 @@ interface MLSyncFileContext { imageBitmap?: ImageBitmap; } -const fetchImageBitmapForContext = async (fileContext: MLSyncFileContext) => { - if (fileContext.imageBitmap) return fileContext.imageBitmap; - - const fileType = fileContext.enteFile.metadata.fileType; - if (fileContext.localFile) { +/** + * Return a {@link ImageBitmap}, using {@link localFile} if present otherwise + * downloading the source image corresponding to {@link enteFile} from remote. + */ +const fetchOrCreateImageBitmap = async ( + enteFile: EnteFile, + localFile: File, +) => { + const fileType = enteFile.metadata.fileType; + if (localFile) { // TODO-ML(MR): Could also be image part of live photo? 
if (fileType !== FILE_TYPE.IMAGE) throw new Error("Local file of only image type is supported"); - fileContext.imageBitmap = await getLocalFileImageBitmap( - fileContext.enteFile, - fileContext.localFile, - ); + return await getLocalFileImageBitmap(enteFile, localFile); } else if ([FILE_TYPE.IMAGE, FILE_TYPE.LIVE_PHOTO].includes(fileType)) { - fileContext.imageBitmap = await fetchImageBitmap(fileContext.enteFile); + return await fetchImageBitmap(enteFile); } else { throw new Error(`Cannot index unsupported file type ${fileType}`); } - - const { width, height } = fileContext.imageBitmap; - fileContext.newMlFile.imageDimensions = { width, height }; - - return fileContext.imageBitmap; }; const syncFileAnalyzeFaces = async (fileContext: MLSyncFileContext) => { @@ -121,7 +122,7 @@ const syncFileAnalyzeFaces = async (fileContext: MLSyncFileContext) => { const syncFileFaceDetections = async (fileContext: MLSyncFileContext) => { const { newMlFile } = fileContext; - const imageBitmap = await fetchImageBitmapForContext(fileContext); + const imageBitmap = fileContext.imageBitmap; const faceDetections = await detectFaces(imageBitmap); // TODO-ML(MR): reenable faces filtering based on width const detectedFaces = faceDetections?.map((detection) => { @@ -353,7 +354,7 @@ function getDetectionCenter(detection: FaceDetection) { const syncFileFaceCrops = async (fileContext: MLSyncFileContext) => { const { newMlFile } = fileContext; - const imageBitmap = await fetchImageBitmapForContext(fileContext); + const imageBitmap = fileContext.imageBitmap; for (const face of newMlFile.faces) { await saveFaceCrop(imageBitmap, face); } @@ -363,9 +364,7 @@ const syncFileFaceAlignments = async ( fileContext: MLSyncFileContext, ): Promise => { const { newMlFile } = fileContext; - const imageBitmap = - fileContext.imageBitmap || - (await fetchImageBitmapForContext(fileContext)); + const imageBitmap = fileContext.imageBitmap; // Execute the face alignment calculations for (const face of newMlFile.faces) { @@ -697,9 +696,6 @@ const syncFileFaceEmbeddings = async ( alignedFacesInput: Float32Array, ) => { const { newMlFile } = fileContext; - // TODO: when not storing face crops, image will be needed to extract faces - // fileContext.imageBitmap || - // (await this.getImageBitmap(fileContext)); const embeddings = await faceEmbeddings(alignedFacesInput); newMlFile.faces.forEach((f, i) => (f.embedding = embeddings[i])); From 9ba028b79dde92ac2c468bad84de2ed7a1990dee Mon Sep 17 00:00:00 2001 From: Manav Rathi Date: Sat, 18 May 2024 16:52:25 +0530 Subject: [PATCH 07/27] Isolate --- web/apps/photos/src/services/face/f-index.ts | 46 +++++++++++--------- 1 file changed, 26 insertions(+), 20 deletions(-) diff --git a/web/apps/photos/src/services/face/f-index.ts b/web/apps/photos/src/services/face/f-index.ts index b50332e62..deb3c3238 100644 --- a/web/apps/photos/src/services/face/f-index.ts +++ b/web/apps/photos/src/services/face/f-index.ts @@ -48,29 +48,21 @@ export const indexFaces = async ( localFile?: globalThis.File, ) => { const startTime = Date.now(); - const fileContext: MLSyncFileContext = { enteFile, localFile }; - - const newMlFile = (fileContext.newMlFile = { - fileId: enteFile.id, - mlVersion: defaultMLVersion, - errorCount: 0, - } as MlFileData); + const imageBitmap = await fetchOrCreateImageBitmap(enteFile, localFile); + let mlFile: MlFileData; try { - const imageBitmap = await fetchOrCreateImageBitmap(enteFile, localFile); - fileContext.imageBitmap = imageBitmap; - const { width, height } = imageBitmap; - 
fileContext.newMlFile.imageDimensions = { width, height }; - - await syncFileAnalyzeFaces(fileContext); - newMlFile.errorCount = 0; + mlFile = await indexFaces_(enteFile, imageBitmap); } finally { - fileContext.imageBitmap && fileContext.imageBitmap.close(); + imageBitmap.close(); } - const ms = Math.round(Date.now() - startTime); - log.debug(() => `Indexing faces in file ${enteFile.id} took ${ms} ms`); - return newMlFile; + log.debug(() => { + const ms = Math.round(Date.now() - startTime); + const nf = mlFile.faces?.length ?? 0; + return `Indexed ${nf} faces in file ${enteFile.id} (${ms} ms)`; + }); + return mlFile; }; interface MLSyncFileContext { @@ -105,8 +97,19 @@ const fetchOrCreateImageBitmap = async ( } }; -const syncFileAnalyzeFaces = async (fileContext: MLSyncFileContext) => { - const { newMlFile } = fileContext; +const indexFaces_ = async (enteFile: EnteFile, imageBitmap: ImageBitmap) => { + const fileContext: MLSyncFileContext = { enteFile }; + + const newMlFile = (fileContext.newMlFile = { + fileId: enteFile.id, + mlVersion: defaultMLVersion, + errorCount: 0, + } as MlFileData); + + fileContext.imageBitmap = imageBitmap; + const { width, height } = imageBitmap; + fileContext.newMlFile.imageDimensions = { width, height }; + await syncFileFaceDetections(fileContext); if (newMlFile.faces && newMlFile.faces.length > 0) { @@ -118,6 +121,9 @@ const syncFileAnalyzeFaces = async (fileContext: MLSyncFileContext) => { await syncFileFaceMakeRelativeDetections(fileContext); } + newMlFile.errorCount = 0; + + return newMlFile; }; const syncFileFaceDetections = async (fileContext: MLSyncFileContext) => { From a2e7231c378485bc5b2df7c44635c59b3b110811 Mon Sep 17 00:00:00 2001 From: Manav Rathi Date: Sat, 18 May 2024 16:52:49 +0530 Subject: [PATCH 08/27] Prune --- web/apps/photos/src/services/face/f-index.ts | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/web/apps/photos/src/services/face/f-index.ts b/web/apps/photos/src/services/face/f-index.ts index deb3c3238..2883eb014 100644 --- a/web/apps/photos/src/services/face/f-index.ts +++ b/web/apps/photos/src/services/face/f-index.ts @@ -43,10 +43,7 @@ import { transformFaceDetections } from "./transform-box"; * the user's remote storage so that their other devices can download them * instead of needing to reindex. 
*/ -export const indexFaces = async ( - enteFile: EnteFile, - localFile?: globalThis.File, -) => { +export const indexFaces = async (enteFile: EnteFile, localFile?: File) => { const startTime = Date.now(); const imageBitmap = await fetchOrCreateImageBitmap(enteFile, localFile); @@ -67,9 +64,7 @@ export const indexFaces = async ( interface MLSyncFileContext { enteFile: EnteFile; - localFile?: globalThis.File; - oldMlFile?: MlFileData; newMlFile?: MlFileData; imageBitmap?: ImageBitmap; From c557e4a7a590d6b15bf771b8f4867a5910d01266 Mon Sep 17 00:00:00 2001 From: Manav Rathi Date: Sat, 18 May 2024 16:56:18 +0530 Subject: [PATCH 09/27] Inline --- web/apps/photos/src/services/face/f-index.ts | 37 ++++++++------------ 1 file changed, 15 insertions(+), 22 deletions(-) diff --git a/web/apps/photos/src/services/face/f-index.ts b/web/apps/photos/src/services/face/f-index.ts index 2883eb014..68c73172d 100644 --- a/web/apps/photos/src/services/face/f-index.ts +++ b/web/apps/photos/src/services/face/f-index.ts @@ -105,7 +105,21 @@ const indexFaces_ = async (enteFile: EnteFile, imageBitmap: ImageBitmap) => { const { width, height } = imageBitmap; fileContext.newMlFile.imageDimensions = { width, height }; - await syncFileFaceDetections(fileContext); + const faceDetections = await detectFaces(imageBitmap); + const detectedFaces = faceDetections?.map((detection) => { + return { + fileId: fileContext.enteFile.id, + detection, + } as DetectedFace; + }); + newMlFile.faces = detectedFaces?.map((detectedFace) => ({ + ...detectedFace, + id: makeFaceID(detectedFace, newMlFile.imageDimensions), + })); + // TODO-ML(MR): reenable faces filtering based on width + // ?.filter((f) => + // f.box.width > syncContext.config.faceDetection.minFaceSize + // ); if (newMlFile.faces && newMlFile.faces.length > 0) { await syncFileFaceCrops(fileContext); @@ -121,27 +135,6 @@ const indexFaces_ = async (enteFile: EnteFile, imageBitmap: ImageBitmap) => { return newMlFile; }; -const syncFileFaceDetections = async (fileContext: MLSyncFileContext) => { - const { newMlFile } = fileContext; - const imageBitmap = fileContext.imageBitmap; - const faceDetections = await detectFaces(imageBitmap); - // TODO-ML(MR): reenable faces filtering based on width - const detectedFaces = faceDetections?.map((detection) => { - return { - fileId: fileContext.enteFile.id, - detection, - } as DetectedFace; - }); - newMlFile.faces = detectedFaces?.map((detectedFace) => ({ - ...detectedFace, - id: makeFaceID(detectedFace, newMlFile.imageDimensions), - })); - // ?.filter((f) => - // f.box.width > syncContext.config.faceDetection.minFaceSize - // ); - log.info("[MLService] Detected Faces: ", newMlFile.faces?.length); -}; - /** * Detect faces in the given {@link imageBitmap}. 
* From 39a75430a57d8178e32f8d682448fd43579d2e91 Mon Sep 17 00:00:00 2001 From: Manav Rathi Date: Sat, 18 May 2024 17:13:22 +0530 Subject: [PATCH 10/27] Inline --- web/apps/photos/src/services/face/f-index.ts | 13 +++++++++---- web/apps/photos/src/services/face/geom.ts | 4 ---- web/apps/photos/src/services/face/transform-box.ts | 10 +++++----- 3 files changed, 14 insertions(+), 13 deletions(-) diff --git a/web/apps/photos/src/services/face/f-index.ts b/web/apps/photos/src/services/face/f-index.ts index 68c73172d..9df912392 100644 --- a/web/apps/photos/src/services/face/f-index.ts +++ b/web/apps/photos/src/services/face/f-index.ts @@ -4,7 +4,7 @@ import log from "@/next/log"; import { workerBridge } from "@/next/worker/worker-bridge"; import { euclidean } from "hdbscan"; import { Matrix } from "ml-matrix"; -import { Box, Dimensions, Point, enlargeBox, newBox } from "services/face/geom"; +import { Box, Dimensions, Point, enlargeBox } from "services/face/geom"; import { DetectedFace, Face, @@ -143,13 +143,18 @@ const indexFaces_ = async (enteFile: EnteFile, imageBitmap: ImageBitmap) => { const detectFaces = async ( imageBitmap: ImageBitmap, ): Promise> => { + const rect = ({ width, height }: { width: number; height: number }) => + new Box({ x: 0, y: 0, width, height }); + const { yoloInput, yoloSize } = convertToYOLOInputFloat32ChannelsFirst(imageBitmap); const yoloOutput = await workerBridge.detectFaces(yoloInput); const faces = faceDetectionsFromYOLOOutput(yoloOutput); - const inBox = newBox(0, 0, yoloSize.width, yoloSize.height); - const toBox = newBox(0, 0, imageBitmap.width, imageBitmap.height); - const faceDetections = transformFaceDetections(faces, inBox, toBox); + const faceDetections = transformFaceDetections( + faces, + rect(yoloSize), + rect(imageBitmap), + ); const maxFaceDistancePercent = Math.sqrt(2) / 100; const maxFaceDistance = imageBitmap.width * maxFaceDistancePercent; diff --git a/web/apps/photos/src/services/face/geom.ts b/web/apps/photos/src/services/face/geom.ts index 556e2b309..b97f292e1 100644 --- a/web/apps/photos/src/services/face/geom.ts +++ b/web/apps/photos/src/services/face/geom.ts @@ -27,10 +27,6 @@ export interface IRect { height: number; } -export function newBox(x: number, y: number, width: number, height: number) { - return new Box({ x, y, width, height }); -} - export const boxFromBoundingBox = ({ left, top, diff --git a/web/apps/photos/src/services/face/transform-box.ts b/web/apps/photos/src/services/face/transform-box.ts index 8234b8739..2a0d243b8 100644 --- a/web/apps/photos/src/services/face/transform-box.ts +++ b/web/apps/photos/src/services/face/transform-box.ts @@ -13,17 +13,17 @@ import { } from "transformation-matrix"; /** - * Detect faces in the given {@link imageBitmap}. - * - * The model used is YOLO, running in an ONNX runtime. + * Transform the given {@link faceDetections} from their coordinate system in + * which they were detected ({@link inBox}) back to the coordinate system of the + * original image ({@link toBox}). 
*/ export const transformFaceDetections = ( - faces: FaceDetection[], + faceDetections: FaceDetection[], inBox: Box, toBox: Box, ): FaceDetection[] => { const transform = computeTransformToBox(inBox, toBox); - return faces.map((f) => { + return faceDetections.map((f) => { const box = transformBox(f.box, transform); const normLandmarks = f.landmarks; const landmarks = transformPoints(normLandmarks, transform); From 3f18fb84a107002f507e863f597d03b596030a38 Mon Sep 17 00:00:00 2001 From: Manav Rathi Date: Sat, 18 May 2024 17:20:42 +0530 Subject: [PATCH 11/27] Rearrange --- web/apps/photos/src/services/face/f-index.ts | 38 ++++++++++---------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/web/apps/photos/src/services/face/f-index.ts b/web/apps/photos/src/services/face/f-index.ts index 9df912392..a1b7c94d9 100644 --- a/web/apps/photos/src/services/face/f-index.ts +++ b/web/apps/photos/src/services/face/f-index.ts @@ -266,25 +266,6 @@ const faceDetectionsFromYOLOOutput = (rows: Float32Array): FaceDetection[] => { return faces; }; -const getRelativeDetection = ( - faceDetection: FaceDetection, - dimensions: Dimensions, -): FaceDetection => { - const oldBox: Box = faceDetection.box; - const box = new Box({ - x: oldBox.x / dimensions.width, - y: oldBox.y / dimensions.height, - width: oldBox.width / dimensions.width, - height: oldBox.height / dimensions.height, - }); - const oldLandmarks: Point[] = faceDetection.landmarks; - const landmarks = oldLandmarks.map((l) => { - return new Point(l.x / dimensions.width, l.y / dimensions.height); - }); - const probability = faceDetection.probability; - return { box, landmarks, probability }; -}; - /** * Removes duplicate face detections from an array of detections. * @@ -740,6 +721,25 @@ const syncFileFaceMakeRelativeDetections = async ( } }; +const getRelativeDetection = ( + faceDetection: FaceDetection, + dimensions: Dimensions, +): FaceDetection => { + const oldBox: Box = faceDetection.box; + const box = new Box({ + x: oldBox.x / dimensions.width, + y: oldBox.y / dimensions.height, + width: oldBox.width / dimensions.width, + height: oldBox.height / dimensions.height, + }); + const oldLandmarks: Point[] = faceDetection.landmarks; + const landmarks = oldLandmarks.map((l) => { + return new Point(l.x / dimensions.width, l.y / dimensions.height); + }); + const probability = faceDetection.probability; + return { box, landmarks, probability }; +}; + export const saveFaceCrop = async (imageBitmap: ImageBitmap, face: Face) => { const faceCrop = getFaceCrop(imageBitmap, face.detection); From 4d5ba47be42db83b4e24be7a9e0b6d87853ee39a Mon Sep 17 00:00:00 2001 From: Manav Rathi Date: Sat, 18 May 2024 17:27:41 +0530 Subject: [PATCH 12/27] Sugar --- web/apps/photos/src/services/face/f-index.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/web/apps/photos/src/services/face/f-index.ts b/web/apps/photos/src/services/face/f-index.ts index a1b7c94d9..0f6446e46 100644 --- a/web/apps/photos/src/services/face/f-index.ts +++ b/web/apps/photos/src/services/face/f-index.ts @@ -287,7 +287,7 @@ const faceDetectionsFromYOLOOutput = (rows: Float32Array): FaceDetection[] => { * @returns An array of face detections with duplicates removed. 
*/ const removeDuplicateDetections = ( - detections: Array, + detections: FaceDetection[], withinDistance: number, ) => { detections.sort((a, b) => b.probability - a.probability); @@ -313,7 +313,7 @@ const removeDuplicateDetections = ( } } - const uniques: Array = []; + const uniques: FaceDetection[] = []; for (let i = 0; i < detections.length; i++) { isSelected.get(i) && uniques.push(detections[i]); } From 139370c99718b6cb512dc61610ca7111c1be9a6b Mon Sep 17 00:00:00 2001 From: Manav Rathi Date: Sat, 18 May 2024 17:31:39 +0530 Subject: [PATCH 13/27] Shorten --- web/apps/photos/src/services/face/f-index.ts | 33 +++++++------------- 1 file changed, 12 insertions(+), 21 deletions(-) diff --git a/web/apps/photos/src/services/face/f-index.ts b/web/apps/photos/src/services/face/f-index.ts index 0f6446e46..23c37a678 100644 --- a/web/apps/photos/src/services/face/f-index.ts +++ b/web/apps/photos/src/services/face/f-index.ts @@ -293,44 +293,35 @@ const removeDuplicateDetections = ( detections.sort((a, b) => b.probability - a.probability); const isSelected = new Map(); for (let i = 0; i < detections.length; i++) { - if (isSelected.get(i) === false) { - continue; - } + if (!isSelected.get(i)) continue; + isSelected.set(i, true); for (let j = i + 1; j < detections.length; j++) { - if (isSelected.get(j) === false) { - continue; - } - const centeri = getDetectionCenter(detections[i]); - const centerj = getDetectionCenter(detections[j]); + if (!isSelected.get(j)) continue; + + const centeri = faceDetectionCenter(detections[i]); + const centerj = faceDetectionCenter(detections[j]); const dist = euclidean( [centeri.x, centeri.y], [centerj.x, centerj.y], ); - if (dist <= withinDistance) { - isSelected.set(j, false); - } + if (dist <= withinDistance) isSelected.set(j, false); } } - const uniques: FaceDetection[] = []; - for (let i = 0; i < detections.length; i++) { - isSelected.get(i) && uniques.push(detections[i]); - } - return uniques; + return detections.filter((_, i) => isSelected.get(i)); }; -function getDetectionCenter(detection: FaceDetection) { +const faceDetectionCenter = (detection: FaceDetection) => { const center = new Point(0, 0); - // TODO: first 4 landmarks is applicable to blazeface only - // this needs to consider eyes, nose and mouth landmarks to take center + // TODO-ML: first 4 landmarks is applicable to blazeface only this needs to + // consider eyes, nose and mouth landmarks to take center detection.landmarks?.slice(0, 4).forEach((p) => { center.x += p.x; center.y += p.y; }); - return new Point(center.x / 4, center.y / 4); -} +}; const syncFileFaceCrops = async (fileContext: MLSyncFileContext) => { const { newMlFile } = fileContext; From aa76448747675ec5dc4f329f236fbbe4ed331809 Mon Sep 17 00:00:00 2001 From: Manav Rathi Date: Sat, 18 May 2024 17:32:42 +0530 Subject: [PATCH 14/27] Shorten --- web/apps/photos/src/services/face/f-index.ts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/web/apps/photos/src/services/face/f-index.ts b/web/apps/photos/src/services/face/f-index.ts index 23c37a678..fc7681b4e 100644 --- a/web/apps/photos/src/services/face/f-index.ts +++ b/web/apps/photos/src/services/face/f-index.ts @@ -291,6 +291,7 @@ const removeDuplicateDetections = ( withinDistance: number, ) => { detections.sort((a, b) => b.probability - a.probability); + const isSelected = new Map(); for (let i = 0; i < detections.length; i++) { if (!isSelected.get(i)) continue; @@ -305,6 +306,7 @@ const removeDuplicateDetections = ( [centeri.x, centeri.y], [centerj.x, centerj.y], ); + if (dist <= 
withinDistance) isSelected.set(j, false); } } From c8623bab12f5942013e0b81f857471a051f90d15 Mon Sep 17 00:00:00 2001 From: Manav Rathi Date: Sat, 18 May 2024 17:42:27 +0530 Subject: [PATCH 15/27] Cleanup --- web/apps/photos/src/services/face/f-index.ts | 61 ++++++++++---------- 1 file changed, 30 insertions(+), 31 deletions(-) diff --git a/web/apps/photos/src/services/face/f-index.ts b/web/apps/photos/src/services/face/f-index.ts index fc7681b4e..0e68a133b 100644 --- a/web/apps/photos/src/services/face/f-index.ts +++ b/web/apps/photos/src/services/face/f-index.ts @@ -6,7 +6,6 @@ import { euclidean } from "hdbscan"; import { Matrix } from "ml-matrix"; import { Box, Dimensions, Point, enlargeBox } from "services/face/geom"; import { - DetectedFace, Face, FaceAlignment, FaceCrop, @@ -95,8 +94,9 @@ const fetchOrCreateImageBitmap = async ( const indexFaces_ = async (enteFile: EnteFile, imageBitmap: ImageBitmap) => { const fileContext: MLSyncFileContext = { enteFile }; + const fileID = enteFile.id; const newMlFile = (fileContext.newMlFile = { - fileId: enteFile.id, + fileId: fileID, mlVersion: defaultMLVersion, errorCount: 0, } as MlFileData); @@ -106,20 +106,12 @@ const indexFaces_ = async (enteFile: EnteFile, imageBitmap: ImageBitmap) => { fileContext.newMlFile.imageDimensions = { width, height }; const faceDetections = await detectFaces(imageBitmap); - const detectedFaces = faceDetections?.map((detection) => { - return { - fileId: fileContext.enteFile.id, - detection, - } as DetectedFace; - }); - newMlFile.faces = detectedFaces?.map((detectedFace) => ({ - ...detectedFace, - id: makeFaceID(detectedFace, newMlFile.imageDimensions), + const detectedFaces = faceDetections.map((detection) => ({ + id: makeFaceID(fileID, detection, newMlFile.imageDimensions), + fileId: fileID, + detection, })); - // TODO-ML(MR): reenable faces filtering based on width - // ?.filter((f) => - // f.box.width > syncContext.config.faceDetection.minFaceSize - // ); + newMlFile.faces = detectedFaces; if (newMlFile.faces && newMlFile.faces.length > 0) { await syncFileFaceCrops(fileContext); @@ -142,7 +134,7 @@ const indexFaces_ = async (enteFile: EnteFile, imageBitmap: ImageBitmap) => { */ const detectFaces = async ( imageBitmap: ImageBitmap, -): Promise> => { +): Promise => { const rect = ({ width, height }: { width: number; height: number }) => new Box({ x: 0, y: 0, width, height }); @@ -156,6 +148,11 @@ const detectFaces = async ( rect(imageBitmap), ); + // TODO-ML: reenable faces filtering based on width ?? 
else remove me + // ?.filter((f) => + // f.box.width > syncContext.config.faceDetection.minFaceSize + // ); + const maxFaceDistancePercent = Math.sqrt(2) / 100; const maxFaceDistance = imageBitmap.width * maxFaceDistancePercent; return removeDuplicateDetections(faceDetections, maxFaceDistance); @@ -325,6 +322,23 @@ const faceDetectionCenter = (detection: FaceDetection) => { return new Point(center.x / 4, center.y / 4); }; +const makeFaceID = ( + fileID: number, + detection: FaceDetection, + imageDims: Dimensions, +) => { + const part = (v: number) => clamp(v, 0.0, 0.999999).toFixed(5).substring(2); + const xMin = part(detection.box.x / imageDims.width); + const yMin = part(detection.box.y / imageDims.height); + const xMax = part( + (detection.box.x + detection.box.width) / imageDims.width, + ); + const yMax = part( + (detection.box.y + detection.box.height) / imageDims.height, + ); + return [`${fileID}`, xMin, yMin, xMax, yMax].join("_"); +}; + const syncFileFaceCrops = async (fileContext: MLSyncFileContext) => { const { newMlFile } = fileContext; const imageBitmap = fileContext.imageBitmap; @@ -466,21 +480,6 @@ async function extractFaceImagesToFloat32( return faceData; } -const makeFaceID = (detectedFace: DetectedFace, imageDims: Dimensions) => { - const part = (v: number) => clamp(v, 0.0, 0.999999).toFixed(5).substring(2); - const xMin = part(detectedFace.detection.box.x / imageDims.width); - const yMin = part(detectedFace.detection.box.y / imageDims.height); - const xMax = part( - (detectedFace.detection.box.x + detectedFace.detection.box.width) / - imageDims.width, - ); - const yMax = part( - (detectedFace.detection.box.y + detectedFace.detection.box.height) / - imageDims.height, - ); - return [detectedFace.fileId, xMin, yMin, xMax, yMax].join("_"); -}; - /** * Laplacian blur detection. 
*/ From a161203d0be1a668ede02d37e358ee6e1e586ac4 Mon Sep 17 00:00:00 2001 From: Manav Rathi Date: Sat, 18 May 2024 19:32:34 +0530 Subject: [PATCH 16/27] Inline --- web/apps/photos/src/services/face/f-index.ts | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/web/apps/photos/src/services/face/f-index.ts b/web/apps/photos/src/services/face/f-index.ts index 0e68a133b..70734daca 100644 --- a/web/apps/photos/src/services/face/f-index.ts +++ b/web/apps/photos/src/services/face/f-index.ts @@ -113,8 +113,10 @@ const indexFaces_ = async (enteFile: EnteFile, imageBitmap: ImageBitmap) => { })); newMlFile.faces = detectedFaces; - if (newMlFile.faces && newMlFile.faces.length > 0) { - await syncFileFaceCrops(fileContext); + if (detectedFaces.length > 0) { + await Promise.all( + detectedFaces.map((face) => saveFaceCrop(imageBitmap, face)), + ); const alignedFacesData = await syncFileFaceAlignments(fileContext); @@ -339,14 +341,6 @@ const makeFaceID = ( return [`${fileID}`, xMin, yMin, xMax, yMax].join("_"); }; -const syncFileFaceCrops = async (fileContext: MLSyncFileContext) => { - const { newMlFile } = fileContext; - const imageBitmap = fileContext.imageBitmap; - for (const face of newMlFile.faces) { - await saveFaceCrop(imageBitmap, face); - } -}; - const syncFileFaceAlignments = async ( fileContext: MLSyncFileContext, ): Promise => { From 410b6e7d3ed0d31086fa8a02f9fdcd66712c77d8 Mon Sep 17 00:00:00 2001 From: Manav Rathi Date: Sat, 18 May 2024 19:33:27 +0530 Subject: [PATCH 17/27] Tinker --- web/apps/photos/src/services/face/f-index.ts | 49 +++++++------------- 1 file changed, 17 insertions(+), 32 deletions(-) diff --git a/web/apps/photos/src/services/face/f-index.ts b/web/apps/photos/src/services/face/f-index.ts index 70734daca..6fe692163 100644 --- a/web/apps/photos/src/services/face/f-index.ts +++ b/web/apps/photos/src/services/face/f-index.ts @@ -95,7 +95,7 @@ const indexFaces_ = async (enteFile: EnteFile, imageBitmap: ImageBitmap) => { const fileContext: MLSyncFileContext = { enteFile }; const fileID = enteFile.id; - const newMlFile = (fileContext.newMlFile = { + const mlFile = (fileContext.newMlFile = { fileId: fileID, mlVersion: defaultMLVersion, errorCount: 0, @@ -107,26 +107,37 @@ const indexFaces_ = async (enteFile: EnteFile, imageBitmap: ImageBitmap) => { const faceDetections = await detectFaces(imageBitmap); const detectedFaces = faceDetections.map((detection) => ({ - id: makeFaceID(fileID, detection, newMlFile.imageDimensions), + id: makeFaceID(fileID, detection, mlFile.imageDimensions), fileId: fileID, detection, })); - newMlFile.faces = detectedFaces; + mlFile.faces = detectedFaces; if (detectedFaces.length > 0) { await Promise.all( detectedFaces.map((face) => saveFaceCrop(imageBitmap, face)), ); - const alignedFacesData = await syncFileFaceAlignments(fileContext); + // Execute the face alignment calculations + for (const face of mlFile.faces) { + face.alignment = faceAlignment(face.detection); + } + // Extract face images and convert to Float32Array + const faceAlignments = mlFile.faces.map((f) => f.alignment); + const alignedFacesData = await extractFaceImagesToFloat32( + faceAlignments, + mobileFaceNetFaceSize, + imageBitmap, + ); + const blurValues = detectBlur(alignedFacesData, mlFile.faces); + mlFile.faces.forEach((f, i) => (f.blurValue = blurValues[i])); await syncFileFaceEmbeddings(fileContext, alignedFacesData); await syncFileFaceMakeRelativeDetections(fileContext); } - newMlFile.errorCount = 0; - return newMlFile; + return mlFile; }; /** @@ 
-341,32 +352,6 @@ const makeFaceID = ( return [`${fileID}`, xMin, yMin, xMax, yMax].join("_"); }; -const syncFileFaceAlignments = async ( - fileContext: MLSyncFileContext, -): Promise => { - const { newMlFile } = fileContext; - const imageBitmap = fileContext.imageBitmap; - - // Execute the face alignment calculations - for (const face of newMlFile.faces) { - face.alignment = faceAlignment(face.detection); - } - // Extract face images and convert to Float32Array - const faceAlignments = newMlFile.faces.map((f) => f.alignment); - const faceImages = await extractFaceImagesToFloat32( - faceAlignments, - mobileFaceNetFaceSize, - imageBitmap, - ); - const blurValues = detectBlur(faceImages, newMlFile.faces); - newMlFile.faces.forEach((f, i) => (f.blurValue = blurValues[i])); - - imageBitmap.close(); - log.info("[MLService] alignedFaces: ", newMlFile.faces?.length); - - return faceImages; -}; - // TODO-ML(MR): When is this used or is it as Blazeface leftover? const ARCFACE_LANDMARKS = [ [38.2946, 51.6963], From 6304d90b522ceb3cf7856d3474baf610945e80bf Mon Sep 17 00:00:00 2001 From: Manav Rathi Date: Sat, 18 May 2024 19:50:13 +0530 Subject: [PATCH 18/27] Inline --- web/apps/photos/src/services/face/f-index.ts | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/web/apps/photos/src/services/face/f-index.ts b/web/apps/photos/src/services/face/f-index.ts index 6fe692163..59f20c88f 100644 --- a/web/apps/photos/src/services/face/f-index.ts +++ b/web/apps/photos/src/services/face/f-index.ts @@ -122,6 +122,7 @@ const indexFaces_ = async (enteFile: EnteFile, imageBitmap: ImageBitmap) => { for (const face of mlFile.faces) { face.alignment = faceAlignment(face.detection); } + // Extract face images and convert to Float32Array const faceAlignments = mlFile.faces.map((f) => f.alignment); const alignedFacesData = await extractFaceImagesToFloat32( @@ -129,10 +130,12 @@ const indexFaces_ = async (enteFile: EnteFile, imageBitmap: ImageBitmap) => { mobileFaceNetFaceSize, imageBitmap, ); + const blurValues = detectBlur(alignedFacesData, mlFile.faces); mlFile.faces.forEach((f, i) => (f.blurValue = blurValues[i])); - await syncFileFaceEmbeddings(fileContext, alignedFacesData); + const embeddings = await faceEmbeddings(alignedFacesData); + mlFile.faces.forEach((f, i) => (f.embedding = embeddings[i])); await syncFileFaceMakeRelativeDetections(fileContext); } @@ -642,18 +645,6 @@ const matrixVariance = (matrix: number[][]): number => { return variance; }; -const syncFileFaceEmbeddings = async ( - fileContext: MLSyncFileContext, - alignedFacesInput: Float32Array, -) => { - const { newMlFile } = fileContext; - - const embeddings = await faceEmbeddings(alignedFacesInput); - newMlFile.faces.forEach((f, i) => (f.embedding = embeddings[i])); - - log.info("[MLService] facesWithEmbeddings: ", newMlFile.faces.length); -}; - const mobileFaceNetFaceSize = 112; /** From 3b6760c65ea897787c84cbd66adf5e715ad9685e Mon Sep 17 00:00:00 2001 From: Manav Rathi Date: Sat, 18 May 2024 19:55:51 +0530 Subject: [PATCH 19/27] Remove unused blazeflaze landmarks --- web/apps/photos/src/services/face/f-index.ts | 21 ++++---------------- 1 file changed, 4 insertions(+), 17 deletions(-) diff --git a/web/apps/photos/src/services/face/f-index.ts b/web/apps/photos/src/services/face/f-index.ts index 59f20c88f..10873a15c 100644 --- a/web/apps/photos/src/services/face/f-index.ts +++ b/web/apps/photos/src/services/face/f-index.ts @@ -355,23 +355,14 @@ const makeFaceID = ( return [`${fileID}`, xMin, yMin, xMax, 
yMax].join("_"); }; -// TODO-ML(MR): When is this used or is it as Blazeface leftover? -const ARCFACE_LANDMARKS = [ - [38.2946, 51.6963], - [73.5318, 51.5014], - [56.0252, 71.7366], - [56.1396, 92.2848], -] as Array<[number, number]>; - -const ARCFACE_LANDMARKS_FACE_SIZE = 112; - -const ARC_FACE_5_LANDMARKS = [ +// TODO-ML: Rename? +const arcFaceLandmarks: [number, number][] = [ [38.2946, 51.6963], [73.5318, 51.5014], [56.0252, 71.7366], [41.5493, 92.3655], [70.7299, 92.2041], -] as Array<[number, number]>; +]; /** * Compute and return an {@link FaceAlignment} for the given face detection. @@ -379,13 +370,9 @@ const ARC_FACE_5_LANDMARKS = [ * @param faceDetection A geometry indicating a face detected in an image. */ const faceAlignment = (faceDetection: FaceDetection): FaceAlignment => { - const landmarkCount = faceDetection.landmarks.length; return getFaceAlignmentUsingSimilarityTransform( faceDetection, - normalizeLandmarks( - landmarkCount === 5 ? ARC_FACE_5_LANDMARKS : ARCFACE_LANDMARKS, - ARCFACE_LANDMARKS_FACE_SIZE, - ), + normalizeLandmarks(arcFaceLandmarks, mobileFaceNetFaceSize), ); }; From acd3568dc637123d8ee05ae75839af294f205d91 Mon Sep 17 00:00:00 2001 From: Manav Rathi Date: Sat, 18 May 2024 20:04:32 +0530 Subject: [PATCH 20/27] Tinker --- web/apps/photos/src/services/face/f-index.ts | 51 ++++++++------------ 1 file changed, 21 insertions(+), 30 deletions(-) diff --git a/web/apps/photos/src/services/face/f-index.ts b/web/apps/photos/src/services/face/f-index.ts index 10873a15c..3f23567a9 100644 --- a/web/apps/photos/src/services/face/f-index.ts +++ b/web/apps/photos/src/services/face/f-index.ts @@ -355,6 +355,17 @@ const makeFaceID = ( return [`${fileID}`, xMin, yMin, xMax, yMax].join("_"); }; +/** + * Compute and return an {@link FaceAlignment} for the given face detection. + * + * @param faceDetection A geometry indicating a face detected in an image. + */ +const faceAlignment = (faceDetection: FaceDetection): FaceAlignment => + faceAlignmentUsingSimilarityTransform( + faceDetection, + normalizeLandmarks(arcFaceLandmarks, mobileFaceNetFaceSize), + ); + // TODO-ML: Rename? const arcFaceLandmarks: [number, number][] = [ [38.2946, 51.6963], @@ -364,22 +375,16 @@ const arcFaceLandmarks: [number, number][] = [ [70.7299, 92.2041], ]; -/** - * Compute and return an {@link FaceAlignment} for the given face detection. - * - * @param faceDetection A geometry indicating a face detected in an image. 
- */ -const faceAlignment = (faceDetection: FaceDetection): FaceAlignment => { - return getFaceAlignmentUsingSimilarityTransform( - faceDetection, - normalizeLandmarks(arcFaceLandmarks, mobileFaceNetFaceSize), - ); -}; +const normalizeLandmarks = ( + landmarks: [number, number][], + faceSize: number, +): [number, number][] => + landmarks.map(([x, y]) => [x / faceSize, y / faceSize]); -function getFaceAlignmentUsingSimilarityTransform( +const faceAlignmentUsingSimilarityTransform = ( faceDetection: FaceDetection, - alignedLandmarks: Array<[number, number]>, -): FaceAlignment { + alignedLandmarks: [number, number][], +): FaceAlignment => { const landmarksMat = new Matrix( faceDetection.landmarks .map((p) => [p.x, p.y]) @@ -410,22 +415,8 @@ function getFaceAlignmentUsingSimilarityTransform( simTransform.rotation.get(0, 0), ); - return { - affineMatrix, - center, - size, - rotation, - }; -} - -function normalizeLandmarks( - landmarks: Array<[number, number]>, - faceSize: number, -): Array<[number, number]> { - return landmarks.map((landmark) => - landmark.map((p) => p / faceSize), - ) as Array<[number, number]>; -} + return { affineMatrix, center, size, rotation }; +}; async function extractFaceImagesToFloat32( faceAlignments: Array, From 8975546294bc7b317f0c94e463c9710b18c0d4b8 Mon Sep 17 00:00:00 2001 From: Manav Rathi Date: Sat, 18 May 2024 20:12:20 +0530 Subject: [PATCH 21/27] Simplify --- web/apps/photos/src/services/face/f-index.ts | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git a/web/apps/photos/src/services/face/f-index.ts b/web/apps/photos/src/services/face/f-index.ts index 3f23567a9..a9f605981 100644 --- a/web/apps/photos/src/services/face/f-index.ts +++ b/web/apps/photos/src/services/face/f-index.ts @@ -442,27 +442,19 @@ async function extractFaceImagesToFloat32( /** * Laplacian blur detection. + * + * Return an array of detected blur values, one for each face in {@link faces}. 
*/ -const detectBlur = (alignedFaces: Float32Array, faces: Face[]): number[] => { - const numFaces = Math.round( - alignedFaces.length / - (mobileFaceNetFaceSize * mobileFaceNetFaceSize * 3), - ); - const blurValues: number[] = []; - for (let i = 0; i < numFaces; i++) { - const face = faces[i]; - const direction = faceDirection(face); +const detectBlur = (alignedFaces: Float32Array, faces: Face[]): number[] => + faces.map((face, i) => { const faceImage = createGrayscaleIntMatrixFromNormalized2List( alignedFaces, i, mobileFaceNetFaceSize, mobileFaceNetFaceSize, ); - const laplacian = applyLaplacian(faceImage, direction); - blurValues.push(matrixVariance(laplacian)); - } - return blurValues; -}; + return matrixVariance(applyLaplacian(faceImage, faceDirection(face))); + }); type FaceDirection = "left" | "right" | "straight"; From fe8ff0a12a1f13ffed3b7680a431924dc6fa6f43 Mon Sep 17 00:00:00 2001 From: Manav Rathi Date: Sat, 18 May 2024 20:16:00 +0530 Subject: [PATCH 22/27] Remove redundant --- web/apps/photos/src/services/face/f-index.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/web/apps/photos/src/services/face/f-index.ts b/web/apps/photos/src/services/face/f-index.ts index a9f605981..6d64dc556 100644 --- a/web/apps/photos/src/services/face/f-index.ts +++ b/web/apps/photos/src/services/face/f-index.ts @@ -503,7 +503,7 @@ const applyLaplacian = ( image: number[][], direction: FaceDirection, ): number[][] => { - const paddedImage: number[][] = padImage(image, direction); + const paddedImage = padImage(image, direction); const numRows = paddedImage.length - 2; const numCols = paddedImage[0].length - 2; @@ -513,7 +513,7 @@ const applyLaplacian = ( ); // Define the Laplacian kernel. - const kernel: number[][] = [ + const kernel = [ [0, 1, 0], [1, -4, 1], [0, 1, 0], From 97adb89494ea9d32c199bad2ec624412bfce1156 Mon Sep 17 00:00:00 2001 From: Manav Rathi Date: Sat, 18 May 2024 20:18:39 +0530 Subject: [PATCH 23/27] Tweak --- web/apps/photos/src/services/face/f-index.ts | 22 +++++++++++++------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/web/apps/photos/src/services/face/f-index.ts b/web/apps/photos/src/services/face/f-index.ts index 6d64dc556..b469189e3 100644 --- a/web/apps/photos/src/services/face/f-index.ts +++ b/web/apps/photos/src/services/face/f-index.ts @@ -558,14 +558,16 @@ const padImage = (image: number[][], direction: FaceDirection): number[][] => { } } } else if (direction === "left") { - // If the face is facing left, we only take the right side of the face image. + // If the face is facing left, we only take the right side of the face + // image. for (let i = 0; i < numRows; i++) { for (let j = 0; j < paddedNumCols - 2; j++) { paddedImage[i + 1][j + 1] = image[i][j + removeSideColumns]; } } } else if (direction === "right") { - // If the face is facing right, we only take the left side of the face image. + // If the face is facing right, we only take the left side of the face + // image. 
for (let i = 0; i < numRows; i++) { for (let j = 0; j < paddedNumCols - 2; j++) { paddedImage[i + 1][j + 1] = image[i][j]; @@ -574,15 +576,19 @@ const padImage = (image: number[][], direction: FaceDirection): number[][] => { } // Reflect padding - // Top and bottom rows + // - Top and bottom rows for (let j = 1; j <= paddedNumCols - 2; j++) { - paddedImage[0][j] = paddedImage[2][j]; // Top row - paddedImage[numRows + 1][j] = paddedImage[numRows - 1][j]; // Bottom row + // Top row + paddedImage[0][j] = paddedImage[2][j]; + // Bottom row + paddedImage[numRows + 1][j] = paddedImage[numRows - 1][j]; } - // Left and right columns + // - Left and right columns for (let i = 0; i < numRows + 2; i++) { - paddedImage[i][0] = paddedImage[i][2]; // Left column - paddedImage[i][paddedNumCols - 1] = paddedImage[i][paddedNumCols - 3]; // Right column + // Left column + paddedImage[i][0] = paddedImage[i][2]; + // Right column + paddedImage[i][paddedNumCols - 1] = paddedImage[i][paddedNumCols - 3]; } return paddedImage; From a4494f5c6adf6bb50395db1aedfb362a427c643c Mon Sep 17 00:00:00 2001 From: Manav Rathi Date: Sat, 18 May 2024 20:21:54 +0530 Subject: [PATCH 24/27] Tweak --- web/apps/photos/src/services/face/f-index.ts | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/web/apps/photos/src/services/face/f-index.ts b/web/apps/photos/src/services/face/f-index.ts index b469189e3..62f61aef8 100644 --- a/web/apps/photos/src/services/face/f-index.ts +++ b/web/apps/photos/src/services/face/f-index.ts @@ -137,7 +137,7 @@ const indexFaces_ = async (enteFile: EnteFile, imageBitmap: ImageBitmap) => { const embeddings = await faceEmbeddings(alignedFacesData); mlFile.faces.forEach((f, i) => (f.embedding = embeddings[i])); - await syncFileFaceMakeRelativeDetections(fileContext); + convertFaceDetectionsToRelative(mlFile); } return mlFile; @@ -645,21 +645,19 @@ const faceEmbeddings = async ( return embeddings; }; -const syncFileFaceMakeRelativeDetections = async ( - fileContext: MLSyncFileContext, -) => { - const { newMlFile } = fileContext; - for (let i = 0; i < newMlFile.faces.length; i++) { - const face = newMlFile.faces[i]; - if (face.detection.box.x + face.detection.box.width < 2) continue; // Skip if somehow already relative - face.detection = getRelativeDetection( +const convertFaceDetectionsToRelative = (mlFile: MlFileData) => { + for (let i = 0; i < mlFile.faces.length; i++) { + const face = mlFile.faces[i]; + // Skip if somehow already relative. 
+ if (face.detection.box.x + face.detection.box.width < 2) continue; + face.detection = relativeDetection( face.detection, - newMlFile.imageDimensions, + mlFile.imageDimensions, ); } }; -const getRelativeDetection = ( +const relativeDetection = ( faceDetection: FaceDetection, dimensions: Dimensions, ): FaceDetection => { From 684904173524be7afc283bef17a47bbbb4833877 Mon Sep 17 00:00:00 2001 From: Manav Rathi Date: Sat, 18 May 2024 20:26:20 +0530 Subject: [PATCH 25/27] Tweak --- web/apps/photos/src/services/face/f-index.ts | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/web/apps/photos/src/services/face/f-index.ts b/web/apps/photos/src/services/face/f-index.ts index 62f61aef8..668612ad1 100644 --- a/web/apps/photos/src/services/face/f-index.ts +++ b/web/apps/photos/src/services/face/f-index.ts @@ -151,7 +151,7 @@ const indexFaces_ = async (enteFile: EnteFile, imageBitmap: ImageBitmap) => { const detectFaces = async ( imageBitmap: ImageBitmap, ): Promise => { - const rect = ({ width, height }: { width: number; height: number }) => + const rect = ({ width, height }: Dimensions) => new Box({ x: 0, y: 0, width, height }); const { yoloInput, yoloSize } = @@ -659,18 +659,17 @@ const convertFaceDetectionsToRelative = (mlFile: MlFileData) => { const relativeDetection = ( faceDetection: FaceDetection, - dimensions: Dimensions, + { width, height }: Dimensions, ): FaceDetection => { const oldBox: Box = faceDetection.box; const box = new Box({ - x: oldBox.x / dimensions.width, - y: oldBox.y / dimensions.height, - width: oldBox.width / dimensions.width, - height: oldBox.height / dimensions.height, + x: oldBox.x / width, + y: oldBox.y / height, + width: oldBox.width / width, + height: oldBox.height / height, }); - const oldLandmarks: Point[] = faceDetection.landmarks; - const landmarks = oldLandmarks.map((l) => { - return new Point(l.x / dimensions.width, l.y / dimensions.height); + const landmarks = faceDetection.landmarks.map((l) => { + return new Point(l.x / width, l.y / height); }); const probability = faceDetection.probability; return { box, landmarks, probability }; From 93c498b0f48b81e0f3f99a6438d45bb98d932195 Mon Sep 17 00:00:00 2001 From: Manav Rathi Date: Sat, 18 May 2024 20:39:57 +0530 Subject: [PATCH 26/27] Remove unused context --- web/apps/photos/src/services/face/f-index.ts | 20 ++++---------------- 1 file changed, 4 insertions(+), 16 deletions(-) diff --git a/web/apps/photos/src/services/face/f-index.ts b/web/apps/photos/src/services/face/f-index.ts index 668612ad1..571dc52b3 100644 --- a/web/apps/photos/src/services/face/f-index.ts +++ b/web/apps/photos/src/services/face/f-index.ts @@ -61,14 +61,6 @@ export const indexFaces = async (enteFile: EnteFile, localFile?: File) => { return mlFile; }; -interface MLSyncFileContext { - enteFile: EnteFile; - - newMlFile?: MlFileData; - - imageBitmap?: ImageBitmap; -} - /** * Return a {@link ImageBitmap}, using {@link localFile} if present otherwise * downloading the source image corresponding to {@link enteFile} from remote. 
@@ -92,18 +84,14 @@ const fetchOrCreateImageBitmap = async ( }; const indexFaces_ = async (enteFile: EnteFile, imageBitmap: ImageBitmap) => { - const fileContext: MLSyncFileContext = { enteFile }; - const fileID = enteFile.id; - const mlFile = (fileContext.newMlFile = { + const { width, height } = imageBitmap; + const mlFile: MlFileData = { fileId: fileID, mlVersion: defaultMLVersion, + imageDimensions: { width, height }, errorCount: 0, - } as MlFileData); - - fileContext.imageBitmap = imageBitmap; - const { width, height } = imageBitmap; - fileContext.newMlFile.imageDimensions = { width, height }; + }; const faceDetections = await detectFaces(imageBitmap); const detectedFaces = faceDetections.map((detection) => ({ From 3ade7b797ebe0dc5bfafbc702c9cd9e74da81195 Mon Sep 17 00:00:00 2001 From: Manav Rathi Date: Sat, 18 May 2024 20:50:35 +0530 Subject: [PATCH 27/27] Fix duplicate check --- web/apps/photos/src/services/face/f-index.ts | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/web/apps/photos/src/services/face/f-index.ts b/web/apps/photos/src/services/face/f-index.ts index 571dc52b3..dfdd1c926 100644 --- a/web/apps/photos/src/services/face/f-index.ts +++ b/web/apps/photos/src/services/face/f-index.ts @@ -293,13 +293,12 @@ const removeDuplicateDetections = ( ) => { detections.sort((a, b) => b.probability - a.probability); - const isSelected = new Map(); + const dupIndices = new Set(); for (let i = 0; i < detections.length; i++) { - if (!isSelected.get(i)) continue; + if (dupIndices.has(i)) continue; - isSelected.set(i, true); for (let j = i + 1; j < detections.length; j++) { - if (!isSelected.get(j)) continue; + if (dupIndices.has(j)) continue; const centeri = faceDetectionCenter(detections[i]); const centerj = faceDetectionCenter(detections[j]); @@ -308,11 +307,11 @@ const removeDuplicateDetections = ( [centerj.x, centerj.y], ); - if (dist <= withinDistance) isSelected.set(j, false); + if (dist <= withinDistance) dupIndices.add(j); } } - return detections.filter((_, i) => isSelected.get(i)); + return detections.filter((_, i) => !dupIndices.has(i)); }; const faceDetectionCenter = (detection: FaceDetection) => {