[web] ML cleanup - Part 4/x (#1761)

Manav Rathi 2024-05-18 09:28:02 +05:30 committed by GitHub
commit 1edafd3568
14 changed files with 1253 additions and 1424 deletions


@ -1,88 +0,0 @@
import { Matrix } from "ml-matrix";
import { Point } from "services/face/geom";
import { FaceAlignment, FaceDetection } from "services/face/types";
import { getSimilarityTransformation } from "similarity-transformation";
const ARCFACE_LANDMARKS = [
[38.2946, 51.6963],
[73.5318, 51.5014],
[56.0252, 71.7366],
[56.1396, 92.2848],
] as Array<[number, number]>;
const ARCFACE_LANDMARKS_FACE_SIZE = 112;
const ARC_FACE_5_LANDMARKS = [
[38.2946, 51.6963],
[73.5318, 51.5014],
[56.0252, 71.7366],
[41.5493, 92.3655],
[70.7299, 92.2041],
] as Array<[number, number]>;
/**
* Compute and return an {@link FaceAlignment} for the given face detection.
*
* @param faceDetection A geometry indicating a face detected in an image.
*/
export const faceAlignment = (faceDetection: FaceDetection): FaceAlignment => {
const landmarkCount = faceDetection.landmarks.length;
return getFaceAlignmentUsingSimilarityTransform(
faceDetection,
normalizeLandmarks(
landmarkCount === 5 ? ARC_FACE_5_LANDMARKS : ARCFACE_LANDMARKS,
ARCFACE_LANDMARKS_FACE_SIZE,
),
);
};
function getFaceAlignmentUsingSimilarityTransform(
faceDetection: FaceDetection,
alignedLandmarks: Array<[number, number]>,
): FaceAlignment {
const landmarksMat = new Matrix(
faceDetection.landmarks
.map((p) => [p.x, p.y])
.slice(0, alignedLandmarks.length),
).transpose();
const alignedLandmarksMat = new Matrix(alignedLandmarks).transpose();
const simTransform = getSimilarityTransformation(
landmarksMat,
alignedLandmarksMat,
);
const RS = Matrix.mul(simTransform.rotation, simTransform.scale);
const TR = simTransform.translation;
const affineMatrix = [
[RS.get(0, 0), RS.get(0, 1), TR.get(0, 0)],
[RS.get(1, 0), RS.get(1, 1), TR.get(1, 0)],
[0, 0, 1],
];
const size = 1 / simTransform.scale;
const meanTranslation = simTransform.toMean.sub(0.5).mul(size);
const centerMat = simTransform.fromMean.sub(meanTranslation);
const center = new Point(centerMat.get(0, 0), centerMat.get(1, 0));
const rotation = -Math.atan2(
simTransform.rotation.get(0, 1),
simTransform.rotation.get(0, 0),
);
return {
affineMatrix,
center,
size,
rotation,
};
}
function normalizeLandmarks(
landmarks: Array<[number, number]>,
faceSize: number,
): Array<[number, number]> {
return landmarks.map((landmark) =>
landmark.map((p) => p / faceSize),
) as Array<[number, number]>;
}


@ -1,187 +0,0 @@
import { Face } from "services/face/types";
import { createGrayscaleIntMatrixFromNormalized2List } from "utils/image";
import { mobileFaceNetFaceSize } from "./embed";
/**
* Laplacian blur detection.
*/
export const detectBlur = (
alignedFaces: Float32Array,
faces: Face[],
): number[] => {
const numFaces = Math.round(
alignedFaces.length /
(mobileFaceNetFaceSize * mobileFaceNetFaceSize * 3),
);
const blurValues: number[] = [];
for (let i = 0; i < numFaces; i++) {
const face = faces[i];
const direction = faceDirection(face);
const faceImage = createGrayscaleIntMatrixFromNormalized2List(
alignedFaces,
i,
);
const laplacian = applyLaplacian(faceImage, direction);
blurValues.push(matrixVariance(laplacian));
}
return blurValues;
};
type FaceDirection = "left" | "right" | "straight";
const faceDirection = (face: Face): FaceDirection => {
const landmarks = face.detection.landmarks;
const leftEye = landmarks[0];
const rightEye = landmarks[1];
const nose = landmarks[2];
const leftMouth = landmarks[3];
const rightMouth = landmarks[4];
const eyeDistanceX = Math.abs(rightEye.x - leftEye.x);
const eyeDistanceY = Math.abs(rightEye.y - leftEye.y);
const mouthDistanceY = Math.abs(rightMouth.y - leftMouth.y);
const faceIsUpright =
Math.max(leftEye.y, rightEye.y) + 0.5 * eyeDistanceY < nose.y &&
nose.y + 0.5 * mouthDistanceY < Math.min(leftMouth.y, rightMouth.y);
const noseStickingOutLeft =
nose.x < Math.min(leftEye.x, rightEye.x) &&
nose.x < Math.min(leftMouth.x, rightMouth.x);
const noseStickingOutRight =
nose.x > Math.max(leftEye.x, rightEye.x) &&
nose.x > Math.max(leftMouth.x, rightMouth.x);
const noseCloseToLeftEye =
Math.abs(nose.x - leftEye.x) < 0.2 * eyeDistanceX;
const noseCloseToRightEye =
Math.abs(nose.x - rightEye.x) < 0.2 * eyeDistanceX;
if (noseStickingOutLeft || (faceIsUpright && noseCloseToLeftEye)) {
return "left";
} else if (noseStickingOutRight || (faceIsUpright && noseCloseToRightEye)) {
return "right";
}
return "straight";
};
/**
* Return a new image by applying a Laplacian blur kernel to each pixel.
*/
const applyLaplacian = (
image: number[][],
direction: FaceDirection,
): number[][] => {
const paddedImage: number[][] = padImage(image, direction);
const numRows = paddedImage.length - 2;
const numCols = paddedImage[0].length - 2;
// Create an output image initialized to 0.
const outputImage: number[][] = Array.from({ length: numRows }, () =>
new Array(numCols).fill(0),
);
// Define the Laplacian kernel.
const kernel: number[][] = [
[0, 1, 0],
[1, -4, 1],
[0, 1, 0],
];
// Apply the kernel to each pixel
for (let i = 0; i < numRows; i++) {
for (let j = 0; j < numCols; j++) {
let sum = 0;
for (let ki = 0; ki < 3; ki++) {
for (let kj = 0; kj < 3; kj++) {
sum += paddedImage[i + ki][j + kj] * kernel[ki][kj];
}
}
// Adjust the output value if necessary (e.g., clipping).
outputImage[i][j] = sum;
}
}
return outputImage;
};
const padImage = (image: number[][], direction: FaceDirection): number[][] => {
const removeSideColumns = 56; /* must be even */
const numRows = image.length;
const numCols = image[0].length;
const paddedNumCols = numCols + 2 - removeSideColumns;
const paddedNumRows = numRows + 2;
// Create a new matrix with extra padding.
const paddedImage: number[][] = Array.from({ length: paddedNumRows }, () =>
new Array(paddedNumCols).fill(0),
);
if (direction === "straight") {
// Copy original image into the center of the padded image.
for (let i = 0; i < numRows; i++) {
for (let j = 0; j < paddedNumCols - 2; j++) {
paddedImage[i + 1][j + 1] =
image[i][j + Math.round(removeSideColumns / 2)];
}
}
} else if (direction === "left") {
// If the face is facing left, we only take the right side of the face image.
for (let i = 0; i < numRows; i++) {
for (let j = 0; j < paddedNumCols - 2; j++) {
paddedImage[i + 1][j + 1] = image[i][j + removeSideColumns];
}
}
} else if (direction === "right") {
// If the face is facing right, we only take the left side of the face image.
for (let i = 0; i < numRows; i++) {
for (let j = 0; j < paddedNumCols - 2; j++) {
paddedImage[i + 1][j + 1] = image[i][j];
}
}
}
// Reflect padding
// Top and bottom rows
for (let j = 1; j <= paddedNumCols - 2; j++) {
paddedImage[0][j] = paddedImage[2][j]; // Top row
paddedImage[numRows + 1][j] = paddedImage[numRows - 1][j]; // Bottom row
}
// Left and right columns
for (let i = 0; i < numRows + 2; i++) {
paddedImage[i][0] = paddedImage[i][2]; // Left column
paddedImage[i][paddedNumCols - 1] = paddedImage[i][paddedNumCols - 3]; // Right column
}
return paddedImage;
};
const matrixVariance = (matrix: number[][]): number => {
const numRows = matrix.length;
const numCols = matrix[0].length;
const totalElements = numRows * numCols;
// Calculate the mean.
let mean: number = 0;
matrix.forEach((row) => {
row.forEach((value) => {
mean += value;
});
});
mean /= totalElements;
// Calculate the variance.
let variance: number = 0;
matrix.forEach((row) => {
row.forEach((value) => {
const diff: number = value - mean;
variance += diff * diff;
});
});
variance /= totalElements;
return variance;
};


@ -1,32 +0,0 @@
import { Box, enlargeBox } from "services/face/geom";
import { FaceCrop, FaceDetection } from "services/face/types";
import { cropWithRotation } from "utils/image";
import { faceAlignment } from "./align";
export const getFaceCrop = (
imageBitmap: ImageBitmap,
faceDetection: FaceDetection,
): FaceCrop => {
const alignment = faceAlignment(faceDetection);
const padding = 0.25;
const maxSize = 256;
const alignmentBox = new Box({
x: alignment.center.x - alignment.size / 2,
y: alignment.center.y - alignment.size / 2,
width: alignment.size,
height: alignment.size,
}).round();
const scaleForPadding = 1 + padding * 2;
const paddedBox = enlargeBox(alignmentBox, scaleForPadding).round();
const faceImageBitmap = cropWithRotation(imageBitmap, paddedBox, 0, {
width: maxSize,
height: maxSize,
});
return {
image: faceImageBitmap,
imageBox: paddedBox,
};
};


@ -9,7 +9,7 @@ import {
openDB,
} from "idb";
import isElectron from "is-electron";
import { Face, MLLibraryData, MlFileData, Person } from "services/face/types";
import { Face, MlFileData, Person } from "services/face/types";
import {
DEFAULT_ML_SEARCH_CONFIG,
MAX_ML_SYNC_ERROR_COUNT,
@ -50,7 +50,7 @@ interface MLDb extends DBSchema {
};
library: {
key: string;
value: MLLibraryData;
value: unknown;
};
configs: {
key: string;
@ -177,6 +177,7 @@ class MLIDbStorage {
ML_SEARCH_CONFIG_NAME,
);
db.deleteObjectStore("library");
db.deleteObjectStore("things");
} catch {
// TODO: ignore for now as we finalize the new version
@ -400,16 +401,6 @@ class MLIDbStorage {
return db.put("versions", version, index);
}
public async getLibraryData() {
const db = await this.db;
return db.get("library", "data");
}
public async putLibraryData(data: MLLibraryData) {
const db = await this.db;
return db.put("library", data, "data");
}
public async getConfig<T extends Config>(name: string, def: T) {
const db = await this.db;
const tx = db.transaction("configs", "readwrite");


@ -1,316 +0,0 @@
import { workerBridge } from "@/next/worker/worker-bridge";
import { euclidean } from "hdbscan";
import {
Box,
Dimensions,
Point,
boxFromBoundingBox,
newBox,
} from "services/face/geom";
import { FaceDetection } from "services/face/types";
import {
Matrix,
applyToPoint,
compose,
scale,
translate,
} from "transformation-matrix";
import {
clamp,
getPixelBilinear,
normalizePixelBetween0And1,
} from "utils/image";
/**
* Detect faces in the given {@link imageBitmap}.
*
* The model used is YOLO, running in an ONNX runtime.
*/
export const detectFaces = async (
imageBitmap: ImageBitmap,
): Promise<Array<FaceDetection>> => {
const maxFaceDistancePercent = Math.sqrt(2) / 100;
const maxFaceDistance = imageBitmap.width * maxFaceDistancePercent;
const preprocessResult = preprocessImageBitmapToFloat32ChannelsFirst(
imageBitmap,
640,
640,
);
const data = preprocessResult.data;
const resized = preprocessResult.newSize;
const outputData = await workerBridge.detectFaces(data);
const faces = getFacesFromYOLOOutput(outputData as Float32Array, 0.7);
const inBox = newBox(0, 0, resized.width, resized.height);
const toBox = newBox(0, 0, imageBitmap.width, imageBitmap.height);
const transform = computeTransformToBox(inBox, toBox);
const faceDetections: Array<FaceDetection> = faces?.map((f) => {
const box = transformBox(f.box, transform);
const normLandmarks = f.landmarks;
const landmarks = transformPoints(normLandmarks, transform);
return {
box,
landmarks,
probability: f.probability as number,
} as FaceDetection;
});
return removeDuplicateDetections(faceDetections, maxFaceDistance);
};
const preprocessImageBitmapToFloat32ChannelsFirst = (
imageBitmap: ImageBitmap,
requiredWidth: number,
requiredHeight: number,
maintainAspectRatio: boolean = true,
normFunction: (pixelValue: number) => number = normalizePixelBetween0And1,
) => {
// Create an OffscreenCanvas and set its size.
const offscreenCanvas = new OffscreenCanvas(
imageBitmap.width,
imageBitmap.height,
);
const ctx = offscreenCanvas.getContext("2d");
ctx.drawImage(imageBitmap, 0, 0, imageBitmap.width, imageBitmap.height);
const imageData = ctx.getImageData(
0,
0,
imageBitmap.width,
imageBitmap.height,
);
const pixelData = imageData.data;
let scaleW = requiredWidth / imageBitmap.width;
let scaleH = requiredHeight / imageBitmap.height;
if (maintainAspectRatio) {
const scale = Math.min(
requiredWidth / imageBitmap.width,
requiredHeight / imageBitmap.height,
);
scaleW = scale;
scaleH = scale;
}
const scaledWidth = clamp(
Math.round(imageBitmap.width * scaleW),
0,
requiredWidth,
);
const scaledHeight = clamp(
Math.round(imageBitmap.height * scaleH),
0,
requiredHeight,
);
const processedImage = new Float32Array(
1 * 3 * requiredWidth * requiredHeight,
);
// Populate the Float32Array with normalized pixel values
let pixelIndex = 0;
const channelOffsetGreen = requiredHeight * requiredWidth;
const channelOffsetBlue = 2 * requiredHeight * requiredWidth;
for (let h = 0; h < requiredHeight; h++) {
for (let w = 0; w < requiredWidth; w++) {
let pixel: {
r: number;
g: number;
b: number;
};
if (w >= scaledWidth || h >= scaledHeight) {
pixel = { r: 114, g: 114, b: 114 };
} else {
pixel = getPixelBilinear(
w / scaleW,
h / scaleH,
pixelData,
imageBitmap.width,
imageBitmap.height,
);
}
processedImage[pixelIndex] = normFunction(pixel.r);
processedImage[pixelIndex + channelOffsetGreen] = normFunction(
pixel.g,
);
processedImage[pixelIndex + channelOffsetBlue] = normFunction(
pixel.b,
);
pixelIndex++;
}
}
return {
data: processedImage,
originalSize: {
width: imageBitmap.width,
height: imageBitmap.height,
},
newSize: { width: scaledWidth, height: scaledHeight },
};
};
/**
* @param rowOutput A Float32Array of shape [25200, 16], where each row
* represents a bounding box.
*/
const getFacesFromYOLOOutput = (
rowOutput: Float32Array,
minScore: number,
): Array<FaceDetection> => {
const faces: Array<FaceDetection> = [];
// Iterate over each row.
for (let i = 0; i < rowOutput.length; i += 16) {
const score = rowOutput[i + 4];
if (score < minScore) {
continue;
}
// The first 4 values represent the bounding box's coordinates:
//
// (x1, y1, x2, y2)
//
const xCenter = rowOutput[i];
const yCenter = rowOutput[i + 1];
const width = rowOutput[i + 2];
const height = rowOutput[i + 3];
const xMin = xCenter - width / 2.0; // topLeft
const yMin = yCenter - height / 2.0; // topLeft
const leftEyeX = rowOutput[i + 5];
const leftEyeY = rowOutput[i + 6];
const rightEyeX = rowOutput[i + 7];
const rightEyeY = rowOutput[i + 8];
const noseX = rowOutput[i + 9];
const noseY = rowOutput[i + 10];
const leftMouthX = rowOutput[i + 11];
const leftMouthY = rowOutput[i + 12];
const rightMouthX = rowOutput[i + 13];
const rightMouthY = rowOutput[i + 14];
const box = new Box({
x: xMin,
y: yMin,
width: width,
height: height,
});
const probability = score as number;
const landmarks = [
new Point(leftEyeX, leftEyeY),
new Point(rightEyeX, rightEyeY),
new Point(noseX, noseY),
new Point(leftMouthX, leftMouthY),
new Point(rightMouthX, rightMouthY),
];
faces.push({ box, landmarks, probability });
}
return faces;
};
export const getRelativeDetection = (
faceDetection: FaceDetection,
dimensions: Dimensions,
): FaceDetection => {
const oldBox: Box = faceDetection.box;
const box = new Box({
x: oldBox.x / dimensions.width,
y: oldBox.y / dimensions.height,
width: oldBox.width / dimensions.width,
height: oldBox.height / dimensions.height,
});
const oldLandmarks: Point[] = faceDetection.landmarks;
const landmarks = oldLandmarks.map((l) => {
return new Point(l.x / dimensions.width, l.y / dimensions.height);
});
const probability = faceDetection.probability;
return { box, landmarks, probability };
};
/**
* Removes duplicate face detections from an array of detections.
*
* This function sorts the detections by their probability in descending order,
* then iterates over them.
*
* For each detection, it calculates the Euclidean distance to all other
* detections.
*
* If the distance is less than or equal to the specified threshold
* (`withinDistance`), the other detection is considered a duplicate and is
* removed.
*
* @param detections - An array of face detections to remove duplicates from.
*
* @param withinDistance - The maximum Euclidean distance between two detections
* for them to be considered duplicates.
*
* @returns An array of face detections with duplicates removed.
*/
const removeDuplicateDetections = (
detections: Array<FaceDetection>,
withinDistance: number,
) => {
detections.sort((a, b) => b.probability - a.probability);
const isSelected = new Map<number, boolean>();
for (let i = 0; i < detections.length; i++) {
if (isSelected.get(i) === false) {
continue;
}
isSelected.set(i, true);
for (let j = i + 1; j < detections.length; j++) {
if (isSelected.get(j) === false) {
continue;
}
const centeri = getDetectionCenter(detections[i]);
const centerj = getDetectionCenter(detections[j]);
const dist = euclidean(
[centeri.x, centeri.y],
[centerj.x, centerj.y],
);
if (dist <= withinDistance) {
isSelected.set(j, false);
}
}
}
const uniques: Array<FaceDetection> = [];
for (let i = 0; i < detections.length; i++) {
isSelected.get(i) && uniques.push(detections[i]);
}
return uniques;
};
function getDetectionCenter(detection: FaceDetection) {
const center = new Point(0, 0);
// TODO: The first 4 landmarks are applicable to blazeface only;
// this needs to consider the eye, nose and mouth landmarks to compute the center.
detection.landmarks?.slice(0, 4).forEach((p) => {
center.x += p.x;
center.y += p.y;
});
return new Point(center.x / 4, center.y / 4);
}
function computeTransformToBox(inBox: Box, toBox: Box): Matrix {
return compose(
translate(toBox.x, toBox.y),
scale(toBox.width / inBox.width, toBox.height / inBox.height),
);
}
function transformPoint(point: Point, transform: Matrix) {
const txdPoint = applyToPoint(transform, point);
return new Point(txdPoint.x, txdPoint.y);
}
function transformPoints(points: Point[], transform: Matrix) {
return points?.map((p) => transformPoint(p, transform));
}
function transformBox(box: Box, transform: Matrix) {
const topLeft = transformPoint(box.topLeft, transform);
const bottomRight = transformPoint(box.bottomRight, transform);
return boxFromBoundingBox({
left: topLeft.x,
top: topLeft.y,
right: bottomRight.x,
bottom: bottomRight.y,
});
}


@ -1,26 +0,0 @@
import { workerBridge } from "@/next/worker/worker-bridge";
import { FaceEmbedding } from "services/face/types";
export const mobileFaceNetFaceSize = 112;
/**
* Compute embeddings for the given {@link faceData}.
*
* The model used is MobileFaceNet, running in an ONNX runtime.
*/
export const faceEmbeddings = async (
faceData: Float32Array,
): Promise<Array<FaceEmbedding>> => {
const outputData = await workerBridge.faceEmbeddings(faceData);
const embeddingSize = 192;
const embeddings = new Array<FaceEmbedding>(
outputData.length / embeddingSize,
);
for (let i = 0; i < embeddings.length; i++) {
embeddings[i] = new Float32Array(
outputData.slice(i * embeddingSize, (i + 1) * embeddingSize),
);
}
return embeddings;
};


@ -1,26 +1,76 @@
import { openCache } from "@/next/blob-cache";
import log from "@/next/log";
import { faceAlignment } from "services/face/align";
import mlIDbStorage from "services/face/db";
import { detectFaces, getRelativeDetection } from "services/face/detect";
import { faceEmbeddings, mobileFaceNetFaceSize } from "services/face/embed";
import { workerBridge } from "@/next/worker/worker-bridge";
import { euclidean } from "hdbscan";
import { Matrix } from "ml-matrix";
import { Box, Dimensions, Point, enlargeBox, newBox } from "services/face/geom";
import {
DetectedFace,
Face,
FaceAlignment,
FaceCrop,
FaceDetection,
FaceEmbedding,
MLSyncFileContext,
type FaceAlignment,
type MlFileData,
} from "services/face/types";
import { imageBitmapToBlob, warpAffineFloat32List } from "utils/image";
import { detectBlur } from "./blur";
import { getFaceCrop } from "./crop";
import { defaultMLVersion } from "services/machineLearning/machineLearningService";
import { getSimilarityTransformation } from "similarity-transformation";
import type { EnteFile } from "types/file";
import {
fetchImageBitmap,
clamp,
createGrayscaleIntMatrixFromNormalized2List,
cropWithRotation,
fetchImageBitmapForContext,
getFaceId,
getLocalFile,
getPixelBilinear,
imageBitmapToBlob,
normalizePixelBetween0And1,
warpAffineFloat32List,
} from "./image";
import { transformFaceDetections } from "./transform-box";
export const syncFileAnalyzeFaces = async (fileContext: MLSyncFileContext) => {
/**
* Index faces in the given file.
*
* This function is the entry point to the indexing pipeline. The file goes
* through various stages:
*
* 1. Download the original image if needed.
* 2. Detect faces using ONNX/YOLO.
* 3. Align the face rectangles and compute blur.
* 4. Compute embeddings for the detected face crops.
*
* Once all of this is done, it returns the face rectangles and embeddings to
* the higher layer (which saves them locally for offline use, and encrypts and
* uploads them to the user's remote storage so that their other devices can
* download them instead of needing to reindex).
*/
export const indexFaces = async (
enteFile: EnteFile,
localFile?: globalThis.File,
) => {
log.debug(() => ({ a: "Indexing faces in file", enteFile }));
const fileContext: MLSyncFileContext = { enteFile, localFile };
const newMlFile = (fileContext.newMlFile = {
fileId: enteFile.id,
mlVersion: defaultMLVersion,
errorCount: 0,
} as MlFileData);
try {
await fetchImageBitmapForContext(fileContext);
await syncFileAnalyzeFaces(fileContext);
newMlFile.errorCount = 0;
} finally {
fileContext.imageBitmap && fileContext.imageBitmap.close();
}
return newMlFile;
};
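// Editor's note: an illustrative sketch, not part of this diff, showing how a
// caller might drive the pipeline documented above. It only uses names that
// are already in scope in this module (indexFaces, log, EnteFile).
const exampleIndexAndLog = async (
    enteFile: EnteFile,
    localFile?: globalThis.File,
) => {
    // Runs download → detect → align/blur → embed, and returns the per-file
    // ML data (faces with boxes, landmarks, blur values and embeddings).
    const mlFileData = await indexFaces(enteFile, localFile);
    log.info("Indexed faces:", mlFileData.faces?.length ?? 0);
    return mlFileData;
};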
const syncFileAnalyzeFaces = async (fileContext: MLSyncFileContext) => {
const { newMlFile } = fileContext;
const startTime = Date.now();
@ -43,10 +93,6 @@ export const syncFileAnalyzeFaces = async (fileContext: MLSyncFileContext) => {
const syncFileFaceDetections = async (fileContext: MLSyncFileContext) => {
const { newMlFile } = fileContext;
newMlFile.faceDetectionMethod = {
value: "YoloFace",
version: 1,
};
fileContext.newDetection = true;
const imageBitmap = await fetchImageBitmapForContext(fileContext);
const faceDetections = await detectFaces(imageBitmap);
@ -67,14 +113,265 @@ const syncFileFaceDetections = async (fileContext: MLSyncFileContext) => {
log.info("[MLService] Detected Faces: ", newMlFile.faces?.length);
};
/**
* Detect faces in the given {@link imageBitmap}.
*
* The model used is YOLO, running in an ONNX runtime.
*/
const detectFaces = async (
imageBitmap: ImageBitmap,
): Promise<Array<FaceDetection>> => {
const maxFaceDistancePercent = Math.sqrt(2) / 100;
const maxFaceDistance = imageBitmap.width * maxFaceDistancePercent;
const preprocessResult = preprocessImageBitmapToFloat32ChannelsFirst(
imageBitmap,
640,
640,
);
const data = preprocessResult.data;
const resized = preprocessResult.newSize;
const outputData = await workerBridge.detectFaces(data);
const faces = getFacesFromYOLOOutput(outputData as Float32Array, 0.7);
const inBox = newBox(0, 0, resized.width, resized.height);
const toBox = newBox(0, 0, imageBitmap.width, imageBitmap.height);
const faceDetections = transformFaceDetections(faces, inBox, toBox);
return removeDuplicateDetections(faceDetections, maxFaceDistance);
};
const preprocessImageBitmapToFloat32ChannelsFirst = (
imageBitmap: ImageBitmap,
requiredWidth: number,
requiredHeight: number,
maintainAspectRatio: boolean = true,
normFunction: (pixelValue: number) => number = normalizePixelBetween0And1,
) => {
// Create an OffscreenCanvas and set its size.
const offscreenCanvas = new OffscreenCanvas(
imageBitmap.width,
imageBitmap.height,
);
const ctx = offscreenCanvas.getContext("2d");
ctx.drawImage(imageBitmap, 0, 0, imageBitmap.width, imageBitmap.height);
const imageData = ctx.getImageData(
0,
0,
imageBitmap.width,
imageBitmap.height,
);
const pixelData = imageData.data;
let scaleW = requiredWidth / imageBitmap.width;
let scaleH = requiredHeight / imageBitmap.height;
if (maintainAspectRatio) {
const scale = Math.min(
requiredWidth / imageBitmap.width,
requiredHeight / imageBitmap.height,
);
scaleW = scale;
scaleH = scale;
}
const scaledWidth = clamp(
Math.round(imageBitmap.width * scaleW),
0,
requiredWidth,
);
const scaledHeight = clamp(
Math.round(imageBitmap.height * scaleH),
0,
requiredHeight,
);
const processedImage = new Float32Array(
1 * 3 * requiredWidth * requiredHeight,
);
// Populate the Float32Array with normalized pixel values
let pixelIndex = 0;
const channelOffsetGreen = requiredHeight * requiredWidth;
const channelOffsetBlue = 2 * requiredHeight * requiredWidth;
for (let h = 0; h < requiredHeight; h++) {
for (let w = 0; w < requiredWidth; w++) {
let pixel: {
r: number;
g: number;
b: number;
};
if (w >= scaledWidth || h >= scaledHeight) {
pixel = { r: 114, g: 114, b: 114 };
} else {
pixel = getPixelBilinear(
w / scaleW,
h / scaleH,
pixelData,
imageBitmap.width,
imageBitmap.height,
);
}
processedImage[pixelIndex] = normFunction(pixel.r);
processedImage[pixelIndex + channelOffsetGreen] = normFunction(
pixel.g,
);
processedImage[pixelIndex + channelOffsetBlue] = normFunction(
pixel.b,
);
pixelIndex++;
}
}
return {
data: processedImage,
originalSize: {
width: imageBitmap.width,
height: imageBitmap.height,
},
newSize: { width: scaledWidth, height: scaledHeight },
};
};
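// Editor's note: an illustrative sketch, not part of this diff. The
// Float32Array produced above is laid out channels-first (1 × 3 × H × W):
// the full red plane first, then green, then blue, matching the
// channelOffsetGreen / channelOffsetBlue offsets used in the loop. The value
// of channel c (0 = R, 1 = G, 2 = B) at pixel (x, y) would thus live at:
const chwIndex = (c: number, x: number, y: number, width: number, height: number) =>
    c * height * width + y * width + x;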
/**
* @param rowOutput A Float32Array of shape [25200, 16], where each row
* represents a bounding box.
*/
const getFacesFromYOLOOutput = (
rowOutput: Float32Array,
minScore: number,
): Array<FaceDetection> => {
const faces: Array<FaceDetection> = [];
// Iterate over each row.
for (let i = 0; i < rowOutput.length; i += 16) {
const score = rowOutput[i + 4];
if (score < minScore) {
continue;
}
// The first 4 values represent the bounding box's coordinates:
//
// (x1, y1, x2, y2)
//
const xCenter = rowOutput[i];
const yCenter = rowOutput[i + 1];
const width = rowOutput[i + 2];
const height = rowOutput[i + 3];
const xMin = xCenter - width / 2.0; // topLeft
const yMin = yCenter - height / 2.0; // topLeft
const leftEyeX = rowOutput[i + 5];
const leftEyeY = rowOutput[i + 6];
const rightEyeX = rowOutput[i + 7];
const rightEyeY = rowOutput[i + 8];
const noseX = rowOutput[i + 9];
const noseY = rowOutput[i + 10];
const leftMouthX = rowOutput[i + 11];
const leftMouthY = rowOutput[i + 12];
const rightMouthX = rowOutput[i + 13];
const rightMouthY = rowOutput[i + 14];
const box = new Box({
x: xMin,
y: yMin,
width: width,
height: height,
});
const probability = score as number;
const landmarks = [
new Point(leftEyeX, leftEyeY),
new Point(rightEyeX, rightEyeY),
new Point(noseX, noseY),
new Point(leftMouthX, leftMouthY),
new Point(rightMouthX, rightMouthY),
];
faces.push({ box, landmarks, probability });
}
return faces;
};
const getRelativeDetection = (
faceDetection: FaceDetection,
dimensions: Dimensions,
): FaceDetection => {
const oldBox: Box = faceDetection.box;
const box = new Box({
x: oldBox.x / dimensions.width,
y: oldBox.y / dimensions.height,
width: oldBox.width / dimensions.width,
height: oldBox.height / dimensions.height,
});
const oldLandmarks: Point[] = faceDetection.landmarks;
const landmarks = oldLandmarks.map((l) => {
return new Point(l.x / dimensions.width, l.y / dimensions.height);
});
const probability = faceDetection.probability;
return { box, landmarks, probability };
};
/**
* Removes duplicate face detections from an array of detections.
*
* This function sorts the detections by their probability in descending order,
* then iterates over them.
*
* For each detection, it calculates the Euclidean distance to all other
* detections.
*
* If the distance is less than or equal to the specified threshold
* (`withinDistance`), the other detection is considered a duplicate and is
* removed.
*
* @param detections - An array of face detections to remove duplicates from.
*
* @param withinDistance - The maximum Euclidean distance between two detections
* for them to be considered duplicates.
*
* @returns An array of face detections with duplicates removed.
*/
const removeDuplicateDetections = (
detections: Array<FaceDetection>,
withinDistance: number,
) => {
detections.sort((a, b) => b.probability - a.probability);
const isSelected = new Map<number, boolean>();
for (let i = 0; i < detections.length; i++) {
if (isSelected.get(i) === false) {
continue;
}
isSelected.set(i, true);
for (let j = i + 1; j < detections.length; j++) {
if (isSelected.get(j) === false) {
continue;
}
const centeri = getDetectionCenter(detections[i]);
const centerj = getDetectionCenter(detections[j]);
const dist = euclidean(
[centeri.x, centeri.y],
[centerj.x, centerj.y],
);
if (dist <= withinDistance) {
isSelected.set(j, false);
}
}
}
const uniques: Array<FaceDetection> = [];
for (let i = 0; i < detections.length; i++) {
isSelected.get(i) && uniques.push(detections[i]);
}
return uniques;
};
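// Editor's note: an illustrative sketch, not part of this diff, of the
// deduplication behaviour documented above: of two detections whose landmark
// centers are within `withinDistance`, only the higher probability one
// survives. All values below are assumed, for illustration only.
const exampleDedup = () => {
    const landmarksAt = (x: number, y: number) =>
        [0, 1, 2, 3, 4].map(() => new Point(x, y));
    const a: FaceDetection = {
        box: new Box({ x: 100, y: 100, width: 50, height: 50 }),
        landmarks: landmarksAt(125, 125),
        probability: 0.9,
    };
    const b: FaceDetection = {
        box: new Box({ x: 102, y: 101, width: 50, height: 50 }),
        landmarks: landmarksAt(127, 126),
        probability: 0.8,
    };
    // Centers are ~2.2 px apart, so with withinDistance = 10 only `a` remains.
    return removeDuplicateDetections([a, b], 10);
};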
function getDetectionCenter(detection: FaceDetection) {
const center = new Point(0, 0);
// TODO: The first 4 landmarks are applicable to blazeface only;
// this needs to consider the eye, nose and mouth landmarks to compute the center.
detection.landmarks?.slice(0, 4).forEach((p) => {
center.x += p.x;
center.y += p.y;
});
return new Point(center.x / 4, center.y / 4);
}
const syncFileFaceCrops = async (fileContext: MLSyncFileContext) => {
const { newMlFile } = fileContext;
const imageBitmap = await fetchImageBitmapForContext(fileContext);
newMlFile.faceCropMethod = {
value: "ArcFace",
version: 1,
};
for (const face of newMlFile.faces) {
await saveFaceCrop(imageBitmap, face);
}
@ -84,10 +381,6 @@ const syncFileFaceAlignments = async (
fileContext: MLSyncFileContext,
): Promise<Float32Array> => {
const { newMlFile } = fileContext;
newMlFile.faceAlignmentMethod = {
value: "ArcFace",
version: 1,
};
fileContext.newAlignment = true;
const imageBitmap =
fileContext.imageBitmap ||
@ -113,15 +406,277 @@ const syncFileFaceAlignments = async (
return faceImages;
};
// TODO-ML(MR): When is this used, or is it a Blazeface leftover?
const ARCFACE_LANDMARKS = [
[38.2946, 51.6963],
[73.5318, 51.5014],
[56.0252, 71.7366],
[56.1396, 92.2848],
] as Array<[number, number]>;
const ARCFACE_LANDMARKS_FACE_SIZE = 112;
const ARC_FACE_5_LANDMARKS = [
[38.2946, 51.6963],
[73.5318, 51.5014],
[56.0252, 71.7366],
[41.5493, 92.3655],
[70.7299, 92.2041],
] as Array<[number, number]>;
/**
* Compute and return an {@link FaceAlignment} for the given face detection.
*
* @param faceDetection A geometry indicating a face detected in an image.
*/
const faceAlignment = (faceDetection: FaceDetection): FaceAlignment => {
const landmarkCount = faceDetection.landmarks.length;
return getFaceAlignmentUsingSimilarityTransform(
faceDetection,
normalizeLandmarks(
landmarkCount === 5 ? ARC_FACE_5_LANDMARKS : ARCFACE_LANDMARKS,
ARCFACE_LANDMARKS_FACE_SIZE,
),
);
};
function getFaceAlignmentUsingSimilarityTransform(
faceDetection: FaceDetection,
alignedLandmarks: Array<[number, number]>,
): FaceAlignment {
const landmarksMat = new Matrix(
faceDetection.landmarks
.map((p) => [p.x, p.y])
.slice(0, alignedLandmarks.length),
).transpose();
const alignedLandmarksMat = new Matrix(alignedLandmarks).transpose();
const simTransform = getSimilarityTransformation(
landmarksMat,
alignedLandmarksMat,
);
const RS = Matrix.mul(simTransform.rotation, simTransform.scale);
const TR = simTransform.translation;
const affineMatrix = [
[RS.get(0, 0), RS.get(0, 1), TR.get(0, 0)],
[RS.get(1, 0), RS.get(1, 1), TR.get(1, 0)],
[0, 0, 1],
];
const size = 1 / simTransform.scale;
const meanTranslation = simTransform.toMean.sub(0.5).mul(size);
const centerMat = simTransform.fromMean.sub(meanTranslation);
const center = new Point(centerMat.get(0, 0), centerMat.get(1, 0));
const rotation = -Math.atan2(
simTransform.rotation.get(0, 1),
simTransform.rotation.get(0, 0),
);
return {
affineMatrix,
center,
size,
rotation,
};
}
function normalizeLandmarks(
landmarks: Array<[number, number]>,
faceSize: number,
): Array<[number, number]> {
return landmarks.map((landmark) =>
landmark.map((p) => p / faceSize),
) as Array<[number, number]>;
}
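// Editor's note: an illustrative note, not part of this diff. The affine
// matrix computed above has the usual similarity-transform structure
//
//     [ s·R  t ]          p' = s·R·p + t
//     [  0   1 ]
//
// expressed in normalized coordinates (the canonical ArcFace landmarks were
// divided by the face size above). Applying it to a detected landmark maps
// that landmark onto its canonical position:
const applyAlignmentToPoint = (alignment: FaceAlignment, p: Point): Point => {
    const m = alignment.affineMatrix;
    return new Point(
        m[0][0] * p.x + m[0][1] * p.y + m[0][2],
        m[1][0] * p.x + m[1][1] * p.y + m[1][2],
    );
};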
/**
* Laplacian blur detection.
*/
const detectBlur = (alignedFaces: Float32Array, faces: Face[]): number[] => {
const numFaces = Math.round(
alignedFaces.length /
(mobileFaceNetFaceSize * mobileFaceNetFaceSize * 3),
);
const blurValues: number[] = [];
for (let i = 0; i < numFaces; i++) {
const face = faces[i];
const direction = faceDirection(face);
const faceImage = createGrayscaleIntMatrixFromNormalized2List(
alignedFaces,
i,
);
const laplacian = applyLaplacian(faceImage, direction);
blurValues.push(matrixVariance(laplacian));
}
return blurValues;
};
type FaceDirection = "left" | "right" | "straight";
const faceDirection = (face: Face): FaceDirection => {
const landmarks = face.detection.landmarks;
const leftEye = landmarks[0];
const rightEye = landmarks[1];
const nose = landmarks[2];
const leftMouth = landmarks[3];
const rightMouth = landmarks[4];
const eyeDistanceX = Math.abs(rightEye.x - leftEye.x);
const eyeDistanceY = Math.abs(rightEye.y - leftEye.y);
const mouthDistanceY = Math.abs(rightMouth.y - leftMouth.y);
const faceIsUpright =
Math.max(leftEye.y, rightEye.y) + 0.5 * eyeDistanceY < nose.y &&
nose.y + 0.5 * mouthDistanceY < Math.min(leftMouth.y, rightMouth.y);
const noseStickingOutLeft =
nose.x < Math.min(leftEye.x, rightEye.x) &&
nose.x < Math.min(leftMouth.x, rightMouth.x);
const noseStickingOutRight =
nose.x > Math.max(leftEye.x, rightEye.x) &&
nose.x > Math.max(leftMouth.x, rightMouth.x);
const noseCloseToLeftEye =
Math.abs(nose.x - leftEye.x) < 0.2 * eyeDistanceX;
const noseCloseToRightEye =
Math.abs(nose.x - rightEye.x) < 0.2 * eyeDistanceX;
if (noseStickingOutLeft || (faceIsUpright && noseCloseToLeftEye)) {
return "left";
} else if (noseStickingOutRight || (faceIsUpright && noseCloseToRightEye)) {
return "right";
}
return "straight";
};
/**
* Return a new image by applying a Laplacian blur kernel to each pixel.
*/
const applyLaplacian = (
image: number[][],
direction: FaceDirection,
): number[][] => {
const paddedImage: number[][] = padImage(image, direction);
const numRows = paddedImage.length - 2;
const numCols = paddedImage[0].length - 2;
// Create an output image initialized to 0.
const outputImage: number[][] = Array.from({ length: numRows }, () =>
new Array(numCols).fill(0),
);
// Define the Laplacian kernel.
const kernel: number[][] = [
[0, 1, 0],
[1, -4, 1],
[0, 1, 0],
];
// Apply the kernel to each pixel
for (let i = 0; i < numRows; i++) {
for (let j = 0; j < numCols; j++) {
let sum = 0;
for (let ki = 0; ki < 3; ki++) {
for (let kj = 0; kj < 3; kj++) {
sum += paddedImage[i + ki][j + kj] * kernel[ki][kj];
}
}
// Adjust the output value if necessary (e.g., clipping).
outputImage[i][j] = sum;
}
}
return outputImage;
};
const padImage = (image: number[][], direction: FaceDirection): number[][] => {
const removeSideColumns = 56; /* must be even */
const numRows = image.length;
const numCols = image[0].length;
const paddedNumCols = numCols + 2 - removeSideColumns;
const paddedNumRows = numRows + 2;
// Create a new matrix with extra padding.
const paddedImage: number[][] = Array.from({ length: paddedNumRows }, () =>
new Array(paddedNumCols).fill(0),
);
if (direction === "straight") {
// Copy original image into the center of the padded image.
for (let i = 0; i < numRows; i++) {
for (let j = 0; j < paddedNumCols - 2; j++) {
paddedImage[i + 1][j + 1] =
image[i][j + Math.round(removeSideColumns / 2)];
}
}
} else if (direction === "left") {
// If the face is facing left, we only take the right side of the face image.
for (let i = 0; i < numRows; i++) {
for (let j = 0; j < paddedNumCols - 2; j++) {
paddedImage[i + 1][j + 1] = image[i][j + removeSideColumns];
}
}
} else if (direction === "right") {
// If the face is facing right, we only take the left side of the face image.
for (let i = 0; i < numRows; i++) {
for (let j = 0; j < paddedNumCols - 2; j++) {
paddedImage[i + 1][j + 1] = image[i][j];
}
}
}
// Reflect padding
// Top and bottom rows
for (let j = 1; j <= paddedNumCols - 2; j++) {
paddedImage[0][j] = paddedImage[2][j]; // Top row
paddedImage[numRows + 1][j] = paddedImage[numRows - 1][j]; // Bottom row
}
// Left and right columns
for (let i = 0; i < numRows + 2; i++) {
paddedImage[i][0] = paddedImage[i][2]; // Left column
paddedImage[i][paddedNumCols - 1] = paddedImage[i][paddedNumCols - 3]; // Right column
}
return paddedImage;
};
const matrixVariance = (matrix: number[][]): number => {
const numRows = matrix.length;
const numCols = matrix[0].length;
const totalElements = numRows * numCols;
// Calculate the mean.
let mean: number = 0;
matrix.forEach((row) => {
row.forEach((value) => {
mean += value;
});
});
mean /= totalElements;
// Calculate the variance.
let variance: number = 0;
matrix.forEach((row) => {
row.forEach((value) => {
const diff: number = value - mean;
variance += diff * diff;
});
});
variance /= totalElements;
return variance;
};
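// Editor's note: an illustrative sketch, not part of this diff. The blur
// score computed by detectBlur above is the classic "variance of the
// Laplacian": sharp images have strong edges and hence a widely varying
// Laplacian response, while blurry images give a nearly flat response. A
// self-contained example on an assumed 112 × 112 synthetic patch with a
// vertical edge whose contrast is controlled by `contrast`:
const exampleBlurScore = (contrast: number) => {
    const size = 112;
    const patch = Array.from({ length: size }, () =>
        Array.from({ length: size }, (_, x) => (x < size / 2 ? 0 : contrast)),
    );
    // Higher contrast (a sharper edge) yields a larger Laplacian variance.
    return matrixVariance(applyLaplacian(patch, "straight"));
};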
const syncFileFaceEmbeddings = async (
fileContext: MLSyncFileContext,
alignedFacesInput: Float32Array,
) => {
const { newMlFile } = fileContext;
newMlFile.faceEmbeddingMethod = {
value: "MobileFaceNet",
version: 2,
};
// TODO: when not storing face crops, image will be needed to extract faces
// fileContext.imageBitmap ||
// (await this.getImageBitmap(fileContext));
@ -132,6 +687,30 @@ const syncFileFaceEmbeddings = async (
log.info("[MLService] facesWithEmbeddings: ", newMlFile.faces.length);
};
const mobileFaceNetFaceSize = 112;
/**
* Compute embeddings for the given {@link faceData}.
*
* The model used is MobileFaceNet, running in an ONNX runtime.
*/
const faceEmbeddings = async (
faceData: Float32Array,
): Promise<Array<FaceEmbedding>> => {
const outputData = await workerBridge.faceEmbeddings(faceData);
const embeddingSize = 192;
const embeddings = new Array<FaceEmbedding>(
outputData.length / embeddingSize,
);
for (let i = 0; i < embeddings.length; i++) {
embeddings[i] = new Float32Array(
outputData.slice(i * embeddingSize, (i + 1) * embeddingSize),
);
}
return embeddings;
};
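// Editor's note: an illustrative sketch, not part of this diff. Each
// embedding is a 192-dimensional Float32Array; crops of the same person are
// expected to lie closer together than crops of different people. The
// comparison below reuses the euclidean metric already imported for
// clustering; the 0.8 threshold is an assumed, illustrative value only.
const exampleLooksLikeSamePerson = (a: FaceEmbedding, b: FaceEmbedding) =>
    euclidean(Array.from(a), Array.from(b)) < 0.8;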
const syncFileFaceMakeRelativeDetections = async (
fileContext: MLSyncFileContext,
) => {
@ -159,16 +738,32 @@ export const saveFaceCrop = async (imageBitmap: ImageBitmap, face: Face) => {
return blob;
};
export const regenerateFaceCrop = async (faceID: string) => {
const fileID = Number(faceID.split("-")[0]);
const personFace = await mlIDbStorage.getFace(fileID, faceID);
if (!personFace) {
throw Error("Face not found");
}
const getFaceCrop = (
imageBitmap: ImageBitmap,
faceDetection: FaceDetection,
): FaceCrop => {
const alignment = faceAlignment(faceDetection);
const file = await getLocalFile(personFace.fileId);
const imageBitmap = await fetchImageBitmap(file);
return await saveFaceCrop(imageBitmap, personFace);
const padding = 0.25;
const maxSize = 256;
const alignmentBox = new Box({
x: alignment.center.x - alignment.size / 2,
y: alignment.center.y - alignment.size / 2,
width: alignment.size,
height: alignment.size,
}).round();
const scaleForPadding = 1 + padding * 2;
const paddedBox = enlargeBox(alignmentBox, scaleForPadding).round();
const faceImageBitmap = cropWithRotation(imageBitmap, paddedBox, 0, {
width: maxSize,
height: maxSize,
});
return {
image: faceImageBitmap,
imageBox: paddedBox,
};
};
async function extractFaceImagesToFloat32(


@ -1,13 +1,17 @@
import { FILE_TYPE } from "@/media/file-type";
import { decodeLivePhoto } from "@/media/live-photo";
import log from "@/next/log";
import { Matrix, inverse } from "ml-matrix";
import DownloadManager from "services/download";
import { Dimensions } from "services/face/geom";
import { DetectedFace, MLSyncFileContext } from "services/face/types";
import { Box, Dimensions, enlargeBox } from "services/face/geom";
import {
DetectedFace,
FaceAlignment,
MLSyncFileContext,
} from "services/face/types";
import { getLocalFiles } from "services/fileService";
import { EnteFile } from "types/file";
import { getRenderableImage } from "utils/file";
import { clamp } from "utils/image";
export const fetchImageBitmapForContext = async (
fileContext: MLSyncFileContext,
@ -37,7 +41,6 @@ export const fetchImageBitmapForContext = async (
);
}
fileContext.newMlFile.imageSource = "Original";
const { width, height } = fileContext.imageBitmap;
fileContext.newMlFile.imageDimensions = { width, height };
@ -119,3 +122,468 @@ export async function getLocalFileImageBitmap(
fileBlob = await getRenderableImage(enteFile.metadata.title, fileBlob);
return createImageBitmap(fileBlob);
}
export function normalizePixelBetween0And1(pixelValue: number) {
return pixelValue / 255.0;
}
export function normalizePixelBetweenMinus1And1(pixelValue: number) {
return pixelValue / 127.5 - 1.0;
}
export function unnormalizePixelFromBetweenMinus1And1(pixelValue: number) {
return clamp(Math.round((pixelValue + 1.0) * 127.5), 0, 255);
}
export function readPixelColor(
imageData: Uint8ClampedArray,
width: number,
height: number,
x: number,
y: number,
) {
if (x < 0 || x >= width || y < 0 || y >= height) {
return { r: 0, g: 0, b: 0, a: 0 };
}
const index = (y * width + x) * 4;
return {
r: imageData[index],
g: imageData[index + 1],
b: imageData[index + 2],
a: imageData[index + 3],
};
}
export function clamp(value: number, min: number, max: number) {
return Math.min(max, Math.max(min, value));
}
export function getPixelBicubic(
fx: number,
fy: number,
imageData: Uint8ClampedArray,
imageWidth: number,
imageHeight: number,
) {
// Clamp to image boundaries
fx = clamp(fx, 0, imageWidth - 1);
fy = clamp(fy, 0, imageHeight - 1);
const x = Math.trunc(fx) - (fx >= 0.0 ? 0 : 1);
const px = x - 1;
const nx = x + 1;
const ax = x + 2;
const y = Math.trunc(fy) - (fy >= 0.0 ? 0 : 1);
const py = y - 1;
const ny = y + 1;
const ay = y + 2;
const dx = fx - x;
const dy = fy - y;
function cubic(
dx: number,
ipp: number,
icp: number,
inp: number,
iap: number,
) {
return (
icp +
0.5 *
(dx * (-ipp + inp) +
dx * dx * (2 * ipp - 5 * icp + 4 * inp - iap) +
dx * dx * dx * (-ipp + 3 * icp - 3 * inp + iap))
);
}
const icc = readPixelColor(imageData, imageWidth, imageHeight, x, y);
const ipp =
px < 0 || py < 0
? icc
: readPixelColor(imageData, imageWidth, imageHeight, px, py);
const icp =
px < 0
? icc
: readPixelColor(imageData, imageWidth, imageHeight, x, py);
const inp =
py < 0 || nx >= imageWidth
? icc
: readPixelColor(imageData, imageWidth, imageHeight, nx, py);
const iap =
ax >= imageWidth || py < 0
? icc
: readPixelColor(imageData, imageWidth, imageHeight, ax, py);
const ip0 = cubic(dx, ipp.r, icp.r, inp.r, iap.r);
const ip1 = cubic(dx, ipp.g, icp.g, inp.g, iap.g);
const ip2 = cubic(dx, ipp.b, icp.b, inp.b, iap.b);
// const ip3 = cubic(dx, ipp.a, icp.a, inp.a, iap.a);
const ipc =
px < 0
? icc
: readPixelColor(imageData, imageWidth, imageHeight, px, y);
const inc =
nx >= imageWidth
? icc
: readPixelColor(imageData, imageWidth, imageHeight, nx, y);
const iac =
ax >= imageWidth
? icc
: readPixelColor(imageData, imageWidth, imageHeight, ax, y);
const ic0 = cubic(dx, ipc.r, icc.r, inc.r, iac.r);
const ic1 = cubic(dx, ipc.g, icc.g, inc.g, iac.g);
const ic2 = cubic(dx, ipc.b, icc.b, inc.b, iac.b);
// const ic3 = cubic(dx, ipc.a, icc.a, inc.a, iac.a);
const ipn =
px < 0 || ny >= imageHeight
? icc
: readPixelColor(imageData, imageWidth, imageHeight, px, ny);
const icn =
ny >= imageHeight
? icc
: readPixelColor(imageData, imageWidth, imageHeight, x, ny);
const inn =
nx >= imageWidth || ny >= imageHeight
? icc
: readPixelColor(imageData, imageWidth, imageHeight, nx, ny);
const ian =
ax >= imageWidth || ny >= imageHeight
? icc
: readPixelColor(imageData, imageWidth, imageHeight, ax, ny);
const in0 = cubic(dx, ipn.r, icn.r, inn.r, ian.r);
const in1 = cubic(dx, ipn.g, icn.g, inn.g, ian.g);
const in2 = cubic(dx, ipn.b, icn.b, inn.b, ian.b);
// const in3 = cubic(dx, ipn.a, icn.a, inn.a, ian.a);
const ipa =
px < 0 || ay >= imageHeight
? icc
: readPixelColor(imageData, imageWidth, imageHeight, px, ay);
const ica =
ay >= imageHeight
? icc
: readPixelColor(imageData, imageWidth, imageHeight, x, ay);
const ina =
nx >= imageWidth || ay >= imageHeight
? icc
: readPixelColor(imageData, imageWidth, imageHeight, nx, ay);
const iaa =
ax >= imageWidth || ay >= imageHeight
? icc
: readPixelColor(imageData, imageWidth, imageHeight, ax, ay);
const ia0 = cubic(dx, ipa.r, ica.r, ina.r, iaa.r);
const ia1 = cubic(dx, ipa.g, ica.g, ina.g, iaa.g);
const ia2 = cubic(dx, ipa.b, ica.b, ina.b, iaa.b);
// const ia3 = cubic(dx, ipa.a, ica.a, ina.a, iaa.a);
const c0 = Math.trunc(clamp(cubic(dy, ip0, ic0, in0, ia0), 0, 255));
const c1 = Math.trunc(clamp(cubic(dy, ip1, ic1, in1, ia1), 0, 255));
const c2 = Math.trunc(clamp(cubic(dy, ip2, ic2, in2, ia2), 0, 255));
// const c3 = cubic(dy, ip3, ic3, in3, ia3);
return { r: c0, g: c1, b: c2 };
}
/// Returns the pixel value (RGB) at the given coordinates using bilinear interpolation.
export function getPixelBilinear(
fx: number,
fy: number,
imageData: Uint8ClampedArray,
imageWidth: number,
imageHeight: number,
) {
// Clamp to image boundaries
fx = clamp(fx, 0, imageWidth - 1);
fy = clamp(fy, 0, imageHeight - 1);
// Get the surrounding coordinates and their weights
const x0 = Math.floor(fx);
const x1 = Math.ceil(fx);
const y0 = Math.floor(fy);
const y1 = Math.ceil(fy);
const dx = fx - x0;
const dy = fy - y0;
const dx1 = 1.0 - dx;
const dy1 = 1.0 - dy;
// Get the original pixels
const pixel1 = readPixelColor(imageData, imageWidth, imageHeight, x0, y0);
const pixel2 = readPixelColor(imageData, imageWidth, imageHeight, x1, y0);
const pixel3 = readPixelColor(imageData, imageWidth, imageHeight, x0, y1);
const pixel4 = readPixelColor(imageData, imageWidth, imageHeight, x1, y1);
function bilinear(val1: number, val2: number, val3: number, val4: number) {
return Math.round(
val1 * dx1 * dy1 +
val2 * dx * dy1 +
val3 * dx1 * dy +
val4 * dx * dy,
);
}
// Interpolate the pixel values
const red = bilinear(pixel1.r, pixel2.r, pixel3.r, pixel4.r);
const green = bilinear(pixel1.g, pixel2.g, pixel3.g, pixel4.g);
const blue = bilinear(pixel1.b, pixel2.b, pixel3.b, pixel4.b);
return { r: red, g: green, b: blue };
}
export function warpAffineFloat32List(
imageBitmap: ImageBitmap,
faceAlignment: FaceAlignment,
faceSize: number,
inputData: Float32Array,
inputStartIndex: number,
): void {
// Get the pixel data
const offscreenCanvas = new OffscreenCanvas(
imageBitmap.width,
imageBitmap.height,
);
const ctx = offscreenCanvas.getContext("2d");
ctx.drawImage(imageBitmap, 0, 0, imageBitmap.width, imageBitmap.height);
const imageData = ctx.getImageData(
0,
0,
imageBitmap.width,
imageBitmap.height,
);
const pixelData = imageData.data;
const transformationMatrix = faceAlignment.affineMatrix.map((row) =>
row.map((val) => (val != 1.0 ? val * faceSize : 1.0)),
); // 3x3
const A: Matrix = new Matrix([
[transformationMatrix[0][0], transformationMatrix[0][1]],
[transformationMatrix[1][0], transformationMatrix[1][1]],
]);
const Ainverse = inverse(A);
const b00 = transformationMatrix[0][2];
const b10 = transformationMatrix[1][2];
const a00Prime = Ainverse.get(0, 0);
const a01Prime = Ainverse.get(0, 1);
const a10Prime = Ainverse.get(1, 0);
const a11Prime = Ainverse.get(1, 1);
for (let yTrans = 0; yTrans < faceSize; ++yTrans) {
for (let xTrans = 0; xTrans < faceSize; ++xTrans) {
// Perform inverse affine transformation
const xOrigin =
a00Prime * (xTrans - b00) + a01Prime * (yTrans - b10);
const yOrigin =
a10Prime * (xTrans - b00) + a11Prime * (yTrans - b10);
// Get the pixel from interpolation
const pixel = getPixelBicubic(
xOrigin,
yOrigin,
pixelData,
imageBitmap.width,
imageBitmap.height,
);
// Set the pixel in the input data
const index = (yTrans * faceSize + xTrans) * 3;
inputData[inputStartIndex + index] =
normalizePixelBetweenMinus1And1(pixel.r);
inputData[inputStartIndex + index + 1] =
normalizePixelBetweenMinus1And1(pixel.g);
inputData[inputStartIndex + index + 2] =
normalizePixelBetweenMinus1And1(pixel.b);
}
}
}
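// Editor's note: an illustrative note, not part of this diff. The warp above
// uses inverse mapping: it iterates over destination pixels and maps each one
// back into the source image, which avoids the holes a forward mapping would
// leave. Writing the (face-size scaled) affine transform as dst = A·src + b,
// the loop solves
//
//     src = A⁻¹ · (dst − b)
//
// which is what the a00Prime … a11Prime entries of A⁻¹ compute, before the
// source pixel is sampled with bicubic interpolation.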
export function createGrayscaleIntMatrixFromNormalized2List(
imageList: Float32Array,
faceNumber: number,
width: number = 112,
height: number = 112,
): number[][] {
const startIndex = faceNumber * width * height * 3;
return Array.from({ length: height }, (_, y) =>
Array.from({ length: width }, (_, x) => {
// 0.299 ∙ Red + 0.587 ∙ Green + 0.114 ∙ Blue
const pixelIndex = startIndex + 3 * (y * width + x);
return clamp(
Math.round(
0.299 *
unnormalizePixelFromBetweenMinus1And1(
imageList[pixelIndex],
) +
0.587 *
unnormalizePixelFromBetweenMinus1And1(
imageList[pixelIndex + 1],
) +
0.114 *
unnormalizePixelFromBetweenMinus1And1(
imageList[pixelIndex + 2],
),
),
0,
255,
);
}),
);
}
export function resizeToSquare(img: ImageBitmap, size: number) {
const scale = size / Math.max(img.height, img.width);
const width = scale * img.width;
const height = scale * img.height;
const offscreen = new OffscreenCanvas(size, size);
const ctx = offscreen.getContext("2d");
ctx.imageSmoothingQuality = "high";
ctx.drawImage(img, 0, 0, width, height);
const resizedImage = offscreen.transferToImageBitmap();
return { image: resizedImage, width, height };
}
export function transform(
imageBitmap: ImageBitmap,
affineMat: number[][],
outputWidth: number,
outputHeight: number,
) {
const offscreen = new OffscreenCanvas(outputWidth, outputHeight);
const context = offscreen.getContext("2d");
context.imageSmoothingQuality = "high";
context.transform(
affineMat[0][0],
affineMat[1][0],
affineMat[0][1],
affineMat[1][1],
affineMat[0][2],
affineMat[1][2],
);
context.drawImage(imageBitmap, 0, 0);
return offscreen.transferToImageBitmap();
}
export function crop(imageBitmap: ImageBitmap, cropBox: Box, size: number) {
const dimensions: Dimensions = {
width: size,
height: size,
};
return cropWithRotation(imageBitmap, cropBox, 0, dimensions, dimensions);
}
// These utilities only work in environments where OffscreenCanvas is available.
export function cropWithRotation(
imageBitmap: ImageBitmap,
cropBox: Box,
rotation?: number,
maxSize?: Dimensions,
minSize?: Dimensions,
) {
const box = cropBox.round();
const outputSize = { width: box.width, height: box.height };
if (maxSize) {
const minScale = Math.min(
maxSize.width / box.width,
maxSize.height / box.height,
);
if (minScale < 1) {
outputSize.width = Math.round(minScale * box.width);
outputSize.height = Math.round(minScale * box.height);
}
}
if (minSize) {
const maxScale = Math.max(
minSize.width / box.width,
minSize.height / box.height,
);
if (maxScale > 1) {
outputSize.width = Math.round(maxScale * box.width);
outputSize.height = Math.round(maxScale * box.height);
}
}
// log.info({ imageBitmap, box, outputSize });
const offscreen = new OffscreenCanvas(outputSize.width, outputSize.height);
const offscreenCtx = offscreen.getContext("2d");
offscreenCtx.imageSmoothingQuality = "high";
offscreenCtx.translate(outputSize.width / 2, outputSize.height / 2);
rotation && offscreenCtx.rotate(rotation);
const outputBox = new Box({
x: -outputSize.width / 2,
y: -outputSize.height / 2,
width: outputSize.width,
height: outputSize.height,
});
const enlargedBox = enlargeBox(box, 1.5);
const enlargedOutputBox = enlargeBox(outputBox, 1.5);
offscreenCtx.drawImage(
imageBitmap,
enlargedBox.x,
enlargedBox.y,
enlargedBox.width,
enlargedBox.height,
enlargedOutputBox.x,
enlargedOutputBox.y,
enlargedOutputBox.width,
enlargedOutputBox.height,
);
return offscreen.transferToImageBitmap();
}
export function addPadding(image: ImageBitmap, padding: number) {
const scale = 1 + padding * 2;
const width = scale * image.width;
const height = scale * image.height;
const offscreen = new OffscreenCanvas(width, height);
const ctx = offscreen.getContext("2d");
ctx.imageSmoothingEnabled = false;
ctx.drawImage(
image,
width / 2 - image.width / 2,
height / 2 - image.height / 2,
image.width,
image.height,
);
return offscreen.transferToImageBitmap();
}
export interface BlobOptions {
type?: string;
quality?: number;
}
export async function imageBitmapToBlob(imageBitmap: ImageBitmap) {
const offscreen = new OffscreenCanvas(
imageBitmap.width,
imageBitmap.height,
);
offscreen.getContext("2d").drawImage(imageBitmap, 0, 0);
return offscreen.convertToBlob({
type: "image/jpeg",
quality: 0.8,
});
}
export async function imageBitmapFromBlob(blob: Blob) {
return createImageBitmap(blob);
}


@ -1,37 +1,53 @@
import log from "@/next/log";
import mlIDbStorage from "services/face/db";
import { Face, Person } from "services/face/types";
import { type MLSyncContext } from "services/machineLearning/machineLearningService";
import { Person } from "services/face/types";
import { clusterFaces } from "./cluster";
import { saveFaceCrop } from "./f-index";
import { fetchImageBitmap, getLocalFile } from "./image";
export const syncPeopleIndex = async (syncContext: MLSyncContext) => {
export const syncPeopleIndex = async () => {
// TODO-ML(MR): Clustering is forcibly disabled. It doesn't currently work,
// and needs to be finalized before we move out of beta.
//
// > Error: Failed to execute 'transferToImageBitmap' on
// > 'OffscreenCanvas': ImageBitmap construction failed
/*
if (
syncContext.outOfSyncFiles.length <= 0 ||
(syncContext.nSyncedFiles === batchSize && Math.random() < 0)
) {
await this.syncIndex(syncContext);
}
public async syncIndex(syncContext: MLSyncContext) {
await this.getMLLibraryData(syncContext);
// TODO-ML(MR): Ensure this doesn't run until fixed.
await syncPeopleIndex(syncContext);
await this.persistMLLibraryData(syncContext);
}
const filesVersion = await mlIDbStorage.getIndexVersion("files");
if (filesVersion <= (await mlIDbStorage.getIndexVersion("people"))) {
return;
}
*/
// TODO: have faces addressable through fileId + faceId
// to avoid index based addressing, which is prone to wrong results
// one way could be to match nearest face within threshold in the file
/*
const allFacesMap =
syncContext.allSyncedFacesMap ??
(syncContext.allSyncedFacesMap = await mlIDbStorage.getAllFacesMap());
const allFaces = [...allFacesMap.values()].flat();
*/
await runFaceClustering(syncContext, allFaces);
await syncPeopleFromClusters(syncContext, allFacesMap, allFaces);
await mlIDbStorage.setIndexVersion("people", filesVersion);
};
const runFaceClustering = async (
syncContext: MLSyncContext,
allFaces: Array<Face>,
) => {
// await this.init();
const allFacesMap = await mlIDbStorage.getAllFacesMap();
const allFaces = [...allFacesMap.values()].flat();
if (!allFaces || allFaces.length < 50) {
log.info(
`Skipping clustering since number of faces (${allFaces.length}) is less than the clustering threshold (50)`,
@ -40,34 +56,15 @@ const runFaceClustering = async (
}
log.info("Running clustering allFaces: ", allFaces.length);
syncContext.mlLibraryData.faceClusteringResults = await clusterFaces(
const faceClusteringResults = await clusterFaces(
allFaces.map((f) => Array.from(f.embedding)),
);
syncContext.mlLibraryData.faceClusteringMethod = {
value: "Hdbscan",
version: 1,
};
log.info(
"[MLService] Got face clustering results: ",
JSON.stringify(syncContext.mlLibraryData.faceClusteringResults),
JSON.stringify(faceClusteringResults),
);
// syncContext.faceClustersWithNoise = {
// clusters: syncContext.faceClusteringResults.clusters.map(
// (faces) => ({
// faces,
// })
// ),
// noise: syncContext.faceClusteringResults.noise,
// };
};
const syncPeopleFromClusters = async (
syncContext: MLSyncContext,
allFacesMap: Map<number, Array<Face>>,
allFaces: Array<Face>,
) => {
const clusters = syncContext.mlLibraryData.faceClusteringResults?.clusters;
const clusters = faceClusteringResults?.clusters;
if (!clusters || clusters.length < 1) {
return;
}
@ -108,4 +105,6 @@ const syncPeopleFromClusters = async (
}
await mlIDbStorage.updateFaces(allFacesMap);
// await mlIDbStorage.setIndexVersion("people", filesVersion);
};


@ -0,0 +1,64 @@
import { Box, Point, boxFromBoundingBox } from "services/face/geom";
import { FaceDetection } from "services/face/types";
// TODO-ML(MR): Do we need two separate Matrix libraries?
//
// Keeping this in a separate file so that we can audit this. If these can be
// expressed using ml-matrix, then we can move the code to f-index.
import {
Matrix,
applyToPoint,
compose,
scale,
translate,
} from "transformation-matrix";
/**
 * Transform the given face detections from the coordinate space of
 * {@link inBox} to that of {@link toBox}.
 */
export const transformFaceDetections = (
faces: FaceDetection[],
inBox: Box,
toBox: Box,
): FaceDetection[] => {
const transform = computeTransformToBox(inBox, toBox);
return faces.map((f) => {
const box = transformBox(f.box, transform);
const normLandmarks = f.landmarks;
const landmarks = transformPoints(normLandmarks, transform);
return {
box,
landmarks,
probability: f.probability as number,
} as FaceDetection;
});
};
function computeTransformToBox(inBox: Box, toBox: Box): Matrix {
return compose(
translate(toBox.x, toBox.y),
scale(toBox.width / inBox.width, toBox.height / inBox.height),
);
}
function transformPoint(point: Point, transform: Matrix) {
const txdPoint = applyToPoint(transform, point);
return new Point(txdPoint.x, txdPoint.y);
}
function transformPoints(points: Point[], transform: Matrix) {
return points?.map((p) => transformPoint(p, transform));
}
function transformBox(box: Box, transform: Matrix) {
const topLeft = transformPoint(box.topLeft, transform);
const bottomRight = transformPoint(box.bottomRight, transform);
return boxFromBoundingBox({
left: topLeft.x,
top: topLeft.y,
right: bottomRight.x,
bottom: bottomRight.y,
});
}
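For illustration, a minimal sketch of how transformFaceDetections might be used: detections computed on a resized, square model input are mapped back into the original image's coordinate space. The 640×640 input size, rawDetections, and imageBitmap are assumptions for this example, not part of the change.
// Sketch only; the sizes and variables here are illustrative assumptions.
const modelInputBox = new Box({ x: 0, y: 0, width: 640, height: 640 });
const imageBox = new Box({
    x: 0,
    y: 0,
    width: imageBitmap.width,
    height: imageBitmap.height,
});
// Re-express detections made in model-input coordinates in image coordinates.
const detections = transformFaceDetections(rawDetections, modelInputBox, imageBox);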

View file

@ -1,62 +1,10 @@
import type { ClusterFacesResult } from "services/face/cluster";
import { Dimensions } from "services/face/geom";
import { Box, Dimensions, Point } from "services/face/geom";
import { EnteFile } from "types/file";
import { Box, Point } from "./geom";
export interface MLSyncResult {
nOutOfSyncFiles: number;
nSyncedFiles: number;
nSyncedFaces: number;
nFaceClusters: number;
nFaceNoise: number;
error?: Error;
}
export declare type FaceDescriptor = Float32Array;
export declare type Cluster = Array<number>;
export interface FacesCluster {
faces: Cluster;
summary?: FaceDescriptor;
}
export interface FacesClustersWithNoise {
clusters: Array<FacesCluster>;
noise: Cluster;
}
export interface NearestCluster {
cluster: FacesCluster;
distance: number;
}
export declare type Landmark = Point;
export declare type ImageType = "Original" | "Preview";
export declare type FaceDetectionMethod = "YoloFace";
export declare type FaceCropMethod = "ArcFace";
export declare type FaceAlignmentMethod = "ArcFace";
export declare type FaceEmbeddingMethod = "MobileFaceNet";
export declare type BlurDetectionMethod = "Laplacian";
export declare type ClusteringMethod = "Hdbscan" | "Dbscan";
export class AlignedBox {
box: Box;
rotation: number;
}
export interface Versioned<T> {
value: T;
version: number;
}
export interface FaceDetection {
    // box and landmarks are relative to the image dimensions stored at mlFileData
box: Box;
@ -124,15 +72,9 @@ export interface Person {
export interface MlFileData {
fileId: number;
faces?: Face[];
imageSource?: ImageType;
imageDimensions?: Dimensions;
faceDetectionMethod?: Versioned<FaceDetectionMethod>;
faceCropMethod?: Versioned<FaceCropMethod>;
faceAlignmentMethod?: Versioned<FaceAlignmentMethod>;
faceEmbeddingMethod?: Versioned<FaceEmbeddingMethod>;
mlVersion: number;
errorCount: number;
lastErrorMessage?: string;
}
export interface MLSearchConfig {
@ -152,10 +94,4 @@ export interface MLSyncFileContext {
newAlignment?: boolean;
}
export interface MLLibraryData {
faceClusteringMethod?: Versioned<ClusteringMethod>;
faceClusteringResults?: ClusterFacesResult;
faceClustersWithNoise?: FacesClustersWithNoise;
}
export declare type MLIndex = "files" | "people";

View file

@ -9,22 +9,18 @@ import { CustomError, parseUploadErrorCodes } from "@ente/shared/error";
import PQueue from "p-queue";
import { putEmbedding } from "services/embeddingService";
import mlIDbStorage, { ML_SEARCH_CONFIG_NAME } from "services/face/db";
import { fetchImageBitmap, getLocalFile } from "services/face/image";
import {
Face,
FaceDetection,
Landmark,
MLLibraryData,
MLSearchConfig,
MLSyncFileContext,
MLSyncResult,
MlFileData,
} from "services/face/types";
import { getLocalFiles } from "services/fileService";
import { EnteFile } from "types/file";
import { isInternalUserForML } from "utils/user";
import { regenerateFaceCrop, syncFileAnalyzeFaces } from "../face/f-index";
import { fetchImageBitmapForContext } from "../face/image";
import { syncPeopleIndex } from "../face/people";
import { indexFaces, saveFaceCrop } from "../face/f-index";
/**
* TODO-ML(MR): What and why.
@ -56,41 +52,16 @@ export async function updateMLSearchConfig(newConfig: MLSearchConfig) {
return mlIDbStorage.putConfig(ML_SEARCH_CONFIG_NAME, newConfig);
}
export interface MLSyncContext {
token: string;
userID: number;
localFilesMap: Map<number, EnteFile>;
outOfSyncFiles: EnteFile[];
nSyncedFiles: number;
nSyncedFaces: number;
allSyncedFacesMap?: Map<number, Array<Face>>;
error?: Error;
// oldMLLibraryData: MLLibraryData;
mlLibraryData: MLLibraryData;
syncQueue: PQueue;
getEnteWorker(id: number): Promise<any>;
dispose(): Promise<void>;
}
export class LocalMLSyncContext implements MLSyncContext {
class MLSyncContext {
public token: string;
public userID: number;
public localFilesMap: Map<number, EnteFile>;
public outOfSyncFiles: EnteFile[];
public nSyncedFiles: number;
public nSyncedFaces: number;
public allSyncedFacesMap?: Map<number, Array<Face>>;
public error?: Error;
public mlLibraryData: MLLibraryData;
public syncQueue: PQueue;
    // TODO: whether to limit concurrent downloads
// private downloadQueue: PQueue;
@ -107,7 +78,6 @@ export class LocalMLSyncContext implements MLSyncContext {
this.outOfSyncFiles = [];
this.nSyncedFiles = 0;
this.nSyncedFaces = 0;
this.concurrency = concurrency ?? getConcurrency();
@ -151,7 +121,7 @@ class MachineLearningService {
private localSyncContext: Promise<MLSyncContext>;
private syncContext: Promise<MLSyncContext>;
public async sync(token: string, userID: number): Promise<MLSyncResult> {
public async sync(token: string, userID: number): Promise<boolean> {
if (!token) {
throw Error("Token needed by ml service to sync file");
}
@ -166,34 +136,9 @@ class MachineLearningService {
await this.syncFiles(syncContext);
}
// TODO-ML(MR): Forced disable clustering. It doesn't currently work,
// need to finalize it before we move out of beta.
//
// > Error: Failed to execute 'transferToImageBitmap' on
// > 'OffscreenCanvas': ImageBitmap construction failed
/*
if (
syncContext.outOfSyncFiles.length <= 0 ||
(syncContext.nSyncedFiles === batchSize && Math.random() < 0)
) {
await this.syncIndex(syncContext);
}
*/
const mlSyncResult: MLSyncResult = {
nOutOfSyncFiles: syncContext.outOfSyncFiles.length,
nSyncedFiles: syncContext.nSyncedFiles,
nSyncedFaces: syncContext.nSyncedFaces,
nFaceClusters:
syncContext.mlLibraryData?.faceClusteringResults?.clusters
.length,
nFaceNoise:
syncContext.mlLibraryData?.faceClusteringResults?.noise.length,
error: syncContext.error,
};
// log.info('[MLService] sync results: ', mlSyncResult);
return mlSyncResult;
const error = syncContext.error;
const nOutOfSyncFiles = syncContext.outOfSyncFiles.length;
return !error && nOutOfSyncFiles > 0;
}
public async regenerateFaceCrop(faceID: string) {
@ -309,7 +254,6 @@ class MachineLearningService {
syncContext.error = error;
}
await syncContext.syncQueue.onIdle();
log.info("allFaces: ", syncContext.nSyncedFaces);
// TODO: In case syncJob has to use multiple ml workers
// do in same transaction with each file update
@ -324,7 +268,7 @@ class MachineLearningService {
// TODO-ML(MR): Keep as promise for now.
this.syncContext = new Promise((resolve) => {
resolve(new LocalMLSyncContext(token, userID));
resolve(new MLSyncContext(token, userID));
});
} else {
log.info("reusing existing syncContext");
@ -338,7 +282,7 @@ class MachineLearningService {
log.info("Creating localSyncContext");
// TODO-ML(MR):
this.localSyncContext = new Promise((resolve) => {
resolve(new LocalMLSyncContext(token, userID));
resolve(new MLSyncContext(token, userID));
});
} else {
log.info("reusing existing localSyncContext");
@ -389,7 +333,6 @@ class MachineLearningService {
`Indexing ${enteFile.title ?? "<untitled>"} ${enteFile.id}`,
);
const mlFileData = await this.syncFile(enteFile, localFile);
syncContext.nSyncedFaces += mlFileData.faces?.length || 0;
syncContext.nSyncedFiles += 1;
return mlFileData;
} catch (e) {
@ -422,31 +365,14 @@ class MachineLearningService {
}
private async syncFile(enteFile: EnteFile, localFile?: globalThis.File) {
log.debug(() => ({ a: "Syncing file", enteFile }));
const fileContext: MLSyncFileContext = { enteFile, localFile };
const oldMlFile = await this.getMLFileData(enteFile.id);
if (oldMlFile && oldMlFile.mlVersion) {
return oldMlFile;
}
const newMlFile = (fileContext.newMlFile = this.newMlData(enteFile.id));
newMlFile.mlVersion = defaultMLVersion;
try {
await fetchImageBitmapForContext(fileContext);
await syncFileAnalyzeFaces(fileContext);
newMlFile.errorCount = 0;
newMlFile.lastErrorMessage = undefined;
await this.persistOnServer(newMlFile, enteFile);
await mlIDbStorage.putFile(newMlFile);
} catch (e) {
log.error("ml detection failed", e);
newMlFile.mlVersion = oldMlFile.mlVersion;
throw e;
} finally {
fileContext.imageBitmap && fileContext.imageBitmap.close();
}
const newMlFile = await indexFaces(enteFile, localFile);
await this.persistOnServer(newMlFile, enteFile);
await mlIDbStorage.putFile(newMlFile);
return newMlFile;
}
@ -484,7 +410,7 @@ class MachineLearningService {
mlFileData = this.newMlData(enteFile.id);
}
mlFileData.errorCount = (mlFileData.errorCount || 0) + 1;
mlFileData.lastErrorMessage = e.message;
console.error(`lastError for ${enteFile.id}`, e);
return mlFileData;
});
@ -493,26 +419,6 @@ class MachineLearningService {
console.error("Error while storing ml sync error", e);
}
}
private async getMLLibraryData(syncContext: MLSyncContext) {
syncContext.mlLibraryData = await mlIDbStorage.getLibraryData();
if (!syncContext.mlLibraryData) {
syncContext.mlLibraryData = {};
}
}
private async persistMLLibraryData(syncContext: MLSyncContext) {
return mlIDbStorage.putLibraryData(syncContext.mlLibraryData);
}
public async syncIndex(syncContext: MLSyncContext) {
await this.getMLLibraryData(syncContext);
// TODO-ML(MR): Ensure this doesn't run until fixed.
await syncPeopleIndex(syncContext);
await this.persistMLLibraryData(syncContext);
}
}
export default new MachineLearningService();
@ -543,19 +449,14 @@ class ServerFileMl {
class ServerFaceEmbeddings {
public faces: ServerFace[];
public version: number;
/* TODO
public client?: string;
public error?: boolean;
*/
public constructor(
faces: ServerFace[],
version: number,
client?: string,
error?: boolean,
) {
public constructor(faces: ServerFace[], version: number) {
this.faces = faces;
this.version = version;
this.client = client;
this.error = error;
}
}
@ -613,10 +514,7 @@ class ServerFaceBox {
function LocalFileMlDataToServerFileMl(
localFileMlData: MlFileData,
): ServerFileMl {
if (
localFileMlData.errorCount > 0 &&
localFileMlData.lastErrorMessage !== undefined
) {
if (localFileMlData.errorCount > 0) {
return null;
}
const imageDimensions = localFileMlData.imageDimensions;
@ -640,6 +538,7 @@ function LocalFileMlDataToServerFileMl(
} as Landmark);
}
// TODO: Add client UA and version
const newFaceObject = new ServerFace(
faceID,
Array.from(embedding),
@ -649,11 +548,7 @@ function LocalFileMlDataToServerFileMl(
);
faces.push(newFaceObject);
}
const faceEmbeddings = new ServerFaceEmbeddings(
faces,
1,
localFileMlData.lastErrorMessage,
);
const faceEmbeddings = new ServerFaceEmbeddings(faces, 1);
return new ServerFileMl(
localFileMlData.fileId,
faceEmbeddings,
@ -673,3 +568,15 @@ export function logQueueStats(queue: PQueue, name: string) {
console.error(`queuestats: ${name}: Error, `, error),
);
}
export const regenerateFaceCrop = async (faceID: string) => {
const fileID = Number(faceID.split("-")[0]);
const personFace = await mlIDbStorage.getFace(fileID, faceID);
if (!personFace) {
throw Error("Face not found");
}
const file = await getLocalFile(personFace.fileId);
const imageBitmap = await fetchImageBitmap(file);
return await saveFaceCrop(imageBitmap, personFace);
};
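As a usage sketch (assuming face IDs keep the "<fileID>-<suffix>" shape that the split above relies on):
// "12345-0" → file ID 12345; the face is looked up by its full ID.
const faceCrop = await regenerateFaceCrop("12345-0");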

View file

@ -8,25 +8,19 @@ import PQueue from "p-queue";
import { createFaceComlinkWorker } from "services/face";
import mlIDbStorage from "services/face/db";
import type { DedicatedMLWorker } from "services/face/face.worker";
import { MLSyncResult } from "services/face/types";
import { EnteFile } from "types/file";
import { logQueueStats } from "./machineLearningService";
export type JobState = "Scheduled" | "Running" | "NotScheduled";
export interface MLSyncJobResult {
shouldBackoff: boolean;
mlSyncResult: MLSyncResult;
}
export class MLSyncJob {
private runCallback: () => Promise<MLSyncJobResult>;
private runCallback: () => Promise<boolean>;
private state: JobState;
private stopped: boolean;
private intervalSec: number;
private nextTimeoutId: ReturnType<typeof setTimeout>;
constructor(runCallback: () => Promise<MLSyncJobResult>) {
constructor(runCallback: () => Promise<boolean>) {
this.runCallback = runCallback;
this.state = "NotScheduled";
this.stopped = true;
@ -65,13 +59,11 @@ export class MLSyncJob {
this.state = "Running";
try {
const jobResult = await this.runCallback();
if (jobResult && jobResult.shouldBackoff) {
this.intervalSec = Math.min(960, this.intervalSec * 2);
} else {
if (await this.runCallback()) {
this.resetInterval();
} else {
this.intervalSec = Math.min(960, this.intervalSec * 2);
}
log.info("Job completed");
} catch (e) {
console.error("Error while running Job: ", e);
} finally {
@ -255,7 +247,14 @@ class MLWorkManager {
this.syncJobWorker = undefined;
}
private async runMLSyncJob(): Promise<MLSyncJobResult> {
/**
     * Returns `false` to indicate that either an error occurred, there are no
     * more files to process, or we cannot currently process files.
     *
     * Conversely, when it returns `true` all is well and there is more work
     * pending, so we should keep processing at full speed.
*/
private async runMLSyncJob(): Promise<boolean> {
try {
            // TODO: skipping is not required if we are caching chunks through the service worker;
            // currently the worker chunk itself is not loaded when the network is unavailable.
@ -263,29 +262,16 @@ class MLWorkManager {
log.info(
"Skipping ml-sync job run as not connected to internet.",
);
return {
shouldBackoff: true,
mlSyncResult: undefined,
};
return false;
}
const token = getToken();
const userID = getUserID();
const jobWorkerProxy = await this.getSyncJobWorker();
const mlSyncResult = await jobWorkerProxy.sync(token, userID);
return await jobWorkerProxy.sync(token, userID);
// this.terminateSyncJobWorker();
const jobResult: MLSyncJobResult = {
shouldBackoff:
!!mlSyncResult.error || mlSyncResult.nOutOfSyncFiles < 1,
mlSyncResult,
};
log.info("ML Sync Job result: ", JSON.stringify(jobResult));
// TODO: redirect/refresh to gallery in case of session_expired, stop ml sync job
return jobResult;
} catch (e) {
log.error("Failed to run MLSync Job", e);
}
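To make the backoff semantics concrete, here is a small sketch of the interval schedule implied by the MLSyncJob loop above: a `true` result resets the interval, while repeated `false` results double it up to the 960 second cap. The 5 second starting interval is an assumption for illustration only.
// Sketch of the backoff schedule (starting interval assumed to be 5s).
let intervalSec = 5;
const onJobDone = (hasMoreWork: boolean) => {
    if (hasMoreWork) intervalSec = 5; // more work pending: reset, keep going
    else intervalSec = Math.min(960, intervalSec * 2); // 10, 20, 40, ... capped at 960
};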

View file

@ -1,468 +0,0 @@
// These utils only work in environments where OffscreenCanvas is available.
import { Matrix, inverse } from "ml-matrix";
import { Box, Dimensions, enlargeBox } from "services/face/geom";
import { FaceAlignment } from "services/face/types";
export function normalizePixelBetween0And1(pixelValue: number) {
return pixelValue / 255.0;
}
export function normalizePixelBetweenMinus1And1(pixelValue: number) {
return pixelValue / 127.5 - 1.0;
}
export function unnormalizePixelFromBetweenMinus1And1(pixelValue: number) {
return clamp(Math.round((pixelValue + 1.0) * 127.5), 0, 255);
}
export function readPixelColor(
imageData: Uint8ClampedArray,
width: number,
height: number,
x: number,
y: number,
) {
if (x < 0 || x >= width || y < 0 || y >= height) {
return { r: 0, g: 0, b: 0, a: 0 };
}
const index = (y * width + x) * 4;
return {
r: imageData[index],
g: imageData[index + 1],
b: imageData[index + 2],
a: imageData[index + 3],
};
}
export function clamp(value: number, min: number, max: number) {
return Math.min(max, Math.max(min, value));
}
export function getPixelBicubic(
fx: number,
fy: number,
imageData: Uint8ClampedArray,
imageWidth: number,
imageHeight: number,
) {
// Clamp to image boundaries
fx = clamp(fx, 0, imageWidth - 1);
fy = clamp(fy, 0, imageHeight - 1);
const x = Math.trunc(fx) - (fx >= 0.0 ? 0 : 1);
const px = x - 1;
const nx = x + 1;
const ax = x + 2;
const y = Math.trunc(fy) - (fy >= 0.0 ? 0 : 1);
const py = y - 1;
const ny = y + 1;
const ay = y + 2;
const dx = fx - x;
const dy = fy - y;
function cubic(
dx: number,
ipp: number,
icp: number,
inp: number,
iap: number,
) {
return (
icp +
0.5 *
(dx * (-ipp + inp) +
dx * dx * (2 * ipp - 5 * icp + 4 * inp - iap) +
dx * dx * dx * (-ipp + 3 * icp - 3 * inp + iap))
);
}
const icc = readPixelColor(imageData, imageWidth, imageHeight, x, y);
const ipp =
px < 0 || py < 0
? icc
: readPixelColor(imageData, imageWidth, imageHeight, px, py);
const icp =
px < 0
? icc
: readPixelColor(imageData, imageWidth, imageHeight, x, py);
const inp =
py < 0 || nx >= imageWidth
? icc
: readPixelColor(imageData, imageWidth, imageHeight, nx, py);
const iap =
ax >= imageWidth || py < 0
? icc
: readPixelColor(imageData, imageWidth, imageHeight, ax, py);
const ip0 = cubic(dx, ipp.r, icp.r, inp.r, iap.r);
const ip1 = cubic(dx, ipp.g, icp.g, inp.g, iap.g);
const ip2 = cubic(dx, ipp.b, icp.b, inp.b, iap.b);
// const ip3 = cubic(dx, ipp.a, icp.a, inp.a, iap.a);
const ipc =
px < 0
? icc
: readPixelColor(imageData, imageWidth, imageHeight, px, y);
const inc =
nx >= imageWidth
? icc
: readPixelColor(imageData, imageWidth, imageHeight, nx, y);
const iac =
ax >= imageWidth
? icc
: readPixelColor(imageData, imageWidth, imageHeight, ax, y);
const ic0 = cubic(dx, ipc.r, icc.r, inc.r, iac.r);
const ic1 = cubic(dx, ipc.g, icc.g, inc.g, iac.g);
const ic2 = cubic(dx, ipc.b, icc.b, inc.b, iac.b);
// const ic3 = cubic(dx, ipc.a, icc.a, inc.a, iac.a);
const ipn =
px < 0 || ny >= imageHeight
? icc
: readPixelColor(imageData, imageWidth, imageHeight, px, ny);
const icn =
ny >= imageHeight
? icc
: readPixelColor(imageData, imageWidth, imageHeight, x, ny);
const inn =
nx >= imageWidth || ny >= imageHeight
? icc
: readPixelColor(imageData, imageWidth, imageHeight, nx, ny);
const ian =
ax >= imageWidth || ny >= imageHeight
? icc
: readPixelColor(imageData, imageWidth, imageHeight, ax, ny);
const in0 = cubic(dx, ipn.r, icn.r, inn.r, ian.r);
const in1 = cubic(dx, ipn.g, icn.g, inn.g, ian.g);
const in2 = cubic(dx, ipn.b, icn.b, inn.b, ian.b);
// const in3 = cubic(dx, ipn.a, icn.a, inn.a, ian.a);
const ipa =
px < 0 || ay >= imageHeight
? icc
: readPixelColor(imageData, imageWidth, imageHeight, px, ay);
const ica =
ay >= imageHeight
? icc
: readPixelColor(imageData, imageWidth, imageHeight, x, ay);
const ina =
nx >= imageWidth || ay >= imageHeight
? icc
: readPixelColor(imageData, imageWidth, imageHeight, nx, ay);
const iaa =
ax >= imageWidth || ay >= imageHeight
? icc
: readPixelColor(imageData, imageWidth, imageHeight, ax, ay);
const ia0 = cubic(dx, ipa.r, ica.r, ina.r, iaa.r);
const ia1 = cubic(dx, ipa.g, ica.g, ina.g, iaa.g);
const ia2 = cubic(dx, ipa.b, ica.b, ina.b, iaa.b);
// const ia3 = cubic(dx, ipa.a, ica.a, ina.a, iaa.a);
const c0 = Math.trunc(clamp(cubic(dy, ip0, ic0, in0, ia0), 0, 255));
const c1 = Math.trunc(clamp(cubic(dy, ip1, ic1, in1, ia1), 0, 255));
const c2 = Math.trunc(clamp(cubic(dy, ip2, ic2, in2, ia2), 0, 255));
// const c3 = cubic(dy, ip3, ic3, in3, ia3);
return { r: c0, g: c1, b: c2 };
}
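The cubic() helper above is the Catmull-Rom kernel. As a quick sanity check (a worked example, not part of the original file), sampling a linear ramp of neighbours halfway between the two centre samples reproduces the linear midpoint exactly:
// Standalone restatement of the kernel, for the worked example below.
const catmullRom = (dx: number, p0: number, p1: number, p2: number, p3: number) =>
    p1 +
    0.5 *
        (dx * (-p0 + p2) +
            dx * dx * (2 * p0 - 5 * p1 + 4 * p2 - p3) +
            dx * dx * dx * (-p0 + 3 * p1 - 3 * p2 + p3));
// 20 + 0.5 * (0.5 * (-10 + 30)) = 25; the quadratic and cubic terms vanish here.
console.assert(catmullRom(0.5, 10, 20, 30, 40) === 25);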
/// Returns the pixel value (RGB) at the given coordinates using bilinear interpolation.
export function getPixelBilinear(
fx: number,
fy: number,
imageData: Uint8ClampedArray,
imageWidth: number,
imageHeight: number,
) {
// Clamp to image boundaries
fx = clamp(fx, 0, imageWidth - 1);
fy = clamp(fy, 0, imageHeight - 1);
// Get the surrounding coordinates and their weights
const x0 = Math.floor(fx);
const x1 = Math.ceil(fx);
const y0 = Math.floor(fy);
const y1 = Math.ceil(fy);
const dx = fx - x0;
const dy = fy - y0;
const dx1 = 1.0 - dx;
const dy1 = 1.0 - dy;
// Get the original pixels
const pixel1 = readPixelColor(imageData, imageWidth, imageHeight, x0, y0);
const pixel2 = readPixelColor(imageData, imageWidth, imageHeight, x1, y0);
const pixel3 = readPixelColor(imageData, imageWidth, imageHeight, x0, y1);
const pixel4 = readPixelColor(imageData, imageWidth, imageHeight, x1, y1);
function bilinear(val1: number, val2: number, val3: number, val4: number) {
return Math.round(
val1 * dx1 * dy1 +
val2 * dx * dy1 +
val3 * dx1 * dy +
val4 * dx * dy,
);
}
// Interpolate the pixel values
const red = bilinear(pixel1.r, pixel2.r, pixel3.r, pixel4.r);
const green = bilinear(pixel1.g, pixel2.g, pixel3.g, pixel4.g);
const blue = bilinear(pixel1.b, pixel2.b, pixel3.b, pixel4.b);
return { r: red, g: green, b: blue };
}
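For instance (a worked example, not part of the original file): at the exact centre of the four neighbouring pixels, dx = dy = 0.5, so each neighbour contributes a weight of 0.25:
const bilinear = (dx: number, dy: number, v1: number, v2: number, v3: number, v4: number) =>
    Math.round(
        v1 * (1 - dx) * (1 - dy) +
            v2 * dx * (1 - dy) +
            v3 * (1 - dx) * dy +
            v4 * dx * dy,
    );
// (0 + 100 + 100 + 200) / 4 = 100
console.assert(bilinear(0.5, 0.5, 0, 100, 100, 200) === 100);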
export function warpAffineFloat32List(
imageBitmap: ImageBitmap,
faceAlignment: FaceAlignment,
faceSize: number,
inputData: Float32Array,
inputStartIndex: number,
): void {
// Get the pixel data
const offscreenCanvas = new OffscreenCanvas(
imageBitmap.width,
imageBitmap.height,
);
const ctx = offscreenCanvas.getContext("2d");
ctx.drawImage(imageBitmap, 0, 0, imageBitmap.width, imageBitmap.height);
const imageData = ctx.getImageData(
0,
0,
imageBitmap.width,
imageBitmap.height,
);
const pixelData = imageData.data;
const transformationMatrix = faceAlignment.affineMatrix.map((row) =>
row.map((val) => (val != 1.0 ? val * faceSize : 1.0)),
); // 3x3
const A: Matrix = new Matrix([
[transformationMatrix[0][0], transformationMatrix[0][1]],
[transformationMatrix[1][0], transformationMatrix[1][1]],
]);
const Ainverse = inverse(A);
const b00 = transformationMatrix[0][2];
const b10 = transformationMatrix[1][2];
const a00Prime = Ainverse.get(0, 0);
const a01Prime = Ainverse.get(0, 1);
const a10Prime = Ainverse.get(1, 0);
const a11Prime = Ainverse.get(1, 1);
for (let yTrans = 0; yTrans < faceSize; ++yTrans) {
for (let xTrans = 0; xTrans < faceSize; ++xTrans) {
// Perform inverse affine transformation
const xOrigin =
a00Prime * (xTrans - b00) + a01Prime * (yTrans - b10);
const yOrigin =
a10Prime * (xTrans - b00) + a11Prime * (yTrans - b10);
// Get the pixel from interpolation
const pixel = getPixelBicubic(
xOrigin,
yOrigin,
pixelData,
imageBitmap.width,
imageBitmap.height,
);
// Set the pixel in the input data
const index = (yTrans * faceSize + xTrans) * 3;
inputData[inputStartIndex + index] =
normalizePixelBetweenMinus1And1(pixel.r);
inputData[inputStartIndex + index + 1] =
normalizePixelBetweenMinus1And1(pixel.g);
inputData[inputStartIndex + index + 2] =
normalizePixelBetweenMinus1And1(pixel.b);
}
}
}
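A minimal sketch of how warpAffineFloat32List might be driven for a batch of faces, writing each aligned crop into one preallocated buffer. The 112 face size matches the 112×112 defaults used further down in this file; alignments and imageBitmap are assumed inputs for the example.
const faceSize = 112; // assumed, matching the 112×112 defaults below
const input = new Float32Array(alignments.length * faceSize * faceSize * 3);
alignments.forEach((alignment, i) =>
    warpAffineFloat32List(
        imageBitmap,
        alignment,
        faceSize,
        input,
        i * faceSize * faceSize * 3,
    ),
);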
export function createGrayscaleIntMatrixFromNormalized2List(
imageList: Float32Array,
faceNumber: number,
width: number = 112,
height: number = 112,
): number[][] {
const startIndex = faceNumber * width * height * 3;
return Array.from({ length: height }, (_, y) =>
Array.from({ length: width }, (_, x) => {
// 0.299 ∙ Red + 0.587 ∙ Green + 0.114 ∙ Blue
const pixelIndex = startIndex + 3 * (y * width + x);
return clamp(
Math.round(
0.299 *
unnormalizePixelFromBetweenMinus1And1(
imageList[pixelIndex],
) +
0.587 *
unnormalizePixelFromBetweenMinus1And1(
imageList[pixelIndex + 1],
) +
0.114 *
unnormalizePixelFromBetweenMinus1And1(
imageList[pixelIndex + 2],
),
),
0,
255,
);
}),
);
}
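Continuing the sketch above, the packed buffer can then be converted back, one face at a time, into an 8-bit grayscale matrix using the Rec. 601 luma weights from the comment:
// Grayscale 112×112 matrix for the first face in the packed batch.
const gray = createGrayscaleIntMatrixFromNormalized2List(input, 0);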
export function resizeToSquare(img: ImageBitmap, size: number) {
const scale = size / Math.max(img.height, img.width);
const width = scale * img.width;
const height = scale * img.height;
const offscreen = new OffscreenCanvas(size, size);
const ctx = offscreen.getContext("2d");
ctx.imageSmoothingQuality = "high";
ctx.drawImage(img, 0, 0, width, height);
const resizedImage = offscreen.transferToImageBitmap();
return { image: resizedImage, width, height };
}
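As a worked example (the 640 target size is illustrative, not taken from this file): the longer side is scaled down to `size` while preserving aspect ratio, so a 4000×3000 bitmap requested at 640 is drawn as 640×480 inside a 640×640 canvas, with the remainder left transparent.
const { image, width, height } = resizeToSquare(imageBitmap, 640);
// For a 4000×3000 input: scale = 640 / 4000 = 0.16, so width === 640 and height === 480.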
export function transform(
imageBitmap: ImageBitmap,
affineMat: number[][],
outputWidth: number,
outputHeight: number,
) {
const offscreen = new OffscreenCanvas(outputWidth, outputHeight);
const context = offscreen.getContext("2d");
context.imageSmoothingQuality = "high";
context.transform(
affineMat[0][0],
affineMat[1][0],
affineMat[0][1],
affineMat[1][1],
affineMat[0][2],
affineMat[1][2],
);
context.drawImage(imageBitmap, 0, 0);
return offscreen.transferToImageBitmap();
}
export function crop(imageBitmap: ImageBitmap, cropBox: Box, size: number) {
const dimensions: Dimensions = {
width: size,
height: size,
};
return cropWithRotation(imageBitmap, cropBox, 0, dimensions, dimensions);
}
export function cropWithRotation(
imageBitmap: ImageBitmap,
cropBox: Box,
rotation?: number,
maxSize?: Dimensions,
minSize?: Dimensions,
) {
const box = cropBox.round();
const outputSize = { width: box.width, height: box.height };
if (maxSize) {
const minScale = Math.min(
maxSize.width / box.width,
maxSize.height / box.height,
);
if (minScale < 1) {
outputSize.width = Math.round(minScale * box.width);
outputSize.height = Math.round(minScale * box.height);
}
}
if (minSize) {
const maxScale = Math.max(
minSize.width / box.width,
minSize.height / box.height,
);
if (maxScale > 1) {
outputSize.width = Math.round(maxScale * box.width);
outputSize.height = Math.round(maxScale * box.height);
}
}
// log.info({ imageBitmap, box, outputSize });
const offscreen = new OffscreenCanvas(outputSize.width, outputSize.height);
const offscreenCtx = offscreen.getContext("2d");
offscreenCtx.imageSmoothingQuality = "high";
offscreenCtx.translate(outputSize.width / 2, outputSize.height / 2);
rotation && offscreenCtx.rotate(rotation);
const outputBox = new Box({
x: -outputSize.width / 2,
y: -outputSize.height / 2,
width: outputSize.width,
height: outputSize.height,
});
const enlargedBox = enlargeBox(box, 1.5);
const enlargedOutputBox = enlargeBox(outputBox, 1.5);
offscreenCtx.drawImage(
imageBitmap,
enlargedBox.x,
enlargedBox.y,
enlargedBox.width,
enlargedBox.height,
enlargedOutputBox.x,
enlargedOutputBox.y,
enlargedOutputBox.width,
enlargedOutputBox.height,
);
return offscreen.transferToImageBitmap();
}
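As a numeric sketch of the maxSize clamp above: a 1000×500 crop box constrained to a 512×512 maximum is scaled by min(512/1000, 512/500) = 0.512, producing a 512×256 output bitmap.
const cropped = cropWithRotation(
    imageBitmap,
    new Box({ x: 0, y: 0, width: 1000, height: 500 }),
    0,
    { width: 512, height: 512 },
);
// cropped.width === 512, cropped.height === 256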
export function addPadding(image: ImageBitmap, padding: number) {
const scale = 1 + padding * 2;
const width = scale * image.width;
const height = scale * image.height;
const offscreen = new OffscreenCanvas(width, height);
const ctx = offscreen.getContext("2d");
ctx.imageSmoothingEnabled = false;
ctx.drawImage(
image,
width / 2 - image.width / 2,
height / 2 - image.height / 2,
image.width,
image.height,
);
return offscreen.transferToImageBitmap();
}
export interface BlobOptions {
type?: string;
quality?: number;
}
export async function imageBitmapToBlob(imageBitmap: ImageBitmap) {
const offscreen = new OffscreenCanvas(
imageBitmap.width,
imageBitmap.height,
);
offscreen.getContext("2d").drawImage(imageBitmap, 0, 0);
return offscreen.convertToBlob({
type: "image/jpeg",
quality: 0.8,
});
}
export async function imageBitmapFromBlob(blob: Blob) {
return createImageBitmap(blob);
}