Store face crops and extract aligned faces from them

Align faces using only center, size and rotation; with these, aligned faces can be extracted from the stored crops without the whole image.
Shailesh Pandit 2021-12-23 18:27:53 +05:30
parent 4ebcddbb84
commit b4c31c5845
18 changed files with 568 additions and 68 deletions
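
The substance of the change: an aligned face is now described only by its center, size and rotation (relative to the image dimensions), and a small padded face crop is stored with each detected face, so aligned faces can later be re-extracted from the stored crop without decoding the whole image. A minimal sketch of that flow, mirroring the onDebugFile handler added below (the wrapper function name is illustrative; 112 px is the ArcFace face size used elsewhere in this commit):

import tfjsFaceDetectionService from 'services/machineLearning/tfjsFaceDetectionService';
import arcfaceCropService from 'services/machineLearning/arcfaceCropService';
import arcfaceAlignmentService from 'services/machineLearning/arcfaceAlignmentService';
import { getMLSyncConfig } from 'utils/machineLearning';
import { ibExtractFaceImageFromCrop } from 'utils/machineLearning/faceCrop';

async function debugExtractAlignedFaces(imageBitmap: ImageBitmap) {
    // Detect faces, then store a padded crop for each detection.
    const detectedFaces = await tfjsFaceDetectionService.detectFaces(imageBitmap);
    const config = await getMLSyncConfig();
    for (const face of detectedFaces) {
        face.faceCrop = await arcfaceCropService.getFaceCrop(
            imageBitmap,
            face,
            config.faceCrop
        );
    }
    // Alignment computes center, size and rotation for each face.
    const alignedFaces = arcfaceAlignmentService.getAlignedFaces(detectedFaces);
    // From here on only the alignment parameters and the stored crop are
    // needed; the original image is no longer required.
    return Promise.all(
        alignedFaces.map((face) => ibExtractFaceImageFromCrop(face, 112))
    );
}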

@ -0,0 +1,65 @@
import React, { useEffect, useState } from 'react';
import { DetectedFace } from 'types/machineLearning';
import { imageBitmapToBlob } from 'utils/image';
interface MLFileDebugViewProps {
// mlFileData: MlFileData
faces: Array<DetectedFace>;
images: Array<ImageBitmap>;
}
export default function MLFileDebugView(props: MLFileDebugViewProps) {
return (
<div>
{props.faces?.map((face, i) => (
<MLFaceDebugView key={i} face={face}></MLFaceDebugView>
))}
{props.images?.map((image, i) => (
<MLImageBitmapView key={i} image={image}></MLImageBitmapView>
))}
</div>
);
}
function MLFaceDebugView(props: { face: DetectedFace }) {
const [imgUrl, setImgUrl] = useState<string>();
useEffect(() => {
const face = props?.face;
if (!face?.faceCrop?.image) {
return;
}
// console.log('faceCrop: ', face.faceCrop);
setImgUrl(URL.createObjectURL(face.faceCrop.image));
}, [props.face]);
return (
<>
<img src={imgUrl}></img>
</>
);
}
function MLImageBitmapView(props: { image: ImageBitmap }) {
const [imgUrl, setImgUrl] = useState<string>();
useEffect(() => {
const image = props?.image;
if (!image) {
return;
}
// console.log('image: ', image);
async function loadImage() {
const blob = await imageBitmapToBlob(image);
setImgUrl(URL.createObjectURL(blob));
}
loadImage();
}, [props.image]);
return (
<>
<img src={imgUrl}></img>
</>
);
}

@ -1,4 +1,4 @@
import React, { useState, useEffect, useContext } from 'react';
import React, { useState, useEffect, useContext, ChangeEvent } from 'react';
import { getData, LS_KEYS } from 'utils/storage/localStorage';
import { useRouter } from 'next/router';
import { ComlinkWorker } from 'utils/crypto';
@ -6,10 +6,19 @@ import { AppContext } from 'pages/_app';
import { PAGES } from 'types';
import * as Comlink from 'comlink';
import { runningInBrowser } from 'utils/common';
// import { MLSyncResult } from 'utils/machineLearning/types';
import TFJSImage from './TFJSImage';
import { MLDebugResult } from 'types/machineLearning';
import { DetectedFace, MLDebugResult } from 'types/machineLearning';
import Tree from 'react-d3-tree';
import MLFileDebugView from './MLFileDebugView';
import tfjsFaceDetectionService from 'services/machineLearning/tfjsFaceDetectionService';
import arcfaceAlignmentService from 'services/machineLearning/arcfaceAlignmentService';
import arcfaceCropService from 'services/machineLearning/arcfaceCropService';
import { ibExtractFaceImageFromCrop } from 'utils/machineLearning/faceCrop';
import { getMLSyncConfig } from 'utils/machineLearning';
import {
ibExtractFaceImage,
ibExtractFaceImageUsingTransform,
} from 'utils/machineLearning/faceAlign';
interface TSNEProps {
mlResult: MLDebugResult;
@ -81,6 +90,10 @@ export default function MLDebug() {
tree: null,
tsne: null,
});
const [faces, setFaces] = useState<DetectedFace[]>();
const [images, setImages] = useState<ImageBitmap[]>();
const router = useRouter();
const appContext = useContext(AppContext);
@ -109,6 +122,14 @@ export default function MLDebug() {
setToken(user.token);
}
appContext.showNavBar(true);
// async function loadMlFileData() {
// const mlFileData = await mlFilesStore.getItem<MlFileData>('10000007');
// setMlFileData(mlFileData);
// console.log('loaded mlFileData: ', mlFileData);
// }
// loadMlFileData();
}, []);
const onSync = async () => {
@ -156,6 +177,45 @@ export default function MLDebug() {
}
};
const onDebugFile = async (event: ChangeEvent<HTMLInputElement>) => {
// TODO: go through worker for these apis, to not include ml code in main bundle
const imageBitmap = await createImageBitmap(event.target.files[0]);
const detectedFaces = await tfjsFaceDetectionService.detectFaces(
imageBitmap
);
const mlSyncConfig = await getMLSyncConfig();
const facePromises = detectedFaces.map(async (face) => {
face.faceCrop = await arcfaceCropService.getFaceCrop(
imageBitmap,
face,
mlSyncConfig.faceCrop
);
});
await Promise.all(facePromises);
setFaces(detectedFaces);
console.log('detectedFaces: ', detectedFaces.length);
const alignedFaces =
arcfaceAlignmentService.getAlignedFaces(detectedFaces);
console.log('alignedFaces: ', alignedFaces);
const faceCropPromises = alignedFaces.map((face) => {
return ibExtractFaceImageFromCrop(face, 112);
});
const faceImagePromises = alignedFaces.map((face) => {
return ibExtractFaceImage(imageBitmap, face, 112);
});
const faceImageTransformPromises = alignedFaces.map((face) => {
return ibExtractFaceImageUsingTransform(imageBitmap, face, 112);
});
const faceImages = await Promise.all([
...faceCropPromises,
...faceImagePromises,
...faceImageTransformPromises,
]);
setImages(faceImages);
};
const nodeSize = { x: 180, y: 180 };
const foreignObjectProps = { width: 112, height: 150, x: -56 };
@ -206,6 +266,9 @@ export default function MLDebug() {
</button>
<button onClick={onStartMLSync}>Start ML Sync</button>
<button onClick={onStopMLSync}>Stop ML Sync</button>
<input id="debugFile" type="file" onChange={onDebugFile} />
<MLFileDebugView faces={faces} images={images} />
<p>{JSON.stringify(mlResult.clustersWithNoise)}</p>
<div>

@ -1,7 +1,4 @@
import {
ARCFACE_LANDMARKS,
getAlignedFaceUsingSimilarityTransform,
} from 'utils/machineLearning/faceAlign';
import { getArcfaceAlignedFace } from 'utils/machineLearning/faceAlign';
import {
AlignedFace,
DetectedFace,
@ -24,11 +21,7 @@ class ArcfaceAlignmentService implements FaceAlignmentService {
const alignedFaces = new Array<AlignedFace>(faces.length);
faces.forEach((face, index) => {
alignedFaces[index] = getAlignedFaceUsingSimilarityTransform(
face,
ARCFACE_LANDMARKS
// this.method
);
alignedFaces[index] = getArcfaceAlignedFace(face);
});
return alignedFaces;

@ -0,0 +1,36 @@
import {
DetectedFace,
FaceCropConfig,
FaceCropMethod,
FaceCropService,
StoredFaceCrop,
Versioned,
} from 'types/machineLearning';
import { getArcfaceAlignedFace } from 'utils/machineLearning/faceAlign';
import { getFaceCrop, getStoredFaceCrop } from 'utils/machineLearning/faceCrop';
class ArcFaceCropService implements FaceCropService {
public method: Versioned<FaceCropMethod>;
constructor() {
this.method = {
value: 'ArcFace',
version: 1,
};
}
public async getFaceCrop(
imageBitmap: ImageBitmap,
face: DetectedFace,
config: FaceCropConfig
): Promise<StoredFaceCrop> {
const alignedFace = getArcfaceAlignedFace(face);
const faceCrop = getFaceCrop(imageBitmap, alignedFace, config);
const storedFaceCrop = getStoredFaceCrop(faceCrop, config.blobOptions);
faceCrop.image.close();
return storedFaceCrop;
}
}
export default new ArcFaceCropService();

@ -5,6 +5,8 @@ import {
Face,
FaceAlignmentMethod,
FaceAlignmentService,
FaceCropMethod,
FaceCropService,
FaceDetectionMethod,
FaceDetectionService,
FaceEmbeddingMethod,
@ -13,6 +15,7 @@ import {
MLSyncContext,
} from 'types/machineLearning';
import arcfaceAlignmentService from './arcfaceAlignmentService';
import arcfaceCropService from './arcfaceCropService';
import blazeFaceDetectionService from './tfjsFaceDetectionService';
import mobileFaceNetEmbeddingService from './tfjsFaceEmbeddingService';
@ -27,6 +30,14 @@ export class MLFactory {
throw Error('Unknown face detection method: ' + method);
}
public static getFaceCropService(method: FaceCropMethod) {
if (method === 'ArcFace') {
return arcfaceCropService;
}
throw Error('Unknown face crop method: ' + method);
}
public static getFaceAlignmentService(
method: FaceAlignmentMethod
): FaceAlignmentService {
@ -62,6 +73,7 @@ export class LocalMLSyncContext implements MLSyncContext {
public shouldUpdateMLVersion: boolean;
public faceDetectionService: FaceDetectionService;
public faceCropService: FaceCropService;
public faceAlignmentService: FaceAlignmentService;
public faceEmbeddingService: FaceEmbeddingService;
@ -85,6 +97,9 @@ export class LocalMLSyncContext implements MLSyncContext {
this.faceDetectionService = MLFactory.getFaceDetectionService(
this.config.faceDetection.method
);
this.faceCropService = MLFactory.getFaceCropService(
this.config.faceCrop.method
);
this.faceAlignmentService = MLFactory.getFaceAlignmentService(
this.config.faceAlignment.method
);

@ -100,7 +100,7 @@ class MachineLearningService {
nFaceNoise: syncContext.faceClustersWithNoise?.noise.length,
tsne: syncContext.tsne,
};
console.log('[MLService] sync results: ', mlSyncResult);
// console.log('[MLService] sync results: ', mlSyncResult);
// await syncContext.dispose();
console.log('Final TF Memory stats: ', tf.memory());
@ -231,13 +231,15 @@ class MachineLearningService {
fileContext.newMLFileData.mlVersion = syncContext.config.mlVersion;
}
await this.syncFileFaceDetection(syncContext, fileContext);
await this.syncFileFaceDetections(syncContext, fileContext);
if (
fileContext.filtertedFaces &&
fileContext.filtertedFaces.length > 0
) {
await this.syncFileFaceAlignment(syncContext, fileContext);
await this.syncFileFaceCrops(syncContext, fileContext);
await this.syncFileFaceAlignments(syncContext, fileContext);
await this.syncFileFaceEmbeddings(syncContext, fileContext);
@ -250,6 +252,8 @@ class MachineLearningService {
...faceWithEmbeddings,
} as Face)
);
} else {
fileContext.newMLFileData.faces = undefined;
}
fileContext.tfImage && fileContext.tfImage.dispose();
@ -280,10 +284,15 @@ class MachineLearningService {
syncContext.token
);
}
if (!fileContext.newMLFileData.imageDimentions) {
const { width, height } = fileContext.imageBitmap;
fileContext.newMLFileData.imageDimentions = { width, height };
}
// console.log('2 TF Memory stats: ', tf.memory());
}
private async syncFileFaceDetection(
private async syncFileFaceDetections(
syncContext: MLSyncContext,
fileContext: MLSyncFileContext
) {
@ -302,21 +311,43 @@ class MachineLearningService {
fileContext.imageBitmap
);
// console.log('3 TF Memory stats: ', tf.memory());
// TODO: reenable faces filtering based on width
fileContext.filtertedFaces = detectedFaces;
// .filter(
// (f) =>
// f.box.width > syncContext.config.faceDetection.minFaceSize
// ?.filter((f) =>
// f.box.width > syncContext.config.faceDetection.minFaceSize
// );
console.log(
'[MLService] filtertedFaces: ',
fileContext.filtertedFaces.length
fileContext.filtertedFaces?.length
);
} else {
fileContext.filtertedFaces = fileContext.oldMLFileData.faces;
}
}
private async syncFileFaceAlignment(
private async syncFileFaceCrops(
syncContext: MLSyncContext,
fileContext: MLSyncFileContext
) {
const imageBitmap = fileContext.imageBitmap;
if (
!fileContext.newDetection ||
!syncContext.config.faceCrop.enabled ||
!imageBitmap
) {
return;
}
for (const face of fileContext.filtertedFaces) {
face.faceCrop = await syncContext.faceCropService.getFaceCrop(
imageBitmap,
face,
syncContext.config.faceCrop
);
}
}
private async syncFileFaceAlignments(
syncContext: MLSyncContext,
fileContext: MLSyncFileContext
) {
@ -332,7 +363,10 @@ class MachineLearningService {
syncContext.faceAlignmentService.getAlignedFaces(
fileContext.filtertedFaces
);
console.log('[MLService] alignedFaces: ', fileContext.alignedFaces);
console.log(
'[MLService] alignedFaces: ',
fileContext.alignedFaces?.length
);
// console.log('4 TF Memory stats: ', tf.memory());
} else {
fileContext.alignedFaces = fileContext.oldMLFileData.faces;
@ -500,11 +534,9 @@ class MachineLearningService {
.map((f) => allFaces[f])
.filter((f) => f);
// TODO: face box to be normalized to 0..1 scale
const personFace = findFirstIfSorted(
faces,
(a, b) =>
a.probability * a.box.width - b.probability * b.box.width
(a, b) => a.probability * a.size - b.probability * b.size
);
const faceImageTensor = await getFaceImage(
personFace,

@ -46,7 +46,7 @@ class TFJSFaceDetectionService implements FaceDetectionService {
});
console.log(
'loaded blazeFaceModel: ',
await this.blazeFaceModel,
// await this.blazeFaceModel,
await tf.getBackend()
);
}
@ -160,7 +160,7 @@ class TFJSFaceDetectionService implements FaceDetectionService {
const faces = await blazeFaceModel.estimateFaces(tfImage);
tf.dispose(tfImage);
const detectedFaces: Array<DetectedFace> = faces.map(
const detectedFaces: Array<DetectedFace> = faces?.map(
(normalizedFace) => {
const landmarks = normalizedFace.landmarks as number[][];
return {

@ -86,7 +86,7 @@ class TFJSFaceEmbeddingService implements FaceEmbeddingService {
}
const faceImagesTensor = ibExtractFaceImages(
image as ImageBitmap,
image,
faces,
this.faceSize
);

src/types/image/index.ts (new file)

@ -0,0 +1,9 @@
export interface Dimensions {
width: number;
height: number;
}
export interface BlobOptions {
type?: string;
quality?: number;
}

@ -0,0 +1,8 @@
export const ARCFACE_LANDMARKS = [
[38.2946, 51.6963],
[73.5318, 51.5014],
[56.0252, 71.7366],
[56.1396, 92.2848],
] as Array<[number, number]>;
export const ARCFACE_LANDMARKS_FACE_SIZE = 112;

@ -10,6 +10,7 @@ import { DebugInfo } from 'hdbscan';
import { Point as D3Point, RawNodeDatum } from 'react-d3-tree/lib/types/common';
import { File } from 'services/fileService';
import { Dimensions } from 'types/image';
import { Box, Point } from '../../../thirdparty/face-api/classes';
export interface MLSyncResult {
@ -89,6 +90,8 @@ export declare type ImageType = 'Original' | 'Preview';
export declare type FaceDetectionMethod = 'BlazeFace' | 'FaceApiSSD';
export declare type FaceCropMethod = 'ArcFace';
export declare type FaceAlignmentMethod =
| 'ArcFace'
| 'FaceApiDlib'
@ -109,14 +112,32 @@ export interface Versioned<T> {
}
export interface DetectedFace {
// box and landmarks are relative to the image dimensions stored at mlFileData
box: Box;
landmarks: Array<Landmark>;
probability?: number;
faceCrop?: StoredFaceCrop;
// detectionMethod: Versioned<FaceDetectionMethod>;
}
export interface FaceCrop {
image: ImageBitmap;
// imageBox is relative to the image dimensions stored at mlFileData
imageBox: Box;
}
export interface StoredFaceCrop {
image: Blob;
imageBox: Box;
}
export interface AlignedFace extends DetectedFace {
// TODO: remove affine matrix as only works for fixed face size
affineMatrix: Array<Array<number>>;
rotation: number;
// size and center are relative to the image dimensions stored at mlFileData
size: number;
center: Point;
// alignmentMethod: Versioned<FaceAlignmentMethod>;
}
@ -141,6 +162,7 @@ export interface MlFileData {
fileId: number;
faces?: Face[];
imageSource: ImageType;
imageDimentions?: Dimensions;
detectionMethod: Versioned<FaceDetectionMethod>;
alignmentMethod: Versioned<FaceAlignmentMethod>;
embeddingMethod: Versioned<FaceEmbeddingMethod>;
@ -152,6 +174,17 @@ export interface FaceDetectionConfig {
minFaceSize: number;
}
export interface FaceCropConfig {
enabled: boolean;
method: FaceCropMethod;
padding: number;
maxSize: number;
blobOptions: {
type: string;
quality: number;
};
}
export interface FaceAlignmentConfig {
method: FaceAlignmentMethod;
}
@ -184,6 +217,7 @@ export interface MLSyncConfig {
batchSize: number;
imageSource: ImageType;
faceDetection: FaceDetectionConfig;
faceCrop: FaceCropConfig;
faceAlignment: FaceAlignmentConfig;
faceEmbedding: FaceEmbeddingConfig;
faceClustering: FaceClusteringConfig;
@ -197,6 +231,7 @@ export interface MLSyncContext {
shouldUpdateMLVersion: boolean;
faceDetectionService: FaceDetectionService;
faceCropService: FaceCropService;
faceAlignmentService: FaceAlignmentService;
faceEmbeddingService: FaceEmbeddingService;
@ -243,6 +278,16 @@ export interface FaceDetectionService {
dispose(): Promise<void>;
}
export interface FaceCropService {
method: Versioned<FaceCropMethod>;
getFaceCrop(
imageBitmap: ImageBitmap,
face: DetectedFace,
config: FaceCropConfig
): Promise<StoredFaceCrop>;
}
export interface FaceAlignmentService {
method: Versioned<FaceAlignmentMethod>;
getAlignedFaces(faces: Array<DetectedFace>): Array<AlignedFace>;

@ -1,17 +1,21 @@
// TODO: these utils only work in env where OffscreenCanvas is available
import { BlobOptions, Dimensions } from 'types/image';
import { enlargeBox } from 'utils/machineLearning';
import { Box } from '../../../thirdparty/face-api/classes';
export function resizeToSquare(img: ImageBitmap, size: number) {
const scale = size / Math.max(img.height, img.width);
const width = scale * img.width;
const height = scale * img.height;
// if (!offscreen) {
const offscreen = new OffscreenCanvas(size, size);
// }
offscreen.getContext('2d').drawImage(img, 0, 0, width, height);
return { image: offscreen.transferToImageBitmap(), width, height };
}
export function transform(
img: ImageBitmap,
imageBitmap: ImageBitmap,
affineMat: number[][],
outputWidth: number,
outputHeight: number
@ -28,6 +32,89 @@ export function transform(
affineMat[1][2]
);
context.drawImage(img, 0, 0);
context.drawImage(imageBitmap, 0, 0);
return offscreen.transferToImageBitmap();
}
export function cropWithRotation(
imageBitmap: ImageBitmap,
cropBox: Box,
rotation?: number,
maxSize?: Dimensions,
minSize?: Dimensions
) {
const box = cropBox.round();
const outputSize = { width: box.width, height: box.height };
if (maxSize) {
const minScale = Math.min(
maxSize.width / box.width,
maxSize.height / box.height
);
if (minScale < 1) {
outputSize.width = Math.round(minScale * box.width);
outputSize.height = Math.round(minScale * box.height);
}
}
if (minSize) {
const maxScale = Math.max(
minSize.width / box.width,
minSize.height / box.height
);
if (maxScale > 1) {
outputSize.width = Math.round(maxScale * box.width);
outputSize.height = Math.round(maxScale * box.height);
}
}
// console.log({ imageBitmap, box, outputSize });
const offscreen = new OffscreenCanvas(outputSize.width, outputSize.height);
const offscreenCtx = offscreen.getContext('2d');
offscreenCtx.imageSmoothingQuality = 'high';
offscreenCtx.translate(outputSize.width / 2, outputSize.height / 2);
rotation && offscreenCtx.rotate(rotation);
const outputBox = new Box({
x: -outputSize.width / 2,
y: -outputSize.height / 2,
width: outputSize.width,
height: outputSize.height,
});
const enlargedBox = enlargeBox(box, 1.5);
const enlargedOutputBox = enlargeBox(outputBox, 1.5);
offscreenCtx.drawImage(
imageBitmap,
enlargedBox.x,
enlargedBox.y,
enlargedBox.width,
enlargedBox.height,
enlargedOutputBox.x,
enlargedOutputBox.y,
enlargedOutputBox.width,
enlargedOutputBox.height
);
return offscreen.transferToImageBitmap();
}
export async function imageBitmapToBlob(
imageBitmap: ImageBitmap,
options?: BlobOptions
) {
const offscreen = new OffscreenCanvas(
imageBitmap.width,
imageBitmap.height
);
offscreen.getContext('2d').drawImage(imageBitmap, 0, 0);
return offscreen.convertToBlob(options);
}
export async function imageBitmapFromBlob(blob: Blob) {
return createImageBitmap(blob);
}
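
For illustration, a hypothetical call to the cropWithRotation helper above (import paths as in this commit; the box coordinates and rotation are made-up values): crop a region out of an ImageBitmap, rotate it about the output center, and scale it down to fit within 256×256.

import { cropWithRotation } from 'utils/image';
import { Box } from '../../../thirdparty/face-api/classes';

const regionBox = new Box({ x: 120, y: 80, width: 400, height: 400 });
const cropped = cropWithRotation(
    sourceImageBitmap, // some existing ImageBitmap
    regionBox,
    -0.12, // rotation in radians
    { width: 256, height: 256 } // maxSize: shrink the output if the box is larger
);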

@ -10,14 +10,22 @@ import {
getBoxCenterPt,
toTensor4D,
} from '.';
import { transform } from 'utils/image';
import { cropWithRotation, transform } from 'utils/image';
import {
ARCFACE_LANDMARKS,
ARCFACE_LANDMARKS_FACE_SIZE,
} from 'types/machineLearning/archface';
import { Box, Point } from '../../../thirdparty/face-api/classes';
import { Dimensions } from 'types/image';
export const ARCFACE_LANDMARKS = [
[38.2946, 51.6963],
[73.5318, 51.5014],
[56.0252, 71.7366],
[56.1396, 92.2848],
] as Array<[number, number]>;
export function normalizeLandmarks(
landmarks: Array<[number, number]>,
faceSize: number
) {
return landmarks.map((landmark) =>
landmark.map((p) => p / faceSize)
) as Array<[number, number]>;
}
export function getAlignedFaceUsingSimilarityTransform(
face: DetectedFace,
@ -43,14 +51,34 @@ export function getAlignedFaceUsingSimilarityTransform(
[0, 0, 1],
];
const size = 1 / simTransform.scale;
const meanTranslation = simTransform.toMean.sub(0.5).mul(size);
const centerMat = simTransform.fromMean.sub(meanTranslation);
const center = new Point(centerMat.get(0, 0), centerMat.get(1, 0));
const rotation = -Math.atan2(
simTransform.rotation.get(0, 1),
simTransform.rotation.get(0, 0)
);
// console.log({ affineMatrix, meanTranslation, centerMat, center, toMean: simTransform.toMean, fromMean: simTransform.fromMean, size });
return {
...face,
affineMatrix,
center,
size,
rotation,
// alignmentMethod,
};
}
export function getArcfaceAlignedFace(face: DetectedFace): AlignedFace {
return getAlignedFaceUsingSimilarityTransform(
face,
normalizeLandmarks(ARCFACE_LANDMARKS, ARCFACE_LANDMARKS_FACE_SIZE)
);
}
export function extractFaceImage(
image: tf.Tensor4D,
alignedFace: AlignedFace,
@ -105,17 +133,44 @@ export function extractFaceImages(
});
}
export function getAlignedFaceBox(alignedFace: AlignedFace) {
return new Box({
x: alignedFace.center.x - alignedFace.size / 2,
y: alignedFace.center.y - alignedFace.size / 2,
width: alignedFace.size,
height: alignedFace.size,
}).round();
}
export function ibExtractFaceImage(
image: ImageBitmap,
alignedFace: AlignedFace,
faceSize: number
): tf.Tensor3D {
const affineMat = alignedFace.affineMatrix;
const faceImageBitmap = transform(image, affineMat, faceSize, faceSize);
const tfFaceImage = tf.browser.fromPixels(faceImageBitmap);
faceImageBitmap.close();
): ImageBitmap {
const box = getAlignedFaceBox(alignedFace);
const faceSizeDimentions: Dimensions = {
width: faceSize,
height: faceSize,
};
return cropWithRotation(
image,
box,
alignedFace.rotation,
faceSizeDimentions,
faceSizeDimentions
);
}
return tfFaceImage;
export function ibExtractFaceImageUsingTransform(
image: ImageBitmap,
alignedFace: AlignedFace,
faceSize: number
): ImageBitmap {
const scaledMatrix = new Matrix(alignedFace.affineMatrix)
.mul(faceSize)
.to2DArray();
// console.log("scaledMatrix: ", scaledMatrix);
return transform(image, scaledMatrix, faceSize, faceSize);
}
export function ibExtractFaceImages(
@ -126,7 +181,13 @@ export function ibExtractFaceImages(
return tf.tidy(() => {
const faceImages = new Array<tf.Tensor3D>(faces.length);
for (let i = 0; i < faces.length; i++) {
faceImages[i] = ibExtractFaceImage(image, faces[i], faceSize);
const faceImageBitmap = ibExtractFaceImage(
image,
faces[i],
faceSize
);
faceImages[i] = tf.browser.fromPixels(faceImageBitmap);
faceImageBitmap.close();
}
return tf.stack(faceImages) as tf.Tensor4D;
@ -192,7 +253,7 @@ export function getRotatedFaceImage(
foreheadCenter
); // landmarkPoints[BLAZEFACE_NOSE_INDEX]
// angle = computeRotation(leftEye, rightEye);
console.log('angle: ', angle);
// console.log('angle: ', angle);
const faceCenter = getBoxCenter(face.box);
// console.log('faceCenter: ', faceCenter);
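
For reference, getAlignedFaceUsingSimilarityTransform above reads the rotation-only alignment parameters straight off the similarity transform: size = 1 / scale (one normalized-face unit measured in image pixels), rotation = -atan2(R[0][1], R[0][0]) where R is the 2x2 rotation block, and center = fromMean - (toMean - 0.5) * size, which is (ignoring the rotation) the image point corresponding to the centre (0.5, 0.5) of the normalized ArcFace landmark template.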

@ -0,0 +1,74 @@
import { BlobOptions, Dimensions } from 'types/image';
import {
AlignedFace,
FaceCropConfig,
FaceCrop,
StoredFaceCrop,
} from 'types/machineLearning';
import { cropWithRotation, imageBitmapToBlob } from 'utils/image';
import { enlargeBox } from '.';
import { getAlignedFaceBox } from './faceAlign';
export function getFaceCrop(
imageBitmap: ImageBitmap,
alignedFace: AlignedFace,
config: FaceCropConfig
): FaceCrop {
const box = getAlignedFaceBox(alignedFace);
const scaleForPadding = 1 + config.padding * 2;
const paddedBox = enlargeBox(box, scaleForPadding).round();
const faceImageBitmap = cropWithRotation(imageBitmap, paddedBox, 0, {
width: config.maxSize,
height: config.maxSize,
});
return {
image: faceImageBitmap,
imageBox: paddedBox,
};
}
export async function getStoredFaceCrop(
faceCrop: FaceCrop,
blobOptions: BlobOptions
): Promise<StoredFaceCrop> {
const faceCropBlob = await imageBitmapToBlob(faceCrop.image, blobOptions);
return {
image: faceCropBlob,
imageBox: faceCrop.imageBox,
};
}
export async function ibExtractFaceImageFromCrop(
alignedFace: AlignedFace,
faceSize: number
): Promise<ImageBitmap> {
const image = alignedFace.faceCrop?.image;
const imageBox = alignedFace.faceCrop?.imageBox;
if (!image || !imageBox) {
throw Error('Face crop not present');
}
const box = getAlignedFaceBox(alignedFace);
const faceCropImage = await createImageBitmap(alignedFace.faceCrop.image);
const scale = faceCropImage.width / imageBox.width;
const scaledImageBox = alignedFace.faceCrop.imageBox.rescale(scale).round();
const scaledBox = box.rescale(scale).round();
const shiftedBox = scaledBox.shift(-scaledImageBox.x, -scaledImageBox.y);
// console.log({ box, imageBox, faceCropImage, scale, scaledBox, scaledImageBox, shiftedBox });
const faceSizeDimentions: Dimensions = {
width: faceSize,
height: faceSize,
};
const faceImage = cropWithRotation(
faceCropImage,
shiftedBox,
alignedFace.rotation,
faceSizeDimentions,
faceSizeDimentions
);
return faceImage;
}
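
To make the rescaling in ibExtractFaceImageFromCrop concrete, a worked example with made-up numbers: if the padded crop box was 400×400 px at (100, 100) in the original image and the crop was stored at maxSize 256, then scale = 256 / 400 = 0.64. An aligned face box of 300×300 px at (150, 150) scales to 192×192 px at (96, 96), the crop box origin scales to (64, 64), and shifting gives a 192×192 box at (32, 32) inside the 256×256 stored crop, which cropWithRotation then resamples (applying the face's rotation) to the requested faceSize, e.g. 112×112.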

@ -165,8 +165,10 @@ export async function getFaceImage(
const imageBitmap = await getOriginalImageBitmap(file, token);
const faceImage = tf.tidy(() => {
const faceImage = ibExtractFaceImage(imageBitmap, face, faceSize);
const normalizedImage = tf.sub(tf.div(faceImage, 127.5), 1.0);
const faceImageBitmap = ibExtractFaceImage(imageBitmap, face, faceSize);
const tfFaceImage = tf.browser.fromPixels(faceImageBitmap);
faceImageBitmap.close();
const normalizedImage = tf.sub(tf.div(tfFaceImage, 127.5), 1.0);
return normalizedImage as tf.Tensor3D;
});
@ -312,6 +314,16 @@ const DEFAULT_ML_SYNC_CONFIG: MLSyncConfig = {
method: 'BlazeFace',
minFaceSize: 32,
},
faceCrop: {
enabled: true,
method: 'ArcFace',
padding: 0.25,
maxSize: 256,
blobOptions: {
type: 'image/jpeg',
quality: 0.8,
},
},
faceAlignment: {
method: 'ArcFace',
},
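
With the faceCrop defaults above, getFaceCrop enlarges the aligned face box by 1 + 2 × 0.25 = 1.5× before cropping, caps the stored crop at 256 px, and getStoredFaceCrop encodes it as a JPEG blob at quality 0.8.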

@ -20,10 +20,10 @@ export class Box<BoxType = any> implements IBoundingBox, IRect {
}
}
private _x: number
private _y: number
private _width: number
private _height: number
public x: number
public y: number
public width: number
public height: number
constructor(_box: IBoundingBox | IRect, allowNegativeDimensions: boolean = true) {
const box = (_box || {}) as any
@ -41,16 +41,16 @@ export class Box<BoxType = any> implements IBoundingBox, IRect {
Box.assertIsValidBox({ x, y, width, height }, 'Box.constructor', allowNegativeDimensions)
this._x = x
this._y = y
this._width = width
this._height = height
this.x = x
this.y = y
this.width = width
this.height = height
}
public get x(): number { return this._x }
public get y(): number { return this._y }
public get width(): number { return this._width }
public get height(): number { return this._height }
// public get x(): number { return this._x }
// public get y(): number { return this._y }
// public get width(): number { return this._width }
// public get height(): number { return this._height }
public get left(): number { return this.x }
public get top(): number { return this.y }
public get right(): number { return this.x + this.width }

@ -4,16 +4,16 @@ export interface IPoint {
}
export class Point implements IPoint {
private _x: number
private _y: number
public x: number
public y: number
constructor(x: number, y: number) {
this._x = x
this._y = y
this.x = x
this.y = y
}
get x(): number { return this._x }
get y(): number { return this._y }
// get x(): number { return this._x }
// get y(): number { return this._y }
public add(pt: IPoint): Point {
return new Point(this.x + pt.x, this.y + pt.y)

@ -102,7 +102,7 @@ export function getSimilarityTransformation(fromPoints,
// mlMatrix.Matrix.mul(rotation.mmul(fromPoints), scale),
// translation.repeat({ columns: numPoints }));
return { rotation, scale, translation };
return { rotation, scale, translation, fromMean, toMean };
}
/**