Convert point and box to regular objects

This commit is contained in:
Manav Rathi 2024-05-21 14:24:51 +05:30
parent 4dbc8ab31e
commit 2d5894c5d6
No known key found for this signature in database
5 changed files with 146 additions and 167 deletions

View file

@@ -0,0 +1,94 @@
import { blobCache } from "@/next/blob-cache";
import type { Box, Face, FaceAlignment } from "./types";
/**
 * Extract the face crop for {@link face} from {@link imageBitmap}, save it as
 * a JPEG in the "face-crops" blob cache keyed by the face's id, and return the
 * saved blob.
 */
export const saveFaceCrop = async (imageBitmap: ImageBitmap, face: Face) => {
    const crop = extractFaceCrop(imageBitmap, face.alignment);
    const jpegBlob = await imageBitmapToBlob(crop);
    crop.close();
    const cropCache = await blobCache("face-crops");
    await cropCache.put(face.id, jpegBlob);
    return jpegBlob;
};
/** Encode {@link imageBitmap} as a JPEG (quality 0.8) and return the blob. */
const imageBitmapToBlob = (imageBitmap: ImageBitmap) => {
    const { width, height } = imageBitmap;
    const canvas = new OffscreenCanvas(width, height);
    canvas.getContext("2d").drawImage(imageBitmap, 0, 0);
    return canvas.convertToBlob({ type: "image/jpeg", quality: 0.8 });
};
/**
 * Extract the region of {@link imageBitmap} around the face bounding box in
 * {@link alignment}, downscaled (never upscaled) so that neither output
 * dimension exceeds 256 px, and return it as a new ImageBitmap.
 */
const extractFaceCrop = (
    imageBitmap: ImageBitmap,
    alignment: FaceAlignment,
): ImageBitmap => {
    // TODO-ML: This algorithm is different from what is used by the mobile app.
    // Also, it needs to be something that can work fully using the embedding we
    // receive from remote - the `alignment.boundingBox` will not be available
    // to us in such cases.
    //
    // Pad the face bounding box by 1.5x and snap its components to integers.
    const paddedBox = roundBox(enlargeBox(alignment.boundingBox, 1.5));
    const outputSize = { width: paddedBox.width, height: paddedBox.height };
    // Cap the output so its larger dimension is at most 256; scale >= 1 means
    // the padded box already fits and is used as-is (no upscaling).
    const maxDimension = 256;
    const scale = Math.min(
        maxDimension / paddedBox.width,
        maxDimension / paddedBox.height,
    );
    if (scale < 1) {
        outputSize.width = Math.round(scale * paddedBox.width);
        outputSize.height = Math.round(scale * paddedBox.height);
    }
    const offscreen = new OffscreenCanvas(outputSize.width, outputSize.height);
    const offscreenCtx = offscreen.getContext("2d");
    offscreenCtx.imageSmoothingQuality = "high";
    // Move the origin to the center of the output canvas; the boxes below are
    // expressed relative to this center.
    offscreenCtx.translate(outputSize.width / 2, outputSize.height / 2);
    // A box covering the entire output canvas, centered on the (translated)
    // origin.
    const outputBox = {
        x: -outputSize.width / 2,
        y: -outputSize.height / 2,
        width: outputSize.width,
        height: outputSize.height,
    };
    // NOTE(review): both the source and destination rectangles are enlarged by
    // the same 1.5 factor, so the drawn region extends past the visible canvas
    // on all sides while keeping the crop's scale — presumably intentional;
    // see the TODO above about aligning this with the mobile implementation.
    const enlargedBox = enlargeBox(paddedBox, 1.5);
    const enlargedOutputBox = enlargeBox(outputBox, 1.5);
    offscreenCtx.drawImage(
        imageBitmap,
        enlargedBox.x,
        enlargedBox.y,
        enlargedBox.width,
        enlargedBox.height,
        enlargedOutputBox.x,
        enlargedOutputBox.y,
        enlargedOutputBox.width,
        enlargedOutputBox.height,
    );
    return offscreen.transferToImageBitmap();
};
/** Return a copy of {@link box} with every component rounded to the nearest integer. */
const roundBox = (box: Box): Box => ({
    x: Math.round(box.x),
    y: Math.round(box.y),
    width: Math.round(box.width),
    height: Math.round(box.height),
});
/**
 * Return a new box that is {@link box} scaled by {@link factor} about its own
 * center (the center stays fixed, the size changes).
 */
const enlargeBox = (box: Box, factor: number): Box => {
    const scaledWidth = factor * box.width;
    const scaledHeight = factor * box.height;
    const cx = box.x + box.width / 2;
    const cy = box.y + box.height / 2;
    return {
        x: cx - scaledWidth / 2,
        y: cy - scaledHeight / 2,
        width: scaledWidth,
        height: scaledHeight,
    };
};

View file

@@ -1,16 +1,10 @@
import { FILE_TYPE } from "@/media/file-type";
import { blobCache } from "@/next/blob-cache";
import log from "@/next/log";
import { workerBridge } from "@/next/worker/worker-bridge";
import { Matrix } from "ml-matrix";
import {
import type {
Box,
Dimensions,
Point,
enlargeBox,
roundBox,
} from "services/face/geom";
import type {
Face,
FaceAlignment,
FaceDetection,
@@ -26,6 +20,7 @@ import {
translate,
} from "transformation-matrix";
import type { EnteFile } from "types/file";
import { saveFaceCrop } from "./crop";
import { fetchImageBitmap, getLocalFileImageBitmap } from "./file";
import {
clamp,
@@ -148,8 +143,7 @@ const indexFaces_ = async (enteFile: EnteFile, imageBitmap: ImageBitmap) => {
const detectFaces = async (
imageBitmap: ImageBitmap,
): Promise<FaceDetection[]> => {
const rect = ({ width, height }: Dimensions) =>
new Box({ x: 0, y: 0, width, height });
const rect = ({ width, height }) => ({ x: 0, y: 0, width, height });
const { yoloInput, yoloSize } =
convertToYOLOInputFloat32ChannelsFirst(imageBitmap);
@@ -259,19 +253,19 @@ const filterExtractDetectionsFromYOLOOutput = (
const rightMouthX = rows[i + 13];
const rightMouthY = rows[i + 14];
const box = new Box({
const box = {
x: xMin,
y: yMin,
width: width,
height: height,
});
};
const probability = score as number;
const landmarks = [
new Point(leftEyeX, leftEyeY),
new Point(rightEyeX, rightEyeY),
new Point(noseX, noseY),
new Point(leftMouthX, leftMouthY),
new Point(rightMouthX, rightMouthY),
{ x: leftEyeX, y: leftEyeY },
{ x: rightEyeX, y: rightEyeY },
{ x: noseX, y: noseY },
{ x: leftMouthX, y: leftMouthY },
{ x: rightMouthX, y: rightMouthY },
];
faces.push({ box, landmarks, probability });
}
@@ -291,7 +285,7 @@ const transformFaceDetections = (
const transform = boxTransformationMatrix(inBox, toBox);
return faceDetections.map((f) => ({
box: transformBox(f.box, transform),
landmarks: f.landmarks.map((p) => transformPoint(p, transform)),
landmarks: f.landmarks.map((p) => applyToPoint(transform, p)),
probability: f.probability,
}));
};
@@ -305,24 +299,19 @@ const boxTransformationMatrix = (
scale(toBox.width / inBox.width, toBox.height / inBox.height),
);
const transformPoint = (point: Point, transform: TransformationMatrix) => {
const txdPoint = applyToPoint(transform, point);
return new Point(txdPoint.x, txdPoint.y);
};
const transformBox = (box: Box, transform: TransformationMatrix): Box => {
const topLeft = applyToPoint(transform, { x: box.x, y: box.y });
const bottomRight = applyToPoint(transform, {
x: box.x + box.width,
y: box.y + box.height,
});
const transformBox = (box: Box, transform: TransformationMatrix) => {
const topLeft = transformPoint(new Point(box.x, box.y), transform);
const bottomRight = transformPoint(
new Point(box.x + box.width, box.y + box.height),
transform,
);
return new Box({
return {
x: topLeft.x,
y: topLeft.y,
width: bottomRight.x - topLeft.x,
height: bottomRight.y - topLeft.y,
});
};
};
/**
@@ -470,14 +459,14 @@ const faceAlignmentUsingSimilarityTransform = (
const size = 1 / simTransform.scale;
const meanTranslation = simTransform.toMean.sub(0.5).mul(size);
const centerMat = simTransform.fromMean.sub(meanTranslation);
const center = new Point(centerMat.get(0, 0), centerMat.get(1, 0));
const center = { x: centerMat.get(0, 0), y: centerMat.get(1, 0) };
const boundingBox = new Box({
const boundingBox = {
x: center.x - size / 2,
y: center.y - size / 2,
width: size,
height: size,
});
};
return { affineMatrix, boundingBox };
};
@@ -720,85 +709,16 @@ const relativeDetection = (
{ width, height }: Dimensions,
): FaceDetection => {
const oldBox: Box = faceDetection.box;
const box = new Box({
const box = {
x: oldBox.x / width,
y: oldBox.y / height,
width: oldBox.width / width,
height: oldBox.height / height,
});
const landmarks = faceDetection.landmarks.map((l) => {
return new Point(l.x / width, l.y / height);
});
};
const landmarks = faceDetection.landmarks.map((l) => ({
x: l.x / width,
y: l.y / height,
}));
const probability = faceDetection.probability;
return { box, landmarks, probability };
};
/**
 * Extract the face crop for {@link face} from {@link imageBitmap}, store it as
 * a JPEG blob in the "face-crops" cache under the face's id, and return the
 * blob.
 */
export const saveFaceCrop = async (imageBitmap: ImageBitmap, face: Face) => {
    const faceCrop = extractFaceCrop(imageBitmap, face.alignment);
    const blob = await imageBitmapToBlob(faceCrop);
    // Release the bitmap's backing memory once it has been encoded.
    faceCrop.close();
    const cache = await blobCache("face-crops");
    await cache.put(face.id, blob);
    return blob;
};
/** Encode {@link imageBitmap} as a JPEG (quality 0.8) and return the blob. */
const imageBitmapToBlob = (imageBitmap: ImageBitmap) => {
    const canvas = new OffscreenCanvas(imageBitmap.width, imageBitmap.height);
    canvas.getContext("2d").drawImage(imageBitmap, 0, 0);
    return canvas.convertToBlob({ type: "image/jpeg", quality: 0.8 });
};
/**
 * Extract the region of {@link imageBitmap} around the face bounding box in
 * {@link alignment}, downscaled (never upscaled) so that neither output
 * dimension exceeds 256 px, and return it as a new ImageBitmap.
 */
const extractFaceCrop = (
    imageBitmap: ImageBitmap,
    alignment: FaceAlignment,
): ImageBitmap => {
    // TODO-ML: This algorithm is different from what is used by the mobile app.
    // Also, it needs to be something that can work fully using the embedding we
    // receive from remote - the `alignment.boundingBox` will not be available
    // to us in such cases.
    //
    // Pad the face bounding box by 1.5x and snap its components to integers.
    const paddedBox = roundBox(enlargeBox(alignment.boundingBox, 1.5));
    const outputSize = { width: paddedBox.width, height: paddedBox.height };
    // Cap the output so its larger dimension is at most 256; scale >= 1 means
    // the padded box already fits and is used as-is (no upscaling).
    const maxDimension = 256;
    const scale = Math.min(
        maxDimension / paddedBox.width,
        maxDimension / paddedBox.height,
    );
    if (scale < 1) {
        outputSize.width = Math.round(scale * paddedBox.width);
        outputSize.height = Math.round(scale * paddedBox.height);
    }
    const offscreen = new OffscreenCanvas(outputSize.width, outputSize.height);
    const offscreenCtx = offscreen.getContext("2d");
    offscreenCtx.imageSmoothingQuality = "high";
    // Move the origin to the center of the output canvas; the boxes below are
    // expressed relative to this center.
    offscreenCtx.translate(outputSize.width / 2, outputSize.height / 2);
    // A box covering the entire output canvas, centered on the (translated)
    // origin.
    const outputBox = new Box({
        x: -outputSize.width / 2,
        y: -outputSize.height / 2,
        width: outputSize.width,
        height: outputSize.height,
    });
    // NOTE(review): both the source and destination rectangles are enlarged by
    // the same 1.5 factor, so the drawn region extends past the visible canvas
    // on all sides while keeping the crop's scale — presumably intentional;
    // see the TODO above about aligning this with the mobile implementation.
    const enlargedBox = enlargeBox(paddedBox, 1.5);
    const enlargedOutputBox = enlargeBox(outputBox, 1.5);
    offscreenCtx.drawImage(
        imageBitmap,
        enlargedBox.x,
        enlargedBox.y,
        enlargedBox.width,
        enlargedBox.height,
        enlargedOutputBox.x,
        enlargedOutputBox.y,
        enlargedOutputBox.width,
        enlargedOutputBox.height,
    );
    return offscreen.transferToImageBitmap();
};

View file

@@ -1,57 +0,0 @@
/** A point in 2D space, given by its x and y coordinates. */
export class Point {
    constructor(
        public x: number,
        public y: number,
    ) {}
}
/** The width and height of something, say an image. */
export interface Dimensions {
    width: number;
    height: number;
}
/** A rectangle given by its top left coordinates (x, y) and its dimensions. */
export interface IRect {
    x: number;
    y: number;
    width: number;
    height: number;
}
/** A rectangle given by its top left coordinates and dimensions. */
export class Box implements IRect {
    public x: number;
    public y: number;
    public width: number;
    public height: number;
    constructor(rect: IRect) {
        this.x = rect.x;
        this.y = rect.y;
        this.width = rect.width;
        this.height = rect.height;
    }
}
/** Return a copy of {@link box} with every component rounded to the nearest integer. */
export const roundBox = (box: Box): Box =>
    new Box({
        x: Math.round(box.x),
        y: Math.round(box.y),
        width: Math.round(box.width),
        height: Math.round(box.height),
    });
/**
 * Return a new box that is {@link box} scaled by {@link factor} about its own
 * center (the center stays fixed, the size changes).
 */
export const enlargeBox = (box: Box, factor: number) => {
    const scaledWidth = factor * box.width;
    const scaledHeight = factor * box.height;
    const center = new Point(box.x + box.width / 2, box.y + box.height / 2);
    return new Box({
        x: center.x - scaledWidth / 2,
        y: center.y - scaledHeight / 2,
        width: scaledWidth,
        height: scaledHeight,
    });
};

View file

@@ -2,7 +2,7 @@ import log from "@/next/log";
import ComlinkCryptoWorker from "@ente/shared/crypto";
import { putEmbedding } from "services/embeddingService";
import type { EnteFile } from "types/file";
import type { Point } from "./geom";
import type { Point } from "./crop";
import type { Face, FaceDetection, MlFileData } from "./types";
export const putFaceEmbedding = async (

View file

@@ -1,4 +1,26 @@
import { Box, Dimensions, Point } from "services/face/geom";
/** A point in 2D space, given by its x and y coordinates. */
export interface Point {
    x: number;
    y: number;
}
/** The width and height of something, say an image. */
export interface Dimensions {
    width: number;
    height: number;
}
/** A rectangle given by its top left coordinates and dimensions. */
export interface Box {
    /** The x coordinate of the top left (xMin). */
    x: number;
    /** The y coordinate of the top left (yMin). */
    y: number;
    /** The width of the box. */
    width: number;
    /** The height of the box. */
    height: number;
}
export interface FaceDetection {
// box and landmarks is relative to image dimentions stored at mlFileData