Manav Rathi 2024-05-16 13:07:55 +05:30
parent db05afb9ff
commit 7160ae700f
4 changed files with 135 additions and 177 deletions

View file

@@ -201,7 +201,6 @@ export interface MLSyncContext {
     faceDetectionService: FaceDetectionService;
     faceCropService: FaceCropService;
     faceEmbeddingService: FaceEmbeddingService;
-    blurDetectionService: BlurDetectionService;
     localFilesMap: Map<number, EnteFile>;
     outOfSyncFiles: EnteFile[];
@@ -267,11 +266,6 @@ export interface FaceEmbeddingService {
     getFaceEmbeddings(faceImages: Float32Array): Promise<Array<FaceEmbedding>>;
 }
 
-export interface BlurDetectionService {
-    method: Versioned<BlurDetectionMethod>;
-    detectBlur(alignedFaces: Float32Array, faces: Face[]): number[];
-}
-
 export interface MachineLearningWorker {
     closeLocalSyncContext(): Promise<void>;
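Taken together, these two hunks remove blur detection from the pluggable-service surface: it is no longer a field on MLSyncContext, and the BlurDetectionService interface itself is gone. A minimal sketch of the shape change, with the surrounding types stubbed out for illustration (only BlurDetectionService mirrors the removed declaration):

// Types stubbed for illustration; only BlurDetectionService mirrors the removed interface.
type BlurDetectionMethod = "Laplacian";
interface Versioned<T> {
    value: T;
    version: number;
}
interface Face {
    blurValue?: number;
}

// Before: a versioned service resolved via the sync context.
interface BlurDetectionService {
    method: Versioned<BlurDetectionMethod>;
    detectBlur(alignedFaces: Float32Array, faces: Face[]): number[];
}

// After: a free function with the same signature, imported directly, e.g.
//   import { detectBlur } from "./laplacianBlurDetectionService";
declare const detectBlur: (alignedFaces: Float32Array, faces: Face[]) => number[];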

View file

@@ -18,6 +18,7 @@ import {
     getFaceId,
     getLocalFile,
 } from "../face/image";
+import { detectBlur } from "./laplacianBlurDetectionService";
 
 class FaceService {
     async syncFileFaceDetections(
@@ -85,10 +86,7 @@ class FaceService {
            syncContext.faceEmbeddingService.faceSize,
            imageBitmap,
        );
-        const blurValues = syncContext.blurDetectionService.detectBlur(
-            faceImages,
-            newMlFile.faces,
-        );
+        const blurValues = detectBlur(faceImages, newMlFile.faces);
         newMlFile.faces.forEach((f, i) => (f.blurValue = blurValues[i]));
 
         imageBitmap.close();
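The call-site change is mechanical: the same face crops go in, one blur value per face comes back, and it is stored on f.blurValue. A hypothetical downstream consumer of that field (the threshold and helper below are illustrative and not part of this commit):

// Illustrative only: a possible consumer of the per-face blurValue field.
interface FaceLike {
    blurValue?: number;
}

// Hypothetical cutoff; the Laplacian variance is lower for blurrier crops.
const BLUR_THRESHOLD = 100;

const isBlurry = (face: FaceLike): boolean =>
    (face.blurValue ?? 0) < BLUR_THRESHOLD;

// e.g. const sharpFaces = newMlFile.faces.filter((f) => !isBlurry(f));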

View file

@@ -1,176 +1,158 @@
-import {
-    BlurDetectionMethod,
-    BlurDetectionService,
-    Face,
-    Versioned,
-} from "services/face/types";
+import { Face } from "services/face/types";
 import { createGrayscaleIntMatrixFromNormalized2List } from "utils/image";
 import { mobileFaceNetFaceSize } from "./mobileFaceNetEmbeddingService";
 
-class LaplacianBlurDetectionService implements BlurDetectionService {
-    public method: Versioned<BlurDetectionMethod>;
-
-    public constructor() {
-        this.method = {
-            value: "Laplacian",
-            version: 1,
-        };
-    }
-
-    public detectBlur(alignedFaces: Float32Array, faces: Face[]): number[] {
-        const numFaces = Math.round(
-            alignedFaces.length /
-                (mobileFaceNetFaceSize * mobileFaceNetFaceSize * 3),
-        );
-        const blurValues: number[] = [];
-        for (let i = 0; i < numFaces; i++) {
-            const face = faces[i];
-            const direction = getFaceDirection(face);
-            const faceImage = createGrayscaleIntMatrixFromNormalized2List(
-                alignedFaces,
-                i,
-            );
-            const laplacian = this.applyLaplacian(faceImage, direction);
-            const variance = this.calculateVariance(laplacian);
-            blurValues.push(variance);
-        }
-        return blurValues;
-    }
+/**
+ * Laplacian blur detection.
+ */
+export const detectBlur = (
+    alignedFaces: Float32Array,
+    faces: Face[],
+): number[] => {
+    const numFaces = Math.round(
+        alignedFaces.length /
+            (mobileFaceNetFaceSize * mobileFaceNetFaceSize * 3),
+    );
+    const blurValues: number[] = [];
+    for (let i = 0; i < numFaces; i++) {
+        const face = faces[i];
+        const direction = faceDirection(face);
+        const faceImage = createGrayscaleIntMatrixFromNormalized2List(
+            alignedFaces,
+            i,
+        );
+        const laplacian = applyLaplacian(faceImage, direction);
+        const variance = calculateVariance(laplacian);
+        blurValues.push(variance);
+    }
+    return blurValues;
+};
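detectBlur infers the face count from the buffer length: the aligned faces arrive as one flat Float32Array in which each face occupies faceSize * faceSize * 3 floats (RGB). A small standalone sketch, assuming mobileFaceNetFaceSize is 112 (the value implied by the MobileFaceNet import; treat that as an assumption here):

// Assuming a 112x112 RGB face crop, i.e. mobileFaceNetFaceSize === 112.
const faceSize = 112;
const floatsPerFace = faceSize * faceSize * 3; // 37,632 floats per face

// Two aligned faces packed back to back:
const alignedFaces = new Float32Array(2 * floatsPerFace);
const numFaces = Math.round(alignedFaces.length / floatsPerFace);
console.log(numFaces); // 2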
-    private calculateVariance(matrix: number[][]): number {
-        const numRows = matrix.length;
-        const numCols = matrix[0].length;
-        const totalElements = numRows * numCols;
-
-        // Calculate the mean
-        let mean: number = 0;
-        matrix.forEach((row) => {
-            row.forEach((value) => {
-                mean += value;
-            });
-        });
-        mean /= totalElements;
-
-        // Calculate the variance
-        let variance: number = 0;
-        matrix.forEach((row) => {
-            row.forEach((value) => {
-                const diff: number = value - mean;
-                variance += diff * diff;
-            });
-        });
-        variance /= totalElements;
-
-        return variance;
-    }
+const calculateVariance = (matrix: number[][]): number => {
+    const numRows = matrix.length;
+    const numCols = matrix[0].length;
+    const totalElements = numRows * numCols;
+
+    // Calculate the mean
+    let mean: number = 0;
+    matrix.forEach((row) => {
+        row.forEach((value) => {
+            mean += value;
+        });
+    });
+    mean /= totalElements;
+
+    // Calculate the variance
+    let variance: number = 0;
+    matrix.forEach((row) => {
+        row.forEach((value) => {
+            const diff: number = value - mean;
+            variance += diff * diff;
+        });
+    });
+    variance /= totalElements;
+
+    return variance;
+};
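calculateVariance is the population variance over every pixel of the Laplacian response; the higher it is, the sharper the face. The same computation on a tiny 2x2 matrix, worked out by hand and checked in code (a standalone sketch, not the function above):

// For [[1, 2], [3, 4]]: mean = (1 + 2 + 3 + 4) / 4 = 2.5 and
// variance = (1.5^2 + 0.5^2 + 0.5^2 + 1.5^2) / 4 = 5 / 4 = 1.25.
const matrix = [
    [1, 2],
    [3, 4],
];
const n = matrix.length * matrix[0].length;
const values = matrix.flat();
const mean = values.reduce((acc, v) => acc + v, 0) / n;
const variance = values.reduce((acc, v) => acc + (v - mean) ** 2, 0) / n;
console.log(mean, variance); // 2.5 1.25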
-    private padImage(
-        image: number[][],
-        removeSideColumns: number = 56,
-        direction: FaceDirection = "straight",
-    ): number[][] {
-        // Throw an exception if removeSideColumns is not even
-        if (removeSideColumns % 2 != 0) {
-            throw new Error("removeSideColumns must be even");
-        }
-        const numRows = image.length;
-        const numCols = image[0].length;
-        const paddedNumCols = numCols + 2 - removeSideColumns;
-        const paddedNumRows = numRows + 2;
-
-        // Create a new matrix with extra padding
-        const paddedImage: number[][] = Array.from(
-            { length: paddedNumRows },
-            () => new Array(paddedNumCols).fill(0),
-        );
-
-        // Copy original image into the center of the padded image
-        if (direction === "straight") {
-            for (let i = 0; i < numRows; i++) {
-                for (let j = 0; j < paddedNumCols - 2; j++) {
-                    paddedImage[i + 1][j + 1] =
-                        image[i][j + Math.round(removeSideColumns / 2)];
-                }
-            }
-        } // If the face is facing left, we only take the right side of the face image
-        else if (direction === "left") {
-            for (let i = 0; i < numRows; i++) {
-                for (let j = 0; j < paddedNumCols - 2; j++) {
-                    paddedImage[i + 1][j + 1] = image[i][j + removeSideColumns];
-                }
-            }
-        } // If the face is facing right, we only take the left side of the face image
-        else if (direction === "right") {
-            for (let i = 0; i < numRows; i++) {
-                for (let j = 0; j < paddedNumCols - 2; j++) {
-                    paddedImage[i + 1][j + 1] = image[i][j];
-                }
-            }
-        }
-
-        // Reflect padding
-        // Top and bottom rows
-        for (let j = 1; j <= paddedNumCols - 2; j++) {
-            paddedImage[0][j] = paddedImage[2][j]; // Top row
-            paddedImage[numRows + 1][j] = paddedImage[numRows - 1][j]; // Bottom row
-        }
-        // Left and right columns
-        for (let i = 0; i < numRows + 2; i++) {
-            paddedImage[i][0] = paddedImage[i][2]; // Left column
-            paddedImage[i][paddedNumCols - 1] =
-                paddedImage[i][paddedNumCols - 3]; // Right column
-        }
-
-        return paddedImage;
-    }
+const padImage = (
+    image: number[][],
+    removeSideColumns: number = 56,
+    direction: FaceDirection = "straight",
+): number[][] => {
+    // Throw an exception if removeSideColumns is not even
+    if (removeSideColumns % 2 != 0) {
+        throw new Error("removeSideColumns must be even");
+    }
+    const numRows = image.length;
+    const numCols = image[0].length;
+    const paddedNumCols = numCols + 2 - removeSideColumns;
+    const paddedNumRows = numRows + 2;
+
+    // Create a new matrix with extra padding
+    const paddedImage: number[][] = Array.from({ length: paddedNumRows }, () =>
+        new Array(paddedNumCols).fill(0),
+    );
+
+    // Copy original image into the center of the padded image
+    if (direction === "straight") {
+        for (let i = 0; i < numRows; i++) {
+            for (let j = 0; j < paddedNumCols - 2; j++) {
+                paddedImage[i + 1][j + 1] =
+                    image[i][j + Math.round(removeSideColumns / 2)];
+            }
+        }
+    } // If the face is facing left, we only take the right side of the face image
+    else if (direction === "left") {
+        for (let i = 0; i < numRows; i++) {
+            for (let j = 0; j < paddedNumCols - 2; j++) {
+                paddedImage[i + 1][j + 1] = image[i][j + removeSideColumns];
+            }
+        }
+    } // If the face is facing right, we only take the left side of the face image
+    else if (direction === "right") {
+        for (let i = 0; i < numRows; i++) {
+            for (let j = 0; j < paddedNumCols - 2; j++) {
+                paddedImage[i + 1][j + 1] = image[i][j];
+            }
+        }
+    }
+
+    // Reflect padding
+    // Top and bottom rows
+    for (let j = 1; j <= paddedNumCols - 2; j++) {
+        paddedImage[0][j] = paddedImage[2][j]; // Top row
+        paddedImage[numRows + 1][j] = paddedImage[numRows - 1][j]; // Bottom row
+    }
+    // Left and right columns
+    for (let i = 0; i < numRows + 2; i++) {
+        paddedImage[i][0] = paddedImage[i][2]; // Left column
+        paddedImage[i][paddedNumCols - 1] = paddedImage[i][paddedNumCols - 3]; // Right column
+    }
+
+    return paddedImage;
+};
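padImage does two things: for turned faces it drops removeSideColumns columns (keeping the half of the crop nearer the camera), and it then adds a one-pixel border by mirroring the adjacent interior row or column outward, so the Laplacian sees no artificial edge at the boundary. A standalone sketch of just the reflection step on a 3x3 matrix, with the cropping disabled (removeSideColumns = 0 here is purely illustrative; the function above defaults to 56):

// Illustrative only: reflect-pad a 3x3 matrix by one pixel on each side.
const src = [
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
];
const rows = src.length;
const cols = src[0].length;
const out = Array.from({ length: rows + 2 }, () =>
    new Array<number>(cols + 2).fill(0),
);
// Copy the source into the centre.
for (let i = 0; i < rows; i++)
    for (let j = 0; j < cols; j++) out[i + 1][j + 1] = src[i][j];
// Mirror the rows/columns adjacent to the border outward.
for (let j = 1; j <= cols; j++) {
    out[0][j] = out[2][j];
    out[rows + 1][j] = out[rows - 1][j];
}
for (let i = 0; i < rows + 2; i++) {
    out[i][0] = out[i][2];
    out[i][cols + 1] = out[i][cols - 1];
}
console.log(out[0]); // [5, 4, 5, 6, 5] -- the border mirrors the interior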
-    private applyLaplacian(
-        image: number[][],
-        direction: FaceDirection = "straight",
-    ): number[][] {
-        const paddedImage: number[][] = this.padImage(
-            image,
-            undefined,
-            direction,
-        );
-        const numRows = paddedImage.length - 2;
-        const numCols = paddedImage[0].length - 2;
-
-        // Create an output image initialized to 0
-        const outputImage: number[][] = Array.from({ length: numRows }, () =>
-            new Array(numCols).fill(0),
-        );
-
-        // Define the Laplacian kernel
-        const kernel: number[][] = [
-            [0, 1, 0],
-            [1, -4, 1],
-            [0, 1, 0],
-        ];
-
-        // Apply the kernel to each pixel
-        for (let i = 0; i < numRows; i++) {
-            for (let j = 0; j < numCols; j++) {
-                let sum = 0;
-                for (let ki = 0; ki < 3; ki++) {
-                    for (let kj = 0; kj < 3; kj++) {
-                        sum += paddedImage[i + ki][j + kj] * kernel[ki][kj];
-                    }
-                }
-                // Adjust the output value if necessary (e.g., clipping)
-                outputImage[i][j] = sum;
-            }
-        }
-
-        return outputImage;
-    }
-}
-
-export default new LaplacianBlurDetectionService();
+const applyLaplacian = (
+    image: number[][],
+    direction: FaceDirection = "straight",
+): number[][] => {
+    const paddedImage: number[][] = padImage(image, undefined, direction);
+    const numRows = paddedImage.length - 2;
+    const numCols = paddedImage[0].length - 2;
+
+    // Create an output image initialized to 0
+    const outputImage: number[][] = Array.from({ length: numRows }, () =>
+        new Array(numCols).fill(0),
+    );
+
+    // Define the Laplacian kernel
+    const kernel: number[][] = [
+        [0, 1, 0],
+        [1, -4, 1],
+        [0, 1, 0],
+    ];
+
+    // Apply the kernel to each pixel
+    for (let i = 0; i < numRows; i++) {
+        for (let j = 0; j < numCols; j++) {
+            let sum = 0;
+            for (let ki = 0; ki < 3; ki++) {
+                for (let kj = 0; kj < 3; kj++) {
+                    sum += paddedImage[i + ki][j + kj] * kernel[ki][kj];
+                }
+            }
+            // Adjust the output value if necessary (e.g., clipping)
+            outputImage[i][j] = sum;
+        }
+    }
+
+    return outputImage;
+};
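The 3x3 kernel [[0, 1, 0], [1, -4, 1], [0, 1, 0]] is the discrete Laplacian: it responds with 0 on any constant patch and with large magnitudes at edges and fine detail, which is why the variance of its output separates sharp crops from blurry ones. A worked single-pixel example (standalone, not part of the file above):

const kernel = [
    [0, 1, 0],
    [1, -4, 1],
    [0, 1, 0],
];
// A flat patch with one bright pixel in the middle:
const patch = [
    [10, 10, 10],
    [10, 50, 10],
    [10, 10, 10],
];
let response = 0;
for (let i = 0; i < 3; i++)
    for (let j = 0; j < 3; j++) response += kernel[i][j] * patch[i][j];
// 4 * 10 - 4 * 50 = -160: a strong response. On an all-10 patch it would be 0.
console.log(response);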
 type FaceDirection = "left" | "right" | "straight";
 
-const getFaceDirection = (face: Face): FaceDirection => {
+const faceDirection = (face: Face): FaceDirection => {
     const landmarks = face.detection.landmarks;
     const leftEye = landmarks[0];
     const rightEye = landmarks[1];
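faceDirection is truncated in this view; it continues past the eye landmarks shown above. Purely as an illustration of the kind of heuristic such a function can use (this is not the implementation from this commit, and the thresholds are arbitrary):

// Hypothetical sketch: estimate facing direction from eye and nose landmarks.
type Direction = "left" | "right" | "straight";
interface Point {
    x: number;
    y: number;
}

const guessDirection = (
    leftEye: Point,
    rightEye: Point,
    nose: Point,
): Direction => {
    // When a face yaws, the eye on the far side appears closer to the nose.
    const dLeft = Math.abs(nose.x - leftEye.x);
    const dRight = Math.abs(rightEye.x - nose.x);
    const ratio = dLeft / Math.max(dRight, 1e-6);
    if (ratio < 0.5) return "left"; // arbitrary cutoff, for illustration only
    if (ratio > 2.0) return "right"; // arbitrary cutoff, for illustration only
    return "straight";
};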

View file

@@ -12,8 +12,6 @@ import downloadManager from "services/download";
 import { putEmbedding } from "services/embeddingService";
 import mlIDbStorage, { ML_SEARCH_CONFIG_NAME } from "services/face/db";
 import {
-    BlurDetectionMethod,
-    BlurDetectionService,
     Face,
     FaceCropService,
     FaceDetection,
@@ -34,7 +32,6 @@ import { getLocalFiles } from "services/fileService";
 import { EnteFile } from "types/file";
 import { isInternalUserForML } from "utils/user";
 import FaceService from "./faceService";
-import laplacianBlurDetectionService from "./laplacianBlurDetectionService";
 import mobileFaceNetEmbeddingService from "./mobileFaceNetEmbeddingService";
 import { fetchImageBitmapForContext } from "../face/image";
@@ -123,16 +120,6 @@ export class MLFactory {
         throw Error("Unknown face detection method: " + method);
     }
 
-    public static getBlurDetectionService(
-        method: BlurDetectionMethod,
-    ): BlurDetectionService {
-        if (method === "Laplacian") {
-            return laplacianBlurDetectionService;
-        }
-        throw Error("Unknown blur detection method: " + method);
-    }
-
     public static getFaceEmbeddingService(
         method: FaceEmbeddingMethod,
     ): FaceEmbeddingService {
@@ -150,7 +137,6 @@ export class LocalMLSyncContext implements MLSyncContext {
     public faceDetectionService: FaceDetectionService;
     public faceCropService: FaceCropService;
-    public blurDetectionService: BlurDetectionService;
     public faceEmbeddingService: FaceEmbeddingService;
 
     public localFilesMap: Map<number, EnteFile>;
@@ -179,8 +165,6 @@ export class LocalMLSyncContext implements MLSyncContext {
         this.faceDetectionService =
             MLFactory.getFaceDetectionService("YoloFace");
-        this.blurDetectionService =
-            MLFactory.getBlurDetectionService("Laplacian");
         this.faceEmbeddingService =
             MLFactory.getFaceEmbeddingService("MobileFaceNet");
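Finally, the consumer side: LocalMLSyncContext no longer carries a blurDetectionService member, and MLFactory no longer exposes getBlurDetectionService; only face detection and face embedding remain behind the factory. A self-contained toy reduction of the pattern left after this commit (names mirror the diff, everything else is a stand-in):

// Toy sketch of the remaining factory shape; bodies are stand-ins.
type FaceDetectionMethod = "YoloFace";
type FaceEmbeddingMethod = "MobileFaceNet";
interface FaceDetectionService {
    method: FaceDetectionMethod;
}
interface FaceEmbeddingService {
    method: FaceEmbeddingMethod;
}

class MLFactory {
    public static getFaceDetectionService(
        method: FaceDetectionMethod,
    ): FaceDetectionService {
        if (method === "YoloFace") return { method };
        throw Error("Unknown face detection method: " + method);
    }

    public static getFaceEmbeddingService(
        method: FaceEmbeddingMethod,
    ): FaceEmbeddingService {
        if (method === "MobileFaceNet") return { method };
        throw Error("Unknown face embedding method: " + method);
    }

    // getBlurDetectionService is gone; blur detection is now a direct
    // function import rather than a context-provided service.
}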