WIP IPC API

Manav Rathi 2024-04-11 13:58:52 +05:30
parent 2bb9e77e34
commit a88f551b6a
6 changed files with 79 additions and 94 deletions


@@ -78,6 +78,30 @@ const faceEmbeddingSession = async () => {
    return _faceEmbeddingSession;
};

private async initOnnx() {
    console.log("start ort");
    this.onnxInferenceSession = await ort.InferenceSession.create(
        "/models/yoloface/yolov5s_face_640_640_dynamic.onnx",
    );
    // Warm up the session with a single inference on an all-zero input of
    // the expected shape.
    const data = new Float32Array(1 * 3 * 640 * 640);
    const inputTensor = new ort.Tensor("float32", data, [1, 3, 640, 640]);
    // TODO(MR): onnx-yolo
    // const feeds: Record<string, ort.Tensor> = {};
    const feeds: Record<string, any> = {};
    const name = this.onnxInferenceSession.inputNames[0];
    feeds[name] = inputTensor;
    await this.onnxInferenceSession.run(feeds);
    console.log("end ort");
}

private async getOnnxInferenceSession() {
    if (!this.onnxInferenceSession) {
        await this.initOnnx();
    }
    return this.onnxInferenceSession;
}
// export const clipImageEmbedding = async (jpegImageData: Uint8Array) => {
//     const tempFilePath = await generateTempFilePath("");
//     const imageStream = new Response(jpegImageData.buffer).body;
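The hunk above creates the session lazily, then immediately runs it once on an all-zero tensor so that the first real request does not pay the one-time graph initialization cost. A minimal self-contained sketch of the same warm-up pattern, assuming onnxruntime-node provides the `ort` namespace (stubbed out elsewhere in this commit):

import * as ort from "onnxruntime-node";

// Module-level cache, mirroring the lazy initialization above.
let _session: ort.InferenceSession | undefined;

const getSession = async () => {
    if (!_session) {
        _session = await ort.InferenceSession.create(
            "/models/yoloface/yolov5s_face_640_640_dynamic.onnx",
        );
        // Warm up: one inference on an all-zero input of the expected shape.
        const data = new Float32Array(1 * 3 * 640 * 640);
        const feeds: Record<string, ort.Tensor> = {};
        feeds[_session.inputNames[0]] = new ort.Tensor(
            "float32",
            data,
            [1, 3, 640, 640],
        );
        await _session.run(feeds);
    }
    return _session;
};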


@@ -203,9 +203,6 @@ export class LocalMLSyncContext implements MLSyncContext {
    }

    public async dispose() {
        // await this.faceDetectionService.dispose();
        // await this.faceEmbeddingService.dispose();
        this.localFilesMap = undefined;
        await this.syncQueue.onIdle();
        this.syncQueue.removeAllListeners();
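The ordering in this dispose matters: awaiting onIdle first lets queued and in-flight sync tasks settle, and only then are listeners detached, so no completion events are lost. A sketch of the same teardown, assuming syncQueue is a p-queue instance (consistent with the onIdle and removeAllListeners calls above):

import PQueue from "p-queue";

const syncQueue = new PQueue({ concurrency: 4 });

const dispose = async () => {
    // Resolves once the queue has no pending or running tasks.
    await syncQueue.onIdle();
    // Safe to detach listeners now; the queue will emit nothing further.
    syncQueue.removeAllListeners();
};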


@@ -96,12 +96,6 @@ class MobileFaceNetEmbeddingService implements FaceEmbeddingService {
        }
        return embeddings;
    }

    public async dispose() {
        const inferenceSession = await this.getOnnxInferenceSession();
        inferenceSession?.release();
        this.onnxInferenceSession = undefined;
    }
}

export default new MobileFaceNetEmbeddingService();
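The dispose being removed here releases the native inference session explicitly instead of waiting for garbage collection, and clears the cached field so a later call can lazily re-create it. A sketch of that pattern, with the class shape assumed for illustration:

import * as ort from "onnxruntime-node";

class EmbeddingService {
    private session?: ort.InferenceSession;

    async dispose() {
        // release() frees the native session's memory immediately.
        await this.session?.release();
        // Clearing the cache allows lazy re-initialization later.
        this.session = undefined;
    }
}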


@@ -1,4 +1,5 @@
import { MAX_FACE_DISTANCE_PERCENT } from "constants/mlConfig";
import { euclidean } from "hdbscan";
import {
    Matrix,
    applyToPoint,
@@ -21,17 +22,7 @@ import {
import { newBox } from "utils/machineLearning";
import { Box, Point } from "../../../thirdparty/face-api/classes";

// TODO(MR): onnx-yolo
// import * as ort from "onnxruntime-web";
// import { env } from "onnxruntime-web";
const ort: any = {};

// TODO(MR): onnx-yolo
// env.wasm.wasmPaths = "/js/onnx/";

class YoloFaceDetectionService implements FaceDetectionService {
    // TODO(MR): onnx-yolo
    // private onnxInferenceSession?: ort.InferenceSession;
    private onnxInferenceSession?: any;

    public method: Versioned<FaceDetectionMethod>;

    public constructor() {
@@ -41,27 +32,44 @@ class YoloFaceDetectionService implements FaceDetectionService {
        };
    }
    private async initOnnx() {
        console.log("start ort");
        this.onnxInferenceSession = await ort.InferenceSession.create(
            "/models/yoloface/yolov5s_face_640_640_dynamic.onnx",
        );
        const data = new Float32Array(1 * 3 * 640 * 640);

    public async detectFaces(
        imageBitmap: ImageBitmap,
    ): Promise<Array<FaceDetection>> {
        const maxFaceDistance = imageBitmap.width * MAX_FACE_DISTANCE_PERCENT;
        const preprocessResult =
            this.preprocessImageBitmapToFloat32ChannelsFirst(
                imageBitmap,
                640,
                640,
            );
        const data = preprocessResult.data;
        const resized = preprocessResult.newSize;
        const inputTensor = new ort.Tensor("float32", data, [1, 3, 640, 640]);
        // TODO(MR): onnx-yolo
        // const feeds: Record<string, ort.Tensor> = {};
        const feeds: Record<string, any> = {};
        const name = this.onnxInferenceSession.inputNames[0];
        feeds[name] = inputTensor;
        await this.onnxInferenceSession.run(feeds);
        console.log("start end");
    }

    private async getOnnxInferenceSession() {
        if (!this.onnxInferenceSession) {
            await this.initOnnx();
        }
        return this.onnxInferenceSession;
        feeds["input"] = inputTensor;
        const inferenceSession = await this.getOnnxInferenceSession();
        const runout = await inferenceSession.run(feeds);
        const outputData = runout.output.data;
        const faces = this.getFacesFromYoloOutput(
            outputData as Float32Array,
            0.7,
        );
        const inBox = newBox(0, 0, resized.width, resized.height);
        const toBox = newBox(0, 0, imageBitmap.width, imageBitmap.height);
        const transform = computeTransformToBox(inBox, toBox);
        const faceDetections: Array<FaceDetection> = faces?.map((f) => {
            const box = transformBox(f.box, transform);
            const normLandmarks = f.landmarks;
            const landmarks = transformPoints(normLandmarks, transform);
            return {
                box,
                landmarks,
                probability: f.probability as number,
            } as FaceDetection;
        });
        return removeDuplicateDetections(faceDetections, maxFaceDistance);
    }

    private preprocessImageBitmapToFloat32ChannelsFirst(
@@ -233,64 +241,10 @@ class YoloFaceDetectionService implements FaceDetectionService {
            probability: faceDetection.probability,
        };
    }

    private async estimateOnnx(imageBitmap: ImageBitmap) {
        const maxFaceDistance = imageBitmap.width * MAX_FACE_DISTANCE_PERCENT;
        const preprocessResult =
            this.preprocessImageBitmapToFloat32ChannelsFirst(
                imageBitmap,
                640,
                640,
            );
        const data = preprocessResult.data;
        const resized = preprocessResult.newSize;
        const inputTensor = new ort.Tensor("float32", data, [1, 3, 640, 640]);
        // TODO(MR): onnx-yolo
        // const feeds: Record<string, ort.Tensor> = {};
        const feeds: Record<string, any> = {};
        feeds["input"] = inputTensor;
        const inferenceSession = await this.getOnnxInferenceSession();
        const runout = await inferenceSession.run(feeds);
        const outputData = runout.output.data;
        const faces = this.getFacesFromYoloOutput(
            outputData as Float32Array,
            0.7,
        );
        const inBox = newBox(0, 0, resized.width, resized.height);
        const toBox = newBox(0, 0, imageBitmap.width, imageBitmap.height);
        const transform = computeTransformToBox(inBox, toBox);
        const faceDetections: Array<FaceDetection> = faces?.map((f) => {
            const box = transformBox(f.box, transform);
            const normLandmarks = f.landmarks;
            const landmarks = transformPoints(normLandmarks, transform);
            return {
                box,
                landmarks,
                probability: f.probability as number,
            } as FaceDetection;
        });
        return removeDuplicateDetections(faceDetections, maxFaceDistance);
    }

    public async detectFaces(
        imageBitmap: ImageBitmap,
    ): Promise<Array<FaceDetection>> {
        // measure time taken
        const facesFromOnnx = await this.estimateOnnx(imageBitmap);
        return facesFromOnnx;
    }

    public async dispose() {
        const inferenceSession = await this.getOnnxInferenceSession();
        inferenceSession?.release();
        this.onnxInferenceSession = undefined;
    }
}

export default new YoloFaceDetectionService();
import { euclidean } from "hdbscan";

/**
 * Removes duplicate face detections from an array of detections.
 *
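The new detectFaces above filters raw model output through getFacesFromYoloOutput with a 0.7 score threshold before transforming boxes and landmarks back to image coordinates. That method is not part of this diff; purely as an illustration, a sketch of such a filter under the common YOLO5Face layout of 16 floats per candidate (center x/y, width, height, objectness score, five landmark x/y pairs, class score) — the actual layout is specific to the exported model:

// Hypothetical shapes, for illustration only.
interface YoloFace {
    box: { x: number; y: number; width: number; height: number };
    landmarks: { x: number; y: number }[];
    probability: number;
}

const facesFromYoloOutput = (rows: Float32Array, threshold: number) => {
    const faces: YoloFace[] = [];
    const stride = 16; // 4 box + 1 score + 10 landmark + 1 class values
    for (let i = 0; i < rows.length; i += stride) {
        const probability = rows[i + 4];
        if (probability < threshold) continue;
        // YOLO reports center x/y plus width/height; convert to top-left.
        const [cx, cy, w, h] = [rows[i], rows[i + 1], rows[i + 2], rows[i + 3]];
        const landmarks: { x: number; y: number }[] = [];
        for (let j = 0; j < 5; j++)
            landmarks.push({ x: rows[i + 5 + 2 * j], y: rows[i + 6 + 2 * j] });
        faces.push({
            box: { x: cx - w / 2, y: cy - h / 2, width: w, height: h },
            landmarks,
            probability,
        });
    }
    return faces;
};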


@@ -261,13 +261,12 @@ export declare type MLIndex = "files" | "people";

export interface FaceDetectionService {
    method: Versioned<FaceDetectionMethod>;
    // init(): Promise<void>;
    detectFaces(image: ImageBitmap): Promise<Array<FaceDetection>>;
    getRelativeDetection(
        faceDetection: FaceDetection,
        imageDimensions: Dimensions,
    ): FaceDetection;
    dispose(): Promise<void>;
}
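Here detectFaces reports detections in the coordinate space of the supplied bitmap, while getRelativeDetection converts one into dimension-independent form. A sketch of that conversion, using hypothetical plain-object stand-ins for the Box and Dimensions types:

interface Dims { width: number; height: number }
interface RectBox { x: number; y: number; width: number; height: number }

// Dividing by the image dimensions yields coordinates in [0, 1] that stay
// valid when the image is later shown or processed at a different size.
const relativeBox = (box: RectBox, image: Dims): RectBox => ({
    x: box.x / image.width,
    y: box.y / image.height,
    width: box.width / image.width,
    height: box.height / image.height,
});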
export interface FaceCropService {


@@ -196,7 +196,7 @@ export interface Electron {
    // - ML

    /**
     * Compute and return a CLIP embedding of the given image.
     * Return a CLIP embedding of the given image.
     *
     * See: [Note: CLIP based magic search]
     *
@@ -207,7 +207,7 @@ export interface Electron {
    clipImageEmbedding: (jpegImageData: Uint8Array) => Promise<Float32Array>;

    /**
     * Compute and return a CLIP embedding of the given image.
     * Return a CLIP embedding of the given text.
     *
     * See: [Note: CLIP based magic search]
     *
@@ -217,6 +217,23 @@ export interface Electron {
     */
    clipTextEmbedding: (text: string) => Promise<Float32Array>;

    /**
     * Detect faces in the given image using YOLO.
     *
     * Both the input and output are opaque binary data whose internal
     * structure is specific to the model (YOLO) and to our implementation.
     * That said, {@link inputImage} is specifically a particular bitmap
     * encoding of an image.
     */
    detectFaces: (inputImage: Uint8Array) => Promise<Float32Array>;

    /**
     * Return a MobileFaceNet embedding for the given face data.
     *
     * Both the input and output are opaque binary data whose internal
     * structure is specific to the model (MobileFaceNet) and to our
     * implementation.
     */
    faceEmbedding: (input: Float32Array) => Promise<Float32Array>;

    // - File selection
    // TODO: Deprecated - use dialogs on the renderer process itself
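Putting the new ML surface together, a sketch of how the renderer might drive it end to end, assuming `electron` is the object that the preload script exposes for this interface, and glossing over the crop-and-align step that would really sit between detection and embedding:

declare const electron: Electron;

const indexFaces = async (jpegImageData: Uint8Array) => {
    // Opaque detection output; its layout is an implementation detail
    // shared between the renderer and the node side.
    const detections = await electron.detectFaces(jpegImageData);

    // Hypothetical shortcut: real code would crop and align each detected
    // face before computing its embedding.
    const faceData = Float32Array.from(detections);
    return await electron.faceEmbedding(faceData);
};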