WIP IPC API
This commit is contained in:
parent
2bb9e77e34
commit
a88f551b6a
|
@ -78,6 +78,30 @@ const faceEmbeddingSession = async () => {
|
|||
return _faceEmbeddingSession;
|
||||
};
|
||||
|
||||
private async initOnnx() {
|
||||
console.log("start ort");
|
||||
this.onnxInferenceSession = await ort.InferenceSession.create(
|
||||
"/models/yoloface/yolov5s_face_640_640_dynamic.onnx",
|
||||
);
|
||||
const data = new Float32Array(1 * 3 * 640 * 640);
|
||||
const inputTensor = new ort.Tensor("float32", data, [1, 3, 640, 640]);
|
||||
// TODO(MR): onnx-yolo
|
||||
// const feeds: Record<string, ort.Tensor> = {};
|
||||
const feeds: Record<string, any> = {};
|
||||
const name = this.onnxInferenceSession.inputNames[0];
|
||||
feeds[name] = inputTensor;
|
||||
await this.onnxInferenceSession.run(feeds);
|
||||
console.log("start end");
|
||||
}
|
||||
|
||||
private async getOnnxInferenceSession() {
|
||||
if (!this.onnxInferenceSession) {
|
||||
await this.initOnnx();
|
||||
}
|
||||
return this.onnxInferenceSession;
|
||||
}
|
||||
|
||||
|
||||
// export const clipImageEmbedding = async (jpegImageData: Uint8Array) => {
|
||||
// const tempFilePath = await generateTempFilePath("");
|
||||
// const imageStream = new Response(jpegImageData.buffer).body;
|
||||
|
|
|
@ -203,9 +203,6 @@ export class LocalMLSyncContext implements MLSyncContext {
|
|||
}
|
||||
|
||||
public async dispose() {
|
||||
// await this.faceDetectionService.dispose();
|
||||
// await this.faceEmbeddingService.dispose();
|
||||
|
||||
this.localFilesMap = undefined;
|
||||
await this.syncQueue.onIdle();
|
||||
this.syncQueue.removeAllListeners();
|
||||
|
|
|
@ -96,12 +96,6 @@ class MobileFaceNetEmbeddingService implements FaceEmbeddingService {
|
|||
}
|
||||
return embeddings;
|
||||
}
|
||||
|
||||
public async dispose() {
|
||||
const inferenceSession = await this.getOnnxInferenceSession();
|
||||
inferenceSession?.release();
|
||||
this.onnxInferenceSession = undefined;
|
||||
}
|
||||
}
|
||||
|
||||
export default new MobileFaceNetEmbeddingService();
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
import { MAX_FACE_DISTANCE_PERCENT } from "constants/mlConfig";
|
||||
import { euclidean } from "hdbscan";
|
||||
import {
|
||||
Matrix,
|
||||
applyToPoint,
|
||||
|
@ -21,17 +22,7 @@ import {
|
|||
import { newBox } from "utils/machineLearning";
|
||||
import { Box, Point } from "../../../thirdparty/face-api/classes";
|
||||
|
||||
// TODO(MR): onnx-yolo
|
||||
// import * as ort from "onnxruntime-web";
|
||||
// import { env } from "onnxruntime-web";
|
||||
const ort: any = {};
|
||||
|
||||
// TODO(MR): onnx-yolo
|
||||
// env.wasm.wasmPaths = "/js/onnx/";
|
||||
class YoloFaceDetectionService implements FaceDetectionService {
|
||||
// TODO(MR): onnx-yolo
|
||||
// private onnxInferenceSession?: ort.InferenceSession;
|
||||
private onnxInferenceSession?: any;
|
||||
public method: Versioned<FaceDetectionMethod>;
|
||||
|
||||
public constructor() {
|
||||
|
@ -41,27 +32,44 @@ class YoloFaceDetectionService implements FaceDetectionService {
|
|||
};
|
||||
}
|
||||
|
||||
private async initOnnx() {
|
||||
console.log("start ort");
|
||||
this.onnxInferenceSession = await ort.InferenceSession.create(
|
||||
"/models/yoloface/yolov5s_face_640_640_dynamic.onnx",
|
||||
);
|
||||
const data = new Float32Array(1 * 3 * 640 * 640);
|
||||
public async detectFaces(
|
||||
imageBitmap: ImageBitmap,
|
||||
): Promise<Array<FaceDetection>> {
|
||||
const maxFaceDistance = imageBitmap.width * MAX_FACE_DISTANCE_PERCENT;
|
||||
const preprocessResult =
|
||||
this.preprocessImageBitmapToFloat32ChannelsFirst(
|
||||
imageBitmap,
|
||||
640,
|
||||
640,
|
||||
);
|
||||
const data = preprocessResult.data;
|
||||
const resized = preprocessResult.newSize;
|
||||
const inputTensor = new ort.Tensor("float32", data, [1, 3, 640, 640]);
|
||||
// TODO(MR): onnx-yolo
|
||||
// const feeds: Record<string, ort.Tensor> = {};
|
||||
const feeds: Record<string, any> = {};
|
||||
const name = this.onnxInferenceSession.inputNames[0];
|
||||
feeds[name] = inputTensor;
|
||||
await this.onnxInferenceSession.run(feeds);
|
||||
console.log("start end");
|
||||
}
|
||||
|
||||
private async getOnnxInferenceSession() {
|
||||
if (!this.onnxInferenceSession) {
|
||||
await this.initOnnx();
|
||||
}
|
||||
return this.onnxInferenceSession;
|
||||
feeds["input"] = inputTensor;
|
||||
const inferenceSession = await this.getOnnxInferenceSession();
|
||||
const runout = await inferenceSession.run(feeds);
|
||||
const outputData = runout.output.data;
|
||||
const faces = this.getFacesFromYoloOutput(
|
||||
outputData as Float32Array,
|
||||
0.7,
|
||||
);
|
||||
const inBox = newBox(0, 0, resized.width, resized.height);
|
||||
const toBox = newBox(0, 0, imageBitmap.width, imageBitmap.height);
|
||||
const transform = computeTransformToBox(inBox, toBox);
|
||||
const faceDetections: Array<FaceDetection> = faces?.map((f) => {
|
||||
const box = transformBox(f.box, transform);
|
||||
const normLandmarks = f.landmarks;
|
||||
const landmarks = transformPoints(normLandmarks, transform);
|
||||
return {
|
||||
box,
|
||||
landmarks,
|
||||
probability: f.probability as number,
|
||||
} as FaceDetection;
|
||||
});
|
||||
return removeDuplicateDetections(faceDetections, maxFaceDistance);
|
||||
}
|
||||
|
||||
private preprocessImageBitmapToFloat32ChannelsFirst(
|
||||
|
@ -233,64 +241,10 @@ class YoloFaceDetectionService implements FaceDetectionService {
|
|||
probability: faceDetection.probability,
|
||||
};
|
||||
}
|
||||
|
||||
private async estimateOnnx(imageBitmap: ImageBitmap) {
|
||||
const maxFaceDistance = imageBitmap.width * MAX_FACE_DISTANCE_PERCENT;
|
||||
const preprocessResult =
|
||||
this.preprocessImageBitmapToFloat32ChannelsFirst(
|
||||
imageBitmap,
|
||||
640,
|
||||
640,
|
||||
);
|
||||
const data = preprocessResult.data;
|
||||
const resized = preprocessResult.newSize;
|
||||
const inputTensor = new ort.Tensor("float32", data, [1, 3, 640, 640]);
|
||||
// TODO(MR): onnx-yolo
|
||||
// const feeds: Record<string, ort.Tensor> = {};
|
||||
const feeds: Record<string, any> = {};
|
||||
feeds["input"] = inputTensor;
|
||||
const inferenceSession = await this.getOnnxInferenceSession();
|
||||
const runout = await inferenceSession.run(feeds);
|
||||
const outputData = runout.output.data;
|
||||
const faces = this.getFacesFromYoloOutput(
|
||||
outputData as Float32Array,
|
||||
0.7,
|
||||
);
|
||||
const inBox = newBox(0, 0, resized.width, resized.height);
|
||||
const toBox = newBox(0, 0, imageBitmap.width, imageBitmap.height);
|
||||
const transform = computeTransformToBox(inBox, toBox);
|
||||
const faceDetections: Array<FaceDetection> = faces?.map((f) => {
|
||||
const box = transformBox(f.box, transform);
|
||||
const normLandmarks = f.landmarks;
|
||||
const landmarks = transformPoints(normLandmarks, transform);
|
||||
return {
|
||||
box,
|
||||
landmarks,
|
||||
probability: f.probability as number,
|
||||
} as FaceDetection;
|
||||
});
|
||||
return removeDuplicateDetections(faceDetections, maxFaceDistance);
|
||||
}
|
||||
|
||||
public async detectFaces(
|
||||
imageBitmap: ImageBitmap,
|
||||
): Promise<Array<FaceDetection>> {
|
||||
// measure time taken
|
||||
const facesFromOnnx = await this.estimateOnnx(imageBitmap);
|
||||
return facesFromOnnx;
|
||||
}
|
||||
|
||||
public async dispose() {
|
||||
const inferenceSession = await this.getOnnxInferenceSession();
|
||||
inferenceSession?.release();
|
||||
this.onnxInferenceSession = undefined;
|
||||
}
|
||||
}
|
||||
|
||||
export default new YoloFaceDetectionService();
|
||||
|
||||
import { euclidean } from "hdbscan";
|
||||
|
||||
/**
|
||||
* Removes duplicate face detections from an array of detections.
|
||||
*
|
||||
|
|
|
@ -261,13 +261,12 @@ export declare type MLIndex = "files" | "people";
|
|||
|
||||
export interface FaceDetectionService {
|
||||
method: Versioned<FaceDetectionMethod>;
|
||||
// init(): Promise<void>;
|
||||
|
||||
detectFaces(image: ImageBitmap): Promise<Array<FaceDetection>>;
|
||||
getRelativeDetection(
|
||||
faceDetection: FaceDetection,
|
||||
imageDimensions: Dimensions,
|
||||
): FaceDetection;
|
||||
dispose(): Promise<void>;
|
||||
}
|
||||
|
||||
export interface FaceCropService {
|
||||
|
|
|
@ -196,7 +196,7 @@ export interface Electron {
|
|||
// - ML
|
||||
|
||||
/**
|
||||
* Compute and return a CLIP embedding of the given image.
|
||||
* Return a CLIP embedding of the given image.
|
||||
*
|
||||
* See: [Note: CLIP based magic search]
|
||||
*
|
||||
|
@ -207,7 +207,7 @@ export interface Electron {
|
|||
clipImageEmbedding: (jpegImageData: Uint8Array) => Promise<Float32Array>;
|
||||
|
||||
/**
|
||||
* Compute and return a CLIP embedding of the given image.
|
||||
* Return a CLIP embedding of the given image.
|
||||
*
|
||||
* See: [Note: CLIP based magic search]
|
||||
*
|
||||
|
@ -217,6 +217,23 @@ export interface Electron {
|
|||
*/
|
||||
clipTextEmbedding: (text: string) => Promise<Float32Array>;
|
||||
|
||||
/**
|
||||
* Detect faces in the given image using YOLO.
|
||||
*
|
||||
* Both the input and output are opaque binary data whose internal structure
|
||||
* is model (YOLO) and our implementation specific. That said, specifically
|
||||
* the {@link inputImage} a particular bitmap encoding of an image.
|
||||
*/
|
||||
detectFaces: (inputImage: Uint8Array) => Promise<Float32Array>;
|
||||
|
||||
/**
|
||||
* Return a mobilefacenet embedding for the given face data.
|
||||
*
|
||||
* Both the input and output are opaque binary data whose internal structure
|
||||
* is model (mobilefacenet) and our implementation specific.
|
||||
*/
|
||||
faceEmbedding: (input: Float32Array) => Promise<Float32Array>;
|
||||
|
||||
// - File selection
|
||||
// TODO: Deprecated - use dialogs on the renderer process itself
|
||||
|
||||
|
|
Loading…
Reference in a new issue