Distribute noise to nearest initial clusters if distance is within limit

This commit is contained in:
Shailesh Pandit 2021-11-06 14:03:12 +05:30
parent dbb5e99fda
commit 15fcfbde44
4 changed files with 141 additions and 26 deletions

View file

@ -17,7 +17,7 @@ export default function MLDebug() {
const [batchSize, setBatchSize] = useState<number>(50);
const [mlResult, setMlResult] = useState<MLSyncResult>({
allFaces: [],
clusterResults: {
clustersWithNoise: {
clusters: [],
noise: [],
},
@ -111,12 +111,12 @@ export default function MLDebug() {
<p></p>
<button onClick={onSync}>Run ML Sync</button>
<p>{JSON.stringify(mlResult.clusterResults)}</p>
<p>{JSON.stringify(mlResult.clustersWithNoise)}</p>
<div>
<p>Clusters: </p>
{mlResult.clusterResults.clusters.map((cluster, index) => (
{mlResult.clustersWithNoise.clusters.map((cluster, index) => (
<div key={index} style={{ display: 'flex' }}>
{cluster.map((faceIndex, ind) => (
{cluster.faces.map((faceIndex, ind) => (
<div key={ind}>
<TFJSImage
faceImage={
@ -129,14 +129,16 @@ export default function MLDebug() {
<p style={{ marginTop: '1em' }}>Noise: </p>
<div style={{ display: 'flex' }}>
{mlResult.clusterResults.noise.map((faceIndex, index) => (
{mlResult.clustersWithNoise.noise.map(
(faceIndex, index) => (
<div key={index}>
<TFJSImage
faceImage={
mlResult.allFaces[faceIndex].faceImage
}></TFJSImage>
</div>
))}
)
)}
</div>
</div>
</div>

View file

@ -1,4 +1,5 @@
import { DBSCAN, OPTICS, KMEANS } from 'density-clustering';
import { ClusteringResults } from 'utils/machineLearning/types';
class ClusteringService {
private dbscan: DBSCAN;
@ -15,7 +16,7 @@ class ClusteringService {
dataset: Array<Array<number>>,
epsilon: number = 1.0,
minPts: number = 2
) {
): ClusteringResults {
// console.log("distanceFunction", DBSCAN._);
const clusters = this.dbscan.run(dataset, epsilon, minPts);
const noise = this.dbscan.noise;

View file

@ -7,10 +7,16 @@ import * as tf from '@tensorflow/tfjs-core';
// import TFJSFaceDetectionService from './tfjsFaceDetectionService';
// import TFJSFaceEmbeddingService from './tfjsFaceEmbeddingService';
import {
Cluster,
ClusterFaces,
ClusteringResults,
ClustersWithNoise,
FaceApiResult,
FaceDescriptor,
FaceImage,
FaceWithEmbedding,
MLSyncResult,
NearestCluster,
} from 'utils/machineLearning/types';
import * as jpeg from 'jpeg-js';
@ -18,7 +24,7 @@ import ClusteringService from './clusteringService';
import './faceEnvPatch';
import * as faceapi from 'face-api.js';
import { SsdMobilenetv1Options } from 'face-api.js';
import { euclideanDistance, SsdMobilenetv1Options } from 'face-api.js';
class MachineLearningService {
// private faceDetectionService: TFJSFaceDetectionService;
@ -26,11 +32,14 @@ class MachineLearningService {
private clusteringService: ClusteringService;
private clusterFaceDistance = 0.4;
private maxFaceDistance = 0.6;
private minClusterSize = 4;
private minFaceSize = 24;
private batchSize = 50;
public allFaces: FaceWithEmbedding[];
private allFaces: FaceWithEmbedding[];
private clusteringResults: ClusteringResults;
private clustersWithNoise: ClustersWithNoise;
private allFaceImages: FaceImage[];
public constructor() {
@ -40,6 +49,14 @@ class MachineLearningService {
this.allFaces = [];
this.allFaceImages = [];
this.clusteringResults = {
clusters: [],
noise: [],
};
this.clustersWithNoise = {
clusters: [],
noise: [],
};
}
public async init(
@ -67,6 +84,78 @@ class MachineLearningService {
console.log('04 TF Memory stats: ', tf.memory());
}
/**
 * Picks the descriptor that represents a cluster: the descriptor of the
 * member face with the highest detection score.
 *
 * @param cluster Indices into `this.allFaces` of the faces in the cluster.
 * @returns Descriptor of the best-scoring face; on equal scores the
 *     earliest member of the cluster wins.
 * @throws Error if the cluster contains no faces.
 */
private getClusterSummary(cluster: ClusterFaces): FaceDescriptor {
    if (cluster.length < 1) {
        // Fail with a clear message instead of a TypeError on `undefined`.
        throw new Error('Cannot summarize an empty cluster');
    }
    // NOTE(review): the original carried a hint about also weighting the
    // score by f.alignedRect.box.width; only the detection score is used.
    const faceScore = (f: FaceApiResult): number => f.detection.score;
    // One pass for the maximum; a full sort is not needed to find the best.
    const best = cluster
        .map((f) => this.allFaces[f].face)
        .reduce((top, f) => (faceScore(f) > faceScore(top) ? f : top));
    return best.descriptor;
}
/**
 * Rebuilds `this.clustersWithNoise.clusters` from the latest raw clustering
 * results, attaching a summary descriptor to every cluster.
 *
 * The cluster list is rebuilt from scratch on every call so that repeated
 * runs do not accumulate duplicates, and stale clusters are dropped when the
 * latest run produced none. The existing noise list is carried over as is.
 */
private updateClusterSummaries() {
    const resultClusters =
        (this.clusteringResults && this.clusteringResults.clusters) || [];
    this.clustersWithNoise = {
        clusters: resultClusters.map((resultCluster) => ({
            // Copy the indices: faces may be appended to this cluster later
            // and the raw clustering results must stay untouched.
            faces: [...resultCluster],
            summary: this.getClusterSummary(resultCluster),
        })),
        noise: this.clustersWithNoise.noise,
    };
}
/**
 * Looks up the cluster whose summary descriptor has the smallest euclidean
 * distance to the descriptor of the given face.
 *
 * @param noise Face to find the nearest cluster for.
 * @returns The closest cluster and its distance. When no cluster is closer
 *     than the initial bound of 100000 (e.g. there are no clusters), the
 *     cluster is null and the distance is that bound.
 */
private getNearestCluster(noise: FaceWithEmbedding): NearestCluster {
    let closest: Cluster = null;
    let closestDistance = 100000;
    for (const candidate of this.clustersWithNoise.clusters) {
        const candidateDistance = euclideanDistance(
            noise.face.descriptor,
            candidate.summary
        );
        if (candidateDistance < closestDistance) {
            closest = candidate;
            closestDistance = candidateDistance;
        }
    }
    console.log('nearestDist: ', closestDistance);
    return { cluster: closest, distance: closestDistance };
}
/**
 * Distributes the noise faces of the latest clustering run: a face whose
 * nearest cluster is closer than `this.maxFaceDistance` is appended to that
 * cluster, every other face is recorded as noise.
 *
 * The noise list is rebuilt on every call so that faces left over from a
 * previous run are neither duplicated nor kept around when the latest run
 * produced no noise.
 */
private assignNoiseWithinLimit() {
    const remainingNoise: number[] = [];
    const noise =
        (this.clusteringResults && this.clusteringResults.noise) || [];
    noise.forEach((n) => {
        const noiseFace = this.allFaces[n];
        const nearest = this.getNearestCluster(noiseFace);
        if (nearest.cluster && nearest.distance < this.maxFaceDistance) {
            console.log('Adding noise to cluser: ', n, nearest.distance);
            nearest.cluster.faces.push(n);
        } else {
            console.log(
                'No cluster for noise: ',
                n,
                'within distance: ',
                this.maxFaceDistance
            );
            remainingNoise.push(n);
        }
    });
    // Swap in the fresh noise list while keeping the current clusters.
    this.clustersWithNoise = {
        clusters: this.clustersWithNoise.clusters,
        noise: remainingNoise,
    };
}
private getUniqueFiles(files: File[], limit: number) {
const uniqueFiles: Map<number, File> = new Map<number, File>();
for (let i = 0; uniqueFiles.size < limit && i < files.length; i++) {
@ -125,7 +214,7 @@ class MachineLearningService {
// this.allFaces[0].alignedRect.box,
// this.allFaces[0].alignedRect.imageDims
const clusterResults = this.clusteringService.clusterUsingDBSCAN(
this.clusteringResults = this.clusteringService.clusterUsingDBSCAN(
this.allFaces.map((f) => Array.from(f.face.descriptor)),
this.clusterFaceDistance,
this.minClusterSize
@ -135,11 +224,17 @@ class MachineLearningService {
// this.allFaces.map((f) => f.embedding),
// 10);
console.log('[MLService] Got cluster results: ', clusterResults);
console.log(
'[MLService] Got cluster results: ',
this.clusteringResults
);
this.updateClusterSummaries();
this.assignNoiseWithinLimit();
return {
allFaces: this.allFaces,
clusterResults,
clustersWithNoise: this.clustersWithNoise,
};
}

View file

@ -8,7 +8,7 @@ import {
export interface MLSyncResult {
allFaces: FaceWithEmbedding[];
clusterResults: ClusteringResults;
clustersWithNoise: ClustersWithNoise;
}
export interface AlignedFace extends NormalizedFace {
@ -28,6 +28,30 @@ export declare type FaceApiResult = WithFaceDescriptor<
>
>;
/** Descriptor vector of a single face, stored as a Float32Array. */
export declare type FaceDescriptor = Float32Array;
/**
 * List of faces referenced by number.
 * NOTE(review): presumably indices into the service's list of all faces — confirm.
 */
export declare type ClusterFaces = Array<number>;
/** A group of faces together with one descriptor that stands for the group. */
export interface Cluster {
    /** Faces that belong to this cluster. */
    faces: ClusterFaces;
    /** Descriptor used to represent the whole cluster. */
    summary: FaceDescriptor;
}
/** Summarized clusters plus the faces that ended up in no cluster. */
export interface ClustersWithNoise {
    /** Clusters, each carrying its faces and a summary descriptor. */
    clusters: Cluster[];
    /** Faces that are not part of any cluster. */
    noise: ClusterFaces;
}
/** Raw clustering output: plain face lists without summary descriptors. */
export interface ClusteringResults {
    /** One list of faces per cluster. */
    clusters: ClusterFaces[];
    /** Faces that were not placed in any cluster. */
    noise: ClusterFaces;
}
/** Outcome of looking up the cluster closest to a face. */
export interface NearestCluster {
    /** Closest cluster, or null when no cluster was found. */
    cluster: Cluster | null;
    /**
     * Distance between the face and `cluster`.
     * NOTE(review): when `cluster` is null this holds the lookup's initial
     * bound rather than a real distance — check `cluster` first.
     */
    distance: number;
}
export interface FaceWithEmbedding {
fileId: string;
face: FaceApiResult;
@ -35,10 +59,3 @@ export interface FaceWithEmbedding {
// embedding: FaceEmbedding;
faceImage: FaceImage;
}
export declare type Cluster = Array<number>;
export interface ClusteringResults {
clusters: Cluster[];
noise: Cluster;
}