Distribute noise to nearest initial clusters if distance is within limit

This commit is contained in:
Shailesh Pandit 2021-11-06 14:03:12 +05:30
parent dbb5e99fda
commit 15fcfbde44
4 changed files with 141 additions and 26 deletions

View file

@ -17,7 +17,7 @@ export default function MLDebug() {
const [batchSize, setBatchSize] = useState<number>(50); const [batchSize, setBatchSize] = useState<number>(50);
const [mlResult, setMlResult] = useState<MLSyncResult>({ const [mlResult, setMlResult] = useState<MLSyncResult>({
allFaces: [], allFaces: [],
clusterResults: { clustersWithNoise: {
clusters: [], clusters: [],
noise: [], noise: [],
}, },
@ -111,12 +111,12 @@ export default function MLDebug() {
<p></p> <p></p>
<button onClick={onSync}>Run ML Sync</button> <button onClick={onSync}>Run ML Sync</button>
<p>{JSON.stringify(mlResult.clusterResults)}</p> <p>{JSON.stringify(mlResult.clustersWithNoise)}</p>
<div> <div>
<p>Clusters: </p> <p>Clusters: </p>
{mlResult.clusterResults.clusters.map((cluster, index) => ( {mlResult.clustersWithNoise.clusters.map((cluster, index) => (
<div key={index} style={{ display: 'flex' }}> <div key={index} style={{ display: 'flex' }}>
{cluster.map((faceIndex, ind) => ( {cluster.faces.map((faceIndex, ind) => (
<div key={ind}> <div key={ind}>
<TFJSImage <TFJSImage
faceImage={ faceImage={
@ -129,14 +129,16 @@ export default function MLDebug() {
<p style={{ marginTop: '1em' }}>Noise: </p> <p style={{ marginTop: '1em' }}>Noise: </p>
<div style={{ display: 'flex' }}> <div style={{ display: 'flex' }}>
{mlResult.clusterResults.noise.map((faceIndex, index) => ( {mlResult.clustersWithNoise.noise.map(
<div key={index}> (faceIndex, index) => (
<TFJSImage <div key={index}>
faceImage={ <TFJSImage
mlResult.allFaces[faceIndex].faceImage faceImage={
}></TFJSImage> mlResult.allFaces[faceIndex].faceImage
</div> }></TFJSImage>
))} </div>
)
)}
</div> </div>
</div> </div>
</div> </div>

View file

@ -1,4 +1,5 @@
import { DBSCAN, OPTICS, KMEANS } from 'density-clustering'; import { DBSCAN, OPTICS, KMEANS } from 'density-clustering';
import { ClusteringResults } from 'utils/machineLearning/types';
class ClusteringService { class ClusteringService {
private dbscan: DBSCAN; private dbscan: DBSCAN;
@ -15,7 +16,7 @@ class ClusteringService {
dataset: Array<Array<number>>, dataset: Array<Array<number>>,
epsilon: number = 1.0, epsilon: number = 1.0,
minPts: number = 2 minPts: number = 2
) { ): ClusteringResults {
// console.log("distanceFunction", DBSCAN._); // console.log("distanceFunction", DBSCAN._);
const clusters = this.dbscan.run(dataset, epsilon, minPts); const clusters = this.dbscan.run(dataset, epsilon, minPts);
const noise = this.dbscan.noise; const noise = this.dbscan.noise;

View file

@ -7,10 +7,16 @@ import * as tf from '@tensorflow/tfjs-core';
// import TFJSFaceDetectionService from './tfjsFaceDetectionService'; // import TFJSFaceDetectionService from './tfjsFaceDetectionService';
// import TFJSFaceEmbeddingService from './tfjsFaceEmbeddingService'; // import TFJSFaceEmbeddingService from './tfjsFaceEmbeddingService';
import { import {
Cluster,
ClusterFaces,
ClusteringResults,
ClustersWithNoise,
FaceApiResult, FaceApiResult,
FaceDescriptor,
FaceImage, FaceImage,
FaceWithEmbedding, FaceWithEmbedding,
MLSyncResult, MLSyncResult,
NearestCluster,
} from 'utils/machineLearning/types'; } from 'utils/machineLearning/types';
import * as jpeg from 'jpeg-js'; import * as jpeg from 'jpeg-js';
@ -18,7 +24,7 @@ import ClusteringService from './clusteringService';
import './faceEnvPatch'; import './faceEnvPatch';
import * as faceapi from 'face-api.js'; import * as faceapi from 'face-api.js';
import { SsdMobilenetv1Options } from 'face-api.js'; import { euclideanDistance, SsdMobilenetv1Options } from 'face-api.js';
class MachineLearningService { class MachineLearningService {
// private faceDetectionService: TFJSFaceDetectionService; // private faceDetectionService: TFJSFaceDetectionService;
@ -26,11 +32,14 @@ class MachineLearningService {
private clusteringService: ClusteringService; private clusteringService: ClusteringService;
private clusterFaceDistance = 0.4; private clusterFaceDistance = 0.4;
private maxFaceDistance = 0.6;
private minClusterSize = 4; private minClusterSize = 4;
private minFaceSize = 24; private minFaceSize = 24;
private batchSize = 50; private batchSize = 50;
public allFaces: FaceWithEmbedding[]; private allFaces: FaceWithEmbedding[];
private clusteringResults: ClusteringResults;
private clustersWithNoise: ClustersWithNoise;
private allFaceImages: FaceImage[]; private allFaceImages: FaceImage[];
public constructor() { public constructor() {
@ -40,6 +49,14 @@ class MachineLearningService {
this.allFaces = []; this.allFaces = [];
this.allFaceImages = []; this.allFaceImages = [];
this.clusteringResults = {
clusters: [],
noise: [],
};
this.clustersWithNoise = {
clusters: [],
noise: [],
};
} }
public async init( public async init(
@ -67,6 +84,78 @@ class MachineLearningService {
console.log('04 TF Memory stats: ', tf.memory()); console.log('04 TF Memory stats: ', tf.memory());
} }
/**
 * Picks a representative descriptor for a cluster: the descriptor of the
 * member face whose detection score is the highest.
 *
 * @param cluster Indices into `this.allFaces` for the faces of one cluster.
 * @returns The descriptor of the best-scoring face in the cluster.
 * @throws Error when the cluster has no faces, since there is nothing to
 * summarize.
 */
private getClusterSummary(cluster: ClusterFaces): FaceDescriptor {
    if (cluster.length < 1) {
        throw new Error('Cannot compute summary of an empty cluster');
    }

    // Single pass instead of sorting the whole cluster just to read the
    // first element. The strict `>` keeps the earliest face on equal scores,
    // which is what a stable descending sort would have returned.
    let best = this.allFaces[cluster[0]].face;
    for (const faceIndex of cluster) {
        const candidate = this.allFaces[faceIndex].face;
        if (candidate.detection.score > best.detection.score) {
            best = candidate;
        }
    }
    return best.descriptor;
}
/**
 * Rebuilds `clustersWithNoise.clusters` from the raw clustering output,
 * pairing each cluster's face indices with its summary descriptor.
 *
 * The list is rebuilt from scratch on every call so that running the
 * clustering again does not append a second copy of each cluster. Face index
 * arrays are copied so that faces pushed into a cluster afterwards do not
 * leak back into `clusteringResults`.
 */
private updateClusterSummaries() {
    const hasClusters =
        !!this.clusteringResults &&
        !!this.clusteringResults.clusters &&
        this.clusteringResults.clusters.length > 0;

    if (!hasClusters) {
        // Nothing was clustered: make sure no stale clusters survive.
        this.clustersWithNoise.clusters = [];
        return;
    }

    this.clustersWithNoise.clusters = this.clusteringResults.clusters.map(
        (resultCluster) => ({
            faces: [...resultCluster],
            summary: this.getClusterSummary(resultCluster),
        })
    );
}
/**
 * Finds the cluster whose summary descriptor is closest (by euclidean
 * distance) to the descriptor of the given face.
 *
 * @param noise Face to match against the current cluster summaries.
 * @returns The closest cluster together with its distance. When no cluster
 * is closer than the initial bound of 100000, `cluster` is `null` and
 * `distance` is that bound.
 */
private getNearestCluster(noise: FaceWithEmbedding): NearestCluster {
    let bestDistance = 100000;
    let bestCluster: Cluster = null;

    for (const candidate of this.clustersWithNoise.clusters) {
        const distance = euclideanDistance(
            noise.face.descriptor,
            candidate.summary
        );
        if (distance < bestDistance) {
            bestCluster = candidate;
            bestDistance = distance;
        }
    }

    console.log('nearestDist: ', bestDistance);
    return { cluster: bestCluster, distance: bestDistance };
}
/**
 * Distributes the faces that clustering reported as noise: a noise face is
 * appended to its nearest cluster when that cluster's summary lies within
 * `maxFaceDistance`, otherwise it is recorded in `clustersWithNoise.noise`.
 *
 * The remaining-noise list is reset first so that repeated runs do not
 * accumulate the same face indices more than once.
 */
private assignNoiseWithinLimit() {
    this.clustersWithNoise.noise = [];

    if (
        !this.clusteringResults ||
        !this.clusteringResults.noise ||
        this.clusteringResults.noise.length < 1
    ) {
        return;
    }

    for (const n of this.clusteringResults.noise) {
        const nearest = this.getNearestCluster(this.allFaces[n]);
        const withinLimit =
            !!nearest.cluster && nearest.distance < this.maxFaceDistance;

        if (withinLimit) {
            console.log('Adding noise to cluser: ', n, nearest.distance);
            nearest.cluster.faces.push(n);
        } else {
            console.log(
                'No cluster for noise: ',
                n,
                'within distance: ',
                this.maxFaceDistance
            );
            this.clustersWithNoise.noise.push(n);
        }
    }
}
private getUniqueFiles(files: File[], limit: number) { private getUniqueFiles(files: File[], limit: number) {
const uniqueFiles: Map<number, File> = new Map<number, File>(); const uniqueFiles: Map<number, File> = new Map<number, File>();
for (let i = 0; uniqueFiles.size < limit && i < files.length; i++) { for (let i = 0; uniqueFiles.size < limit && i < files.length; i++) {
@ -125,7 +214,7 @@ class MachineLearningService {
// this.allFaces[0].alignedRect.box, // this.allFaces[0].alignedRect.box,
// this.allFaces[0].alignedRect.imageDims // this.allFaces[0].alignedRect.imageDims
const clusterResults = this.clusteringService.clusterUsingDBSCAN( this.clusteringResults = this.clusteringService.clusterUsingDBSCAN(
this.allFaces.map((f) => Array.from(f.face.descriptor)), this.allFaces.map((f) => Array.from(f.face.descriptor)),
this.clusterFaceDistance, this.clusterFaceDistance,
this.minClusterSize this.minClusterSize
@ -135,11 +224,17 @@ class MachineLearningService {
// this.allFaces.map((f) => f.embedding), // this.allFaces.map((f) => f.embedding),
// 10); // 10);
console.log('[MLService] Got cluster results: ', clusterResults); console.log(
'[MLService] Got cluster results: ',
this.clusteringResults
);
this.updateClusterSummaries();
this.assignNoiseWithinLimit();
return { return {
allFaces: this.allFaces, allFaces: this.allFaces,
clusterResults, clustersWithNoise: this.clustersWithNoise,
}; };
} }

View file

@ -8,7 +8,7 @@ import {
export interface MLSyncResult { export interface MLSyncResult {
allFaces: FaceWithEmbedding[]; allFaces: FaceWithEmbedding[];
clusterResults: ClusteringResults; clustersWithNoise: ClustersWithNoise;
} }
export interface AlignedFace extends NormalizedFace { export interface AlignedFace extends NormalizedFace {
@ -28,6 +28,30 @@ export declare type FaceApiResult = WithFaceDescriptor<
> >
>; >;
/** Numeric descriptor vector of a single face. */
export type FaceDescriptor = Float32Array;

/** Face indices that belong together (positions in the list of all faces). */
export type ClusterFaces = number[];

/** A group of faces plus the descriptor chosen to represent the group. */
export interface Cluster {
    faces: ClusterFaces;
    summary: FaceDescriptor;
}

/** Summarized clusters and the face indices left unassigned to any of them. */
export interface ClustersWithNoise {
    clusters: Cluster[];
    noise: ClusterFaces;
}

/** Raw clustering output: index groups and the indices reported as noise. */
export interface ClusteringResults {
    clusters: ClusterFaces[];
    noise: ClusterFaces;
}

/** Result of a nearest-cluster lookup: the cluster found and its distance. */
export interface NearestCluster {
    cluster: Cluster;
    distance: number;
}
export interface FaceWithEmbedding { export interface FaceWithEmbedding {
fileId: string; fileId: string;
face: FaceApiResult; face: FaceApiResult;
@ -35,10 +59,3 @@ export interface FaceWithEmbedding {
// embedding: FaceEmbedding; // embedding: FaceEmbedding;
faceImage: FaceImage; faceImage: FaceImage;
} }
export declare type Cluster = Array<number>;
export interface ClusteringResults {
clusters: Cluster[];
noise: Cluster;
}