From 4d6d441798555a42b382da8ef60c3ac24a90e10e Mon Sep 17 00:00:00 2001 From: Abhinav Date: Sat, 19 Mar 2022 20:28:59 +0530 Subject: [PATCH] update text detection result format --- src/components/PhotoFrame.tsx | 2 +- src/components/SearchBar.tsx | 22 ++++--- src/services/machineLearning/textService.ts | 63 ++++++++++----------- src/services/searchService.ts | 15 ++--- src/types/gallery/index.ts | 4 +- src/types/machineLearning/index.ts | 17 ++++-- src/utils/machineLearning/index.ts | 4 +- src/utils/storage/mlIDbStorage.ts | 2 +- 8 files changed, 70 insertions(+), 59 deletions(-) diff --git a/src/components/PhotoFrame.tsx b/src/components/PhotoFrame.tsx index 4368ed838..249089358 100644 --- a/src/components/PhotoFrame.tsx +++ b/src/components/PhotoFrame.tsx @@ -199,7 +199,7 @@ const PhotoFrame = ({ return false; } - if (search.text && search.text.indexOf(item.id) === -1) { + if (search.text && search.text.files.indexOf(item.id) === -1) { return false; } diff --git a/src/components/SearchBar.tsx b/src/components/SearchBar.tsx index ab7ff33b5..28e297ec3 100644 --- a/src/components/SearchBar.tsx +++ b/src/components/SearchBar.tsx @@ -34,7 +34,7 @@ import { FILE_TYPE } from 'constants/file'; import { GalleryContext } from 'pages/gallery'; import { AppContext } from 'pages/_app'; import { Col } from 'react-bootstrap'; -import { Person, ThingClass } from 'types/machineLearning'; +import { Person, ThingClass, WordGroup } from 'types/machineLearning'; import { IndexStatus } from 'types/machineLearning/ui'; import { PeopleList } from './MachineLearning/PeopleList'; @@ -217,14 +217,18 @@ export default function SearchBar(props: Props) { ) ); - const textResult = await searchText(searchPhrase); - - options.push({ - type: SuggestionType.TEXT, - value: textResult.files, - label: textResult.text, - } as Suggestion); + const textResults = await searchText(searchPhrase); + options.push( + ...textResults.map( + (searchResult) => + ({ + type: SuggestionType.TEXT, + value: searchResult, + label: searchResult.word, + } as Suggestion) + ) + ); return options; }; @@ -266,7 +270,7 @@ export default function SearchBar(props: Props) { props.setOpen(true); break; case SuggestionType.TEXT: - props.setSearch({ text: selectedOption.value as number[] }); + props.setSearch({ text: selectedOption.value as WordGroup }); props.setOpen(true); break; } diff --git a/src/services/machineLearning/textService.ts b/src/services/machineLearning/textService.ts index 1aad093c0..dbff151dd 100644 --- a/src/services/machineLearning/textService.ts +++ b/src/services/machineLearning/textService.ts @@ -2,6 +2,7 @@ import { MLSyncContext, MLSyncFileContext, DetectedText, + WordGroup, } from 'types/machineLearning'; import { imageBitmapToBlob } from 'utils/image'; import { isDifferentOrOld, getAllTextFromMap } from 'utils/machineLearning'; @@ -35,19 +36,24 @@ class TextService { ); const textDetections = await syncContext.textDetectionService.detectText( - await imageBitmapToBlob(imageBitmap) + new File( + [await imageBitmapToBlob(imageBitmap)], + fileContext.enteFile.id.toString() + ) ); - // console.log('3 TF Memory stats: ', tf.memory()); - // TODO: reenable faces filtering based on width - const detectedText: DetectedText = { - fileID: fileContext.enteFile.id, - detection: textDetections, - }; + + const detectedText: DetectedText[] = textDetections.data.words.map( + ({ bbox, confidence, text }) => ({ + fileID: fileContext.enteFile.id, + detection: { bbox, confidence, word: text }, + }) + ); newMlFile.text = detectedText; - // ?.filter((f) => - // f.box.width > syncContext.config.faceDetection.minFaceSize - // ); - console.log('[MLService] Detected text: ', newMlFile.text); + console.log( + '[MLService] Detected text: ', + fileContext.enteFile.metadata.title, + newMlFile.text + ); } async getAllSyncedTextMap(syncContext: MLSyncContext) { @@ -59,32 +65,23 @@ class TextService { return syncContext.allSyncedTextMap; } - public async getAllText() { + public async clusterWords(): Promise { const allTextMap = await mlIDbStorage.getAllTextMap(); const allText = getAllTextFromMap(allTextMap); - return allText; + const textCluster = new Map(); + allText.map((text) => { + if (!textCluster.has(text.detection.word)) { + textCluster.set(text.detection.word, []); + } + const objectsInCluster = textCluster.get(text.detection.word); + objectsInCluster.push(text.fileID); + }); + return [...textCluster.entries()].map(([word, files]) => ({ + word, + files, + })); } - // public async clusterThingClasses( - // syncContext: MLSyncContext - // ): Promise { - // const allTextMap = await this.getAllSyncedTextMap(syncContext); - // const allText = getAllTextFromMap(allTextMap); - // const textCluster = new Map(); - // allObjects.map((object) => { - // if (!objectClusters.has(object.detection.class)) { - // objectClusters.set(object.detection.class, []); - // } - // const objectsInCluster = objectClusters.get(object.detection.class); - // objectsInCluster.push(object.fileID); - // }); - // return [...objectClusters.entries()].map(([className, files], id) => ({ - // id, - // className, - // files, - // })); - // } - // async syncThingClassesIndex(syncContext: MLSyncContext) { // const filesVersion = await mlIDbStorage.getIndexVersion('files'); // console.log( diff --git a/src/services/searchService.ts b/src/services/searchService.ts index b23fad087..66a9ab19f 100644 --- a/src/services/searchService.ts +++ b/src/services/searchService.ts @@ -184,11 +184,12 @@ export async function searchThing(searchPhrase: string) { } export async function searchText(searchPhrase: string) { - const texts = await textService.getAllText(); - const files = texts - .filter((text) => - text.detection.data.text.toLocaleLowerCase().includes(searchPhrase) - ) - .map(({ fileID }) => fileID); - return { text: searchPhrase, files }; + const texts = await textService.clusterWords(); + return texts + .filter((text) => text.word.toLocaleLowerCase().includes(searchPhrase)) + .map(({ word, files }) => ({ + word, + files, + })) + .slice(0, 4); } diff --git a/src/types/gallery/index.ts b/src/types/gallery/index.ts index 9f59645a1..7dd19aa29 100644 --- a/src/types/gallery/index.ts +++ b/src/types/gallery/index.ts @@ -1,7 +1,7 @@ import { SetDialogMessage } from 'components/MessageDialog'; import { Collection } from 'types/collection'; import { EnteFile } from 'types/file'; -import { Person, ThingClass } from 'types/machineLearning'; +import { Person, ThingClass, WordGroup } from 'types/machineLearning'; import { DateValue, Bbox } from 'types/search'; export type SelectedState = { @@ -20,7 +20,7 @@ export type Search = { fileIndex?: number; person?: Person; thing?: ThingClass; - text?: number[]; + text?: WordGroup; }; export interface SearchStats { resultCount: number; diff --git a/src/types/machineLearning/index.ts b/src/types/machineLearning/index.ts index 0147a2d5d..da18d70f1 100644 --- a/src/types/machineLearning/index.ts +++ b/src/types/machineLearning/index.ts @@ -205,7 +205,16 @@ export interface ThingClass { files: Array; } -export declare type TextDetection = Tesseract.RecognizeResult; +export interface WordGroup { + word: string; + files: Array; +} + +export interface TextDetection { + bbox: Tesseract.Bbox; + word: string; + confidence: number; +} export interface DetectedText { fileID: number; @@ -216,7 +225,7 @@ export interface MlFileData { fileId: number; faces?: Face[]; things?: Thing[]; - text?: DetectedText; + text?: DetectedText[]; imageSource?: ImageType; imageDimensions?: Dimensions; faceDetectionMethod?: Versioned; @@ -315,7 +324,7 @@ export interface MLSyncContext { nSyncedFaces: number; allSyncedFacesMap?: Map>; allSyncedThingsMap?: Map>; - allSyncedTextMap?: Map; + allSyncedTextMap?: Map>; tsne?: any; error?: Error; @@ -376,7 +385,7 @@ export interface ObjectDetectionService { export interface TextDetectionService { method: Versioned; // init(): Promise; - detectText(image: Blob): Promise; + detectText(image: File): Promise; dispose(): Promise; } diff --git a/src/utils/machineLearning/index.ts b/src/utils/machineLearning/index.ts index 8dc221966..467966649 100644 --- a/src/utils/machineLearning/index.ts +++ b/src/utils/machineLearning/index.ts @@ -205,8 +205,8 @@ export function getAllThingsFromMap(allObjectsMap: Map>) { return [...allObjectsMap.values()].flat(); } -export function getAllTextFromMap(allTextMap: Map) { - return [...allTextMap.values()]; +export function getAllTextFromMap(allTextMap: Map) { + return [...allTextMap.values()].flat(); } export async function getLocalFile(fileId: number) { const localFiles = await getLocalFiles(); diff --git a/src/utils/storage/mlIDbStorage.ts b/src/utils/storage/mlIDbStorage.ts index 0e283ea5d..8c6d39dff 100644 --- a/src/utils/storage/mlIDbStorage.ts +++ b/src/utils/storage/mlIDbStorage.ts @@ -315,7 +315,7 @@ class MLIDbStorage { console.time('getAllTextMap'); const db = await this.db; const allFiles = await db.getAll('files'); - const allTextMap = new Map(); + const allTextMap = new Map(); allFiles.forEach( (mlFileData) => mlFileData.text &&