From ffedf2e961d1d17cdb8cb51b79b3616965bb5001 Mon Sep 17 00:00:00 2001 From: Abhinav Date: Mon, 11 Apr 2022 14:15:55 +0530 Subject: [PATCH] move accuracy filtering to tesseract Service --- .../machineLearning/tesseractService.ts | 24 ++++++++++++------- src/services/machineLearning/textService.ts | 18 +++++++------- src/types/machineLearning/index.ts | 5 ++-- 3 files changed, 27 insertions(+), 20 deletions(-) diff --git a/src/services/machineLearning/tesseractService.ts b/src/services/machineLearning/tesseractService.ts index 172d42b85..877957fdf 100644 --- a/src/services/machineLearning/tesseractService.ts +++ b/src/services/machineLearning/tesseractService.ts @@ -7,20 +7,19 @@ import { Versioned, } from 'types/machineLearning'; -import Tesseract, { createWorker, RecognizeResult } from 'tesseract.js'; +import Tesseract, { createWorker } from 'tesseract.js'; import QueueProcessor from 'services/queueProcessor'; import { CustomError } from 'utils/error'; import { imageBitmapToBlob, resizeToSquare } from 'utils/image'; import { getFileType } from 'services/upload/readFileService'; import { FILE_TYPE } from 'constants/file'; +import { makeID } from 'utils/user'; class TesseractService implements TextDetectionService { private tesseractWorker: Tesseract.Worker; public method: Versioned; private ready: Promise; - private textDetector = new QueueProcessor< - Tesseract.RecognizeResult | Error - >(1); + private textDetector = new QueueProcessor(1); public constructor() { this.method = { value: 'Tesseract', @@ -59,8 +58,9 @@ class TesseractService implements TextDetectionService { } async detectText( - imageBitmap: ImageBitmap - ): Promise { + imageBitmap: ImageBitmap, + minAccuracy: number + ): Promise { const response = this.textDetector.queueUpRequest(async () => { const imageHeight = Math.min(imageBitmap.width, imageBitmap.height); const imageWidth = Math.max(imageBitmap.width, imageBitmap.height); @@ -102,9 +102,17 @@ class TesseractService implements 
TextDetectionService { } const tesseractWorker = await this.getTesseractWorker(); - const detections = await tesseractWorker.recognize(file); + const id = makeID(6); + console.time('detecting text ' + id); - return detections; + const detections = await tesseractWorker.recognize(file); + console.timeEnd('detecting text ' + id); + + const filteredDetections = detections.data.words.filter( + ({ confidence }) => confidence >= minAccuracy + ); + + return filteredDetections; }); try { return await response.promise; diff --git a/src/services/machineLearning/textService.ts b/src/services/machineLearning/textService.ts index 16b76e64d..4a1d2eca0 100644 --- a/src/services/machineLearning/textService.ts +++ b/src/services/machineLearning/textService.ts @@ -34,24 +34,22 @@ class TextService { fileContext ); - console.time('detecting text ' + fileContext.enteFile.id); const textDetections = - await syncContext.textDetectionService.detectText(imageBitmap); - console.timeEnd('detecting text ' + fileContext.enteFile.id); + await syncContext.textDetectionService.detectText( + imageBitmap, + syncContext.config.textDetection.minAccuracy + ); if (textDetections instanceof Error) { newMlFile.errorCount = 2; newMlFile.lastErrorMessage = textDetections.message; return; } - const detectedText: DetectedText[] = textDetections.data.words - .filter( - ({ confidence }) => - confidence >= syncContext.config.textDetection.minAccuracy - ) - .map(({ bbox, confidence, text }) => ({ + const detectedText: DetectedText[] = textDetections.map( + ({ bbox, confidence, text }) => ({ fileID: fileContext.enteFile.id, detection: { bbox, confidence, word: text.toLocaleLowerCase() }, - })); + }) + ); newMlFile.text = detectedText; console.log( '[MLService] Detected text: ', diff --git a/src/types/machineLearning/index.ts b/src/types/machineLearning/index.ts index 8b54fac8a..826683894 100644 --- a/src/types/machineLearning/index.ts +++ b/src/types/machineLearning/index.ts @@ -395,8 +395,9 @@ export interface 
TextDetectionService { method: Versioned; // init(): Promise; detectText( - imageBitmap: ImageBitmap - ): Promise; + imageBitmap: ImageBitmap, + minAccuracy: number + ): Promise; dispose(): Promise; }