Move accuracy filtering to TesseractService

This commit is contained in:
Abhinav 2022-04-11 14:15:55 +05:30
parent 387f13a562
commit ffedf2e961
3 changed files with 27 additions and 20 deletions

View file

@ -7,20 +7,19 @@ import {
Versioned, Versioned,
} from 'types/machineLearning'; } from 'types/machineLearning';
import Tesseract, { createWorker, RecognizeResult } from 'tesseract.js'; import Tesseract, { createWorker } from 'tesseract.js';
import QueueProcessor from 'services/queueProcessor'; import QueueProcessor from 'services/queueProcessor';
import { CustomError } from 'utils/error'; import { CustomError } from 'utils/error';
import { imageBitmapToBlob, resizeToSquare } from 'utils/image'; import { imageBitmapToBlob, resizeToSquare } from 'utils/image';
import { getFileType } from 'services/upload/readFileService'; import { getFileType } from 'services/upload/readFileService';
import { FILE_TYPE } from 'constants/file'; import { FILE_TYPE } from 'constants/file';
import { makeID } from 'utils/user';
class TesseractService implements TextDetectionService { class TesseractService implements TextDetectionService {
private tesseractWorker: Tesseract.Worker; private tesseractWorker: Tesseract.Worker;
public method: Versioned<TextDetectionMethod>; public method: Versioned<TextDetectionMethod>;
private ready: Promise<void>; private ready: Promise<void>;
private textDetector = new QueueProcessor< private textDetector = new QueueProcessor<Tesseract.Word[] | Error>(1);
Tesseract.RecognizeResult | Error
>(1);
public constructor() { public constructor() {
this.method = { this.method = {
value: 'Tesseract', value: 'Tesseract',
@ -59,8 +58,9 @@ class TesseractService implements TextDetectionService {
} }
async detectText( async detectText(
imageBitmap: ImageBitmap imageBitmap: ImageBitmap,
): Promise<RecognizeResult | Error> { minAccuracy: number
): Promise<Tesseract.Word[] | Error> {
const response = this.textDetector.queueUpRequest(async () => { const response = this.textDetector.queueUpRequest(async () => {
const imageHeight = Math.min(imageBitmap.width, imageBitmap.height); const imageHeight = Math.min(imageBitmap.width, imageBitmap.height);
const imageWidth = Math.max(imageBitmap.width, imageBitmap.height); const imageWidth = Math.max(imageBitmap.width, imageBitmap.height);
@ -102,9 +102,17 @@ class TesseractService implements TextDetectionService {
} }
const tesseractWorker = await this.getTesseractWorker(); const tesseractWorker = await this.getTesseractWorker();
const detections = await tesseractWorker.recognize(file); const id = makeID(6);
console.time('detecting text ' + id);
return detections; const detections = await tesseractWorker.recognize(file);
console.timeEnd('detecting text ' + id);
const filteredDetections = detections.data.words.filter(
({ confidence }) => confidence >= minAccuracy
);
return filteredDetections;
}); });
try { try {
return await response.promise; return await response.promise;

View file

@ -34,24 +34,22 @@ class TextService {
fileContext fileContext
); );
console.time('detecting text ' + fileContext.enteFile.id);
const textDetections = const textDetections =
await syncContext.textDetectionService.detectText(imageBitmap); await syncContext.textDetectionService.detectText(
console.timeEnd('detecting text ' + fileContext.enteFile.id); imageBitmap,
syncContext.config.textDetection.minAccuracy
);
if (textDetections instanceof Error) { if (textDetections instanceof Error) {
newMlFile.errorCount = 2; newMlFile.errorCount = 2;
newMlFile.lastErrorMessage = textDetections.message; newMlFile.lastErrorMessage = textDetections.message;
return; return;
} }
const detectedText: DetectedText[] = textDetections.data.words const detectedText: DetectedText[] = textDetections.map(
.filter( ({ bbox, confidence, text }) => ({
({ confidence }) =>
confidence >= syncContext.config.textDetection.minAccuracy
)
.map(({ bbox, confidence, text }) => ({
fileID: fileContext.enteFile.id, fileID: fileContext.enteFile.id,
detection: { bbox, confidence, word: text.toLocaleLowerCase() }, detection: { bbox, confidence, word: text.toLocaleLowerCase() },
})); })
);
newMlFile.text = detectedText; newMlFile.text = detectedText;
console.log( console.log(
'[MLService] Detected text: ', '[MLService] Detected text: ',

View file

@ -395,8 +395,9 @@ export interface TextDetectionService {
method: Versioned<TextDetectionMethod>; method: Versioned<TextDetectionMethod>;
// init(): Promise<void>; // init(): Promise<void>;
detectText( detectText(
imageBitmap: ImageBitmap imageBitmap: ImageBitmap,
): Promise<Tesseract.RecognizeResult | Error>; minAccuracy: number
): Promise<Tesseract.Word[] | Error>;
dispose(): Promise<void>; dispose(): Promise<void>;
} }