move accuracy filtering to tesseract Service

This commit is contained in:
Abhinav 2022-04-11 14:15:55 +05:30
parent 387f13a562
commit ffedf2e961
3 changed files with 27 additions and 20 deletions

View file

@ -7,20 +7,19 @@ import {
Versioned,
} from 'types/machineLearning';
import Tesseract, { createWorker, RecognizeResult } from 'tesseract.js';
import Tesseract, { createWorker } from 'tesseract.js';
import QueueProcessor from 'services/queueProcessor';
import { CustomError } from 'utils/error';
import { imageBitmapToBlob, resizeToSquare } from 'utils/image';
import { getFileType } from 'services/upload/readFileService';
import { FILE_TYPE } from 'constants/file';
import { makeID } from 'utils/user';
class TesseractService implements TextDetectionService {
private tesseractWorker: Tesseract.Worker;
public method: Versioned<TextDetectionMethod>;
private ready: Promise<void>;
private textDetector = new QueueProcessor<
Tesseract.RecognizeResult | Error
>(1);
private textDetector = new QueueProcessor<Tesseract.Word[] | Error>(1);
public constructor() {
this.method = {
value: 'Tesseract',
@ -59,8 +58,9 @@ class TesseractService implements TextDetectionService {
}
async detectText(
imageBitmap: ImageBitmap
): Promise<RecognizeResult | Error> {
imageBitmap: ImageBitmap,
minAccuracy: number
): Promise<Tesseract.Word[] | Error> {
const response = this.textDetector.queueUpRequest(async () => {
const imageHeight = Math.min(imageBitmap.width, imageBitmap.height);
const imageWidth = Math.max(imageBitmap.width, imageBitmap.height);
@ -102,9 +102,17 @@ class TesseractService implements TextDetectionService {
}
const tesseractWorker = await this.getTesseractWorker();
const detections = await tesseractWorker.recognize(file);
const id = makeID(6);
console.time('detecting text ' + id);
return detections;
const detections = await tesseractWorker.recognize(file);
console.timeEnd('detecting text ' + id);
const filteredDetections = detections.data.words.filter(
({ confidence }) => confidence >= minAccuracy
);
return filteredDetections;
});
try {
return await response.promise;

View file

@ -34,24 +34,22 @@ class TextService {
fileContext
);
console.time('detecting text ' + fileContext.enteFile.id);
const textDetections =
await syncContext.textDetectionService.detectText(imageBitmap);
console.timeEnd('detecting text ' + fileContext.enteFile.id);
await syncContext.textDetectionService.detectText(
imageBitmap,
syncContext.config.textDetection.minAccuracy
);
if (textDetections instanceof Error) {
newMlFile.errorCount = 2;
newMlFile.lastErrorMessage = textDetections.message;
return;
}
const detectedText: DetectedText[] = textDetections.data.words
.filter(
({ confidence }) =>
confidence >= syncContext.config.textDetection.minAccuracy
)
.map(({ bbox, confidence, text }) => ({
const detectedText: DetectedText[] = textDetections.map(
({ bbox, confidence, text }) => ({
fileID: fileContext.enteFile.id,
detection: { bbox, confidence, word: text.toLocaleLowerCase() },
}));
})
);
newMlFile.text = detectedText;
console.log(
'[MLService] Detected text: ',

View file

@ -395,8 +395,9 @@ export interface TextDetectionService {
method: Versioned<TextDetectionMethod>;
// init(): Promise<void>;
detectText(
imageBitmap: ImageBitmap
): Promise<Tesseract.RecognizeResult | Error>;
imageBitmap: ImageBitmap,
minAccuracy: number
): Promise<Tesseract.Word[] | Error>;
dispose(): Promise<void>;
}