move accuracy filtering to tesseract Service
This commit is contained in:
parent
387f13a562
commit
ffedf2e961
|
@ -7,20 +7,19 @@ import {
|
|||
Versioned,
|
||||
} from 'types/machineLearning';
|
||||
|
||||
import Tesseract, { createWorker, RecognizeResult } from 'tesseract.js';
|
||||
import Tesseract, { createWorker } from 'tesseract.js';
|
||||
import QueueProcessor from 'services/queueProcessor';
|
||||
import { CustomError } from 'utils/error';
|
||||
import { imageBitmapToBlob, resizeToSquare } from 'utils/image';
|
||||
import { getFileType } from 'services/upload/readFileService';
|
||||
import { FILE_TYPE } from 'constants/file';
|
||||
import { makeID } from 'utils/user';
|
||||
|
||||
class TesseractService implements TextDetectionService {
|
||||
private tesseractWorker: Tesseract.Worker;
|
||||
public method: Versioned<TextDetectionMethod>;
|
||||
private ready: Promise<void>;
|
||||
private textDetector = new QueueProcessor<
|
||||
Tesseract.RecognizeResult | Error
|
||||
>(1);
|
||||
private textDetector = new QueueProcessor<Tesseract.Word[] | Error>(1);
|
||||
public constructor() {
|
||||
this.method = {
|
||||
value: 'Tesseract',
|
||||
|
@ -59,8 +58,9 @@ class TesseractService implements TextDetectionService {
|
|||
}
|
||||
|
||||
async detectText(
|
||||
imageBitmap: ImageBitmap
|
||||
): Promise<RecognizeResult | Error> {
|
||||
imageBitmap: ImageBitmap,
|
||||
minAccuracy: number
|
||||
): Promise<Tesseract.Word[] | Error> {
|
||||
const response = this.textDetector.queueUpRequest(async () => {
|
||||
const imageHeight = Math.min(imageBitmap.width, imageBitmap.height);
|
||||
const imageWidth = Math.max(imageBitmap.width, imageBitmap.height);
|
||||
|
@ -102,9 +102,17 @@ class TesseractService implements TextDetectionService {
|
|||
}
|
||||
|
||||
const tesseractWorker = await this.getTesseractWorker();
|
||||
const detections = await tesseractWorker.recognize(file);
|
||||
const id = makeID(6);
|
||||
console.time('detecting text ' + id);
|
||||
|
||||
return detections;
|
||||
const detections = await tesseractWorker.recognize(file);
|
||||
console.timeEnd('detecting text ' + id);
|
||||
|
||||
const filteredDetections = detections.data.words.filter(
|
||||
({ confidence }) => confidence >= minAccuracy
|
||||
);
|
||||
|
||||
return filteredDetections;
|
||||
});
|
||||
try {
|
||||
return await response.promise;
|
||||
|
|
|
@ -34,24 +34,22 @@ class TextService {
|
|||
fileContext
|
||||
);
|
||||
|
||||
console.time('detecting text ' + fileContext.enteFile.id);
|
||||
const textDetections =
|
||||
await syncContext.textDetectionService.detectText(imageBitmap);
|
||||
console.timeEnd('detecting text ' + fileContext.enteFile.id);
|
||||
await syncContext.textDetectionService.detectText(
|
||||
imageBitmap,
|
||||
syncContext.config.textDetection.minAccuracy
|
||||
);
|
||||
if (textDetections instanceof Error) {
|
||||
newMlFile.errorCount = 2;
|
||||
newMlFile.lastErrorMessage = textDetections.message;
|
||||
return;
|
||||
}
|
||||
const detectedText: DetectedText[] = textDetections.data.words
|
||||
.filter(
|
||||
({ confidence }) =>
|
||||
confidence >= syncContext.config.textDetection.minAccuracy
|
||||
)
|
||||
.map(({ bbox, confidence, text }) => ({
|
||||
const detectedText: DetectedText[] = textDetections.map(
|
||||
({ bbox, confidence, text }) => ({
|
||||
fileID: fileContext.enteFile.id,
|
||||
detection: { bbox, confidence, word: text.toLocaleLowerCase() },
|
||||
}));
|
||||
})
|
||||
);
|
||||
newMlFile.text = detectedText;
|
||||
console.log(
|
||||
'[MLService] Detected text: ',
|
||||
|
|
|
@ -395,8 +395,9 @@ export interface TextDetectionService {
|
|||
method: Versioned<TextDetectionMethod>;
|
||||
// init(): Promise<void>;
|
||||
detectText(
|
||||
imageBitmap: ImageBitmap
|
||||
): Promise<Tesseract.RecognizeResult | Error>;
|
||||
imageBitmap: ImageBitmap,
|
||||
minAccuracy: number
|
||||
): Promise<Tesseract.Word[] | Error>;
|
||||
dispose(): Promise<void>;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue