move accuracy filtering to tesseract Service
This commit is contained in:
parent
387f13a562
commit
ffedf2e961
|
@ -7,20 +7,19 @@ import {
|
||||||
Versioned,
|
Versioned,
|
||||||
} from 'types/machineLearning';
|
} from 'types/machineLearning';
|
||||||
|
|
||||||
import Tesseract, { createWorker, RecognizeResult } from 'tesseract.js';
|
import Tesseract, { createWorker } from 'tesseract.js';
|
||||||
import QueueProcessor from 'services/queueProcessor';
|
import QueueProcessor from 'services/queueProcessor';
|
||||||
import { CustomError } from 'utils/error';
|
import { CustomError } from 'utils/error';
|
||||||
import { imageBitmapToBlob, resizeToSquare } from 'utils/image';
|
import { imageBitmapToBlob, resizeToSquare } from 'utils/image';
|
||||||
import { getFileType } from 'services/upload/readFileService';
|
import { getFileType } from 'services/upload/readFileService';
|
||||||
import { FILE_TYPE } from 'constants/file';
|
import { FILE_TYPE } from 'constants/file';
|
||||||
|
import { makeID } from 'utils/user';
|
||||||
|
|
||||||
class TesseractService implements TextDetectionService {
|
class TesseractService implements TextDetectionService {
|
||||||
private tesseractWorker: Tesseract.Worker;
|
private tesseractWorker: Tesseract.Worker;
|
||||||
public method: Versioned<TextDetectionMethod>;
|
public method: Versioned<TextDetectionMethod>;
|
||||||
private ready: Promise<void>;
|
private ready: Promise<void>;
|
||||||
private textDetector = new QueueProcessor<
|
private textDetector = new QueueProcessor<Tesseract.Word[] | Error>(1);
|
||||||
Tesseract.RecognizeResult | Error
|
|
||||||
>(1);
|
|
||||||
public constructor() {
|
public constructor() {
|
||||||
this.method = {
|
this.method = {
|
||||||
value: 'Tesseract',
|
value: 'Tesseract',
|
||||||
|
@ -59,8 +58,9 @@ class TesseractService implements TextDetectionService {
|
||||||
}
|
}
|
||||||
|
|
||||||
async detectText(
|
async detectText(
|
||||||
imageBitmap: ImageBitmap
|
imageBitmap: ImageBitmap,
|
||||||
): Promise<RecognizeResult | Error> {
|
minAccuracy: number
|
||||||
|
): Promise<Tesseract.Word[] | Error> {
|
||||||
const response = this.textDetector.queueUpRequest(async () => {
|
const response = this.textDetector.queueUpRequest(async () => {
|
||||||
const imageHeight = Math.min(imageBitmap.width, imageBitmap.height);
|
const imageHeight = Math.min(imageBitmap.width, imageBitmap.height);
|
||||||
const imageWidth = Math.max(imageBitmap.width, imageBitmap.height);
|
const imageWidth = Math.max(imageBitmap.width, imageBitmap.height);
|
||||||
|
@ -102,9 +102,17 @@ class TesseractService implements TextDetectionService {
|
||||||
}
|
}
|
||||||
|
|
||||||
const tesseractWorker = await this.getTesseractWorker();
|
const tesseractWorker = await this.getTesseractWorker();
|
||||||
const detections = await tesseractWorker.recognize(file);
|
const id = makeID(6);
|
||||||
|
console.time('detecting text ' + id);
|
||||||
|
|
||||||
return detections;
|
const detections = await tesseractWorker.recognize(file);
|
||||||
|
console.timeEnd('detecting text ' + id);
|
||||||
|
|
||||||
|
const filteredDetections = detections.data.words.filter(
|
||||||
|
({ confidence }) => confidence >= minAccuracy
|
||||||
|
);
|
||||||
|
|
||||||
|
return filteredDetections;
|
||||||
});
|
});
|
||||||
try {
|
try {
|
||||||
return await response.promise;
|
return await response.promise;
|
||||||
|
|
|
@ -34,24 +34,22 @@ class TextService {
|
||||||
fileContext
|
fileContext
|
||||||
);
|
);
|
||||||
|
|
||||||
console.time('detecting text ' + fileContext.enteFile.id);
|
|
||||||
const textDetections =
|
const textDetections =
|
||||||
await syncContext.textDetectionService.detectText(imageBitmap);
|
await syncContext.textDetectionService.detectText(
|
||||||
console.timeEnd('detecting text ' + fileContext.enteFile.id);
|
imageBitmap,
|
||||||
|
syncContext.config.textDetection.minAccuracy
|
||||||
|
);
|
||||||
if (textDetections instanceof Error) {
|
if (textDetections instanceof Error) {
|
||||||
newMlFile.errorCount = 2;
|
newMlFile.errorCount = 2;
|
||||||
newMlFile.lastErrorMessage = textDetections.message;
|
newMlFile.lastErrorMessage = textDetections.message;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
const detectedText: DetectedText[] = textDetections.data.words
|
const detectedText: DetectedText[] = textDetections.map(
|
||||||
.filter(
|
({ bbox, confidence, text }) => ({
|
||||||
({ confidence }) =>
|
|
||||||
confidence >= syncContext.config.textDetection.minAccuracy
|
|
||||||
)
|
|
||||||
.map(({ bbox, confidence, text }) => ({
|
|
||||||
fileID: fileContext.enteFile.id,
|
fileID: fileContext.enteFile.id,
|
||||||
detection: { bbox, confidence, word: text.toLocaleLowerCase() },
|
detection: { bbox, confidence, word: text.toLocaleLowerCase() },
|
||||||
}));
|
})
|
||||||
|
);
|
||||||
newMlFile.text = detectedText;
|
newMlFile.text = detectedText;
|
||||||
console.log(
|
console.log(
|
||||||
'[MLService] Detected text: ',
|
'[MLService] Detected text: ',
|
||||||
|
|
|
@ -395,8 +395,9 @@ export interface TextDetectionService {
|
||||||
method: Versioned<TextDetectionMethod>;
|
method: Versioned<TextDetectionMethod>;
|
||||||
// init(): Promise<void>;
|
// init(): Promise<void>;
|
||||||
detectText(
|
detectText(
|
||||||
imageBitmap: ImageBitmap
|
imageBitmap: ImageBitmap,
|
||||||
): Promise<Tesseract.RecognizeResult | Error>;
|
minAccuracy: number
|
||||||
|
): Promise<Tesseract.Word[] | Error>;
|
||||||
dispose(): Promise<void>;
|
dispose(): Promise<void>;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue