parrellelize text detection

This commit is contained in:
Abhinav 2022-04-18 12:22:44 +05:30
parent ca2c519e32
commit f5b0b34442

View file

@ -19,11 +19,16 @@ import {
} from 'constants/machineLearning/config'; } from 'constants/machineLearning/config';
import { promiseWithTimeout } from 'utils/common/promiseTimeout'; import { promiseWithTimeout } from 'utils/common/promiseTimeout';
const TESSERACT_MAX_CONCURRENT_PROCESSES = 4;
class TesseractService implements TextDetectionService { class TesseractService implements TextDetectionService {
private tesseractWorker: Tesseract.Worker;
public method: Versioned<TextDetectionMethod>; public method: Versioned<TextDetectionMethod>;
private ready: Promise<void>; private ready: Promise<void>;
private textDetector = new QueueProcessor<Tesseract.Word[] | Error>(1); private textDetector = new QueueProcessor<Tesseract.Word[] | Error>(
TESSERACT_MAX_CONCURRENT_PROCESSES
);
private tesseractWorkerPool = new Array<Tesseract.Worker>(
TESSERACT_MAX_CONCURRENT_PROCESSES
);
public constructor() { public constructor() {
this.method = { this.method = {
value: 'Tesseract', value: 'Tesseract',
@ -31,16 +36,16 @@ class TesseractService implements TextDetectionService {
}; };
} }
private async init() { private async createTesseractWorker() {
this.tesseractWorker = createWorker({ const tesseractWorker = createWorker({
workerBlobURL: false, workerBlobURL: false,
workerPath: '/js/tesseract/worker.min.js', workerPath: '/js/tesseract/worker.min.js',
corePath: '/js/tesseract/tesseract-core.wasm.js', corePath: '/js/tesseract/tesseract-core.wasm.js',
}); });
await this.tesseractWorker.load(); await tesseractWorker.load();
await this.tesseractWorker.loadLanguage('eng'); await tesseractWorker.loadLanguage('eng');
await this.tesseractWorker.initialize('eng'); await tesseractWorker.initialize('eng');
await this.tesseractWorker.setParameters({ await tesseractWorker.setParameters({
tessedit_char_whitelist: tessedit_char_whitelist:
'0123456789' + '0123456789' +
'abcdefghijklmnopqrstuvwxyz' + 'abcdefghijklmnopqrstuvwxyz' +
@ -48,15 +53,27 @@ class TesseractService implements TextDetectionService {
' ', ' ',
preserve_interword_spaces: '1', preserve_interword_spaces: '1',
}); });
console.log('loaded tesseract worker'); return tesseractWorker;
}
private async init() {
for (let i = 0; i < TESSERACT_MAX_CONCURRENT_PROCESSES; i++) {
this.tesseractWorkerPool[i] = await this.createTesseractWorker();
console.log('loaded tesseract worker no', i);
}
console.log('loaded tesseract worker pool');
} }
private async getTesseractWorker() { private async getTesseractWorker() {
if (!this.tesseractWorker) { if (!this.ready && typeof this.tesseractWorkerPool[0] === 'undefined') {
this.ready = this.init(); this.ready = this.init();
} }
await this.ready; await this.ready;
return this.tesseractWorker; return this.tesseractWorkerPool.shift();
}
private releaseWorker(tesseractWorker: Tesseract.Worker) {
this.tesseractWorkerPool.push(tesseractWorker);
} }
async detectText( async detectText(
@ -126,7 +143,7 @@ class TesseractService implements TextDetectionService {
); );
} }
const tesseractWorker = await this.getTesseractWorker(); let tesseractWorker = await this.getTesseractWorker();
const id = makeID(6); const id = makeID(6);
console.log( console.log(
`detecting text (${imageBitmap.width}px,${imageBitmap.height}px) fileType=${fileTypeInfo.exactType}` `detecting text (${imageBitmap.width}px,${imageBitmap.height}px) fileType=${fileTypeInfo.exactType}`
@ -144,16 +161,22 @@ class TesseractService implements TextDetectionService {
return filteredDetections; return filteredDetections;
} catch (e) { } catch (e) {
if (e.message === CustomError.WAIT_TIME_EXCEEDED) { if (e.message === CustomError.WAIT_TIME_EXCEEDED) {
this.dispose(); tesseractWorker?.terminate();
tesseractWorker = await this.createTesseractWorker();
} }
throw e; throw e;
} finally {
this.releaseWorker(tesseractWorker);
} }
}; };
public replaceWorkerWithNewOne() {}
public async dispose() { public async dispose() {
const tesseractWorker = await this.getTesseractWorker(); for (let i = 0; i < TESSERACT_MAX_CONCURRENT_PROCESSES; i++) {
tesseractWorker?.terminate(); this.tesseractWorkerPool[i]?.terminate();
this.tesseractWorker = null; this.tesseractWorkerPool[i] = undefined;
}
} }
} }