Add OCR, add configuration

This commit is contained in:
reis
2022-11-11 17:29:15 +00:00
parent 9962e4fe68
commit d70fcde227
7 changed files with 146 additions and 71 deletions

View File

@@ -1,16 +1,15 @@
import { serialize } from 'bson';
export default async function runAI(client, data, predict) {
const predictions = await predict(data.text);
// Runs the shared FastText classifier over data.text and sends the
// predictions back to the client as a BSON-encoded op-2 'ai_response' event.
// Relies on global.ft being an already-loaded FastText model (set up elsewhere).
export default async function runAI(client, data) {
    const predictions = await global.ft.predict(data.text);
    const jsonData = {
        event: 'ai_response',
        op: 2,
        id: data.id,
        predictions
    };
    // NOTE(review): debug output — consider removing or routing through a logger.
    console.log(predictions);
    const bsonData = serialize(jsonData);
    // write(), not pipe(): pipe() wires up streams and cannot take a Buffer,
    // so the leftover client.pipe(bsonData) call was removed.
    client.write(bsonData);
}

View File

@@ -1,7 +1,9 @@
import runAI from './ai.js';
import trainAI from './trainAI.js';
import runOCR from './ocr.js';
export {
runAI,
trainAI
trainAI,
runOCR
}

23
server/events/ocr.js Normal file
View File

@@ -0,0 +1,23 @@
import { recognize } from 'node-tesseract-ocr';
import { serialize } from 'bson';
// Runs Tesseract OCR on the image at eventData.url and sends the extracted
// text back to the client as a BSON-encoded op-6 response.
//
// options may override the default Tesseract settings below; callers that
// pass nothing get the exact original behavior.
export default async function runOCR(client, eventData, options = {}) {
    const config = {
        // per the Tesseract CLI docs: eng language pack, default engine mode,
        // fully automatic page segmentation
        lang: 'eng',
        oem: 3,
        psm: 3,
        ...options,
    };
    const ocrText = await recognize(eventData.url, config);
    // NOTE(review): unlike runAI's payload there is no `event` field here —
    // confirm the client dispatches op-6 responses on `op` alone.
    const jsonData = {
        op: 6,
        id: eventData.id,
        ocrText
    };
    client.write(serialize(jsonData));
}

View File

@@ -1,51 +1,53 @@
import FastText from 'fasttext.js';
const ft = new FastText({
train: {
// number of concurrent threads
thread: 8,
// verbosity level [2]
verbose: 4,
// number of negatives sampled [5]
neg: 7,
// loss function {ns, hs, softmax} [ns]
loss: 'ns',
// learning rate [0.05]
lr: 1,
// change the rate of updates for the learning rate [100]
lrUpdateRate: 1000,
// max length of word ngram [1]
wordNgrams: 5,
// minimal number of word occurrences
minCount: 1,
// minimal number of label occurrences
minCountLabel: 1,
// size of word vectors [100]
dim: 100,
// size of the context window [5]
ws: 5,
// number of epochs [5]
epoch: 20,
// number of buckets [2000000]
bucket: 2000000,
// min length of char ngram [3]
minn: process.env.TRAIN_MINN || 3,
// max length of char ngram [6]
maxn: process.env.TRAIN_MAXN || 6,
// sampling threshold [0.0001]
t: 0.0001,
// load pre trained word vectors from unsupervised model
pretrainedVectors: ''
},
serializeTo: '/workspaces/revanced-helper/server/model/model',
trainFile: '/workspaces/revanced-helper/server/model/train.tsv',
});
import { join } from 'node:path';
export default async function trainAI(unload, load) {
//unload();
// Retrains the FastText classifier from the configured TSV training file and
// swaps the freshly-trained weights into the shared live model.
// Relies on globals set up elsewhere: global.__dirname, global.config, and
// global.ft (the live FastText model that runAI queries).
export default async function trainAI() {
    const ft = new FastText({
        train: {
            // number of concurrent threads
            thread: 8,
            // verbosity level [2]
            verbose: 4,
            // number of negatives sampled [5]
            neg: 7,
            // loss function {ns, hs, softmax} [ns]
            loss: 'ns',
            // learning rate [0.05]
            lr: 1,
            // change the rate of updates for the learning rate [100]
            lrUpdateRate: 1000,
            // max length of word ngram [1]
            wordNgrams: 5,
            // minimal number of word occurrences
            minCount: 1,
            // minimal number of label occurrences
            minCountLabel: 1,
            // size of word vectors [100]
            dim: 100,
            // size of the context window [5]
            ws: 5,
            // number of epochs [5]
            epoch: 20,
            // number of buckets [2000000]
            bucket: 2000000,
            // min length of char ngram [3]
            // NOTE(review): env values arrive as strings — confirm fasttext.js
            // accepts stringly-typed minn/maxn before relying on the override.
            minn: process.env.TRAIN_MINN || 3,
            // max length of char ngram [6]
            maxn: process.env.TRAIN_MAXN || 6,
            // sampling threshold [0.0001]
            t: 0.0001,
            // load pre trained word vectors from unsupervised model
            pretrainedVectors: ''
        },
        // strip '.bin' so the serialized model lands where loadModel points
        serializeTo: join(global.__dirname, global.config.fasttext.loadModel).replace('.bin', ''),
        trainFile: join(global.__dirname, global.config.fasttext.trainFile),
    });
    // Free the in-memory model while training, then reload the new weights.
    global.ft.unload();
    await ft.train();
    // load() is async — await it so callers know the new model is actually
    // ready (the original fired it without awaiting: a floating promise).
    await global.ft.load();
}