Add OCR, add configuration
@@ -8,7 +8,7 @@ Sending the server this JSON (BSON) will send you back the AI predictions.
```json
{
    "event": "ai",
    "op": 1,
    "id": "String",
    "text": "How do i download ReVanced?"
}
@@ -18,7 +18,7 @@ And the server would return something like this:
```json
{
    "event": "ai_response",
    "op": 2,
    "id": "String",
    "predictions": [
        {
@@ -29,17 +29,13 @@ And the server would return something like this:
}
```
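
As a point of reference, here is a minimal client sketch (not part of this commit) that sends the request above to the server over its TCP/BSON socket and prints the decoded response. It assumes the default port 3000 from `server/config.json`; `example-id` is just a placeholder correlation ID.

```js
// Hypothetical client sketch: connect to the bot server over TCP,
// send a BSON-encoded "ai" request (op 1) and print the "ai_response" (op 2).
import { connect } from 'node:net';
import { serialize, deserialize } from 'bson';

const client = connect({ port: 3000 }, () => {
    client.write(serialize({
        event: 'ai',
        op: 1,
        id: 'example-id', // placeholder ID chosen by the client
        text: 'How do i download ReVanced?'
    }));
});

client.on('data', (data) => {
    const response = deserialize(data);
    console.log(response.predictions);
    client.end();
});
```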

# OCR

Soon:tm:

# Training the AI

To add data to the training data, send a BSON (JSON) like this:

```json
{
    "event": "add_train_data",
    "op": 3,
    "label": "FALSEPOSITIVE",
    "text": "how"
}
@@ -49,6 +45,28 @@ To train the AI and to re-load it, send this BSON (JSON):
```json
{
    "event": "train_ai",
    "op": 4
}
```
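
A matching client sketch (again illustrative, not part of the commit) that triggers a retrain; op 4 corresponds to the server's `case 4` handler shown later in this diff. The `add_train_data` message from the previous section is sent the same way, only with the `label` and `text` fields added.

```js
// Hypothetical client sketch: ask the server to retrain and reload its model (op 4).
import { connect } from 'node:net';
import { serialize } from 'bson';

const client = connect({ port: 3000 }, () => {
    client.write(serialize({ event: 'train_ai', op: 4 }));
    client.end();
});
```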

# OCR

Sending the server this JSON (BSON) will send you back the read text.

```json
{
    "op": 5,
    "id": "String",
    "url": "https://cdn.discordapp.com/attachments/1033338556493606963/1033338557231796224/Screenshot_20221022-121318.jpg"
}
```

And the server would return something like this:

```json
{
    "op": 6,
    "id": "String",
    "ocrText": "..."
}
```
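
And a client-side sketch of the OCR round trip (illustrative only; the image URL below is a placeholder):

```js
// Hypothetical client sketch: request OCR of an image URL (op 5)
// and print the recognised text from the op 6 response.
import { connect } from 'node:net';
import { serialize, deserialize } from 'bson';

const client = connect({ port: 3000 }, () => {
    client.write(serialize({
        op: 5,
        id: 'example-id',
        url: 'https://example.com/screenshot.jpg' // placeholder image URL
    }));
});

client.on('data', (data) => {
    console.log(deserialize(data).ocrText);
    client.end();
});
```
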
12
server/config.json
Normal file
@@ -0,0 +1,12 @@
{
    "server": {
        "port": 3000
    },

    "fasttext": {
        "bin": "./model/fastText/fasttext",
        "loadModel": "./model/model.bin",
        "trainFile": "./model/train.tsv",
        "debug": true
    }
}

server/events/ai.js
@@ -1,16 +1,15 @@
import { serialize } from 'bson';

export default async function runAI(client, data, predict) {
const predictions = await predict(data.text);
export default async function runAI(client, data) {
const predictions = await global.ft.predict(data.text);
const jsonData = {
event: 'ai_response',
op: 2,
id: data.id,
predictions
};

console.log(predictions)
const bsonData = serialize(jsonData);

client.pipe(bsonData);
client.write(bsonData);

return;
}

server/events/index.js
@@ -1,7 +1,9 @@
import runAI from './ai.js';
import trainAI from './trainAI.js';
import runOCR from './ocr.js';

export {
runAI,
trainAI
trainAI,
runOCR
}

23
server/events/ocr.js
Normal file
@@ -0,0 +1,23 @@
import { recognize } from 'node-tesseract-ocr';
import { serialize } from 'bson';

export default async function runOCR(client, eventData) {
    const config = {
        lang: 'eng',
        oem: 3,
        psm: 3,
    };

    const ocrText = await recognize(eventData.url, config);

    const jsonData = {
        op: 6,
        id: eventData.id,
        ocrText
    };

    const bsonData = serialize(jsonData);
    client.write(bsonData);

    return;
}

server/events/trainAI.js
@@ -1,51 +1,53 @@
import FastText from 'fasttext.js';
const ft = new FastText({
train: {
// number of concurrent threads
thread: 8,
// verbosity level [2]
verbose: 4,
// number of negatives sampled [5]
neg: 7,
// loss function {ns, hs, softmax} [ns]
loss: 'ns',
// learning rate [0.05]
lr: 1,
// change the rate of updates for the learning rate [100]
lrUpdateRate: 1000,
// max length of word ngram [1]
wordNgrams: 5,
// minimal number of word occurences
minCount: 1,
// minimal number of word occurences
minCountLabel: 1,
// size of word vectors [100]
dim: 100,
// size of the context window [5]
ws: 5,
// number of epochs [5]
epoch: 20,
// number of buckets [2000000]
bucket: 2000000,
// min length of char ngram [3]
minn: process.env.TRAIN_MINN || 3,
// max length of char ngram [6]
maxn: process.env.TRAIN_MAXN || 6,
// sampling threshold [0.0001]
t: 0.0001,
// load pre trained word vectors from unsupervised model
pretrainedVectors: ''
},
serializeTo: '/workspaces/revanced-helper/server/model/model',
trainFile: '/workspaces/revanced-helper/server/model/train.tsv',
});
import { join } from 'node:path';

export default async function trainAI(unload, load) {
//unload();
export default async function trainAI() {
const ft = new FastText({
train: {
// number of concurrent threads
thread: 8,
// verbosity level [2]
verbose: 4,
// number of negatives sampled [5]
neg: 7,
// loss function {ns, hs, softmax} [ns]
loss: 'ns',
// learning rate [0.05]
lr: 1,
// change the rate of updates for the learning rate [100]
lrUpdateRate: 1000,
// max length of word ngram [1]
wordNgrams: 5,
// minimal number of word occurences
minCount: 1,
// minimal number of word occurences
minCountLabel: 1,
// size of word vectors [100]
dim: 100,
// size of the context window [5]
ws: 5,
// number of epochs [5]
epoch: 20,
// number of buckets [2000000]
bucket: 2000000,
// min length of char ngram [3]
minn: process.env.TRAIN_MINN || 3,
// max length of char ngram [6]
maxn: process.env.TRAIN_MAXN || 6,
// sampling threshold [0.0001]
t: 0.0001,
// load pre trained word vectors from unsupervised model
pretrainedVectors: ''
},
serializeTo: join(global.__dirname, global.config.fasttext.loadModel).replace('.bin', ''),
trainFile: join(global.__dirname, global.config.fasttext.trainFile),
});

global.ft.unload();

await ft.train()

// load();
global.ft.load();

return;
}
@@ -1,31 +1,50 @@
import { readFileSync } from 'node:fs';
// Fix __dirname not being defined in ES modules. (https://stackoverflow.com/a/64383997)
import { fileURLToPath } from 'node:url';
import { dirname, join } from 'node:path';

const __filename = fileURLToPath(import.meta.url);
global.__dirname = dirname(__filename);

const configJSON = readFileSync('./config.json', 'utf-8');
const config = JSON.parse(configJSON);
global.config = config;
console.log(config);
import { createServer } from 'node:net';
import { deserialize } from 'bson';
import FastText from 'fasttext.js';
import { runAI, trainAI } from './events/index.js';
import { runAI, trainAI, runOCR } from './events/index.js';

const ft = new FastText({
loadModel: './model/model.bin'
});
const ft = new FastText(global.config.fasttext);

ft.load();

// I'm sorry. This is probably the only solution.
global.ft = ft;

const server = createServer(async (client) => {
client.on('data', async (data) => {
const eventData = deserialize(data);

switch(eventData.event) {
case 'ai': {
runAI(client, eventData, ft.predict);
switch(eventData.op) {
case 1: {
runAI(client, eventData);
break;
}
};

case 'train_ai': {
trainAI(ft.unload, ft.load);
case 4: {
trainAI();
break;
}
}

};

case 5: {
runOCR(client, eventData);
break;
};
};

});
});

server.listen(process.env.PORT || 3000);
server.listen(global.config.server.port || 3000);