mirror of
https://github.com/ReVanced/revanced-bots.git
synced 2026-01-24 03:31:04 +00:00
feat: GODEL AI
This commit is contained in:
@@ -20,32 +20,7 @@ And the server would return something like this:
|
||||
{
|
||||
"op": 2,
|
||||
"id": "String",
|
||||
"predictions": [
|
||||
{
|
||||
"label": "DOWNLOAD",
|
||||
"score": "1"
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
# Training the AI
|
||||
|
||||
To add a sample to the training data, send a BSON message (shown here as JSON) like this:
|
||||
|
||||
```json
|
||||
{
|
||||
"op": 3,
|
||||
"label": "FALSEPOSITIVE",
|
||||
"text": "how"
|
||||
}
|
||||
```
|
||||
|
||||
To retrain the AI and reload the model, send this BSON message (shown here as JSON):
|
||||
|
||||
```json
|
||||
{
|
||||
"op": 4
|
||||
"response": "I think the term afn is just a generic slang term for the app that allows you to modify the behavior of Dalvik based android application..."
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
@@ -2,11 +2,13 @@
|
||||
"server": {
|
||||
"port": 3000
|
||||
},
|
||||
|
||||
"fasttext": {
|
||||
"bin": "./model/fastText/fasttext",
|
||||
"loadModel": "./model/model.bin",
|
||||
"trainFile": "./model/train.tsv",
|
||||
"debug": true
|
||||
|
||||
"transformers": {
|
||||
"model": "./model.onnx",
|
||||
"tokenizer": "./tokenizer.json",
|
||||
"instruction": "Instruction: given a dialog context and related knowledge, you need to answer the question based on the knowledge.",
|
||||
"knowledge": [
|
||||
"ReVanced is a generic patcher that allows you to modify the behavior of any Dalvik based Android application"
|
||||
]
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,22 +0,0 @@
|
||||
import { readFileSync, writeFileSync } from 'node:fs';
|
||||
import { join } from 'node:path';
|
||||
|
||||
/**
 * Adds one labelled sample to the fastText training file.
 *
 * Every line of the training file is written as `<label> <text>`. The new
 * sample is inserted directly before the first existing line of the same
 * label, or at the top of the file when the label has no lines yet. Nothing is
 * written when the same text is already present, or when the message does not
 * carry a usable label and text.
 *
 * @param {{ label: string, text: string }} eventData - Decoded client message.
 * @param {string} __dirname - Directory that `config.fasttext.trainFile` is resolved against.
 * @param {{ fasttext: { trainFile: string } }} config - Server configuration.
 * @returns {void}
 */
export default function addTrainData(eventData, __dirname, config) {
    const { label, text } = eventData;

    // The message comes straight from a client; never write "undefined" rows.
    if (typeof label !== 'string' || typeof text !== 'string') return;

    // A sample has to stay on one line, otherwise it would be split into
    // several bogus rows of the training file.
    const sampleText = text.replace(/[\r\n]+/g, ' ').trim();
    if (sampleText === '') return;

    const trainFilePath = join(__dirname, config.fasttext.trainFile);
    const lines = readFileSync(trainFilePath, 'utf-8').split('\n');

    // Compare whole samples instead of searching the file for a substring:
    // "how" must not count as a duplicate of "show me the link".
    // NOTE(review): assumes labels contain no spaces, so everything after the
    // first space of a line is the sample text.
    const isDuplicate = lines.some(
        (line) => line.slice(line.indexOf(' ') + 1) === sampleText
    );
    if (isDuplicate) return;

    // Match the label as a whole word so that e.g. "DOWN" does not land in the
    // "DOWNLOAD" group.
    const labelPrefix = `${label} `;
    const labelIndex = lines.findIndex((line) => line.startsWith(labelPrefix));

    lines.splice(labelIndex === -1 ? 0 : labelIndex, 0, `${labelPrefix}${sampleText}`);

    writeFileSync(trainFilePath, lines.join('\n'));
}
|
||||
@@ -1,13 +1,24 @@
|
||||
import { serialize } from 'bson';
|
||||
|
||||
export default async function runAI(client, data, ft) {
|
||||
const predictions = await ft.predict(data.text);
|
||||
/**
 * Builds the model prompt from the configured instruction, the dialog and the
 * configured knowledge, runs generation and decodes the result.
 *
 * The prompt has the form
 * `<instruction> [CONTEXT] <dialog, first 64 characters> [KNOWLEDGE] <knowledge joined by spaces>`.
 *
 * @param {object} tokenizer - Object providing `encode(text)` and `decode(ids, flag)`.
 * @param {object} model - Object providing `generate(ids, options)`.
 * @param {{ instruction: string, knowledge: string[] }} config - The `transformers` section of the server configuration.
 * @param {string} dialog - Text sent by the client; a missing or non-string value is coerced to a string.
 * @returns {Promise<*>} Whatever `tokenizer.decode` resolves to.
 */
async function generateResponse(tokenizer, model, config, dialog) {
    // The dialog comes from a client message. Coerce it so that a missing or
    // non-string `text` field yields an empty context instead of a TypeError
    // inside a promise that the caller may not be awaiting.
    const dialogText = String(dialog ?? '');

    const knowledge = `[KNOWLEDGE] ${config.knowledge.join(' ')}`;
    const context = `[CONTEXT] ${dialogText.substring(0, 64)}`;
    const query = `${config.instruction} ${context} ${knowledge}`;

    const inputTokenIds = tokenizer.encode(query);
    const outputTokenIds = await model.generate(inputTokenIds, {
        maxLength: 64,
        topK: 10
    });

    return await tokenizer.decode(outputTokenIds, true);
}
|
||||
|
||||
export default async function runAI(client, data, tokenizer, model, config) {
|
||||
const response = await generateResponse(tokenizer, model, config, data.text);
|
||||
|
||||
client.write(
|
||||
serialize({
|
||||
op: 2,
|
||||
id: data.id,
|
||||
predictions
|
||||
response
|
||||
})
|
||||
);
|
||||
|
||||
|
||||
@@ -1,54 +0,0 @@
|
||||
import FastText from 'fasttext.js';
|
||||
import { join } from 'node:path';
|
||||
|
||||
/**
 * Retrains the fastText model from the configured training file and reloads
 * the instance used for predictions.
 *
 * The live instance is unloaded before training and loaded again afterwards,
 * including when training fails, so the server is not left without a model.
 *
 * @param {object} ftext - The FastText instance used for predictions; must provide `unload()` and `load()`.
 * @param {string} __dirname - Directory that the configured paths are resolved against.
 * @param {{ fasttext: { loadModel: string, trainFile: string, bin: string } }} config - Server configuration.
 * @returns {Promise<void>} Settles once training has finished and the model has been reloaded.
 */
export default async function trainAI(ftext, __dirname, config) {
    const ft = new FastText({
        train: {
            // number of concurrent threads
            thread: 8,
            // verbosity level [2]
            verbose: 4,
            // number of negatives sampled [5]
            neg: 7,
            // loss function {ns, hs, softmax} [ns]
            loss: 'ns',
            // learning rate [0.05]
            lr: 1,
            // change the rate of updates for the learning rate [100]
            lrUpdateRate: 1000,
            // max length of word ngram [1]
            wordNgrams: 5,
            // minimal number of word occurrences
            minCount: 1,
            // minimal number of label occurrences
            minCountLabel: 1,
            // size of word vectors [100]
            dim: 100,
            // size of the context window [5]
            ws: 5,
            // number of epochs [5]
            epoch: 20,
            // number of buckets [2000000]
            bucket: 2000000,
            // min length of char ngram [3]
            minn: process.env.TRAIN_MINN || 3,
            // max length of char ngram [6]
            maxn: process.env.TRAIN_MAXN || 6,
            // sampling threshold [0.0001]
            t: 0.0001,
            // load pre trained word vectors from unsupervised model
            pretrainedVectors: ''
        },
        // Strip only the trailing ".bin" extension, not the first ".bin"
        // that happens to appear anywhere in the path.
        serializeTo: join(__dirname, config.fasttext.loadModel).replace(/\.bin$/, ''),
        trainFile: join(__dirname, config.fasttext.trainFile),
        bin: join(__dirname, config.fasttext.bin)
    });

    // NOTE(review): unload()/load() are awaited on the assumption that they
    // return promises; awaiting a plain value is harmless - confirm against
    // the fasttext.js version in use.
    await ftext.unload();

    try {
        await ft.train();
    } finally {
        // Reload even when training fails so predictions keep working.
        await ftext.load();
    }
}
|
||||
@@ -10,12 +10,11 @@ const config = JSON.parse(readFileSync('./config.json', 'utf-8'));
|
||||
|
||||
import { createServer } from 'node:net';
|
||||
import { deserialize } from 'bson';
|
||||
import FastText from 'fasttext.js';
|
||||
import { runAI, trainAI, runOCR, addTrainData } from './events/index.js';
|
||||
import transformers from 'transformers-nodejs';
|
||||
import { runAI, runOCR } from './events/index.js';
|
||||
|
||||
const ft = new FastText(config.fasttext);
|
||||
|
||||
ft.load();
|
||||
const tokenizer = await transformers.AutoTokenizer.fromPretrained(config.transformers.tokenizer);
|
||||
const model = await transformers.AutoModelForSeq2SeqLM.fromPretrained(config.transformers.model);
|
||||
|
||||
const server = createServer(async (client) => {
|
||||
client.on('data', async (data) => {
|
||||
@@ -25,17 +24,7 @@ const server = createServer(async (client) => {
|
||||
|
||||
switch (eventData.op) {
|
||||
case 1: {
|
||||
runAI(client, eventData, ft);
|
||||
break;
|
||||
}
|
||||
|
||||
case 3: {
|
||||
addTrainData(eventData, __dirname, config);
|
||||
break;
|
||||
}
|
||||
|
||||
case 4: {
|
||||
trainAI(ft, __dirname, config);
|
||||
runAI(client, eventData, tokenizer, model, config.transformers);
|
||||
break;
|
||||
}
|
||||
|
||||
|
||||
63
server/package-lock.json
generated
63
server/package-lock.json
generated
@@ -10,8 +10,8 @@
|
||||
"license": "GPL-3.0-or-later",
|
||||
"dependencies": {
|
||||
"bson": "^4.7.0",
|
||||
"fasttext.js": "^1.1.2",
|
||||
"node-tesseract-ocr": "^2.2.1"
|
||||
"node-tesseract-ocr": "^2.2.1",
|
||||
"transformers-nodejs": "github:reisxd/transformers-nodejs"
|
||||
}
|
||||
},
|
||||
"node_modules/base64-js": {
|
||||
@@ -67,14 +67,6 @@
|
||||
"ieee754": "^1.1.13"
|
||||
}
|
||||
},
|
||||
"node_modules/fasttext.js": {
|
||||
"version": "1.1.2",
|
||||
"resolved": "https://registry.npmjs.org/fasttext.js/-/fasttext.js-1.1.2.tgz",
|
||||
"integrity": "sha512-ZdH7lJzOlHd2KfT5sr3Z88EQDiEKQzxt+v2J7t1ZVZZhmtM5XZtCQNVDD/wWRNl5zEjv8LBrBoRGTIpcMX52Vw==",
|
||||
"engines": {
|
||||
"node": "*"
|
||||
}
|
||||
},
|
||||
"node_modules/ieee754": {
|
||||
"version": "1.2.1",
|
||||
"resolved": "https://registry.npmjs.org/ieee754/-/ieee754-1.2.1.tgz",
|
||||
@@ -101,6 +93,32 @@
|
||||
"engines": {
|
||||
"node": ">=10"
|
||||
}
|
||||
},
|
||||
"node_modules/onnxruntime-common": {
|
||||
"version": "1.13.1",
|
||||
"resolved": "https://registry.npmjs.org/onnxruntime-common/-/onnxruntime-common-1.13.1.tgz",
|
||||
"integrity": "sha512-HtfNeuZ8p47avtZR2mGldXA728keJj46w/6yy5DQTLA9zmpjXF/iJdS8f9yAtyWbSGw3s95DfBPWw2myhA1r0A=="
|
||||
},
|
||||
"node_modules/onnxruntime-node": {
|
||||
"version": "1.13.1",
|
||||
"resolved": "https://registry.npmjs.org/onnxruntime-node/-/onnxruntime-node-1.13.1.tgz",
|
||||
"integrity": "sha512-HHwxs13dSCJX+iqr90p8WGlG54XTxNG6lKXhUC4xu4r95GFjLaA6JxNUjeuGQ6T6E06hCy61lZMdH5MTcBNLTA==",
|
||||
"os": [
|
||||
"win32",
|
||||
"darwin",
|
||||
"linux"
|
||||
],
|
||||
"dependencies": {
|
||||
"onnxruntime-common": "~1.13.1"
|
||||
}
|
||||
},
|
||||
"node_modules/transformers-nodejs": {
|
||||
"version": "1.0.0",
|
||||
"resolved": "git+ssh://git@github.com/reisxd/transformers-nodejs.git#28e591496a7a231b0ad8dc62081365d4077638f9",
|
||||
"license": "GPL-3.0-or-later",
|
||||
"dependencies": {
|
||||
"onnxruntime-node": "^1.13.1"
|
||||
}
|
||||
}
|
||||
},
|
||||
"dependencies": {
|
||||
@@ -126,11 +144,6 @@
|
||||
"ieee754": "^1.1.13"
|
||||
}
|
||||
},
|
||||
"fasttext.js": {
|
||||
"version": "1.1.2",
|
||||
"resolved": "https://registry.npmjs.org/fasttext.js/-/fasttext.js-1.1.2.tgz",
|
||||
"integrity": "sha512-ZdH7lJzOlHd2KfT5sr3Z88EQDiEKQzxt+v2J7t1ZVZZhmtM5XZtCQNVDD/wWRNl5zEjv8LBrBoRGTIpcMX52Vw=="
|
||||
},
|
||||
"ieee754": {
|
||||
"version": "1.2.1",
|
||||
"resolved": "https://registry.npmjs.org/ieee754/-/ieee754-1.2.1.tgz",
|
||||
@@ -140,6 +153,26 @@
|
||||
"version": "2.2.1",
|
||||
"resolved": "https://registry.npmjs.org/node-tesseract-ocr/-/node-tesseract-ocr-2.2.1.tgz",
|
||||
"integrity": "sha512-Q9cD79JGpPNQBxbi1fV+OAsTxYKLpx22sagsxSyKbu1u+t6UarApf5m32uVc8a5QAP1Wk7fIPN0aJFGGEE9DyQ=="
|
||||
},
|
||||
"onnxruntime-common": {
|
||||
"version": "1.13.1",
|
||||
"resolved": "https://registry.npmjs.org/onnxruntime-common/-/onnxruntime-common-1.13.1.tgz",
|
||||
"integrity": "sha512-HtfNeuZ8p47avtZR2mGldXA728keJj46w/6yy5DQTLA9zmpjXF/iJdS8f9yAtyWbSGw3s95DfBPWw2myhA1r0A=="
|
||||
},
|
||||
"onnxruntime-node": {
|
||||
"version": "1.13.1",
|
||||
"resolved": "https://registry.npmjs.org/onnxruntime-node/-/onnxruntime-node-1.13.1.tgz",
|
||||
"integrity": "sha512-HHwxs13dSCJX+iqr90p8WGlG54XTxNG6lKXhUC4xu4r95GFjLaA6JxNUjeuGQ6T6E06hCy61lZMdH5MTcBNLTA==",
|
||||
"requires": {
|
||||
"onnxruntime-common": "~1.13.1"
|
||||
}
|
||||
},
|
||||
"transformers-nodejs": {
|
||||
"version": "git+ssh://git@github.com/reisxd/transformers-nodejs.git#28e591496a7a231b0ad8dc62081365d4077638f9",
|
||||
"from": "transformers-nodejs@https://github.com/reisxd/transformers-nodejs",
|
||||
"requires": {
|
||||
"onnxruntime-node": "^1.13.1"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
"license": "GPL-3.0-or-later",
|
||||
"dependencies": {
|
||||
"bson": "^4.7.0",
|
||||
"fasttext.js": "^1.1.2",
|
||||
"node-tesseract-ocr": "^2.2.1"
|
||||
"node-tesseract-ocr": "^2.2.1",
|
||||
"transformers-nodejs": "github:reisxd/transformers-nodejs"
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user