feat: GODEL AI

This commit is contained in:
GramingFoxTeam
2022-12-25 12:49:16 +03:00
parent 18e57b0c32
commit 0ba525c4a5
14 changed files with 197 additions and 289 deletions

View File

@@ -20,32 +20,7 @@ And the server would return something like this:
{
"op": 2,
"id": "String",
"predictions": [
{
"label": "DOWNLOAD",
"score": "1"
}
]
}
```
# Training the AI
To add a sample to the training data, send a BSON document (shown here as JSON) like this:
```json
{
"op": 3,
"label": "FALSEPOSITIVE",
"text": "how"
}
```
To retrain the AI and reload the model, send this BSON document (shown here as JSON):
```json
{
"op": 4,
"response": "I think the term afn is just a generic slang term for the app that allows you to modify the behavior of Dalvik based android application..."
}
```

View File

@@ -2,11 +2,13 @@
"server": {
"port": 3000
},
"fasttext": {
"bin": "./model/fastText/fasttext",
"loadModel": "./model/model.bin",
"trainFile": "./model/train.tsv",
"debug": true
"transformers": {
"model": "./model.onnx",
"tokenizer": "./tokenizer.json",
"instruction": "Instruction: given a dialog context and related knowledge, you need to answer the question based on the knowledge.",
"knowledge": [
"ReVanced is a generic patcher that allows you to modify the behavior of any Dalvik based Android application"
]
}
}

View File

@@ -1,22 +0,0 @@
import { readFileSync, writeFileSync } from 'node:fs';
import { join } from 'node:path';
/**
 * Inserts a labelled sample into the training file, grouped with its label.
 *
 * Each line of the training file is treated as `<label><whitespace><text>`.
 * The sample is skipped when a line with exactly the same text already exists
 * (under any label). Otherwise it is inserted in front of the first line that
 * carries the same label, or at the top of the file when the label is new.
 *
 * @param {{ label: string, text: string }} eventData - Sample to add.
 * @param {string} __dirname - Base directory the configured path is resolved against.
 * @param {{ fasttext: { trainFile: string } }} config - Parsed configuration.
 * @returns {void}
 */
export default function addTrainData(eventData, __dirname, config) {
    const trainFilePath = join(__dirname, config.fasttext.trainFile);
    const { label, text } = eventData;
    const lines = readFileSync(trainFilePath, 'utf-8').split('\n');

    // Compare against the text portion of each line only. A plain substring
    // search over the whole file would reject "how" because of "show".
    const isDuplicate = lines.some((line) => {
        const separatorAt = line.search(/\s/);
        const lineText = separatorAt === -1 ? '' : line.slice(separatorAt + 1);
        return lineText === text;
    });
    if (isDuplicate) return;

    // Require a separator right after the label so that a label such as
    // "FALSE" does not match lines labelled "FALSEPOSITIVE".
    const labelIndex = lines.findIndex(
        (line) => line.startsWith(label) && /^\s/.test(line.slice(label.length))
    );
    lines.splice(labelIndex === -1 ? 0 : labelIndex, 0, `${label} ${text}`);

    writeFileSync(trainFilePath, lines.join('\n'));
}

View File

@@ -1,13 +1,24 @@
import { serialize } from 'bson';
export default async function runAI(client, data, ft) {
const predictions = await ft.predict(data.text);
/**
 * Builds a prompt from the configured instruction, the dialog and the
 * configured knowledge, runs it through the model and decodes the result.
 *
 * The prompt has the form `<instruction> [CONTEXT] <dialog> [KNOWLEDGE] <facts>`,
 * where the dialog is cut to its first 64 characters and the knowledge entries
 * are joined with single spaces.
 *
 * @param {object} tokenizer - Must expose `encode(text)` and `decode(ids, flag)`.
 * @param {object} model - Must expose `generate(ids, options)`.
 * @param {{ instruction: string, knowledge: string[] }} config - Prompt settings.
 * @param {string} dialog - Incoming text to answer.
 * @returns {Promise<*>} Whatever `tokenizer.decode` yields for the generated ids.
 */
async function generateResponse(tokenizer, model, config, dialog) {
    const knowledgeSection = `[KNOWLEDGE] ${config.knowledge.join(' ')}`;
    const contextSection = `[CONTEXT] ${dialog.substring(0, 64)}`;
    const prompt = `${config.instruction} ${contextSection} ${knowledgeSection}`;

    const generationOptions = { maxLength: 64, topK: 10 };
    const generatedIds = await model.generate(tokenizer.encode(prompt), generationOptions);

    // Second argument is passed through to the tokenizer unchanged.
    const decoded = await tokenizer.decode(generatedIds, true);
    return decoded;
}
export default async function runAI(client, data, tokenizer, model, config) {
const response = await generateResponse(tokenizer, model, config, data.text);
client.write(
serialize({
op: 2,
id: data.id,
predictions
response
})
);

View File

@@ -1,54 +0,0 @@
import FastText from 'fasttext.js';
import { join } from 'node:path';
/**
 * Retrains the fastText model from the configured training file and reloads
 * the instance that was passed in.
 *
 * The passed-in instance is unloaded before training starts and loaded again
 * afterwards. The reload also runs when training fails, so the caller is not
 * left with an unloaded model; a failing reload replaces the training error.
 *
 * @param {object} ftext - FastText instance to unload and reload; must expose
 *   `unload()` and `load()`.
 * @param {string} __dirname - Base directory the configured paths are resolved against.
 * @param {{ fasttext: { loadModel: string, trainFile: string, bin: string } }} config -
 *   Parsed configuration.
 * @returns {Promise<void>} Settles once training has finished and `ftext.load()`
 *   has completed.
 */
export default async function trainAI(ftext, __dirname, config) {
    const ft = new FastText({
        train: {
            // number of concurrent threads
            thread: 8,
            // verbosity level [2]
            verbose: 4,
            // number of negatives sampled [5]
            neg: 7,
            // loss function {ns, hs, softmax} [ns]
            loss: 'ns',
            // learning rate [0.05]
            lr: 1,
            // change the rate of updates for the learning rate [100]
            lrUpdateRate: 1000,
            // max length of word ngram [1]
            wordNgrams: 5,
            // minimal number of word occurrences
            minCount: 1,
            // minimal number of label occurrences
            minCountLabel: 1,
            // size of word vectors [100]
            dim: 100,
            // size of the context window [5]
            ws: 5,
            // number of epochs [5]
            epoch: 20,
            // number of buckets [2000000]
            bucket: 2000000,
            // min length of char ngram [3]
            minn: process.env.TRAIN_MINN || 3,
            // max length of char ngram [6]
            maxn: process.env.TRAIN_MAXN || 6,
            // sampling threshold [0.0001]
            t: 0.0001,
            // load pre trained word vectors from unsupervised model
            pretrainedVectors: ''
        },
        // The '.bin' part is stripped from the configured model path here.
        serializeTo: join(__dirname, config.fasttext.loadModel).replace('.bin', ''),
        trainFile: join(__dirname, config.fasttext.trainFile),
        bin: join(__dirname, config.fasttext.bin)
    });

    // Await each step so the returned promise reflects the whole
    // unload -> train -> load sequence instead of leaving promises floating.
    await ftext.unload();
    try {
        await ft.train();
    } finally {
        await ftext.load();
    }
}

View File

@@ -10,12 +10,11 @@ const config = JSON.parse(readFileSync('./config.json', 'utf-8'));
import { createServer } from 'node:net';
import { deserialize } from 'bson';
import FastText from 'fasttext.js';
import { runAI, trainAI, runOCR, addTrainData } from './events/index.js';
import transformers from 'transformers-nodejs';
import { runAI, runOCR } from './events/index.js';
const ft = new FastText(config.fasttext);
ft.load();
const tokenizer = await transformers.AutoTokenizer.fromPretrained(config.transformers.tokenizer);
const model = await transformers.AutoModelForSeq2SeqLM.fromPretrained(config.transformers.model);
const server = createServer(async (client) => {
client.on('data', async (data) => {
@@ -25,17 +24,7 @@ const server = createServer(async (client) => {
switch (eventData.op) {
case 1: {
runAI(client, eventData, ft);
break;
}
case 3: {
addTrainData(eventData, __dirname, config);
break;
}
case 4: {
trainAI(ft, __dirname, config);
runAI(client, eventData, tokenizer, model, config.transformers);
break;
}

View File

@@ -10,8 +10,8 @@
"license": "GPL-3.0-or-later",
"dependencies": {
"bson": "^4.7.0",
"fasttext.js": "^1.1.2",
"node-tesseract-ocr": "^2.2.1"
"node-tesseract-ocr": "^2.2.1",
"transformers-nodejs": "github:reisxd/transformers-nodejs"
}
},
"node_modules/base64-js": {
@@ -67,14 +67,6 @@
"ieee754": "^1.1.13"
}
},
"node_modules/fasttext.js": {
"version": "1.1.2",
"resolved": "https://registry.npmjs.org/fasttext.js/-/fasttext.js-1.1.2.tgz",
"integrity": "sha512-ZdH7lJzOlHd2KfT5sr3Z88EQDiEKQzxt+v2J7t1ZVZZhmtM5XZtCQNVDD/wWRNl5zEjv8LBrBoRGTIpcMX52Vw==",
"engines": {
"node": "*"
}
},
"node_modules/ieee754": {
"version": "1.2.1",
"resolved": "https://registry.npmjs.org/ieee754/-/ieee754-1.2.1.tgz",
@@ -101,6 +93,32 @@
"engines": {
"node": ">=10"
}
},
"node_modules/onnxruntime-common": {
"version": "1.13.1",
"resolved": "https://registry.npmjs.org/onnxruntime-common/-/onnxruntime-common-1.13.1.tgz",
"integrity": "sha512-HtfNeuZ8p47avtZR2mGldXA728keJj46w/6yy5DQTLA9zmpjXF/iJdS8f9yAtyWbSGw3s95DfBPWw2myhA1r0A=="
},
"node_modules/onnxruntime-node": {
"version": "1.13.1",
"resolved": "https://registry.npmjs.org/onnxruntime-node/-/onnxruntime-node-1.13.1.tgz",
"integrity": "sha512-HHwxs13dSCJX+iqr90p8WGlG54XTxNG6lKXhUC4xu4r95GFjLaA6JxNUjeuGQ6T6E06hCy61lZMdH5MTcBNLTA==",
"os": [
"win32",
"darwin",
"linux"
],
"dependencies": {
"onnxruntime-common": "~1.13.1"
}
},
"node_modules/transformers-nodejs": {
"version": "1.0.0",
"resolved": "git+ssh://git@github.com/reisxd/transformers-nodejs.git#28e591496a7a231b0ad8dc62081365d4077638f9",
"license": "GPL-3.0-or-later",
"dependencies": {
"onnxruntime-node": "^1.13.1"
}
}
},
"dependencies": {
@@ -126,11 +144,6 @@
"ieee754": "^1.1.13"
}
},
"fasttext.js": {
"version": "1.1.2",
"resolved": "https://registry.npmjs.org/fasttext.js/-/fasttext.js-1.1.2.tgz",
"integrity": "sha512-ZdH7lJzOlHd2KfT5sr3Z88EQDiEKQzxt+v2J7t1ZVZZhmtM5XZtCQNVDD/wWRNl5zEjv8LBrBoRGTIpcMX52Vw=="
},
"ieee754": {
"version": "1.2.1",
"resolved": "https://registry.npmjs.org/ieee754/-/ieee754-1.2.1.tgz",
@@ -140,6 +153,26 @@
"version": "2.2.1",
"resolved": "https://registry.npmjs.org/node-tesseract-ocr/-/node-tesseract-ocr-2.2.1.tgz",
"integrity": "sha512-Q9cD79JGpPNQBxbi1fV+OAsTxYKLpx22sagsxSyKbu1u+t6UarApf5m32uVc8a5QAP1Wk7fIPN0aJFGGEE9DyQ=="
},
"onnxruntime-common": {
"version": "1.13.1",
"resolved": "https://registry.npmjs.org/onnxruntime-common/-/onnxruntime-common-1.13.1.tgz",
"integrity": "sha512-HtfNeuZ8p47avtZR2mGldXA728keJj46w/6yy5DQTLA9zmpjXF/iJdS8f9yAtyWbSGw3s95DfBPWw2myhA1r0A=="
},
"onnxruntime-node": {
"version": "1.13.1",
"resolved": "https://registry.npmjs.org/onnxruntime-node/-/onnxruntime-node-1.13.1.tgz",
"integrity": "sha512-HHwxs13dSCJX+iqr90p8WGlG54XTxNG6lKXhUC4xu4r95GFjLaA6JxNUjeuGQ6T6E06hCy61lZMdH5MTcBNLTA==",
"requires": {
"onnxruntime-common": "~1.13.1"
}
},
"transformers-nodejs": {
"version": "git+ssh://git@github.com/reisxd/transformers-nodejs.git#28e591496a7a231b0ad8dc62081365d4077638f9",
"from": "transformers-nodejs@https://github.com/reisxd/transformers-nodejs",
"requires": {
"onnxruntime-node": "^1.13.1"
}
}
}
}

View File

@@ -8,7 +8,7 @@
"license": "GPL-3.0-or-later",
"dependencies": {
"bson": "^4.7.0",
"fasttext.js": "^1.1.2",
"node-tesseract-ocr": "^2.2.1"
"node-tesseract-ocr": "^2.2.1",
"transformers-nodejs": "github:reisxd/transformers-nodejs"
}
}