From d70fcde2273a3041f696a1eb8a57570854c745d6 Mon Sep 17 00:00:00 2001 From: reis Date: Fri, 11 Nov 2022 17:29:15 +0000 Subject: [PATCH] Add OCR, add configuration --- server/PROTOCOL.md | 34 ++++++++++++---- server/config.json | 12 ++++++ server/events/ai.js | 11 +++-- server/events/index.js | 4 +- server/events/ocr.js | 23 +++++++++++ server/events/trainAI.js | 88 ++++++++++++++++++++-------------------- server/index.js | 45 ++++++++++++++------ 7 files changed, 146 insertions(+), 71 deletions(-) create mode 100644 server/config.json create mode 100644 server/events/ocr.js diff --git a/server/PROTOCOL.md b/server/PROTOCOL.md index 07394f5..6a19e0f 100644 --- a/server/PROTOCOL.md +++ b/server/PROTOCOL.md @@ -8,7 +8,7 @@ Sending the server this JSON (BSON) will send you back the AI predictions. ```json { - "event": "ai", + "op": 1, "id": "String", "text": "How do i download ReVanced?" } @@ -18,7 +18,7 @@ And the server would return something like this: ```json { - "event": "ai_response", + "op": 2, "id": "String", "predictions": [ { @@ -29,17 +29,13 @@ And the server would return something like this: } ``` -# OCR - -Soon:tm: - # Training the AI To add data to the train data, send a BSON (JSON) like this: ```json { - "event": "add_train_data", + "op": 3, "label": "FALSEPOSITIVE", "text": "how" } @@ -49,6 +45,28 @@ To train the AI and to re-load it, send this BSON (JSON): ```json { - "event": "train_ai" + "op": 4 +} +``` + +# OCR + +Sending the server this JSON (BSON) will send you back the read text. + +```json +{ + "op": 5, + "id": "String", + "url": "https://cdn.discordapp.com/attachments/1033338556493606963/1033338557231796224/Screenshot_20221022-121318.jpg" +} +``` + +And the server would return something like this: + +```json +{ + "op": 6, + "id": "String", + "ocrText": "..." 
} ``` \ No newline at end of file diff --git a/server/config.json b/server/config.json new file mode 100644 index 0000000..7412e9d --- /dev/null +++ b/server/config.json @@ -0,0 +1,12 @@ +{ + "server": { + "port": 3000 + }, + + "fasttext": { + "bin": "./model/fastText/fasttext", + "loadModel": "./model/model.bin", + "trainFile": "./model/train.tsv", + "debug": true + } +} \ No newline at end of file diff --git a/server/events/ai.js b/server/events/ai.js index c9a3ce8..75079b2 100644 --- a/server/events/ai.js +++ b/server/events/ai.js @@ -1,16 +1,15 @@ import { serialize } from 'bson'; -export default async function runAI(client, data, predict) { - const predictions = await predict(data.text); +export default async function runAI(client, data) { + const predictions = await global.ft.predict(data.text); const jsonData = { - event: 'ai_response', + op: 2, id: data.id, predictions }; - + if (global.config.fasttext.debug) console.log(predictions); const bsonData = serialize(jsonData); - - client.pipe(bsonData); + client.write(bsonData); return; } \ No newline at end of file diff --git a/server/events/index.js b/server/events/index.js index 5e16c07..3a00b73 100644 --- a/server/events/index.js +++ b/server/events/index.js @@ -1,7 +1,9 @@ import runAI from './ai.js'; import trainAI from './trainAI.js'; +import runOCR from './ocr.js'; export { runAI, - trainAI + trainAI, + runOCR } \ No newline at end of file diff --git a/server/events/ocr.js b/server/events/ocr.js new file mode 100644 index 0000000..4175db5 --- /dev/null +++ b/server/events/ocr.js @@ -0,0 +1,23 @@ +import { recognize } from 'node-tesseract-ocr'; +import { serialize } from 'bson'; + +export default async function runOCR(client, eventData) { + const config = { + lang: 'eng', + oem: 3, + psm: 3, + }; + + const ocrText = await recognize(eventData.url, config); + + const jsonData = { + op: 6, + id: eventData.id, + ocrText + }; + + const bsonData = serialize(jsonData); + client.write(bsonData); + + return; +} \ No newline at end of file diff 
--git a/server/events/trainAI.js b/server/events/trainAI.js index 279e76e..4647f25 100644 --- a/server/events/trainAI.js +++ b/server/events/trainAI.js @@ -1,51 +1,53 @@ import FastText from 'fasttext.js'; -const ft = new FastText({ - train: { - // number of concurrent threads - thread: 8, - // verbosity level [2] - verbose: 4, - // number of negatives sampled [5] - neg: 7, - // loss function {ns, hs, softmax} [ns] - loss: 'ns', - // learning rate [0.05] - lr: 1, - // change the rate of updates for the learning rate [100] - lrUpdateRate: 1000, - // max length of word ngram [1] - wordNgrams: 5, - // minimal number of word occurences - minCount: 1, - // minimal number of word occurences - minCountLabel: 1, - // size of word vectors [100] - dim: 100, - // size of the context window [5] - ws: 5, - // number of epochs [5] - epoch: 20, - // number of buckets [2000000] - bucket: 2000000, - // min length of char ngram [3] - minn: process.env.TRAIN_MINN || 3, - // max length of char ngram [6] - maxn: process.env.TRAIN_MAXN || 6, - // sampling threshold [0.0001] - t: 0.0001, - // load pre trained word vectors from unsupervised model - pretrainedVectors: '' - }, - serializeTo: '/workspaces/revanced-helper/server/model/model', - trainFile: '/workspaces/revanced-helper/server/model/train.tsv', -}); +import { join } from 'node:path'; -export default async function trainAI(unload, load) { - //unload(); +export default async function trainAI() { + const ft = new FastText({ + train: { + // number of concurrent threads + thread: 8, + // verbosity level [2] + verbose: 4, + // number of negatives sampled [5] + neg: 7, + // loss function {ns, hs, softmax} [ns] + loss: 'ns', + // learning rate [0.05] + lr: 1, + // change the rate of updates for the learning rate [100] + lrUpdateRate: 1000, + // max length of word ngram [1] + wordNgrams: 5, + // minimal number of word occurences + minCount: 1, + // minimal number of word occurences + minCountLabel: 1, + // size of word vectors [100] + 
 dim: 100, + // size of the context window [5] + ws: 5, + // number of epochs [5] + epoch: 20, + // number of buckets [2000000] + bucket: 2000000, + // min length of char ngram [3] + minn: process.env.TRAIN_MINN || 3, + // max length of char ngram [6] + maxn: process.env.TRAIN_MAXN || 6, + // sampling threshold [0.0001] + t: 0.0001, + // load pre trained word vectors from unsupervised model + pretrainedVectors: '' + }, + serializeTo: join(global.__dirname, global.config.fasttext.loadModel).replace('.bin', ''), + trainFile: join(global.__dirname, global.config.fasttext.trainFile), + }); + + await global.ft.unload(); await ft.train() - // load(); + await global.ft.load(); return; } \ No newline at end of file diff --git a/server/index.js b/server/index.js index b17d373..6f0e3d6 100644 --- a/server/index.js +++ b/server/index.js @@ -1,31 +1,50 @@ +import { readFileSync } from 'node:fs'; +// Fix __dirname not being defined in ES modules. (https://stackoverflow.com/a/64383997) +import { fileURLToPath } from 'node:url'; +import { dirname, join } from 'node:path'; + +const __filename = fileURLToPath(import.meta.url); +global.__dirname = dirname(__filename); + +const configJSON = readFileSync(join(global.__dirname, 'config.json'), 'utf-8'); +const config = JSON.parse(configJSON); +global.config = config; +console.log(config); import { createServer } from 'node:net'; import { deserialize } from 'bson'; import FastText from 'fasttext.js'; -import { runAI, trainAI } from './events/index.js'; +import { runAI, trainAI, runOCR } from './events/index.js'; -const ft = new FastText({ - loadModel: './model/model.bin' -}); +const ft = new FastText(global.config.fasttext); ft.load(); +// I'm sorry. This is probably the only solution. 
 +global.ft = ft; + const server = createServer(async (client) => { client.on('data', async (data) => { const eventData = deserialize(data); - switch(eventData.event) { - case 'ai': { - runAI(client, eventData, ft.predict); + switch(eventData.op) { + case 1: { + runAI(client, eventData).catch(console.error); break; - } + } - case 'train_ai': { - trainAI(ft.unload, ft.load); + case 4: { + trainAI().catch(console.error); break; - } - } + + } + + case 5: { + runOCR(client, eventData).catch(console.error); + break; + } + } }); }); -server.listen(process.env.PORT || 3000); \ No newline at end of file +server.listen(global.config.server.port ?? 3000);