1.1.2 • Published 12 months ago

nano-vectordb-js v1.1.2

Weekly downloads
-
License
AGPL-3.0-or-later
Repository
-
Last release
12 months ago

šŸŒ¬ļø A implementation of NanoVectorDB in js.

⚡ Fast vector operations.

šŸƒ Support naive multi-tenancy.

Install

Install from npm

npm install nano-vectordb-js

use in browser:

  • download the file dbs.min.js from here
  • use it in your html file:
<script src="dbs.min.js"></script>

the example is in the file here

Quick Start

Faking your data:

// Set data length and dimension
const dataLen = 100;
const fakeDim = 1536;

/**
 * Build a random matrix as an array of typed rows.
 *
 * @param {number} rows - Number of rows (vectors) to generate.
 * @param {number} cols - Length of each row (the vector dimension).
 * @returns {Float32Array[]} `rows` Float32Arrays, each holding `cols`
 *   values produced by Math.random().
 */
function generateRandomMatrix(rows, cols) {
    // One row: a Float32Array whose entries each come from Math.random().
    const makeRow = () => Float32Array.from({ length: cols }, () => Math.random());
    return Array.from({ length: rows }, makeRow);
}

// Generate random matrix
const fakeEmbeds = generateRandomMatrix(dataLen, fakeDim);

// Print matrix shape (for debugging only)
console.log(`Shape: [${dataLen}, ${fakeDim}]`);

// Building fake data
const fakesData = Array.from({ length: dataLen }, (_, i) => ({
    __vector__: fakeEmbeds[i],
    __id__: i.toString()
}));

// Example output the first few elements to verify the result
console.log(fakesData.slice(0, 1));

output:

[
  {
    __vector__: Float32Array(1536) [
       0.7478081583976746,   0.817471444606781, 
       ...,
       0.8189947009086609,
         0.8611364364624023,
       0.4231139123439789,
      ... 1436 more items
    ],
    __id__: '0'
  }
]

You can add any other fields to a data record, but two field names are reserved:

  • __id__: If passed, NanoVectorDB will use your id, otherwise a generated id will be used.
  • __vector__: required; your embedding, as a Float32Array.

Init a DB

// Node.js (CommonJS): the module is loaded as a namespace object, `dbs`.
const dbs = require("nano-vectordb-js"); 
const vdb = new dbs.NanoVectorDB({
        embedding_dim: fakeDim, 
        metric: "cosine", 
        storage_file: "test.json", 
    });

// ES6 modules: NanoVectorDB is imported by name, so it is used directly
// (there is no `dbs` namespace in this form).
// import { NanoVectorDB } from "nano-vectordb-js";
// const vdb = new NanoVectorDB({
//         embedding_dim: fakeDim, 
//         metric: "cosine", 
//         storage_file: "test.json", 
//     });

You can also set isSync: true and await postInit() inside an async function to initialize the DB before using it:

// Create the DB with isSync: true and await postInit() before the first upsert.
(async () => {
    const vdb = new dbs.NanoVectorDB({
        embedding_dim: fakeDim,
        metric: "cosine",
        storage_file: "test.json",
        isSync: true,
    });
    await vdb.postInit();
    // Declare the result with `const`: a bare `r = ...` creates an implicit
    // global and throws a ReferenceError in strict mode / ES modules.
    const r = vdb.upsert(fakesData);
    console.log(r["update"], r["insert"]);
})();

Next time you init vdb from test.json, NanoVectorDB will load the index automatically.

Upsert

// NOTE(review): the 1 s delay presumably gives the DB time to finish its
// asynchronous initialization before it is used — confirm against the library.
setTimeout(() => {
    // Declare the result with `const`: a bare `r = ...` creates an implicit
    // global and throws a ReferenceError in strict mode / ES modules.
    const r = vdb.upsert(fakesData);
    // The result is read under two keys: "update" and "insert".
    console.log(r["update"], r["insert"]);
}, 1000);

Query

setTimeout(() => {
    // query with embedding 
    const queryData = Float32Array.from({ length: fakeDim }, () => Math.random());

    // arguments:
    const topK = 5;
    const betterThanThreshold = 0.01;
    const queryResult = vdb.query(queryData, topK, betterThanThreshold);
    console.log(queryResult);
}, 1000);

Conditional filter

setTimeout(() => {
    // Random query embedding with the same dimension the DB was created with
    // (`fakeDim`, declared in the "Faking your data" snippet).
    const queryData = Float32Array.from({ length: fakeDim }, () => Math.random());
    const topK = 5;
    const betterThanThreshold = 0.01;
    // The fourth argument is a predicate called with a stored record.
    // Use this form when __id__ is a numeric string; the explicit radix
    // makes the parse unambiguous.
    const queryResult = vdb.query(
        queryData,
        topK,
        betterThanThreshold,
        (data) => Number.parseInt(data.__id__, 10) >= 70
    );
    // const queryResult = vdb.query(queryData, topK, betterThanThreshold, (data) => data.__id__ === "ANY_STRING"); // when __id__ is a string
    console.log(queryResult);
}, 1000);

Save

// will create/overwrite 'test.json'
vdb.save()

Get, Delete

setTimeout(() => {
    // get() takes an array of ids; fetch the first stored record by its id.
    const firstId = vdb.storage.data[0][dbs.F_ID];
    const records = vdb.get([firstId]);
    console.log(records);

    // Keep the ids themselves so the same lookup can be repeated after deletion.
    const ids = records.map((record) => record[dbs.F_ID]);
    records.forEach((record) => {
        console.log(record);
        vdb.delete([record[dbs.F_ID]]);
    });

    // Look the deleted ids up again (pass ids, not the record objects).
    console.log(vdb.get(ids));
}, 1000);

Additional Data

setTimeout(() => {
    vdb.storeAdditionalData({a:1, b:2, c:3});
    console.log(vdb.getAdditionalData());
}, 1000);

Multi-Tenancy

If you need to work with multiple vector DBs, you can use MultiTenantNanoVDB to manage them:

MultiTenantNanoVDB uses a queue to manage the vector DBs held in memory; you can adjust this with the max_capacity parameter.

const dbs = require("nano-vectordb-js"); // Nodejs
// import { MultiTenantNanoVDB } from "nano-vectordb-js"; // ES6

const multiTenant = new dbs.MultiTenantNanoVDB(1024, "cosine", 1000, "./test");
// const multiTenant = new MultiTenantNanoVDB(1024, "cosine", 1000, "./test");
const tenantId = multiTenant.createTenant("1");

// tenant is a NanoVectorDB, you can upsert, query, get... on this.
const tenant = multiTenant.getTenant(tenantId);
console.log(tenant);

// some chores:
multiTenant.containTenant(tenantId);
multiTenant.deleteTenant(tenantId);
multiTenant.containTenant(tenantId);

// save it
multiTenant.save();
1.1.2

12 months ago

1.1.1

12 months ago

1.1.0

12 months ago

1.0.7

12 months ago

1.0.6

12 months ago

1.0.5

12 months ago

1.0.4

12 months ago

1.0.3

12 months ago

1.0.2

12 months ago

1.0.1

12 months ago

1.0.0

12 months ago