1.1.2 • Published 6 months ago

nano-vectordb-js v1.1.2

Weekly downloads
-
License
AGPL-3.0-or-later
Repository
-
Last release
6 months ago

šŸŒ¬ļø A implementation of NanoVectorDB in js.

⚔ Fast vector operations.

šŸƒ Support naive multi-tenancy.

Install

Install from npm

npm install nano-vectordb-js

use in browser:

  • download the file dbs.min.js from here
  • use it in your html file:
<script src="dbs.min.js"></script>

the example is in the file here

Quick Start

Faking your data:

// Number of fake records and the dimension of each embedding
const dataLen = 100;
const fakeDim = 1536;

/**
 * Build a matrix of random values, one Float32Array per row.
 *
 * @param {number} rows - number of row vectors to create
 * @param {number} cols - number of entries in each row
 * @returns {Float32Array[]} `rows` arrays of `cols` values drawn from Math.random()
 */
function generateRandomMatrix(rows, cols) {
    const randomRow = () => Float32Array.from({ length: cols }, () => Math.random());
    return Array.from({ length: rows }, randomRow);
}

// One random embedding per record
const fakeEmbeds = generateRandomMatrix(dataLen, fakeDim);

// Show the matrix shape (for debugging only)
console.log(`Shape: [${dataLen}, ${fakeDim}]`);

// Pair every embedding with a string id derived from its position
const fakesData = fakeEmbeds.map((embedding, index) => ({
    __vector__: embedding,
    __id__: String(index)
}));

// Print the first record to verify the result
console.log(fakesData.slice(0, 1));

output:

[
  {
    __vector__: Float32Array(1536) [
       0.7478081583976746,   0.817471444606781, 
       ...,
       0.8189947009086609,
         0.8611364364624023,
       0.4231139123439789,
      ... 1436 more items
    ],
    __id__: '0'
  }
]

You can add any fields to a data item, but two keys are reserved:

  • __id__: If passed, NanoVectorDB will use your id, otherwise a generated id will be used.
  • __vector__: required; your embedding, which must be a Float32Array.

Init a DB

// Node.js (CommonJS)
const dbs = require("nano-vectordb-js"); 
// Create a database with embedding_dim set to fakeDim, the "cosine" metric,
// and "test.json" as its storage file.
const vdb = new dbs.NanoVectorDB({
        embedding_dim: fakeDim, 
        metric: "cosine", 
        storage_file: "test.json", 
    });

// ES6 modules: NanoVectorDB is imported by name, so it is used without the `dbs.` prefix
// import { NanoVectorDB } from "nano-vectordb-js";
// const vdb = new NanoVectorDB({
//         embedding_dim: fakeDim, 
//         metric: "cosine", 
//         storage_file: "test.json", 
//     });

You can also pass isSync: true and await postInit() inside an async function, so the DB is initialized before you use it:

// Initialize inside an async function: construct with isSync: true, then
// await postInit() before the first upsert.
(async () => {
    const vdb = new dbs.NanoVectorDB({
        embedding_dim: fakeDim,
        metric: "cosine",
        storage_file: "test.json",
        isSync: true,
    });
    await vdb.postInit();
    // Declared with `const`: a bare `r = ...` creates an implicit global and
    // throws a ReferenceError in strict mode / ES modules.
    const r = vdb.upsert(fakesData);
    // The upsert result exposes "update" and "insert" entries.
    console.log(r["update"], r["insert"]);
})();

Next time you init vdb from test.json, NanoVectorDB will load the index automatically.

Upsert

// NOTE(review): the 1 s delay presumably gives the DB time to finish loading — confirm.
setTimeout(() => {
    // Declared with `const`: a bare `r = ...` creates an implicit global and
    // throws a ReferenceError in strict mode / ES modules.
    const r = vdb.upsert(fakesData);
    // The upsert result exposes "update" and "insert" entries.
    console.log(r["update"], r["insert"]);
}, 1000);

Query

setTimeout(() => {
    // Search arguments: how many results to ask for, and the threshold
    // passed as the third argument of query().
    const limit = 5;
    const threshold = 0.01;

    // Random query embedding of length fakeDim
    const probe = new Float32Array(fakeDim).map(() => Math.random());

    console.log(vdb.query(probe, limit, threshold));
}, 1000);

Conditional filter

setTimeout(() => {
    // `fakeDim` is the dimension constant used throughout; `fake_dim` was undefined.
    const queryData = Float32Array.from({ length: fakeDim }, () => Math.random());
    const topK = 5;
    const betterThanThreshold = 0.01;
    // The fourth argument is a filter callback; this one keeps items whose
    // numeric-string __id__ parses (base 10) to 70 or more.
    const queryResult = vdb.query(
        queryData,
        topK,
        betterThanThreshold,
        (data) => Number.parseInt(data.__id__, 10) >= 70 // when __id__ is a string of number
    );
    // const queryResult = vdb.query(queryData, topK, betterThanThreshold, (data) => data.__id__ === "ANY_STRING"); // when __id__ is a string
    // Print the filtered result, matching the plain query example.
    console.log(queryResult);
}, 1000);

Save

// Persist the DB: this will create/overwrite 'test.json'
// (the storage_file given when the DB was constructed).
vdb.save()

Get, Delete

setTimeout(() => {
    // Take the id of the first stored item; get() is called with an array of ids.
    const targetIds = [vdb.storage.data[0][dbs.F_ID]];
    const records = vdb.get(targetIds);
    console.log(records);
    records.forEach((record) => {
        console.log(record);
        // Delete each fetched record by its id field.
        vdb.delete([record[dbs.F_ID]]);
    });
    // Look the same ids up again (ids, not the record objects) to see the
    // state after deletion.
    console.log(vdb.get(targetIds));
}, 1000);

Additional Data

setTimeout(() => {
    // Store an arbitrary object alongside the vectors, then read it back.
    const extra = { a: 1, b: 2, c: 3 };
    vdb.storeAdditionalData(extra);
    const stored = vdb.getAdditionalData();
    console.log(stored);
}, 1000);

Multi-Tenancy

If you need to work with multiple vector DBs, you can use MultiTenantNanoVDB to manage them:

MultiTenantNanoVDB uses a queue to manage the vector DBs held in memory; you can adjust its size with the max_capacity parameter.

const dbs = require("nano-vectordb-js"); // Node.js (CommonJS)
// import { MultiTenantNanoVDB } from "nano-vectordb-js"; // ES6 modules

// NOTE(review): the positional arguments look like (embedding dim, metric,
// max_capacity, storage directory) — confirm against the library.
const multiTenant = new dbs.MultiTenantNanoVDB(1024, "cosine", 1000, "./test");
// const multiTenant = new MultiTenantNanoVDB(1024, "cosine", 1000, "./test"); // with the ES6 import
// createTenant returns the id that the calls below use to refer to the tenant.
const tenantId = multiTenant.createTenant("1");

// The tenant is a NanoVectorDB, so you can upsert, query, get... on it.
const tenant = multiTenant.getTenant(tenantId);
console.log(tenant);

// Housekeeping: check for the tenant, delete it, then check again.
// (The results of containTenant are not used here.)
multiTenant.containTenant(tenantId);
multiTenant.deleteTenant(tenantId);
multiTenant.containTenant(tenantId);

// Persist the multi-tenant manager
multiTenant.save();
1.1.2

6 months ago

1.1.1

6 months ago

1.1.0

6 months ago

1.0.7

6 months ago

1.0.6

6 months ago

1.0.5

6 months ago

1.0.4

6 months ago

1.0.3

6 months ago

1.0.2

6 months ago

1.0.1

6 months ago

1.0.0

6 months ago