Conditional-entropy NPM

conditional-entropy

Utilities for calculating conditional entropy over an array of objects that share similar fields.

Installation

npm i conditional-entropy

Usage

// Everything the package exports. Each trailing comment gives the call signature
// first, followed by a short description where one is available.
var {
    quantize, //quantize(data, nBuckets) //quantize float fields in the data object array by range + n-buckets
    normalize, //normalize(data) //normalize data by mean + std
    entropy, //entropy(data, field) //get the total entropy in data[field]
    conditionalEntropy, //conditionalEntropy(data, conditionFields, targetField, recordConditionTargetMap=false /*see getConditionTargetMap below*/) //conditional entropy of targetField given conditionFields, i.e. how well conditionFields predict targetField
    combinations, //combinations(arr) //get combinations of the values in arr
    getFieldCombinations, //getFieldCombinations(data) //get combinations of the fields in data
    getDateFields, //getDateFields(data) //detect date fields in data [requires npm moment]
    correlations, //correlations(data, conditionFields, targetField)
    getArrFieldRanges, //getArrFieldRanges(data)
    getArrFieldStats, //getArrFieldStats(data)
    getRandomSubRange, //getRandomSubRange(rangeDesc) //get a random range description [similar to what is returned by getArrFieldRanges] that is smaller than the given one
    findItemsLikeTemplate, //findItemsLikeTemplate(data, templateObj)
    getConditionTargetMap //get the ConditionTargetMap used by the most recently run conditionalEntropy call [assuming recordConditionTargetMap was true]
} = require('conditional-entropy');

/**
 * Generate a synthetic data set of continuous fields for the demo below.
 *
 * Suppose we want to predict `label` using the other fields [a, b, c, d, sublabel]:
 *  - `a` is Math.random() and `b` is 1 - a, so inside `d` they cancel each other
 *    out and are almost information-less with respect to `label`;
 *  - `c` is Math.random() * 3, so most of the info affecting `label` is in `c`;
 *  - `d` is a + b + c;
 *  - `sublabel` ("high" when d > 1.6) is also a signal for `label`
 *    ("high" when d > 2.0), just with a different threshold.
 *
 * Lower values of conditional entropy mean the given variable tells us more
 * about the label, i.e. there is less 'uncertainty left'.
 *
 * @param {number} [nRows=9999] - number of rows to generate.
 * @returns {Array<{a: number, b: number, c: number, d: number, sublabel: string, label: string}>}
 *          the generated rows, in generation order.
 */
function generateContData(nRows = 9999){
    const data = [];
    for (let i = 0; i < nRows; i++) {
        const a = Math.random();
        const b = 1 - a; // complement of a: a + b is (up to rounding) always 1
        const c = Math.random() * 3;
        const d = a + b + c;
        data.push({
            a, b, c, d,
            sublabel: d > 1.6 ? "high" : "low",
            label: d > 2.0 ? "high" : "low"
        });
    }
    return data;
}

// Demo pipeline: generate rows -> normalize (optional) -> quantize into buckets
// -> print the conditional entropy of 'label' for every field combination.
var nBucketsPerField = 5;
var data = normalize(generateContData()); //normalizing is optional 
var qData = quantize(data, nBucketsPerField);
const permutationsOfFields = getFieldCombinations(qData, 'label');
for (const conditionFields of permutationsOfFields) {
    // Lower output = these fields leave less uncertainty about 'label'.
    const remainingEntropy = conditionalEntropy(qData, conditionFields, 'label');
    console.log(`Conditional entropy given ${conditionFields}: ${remainingEntropy}`);
}

// Conditional entropy given a: 0.9303379115523887 << very little info provided by a or b
// Conditional entropy given b: 0.9303379115523887
// Conditional entropy given c: 0.19209595425394121 << lots of info provided by c
// Conditional entropy given d: 0.19209595425394121 << same amount of info provided by d, because d is basically redundant with c [since a+b=1, d=c+1]
// Conditional entropy given sublabel: 0.5348448213427465 << sublabel provides some info but not as much as c or d
// Conditional entropy given a,b: 0.9303379115523887 << combination of a and b gives us nothing more than either one
// Conditional entropy given a,c: 0.19191066437939053 << combining a with c (or with d) gives us about as much info as c or d alone, because 'a' contains little to no info
// Conditional entropy given a,d: 0.19191066437939053
//   ...

//bonus - convenience functions:
{
    // Block scope keeps these demo-only intermediates out of the surrounding scope.
    // correlations, ranges and stats only work for numerical fields!
    const fieldCorrelations = correlations(qData, Object.keys(qData[0]), 'c');
    console.log("correlations", fieldCorrelations);

    const fieldRanges = getArrFieldRanges(qData);
    console.log("ranges", fieldRanges);

    // Ranges are recomputed here, exactly as a stand-alone call would do.
    const randomSubRange = getRandomSubRange(getArrFieldRanges(qData));
    console.log("random sub-range", randomSubRange);

    const fieldStats = getArrFieldStats(qData);
    console.log("stats", fieldStats);
}


//correlations
// {
//     a: -0.0010239766902628352,
//     b: 0.0010239766902628352,
//     c: 1,
//     d: 1,
//     sublabel: NaN,
//     label: NaN
// }
//ranges
// {
//     a: { min: 0, max: 4 },
//     b: { min: 0, max: 4 },
//     c: { min: 0, max: 4 },
//     d: { min: 0, max: 4 }
// }
// random sub-range {
//     a: { min: 1.4271687649273508, max: 2.9265827904030477 },
//     b: { min: 1.6165176928519145, max: 2.1913277733064174 },
//     c: { min: 2.530036662657582, max: 2.8261725300383054 },
//     d: { min: 0.13376527561589402, max: 0.7824113880110799 }
// }
//
// stats
// {
//     a: {
//             sum: 20024,
//             sumOfSquares: 60178,
//             count: 9999,
//             mean: 2.0026002600260027,
//             stdDev: 1.4170370632865623
//     },
//     b: {
//             sum: 19972,
//             sumOfSquares: 59970,
//             count: 9999,
//             mean: 1.9973997399739973,
//             stdDev: 1.4170370632865625
//     },
//     c: {
//             sum: 19825,
//             sumOfSquares: 59367,
//             count: 9999,
//             mean: 1.9826982698269826,
//             stdDev: 1.4164043561772983
//     },
//     d: {
//             sum: 19825,
//             sumOfSquares: 59367,
//             count: 9999,
//             mean: 1.9826982698269826,
//             stdDev: 1.4164043561772983
//     }
// }

@everything-registry/sub-chunk-1369

2 years ago

2 years ago

2 years ago

2 years ago

2 years ago

2 years ago

2 years ago

2 years ago

2 years ago