0.0.117 • Published 6 months ago
conditional-entropy v0.0.117
conditional-entropy
Utilities for calculating conditional entropy over an array of objects that share similar fields.
Installation
npm i conditional-entropy
Usage
// Destructure the package's helpers; each trailing comment shows the call signature and, where given, a short description.
var {
quantize, // quantize(data, nBuckets): quantize float fields in the data object array by range + n-buckets
normalize, // normalize(data): normalize data by mean + std
entropy, // entropy(data, field): get the total entropy in data[field]
conditionalEntropy, // conditionalEntropy(data, conditionFields, targetField, recordConditionTargetMap=false): conditional entropy of conditionFields to predict targetField (for recordConditionTargetMap, see getConditionTargetMap below)
combinations, // combinations(arr): get combinations of the values in arr
getFieldCombinations, // getFieldCombinations(data): get combinations of the fields in data
getDateFields, // getDateFields(data): detect date fields in data [requires npm moment]
correlations, // correlations(data, conditionFields, targetField)
getArrFieldRanges, // getArrFieldRanges(data)
getArrFieldStats, // getArrFieldStats(data)
getRandomSubRange, // getRandomSubRange(rangeDesc): get a random range description [similar to what is returned by getArrFieldRanges] that is smaller than the given one
findItemsLikeTemplate, // findItemsLikeTemplate(data, templateObj)
getConditionTargetMap // get the ConditionTargetMap used by the most recently run conditionalEntropy call [assuming recordConditionTargetMap was true]
} = require('conditional-entropy');
/**
 * Build a synthetic data set for demonstrating conditional entropy.
 *
 * The goal is to predict `label` from the other fields [a, b, c, d, sublabel]:
 *   - a        : random value in [0, 1)
 *   - b        : 1 - a, so a and b cancel each other out in the sum and are
 *                redundant / almost information-less with respect to `label`
 *   - c        : random value in [0, 3); carries most of the information
 *                that decides the final `label`
 *   - d        : a + b + c
 *   - sublabel : "high" when d > 1.6, otherwise "low" (also a signal for label)
 *   - label    : "high" when d > 2.0, otherwise "low"
 *
 * Lower conditional entropy for a field means that field tells us more about
 * the label, i.e. there is less uncertainty left.
 *
 * @param {number} [count=9999] - Number of rows to generate.
 * @param {function(): number} [random=Math.random] - Source of numbers in
 *   [0, 1); pass a deterministic function to get reproducible data.
 * @returns {Array<{a: number, b: number, c: number, d: number, sublabel: string, label: string}>}
 *   The generated rows.
 */
function generateContData(count = 9999, random = Math.random) {
  const data = [];
  for (let i = 0; i < count; i++) {
    const a = random();
    const b = 1 - a;
    const c = random() * 3;
    const d = a + b + c;
    data.push({
      a,
      b,
      c,
      d,
      sublabel: d > 1.6 ? "high" : "low",
      label: d > 2.0 ? "high" : "low",
    });
  }
  return data;
}
// Number of buckets each float field is quantized into.
var nBucketsPerField = 5;
// Normalizing before quantizing is optional.
var data = normalize(generateContData());
var qData = quantize(data, nBucketsPerField);
const permutationsOfFields = getFieldCombinations(qData, 'label');

// Print the conditional entropy of 'label' given one combination of fields.
const logConditionalEntropy = (perm) => {
  const result = conditionalEntropy(qData, perm, 'label');
  console.log(`Conditional entropy given ${perm}: ${result}`);
};

permutationsOfFields.forEach(logConditionalEntropy);
// Conditional entropy given a: 0.9303379115523887 << very little info provided by a or b
// Conditional entropy given b: 0.9303379115523887
// Conditional entropy given c: 0.19209595425394121 << lots of info provided by c
// Conditional entropy given d: 0.19209595425394121 << same amount of info provided by d, because it is basically redundant with c [because a+b=1]
// Conditional entropy given sublabel: 0.5348448213427465 << sublabel provides some info but not as much as c or d
// Conditional entropy given a,b: 0.9303379115523887 << combination of a and b gives us nothing more than either one
// Conditional entropy given a,c: 0.19191066437939053 << combination of a and c or d gives us about as much info as c or d because 'a' contains little to no info
// Conditional entropy given a,d: 0.19191066437939053
// ...
// Bonus: convenience functions. Per the package notes, correlations, ranges
// and stats only work for numerical fields.
const allFieldNames = Object.keys(qData[0]);
const fieldCorrelations = correlations(qData, allFieldNames, 'c');
console.log("correlations", fieldCorrelations);

const fieldRanges = getArrFieldRanges(qData);
console.log("ranges", fieldRanges);

// The ranges are computed again here and narrowed to a random sub-range.
const randomSubRange = getRandomSubRange(getArrFieldRanges(qData));
console.log("random sub-range", randomSubRange);

const fieldStats = getArrFieldStats(qData);
console.log("stats", fieldStats);
//correlations
// {
// a: -0.0010239766902628352,
// b: 0.0010239766902628352,
// c: 1,
// d: 1,
// sublabel: NaN,
// label: NaN
// }
//ranges
// {
// a: { min: 0, max: 4 },
// b: { min: 0, max: 4 },
// c: { min: 0, max: 4 },
// d: { min: 0, max: 4 }
// }
// random sub-range {
// a: { min: 1.4271687649273508, max: 2.9265827904030477 },
// b: { min: 1.6165176928519145, max: 2.1913277733064174 },
// c: { min: 2.530036662657582, max: 2.8261725300383054 },
// d: { min: 0.13376527561589402, max: 0.7824113880110799 }
// }
//
// stats
// {
// a: {
// sum: 20024,
// sumOfSquares: 60178,
// count: 9999,
// mean: 2.0026002600260027,
// stdDev: 1.4170370632865623
// },
// b: {
// sum: 19972,
// sumOfSquares: 59970,
// count: 9999,
// mean: 1.9973997399739973,
// stdDev: 1.4170370632865625
// },
// c: {
// sum: 19825,
// sumOfSquares: 59367,
// count: 9999,
// mean: 1.9826982698269826,
// stdDev: 1.4164043561772983
// },
// d: {
// sum: 19825,
// sumOfSquares: 59367,
// count: 9999,
// mean: 1.9826982698269826,
// stdDev: 1.4164043561772983
// }
// }