Implementazioni di algoritmi/Test Chi Quadrato
Aspetto
Con Test Chi Quadrato si intende uno dei test di verifica d'ipotesi usati in statistica che utilizzano la variabile casuale Chi Quadrato per verificare se l'ipotesi nulla è probabilisticamente compatibile con i dati. A seconda delle ipotesi di partenza usate per costruire il test, tali test vengono considerati a volte parametrici e altre volte non parametrici.
Implementazione in Java
[modifica | modifica sorgente] /**
* Calculates the chi-square value for N positive integers less than r
* Source: "Algorithms in C" - Robert Sedgewick - pp. 517
* NB: Sedgewick recommends: "...to be sure, the test should be tried a few times,
* since it could be wrong in about one out of ten times."
* @param randomNums a uniformly-randomly-generated array of integers
* @param r upper bound for the random range
* @return whether it is likely to be uniformly randomly generated
*/
public static boolean isRandom(int[] randomNums, int r)
{
//According to Sedgewick: "This is valid if N is greater than about 10r"
if (randomNums.length <= 10 * r)
return false;
//PART A: Get frequency of randoms
Map<Integer,Integer> ht = getFrequencies(randomNums);
//PART B: Calculate chi-square - this approach is in Sedgewick
double n_r = (double)randomNums.length / r;
double chiSquare = 0;
for (int v : ht.values())
{
double f = v - n_r;
chiSquare += f * f;
}
chiSquare /= n_r;
//PART C: According to Swdgewick: "The statistic should be within 2(r)^1/2 of r
//This is valid if N is greater than about 10r"
return Math.abs(chiSquare - r) <= 2 * Math.sqrt(r);
}
/**
* @param nums an array of integers
* @return a Map, key being the number and value its frequency
*/
private static Map<Integer,Integer> getFrequencies(int[] nums)
{
Map<Integer,Integer> freqs = new HashMap<Integer,Integer>();
for (int x : nums)
{
if (freqs.containsKey(x))
freqs.put(x, freqs.get(x) + 1);
else
freqs.put(x, 1);
}
return freqs;
}
Implementazione in Python
[modifica | modifica sorgente]import math # for sqrt
def isRandom(randomNums, r):
""" Calculates the chi-square value for N positive integers less than r
Source: "Algorithms in C" - Robert Sedgewick - pp. 517
NB: Sedgewick recommends: "...to be sure, the test should be tried a few times,
since it could be wrong in about one out of ten times." """
#Calculate the number of samples - N
N = len(randomNums)
#According to Sedgewick: "This is valid if N is greater than about 10r"
if N <= 10 * r:
return False
N_r = N / r
chi_square = 0
#PART A: Get frequency of randoms
HT = getFrequencies(randomNums)
#PART B: Calculate chi-square - this approach is in Sedgewick
for v in HT.itervalues():
f = v - N_r
chi_square += f * f
chi_square /= N_r
#PART C: According to Swdgewick: "The statistic should be within 2(r)^1/2 of r
#This is valid if N is greater than about 10r"
return abs(chi_square - r) <= 2 * math.sqrt(r)
def getFrequencies(nums):
""" Gets the frequency of occurance of a randomly generated array of integers
Output: A dictionary, key being the random number and value its frequency """
freqs = {}
for x in nums:
if x in freqs:
freqs[x] += 1
else:
freqs[x] = 1
return freqs