###################################
## statistical distributions
## 10/2004 Wolfgang.Urban@schule.at
###################################
import math
# #######################
# binomial distribution #
# ##################### #
# straightforward calculation
#
def binomial_1(k,n,p):
## special cases
if p==0.0:
if k==0: return 1.0
else: return 0.0
if p==1.0:
if k==n: return 1.0
else: return 0.0
if k<=n/2: k1=k
else: k1=n-k # fewer calculations
## binomial coefficient
fact = 1.0
for i in range(0,k1): fact = fact*(n-i)/(k1-i)
## probability
return fact*p**k*(1.0-p)**(n-k) # binomial probability
# using recursion b(k,n,p)=b(k-1,n,p)*(n-k+1)/i*p/q
# special cases omitted
#
def binomial_2(k,n,p):
prob = (1.0-p)**n
for i in range(1,k+1):
prob = prob*(n-i+1)/i*p/(1.0-p)
return prob
# to avoid overflow with large n or underflow with small p
# e.g. 0.5**1075 = 0 (large n gives result 0), we use logarithms
#
def binomial_1a(k,n,p):
## special cases
if p==0.0:
if k==0: return 1.0
else: return 0.0
if p==1.0:
if k==n: return 1.0
else: return 0.0
if k<=n/2: k1=k
else: k1=n-k
fact = 0.0
for i in range(0,k1): fact += math.log(float(n-i)/(k1-i))
result_log = fact+k*math.log(p)+(n-k)*math.log(1.0-p)
return math.exp(result_log)
# we can do the same with version 2
# p must not be 0 or 1
#
def binomial_2a(k,n,p):
q = 1.0-p
m = float(n+1)
r = math.log(p/q)
prob = n*math.log(q)
for i in range(1,k+1):
prob = prob + r + math.log(m/i-1.0)
return math.exp(prob)
# we prefer version 1a to calculate binomial probabilities.
#
binomialp = binomial_1a
# cumulative probability
# version 2a is easily adapted
#
def binomialc(k,n,p):
## special cases
if p==0.0:
return 1.0
if p==1.0:
if k==n: return 1.0
else: return 0.0
q = 1.0-p
m = float(n+1)
r = math.log(p/q)
prob = n*math.log(q)
cum = math.exp(prob)
for i in range(1,k+1):
prob += r + math.log(m/i-1.0)
cum += math.exp(prob)
return cum
# ######################
# poisson distribution #
# ######################
# poisson probability
#
def poissonp(k,m):
prob = math.exp(-m)
for i in range(1,k+1):
prob = prob*m/i # m**k/k!
return prob
# cumulative probability
#
def poissonc(k,m):
prob = math.exp(-m)
cum = prob
for i in range(1,k+1):
prob = prob*m/i
cum += prob
return cum
# #####################
# normal distribution #
# #####################
# standard normal density
#
def normald(z):
return 1.0/math.sqrt(2.0*math.pi)*math.exp(-z*z/2.0)
# standard normal cumulative from z=a to z=b, simpson's rule
#
def normal_ab(z1,z2):
if z2<z1: z1,z2 = z2,z1
z1,z2 = float(z1),float(z2)
n = 50 # subdivisions
dz = (z2-z1)/n # length of interval
cum = math.exp(-z1*z1/2.0)+math.exp(-z2*z2/2.0) # f(a=0)+f(b=z)
t = z1+dz
for i in range(1,n,2):
cum = cum+4.0*math.exp(-t*t/2.0) # 4*f(odd)
t += 2*dz
t = z1+2.0*dz
for i in range(2,n-1,2):
cum = cum+2.0*math.exp(-t*t/2.0) # 2*f(even)
t += 2*dz
cum = cum*dz/3.0
cum = cum/math.sqrt(2*math.pi)
return cum
# standard normal cumulative, mapping to positive z
#
def normalc1(z):
z = float(z)
z_old = z
z = abs(z)
if z>8.0:
if z_old<0: return 0.0
else: return 1.0
n = 100 # subdivisions
if z<1.0: n /= 2 # don't overdo...
dz = z/n # length of interval
cum = 1+math.exp(-z*z/2.0) # f(a=0)+f(b=z)
t = dz
for i in range(1,n,2):
cum = cum+4.0*math.exp(-t*t/2.0) # 4*f(odd)
t += 2*dz
t = 2.0*dz
for i in range(2,n-1,2):
cum = cum+2.0*math.exp(-t*t/2.0) # 2*f(even)
t += 2*dz
cum = cum*dz/3.0
cum = cum/math.sqrt(2*math.pi) + 1.0/2.0 # factor + started at zero
if z_old>=0.0: return cum
else: return 1.0-cum
# summing up explicit series. Precision 7-8 decimals
# z=6: 3 times as fast, z=0.5 10 times as fast as integration
#
def normalc2(z):
z = float(z)
z_old = z
z = abs(z)
if z>6:
if z_old<0: return 0.0
else: return 1.0
cum = z
sign = -1.0
fact = z
n = 1
while 1:
fact = fact*z*z/(2*n)
add = fact/(2*n+1)
cum += sign*add
if add<10**(-8): break # sufficient precision
sign = -sign
n += 1
cum = cum/math.sqrt(2*math.pi) + 1.0/2.0
if z_old>=0.0: return cum
else: return 1.0-cum
normalc = normalc2
# ####################################################
# probabilities
# a single event X has probability p
# calculate prob. of k occurrences X within n events
#
def binomialP(k,n,p):
return binomialp(k,n,p)
def poissonP(k,n,p):
return poissonp(k,n*p)
def normalP(k,n,p):
m = n*float(p)
sigma = math.sqrt(n*p*(1.0-p))
z2 = (k+0.5-m)/sigma
z1 = (k-0.5-m)/sigma
return normalc(z2)-normalc(z1)
########################################################
# return probabilities k=0..n,n,p as list
#
def binomial_list(n,p):
prob = float(1-p)**n
cum = [prob]
for i in range(1,n+1):
prob = prob*(n-i+1)/i*p/(1.0-p)
cum.append(prob)
return cum
def poisson_list(n,p):
m = n*float(p)
prob = math.exp(-m)
cum = [prob]
for i in range(1,n+1):
prob = prob*m/i
cum.append(prob)
return cum
def normal_list(n,p):
m = n*float(p)
sigma = math.sqrt(n*p*(1.0-p))
cum = []
for k in range(0,n+1):
z2 = (k+0.5-m)/sigma
z1 = (k-0.5-m)/sigma
prob = normalc(z2)-normalc(z1)
cum.append(prob)
return cum
# ####################################################
# test routines
# ####################################################
# calculate a single probability
#
def testp(k,n,p):
print binomialP(k,n,p),poissonP(k,n,p),normalP(k,n,p),
# generate a list, k=0..n, for each distribution
#
def testlist(n,p):
bi = binomial_list(n,p)
po = poisson_list(n,p)
no = normal_list(n,p)
print " k biomial normal poisson"
for i in range(n+1):
print "%3d: %6.2f %6.2f %6.2f"%(i,100*bi[i],100*no[i],100*po[i])