- "A probability distribution estimated from counts in datafile."
- def __init__(self, data=[], N=None, missingfn=None):
- for key,count in data:
- self[key] = self.get(key, 0) + int(count)
- self.N = float(N or sum(self.itervalues()))
- self.missingfn = missingfn or (lambda k, N: 1./N)
- def __call__(self, key):
- if key in self: return self[key]/self.N
- else: return self.missingfn(key, self.N)
+ """A probability distribution estimated from counts in datafile.
+ Values are stored and returned as log probabilities.
+ """
+ def __init__(self, data=[], estimate_of_missing=None):
+ data1, data2 = itertools.tee(data)
+ self.total = sum([int(d[1]) for d in data1])
+ for key, count in data2:
+ self[key] = log10(int(count) / self.total)
+ self.estimate_of_missing = estimate_of_missing or (lambda k, N: 1./N)
+ def __missing__(self, key):
+ return self.estimate_of_missing(key, self.total)