#!/usr/bin/env python
import sys, os

# Character / digram / trigram frequency report.
#
# Reads the file named by the first command-line argument, upper-cases its
# contents, counts every single character, every adjacent pair and every
# adjacent triple, and prints the counts in descending order together with
# their share of the respective total.
#
# The code is written so that it runs unchanged under Python 2 and Python 3
# (single-argument print(...) calls, dict.items()).

# A digram row is still printed when it first falls below this percentage;
# the digram listing stops right after that row.
DIGRAM_MIN_PCT = 0.4

# Number of trigram rows shown. The listing historically stopped after the
# row with index 21, i.e. 22 rows in total.
TRIGRAM_LIMIT = 22


def count_ngrams(data):
    """Count single characters, digrams and trigrams in *data*.

    Only characters with a code point in range(256) are counted. A digram
    is counted when both of its characters are countable; a trigram is
    counted when all three are.

    Returns a tuple ``(singles, digrams, trigrams)`` of dicts mapping the
    n-gram string to its number of occurrences. ``singles`` always holds
    all 256 keys (zero for characters that never occur); the other two
    only hold n-grams that were actually seen.
    """
    singles = dict.fromkeys([chr(code) for code in range(256)], 0)
    digrams = {}
    trigrams = {}
    size = len(data)
    for pos, ch in enumerate(data):
        if ch not in singles:
            continue
        singles[ch] += 1
        if pos + 1 >= size or data[pos + 1] not in singles:
            continue
        pair = ch + data[pos + 1]
        digrams[pair] = digrams.get(pair, 0) + 1
        if pos + 2 >= size or data[pos + 2] not in singles:
            continue
        triple = pair + data[pos + 2]
        trigrams[triple] = trigrams.get(triple, 0) + 1
    return singles, digrams, trigrams


def rank_counts(counts):
    """Return ``(count, key)`` pairs from *counts*, highest count first.

    Ties are broken by key in descending order, which matches sorting the
    pairs ascending and then reversing the result.
    """
    return sorted(((count, key) for key, count in counts.items()),
                  reverse=True)


def percent(count, total):
    """Return *count* as a percentage of *total*; 0.0 when *total* is 0."""
    if not total:
        # Nothing was counted (e.g. empty input); avoid dividing by zero.
        return 0.0
    return 100 * count / float(total)


def format_row(key, count, pct):
    """Format one report row for n-gram *key*."""
    return "%s OCCURRED\t%d (%.1f pct)" % (repr(key), count, pct)


def build_report(data):
    """Return the report for *data* as a list of output lines.

    The report has three sections separated by an empty line: all 256
    single characters, the digrams down to and including the first one
    below DIGRAM_MIN_PCT, and the TRIGRAM_LIMIT most frequent trigrams.
    """
    singles, digrams, trigrams = count_ngrams(data)
    lines = []

    total = sum(singles.values())
    for count, key in rank_counts(singles):
        lines.append(format_row(key, count, percent(count, total)))

    lines.append("")
    total = sum(digrams.values())
    for count, key in rank_counts(digrams):
        pct = percent(count, total)
        lines.append(format_row(key, count, pct))
        if pct < DIGRAM_MIN_PCT:
            break

    lines.append("")
    total = sum(trigrams.values())
    for count, key in rank_counts(trigrams)[:TRIGRAM_LIMIT]:
        lines.append(format_row(key, count, percent(count, total)))

    return lines


def main(argv):
    """Print the frequency report for the file named in ``argv[1]``.

    Exits with a usage message on stderr when no file name is given.
    """
    if len(argv) < 2:
        sys.exit("usage: %s FILE" % (argv[0] if argv else "script"))
    path = argv[1]
    print("opening %s" % path)
    # The with-block closes the file even if read() raises.
    with open(path) as handle:
        data = handle.read().upper()
    for line in build_report(data):
        print(line)


if __name__ == "__main__":
    main(sys.argv)
