Tags

For Pi Day I published a post on my main blog, Logos Con Carne. Here is the Python code behind the data generated in that post.

The first program generates a frequency count of the digits in pi as well as frequency counts of pairs of digits:

'''\
data[digit-n]:
[   count
,   percentage
,   frequency-data[digit-n]:
[   count
,   percentage
]
]

November 2015
'''
from sys import stdin, stdout, stderr, argv
from os import path
from datetime import datetime

# Folder that holds both the input and the output file; the functions
# below join it with the file names that follow.
BasePath = r'C:\my\path\to\this\stuff'
# Name of the digits file that pass_one opens for reading.
InputFile  = 'pi.lst'
# Name of the report file that print_data opens for writing.
OutputFile = 'pi-1.out'

def pass_one(data, filename=None, max_lines=999999):
    '''Input digit data; count digits and digit pairs.

    data      -- list of ten rows, one per digit.  Each row is
                 [count, percentage, pairs], where pairs is a list of
                 ten [count, percentage] rows.  Updated in place:
                 data[n][0] counts occurrences of digit n, and
                 data[n][2][p][0] counts how often digit n came directly
                 after digit p.
    filename  -- file to read; defaults to BasePath joined with InputFile.
    max_lines -- reading stops as soon as the number of lines read
                 exceeds this value.

    Characters other than '0'..'9' (a decimal point, for instance) are
    skipped and are not included in the character count.

    Returns a dict holding the start and end times ('t0', 't1'), the
    number of 'lines' read and the number of digit 'chars' counted.
    '''
    t0 = datetime.now()
    if filename is None:
        filename = path.join(BasePath, InputFile)
    zero = ord('0')
    last_digit = None
    lines = 0
    chars = 0
    # The with-block closes the file even when counting raises.
    with open(filename, 'r') as fp:
        # Read pi digits (line by line)...
        for txt in fp:
            lines += 1
            for ch in txt.strip():
                digit = ord(ch) - zero
                # A non-digit gives an out-of-range index; a negative one
                # would silently be tallied against the wrong digit.
                if not 0 <= digit <= 9:
                    continue
                chars += 1
                # Bump the digit frequency count...
                row = data[digit]
                row[0] += 1
                # Bump the previous-digit frequency count.  The previous
                # digit is kept across line breaks.
                if last_digit is not None:
                    row[2][last_digit][0] += 1
                last_digit = digit
            if max_lines < lines:
                break
    # Return result...
    t1 = datetime.now()
    return {'t0': t0, 't1': t1, 'lines': lines, 'chars': chars}

def pass_two(data, r1):
    '''Calculate frequency count percentages.

    data -- rows of [count, percentage, pairs], where pairs is a list of
            [count, percentage] rows.  Every percentage slot is
            overwritten in place.
    r1   -- result dict of pass_one; only r1['chars'] is read.

    Digit and pair percentages are both taken relative to the total
    character count.  When that count is zero every percentage is set
    to 0.0 instead of dividing by zero.

    Returns a dict holding the start and end times ('t0', 't1').
    '''
    t0 = datetime.now()
    total = float(r1['chars'])

    def percent(count):
        # Nothing was read, so there is no share to report.
        if not total:
            return 0.0
        return float(100) * (float(count) / total)

    # Calculate percentages...
    for row in data:
        row[1] = percent(row[0])
        for pair in row[2]:
            pair[1] = percent(pair[0])
    # Return result...
    t1 = datetime.now()
    return {'t0': t0, 't1': t1}

def print_data(data, r1, r2, filename=None):
    '''Write the digit and digit-pair frequency tables to a text file.

    data     -- ten rows of [count, percentage, pairs], where pairs is a
                list of [count, percentage] rows.
    r1       -- result dict of pass_one; 't0', 't1', 'lines' and 'chars'
                are used.
    r2       -- result dict of pass_two; 't0' and 't1' are used.
    filename -- file to write; defaults to BasePath joined with
                OutputFile.

    The report has a header (start time, line/char totals, elapsed
    seconds per pass), a table of digit frequencies, and then one table
    per digit of previous-digit frequencies.

    Returns a dict holding this function's start and end times
    ('t0', 't1').
    '''
    t0 = datetime.now()
    e1 = (r1['t1'] - r1['t0']).total_seconds()
    e2 = (r2['t1'] - r2['t0']).total_seconds()
    if filename is None:
        filename = path.join(BasePath, OutputFile)
    # The with-block closes the file even when formatting raises.
    with open(filename, 'w') as fp:

        def emit(text=''):
            # One line of output, newline-terminated like a print statement.
            fp.write(text + '\n')

        emit('%(t0)s' % r1)
        emit('%(lines)d lines, %(chars)d chars' % r1)
        emit('Pass-1: %7.3f seconds' % e1)
        emit('Pass-2: %7.3f seconds' % e2)
        emit()
        # Print digit frequency table; the last column is 10 minus the
        # digit's percentage.
        for dx, d in enumerate(data):
            emit('%d: %8d (%9.6f, %+.6f)'
                 % (dx, d[0], d[1], float(10) - d[1]))
        emit()
        emit()
        emit()
        # Print previous digit frequency tables; the last column is
        # 1 minus the pair's percentage.
        for dx, d in enumerate(data):
            emit('%d: %8d (%9.6f)' % (dx, d[0], d[1]))
            for px, p in enumerate(d[2]):
                emit('%d_%d: %8d (%.6f, %+.6f)'
                     % (dx, px, p[0], p[1], float(1) - p[1]))
            emit()
        emit()
    t1 = datetime.now()
    return {'t0': t0, 't1': t1}

# One row per digit: [count, percentage, previous-digit rows], where each
# previous-digit row is itself [count, percentage].
Data = []
for dx in range(10):
    Data.append([0, 0, [[0, 0] for px in range(10)]])

# Count the digits, then turn the counts into percentages...
result1 = pass_one(Data)
result2 = pass_two(Data, result1)

# ...and write the report.
result3 = print_data(Data, result1, result2)

'''eof'''


The second program looks for repeating sequences of digits:

'''\
data[digit-n]:
[   sequence-data[length-n]:
{   count
}
]

November 2015
'''
from sys import stdin, stdout, stderr, argv
from os import path
from datetime import datetime

# Folder that holds both the input and the output file; the functions
# below join it with the file names that follow.
BasePath = r'C:\my\path\to\this\stuff'
# Name of the digits file that pass_one opens for reading.
InputFile  = 'pi.lst'
# Name of the report file that print_data opens for writing.
OutputFile = 'pi-2.out'

# pass_one stops reading once more than this many lines have been read.
MaxLines = 9999999

def _record_run(data, digit, length):
    '''Count one run of `length` equal digits; runs shorter than 2 are ignored.'''
    if 1 < length:
        runs = data[digit]
        runs[length] = runs.get(length, 0) + 1


def pass_one(data, filename=None, max_lines=None):
    '''Input digit data; count runs of repeated digits.

    data      -- list of ten dicts, one per digit.  Updated in place:
                 data[n][k] is the number of runs of exactly k
                 consecutive occurrences of digit n, for k >= 2.
    filename  -- file to read; defaults to BasePath joined with InputFile.
    max_lines -- reading stops as soon as the number of lines read
                 exceeds this value; defaults to the MaxLines global.

    Runs continue across line breaks.  Characters other than '0'..'9'
    are skipped: they neither end a run nor add to the character count.

    Returns a dict holding the start and end times ('t0', 't1'), the
    number of 'lines' read and the number of digit 'chars' counted.
    '''
    t0 = datetime.now()
    if filename is None:
        filename = path.join(BasePath, InputFile)
    if max_lines is None:
        max_lines = MaxLines
    zero = ord('0')
    lines = 0
    chars = 0
    last_digit = None
    run_length = 0
    # The with-block closes the file even when counting raises.
    with open(filename, 'r') as fp:
        # Read pi digits (line by line)...
        for txt in fp:
            lines += 1
            for ch in txt.strip():
                digit = ord(ch) - zero
                # A non-digit gives an out-of-range index; a negative one
                # would silently select the wrong digit's dict.
                if not 0 <= digit <= 9:
                    continue
                chars += 1
                if digit == last_digit:
                    run_length += 1
                else:
                    # The digit changed: file the run that just ended.
                    _record_run(data, last_digit, run_length)
                    last_digit = digit
                    run_length = 1
            if max_lines < lines:
                break
    # The run still open when the input ended has not been filed yet.
    _record_run(data, last_digit, run_length)
    # Return result...
    t1 = datetime.now()
    return {'t0': t0, 't1': t1, 'lines': lines, 'chars': chars}

def print_data(data, r1, filename=None):
    '''Write the repeated-digit run counts to a text file.

    data     -- list of dicts, one per digit, mapping a run length to
                the number of runs of that length.
    r1       -- result dict of pass_one; 't0', 't1', 'lines' and 'chars'
                are used.
    filename -- file to write; defaults to BasePath joined with
                OutputFile.

    The report has a header (start time, line/char totals, elapsed
    seconds) followed by one section per digit listing its run lengths
    in ascending order.

    Returns a dict holding this function's start and end times
    ('t0', 't1').
    '''
    t0 = datetime.now()
    e1 = (r1['t1'] - r1['t0']).total_seconds()
    if filename is None:
        filename = path.join(BasePath, OutputFile)
    # The with-block closes the file even when formatting raises.
    with open(filename, 'w') as fp:

        def emit(text=''):
            # One line of output, newline-terminated like a print statement.
            fp.write(text + '\n')

        emit('%(t0)s' % r1)
        emit('%(lines)d lines, %(chars)d chars' % r1)
        emit('Pass-1: %7.3f seconds' % e1)
        emit()
        # Print...
        for dx, d in enumerate(data):
            emit('Digit: %d' % dx)
            for k in sorted(d):
                emit('|-%s: %s' % (k, d[k]))
            emit()
        emit()
        emit()
    t1 = datetime.now()
    return {'t0': t0, 't1': t1}

# One dict per digit, mapping a run length to the number of such runs.
Data = []
for dx in range(10):
    Data.append({})

# Scan the digits for runs...
result1 = pass_one(Data)

# ...and write the report.
result3 = print_data(Data, result1)

'''eof'''

Both programs assume a text file (pi.lst) containing the digits of pi. The digits are arranged in lines, allowing the file to be read line-by-line. No assumptions are made about the length of the lines or the count of lines.

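The MaxLines global variable in the second program allows throttling the amount read for testing and development. The first program has no such variable; its equivalent limit (one million lines) is written directly into pass_one.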