Tags
For Pi Day I published a post on my main blog, Logos Con Carne. Here is the Python code behind the data generated in that post.
The first program generates a frequency count of the digits in pi as well as frequency counts of pairs of digits:
'''\
Count how often each digit, and each (digit, previous digit) pair, occurs
in the digits of pi.

data[digit-n]: [
    count ,
    percentage ,
    frequency-data[digit-n]: [ count , percentage ]
]

November 2015
'''
from sys import stdin, stdout, stderr, argv
from os import path
from datetime import datetime

BasePath = r'C:\my\path\to\this\stuff'
InputFile = 'pi.lst'
OutputFile = 'pi-1.out'
# Reading stops once MORE than this many lines have been consumed.
MaxLines = 999999


def pass_one(data, filename=None, max_lines=None):
    '''Input digit data; count digits.

    data      -- list of ten [count, percentage, pairs] records (one per
                 digit), where pairs is a list of ten [count, percentage]
                 records indexed by the PREVIOUS digit.  Updated in place.
    filename  -- input file; defaults to BasePath/InputFile.
    max_lines -- line limit; defaults to MaxLines.  The check happens after
                 a line is processed, so up to max_lines + 1 lines are read.

    Characters other than '0'..'9' (for example the decimal point in
    "3.14159") are skipped and are not included in the character count.
    Without this a '.' would map to index -2 and be tallied as digit 8.

    Returns a dict with the start/end times ('t0', 't1'), the number of
    lines read ('lines') and the number of digits counted ('chars').
    '''
    t0 = datetime.now()
    if filename is None:
        filename = path.join(BasePath, InputFile)
    if max_lines is None:
        max_lines = MaxLines
    last_digit = None
    lines = 0
    chars = 0
    # Read pi digits (line by line)...
    with open(filename, 'r') as fp:
        for txt in fp:
            lines += 1
            for ch in txt.strip():
                # Explicit ASCII range test: str.isdigit() also accepts
                # non-ASCII digits on Python 3.
                if not ('0' <= ch <= '9'):
                    continue
                chars += 1
                n = ord(ch) - ord('0')
                # Get the dataset for this digit...
                d = data[n]
                # Bump the digit frequency count...
                d[0] += 1
                # Bump the previous digit frequency count (the very first
                # digit has no predecessor)...
                if last_digit is not None:
                    d[2][last_digit][0] += 1
                last_digit = n
            if max_lines < lines:
                break
    # Return result...
    t1 = datetime.now()
    return {'t0':t0, 't1':t1, 'lines':lines, 'chars':chars}


def pass_two(data, r1):
    '''Calculate frequency count percentages.

    data -- the structure filled in by pass_one; the percentage slots are
            written in place.
    r1   -- the result dict from pass_one; only r1['chars'] is used.

    Both the single-digit and the pair percentages are taken relative to
    the total digit count.  If no digits were read, every percentage is set
    to 0.0 instead of dividing by zero.

    Returns a dict with the start/end times ('t0', 't1').
    '''
    t0 = datetime.now()
    cs = float(r1['chars'])
    # Calculate percentages...
    for d in data:
        d[1] = float(100) * (float(d[0]) / cs) if cs else 0.0
        for p in d[2]:
            p[1] = float(100) * (float(p[0]) / cs) if cs else 0.0
    # Return result...
    t1 = datetime.now()
    return {'t0':t0, 't1':t1}


def print_data(data, r1, r2, filename=None):
    '''Print data.

    data     -- the structure filled in by pass_one and pass_two.
    r1, r2   -- the result dicts from pass_one and pass_two.
    filename -- output file; defaults to BasePath/OutputFile.

    Writes a header, the digit frequency table (with the deviation from
    10 percent) and, per digit, the previous-digit table (with the
    deviation from 1 percent).

    Returns a dict with the start/end times ('t0', 't1').
    '''
    t0 = datetime.now()
    e1 = (r1['t1']-r1['t0']).total_seconds()
    e2 = (r2['t1']-r2['t0']).total_seconds()
    if filename is None:
        filename = path.join(BasePath, OutputFile)
    with open(filename, 'w') as fp:
        def emit(text=''):
            # Same bytes as the Python 2 statement "print >> fp, text",
            # but valid on Python 3 as well.
            fp.write(text + '\n')

        emit('%(t0)s' % r1)
        emit('%(lines)d lines, %(chars)d chars' % r1)
        emit('Pass-1: %7.3f seconds' % e1)
        emit('Pass-2: %7.3f seconds' % e2)
        emit()
        # Print digit frequency table...
        for dx, d in enumerate(data):
            s = '%d: %8d (%9.6f, %+.6f)'
            t = (dx, d[0], d[1], float(10)-d[1])
            emit(s % t)
        emit()
        emit()
        emit()
        # Print previous digit frequency tables...
        for dx, d in enumerate(data):
            s = '%d: %8d (%9.6f)'
            t = (dx, d[0], d[1])
            emit(s % t)
            for px, p in enumerate(d[2]):
                s = '%d_%d: %8d (%.6f, %+.6f)'
                t = (dx, px, p[0], p[1], float(1)-p[1])
                emit(s % t)
            emit()
        emit()
    t1 = datetime.now()
    return {'t0':t0, 't1':t1}


# Only run the analysis when executed as a script, so that importing this
# module does not try to open the input file.
if __name__ == '__main__':
    Data = [[0,0,[[0,0] for px in range(10)]] for dx in range(10)]

    # Do the thing...
    result1 = pass_one(Data)
    result2 = pass_two(Data, result1)

    # Emit the result...
    result3 = print_data(Data, result1, result2)

'''eof'''
The second program looks for repeating sequences of digits:
'''\
Find runs of repeated digits in the digits of pi.

data[digit-n]: [
    sequence-data[length-n]: { count }
]

November 2015
'''
from sys import stdin, stdout, stderr, argv
from os import path
from datetime import datetime

BasePath = r'C:\my\path\to\this\stuff'
InputFile = 'pi.lst'
OutputFile = 'pi-2.out'
# Reading stops once MORE than this many lines have been consumed.
MaxLines = 9999999


def _record_run(data, digit, length):
    '''Tally one run of `length` repeats of `digit`; runs of 1 are ignored.'''
    if 1 < length:
        d = data[digit]
        d[length] = d.get(length, 0) + 1


def pass_one(data, filename=None, max_lines=None):
    '''Input digit data; count runs of repeated digits.

    data      -- list of ten dicts (one per digit) mapping run length to
                 the number of runs of that length.  Updated in place.
    filename  -- input file; defaults to BasePath/InputFile.
    max_lines -- line limit; defaults to MaxLines.  The check happens after
                 a line is processed, so up to max_lines + 1 lines are read.

    Runs continue across line breaks.  Characters other than '0'..'9' (for
    example the decimal point in "3.14159") are skipped: they neither end a
    run nor count toward the character total.  Without this a '.' would
    map to index -2 and be treated as a digit.

    Returns a dict with the start/end times ('t0', 't1'), the number of
    lines read ('lines') and the number of digits seen ('chars').
    '''
    t0 = datetime.now()
    if filename is None:
        filename = path.join(BasePath, InputFile)
    if max_lines is None:
        max_lines = MaxLines
    lines = 0
    chars = 0
    last_digit = None
    seq_length = 0
    # Read pi digits (line by line)...
    with open(filename, 'r') as fp:
        for txt in fp:
            lines += 1
            for ch in txt.strip():
                # Explicit ASCII range test: str.isdigit() also accepts
                # non-ASCII digits on Python 3.
                if not ('0' <= ch <= '9'):
                    continue
                chars += 1
                n = ord(ch) - ord('0')
                if last_digit == n:
                    seq_length += 1
                else:
                    # The digit changed: close out the previous run.
                    # (seq_length is 0 before the first digit, so nothing
                    # is recorded while last_digit is still None.)
                    _record_run(data, last_digit, seq_length)
                    last_digit = n
                    seq_length = 1
            if max_lines < lines:
                break
    # The final run has no following digit to close it out.
    _record_run(data, last_digit, seq_length)
    # Return result...
    t1 = datetime.now()
    return {'t0':t0, 't1':t1, 'lines':lines, 'chars':chars}


def print_data(data, r1, filename=None):
    '''Print data.

    data     -- the structure filled in by pass_one.
    r1       -- the result dict from pass_one.
    filename -- output file; defaults to BasePath/OutputFile.

    Writes a header and then, for each digit, one line per run length
    (ascending) with the number of runs of that length.

    Returns a dict with the start/end times ('t0', 't1').
    '''
    t0 = datetime.now()
    e1 = (r1['t1']-r1['t0']).total_seconds()
    if filename is None:
        filename = path.join(BasePath, OutputFile)
    with open(filename, 'w') as fp:
        def emit(text=''):
            # Same bytes as the Python 2 statement "print >> fp, text",
            # but valid on Python 3 as well.
            fp.write(text + '\n')

        emit('%(t0)s' % r1)
        emit('%(lines)d lines, %(chars)d chars' % r1)
        emit('Pass-1: %7.3f seconds' % e1)
        emit()
        # Print...
        for dx, d in enumerate(data):
            emit('Digit: %d' % dx)
            for k in sorted(d):
                emit('|-%s: %s' % (k, d[k]))
            emit()
        emit()
        emit()
    t1 = datetime.now()
    return {'t0':t0, 't1':t1}


# Only run the analysis when executed as a script, so that importing this
# module does not try to open the input file.
if __name__ == '__main__':
    Data = [{} for dx in range(10)]

    # Do the thing...
    result1 = pass_one(Data)

    # Emit the result...
    result3 = print_data(Data, result1)

'''eof'''
Both programs assume a text file (pi.lst) containing the digits of pi. The digits are arranged in lines, allowing the file to be read line-by-line. No assumptions are made about the length of the lines or the count of lines.
The MaxLines global variable allows throttling the amount read for testing and development.