from examples import bf_basic

# Read the entire text file to be scanned into memory as one string...
fn = r'C:\users\wyrd\blog\hcc\source\chriscarol.txt'
with open(fn, mode='r', encoding='utf8') as infile:
    text = infile.read()

# Break it into a set of unique, lower-cased words...
def char_test(c):
    """Return True if character *c* separates words.

    Separators are any whitespace character plus the punctuation
    characters in the literal below.
    """
    return c.isspace() or c in '"\'.,;:/?!'


words = set()   # unique words found in the text, all lower-cased
word = []       # characters of the word currently being accumulated

for ch in text:
    if not char_test(ch):
        # Ordinary character: it extends (or starts) the current word.
        word.append(ch)
    elif word:
        # Separator reached with a word pending: store it and start over.
        words.add(''.join(word).lower())
        word = []

# The text may end without a trailing separator; flush the pending word.
# It is lower-cased like every other word so the set stays consistent.
if word:
    words.add(''.join(word).lower())

#words = list(sorted(words))
#print(words)
#print()

# Build the basic Bloom filter...
# NOTE(review): 64 is presumably the filter size -- confirm against bf_basic.
bf = bf_basic(64)

# Seed the filter with the words we want to look for...
seed_words = ('who', 'what', 'why', 'where', 'when')
for seed in seed_words:
    bf.add(seed)

# Print every scanned word that the filter reports as present...
# NOTE(review): if bf_basic behaves like a standard Bloom filter, some of
# these hits may be false positives -- confirm.
for word in words:
    if not bf.query(word):
        continue
    print(f'{word}')
print()

