mirror of
https://github.com/seigler/dash-docs
synced 2025-07-26 17:26:12 +00:00
Add python script for running bloom filter example
This commit is contained in:
parent
1e3bb6bb43
commit
86d1f40d88
1 changed files with 105 additions and 0 deletions
105
scripts/bloom_filter.py
Normal file
105
scripts/bloom_filter.py
Normal file
|
@ -0,0 +1,105 @@
|
|||
#!/usr/bin/env python
|
||||
from math import log
|
||||
from bitarray import bitarray # from pypi.python.org/pypi/bitarray
|
||||
import pyhash # from https://github.com/flier/pyfasthash
|
||||
|
||||
# Based on BIP-37
|
||||
# https://github.com/QuantumExplorer/bips/blob/master/bip-0037.mediawiki
|
||||
|
||||
# Defined in bloom.h
|
||||
# https://github.com/dashpay/dash/blob/master/src/bloom.h#L17-#L18
|
||||
MAX_BLOOM_FILTER_SIZE = 36000
|
||||
MAX_HASH_FUNCS = 50
|
||||
|
||||
# Set of flags that control how matched items are added to the filter (per BIP-37)
|
||||
# https://github.com/dashpay/dash/blob/master/src/bloom.h#L24
|
||||
nFlags = 0
|
||||
|
||||
nElements = 1 # Number of elements
|
||||
nFPRate = 0.0001 # False positive rate
|
||||
|
||||
nFilterBytes = int(min((-1 / log(2)**2 * nElements * log(nFPRate)) / 8, MAX_BLOOM_FILTER_SIZE))
|
||||
|
||||
# Calculate the number of hash functions to use in the filter
|
||||
# Limit the maximum number to 50 per BIP-37
|
||||
nHashFuncs = int(min(nFilterBytes * 8 / nElements * log(2), MAX_HASH_FUNCS))
|
||||
|
||||
murmur3 = pyhash.murmur3_32()
|
||||
|
||||
TEST_TXID = "019f5b01d4195ecbc9398fbf3c3b1fa9bb3183301d7a1fb3bd174fcfa40a2b65"
|
||||
|
||||
def bloom_hash(nHashNum, data):
|
||||
seed = (nHashNum * 0xfba4c795 + nTweak) & 0xffffffff
|
||||
return( murmur3(data, seed=seed) % (nFilterBytes * 8) )
|
||||
|
||||
|
||||
# Bloom Filter creation
|
||||
def create_filter(nTweak):
|
||||
print('Creating bloom filter')
|
||||
vData = nFilterBytes * 8 * bitarray('0', endian="little")
|
||||
|
||||
data_to_hash = TEST_TXID
|
||||
data_to_hash = data_to_hash.decode("hex")
|
||||
|
||||
print('Filter bytes: {}; Hash functions: {}'.format(nFilterBytes, nHashFuncs))
|
||||
|
||||
print(" Filter (As Bits)")
|
||||
print("nHashNum nIndex Filter 0123456789abcdef")
|
||||
print("~~~~~~~~ ~~~~~~ ~~~~~~ ~~~~~~~~~~~~~~~~")
|
||||
for nHashNum in range(nHashFuncs):
|
||||
nIndex = bloom_hash(nHashNum, data_to_hash)
|
||||
|
||||
## Set the bit at nIndex to 1
|
||||
vData[nIndex] = True
|
||||
|
||||
## Debug: print current state
|
||||
print(' {0:2} {1:2} {2} {3}'.format(
|
||||
nHashNum,
|
||||
hex(int(nIndex)),
|
||||
vData.tobytes().encode("hex"),
|
||||
vData.to01()
|
||||
))
|
||||
|
||||
print('Bloom filter: {}\n'.format(vData.tobytes().encode("hex")))
|
||||
|
||||
|
||||
# Bloom Filter evaluation
|
||||
def evaluate_filter():
|
||||
print('Evaluating bloom filter')
|
||||
vData = bitarray(endian='little')
|
||||
vData.frombytes("b50f".decode("hex"))
|
||||
nHashFuncs = 11
|
||||
nTweak = 0
|
||||
nFlags = 0
|
||||
|
||||
def contains(nHashFuncs, data_to_hash):
|
||||
for nHashNum in range(nHashFuncs):
|
||||
## bloom_hash as defined in previous section
|
||||
nIndex = bloom_hash(nHashNum, data_to_hash)
|
||||
|
||||
if vData[nIndex] != True:
|
||||
print("MATCH FAILURE: Index {0} not set in {1}\n".format(
|
||||
hex(int(nIndex)),
|
||||
vData.to01()
|
||||
))
|
||||
return False
|
||||
print("MATCH SUCCESS\n")
|
||||
|
||||
## Test 1: Same TXID as previously added to filter
|
||||
data_to_hash = TEST_TXID
|
||||
print('\nChecking: {}'.format(data_to_hash))
|
||||
data_to_hash = data_to_hash.decode("hex")
|
||||
contains(nHashFuncs, data_to_hash)
|
||||
|
||||
## Test 2: Arbitrary string
|
||||
data_to_hash = "1/10,000 chance this ASCII string will match"
|
||||
print('Checking: {}'.format(data_to_hash))
|
||||
contains(nHashFuncs, data_to_hash)
|
||||
|
||||
|
||||
# Tweak is a random value added to the seed value in the hash function
|
||||
# used by the bloom filter
|
||||
nTweak = 0
|
||||
|
||||
create_filter(nTweak)
|
||||
evaluate_filter()
|
Loading…
Add table
Add a link
Reference in a new issue