class. See the following
example. Python uses the name self (by convention, the first parameter of every method; it is not a reserved keyword) to refer to the object itself.
# ex.py
class Toy:
    """Minimal class with no constructor.

    The attribute ``data`` does not exist on a new object; it is created
    the first time ``set`` is called.
    """

    def set(self, a):
        """Store ``a`` in ``self.data``, creating the attribute if needed."""
        self.data = a
Note that data is not declared anywhere in the class. Only when set is executed
is the instance attribute (member variable) data created on that object!
We can check this phenomenon by inspecting the following code. Please read the inline comments carefully.
>>> from ex import * # import ex.py
>>> x = Toy() # x points to a newly created Toy object
>>> x.data # x.data is not yet created !!!!!
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
AttributeError: 'Toy' object has no attribute 'data'
>>> x.set(10) # now, x.data is created!!!!!
>>> x.data
10
To guarantee that data exists as soon as an object is created, initialize it in the constructor __init__(self):
# ex2.py
class Toy:
    """Minimal class whose constructor creates ``data`` up front."""

    def __init__(self):
        """Create the ``data`` attribute with the initial value 0."""
        self.data = 0

    def set(self, a):
        """Replace the value stored in ``self.data`` with ``a``."""
        self.data = a
Now, no error takes place in the following code:
>>> from ex2 import *
>>> x = Toy()
>>> x.data
0
>>> x.set(10)
>>> x.data
10
# fruit.py
class Fruit:
    """Base class that records its kind in ``fruitType``."""

    def __init__(self):
        """Set ``fruitType`` to ``'fruit'``."""
        self.fruitType = 'fruit'

    def isA(self):
        """Return the value of ``fruitType``."""
        return self.fruitType


class Berry(Fruit):  # Berry inherits Fruit
    """Subclass of ``Fruit``; inherits ``isA`` and adds ``print_type``."""

    def __init__(self):
        """Set ``fruitType`` to ``'berry'`` (overrides ``Fruit.__init__``)."""
        self.fruitType = 'berry'

    def print_type(self):
        """Print the value of ``fruitType``."""
        print(self.fruitType)
The following code tests the above code.
>>> from fruit import *
>>> f = Fruit()
>>> f.isA()
'fruit'
>>> b = Berry()
>>> b.isA()
'berry'
>>> b.print_type()
berry
>>> f.print_type()
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
AttributeError: 'Fruit' object has no attribute 'print_type'
The false-positive probability p of a BF is computed as follows:
\[ p = (1 - e^{- \frac{kn} m})^k \]
In the above:
bf.py so that the above code works correctly. In particular,
BF.
BF that sets parameters
correctly. The input to the constructor is \(n\), that is, the maximum number
of items that the filter would contain.
bytearray
object. Note a cell of a bytearray object is an 8-bit number (0 - 255). So,
storing 80 bits needs 10 bytes.
>>> from bf import *
>>> filter = BF(5)
>>> filter.n
5
>>> filter.m
80
>>> filter.k
8
>>> type(filter.B)
<class 'bytearray'>
>>> len(filter.B)
10
>>> filter.B
bytearray(b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
>>> from bf import *
>>> filter = BF(13)
>>> filter.n
13
>>> filter.m
208
>>> filter.k
8
>>> filter.B
bytearray(b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
bf.py so that your code works as follows.
set_one_at(), set_zero_at(), get_at() must
be implemented to take O(1) time.
Think about accessing the right element. Use the right index in the bytearray and read/write only that element.
>>> import bf ## you need these three lines
>>> from bf import * ## for constant reloading
>>> from importlib import reload
>>> B = bytearray([0]*2)
>>> bprint(B) ## write function bprint
0000000000000000
>>> reload(bf) ## whenever you modify your bf.py
>>> from bf import * ## you can reload the new code
>>> hprint(B) ## write function hprint
00 00
>>> reload(bf)
>>> from bf import *
>>> set_one_at(B, 0) ## write function set_one_at
>>> bprint(B)
1000000000000000
>>> set_one_at(B, 14)
>>> bprint(B)
1000000000000010
>>> reload(bf)
>>> from bf import *
>>> set_zero_at(B, 0) ## write function set_zero_at
>>> bprint(B)
0000000000000010
>>> hprint(B)
00 02
>>> reload(bf)
>>> from bf import * ## write function get_at that returns an integer
>>> t = get_at(B, 14)
>>> t
1
>>> type(t)
<class 'int'>
>>> get_at(B, 13)
0
sha256 to compute the
hashes.
In particular,
conv function in bf.py that works as follows:
>>> from bf import *
>>> conv(bytearray([0,0,0,7]))
7
>>> conv(bytearray([0,0,1,0]))
256
>>> conv(bytearray([0,1,0,0]))
65536
>>> conv(bytearray([0,1,0]))
too small input!
>>> conv(b"1234")
825373492
>>> filter = BF(5)
>>> filter.hashes(b"hello")
[754077114, 1605411598, 652753706, 3317293726, 454434396, 531055198, 1929655138, 2475399204]
Note each number is too big, since B contains 80 bits (i.e., m = 80). So we
take mod m. Finally,
>>> from bf import *
>>> filter = BF(5)
>>> filter.hashes(b"hello")
[74, 78, 26, 46, 76, 78, 18, 4]
add and
check!
>>> from bf import *
>>> filter = BF(5)
>>> filter.add(b"hello")
>>> bprint(filter.B)
00001000000000000010000000100000000000000000001000000000000000000000000000101010
>>> filter.check(b"hello")
True
>>> filter.check(b"Hello")
False
>>> filter.check(b"world")
False
>>> filter.add(b"world")
>>> bprint(filter.B)
00001000000000000010001000100001001000000000001100000000100000000000000100101010
>>> hprint(filter.B)
08 00 22 21 20 03 00 80 01 2a
>>> filter.check(b"world")
True
>>> filter.check(b"Hello")
False
# test_bf.py
from bf import *
# Read the blacklist; the with-block closes the file once it has been read.
with open("suspicious.txt") as blacklist_file:
    blacklist = blacklist_file.readlines()

# number of items in the blacklist
# (all lines are counted here, before blank lines and '#' comments are skipped)
n = len(blacklist)

#======================================================================
# Creating a BF from the blacklist

# prepare a BF
filter = BF(n)
for url in blacklist:
    url = url.strip()  # remove white space
    if not url or url[0] == '#':  # we need to ignore empty strings or comments
        continue
    url = url.encode()  # Call encode() to convert string into bytes
    filter.add(url)

# Let's check how big is the size of BF B
print("BF size is", len(filter.B), "bytes!")

#======================================================================
# Inspect the observed domains based on BF
with open("observed.txt") as observed_file:
    observed = observed_file.readlines()

for url in observed:
    url = url.strip()
    if not url or url[0] == '#':  # skip empty lines and comments, as above
        continue
    url = url.encode()
    if filter.check(url):  # Now call check function
        print(url)
Your code should have the following results:
$ wc suspicious.txt
1870 1940 50164 suspicious.txt
$ python3 test_bf.py
BF size is 3740 bytes!
b'gsebqsi.ru'
b'pagaldaily.com'
b'4kqd3hmqgptupi3p.nameuser.site'
b'4kqd3hmqgptupi3p.chargecar.vip'
b'celticno1.dish'
b'cerberhhyed5frqa.fkri48.win'
b'4kqd3hmqgptupi3p.k7oud1.top'
b'teamoxsiempre.android'
sort suspicious.txt > sorted_sus.txt
sort observed.txt > sorted_obs.txt
comm -12 sorted_sus.txt sorted_obs.txt
sort command will sort the lines in
suspicious.txt and store them in sorted_sus.txt
(using the redirection).
comm command will compare the two files.
sorted_sus.txt.
sorted_obs.txt.
comm will print out only the lines appearing in both
files.
# # # # # # # #
4kqd3hmqgptupi3p.chargecar.vip
4kqd3hmqgptupi3p.k7oud1.top
4kqd3hmqgptupi3p.nameuser.site
cerberhhyed5frqa.fkri48.win
gsebqsi.ru
pagaldaily.com
There are roughly 3700 domains in the observed list. We have two false positive cases:
celticno1.dish and
teamoxsiempre.android in the BF output. Note that 2/3700 is roughly 0.00054, which
is quite consistent with the false positive parameter.
bf.py as shown in the submit command below.
~/bin/submit -c=IT432 -p=lab01 bf.py lab01_report.docx