Upload
others
View
3
Download
0
Embed Size (px)
Citation preview
Computer Science Fundamentals 107 — Lecture 20 Contents
Hashing – Why?
Hash Functions – folding, mid-square
Hash Functions – keys which are strings
Load Factor
Collisions and Collision Resolution – open addressing methods, separate chaining


Textbook pages 163–173
(
2 Why Look for Better
If our collection of data is in sorted order we can search for an item in O(log n) time using the binary search algorithm.
Can we improve on this and search in constant time O(1)?
Can we insert and delete items in constant time O(1)?
In the above situation (items are sorted), what is the cost of inserting and deleting items, and of searching for items which are not in the collection?
3 What is a Hash Table?
A collection of items which are stored in such a way as to make the items easy to access.
Initially every slot is empty.
Each position (each slot) in the hash table can hold one item and is named (indexed) by an integer value starting from 0.
0""""""""
None
1""""""""
None
2""""""""
None
3""""""""
None
4""""""""
None
5""""""""
None
6""""""""
None
7""""""""
None
8""""""""
None
9""""""""
None
10""""""""
None
11
None
12""""""""
None
4 What is a Hash Function?
Every item which is added to the hash table has a key (keys must be unique) and a corresponding value.
The hash function is the mapping between an item key and the slot where the item is stored.
To add an item to the hash table there is a mapping between the key of the item and a slot in the table.
0""""""""
None
1""""""""
None
2""""""""
None
3""""""""
None
4""""""""
None
5""""""""
None
6""""""""
None
7""""""""
None
8""""""""
None
9""""""""
None
10""""""""
None
11
None
12""""""""
None
5 Mapping the Key into a Hash Table Slot
can be mapped as follows:
For example, using a hash table of size 13 and mapping integer keys (the items are just the keys in this example) using the key modulus the size of the table, the following keys
0""""""""
26
1""""""""
None
2""""""""
54
3""""""""
94
4""""""""
17
5""""""""
31
6""""""""
None
7""""""""
None
8""""""""
None
9""""""""
None
10""""""""
None
11
None
12""""""""
77
54, 26, 94, 17, 77, 31
This mapping uses the remainder method (i.e., key % 13).
6 Mapping the Key into a Hash Table Slot
A hash function takes the key (which must be unique)
and returns the slot number in the hash table.
hash(item_key) = … % m
All hash functions will have % table_size (m) as part of the formula since the resulting slot number must be within the range of the table size.
The result of applying the hash function to the key is an index into the table.
7 Load Factor of the Hash Table
The load factor (λ) of the hash table is the number of items in the table divided by the size of the table.
0""""""""
26
1""""""""
None
2""""""""
54
3""""""""
94
4""""""""
17
5""""""""
31
6""""""""
None
7""""""""
None
8""""""""
None
9""""""""
None
10""""""""
None
11
None
12""""""""
77
λ = 6 / 13
The example hash table below has a load factor of
8 Mapping the Key – Collisions
If I now wish to insert the key value, 44, there is a problem (hash(44) gives the index 5).
Using the hash function:
0""""""""
26
1""""""""
None
2""""""""
54
3""""""""
94
4""""""""
17
5""""""""
31
6""""""""
None
7""""""""
None
8""""""""
None
9""""""""
None
10""""""""
None
11
None
12""""""""
77
This problem is called a 'collision' (or 'clash')
hash(item_key) = item_key % 13
6 items are mapped into the table below,
9 Perfect Hash Functions
One way to achieve this is to have a hash table which is big enough to accommodate the full range of the keys, e.g., if the keys were the eight digit student ID numbers we would need a table of size 10⁸.
A hash function which uniformly distributes items over the whole hash table area is a perfect hash function.
A perfect hash function is able to map m distinct keys into a table of size m with no collisions.
10 Perfect Hash Functions
This technique can be very wasteful as the number of students to be stored (and retrieved) may be much smaller than the actual table size.
We need some sort of compression from the full range of the keys into the number of hash table slots.
Possible key values
Actual key values
11 A Good Hash Function
Quick and easy to compute.
Achieves even distribution of key values (uniformity).
Ideally we want a 1:1 correspondence of the key values to the size of the hash table.
12 Hash Functions – Folding Method
For example, if the keys are eight digit phone numbers, e.g., 468 23496
This type of hash function takes different chunks of the key (in different order), performs some computation on the chunks, and, perhaps, adds the results together.
The number may be split into groups and summed (really simple hash), e.g., and finally, using the remainder method (… % table_size (13 in this example)) the result is: 5
A hash function tries to use all parts of the key in the calculation - in case some parts of the keys are very similar (causing lots of collisions).
468  234  96
Sum is 798
13 Hash Functions – Folding Method
Some hash functions use shift-and-add folding algorithms, e.g., shift left 2 places and then add the next bit of the key.  Divisions are less common because division is slower than multiplication and addition.
Hash functions need to use very quick calculations, so, often, there are folding methods which use some left/right bit shifts because these are very fast operations.
13
Shifting left example:  shift left 2 bits is the same as multiplying the number by 4
001101₂
52  110100₂
14 Hash Functions – Mid Square Method
An example using the mid-square method:  Square the key, take the digits (except the left-most digit) and, finally, modulus of the table size (13 in this example).
Square the key and take some portion of the result.
For keys:  654  427716  27716  0
653  426409  26409  6
655  429025  29025  9
key  key²  Remove first  % 13
15 Hash Functions – Keys which are Strings
The ASCII table on the right shows the numerical representation of each character.
ord('a') is 97
ord('b') is 98
ord('c') is 99
16 Hash Functions – keys which are strings
For example:  add the ASCII values of each character in the key and take the result modulus of the table size.
The ASCII values of the characters of the string can be used to compute the slot number into which the key is mapped.
For key:  "cat" | 99 + 97 + 116 | 312 | 0   (columns: key | Add ord()s | Sum | % 13)
17 Hash Functions – keys which are strings
Hash function: add the ASCII values of each character in the key and take the result modulus of the table size.
def hash1(key_word, table_size):
    …???

def main():
    print("table size is 13")
    for key_wd in ["cat","dog","abracadabra","abraabracad"]:
        print(key_wd, hash1(key_wd, 13))
table size is 13
cat 0
dog 2
abracadabra 3
abraabracad 3
Using the above hashing algorithm, which kind of keys
will cause collisions?
18 Hash Functions – Keys which are Strings
Improve the previous algorithm by adding a weighting to each character (1 for the first, 2 for the second, …).
table size is 13
cat 4
dog 7
abracadabra 9
abraabracad 1
def hash2(a_string, table_size):
    …???


print("table size is 13")
for key_word in ["cat","dog","abracadabra","abraabracad"]:
    print(key_word, hash2(key_word, 13))
19 Hashing – Collision Resolution
Perfect hash functions are hard to come by, especially if you do not know the input keys beforehand.
You will always need a systematic way of handling collisions.
One method is to systematically find an empty slot in the table, and put the value in this slot.  This technique is called 'open addressing'.  For example, look sequentially until you find a slot which is empty.
"open addressing" refers to the fact that the location ("address") of the item is not determined by its hash value.
20 Collision Resolution – Linear Probing
hash(key, 0) = key % m   # may be a different hash function
hash(key, 1) = (hash(key, 0) + 1) % m
hash(key, 2) = (hash(key, 0) + 2) % m
hash(key, 3) = (hash(key, 0) + 3) % m
…
hash(key, i) = (hash(key, 0) + i) % m
One method of collision resolution is to look sequentially until you find a slot which is empty.
The number of probes is the number of attempts made until an empty slot position is found.
The probe sequence is the sequence of slots which are checked until an available slot is found.
21 Collision Resolution – Linear Probing
Look sequentially until you find a slot which is empty, e.g., using the following table and the hash function below:
0""""""""
26
1""""""""
None
2""""""""
54
3""""""""
94
4""""""""
17
5""""""""
31
6""""""""
None
7""""""""
None
8""""""""
None
9""""""""
None
10""""""""
None
11
None
12""""""""
77
44 – goes into slot 6
51 – goes into slot 1
0""""""""
26
1""""""""
51
2""""""""
54
3""""""""
94
4""""""""
17
5""""""""
31
6""""""""
44
7""""""""
None
8""""""""
None
9""""""""
None
10""""""""
None
11
None
12""""""""
77
Inserting keys 44, 51 into the above hash table:
hash(key) = key % 13
22 Collision Resolution – Clustering
Clustering happens when regions of the table become very full and there are long runs of filled slots.  Clustering slows down performance.
This becomes worse and worse because as more elements are hashed to the 'clustered' areas these increase the clustering even more.
0""""""""
26
1""""""""
51
2""""""""
54
3""""""""
94
4""""""""
17
5""""""""
31
6""""""""
44
7""""""""
None
8""""""""
None
9""""""""
None
10""""""""
None
11
None
12""""""""
77
clustering
Another 'open addressing' approach:  instead of looking for an empty slot sequentially, we can skip slots, i.e., instead of looking in the next slot, the slot after that, etc., we can skip slots, e.g., skip 3.
hash(k, i) = (hash(k, 0) + 3 * i) % m
23 Collision Resolution – Quadratic Probing
Another method of resolving collisions using 'open addressing' is to add the square of 1 to the slot number (first hash result), then add the square of 2, and so on.
hash(key, 0) = key % m   # may be different
hash(key, 1) = (hash(key, 0) + 1²) % m
hash(key, 2) = (hash(key, 0) + 2²) % m
hash(key, 3) = (hash(key, 0) + 3²) % m
…
hash(key, i) = (hash(key, 0) + i²) % m
The probe sequence is the sequence of slots which are checked until an available slot is found.
24 Collision Resolution – Quadratic Probing
Another method of 'open addressing' is to add the square of 1 to the slot number, then add the square of 2, and so on.
0""""""""
26
1""""""""
None
2""""""""
54
3""""""""
94
4""""""""
17
5""""""""
31
6""""""""
None
7""""""""
None
8""""""""
None
9""""""""
None
10""""""""
None
11
None
12""""""""
77
Inserting keys 43, 25 into the above hash table:
43 – goes into slot 8 (probe sequence is 4, 5, 8)
25 – goes into slot 11 (probe sequence is 12, 0, 3, 8, 2, 11)
The clustering still happens but in different regions of the table.
0""""""""
26
1""""""""
None
2""""""""
54
3""""""""
94
4""""""""
17
5""""""""
31
6""""""""
None
7""""""""
None
8""""""""
43
9""""""""
None
10""""""""
None
11
25
12""""""""
77
hash(key) = key % 13
25 Collision Resolution – Double Hashing
We have looked at sequential linear probing where you look sequentially until you find a slot which is empty.  Then we looked at different versions of this where we skipped some slots, e.g., "plus-3" and quadratic probing.  These 'open addressing' versions spread the clustering in a more even manner.
We can, instead, apply a second hash function to the key and use the resulting value as our skip number.  In this way the number of skips is dependent on the key and will be different for different keys.
26 Collision Resolution – Double Hashing
Using the two hash functions on the table below:
0""""""""
26
1""""""""
None
2""""""""
54
3""""""""
94
4""""""""
17
5""""""""
31
6""""""""
None
7""""""""
None
8""""""""
None
9""""""""
None
10""""""""
None
11
None
12""""""""
77
43 – goes into slot 10 (hash1(43) = 4, hash2(43) = 6 and the probe sequence is 4, 10)
25 – goes into slot 8 (hash1(25) = 12, hash2(25) = 3 and the probe sequence is 12, 2, 5, 8)

hash1(key) = key % 13
hash2(key) = 7 – (key % 7)
0""""""""
26
1""""""""
None
2""""""""
54
3""""""""
94
4""""""""
17
5""""""""
31
6""""""""
None
7""""""""
None
8""""""""
25
9""""""""
None
10""""""""
43
11
None
12""""""""
77
Inserting keys 43, 25 into the above hash table:
27 Collision Resolution – Separate Chaining
Another way of handling collisions is to use chaining where every element of the hash table is a list and any items which are hashed to a slot are added to the list.
If the hash function is good and if the table has a load factor which is reasonable, the lists in each node of the hash table will be quite small. Therefore the Big O for inserting, deleting or searching for an item will be reasonable (close to O(1)).
Each element of the hash table could be a linked list or a Python list object.
Remember: the costs of maintaining the links are a consideration especially if the actual associated data is small.
28 Collision Resolution – Separate Chaining
For example, given the following hash table which uses separate chaining:
inserting the keys:  43, 69, 93, 56, 90
0"""""""" 1"""""""" 2"""""""" 3"""""""" 4"""""""" 5"""""""" 6"""""""" 7"""""""" 8"""""""" 9"""""""" 10"""""""" 11 12""""""""
5426 93 17 7731
0"""""""" 1"""""""" 2"""""""" 3"""""""" 4"""""""" 5"""""""" 6"""""""" 7"""""""" 8"""""""" 9"""""""" 10"""""""" 11 12""""""""
5426 93 17 773143
69
93
56
90
hash(key) = key % 13
29 Python Hash Function Example
print("Python hash values for the strings '15' to '19'")
for num in range(15, 20):
    print(num, "-", hash(str(num)))

print()
print("Python hash values for the integers 15 to 19")
for num in range(15, 20):
    print(num, "-", hash(num))

Python hash values for the strings '15' to '19'
15 - 3463324007810316761
16 - -4414834310162138642
17 - 7164126101736489360
18 - 6351382913715616161
19 - -3838919086090416598

Python hash values for the integers 15 to 19
15 - 15
16 - 16
17 - 17
18 - 18
19 - 19
30 Another Hash Function
Unix operating system system-level hash table
def elf_hash(to_hash):
    """Return the ELF-style hash of the string ``to_hash`` as used in this lecture.

    Each character is folded in by shifting the running value left four
    bits and adding the character's code point. Whatever lands in the
    mask ``0xf0000000`` is XOR-ed back into the value (shifted right)
    and then cleared from it.

    Args:
        to_hash: The string to hash. An empty string hashes to 0.

    Returns:
        The resulting non-negative integer hash value.
    """
    hash_value = 0
    hex_number = 0xf0000000  # mask selecting bits 28-31
    for char in to_hash:
        # Make room for the next character and fold it in.
        hash_value = (hash_value << 4) + ord(char)
        hi_bits = hash_value & hex_number
        if hi_bits != 0:
            # Mix the masked high bits back into the lower part of the value.
            # NOTE(review): the widely published System V ABI ELF hash
            # shifts by 24 here; this listing shifts by 26 and the sample
            # output printed alongside it depends on that, so 26 is kept.
            hash_value = hash_value ^ (hi_bits >> 26)
        # Drop the masked high bits (a no-op when hi_bits is 0).
        hash_value = hash_value & ~hi_bits
    return hash_value
print("cat", elf_hash("cat"))
print("elephant", elf_hash("elephant"))
print("piglet", elf_hash("piglet"))
print("human", elf_hash("human"))
print("mig", elf_hash("mig"))

cat 27012
elephant 46589668
piglet 124773060
human 7324542
mig 29687
~ complement, & bit and
^ exclusive or, << left shift, >> right shift