-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathhopscotch.go
380 lines (318 loc) · 9.68 KB
/
hopscotch.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
package hashmaps
import "fmt"
const (
reservedBits = uintptr(1)
maxNeighborhoodSize = 64 - reservedBits
)
type hBucket[K comparable, V any] struct {
hopInfo uint64
key K
val V
}
// go:inline
func flip(a uint64) uint64 {
a ^= 0xFFFFFFFFFFFFFFFF
return a
}
// go:inline
func (b *hBucket[K, V]) set(i uintptr, v bool) {
mask := uint64(1) << (i + reservedBits)
if v {
b.hopInfo |= mask
} else {
b.hopInfo &= flip(mask)
}
}
// go:inline
func (b *hBucket[K, V]) getNeighborhood() uint64 {
return b.hopInfo >> uint64(reservedBits)
}
// go:inline
func (b *hBucket[K, V]) isEmpty() bool {
return (b.hopInfo & 1) == 0
}
// go:inline
func (b *hBucket[K, V]) release() {
b.hopInfo &= flip(1)
}
// go:inline
func (b *hBucket[K, V]) occupy() {
b.hopInfo |= 1
}
// Hopscotch is a hashmap implementation which uses open addressing,
// where collisions are managed within a limited neighborhood. That is
// implemented as a dynamically growing bitmap with a default
// size of 4 and a upper bound of 63. From this it follows a constant
// lookup time for the Get function. To achieve this invariant
// linear probing is used for finding an empty slot in the table,
// if the next empty slot is not within the size of the neighborhood,
// subsequent swap of closer buckets are done or the size of the
// neighborhood is increased.
type Hopscotch[K comparable, V any] struct {
buckets []hBucket[K, V]
hasher HashFn[K]
// length stores the current inserted elements
length uintptr
// capMinus1 is used for a bitwise AND on the hash value,
// because the size of the underlying array is a power of two value
capMinus1 uintptr
neighborhoodSize uint8
maxLoad float32
nextResize uintptr
}
// NewHopscotch creates a ready to use `Hopscotch` hash map with default settings.
func NewHopscotch[K comparable, V any]() *Hopscotch[K, V] {
return NewHopscotchWithHasher[K, V](GetHasher[K]())
}
// NewHopscotchWithHasher same as `NewHopscotch` but with a given hash function.
func NewHopscotchWithHasher[K comparable, V any](hasher HashFn[K]) *Hopscotch[K, V] {
const (
DefaultNeighborhoodSize = 4
capacity = DefaultNeighborhoodSize
)
return &Hopscotch[K, V]{
buckets: make([]hBucket[K, V], capacity*2),
capMinus1: capacity - 1,
hasher: hasher,
neighborhoodSize: DefaultNeighborhoodSize,
maxLoad: DefaultMaxLoad,
nextResize: 2,
}
}
func (m *Hopscotch[K, V]) rehash(n uintptr) {
nmap := Hopscotch[K, V]{
buckets: make([]hBucket[K, V], n+uintptr(m.neighborhoodSize)),
hasher: m.hasher,
length: m.length,
capMinus1: n - 1,
neighborhoodSize: m.neighborhoodSize,
maxLoad: m.maxLoad,
nextResize: uintptr(float32(n) * m.maxLoad),
}
for i := range m.buckets {
if !m.buckets[i].isEmpty() {
homeIdx := nmap.hasher(m.buckets[i].key) & nmap.capMinus1
nmap.emplace(m.buckets[i].key, m.buckets[i].val, homeIdx)
}
}
m.buckets = nmap.buckets
m.capMinus1 = nmap.capMinus1
m.nextResize = nmap.nextResize
}
// Reserve sets the number of buckets to the most appropriate to contain at least n elements.
// If n is lower than that, the function may have no effect.
func (m *Hopscotch[K, V]) Reserve(n uintptr) {
var (
needed = uintptr(float32(n) / m.maxLoad)
newCap = uintptr(NextPowerOf2(uint64(needed)))
)
if uintptr(cap(m.buckets)) < newCap {
m.rehash(newCap)
}
}
// search looks within the neighborhood of the home bucket to find the desired key.
// This function has a constant runtime.
//
// go:inline
func (m *Hopscotch[K, V]) search(homeIdx uintptr, key K) (uintptr, bool) {
neighborhood := m.buckets[homeIdx].getNeighborhood()
for neighborhood != 0 {
if (neighborhood & 1) == 1 {
if m.buckets[homeIdx].key == key {
return homeIdx, true
}
}
homeIdx++
neighborhood >>= 1
}
return 0, false
}
// Get returns the value stored for this key, or false if there is no such value.
func (m *Hopscotch[K, V]) Get(key K) (V, bool) {
var (
homeIdx = m.hasher(key) & m.capMinus1
idx, found = m.search(homeIdx, key)
v V
)
if found {
// already inserted, update
return m.buckets[idx].val, true
}
return v, false
}
// moveCloser tries to achieve the neighborhood invariant by moving
// the given empty bucket closer to its home bucket. Therefore another
// buckets are moved more far-off. The parameter `emptyIdx`
// is a in-out variable, that is updated, if the movement was successful.
//
// go:inline
func (m *Hopscotch[K, V]) moveCloser(emptyIdx *uintptr) bool {
start := *emptyIdx - (uintptr(m.neighborhoodSize) - 1)
for homeIdx := start; homeIdx < *emptyIdx; homeIdx++ {
neighborhood := m.buckets[homeIdx].getNeighborhood()
for cIdx := homeIdx; neighborhood != 0 && cIdx < *emptyIdx; cIdx++ {
if (neighborhood & 1) == 1 {
distance := cIdx - homeIdx
// found a candidate, mark it as empty
m.buckets[cIdx].release()
// move the candidate to the empty bucket
m.buckets[*emptyIdx].occupy()
m.buckets[*emptyIdx].key = m.buckets[cIdx].key
m.buckets[*emptyIdx].val = m.buckets[cIdx].val
// update the neighborhood of the home bucket,
// because we moved the empty bucket closer
m.buckets[homeIdx].set(distance, false)
m.buckets[homeIdx].set(*emptyIdx-homeIdx, true)
// announce the new empty index
*emptyIdx = cIdx
return true
}
neighborhood >>= 1
}
}
return false
}
// emplace adds the key-value pair to the map. It does not check
// the occurrence, so it expects that the give key is not already
// in. Furthermore a resize or rehash can happen to achieve
// the neighborhood invariant.
func (m *Hopscotch[K, V]) emplace(key K, val V, homeIdx uintptr) {
var (
capacity = m.capMinus1 + 1
emptyIdx = homeIdx
)
// linear probing for the next empty bucket
for ; ; emptyIdx++ {
if emptyIdx == uintptr(cap(m.buckets)) {
// we reached the end of the bucket array, so we need to resize it
m.rehash(capacity * 2)
goto EMPLACE_AFTER_REHASH
}
if m.buckets[emptyIdx].isEmpty() {
// we found a empty bucket for the next insert, we are done
break
}
}
for {
distance := emptyIdx - homeIdx
if distance < uintptr(m.neighborhoodSize) {
// we found an empty bucket within the neighborhood.
// we are finished and can emplace the key-value pair.
m.buckets[emptyIdx].occupy()
m.buckets[emptyIdx].key = key
m.buckets[emptyIdx].val = val
m.buckets[homeIdx].set(distance, true)
return
}
// try to move the empty bucket closer, so that it is within the
// neighborhood size of the home bucket.
if !m.moveCloser(&emptyIdx) {
break
}
}
// move closer does not work, we need to find another solution!
if m.neighborhoodSize < 32 {
m.neighborhoodSize = 2 * m.neighborhoodSize
} else if m.neighborhoodSize < uint8(maxNeighborhoodSize-1) {
m.neighborhoodSize = uint8(maxNeighborhoodSize)
} else {
// that is the last hope to achieve the neighborhood invariant,
// but this case should happen really rare.
// Note: it is also possible to change the hash function here!
m.rehash(capacity * 2)
}
EMPLACE_AFTER_REHASH:
newIdx := m.hasher(key) & m.capMinus1
m.emplace(key, val, newIdx)
}
// Put maps the given key to the given value. If the key already exists its
// value will be overwritten with the new value.
// Returns true, if the element is a new item in the hash map.
func (m *Hopscotch[K, V]) Put(key K, val V) bool {
// check for resize
capacity := m.capMinus1 + 1
if m.length >= m.nextResize {
m.rehash(capacity * 2)
}
var (
homeIdx = m.hasher(key) & m.capMinus1
idx, found = m.search(homeIdx, key)
)
if found {
// already inserted, update
m.buckets[idx].val = val
return false
}
// search for empty bucket in neighborhoodSize
m.length++
m.emplace(key, val, homeIdx)
return true
}
// Remove removes the specified key-value pair from the map.
// Returns true, if the element was in the hash map.
func (m *Hopscotch[K, V]) Remove(key K) bool {
var (
homeIdx = m.hasher(key) & m.capMinus1
idx, found = m.search(homeIdx, key)
)
if !found {
return false
}
distance := idx - homeIdx
m.buckets[homeIdx].set(distance, false)
m.buckets[idx].release()
m.length--
return true
}
// Clear removes all key-value pairs from the map.
func (m *Hopscotch[K, V]) Clear() {
for i := range m.buckets {
m.buckets[i].hopInfo = 0
}
m.length = 0
}
// MaxLoad forces resizing if the ratio is reached.
// Useful values are in range [0.5-0.9].
// Returns ErrOutOfRange if `lf` is not in the open range (0.0,1.0).
func (m *Hopscotch[K, V]) MaxLoad(lf float32) error {
if lf <= 0.0 || lf >= 1.0 {
return fmt.Errorf("%f: %w", lf, ErrOutOfRange)
}
m.maxLoad = lf
m.nextResize = uintptr(float32(cap(m.buckets)) * lf)
return nil
}
// Load return the current load of the hash map.
func (m *Hopscotch[K, V]) Load() float32 {
return float32(m.length) / float32(cap(m.buckets))
}
// Size returns the number of items in the map.
func (m *Hopscotch[K, V]) Size() int {
return int(m.length)
}
// Copy returns a copy of this map.
func (m *Hopscotch[K, V]) Copy() *Hopscotch[K, V] {
newM := &Hopscotch[K, V]{
buckets: make([]hBucket[K, V], cap(m.buckets)),
capMinus1: m.capMinus1,
length: m.length,
hasher: m.hasher,
neighborhoodSize: m.neighborhoodSize,
maxLoad: m.maxLoad,
nextResize: m.nextResize,
}
copy(newM.buckets, m.buckets)
return newM
}
// Each calls 'fn' on every key-value pair in the hash map in no particular order.
// If 'fn' returns true, the iteration stops.
func (m *Hopscotch[K, V]) Each(fn func(key K, val V) bool) {
for i := range m.buckets {
if !m.buckets[i].isEmpty() {
if stop := fn(m.buckets[i].key, m.buckets[i].val); stop {
// stop iteration
return
}
}
}
}