-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathaptstore_popularity_prediction_algorithm.py
150 lines (111 loc) · 4.08 KB
/
aptstore_popularity_prediction_algorithm.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
import random as rd
def predict_popularity(accessed_files, deleted_files, popularity,block_numbers, l, c, s, Pmin, Pmax):
P = {}
IP = 0
Pred = {}
avg_popularity = find_average_popularity(popularity)
for f in set(accessed_files):
P[f] = [0]
for k in range(0, len(accessed_files)-1):
f = accessed_files[k]
i = len(P[f])-1
if(i==0):
P[f].append(avg_popularity)
continue
else:
a = find_access_interval(f,k,accessed_files)
b = find_block_numbers(f, block_numbers)
calc = P[f][i] + c / (a*b*l*P[f][i])
P[f].append(calc)
if(P[f][i]<Pmin):
P[f][i] = Pmin
if(P[f][i]>Pmax):
P[f][i] = Pmax
IP = IP + P[f][i+1] - P[f][i]
for f in deleted_files:
#avg = find_average_popularity(popularity)
IP = IP + avg_popularity - popularity[f]
del P[f]
del popularity[f]
del block_numbers[f]
MIP = IP / len(P)
for f in P:
i = len(P[f]) - 1
P[f][i] = P[f][i] - MIP/s
Pred[f] = P[f][i] + popularity[f]
popularity[f] = P[f][i]
return Pred
#--------------------------HELPER FUNCTIONS-----------------------------
def find_average_popularity(popularity):
sum = 0
for f in popularity:
sum = sum + popularity[f]
return sum/len(popularity)
def find_access_interval(f,k,files):
key = accessed_files[k]
k -= 1
count = 1
while(k>=0):
if(key==files[k]):
break
k -= 1
count += 1
return count * 10
def find_block_numbers(f, block_number):
return block_number[f]
#==========================FOR PRINTING==============================
def print_popularity(popularity, block_numbers):
print("Files \t\t Popularity \t Number_of_blocks")
print("-----------------------------------------------")
for f in popularity:
print("file", f, ":\t", round(popularity[f],3),"\t\t", block_numbers[f] )
def print_predicted_popularity(pred):
print("Files \t\t Predicted_Popularity")
print("------------------------------------")
for f in pred:
print("file", f, ":\t", round(pred[f],3))
# =========================SIMULATION FUNCTIONS===================================
def simulate_popularity(accessed_files, Pmin, Pmax):
p = {}
for f in set(accessed_files):
p[f] = rd.uniform(Pmin, Pmax)
return p
def simulate_block_number(accessed_files, Bmin, Bmax):
b = {}
for f in set(accessed_files):
b[f] = rd.randint(Bmin, Bmax)
return b
def simulate_deleted_files(accessed_files):
deleted = set()
files = list(set(accessed_files))
n = rd.randint(1, int(len(files)/3) + 1)
for i in range(0, n):
x = rd.choice(files)
deleted.add(x)
return deleted
def simulate_accessed_files(number_of_files):
accessed_files = []
n = rd.randint(number_of_files+5, 2 * number_of_files)
for i in range(0, n):
accessed_files.append(rd.randint(1, number_of_files))
return accessed_files
#============================SIMULATION============================
Pmin = 0.1
Pmax = 0.9
accessed_files = simulate_accessed_files(9)
popularity = simulate_popularity(accessed_files, Pmin, Pmax)
Bmin = 1
Bmax = 50
block_numbers = simulate_block_number(accessed_files, Bmin, Bmax)
c = 10000
l = 9
s = 5
print_popularity(popularity, block_numbers)
print()
print("Accessing files: ", accessed_files)
deleted_files = list(simulate_deleted_files(accessed_files))
print("Deleting files: ", deleted_files)
print()
pred = predict_popularity(accessed_files, deleted_files, popularity,block_numbers, l, c, s, Pmin, Pmax)
print()
print_predicted_popularity(pred)