-
Notifications
You must be signed in to change notification settings - Fork 22
/
Copy pathwamspam.py
342 lines (289 loc) · 14.1 KB
/
wamspam.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
"""
log in to my.unimelb results page using a student account, check for a results
update, and then send the student a notification if anything changed
:author: Matthew Farrugia-Roberts and contributors
"""
import json
import time
from datetime import datetime
import getpass
import requests
from bs4 import BeautifulSoup
import messages
# # #
# SCRIPT CONFIGURATION
#
print("Configuring script...")
# set this to True if you would like the script to repeatedly check the results
# page, or False if you only want it to run once
CHECK_REPEATEDLY = True
# if you set the script to check repeatedly above, you can configure the delay
# between WAM checks in minutes here
DELAY_BETWEEN_CHECKS = 60 # minutes
# leave these lines unchanged to be prompted for your username and password
# every time you run the script, or just hard code your credentials here if
# you're lazy (but then be careful not to let anyone else see this file)
UNIMELB_USERNAME = input("Username: ")
UNIMELB_PASSWORD = getpass.getpass()
# your results will be stored in this file in between checks
RESULTS_FILENAME = "results.txt"
# by default, the script will watch all of your degrees. you can alter this
# setting here by providing a set of degree indexes (based on the order from
# the results page, starting with 0) or "all" for all degrees:
DEGREES_TO_WATCH = "all"
# for example, to watch only your first degree:
# DEGREES_TO_WATCH = {0}
# for students with only a single degree, this option is ignored
# for students with a single degree, we can't scrape the degree name from the
# degree list, so we use a default
# you can optionally personalise your degree name here (e.g. replace with
# "Bachelor of Science (Computing and Software Systems)":
DEFAULT_DEGREE_NAME = "degree"
# # #
# WEB SCRAPING CONFIGURATION
#
# select the HTML parser for BeautifulSoup to use. in most cases, you won't
# have to touch this.
BS4_PARSER = "html.parser"
# # #
# NOTIFICATION CONFIGURATION
#
# we'll use a multi-notifier to allow for any number of
# notification methods (added below)
from notify.by_multiple import MultiNotifier
NOTIFIER = MultiNotifier()
# choose one or more notification methods to use when a change is detected.
# in most cases you can configure the notification method with the required
# secrets by including them in the corresponding script, or by leaving the
# scripts alone and typing them in at start-up time (see README for per-method
# instructions, and remember to keep your secrets safe!)
print("Configuring chosen notification method(s)...")
# option 0: student email notification, via SMTP
from notify.by_email import SMTPGmailNotifier
# the script will send email from and to your student email address
# by default.
# if you need to use an app-specific password to get around 2FA on
# your email account, or other authentication issues, you can set it
# here as the value of password.
GMAIL_ADDRESS = UNIMELB_USERNAME + "@student.unimelb.edu.au"
GMAIL_PASSWORD = UNIMELB_PASSWORD # or app-specific password
NOTIFIER.add_notifier(SMTPGmailNotifier(
address=GMAIL_ADDRESS,
password=GMAIL_PASSWORD))
# option 1: student email notification, via Gmail's API + OAuth
# from notify.by_email_oauth import GmailAPINotifier
# GMAIL_ADDRESS = UNIMELB_USERNAME + "@student.unimelb.edu.au"
# NOTIFIER.add_notifier(GmailAPINotifier(
# address=GMAIL_ADDRESS))
# option 2: wechat notification via ServerChan
# uncomment below and configure to enable
# from notify.by_wechat import ServerChanNotifier
# SERVERCHAN_API_KEY = # put API key here, as a string (see README)
# NOTIFIER.add_notifier(ServerChanNotifier(
# apikey=SERVERCHAN_API_KEY))
# option 3: telegram notification via a telegram bot
# uncomment below and configure to enable
# from notify.by_telegram import TelegramBotNotifier
# TELEGRAM_ACCESS_TOKEN = # put access token string here (see README)
# TELEGRAM_DESTINATION = # put destination chat name here (see README)
# NOTIFIER.add_notifier(TelegramBotNotifier(
# token=TELEGRAM_ACCESS_TOKEN,
# chat=TELEGRAM_DESTINATION))
# option 4: push notification via pushbullet
# uncomment below and configure to enable
# from notify.by_push import PushbulletNotifier
# PUSHBULLET_ACCESS_TOKEN = # put access token here (string) (see README)
# NOTIFIER.add_notifier(PushbulletNotifier(
# token=PUSHBULLET_ACCESS_TOKEN))
# option 5: ifttt notification via triggering a webhook
# uncomment below and configure to enable
# from notify.by_ifttt import IFTTTWebhookNotifier
# IFTTT_WEBHOOK_KEY = # put webhook key string here (see README)
# NOTIFIER.add_notifier(IFTTTWebhookNotifier(
# key=IFTTT_WEBHOOK_KEY))
# option 6: desktop notifications using notify2 python library
# uncomment below to enable
# from notify.by_desktop import DesktopNotifier
# NOTIFIER.add_notifier(DesktopNotifier())
# option 7: notifications via appending to a local file
# uncomment below and configure to enable
# from notify.by_logfile import LogFileNotifier
# LOGFILE_FILEPATH = # put filepath string here (see README)
# NOTIFIER.add_notifier(LogFileNotifier(
# filepath=LOGFILE_FILEPATH))
# option 8: push notification via slack incoming webhooks
# uncomment below and configure to enable
# from notify.by_slack import SlackAppNotifier
# SLACK_APP_WEBHOOK = # put webhook key string here (see README)
# NOTIFIER.add_notifier(SlackAppNotifier(
# hook_url=SLACK_APP_WEBHOOK))
# let's get to it!
def main():
"""Run the checking script, once or forever, depending on configuration."""
# send a test message to make sure the notification configuration works
NOTIFIER.notify(*messages.hello_message(delay=DELAY_BETWEEN_CHECKS))
# conduct the first check! don't catch any exceptions here, if the check
# fails this first time, it's likely to be a configuration problem (e.g.
# wrong username/password) so we should crash the script to let the user
# know.
poll_and_notify()
while CHECK_REPEATEDLY:
print("Completed a check at", datetime.now().strftime("%H:%M:%S"))
print("Sleeping", DELAY_BETWEEN_CHECKS, "minutes before next check.")
print("--------------------------------------")
time.sleep(DELAY_BETWEEN_CHECKS * 60) # seconds
print("Waking up at", datetime.now().strftime("%H:%M:%S"))
try:
poll_and_notify()
except Exception as e:
# if we get an exception now, it may have been some temporary
# problem accessing the website, let's just ignore it and try
# again next time.
print("Exception encountered:")
print(f"{e.__class__.__name__}: {e}")
print("Hopefully it won't happen again. Continuing.")
def poll_and_notify():
"""
Check for updated results, and send a notification if a change is detected.
"""
# check the results page for the latest results
new_results = scrape_results(UNIMELB_USERNAME, UNIMELB_PASSWORD)
# load the previous results from file
try:
with open(RESULTS_FILENAME) as resultsfile:
old_results = json.load(resultsfile)
except:
# the first run, there probably won't be such a file
# imagine a default
old_results = {DEFAULT_DEGREE_NAME: {"wam": None, "results": []}}
# compare the results for each degree:
degrees = new_results.keys() | old_results.keys()
for degree in degrees:
if degree not in new_results:
# maybe by error, the degree seems to have been removed
# this is more likely to have been a scraper error than an actual
# change, so we'll ignore it, and forward the old results
print("Missing results for", degree)
new_results[degree] = old_results[degree]
elif degree not in old_results:
# still unlikely during results period; but a new degree has
# appeared! send the initialisation message
print("Found new results for", degree)
results = new_results[degree]
NOTIFIER.notify(*messages.initial_message(degree, results))
elif new_results[degree] == {"wam": None, "results": []}:
# maybe by error, the degree seems to have had its data removed
# this is more likely to have been a scraper error than an actual
# change, so we'll ignore it, and forward the old results
print("Missing results for", degree)
new_results[degree] = old_results[degree]
# note: in the case where there were just no results yet, this
# will behave correctly by not notifying the user
else:
# more likely, we have seen the degree before, but the results may
# have changed:
old = old_results[degree]
new = new_results[degree]
if old != new:
# compute difference, and send notification
print("Found updated results for", degree)
NOTIFIER.notify(*messages.update_message(degree, old, new))
else:
# no change to results for this degree. ignore it!
print("No change for", degree)
# update the results file for next time
with open(RESULTS_FILENAME, 'w') as resultsfile:
json.dump(new_results, resultsfile, indent=2)
class InvalidLoginException(Exception):
"""Represent a login form validation error"""
def scrape_results(username, password):
with requests.Session() as session:
# step 1. load a login page to initialse session
print("Logging in to the results page")
response = session.get("https://prod.ss.unimelb.edu.au"
"/student/SM/ResultsDtls10.aspx?f=$S1.EST.RSLTDTLS.WEB")
soup = BeautifulSoup(response.content, BS4_PARSER)
# step 2. fill in login form and authenticate, reaching results page
# get the form's hidden field values into the POST data
hidden_fields = soup.find_all('input', type='hidden')
login_form = {tag['name']: tag['value'] for tag in hidden_fields}
# simulate filling in the form with username and password,
# and pressing the login button
login_form['ctl00$Content$txtUserName$txtText'] = username
login_form['ctl00$Content$txtPassword$txtText'] = password
login_form['__EVENTTARGET'] = "ctl00$Content$cmdLogin"
# post the form, with a URL that will take us back to the results page
response = session.post("https://prod.ss.unimelb.edu.au/student/"
"login.aspx?f=$S1.EST.RSLTDTLS.WEB&ReturnUrl=%2fstudent%2fSM%2f"
"ResultsDtls10.aspx%3ff%3d%24S1.EST.RSLTDTLS.WEB", data=login_form)
# detect a potential failed login
soup = BeautifulSoup(response.content, BS4_PARSER)
if soup.find(id="ctl00_Content_valErrors"):
raise InvalidLoginException("Your login attempt was not successful."
" Please check your details and try again.")
# now `soup` should be the parsed results page or multi-degree page...
# step 3. either way, we are ready to start building the transcript!
transcript = {}
title = soup.find(id="ctl00_h1PageTitle")
if title.text == "Results > Choose a Study Plan":
print("Multiple degrees detected. Walking results pages...")
grid = soup.find("table", id="ctl00_Content_grdResultPlans")
rows = grid.find_all("tr")[1:] # skip header
cells = [row.find_all("td")[2].text for row in rows]
# get the form's hidden field values into the POST data
hidden_fields = soup.find_all('input', type='hidden')
degree_form = {tag['name']: tag['value'] for tag in hidden_fields}
for degree_index, degree_name in enumerate(cells):
if DEGREES_TO_WATCH != "all":
if degree_index not in DEGREES_TO_WATCH:
print(f"Skipping degree {degree_index}: {degree_name}")
continue
print(f"Loading results for {degree_index}: {degree_name}")
# now simulate pressing the required degree button
degree_form['__EVENTTARGET'] = "ctl00$Content$grdResultPlans"
degree_form['__EVENTARGUMENT'] = f"ViewResults${degree_index}"
# post the form, to take us to the results page proper
response = session.post("https://prod.ss.unimelb.edu.au/"
"student/SM/ResultsDtls10.aspx?f=$S1.EST.RSLTDTLS.WEB",
data=degree_form)
soup = BeautifulSoup(response.content, BS4_PARSER)
# now `soup` should be the parsed results page for this degree
transcript[degree_name] = parse_page(soup)
else:
print("Single degree detected. Parsing results page directly...")
# in this case `soup` is already the parsed results page for the
# only degree
transcript[DEFAULT_DEGREE_NAME] = parse_page(soup)
return transcript
def parse_page(soup):
# step 4. extract the results data from the results page, as required
print("Extracting WAM")
wam_para = soup.find(class_='UMWAMText')
if wam_para is not None:
wam_text = wam_para.find('b').text
else:
print("Couldn't find WAM (no WAM yet?)")
wam_text = None
print("Extracting subject results")
results = []
results_table = soup.find("table", id='ctl00_Content_grdResultDetails')
if results_table is None:
print("Couldn't find results (no results yet?)")
else:
rows = results_table.find_all("tr")[1:] # skip header
for row in rows:
cells = [cell.text.strip() for cell in row.find_all("td")]
# cells is a list of strings containing this result's details e.g.:
# ['2019', 'Semester 1', 'COMP90045', 'PLI', '2', '99', 'H1', '12.500']
# extract the relevant details: (^ btw this is 'version')
result = {
"subject": f"{cells[2]} {cells[3]}",
"date": f"{cells[0]}, {cells[1]}",
"mark": cells[5],
"grade": cells[6],
"credits": cells[8]
}
results.append(result)
return {"wam": wam_text, "results": results}
if __name__ == '__main__':
main()