Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

added staff model and page #8

Open
wants to merge 14 commits into
base: main
Choose a base branch
from
1 change: 1 addition & 0 deletions pesuacademy/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,5 @@
AddressDetails,
QualifyingExamination,
)
from .professor import Professor
from .seating_information import SeatingInformation
28 changes: 28 additions & 0 deletions pesuacademy/models/professor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
from typing import Optional


class Professor:
    """Plain data holder for one faculty member's staff profile.

    Every constructor argument is stored unchanged on an attribute of the
    same name; no validation or normalisation is performed.

    NOTE(review): ``designation`` is annotated as ``str``, but
    ``FacultyPageHandler.get_faculty_by_id`` passes a list of strings (the
    heading text split on commas) -- confirm which type is intended.
    """

    def __init__(
        self,
        name: str,
        designation: str,
        campus: str,
        department: str,
        email: str,
        domains: Optional[list] = None,
        responsibilities: Optional[list] = None,
        education: Optional[list] = None,
        experience: Optional[list] = None,
    ):
        """Store the profile fields.

        :param name: Name of the faculty member.
        :param designation: Designation(s) of the faculty member.
        :param campus: Campus the faculty member is listed under.
        :param department: Department the faculty member is listed under.
        :param email: Contact e-mail address.
        :param domains: Entries from the profile's teaching tab, if any.
        :param responsibilities: Responsibility entries, if any.
        :param education: Education entries, if any.
        :param experience: Experience entries, if any.
        """
        # Assignment order is deliberate: it fixes the key order of
        # ``self.__dict__`` and therefore the text produced by ``__str__``.
        self.name = name
        self.designation = designation
        self.education = education
        self.experience = experience
        self.department = department
        self.campus = campus
        self.domains = domains
        self.email = email
        self.responsibilities = responsibilities

    def __repr__(self):
        """Return ``Professor(field=value, ...)`` so containers print readably."""
        fields = ", ".join(
            f"{key}={value!r}" for key, value in self.__dict__.items()
        )
        return f"{type(self).__name__}({fields})"

    def __str__(self):
        """Return the instance attributes rendered as a dict string."""
        return f"{self.__dict__}"
1 change: 1 addition & 0 deletions pesuacademy/pages/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,5 @@
from .attendance import AttendancePageHandler
from .courses import CoursesPageHandler
from .profile import ProfilePageHandler
from .faculty import FacultyPageHandler
from .seating_information import SeatingInformationHandler
246 changes: 246 additions & 0 deletions pesuacademy/pages/faculty.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,246 @@
import logging
from typing import Optional

import requests_html
from bs4 import BeautifulSoup

from pesuacademy.models.professor import Professor


class FacultyPageHandler:
    """Scrape faculty listings and profile pages from https://staff.pes.edu.

    Listing pages are addressed by campus code and department slug; each
    listing yields faculty IDs, and each ID is resolved to a ``Professor``
    by parsing that faculty member's profile page.
    """

    # Short department code -> URL slug used when building listing URLs.
    departments = {
        "arch": "architecture",
        "bt": "biotechnology",
        "cv": "civil",
        "cse": "computer-science",
        "cse-aiml": "computer-science-AIML",
        "ca": "computer-application",
        "des": "design",
        "eee": "electrical-&-electronics",
        "ece": "electronics-&-communications",
        "law": "law",
        "me": "mechanical",
        "ms": "management-studies",
        "sh": "science-&-humanities",
        "com": "commerce",
        "psy": "psychology",
        "cie": "centre-for-innovation-&-entrepreneurship",
        "ps": "pharmaceutical-sciences",
    }
    # Campus codes accepted when building listing URLs.
    campuses = ["rr", "ec", "hn"]

    @staticmethod
    def get_urls_from_campus_and_department(
        campus: Optional[str], department: Optional[str]
    ):
        """Build the listing URLs that cover the requested campus/department.

        :param campus: Campus code from ``campuses``; falsy means every campus.
        :param department: Department code (a key of ``departments``); falsy
            means every department.
        :return: List of listing URLs. With neither filter a single URL for
            the whole staff directory is returned.
        :raises AssertionError: If a non-empty campus or department code is
            not recognised.
        """
        # Raised explicitly rather than via ``assert`` so the validation is
        # not stripped under ``python -O``; the exception type and messages
        # are the ones callers already see.
        if department and department not in FacultyPageHandler.departments:
            raise AssertionError("Invalid department provided.")
        if campus and campus not in FacultyPageHandler.campuses:
            raise AssertionError("Invalid campus provided.")

        if not department and not campus:
            # Formatting the template with empty parts would produce
            # "https://staff.pes.edu//atoz/" (double slash).
            return ["https://staff.pes.edu/atoz/"]

        base_url = "https://staff.pes.edu/{campus}/atoz/{department}"
        campus_codes = [campus] if campus else FacultyPageHandler.campuses
        department_codes = (
            [department] if department else list(FacultyPageHandler.departments)
        )
        return [
            base_url.format(
                campus=campus_code,
                department=FacultyPageHandler.departments[department_code],
            )
            for campus_code in campus_codes
            for department_code in department_codes
        ]

    @staticmethod
    def _extract_faculty_id(anchor) -> Optional[str]:
        """Return the second-to-last "/"-separated part of the anchor's href, or None."""
        if anchor is None:
            return None
        href = anchor.get("href")
        if not href:
            return None
        parts = href.split("/")
        return parts[-2] if len(parts) >= 2 else None

    @staticmethod
    def get_all_faculty_ids_from_url(
        session: requests_html.HTMLSession, url: str, page: int = 1
    ) -> list[str]:
        """Collect faculty IDs from a listing URL, following its pagination.

        Collection is best-effort: a non-200 response or any error while
        fetching or parsing a page stops pagination, and the IDs gathered up
        to that point are returned.

        :param session: Session used to perform the GET requests.
        :param url: Listing URL without the ``?page=`` query.
        :param page: Page number to start from.
        :return: Faculty IDs in listing order (possibly empty).
        """
        faculty_ids: list[str] = []
        visited_pages = set()
        current_page = page
        # Iterative rather than recursive so long listings cannot exhaust the
        # recursion limit; ``visited_pages`` guards against a "next" link
        # that points back to a page already seen.
        while current_page is not None and current_page not in visited_pages:
            visited_pages.add(current_page)
            current_url = f"{url}?page={current_page}"
            try:
                response = session.get(current_url)
                if response.status_code != 200:
                    break
                soup = BeautifulSoup(response.text, "html.parser")
                for div in soup.find_all("div", class_="staff-profile"):
                    faculty_id = FacultyPageHandler._extract_faculty_id(
                        div.find("a", class_="geodir-category-img_item")
                    )
                    if faculty_id is not None:
                        faculty_ids.append(faculty_id)
                next_link = soup.find("a", class_="nextposts-link")
                if next_link is None:
                    current_page = None
                else:
                    current_page = int(next_link["href"].split("?page=")[-1])
            except Exception:
                # Deliberately broad: one bad page should not abort the whole
                # crawl, but the failure is logged instead of being hidden.
                logging.getLogger(__name__).exception(
                    "Failed to collect faculty IDs from %s", current_url
                )
                break
        return faculty_ids

    @staticmethod
    def _get_section_items(soup, heading: str) -> list[str]:
        """Return the <p> texts of the list that follows each <h3> titled ``heading``."""
        items: list[str] = []
        for h3 in soup.find_all("h3"):
            if h3.get_text(strip=True) != heading:
                continue
            item_list = h3.find_next("ul", class_="ul-item-left")
            if item_list is None:
                continue
            for list_item in item_list.find_all("li"):
                paragraph = list_item.find("p")
                # Entries without a <p> are skipped instead of raising.
                if paragraph is not None:
                    items.append(paragraph.text.strip())
        return items

    @staticmethod
    def _get_contact_card_text(cards, index: int) -> Optional[str]:
        """Return the <p> text of ``cards[index]``, or None when it is absent."""
        if index >= len(cards):
            return None
        paragraph = cards[index].find("p")
        if paragraph is None:
            return None
        return paragraph.get_text(strip=True)

    @staticmethod
    def get_faculty_by_id(
        session: requests_html.HTMLSession, faculty_id: str
    ) -> Professor:
        """Fetch one faculty profile page and parse it into a ``Professor``.

        Optional parts of the page that are missing yield ``None`` (email,
        department, campus) or an empty list (designation, domains,
        education, experience, responsibilities) rather than an error.

        :param session: Session used to perform the GET request.
        :param faculty_id: ID appended to ``https://staff.pes.edu/``.
        :return: The parsed profile. ``designation`` is a list holding the
            comma-separated parts of the ``<h5>`` heading.
        :raises ConnectionError: If the page does not return HTTP 200.
        :raises ValueError: If the page has no ``<h4>`` name heading.
        """
        url = f"https://staff.pes.edu/{faculty_id}"
        response = session.get(url)
        if response.status_code != 200:
            raise ConnectionError(f"Failed to fetch URL: {url}")

        soup = BeautifulSoup(response.text, "html.parser")

        name_tag = soup.find("h4")
        if name_tag is None:
            raise ValueError(f"No faculty name found on page: {url}")
        name = name_tag.text.strip()

        designation_tag = soup.find("h5")
        if designation_tag is None:
            designation = []
        else:
            designation = [
                part.strip() for part in designation_tag.text.strip().split(",")
            ]

        domains = [
            item.text.strip()
            for item in soup.select(
                "#tab-teaching .bookings-item-content ul.ul-item-left li"
            )
        ]

        education = FacultyPageHandler._get_section_items(soup, "Education")
        experience = FacultyPageHandler._get_section_items(soup, "Experience")

        # The e-mail is the first link whose href and visible text both
        # mention "pes.edu"; None when no such link exists.
        email = next(
            (
                tag.get_text().strip()
                for tag in soup.find_all("a")
                if "pes.edu" in tag.get("href", "") and "pes.edu" in tag.get_text()
            ),
            None,
        )

        # The first matching card is read as the department and the second
        # as the campus; the class name is kept exactly as originally written.
        contact_cards = soup.find_all("li", class_="contat-card")
        department = FacultyPageHandler._get_contact_card_text(contact_cards, 0)
        campus = FacultyPageHandler._get_contact_card_text(contact_cards, 1)

        responsibilities = []
        responsibilities_div = soup.find("div", id="tab-responsibilities")
        if responsibilities_div is not None:
            responsibilities = [p.text for p in responsibilities_div.find_all("p")]

        return Professor(
            name=name,
            designation=designation,
            education=education,
            experience=experience,
            department=department,
            campus=campus,
            domains=domains,
            email=email,
            responsibilities=responsibilities,
        )

    def get_faculty_by_name(
        self, name: str, session: requests_html.HTMLSession
    ) -> list[Professor]:
        """Search the staff directory by name and fetch every matching profile.

        :param name: Search text; it is sent as the ``search`` query
            parameter so spaces and special characters are URL-encoded.
        :param session: Session used to perform the GET requests.
        :return: One ``Professor`` per search result (possibly empty).
        :raises ConnectionError: Propagated from ``get_faculty_by_id`` when a
            profile page cannot be fetched.
        """
        response = session.get(
            "https://staff.pes.edu/atoz/list/", params={"search": name}
        )
        if response.status_code != 200:
            logging.getLogger(__name__).warning(
                "Faculty search for %r returned HTTP %s", name, response.status_code
            )
            return []

        soup = BeautifulSoup(response.text, "html.parser")
        faculty_ids = []
        for div in soup.find_all("div", class_="col-md-3 left-padding-0"):
            faculty_id = FacultyPageHandler._extract_faculty_id(
                div.find("a", class_="chat-contacts-item")
            )
            if faculty_id is not None:
                faculty_ids.append(faculty_id)

        return [
            self.get_faculty_by_id(session, faculty_id) for faculty_id in faculty_ids
        ]

    def get_page(
        self,
        session: requests_html.HTMLSession,
        campus: Optional[str] = None,
        department: Optional[str] = None,
        designation: Optional[str] = None,
        name: Optional[str] = None,
    ) -> list[Professor]:
        """Return faculty profiles, either by name search or by listing crawl.

        When ``name`` is given, the name search is used and the results are
        returned as-is; ``campus`` and ``department`` are still validated but
        no campus/department/designation filtering is applied to them.
        Otherwise every listing URL for the campus/department combination is
        crawled and profiles are kept when ``designation`` is None or equals
        one of the profile's designation entries.

        :param session: Session used to perform the GET requests.
        :param campus: Optional campus code.
        :param department: Optional department code.
        :param designation: Optional designation to match exactly.
        :param name: Optional name to search for.
        :return: The matching profiles.
        :raises AssertionError: If ``campus`` or ``department`` is invalid.
        :raises ConnectionError: If a profile page cannot be fetched.
        """
        # Validate the filters up front, even for a name search, so invalid
        # codes are always reported.
        urls = self.get_urls_from_campus_and_department(campus, department)
        if name:
            return self.get_faculty_by_name(name, session)

        professors: list[Professor] = []
        for url in urls:
            for faculty_id in self.get_all_faculty_ids_from_url(session, url, page=1):
                professor = self.get_faculty_by_id(session, faculty_id)
                if designation is None or designation in professor.designation:
                    professors.append(professor)
        return professors


29 changes: 26 additions & 3 deletions pesuacademy/pesuacademy.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,16 @@
from bs4 import BeautifulSoup

from pesuacademy import util
from pesuacademy.models.seating_information import SeatingInformation
from pesuacademy.util.page import PageHandler
from .exceptions import CSRFTokenError, AuthenticationError
from .models import Profile, ClassAndSectionInfo, Course, Announcement
from .models import (
Profile,
ClassAndSectionInfo,
Course,
Announcement,
Professor,
SeatingInformation,
)


class PESUAcademy:
Expand Down Expand Up @@ -157,6 +163,23 @@ def attendance(self, semester: Optional[int] = None) -> dict[int, list[Course]]:
attendance_info = self.page_handler.get_attendance(semester)
return attendance_info

def faculty(
    self,
    campus: Optional[str] = None,
    department: Optional[str] = None,
    designation: Optional[str] = None,
) -> list[Professor]:
    """
    Fetch faculty information, optionally narrowed by the given filters.

    The arguments are forwarded unchanged to the page handler's
    ``get_faculty``.

    :param campus: The campus to restrict the results to.
    :param department: The department to restrict the results to.
    :param designation: The designation to restrict the results to.
    :return: Whatever the page handler returns for these filters.
    """
    return self.page_handler.get_faculty(campus, department, designation)

def seating_information(self) -> list[SeatingInformation]:
"""
Get the seating information of the currently authenticated user.
Expand All @@ -165,7 +188,7 @@ def seating_information(self) -> list[SeatingInformation]:
"""
if not self._authenticated:
raise AuthenticationError("You need to authenticate first.")
seating_info = self.page_handler.get_seating_info()
seating_info = self.page_handler.get_seating_information()
return seating_info

def announcements(
Expand Down
16 changes: 15 additions & 1 deletion pesuacademy/util/page.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ def __init__(self, session: requests_html.HTMLSession):
self.course_page_handler = pages.CoursesPageHandler()
self.attendance_page_handler = pages.AttendancePageHandler()
self.profile_page_handler = pages.ProfilePageHandler()
self.faculty_page_handler = pages.FacultyPageHandler()
self.announcement_handler = pages.AnnouncementPageHandler()

def set_semester_id_to_number_mapping(self, csrf_token: str):
Expand Down Expand Up @@ -82,7 +83,20 @@ def get_attendance(self, semester: Optional[int] = None):
semester_ids = self.get_semester_ids_from_semester_number(semester)
return self.attendance_page_handler.get_page(self.__session, semester_ids)

def get_seating_info(self):
def get_faculty(
    self,
    campus: Optional[str] = None,
    department: Optional[str] = None,
    designation: Optional[str] = None,
    name: Optional[str] = None,
):
    """
    Fetch faculty information through the faculty page handler.

    :param campus: Optional campus filter, forwarded unchanged.
    :param department: Optional department filter, forwarded unchanged.
    :param designation: Optional designation filter, forwarded unchanged.
    :param name: Optional name to search for. The faculty page handler's
        ``get_page`` accepts it, but it was previously never forwarded, which
        left the name search unreachable from here.
    :return: The result of the faculty page handler's ``get_page``.
    """
    return self.faculty_page_handler.get_page(
        self.__session,
        campus,
        department,
        designation,
        name=name,  # keyword so it cannot be confused with a positional filter
    )

def get_seating_information(self):
    """Return the seating information page result for this handler's session."""
    seating_handler = pages.SeatingInformationHandler
    return seating_handler.get_page(self.__session)

def get_announcements(
Expand Down
Loading