diff --git a/pesuacademy/models/__init__.py b/pesuacademy/models/__init__.py
index 525e957..dbfb542 100644
--- a/pesuacademy/models/__init__.py
+++ b/pesuacademy/models/__init__.py
@@ -10,4 +10,5 @@
     AddressDetails,
     QualifyingExamination,
 )
+from .professor import Professor
 from .seating_information import SeatingInformation
diff --git a/pesuacademy/models/professor.py b/pesuacademy/models/professor.py
new file mode 100644
index 0000000..6644088
--- /dev/null
+++ b/pesuacademy/models/professor.py
@@ -0,0 +1,34 @@
+from typing import Optional
+
+
+class Professor:
+    """Profile of a faculty member scraped from the PES staff directory."""
+
+    def __init__(
+        self,
+        name: str,
+        designation: list[str],
+        campus: Optional[str],
+        department: Optional[str],
+        email: Optional[str],
+        domains: Optional[list[str]] = None,
+        responsibilities: Optional[list[str]] = None,
+        education: Optional[list[str]] = None,
+        experience: Optional[list[str]] = None,
+    ):
+        # Assignment order is the key order that __str__ prints; keep it stable.
+        self.name = name
+        self.designation = designation
+        self.education = education
+        self.experience = experience
+        self.department = department
+        self.campus = campus
+        self.domains = domains
+        self.email = email
+        self.responsibilities = responsibilities
+
+    def __str__(self) -> str:
+        return f"{self.__dict__}"
+
+    def __repr__(self) -> str:
+        return f"{type(self).__name__}({self.__dict__})"
diff --git a/pesuacademy/pages/__init__.py b/pesuacademy/pages/__init__.py
index 87b6160..0a5c881 100644
--- a/pesuacademy/pages/__init__.py
+++ b/pesuacademy/pages/__init__.py
@@ -2,4 +2,5 @@
 from .attendance import AttendancePageHandler
 from .courses import CoursesPageHandler
 from .profile import ProfilePageHandler
+from .faculty import FacultyPageHandler
 from .seating_information import SeatingInformationHandler
diff --git a/pesuacademy/pages/faculty.py b/pesuacademy/pages/faculty.py
new file mode 100644
index 0000000..8b5bcae
--- /dev/null
+++ b/pesuacademy/pages/faculty.py
@@ -0,0 +1,276 @@
+import logging
+from typing import Optional
+
+import requests_html
+from bs4 import BeautifulSoup
+
+from pesuacademy.models.professor import Professor
+
+logger = logging.getLogger(__name__)
+
+
+class FacultyPageHandler:
+    """Scrape faculty listings and profile pages from https://staff.pes.edu."""
+
+    BASE_URL = "https://staff.pes.edu"
+
+    # Short department code -> URL path segment used by the staff site.
+    departments = {
+        "arch": "architecture",
+        "bt": "biotechnology",
+        "cv": "civil",
+        "cse": "computer-science",
+        "cse-aiml": "computer-science-AIML",
+        "ca": "computer-application",
+        "des": "design",
+        "eee": "electrical-&-electronics",
+        "ece": "electronics-&-communications",
+        "law": "law",
+        "me": "mechanical",
+        "ms": "management-studies",
+        "sh": "science-&-humanities",
+        "com": "commerce",
+        "psy": "psychology",
+        "cie": "centre-for-innovation-&-entrepreneurship",
+        "ps": "pharmaceutical-sciences",
+    }
+    campuses = ["rr", "ec", "hn"]
+
+    @staticmethod
+    def get_urls_from_campus_and_department(
+        campus: Optional[str], department: Optional[str]
+    ) -> list[str]:
+        """
+        Build the listing URLs to crawl for the given filters.
+
+        :param campus: A code from ``campuses``, or None for every campus.
+        :param department: A key of ``departments``, or None for every department.
+        :return: The listing URLs covering the requested campus and department.
+        :raises ValueError: If the campus or department code is not recognised.
+        """
+        # Raise rather than assert: assertions are stripped under ``python -O``.
+        if department and department not in FacultyPageHandler.departments:
+            raise ValueError("Invalid department provided.")
+        if campus and campus not in FacultyPageHandler.campuses:
+            raise ValueError("Invalid campus provided.")
+
+        base_url = FacultyPageHandler.BASE_URL
+        if not campus and not department:
+            # One unfiltered listing; avoids the empty "//atoz/" path segment.
+            return [f"{base_url}/atoz/"]
+
+        campus_codes = [campus] if campus else FacultyPageHandler.campuses
+        department_codes = (
+            [department] if department else list(FacultyPageHandler.departments)
+        )
+        return [
+            f"{base_url}/{campus_code}/atoz/"
+            f"{FacultyPageHandler.departments[department_code]}"
+            for campus_code in campus_codes
+            for department_code in department_codes
+        ]
+
+    @staticmethod
+    def get_all_faculty_ids_from_url(
+        session: requests_html.HTMLSession, url: str, page: int = 1
+    ) -> list[str]:
+        """
+        Collect the faculty IDs from a paginated listing, following "next" links.
+
+        Best-effort: a failed request or an unparsable page ends the crawl and
+        the IDs gathered so far are returned.
+
+        :param session: The session used to issue requests.
+        :param url: The listing URL, without the ``page`` query parameter.
+        :param page: The page number to start from.
+        :return: The faculty IDs found on the starting page and every later page.
+        """
+        faculty_ids: list[str] = []
+        visited_pages: set[int] = set()
+        next_page: Optional[int] = page
+        # Iterate instead of recursing so long listings cannot hit the recursion
+        # limit; ``visited_pages`` guards against a "next" link that loops back.
+        while next_page is not None and next_page not in visited_pages:
+            visited_pages.add(next_page)
+            current_url = f"{url}?page={next_page}"
+            logger.debug("Fetching faculty listing page: %s", current_url)
+            try:
+                response = session.get(current_url)
+                if response.status_code != 200:
+                    break
+                soup = BeautifulSoup(response.text, "html.parser")
+                for div in soup.find_all("div", class_="staff-profile"):
+                    anchor = div.find("a", class_="geodir-category-img_item")
+                    if anchor is not None and anchor.get("href"):
+                        # The ID is the second-to-last "/"-separated segment.
+                        faculty_ids.append(anchor["href"].split("/")[-2])
+                next_link = soup.find("a", class_="nextposts-link")
+                next_page = (
+                    int(next_link["href"].split("?page=")[-1]) if next_link else None
+                )
+            except Exception:
+                # Keep the crawl best-effort, but leave a trace of what failed.
+                logger.exception("Failed to read listing page: %s", current_url)
+                break
+        return faculty_ids
+
+    @staticmethod
+    def _get_section_items(soup: BeautifulSoup, heading: str) -> list[str]:
+        """Return the ``<p>`` text of each list item below a matching ``<h3>``."""
+        items: list[str] = []
+        for h3 in soup.find_all("h3"):
+            if h3.get_text(strip=True) != heading:
+                continue
+            section_list = h3.find_next("ul", class_="ul-item-left")
+            if section_list is None:
+                continue
+            for item in section_list.find_all("li"):
+                paragraph = item.find("p")
+                # Skip malformed entries instead of failing the whole profile.
+                if paragraph is not None:
+                    items.append(paragraph.text.strip())
+        return items
+
+    @staticmethod
+    def _get_contact_card_text(soup: BeautifulSoup, index: int) -> Optional[str]:
+        """Return the text of the ``index``-th contact card, or None if absent."""
+        # NOTE(review): "contat-card" (sic) presumably mirrors the site's markup;
+        # confirm against the live page before "correcting" the spelling.
+        cards = soup.find_all("li", class_="contat-card")
+        if index >= len(cards):
+            return None
+        paragraph = cards[index].find("p")
+        return paragraph.get_text(strip=True) if paragraph is not None else None
+
+    @staticmethod
+    def get_faculty_by_id(
+        session: requests_html.HTMLSession, faculty_id: str
+    ) -> Professor:
+        """
+        Fetch and parse a single faculty profile page.
+
+        :param session: The session used to issue the request.
+        :param faculty_id: The faculty ID as it appears in the profile URL.
+        :return: The parsed profile; optional fields missing from the page are
+            None (department, campus, email) or an empty list.
+        :raises ConnectionError: If the profile page does not return HTTP 200.
+        :raises ValueError: If the page has no ``<h4>`` name heading.
+        """
+        url = f"{FacultyPageHandler.BASE_URL}/{faculty_id}"
+        response = session.get(url)
+        if response.status_code != 200:
+            raise ConnectionError(f"Failed to fetch URL: {url}")
+
+        soup = BeautifulSoup(response.text, "html.parser")
+        name_tag = soup.find("h4")
+        if name_tag is None:
+            raise ValueError(f"No faculty name found at URL: {url}")
+
+        designation_tag = soup.find("h5")
+        designation = (
+            [part.strip() for part in designation_tag.text.strip().split(",")]
+            if designation_tag is not None
+            else []
+        )
+        domains = [
+            item.text.strip()
+            for item in soup.select(
+                "#tab-teaching .bookings-item-content ul.ul-item-left li"
+            )
+        ]
+        # First link whose target and text both mention pes.edu, else None.
+        email = next(
+            (
+                tag.get_text()
+                for tag in soup.find_all("a")
+                if "pes.edu" in tag.get("href", "") and "pes.edu" in tag.get_text()
+            ),
+            None,
+        )
+        responsibilities_div = soup.find("div", id="tab-responsibilities")
+        responsibilities = (
+            [p.text for p in responsibilities_div.find_all("p")]
+            if responsibilities_div is not None
+            else []
+        )
+
+        return Professor(
+            name=name_tag.text.strip(),
+            designation=designation,
+            education=FacultyPageHandler._get_section_items(soup, "Education"),
+            experience=FacultyPageHandler._get_section_items(soup, "Experience"),
+            # The first contact card is read as department, the second as campus.
+            department=FacultyPageHandler._get_contact_card_text(soup, 0),
+            campus=FacultyPageHandler._get_contact_card_text(soup, 1),
+            domains=domains,
+            email=email,
+            responsibilities=responsibilities,
+        )
+
+    def get_faculty_by_name(
+        self, name: str, session: requests_html.HTMLSession
+    ) -> list[Professor]:
+        """
+        Search the staff directory by name and fetch every matching profile.
+
+        :param name: The search term.
+        :param session: The session used to issue requests.
+        :return: The profiles of all faculty listed in the search result, or an
+            empty list if the search request fails.
+        :raises ConnectionError: If a matching profile page cannot be fetched.
+        """
+        # Pass the term via ``params`` so spaces and special characters are
+        # URL-encoded rather than interpolated raw into the query string.
+        response = session.get(
+            f"{FacultyPageHandler.BASE_URL}/atoz/list/", params={"search": name}
+        )
+        if response.status_code != 200:
+            logger.warning(
+                "Faculty search for %r failed: HTTP %s", name, response.status_code
+            )
+            return []
+
+        soup = BeautifulSoup(response.text, "html.parser")
+        faculty_ids = []
+        for div in soup.find_all("div", class_="col-md-3 left-padding-0"):
+            anchor = div.find("a", class_="chat-contacts-item")
+            if anchor is not None and anchor.get("href"):
+                faculty_ids.append(anchor["href"].split("/")[-2])
+        return [
+            self.get_faculty_by_id(session, faculty_id) for faculty_id in faculty_ids
+        ]
+
+    def get_page(
+        self,
+        session: requests_html.HTMLSession,
+        campus: Optional[str] = None,
+        department: Optional[str] = None,
+        designation: Optional[str] = None,
+        name: Optional[str] = None,
+    ) -> list[Professor]:
+        """
+        Fetch faculty profiles, either by name search or by listing filters.
+
+        :param session: The session used to issue requests.
+        :param campus: Campus code to restrict the listing to, or None for all.
+        :param department: Department code to restrict the listing to, or None.
+        :param designation: If given, keep only faculty with this designation.
+        :param name: If given, search by name instead; the other filters are
+            validated but not applied to the search result.
+        :return: The matching faculty profiles.
+        :raises ValueError: If the campus or department code is not recognised.
+        """
+        # Built first so invalid filters are rejected even for a name search.
+        urls = self.get_urls_from_campus_and_department(campus, department)
+        if name:
+            return self.get_faculty_by_name(name, session)
+
+        professors: list[Professor] = []
+        for url in urls:
+            for faculty_id in self.get_all_faculty_ids_from_url(session, url, page=1):
+                professor = self.get_faculty_by_id(session, faculty_id)
+                # ``professor.designation`` is a list, so this is an exact match
+                # against one of its comma-separated entries.
+                if designation is None or designation in professor.designation:
+                    professors.append(professor)
+        return professors
diff --git a/pesuacademy/pesuacademy.py b/pesuacademy/pesuacademy.py
index bf5ced0..2b47a97 100644
--- a/pesuacademy/pesuacademy.py
+++ b/pesuacademy/pesuacademy.py
@@ -4,10 +4,16 @@
 from bs4 import BeautifulSoup
 
 from pesuacademy import util
-from pesuacademy.models.seating_information import SeatingInformation
 from pesuacademy.util.page import PageHandler
 from .exceptions import CSRFTokenError, AuthenticationError
-from .models import Profile, ClassAndSectionInfo, Course, Announcement
+from .models import (
+    Profile,
+    ClassAndSectionInfo,
+    Course,
+    Announcement,
+    Professor,
+    SeatingInformation,
+)
 
 
 class PESUAcademy:
@@ -157,6 +163,27 @@ def attendance(self, semester: Optional[int] = None) -> dict[int, list[Course]]:
         attendance_info = self.page_handler.get_attendance(semester)
         return attendance_info
 
+    def faculty(
+        self,
+        campus: Optional[str] = None,
+        department: Optional[str] = None,
+        designation: Optional[str] = None,
+        name: Optional[str] = None,
+    ) -> list[Professor]:
+        """
+        Get the faculty information of the university.
+
+        :param campus: The campus name.
+        :param department: The department name.
+        :param designation: The designation of the faculty.
+        :param name: A name to search for; if given, the other filters are ignored.
+        :return: The faculty information.
+        """
+        faculty_info = self.page_handler.get_faculty(
+            campus, department, designation, name
+        )
+        return faculty_info
+
     def seating_information(self) -> list[SeatingInformation]:
         """
         Get the seating information of the currently authenticated user.
@@ -165,7 +192,7 @@ def seating_information(self) -> list[SeatingInformation]:
         """
         if not self._authenticated:
             raise AuthenticationError("You need to authenticate first.")
-        seating_info = self.page_handler.get_seating_info()
+        seating_info = self.page_handler.get_seating_information()
         return seating_info
 
     def announcements(
diff --git a/pesuacademy/util/page.py b/pesuacademy/util/page.py
index 7eb6b58..8cef248 100644
--- a/pesuacademy/util/page.py
+++ b/pesuacademy/util/page.py
@@ -14,6 +14,7 @@ def __init__(self, session: requests_html.HTMLSession):
         self.course_page_handler = pages.CoursesPageHandler()
         self.attendance_page_handler = pages.AttendancePageHandler()
         self.profile_page_handler = pages.ProfilePageHandler()
+        self.faculty_page_handler = pages.FacultyPageHandler()
         self.announcement_handler = pages.AnnouncementPageHandler()
 
     def set_semester_id_to_number_mapping(self, csrf_token: str):
@@ -82,7 +83,22 @@ def get_attendance(self, semester: Optional[int] = None):
         semester_ids = self.get_semester_ids_from_semester_number(semester)
         return self.attendance_page_handler.get_page(self.__session, semester_ids)
 
-    def get_seating_info(self):
+    def get_faculty(
+        self,
+        campus: Optional[str] = None,
+        department: Optional[str] = None,
+        designation: Optional[str] = None,
+        name: Optional[str] = None,
+    ):
+        return self.faculty_page_handler.get_page(
+            self.__session,
+            campus,
+            department,
+            designation,
+            name,
+        )
+
+    def get_seating_information(self):
         return pages.SeatingInformationHandler.get_page(self.__session)
 
     def get_announcements(