-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathmenu_scraper.py
115 lines (94 loc) · 3.81 KB
/
menu_scraper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
from selenium import webdriver
def _text_or_default(driver, xpath, default=''):
    """Return the text of the element at *xpath*, or *default* if it cannot be read.

    Only WebDriver errors (element missing, stale, ...) are treated as
    "not present"; anything else, including KeyboardInterrupt, propagates.
    """
    # Local import: the module only imports ``webdriver`` at file level.
    from selenium.common.exceptions import WebDriverException
    try:
        return str(driver.find_element_by_xpath(xpath).text)
    except WebDriverException:
        return default


def _image_src(driver, xpath):
    """Return the ``src`` attribute of the element at *xpath*, or '' if unavailable."""
    from selenium.common.exceptions import WebDriverException
    try:
        src = driver.find_element_by_xpath(xpath).get_attribute("src")
    except WebDriverException:
        return ''
    # get_attribute returns None when the attribute is absent; avoid
    # storing the literal string 'None' as a URL.
    return '' if src is None else str(src)


def _resolve_price(price_text, description):
    """Work out ``(price, status, description)`` for a single menu item.

    *price_text* is the text of the item's price element, or None when that
    element was not found. *description* is the text of the description
    element ('' when not found).

    When the price element is missing, or is a "Sold ..." marker with no "$"
    amount, a description containing "$" is taken to be the price and the
    description is cleared.
    """
    status = ''
    if price_text is not None:
        if price_text == description:
            # Both lookups returned the same text: it is the price.
            description = ''
        if "Sold" in price_text:
            status = "Sold out"
            _, dollar, amount = price_text.partition("$")
            if dollar:
                # Keep only the amount, dropping the sold-out marker before it.
                return "$" + amount, status, description
            # No amount next to the marker: fall through to the description.
        else:
            return price_text, "In stock", description

    price = ''
    if "$" in description:
        price = description
        description = ''
        if not status:
            # Same rule as for a regular price element, so the status never
            # leaks over from the previously processed item.
            status = "Sold out" if "Sold" in price else "In stock"
    return price, status, description


def scrape_menu(url):
    """Open *url* in Chrome and scrape the restaurant header and its menu.

    The page is addressed with absolute XPaths, so this only works for pages
    that have the exact DOM layout those paths describe.

    Args:
        url: Address of the restaurant page to load.

    Returns:
        A dict with the keys ``'title'``, ``'detail'``, ``'rating'``
        (``'N/A'`` when missing), ``'num_reviews'`` (``'(0)'`` when missing)
        and ``'menu'``. ``'menu'`` is a list of single-key dicts mapping a
        category heading to a list of item dicts with the keys ``'name'``,
        ``'description'``, ``'price'``, ``'status'`` and ``'img_url'``.
        Item fields that cannot be found are empty strings.

    Raises:
        selenium.common.exceptions.WebDriverException: if the title, the menu
            list, or a category heading/list cannot be located, or the
            browser cannot be started. These lookups are not best-effort.
    """
    header = "/html/body/div/div/main/div[1]/div/div/div[2]/div/div[2]"
    menu_list = "/html/body/div/div/main/div[2]/ul"

    driver = webdriver.Chrome(executable_path="chromedriver_win32/chromedriver.exe")
    try:
        driver.maximize_window()
        driver.get(url)

        # ===== Header details =====
        detail = _text_or_default(driver, header + "/div[1]")
        rating = _text_or_default(driver, header + "/div[2]/div[1]", 'N/A')
        num = _text_or_default(driver, header + "/div[2]/div[3]", '(0)')
        restaurant = {
            'title': driver.find_element_by_xpath(header + "/h1").text,
            'detail': detail,
            'rating': rating,
            'num_reviews': num,
            'menu': [],
        }

        # ===== Menu =====
        # Categories are the elements under the menu list that share the
        # class attribute of its first <li>.
        menu_root = driver.find_element_by_xpath(menu_list)
        first_item = menu_root.find_element_by_tag_name("li")
        categories = menu_root.find_elements_by_class_name(
            first_item.get_attribute("class"))

        # NOTE(review): the last matching element is skipped, exactly as the
        # original loop did; presumably it is not a real category - confirm.
        for cat_index in range(1, len(categories)):
            cat_xpath = "{}/li[{}]".format(menu_list, cat_index)
            category = driver.find_element_by_xpath(cat_xpath + "/h2").text
            entries = driver.find_element_by_xpath(
                cat_xpath + "/ul").find_elements_by_tag_name("li")

            items = []
            for item_index in range(1, len(entries) + 1):
                card = "{}/ul/li[{}]/a/div".format(cat_xpath, item_index)
                name = _text_or_default(driver, card + "/div[1]/h4")
                description = _text_or_default(driver, card + "/div[1]/div[1]")
                # None (not '') marks a missing price element.
                price_text = _text_or_default(
                    driver, card + "/div[1]/div[2]", default=None)
                price, status, description = _resolve_price(
                    price_text, description)
                items.append({
                    'name': name,
                    'description': description,
                    'price': price,
                    'status': status,
                    'img_url': _image_src(driver, card + "/div[2]/img"),
                })
            restaurant['menu'].append({category: items})

        return restaurant
    finally:
        # Always shut the browser down, even when a lookup above raised;
        # otherwise every call leaves a Chrome/chromedriver process behind.
        driver.quit()