-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathfacebook-friends.py
163 lines (138 loc) · 5.34 KB
/
facebook-friends.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
import os, datetime, time, csv
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from datetime import datetime
import configparser
from sys import argv
import random
print("\n" * 100)
os.environ["DEBUSSY"] = "1"
# Configure browser session
wd_options = Options()
wd_options.add_argument("--disable-notifications")
wd_options.add_argument("--disable-infobars")
wd_options.add_argument("--mute-audio")
browser = webdriver.Chrome(chrome_options=wd_options)
# --------------- Ask user to log in -----------------
def fb_login(credentials):
print("Opening browser...")
email = credentials.get('credentials', 'email')
password = credentials.get('credentials', 'password')
browser.get("https://www.facebook.com/")
browser.find_element_by_id('email').send_keys(email)
browser.find_element_by_id('pass').send_keys(password)
browser.find_element_by_id('loginbutton').click()
# --------------- Scroll to bottom of page -----------------
def scroll_to_bottom():
SCROLL_PAUSE_TIME = 0.5
# Get scroll height
last_height = browser.execute_script("return document.body.scrollHeight")
while True:
# Scroll down to bottom
browser.execute_script("window.scrollTo(0, document.body.scrollHeight);")
# Wait to load page
time.sleep(SCROLL_PAUSE_TIME)
# Calculate new scroll height and compare with last scroll height
new_height = browser.execute_script("return document.body.scrollHeight")
if new_height == last_height:
break
last_height = new_height
# --------------- Get list of all friends on page ---------------
def scan_friends():
print('Scanning page for friends...')
friends = []
friend_cards = browser.find_elements_by_xpath('//div[@id="pagelet_timeline_medley_friends"]//div[@class="fsl fwb fcb"]/a')
for friend in friend_cards:
if friend.get_attribute('data-hovercard') is None:
print(" %s (INACTIVE)" % friend.text)
friend_id = friend.get_attribute('ajaxify').split('id=')[1]
friend_active = 0
else:
print(" %s" % friend.text)
friend_id = friend.get_attribute('data-hovercard').split('id=')[1].split('&')[0]
friend_active = 1
friends.append({
'name': friend.text.encode('utf-8', 'ignore'), #to prevent CSV writing issues
'id': friend_id,
'active': friend_active
})
print('Found %r friends on page!' % len(friends))
return friends
# ----------------- Load list from CSV -----------------
def load_csv(filename):
inact = 0
myfriends = []
with open(filename, 'rt', encoding="utf-8") as input_csv:
reader = csv.DictReader(input_csv)
for idx,row in enumerate(reader):
if row['active'] is '1':
myfriends.append({
"name":row['B_name'],
"uid":row['B_id']
})
else:
print("Skipping %s (inactive)" % row['B_name'])
inact = inact + 1
print("%d friends in imported list" % (idx+1))
print("%d ready for scanning (%d inactive)" % (idx-inact+1, inact))
return myfriends
# --------------- Scrape 1st degree friends ---------------
def scrape_1st_degrees():
#Prep CSV Output File
csvOut = '1st-degree_%s.csv' % now.strftime("%Y-%m-%d_%H%M")
writer = csv.writer(open(csvOut, 'w', encoding="utf-8"))
writer.writerow(['A_id','A_name','B_id','B_name','active'])
#Get your unique Facebook ID
profile_icon = browser.find_element_by_css_selector("[data-click='profile_icon'] > a > span > img")
myid = profile_icon.get_attribute("id")[19:]
#Scan your Friends page (1st-degree friends)
print("Opening Friends page...")
browser.get("https://www.facebook.com/" + myid + "/friends")
scroll_to_bottom()
myfriends = scan_friends()
#Write friends to CSV File
for friend in myfriends:
writer.writerow([myid,"Me",friend['id'],friend['name'],friend['active']])
print("Successfully saved to %s" % csvOut)
# --------------- Scrape 2nd degree friends. ---------------
#This can take several days if you have a lot of friends!!
def scrape_2nd_degrees():
#Prep CSV Output File
csvOut = '2nd-degree_%s.csv' % now.strftime("%Y-%m-%d_%H%M")
writer = csv.writer(open(csvOut, 'w', encoding="utf-8"))
writer.writerow(['A_id', 'B_id', 'A_name','B_name','active'])
#Load friends from CSV Input File
script, filename = argv
print("Loading list from %s..." % filename)
myfriends = load_csv(filename)
print("------------------------------------------")
for idx,friend in enumerate(myfriends):
#Load URL of friend's friend page
scrape_url = "https://www.facebook.com/"+ friend['uid'] + "/friends?source_ref=pb_friends_tl"
browser.get(scrape_url)
#Scan your friends' Friends page (2nd-degree friends)
print("%d) %s" % (idx+1, scrape_url))
scroll_to_bottom()
their_friends = scan_friends()
#Write friends to CSV File
print('Writing friends to CSV...')
for person in their_friends:
writer.writerow([friend['uid'],person['id'],friend['name'],person['name'],person['active']])
# --------------- Start Scraping ---------------
now = datetime.now()
configPath = "config.txt"
if configPath:
configObj = configparser.ConfigParser()
configObj.read(configPath)
email = configObj.get('credentials', 'email')
password = configObj.get('credentials', 'password')
else:
print('Enter the config path')
fb_login(configObj)
if len(argv) is 1:
scrape_1st_degrees()
elif len(argv) is 2:
scrape_2nd_degrees()
else:
print("Invalid # of arguments specified. Use none to scrape your 1st degree connections, or specify the name of the CSV file as the first argument.")