-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathQuery_Postgres.py
95 lines (65 loc) · 2.89 KB
/
Query_Postgres.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
#This script will download a data set on Causes of Death in the United States. It will connect to PostgreSQL and load
#the data into a table. It will prompt a user to input a year and a state.
#It will then perform an SQL query using a where statement to find causes of death for the given year and state.
#download the data set from the website - Change download address as needed
#link to data: https://data.cdc.gov/api/views/bi63-dtpu/rows.csv?accessType=DOWNLOAD
#pip3 install wget
import psycopg2
from psycopg2.extensions import AsIs
import wget
import csv
import os
import os.path
#set Python working directory to folder with download
def setwd():
os.chdir('/home/monte/Documents/Python Scripts/')
setwd()
def get_data():
if os.path.isfile('/home/monte/Documents/Python Scripts/data.csv'):
print ('Data file exists and is readable')
else:
print('Downloading Data on Leading Causes of Death in the USA')
url = 'https://data.cdc.gov/api/views/bi63-dtpu/rows.csv?accessType=DOWNLOAD'
wget.download(url, '/home/monte/Documents/Python Scripts/data.csv')
get_data()
#clean data column that gives syntax errors
#install pandas package: pip3 install pandas
def clean_data():
import pandas as pd
data = pd.read_csv('./data.csv')
drop = data.drop('113 Cause Name', axis=1)
drop.to_csv('cause_death.csv')
clean_data()
#connect to PostgreSQL server, create a table and import the data
# install packages:
# C Compiler: sudo apt install build-essential
# Python header: sudo apt install python-dev
# Libpq package: sudo apt install libpq-dev
# Psycopg2: pip3 install psycopg2
def import_data():
conn = psycopg2.connect(host="localhost", database="postgres", user="postgres", password="postgres")
cur=conn.cursor()
try:
cur.execute("""CREATE TABLE Cause_of_Death(ID integer, Year integer, "Cause Name" text,
State text, Deaths numeric, "Age-adjusted Death Rate" numeric)""")
conn.commit()
with open('cause_death.csv', 'r') as f:
next(f)
cur.copy_from(f, 'Cause_of_Death', sep = ',')
conn.commit()
except psycopg2.errors.DuplicateTable:
pass
import_data()
#get input from user
query1 = int(input('please enter a year: '))
query2 = str(input('please enter a state with the first letter capitalized: '))
#query table with user input, print results
def query_table(query1, query2):
conn = psycopg2.connect(host="localhost", database="postgres", user="postgres", password="postgres")
cur=conn.cursor()
cur.execute("""SELECT "Cause Name", Deaths, Year, State FROM Cause_of_Death WHERE Year= %s AND State = %s;""", (AsIs(query1), (query2)))
data=cur.fetchall()
for row in data:
print(row)
conn.commit()
query_table(query1, query2)