## daily-theft-from-vehicle-report.R
## Dependencies
# Do not install.packages("ggmap"): the current CRAN release is out-of-date.
# devtools is only needed here so that ggmap can be installed from GitHub.
if (!requireNamespace("devtools")) {
  install.packages("devtools")
}
# Install the latest ggmap from the "tidyup" branch on GitHub (until CRAN is
# updated). force = TRUE is passed, so this runs every time the file is sourced.
devtools::install_github("dkahle/ggmap", ref = "tidyup", force = TRUE)
#' Download, clean and geocode the daily "Theft From Vehicle" report.
#'
#' Downloads the CSV report from the hard-coded report URL, reads it into a
#' data frame, tidies the address text, geocodes every address through the
#' Google geocoder (via ggmap) and splits the combined date/time field into
#' separate Date and Time columns.
#'
#' Side effects: attaches ggmap, sources "cloud-storage-env-vars.R" from the
#' working directory that is current when the function is called, registers
#' the Google API key, changes the working directory to `workingDirectory`
#' (and leaves it there), creates `dataSetDirectory` if needed, and writes
#' then deletes a temporary CSV file inside it.
#'
#' @param workingDirectory Directory passed to `setwd()` before any data
#'   files are created; relative `dataSetDirectory` paths resolve against it.
#' @param dataSetDirectory Directory that temporarily holds the downloaded
#'   CSV. Created (recursively) when it does not exist.
#' @return A data.frame with the columns Report, Date, Time, Address, Status,
#'   Disposition, ReportingArea, Officer, Longitude, Latitude (in that order).
#'   An empty report yields a zero-row data frame with the same columns.
getDailyTheftFromVehicleReport <- function(workingDirectory, dataSetDirectory = "./data/") {
  # library() stops with an error when ggmap is missing; require() would only
  # return FALSE and let the first ggmap call fail later with a vaguer error.
  library(ggmap)

  ## Initial set up
  source("cloud-storage-env-vars.R") # my Google Cloud Storage API variables
  register_google(key = GC_API_KEY) # GC_API_KEY var is the Google Maps API key
  url <- "https://gis2.nngov.com/ssrs/report/?rs:Name=/12-Police/Daily_Theft_From_Vehicle_Public&rs:Command=Render&rs:Format=CSV"
  fileName <- "Daily_Theft_From_Vehicle_Public.csv"
  # file.path() copes with a directory argument with or without trailing slash
  destinationFile <- file.path(dataSetDirectory, fileName)
  # NOTE(review): the working directory is deliberately left changed after the
  # call, as before, because callers may rely on it. Consider restoring it
  # with on.exit() once callers have been checked.
  setwd(workingDirectory)
  columnNames <- c(
    "Report",        # Report__
    "DateTime",      # Date_Time
    "Address",       # Address
    "Status",        # Status
    "Disposition",   # Disposition
    "ReportingArea", # RA
    "Officer"        # Officer
  )
  # every column is read as text; conversion happens explicitly below
  columnClasses <- rep("character", length(columnNames))
  cityName <- "NEWPORT NEWS"
  stateName <- "VIRGINIA"

  ## Create data directory
  if (!dir.exists(dataSetDirectory)) {
    dir.create(dataSetDirectory, recursive = TRUE)
  }

  ## Download our data
  # The daily report file is temporary: remove it however the function exits,
  # including when reading or geocoding fails part-way through.
  on.exit(unlink(destinationFile), add = TRUE)
  status <- download.file(url, destfile = destinationFile, method = "curl", quiet = TRUE)
  if (!identical(as.integer(status), 0L)) {
    stop("Download of the daily report failed (status ", status, "): ", url,
         call. = FALSE)
  }

  ## Create data frame
  # The first five lines of the file are skipped; presumably a report
  # preamble plus the header row -- TODO confirm against a current download.
  data <- read.csv(destinationFile, skip = 5, col.names = columnNames,
                   colClasses = columnClasses, stringsAsFactors = FALSE)

  ## Reformatting
  # strip leading/trailing whitespace from every text column
  data[] <- lapply(data, function(x) if (is.character(x)) trimws(x) else x)
  # remove "BLOCK" from addresses
  data$Address <- gsub("BLOCK", "", data$Address, fixed = TRUE)
  # Convert the cross street indicator. Surrounding blanks are normalised so
  # "A ST/B ST" becomes "A ST AT B ST" rather than "A STATB ST".
  data$Address <- gsub("\\s*/\\s*", " AT ", data$Address)
  # Replace a house number of 0 with 1, keeping one separating space so that
  # "0 MAIN ST" becomes "1 MAIN ST" rather than "1MAIN ST".
  data$Address <- sub("^0\\s+", "1 ", data$Address)
  # add city and state name to street address
  data$Address <- paste(data$Address, cityName, stateName, sep = ", ")

  ## Geocode addresses to latitude and longitude
  ## From: http://www.storybench.org/geocode-csv-addresses-r/
  rowCount <- nrow(data)
  longitude <- rep(NA_real_, rowCount)
  latitude <- rep(NA_real_, rowCount)
  # seq_len() makes this a no-op for an empty report (1:0 would iterate twice)
  for (i in seq_len(rowCount)) {
    result <- geocode(data$Address[i], output = "latlona", source = "google",
                      messaging = FALSE, force = TRUE)
    # first column is longitude, second is latitude
    longitude[i] <- as.numeric(result[[1]])
    latitude[i] <- as.numeric(result[[2]])
  }
  data$Longitude <- longitude
  data$Latitude <- latitude

  ## Swap out DateTime for tidy Date and Time columns
  # DateTime is split at ":"; the first piece is kept as the date and the
  # second piece is treated as a run of digits holding the 24-hour time.
  # A value with no ":" yields NA for Time instead of a subscript error.
  pieces <- strsplit(data$DateTime, ":", fixed = TRUE)
  pickPiece <- function(p, k) if (length(p) >= k) p[[k]] else NA_character_
  datePart <- vapply(pieces, pickPiece, character(1), k = 1L)
  timeDigits <- vapply(pieces, pickPiece, character(1), k = 2L)
  # add colon back into time ("1430" -> "14:30")
  timeDigits <- gsub("(\\d{2})(?=\\d{2})", "\\1:", timeDigits, perl = TRUE)
  # format into readable 12-hour time; unparseable values become NA
  data$Date <- datePart
  data$Time <- format(strptime(timeDigits, format = "%H:%M", tz = "EST"), "%I:%M %p")

  # Selecting the final columns in order also drops the raw DateTime column.
  data <- data[, c("Report", "Date", "Time", "Address", "Status", "Disposition",
                   "ReportingArea", "Officer", "Longitude", "Latitude")]
  data # return the clean data frame
}