From 91b36369c98c8bfcb9ffe97275735b02a07f34fd Mon Sep 17 00:00:00 2001 From: Frank Hereford Date: Thu, 28 Mar 2024 10:37:48 -0500 Subject: [PATCH 01/35] remove version designation and leave a breadcrumb on how to get a bash terminal easily --- atd-etl/cr3_download/docker-compose.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/atd-etl/cr3_download/docker-compose.yml b/atd-etl/cr3_download/docker-compose.yml index 5b3add923..624eb60fa 100644 --- a/atd-etl/cr3_download/docker-compose.yml +++ b/atd-etl/cr3_download/docker-compose.yml @@ -1,5 +1,3 @@ -version: '3' - services: cr3_download: build: . @@ -7,3 +5,4 @@ services: - .:/app env_file: - ./.env + #entrypoint: /bin/bash From fd2107ff4fa27d9deecc52dc094e8c5c41f23765 Mon Sep 17 00:00:00 2001 From: Frank Hereford Date: Thu, 28 Mar 2024 10:45:50 -0500 Subject: [PATCH 02/35] remove cap and thread downloading --- atd-etl/cr3_download/cr3_download.py | 61 +++++++++++++++------ atd-etl/cr3_download/process/helpers_cr3.py | 10 ++-- 2 files changed, 48 insertions(+), 23 deletions(-) diff --git a/atd-etl/cr3_download/cr3_download.py b/atd-etl/cr3_download/cr3_download.py index ab1f19090..41c9f559f 100755 --- a/atd-etl/cr3_download/cr3_download.py +++ b/atd-etl/cr3_download/cr3_download.py @@ -5,19 +5,21 @@ Description: This script allows the user to log in to the CRIS website and download CR3 pdf files as needed. The list of CR3 files to be downloaded -is obtained from Hasura, and it is contingent to records that do not have +is obtained from Hasura, and it is made up of the records that do not have any CR3 files associated. 
""" import os import time import json +from concurrent.futures import ThreadPoolExecutor from process.helpers_cr3 import * from onepasswordconnectsdk.client import Client, new_client import onepasswordconnectsdk + # Start timer start = time.time() @@ -86,7 +88,7 @@ # the CR3 pdf, upload to S3 # -# ask user for a set of valid cookies for requests to the CRIS website +# # ask user for a set of valid cookies for requests to the CRIS website CRIS_BROWSER_COOKIES = input( "Please login to CRIS and extract the contents of the Cookie: header and please paste it here:" ) @@ -107,35 +109,58 @@ try: print("Hasura endpoint: '%s' " % os.getenv("HASURA_ENDPOINT")) - downloads_per_run = os.getenv("ATD_CRIS_CR3_DOWNLOADS_PER_RUN") - downloads_per_run = 2000 - print("Downloads Per This Run: %s" % str(downloads_per_run)) - response = get_crash_id_list(downloads_per_run=downloads_per_run) - print("\nResponse from Hasura: %s" % json.dumps(response)) + response = get_crash_id_list() + # print("\nResponse from Hasura: %s" % json.dumps(response)) crashes_list = response["data"]["atd_txdot_crashes"] crashes_list_without_skips = [ x for x in crashes_list if x["crash_id"] not in known_skips ] - print( - f"\nList of {len(crashes_list_without_skips)} crashes needing CR3 download: %s" - % json.dumps(crashes_list_without_skips) - ) - print("\nStarting CR3 downloads:") + except Exception as e: crashes_list_without_skips = [] print("Error, could not run CR3 processing: " + str(e)) -for crash_record in crashes_list_without_skips: - process_crash_cr3( - crash_record, - CRIS_BROWSER_COOKIES, - skipped_uploads_and_updates, - ) +print(f"Length of queue: {len(crashes_list_without_skips)}") + +print("\nStarting CR3 downloads:") + + +def process_crash_cr3_threaded(crash_record): + try: + process_crash_cr3( + crash_record, + CRIS_BROWSER_COOKIES, + skipped_uploads_and_updates, + ) + print(f"Processed crash ID: {crash_record['crash_id']}") + except Exception as e: + print(f"Error processing crash ID 
{crash_record['crash_id']}: {str(e)}") + skipped_uploads_and_updates.append(str(crash_record["crash_id"])) + + +max_workers = 4 # Specify the number of concurrent downloaders + +with ThreadPoolExecutor(max_workers=max_workers) as executor: + futures = [] + for crash_record in crashes_list_without_skips: + future = executor.submit(process_crash_cr3_threaded, crash_record) + futures.append(future) + + for future in futures: + future.result() + + +# for crash_record in crashes_list_without_skips: +# process_crash_cr3( +# crash_record, +# CRIS_BROWSER_COOKIES, +# skipped_uploads_and_updates, +# ) print("\nProcess done.") diff --git a/atd-etl/cr3_download/process/helpers_cr3.py b/atd-etl/cr3_download/process/helpers_cr3.py index 2fd8e7c3f..07f12424a 100644 --- a/atd-etl/cr3_download/process/helpers_cr3.py +++ b/atd-etl/cr3_download/process/helpers_cr3.py @@ -83,7 +83,7 @@ def delete_cr3s(crash_id): run_command("rm %s" % file) -def get_crash_id_list(downloads_per_run="25"): +def get_crash_id_list(): """ Downloads a list of crashes that do not have a CR3 associated. 
:return: dict - Response from request.post @@ -91,7 +91,6 @@ def get_crash_id_list(downloads_per_run="25"): query_crashes_cr3 = """ query CrashesWithoutCR3 { atd_txdot_crashes( - limit: %s, where: { cr3_stored_flag: {_eq: "N"} temp_record: {_eq: false} @@ -101,9 +100,7 @@ def get_crash_id_list(downloads_per_run="25"): crash_id } } - """ % ( - str(downloads_per_run) - ) + """ return run_query(query_crashes_cr3) @@ -157,6 +154,9 @@ def process_crash_cr3(crash_record, cookies, skipped_uploads_and_updates): if not is_file_pdf: print(f"\nFile {download_path} is not a pdf - skipping upload and update") + with open(download_path, "r") as file: + print(file.read()) + time.sleep(10) skipped_uploads_and_updates.append(crash_id) else: upload_cr3(crash_id) From 214266ae2da083141ffec450f8f25615c8e25eb8 Mon Sep 17 00:00:00 2001 From: Frank Hereford Date: Fri, 29 Mar 2024 10:40:08 -0500 Subject: [PATCH 03/35] remove comment --- atd-etl/cr3_download/cr3_download.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/atd-etl/cr3_download/cr3_download.py b/atd-etl/cr3_download/cr3_download.py index 41c9f559f..bc1b3fce2 100755 --- a/atd-etl/cr3_download/cr3_download.py +++ b/atd-etl/cr3_download/cr3_download.py @@ -154,14 +154,6 @@ def process_crash_cr3_threaded(crash_record): for future in futures: future.result() - -# for crash_record in crashes_list_without_skips: -# process_crash_cr3( -# crash_record, -# CRIS_BROWSER_COOKIES, -# skipped_uploads_and_updates, -# ) - print("\nProcess done.") if skipped_uploads_and_updates: From a8a3d990bd6382a965d0437f101c4ad941d9f145 Mon Sep 17 00:00:00 2001 From: Frank Hereford Date: Fri, 29 Mar 2024 11:15:45 -0500 Subject: [PATCH 04/35] pass over this to organize it and improve UX --- atd-etl/cr3_download/Dockerfile | 2 +- atd-etl/cr3_download/cr3_download.py | 294 +++++++++++--------- atd-etl/cr3_download/docker-compose.yml | 3 +- atd-etl/cr3_download/process/helpers_cr3.py | 44 +-- 4 files changed, 190 insertions(+), 153 deletions(-) diff 
--git a/atd-etl/cr3_download/Dockerfile b/atd-etl/cr3_download/Dockerfile index f184a87a9..214bc5b22 100644 --- a/atd-etl/cr3_download/Dockerfile +++ b/atd-etl/cr3_download/Dockerfile @@ -10,4 +10,4 @@ COPY . /app RUN cd /app && pip install -r requirements.txt -CMD python /app/cr3_download.py +CMD -t 5 \ No newline at end of file diff --git a/atd-etl/cr3_download/cr3_download.py b/atd-etl/cr3_download/cr3_download.py index bc1b3fce2..08f3ab08a 100755 --- a/atd-etl/cr3_download/cr3_download.py +++ b/atd-etl/cr3_download/cr3_download.py @@ -11,156 +11,180 @@ import os import time -import json +import argparse from concurrent.futures import ThreadPoolExecutor -from process.helpers_cr3 import * - -from onepasswordconnectsdk.client import Client, new_client +from process.helpers_cr3 import process_crash_cr3, get_crash_id_list +from onepasswordconnectsdk.client import new_client import onepasswordconnectsdk -# Start timer -start = time.time() - - -# Get 1Password secrets from environment -ONEPASSWORD_CONNECT_HOST = os.getenv("OP_CONNECT") -ONEPASSWORD_CONNECT_TOKEN = os.getenv("OP_API_TOKEN") -VAULT_ID = os.getenv("OP_VAULT_ID") - -# Setup 1Password server connection -one_password_client = new_client(ONEPASSWORD_CONNECT_HOST, ONEPASSWORD_CONNECT_TOKEN) - -# Get secrets from 1Password -REQUIRED_SECRETS = { - "HASURA_ENDPOINT": { - "opitem": "Vision Zero graphql-engine Endpoints", - "opfield": "production.GraphQL Endpoint", - "opvault": VAULT_ID, - }, - "HASURA_ADMIN_KEY": { - "opitem": "Vision Zero graphql-engine Endpoints", - "opfield": "production.Admin Key", - "opvault": VAULT_ID, - }, - "AWS_ACCESS_KEY_ID": { - "opitem": "CR3 Download IAM Access Key and Secret", - "opfield": "production.accessKeyId", - "opvault": VAULT_ID, - }, - "AWS_SECRET_ACCESS_KEY": { - "opitem": "CR3 Download IAM Access Key and Secret", - "opfield": "production.accessSecret", - "opvault": VAULT_ID, - }, - "AWS_DEFAULT_REGION": { - "opitem": "CR3 Download IAM Access Key and Secret", - "opfield": 
"production.awsDefaultRegion", - "opvault": VAULT_ID, - }, - "ATD_CRIS_CR3_URL": { - "opitem": "Vision Zero CRIS CR3 Download", - "opfield": "production.ATD_CRIS_CR3_URL", - "opvault": VAULT_ID, - }, - "AWS_CRIS_CR3_BUCKET_NAME": { - "opitem": "Vision Zero CRIS CR3 Download", - "opfield": "production.AWS_CRIS_CR3_BUCKET_NAME", - "opvault": VAULT_ID, - }, - "AWS_CRIS_CR3_BUCKET_PATH": { - "opitem": "Vision Zero CRIS CR3 Download", - "opfield": "production.AWS_CRIS_CR3_BUCKET_PATH", - "opvault": VAULT_ID, - }, -} - -env_vars = onepasswordconnectsdk.load_dict(one_password_client, REQUIRED_SECRETS) - -# Set secrets from 1Password in environment -for key, value in env_vars.items(): - os.environ[key] = value - -# -# We now need to request a list of N number of records -# that do not have a CR3. For each record we must download -# the CR3 pdf, upload to S3 -# - -# # ask user for a set of valid cookies for requests to the CRIS website -CRIS_BROWSER_COOKIES = input( - "Please login to CRIS and extract the contents of the Cookie: header and please paste it here:" -) - -print("Preparing download loop.") - -print("Gathering list of crashes.") -# Track crash IDs that we don't successfully retrieve a pdf file for -skipped_uploads_and_updates = [] - -# Some crash IDs were manually added at the request of the VZ team so -# CR3s for these crash IDs are not available in the CRIS database. -# We can skip requesting them. 
-# See https://github.com/cityofaustin/atd-data-tech/issues/9786 -known_skips = [180290542, 144720068] - -crashes_list_without_skips = [] - -try: - print("Hasura endpoint: '%s' " % os.getenv("HASURA_ENDPOINT")) - - response = get_crash_id_list() - # print("\nResponse from Hasura: %s" % json.dumps(response)) - - crashes_list = response["data"]["atd_txdot_crashes"] - - crashes_list_without_skips = [ - x for x in crashes_list if x["crash_id"] not in known_skips - ] - - -except Exception as e: - crashes_list_without_skips = [] - print("Error, could not run CR3 processing: " + str(e)) - - -print(f"Length of queue: {len(crashes_list_without_skips)}") - -print("\nStarting CR3 downloads:") - - -def process_crash_cr3_threaded(crash_record): +def load_secrets(one_password_client, vault_id): + """Load required secrets from 1Password.""" + required_secrets = { + "HASURA_ENDPOINT": { + "opitem": "Vision Zero graphql-engine Endpoints", + "opfield": "production.GraphQL Endpoint", + "opvault": vault_id, + }, + "HASURA_ADMIN_KEY": { + "opitem": "Vision Zero graphql-engine Endpoints", + "opfield": "production.Admin Key", + "opvault": vault_id, + }, + "AWS_ACCESS_KEY_ID": { + "opitem": "CR3 Download IAM Access Key and Secret", + "opfield": "production.accessKeyId", + "opvault": vault_id, + }, + "AWS_SECRET_ACCESS_KEY": { + "opitem": "CR3 Download IAM Access Key and Secret", + "opfield": "production.accessSecret", + "opvault": vault_id, + }, + "AWS_DEFAULT_REGION": { + "opitem": "CR3 Download IAM Access Key and Secret", + "opfield": "production.awsDefaultRegion", + "opvault": vault_id, + }, + "ATD_CRIS_CR3_URL": { + "opitem": "Vision Zero CRIS CR3 Download", + "opfield": "production.ATD_CRIS_CR3_URL", + "opvault": vault_id, + }, + "AWS_CRIS_CR3_BUCKET_NAME": { + "opitem": "Vision Zero CRIS CR3 Download", + "opfield": "production.AWS_CRIS_CR3_BUCKET_NAME", + "opvault": vault_id, + }, + "AWS_CRIS_CR3_BUCKET_PATH": { + "opitem": "Vision Zero CRIS CR3 Download", + "opfield": 
"production.AWS_CRIS_CR3_BUCKET_PATH", + "opvault": vault_id, + }, + } + + return onepasswordconnectsdk.load_dict(one_password_client, required_secrets) + + +def process_crash_cr3_threaded( + crash_record, cris_browser_cookies, skipped_uploads_and_updates, verbose +): + """Process a crash record in a separate thread.""" try: process_crash_cr3( - crash_record, - CRIS_BROWSER_COOKIES, - skipped_uploads_and_updates, + crash_record, cris_browser_cookies, skipped_uploads_and_updates, verbose ) - print(f"Processed crash ID: {crash_record['crash_id']}") + if verbose: + print(f"Processed crash ID: {crash_record['crash_id']}") except Exception as e: print(f"Error processing crash ID {crash_record['crash_id']}: {str(e)}") skipped_uploads_and_updates.append(str(crash_record["crash_id"])) -max_workers = 4 # Specify the number of concurrent downloaders +def main(): + # Parse command-line arguments + parser = argparse.ArgumentParser(description="CRIS - CR3 Downloader") + parser.add_argument( + "-t", + "--threads", + type=int, + default=1, + help="Number of concurrent downloaders (default: 1)", + ) + parser.add_argument( + "-v", "--verbose", action="store_true", help="Enable verbose logging" + ) + args = parser.parse_args() + + # Start timer + start = time.time() + + # Get 1Password secrets from environment + ONEPASSWORD_CONNECT_HOST = os.getenv("OP_CONNECT") + ONEPASSWORD_CONNECT_TOKEN = os.getenv("OP_API_TOKEN") + VAULT_ID = os.getenv("OP_VAULT_ID") + + # Setup 1Password server connection + one_password_client = new_client( + ONEPASSWORD_CONNECT_HOST, ONEPASSWORD_CONNECT_TOKEN + ) + + # Load secrets from 1Password and set them in the environment + env_vars = load_secrets(one_password_client, VAULT_ID) + for key, value in env_vars.items(): + os.environ[key] = value + + # Ask user for a set of valid cookies for requests to the CRIS website + CRIS_BROWSER_COOKIES = input( + "Please login to CRIS and extract the contents of the Cookie: header and please paste it here: " + ) + + if 
args.verbose: + print("Preparing download loop.") + print("Gathering list of crashes.") + + # Track crash IDs that we don't successfully retrieve a pdf file for + skipped_uploads_and_updates = [] + + # Some crash IDs were manually added at the request of the VZ team so + # CR3s for these crash IDs are not available in the CRIS database. + # We can skip requesting them. + # See https://github.com/cityofaustin/atd-data-tech/issues/9786 + known_skips = [180290542, 144720068] -with ThreadPoolExecutor(max_workers=max_workers) as executor: - futures = [] - for crash_record in crashes_list_without_skips: - future = executor.submit(process_crash_cr3_threaded, crash_record) - futures.append(future) + crashes_list_without_skips = [] + + try: + if args.verbose: + print(f"Hasura endpoint: '{os.getenv('HASURA_ENDPOINT')}'") - for future in futures: - future.result() + response = get_crash_id_list() -print("\nProcess done.") + crashes_list = response["data"]["atd_txdot_crashes"] + crashes_list_without_skips = [ + x for x in crashes_list if x["crash_id"] not in known_skips + ] + + except Exception as e: + crashes_list_without_skips = [] + print(f"Error, could not run CR3 processing: {str(e)}") + + if args.verbose: + print(f"Length of queue: {len(crashes_list_without_skips)}") + print("Starting CR3 downloads:") + + max_workers = args.threads + with ThreadPoolExecutor(max_workers=max_workers) as executor: + futures = [] + for crash_record in crashes_list_without_skips: + future = executor.submit( + process_crash_cr3_threaded, + crash_record, + CRIS_BROWSER_COOKIES, + skipped_uploads_and_updates, + args.verbose, + ) + futures.append(future) + + for future in futures: + future.result() + + print("Process done.") + + if skipped_uploads_and_updates: + skipped_downloads = ", ".join(skipped_uploads_and_updates) + print(f"\nUnable to download PDFs for crash IDs: {skipped_downloads}") + + end = time.time() + hours, rem = divmod(end - start, 3600) + minutes, seconds = divmod(rem, 60) + 
print( + "\nFinished in: {:0>2}:{:0>2}:{:05.2f}".format( + int(hours), int(minutes), seconds + ) + ) -if skipped_uploads_and_updates: - skipped_downloads = ", ".join(skipped_uploads_and_updates) - print(f"\nUnable to download pdfs for crash IDs: {skipped_downloads}") -end = time.time() -hours, rem = divmod(end - start, 3600) -minutes, seconds = divmod(rem, 60) -print("\nFinished in: {:0>2}:{:0>2}:{:05.2f}".format(int(hours), int(minutes), seconds)) +if __name__ == "__main__": + main() diff --git a/atd-etl/cr3_download/docker-compose.yml b/atd-etl/cr3_download/docker-compose.yml index 624eb60fa..a1bb6c6e8 100644 --- a/atd-etl/cr3_download/docker-compose.yml +++ b/atd-etl/cr3_download/docker-compose.yml @@ -5,4 +5,5 @@ services: - .:/app env_file: - ./.env - #entrypoint: /bin/bash + # entrypoint: /bin/bash + entrypoint: /app/cr3_download.py diff --git a/atd-etl/cr3_download/process/helpers_cr3.py b/atd-etl/cr3_download/process/helpers_cr3.py index 07f12424a..017847664 100644 --- a/atd-etl/cr3_download/process/helpers_cr3.py +++ b/atd-etl/cr3_download/process/helpers_cr3.py @@ -23,17 +23,18 @@ from .request import run_query -def run_command(command): +def run_command(command, verbose): """ Runs a command :param command: array of strings containing the command and flags """ - print(command) - print(subprocess.check_output(command, shell=True).decode("utf-8")) + if verbose: + print(command) + print(subprocess.check_output(command, shell=True).decode("utf-8")) # Now we need to implement our methods. -def download_cr3(crash_id, cookies): +def download_cr3(crash_id, cookies, verbose): """ Downloads a CR3 pdf from the CRIS website. 
:param crash_id: string - The crash id @@ -52,14 +53,15 @@ def download_cr3(crash_id, cookies): url = os.getenv("ATD_CRIS_CR3_URL") + crash_id_encoded download_path = "/tmp/" + "%s.pdf" % crash_id - print("Downloading (%s): '%s' from %s" % (crash_id, download_path, url)) + if verbose: + print("Downloading (%s): '%s' from %s" % (crash_id, download_path, url)) resp = requests.get(url, allow_redirects=True, cookies=baked_cookies) open(download_path, "wb").write(resp.content) return download_path -def upload_cr3(crash_id): +def upload_cr3(crash_id, verbose): """ Uploads a file to S3 using the awscli command :param crash_id: string - The crash id @@ -71,16 +73,25 @@ def upload_cr3(crash_id): crash_id, ) - run_command("aws s3 cp %s %s --no-progress" % (file, destination)) + quiet_option = "--quiet" if not verbose else "" + run_command( + "aws s3 cp %s %s --no-progress %s" + % ( + file, + destination, + quiet_option, + ), + verbose, + ) -def delete_cr3s(crash_id): +def delete_cr3s(crash_id, verbose): """ Deletes the downloaded CR3 pdf file :param crash_id: string - The crash id """ file = "/tmp/%s.pdf" % crash_id - run_command("rm %s" % file) + run_command("rm %s" % file, verbose) def get_crash_id_list(): @@ -105,7 +116,7 @@ def get_crash_id_list(): return run_query(query_crashes_cr3) -def update_crash_id(crash_id): +def update_crash_id(crash_id, verbose): """ Updates the status of a crash to having an available CR3 pdf in the S3 bucket. 
:param crash_id: string - The Crash ID that needs to be updated @@ -122,7 +133,8 @@ def update_crash_id(crash_id): """ % crash_id ) - print(update_record_cr3) + if verbose: + print(f"Marking CR3 status as downloaded for crash_id: {crash_id}") return run_query(update_record_cr3) @@ -137,7 +149,7 @@ def check_if_pdf(file_path): return file_type == "application/pdf" -def process_crash_cr3(crash_record, cookies, skipped_uploads_and_updates): +def process_crash_cr3(crash_record, cookies, skipped_uploads_and_updates, verbose): """ Downloads a CR3 pdf, uploads it to s3, updates the database and deletes the pdf. :param crash_record: dict - The individual crash record being processed @@ -149,7 +161,7 @@ def process_crash_cr3(crash_record, cookies, skipped_uploads_and_updates): print("Processing Crash: " + crash_id) - download_path = download_cr3(crash_id, cookies) + download_path = download_cr3(crash_id, cookies, verbose) is_file_pdf = check_if_pdf(download_path) if not is_file_pdf: @@ -159,10 +171,10 @@ def process_crash_cr3(crash_record, cookies, skipped_uploads_and_updates): time.sleep(10) skipped_uploads_and_updates.append(crash_id) else: - upload_cr3(crash_id) - update_crash_id(crash_id) + upload_cr3(crash_id, verbose) + update_crash_id(crash_id, verbose) - delete_cr3s(crash_id) + delete_cr3s(crash_id, verbose) except Exception as e: print("Error: %s" % str(e)) From 368da64b11f571bb32b1c6aa5e37f85d77a49abe Mon Sep 17 00:00:00 2001 From: Frank Hereford Date: Fri, 29 Mar 2024 11:19:00 -0500 Subject: [PATCH 05/35] remove CMD, using the entrypoint --- atd-etl/cr3_download/Dockerfile | 1 - 1 file changed, 1 deletion(-) diff --git a/atd-etl/cr3_download/Dockerfile b/atd-etl/cr3_download/Dockerfile index 214bc5b22..aef8d2dc6 100644 --- a/atd-etl/cr3_download/Dockerfile +++ b/atd-etl/cr3_download/Dockerfile @@ -10,4 +10,3 @@ COPY . 
/app RUN cd /app && pip install -r requirements.txt -CMD -t 5 \ No newline at end of file From c419587e25c7468588a2b6fcbedeb1ca262e47bb Mon Sep 17 00:00:00 2001 From: Frank Hereford Date: Fri, 29 Mar 2024 11:22:43 -0500 Subject: [PATCH 06/35] make the default 5 threads --- atd-etl/cr3_download/docker-compose.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/atd-etl/cr3_download/docker-compose.yml b/atd-etl/cr3_download/docker-compose.yml index a1bb6c6e8..ba6ba2d77 100644 --- a/atd-etl/cr3_download/docker-compose.yml +++ b/atd-etl/cr3_download/docker-compose.yml @@ -6,4 +6,4 @@ services: env_file: - ./.env # entrypoint: /bin/bash - entrypoint: /app/cr3_download.py + entrypoint: /app/cr3_download.py -t 5 From b7958689edac2d14d22ef62f1257c4b95779067c Mon Sep 17 00:00:00 2001 From: John Clary Date: Tue, 9 Apr 2024 13:38:54 -0400 Subject: [PATCH 07/35] print queue length when not verbose --- atd-etl/cr3_download/cr3_download.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/atd-etl/cr3_download/cr3_download.py b/atd-etl/cr3_download/cr3_download.py index 08f3ab08a..96851a14d 100755 --- a/atd-etl/cr3_download/cr3_download.py +++ b/atd-etl/cr3_download/cr3_download.py @@ -150,8 +150,8 @@ def main(): crashes_list_without_skips = [] print(f"Error, could not run CR3 processing: {str(e)}") + print(f"Length of queue: {len(crashes_list_without_skips)}") if args.verbose: - print(f"Length of queue: {len(crashes_list_without_skips)}") print("Starting CR3 downloads:") max_workers = args.threads From 8e3b8f53500010a97671559a36ee47582c485bc8 Mon Sep 17 00:00:00 2001 From: John Clary Date: Tue, 9 Apr 2024 13:39:09 -0400 Subject: [PATCH 08/35] upload cr3 when not verbose --- atd-etl/cr3_download/process/helpers_cr3.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/atd-etl/cr3_download/process/helpers_cr3.py b/atd-etl/cr3_download/process/helpers_cr3.py index 017847664..114fbd1e8 100644 --- a/atd-etl/cr3_download/process/helpers_cr3.py +++ 
b/atd-etl/cr3_download/process/helpers_cr3.py @@ -31,6 +31,8 @@ def run_command(command, verbose): if verbose: print(command) print(subprocess.check_output(command, shell=True).decode("utf-8")) + else: + subprocess.check_output(command, shell=True).decode("utf-8") # Now we need to implement our methods. From bb5a8faf5f15bf62c0c5fd4d49491833f9482ecf Mon Sep 17 00:00:00 2001 From: rose Date: Wed, 10 Apr 2024 18:04:18 -0500 Subject: [PATCH 09/35] add spinners to all the cards before data is fetched --- atd-vzv/src/views/summary/CrashesByMode.js | 304 +++++++++--------- .../src/views/summary/CrashesByPopulation.js | 127 ++++---- .../src/views/summary/CrashesByTimeOfDay.js | 170 +++++----- atd-vzv/src/views/summary/CrashesByYear.js | 17 +- .../src/views/summary/PeopleByDemographics.js | 66 ++-- 5 files changed, 357 insertions(+), 327 deletions(-) diff --git a/atd-vzv/src/views/summary/CrashesByMode.js b/atd-vzv/src/views/summary/CrashesByMode.js index bc5d9950b..92add9921 100644 --- a/atd-vzv/src/views/summary/CrashesByMode.js +++ b/atd-vzv/src/views/summary/CrashesByMode.js @@ -1,7 +1,7 @@ import React, { useEffect, useState, useRef, useMemo } from "react"; import axios from "axios"; import { Bar } from "react-chartjs-2"; -import { Container, Row, Col } from "reactstrap"; +import { Container, Row, Col, Spinner } from "reactstrap"; import styled from "styled-components"; import { format } from "date-fns"; @@ -224,8 +224,8 @@ const CrashesByMode = () => { -

By Travel Mode{" "} - +

+ By Travel Mode

@@ -242,157 +242,165 @@ const CrashesByMode = () => {
- - - {chartLegend} - { - - (chartRef.current = ref)} - data={data} - height={null} - width={null} - options={{ - responsive: true, - aspectRatio: 1.37, - maintainAspectRatio: false, - scales: { - xAxes: [ - { - stacked: true, - }, - ], - yAxes: [ - { - stacked: true, - }, - ], - }, - legend: { - display: false, - }, - legendCallback: function (chart) { - return ( - - - -
-

-
- {chart.data.datasets.map((dataset, i) => { - const updateLegendColors = () => { - const legendColorsClone = [...legendColors]; - legendColors[i] !== "dimgray" - ? legendColorsClone.splice(i, 1, "dimgray") - : legendColorsClone.splice( - i, - 1, - chartColors[i] - ); - setLegendColors(legendColorsClone); - }; + {!!data.datasets ? ( + + + {chartLegend} + { + + (chartRef.current = ref)} + data={data} + height={null} + width={null} + options={{ + responsive: true, + aspectRatio: 1.37, + maintainAspectRatio: false, + scales: { + xAxes: [ + { + stacked: true, + }, + ], + yAxes: [ + { + stacked: true, + }, + ], + }, + legend: { + display: false, + }, + legendCallback: function (chart) { + return ( + + + +
+

+
+ {chart.data.datasets.map((dataset, i) => { + const updateLegendColors = () => { + const legendColorsClone = [...legendColors]; + legendColors[i] !== "dimgray" + ? legendColorsClone.splice(i, 1, "dimgray") + : legendColorsClone.splice( + i, + 1, + chartColors[i] + ); + setLegendColors(legendColorsClone); + }; - const customLegendClickHandler = () => { - const legendItem = chart.legend.legendItems[i]; - const index = legendItem.datasetIndex; - const ci = chartRef.current.chartInstance.chart; - const meta = ci.getDatasetMeta(index); + const customLegendClickHandler = () => { + const legendItem = + chart.legend.legendItems[i]; + const index = legendItem.datasetIndex; + const ci = + chartRef.current.chartInstance.chart; + const meta = ci.getDatasetMeta(index); - // See controller.isDatasetVisible comment - meta.hidden = - meta.hidden === null - ? !ci.data.datasets[index].hidden - : null; + // See controller.isDatasetVisible comment + meta.hidden = + meta.hidden === null + ? !ci.data.datasets[index].hidden + : null; - // We hid a dataset ... rerender the chart, - // then update the legend colors - updateLegendColors(ci.update()); - }; + // We hid a dataset ... rerender the chart, + // then update the legend colors + updateLegendColors(ci.update()); + }; - return ( -
-
-

-

-
- ); - })} -
-
-

- Total - Total -

-
-
- - {chart.data.labels.map((year, yearIterator) => { - let paddingRight = - yearIterator === 4 ? "null" : "pr-1"; - return ( - - -
-
-

- {year} + return ( +

+
+

+

- {chart.data.datasets.map( - (mode, modeIterator) => { - return ( -
-
-

- {mode.data[yearIterator]} -

-
- ); - } - )} -
-

- {data.datasets && - yearTotalsArray[yearIterator]} -

-
- - - ); - })} - - ); - }, - }} - /> - - } - - + ); + })} +
+
+

+ Total + Total +

+
+ + + {chart.data.labels.map((year, yearIterator) => { + let paddingRight = + yearIterator === 4 ? "null" : "pr-1"; + return ( + + +
+
+

+ {year} +

+
+ {chart.data.datasets.map( + (mode, modeIterator) => { + return ( +
+
+

+ {mode.data[yearIterator]} +

+
+ ); + } + )} +
+

+ {data.datasets && + yearTotalsArray[yearIterator]} +

+
+
+ + ); + })} + + ); + }, + }} + /> + + } + + + ) : ( + + )} ); }; diff --git a/atd-vzv/src/views/summary/CrashesByPopulation.js b/atd-vzv/src/views/summary/CrashesByPopulation.js index 63af03be1..aefef52d4 100644 --- a/atd-vzv/src/views/summary/CrashesByPopulation.js +++ b/atd-vzv/src/views/summary/CrashesByPopulation.js @@ -2,7 +2,7 @@ import React, { useState, useEffect } from "react"; import axios from "axios"; import styled from "styled-components"; import { Bar } from "react-chartjs-2"; -import { Container, Row, Col } from "reactstrap"; +import { Container, Row, Col, Spinner } from "reactstrap"; import { format } from "date-fns"; import CrashTypeSelector from "./Components/CrashTypeSelector"; @@ -108,68 +108,73 @@ const CrashesByPopulation = () => {
- - -
-

- Year -

-
-

Ratio

-
- - {!!chartData && - chartData.labels && - chartData.labels.map((year, i) => { - const yearRatio = chartData.datasets?.[0]?.data?.[i]; + {!!chartData.datasets ? ( +
+ + +
+

+ Year +

+
+

Ratio

+
+ + {chartData.labels && + chartData.labels.map((year, i) => { + const yearRatio = chartData.datasets?.[0]?.data?.[i]; - return ( - - -
-

- {year} -

-
-

- {/* Fallback if we haven't added the population for the year yet in popEsts.js */} - {yearRatio ? yearRatio : "-"} -

-
-
- - ); - })} -
- - - + +
+

+ {year} +

+
+

+ {/* Fallback if we haven't added the population for the year yet in popEsts.js */} + {yearRatio ? yearRatio : "-"} +

+
+
+ + ); + })} +
+ + + - - + scales: { + yAxes: [ + { + ticks: { + beginAtZero: true, + }, + }, + ], + }, + legend: { + display: false, + }, + }} + /> + + +
+ ) : ( + + )} ); }; diff --git a/atd-vzv/src/views/summary/CrashesByTimeOfDay.js b/atd-vzv/src/views/summary/CrashesByTimeOfDay.js index 038c7861c..ab2cc6268 100644 --- a/atd-vzv/src/views/summary/CrashesByTimeOfDay.js +++ b/atd-vzv/src/views/summary/CrashesByTimeOfDay.js @@ -4,7 +4,7 @@ import { format, parseISO, sub } from "date-fns"; import clonedeep from "lodash.clonedeep"; import CrashTypeSelector from "./Components/CrashTypeSelector"; -import { Row, Col, Container, Button } from "reactstrap"; +import { Row, Col, Container, Button, Spinner } from "reactstrap"; import styled from "styled-components"; import classnames from "classnames"; import { @@ -221,99 +221,105 @@ const CrashesByTimeOfDay = () => {
- - - - {yearsArray() // Calculate years ago for each year in data window - .map((year) => { - const currentYear = parseInt(format(dataEndDate, "yyyy")); - return currentYear - year; - }) - .map((yearsAgo) => ( - - ))} - - - - - - - `${d.x} ∙ + {!!heatmapDataWithPlaceholder ? ( +
+ + + + {yearsArray() // Calculate years ago for each year in data window + .map((year) => { + const currentYear = parseInt(format(dataEndDate, "yyyy")); + return currentYear - year; + }) + .map((yearsAgo) => ( + + ))} + + + + + + + `${d.x} ∙ ${formatValue(d)}` + } + /> } /> } /> } - /> - } - xAxis={ - } /> } /> } /> - } - /> - - - - - {!!maxForLegend && ( - - )} - - + + + + + {!!maxForLegend && ( + + )} + + +
+ ) : ( + + )} ); }; diff --git a/atd-vzv/src/views/summary/CrashesByYear.js b/atd-vzv/src/views/summary/CrashesByYear.js index 4a0a7640c..576126cad 100644 --- a/atd-vzv/src/views/summary/CrashesByYear.js +++ b/atd-vzv/src/views/summary/CrashesByYear.js @@ -3,7 +3,7 @@ import axios from "axios"; import CrashesByYearCumulative from "./CrashesByYearCumulative"; import CrashesByYearAverage from "./CrashesByYearAverage"; import ChartTypeSelector from "./Components/ChartTypeSelector"; -import { Container, Row, Col } from "reactstrap"; +import { Container, Row, Col, Spinner } from "reactstrap"; import CrashTypeSelector from "./Components/CrashTypeSelector"; import InfoPopover from "../../Components/Popover/InfoPopover"; @@ -101,7 +101,10 @@ const CrashesByYear = () => {
- + @@ -114,9 +117,13 @@ const CrashesByYear = () => { chartType={chartType} setChartType={setChartType} /> - - {renderChartByType(chartType)} - + {avgData.length > 0 && currentYearData.length > 0 ? ( + + {renderChartByType(chartType)} + + ) : ( + + )} ); }; diff --git a/atd-vzv/src/views/summary/PeopleByDemographics.js b/atd-vzv/src/views/summary/PeopleByDemographics.js index 19e791706..5aaf5bc95 100644 --- a/atd-vzv/src/views/summary/PeopleByDemographics.js +++ b/atd-vzv/src/views/summary/PeopleByDemographics.js @@ -3,7 +3,7 @@ import axios from "axios"; import { HorizontalBar } from "react-chartjs-2"; import "chartjs-plugin-stacked100"; import ChartTypeSelector from "./Components/ChartTypeSelector"; -import { Container, Row, Col } from "reactstrap"; +import { Container, Row, Col, Spinner } from "reactstrap"; import { format } from "date-fns"; import CrashTypeSelector from "./Components/CrashTypeSelector"; @@ -304,38 +304,42 @@ const PeopleByDemographics = () => { chartType={chartType} setChartType={setChartType} /> - - - { - const datasetIndex = tooltipItem.datasetIndex; - const datasetLabel = data.datasets[datasetIndex].label; - const originalValue = - data.originalData[datasetIndex][tooltipItem.index]; - const rateValue = - data.calculatedData[datasetIndex][tooltipItem.index]; - return `${datasetLabel}: ${originalValue} (${rateValue}%)`; + {!!chartData.datasets ? 
( + + + { + const datasetIndex = tooltipItem.datasetIndex; + const datasetLabel = data.datasets[datasetIndex].label; + const originalValue = + data.originalData[datasetIndex][tooltipItem.index]; + const rateValue = + data.calculatedData[datasetIndex][tooltipItem.index]; + return `${datasetLabel}: ${originalValue} (${rateValue}%)`; + }, }, }, - }, - }} - /> - - + }} + /> + + + ) : ( + + )} ); }; From cad37d1ef336d2b600b159bf412db01fcdda900a Mon Sep 17 00:00:00 2001 From: rose Date: Wed, 10 Apr 2024 18:25:25 -0500 Subject: [PATCH 10/35] spinner wasnt rendering correctly needed to change this logic --- atd-vzv/src/views/summary/CrashesByTimeOfDay.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/atd-vzv/src/views/summary/CrashesByTimeOfDay.js b/atd-vzv/src/views/summary/CrashesByTimeOfDay.js index ab2cc6268..2b6a293ad 100644 --- a/atd-vzv/src/views/summary/CrashesByTimeOfDay.js +++ b/atd-vzv/src/views/summary/CrashesByTimeOfDay.js @@ -221,7 +221,7 @@ const CrashesByTimeOfDay = () => {
- {!!heatmapDataWithPlaceholder ? ( + {!!heatmapDataWithPlaceholder.length > 0 ? (
From 1b87e8fa9eeaaeed8ea6315f19af3005cd40add3 Mon Sep 17 00:00:00 2001 From: rose Date: Wed, 10 Apr 2024 18:25:57 -0500 Subject: [PATCH 11/35] move the loading check up a level --- atd-vzv/src/views/summary/CrashesByYear.js | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/atd-vzv/src/views/summary/CrashesByYear.js b/atd-vzv/src/views/summary/CrashesByYear.js index 576126cad..72e7b30c5 100644 --- a/atd-vzv/src/views/summary/CrashesByYear.js +++ b/atd-vzv/src/views/summary/CrashesByYear.js @@ -112,15 +112,17 @@ const CrashesByYear = () => {
- {avgData.length > 0 && currentYearData.length > 0 ? ( - - {renderChartByType(chartType)} - +
+ + + {renderChartByType(chartType)} + +
) : ( )} From 5725787ceb025182494bed0d04d73baf54536093 Mon Sep 17 00:00:00 2001 From: rose Date: Wed, 10 Apr 2024 18:26:25 -0500 Subject: [PATCH 12/35] move the loading check up a level --- .../src/views/summary/PeopleByDemographics.js | 72 ++++++++++--------- 1 file changed, 37 insertions(+), 35 deletions(-) diff --git a/atd-vzv/src/views/summary/PeopleByDemographics.js b/atd-vzv/src/views/summary/PeopleByDemographics.js index 5aaf5bc95..f44f27c6e 100644 --- a/atd-vzv/src/views/summary/PeopleByDemographics.js +++ b/atd-vzv/src/views/summary/PeopleByDemographics.js @@ -299,44 +299,46 @@ const PeopleByDemographics = () => {
- value.label)} - chartType={chartType} - setChartType={setChartType} - /> {!!chartData.datasets ? ( - - - { - const datasetIndex = tooltipItem.datasetIndex; - const datasetLabel = data.datasets[datasetIndex].label; - const originalValue = - data.originalData[datasetIndex][tooltipItem.index]; - const rateValue = - data.calculatedData[datasetIndex][tooltipItem.index]; - return `${datasetLabel}: ${originalValue} (${rateValue}%)`; +
+ value.label)} + chartType={chartType} + setChartType={setChartType} + /> + + + { + const datasetIndex = tooltipItem.datasetIndex; + const datasetLabel = data.datasets[datasetIndex].label; + const originalValue = + data.originalData[datasetIndex][tooltipItem.index]; + const rateValue = + data.calculatedData[datasetIndex][tooltipItem.index]; + return `${datasetLabel}: ${originalValue} (${rateValue}%)`; + }, }, }, - }, - }} - /> - - + }} + /> + + +
) : ( )} From ccb3eb151f08949ce67d5601f3514f00bcaacb6c Mon Sep 17 00:00:00 2001 From: rose Date: Wed, 10 Apr 2024 18:39:22 -0500 Subject: [PATCH 13/35] this was causing slight misallignment --- atd-vzv/src/views/summary/CrashesByMode.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/atd-vzv/src/views/summary/CrashesByMode.js b/atd-vzv/src/views/summary/CrashesByMode.js index 92add9921..6d44f62d1 100644 --- a/atd-vzv/src/views/summary/CrashesByMode.js +++ b/atd-vzv/src/views/summary/CrashesByMode.js @@ -239,7 +239,7 @@ const CrashesByMode = () => {
-
+
{!!data.datasets ? ( From 9839b5bf9722b8126faddbed232ec4b226d5c383 Mon Sep 17 00:00:00 2001 From: rose Date: Wed, 10 Apr 2024 18:39:30 -0500 Subject: [PATCH 14/35] add margin to bottom --- atd-vzv/src/views/summary/SummaryCard.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/atd-vzv/src/views/summary/SummaryCard.js b/atd-vzv/src/views/summary/SummaryCard.js index ec93f3c56..9f7414f92 100644 --- a/atd-vzv/src/views/summary/SummaryCard.js +++ b/atd-vzv/src/views/summary/SummaryCard.js @@ -14,7 +14,7 @@ const SummaryCard = ({ child }) => { {/* Set height to fill parent column */} - + {child.title} {child.component} From 6c9cb3dd75a26bba37233594354db8677119f7ae Mon Sep 17 00:00:00 2001 From: rose Date: Wed, 10 Apr 2024 19:10:50 -0500 Subject: [PATCH 15/35] change spinner component to match the summarywidget cards --- atd-vzv/src/views/summary/CrashesByMode.js | 5 ++++- atd-vzv/src/views/summary/CrashesByPopulation.js | 5 ++++- atd-vzv/src/views/summary/CrashesByTimeOfDay.js | 5 ++++- atd-vzv/src/views/summary/CrashesByYear.js | 5 ++++- atd-vzv/src/views/summary/PeopleByDemographics.js | 5 ++++- 5 files changed, 20 insertions(+), 5 deletions(-) diff --git a/atd-vzv/src/views/summary/CrashesByMode.js b/atd-vzv/src/views/summary/CrashesByMode.js index 6d44f62d1..8850be264 100644 --- a/atd-vzv/src/views/summary/CrashesByMode.js +++ b/atd-vzv/src/views/summary/CrashesByMode.js @@ -24,6 +24,7 @@ import { summaryCurrentYearEndDate, } from "../../constants/time"; import { crashEndpointUrl } from "./queries/socrataQueries"; +import ColorSpinner from "../../Components/Spinner/ColorSpinner"; const CrashesByMode = () => { const chartColors = [ @@ -399,7 +400,9 @@ const CrashesByMode = () => { ) : ( - +

+ +

)} ); diff --git a/atd-vzv/src/views/summary/CrashesByPopulation.js b/atd-vzv/src/views/summary/CrashesByPopulation.js index aefef52d4..b66ccaa9f 100644 --- a/atd-vzv/src/views/summary/CrashesByPopulation.js +++ b/atd-vzv/src/views/summary/CrashesByPopulation.js @@ -12,6 +12,7 @@ import { crashEndpointUrl } from "./queries/socrataQueries"; import { dataStartDate, fiveYearAvgEndDateByPop } from "../../constants/time"; import { popEsts } from "../../constants/popEsts"; import { colors } from "../../constants/colors"; +import ColorSpinner from "../../Components/Spinner/ColorSpinner"; const CrashesByPopulation = () => { const [crashType, setCrashType] = useState(null); @@ -173,7 +174,9 @@ const CrashesByPopulation = () => {
) : ( - +

+ +

)} ); diff --git a/atd-vzv/src/views/summary/CrashesByTimeOfDay.js b/atd-vzv/src/views/summary/CrashesByTimeOfDay.js index 2b6a293ad..c2b2bb2a7 100644 --- a/atd-vzv/src/views/summary/CrashesByTimeOfDay.js +++ b/atd-vzv/src/views/summary/CrashesByTimeOfDay.js @@ -27,6 +27,7 @@ import { import { crashEndpointUrl } from "./queries/socrataQueries"; import { getYearsAgoLabel } from "./helpers/helpers"; import { colors } from "../../constants/colors"; +import ColorSpinner from "../../Components/Spinner/ColorSpinner"; const dayOfWeekArray = ["Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"]; @@ -318,7 +319,9 @@ const CrashesByTimeOfDay = () => {
) : ( - +

+ +

)}
); diff --git a/atd-vzv/src/views/summary/CrashesByYear.js b/atd-vzv/src/views/summary/CrashesByYear.js index 72e7b30c5..4f1f7beca 100644 --- a/atd-vzv/src/views/summary/CrashesByYear.js +++ b/atd-vzv/src/views/summary/CrashesByYear.js @@ -15,6 +15,7 @@ import { summaryCurrentYearEndDate, summaryCurrentYearStartDate, } from "../../constants/time"; +import ColorSpinner from "../../Components/Spinner/ColorSpinner"; const CrashesByYear = () => { const chartTypes = ["Monthly", "Cumulative"]; @@ -124,7 +125,9 @@ const CrashesByYear = () => {
) : ( - +

+ +

)}
); diff --git a/atd-vzv/src/views/summary/PeopleByDemographics.js b/atd-vzv/src/views/summary/PeopleByDemographics.js index f44f27c6e..729b17ffa 100644 --- a/atd-vzv/src/views/summary/PeopleByDemographics.js +++ b/atd-vzv/src/views/summary/PeopleByDemographics.js @@ -12,6 +12,7 @@ import { dataEndDate, yearsArray, dataStartDate } from "../../constants/time"; import { personEndpointUrl } from "./queries/socrataQueries"; import InfoPopover from "../../Components/Popover/InfoPopover"; import { popoverConfig } from "../../Components/Popover/popoverConfig"; +import ColorSpinner from "../../Components/Spinner/ColorSpinner"; const url = `${personEndpointUrl}?$query=`; @@ -340,7 +341,9 @@ const PeopleByDemographics = () => {
) : ( - +

+ +

)} ); From f9062a576854cb018198dfdb4e442210ac0e49f8 Mon Sep 17 00:00:00 2001 From: rose Date: Wed, 10 Apr 2024 19:11:57 -0500 Subject: [PATCH 16/35] remove the unused spinner imports --- atd-vzv/src/views/summary/CrashesByMode.js | 2 +- atd-vzv/src/views/summary/CrashesByPopulation.js | 2 +- atd-vzv/src/views/summary/CrashesByTimeOfDay.js | 2 +- atd-vzv/src/views/summary/CrashesByYear.js | 2 +- atd-vzv/src/views/summary/PeopleByDemographics.js | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/atd-vzv/src/views/summary/CrashesByMode.js b/atd-vzv/src/views/summary/CrashesByMode.js index 8850be264..f613cb4aa 100644 --- a/atd-vzv/src/views/summary/CrashesByMode.js +++ b/atd-vzv/src/views/summary/CrashesByMode.js @@ -1,7 +1,7 @@ import React, { useEffect, useState, useRef, useMemo } from "react"; import axios from "axios"; import { Bar } from "react-chartjs-2"; -import { Container, Row, Col, Spinner } from "reactstrap"; +import { Container, Row, Col } from "reactstrap"; import styled from "styled-components"; import { format } from "date-fns"; diff --git a/atd-vzv/src/views/summary/CrashesByPopulation.js b/atd-vzv/src/views/summary/CrashesByPopulation.js index b66ccaa9f..63fe881e7 100644 --- a/atd-vzv/src/views/summary/CrashesByPopulation.js +++ b/atd-vzv/src/views/summary/CrashesByPopulation.js @@ -2,7 +2,7 @@ import React, { useState, useEffect } from "react"; import axios from "axios"; import styled from "styled-components"; import { Bar } from "react-chartjs-2"; -import { Container, Row, Col, Spinner } from "reactstrap"; +import { Container, Row, Col } from "reactstrap"; import { format } from "date-fns"; import CrashTypeSelector from "./Components/CrashTypeSelector"; diff --git a/atd-vzv/src/views/summary/CrashesByTimeOfDay.js b/atd-vzv/src/views/summary/CrashesByTimeOfDay.js index c2b2bb2a7..9f1a6eeed 100644 --- a/atd-vzv/src/views/summary/CrashesByTimeOfDay.js +++ b/atd-vzv/src/views/summary/CrashesByTimeOfDay.js @@ -4,7 +4,7 @@ import { 
format, parseISO, sub } from "date-fns"; import clonedeep from "lodash.clonedeep"; import CrashTypeSelector from "./Components/CrashTypeSelector"; -import { Row, Col, Container, Button, Spinner } from "reactstrap"; +import { Row, Col, Container, Button } from "reactstrap"; import styled from "styled-components"; import classnames from "classnames"; import { diff --git a/atd-vzv/src/views/summary/CrashesByYear.js b/atd-vzv/src/views/summary/CrashesByYear.js index 4f1f7beca..d44ca17ba 100644 --- a/atd-vzv/src/views/summary/CrashesByYear.js +++ b/atd-vzv/src/views/summary/CrashesByYear.js @@ -3,7 +3,7 @@ import axios from "axios"; import CrashesByYearCumulative from "./CrashesByYearCumulative"; import CrashesByYearAverage from "./CrashesByYearAverage"; import ChartTypeSelector from "./Components/ChartTypeSelector"; -import { Container, Row, Col, Spinner } from "reactstrap"; +import { Container, Row, Col } from "reactstrap"; import CrashTypeSelector from "./Components/CrashTypeSelector"; import InfoPopover from "../../Components/Popover/InfoPopover"; diff --git a/atd-vzv/src/views/summary/PeopleByDemographics.js b/atd-vzv/src/views/summary/PeopleByDemographics.js index 729b17ffa..5e14108a0 100644 --- a/atd-vzv/src/views/summary/PeopleByDemographics.js +++ b/atd-vzv/src/views/summary/PeopleByDemographics.js @@ -3,7 +3,7 @@ import axios from "axios"; import { HorizontalBar } from "react-chartjs-2"; import "chartjs-plugin-stacked100"; import ChartTypeSelector from "./Components/ChartTypeSelector"; -import { Container, Row, Col, Spinner } from "reactstrap"; +import { Container, Row, Col } from "reactstrap"; import { format } from "date-fns"; import CrashTypeSelector from "./Components/CrashTypeSelector"; From 8079c93d5fb171e2b22521bad5f2ece02587458f Mon Sep 17 00:00:00 2001 From: rose Date: Thu, 11 Apr 2024 16:18:20 -0500 Subject: [PATCH 17/35] fix links to our team website --- atd-vze/src/containers/DefaultLayout/DefaultFooter.js | 4 ++-- atd-vzv/src/views/nav/Footer.js | 
2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/atd-vze/src/containers/DefaultLayout/DefaultFooter.js b/atd-vze/src/containers/DefaultLayout/DefaultFooter.js index 2c932cfc8..48d1d5d88 100644 --- a/atd-vze/src/containers/DefaultLayout/DefaultFooter.js +++ b/atd-vze/src/containers/DefaultLayout/DefaultFooter.js @@ -20,8 +20,8 @@ class DefaultFooter extends Component { Powered by{" "} - - ATD Data & Technology Services + + TPW Data & Technology Services diff --git a/atd-vzv/src/views/nav/Footer.js b/atd-vzv/src/views/nav/Footer.js index 31931f1a6..2de286ab1 100644 --- a/atd-vzv/src/views/nav/Footer.js +++ b/atd-vzv/src/views/nav/Footer.js @@ -102,7 +102,7 @@ const Footer = () => { }, { text: "Powered by Data & Technology Services", - url: "https://data.mobility.austin.gov/about/", + url: "https://austinmobility.io/", }, { text:
v{pckg.version}
}, ]; From 87fc98b08138ca65eacd396e4c4e6d9a3541fecf Mon Sep 17 00:00:00 2001 From: rose Date: Thu, 11 Apr 2024 16:18:35 -0500 Subject: [PATCH 18/35] update cr3 form manual link --- atd-vzv/src/Components/Popover/popoverConfig.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/atd-vzv/src/Components/Popover/popoverConfig.js b/atd-vzv/src/Components/Popover/popoverConfig.js index b7df80626..7c4d8863b 100644 --- a/atd-vzv/src/Components/Popover/popoverConfig.js +++ b/atd-vzv/src/Components/Popover/popoverConfig.js @@ -250,7 +250,7 @@ export const popoverConfig = { worth of property damage or any level of injury. For additional reference, see{" "} From af5c508237e1a82d5f817c6fb8a54052570ce8b4 Mon Sep 17 00:00:00 2001 From: rose Date: Fri, 12 Apr 2024 12:20:25 -0500 Subject: [PATCH 19/35] make a permanent no data option so all editable fields can be nulled out --- atd-vze/src/views/Crashes/RelatedRecordsTable.js | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/atd-vze/src/views/Crashes/RelatedRecordsTable.js b/atd-vze/src/views/Crashes/RelatedRecordsTable.js index d374448f7..e25611838 100644 --- a/atd-vze/src/views/Crashes/RelatedRecordsTable.js +++ b/atd-vze/src/views/Crashes/RelatedRecordsTable.js @@ -208,10 +208,7 @@ const RelatedRecordsTable = ({ } type="select" > - {/* Show a NO DATA option only when formatValue is displayed. */} - {formatValue(row, field) === "NO DATA" && ( - - )} + {lookupOptions[ fieldConfig.fields[field].lookupOptions ].map(option => { @@ -271,7 +268,8 @@ const RelatedRecordsTable = ({ )} {!isEditing && - (fieldConfig.fields[field].badge ? ( + (fieldConfig.fields[field].badge && + formatValue(row, field) != "NO DATA" ? 
( Date: Fri, 12 Apr 2024 12:21:05 -0500 Subject: [PATCH 20/35] remove our custom no data field from the body style lkp table, doesnt make sense to have it instead of just letting users make the value null when there is no data --- .../default/1712941514048_delete_no_data_option/down.sql | 1 + .../default/1712941514048_delete_no_data_option/up.sql | 1 + 2 files changed, 2 insertions(+) create mode 100644 atd-vzd/migrations/default/1712941514048_delete_no_data_option/down.sql create mode 100644 atd-vzd/migrations/default/1712941514048_delete_no_data_option/up.sql diff --git a/atd-vzd/migrations/default/1712941514048_delete_no_data_option/down.sql b/atd-vzd/migrations/default/1712941514048_delete_no_data_option/down.sql new file mode 100644 index 000000000..30b71cd61 --- /dev/null +++ b/atd-vzd/migrations/default/1712941514048_delete_no_data_option/down.sql @@ -0,0 +1 @@ +INSERT INTO public.atd_txdot__veh_body_styl_lkp(veh_body_styl_id, veh_body_styl_desc) VALUES (-1, 'NO DATA'); \ No newline at end of file diff --git a/atd-vzd/migrations/default/1712941514048_delete_no_data_option/up.sql b/atd-vzd/migrations/default/1712941514048_delete_no_data_option/up.sql new file mode 100644 index 000000000..e8c284f19 --- /dev/null +++ b/atd-vzd/migrations/default/1712941514048_delete_no_data_option/up.sql @@ -0,0 +1 @@ +DELETE FROM public.atd_txdot__veh_body_styl_lkp WHERE veh_body_styl_id = -1; From e0a568b843452c42206be98e9fffa33018d6cc03 Mon Sep 17 00:00:00 2001 From: rose Date: Fri, 12 Apr 2024 12:44:59 -0500 Subject: [PATCH 21/35] update these migrations --- .../default/1712941514048_delete_no_data_option/down.sql | 2 +- .../default/1712941514048_delete_no_data_option/up.sql | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/atd-vzd/migrations/default/1712941514048_delete_no_data_option/down.sql b/atd-vzd/migrations/default/1712941514048_delete_no_data_option/down.sql index 30b71cd61..77cd5693e 100644 --- 
a/atd-vzd/migrations/default/1712941514048_delete_no_data_option/down.sql +++ b/atd-vzd/migrations/default/1712941514048_delete_no_data_option/down.sql @@ -1 +1 @@ -INSERT INTO public.atd_txdot__veh_body_styl_lkp(veh_body_styl_id, veh_body_styl_desc) VALUES (-1, 'NO DATA'); \ No newline at end of file +INSERT INTO public.atd_txdot__veh_body_styl_lkp(veh_body_styl_id, veh_body_styl_desc) VALUES (-1, 'NO DATA'); diff --git a/atd-vzd/migrations/default/1712941514048_delete_no_data_option/up.sql b/atd-vzd/migrations/default/1712941514048_delete_no_data_option/up.sql index e8c284f19..135b299d6 100644 --- a/atd-vzd/migrations/default/1712941514048_delete_no_data_option/up.sql +++ b/atd-vzd/migrations/default/1712941514048_delete_no_data_option/up.sql @@ -1 +1,4 @@ +-- Remove the custom "NO DATA" lookup option, instead we should just use null to mean null DELETE FROM public.atd_txdot__veh_body_styl_lkp WHERE veh_body_styl_id = -1; + +UPDATE public.atd_txdot_units SET veh_body_styl_id = null WHERE veh_body_styl_id = -1; From a770014156e2dfbebdb10deeef5bcefe0d616939 Mon Sep 17 00:00:00 2001 From: chiaberry Date: Mon, 15 Apr 2024 13:18:35 -0500 Subject: [PATCH 22/35] add flags to readme --- atd-etl/cr3_download/README.md | 6 +++++- atd-etl/cr3_download/docker-compose.yml | 2 +- atd-etl/cr3_download/process/helpers_cr3.py | 7 ++++++- 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/atd-etl/cr3_download/README.md b/atd-etl/cr3_download/README.md index e109a331b..8c04f6ffb 100644 --- a/atd-etl/cr3_download/README.md +++ b/atd-etl/cr3_download/README.md @@ -14,4 +14,8 @@ Otherwise, you can run: $ docker compose run cr3_download ``` -The script will prompt for the cookie and then download any pending CR3s. \ No newline at end of file +There are two optional flags you can include +`-t`, `--threads` Number of concurrent downloaders, default is 5 +`-v`, `--verbose` Enable verbose logging + +The script will prompt for the cookie and then download any pending CR3s. 
diff --git a/atd-etl/cr3_download/docker-compose.yml b/atd-etl/cr3_download/docker-compose.yml index ba6ba2d77..909a59051 100644 --- a/atd-etl/cr3_download/docker-compose.yml +++ b/atd-etl/cr3_download/docker-compose.yml @@ -5,5 +5,5 @@ services: - .:/app env_file: - ./.env - # entrypoint: /bin/bash + # entrypoint: /bin/bash #enables a shell entrypoint: /app/cr3_download.py -t 5 diff --git a/atd-etl/cr3_download/process/helpers_cr3.py b/atd-etl/cr3_download/process/helpers_cr3.py index 114fbd1e8..d6d5df297 100644 --- a/atd-etl/cr3_download/process/helpers_cr3.py +++ b/atd-etl/cr3_download/process/helpers_cr3.py @@ -27,6 +27,7 @@ def run_command(command, verbose): """ Runs a command :param command: array of strings containing the command and flags + :param verbose: boolean, handles level of logging """ if verbose: print(command) @@ -35,12 +36,12 @@ def run_command(command, verbose): subprocess.check_output(command, shell=True).decode("utf-8") -# Now we need to implement our methods. def download_cr3(crash_id, cookies, verbose): """ Downloads a CR3 pdf from the CRIS website. :param crash_id: string - The crash id :param cookies: dict - A dictionary containing key=value pairs with cookie name and values. + :param verbose: boolean, handles level of logging """ cookie = SimpleCookie() @@ -67,6 +68,7 @@ def upload_cr3(crash_id, verbose): """ Uploads a file to S3 using the awscli command :param crash_id: string - The crash id + :param verbose: boolean, handles level of logging """ file = "/tmp/%s.pdf" % crash_id destination = "s3://%s/%s/%s.pdf" % ( @@ -91,6 +93,7 @@ def delete_cr3s(crash_id, verbose): """ Deletes the downloaded CR3 pdf file :param crash_id: string - The crash id + :param verbose: boolean, handles level of logging """ file = "/tmp/%s.pdf" % crash_id run_command("rm %s" % file, verbose) @@ -122,6 +125,7 @@ def update_crash_id(crash_id, verbose): """ Updates the status of a crash to having an available CR3 pdf in the S3 bucket. 
:param crash_id: string - The Crash ID that needs to be updated + :param verbose: boolean, handles level of logging :return: dict - Response from request.post """ @@ -157,6 +161,7 @@ def process_crash_cr3(crash_record, cookies, skipped_uploads_and_updates, verbos :param crash_record: dict - The individual crash record being processed :param cookies: dict - The cookies taken from the browser object :param skipped_uploads_and_updates: list - Crash IDs of unsuccessful pdf downloads + :param verbose: boolean, handles level of logging """ try: crash_id = str(crash_record["crash_id"]) From f0f709cedbe75157eac4b59cedaf8260f7cdcc36 Mon Sep 17 00:00:00 2001 From: Mike Date: Mon, 15 Apr 2024 14:27:57 -0500 Subject: [PATCH 23/35] Add env-template and add to README --- atd-etl/cris_import/README.md | 9 +++++++++ atd-etl/cris_import/env-template | 3 +++ 2 files changed, 12 insertions(+) create mode 100644 atd-etl/cris_import/env-template diff --git a/atd-etl/cris_import/README.md b/atd-etl/cris_import/README.md index b6af7ee3e..cf80d734e 100644 --- a/atd-etl/cris_import/README.md +++ b/atd-etl/cris_import/README.md @@ -12,3 +12,12 @@ The contents of this directory define a Docker image that can be used in an Airf ### Zip file acquisition The CRIS import usually works by pulling down a zip archive from the SFTP endpoint. However, during development, it's much easier if you can have it use a zip file that you have locally on your machine instead. This can be accomplished by putting a zip file (still encrypted and using "CRIS extract" password) in a folder named `atd-etl/cris_import/development_extracts/`. Create the directory if needed. If there are no zip files in that folder, the program will automatically revert to inspecting the SFTP endpoint. + +### Local testing + +Make a copy of `env-template` and name it `env`. 
Fill in the values using the 1Password Connect Server secrets (see entries titled `Endpoint for 1Password Connect Server API` and `Vault ID of API Accessible Secrets vault`) and your personal access token. + +Drop a CRIS extract zip file into your development folder described above, and run the import script: +```bash +docker compose run cris-import +``` diff --git a/atd-etl/cris_import/env-template b/atd-etl/cris_import/env-template new file mode 100644 index 000000000..7650c06f4 --- /dev/null +++ b/atd-etl/cris_import/env-template @@ -0,0 +1,3 @@ +OP_API_TOKEN= +OP_CONNECT= +OP_VAULT_ID= From ca3ae69f2af87e278bdab349d1de7a3e9bd8cdc4 Mon Sep 17 00:00:00 2001 From: Mike Date: Mon, 15 Apr 2024 15:21:07 -0500 Subject: [PATCH 24/35] Skip update if a unique key column for a record is missing --- atd-etl/cris_import/cris_import.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/atd-etl/cris_import/cris_import.py b/atd-etl/cris_import/cris_import.py index 9141ceee7..788fb3be9 100755 --- a/atd-etl/cris_import/cris_import.py +++ b/atd-etl/cris_import/cris_import.py @@ -612,7 +612,24 @@ def align_records(map_state): input_column_names = util.get_input_column_names(pg, map_state["import_schema"], table, target_columns) # iterate over each imported record and determine correct action + should_skip_update = False + for source in imported_records: + # Check unique key columns to make sure they all have a value + for key_column in key_columns: + key_column_value = source[key_column] + if key_column_value == None: + + print("\nSkipping because unique key column is missing") + print(f"Table: {table}") + print(f"Missing key column: {key_column}") + should_skip_update = True + + # If we are missing a column that uniquely identifies the record, we should skip the update + if should_skip_update: + for key_column in key_columns: + print(f"{key_column}: {source[key_column]}") + continue # generate some record specific SQL fragments to identify the record in larger 
queries record_key_sql, import_key_sql = util.get_key_clauses(table_keys, output_map, table, source, map_state["import_schema"]) From fae3c9d116da6e82d810937bcb1bb6d3c9e3c88e Mon Sep 17 00:00:00 2001 From: Mike Date: Mon, 15 Apr 2024 15:35:44 -0500 Subject: [PATCH 25/35] Remove space and typo --- atd-etl/cris_import/README.md | 2 +- atd-etl/cris_import/cris_import.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/atd-etl/cris_import/README.md b/atd-etl/cris_import/README.md index cf80d734e..58567c065 100644 --- a/atd-etl/cris_import/README.md +++ b/atd-etl/cris_import/README.md @@ -17,7 +17,7 @@ The CRIS import usually works by pulling down a zip archive from the SFTP endpoi Make a copy of `env-template` and name it `env`. Fill in the values using the 1Password Connect Server secrets (see entries titled `Endpoint for 1Password Connect Server API` and `Vault ID of API Accessible Secrets vault`) and your personal access token. -Drop a CRIS extract zip file into your development folder described above, and run the import script: +Drop a CRIS extract zip file into your development folder as described above, and run the import script: ```bash docker compose run cris-import ``` diff --git a/atd-etl/cris_import/cris_import.py b/atd-etl/cris_import/cris_import.py index 788fb3be9..b57bc0f6e 100755 --- a/atd-etl/cris_import/cris_import.py +++ b/atd-etl/cris_import/cris_import.py @@ -619,7 +619,6 @@ def align_records(map_state): for key_column in key_columns: key_column_value = source[key_column] if key_column_value == None: - print("\nSkipping because unique key column is missing") print(f"Table: {table}") print(f"Missing key column: {key_column}") From fa587d973ae39fd9a61f0385297cc8e2ddeb76ea Mon Sep 17 00:00:00 2001 From: Mike Date: Mon, 15 Apr 2024 16:20:02 -0500 Subject: [PATCH 26/35] Update check for None --- atd-etl/cris_import/cris_import.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/atd-etl/cris_import/cris_import.py 
b/atd-etl/cris_import/cris_import.py index b57bc0f6e..e5bed7c74 100755 --- a/atd-etl/cris_import/cris_import.py +++ b/atd-etl/cris_import/cris_import.py @@ -618,7 +618,7 @@ def align_records(map_state): # Check unique key columns to make sure they all have a value for key_column in key_columns: key_column_value = source[key_column] - if key_column_value == None: + if key_column_value is None: print("\nSkipping because unique key column is missing") print(f"Table: {table}") print(f"Missing key column: {key_column}") From 11d881f489de0e1c0e05ff98ee9a93922d0f467a Mon Sep 17 00:00:00 2001 From: rose Date: Tue, 16 Apr 2024 16:30:53 -0500 Subject: [PATCH 27/35] fix not equals operator --- atd-vze/src/views/Crashes/RelatedRecordsTable.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/atd-vze/src/views/Crashes/RelatedRecordsTable.js b/atd-vze/src/views/Crashes/RelatedRecordsTable.js index e25611838..2dcc7cc47 100644 --- a/atd-vze/src/views/Crashes/RelatedRecordsTable.js +++ b/atd-vze/src/views/Crashes/RelatedRecordsTable.js @@ -269,7 +269,7 @@ const RelatedRecordsTable = ({ {!isEditing && (fieldConfig.fields[field].badge && - formatValue(row, field) != "NO DATA" ? ( + formatValue(row, field) !== "NO DATA" ? ( Date: Wed, 24 Apr 2024 09:37:33 -0500 Subject: [PATCH 28/35] expand on the uses of entrypoints in this stack --- atd-etl/cr3_download/docker-compose.yml | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/atd-etl/cr3_download/docker-compose.yml b/atd-etl/cr3_download/docker-compose.yml index 909a59051..e1ec69527 100644 --- a/atd-etl/cr3_download/docker-compose.yml +++ b/atd-etl/cr3_download/docker-compose.yml @@ -5,5 +5,14 @@ services: - .:/app env_file: - ./.env - # entrypoint: /bin/bash #enables a shell + + # For normal operation, the entry point is defined to simply run the script + # and then exit. 
entrypoint: /app/cr3_download.py -t 5 + + # During development or during CRIS reprocessing, it can be helpful to run + # the container interactively, so the following entrypoint can be used + # to drop the user into a shell where they can run the script manually. + # This can also be done via the --entrypoint=/bin/bash flag when running. + + # entrypoint: /bin/bash From e27c3a9738a0aadf8fcab1d653a62d9382c76448 Mon Sep 17 00:00:00 2001 From: Frank Hereford Date: Wed, 24 Apr 2024 11:35:51 -0500 Subject: [PATCH 29/35] incorporate logging module --- atd-etl/cr3_download/cr3_download.py | 58 +++++++++++++-------- atd-etl/cr3_download/process/helpers_cr3.py | 39 +++++++------- 2 files changed, 56 insertions(+), 41 deletions(-) diff --git a/atd-etl/cr3_download/cr3_download.py b/atd-etl/cr3_download/cr3_download.py index 96851a14d..27b534d22 100755 --- a/atd-etl/cr3_download/cr3_download.py +++ b/atd-etl/cr3_download/cr3_download.py @@ -18,6 +18,9 @@ from onepasswordconnectsdk.client import new_client import onepasswordconnectsdk +import sys +import logging + def load_secrets(one_password_client, vault_id): """Load required secrets from 1Password.""" @@ -68,17 +71,20 @@ def load_secrets(one_password_client, vault_id): def process_crash_cr3_threaded( - crash_record, cris_browser_cookies, skipped_uploads_and_updates, verbose + crash_record, cris_browser_cookies, skipped_uploads_and_updates, verbose, log ): """Process a crash record in a separate thread.""" try: process_crash_cr3( - crash_record, cris_browser_cookies, skipped_uploads_and_updates, verbose + crash_record, + cris_browser_cookies, + skipped_uploads_and_updates, + verbose, + log, ) - if verbose: - print(f"Processed crash ID: {crash_record['crash_id']}") + log.info(f"Processed crash ID: {crash_record['crash_id']}") except Exception as e: - print(f"Error processing crash ID {crash_record['crash_id']}: {str(e)}") + log.warning(f"Error processing crash ID {crash_record['crash_id']}: {str(e)}")
skipped_uploads_and_updates.append(str(crash_record["crash_id"])) @@ -90,14 +96,24 @@ def main(): "--threads", type=int, default=1, - help="Number of concurrent downloaders (default: 1)", + help="Number of concurrent downloading threads(default: 1)", ) parser.add_argument( "-v", "--verbose", action="store_true", help="Enable verbose logging" ) args = parser.parse_args() - # Start timer + log = logging.getLogger("cr3_download") + log.setLevel(logging.DEBUG if args.verbose else logging.INFO) + + # Create a StreamHandler for stdout + stdout_handler = logging.StreamHandler(sys.stdout) + # Optional: set the level of the stdout handler + stdout_handler.setLevel(logging.DEBUG if args.verbose else logging.INFO) + + # Add the handler to the logger + log.addHandler(stdout_handler) + start = time.time() # Get 1Password secrets from environment @@ -117,12 +133,13 @@ def main(): # Ask user for a set of valid cookies for requests to the CRIS website CRIS_BROWSER_COOKIES = input( - "Please login to CRIS and extract the contents of the Cookie: header and please paste it here: " + "Please login to CRIS and extract the contents of the Cookie: header and please paste it here:\n\n" ) - if args.verbose: - print("Preparing download loop.") - print("Gathering list of crashes.") + print("\n") # pad pasted cookie value with some whitespace for clarity + + log.debug("Preparing download loop.") + log.debug("Gathering list of crashes.") # Track crash IDs that we don't successfully retrieve a pdf file for skipped_uploads_and_updates = [] @@ -136,8 +153,7 @@ def main(): crashes_list_without_skips = [] try: - if args.verbose: - print(f"Hasura endpoint: '{os.getenv('HASURA_ENDPOINT')}'") + log.debug(f"Hasura endpoint: '{os.getenv('HASURA_ENDPOINT')}'") response = get_crash_id_list() @@ -150,9 +166,8 @@ def main(): crashes_list_without_skips = [] print(f"Error, could not run CR3 processing: {str(e)}") - print(f"Length of queue: {len(crashes_list_without_skips)}") - if args.verbose: - 
print("Starting CR3 downloads:") + log.info(f"Length of queue: {len(crashes_list_without_skips)}") + log.debug("Starting CR3 downloads:") max_workers = args.threads with ThreadPoolExecutor(max_workers=max_workers) as executor: @@ -164,25 +179,24 @@ def main(): CRIS_BROWSER_COOKIES, skipped_uploads_and_updates, args.verbose, + log, ) futures.append(future) for future in futures: future.result() - print("Process done.") + log.debug("Process done.") if skipped_uploads_and_updates: skipped_downloads = ", ".join(skipped_uploads_and_updates) - print(f"\nUnable to download PDFs for crash IDs: {skipped_downloads}") + log.debug(f"\nUnable to download PDFs for crash IDs: {skipped_downloads}") end = time.time() hours, rem = divmod(end - start, 3600) minutes, seconds = divmod(rem, 60) - print( - "\nFinished in: {:0>2}:{:0>2}:{:05.2f}".format( - int(hours), int(minutes), seconds - ) + log.info( + "Finished in: {:0>2}:{:0>2}:{:05.2f}".format(int(hours), int(minutes), seconds) ) diff --git a/atd-etl/cr3_download/process/helpers_cr3.py b/atd-etl/cr3_download/process/helpers_cr3.py index d6d5df297..96e63db3f 100644 --- a/atd-etl/cr3_download/process/helpers_cr3.py +++ b/atd-etl/cr3_download/process/helpers_cr3.py @@ -23,20 +23,20 @@ from .request import run_query -def run_command(command, verbose): +def run_command(command, verbose, log): """ Runs a command :param command: array of strings containing the command and flags :param verbose: boolean, handles level of logging """ if verbose: - print(command) - print(subprocess.check_output(command, shell=True).decode("utf-8")) + log.info(command) + log.info(subprocess.check_output(command, shell=True).decode("utf-8")) else: subprocess.check_output(command, shell=True).decode("utf-8") -def download_cr3(crash_id, cookies, verbose): +def download_cr3(crash_id, cookies, verbose, log): """ Downloads a CR3 pdf from the CRIS website. 
:param crash_id: string - The crash id @@ -56,15 +56,14 @@ def download_cr3(crash_id, cookies, verbose): url = os.getenv("ATD_CRIS_CR3_URL") + crash_id_encoded download_path = "/tmp/" + "%s.pdf" % crash_id - if verbose: - print("Downloading (%s): '%s' from %s" % (crash_id, download_path, url)) + log.info("Downloading (%s): '%s' from %s" % (crash_id, download_path, url)) resp = requests.get(url, allow_redirects=True, cookies=baked_cookies) open(download_path, "wb").write(resp.content) return download_path -def upload_cr3(crash_id, verbose): +def upload_cr3(crash_id, verbose, log): """ Uploads a file to S3 using the awscli command :param crash_id: string - The crash id @@ -86,17 +85,18 @@ def upload_cr3(crash_id, verbose): quiet_option, ), verbose, + log, ) -def delete_cr3s(crash_id, verbose): +def delete_cr3s(crash_id, verbose, log): """ Deletes the downloaded CR3 pdf file :param crash_id: string - The crash id :param verbose: boolean, handles level of logging """ file = "/tmp/%s.pdf" % crash_id - run_command("rm %s" % file, verbose) + run_command("rm %s" % file, verbose, log) def get_crash_id_list(): @@ -121,7 +121,7 @@ def get_crash_id_list(): return run_query(query_crashes_cr3) -def update_crash_id(crash_id, verbose): +def update_crash_id(crash_id, log): """ Updates the status of a crash to having an available CR3 pdf in the S3 bucket. 
:param crash_id: string - The Crash ID that needs to be updated @@ -139,8 +139,7 @@ def update_crash_id(crash_id, verbose): """ % crash_id ) - if verbose: - print(f"Marking CR3 status as downloaded for crash_id: {crash_id}") + log.info(f"Marking CR3 status as downloaded for crash_id: {crash_id}") return run_query(update_record_cr3) @@ -155,7 +154,7 @@ def check_if_pdf(file_path): return file_type == "application/pdf" -def process_crash_cr3(crash_record, cookies, skipped_uploads_and_updates, verbose): +def process_crash_cr3(crash_record, cookies, skipped_uploads_and_updates, verbose, log): """ Downloads a CR3 pdf, uploads it to s3, updates the database and deletes the pdf. :param crash_record: dict - The individual crash record being processed @@ -168,21 +167,23 @@ def process_crash_cr3(crash_record, cookies, skipped_uploads_and_updates, verbos print("Processing Crash: " + crash_id) - download_path = download_cr3(crash_id, cookies, verbose) + download_path = download_cr3(crash_id, cookies, verbose, log) is_file_pdf = check_if_pdf(download_path) if not is_file_pdf: - print(f"\nFile {download_path} is not a pdf - skipping upload and update") + log.warning( + f"\nFile {download_path} is not a pdf - skipping upload and update" + ) with open(download_path, "r") as file: print(file.read()) time.sleep(10) skipped_uploads_and_updates.append(crash_id) else: - upload_cr3(crash_id, verbose) - update_crash_id(crash_id, verbose) + upload_cr3(crash_id, verbose, log) + update_crash_id(crash_id, log) - delete_cr3s(crash_id, verbose) + delete_cr3s(crash_id, verbose, log) except Exception as e: - print("Error: %s" % str(e)) + log.error("Error: %s" % str(e)) return From 655f345a2e2e7ac14d492dc319121b80fb26a056 Mon Sep 17 00:00:00 2001 From: Frank Hereford Date: Wed, 24 Apr 2024 13:19:11 -0500 Subject: [PATCH 30/35] update docstrings --- atd-etl/cr3_download/process/helpers_cr3.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git 
a/atd-etl/cr3_download/process/helpers_cr3.py b/atd-etl/cr3_download/process/helpers_cr3.py index 96e63db3f..a653fdab0 100644 --- a/atd-etl/cr3_download/process/helpers_cr3.py +++ b/atd-etl/cr3_download/process/helpers_cr3.py @@ -28,6 +28,7 @@ def run_command(command, verbose, log): Runs a command :param command: array of strings containing the command and flags :param verbose: boolean, handles level of logging + :param log: logger - The logger object """ if verbose: log.info(command) @@ -42,6 +43,7 @@ def download_cr3(crash_id, cookies, verbose, log): :param crash_id: string - The crash id :param cookies: dict - A dictionary containing key=value pairs with cookie name and values. :param verbose: boolean, handles level of logging + :param log: logger - The logger object """ cookie = SimpleCookie() @@ -125,7 +127,7 @@ def update_crash_id(crash_id, log): """ Updates the status of a crash to having an available CR3 pdf in the S3 bucket. :param crash_id: string - The Crash ID that needs to be updated - :param verbose: boolean, handles level of logging + :param log: logger - The logger object :return: dict - Response from request.post """ @@ -161,6 +163,7 @@ def process_crash_cr3(crash_record, cookies, skipped_uploads_and_updates, verbos :param cookies: dict - The cookies taken from the browser object :param skipped_uploads_and_updates: list - Crash IDs of unsuccessful pdf downloads :param verbose: boolean, handles level of logging + :param log: logger - The logger object """ try: crash_id = str(crash_record["crash_id"]) From 052f8a20f728186d5873b4a49a2839b31e50f0f4 Mon Sep 17 00:00:00 2001 From: Frank Hereford Date: Wed, 24 Apr 2024 13:19:24 -0500 Subject: [PATCH 31/35] switch to an in-memory model for the CR3 handling --- atd-etl/cr3_download/process/helpers_cr3.py | 79 +++++++++------------ 1 file changed, 35 insertions(+), 44 deletions(-) diff --git a/atd-etl/cr3_download/process/helpers_cr3.py b/atd-etl/cr3_download/process/helpers_cr3.py index a653fdab0..8df39e15f 
100644 --- a/atd-etl/cr3_download/process/helpers_cr3.py +++ b/atd-etl/cr3_download/process/helpers_cr3.py @@ -15,8 +15,8 @@ import base64 import subprocess import time +from io import BytesIO from http.cookies import SimpleCookie - import magic # We need the run_query method @@ -56,49 +56,43 @@ def download_cr3(crash_id, cookies, verbose, log): str("CrashId=" + crash_id).encode("utf-8") ).decode("utf-8") url = os.getenv("ATD_CRIS_CR3_URL") + crash_id_encoded - download_path = "/tmp/" + "%s.pdf" % crash_id - log.info("Downloading (%s): '%s' from %s" % (crash_id, download_path, url)) + log.info("Downloading (%s) from %s" % (crash_id, url)) resp = requests.get(url, allow_redirects=True, cookies=baked_cookies) - open(download_path, "wb").write(resp.content) - return download_path + # Create a BytesIO object and write the content to it + file_in_memory = BytesIO() + file_in_memory.write(resp.content) + file_in_memory.seek(0) # Reset the file pointer to the beginning + + return file_in_memory + + +import boto3 +from botocore.exceptions import NoCredentialsError -def upload_cr3(crash_id, verbose, log): +def upload_cr3(crash_id, cr3: BytesIO, verbose, log): """ - Uploads a file to S3 using the awscli command + Uploads a BytesIO object to S3 :param crash_id: string - The crash id + :param cr3: BytesIO - The BytesIO object containing the PDF data :param verbose: boolean, handles level of logging + :param log: logger - The logger object """ - file = "/tmp/%s.pdf" % crash_id - destination = "s3://%s/%s/%s.pdf" % ( - os.getenv("AWS_CRIS_CR3_BUCKET_NAME"), + s3 = boto3.client("s3") + + destination = "%s/%s.pdf" % ( os.getenv("AWS_CRIS_CR3_BUCKET_PATH"), crash_id, ) - quiet_option = "--quiet" if not verbose else "" - run_command( - "aws s3 cp %s %s --no-progress %s" - % ( - file, - destination, - quiet_option, - ), - verbose, - log, - ) - - -def delete_cr3s(crash_id, verbose, log): - """ - Deletes the downloaded CR3 pdf file - :param crash_id: string - The crash id - :param 
verbose: boolean, handles level of logging - """ - file = "/tmp/%s.pdf" % crash_id - run_command("rm %s" % file, verbose, log) + try: + s3.upload_fileobj(cr3, os.getenv("AWS_CRIS_CR3_BUCKET_NAME"), destination) + if verbose: + log.info(f"File uploaded to {destination}") + except NoCredentialsError: + log.error("No AWS credentials found") def get_crash_id_list(): @@ -145,14 +139,15 @@ def update_crash_id(crash_id, log): return run_query(update_record_cr3) -def check_if_pdf(file_path): +def check_if_pdf(file_in_memory: BytesIO): """ - Checks if a file is a pdf - :param file_path: string - The file path - :return: boolean - True if the file is a pdf + Checks if a BytesIO object contains a pdf + :param file_in_memory: BytesIO - The BytesIO object + :return: boolean - True if the BytesIO object contains a pdf """ mime = magic.Magic(mime=True) - file_type = mime.from_file(file_path) + file_type = mime.from_buffer(file_in_memory.read()) + file_in_memory.seek(0) # Reset the file pointer to the beginning return file_type == "application/pdf" @@ -170,23 +165,19 @@ def process_crash_cr3(crash_record, cookies, skipped_uploads_and_updates, verbos print("Processing Crash: " + crash_id) - download_path = download_cr3(crash_id, cookies, verbose, log) - is_file_pdf = check_if_pdf(download_path) + cr3 = download_cr3(crash_id, cookies, verbose, log) + is_file_pdf = check_if_pdf(cr3) if not is_file_pdf: log.warning( - f"\nFile {download_path} is not a pdf - skipping upload and update" + f"\nFile for crash ID {crash_id} is not a pdf - skipping upload and update" ) - with open(download_path, "r") as file: - print(file.read()) time.sleep(10) skipped_uploads_and_updates.append(crash_id) else: - upload_cr3(crash_id, verbose, log) + upload_cr3(crash_id, cr3, verbose, log) update_crash_id(crash_id, log) - delete_cr3s(crash_id, verbose, log) - except Exception as e: log.error("Error: %s" % str(e)) return From 0dd5e55899f236a57ad715b2aeac033e76a0aae3 Mon Sep 17 00:00:00 2001 From: John Clary 
Date: Thu, 2 May 2024 12:08:31 -0400 Subject: [PATCH 32/35] update description of 'Other' modes in by travel mode modal --- atd-vzv/src/Components/Popover/popoverConfig.js | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/atd-vzv/src/Components/Popover/popoverConfig.js b/atd-vzv/src/Components/Popover/popoverConfig.js index 7c4d8863b..de6f073b9 100644 --- a/atd-vzv/src/Components/Popover/popoverConfig.js +++ b/atd-vzv/src/Components/Popover/popoverConfig.js @@ -223,9 +223,8 @@ export const popoverConfig = { motorized scooter
  • - Other: Any modality other than motorist, - pedestrian, motorcyclist, bicyclist, or e-scooter rider, such as - a pedicab + Other: Any other modality, such as + a pedicab, or the modality may be unreported or unknown
  • From 0a041a105181ba0b267b42d5a3cc426b72527e19 Mon Sep 17 00:00:00 2001 From: John Clary Date: Thu, 2 May 2024 12:33:47 -0400 Subject: [PATCH 33/35] make the text better --- atd-vzv/src/Components/Popover/popoverConfig.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/atd-vzv/src/Components/Popover/popoverConfig.js b/atd-vzv/src/Components/Popover/popoverConfig.js index de6f073b9..7de1a2c7d 100644 --- a/atd-vzv/src/Components/Popover/popoverConfig.js +++ b/atd-vzv/src/Components/Popover/popoverConfig.js @@ -224,7 +224,7 @@ export const popoverConfig = {
  • Other: Any other modality, such as - a pedicab, or the modality may be unreported or unknown + a pedicab, or when the modality is unreported or unknown
  • From e8f7a36427efdb17e741e3dc5e837ffa38dbecb2 Mon Sep 17 00:00:00 2001 From: rose Date: Thu, 2 May 2024 12:15:57 -0500 Subject: [PATCH 34/35] bump VZE and VZV versions --- atd-vze/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/atd-vze/package.json b/atd-vze/package.json index 12ffcb9a3..f2a99f77c 100644 --- a/atd-vze/package.json +++ b/atd-vze/package.json @@ -1,6 +1,6 @@ { "name": "atd-vz-data", - "version": "1.45.0", + "version": "1.46.0", "homepage": "./", "description": "ATD Vision Zero Editor", "author": "ATD Data & Technology Services", From 56b086e6ffc9afa8605c4f8bd0018725b6b26dbf Mon Sep 17 00:00:00 2001 From: rose Date: Thu, 2 May 2024 12:16:07 -0500 Subject: [PATCH 35/35] bump VZE and VZV versions --- atd-vzv/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/atd-vzv/package.json b/atd-vzv/package.json index b6d90a6de..5a985ddf4 100644 --- a/atd-vzv/package.json +++ b/atd-vzv/package.json @@ -1,6 +1,6 @@ { "name": "atd-vzv", - "version": "1.45.0", + "version": "1.46.0", "homepage": "/viewer", "description": "ATD Vision Zero Viewer", "author": "ATD Data & Technology Services",