From f0f709cedbe75157eac4b59cedaf8260f7cdcc36 Mon Sep 17 00:00:00 2001 From: Mike Date: Mon, 15 Apr 2024 14:27:57 -0500 Subject: [PATCH 1/4] Add env-template and add to README --- atd-etl/cris_import/README.md | 9 +++++++++ atd-etl/cris_import/env-template | 3 +++ 2 files changed, 12 insertions(+) create mode 100644 atd-etl/cris_import/env-template diff --git a/atd-etl/cris_import/README.md b/atd-etl/cris_import/README.md index b6af7ee3e..cf80d734e 100644 --- a/atd-etl/cris_import/README.md +++ b/atd-etl/cris_import/README.md @@ -12,3 +12,12 @@ The contents of this directory define a Docker image that can be used in an Airf ### Zip file acquisition The CRIS import usually works by pulling down a zip archive from the SFTP endpoint. However, during development, it's much easier if you can have it use a zip file that you have locally on your machine instead. This can be accomplished by putting a zip file (still encrypted and using "CRIS extract" password) in a folder named `atd-etl/cris_import/development_extracts/`. Create the directory if needed. If there are no zip files in that folder, the program will automatically revert to inspecting the SFTP endpoint. + +### Local testing + +Make a copy of `env-template` and name it `env`. Fill in the values using the 1Password Connect Server secrets (see entries titled `Endpoint for 1Password Connect Server API` and `Vault ID of API Accessible Secrets vault`) and your personal access token. + +Drop a CRIS extract zip file into your development folder described above, and run the import script: +```bash +docker compose run cris-import +``` diff --git a/atd-etl/cris_import/env-template b/atd-etl/cris_import/env-template new file mode 100644 index 000000000..7650c06f4 --- /dev/null +++ b/atd-etl/cris_import/env-template @@ -0,0 +1,3 @@ +OP_API_TOKEN= +OP_CONNECT= +OP_VAULT_ID= From ca3ae69f2af87e278bdab349d1de7a3e9bd8cdc4 Mon Sep 17 00:00:00 2001 From: Mike Date: Mon, 15 Apr 2024 15:21:07 -0500 Subject: [PATCH 2/4] Skip update if a unique key column for a record is missing --- atd-etl/cris_import/cris_import.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/atd-etl/cris_import/cris_import.py b/atd-etl/cris_import/cris_import.py index 9141ceee7..788fb3be9 100755 --- a/atd-etl/cris_import/cris_import.py +++ b/atd-etl/cris_import/cris_import.py @@ -612,7 +612,24 @@ def align_records(map_state): input_column_names = util.get_input_column_names(pg, map_state["import_schema"], table, target_columns) # iterate over each imported record and determine correct action + should_skip_update = False + for source in imported_records: + # Check unique key columns to make sure they all have a value + for key_column in key_columns: + key_column_value = source[key_column] + if key_column_value == None: + + print("\nSkipping because unique key column is missing") + print(f"Table: {table}") + print(f"Missing key column: {key_column}") + should_skip_update = True + + # If we are missing a column that uniquely identifies the record, we should skip the update + if should_skip_update: + for key_column in key_columns: + print(f"{key_column}: {source[key_column]}") + continue # generate some record specific SQL fragments to identify the record in larger queries record_key_sql, import_key_sql = util.get_key_clauses(table_keys, output_map, table, source, map_state["import_schema"]) From fae3c9d116da6e82d810937bcb1bb6d3c9e3c88e Mon Sep 17 00:00:00 2001 From: Mike Date: Mon, 15 Apr 2024 15:35:44 -0500 Subject: [PATCH 3/4] Remove space and typo --- atd-etl/cris_import/README.md | 2 +- atd-etl/cris_import/cris_import.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/atd-etl/cris_import/README.md b/atd-etl/cris_import/README.md index cf80d734e..58567c065 100644 --- a/atd-etl/cris_import/README.md +++ b/atd-etl/cris_import/README.md @@ -17,7 +17,7 @@ The CRIS import usually works by pulling down a zip archive from the SFTP endpoi Make a copy of `env-template` and name it `env`. Fill in the values using the 1Password Connect Server secrets (see entries titled `Endpoint for 1Password Connect Server API` and `Vault ID of API Accessible Secrets vault`) and your personal access token. -Drop a CRIS extract zip file into your development folder described above, and run the import script: +Drop a CRIS extract zip file into your development folder as described above, and run the import script: ```bash docker compose run cris-import ``` diff --git a/atd-etl/cris_import/cris_import.py b/atd-etl/cris_import/cris_import.py index 788fb3be9..b57bc0f6e 100755 --- a/atd-etl/cris_import/cris_import.py +++ b/atd-etl/cris_import/cris_import.py @@ -619,7 +619,6 @@ def align_records(map_state): for key_column in key_columns: key_column_value = source[key_column] if key_column_value == None: - print("\nSkipping because unique key column is missing") print(f"Table: {table}") print(f"Missing key column: {key_column}") From fa587d973ae39fd9a61f0385297cc8e2ddeb76ea Mon Sep 17 00:00:00 2001 From: Mike Date: Mon, 15 Apr 2024 16:20:02 -0500 Subject: [PATCH 4/4] Update check for None --- atd-etl/cris_import/cris_import.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/atd-etl/cris_import/cris_import.py b/atd-etl/cris_import/cris_import.py index b57bc0f6e..e5bed7c74 100755 --- a/atd-etl/cris_import/cris_import.py +++ b/atd-etl/cris_import/cris_import.py @@ -618,7 +618,7 @@ def align_records(map_state): # Check unique key columns to make sure they all have a value for key_column in key_columns: key_column_value = source[key_column] - if key_column_value == None: + if key_column_value is None: print("\nSkipping because unique key column is missing") print(f"Table: {table}") print(f"Missing key column: {key_column}")