diff --git a/atd-etl/cris_import/README.md b/atd-etl/cris_import/README.md
index b6af7ee3e..58567c065 100644
--- a/atd-etl/cris_import/README.md
+++ b/atd-etl/cris_import/README.md
@@ -12,3 +12,12 @@ The contents of this directory define a Docker image that can be used in an Airf
 ### Zip file acquisition
 
 The CRIS import usually works by pulling down a zip archive from the SFTP endpoint. However, during development, it's much easier if you can have it use a zip file that you have locally on your machine instead. This can be accomplished by putting a zip file (still encrypted and using "CRIS extract" password) in a folder named `atd-etl/cris_import/development_extracts/`. Create the directory if needed. If there are no zip files in that folder, the program will automatically revert to inspecting the SFTP endpoint.
+
+### Local testing
+
+Make a copy of `env-template` and name it `env`. Fill in the values using the 1Password Connect Server secrets (see entries titled `Endpoint for 1Password Connect Server API` and `Vault ID of API Accessible Secrets vault`) and your personal access token.
+
+Drop a CRIS extract zip file into your development folder as described above, and run the import script:
+```bash
+docker compose run cris-import
+```
diff --git a/atd-etl/cris_import/cris_import.py b/atd-etl/cris_import/cris_import.py
index 9141ceee7..e5bed7c74 100755
--- a/atd-etl/cris_import/cris_import.py
+++ b/atd-etl/cris_import/cris_import.py
@@ -612,7 +612,24 @@ def align_records(map_state):
     input_column_names = util.get_input_column_names(pg, map_state["import_schema"], table, target_columns)
 
     # iterate over each imported record and determine correct action
     for source in imported_records:
+        # Reset per record: a missing key on one record must not cause
+        # every subsequent record in the table to be skipped as well.
+        should_skip_update = False
+
+        # Check unique key columns to make sure they all have a value
+        for key_column in key_columns:
+            key_column_value = source[key_column]
+            if key_column_value is None:
+                print("\nSkipping because unique key column is missing")
+                print(f"Table: {table}")
+                print(f"Missing key column: {key_column}")
+                should_skip_update = True
+
+        # If we are missing a column that uniquely identifies the record, we should skip the update
+        if should_skip_update:
+            for key_column in key_columns:
+                print(f"{key_column}: {source[key_column]}")
+            continue
 
         # generate some record specific SQL fragments to identify the record in larger queries
         record_key_sql, import_key_sql = util.get_key_clauses(table_keys, output_map, table, source, map_state["import_schema"])
diff --git a/atd-etl/cris_import/env-template b/atd-etl/cris_import/env-template
new file mode 100644
index 000000000..7650c06f4
--- /dev/null
+++ b/atd-etl/cris_import/env-template
@@ -0,0 +1,3 @@
+OP_API_TOKEN=
+OP_CONNECT=
+OP_VAULT_ID=