Skip to content

Commit

Permalink
Merge pull request #56 from multnomah-county-it/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
john-c-houser authored May 18, 2023
2 parents a55fcab + 930c683 commit 68ebb36
Show file tree
Hide file tree
Showing 7 changed files with 1,415 additions and 259 deletions.
300 changes: 300 additions & 0 deletions DataHandler.pm
Original file line number Diff line number Diff line change
@@ -0,0 +1,300 @@
package DataHandler;

# Copyright (c) Multnomah County (Oregon)
#
# Module for handling data mapping and validation for reports
#
# John Houser
# john.houser@multco.us
#
# 2022-06-10

# Pragmas
use strict;
use warnings;

# Required modules
use JSON;
use Text::CSV_XS qw(csv);
use File::Basename qw(basename);
use File::Find;
use Date::Calc qw(check_date Today_and_Now);
use Switch;
use base 'Exporter';

our @EXPORT = qw(load_maps evaluate_map validate validate_date);

# Set to 1 for dubugging messages
our $debug = 0;

# The data structure containing maps
our %MAPS = ();

#################################################################################

#################################################################################
# Find mapping table files in specific directory and load them into memory
# as hashes we can evaluate in &evaluate_map

sub load_maps {
my $mapping_path = shift;

find(\&wanted, $mapping_path);
}

#################################################################################

#################################################################################
# Load individual mapping file into memory as hash

sub wanted {
my $mapping_file = $_;

my $basename = basename($mapping_file);

if ( $basename =~ /^Data_Handler-(.*)\.csv$/ ) {
my $field = $basename;
$field =~ s/^(Data_Handler-)(.*)(\.csv)$/$2/;

my $csv = Text::CSV_XS->new();
my $count = 0;
my @columns = ();

open my $fh, "<:encoding(utf8)", $mapping_file or die "Could not open mapping file: $!";
while ( my $row = $csv->getline ($fh) ) {
foreach my $i ( 0 .. $#{$row} ) {
if ( $count == 0 ) {
$MAPS{$field}[$count][$i] = $row->[$i];
} else {
$MAPS{$field}[$count]{$MAPS{$field}[0][$i]} = $row->[$i];
}
}
$count++;
}
close $fh;
}
}

#################################################################################

#################################################################################
# Evaluate mapping hash to determine output value. Each mapping file must consist
# of a hash something like this:
#
# %Cost_Center = (
# [ 'Organization', 'Department', 'Cost_Center' ],
# {
# Organization => 'Waffle Makers, Ltd.',
# Department => 'Accounting',
# Cost_Center => 813510
# },
# {
# Organization => 'Waffle Makers, Ltd.',
# Department => 'IT',
# Cost_Center => 813520
# },
# );
#
# If the input hash contains Organization which equals "Waffle Makers, Ltd.", and
# the input hash contains Department which equals "Accounting", then return
# a value of 813510.

sub evaluate_map {
my $input_hashref = shift;
my $map = shift;

# Get the output field from the last column in $map->[0];
my $field = $map->[0][$#{$map->[0]}];
my $retval = '';

foreach my $i ( 1 .. $#{$map} ) {

my $match = 0;
foreach my $key ( keys %{$map->[$i]} ) {

if ( defined($input_hashref->{$key}) && $input_hashref->{$key} eq $map->[$i]{$key} ) {
print "input: $input_hashref->{$key}, map: $map->[$i]{$key}\n" if ( $debug );
$match++;
}
}

# If we matched every column, then return output field value
if ( $match == $#{$map->[0]} ) {
$retval = $map->[$i]->{$field};
last;
}
}

return $retval;
}

#################################################################################

#################################################################################
# Validates various types of incoming field data
# Sample fields hash with validation rules:
#
# %fields = (
# Date1 => 'd:YYYY-MM-DD',
# Date2 => 'd:YYYY/MM/DD',
# Date3 => 'd:MM-DD-YYYY',
# Date4 => 'd:MM/DD/YYYY',
# Timestamp1 => 'd:YYYY/MM/DD HH:MM',
# Timestamp2 => 'd:YYYY-MM-DD HH:MM',
# Timestamp3 => 'd:YYYYMMDDHHMMSS',
# Customer_Reference => 'i:8', # int(8)
# Invoice_Memo => 's:256', # string(256)
# Posting => 'v:01|11', # list('01', '11')
# Customer_PO_Number => 'b', # must be blank
# Extended_Amount => 'n:3.2', # number(000.00)
# Range => 'r:100000,999999', # integer between x and y
# );
#
# Returns 0 or 1

sub validate {
my $value = shift;
my $validation_rule = shift;

my $retval = 0;
my ($type, $param) = split /:/, $validation_rule, 2;

switch($type) {
case ('b') {
# Value be undefined
if ( ! defined($value) ) {
$retval = 1;
}
}
case ('d') {
# Send to date validation routine
if ( $value && &validate_date($value, $param) ) {
$retval = 1;
}
}
case ('i') {
# Must be an integer of length specified
if ( $value && $value =~ /^\d+$/ && length($value) <= $param ) {
$retval = 1;
}
}
case ('n') {
# Number of specific lengths in both the whole and fractional parts.
# Most numbers of this type will need to be sent through sprintf
# before being validated. Zero is acceptable but undefined will
# return invalid.
my ($whole_len, $frac_len) = split /\./, $param;
if ( $value && $value =~ /^\d+\.\d+$/ ) {
my ($whole, $frac) = split /\./, scalar($value);
if ( length($whole) <= $whole_len && length($frac) <= $frac_len ) {
$retval = 1;
}
} elsif ( $value && $value =~ /^\d+$/ ) {
if ( length($value) <= $whole_len ) {
$retval = 1;
}
}
}
case ('s') {
# String no larger than specified length. Strings are allowed to be blank.
if ( ! defined($value) || length($value) <= $param ) {
$retval = 1;
}
}
case ('r') {
# Integer range between x and y
my ($x, $y) = split /,/, $param;
if ( $value >= $x && $value <= $y ) {
$retval = 1;
}
}
case ('v') {
# Value must match one of those listed in | delimited form in the parameter.
if ( $value ) {
my @params = split /\|/, $param;
foreach my $param ( @params ) {
if ( $value eq $param ) {
$retval = 1;
last;
}
}
}
}
else {
die "No validation rule for type $type";
}
}
return $retval;
}
################################################################################
################################################################################
# Validates various date formats. Returns nothing or the valid date in
# YYYY-MM-DD format.
sub validate_date {
my $date = shift;
my $format = shift;
my $retval = '';
switch($format) {
case ( /^(YYYY)([\-\/]){1}(MM)([\-\/]){1}(DD\sHH:MM)$/ ) {
if ( $date =~ /^\d{4}[\-\/]{1}\d{2}[\-\/]{1}\d{2}\s\d{2}:\d{2}$/ ) {
my ($year, $month, $day, $time) = split /[\-\/\s]/, $date;
if ( check_date($year, $month, $day) ) {
$retval = $year . '-' . sprintf("%02d", $month) . '-' . sprintf("%02d", $day);
}
}
}
case ( /^(YYYY)([\-\/]{1})(MM)([\-\/]{1})(DD)$/ ) {
if ( $date =~ /^\d{4}[\-\/]{1}\d{2}[\-\/]{1}\d{2}(\s\d{2}:\d{2}){0,1}$/ ) {
my ($year, $month, $day) = split /[\-\/]/, $date;
if ( check_date($year, $month, $day) ) {
$retval = $year . '-' . sprintf("%02d", $month) . '-' . sprintf("%02d", $day);
}
}
}
case ( /^(MM)([\-\/]{1})(DD)([\-\/]{1})(YYYY)$/ ) {
if ( $date =~ /^(\d{2})([\-\/]{1})(\d{2})([\-\/]{1})(\d{4})$/ ) {
my ($month, $day, $year) = split /[\-\/]/, $date;
if ( check_date($year, $month, $day) ) {
$retval = $year . '-' . sprintf("%02d", $month) . '-' . sprintf("%02d", $day);
}
}
}
case ( /^YYYYMMDDHHMMSS$/ ) {
if ( $date =~ /^\d{14}$/ ) {
my $year = substr $date, 0, 4;
my $month = substr $date, 4, 2;
my $day = substr $date, 6, 2;
if ( check_date($year, $month, $day) ) {
$retval = $year . '-' . sprintf("%02d", $month) . '-' . sprintf("%02d", $day);
}
}
}
case ( /^YYYYMMDD$/ ) {
if ( $date =~ /^\d{8}$/ ) {
my $year = substr $date, 0, 4;
my $month = substr $date, 4, 2;
my $day = substr $date, 6, 2;
if ( check_date($year, $month, $day) ) {
$retval = $year . '-' . sprintf("%02d", $month) . '-' . sprintf("%02d", $day);
}
}
}
else {
die "Unsupported date format: $format";
}
}

return $retval;
}

###############################################################################

###############################################################################

1;
38 changes: 37 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,8 @@ software.
the software, under `/var/log/`. You may want to add a `relibconnected`
configuration file under `/etc/logrotate.d/` to avoid uncontrolled log growth.

8. Copy `AddressFormat.pm` and `ILSWS.pm` to `/usr/local/lib/site_perl/`. Create
8. Copy `AddressFormat.pm`,`DataHandler.pm`, and `ILSWS.pm` to `/usr/local/lib/site_perl/`
or some other directory in the Perl include path. You may need to create
the directory (as root) if it doesn't already exist. This will put the modules
into a path where Perl looks for modules. Recent versions of Perl do not, by
default, look in the current working directory.
Expand Down Expand Up @@ -183,3 +184,38 @@ to seven days. For example:
```
bin/randomize_checksum_ages.pl config.yaml
```

# Configuration Notes

## Field Definition Keywords
When configuring client districts, there a number of keywords that may be used
to define the way the software will handle incoming data and derivative fields:
* `type`: Symphony field type (used to determine data structure needed in JSON)
* `overlay`: If true, update field when updating existing record
* `validate`: Field validation rule to apply to incoming data (ingestor will throw error and skip record if validation fails)
* `transform`: Transformation function (in ingestor.pl) which takes validated input from one field and returns a valid value
* `overlay_default`: Value to use in update IF FIELD CURRENTLY EMPTY
* `overlay_value`: Value to ALWAYS overlay existing value during update
* `new_default`: Value to use in create IF FIELD CURRENTLY EMPTY
* `new_value`: Value to be used during new create

## Validation Rules

Sample validation rules used in conjunction with the validate field definition keyword:
| Type | Example | Comments |
| --- | --- | --- |
| Date1 | "d:YYYY-MM-DD" | |
| Date2 | "d:YYYY/MM/DD" | |
| Date3 | "d:MM-DD-YYYY" | |
| Date4 | "d:MM/DD/YYYY" | |
| Timestamp1 | "d:YYYY/MM/DD HH:MM" | |
| Timestamp2 | "d:YYYY-MM-DD HH:MM" | |
| Timestamp3 | "d:YYYYMMDDHHMMSS" | |
| Integer | "i:8" | Length of 8 |
| String | "s:256" | Max length of 256 |
| List | "v:01\|11" | Pipe delimited list of valid entries |
| Blank | "b" | Must be blank |
| Decimal number | "n:3.2" | Number(000.00) |
| Integer range | "r:1,9999" | Range between 1 and 9999 |

Note: All dates will be validated against the calendar
4 changes: 2 additions & 2 deletions bin/stuck_clear.pl
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,11 @@
use utf8;
use File::Copy;

my $path = '/opt/relibconnected';
my $path = '/opt/librelibconnected';
my $run_path = "$path/run";
my $log_path = "$path/log";
my $mailer = '/usr/bin/mailx';
my $mail_template = '/opt/relibconnected/bin/stuck_clear.txt';
my $mail_template = '/opt/librelibconnected/bin/stuck_clear.txt';

if ( -e "$run_path/ingestor.flg" ) {
my $file_age = -M "$run_path/ingestor.flg";
Expand Down
4 changes: 2 additions & 2 deletions bin/stuck_warn.pl
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@

my $path = '/srv/libconnected';
my $mailer = '/usr/bin/mailx';
my $mail_template = '/opt/relibconnected/bin/stuck_warn.txt';
my $flag = '/opt/relibconnected/bin/stuck.flag';
my $mail_template = '/opt/librelibconnected/bin/stuck_warn.txt';
my $flag = '/opt/librelibconnected/bin/stuck.flag';
my $touch = '/usr/bin/touch';
my $rm = '/usr/bin/rm';

Expand Down
Loading

0 comments on commit 68ebb36

Please sign in to comment.