Skip to content

Commit

Permalink
Standardize threshold naming
Browse files: browse the repository at this point in the history
  • Loading branch information
wagnerlmichael committed Jan 6, 2025
1 parent 324e47a commit e294c73
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 13 deletions.
18 changes: 12 additions & 6 deletions glue/flagging_script_glue/flagging.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ def go(
iso_forest_cols: list,
dev_bounds: tuple,
condos: bool,
raw_price_thresh: int,
raw_price_threshold: int,
):
"""
This function runs all of our other functions in the correct sequence.
Expand Down Expand Up @@ -52,7 +52,7 @@ def go(
df = iso_forest(df, groups, iso_forest_cols)
print("iso_forest() done")
df = outlier_taxonomy(
df, dev_bounds, groups, condos=condos, raw_price_thresh=raw_price_thresh
df, dev_bounds, groups, condos=condos, raw_price_threshold=raw_price_threshold
)
print("outlier_taxonomy() done\nfinished")

Expand All @@ -73,7 +73,11 @@ def create_group_string(groups: tuple, sep: str) -> str:


def outlier_taxonomy(
df: pd.DataFrame, permut: tuple, groups: tuple, condos: bool, raw_price_thresh: int
df: pd.DataFrame,
permut: tuple,
groups: tuple,
condos: bool,
raw_price_threshold: int,
):
"""
Creates columns having to do with our chosen outlier taxonomy.
Expand All @@ -89,7 +93,7 @@ def outlier_taxonomy(

df = check_days(df, SHORT_TERM_OWNER_THRESHOLD)
df = pricing_info(df, permut, groups, condos=condos)
df = outlier_type(df, condos=condos, raw_price_thresh=raw_price_thresh)
df = outlier_type(df, condos=condos, raw_price_threshold=raw_price_threshold)

return df

Expand Down Expand Up @@ -745,7 +749,9 @@ def z_normalize_groupby(s: pd.Series):
return zscore(s, nan_policy="omit")


def outlier_type(df: pd.DataFrame, condos: bool, raw_price_thresh: int) -> pd.DataFrame:
def outlier_type(
df: pd.DataFrame, condos: bool, raw_price_threshold: int
) -> pd.DataFrame:
"""
This function creates indicator columns for each distinct outlier type between price
and characteristic outliers. These columns are prefixed with 'sv_ind_'.
Expand Down Expand Up @@ -813,7 +819,7 @@ def outlier_type(df: pd.DataFrame, condos: bool, raw_price_thresh: int) -> pd.Da
]

# Implement raw threshold, unlog price
price_conditions.append((10 ** df["meta_sale_price"]) > raw_price_thresh)
price_conditions.append((10 ** df["meta_sale_price"]) > raw_price_threshold)
price_labels.append("sv_ind_raw_price_threshold")

combined_conditions = price_conditions + char_conditions
Expand Down
12 changes: 6 additions & 6 deletions glue/sales_val_flagging.py
Original file line number Diff line number Diff line change
Expand Up @@ -480,9 +480,9 @@ def get_parameter_df(
ptax_sd,
rolling_window,
time_frame,
short_term_thresh,
min_group_thresh,
raw_price_thresh,
short_term_threshold,
min_group_threshold,
raw_price_threshold,
run_id,
):
"""
Expand Down Expand Up @@ -523,9 +523,9 @@ def get_parameter_df(
"ptax_sd": [ptax_sd],
"rolling_window": [rolling_window],
"time_frame": [time_frame],
"short_term_owner_threshold": [short_term_thresh],
"min_group_thresh": [min_group_thresh],
"raw_price_thresh": [raw_price_thresh],
"short_term_owner_threshold": [short_term_threshold],
"min_group_thresh": [min_group_threshold],
"raw_price_threshold": [raw_price_threshold],
}

df_parameters = pd.DataFrame(parameter_dict_to_df)
Expand Down
4 changes: 3 additions & 1 deletion manual_flagging/flagging.py
Original file line number Diff line number Diff line change
Expand Up @@ -300,6 +300,7 @@ def create_bins_and_labels(input_list):
iso_forest_cols=df_info["iso_forest_cols"],
dev_bounds=tuple(inputs["dev_bounds"]),
condos=df_info["condos_boolean"],
raw_price_threshold=inputs["raw_price_threshold"],
)

# Add the edited or unedited dataframe to the new dictionary
Expand Down Expand Up @@ -400,8 +401,9 @@ def create_bins_and_labels(input_list):
ptax_sd=inputs["ptax_sd"],
rolling_window=inputs["rolling_window_months"],
time_frame=inputs["time_frame"],
short_term_thresh=flg_model.SHORT_TERM_OWNER_THRESHOLD,
short_term_threshold=flg_model.SHORT_TERM_OWNER_THRESHOLD,
min_group_thresh=inputs["min_groups_threshold"],
raw_price_threshold=inputs["raw_price_threshold"],
run_id=run_id,
)

Expand Down

0 comments on commit e294c73

Please sign in to comment.