diff --git a/.gitignore b/.gitignore index e081ab01..3c68387d 100644 --- a/.gitignore +++ b/.gitignore @@ -12,3 +12,7 @@ src/data/debug/*.json **/__pycache__/* .DS_Store + +*my-venv/ +*new-venv +node_modules diff --git a/.prettierignore b/.prettierignore index c6d72715..d648c41e 100644 --- a/.prettierignore +++ b/.prettierignore @@ -1,2 +1,9 @@ # Ignore built files dist + +# Ignore data analysis HTML outputs, which are auto-generated +src/data/analysis/output + +# Ignore static blog HTML and JSON files from data analysis +static/blog/**/*.json +static/blog/**/*.html \ No newline at end of file diff --git a/src/data/analysis/GHG_intensity_compliance_correlation.ipynb b/src/data/analysis/GHG_intensity_compliance_correlation.ipynb new file mode 100644 index 00000000..8fbe1db4 --- /dev/null +++ b/src/data/analysis/GHG_intensity_compliance_correlation.ipynb @@ -0,0 +1,1275 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Compliance Analysis: \n", + "\n", + "## Correlation of GHG Intensity Last Year and Reporting Compliance This Year\n", + "\n", + "### Issue #114\n", + "Colton Lapp \n", + "\n", + "November 2024\n", + "\n", + "\n", + "### Description: \n", + "Does the GHG intensity the year prior help predict reporting non-compliance this year? \n", + "Does the trend of a GHG intensity (i.e. is a building increasing or decreasing its GHG intensity) help predict reporting non-compliance this year? \n", + "\n", + "I look at the raw data from the Chicago open data portal and use some basic group means and regression modeling. \n", + "\n", + "### Conclusion:\n", + "It doesn't seem like the level of GHG intensity or the trend of GHG intensity help predict compliance at all. \n", + "Building size DOES help predict compliance though. 
For every million additional square feet, the building is roughly 1.5% less likely to be NON compliant" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "ename": "", + "evalue": "", + "output_type": "error", + "traceback": [ + "\u001b[1;31mRunning cells with 'my-venv (Python 3.9.18)' requires the ipykernel package.\n", + "\u001b[1;31mRun the following command to install 'ipykernel' into the Python environment. \n", + "\u001b[1;31mCommand: '\"/Users/coltonlapp/Dropbox/My Mac (Coltons-MacBook-Pro.local)/Desktop/Life/ChiHack/electrify-chicago/my-venv/bin/python\" -m pip install ipykernel -U --force-reinstall'" + ] + } + ], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "import plotly.subplots as sp\n", + "import plotly.graph_objects as go\n", + "import plotly.express as px\n", + "from plotly.offline import iplot\n", + "import plotly.io as pio\n", + "from plotly.subplots import make_subplots\n", + "import math\n", + "import statsmodels.api as sm \n", + "import os\n", + "from pathlib import Path\n", + "import json \n", + "\n", + "from IPython.display import Image\n", + "\n", + "from plotly.offline import init_notebook_mode\n", + "init_notebook_mode(connected=True)\n", + "\n", + "pd.set_option('display.max_columns', None)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Set pathing" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# get static dir for saving images\n", + "current_dir = Path.cwd()\n", + "project_root = current_dir\n", + "while True:\n", + " if os.path.basename(project_root) == 'electrify-chicago':\n", + " print(\"Success: Found 'electrify-chicago' as the base directory.\")\n", + " break\n", + " new_root = os.path.dirname(project_root)\n", + " if new_root == project_root: # Reached the filesystem root\n", + " raise FileNotFoundError(\"Error: 'electrify-chicago' directory not found in the 
path hierarchy.\")\n", + " project_root = new_root\n", + "static_blog_pth = os.path.join(project_root, 'static', 'blog', 'GHGIntensityPredictCompliance')\n", + "os.makedirs(static_blog_pth, exist_ok=True)\n", + "\n", + "expected_dir_name = \"analysis\"\n", + "fig_dir = os.path.join(current_dir, 'output', 'compliance_analysis')\n", + "\n", + "# Check if the current directory is the \"analysis\" folder\n", + "if current_dir.name != expected_dir_name:\n", + " raise AssertionError(f\"Expected working directory to be '{expected_dir_name}', but got '{current_dir.name}'.\\n\"\n", + " f\"Please ensure you are in the correct directory.\")\n", + "\n", + "print(f\"Current working directory is correctly set to '{current_dir}'.\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Notebook options and custom plotting function" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "reduce_memory = True # option to display some plotly graphs as static images to reduce memory, if possible\n", + "export_to_blog = True # if true, saves plots and regressions to blog static folder for website publishing" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "if export_to_blog:\n", + " dirs = [static_blog_pth, fig_dir]\n", + "else:\n", + " dirs = [fig_dir]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Function to display graphs\n", + "If you want to reduce memory size of notebook, set reduce_memory to True and this function will save some graphs as static image files instead of HTML" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "def show_fig(fig, reduce_memory):\n", + " \"\"\"\n", + " Render a Plotly figure as a PNG image or an HTML visualization.\n", + "\n", + " Parameters:\n", + " - fig (plotly.graph_objs.Figure): The Plotly figure to render.\n", 
+ " - reduce_memory (bool): If True, attempt to export the figure as a PNG \n", + " to save memory; defaults to HTML rendering on failure.\n", + "\n", + " Returns:\n", + " - tuple: The rendered figure (PNG or HTML) and the updated reduce_memory flag.\n", + " \"\"\"\n", + "\n", + " if reduce_memory:\n", + " try:\n", + " png_image = pio.to_image(fig, format='png')\n", + " return (png_image, reduce_memory)\n", + " \n", + " except RuntimeError as e:\n", + " print(\"Error exporting plotly to png, displaying html graph instead\\n\")\n", + " reduce_memory = False\n", + " \n", + " print(\"\"\"\n", + "Note: You may be recieving this error because of a unsolved bug in the Kaleido package (plotly dependency)\n", + "If you are using a virtual environment on a Mac with spaces in your directory pathname, you might get the error stating:\n", + " 'ValueError: Failed to start Kaleido subprocess. Error stream:'\n", + " \n", + "If this is the case, go to \n", + "'{Your venv name}/lib/python3.11/site-packages/kaleido/executable/kaleido'\n", + "\n", + "And add quotations to $DIR and \"$@\" like this:\n", + "\n", + " #!/bin/bash\n", + " DIR=\"$( cd \"$( dirname \"${BASH_SOURCE[0]}\" )\" >/dev/null 2>&1 && pwd )\"\n", + "\n", + " # Quote $DIR to handle spaces in the path\n", + " cd \"$DIR\"\n", + " ./bin/kaleido \"$@\"\n", + "\"\"\")\n", + "\n", + " if not reduce_memory:\n", + " return (iplot(fig), reduce_memory)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Read in data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Construct the path to the CSV file (one level above the current directory)\n", + "data_path = os.path.join( current_dir.parent, 'dist', 'benchmarking-all-years.csv')\n", + "df = pd.read_csv(data_path)\n", + "\n", + "# Create the \"reported\" column\n", + "df['Reported'] = df['GHGIntensity'].notna().astype(int)\n", + "\n", + "print(f\"There are {df['ID'].unique().shape[0]} 
unique building ids\")\n", + "\n", + "# Convert year to int\n", + "df['DataYear'] = df['DataYear'].astype(int)\n", + "df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Read in Building Benchmark Data to get Building Names" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "names_path = os.path.join( current_dir.parent, 'dist', 'building-benchmarks.csv')\n", + "\n", + "building_names = pd.read_csv(names_path)[['ID', 'PropertyName' ]]\n", + "building_names.drop_duplicates(keep='first')\n", + "building_names.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Merge names to data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.merge( df, building_names, how='left', on='ID')\n", + "df['PropertyName'] = df['PropertyName'].fillna(\"[Building Name Unavailable]\").replace(\"\", \"[Building Name Unavailable]\")\n", + "df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Check that every building/year combo exists only once" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "group_counts = df.groupby(['ID', 'DataYear']).size()\n", + "\n", + "# Assert that the maximum count in any group is at most 1\n", + "assert group_counts.max() <= 1, \"There are buildings with more than one row in a given year!\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Analysis: Unique values for GHG Intensity\n", + "\n", + "Conclusion: strange that some values are highly represented while others are not. How are these calculated? 
" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "# Round GHG Intensity values to 1 digit\n", + "df['GHGIntensity'] = df['GHGIntensity'].round(1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Plot Distribution of GHG Intensities\n", + "\n", + "Show histogram of how common GHG intensity values are, breaking out the outliers into a separate plot " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "fig = sp.make_subplots(\n", + " rows=1,\n", + " cols=2,\n", + " column_widths=[0.8, 0.2],\n", + " horizontal_spacing=0.2,\n", + " subplot_titles = [\n", + " '', \n", + " 'GHG Intensity Outliers
(GHG Intensity values > 50)'\n", + " ]\n", + ")\n", + "\n", + "fig.add_trace( go.Histogram(\n", + " x=df['GHGIntensity'],\n", + " xbins = dict(start=0, end=100, size=.5),\n", + " name='Histogram of Values',\n", + " hovertemplate = \" %{y} Buildings
with GHG Intensity between %{x}\"\n", + " ),\n", + " row=1,col=1\n", + ")\n", + "\n", + "# Add a light red box to the first plot to show outliers\n", + "fig.add_shape(\n", + " type=\"rect\",\n", + " x0=50,\n", + " x1=100,\n", + " y0=0,\n", + " y1=1500,\n", + " fillcolor=\"rgba(255, 0, 0, 0.1)\", # Light red with transparency\n", + " line=dict(width=0),\n", + " row=1, col=1\n", + ")\n", + "\n", + "outlier_subset = df.dropna(subset=['GHGIntensity'])\n", + "outlier_subset = outlier_subset[outlier_subset['GHGIntensity']>50]\n", + "fig.add_trace(\n", + " go.Scatter(\n", + " x=[0] * len(outlier_subset['GHGIntensity']), # Make x an array of zeros with the correct length\n", + " y=outlier_subset['GHGIntensity'],\n", + " mode='markers',\n", + " marker=dict( color='blue', opacity=0.6),\n", + " customdata=df['DataYear'],\n", + " hovertext=outlier_subset['PropertyName'], # Add PropertyName to hovertext\n", + " hovertemplate=\"%{hovertext}
GHG Intensity: %{y} in %{customdata}\",\n", + " name=''\n", + " ),\n", + " row=1, col=2\n", + ")\n", + "\n", + "# Add a light red background to the second subplot to show outliers\n", + "fig.add_shape(\n", + " type=\"rect\",\n", + " x0=-1,\n", + " x1=1,\n", + " y0=50,\n", + " y1=900,\n", + " fillcolor=\"rgba(255, 0, 0, 0.1)\", # Light red with transparency\n", + " line=dict(width=0),\n", + " layer=\"below\",\n", + " row=1, col=2\n", + ")\n", + "\n", + "\n", + "fig.update_xaxes(visible=False, row=1, col=2)\n", + "fig.update_xaxes(range=[0, 100], row=1,col=1)\n", + "fig.update_xaxes( title_text='' , row=1,col=2)\n", + "fig.update_yaxes( title_text='GHG Intensity', row=1,col=2)\n", + "\n", + "## Add an outline to the bars\n", + "fig.update_traces(marker=dict(line=dict(width=.1, color='black')))\n", + "\n", + "fig.add_annotation(\n", + " x=80,\n", + " y=300,\n", + " text=\"Some buildings had
outlier GHG intensity
levels (up to 800) →
\",\n", + " showarrow=False, # No arrow for this annotation\n", + " font=dict(size=10), # Customize font size\n", + ")\n", + "\n", + "# Update layout for better display\n", + "fig.update_layout(\n", + " xaxis_title='GHG Intensity',\n", + " yaxis_title='Count',\n", + " showlegend=False,\n", + " title='Distribution of GHG Intensities',\n", + " height=400,\n", + " width=800\n", + ")\n", + "\n", + "# Show the plot\n", + "#pio.show(fig)\n", + "iplot(fig)\n", + "\n", + "for dir in [static_blog_pth, fig_dir]:\n", + " fig.write_html( os.path.join(dir,'distribution_of_GHG_intensity.html'), include_plotlyjs=\"cdn\" ) # 'cdn' reduces memory of file by not including all necessary JS" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Compliance type counts over time\n", + "\n", + "Count the number of reporting / non-reporting for every year, highlighting the impact of COVID" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Count each new column per year\n", + "value_counts = df.groupby('DataYear')['Reported'].value_counts()\n", + "non_reporting_counts = value_counts.xs(0, level='Reported')\n", + "reporting_counts = value_counts.xs(1, level='Reported')\n", + "\n", + "# Create the figure\n", + "fig = go.Figure()\n", + "\n", + "# Add traces for each category\n", + "fig.add_trace(go.Scatter(x=reporting_counts.index, y=reporting_counts.values,\n", + " mode='lines+markers', name='Reported',\n", + " line=dict(width=4, color='rgba(0, 0,255, 0.7)'),\n", + " marker=dict(symbol='circle', size=10)))\n", + "\n", + "fig.add_trace(go.Scatter(x=non_reporting_counts.index, y=non_reporting_counts.values,\n", + " mode='lines+markers', name=\"Didn't Report\",\n", + " line=dict(width=4, color='rgba(255, 0, 0, 0.7)'),\n", + " marker=dict(symbol='circle', size=10)))\n", + "\n", + "fig.add_trace( go.Scatter( x=[2018.5, 2019.5, 2019.5, 2018.5, 2018.5], \n", + " y=[0, 0, np.max(reporting_counts)*1.2, 
np.max(reporting_counts)*1.2, 0 ], \n", + " fill='toself', mode='lines', name='Covid Data Disruption' )\n", + ")\n", + "\n", + "# Update layout\n", + "fig.update_layout(title=\"Count of Buildings That Did/Didn't Report Emissions by Year\",\n", + " xaxis_title='Year of Emissions
(One year before data is reported)',\n", + " yaxis_title='Count of Buildings',\n", + " legend_title='Category')\n", + "\n", + "# Show the plot\n", + "#pio.show(fig)\n", + "iplot(fig)\n", + "\n", + "\n", + "for dir in [static_blog_pth, fig_dir]:\n", + " fig.write_html( os.path.join(dir,'reporting_counts_over_time.html'), include_plotlyjs=\"cdn\" )\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Analysis: Covid saw a temporary drop in building reporting\n", + "Buildings report the prior year's emissions data the following spring. Thus, 2019's data was supposed to be reported in Spring 2020 but it seems like Covid disrupted this. As a result, many observations of non-reporting are because of Covid. \n", + "\n", + "For robustness, we will create a subset of the dataframe without the covid data later in this code" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Analysis: Correlation of \"GHG Intensity Last Year\" and \"Compliance This Year\"\n", + "\n", + "### First: calculate \"GHG Intensity last year\" and \"Change in GHG Intensity Last Year\"\n", + "\n", + "- GHG Intensity Last Year is just the GHG intensity the year prior\n", + " - i.e. if the Data Year is 2019, then 'GHG Intensity Last Year' would be equal to GHG Intensity in 2018\n", + "\n", + "- Change in GHG Intensity Last Year is equal to the difference between the GHG intensity from two years ago to one year ago\n", + " - i.e. 
if the Data Year is 2019, Change in GHG Intensity Last year is equal to the GHG intensity from 2018 minus the GHG Intensity from 2017" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "# Create the new column 'GHGIntensity_last_year'\n", + "df['GHGIntensity_LastYear'] = df.apply(\n", + " lambda row: df.loc[\n", + " (df['DataYear'] == row['DataYear'] - 1) & \n", + " (df['Reported'] == 1) & \n", + " (df['ID'] == row['ID']), # Match by id\n", + " 'GHGIntensity'\n", + " ].iloc[0] if len(df.loc[\n", + " (df['DataYear'] == row['DataYear'] - 1) & \n", + " (df['Reported'] == 1) & \n", + " (df['ID'] == row['ID']), # Match by id\n", + " 'GHGIntensity'\n", + " ]) > 0 else None,\n", + " axis=1\n", + ")\n", + "\n", + "# Create the new column 'pct_chng_GHGIntensity_last_year'\n", + "df['Chng_GHGIntensity_LastYear'] = df.apply(\n", + " lambda row: (\n", + " (\n", + " df.loc[\n", + " (df['DataYear'] == row['DataYear'] - 1) & \n", + " (df['Reported'] == 1) & \n", + " (df['ID'] == row['ID']), # Match by id\n", + " 'GHGIntensity'\n", + " ].iloc[0]\n", + "\n", + " -\n", + "\n", + " df.loc[\n", + " (df['DataYear'] == row['DataYear'] - 2) & \n", + " (df['Reported'] == 1) & \n", + " (df['ID'] == row['ID']), # Match by id\n", + " 'GHGIntensity'\n", + " ].iloc[0]\n", + " ) \n", + " \n", + " ) if len(df.loc[\n", + " (df['DataYear'] == row['DataYear'] - 1) & \n", + " (df['Reported'] == 1) & \n", + " (df['ID'] == row['ID']), # Match by id\n", + " 'GHGIntensity'\n", + " ]) == 1 and len(df.loc[\n", + " (df['DataYear'] == row['DataYear'] - 2) & \n", + " (df['Reported'] == 1) & \n", + " (df['ID'] == row['ID']), # Match by id\n", + " 'GHGIntensity'\n", + " ]) == 1 else None,\n", + " axis=1\n", + ")\n", + "\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Demonstrate newly calculated values" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + 
"source": [ + "df.sort_values( ['ID', 'DataYear']).head()[['DataYear', 'ID','GHGIntensity', 'GHGIntensity_LastYear', 'Chng_GHGIntensity_LastYear']]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Basic Regression Models to Determine Effects of GHG Intensity on Reporting" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "def linear_prob_model(df, regressors, target):\n", + "\n", + " # Start with a copy of the DataFrame\n", + " df_cleaned = df.copy()\n", + "\n", + "\n", + " # Initialize a dictionary to track dropped rows for each regressor\n", + " dropped_rows = {}\n", + "\n", + " # Drop rows with missing values for each regressor and track the counts\n", + " nrow_init = df_cleaned.shape[0]\n", + " for regressor in regressors:\n", + " initial_rows = len(df_cleaned)\n", + " df_cleaned = df_cleaned.dropna(subset=[regressor])\n", + " dropped_rows[regressor] = initial_rows - len(df_cleaned)\n", + "\n", + " # Report the number of dropped rows for each regressor\n", + " for regressor, count in dropped_rows.items():\n", + " print(f\"Rows dropped due to missing values in '{regressor}': {count} ({round(100*(count/nrow_init), 1)}%)\")\n", + "\n", + " # Set up the linear probability model\n", + " X = df_cleaned[regressors]\n", + " X = sm.add_constant(X) # Add a constant for the intercept\n", + " y = df_cleaned[target]\n", + "\n", + " # Fit the regression model\n", + " model = sm.OLS(y, X).fit()\n", + "\n", + " # Display the summary of the model\n", + " print(model.summary())\n", + "\n", + " return model" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "# Define the binary target column and regressors\n", + "df['GrossFloorArea_Mil'] = df['GrossFloorArea']/1000000\n", + "df['non_reporting'] = (df['Reported']==0).astype(int)\n", + "\n", + "target_var = 'non_reporting'\n", + "regressors = [\n", + " 'GHGIntensity_LastYear',\n", 
+ " 'Chng_GHGIntensity_LastYear',\n", + " 'GrossFloorArea_Mil'\n", + "]\n", + "\n", + "# Define variables\n", + "target_var = 'non_reporting'\n", + "regressors = [\n", + " 'GHGIntensity_LastYear',\n", + " 'Chng_GHGIntensity_LastYear',\n", + " 'GrossFloorArea_Mil'\n", + "]\n", + "\n", + "# Map pretty names for each variable\n", + "pretty_names = {\n", + " 'non_reporting': 'Non-Reporting',\n", + " 'GHGIntensity_LastYear': 'GHG Intensity (Last Year)',\n", + " 'Chng_GHGIntensity_LastYear': 'Change in GHG Intensity Last Two Years',\n", + " 'GrossFloorArea_Mil': 'Gross Floor Area (Millions)'\n", + "}\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Basic Regression" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "model = linear_prob_model(df, regressors, target_var)\n", + "\n", + "# Extract regression results\n", + "results = {\n", + " \"dependent_variable\": pretty_names[target_var], # Pretty name for dependent variable\n", + " \"number_of_observations\": int(model.nobs), # Number of observations\n", + " \"r_squared\": model.rsquared, # R-squared\n", + " \"adj_r_squared\": model.rsquared_adj, # Adjusted R-squared\n", + " \"coefficients\": { # Coefficients with pretty names\n", + " pretty_names.get(name, name): coef for name, coef in model.params.items()\n", + " },\n", + " \"p_values\": { # P-values with pretty names\n", + " pretty_names.get(name, name): pval for name, pval in model.pvalues.items()\n", + " },\n", + " \"confidence_intervals\": { # 95% confidence intervals with pretty names\n", + " pretty_names.get(name, name): list(ci) for name, ci in model.conf_int().iterrows()\n", + " },\n", + " \"covariance_type\": model.cov_type, # Covariance type\n", + "}\n", + "\n", + "# Save as JSON\n", + "for dir in dirs:\n", + " fpath = os.path.join(dir,'regression_results_w_covid.json')\n", + " with open(fpath, \"w\") as f:\n", + " json.dump(results, f, indent=4)" + ] + }, + { + 
"cell_type": "markdown", + "metadata": {}, + "source": [ + "## Analysis of Regression Results: No Effect Found\n", + "Using different regression combinations of \n", + "- GHG Intensity Last Year\n", + "- Change in GHG Intensity from 2 years ago to 1 year ago\n", + "- Square footage of building\n", + "\n", + "there generally is no effect of GHG Intensity last year or change in GHG intensity from two years ago to 1 year ago. \n", + "\n", + "Interestingly but unrelatedly, it seems that larger buildings are more likely to be compliant in their reporting. The effect size is small though" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Graph to Show Averages\n", + "\n", + "Show the mean and median value of GHG Intensity (or Change in GHG Intensity Last Year) by compliance status" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "\n", + "def create_scatter_with_means(df, x, y, hover_data, value_mapping, base_fontsize=12, x_label=None, y_label=None, title=None):\n", + " \"\"\"\n", + " Create a series of side-by-side subplot scatter plots (one per unique x category),\n", + " each with a horizontal mean line. 
The subplots are arranged horizontally with minimal gap.\n", + " \n", + " Parameters:\n", + " - df: DataFrame containing the data.\n", + " - x: Column name for the x-axis category variable.\n", + " - y: Column name for the continuous y-axis variable.\n", + " - hover_data: List of columns to display on hover.\n", + " - value_mapping: Dictionary to map the values of the x column (often binary) to meaningful labels.\n", + " - base_fontsize: Base font size for scaling other font sizes.\n", + " - x_label: Label for the x-axis (overall).\n", + " - y_label: Label for the y-axis (overall).\n", + " - title: Main title of the entire figure.\n", + " \"\"\"\n", + " \n", + " # Copy and prepare the dataframe\n", + " df = df.copy()\n", + " df[x] = df[x].map(value_mapping)\n", + " \n", + " # Identify unique categories in the x variable\n", + " categories = df[x].unique()\n", + " categories = [cat for cat in categories if pd.notnull(cat)] # Ensure no NaN\n", + " \n", + " # Calculate mean and median per category\n", + " mean_values = df.groupby(x)[y].mean()\n", + " median_values = df.groupby(x)[y].median()\n", + " \n", + " # Create subplots: one row, as many columns as unique categories\n", + " fig = make_subplots(\n", + " rows=1, cols=len(categories),\n", + " horizontal_spacing=0.1, # Narrow gap between subplots\n", + " shared_yaxes=True, # Share the same y-axis\n", + " subplot_titles=categories\n", + " )\n", + "\n", + " colors = ['rgba(0, 0, 255, 1)', 'rgba(255, 0, 0, 1)'] \n", + " colors_t = ['rgba(0, 0, 255, 0.5)', 'rgba(255, 0, 0, 0.5)'] # Transparent versions of 'blue' and 'red'\n", + " \n", + " # For each category, add a scatter trace\n", + " for i, category in enumerate(categories, start=1):\n", + "\n", + " cat_data = df[df[x] == category]\n", + " \n", + " # Add a small horizontal jitter around x=1 for the scatter\n", + " jitter = np.random.uniform(0.999, 1.001, size=len(cat_data))\n", + " \n", + " # Create the scatter trace\n", + " fig.add_trace(\n", + " go.Scatter(\n", + " 
x=jitter,\n", + " y=cat_data[y],\n", + " mode='markers',\n", + " marker=dict(size=8, opacity=0.8, color=colors_t[i-1]),\n", + " hovertemplate='
'.join([f\"{col}: %{{customdata[{idx}]}}\" \n", + " for idx, col in enumerate(hover_data)]) if hover_data else None,\n", + " customdata=cat_data[hover_data] if hover_data else None,\n", + " name = f'{y_label}
for \"{category}\"'\n", + " ),\n", + " row=1, col=i\n", + " )\n", + " \n", + " # Add mean line\n", + " mean_val = mean_values.loc[category]\n", + " fig.add_hline(\n", + " y=mean_val,\n", + " line_dash=\"dash\",\n", + " line_color=colors[i-1],\n", + " row=1, col=i,\n", + " name = f'Mean {y_label}',\n", + " line_width=3\n", + " )\n", + " \n", + " # Add annotation for mean and median\n", + " median_val = median_values.loc[category]\n", + " fig.add_annotation(\n", + " x=1.001, # slightly to the right of the main cluster\n", + " y=mean_val+20,\n", + " text=f\"Mean: {mean_val:.2f}
Median: {median_val:.2f}\",\n", + " showarrow=False,\n", + " xanchor='left',\n", + " yanchor='bottom',\n", + " font=dict(size=base_fontsize * 1.2, color=colors[i-1]),\n", + " row=1, col=i\n", + " )\n", + " \n", + " # Update x-axis for this subplot (just show a single vertical line)\n", + " fig.update_xaxes(\n", + " showline=True,\n", + " linecolor='black',\n", + " zeroline=True,\n", + " showticklabels=False,\n", + " showgrid=False,\n", + " range=[0.99, 1.01], # tight range around jitter\n", + " tickfont=dict(size=base_fontsize * 1.2),\n", + " row=1, col=i\n", + " )\n", + "\n", + " # Update the y-axis (shared) styling\n", + " fig.update_yaxes(\n", + " showline=True, \n", + " linecolor='black',\n", + " showgrid=False,\n", + " title_text=y_label, \n", + " title_font=dict(size=base_fontsize * 1.4),\n", + " tickfont=dict(size=base_fontsize * 1.2),\n", + " row=1, col=1\n", + " )\n", + " \n", + " # Update the layout\n", + " fig.update_layout(\n", + " height=400,\n", + " width=400*len(categories), # adjust width based on number of categories\n", + " title=dict(\n", + " text=title or f'Scatterplot of {y} by {x}',\n", + " x=0.5,\n", + " font=dict(size=base_fontsize * 1.3)\n", + " ),\n", + " plot_bgcolor='white',\n", + " margin=dict(l=40, r=40, t=150, b=80)\n", + " )\n", + "\n", + " return fig" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Comparing Change in GHG Intensity" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "value_mapping = {1: \"Didn't Report\", 0: \"Reported\"}\n", + "fig = create_scatter_with_means(\n", + " df=df.dropna(subset=['Chng_GHGIntensity_LastYear']),\n", + " x='non_reporting',\n", + " y='Chng_GHGIntensity_LastYear',\n", + " #hover_data=['PropertyName', 'DataYear', 'non_reporting', 'GHGIntensity_LastYear', 'Chng_GHGIntensity_LastYear', 'GrossFloorArea_Mil'],\n", + " hover_data=['PropertyName', 'DataYear'], # Reduced columns to make file size smaller\n", + 
" value_mapping=value_mapping,\n", + " base_fontsize=12,\n", + " x_label='Reported This Year?',\n", + " y_label='Change In GHG Intensity',\n", + " title='Change In GHG Intensity (In Last 2 Years)
By Reporting Compliance This Year'\n", + ")\n", + "\n", + "\n", + "iplot(fig)\n", + "for dir in dirs:\n", + " fig.write_html( os.path.join(dir,'change_GHG_trend_by_compliance.html'), include_plotlyjs=\"cdn\" )\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Same graph, but look at GHG Intensity instead of Change in GHG Intensity" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "value_mapping = {1: \"Didn't Report\", 0: \"Reported\"}\n", + "fig = create_scatter_with_means(\n", + " df=df.dropna(subset=['GHGIntensity_LastYear']),\n", + " x='non_reporting',\n", + " y='GHGIntensity_LastYear',\n", + " #hover_data=['PropertyName', 'DataYear', 'non_reporting', 'GHGIntensity_LastYear', 'Chng_GHGIntensity_LastYear', 'GrossFloorArea_Mil'],\n", + " hover_data=['PropertyName', 'DataYear'], # Reduced columns to make file size smaller\n", + " value_mapping=value_mapping,\n", + " base_fontsize=12,\n", + " x_label='Reported This Year?',\n", + " y_label='GHG Intensity',\n", + " title='GHG Intensity Last Year
By Reporting Compliance This Year'\n", + ")\n", + "\n", + "\n", + "iplot(fig)\n", + "for dir in dirs:\n", + " fig.write_html( os.path.join(dir,'GHG_last_year_by_compliance.html'), include_plotlyjs=\"cdn\" )\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---\n", + "\n", + "## Robustness: Dropping Covid\n", + "\n", + "Do the results change at all if we drop year=2019? (the year of the covid data anomaly)\n", + "\n", + "### Regression results:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "n_obs_original = df.shape[0]\n", + "df_no_covid = df.copy()\n", + "df_no_covid = df_no_covid[df_no_covid['DataYear']!=2019]\n", + "\n", + "\n", + "print(f\"Dropped {n_obs_original - len(df_no_covid)} observations out of {n_obs_original} ({round(100*(n_obs_original - len(df_no_covid))/n_obs_original, 2)}%) by dropping Covid year (2019)\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Count each new column per year\n", + "value_counts = df_no_covid.groupby('DataYear')['Reported'].value_counts()\n", + "non_reporting_counts = value_counts.xs(0, level='Reported')\n", + "reporting_counts = value_counts.xs(1, level='Reported')\n", + "\n", + "non_reporting_counts[2019] = 0\n", + "reporting_counts[2019] = 0\n", + "\n", + "non_reporting_counts.index = non_reporting_counts.index.astype(int)\n", + "reporting_counts.index = reporting_counts.index.astype(int)\n", + "\n", + "non_reporting_counts = non_reporting_counts.sort_index()\n", + "reporting_counts = reporting_counts.sort_index()\n", + "\n", + "# Create the figure\n", + "fig = go.Figure()\n", + "\n", + "# Add traces for each category\n", + "fig.add_trace(go.Scatter(x=reporting_counts.index, y=reporting_counts.values,\n", + " mode='lines+markers', name='Reported',\n", + " line=dict(width=4, color='rgba(0, 0,255, 0.7)'),\n", + " marker=dict(symbol='circle', size=10)))\n", + 
"\n", + "fig.add_trace(go.Scatter(x=non_reporting_counts.index, y=non_reporting_counts.values,\n", + " mode='lines+markers', name=\"Didn't Report\",\n", + " line=dict(width=4, color='rgba(255, 0, 0, 0.7)'),\n", + " marker=dict(symbol='circle', size=10)))\n", + "\n", + "fig.add_trace( go.Scatter( x=[2018.5, 2019.5, 2019.5, 2018.5, 2018.5], \n", + " y=[0, 0, np.max(reporting_counts)*1.2, np.max(reporting_counts)*1.2, 0 ], \n", + " fill='toself', mode='lines', name='Covid Data Disruption' )\n", + ")\n", + "\n", + "# Update layout\n", + "fig.update_layout(title=\"No Covid: Count of Buildings That Did/Didn't Report Emissions by Year\",\n", + " xaxis_title='Year of Emissions
(One year before data is reported)',\n", + " yaxis_title='Count of Buildings',\n", + " legend_title='Category')\n", + "\n", + "\n", + "\n", + "out, reduce_memory = show_fig(fig, reduce_memory)\n", + "\n", + "if reduce_memory:\n", + " with open(os.path.join(fig_dir,'reporting_counts_over_time_NO_COVID.png'), \"wb\") as f:\n", + " f.write(out)\n", + " out = Image(out)\n", + "out\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "linear_prob_model(df_no_covid, regressors, target_var)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Results: It still seems like there isn't really a relationship, although GHG intensity seems to be associated with a tiny increase in non-reporting. The magnitude is very small and the likelihood this is from chance is high. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "value_mapping = {1: \"Didn't Report\", 0: \"Reported\"}\n", + "fig = create_scatter_with_means(\n", + " df=df_no_covid,\n", + " x='non_reporting',\n", + " y='Chng_GHGIntensity_LastYear',\n", + " hover_data=['PropertyName', 'DataYear', 'non_reporting', 'GHGIntensity_LastYear', 'Chng_GHGIntensity_LastYear', 'GrossFloorArea_Mil'],\n", + " value_mapping=value_mapping,\n", + " base_fontsize=12,\n", + " x_label='Reported This Year?',\n", + " y_label='Change In GHG Intensity',\n", + " title='No Covid: Change In GHG Intensity (In Last 2 Years)
By Reporting Compliance This Year'\n", + ")\n", + "\n", + "\n", + "out, reduce_memory = show_fig(fig, reduce_memory)\n", + "\n", + "\n", + "if reduce_memory:\n", + " with open(os.path.join(fig_dir,'change_GHG_trend_by_compliance_NO_COVID.png'), \"wb\") as f:\n", + " f.write(out)\n", + " out = Image(out)\n", + "out" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "\n", + "value_mapping = {1: \"Didn't Report\", 0: \"Reported\"}\n", + "fig = create_scatter_with_means(\n", + " df=df_no_covid,\n", + " x='non_reporting',\n", + " y='GHGIntensity_LastYear',\n", + " hover_data=['PropertyName', 'DataYear', 'non_reporting', 'GHGIntensity_LastYear', 'Chng_GHGIntensity_LastYear', 'GrossFloorArea_Mil'],\n", + " value_mapping=value_mapping,\n", + " base_fontsize=12,\n", + " x_label='Reported This Year?',\n", + " y_label='GHG Intensity',\n", + " title='No Covid: GHG Intensity Last Year
By Reporting Compliance This Year'\n", + ")\n", + "\n", + "out, reduce_memory = show_fig(fig, reduce_memory)\n", + "out \n", + "\n", + "if reduce_memory:\n", + " with open(os.path.join(fig_dir,'GHG_last_year_by_compliance_NO_COVID.png'), \"wb\") as f:\n", + " f.write(out)\n", + " out = Image(out)\n", + "out" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Drop outliers " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "n_obs_original = df.shape[0]\n", + "\n", + "# Identify buildings with any row having GHGIntensity > 100\n", + "buildings_with_outliers = df.loc[df['GHGIntensity'] > 100, 'ID'].unique()\n", + "\n", + "# Drop all rows for those buildings\n", + "df_no_outliers = df[~df['ID'].isin(buildings_with_outliers)]\n", + "\n", + "print(f\"Dropped {n_obs_original - len(df_no_outliers)} observations out of {n_obs_original} ({round(100*(n_obs_original - len(df_no_outliers))/n_obs_original, 2)}%) by dropping all rows for buildings with any GHGIntensity over 100\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "linear_prob_model(df_no_outliers, regressors, target_var)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "value_mapping = {1: \"Didn't Report\", 0: \"Reported\"}\n", + "fig = create_scatter_with_means(\n", + " df=df_no_outliers,\n", + " x='non_reporting',\n", + " y='Chng_GHGIntensity_LastYear',\n", + " hover_data=['PropertyName', 'DataYear', 'non_reporting', 'GHGIntensity_LastYear', 'Chng_GHGIntensity_LastYear', 'GrossFloorArea_Mil'],\n", + " value_mapping=value_mapping,\n", + " base_fontsize=12,\n", + " x_label='Reported This Year?',\n", + " y_label='Change In GHG Intensity',\n", + " title='No Outliers: Change In GHG Intensity (In Last 2 Years)
By Reporting Compliance This Year'\n", + ")\n", + "\n", + "out, reduce_memory = show_fig(fig, reduce_memory)\n", + "out \n", + "\n", + "if reduce_memory:\n", + " with open(os.path.join(fig_dir,'change_GHG_trend_by_compliance_NO_OUTLIERS.png'), \"wb\") as f:\n", + " f.write(out)\n", + " out = Image(out)\n", + "out\n", + " \n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Example usage:\n", + "\n", + "value_mapping = {1: \"Didn't Report\", 0: \"Reported\"}\n", + "fig = create_scatter_with_means(\n", + " df=df_no_outliers,\n", + " x='non_reporting',\n", + " y='GHGIntensity_LastYear',\n", + " hover_data=['PropertyName', 'DataYear', 'non_reporting', 'GHGIntensity_LastYear', 'Chng_GHGIntensity_LastYear', 'GrossFloorArea_Mil'],\n", + " value_mapping=value_mapping,\n", + " base_fontsize=12,\n", + " x_label='Reported This Year?',\n", + " y_label='GHG Intensity',\n", + " title='No Outliers: GHG Intensity Last Year
By Reporting Compliance This Year'\n", + ")\n", + "\n", + "out, reduce_memory = show_fig(fig, reduce_memory)\n", + "out \n", + "\n", + " \n", + "if reduce_memory:\n", + " with open(os.path.join(fig_dir,'GHG_last_year_by_compliance_NO_OUTLIERS.png'), \"wb\") as f:\n", + " f.write(out)\n", + " out = Image(out)\n", + "out" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Results: Dropping outliers\n", + "\n", + "Again, no significant result. The only thing we see from dropping the outliers is that now it seems that having a higher GHG intensity \n", + "is associated WITH reporting, which is the opposite of our hypothesis (reporting buildings have higher mean and median GHG intensity). Again,\n", + "the magnitude is very small so we can likely ignore this. " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Some extra graphs comparing the two variables: Change in GHG Intensity and GHG Intensity last year" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "\n", + "def customizable_scatterplot(df, x_var, y_var, color_var, x_limits=None, y_limits=None, title=None):\n", + " \"\"\"\n", + " Create a customizable scatterplot.\n", + "\n", + " Parameters:\n", + " - df: DataFrame containing the data.\n", + " - x_var: Column name for the x-axis.\n", + " - y_var: Column name for the y-axis.\n", + " - color_var: Column name for the binary variable to set discrete coloring.\n", + " - x_limits: Tuple (min, max) to set x-axis limits.\n", + " - y_limits: Tuple (min, max) to set y-axis limits.\n", + " - title: Title of the plot (default: None).\n", + " \"\"\"\n", + " # Ensure the color_var is treated as a categorical variable for discrete coloring\n", + " df[color_var] = df[color_var].astype(str)\n", + "\n", + " # Create scatterplot\n", + " fig = px.scatter(\n", + " df,\n", + " x=x_var,\n", + " y=y_var,\n", + " color=color_var,\n", + " 
color_discrete_sequence=[\"blue\", \"orange\"], # Discrete colors for binary 0 and 1\n", + " opacity=0.2, # Add transparency here\n", + " labels={x_var: x_var, y_var: y_var, color_var: color_var},\n", + " title=title or f'Scatterplot of {y_var} vs {x_var}'\n", + " )\n", + " \n", + " # Set axis limits if provided\n", + " if x_limits:\n", + " fig.update_xaxes(range=x_limits)\n", + " if y_limits:\n", + " fig.update_yaxes(range=y_limits)\n", + " \n", + " # Update layout for better aesthetics\n", + " fig.update_layout(\n", + " height=600,\n", + " width=800,\n", + " legend_title=color_var\n", + " )\n", + " \n", + "\n", + " return fig" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig = customizable_scatterplot(df, 'GHGIntensity_LastYear', 'Chng_GHGIntensity_LastYear', 'non_reporting' , \n", + " x_limits =[0,40], y_limits=[-10,10], title=\"Scatterplot of 'Change in GHG Intensity' vs 'GHG Intensity Last Year'\")\n", + "\n", + "out, reduce_memory = show_fig(fig, reduce_memory)\n", + "\n", + "if reduce_memory:\n", + " with open(os.path.join(fig_dir,'scatterplot_of_GHG_last_year_by_GHG_trend.png'), \"wb\") as f:\n", + " f.write(out)\n", + " out = Image(out)\n", + "out" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "my-venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.18" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/src/data/analysis/output/compliance_analysis/GHG_last_year_by_compliance.html b/src/data/analysis/output/compliance_analysis/GHG_last_year_by_compliance.html new file mode 100644 index 00000000..5e9da90f --- /dev/null +++ b/src/data/analysis/output/compliance_analysis/GHG_last_year_by_compliance.html @@ -0,0 
+1,7 @@ + + + +
+
+ + \ No newline at end of file diff --git a/src/data/analysis/output/compliance_analysis/GHG_last_year_by_compliance_NO_COVID.png b/src/data/analysis/output/compliance_analysis/GHG_last_year_by_compliance_NO_COVID.png new file mode 100644 index 00000000..1850d98c Binary files /dev/null and b/src/data/analysis/output/compliance_analysis/GHG_last_year_by_compliance_NO_COVID.png differ diff --git a/src/data/analysis/output/compliance_analysis/GHG_last_year_by_compliance_NO_OUTLIERS.png b/src/data/analysis/output/compliance_analysis/GHG_last_year_by_compliance_NO_OUTLIERS.png new file mode 100644 index 00000000..208281fe Binary files /dev/null and b/src/data/analysis/output/compliance_analysis/GHG_last_year_by_compliance_NO_OUTLIERS.png differ diff --git a/src/data/analysis/output/compliance_analysis/change_GHG_trend_by_compliance.html b/src/data/analysis/output/compliance_analysis/change_GHG_trend_by_compliance.html new file mode 100644 index 00000000..62d87c79 --- /dev/null +++ b/src/data/analysis/output/compliance_analysis/change_GHG_trend_by_compliance.html @@ -0,0 +1,7 @@ + + + +
+
+ + \ No newline at end of file diff --git a/src/data/analysis/output/compliance_analysis/change_GHG_trend_by_compliance_NO_COVID.png b/src/data/analysis/output/compliance_analysis/change_GHG_trend_by_compliance_NO_COVID.png new file mode 100644 index 00000000..7b5bed80 Binary files /dev/null and b/src/data/analysis/output/compliance_analysis/change_GHG_trend_by_compliance_NO_COVID.png differ diff --git a/src/data/analysis/output/compliance_analysis/change_GHG_trend_by_compliance_NO_OUTLIERS.png b/src/data/analysis/output/compliance_analysis/change_GHG_trend_by_compliance_NO_OUTLIERS.png new file mode 100644 index 00000000..2a809266 Binary files /dev/null and b/src/data/analysis/output/compliance_analysis/change_GHG_trend_by_compliance_NO_OUTLIERS.png differ diff --git a/src/data/analysis/output/compliance_analysis/distribution_of_GHG_intensity.html b/src/data/analysis/output/compliance_analysis/distribution_of_GHG_intensity.html new file mode 100644 index 00000000..10cb4dee --- /dev/null +++ b/src/data/analysis/output/compliance_analysis/distribution_of_GHG_intensity.html @@ -0,0 +1,7 @@ + + + +
+
+ + \ No newline at end of file diff --git a/src/data/analysis/output/compliance_analysis/regression_results_w_covid.json b/src/data/analysis/output/compliance_analysis/regression_results_w_covid.json new file mode 100644 index 00000000..2951a75b --- /dev/null +++ b/src/data/analysis/output/compliance_analysis/regression_results_w_covid.json @@ -0,0 +1,37 @@ +{ + "dependent_variable": "Non-Reporting", + "number_of_observations": 10588, + "r_squared": 0.0006814442215383743, + "adj_r_squared": 0.00039819066264434877, + "coefficients": { + "const": 0.12735628464907744, + "GHG Intensity (Last Year)": -6.42240400826807e-08, + "Change in GHG Intensity Last Two Years": 0.00020244263665042202, + "Gross Floor Area (Millions)": -0.017037109384991228 + }, + "p_values": { + "const": 8.632077178330187e-174, + "GHG Intensity (Last Year)": 0.9997894807085226, + "Change in GHG Intensity Last Two Years": 0.4398536725259733, + "Gross Floor Area (Millions)": 0.010176545692003403 + }, + "confidence_intervals": { + "const": [ + 0.11863904983380956, + 0.13607351946434532 + ], + "GHG Intensity (Last Year)": [ + -0.0004771904393046634, + 0.000477061991224498 + ], + "Change in GHG Intensity Last Two Years": [ + -0.0003112678381680631, + 0.0007161531114689071 + ], + "Gross Floor Area (Millions)": [ + -0.030030457208608786, + -0.004043761561373668 + ] + }, + "covariance_type": "nonrobust" +} \ No newline at end of file diff --git a/src/data/analysis/output/compliance_analysis/reporting_counts_over_time.html b/src/data/analysis/output/compliance_analysis/reporting_counts_over_time.html new file mode 100644 index 00000000..d06ed83c --- /dev/null +++ b/src/data/analysis/output/compliance_analysis/reporting_counts_over_time.html @@ -0,0 +1,7 @@ + + + +
+
+ + \ No newline at end of file diff --git a/src/data/analysis/output/compliance_analysis/reporting_counts_over_time_NO_COVID.png b/src/data/analysis/output/compliance_analysis/reporting_counts_over_time_NO_COVID.png new file mode 100644 index 00000000..3a32c2d5 Binary files /dev/null and b/src/data/analysis/output/compliance_analysis/reporting_counts_over_time_NO_COVID.png differ diff --git a/src/data/analysis/output/compliance_analysis/scatterplot_of_GHG_last_year_by_GHG_trend.png b/src/data/analysis/output/compliance_analysis/scatterplot_of_GHG_last_year_by_GHG_trend.png new file mode 100644 index 00000000..1ebd24d5 Binary files /dev/null and b/src/data/analysis/output/compliance_analysis/scatterplot_of_GHG_last_year_by_GHG_trend.png differ diff --git a/src/data/requirements.txt b/src/data/requirements.txt index 99bd7570..7c0162dc 100644 --- a/src/data/requirements.txt +++ b/src/data/requirements.txt @@ -2,3 +2,57 @@ python-slugify==4.0.1 pandas==2.1.2 numpy pytest==7.4.4 + +# Packages for Jupyter notebook data analysis, creating figures, running regressions +appnope==0.1.4 +asttokens==3.0.0 +attrs==24.3.0 +comm==0.2.2 +debugpy==1.8.12 +decorator==5.1.1 +executing==2.1.0 +fastjsonschema==2.21.1 +iniconfig==2.0.0 +ipykernel==6.29.5 +ipython>=7.0.0,<8.0.0 # Fixed to work with python 3.9 +jedi==0.19.2 +jsonschema==4.23.0 +jsonschema-specifications==2024.10.1 +jupyter_client==8.6.3 +jupyter_core==5.7.2 +kaleido==0.1.0 +matplotlib-inline==0.1.7 +nbformat==5.10.4 +nest-asyncio==1.6.0 +packaging==24.2 +parso==0.8.4 +patsy==1.0.1 +pexpect==4.9.0 +platformdirs==4.3.6 +plotly==5.3.1 +pluggy==1.5.0 +prompt_toolkit==3.0.50 +psutil==6.1.1 +ptyprocess==0.7.0 +pure_eval==0.2.3 +Pygments==2.19.1 +python-dateutil==2.9.0.post0 +pytz==2024.2 +pyzmq==26.2.0 +referencing==0.36.1 +rpds-py==0.22.3 +scipy==1.15.1 +six==1.17.0 +stack-data==0.6.3 +statsmodels==0.14.4 +tenacity==9.0.0 +text-unidecode==1.3 +tornado==6.4.2 +traitlets==5.14.3 +typing_extensions==4.12.2 +tzdata==2025.1 
+wcwidth==0.2.13 +notebook==7.3.2 +jupyterlab==4.3.4 +jupyterlab_pygments==0.3.0 +jupyterlab_server==2.27.3 diff --git a/src/pages/About.vue b/src/pages/About.vue index 0e311904..7abeec21 100644 --- a/src/pages/About.vue +++ b/src/pages/About.vue @@ -24,10 +24,14 @@ export default class About extends Vue {

About Us

- Electrify Chicago is an independent open-source project looking to shed - light onto one of the biggest sources of Chicago's CO2 + Electrify Chicago is an independent open-source project based out of + Chi Hack Night + looking to shed light onto one of the biggest sources of Chicago's + CO2 emissions - buildings. By providing more information about some of the - city's largest and most polluting buildings, we hope t encourage these + city's largest and most polluting buildings, we hope to encourage these buildings to electrify, particularly by mobilizing people related to the building - whether that be students and faculty for a college building or employees and patients at a hospital. diff --git a/src/pages/Blog.vue b/src/pages/Blog.vue index 94b67e8c..70d2c1a4 100644 --- a/src/pages/Blog.vue +++ b/src/pages/Blog.vue @@ -23,7 +23,30 @@ export default class About extends Vue {

Electrify Chicago Blog

-