-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtest_data_analysis.py
176 lines (127 loc) · 5.8 KB
/
test_data_analysis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
# Author: Miguel Ibarra 【=◈︿◈=】
import os
import tempfile

import numpy as np
import pandas as pd
import pytest

import data_analysis_tool
def test_calculate_mean_age():
    """Check calculate_mean_age against NumPy's mean of the 'age' column."""
    ages = [25, 30, 40, 50, 35, 45, 55]
    frame = pd.DataFrame({'age': ages})
    expected = np.mean(frame['age'])
    actual = data_analysis_tool.calculate_mean_age(frame)
    # Approximate comparison: both sides are floating-point averages.
    assert actual == pytest.approx(expected)
def test_count_males_and_females():
    """Expect count_males_and_females to equal the (male, female) row counts."""
    genders = ['Male', 'Female', 'Male', 'Female', 'Female', 'Male', 'Female']
    frame = pd.DataFrame({'gender': genders})
    # Count the rows for each label, in (Male, Female) order.
    expected = tuple(
        frame[frame['gender'] == label].shape[0]
        for label in ('Male', 'Female')
    )
    assert data_analysis_tool.count_males_and_females(frame) == expected
def test_calculate_standard_deviation():
    """Compare calculate_standard_deviation with np.std of the 'age' column."""
    ages = [25, 30, 40, 50, 35, 45, 55]
    frame = pd.DataFrame({'age': ages})
    # NOTE(review): np.std presumably yields the population (ddof=0) value
    # here -- confirm this matches the convention the tool is meant to use.
    reference = np.std(frame['age'])
    result = data_analysis_tool.calculate_standard_deviation(frame)
    assert result == pytest.approx(reference)
def test_calculate_percentage_of_males_and_females():
    """Check the (male, female) share returned for a small gender column.

    The expected values are the fraction of rows carrying each label
    (row count for the label divided by the total row count).
    """
    df = pd.DataFrame({'gender': ['Male', 'Female', 'Male', 'Female', 'Female', 'Male', 'Female']})
    total_rows = df.shape[0]
    male_percentage = df[df['gender'] == 'Male'].shape[0] / total_rows
    female_percentage = df[df['gender'] == 'Female'].shape[0] / total_rows
    result = data_analysis_tool.calculate_percentage_of_males_and_females(df)
    # 3/7 and 4/7 are not exactly representable as floats; compare with a
    # tolerance (as the other numeric tests in this file do) so an equivalent
    # but differently-ordered computation in the tool does not fail the test.
    assert result == pytest.approx((male_percentage, female_percentage))
def test_plot_age_distribution():
    """Check that plot_age_distribution creates a file at the requested path."""
    df = pd.DataFrame({'age': [25, 30, 40, 50, 35, 45, 55]})
    # Write into a throwaway directory: the test then cannot overwrite or
    # delete a same-named file in the current directory, and the artifact is
    # removed even when the plotting call or the assertion fails.
    with tempfile.TemporaryDirectory() as tmp_dir:
        temp_file = os.path.join(tmp_dir, 'age_distribution.png')
        data_analysis_tool.plot_age_distribution(df, temp_file)
        # Check if the file was created
        assert os.path.isfile(temp_file)
def test_add_user_input():
    """Check that add_user_input returns the frame extended by the new row."""
    original = pd.DataFrame({
        'id': [1, 2, 3],
        'name': ['John', 'Jane', 'Peter'],
        'age': [25, 30, 40],
        'gender': ['Male', 'Female', 'Male'],
        'country': ['USA', 'UK', 'Canada'],
    })
    # Values are given in column order: id, name, age, gender, country.
    new_row = (4, 'Mary', 35, 'Female', 'USA')
    expected = pd.DataFrame({
        'id': [1, 2, 3, 4],
        'name': ['John', 'Jane', 'Peter', 'Mary'],
        'age': [25, 30, 40, 35],
        'gender': ['Male', 'Female', 'Male', 'Female'],
        'country': ['USA', 'UK', 'Canada', 'USA'],
    })
    updated = data_analysis_tool.add_user_input(original, new_row)
    pd.testing.assert_frame_equal(updated, expected)
def test_save_results():
    """Check that save_results writes a CSV that reads back equal to the input."""
    df = pd.DataFrame({'id': [1, 2, 3], 'name': ['John', 'Jane', 'Peter'], 'age': [25, 30, 40],
                       'gender': ['Male', 'Female', 'Male'], 'country': ['USA', 'UK', 'Canada']})
    # Save into a throwaway directory: a real 'results.csv' in the current
    # directory is never overwritten or deleted, and the file is cleaned up
    # even if one of the assertions below fails.
    with tempfile.TemporaryDirectory() as tmp_dir:
        file_name = os.path.join(tmp_dir, 'results.csv')
        data_analysis_tool.save_results(df, file_name)
        # Check if the file was created
        assert os.path.isfile(file_name)
        # Read the saved file and compare its content with the original DataFrame
        saved_df = pd.read_csv(file_name)
        pd.testing.assert_frame_equal(df, saved_df)
def test_plot_boxplot_age_by_gender():
    """Check that plot_boxplot_age_by_gender creates a file at the requested path."""
    df = pd.DataFrame({
        'age': [25, 30, 40, 50, 35, 45, 55],
        'gender': ['Male', 'Female', 'Male', 'Female', 'Female', 'Male', 'Female']
    })
    # Write into a throwaway directory: the test then cannot overwrite or
    # delete a same-named file in the current directory, and the artifact is
    # removed even when the plotting call or the assertion fails.
    with tempfile.TemporaryDirectory() as tmp_dir:
        temp_file = os.path.join(tmp_dir, 'boxplot_age_by_gender.png')
        data_analysis_tool.plot_boxplot_age_by_gender(df, temp_file)
        # Check if the file was created
        assert os.path.isfile(temp_file)
def test_plot_violin_plot_age_by_country():
    """Check that plot_violin_plot_age_by_country creates a file at the requested path."""
    df = pd.DataFrame({
        'age': [25, 30, 40, 50, 35, 45, 55],
        'country': ['United States', 'United Kingdom', 'Canada', 'Australia', 'United States', 'Canada', 'Australia']
    })
    # Write into a throwaway directory: the test then cannot overwrite or
    # delete a same-named file in the current directory, and the artifact is
    # removed even when the plotting call or the assertion fails.
    with tempfile.TemporaryDirectory() as tmp_dir:
        temp_file = os.path.join(tmp_dir, 'violin_plot_age_by_country.png')
        data_analysis_tool.plot_violin_plot_age_by_country(df, temp_file)
        # Check if the file was created
        assert os.path.isfile(temp_file)
def test_plot_income_by_education_level():
    """Check that plot_income_by_education_level creates a file at the requested path."""
    df = pd.DataFrame({
        'education': ['Bachelor\'s Degree', 'Master\'s Degree', 'Bachelor\'s Degree', 'Associate Degree'],
        'income': [50000, 60000, 45000, 55000]
    })
    # Write into a throwaway directory: the test then cannot overwrite or
    # delete a same-named file in the current directory, and the artifact is
    # removed even when the plotting call or the assertion fails.
    with tempfile.TemporaryDirectory() as tmp_dir:
        temp_file = os.path.join(tmp_dir, 'barplot_income_by_education.png')
        data_analysis_tool.plot_income_by_education_level(df, temp_file)
        # Check if the file was created
        assert os.path.isfile(temp_file)
def test_plot_income_by_occupation():
    """Check that plot_income_by_occupation creates a file at the requested path."""
    df = pd.DataFrame({
        'occupation': ['Engineer', 'Manager', 'Doctor', 'Teacher'],
        'income': [50000, 60000, 70000, 55000]
    })
    # Write into a throwaway directory: the test then cannot overwrite or
    # delete a same-named file in the current directory, and the artifact is
    # removed even when the plotting call or the assertion fails.
    with tempfile.TemporaryDirectory() as tmp_dir:
        temp_file = os.path.join(tmp_dir, 'barplot_income_by_occupation.png')
        data_analysis_tool.plot_income_by_occupation(df, temp_file)
        # Check if the file was created
        assert os.path.isfile(temp_file)
def test_plot_income_by_marital_status():
    """Check that plot_income_by_marital_status creates a file at the requested path."""
    df = pd.DataFrame({
        'marital_status': ['Single', 'Married', 'Married', 'Single'],
        'income': [50000, 60000, 70000, 55000]
    })
    # Write into a throwaway directory: the test then cannot overwrite or
    # delete a same-named file in the current directory, and the artifact is
    # removed even when the plotting call or the assertion fails.
    with tempfile.TemporaryDirectory() as tmp_dir:
        temp_file = os.path.join(tmp_dir, 'barplot_income_by_marital_status.png')
        data_analysis_tool.plot_income_by_marital_status(df, temp_file)
        # Check if the file was created
        assert os.path.isfile(temp_file)
def test_plot_income_by_number_of_children():
    """Check that plot_income_by_number_of_children creates a file at the requested path."""
    df = pd.DataFrame({
        'children': [0, 2, 1, 3],
        'income': [50000, 60000, 70000, 55000]
    })
    # Write into a throwaway directory: the test then cannot overwrite or
    # delete a same-named file in the current directory, and the artifact is
    # removed even when the plotting call or the assertion fails.
    with tempfile.TemporaryDirectory() as tmp_dir:
        temp_file = os.path.join(tmp_dir, 'barplot_income_by_children.png')
        data_analysis_tool.plot_income_by_number_of_children(df, temp_file)
        # Check if the file was created
        assert os.path.isfile(temp_file)
if __name__ == '__main__':
    # Propagate pytest's exit status so that running this file directly
    # ends with a non-zero process exit code when any test fails.
    raise SystemExit(pytest.main())