Skip to content

Commit

Permalink
fix: remove unused Haversine distance function
Browse files Browse the repository at this point in the history
- it used the lat-lng attributes which are no longer saved by default
- update test data
  • Loading branch information
cbueth committed Dec 4, 2024
1 parent 8c85c51 commit 17cf648
Show file tree
Hide file tree
Showing 12 changed files with 1,035,471 additions and 1,078,414 deletions.
4 changes: 2 additions & 2 deletions superblockify/cities.yml
Original file line number Diff line number Diff line change
Expand Up @@ -120,11 +120,11 @@ place_lists:
- 1572779
pop_GHSL2023: 328740.3926395578
Liechtenstein:
query: Liechtenstein, Europe
query: Liechtenstein
country: LI
region: EU
nominatim link:
- https://nominatim.openstreetmap.org/ui/search.html?q=Liechtenstein,+Europe
- https://nominatim.openstreetmap.org/ui/search.html?q=Liechtenstein
OSM relation:
- https://www.openstreetmap.org/relation/1155955
osm_id:
Expand Down
96 changes: 0 additions & 96 deletions superblockify/metrics/distances.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,102 +236,6 @@ def calculate_euclidean_distance_matrix_projected(
return dist_matrix


def calculate_euclidean_distance_matrix_haversine(
    graph, node_order=None, plot_distributions=False
):
    """Calculate the pairwise great-circle distances between all nodes in the graph.

    Uses the **Haversine formula** to calculate the distances between all nodes in
    the graph. The coordinates are read from the ``lat`` and ``lon`` node
    attributes and are expected in degrees; they are converted to radians before
    any trigonometric function is applied.

    Parameters
    ----------
    graph : networkx.Graph
        The graph to calculate the distance matrix for
    node_order : list, optional
        The order of the nodes in the distance matrix. If None, the ordering is
        produced by graph.nodes().
    plot_distributions : bool, optional
        If True, plot the distributions of the distances and coordinates.
        Sanity check for the coordinate values.

    Returns
    -------
    dist_matrix : ndarray
        The distance matrix for the partitioning. dist_matrix[i, j] is the
        Haversine distance between node i and node j, scaled by
        ``_AVG_EARTH_RADIUS_M`` (presumably meters, going by the constant's name).

    Raises
    ------
    ValueError
        If coordinates are not numeric or not in the range [-90, 90] for latitude
        and [-180, 180] for longitude.
    """

    if node_order is None:
        node_order = list(graph.nodes())

    start_time = time()

    # Collect the coordinates (in degrees) in the requested node order
    lat = np.array([graph.nodes[node]["lat"] for node in node_order])
    lon = np.array([graph.nodes[node]["lon"] for node in node_order])

    # Check that all values are float or int and proper lat/lon values
    if not np.issubdtype(lat.dtype, np.number) or not np.issubdtype(
        lon.dtype, np.number
    ):
        raise ValueError("Latitude and longitude values must be numeric.")
    if np.any(lat > 90) or np.any(lat < -90):
        raise ValueError("Latitude values are not in the range [-90, 90].")
    if np.any(lon > 180) or np.any(lon < -180):
        raise ValueError("Longitude values are not in the range [-180, 180].")

    # Degree-valued row vectors, kept for logging and plotting only
    node1_lat = np.expand_dims(lat, axis=0)
    node1_lon = np.expand_dims(lon, axis=0)

    # np.sin / np.cos expect radians, so convert before applying the formula.
    # Row (1, n) and column (n, 1) vectors broadcast to the full (n, n) matrix.
    lat_rad = np.radians(lat)
    lon_rad = np.radians(lon)
    row_lat = np.expand_dims(lat_rad, axis=0)
    row_lon = np.expand_dims(lon_rad, axis=0)
    col_lat = np.expand_dims(lat_rad, axis=1)
    col_lon = np.expand_dims(lon_rad, axis=1)

    # Calculate haversine distance,
    # see https://en.wikipedia.org/wiki/Haversine_formula
    # and https://github.com/mapado/haversine/blob/master/haversine/haversine.py
    delta_lat = col_lat - row_lat
    delta_lon = col_lon - row_lon
    hav = (
        np.sin(delta_lat / 2) ** 2
        + np.cos(row_lat) * np.cos(col_lat) * np.sin(delta_lon / 2) ** 2
    )
    # Floating point rounding can push hav marginally outside [0, 1], which would
    # turn into NaN in sqrt/arcsin (e.g. for near-antipodal points).
    hav = np.clip(hav, 0.0, 1.0)
    dist_matrix = 2 * _AVG_EARTH_RADIUS_M * np.arcsin(np.sqrt(hav))
    logger.debug(
        "Euclidean distances for graph with %s nodes and %s edges "
        "calculated in %s. "
        "Min/max lat/lon values: %s, %s, %s, %s; Difference: %s, %s",
        graph.number_of_nodes(),
        graph.number_of_edges(),
        timedelta(seconds=time() - start_time),
        np.min(node1_lat),
        np.max(node1_lat),
        np.min(node1_lon),
        np.max(node1_lon),
        np.max(node1_lat) - np.min(node1_lat),
        np.max(node1_lon) - np.min(node1_lon),
    )

    if plot_distributions:
        # Plot distribution of distances and scatter plot of lat/lon (in degrees)
        plot_distance_distributions(
            dist_matrix,
            dist_title="Distribution of Euclidean distances",
            coords=(node1_lon, node1_lat),
            coord_title="Scatter plot of unprojected coordinates",
            labels=("Longitude [°]", "Latitude [°]"),
        )

    return dist_matrix


def calculate_partitioning_distance_matrix(
partitioner,
weight=None,
Expand Down
5 changes: 4 additions & 1 deletion superblockify/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -368,7 +368,10 @@ def load_graphml_dtypes(filepath=None, attribute_label=None, attribute_dtype=Non
The graph.
"""

node_dtypes = {}
node_dtypes = {
"y": float,
"x": float,
}
edge_dtypes = {
"bearing": float,
"length": float,
Expand Down
43 changes: 0 additions & 43 deletions tests/metrics/test_distances.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
from superblockify.metrics.distances import (
calculate_path_distance_matrix,
calculate_euclidean_distance_matrix_projected,
calculate_euclidean_distance_matrix_haversine,
calculate_partitioning_distance_matrix,
)

Expand Down Expand Up @@ -91,48 +90,6 @@ def test_calculate_euclidean_distance_matrix_projected_unprojected_graph(
calculate_euclidean_distance_matrix_projected(graph)


def test_calculate_euclidean_distance_matrix_haversine(test_city_small_copy):
    """Smoke-test the Haversine distance matrix on the full graph.

    The calculation is run twice with plotting enabled: first with the default
    node ordering, then with an explicitly passed node ordering.
    """
    _city_name, city_graph = test_city_small_copy

    # Default node ordering
    calculate_euclidean_distance_matrix_haversine(
        city_graph, plot_distributions=True
    )

    # Explicit node ordering
    explicit_order = list(city_graph.nodes)
    calculate_euclidean_distance_matrix_haversine(
        city_graph, node_order=explicit_order, plot_distributions=True
    )

    # Release all figures opened by the plotting calls
    plt.close("all")


@pytest.mark.parametrize(
    "key,value",
    [
        # Missing values
        ("lat", None),
        ("lon", None),
        # Non-numeric values
        ("lat", "a"),
        ("lon", "a"),
        # Just outside the valid ranges
        ("lat", -90.1),
        ("lon", -180.1),
        ("lat", 90.1),
        ("lon", 180.1),
        # Infinite values
        ("lat", inf),
        ("lon", inf),
        ("lat", -inf),
        ("lon", -inf),
    ],
)
def test_calculate_euclidean_distance_matrix_haversine_faulty_coords(
    test_city_small_copy, key, value
):
    """Check that a single faulty coordinate makes the Haversine matrix fail.

    The given attribute of the first node is overwritten with an invalid value
    and the calculation is expected to raise a ValueError.
    """
    _city_name, city_graph = test_city_small_copy

    # Corrupt the requested coordinate attribute of the first node
    first_node = next(iter(city_graph.nodes))
    city_graph.nodes[first_node][key] = value

    with pytest.raises(ValueError):
        calculate_euclidean_distance_matrix_haversine(city_graph)


@pytest.mark.parametrize("predefined_node_order", [True, False])
@pytest.mark.parametrize("max_mem_factor", [0.5, 0.0])
def test_calculate_partitioning_distance_matrix(
Expand Down
Loading

0 comments on commit 17cf648

Please sign in to comment.