In [163]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from geopy.distance import geodesic
import re
from matplotlib.colors import LinearSegmentedColormap

import numpy as np
from shapely.geometry import Point, shape
from shapely.ops import unary_union
from shapely.prepared import prep

import json
In [164]:
# Read all tables from the Wikipedia page
url = "https://en.wikipedia.org/wiki/List_of_national_parks_of_Australia"
tables = pd.read_html(url)

names = []
lats = []
longs = []

def extract_coordinates(coord_str):
    if pd.isna(coord_str):
        return None, None
    
    # Extract latitude and longitude using regex
    pattern = r'(-?\d+\.?\d*°?S).*?(-?\d+\.?\d*°?E)'
    match = re.search(pattern, coord_str)
    
    if match:
        lat_str, long_str = match.groups()
        
        # Convert to decimal degrees
        lat = float(lat_str.replace('°S', '').replace('°', '')) * -1  # Southern hemisphere is negative
        long = float(long_str.replace('°E', '').replace('°', ''))
        
        return lat, long
    return None, None

# Process each table
for table in tables:
    if 'Name' in table.columns and 'Coordinates' in table.columns:
        for _, row in table.iterrows():
            name = row['Name']
            if isinstance(name, str) and 'National Park' in name:
                # Clean the name by removing links and citations
                clean_name = re.sub(r'\[.*?\]', '', name)
                
                # Extract coordinates
                lat, long = extract_coordinates(row['Coordinates'])
                
                if lat is not None and long is not None:
                    names.append(clean_name)
                    lats.append(lat)
                    longs.append(long)

# Create the DataFrame
national_parks = pd.DataFrame({
    'name': names,
    'latitude': lats,
    'longitude': longs
})

# Display the first few rows
print(national_parks.head())
                              name   latitude   longitude
0            Namadgi National Park -35.666667  148.950000
1  Abercrombie River National Park -34.093900  149.708000
2            Arakwal National Park -28.660300  153.621000
3         Bago Bluff National Park -31.532200  152.629000
4          Bald Rock National Park -28.852500  152.055556
In [171]:
# Optimize distance calculations using vectorization
def calculate_distances(parks_df, lat_grid, lon_grid):
    # Create meshgrid of latitude and longitude
    lat_mesh, lon_mesh = np.meshgrid(lat_grid, lon_grid)
    
    # Reshape park coordinates for broadcasting
    park_lats = parks_df['latitude'].values[:, np.newaxis, np.newaxis]
    park_lons = parks_df['longitude'].values[:, np.newaxis, np.newaxis]
    
    # Calculate distances using haversine formula
    R = 6371  # Earth's radius in kilometers
    
    lat1, lat2 = np.radians(lat_mesh), np.radians(park_lats)
    lon1, lon2 = np.radians(lon_mesh), np.radians(park_lons)
    
    dlat = lat2 - lat1
    dlon = lon2 - lon1
    
    a = np.sin(dlat/2)**2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon/2)**2
    c = 2 * np.arcsin(np.sqrt(a))
    distances = R * c
    
    # Get minimum distance for each point
    min_distances = np.min(distances, axis=0)
    
    return min_distances.T  # Transpose to match original orientation

# Calculate grid points (can save these)
latitudes = np.linspace(-45, -5, 400)
longitudes = np.linspace(110, 160, 400)
distances = calculate_distances(national_parks, latitudes, longitudes)

# Save the calculated distances
np.save('australia_distances.npy', distances)
In [172]:
def get_australia_geojson():
    with open('australia-with-states_782.geojson') as f:
        geojson = json.load(f)
    
    # Extract all state geometries with a small buffer
    geometries = []
    for feature in geojson['features']:
        state = shape(feature['geometry'])
        # Add a very small buffer to handle topology issues
        state = state.buffer(0.0001)
        geometries.append(state)
    
    # Combine all states
    australia = unary_union(geometries)
    
    return australia
In [173]:
australia = get_australia_geojson()

australia_shape = shape(australia)
australia_shape.boundary
Out[173]:
No description has been provided for this image
In [174]:
# Calculate and save the mask 
def create_australia_mask(latitudes, longitudes, australia_shape):
    lat_mesh, lon_mesh = np.meshgrid(latitudes, longitudes)
    mask = np.zeros_like(distances, dtype=bool)
    
    # Prepare the geometry for faster contains checking
    prepared_shape = prep(australia_shape)
    
    # Vectorize the point creation and checking
    points = [Point(lon, lat) for lon, lat in zip(lon_mesh.flatten(), lat_mesh.flatten())]
    mask_flat = [prepared_shape.contains(point) for point in points]
    mask = np.array(mask_flat).reshape(lon_mesh.shape).T
    
    # Save the mask
    np.save('australia_mask.npy', mask)
    return mask

# Create and save mask (run once)
mask = create_australia_mask(latitudes, longitudes, australia_shape)

# For subsequent runs, just load the mask
mask = np.load('australia_mask.npy')
masked_distances = np.ma.masked_array(distances, ~mask)
In [178]:
distances_miles = masked_distances * 0.621371
masked_distances_miles = np.ma.masked_array(distances_miles, ~mask)
In [197]:
colors = ['#444198', '#2e7eb0','#68c294', '#bbe68f', '#e9f793', '#fdec90', '#f7a958', '#f25b35', '#c7263c']
n_bins = 256
custom_cmap = LinearSegmentedColormap.from_list('custom', colors, N=n_bins)

# Create the plot
fig, ax = plt.subplots(figsize=(15, 12))

# Plot the heatmap with masked data
im = ax.imshow(masked_distances, extent=(110, 160, -45, -5), origin='lower', 
               cmap=custom_cmap)

# Plot Australia's outline
boundary = australia_shape.boundary
for line in boundary.geoms:
    x, y = line.xy
    ax.plot(x, y, color='black', linewidth=2)

cbar = plt.colorbar(im, 
                   label='Distance to Nearest National Park (km)',
                   location='right',
                   shrink=0.5,  # Makes it shorter (0-1)
                   aspect=20,   # Makes it thinner (higher number = thinner)
                   pad=0.02)    # Adjusts spacing between colorbar and map


# Set the plot limits to focus on Australia
ax.set_axis_off()


# ax.set_title('Distance to Nearest National Park in Australia')
plt.savefig('australia_national_parks_heatmap.png', dpi=300)
plt.show()
No description has been provided for this image
In [196]:
colors = ['#444198', '#2e7eb0','#68c294', '#bbe68f', '#e9f793', '#fdec90', '#f7a958', '#f25b35', '#c7263c']
n_bins = 256
custom_cmap = LinearSegmentedColormap.from_list('custom', colors, N=n_bins)

# Create the plot
fig, ax = plt.subplots(figsize=(15, 12))

# Plot the heatmap with masked data
im = ax.imshow(masked_distances_miles, extent=(110, 160, -45, -5), origin='lower', 
               cmap=custom_cmap, vmin=0, vmax=400)

# Plot Australia's outline
boundary = australia_shape.boundary
for line in boundary.geoms:
    x, y = line.xy
    ax.plot(x, y, color='black', linewidth=2)

cbar = plt.colorbar(im, 
                   label='Distance to Nearest National Park (miles)',
                   location='right',
                   shrink=0.5,  # Makes it shorter (0-1)
                   aspect=20,   # Makes it thinner (higher number = thinner)
                   pad=0.02)    # Adjusts spacing between colorbar and map


# Set the plot limits to focus on Australia
ax.set_axis_off()


# ax.set_title('Distance to Nearest National Park in Australia')
plt.savefig('australia_national_parks_heatmap_miles.png', dpi=300)
plt.show()
No description has been provided for this image