Source code for RES.boundaries

# Import Global Packages
from pathlib import Path
import geopandas as gpd
import pygadm
from dataclasses import dataclass
import inspect

# Import local packages
from RES.AttributesParser import AttributesParser
import RES.utility as utils
# Base value for the `level` argument passed to utils.print_update(); the
# methods in this module pass PRINT_LEVEL_BASE+1 or PRINT_LEVEL_BASE+2.
# NOTE(review): presumably controls log indentation/verbosity — confirm in RES.utility.
PRINT_LEVEL_BASE=4
@dataclass
class GADMBoundaries(AttributesParser):
    """
    GADM (Global Administrative Areas) boundary processor for regional analysis.

    This class handles the retrieval, processing, and management of administrative
    boundaries from the GADM dataset. It provides functionality to extract specific
    regional boundaries at administrative level 2 (typically states/provinces/districts)
    for renewable energy resource assessment areas.

    INHERITED METHODS FROM AttributesParser:
    ----------------------------------------
    - get_gadm_config() -> Dict[str, dict]: Get GADM configuration from config file
    - get_default_crs() -> str: Get default coordinate reference system ('EPSG:4326')
    - get_country() -> str: Get country name from config file
    - get_region_name() -> str: Get region name from config file using region_short_code
    - get_region_mapping() -> Dict[str, dict]: Get region mapping dictionary
    - is_region_code_valid() -> bool: Validate region short code
    - load_config() -> Dict[str, dict]: Load YAML configuration file
    - get_excluder_crs() -> int: Get recommended CRS for excluder operations
    - get_vis_dir() -> Path: Get visualization directory path
    - region_code_validity (property): Boolean property for region code validation
    - Plus other utility methods for config access

    Parameters
    ----------
    config_file_path : str or Path
        Path to configuration file containing GADM settings
    region_short_code : str
        Short code identifying the target region within the country
    resource_type : str
        Resource type (passed through from parent workflow)

    Attributes
    ----------
    admin_level : int
        GADM administrative level (fixed at 2 for regional districts)
    gadm_root : Path
        Root directory for GADM data storage
    gadm_processed : Path
        Directory for processed regional boundary files
    crs : str
        Coordinate reference system ('EPSG:4326')
    country : str
        Country name extracted from configuration
    region_file : Path
        Path to processed regional boundary file
    boundary_datafields : dict
        Mapping of GADM fields to standardized field names
    country_file : Path
        Path to country-level GADM boundary file
    boundary_country : gpd.GeoDataFrame
        GeoDataFrame containing country-level boundaries
    boundary_region : gpd.GeoDataFrame
        GeoDataFrame containing region-specific boundaries
    actual_boundary : gpd.GeoDataFrame
        GeoDataFrame containing the actual regional boundary geometry
    bounding_box : dict
        Dictionary containing bounding box coordinates (minx, maxx, miny, maxy)

    Methods
    -------
    get_country_boundary(country=None, force_update=False) -> gpd.GeoDataFrame
        Download and process complete country GADM boundaries at administrative level 2
    get_region_boundary(region_name=None, force_update=False) -> gpd.GeoDataFrame
        Extract and process specific regional boundary with standardized field names
    get_bounding_box() -> tuple
        Generate minimum bounding rectangle for region and return
        (bounding_box_dict, boundary_gdf)
    show_regions(basemap='CartoDB positron', save_path='vis/regions', save=False) -> folium.Map
        Create interactive folium map visualization of regional boundaries
    run() -> gpd.GeoDataFrame or None
        Execute complete boundary processing workflow and return regional boundary
        GeoDataFrame

    Examples
    --------
    Extract British Columbia boundaries:

    >>> from RES.boundaries import GADMBoundaries
    >>> boundaries = GADMBoundaries(
    ...     config_file_path="config/config_BC.yaml",
    ...     region_short_code="BC",
    ...     resource_type="wind"
    ... )
    >>> bc_boundary = boundaries.get_region_boundary()
    >>> country_bounds = boundaries.get_country_boundary("Canada")
    >>> bbox, actual_boundary = boundaries.get_bounding_box()
    >>> interactive_map = boundaries.show_regions(save=True)
    >>> result = boundaries.run()  # Execute full workflow

    Notes
    -----
    - Uses pygadm package for GADM data access and download
    - Automatically handles data caching to avoid repeated downloads
    - Processes boundaries into GeoJSON format for efficient storage
    - Standardizes field names for consistent downstream processing
    - Administrative level 2 chosen to balance spatial resolution with data availability
    - All geometries maintained in WGS84 (EPSG:4326) for global compatibility
    - Region validation is performed using inherited region_code_validity property
    - Interactive maps are created using folium with optional save functionality

    Dependencies
    ------------
    - pygadm: GADM data access and processing
    - geopandas: Spatial data manipulation
    - folium: Interactive map visualization (via geopandas.explore())
    - pathlib: Path handling
    - RES.AttributesParser: Parent class for configuration management
    - RES.utility: Utility functions for logging and updates

    Raises
    ------
    ValueError
        If the country is not found in the GADM dataset or if region code is invalid
    Exception
        If there is an error fetching or loading the GADM data
    """

    def __post_init__(self):
        """Initialise inherited state, then resolve GADM paths and settings from the config."""
        # Call the parent class __post_init__ to initialize inherited attributes
        super().__post_init__()

        self.required_args = {  # order doesn't matter
            "config_file_path": self.config_file_path,
            "region_short_code": self.region_short_code,
            "resource_type": self.resource_type,
        }

        # Hardcoded to keep the workflow intact: the workflow depends on the
        # Regional District name, i.e. level 2 boundaries.
        self.admin_level: int = 2

        # Setup paths and ensure directories exist
        self.gadm_config = self.get_gadm_config()  # INHERITED METHOD from AttributesParser
        self.gadm_root = Path(self.gadm_config['root'])
        self.gadm_root.mkdir(parents=True, exist_ok=True)
        self.gadm_processed = Path(self.gadm_config['processed'])
        self.gadm_processed.mkdir(parents=True, exist_ok=True)

        self.boundary_datafields = self.gadm_config.get('datafield_mapping')
        self.crs = self.get_default_crs()  # INHERITED METHOD from AttributesParser
        self.country = self.get_country()  # INHERITED METHOD from AttributesParser
        self.region_file: Path = self._region_file_path()

    def _region_file_path(self) -> Path:
        """Build the processed GeoJSON path for the current country, level and region code."""
        return Path(self.gadm_processed) / f'gadm41_{self.country}_L{self.admin_level}_{self.region_short_code}.geojson'

    def get_country_boundary(self, country: str = None, force_update: bool = False) -> gpd.GeoDataFrame:
        """
        Retrieves and prepares the GADM boundaries dataset for the specified country
        (Administrative Level 2).

        The result is cached as GeoJSON under ``gadm_root``; the cached file is reused
        unless ``force_update`` is True.

        Args:
            country (str): The name of the country to fetch GADM data for. If None, the
                country already stored on the instance (from the user config file) is used.
                When given, it replaces ``self.country``.
            force_update (bool): If True, re-fetch the GADM data even if a local file exists.

        Returns:
            gpd.GeoDataFrame: GeoDataFrame of the country's GADM regions. Freshly fetched
            data is assigned ``self.crs``.

        Dependency:
            Depends on pygadm package to fetch the GADM data.

        :raises ValueError: If the country is not found in the GADM dataset.
        :raises Exception: If there is an error fetching or loading the GADM data.
        """
        # Store the user input (via method args)
        if country is not None:
            # NOTE(review): str.capitalize() lower-cases everything after the first
            # character (e.g. "United States" -> "United states"); kept as-is because
            # the cached file names depend on it — confirm this is intended.
            self.country = country.capitalize()
        utils.print_update(level=PRINT_LEVEL_BASE+1, message=f"{__name__} | Country Selected: {self.country}.")

        self.country_file: Path = Path(self.gadm_root) / f'gadm41_{self.country}_L{self.admin_level}.geojson'

        try:
            if self.country_file.exists() and not force_update:
                # Load the country gdf from local file
                utils.print_update(level=PRINT_LEVEL_BASE+1, message=f"{__name__} | Loading GADM data for {self.country} from local datafile {self.country_file}.")
                self.boundary_country = gpd.read_file(self.country_file)
            else:
                # Fetch and save data if file does not exist or force_update is True
                utils.print_update(level=PRINT_LEVEL_BASE+1, message=f"{__name__} | Fetching GADM data for {self.country} at Administrative Level {self.admin_level}....from source: https://gadm.org/data.html")
                fetched_gdf: gpd.GeoDataFrame = pygadm.AdmItems(name=self.country, content_level=self.admin_level)
                # set_crs() returns a new GeoDataFrame unless inplace=True, so the
                # result has to be kept for the CRS assignment to take effect.
                fetched_gdf = fetched_gdf.set_crs(self.crs)
                self.boundary_country = fetched_gdf

                # Save to local file
                self.boundary_country.to_file(self.country_file, driver='GeoJSON')
                utils.print_update(level=PRINT_LEVEL_BASE+1, message=f"{__name__} | GADM data saved to {self.country_file}.")

            return self.boundary_country

        except Exception as e:
            # Log for the user, then let the original exception propagate.
            utils.print_update(level=PRINT_LEVEL_BASE+1, message=f"{__name__} | Error fetching or loading GADM data: {e}")
            raise

    def get_region_boundary(self, region_name: str = None, force_update: bool = False) -> gpd.GeoDataFrame:
        """
        Prepares the boundaries for the specified region within the country.
        The default datafields (NAME_0, NAME_1, NAME_2) are renamed to match the
        'datafield_mapping' of the user config file, and the result is cached as GeoJSON
        under ``gadm_processed``.

        Args:
            region_name (str): Optional override. Despite its name, the value is
                upper-cased and stored as ``region_short_code``; the region name itself
                is then looked up via ``get_region_name()``. If None, the short code
                already stored on the instance is used.
            force_update (bool): To force update the data and formatting.

        Returns:
            gpd.GeoDataFrame: GeoDataFrame of the region boundaries.

        Raises:
            ValueError: If the region code is invalid or no data is found for the
                specified region.
        """
        if region_name is not None:
            self.region_short_code = region_name.upper()
            # The processed-file path embeds the short code, so it must follow the override.
            self.region_file = self._region_file_path()

        # Validate before any lookup so an invalid code always surfaces as ValueError.
        if not self.region_code_validity:
            raise ValueError(f"{__name__} | @ LINE {inspect.currentframe().f_lineno} Invalid region code: {self.region_short_code}.")

        # Always resolve the name from the (possibly overridden) short code.
        self.region_name = self.get_region_name()  # INHERITED METHOD from AttributesParser

        utils.print_update(level=PRINT_LEVEL_BASE+2, message=f"{__name__}| Region Set to >> Short Code : {self.region_short_code}, Name: {self.region_name}).")
        utils.print_update(level=PRINT_LEVEL_BASE+2, message=f"{__name__}| Collecting regional boundary...")

        # The regional file lives in 'processed' because its column names have been
        # modified from the source.
        if self.region_file.exists() and not force_update:
            # There is a local file and no update required
            utils.print_update(level=PRINT_LEVEL_BASE+1, message=f"{__name__}| Loading GADM boundaries (Sub-provincial | level =2) for {self.region_name} from local file {self.region_file}.")
            self.boundary_region: gpd.GeoDataFrame = gpd.read_file(self.region_file)
            return self.boundary_region

        # No usable local file: filter the region out of the country data and save locally.
        if self.multi_country_flag:
            # In multi-country mode the "region" is itself a country (matched on NAME_0).
            utils.print_update(level=PRINT_LEVEL_BASE+1, message=f"{__name__} | Processing the Boundaries for Multi-Country Region : {self.country}.")
            country_gdf = self.get_country_boundary(self.region_name, force_update)
            region_gdf = country_gdf.loc[country_gdf['NAME_0'] == self.region_name]
        else:
            country_gdf = self.get_country_boundary(self.country, force_update)
            region_gdf = country_gdf.loc[country_gdf['NAME_1'] == self.region_name]

        if region_gdf.empty:
            utils.print_update(level=PRINT_LEVEL_BASE+1, message=f"{__name__}| No data found for region '{self.region_name}'.")
            utils.print_update(level=PRINT_LEVEL_BASE+1, message=f"{__name__}| | @ LINE | Consider revising '{self.region_name}' to match source (e.g. https://gadm.org/maps.html); Select 'show sub-divisions' to get the list of Supported Regional Names")
            raise ValueError(f"{__name__} | @ LINE {inspect.currentframe().f_lineno} | No data found for region '{self.region_name}'.")

        # Keep only the name columns and geometry, renamed per the config mapping.
        source_fields = ('NAME_0', 'NAME_1', 'NAME_2')
        region_gdf = region_gdf[[*source_fields, 'geometry']].rename(
            columns={field: self.boundary_datafields[field] for field in source_fields}
        )

        self.boundary_region: gpd.GeoDataFrame = region_gdf
        self.boundary_region.to_file(self.region_file, driver='GeoJSON')
        utils.print_update(level=PRINT_LEVEL_BASE+1, message=f"{__name__}| GADM data for {self.region_name} saved to {self.region_file}.")

        return self.boundary_region

    def get_bounding_box(self) -> tuple:
        """
        This method loads the region boundary using `get_region_boundary()` method and
        gets the Minimum Bounding Rectangle (MBR).

        Returns:
            tuple: ``(bounding_box, actual_boundary)`` where ``bounding_box`` is a dict
            with keys 'minx', 'maxx', 'miny', 'maxy' and ``actual_boundary`` is the
            region's boundary GeoDataFrame. Both are also stored on the instance.

        Purpose:
            To be used internally to get the bounding box of the region to set ERA5
            cutout boundaries.
        """
        utils.print_update(level=PRINT_LEVEL_BASE+1, message=f"{__name__}| Processing regional bounding box...")
        self.actual_boundary = self.get_region_boundary()

        utils.print_update(level=PRINT_LEVEL_BASE+1, message=f"{__name__}| Setting up the Minimum Bounding Region (MBR) for {self.region_short_code}...")
        # total_bounds is ordered (minx, miny, maxx, maxy).
        min_x, min_y, max_x, max_y = self.actual_boundary.geometry.total_bounds

        self.bounding_box: dict = {
            'minx': min_x,
            'maxx': max_x,
            'miny': min_y,
            'maxy': max_y,
        }

        return self.bounding_box, self.actual_boundary

    def show_regions(self, basemap: str = 'CartoDB positron', save_path: str = 'vis/regions', save: bool = False):
        """
        Create (and optionally save) an interactive map for the specified region.

        Args:
            basemap (str): The basemap to use (default is 'CartoDB positron').
            save_path (str): Directory in which the HTML map is written, as
                ``<save_path>/<region_short_code>.html``.
            save (bool): If True, write the map to ``save_path``; if False (default),
                skip saving to a local file.

        Returns:
            folium.Map: An interactive map object showing the region boundaries, or
            None if no regional boundary is available.
        """
        boundary_region = self.get_region_boundary()
        if boundary_region is None:
            # Defensive guard: nothing to draw, and no map object to return.
            return None

        # NOTE(review): 'Region' is presumably one of the renamed columns from the
        # config's datafield_mapping — confirm against the config file.
        region_map = boundary_region.explore('Region', legend=True, tiles=basemap)

        if save:
            file_path = Path(save_path) / f"{self.region_short_code}.html"
            file_path.parent.mkdir(parents=True, exist_ok=True)
            # Pass a plain string so the save call does not depend on Path support.
            region_map.save(str(file_path))
            utils.print_update(level=PRINT_LEVEL_BASE+1, message=f"{__name__}| Interactive map for '{self.region_short_code}' saved to {file_path}.")
        else:
            utils.print_update(level=PRINT_LEVEL_BASE+1, message=f"{__name__}| Skipping the save to local directories as 'save' is set to False.")

        return region_map

    def run(self):
        """
        Executes the process of extracting boundaries and creating an interactive map.
        To be used as a main method to run the class's sequential tasks.

        Returns:
            gpd.GeoDataFrame or None: The regional boundary GeoDataFrame, or None when
            the region code is not valid.
        """
        if not self.region_code_validity:
            utils.print_update(level=PRINT_LEVEL_BASE+1, message=f"{__name__}| Region code is not valid.")
            return None

        # get_bounding_box() already loads the regional boundary and returns it,
        # so a separate get_region_boundary() call is not needed.
        _, region_gadm_gdf = self.get_bounding_box()
        self.show_regions()
        return region_gadm_gdf