Source code for prepare_geodata

from ast import Return
import pandas as pd
import geopandas as gpd
import osmnx as ox
from termcolor import cprint
import math
from numpy import random
import os
import shapely
from data import *
#import fiona
from pyproj import CRS, Transformer
from shapely.geometry import Point
from shapely.ops import transform
from shapely.geometry import Polygon


[docs] def get_geodata_from_place(place_name: str) -> gpd.GeoDataFrame: """Get geodata from a place name using the OSMnx library. This function retrieves geodata from a place name using the OSMnx library. The data is filtered for buildings. :param place_name: Location/Place name to get the geodata for. The name of the place must be a valid OSM place name. :type place_name: str :return: A GeoDataFrame containing building data for the specified place. If no data is found, None is returned. :rtype: gpd.GeoDataFrame """ tags = {'building': True} # Get only buildings cprint(f"Start: Retrieving geodata for: {place_name}") # Get the geometry of the place try: gdf_raw_geo = ox.features_from_place(place_name, tags) try: # remove rows with "man_mande" not emtpy, if this column exists gdf_raw_geo = gdf_raw_geo[gdf_raw_geo['man_made'].isna()] except: pass #reset index gdf_raw_geo.reset_index(inplace=True) cprint(f"Done: Geodata for {place_name} successfully retrieved.", "green") return gdf_raw_geo except Exception as e: cprint(f"Error: {e}", "red") return None
[docs] def get_geodata_from_polygon(polygon: shapely.geometry.polygon.Polygon) -> gpd.GeoDataFrame: """Get geodata from a polygon using the OSMnx library. This function retrieves geodata from a polygon using the OSMnx library. The data is filtered for buildings. :param polygon: A shapely polygon object representing the area of interest. The polygon must be a valid shapely polygon. :type polygon: shapely.geometry.polygon.Polygon :return: A GeoDataFrame containing building data for the specified polygon. If no data is found, None is returned. :rtype: _type_ """ tags = {'building': True} # Get only buildings cprint(f"Start: Retrieving geodata for the {polygon}") # Get the geometry of the place try: gdf_raw_geo = ox.features_from_polygon(polygon, tags) try: # remove rows with "man_mande" not emtpy gdf_raw_geo = gdf_raw_geo[gdf_raw_geo['man_made'].isna()] except: pass #reset index # set a numeric index gdf_raw_geo.reset_index(inplace=True) cprint(f"Done: Geodata for the polygon {polygon} successfully retrieved.", "green") return gdf_raw_geo except Exception as e: cprint(f"Error: {e}", "red") return None
[docs] def geodesic_point_buffer(lat: float, lon: float, km: float) -> list: """Perform a geodesic point buffer around a given latitude and longitude. :param lat: Latitude of the center point. :type lat: float :param lon: Longitude of the center point. :type lon: float :param km: Radius of the buffer in kilometers. :type km: float :return: A list of coordinates representing the buffer around the point. :rtype: list """ # Azimuthal equidistant projection aeqd_proj = CRS.from_proj4( f"+proj=aeqd +lat_0={lat} +lon_0={lon} +x_0=0 +y_0=0") tfmr = Transformer.from_proj(aeqd_proj, aeqd_proj.geodetic_crs) buf = Point(0, 0).buffer(km * 1000) # distance in metres return transform(tfmr.transform, buf).exterior.coords[:]
[docs] def polygon_by_circle(lat: float, lon: float, radius: float) -> shapely.geometry.polygon.Polygon: """Generate a polygon representing a circle around a given latitude and longitude. :param lat: Latitude of the center point. :type lat: float :param lon: Longitude of the center point. :type lon: float :param radius: Radius of the circle in kilometers. :type radius: float :raises ValueError: If the radius is negative or greater than 20 km. :return: Returns a polygon representing the circle in the format of a shapely polygon. :rtype: shapely.geometry.polygon.Polygon """ if radius < 0: raise ValueError("Radius must be positive") if radius > 20: raise ValueError("Radius must be less than 20 km") geo_circle = geodesic_point_buffer(lat, lon, radius) polygon = Polygon(geo_circle) return polygon
[docs] def extract_relevat_data(gdf_in: gpd.GeoDataFrame) -> gpd.GeoDataFrame: """Extract the relevant data from the raw geodata that are needed for further analysis. Data that are not available are set to a default value, and a warning is printed. Observe the output in the console to check for missing data and data quality. :param gdf_in: GeoDataFrame with the raw geodata obtained from OSM :type gdf_in: gpd.GeoDataFrame :return: A new GeoDataFrame containing the relevant data for further analysis. :rtype: gpd.GeoDataFrame """ # extract the relevant data from the raw geodata that are needed for further analysis cprint("Start: Extracting relevant data from the raw geodata") # copy the gemetry coulumn gdf_out = gdf_in[['geometry']].copy() # add a unique id for each building gdf_out['building_id'] = 'building_' + gdf_in.index.astype(str) # add the addr: information if available if 'addr:city' in gdf_in.columns: gdf_out['addr:city'] = gdf_in['addr:city'] cprint("Added addr:city", "green") else: gdf_out['addr:city'] = None cprint("Warning: addr:city not found in the raw data", "yellow") if 'addr:postcode' in gdf_in.columns: gdf_out['addr:postcode'] = gdf_in['addr:postcode'] cprint("Added addr:postcode", "green") else: gdf_out['addr:postcode'] = None cprint("Warning: addr:postcode not found in the raw data", "yellow") if 'addr:street' in gdf_in.columns: gdf_out['addr:street'] = gdf_in['addr:street'] cprint("Added addr:street", "green") else: gdf_out['addr:street'] = None cprint("Warning: addr:street not found in the raw data", "yellow") if 'addr:housenumber' in gdf_in.columns: gdf_out['addr:housenumber'] = gdf_in['addr:housenumber'] cprint("Added addr:housenumber", "green") else: gdf_out['addr:housenumber'] = None cprint("Warning: addr:housenumber not found in the raw data", "yellow") # add building information if available (according to the OSM wiki) if 'building' in gdf_in.columns: gdf_out['building'] = gdf_in['building'] cprint("Added building", "green") else: gdf_out['building'] = None cprint("Warning: building not found in the raw data", "yellow") if 'building:levels' in gdf_in.columns: gdf_out['building:levels'] = gdf_in['building:levels'] cprint("Added building:levels", "green") else: gdf_out['building:levels'] = None cprint("Warning: building:levels not found in the raw data", "yellow") if 'height' in gdf_in.columns: gdf_out['height'] = gdf_in['height'] cprint("Added height", "green") else: gdf_out['height'] = None cprint("Warning: height not found in the raw data", "yellow") if 'building_flats' in gdf_in.columns: gdf_out['building_flats'] = gdf_in['building_flats'] cprint("Added building_flats", "green") else: gdf_out['building_flats'] = None cprint("Warning: building_flats not found in the raw data", "yellow") if 'construction_date' in gdf_in.columns: gdf_out['construction_date'] = gdf_in['construction_date'] cprint("Added construction_date", "green") else: gdf_out['construction_date'] = None cprint("Warning: construction_date not found in the raw data", "yellow") cprint("Done: Relevant data extracted from the raw geodata", "green") return gdf_out
[docs] def estimate_data_osm(gdf_in: gpd.GeoDataFrame) -> gpd.GeoDataFrame: """Estimate the required data for the subsequent analysis (e.g. number of floors) just from the OSM data. The function estimates the required data for subsequent analysis. Where not sufficient data is available, the data is estimated from the available data. The estimation is based on the OSM data and might not be accurate, depending on the quality of the OSM data. :param gdf_in: GeoDataFrame containing the relevant data, exracted from the raw geodata. :type gdf_in: gpd.GeoDataFrame :return: A new GeoDataFrame containing the estimated building properties for further analysis. :rtype: gpd.GeoDataFrame """ cprint("Start: Processing data for building classification") gdf_out = gdf_in[['building_id','geometry']].copy() # get construction year gdf_out['year_of_construction'] = gdf_in['construction_date'] gdf_out.loc[gdf_out['year_of_construction'].isna(), 'year_of_construction'] = -99 # set a numeric value for missing data eaysier handling # ensure that the year of construction is an integer gdf_out['year_of_construction'] = gdf_out['year_of_construction'].astype(int) data_avail_year_of_construction = 1 - gdf_in['construction_date'].isna().sum() / len(gdf_in) cprint(f"Data availability for year of construction: {data_avail_year_of_construction*100:.2f} %") # calculate the projected area of the building gdf_out['projected_ground_area'] = gdf_in['geometry'].to_crs(epsg=3035).area # calculted for how mny buildings the projected area is available data_avail_area = (gdf_out['projected_ground_area'] > 0).sum() / len(gdf_out) cprint(f"Data availability for projected ground area: {data_avail_area*100:.2f} %") average_floor_height = 3.5 # average floor height in m; this is a rough estimation and can be adapted small_building_aspect_ratio = 2.5 # average ratio of large_building_aspect_ratio = 12 # average ratio of length to width for large buildings; # tbd: a case distinction between small and large buildings could be implemented to improve # get the number of floors for index, row in gdf_in.iterrows(): try: # try to get the actual number of floors first gdf_out.at[index, 'number_of_floors'] = int(row['building:levels']) except: try: if math.isnan(row['height']) is False: # as alternative estimate the number of buildings by the average height of a floor gdf_out.at[index, 'number_of_floors'] = round(row['height'] / average_floor_height, 0) else: # if no information is available, assume the height based on a average ground area to height ratio # make a distinction between small and large buildings if gdf_out.at[index, 'projected_ground_area'] < 1500: gdf_out.at[index, 'number_of_floors'] = round(math.sqrt(gdf_out.at[index,'projected_ground_area']) / (small_building_aspect_ratio*average_floor_height),0) else: # for large buildings, assume a higher aspect ratio gdf_out.at[index, 'number_of_floors'] = round(math.sqrt(gdf_out.at[index,'projected_ground_area']) / (large_building_aspect_ratio*average_floor_height),0) except: # if no information is available, assume the height based on a average ground area to height ratio if gdf_out.at[index, 'projected_ground_area'] < 1500: gdf_out.at[index, 'number_of_floors'] = round(math.sqrt(gdf_out.at[index,'projected_ground_area']) / (small_building_aspect_ratio*average_floor_height),0) else: # for large buildings, assume a higher aspect ratio gdf_out.at[index, 'number_of_floors'] = round(math.sqrt(gdf_out.at[index,'projected_ground_area']) / (large_building_aspect_ratio*average_floor_height),0) # set the number of floors to 1 if the estimated number of floors yielded 0; zero number of floors would make the area zero as well gdf_out.loc[gdf_out['number_of_floors'] == 0, 'number_of_floors'] = 1 data_avail_number_of_floors = 1 - gdf_in['building:levels'].isna().sum() / len(gdf_in) cprint(f"Data availability for number of floors: {data_avail_number_of_floors*100:.2f} %") # print average and max number of floors cprint(f"Average number of floors: {gdf_out['number_of_floors'].mean():.2f}") cprint(f"Maximum number of floors: {gdf_out['number_of_floors'].max():.2f}") # calculate the number of dwellings for index, row in gdf_in.iterrows(): try: gdf_out.at[index, 'number_of_dwellings'] = int(row['building_flats']) except: # if no information is available, assume the number of dwellings based on the size of the building gdf_out.at[index, 'number_of_dwellings'] = math.ceil(gdf_out.at[index, 'projected_ground_area'] * gdf_out.at[index, 'number_of_floors'] / 250) # assuming 250 m² per dwelling; including that there is a lot of projected area the is not residnetial area data_avail_number_of_dwellings = 1 - gdf_in['building_flats'].isna().sum() / len(gdf_in) cprint(f"Data availability for number of dwellings: {data_avail_number_of_dwellings*100:.2f} %") # get the building category according to the OSM definition: https://wiki.openstreetmap.org/wiki/Key:building # split up multiple entries first for index, row in gdf_in.iterrows(): multiple_entries = row['building'].split(';') if len(multiple_entries) == 0: gdf_out.at[index, 'building_primary'] = 'house' # default value as that is the most freuequently used building type gdf_out.at[index, 'building_secondary'] = 'None' elif len(multiple_entries) == 1: gdf_out.at[index, 'building_primary'] = multiple_entries[0] gdf_out.at[index, 'building_secondary'] = 'None' else: # check if one entry is a garage or carport and assign it to the secondary building type if 'carport' in multiple_entries[0] or 'garage' in multiple_entries[0] or 'garages' in multiple_entries[0] or 'parking' in multiple_entries[0]: gdf_out.at[index, 'building_secondary'] = 'garage' gdf_out.at[index, 'building_primary'] = multiple_entries[1] else: gdf_out.at[index, 'building_primary'] = multiple_entries[0] gdf_out.at[index, 'building_secondary'] = multiple_entries[1] #gdf_in['building_primary'] = gdf_in['building'].str.split(';', expand=True)[0] #gdf_in['building_secondary'] = gdf_in['building'].str.split(';', expand=True)[1] # iterate through the building types and assign a category ratio_heated_projected_area = 0.6 # ratio of heated area to projected area; this value is based on a rough estimation and can be adapted ratio_heated_projected_area_PCI = 0.1 # ratio of heated area to projected area for PCI buildings; to account for less heating in huge industry sites; this value is based on a rough estimation and can be adapted cap_heated_area_SFH = 300 # maximum heated area for a single family house; to account e.g. for farm houses, with larege area but only partially heated; this value is based on a rough estimation and can be adapted for index, row in gdf_out.iterrows(): try: match row['building_primary']: case 'apartments': gdf_out.at[index, 'building_category'] = 'AB' gdf_out.at[index, 'heated_area'] = gdf_out.at[index, 'projected_ground_area'] * gdf_out.at[index, 'number_of_floors'] * ratio_heated_projected_area case 'barracks': gdf_out.at[index, 'building_category'] = 'TH' gdf_out.at[index, 'heated_area'] = gdf_out.at[index, 'projected_ground_area'] * gdf_out.at[index, 'number_of_floors'] * ratio_heated_projected_area case 'bungalow': gdf_out.at[index, 'building_category'] = 'SFH' gdf_out.at[index, 'heated_area'] = gdf_out.at[index, 'projected_ground_area'] * gdf_out.at[index, 'number_of_floors'] * ratio_heated_projected_area if gdf_out.at[index, 'heated_area'] > cap_heated_area_SFH: gdf_out.at[index, 'heated_area'] = cap_heated_area_SFH case 'cabin': gdf_out.at[index, 'building_category'] = 'SFH' gdf_out.at[index, 'heated_area'] = gdf_out.at[index, 'projected_ground_area'] * gdf_out.at[index, 'number_of_floors'] * ratio_heated_projected_area if gdf_out.at[index, 'heated_area'] > cap_heated_area_SFH: gdf_out.at[index, 'heated_area'] = cap_heated_area_SFH case 'detached': gdf_out.at[index, 'building_category'] = 'SFH' gdf_out.at[index, 'heated_area'] = gdf_out.at[index, 'projected_ground_area'] * gdf_out.at[index, 'number_of_floors'] * ratio_heated_projected_area if gdf_out.at[index, 'heated_area'] > cap_heated_area_SFH: gdf_out.at[index, 'heated_area'] = cap_heated_area_SFH case 'annexe': gdf_out.at[index, 'building_category'] = 'SFH' gdf_out.at[index, 'heated_area'] = gdf_out.at[index, 'projected_ground_area'] * gdf_out.at[index, 'number_of_floors'] * ratio_heated_projected_area if gdf_out.at[index, 'heated_area'] > cap_heated_area_SFH: gdf_out.at[index, 'heated_area'] = cap_heated_area_SFH case 'dormitory': gdf_out.at[index, 'building_category'] = 'AB' gdf_out.at[index, 'heated_area'] = gdf_out.at[index, 'projected_ground_area'] * gdf_out.at[index, 'number_of_floors'] * ratio_heated_projected_area case 'farm': gdf_out.at[index, 'building_category'] = 'SFH' gdf_out.at[index, 'heated_area'] = gdf_out.at[index, 'projected_ground_area'] * gdf_out.at[index, 'number_of_floors'] * ratio_heated_projected_area if gdf_out.at[index, 'heated_area'] > cap_heated_area_SFH: gdf_out.at[index, 'heated_area'] = cap_heated_area_SFH case 'ger': gdf_out.at[index, 'building_category'] = 'other' gdf_out.at[index, 'heated_area'] = 0 case 'hotel': gdf_out.at[index, 'building_category'] = 'AB' # assuming a hotel has a similar energy demand as an apartment building gdf_out.at[index, 'heated_area'] = gdf_out.at[index, 'projected_ground_area'] * gdf_out.at[index, 'number_of_floors'] * ratio_heated_projected_area case 'house': gdf_out.at[index, 'building_category'] = 'SFH' gdf_out.at[index, 'heated_area'] = gdf_out.at[index, 'projected_ground_area'] * gdf_out.at[index, 'number_of_floors'] * ratio_heated_projected_area if gdf_out.at[index, 'heated_area'] > cap_heated_area_SFH: gdf_out.at[index, 'heated_area'] = cap_heated_area_SFH case 'houseboat': gdf_out.at[index, 'building_category'] = 'SFH' gdf_out.at[index, 'heated_area'] = gdf_out.at[index, 'projected_ground_area'] * gdf_out.at[index, 'number_of_floors'] * ratio_heated_projected_area if gdf_out.at[index, 'heated_area'] > cap_heated_area_SFH: gdf_out.at[index, 'heated_area'] = cap_heated_area_SFH case 'residential': if gdf_out.at[index, 'number_of_dwellings'] < 2: gdf_out.at[index, 'building_category'] = 'SFH' if gdf_out.at[index, 'heated_area'] > cap_heated_area_SFH: gdf_out.at[index, 'heated_area'] = cap_heated_area_SFH elif gdf_out.at[index, 'number_of_dwellings'] < 5: gdf_out.at[index, 'building_category'] = 'MFH' else: gdf_out.at[index, 'building_category'] = 'AB' gdf_out.at[index, 'heated_area'] = gdf_out.at[index, 'projected_ground_area'] * gdf_out.at[index, 'number_of_floors'] * ratio_heated_projected_area case 'semidetached_house': gdf_out.at[index, 'building_category'] = 'TH' gdf_out.at[index, 'heated_area'] = gdf_out.at[index, 'projected_ground_area'] * gdf_out.at[index, 'number_of_floors'] * ratio_heated_projected_area case 'static_caravan': gdf_out.at[index, 'building_category'] = 'other' gdf_out.at[index, 'heated_area'] = 0 case 'stilt_house': gdf_out.at[index, 'building_category'] = 'other' gdf_out.at[index, 'heated_area'] = 0 case 'terrace': gdf_out.at[index, 'building_category'] = 'TH' gdf_out.at[index, 'heated_area'] = gdf_out.at[index, 'projected_ground_area'] * gdf_out.at[index, 'number_of_floors'] * ratio_heated_projected_area case 'tree_house': gdf_out.at[index, 'building_category'] = 'other' gdf_out.at[index, 'heated_area'] = 0 case 'trullo': gdf_out.at[index, 'building_category'] = 'other' gdf_out.at[index, 'heated_area'] = 0 case 'commercial': gdf_out.at[index, 'building_category'] = 'PCI' gdf_out.at[index, 'heated_area'] = gdf_out.at[index, 'projected_ground_area'] * gdf_out.at[index, 'number_of_floors'] * ratio_heated_projected_area_PCI case 'industrial': gdf_out.at[index, 'building_category'] = 'PCI' gdf_out.at[index, 'heated_area'] = gdf_out.at[index, 'projected_ground_area'] * gdf_out.at[index, 'number_of_floors'] * ratio_heated_projected_area_PCI case 'kiosk': gdf_out.at[index, 'building_category'] = 'other' gdf_out.at[index, 'heated_area'] = 0 case 'office': gdf_out.at[index, 'building_category'] = 'PCI' gdf_out.at[index, 'heated_area'] = gdf_out.at[index, 'projected_ground_area'] * gdf_out.at[index, 'number_of_floors'] * ratio_heated_projected_area_PCI case 'retail': gdf_out.at[index, 'building_category'] = 'PCI' gdf_out.at[index, 'heated_area'] = gdf_out.at[index, 'projected_ground_area'] * gdf_out.at[index, 'number_of_floors'] * ratio_heated_projected_area_PCI case 'supermarket': gdf_out.at[index, 'building_category'] = 'PCI' gdf_out.at[index, 'heated_area'] = gdf_out.at[index, 'projected_ground_area'] * gdf_out.at[index, 'number_of_floors'] * ratio_heated_projected_area_PCI case 'warehouse': gdf_out.at[index, 'building_category'] = 'other' gdf_out.at[index, 'heated_area'] = 0 case 'civic': gdf_out.at[index, 'building_category'] = 'PCI' gdf_out.at[index, 'heated_area'] = gdf_out.at[index, 'projected_ground_area'] * gdf_out.at[index, 'number_of_floors'] * ratio_heated_projected_area_PCI case 'college': gdf_out.at[index, 'building_category'] = 'PCI' gdf_out.at[index, 'heated_area'] = gdf_out.at[index, 'projected_ground_area'] * gdf_out.at[index, 'number_of_floors'] * ratio_heated_projected_area_PCI case 'fire_station': gdf_out.at[index, 'building_category'] = 'PCI' gdf_out.at[index, 'heated_area'] = gdf_out.at[index, 'projected_ground_area'] * gdf_out.at[index, 'number_of_floors'] * ratio_heated_projected_area_PCI case 'government': gdf_out.at[index, 'building_category'] = 'PCI' gdf_out.at[index, 'heated_area'] = gdf_out.at[index, 'projected_ground_area'] * gdf_out.at[index, 'number_of_floors'] * ratio_heated_projected_area_PCI case 'hospital': gdf_out.at[index, 'building_category'] = 'PCI' gdf_out.at[index, 'heated_area'] = gdf_out.at[index, 'projected_ground_area'] * gdf_out.at[index, 'number_of_floors'] * ratio_heated_projected_area_PCI case 'kindergarten': gdf_out.at[index, 'building_category'] = 'PCI' gdf_out.at[index, 'heated_area'] = gdf_out.at[index, 'projected_ground_area'] * gdf_out.at[index, 'number_of_floors'] * ratio_heated_projected_area_PCI case 'museum': gdf_out.at[index, 'building_category'] = 'PCI' gdf_out.at[index, 'heated_area'] = gdf_out.at[index, 'projected_ground_area'] * gdf_out.at[index, 'number_of_floors'] * ratio_heated_projected_area_PCI case 'public': gdf_out.at[index, 'building_category'] = 'PCI' gdf_out.at[index, 'heated_area'] = gdf_out.at[index, 'projected_ground_area'] * gdf_out.at[index, 'number_of_floors'] * ratio_heated_projected_area_PCI case 'school': gdf_out.at[index, 'building_category'] = 'PCI' gdf_out.at[index, 'heated_area'] = gdf_out.at[index, 'projected_ground_area'] * gdf_out.at[index, 'number_of_floors'] * ratio_heated_projected_area_PCI case 'university': gdf_out.at[index, 'building_category'] = 'PCI' gdf_out.at[index, 'heated_area'] = gdf_out.at[index, 'projected_ground_area'] * gdf_out.at[index, 'number_of_floors'] * ratio_heated_projected_area_PCI case 'sports_hall': gdf_out.at[index, 'building_category'] = 'PCI' gdf_out.at[index, 'heated_area'] = gdf_out.at[index, 'projected_ground_area'] * gdf_out.at[index, 'number_of_floors'] * ratio_heated_projected_area_PCI case 'yes': # this can cause problems, as the 'yes' tag is used very inconsistently in OSM, and therefore some buildings can be missmatched! if gdf_out.at[index, 'number_of_dwellings'] < 1: gdf_out.at[index, 'building_category'] = 'other' gdf_out.at[index, 'heated_area'] = 0 elif gdf_out.at[index, 'number_of_dwellings'] < 2: gdf_out.at[index, 'building_category'] = 'SFH' gdf_out.at[index, 'heated_area'] = gdf_out.at[index, 'projected_ground_area'] * gdf_out.at[index, 'number_of_floors'] * ratio_heated_projected_area if gdf_out.at[index, 'heated_area'] > cap_heated_area_SFH: gdf_out.at[index, 'heated_area'] = cap_heated_area_SFH elif gdf_out.at[index, 'number_of_dwellings'] < 5: gdf_out.at[index, 'building_category'] = 'MFH' gdf_out.at[index, 'heated_area'] = gdf_out.at[index, 'projected_ground_area'] * gdf_out.at[index, 'number_of_floors'] * ratio_heated_projected_area else: gdf_out.at[index, 'building_category'] = 'AB' gdf_out.at[index, 'heated_area'] = gdf_out.at[index, 'projected_ground_area'] * gdf_out.at[index, 'number_of_floors'] * ratio_heated_projected_area case default: gdf_out.at[index, 'building_category'] = 'other' gdf_out.at[index, 'heated_area'] = 0 except: gdf_out.at[index, 'building_category'] = 'other' gdf_out.at[index, 'heated_area'] = 0 # decrease heating area by parking space which is not heated gdf_out.loc[gdf_out['building_secondary'] != 'None','heated_area'] = gdf_out.loc[gdf_out['building_secondary'] != 'None', 'heated_area'] * 0.5 cprint("Done: Data processing for building classification", "green") return gdf_out
[docs] def merge_other_data(gdf_in: gpd.GeoDataFrame) -> gpd.GeoDataFrame: """This function is a dummy to merge in other data sources like cadastre data if available. If more accurate building data is available, it can be merged in here. The function is a placeholder for future development and does not do anything at the moment. :param gdf_in: GeoDataFrame containing the building data. :type gdf_in: gpd.GeoDataFrame :return: A new GeoDataFrame containing the updated and merged building data. :rtype: gpd.GeoDataFrame """ gdf_out = gdf_in.copy() # add code here to merge in other data sources e.g. more accurate building data from the cadastre return gdf_out
[docs] def adjust_num_of_dwelling(gdf_in: gpd.GeoDataFrame): """Set the number of dwellings to 0 for non residential buildings and limit the number of dwellings to a maximum of 500. The number of dwellings is set to 0 for non residential buildings (e.g. PCI) and the number of dwellings is limited to a maximum of 500. This is done to avoid unrealistic values for the number of dwellings. In the subsequent analysis, the number of dwellings is used to calculate the specific heating demand and the yearly heating demand. :param gdf_in: GeoDataFrame containing the building data. :type gdf_in: gpd.GeoDataFrame """ #remove the number of dwelling for non residential buildings gdf_in.loc[gdf_in['building_category'] == 'PCI', 'number_of_dwellings'] = 0 gdf_in.loc[gdf_in['building_category'] == 'other', 'number_of_dwellings'] = 0 # TBD: make that parameterizable # limit the number of dwellings to a maximum of 500 gdf_in.loc[gdf_in['number_of_dwellings'] > 500, 'number_of_dwellings'] = 500
[docs] def remove_zero_area_buildings(gdf_in: gpd.GeoDataFrame) -> gpd.GeoDataFrame: """Removes buildings with a projected area of 0. For buildings with zero area, no further analysis is possible. The buildings are removed from the GeoDataFrame to avoid errors in the subsequent analysis. Most buildings without a projected area might be buildings that are not residential or not heated. :param gdf_in: GeoDataFrame containing the building data. :type gdf_in: gpd.GeoDataFrame :return: A new GeoDataFrame containing the building data without buildings with a projected area of 0. :rtype: gpd.GeoDataFrame """ #cprint("Start: Removing buildings with a projected area of 0") # delete the building rows with a projected area of 0; when the area is not available at all, no other estimation is possible len_before = len(gdf_in) gdf_out = gdf_in[gdf_in['projected_ground_area'] > 0].copy() len_after = len(gdf_out) cprint(f"Done: Removed {len_before-len_after} buildings with a projected area of zero", "green") return gdf_out
[docs] def get_spec_HD(gdf_in: gpd.GeoDataFrame, path_spec_HD: str): """Get the specific heating demand of the buildings. The specific heating demand is obtained from the excel file containing the specific heating demand data. The country speccific data can be obtained from the TABULA webtool: https://webtool.building-typology.eu/#bm and has to be adjusted to the format of the excel file. :param gdf_in: GeoDataFrame containing the building data, including the year of construction and the building category. :type gdf_in: gpd.GeoDataFrame :param path_spec_HD: Path to the excel file containing the specific heating demand data. :type path_spec_HD: str """ # load data for specific heating demand spec_HD = pd.read_excel(path_spec_HD, index_col=[0,1]) # iterate trough all rows and columns in the data for year in spec_HD.index: for bulding_type in spec_HD.columns: #print(row, column, data.loc[row, column]) gdf_in.loc[(gdf_in['building_category'] == bulding_type) & (gdf_in['year_of_construction'] >= year[0]) & (gdf_in['year_of_construction'] <= year[1] ), 'specific_HD'] = spec_HD.at[year, bulding_type]
[docs] def get_yearly_HD(gdf_in: gpd.GeoDataFrame): """Calculate the yearly heating demand based on the specific heating demand and the heated area :param gdf_in: GeoDataFrame containing specific heating demand and heated area. :type gdf_in: gpd.GeoDataFrame """ gdf_in['YearlyDemand'] = gdf_in['specific_HD'] * gdf_in['heated_area']
[docs] def calc_heating_hours(case_study_name: str, config: dict, Tset = 21.0, Tsetback=18.0) -> float: """Calculate the heating degree hours based on the outside temperature and the setpoint temperature. The heating degree hours are calculated for the whole year, for a reference actitive occupancy profile. :param case_study_name: Name of the case study, used to load the temperature data :type case_study_name: str :param config: Config dictionary :type config: dict :param Tset: Setpoint temperature for buildings, defaults to 21 :type Tset: float, optional :param Tsetback: Setback temperature for buildings, defaults to 18 :type Tsetback: float, optional :return: Number of heating degree hours for the whole year in °C*h :rtype: float """ df_temp = load_temp_data(case_study_name, config) # calculate a generic Tset profile for the year that is Tset from 6 - 22h and Tsetback from 22 - 6h df_temp['hour'] = df_temp.time.dt.hour df_temp['Tset'] = Tset df_temp.loc[df_temp['hour'] < 6, 'Tset'] = Tsetback df_temp.loc[df_temp['hour'] > 21, 'Tset'] = Tsetback df_temp['Tdiff'] = df_temp['Tset'] - df_temp['T_out'] heatinghours = df_temp.loc[df_temp['Tdiff'] > 0, 'Tdiff'].sum() return heatinghours
[docs] def estimate_thermal_properties(gdf_in: gpd.GeoDataFrame, heatinghours=50000.0, speHeatStorCap = 0.04, surface_to_area = 2.5): """ Estimate the thermal properties of the buildings based on the specific heating demand and the heated area. The estimated thermal properties are scaled up for the whole building, that means all parameters refere to the total building. :param gdf_in: GeoDataFrame containing the building data. :type gdf_in: gpd.GeoDataFrame :param heatinghours: Number of heating hours per year. Sumed up the temperature difference between setpoint and outside temperature for all hours of the year., defaults to 50000 :type heatinghours: float, optional :param speHeatStorCap: specific heat storage capacity of the building material in kWh/m²K of a wall of average thickness, defaults to 0.04 :type speHeatStorCap: float, optional :param surface_to_area: float ratio of thermal active area of the building (i.e. walls, roof, floor) to the ground area of the building (avoid double counting of indoor walls), defaults to 2.5 :type surface_to_area: float, optional """ gdf_in['GeneralisedThermCond'] = gdf_in['YearlyDemand'] / heatinghours gdf_in['GeneralisedThermCap'] = gdf_in['projected_ground_area'] * gdf_in['number_of_floors'] * surface_to_area * speHeatStorCap gdf_in['MaxDemand'] = gdf_in['GeneralisedThermCond'] * 25 + gdf_in['GeneralisedThermCap'] * 2 #assuming the max heating power to heat at 25 °C temp. diff, and be able to rise temp by 2 °C per hour
[docs] def define_setpontTemp(gdf_in: gpd.GeoDataFrame, temp_setpoint = 22.0, std_setpoint = 2.0, temp_setback = 17.0, std_setback = 2.0): """Define the setpoint temperature and the setback temperature for the buildings. The values are randomly generated based on a normal distribution around the mean values. If real data is available, this function can be replaced by the real data. :param gdf_in: GeoDataFrame containing the building data. :type gdf_in: gpd.GeoDataFrame :param temp_setpoint: mean value for the setpoint temperature, defaults to 22.0 :type temp_setpoint: float, optional :param std_setpoint: standard deviation for the setpoint temperature, defaults to 2.0 :type std_setpoint: float, optional :param temp_setback: mean value for the setback temperature, defaults to 17.0 :type temp_setback: float, optional :param std_setback: standard deviation for the setback temperature, defaults to 2.0 :type std_setback: float, optional """ gdf_in['temp_setpoint'] = random.normal(temp_setpoint, std_setpoint, len(gdf_in)) gdf_in['temp_setback'] = random.normal(temp_setback, std_setback, len(gdf_in))
[docs] def estimate_local_heat_prod_costs(gdf_in: gpd.GeoDataFrame, cost_per_kWh = 0.15): """Estimate the local heating production costs based on the cost per kWh. The costs are randomly generated based on a normal distribution around the mean value. If real data is available, this function can be replaced by the real data. :param gdf_in: GeoDataFrame containing the building data. :type gdf_in: gpd.GeoDataFrame :param cost_per_kWh: mean value for the heating costs per kWh, defaults to 0.15 :type cost_per_kWh: float, optional """ gdf_in['LocalHeatProdCosts'] = abs(random.normal(cost_per_kWh, 0.02, len(gdf_in)))
[docs] def estimate_internal_gainfactor(gdf_in: gpd.GeoDataFrame, gain_per_dwelling = 0.12): """Estimate the internal gain factor for the buildings. The gain is scaled per dwelling, with the input parameter gain_per_dwelling. :param gdf_in: GeoDataFrame containing the building data. :type gdf_in: gpd.GeoDataFrame :param gain_per_dwelling: value for the internal gain per dwelling in kW/dwelling, defaults to 0.2 :type gain_per_dwelling: float, optional """ gdf_in['InternalGainFactor'] = gdf_in['number_of_dwellings'] * gain_per_dwelling
[docs] def estimate_solar_gainfactor(gdf_in: gpd.GeoDataFrame, window_fraction = 0.08): """Estimate the solar gain factor for the buildings. The gain is scaled per dwelling, with the input parameter window_fraction. If more accurate data is available, this function can be replaced by the real data. :param gdf_in: GeoDataFrame containing the building data. :type gdf_in: gpd.GeoDataFrame :param window_fraction: share of window area of total facade area, defaults to 0.1 :type window_fraction: float, optional """ # estimate the area of one side of the building and multiply it with the share of the window area gdf_in['SolarGainFactor'] = gdf_in['number_of_floors'] * gdf_in['projected_ground_area'].apply(lambda x: math.sqrt(x)) * window_fraction
[docs] def write_buildingdata_to_disk(gdf_in: gpd.GeoDataFrame, casestudy: str, config: dict): """Write the building data to disk as a GeoJSON file. The function checks if the directory for the case study exists and creates it if not. The building data is then written to disk in the specified format. :param gdf_in: GeoDataFrame containing the building data :type gdf_in: gpd.GeoDataFrame :param casestudy: name of the casestudy :type casestudy: str :param config: configuration dictionary :type config: dict """ # check if directory exists data_path = os.path.join(casestudy) if not os.path.exists(data_path): os.makedirs(data_path) gdf_in.to_file(os.path.join(data_path,config['building_data']['buildings_gdf']), driver='GeoJSON') cprint("Done: Building data written to disk", "green")
[docs] def generate_complete_geodataset(casstudy_name: str, location) -> gpd.GeoDataFrame: """Run a complete case study for a given location. The function generates a new case study folder if the folder does not exist yet. It then loads the building data from OpenStreetMap and processes it to get the relevant data. The function estimates the thermal properties of the buildings and writes the data to disk. The function returns the processed building data as a GeoDataFrame. :param casstudy_name: Name of the case study :type casstudy_name: str :param location: Geographical location of the case study. Can be a polygon (must be polygon) or a place name (string). :type location: str or shapely.geometry.polygon.Polygon :return: A GeoDataFrame containing the building data for the case study with all properites. :rtype: gpd.GeoDataFrame """ # load config config = load_config() # generate a new case study folder if not exists generate_new_case_study(casstudy_name, config) # check if the location is a polygon or a place name if isinstance(location, shapely.geometry.polygon.Polygon): gdf_raw_geo = get_geodata_from_polygon(location) else: gdf_raw_geo = get_geodata_from_place(location) # Extract the relevant data gdf_relevant_data = extract_relevat_data(gdf_raw_geo) # Estimate the data gdf_estimated_data = estimate_data_osm(gdf_relevant_data) # Merge other data gdf_merged = merge_other_data(gdf_estimated_data) # remove buildings with a projected area of 0 gdf_complete = remove_zero_area_buildings(gdf_merged) # adjust the number of dwellings for non residential buildings adjust_num_of_dwelling(gdf_complete) # Get the specific heating demand path_spec_HD = os.path.join(casstudy_name,config['parameter_dir'], config['building_data_dir'], config['case_study_data']['building_typology']) get_spec_HD(gdf_complete, path_spec_HD) # Get the yearly heating demand get_yearly_HD(gdf_complete) # estimate the number of heating hours in the year heatinghours = calc_heating_hours(casstudy_name, config) # Estimate the thermal properties estimate_thermal_properties(gdf_complete, heatinghours) # Define the setpoint temperature define_setpontTemp(gdf_complete) # Estimate the local heating production costs estimate_local_heat_prod_costs(gdf_complete) # Estimate the internal gain factor estimate_internal_gainfactor(gdf_complete) # Estimate the solar gain factor estimate_solar_gainfactor(gdf_complete, 0.1) # Write the building data to disk write_buildingdata_to_disk(gdf_complete, casstudy_name, config) return gdf_complete
if __name__ == "__main__": casestudy = "Frauental" location = "Frauental, Styria, Austria" # create a polygon for the location generate_complete_geodataset(casestudy, location)