Use Case: Swiss maps
2024-03-25
Get all the R code of this talk: felixluginbuhl.com/talks
Disclamer: I am a self-trained R mapper!1
Online presence: felixluginbuhl.com, github.com/lgnbhl.
International level:
European level:
Country level (Switzerland):
Get world data from the Natural Earth Project.
You also can access physical geodata with ne_download()
.
Get national data with “country” argument (“geounit” for metropolitan only):
Get “state” (cantonal) level data with ne_states()
.
Access Eurostat Mapping API.
Swiss national geoportal (Swiss Stac API) with bfs_get_catalog_geodata()
.
library(BFS)
library(dplyr) # for glimpse()
#bfs_download_geodata() #download by collection_id
bfs_get_catalog_geodata(include_metadata = FALSE) |> glimpse()
Rows: 332
Columns: 3
$ collection_id <chr> "ch.agroscope.feuchtflaechenpotential-kulturlandschaft",…
$ type <chr> "API", "API", "API", "API", "API", "API", "API", "API", …
$ href <chr> "https://data.geo.admin.ch/api/stac/v0.9/collections/ch.…
The ThemaKart project gives access to various thematic maps.
A typical base maps ThemaKart file looks like this:
You can access base maps files with bfs_get_base_maps()
.
# list of geometry names: https://www.bfs.admin.ch/asset/en/24025645
switzerland_sf <- bfs_get_base_maps(geom = "suis")
communes_sf <- bfs_get_base_maps(geom = "polg", date = "20230101")
districts_sf <- bfs_get_base_maps(geom = "bezk")
cantons_sf <- bfs_get_base_maps(geom = "kant")
cantons_capitals_sf <- bfs_get_base_maps(geom = "stkt", type = "Pnts", category = "kk")
lakes_sf <- bfs_get_base_maps(geom = "seen", category = "11")
library(ggplot2)
ggplot() +
geom_sf(data = communes_sf, fill = "snow", color = "grey45") +
geom_sf(data = lakes_sf, fill = "lightblue2", color = "black") +
geom_sf(data = districts_sf, fill = "transparent", color = "grey65") +
geom_sf(data = cantons_sf, fill = "transparent", color = "black") +
geom_sf(data = cantons_capitals_sf, shape = 18, size = 3) +
theme_minimal() +
theme(axis.text = element_blank()) +
labs(caption = "Source: ThemaKart, © BFS")
Each observation of your data should be joined to a geodata spatial geometry.
The sf
R package (for “Simple Feature”) allows to do it in a tidy way.
Rows: 26
Columns: 3
$ id <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18…
$ name <chr> "Zürich", "Bern", "Luzern", "Uri", "Schwyz", "Obwalden", "Nid…
$ geometry <MULTIPOLYGON [m]> MULTIPOLYGON (((2694877 123..., MULTIPOLYGON (((…
Let’s download an official Swiss dataset with BFS
:
Rows: 38,880
Columns: 6
$ degre_de_formation <chr> "Degré de formation - Total", "Degré de formatio…
$ canton_de_lecole <chr> "Suisse", "Suisse", "Suisse", "Suisse", "Suisse"…
$ sexe <chr> "Sexe - Total", "Sexe - Total", "Sexe - Total", …
$ nationalite_categorie <chr> "Nationalité - Total", "Nationalité - Total", "N…
$ annee <chr> "1999/00", "2000/01", "2001/02", "1999/00", "200…
$ eleves_et_etudiants <dbl> 1309950, 1311661, 1312533, 1025029, 1026294, 102…
We can then pivot it and calculate share of female students:
library(tidyverse)
swiss_students_gender <- swiss_students |>
filter(nationalite_categorie == "Suisse", #only Swiss students
canton_de_lecole != "Suisse") |> #only cantonal data
pivot_wider(names_from = sexe, values_from = eleves_et_etudiants) |>
mutate(share_woman = round(Femme/`Sexe - Total`*100, 1))
glimpse(swiss_students_gender)
Rows: 3,120
Columns: 8
$ degre_de_formation <chr> "Degré de formation - Total", "Degré de formatio…
$ canton_de_lecole <chr> "Zürich", "Zürich", "Zürich", "Bern / Berne", "B…
$ nationalite_categorie <chr> "Suisse", "Suisse", "Suisse", "Suisse", "Suisse"…
$ annee <chr> "1999/00", "2000/01", "2001/02", "1999/00", "200…
$ `Sexe - Total` <dbl> 161170, 161670, 162710, 144931, 144071, 141637, …
$ Homme <dbl> 82867, 83322, 83820, 74847, 74212, 73077, 33247,…
$ Femme <dbl> 78303, 78348, 78890, 70084, 69859, 68560, 27463,…
$ share_woman <dbl> 48.6, 48.5, 48.5, 48.4, 48.5, 48.4, 45.2, 46.0, …
We can then joining our data to the geodata.
We need to do some recoding before the left join.
swiss_student_map <- swiss_students_gender %>%
filter(canton_de_lecole != "Schweiz") %>% # remove national level
mutate(canton_de_lecole = str_remove(canton_de_lecole, ".*/"),
canton_de_lecole = str_trim(canton_de_lecole),
canton_de_lecole = recode(canton_de_lecole, "Berne" = "Bern", "Freiburg" = "Fribourg", "Grischun" = "Graubünden", "Ticino" = "Tessin", "Wallis" = "Valais")) |>
left_join(swiss_map, by = c("canton_de_lecole" = "name")) |>
select(canton_de_lecole, annee, degre_de_formation, share_woman, geometry)
glimpse(swiss_student_map)
Rows: 3,120
Columns: 5
$ canton_de_lecole <chr> "Zürich", "Zürich", "Zürich", "Bern", "Bern", "Bern…
$ annee <chr> "1999/00", "2000/01", "2001/02", "1999/00", "2000/0…
$ degre_de_formation <chr> "Degré de formation - Total", "Degré de formation -…
$ share_woman <dbl> 48.6, 48.5, 48.5, 48.4, 48.5, 48.4, 45.2, 46.0, 46.…
$ geometry <MULTIPOLYGON [m]> MULTIPOLYGON (((2694877 123..., MULTIP…
To ease your work with geodata names, I provided the Swiss official registers.
library(BFS)
#register_bzn, register_dic, register_kt, register_kt_seeanteile
bfs_get_base_maps(geom = "polg", date = "20230101") |>
inner_join(BFS::register_gde |>
filter(GDEKTNA == "Genève"), #only Geneva communes
by = c("id" = "GDENR")) |>
glimpse()
Rows: 45
Columns: 10
$ id <dbl> 6601, 6602, 6603, 6604, 6605, 6606, 6607, 6608, 6609, 6610, …
$ name <chr> "Aire-la-Ville", "Anières", "Avully", "Avusy", "Bardonnex", …
$ geometry <MULTIPOLYGON [m]> MULTIPOLYGON (((2491472 111..., MULTIPOLYGON ((…
$ GDEKT <chr> "GE", "GE", "GE", "GE", "GE", "GE", "GE", "GE", "GE", "GE", …
$ GDEBZNR <dbl> 2500, 2500, 2500, 2500, 2500, 2500, 2500, 2500, 2500, 2500, …
$ GDENAME <chr> "Aire-la-Ville", "Anières", "Avully", "Avusy", "Bardonnex", …
$ GDENAMK <chr> "Aire-la-Ville", "Anières", "Avully", "Avusy", "Bardonnex", …
$ GDEBZNA <chr> "Canton de Genève", "Canton de Genève", "Canton de Genève", …
$ GDEKTNA <chr> "Genève", "Genève", "Genève", "Genève", "Genève", "Genève", …
$ GDEMUTDAT <chr> "1848-09-12", "1858-01-01", "1848-09-12", "1848-09-12", "185…
mapview
mapview provides functions to very quickly and conveniently create interactive visualisations of spatial data. It’s main goal is to fill the gap of quick (not presentation grade) interactive plotting to examine and visually investigate both aspects of spatial data, the geometries and their attributes. It can also be considered a data-driven API for the leaflet package as it will automatically render correct map types, depending on the type of the data (points, lines, polygons, raster).
Source: https://r-spatial.github.io/mapview/
mapview
The easy way with mapview
(but limited customization).
mapview
Using mapview
with our Swiss students dataset.
mapview
mapview
is using Leaflet in the background, which allows to access extra functionality:
map_2023 <- swiss_student_map_pivoted |>
filter(annee == "2022/23") |>
mapview(zcol = "Degré de formation - Total", layer.name = "% étudiantes suisses, 2023")
map_2000 <- swiss_student_map_pivoted |>
filter(annee == "1999/00") |>
mapview(zcol = "Degré de formation - Total", layer.name = "% étudiantes suisses, 2000")
mapview
mapview
is using Leaflet in the background, which allows to access extra functionality:
mapview
mapview
is using Leaflet in the background, which allows to access extra functionality:
mapview
mapview
is using Leaflet in the background, which allows to access extra functionality:
leaflet
Let’s reproduce this datawrapper map:
leaflet
Let’s find the data with bfs_get_catalog_tables()
.
tables_names <- BFS::bfs_get_catalog_tables(
language = "en",
title = "last+names")
glimpse(tables_names)
Rows: 8
Columns: 7
$ title <chr> "Last names of the permanent resident population", "L…
$ language <chr> "en", "en", "en", "en", "en", "en", "en", "en"
$ publication_date <dttm> 2023-08-22 08:30:00, 2023-08-22 08:30:00, 2023-08-22 …
$ number_asset <dbl> 26925057, 26925060, 26925059, 26925058, 23264628, 23…
$ url_bfs <chr> "https://www.bfs.admin.ch/content/bfs/en/home/statist…
$ url_table <chr> "https://www.bfs.admin.ch/bfsstatic/dam/assets/269250…
$ catalog_date <dttm> 2023-08-22 08:30:00, 2023-08-22 08:30:00, 2023-08-22 …
leaflet
Let’s download and read the data.
leaflet
Let’s download and read the data.
Rows: 955,498
Columns: 6
$ LASTNAME <chr> "Frei", "Keller", "Meier", "Weiss", "Müller", "Angst", "Bachm…
$ GDENR <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
$ GDENAME <chr> "Aeugst am Albis", "Aeugst am Albis", "Aeugst am Albis", "Aeu…
$ RANG_GDE <dbl> 1, 1, 1, 1, 5, 6, 6, 6, 6, 10, 11, 11, 11, 14, 14, 14, 14, 18…
$ VALUE <dbl> 13, 13, 13, 13, 12, 11, 11, 11, 11, 10, 9, 9, 9, 8, 8, 8, 8, …
$ PCT_GDE <dbl> 0.65, 0.65, 0.65, 0.65, 0.60, 0.55, 0.55, 0.55, 0.55, 0.50, 0…
leaflet
Top 5 by commune by rank and percent:
df_top_5 <- df |>
arrange(desc(PCT_GDE)) |>
group_by(GDENAME) |>
slice(1:5) |>
mutate(RANK = row_number()) |>
ungroup()
glimpse(df_top_5)
Rows: 10,708
Columns: 7
$ LASTNAME <chr> "Müller", "Schmid", "Ammann", "Meier", "Zehnder", "Müller", "…
$ GDENR <dbl> 4551, 4551, 4551, 4551, 4551, 4001, 4001, 4001, 4001, 4001, 3…
$ GDENAME <chr> "Aadorf", "Aadorf", "Aadorf", "Aadorf", "Aadorf", "Aarau", "A…
$ RANG_GDE <dbl> 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 3, 5, 1…
$ VALUE <dbl> 115, 82, 76, 71, 69, 216, 147, 131, 99, 95, 51, 44, 40, 38, 3…
$ PCT_GDE <dbl> 1.22, 0.87, 0.81, 0.75, 0.73, 0.99, 0.67, 0.60, 0.45, 0.44, 1…
$ RANK <int> 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1…
leaflet
Create html table for each commune
create_table <- function(name) {
df <- df_top_5 |>
filter(GDENAME == name)
table <- df |>
select("Rank" = RANK, "Last name" = LASTNAME,
"Number" = VALUE, "Percentage" = PCT_GDE) |>
mutate(Rank = paste0(Rank, "."),
Percentage = paste0(Percentage, "%")) |>
kableExtra::kable(format = "html", align = "llrr")
paste0(
"<b>", unique(df$GDENAME), "</b><br>",
table
)
}
leaflet
[1] "<b>Aadorf</b><br><table>\n <thead>\n <tr>\n <th style=\"text-align:left;\"> Rank </th>\n <th style=\"text-align:left;\"> Last name </th>\n <th style=\"text-align:right;\"> Number </th>\n <th style=\"text-align:right;\"> Percentage </th>\n </tr>\n </thead>\n<tbody>\n <tr>\n <td style=\"text-align:left;\"> 1. </td>\n <td style=\"text-align:left;\"> Müller </td>\n <td style=\"text-align:right;\"> 115 </td>\n <td style=\"text-align:right;\"> 1.22% </td>\n </tr>\n <tr>\n <td style=\"text-align:left;\"> 2. </td>\n <td style=\"text-align:left;\"> Schmid </td>\n <td style=\"text-align:right;\"> 82 </td>\n <td style=\"text-align:right;\"> 0.87% </td>\n </tr>\n <tr>\n <td style=\"text-align:left;\"> 3. </td>\n <td style=\"text-align:left;\"> Ammann </td>\n <td style=\"text-align:right;\"> 76 </td>\n <td style=\"text-align:right;\"> 0.81% </td>\n </tr>\n <tr>\n <td style=\"text-align:left;\"> 4. </td>\n <td style=\"text-align:left;\"> Meier </td>\n <td style=\"text-align:right;\"> 71 </td>\n <td style=\"text-align:right;\"> 0.75% </td>\n </tr>\n <tr>\n <td style=\"text-align:left;\"> 5. </td>\n <td style=\"text-align:left;\"> Zehnder </td>\n <td style=\"text-align:right;\"> 69 </td>\n <td style=\"text-align:right;\"> 0.73% </td>\n </tr>\n</tbody>\n</table>"
leaflet
Add html table code in the table
column.
leaflet
Get official Swiss base maps.
leaflet
Join our data with geodata.
leaflet
Create a basic leaflet map.
library(leaflet)
# customize legend
bins = c(0, 1, 2.5, 5, 10, 20, 100)
col_custom = c("#f5b3bb", "#ef8894","#e85767","#dc0018","#a60013","#73000d")
pal <- colorBin(col_custom,
domain = sf_communes_joined$PCT_GDE,
bins = bins)
legend_labels <- c("0 – 1", "1 – 2.5", "2.5 – 5",
"5 – 10", "10 – 20", "20 – 100")
map <- leaflet(sf_communes_joined, height = 600, width = 900) |>
addPolygons(
weight = 0.3,
opacity = 1,
color = "white",
fillOpacity = 1,
fillColor = ~pal(PCT_GDE),
label = ~lapply(table, htmltools::HTML),
) |>
addPolygons(
data = lakes_sf,
label = ~name,
stroke = FALSE,
color = "gray70"
) |>
addLegend(
title = "Share of the most<br>common surname, in %",
labFormat = function(type, cuts, p) { paste0(legend_labels) },
values = ~PCT_GDE,
pal = pal,
opacity = 1
)
leaflet
Create a basic leaflet map.
leaflet
Adding bounding box, empty background and fix zooming.
# get bounding box of Switzerland
bbox <- st_bbox(sf_communes_joined) |>
as.vector()
map_final <- map |>
addTiles(urlTemplate = "", # empty background
options = providerTileOptions(minZoom = 8, maxZoom = 12)) |>
setMaxBounds(lng1 = bbox[1], lat1 = bbox[2],
lng2 = bbox[3], lat2 = bbox[4]) |>
leaflet.extras::setMapWidgetStyle(style = list(background = "transparent"))
leaflet
Adding bounding box, empty background and fix zooming.
leaflet
Add map in a card with title and legend.
library(bslib)
library(htmltools)
card <- card(
tags$h5("The Five Most Frequent Last Names by Commune, 2022"),
tags$i("Hover to display the five most common surnames (actual number and percentage) by commune."),
map,
tags$p(
"Source: FSO – STATPOP, inspired by:",
tags$a("https://www.bfs.admin.ch/asset/en/27205601",
href = "https://www.bfs.admin.ch/asset/en/27205601")),
tags$p("Get the R code:",
tags$a("felixanalytix.com",
href = "https://felixanalytix.com")),
min_height = 800,
max_height = 800
)
leaflet
Add map in a card with title and legend.
Source: FSO – STATPOP, inspired by: https://www.bfs.admin.ch/asset/en/27205601
Get the R code: felixanalytix.com
Get the R code of this talk:
You can contact me on LinkedIn:
Thank you for your attention!