1 Unfalldaten
Das Statistische Bundesamt stellt eine Vielzahl an unterschiedlichen Datensätzen zur Verfügung. In diesem Dokument werden offizielle Unfalldaten mit Personenschaden für Regensburg ausgewertet. Diese können hier heruntergeladen werden.
library(tidyverse)
library(lubridate)

# names of all raw accident files found in data-raw/accidents
filenames <-
  list.files(
    path = here::here("data-raw/accidents")
  )

# Read one raw accident file and harmonise its column names and types.
#
# filename: name of a file inside "data-raw/accidents" (resolved with
#   here::here()); the file is read with read_csv2().
#
# Returns the file's contents with the headers translated through `col_key`
# and with `bezirk` as character and `year`, `month`, `hour` as numeric.
# Headers that are not listed in `col_key` keep their original name.
# Errors if one of `bezirk`, `year`, `month`, `hour` is missing after the
# renaming.
ReadGarbageData <- function(filename) {
  raw <- read_csv2(here::here("data-raw/accidents", filename))

  # the files have different headers
  # this key maps every known header variant onto one common name
  col_key <-
    c(
      # ids
      FID = "id1",
      OBJECTID = "id2",
      OBJECTID_1 = "id2",
      UIDENTSTLA = "id3",
      UIDENTSTLAE = "id3",
      # lighting
      ULICHTVERH = "light_condition",
      LICHT = "light_condition",
      # street condition
      IstStrasse = "street_condition",
      STRZUSTAND = "street_condition",
      # other
      IstSonstig = "other",
      IstSonstige = "other",
      # common
      ULAND = "land",
      UREGBEZ = "bezirk",
      UKREIS = "kreis",
      UGEMEINDE = "gemeinde",
      UJAHR = "year",
      UMONAT = "month",
      USTUNDE = "hour",
      UWOCHENTAG = "weekday",
      UKATEGORIE = "severity",
      UART = "kind_of_accident",
      UTYP1 = "type_of_accident",
      IstRad = "bicycle",
      IstKrad = "bike",
      IstPKW = "car",
      IstFuss = "pedestrian",
      IstGkfz = "truck",
      LINREFX = "linref_x",
      LINREFY = "linref_y",
      XGCSWGS84 = "lng",
      YGCSWGS84 = "lat"
    )

  # correct col names via the key; a header without an entry in the key
  # would look up as NA, so fall back to the original header in that case
  unified <- unname(col_key[names(raw)])
  names(raw) <- ifelse(is.na(unified), names(raw), unified)

  # correct col types so that the per-file results can be row-bound
  raw |>
    mutate(
      bezirk = as.character(bezirk),
      year = as.numeric(year),
      month = as.numeric(month),
      hour = as.numeric(hour)
    )
}

# read every file, stack the results and drop the id columns of the raw data
data <-
  filenames |>
  map(ReadGarbageData) |>
  bind_rows() |>
  select(-starts_with("id"))

# keep only the rows with the region keys this document uses for Regensburg,
# then drop the region keys and the other columns not used further on
data <-
  data |>
  filter(
    land == "09",
    bezirk == "3",
    kreis == "62",
    gemeinde == "000"
  ) |>
  select(
    -kind_of_accident, -type_of_accident, -linref_x, -linref_y,
    -land, -bezirk, -kreis, -gemeinde
  )

# add id
data <-
  data |>
  mutate(
    id = row_number()
  ) |>
  select(id, everything())

# derive date columns and turn the coded columns into labelled values
data <-
  data |>
  mutate(
    # the timestamp is built from month, year and hour only
    datetime = glue::glue("{month}-{year}-{hour}") |>
      parse_datetime(format = "%m-%Y-%H"),
    weekday = wday(weekday, label = TRUE),
    date = date(datetime)
  ) |>
  mutate(
    across(
      .cols = c(severity, light_condition, street_condition),
      .fns = as_factor
    ),
    # select the participant flags by name: a positional range would depend
    # on the column order that results from row-binding files with
    # different headers
    across(
      .cols = any_of(
        c("bicycle", "bike", "car", "pedestrian", "truck", "other")
      ),
      .fns = as.logical
    )
  ) |>
  mutate(
    severity = fct_recode(
      severity,
      "Toedlich" = "1",
      "Schwer" = "2",
      "Leicht" = "3"
    ),
    light_condition = fct_recode(
      light_condition,
      "Tageslicht" = "0",
      "Dämmerung" = "1",
      "Dunkelheit" = "2"
    ),
    street_condition = fct_recode(
      street_condition,
      "Trocken" = "0",
      "Nass/Feucht" = "1",
      "Winterglatt" = "2"
    )
  )

# show the cleaned data as an interactive table
data |>
  DT::datatable()

1.1 Geocode
Der folgende Chunk fügt den einzelnen Unfällen die passende Adresse hinzu. Dies dauert wegen fehlender Parallelisierung recht lange (1–2 Stunden) und wurde in der Auswertung nicht wirklich benötigt. Daher wird der Code nicht ausgeführt.
# progress bar with one tick per accident
pb <-
  progress::progress_bar$new(
    format = "Lade Geodaten :current/:total [:bar] :percent (eta: :eta)",
    total = nrow(data)
  )
pb$tick(0)

# Reverse-geocode every accident position and join the address columns
# (name through country) onto the accident data.
# The accident id is attached to each lookup result directly, so the join
# stays correct even if a lookup does not return exactly one row.
data <-
  pmap(
    list(data$id, data$lng, data$lat),
    function(accident_id, lng, lat) {
      address <-
        photon::reverse(lng, lat) |>
        select(name:country) |>
        mutate(id = accident_id)
      pb$tick()
      address
    }
  ) |>
  bind_rows() |>
  mutate(
    # fall back to the place name where no street was returned
    street = ifelse(is.na(street), name, street)
  ) |>
  right_join(data, by = "id")

remove(pb)

1.2 CSV/RDA speichern.
# persist the cleaned accident data twice: as a semicolon-separated CSV
# under output/ and as an .rda file (holding the object `data`) under data/
csv_path <- here::here("output/regensburg_data.csv")
rda_path <- here::here("data/regensburg_data.rda")

write_csv2(data, file = csv_path)
save(data, file = rda_path)