1 Unfalldaten
Das Statistische Bundesamt stellt eine Vielzahl an unterschiedlichen Datensätzen zur Verfügung. In diesem Dokument werden offizielle Daten zu Unfällen mit Personenschaden für Regensburg ausgewertet. Diese können hier heruntergeladen werden.
library(tidyverse)
library(lubridate)
# Collect the names of all raw accident files. list.files() returns bare
# file names (no directory part), so the reader has to prepend the
# directory again when opening them.
filenames <-
  list.files(
    path = here::here("data-raw/accidents")
  )
# Read one raw accident file and harmonise its column names and types.
#
# filename: name of a file inside "data-raw/accidents" (file name only,
#           the directory is prepended here).
# Returns the file's contents as a data frame with the unified column names
# defined in `col_key` and with `bezirk` as character and `year`, `month`,
# `hour` as numeric.
ReadGarbageData <- function(filename){
  # read a file
  data <- read_csv2(here::here("data-raw/accidents", filename))

  # the files have different headers
  # this key corrects that (raw header name = unified name)
  col_key <-
    c(
      # ids
      FID = "id1",
      OBJECTID = "id2",
      OBJECTID_1 = "id2",
      UIDENTSTLA = "id3",
      UIDENTSTLAE = "id3",
      # lighting
      ULICHTVERH = "light_condition",
      LICHT = "light_condition",
      # street condition
      IstStrasse = "street_condition",
      STRZUSTAND = "street_condition",
      # other
      IstSonstig = "other",
      IstSonstige = "other",
      # common
      ULAND = "land",
      UREGBEZ = "bezirk",
      UKREIS = "kreis",
      UGEMEINDE = "gemeinde",
      UJAHR = "year",
      UMONAT = "month",
      USTUNDE = "hour",
      UWOCHENTAG = "weekday",
      UKATEGORIE = "severity",
      UART = "kind_of_accident",
      UTYP1 = "type_of_accident",
      IstRad = "bicycle",
      IstKrad = "bike",
      IstPKW = "car",
      IstFuss = "pedestrian",
      IstGkfz = "truck",
      LINREFX = "linref_x",
      LINREFY = "linref_y",
      XGCSWGS84 = "lng",
      YGCSWGS84 = "lat"
    )

  # correct col names via the key
  # NOTE(review): a header that is missing from `col_key` is looked up as NA
  # and would leave that column with an NA name - confirm that every raw
  # header is covered by the key.
  names(data) <- col_key[names(data)]

  # correct col types so that the per-file results can be row-bound
  data <-
    data |>
    mutate(
      bezirk = as.character(bezirk),
      year = as.numeric(year),
      month = as.numeric(month),
      hour = as.numeric(hour)
    )

  return(data)
}
# Read every raw file, stack the results row-wise into one data frame and
# drop the id columns (id1, id2, id3) carried over from the raw files.
data <-
  filenames |>
  map_dfr(
    ReadGarbageData
  ) |>
  select(-starts_with("id"))
# Keep only the accidents of one municipality, identified by its four-part
# regional key (land / bezirk / kreis / gemeinde).
# NOTE(review): per the report text this key is meant to select Regensburg -
# confirm the codes against the official key.
# Afterwards drop the columns that are not analysed, plus the key columns
# themselves, which are constant after filtering.
data <-
  data |>
  filter(
    land == "09" &
      bezirk == "3" &
      kreis == "62" &
      gemeinde == "000"
  ) |>
  select(-kind_of_accident, -type_of_accident, -linref_x, -linref_y) |>
  select(-land, -bezirk, -kreis, -gemeinde)
# add id: a running row number, moved to the first column
data <-
  data |>
  mutate(
    id = row_number()
  ) |>
  select(id, everything())
# Derive time columns and turn the coded columns into typed, labelled ones.
data <-
  data |>
  # build a datetime from month, year and hour; the format string has no day
  # component, only month, year and hour are parsed
  mutate(
    datetime = glue::glue("{month}-{year}-{hour}") |>
      parse_datetime(format = "%m-%Y-%H")
  ) |>
  # replace the weekday code by a labelled weekday and add the date part of
  # `datetime`
  mutate(
    weekday = wday(weekday, label = TRUE),
    date = date(datetime)
  ) |>
  # coded categorical columns become factors
  mutate(
    across(
      .cols = c(severity, light_condition, street_condition),
      .fns = as_factor
    )
  ) |>
  # participant indicator columns become logical
  # NOTE(review): `bicycle:other` is a positional range and depends on the
  # column order of the raw files - confirm it covers exactly the indicator
  # columns.
  mutate(
    across(
      .cols = bicycle:other,
      .fns = as.logical
    )
  ) |>
  # replace the numeric codes by readable (German) labels
  mutate(
    severity = fct_recode(
      severity,
      "Toedlich" = "1",
      "Schwer" = "2",
      "Leicht" = "3"
    ),
    light_condition = fct_recode(
      light_condition,
      "Tageslicht" = "0",
      "Dämmerung" = "1",
      "Dunkelheit" = "2"
    ),
    street_condition = fct_recode(
      street_condition,
      "Trocken" = "0",
      "Nass/Feucht" = "1",
      "Winterglatt" = "2"
    )
  )
# Show the prepared data as an interactive table in the rendered report.
data |>
  DT::datatable()
1.1 Geocode
Der folgende Chunk fügt den einzelnen Unfällen die passende Adresse hinzu. Dies dauert wegen fehlender Parallelisierung recht lange (1–2 Stunden) und wurde in der Auswertung nicht wirklich benötigt. Daher wird der Code nicht ausgeführt.
# Reverse-geocode every accident and attach the resulting address columns.
# One request is made per row, sequentially, with a progress bar for feedback.
pb <-
  progress::progress_bar$new(
    format = "Lade Geodaten :current/:total [:bar] :percent (eta: :eta)",
    total = nrow(data)
  )
# draw the bar once before the first request finishes
pb$tick(0)

data <-
  map2_dfr(
    .x = data$lng,
    .y = data$lat,
    .f = function(x, y){
      # look up the address for one coordinate pair and keep the columns
      # from `name` through `country`
      geodata <- photon::reverse(x, y) |>
        select(name:country)
      pb$tick()
      return(geodata)
    }
  ) |>
  mutate(
    # NOTE(review): assumes exactly one result row per accident, in input
    # order, so the row number matches `data$id` - confirm.
    id = row_number(),
    # fall back to the place name where no street was returned
    street = ifelse(is.na(street), name, street)
  ) |>
  right_join(data, by = c("id"))

remove(pb)
1.2 CSV/RDA speichern.
# Persist the prepared accident data in two formats.

# 1) CSV in the `write_csv2()` dialect (";" as separator, "," as decimal mark)
data |>
  write_csv2(file = here::here("output/regensburg_data.csv"))

# 2) R data file containing the object `data` under that name
save(data, file = here::here("data/regensburg_data.rda"))