Vignette Rmd source code (Not on CRAN to reduce load on DWD server through daily new builds and checks of the vignette)
library(rdwd)
links <- selectDWD(res="daily", var="more_precip", per="hist")
length(links) # 5583 stations - would take very long to download
## [1] 4635
# select only the relevant files:
data("metaIndex")
myIndex <- metaIndex[
metaIndex$von_datum < 20140101 &
metaIndex$bis_datum > 20161231 & metaIndex$hasfile , ]
data("fileIndex")
links <- fileIndex[
suppressWarnings(as.numeric(fileIndex$id)) %in% myIndex$Stations_id &
fileIndex$res=="daily" &
fileIndex$var=="more_precip" &
fileIndex$per=="historical" , "path" ]
links <- paste0("ftp://ftp-cdc.dwd.de/pub/CDC/observations_germany/climate/", links)
length(links) # 2001 elements - much better
## [1] 2006
If some downloads fail (mostly because you’ll get kicked off the FTP server), you can just run the same code again and only the missing files will be downloaded.
If you really want to download 2k historical (large!) datasets, you definitely want to set sleep
to a much higher value.
For speed, we’ll only work with the first 3 urls.
localfiles <- dataDWD(links[1:3], sleep=0.2, read=FALSE)
2k large datasets probably is way too much for memory, so we’ll use a custom reading function. It will only select the relevant time section and rainfall column. The latter will be named with the id extracted from the filename.
readVars(localfiles[1])[,-3] # we want the RS column
## Par Kurz Einheit
## RS RS Niederschlagshoehe mm
## RSF RSF Niederschlagsform numerischer Code
## SH_TAG SH_TAG Schneehoehe cm
read2014_2016 <- function(file, fread=TRUE, ...)
{
out <- readDWD(file, fread=fread, ...)
out <- out[out$MESS_DATUM > as.POSIXct(as.Date("2014-01-01")) &
out$MESS_DATUM < as.POSIXct(as.Date("2016-12-31")) , ]
out <- out[ , c("MESS_DATUM", "RS")]
out$MESS_DATUM <- as.Date(out$MESS_DATUM) # might save some memory space...
# Station id as column name:
idstringloc <- unlist(gregexpr(pattern="tageswerte_RR_", file))
idstring <- substring(file, idstringloc+14, idstringloc+18)
colnames(out) <- c("date", idstring)
return(out)
}
str(read2014_2016(localfiles[1])) # test looks good
## 'data.frame': 1090 obs. of 2 variables:
## $ date : Date, format: "2014-01-02" "2014-01-03" ...
## $ 00006: num 1.8 0.4 2.3 0.7 0.2 0 0 8.3 0 4 ...
Now let’s apply this to all our files and merge the result.
library(pbapply) # progress bar for lapply loop
rain_list <- pblapply(localfiles, read2014_2016)
rain_df <- Reduce(function(...) merge(..., all=T), rain_list)
str(rain_df) # looks nice!
## 'data.frame': 1094 obs. of 4 variables:
## $ date : Date, format: "2014-01-02" "2014-01-03" ...
## $ 00006: num 1.8 0.4 2.3 0.7 0.2 0 0 8.3 0 4 ...
## $ 00015: num 1.2 0.2 1.5 1.5 0 0 0 5.1 0.3 0.6 ...
## $ 00019: num 3.3 0.4 2.9 0 0.2 0.1 0 6.3 0.2 3.1 ...
summary(rain_df) # 9 NAs in station 00006
## date 00006 00015 00019
## Min. :2014-01-02 Min. : 0.000 Min. : 0.000 Min. : 0.000
## 1st Qu.:2014-10-02 1st Qu.: 0.000 1st Qu.: 0.000 1st Qu.: 0.000
## Median :2015-07-02 Median : 0.100 Median : 0.000 Median : 0.100
## Mean :2015-07-02 Mean : 2.142 Mean : 1.647 Mean : 2.152
## 3rd Qu.:2016-03-31 3rd Qu.: 1.900 3rd Qu.: 1.500 3rd Qu.: 1.900
## Max. :2016-12-30 Max. :68.400 Max. :54.000 Max. :53.600
## NA's :5
plot(rain_df$date, rain_df[,2], type="n", ylim=range(rain_df[,-1], na.rm=T),
las=1, xaxt="n", xlab="Date", ylab="Daily rainfall sum [mm]")
berryFunctions::monthAxis()
for(i in 2:ncol(rain_df)) lines(rain_df$date, rain_df[,i], col=sample(colours(), size=1))
plot(rain_df[,2:4]) # correlation plot only works for a few columns!
Let’s see the locations of our stations in an interactive map.
data(geoIndex) ; library(leaflet)
mygeoIndex <- geoIndex[geoIndex$id %in% as.numeric(colnames(rain_df)[-1]),]
leaflet(data=mygeoIndex) %>% addTiles() %>%
addCircleMarkers(~lon, ~lat, popup=~display, stroke=T)
For a static map with scaleBar, OSMscale works nicely but currently still has a Java dependency, see https://github.com/brry/OSMscale#installation
library(OSMscale)
pointsMap("lat", "lon", mygeoIndex, fx=2, fy=1, pargs=list(lwd=3),
col="blue", zoom=5)
m <- nearbyStations(49.211784, 9.812475, radius=30,
res=c("daily","hourly"), var=c("precipitation","more_precip","kl"),
mindate=20160530, statname="Braunsbach catchment center")
# Remove duplicates. if kl and more_precip are both available, keep only more_precip:
library("berryFunctions")
m <- sortDF(m, "var")
m <- m[!duplicated(paste0(m$Stations_id, m$res)),]
m <- sortDF(m, "res")
m <- sortDF(m, "dist", decreasing=FALSE)
rownames(m) <- NULL
head(m[,-14]) # don't show url column with long urls
## Stations_id von_datum bis_datum Stationshoehe geoBreite geoLaenge
## 1 NA NA NA NA 49.21178 9.812475
## 2 2848 19310101 20190309 458 49.25750 9.859400
## 3 2787 20050101 20190309 354 49.24280 9.678600
## 4 2787 19410101 20190309 354 49.24280 9.678600
## 5 5206 19510101 20161109 396 49.11750 9.897200
## 6 2575 20060901 20190309 426 49.18040 9.980000
## Stationsname Bundesland res var
## 1 Braunsbach catchment center <NA> z <NA>
## 2 Langenburg-Atzenrod Baden-Wuerttemberg daily more_precip
## 3 Kupferzell-Rechbach Baden-Wuerttemberg hourly precipitation
## 4 Kupferzell-Rechbach Baden-Wuerttemberg daily more_precip
## 5 Vellberg-Kleinaltdorf Baden-Wuerttemberg daily more_precip
## 6 Kirchberg/Jagst-Herboldshausen Baden-Wuerttemberg hourly precipitation
## per hasfile dist
## 1 <NA> NA 0.00000
## 2 historical TRUE 6.11954
## 3 historical TRUE 10.31522
## 4 historical TRUE 10.31522
## 5 historical TRUE 12.15981
## 6 historical TRUE 12.66318
Interactive map of just the meteo station locations:
library(leaflet)
m$col <- "red" ; m$col[1] <- "blue"
leaflet(m) %>% addTiles() %>%
addCircles(lng=9.812475, lat=49.211784, radius=30e3) %>%
addCircleMarkers(~geoLaenge, ~geoBreite, col=~col, popup=~Stationsname)
Download and process data for the stations, get the rainfall sums of a particular day (Braunsbach flood May 2016):
prec <- dataDWD(m$url)
names(prec) <- m$Stations_id[-1]
prec29 <- sapply(prec[m$res[-1]=="daily"], function(x)
{
col <- "RS"
if(!col %in% colnames(x)) col <- "R1"
if(!col %in% colnames(x)) col <- "RSK"
x[x$MESS_DATUM==as.POSIXct(as.Date("2016-05-29")), col]
})
prec29 <- data.frame(Stations_id=names(prec29), precsum=unname(prec29))
prec29 <- merge(prec29, m[m$res=="daily",c(1,4:7,14)], sort=FALSE)
head(prec29[,-7]) # don't show url column with long urls
## Stations_id precsum Stationshoehe geoBreite geoLaenge
## 1 2848 105.0 458 49.2575 9.8594
## 2 2787 72.0 354 49.2428 9.6786
## 3 5206 82.2 396 49.1175 9.8972
## 4 2575 94.0 426 49.1804 9.9800
## 5 3416 82.5 294 49.3423 9.8073
## 6 6260 76.6 385 49.3328 9.7040
## Stationsname
## 1 Langenburg-Atzenrod
## 2 Kupferzell-Rechbach
## 3 Vellberg-Kleinaltdorf
## 4 Kirchberg/Jagst-Herboldshausen
## 5 Mulfingen/Jagst
## 6 Ingelfingen-Stachenhausen
For a quick look without a map, this works:
plot(geoBreite~geoLaenge, data=m, asp=1)
textField(prec29$geoLaenge, prec29$geoBreite, prec29$precsum, col=2)
But it’s nicer to have an actual map. If OSMscale installation fails, go to https://github.com/brry/OSMscale#installation
library(OSMscale)
map <- pointsMap(geoBreite,geoLaenge, data=m, type="osm", plot=FALSE)
pp <- projectPoints("geoBreite", "geoLaenge", data=prec29, to=map$tiles[[1]]$projection)
prec29 <- cbind(prec29,pp) ; rm(pp)
plot(map)
scaleBar(map, cex=1.5, type="line", y=0.82)
title(main="Rainfall sum 2016-05-29 7AM-7AM [mm]", line=-1)
textField(prec29$x, prec29$y, round(prec29$precsum), font=2, cex=1.5)
Any feedback on this package (or this vignette) is very welcome via github or berry-b@gmx.de!