The existence of abundant meteorological data from logbooks of different European countries constitutes a common and invaluable heritage of the most outstanding scientific interest. The analysis of the logbooks' content will contribute to characterising the climate during the XVIIIth and XIXth centuries and to assessing climate change.
http://www.ucm.es/info/cliwoc/
http://www.ucm.es/info/cliwoc/cliwoc15.htm
1790 424 0106 10 1446ES 99 0 MNM;Archivo Museo Naval Madrid;Madrid;Spain;;;;;MS. 204;Spanish;;;EL FERROL;RIO DE LA PLATA;FRAGATA;;;GREGORIO DE JOSE Y LLAÑOS;ALFEREZ DE FRAGATA;;;;;Unknown;MEDICION Y DATOS DEL MEDIODIA MENOS DESCRIPTORES QUE SUELEN SER DEL ANOCHECER;Diario de navegación desde El Ferrol al Río de la Plata del alférez de fragata don Gregorio de José y Llaños hecho en la fragata Santa Perpetua del mando del cap. José Postillo desde el 1-2-1774. SIGUEN LA CARTA HOLANDESA;Unknown;Unknown;DESCONOCIDO;;;360 degrees;17740201;;;;;;;;48.00;;;;0;;;;;;;;;;;;;SSE;SSE;CALMOSO;;CALMOSO;;HORIZONTES CLAROS;;;;;;0;0;0;0;0;0;0;ALGO PICADA DEL VIENTO;;;;;;;;;;;0;;0;;0;;0;;0;;0;0;;;0
1790 7 31700-1518 28258 0106 10 926ES 1225123 99 0 MNM;Archivo Museo Naval Madrid;Madrid;Spain;;;;;MS. 204;Spanish;;;EL FERROL;RIO DE LA PLATA;FRAGATA;;;GREGORIO DE JOSE Y LLAÑOS;ALFEREZ DE FRAGATA;;;;;Unknown;MEDICION Y DATOS DEL MEDIODIA MENOS DESCRIPTORES QUE SUELEN SER DEL ANOCHECER;Diario de navegación desde El Ferrol al Río de la Plata del alférez de fragata don Gregorio de José y Llaños hecho en la fragata Santa Perpetua del mando del cap. José Postillo desde el 1-2-1774. SIGUEN LA CARTA HOLANDESA;Unknown;Unknown;DESCONOCIDO;;;360 degrees;17740201;;;;;;;;;;;;0;;;;;;;;;;;;;NO1/4N;NO 1/4 N;BONANCIBLE;;BONANCIBLE;;HORIZONTES CARGADOS. POR LA NOCHE EMPEZO A RELAMPAGUEAR MUY A MENUDO Y CON MUCHA VIVEZA. A LAS 3:00 ENTRO UNA NUBE MUY HORRIBLE Y NEGRA POR EL SO Y LO;;;;;;0;0;0;0;0;0;0;LLANA;;;;;;;;;;;0;;0;;0;;0;;0;;0;0;;;0
substring()
read.fwf()
http://www.ucm.es/info/cliwoc/content/CLIWOC15all.htm
# Scrape the CLIWOC 1.5 format-description page. The result is indexed
# below as a list of tables (res[[1]], res[[2]]).
library(XML)
res <- readHTMLTable(
  "http://www.ucm.es/info/cliwoc/content/CLIWOC15all.htm",
  stringsAsFactors = FALSE
)
# Peek at the first table, leaving out its fifth column.
head(res[[1]][-5])
## V1 V2 V3 V4
## 1 Variable Start End Format
## 2 YR 1 4 A4
## 3 MO 5 6 A2
## 4 DY 7 8 A2
## 5 HR 9 12 A4
## 6 LAT 13 17 A5
# The first row of table 1 repeats the column headings; discard it.
res[[1]] <- res[[1]][-1, ]
# Columns 2 and 3 of the first table hold each field's start and end
# character positions.
s1 <- as.integer(res[[1]][[2]])
s2 <- as.integer(res[[1]][[3]])
# Width of every fixed-width field (both end points are inclusive).
(s2 - s1) + 1
## [1] 4 2 2 4 5 6 2 1 1 1 1 1 2 2 9 2 1 3
## [19] 1 3 1 2 2 1 5 1 3 1 4 1 4 1 4 2 4 1
## [37] 1 1 1 1 1 1 2 2 2 2 2 2 2 2 1
# Lower-cased variable names: the first column of table 1, followed by
# the first column of table 2 without its first (header) row.
nms <- tolower(c(res[[1]][[1]], res[[2]][-1, 1]))
head(nms, n = 30)
## [1] "yr" "mo" "dy" "hr" "lat"
## [6] "lon" "im" "attc" "ti" "li"
## [11] "ds" "vs" "nid" "ii" "id"
## [16] "c1" "di" "d" "wi" "w"
## [21] "vi" "vv" "ww" "w1" "slp"
## [26] "a" "ppp" "it" "at" "wbti"
# Save as fix2pipe.awk (the file name passed to `gawk -f`).
#
# Converts each fixed-width record into one pipe-separated line.
# FIELDWIDTHS is a gawk extension, so this script needs gawk.
BEGIN {
    # Widths of the leading fixed-width columns. The final, oversized
    # width (10000) collects the rest of the record, which is already
    # semicolon-separated, into one last field.
    FIELDWIDTHS = "4 2 2 4 5 6 2 1 1 1 1 1" \
    " 2 2 9 2 1 3 1 3 1 2 2 1 5 1 3 1 4" \
    " 1 4 1 4 2 4 1 1 1 1 1 1 1 2 2 2 2" \
    " 2 2 2 2 1 10000"
}
# Records with no fields produce no output.
NF > 0 {
    # Every field except the last: strip leading and trailing blanks and
    # terminate it with the output separator.
    for (i = 1; i < NF; i++) {
        gsub(/^ +| +$/, "", $i)
        printf "%s|", $i
    }
    # Last field: turn the existing ";" separators into "|" so the whole
    # line uses a single delimiter, then end the line.
    gsub(";", "|", $NF)
    printf "%s\n", $NF
}
http://www.pement.org/awk/awk1line.txt
what does `awk NF` mean?
gawk -f fix2pipe.awk CLIWOC15 > CLIWOC15pipe
But text data is really awful! (lack of rigorous field separators)
# Read the pipe-separated file CLIWOC15pipe.
# fill = TRUE pads records that have fewer fields than expected.
# Quote and comment processing are switched off because the trailing
# columns are free text: with read.table()'s defaults an apostrophe, a
# double quote or a "#" inside a remark would swallow or truncate the
# rest of the record.
df <- read.table("CLIWOC15pipe",
  sep = "|", fill = TRUE,
  quote = "", comment.char = ""
)
exported to CSV using mdbtools
# download the CLIWOC 1.5 release archive
wget http://www.knmi.nl/cliwoc/download/CLIWOC15_2000.zip
# unpack it (presumably this yields CLIWOC15_2000.mdb, used next)
unzip CLIWOC15_2000.zip
# export the CLIWOC15 table of the .mdb database to CSV with mdbtools
mdb-export CLIWOC15_2000.mdb CLIWOC15 > CLIWOC15.csv
# compress the CSV, producing CLIWOC15.csv.bz2
bzip2 CLIWOC15.csv
Now we get a compressed CSV file CLIWOC15.csv.bz2
(one copy at http://xie.public.iastate.edu/CLIWOC15full.csv.bz2)
# Time the import; read.csv() is given the bzip2-compressed file directly.
system.time(df <- read.csv("CLIWOC15.csv.bz2"))
## user system elapsed
## 49.979 0.756 50.888
# In-memory size of the data frame in megabytes. The argument is spelled
# out as `units` rather than relying on partial matching of `unit`.
print(object.size(df), units = "Mb")
## 190.1 Mb
# Five-number summary of the latitudes, including the NA count.
summary(df[["Lat3"]])
## Min. 1st Qu. Median Mean 3rd Qu.
## -129 -22 7 6 34
## Max. NA's
## 80 22297
# Count the records whose latitude is below -90.
table(df[["Lat3"]] < -90) # records lower than -90
##
## FALSE TRUE
## 257980 3
# Keep only rows with a latitude of at least -90 and a year after 1700.
# which() discards rows where the condition is NA, exactly as subset()
# does.
df <- df[which(df[["Lat3"]] >= -90 & df[["Year"]] > 1700), , drop = FALSE]
library(maps)
# Zero margins on all four sides so the map fills the device.
par(mar = c(0, 0, 0, 0))
map(col = "darkgray", xlim = c(-170, 170), ylim = c(-75, 80))
# One tiny, mostly transparent (alpha 0.1) dot per record, so that
# overplotted points build up darker areas.
points(df$Lon3, df$Lat3,
  pch = ".",
  col = rgb(0.18, 0.55, 0.34, alpha = 0.1)
)
# Build a Date column from the Year/Month/Day components.
# The format is given explicitly: without it as.Date() guesses the format
# from the first non-NA string and stops with an error if that string is
# not a valid date (e.g. a missing month or day). With an explicit format
# such records simply become NA.
df$Date <- as.Date(
  sprintf("%s-%s-%s", df$Year, df$Month, df$Day),
  format = "%Y-%m-%d"
)
# Start of the animated period is fixed; the data-driven alternative is
# kept for reference.
# d0 = min(df$Date, na.rm = TRUE)
d0 <- as.Date("1749-01-01")
# End of the animated period: the latest date present in the data.
d1 <- max(df$Date, na.rm = TRUE)
# d1 = d0 + 1000
library(scales)
# Remove leading and trailing blanks from the nationality labels, then
# rebuild the factor so its levels are the cleaned labels.
df$Nationality <- factor(sub(" *$", "", sub("^ *", "", df$Nationality)))
nat <- levels(df$Nationality)
# One qualitative Brewer colour per nationality level (legend swatches).
cols <- brewer_pal(type = "qual")(length(nat))
# The same palette mapped onto every record, at alpha 0.7 for the points.
colvec <- alpha(
  dscale(df$Nationality, brewer_pal(type = "qual")),
  alpha = 0.7
)
# x11(width = 10, height = 6);
# dev.control('inhibit')
n <- 14 # traces of 2 weeks
par(mar = rep(0, 4))
# Animate a sliding n-day window over the records, one frame per day.
# local() keeps the loop variables out of the global environment, so the
# global d0 is left unchanged.
local({
  window_start <- d0
  while (window_start + n <= d1) {
    window_end <- window_start + n
    # Hold screen updates until the frame is complete.
    dev.hold()
    map(col = "darkgray", xlim = c(-170, 170), ylim = c(-75, 80))
    # Records dated within [window_start, window_end).
    in_window <- df$Date >= window_start & df$Date < window_end
    points(df$Lon3[in_window], df$Lat3[in_window],
      pch = 20, col = colvec[in_window]
    )
    # Label the frame with the end date of the window.
    text(120, -56, window_end, cex = 2, col = "darkgray")
    legend("bottomleft", nat,
      fill = cols, bty = "n", ncol = 2, cex = 0.8,
      text.col = "darkgray"
    )
    dev.flush()
    window_start <- window_start + 1
  }
})
The slides were made with the R package knitr and pandoc, a great document converter, which converted the markdown output from knitr to DZSlides. The Markdown source is at https://yihui.org/slides/stat585x-shipping-yihui-xie.Rmd, and you can compile it with the function knit() in knitr.
# Print the R version, platform, locale and the attached/loaded packages
# of the current session.
sessionInfo()
## R version 2.14.2 (2012-02-29)
## Platform: x86_64-pc-linux-gnu (64-bit)
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8
## [2] LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8
## [4] LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8
## [6] LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=C
## [8] LC_NAME=C
## [9] LC_ADDRESS=C
## [10] LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8
## [12] LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices
## [4] utils datasets methods
## [7] base
##
## other attached packages:
## [1] XML_3.93-0 knitr_0.4.11
##
## loaded via a namespace (and not attached):
## [1] codetools_0.2-8 digest_0.5.2
## [3] evaluate_0.4.2 formatR_0.4.1
## [5] highlight_0.3.1 parser_0.0-14
## [7] plyr_1.7.1 Rcpp_0.9.10
## [9] stringr_0.6 tools_2.14.2