code.communitydata.science - covid19.git/blob - keywords/analysis/translations_example.R
add two small shellscripts for automation
[covid19.git] / keywords / analysis / translations_example.R
## Example: read the latest keywords file straight from the server and print
## the non-alias translations of "coronavirus".
## Columns used below: label, langcode, itemid and is_alt.
df <- read.csv("https://covid19.communitydata.science/datasets/keywords/csv/latest.csv")

## make the data more R-friendly: replace the text column is_alt with a
## logical column is.alt that is TRUE where is_alt was "True"
df$is.alt <- df$is_alt == "True"
df$is_alt <- NULL

## find all item ids whose English label is "coronavirus" (case-insensitive).
## which() keeps only the rows where the comparison is TRUE; a bare logical
## index would also return an NA entry for every row where the comparison
## itself is NA (read.csv parses a field containing the text NA as missing).
coronavirus.itemids <- df[which(tolower(df$label) == "coronavirus" &
                                df$langcode == "en"),
                          "itemid"]

## there are actually 5 item ids. The one referring to the family of virus is Q57751738
coronavirus.translations <- df[which(df$itemid == "http://www.wikidata.org/entity/Q57751738"), ]

## let's only look at non-aliases (rows with an NA alias flag are left out
## rather than printed as all-NA rows)
print(coronavirus.translations[which(!coronavirus.translations$is.alt),
                               c("label", "langcode")])

Community Data Science Collective || Want to submit a patch?