"""Example analysis: list the translations of "coronavirus".

Downloads the latest keywords dataset and prints the label and language
code of every non-alias entry recorded for the Wikidata item Q57751738.
"""

import pandas as pd

# read the latest dataset
df = pd.read_csv("https://covid19.communitydata.science/datasets/keywords/csv/latest.csv")

# find the rows whose label is "coronavirus" (case-insensitive); several
# different item ids share this label
coronavirus_itemids = df.loc[df["label"].str.lower() == "coronavirus"]

# there are actually 5 item ids. The one referring to the family of virus is Q57751738
coronavirus_translations = df.loc[
    df["itemid"] == "http://www.wikidata.org/entity/Q57751738"
]

# let's only look at the primary labels, i.e. drop the aliases.
# The mask is built from the frame being filtered rather than from the full
# `df`, so the selection does not rely on implicit index re-alignment.
# The elementwise comparison also leaves out rows whose is_alt is missing.
is_primary_label = coronavirus_translations["is_alt"].eq(False)
print(coronavirus_translations.loc[is_primary_label, ["label", "langcode"]])