-
- label_base_query = """
- SELECT DISTINCT ?label WHERE {
- wd:%s rdfs:label ?label;
- }"""
-
- altLabel_base_query = """
- SELECT DISTINCT ?label WHERE {
- wd:%s skos:altLabel ?label;
- }"""
-
- label_results = run_query_and_parse(label_base_query, entityid, is_alt=False)
-
- altLabel_results = run_query_and_parse(altLabel_base_query, entityid, is_alt=True)
-
- return chain(label_results, altLabel_results)
+ def prep_query(query, prop, entityids):  # fill a query template: slot {0} receives prop, slot {1} receives the joined entity terms
+ values = ' '.join(('wd:{0}'.format(id) for id in entityids))  # "wd:<id>" terms separated by single spaces; NOTE(review): `id` shadows the builtin inside this expression, consider renaming
+ return query.format(prop, values)  # positional str.format, so literal braces in the template must be doubled
+
+ base_query = """
+ SELECT DISTINCT ?entity ?label WHERE {{
+ ?entity {0} ?label;
+ VALUES ?entity {{ {1} }}
+ }}"""
+
+ # We can't fetch all the entities in one query, so request them in chunks; 100 per chunk is a tentative size.
+ chunksize = 100
+ entityids = (id for id in entityids)
+ chunk = list(islice(entityids, chunksize))
+ calls = []
+ while len(chunk) > 0:
+ label_query = prep_query(base_query, "rdfs:label", chunk)
+ altLabel_query = prep_query(base_query, "skos:altLabel", chunk)
+ label_results = run_query_and_parse(label_query, is_alt=False)
+ altLabel_results = run_query_and_parse(altLabel_query, is_alt=True)
+ calls.extend([label_results, altLabel_results])
+ chunk = list(islice(entityids, chunksize))
+
+ return chain(*calls)