An overview of R libraries to query Wikidata (27.1.2019)
https://www.lehir.net/how-to-query-wikidata-in-r/
=> https://www.lehir.net/how-to-query-wikidata-in-r/#summary
> WikidataR
https://github.com/Ironholds/WikidataR
https://cran.r-project.org/web/packages/WikidataR/index.html
https://cran.r-project.org/web/packages/WikidataR/WikidataR.pdf
> WikidataQueryServiceR
https://github.com/bearloga/WikidataQueryServiceR
https://cran.r-project.org/web/packages/WikidataQueryServiceR/index.html
https://cran.r-project.org/web/packages/WikidataQueryServiceR/WikidataQueryServiceR.pdf
> SPARQL
https://cran.r-project.org/web/packages/SPARQL/index.html
https://cran.r-project.org/web/packages/SPARQL/SPARQL.pdf
# Import libraries
library(WikidataQueryServiceR) ## This is an R wrapper for the Wikidata Query Service (WDQS) which provides a way for tools to query Wikidata via SPARQL.
library(SPARQL) ## Load SPARQL SELECT query result tables as a data frame, or UPDATE the triple store by connecting to an end-point over HTTP.
library(tidyverse) ## A collection of R packages designed for data science
library(sf) ## GIS vector library
library(tmap) ## This package offers a flexible, layer-based, and easy to use approach to create thematic maps
library(stringr) ## The stringr package provides a cohesive set of functions designed to make working with strings as easy as possible
library(DT) ## Data objects in R can be rendered as HTML tables using the JavaScript library 'DataTables'
# Number formatting
# A large positive `scipen` penalty biases printing towards fixed notation
# over scientific notation; `digits` sets the number of significant digits
# used when printing numbers.
options(
  scipen = 1000000,
  digits = 6
)
## WikidataQueryServiceR
# Run the museum query through WikidataQueryServiceR::query_wikidata() and
# record how long the call takes. The query returns item, German label,
# raw coordinate literal, latitude and longitude, ordered by label.
start.time <- Sys.time()
wdqs_query <- 'SELECT DISTINCT ?item ?name ?coord ?lat ?lon
WHERE
{
hint:Query hint:optimizer "None" .
?item wdt:P131* wd:Q11943 .
?item wdt:P31/wdt:P279* wd:Q33506 .
?item wdt:P625 ?coord .
?item p:P625 ?coordinate .
?coordinate psv:P625 ?coordinate_node .
?coordinate_node wikibase:geoLatitude ?lat .
?coordinate_node wikibase:geoLongitude ?lon .
SERVICE wikibase:label {
bd:serviceParam wikibase:language "de" .
?item rdfs:label ?name
}
}
ORDER BY ASC (?name)'
museum1_df <- WikidataQueryServiceR::query_wikidata(wdqs_query)
end.time <- Sys.time()
# Elapsed wall-clock time between the two timestamps (units chosen automatically).
time.taken <- difftime(end.time, start.time)
time.taken
## Time difference of 2.16801 secs
# Render the result as an interactive HTML table.
DT::datatable(museum1_df)
## SPARQL
# Same museum query as in the WikidataQueryServiceR section, this time sent
# with the SPARQL package directly to the Wikidata endpoint, again timed.
start.time <- Sys.time()
endpoint <- "https://query.wikidata.org/sparql"
query <- 'SELECT DISTINCT ?item ?name ?coord ?lat ?lon
WHERE
{
hint:Query hint:optimizer "None" .
?item wdt:P131* wd:Q11943 .
?item wdt:P31/wdt:P279* wd:Q33506 .
?item wdt:P625 ?coord .
?item p:P625 ?coordinate .
?coordinate psv:P625 ?coordinate_node .
?coordinate_node wikibase:geoLatitude ?lat .
?coordinate_node wikibase:geoLongitude ?lon .
SERVICE wikibase:label {
bd:serviceParam wikibase:language "de" .
?item rdfs:label ?name
}
}
ORDER BY ASC (?name) '
# The R version string is sent as the HTTP user agent.
museum2 <- SPARQL::SPARQL(
  url = endpoint,
  query = query,
  curl_args = list(useragent = R.version.string)
)
# SPARQL() returns a list; the data frame lives in its `results` element.
museum2_df <- museum2[["results"]]
end.time <- Sys.time()
time.taken <- difftime(end.time, start.time)
time.taken
## Time difference of 2.48332 secs
DT::datatable(museum2_df)
http://yasgui.org/short/fg7fNak6G
## SPARQL
# Query the ld.geo.admin.ch endpoint for municipalities (feature code
# gn:A.ADM3) whose parent canton is named "Zürich" and whose record was issued
# on 2019-01-01. Returns the municipality IRI, name, population, BFS number
# and the geometry as a WKT literal.
endpoint <- "https://ld.geo.admin.ch/query"
# The FILTER below uses xsd:date, so the xsd prefix is declared explicitly;
# without it the query only parses on endpoints that predefine that prefix.
query <- 'PREFIX schema: <http://schema.org/>
PREFIX gn: <http://www.geonames.org/ontology#>
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
PREFIX wd: <http://www.wikidata.org/entity/>
PREFIX wikibase: <http://wikiba.se/ontology#>
PREFIX geo: <http://www.opengis.net/ont/geosparql#>
PREFIX dct: <http://purl.org/dc/terms/>
PREFIX st: <https://ld.geo.admin.ch/def/>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
select ?Municipality ?Name ?Population ?bfs ?WKT
where{
?Municipality gn:featureCode gn:A.ADM3 .
?Municipality schema:name ?Name .
?Municipality gn:population?Population .
?Municipality st:bfsNumber ?bfs .
?Municipality dct:issued ?Date .
?Municipality gn:parentADM1 ?InCanton .
?InCanton schema:name ?CantonName .
?Municipality geo:hasGeometry ?Geometry .
?Geometry geo:asWKT ?WKT .
FILTER (?Date = "2019-01-01"^^xsd:date)
FILTER (?CantonName = "Zürich")
}'
municipality <- SPARQL::SPARQL(endpoint, query)
# SPARQL() returns a list; the data frame lives in its `results` element.
municipality_df <- municipality$results
DT::datatable(municipality_df)
## museum
# Turn the museum table into point features using its lon/lat columns
# (declared as EPSG:4326), then reproject the points to EPSG:2056.
museum_wgs84 <- sf::st_as_sf(
  x = museum2_df,
  coords = c("lon", "lat"),
  crs = 4326
)
museum_sf <- sf::st_transform(museum_wgs84, 2056)
museum_sf
## Simple feature collection with 102 features and 3 fields
## geometry type: POINT
## dimension: XY
## bbox: xmin: 2670720 ymin: 1232780 xmax: 2708900 ymax: 1269530
## CRS: EPSG:2056
## First 10 features:
## item
## 1 <http://www.wikidata.org/entity/Q27490199>
## 2 <http://www.wikidata.org/entity/Q820463>
## 3 <http://www.wikidata.org/entity/Q820482>
## 4 <http://www.wikidata.org/entity/Q686324>
## 5 <http://www.wikidata.org/entity/Q18018874>
## 6 <http://www.wikidata.org/entity/Q14553724>
## 7 <http://www.wikidata.org/entity/Q1158760>
## 8 <http://www.wikidata.org/entity/Q85986333>
## 9 <http://www.wikidata.org/entity/Q22984572>
## 10 <http://www.wikidata.org/entity/Q1408868>
## name
## 1 "Archäologische Sammlung der Universität Zürich"@de
## 2 "Bergwerk Käpfnach"@de
## 3 "Bergwerk Riedhof"@de
## 4 "Botanischer Garten Zürich"@de
## 5 "Bruno Weber Park"@de
## 6 "Burg Maur"@de
## 7 "Dampfbahn-Verein Zürcher Oberland"@de
## 8 "FCZ-Museum"@de
## 9 "FIFA World Football Museum"@de
## 10 "Festung Ebersberg"@de
## coord
## 1 "Point(8.548931 47.375186)"^^<http://www.opengis.net/ont/geosparql#wktLiteral>
## 2 "Point(8.613055555 47.253055555)"^^<http://www.opengis.net/ont/geosparql#wktLiteral>
## 3 "Point(8.480582 47.287284)"^^<http://www.opengis.net/ont/geosparql#wktLiteral>
## 4 "Point(8.561105555 47.358647222)"^^<http://www.opengis.net/ont/geosparql#wktLiteral>
## 5 "Point(8.52003 47.3132)"^^<http://www.opengis.net/ont/geosparql#wktLiteral>
## 6 "Point(8.668592 47.338512)"^^<http://www.opengis.net/ont/geosparql#wktLiteral>
## 7 "Point(8.8805 47.368561)"^^<http://www.opengis.net/ont/geosparql#wktLiteral>
## 8 "Point(8.530333333 47.372627777)"^^<http://www.opengis.net/ont/geosparql#wktLiteral>
## 9 "Point(8.531749 47.363509)"^^<http://www.opengis.net/ont/geosparql#wktLiteral>
## 10 "Point(8.57978 47.5709)"^^<http://www.opengis.net/ont/geosparql#wktLiteral>
## geometry
## 1 POINT (2683853 1247743)
## 2 POINT (2688898 1234236)
## 3 POINT (2678821 1237900)
## 4 POINT (2684798 1245917)
## 5 POINT (2681765 1240821)
## 6 POINT (2692953 1243801)
## 7 POINT (2708904 1247414)
## 8 POINT (2682452 1247439)
## 9 POINT (2682573 1246426)
## 10 POINT (2685866 1269534)
## municipality
# Show the first geometry value exactly as the endpoint returned it, to see
# the quoting and datatype suffix wrapped around the WKT text.
municipality_df[["WKT"]][1]
## [1] "\"POLYGON((8.461406460982 47.515271792943,8.4587031704404 47.518308822118,8.4530176224062 47.519555030139,8.4521752924411 47.525516809131,8.4431226290276 47.533447882693,8.4452505344461 47.535003457824,8.439923809883 47.536371802718,8.4404225106229 47.5381663938,8.4460904740775 47.53867491537,8.4455015797037 47.541243467794,8.449965583293 47.543381267141,8.4469886962367 47.54471140855,8.4508700923011 47.544839473944,8.4525939373948 47.548853938257,8.4555729257398 47.549016723079,8.45609516275 47.558249468141,8.4604995313228 47.555809202919,8.471156074218 47.559933184428,8.4727738405438 47.556347952441,8.4907800077793 47.550420418974,8.5001958399639 47.542923037684,8.4951343628205 47.540900540653,8.4970830754402 47.535369074612,8.4906797209677 47.530597293835,8.4907713759442 47.528518707439,8.4887387761008 47.523104401217,8.4670326324631 47.521284341212,8.4672876350712 47.519366227264,8.461406460982 47.515271792943))\"^^<http://www.openlinksw.com/schemas/virtrdf#Geometry>"
# Keep only the text between the first pair of double quotes of each literal
# (capture group 1, i.e. column 2 of the match matrix). This drops the
# surrounding quotes and the trailing ^^<datatype> suffix.
wkt_match <- stringr::str_match(municipality_df[["WKT"]], '\"(.*?)\"')
municipality_df[["WKT_corrected"]] <- wkt_match[, 2]
municipality_df[["WKT_corrected"]][1]
## [1] "POLYGON((8.461406460982 47.515271792943,8.4587031704404 47.518308822118,8.4530176224062 47.519555030139,8.4521752924411 47.525516809131,8.4431226290276 47.533447882693,8.4452505344461 47.535003457824,8.439923809883 47.536371802718,8.4404225106229 47.5381663938,8.4460904740775 47.53867491537,8.4455015797037 47.541243467794,8.449965583293 47.543381267141,8.4469886962367 47.54471140855,8.4508700923011 47.544839473944,8.4525939373948 47.548853938257,8.4555729257398 47.549016723079,8.45609516275 47.558249468141,8.4604995313228 47.555809202919,8.471156074218 47.559933184428,8.4727738405438 47.556347952441,8.4907800077793 47.550420418974,8.5001958399639 47.542923037684,8.4951343628205 47.540900540653,8.4970830754402 47.535369074612,8.4906797209677 47.530597293835,8.4907713759442 47.528518707439,8.4887387761008 47.523104401217,8.4670326324631 47.521284341212,8.4672876350712 47.519366227264,8.461406460982 47.515271792943))"
# Parse the cleaned WKT strings into a geometry column (EPSG:4326).
municipality_sfc <- sf::st_as_sfc(municipality_df$WKT_corrected, crs = 4326)
# Attribute table without the two text geometry columns.
municipality_attrs <- dplyr::select(municipality_df, -WKT, -WKT_corrected)
# Combine attributes and geometries, then reproject to EPSG:2056 so the
# polygons share the CRS of the museum points.
municipality_sf <- sf::st_transform(
  sf::st_sf(municipality_attrs, geometry = municipality_sfc),
  2056
)
municipality_sf
## Simple feature collection with 162 features and 4 fields
## geometry type: GEOMETRY
## dimension: XY
## bbox: xmin: 2669260 ymin: 1223900 xmax: 2716910 ymax: 1283360
## CRS: EPSG:2056
## First 10 features:
## Municipality Name
## 1 <https://ld.geo.admin.ch/boundaries/municipality/100:2019> Stadel
## 2 <https://ld.geo.admin.ch/boundaries/municipality/101:2019> Steinmaur
## 3 <https://ld.geo.admin.ch/boundaries/municipality/102:2019> Weiach
## 4 <https://ld.geo.admin.ch/boundaries/municipality/10:2019> Obfelden
## 5 <https://ld.geo.admin.ch/boundaries/municipality/111:2019> Bäretswil
## 6 <https://ld.geo.admin.ch/boundaries/municipality/112:2019> Bubikon
## 7 <https://ld.geo.admin.ch/boundaries/municipality/113:2019> Dürnten
## 8 <https://ld.geo.admin.ch/boundaries/municipality/114:2019> Fischenthal
## 9 <https://ld.geo.admin.ch/boundaries/municipality/115:2019> Gossau (ZH)
## 10 <https://ld.geo.admin.ch/boundaries/municipality/116:2019> Grüningen
## Population bfs geometry
## 1 2280 100 POLYGON ((2677040 1263227, ...
## 2 3482 101 POLYGON ((2678167 1260574, ...
## 3 1756 102 POLYGON ((2673733 1268649, ...
## 4 5356 10 POLYGON ((2673859 1233032, ...
## 5 5038 111 POLYGON ((2706232 1242705, ...
## 6 7200 112 POLYGON ((2703756 1233911, ...
## 7 7570 113 POLYGON ((2709905 1236485, ...
## 8 2512 114 POLYGON ((2711223 1241032, ...
## 9 9968 115 POLYGON ((2700164 1242624, ...
## 10 3382 116 POLYGON ((2698987 1236040, ...
# Plot result: R base plot
# The municipality outlines are drawn first to set up the plot region, the
# museum points are added on top, and the outlines are drawn once more so the
# borders stay visible over the points.
plot(st_geometry(municipality_sf))
plot(st_geometry(museum_sf), pch = 19, col = "blue", cex = 0.5, add = TRUE)
plot(st_geometry(municipality_sf), add = TRUE)
# Legend anchored at fixed map coordinates (same CRS as the plotted layers).
# Entry 1 is a point symbol only, entry 2 a line only, hence the NA values.
legend(
  x = 2708000, y = 1287500,
  legend = c("Museum", "Municipality"),
  lty = c(NA, 1),
  pch = c(19, NA),
  cex = .8,
  col = c("blue", "black"),
  bty = "n"
)
# Spatial Join: instead of joining data frames via an equal ID we join data
# frames based on an equal location. Each museum point receives the
# attributes of the municipality polygon it intersects; points without a
# matching polygon are kept (left join).
spjoin_sf <- sf::st_join(
  x = museum_sf,
  y = municipality_sf,
  join = sf::st_intersects,
  left = TRUE
)
spjoin_sf
## Simple feature collection with 102 features and 7 fields
## geometry type: POINT
## dimension: XY
## bbox: xmin: 2670720 ymin: 1232780 xmax: 2708900 ymax: 1269530
## CRS: EPSG:2056
## First 10 features:
## item
## 1 <http://www.wikidata.org/entity/Q27490199>
## 2 <http://www.wikidata.org/entity/Q820463>
## 3 <http://www.wikidata.org/entity/Q820482>
## 4 <http://www.wikidata.org/entity/Q686324>
## 5 <http://www.wikidata.org/entity/Q18018874>
## 6 <http://www.wikidata.org/entity/Q14553724>
## 7 <http://www.wikidata.org/entity/Q1158760>
## 8 <http://www.wikidata.org/entity/Q85986333>
## 9 <http://www.wikidata.org/entity/Q22984572>
## 10 <http://www.wikidata.org/entity/Q1408868>
## name
## 1 "Archäologische Sammlung der Universität Zürich"@de
## 2 "Bergwerk Käpfnach"@de
## 3 "Bergwerk Riedhof"@de
## 4 "Botanischer Garten Zürich"@de
## 5 "Bruno Weber Park"@de
## 6 "Burg Maur"@de
## 7 "Dampfbahn-Verein Zürcher Oberland"@de
## 8 "FCZ-Museum"@de
## 9 "FIFA World Football Museum"@de
## 10 "Festung Ebersberg"@de
## coord
## 1 "Point(8.548931 47.375186)"^^<http://www.opengis.net/ont/geosparql#wktLiteral>
## 2 "Point(8.613055555 47.253055555)"^^<http://www.opengis.net/ont/geosparql#wktLiteral>
## 3 "Point(8.480582 47.287284)"^^<http://www.opengis.net/ont/geosparql#wktLiteral>
## 4 "Point(8.561105555 47.358647222)"^^<http://www.opengis.net/ont/geosparql#wktLiteral>
## 5 "Point(8.52003 47.3132)"^^<http://www.opengis.net/ont/geosparql#wktLiteral>
## 6 "Point(8.668592 47.338512)"^^<http://www.opengis.net/ont/geosparql#wktLiteral>
## 7 "Point(8.8805 47.368561)"^^<http://www.opengis.net/ont/geosparql#wktLiteral>
## 8 "Point(8.530333333 47.372627777)"^^<http://www.opengis.net/ont/geosparql#wktLiteral>
## 9 "Point(8.531749 47.363509)"^^<http://www.opengis.net/ont/geosparql#wktLiteral>
## 10 "Point(8.57978 47.5709)"^^<http://www.opengis.net/ont/geosparql#wktLiteral>
## Municipality Name
## 1 <https://ld.geo.admin.ch/boundaries/municipality/261:2019> Zürich
## 2 <https://ld.geo.admin.ch/boundaries/municipality/295:2019> Horgen
## 3 <https://ld.geo.admin.ch/boundaries/municipality/1:2019> Aeugst am Albis
## 4 <https://ld.geo.admin.ch/boundaries/municipality/261:2019> Zürich
## 5 <https://ld.geo.admin.ch/boundaries/municipality/131:2019> Adliswil
## 6 <https://ld.geo.admin.ch/boundaries/municipality/195:2019> Maur
## 7 <https://ld.geo.admin.ch/boundaries/municipality/297:2019> Bauma
## 8 <https://ld.geo.admin.ch/boundaries/municipality/261:2019> Zürich
## 9 <https://ld.geo.admin.ch/boundaries/municipality/261:2019> Zürich
## 10 <https://ld.geo.admin.ch/boundaries/municipality/23:2019> Berg am Irchel
## Population bfs geometry
## 1 409241 261 POINT (2683853 1247743)
## 2 22514 295 POINT (2688898 1234236)
## 3 1941 1 POINT (2678821 1237900)
## 4 409241 261 POINT (2684798 1245917)
## 5 18803 131 POINT (2681765 1240821)
## 6 10170 195 POINT (2692953 1243801)
## 7 4939 297 POINT (2708904 1247414)
## 8 409241 261 POINT (2682452 1247439)
## 9 409241 261 POINT (2682573 1246426)
## 10 564 23 POINT (2685866 1269534)
# Density calculation
# > 1. Count points per polygon
# Group the joined museum points by BFS number and count the rows per group.
pts_count <- dplyr::summarise(
  dplyr::group_by(spjoin_sf, bfs),
  count = dplyr::n()
)
# Attach the counts to the polygons as a plain table (geometry dropped).
# Municipalities with no museum end up with NA in `count`.
museum_counts <- sf::st_set_geometry(pts_count, NULL)
municipality_sf <- dplyr::left_join(municipality_sf, museum_counts, by = "bfs")
# > 2. Calculate area of polygon
# as.vector() strips the units class so the area is a bare number.
municipality_sf <- dplyr::mutate(
  municipality_sf,
  mun_area_m2 = as.vector(sf::st_area(municipality_sf))
)
# > 3. Calculate density: count/area
# Multiplying by 1e6 converts museums per m2 into museums per km2.
municipality_sf$density <- municipality_sf$count / municipality_sf$mun_area_m2 * 1e6
# Plot result: tmap
# > tmap static
# Choropleth of museum density per municipality, classified into quantile
# classes. Municipalities whose density is NA are filled grey and labelled
# "No Museum". The legend (with a histogram) is placed outside the map frame.
tmap::tm_shape(municipality_sf) +
  tmap::tm_fill(
    "density",
    title = "Number of Museums per km2 \n(Classification Method: Quantile)",
    style = "quantile",
    n = 4, # preferred number of classes
    palette = "YlGnBu",
    colorNA = "grey90",
    textNA = "No Museum",
    legend.hist = TRUE # no trailing comma: it would pass an empty argument
  ) +
  tmap::tm_borders() +
  tmap::tm_legend(outside = TRUE, hist.width = 3) +
  tmap::tm_layout(
    main.title = "Density of Museum",
    frame = FALSE,
    legend.position = c("right", "top"),
    legend.outside = TRUE
  )
# Export Data as shp
# Write both layers as shapefiles into the working directory;
# delete_layer = TRUE replaces a layer left over from an earlier run.
sf::st_write(obj = museum_sf, dsn = "./museum.shp", delete_layer = TRUE)
## Deleting layer `museum' using driver `ESRI Shapefile'
## Writing layer `museum' to data source `./museum.shp' using driver `ESRI Shapefile'
## Writing 102 features with 3 fields and geometry type Point.
sf::st_write(obj = municipality_sf, dsn = "./municipality.shp", delete_layer = TRUE)
## Deleting layer `municipality' using driver `ESRI Shapefile'
## Writing layer `municipality' to data source `./municipality.shp' using driver `ESRI Shapefile'
## Writing 162 features with 7 fields and geometry type Unknown (any).
Thank you @csarasuagar! =D
# municipality
# https://w.wiki/BA8
# List statements on items of class wd:Q70208 whose reference URL (pr:P854)
# contains "statistik.zh.ch". A start time is recorded here, but no elapsed
# time is computed in this section.
start.time <- Sys.time()
endpoint <- "https://query.wikidata.org/sparql"
query <- 'SELECT *
{
?ch wdt:P31 wd:Q70208 .
OPTIONAL {?ch wdt:P17 wd:Q39.}
?ch ?prop ?statement .
?statement prov:wasDerivedFrom ?refnode.
?refnode pr:P854 ?ref. #pr:P248 #pr:P854
FILTER (CONTAINS(str(?ref),"statistik.zh.ch"))
}
order by ?ch
'
# The R version string is sent as the HTTP user agent.
source_municipality <- SPARQL::SPARQL(
  url = endpoint,
  query = query,
  curl_args = list(useragent = R.version.string)
)
source_municipality_df <- source_municipality[["results"]]
DT::datatable(source_municipality_df)
# city
# https://w.wiki/BA7
# List statements on items of class wd:Q54935504 whose reference URL (pr:P854)
# contains "statistik.zh.ch". A start time is recorded here, but no elapsed
# time is computed in this section.
start.time <- Sys.time()
endpoint <- "https://query.wikidata.org/sparql"
query <- 'SELECT *
WHERE
{
?ch wdt:P31 wd:Q54935504 .
OPTIONAL {?ch wdt:P17 wd:Q39.}
?ch ?prop ?statement .
?statement prov:wasDerivedFrom ?refnode.
?refnode pr:P854 ?ref. #pr:P248 #pr:P854
FILTER (CONTAINS(str(?ref),"statistik.zh.ch"))
}
order by ?ch
'
# The R version string is sent as the HTTP user agent.
source_city <- SPARQL::SPARQL(
  url = endpoint,
  query = query,
  curl_args = list(useragent = R.version.string)
)
source_city_df <- source_city[["results"]]
DT::datatable(source_city_df)