Larramendi, Azkoitiko Sermoia: Difference between revisions
(→SPARQL) |
(→Token) |
||
Line 17: | Line 17: | ||
#title: Galdetzen du tokenak non dauden wikisourcen, eta zer lema-formei lotuta dauden | #title: Galdetzen du tokenak non dauden wikisourcen, eta zer lema-formei lotuta dauden | ||
select ?token ?token_zbk ?token_forma ?mlv_lexema (iri(concat('http://www.wikidata.org/entity/',? | select ?token ?token_zbk ?token_forma ?mlv_lexema (iri(concat('http://www.wikidata.org/entity/',?sense_id)) as ?wikidata_sense) | ||
?wd_pos_label | |||
(iri(concat('https://eu.wikisource.org/wiki/',?wikisource)) as ?wikisource_paragraph) | (iri(concat('https://eu.wikisource.org/wiki/',?wikisource)) as ?wikisource_paragraph) | ||
?lemma ?sense ?forma (group_concat(?morph_label;SEPARATOR="-") as ?morph_labels) ?pos_label | ?lemma ?sense ?forma (group_concat(?morph_label;SEPARATOR="-") as ?morph_labels) ?pos_label | ||
(iri(concat('http://www.wikidata.org/entity/',?wd_erref)) as ?wd_ent_erref) | (iri(concat('http://www.wikidata.org/entity/',?wd_erref)) as ?wd_ent_erref) | ||
(concat(?wd_erref_label," (",?class_label,")") as ?wd_erref_info) | (concat(?wd_erref_label," (",?class_label,")") as ?wd_erref_info) | ||
where { | where { | ||
?token mdp:P5 mwb:Q15 ; | ?token mdp:P5 mwb:Q15 ; | ||
Line 30: | Line 31: | ||
optional { ?token mp:P7 ?lemmanode . ?lemmanode mps:P7 ?mlv_lexema. ?mlv_lexema wikibase:lemma ?lemma . | optional { ?token mp:P7 ?lemmanode . ?lemmanode mps:P7 ?mlv_lexema. ?mlv_lexema wikibase:lemma ?lemma . | ||
optional {?mlv_lexema mdp:P1 ?wd_qid .} | optional {?mlv_lexema mdp:P1 ?wd_qid .} | ||
optional {?lemmanode mpq:P155 ?sense_id. ?sense_id skos:definition ?sense .} | optional {?lemmanode mpq:P155 ?sense_id. ?sense_id skos:definition ?sense; mp:P1 [mpq:P153 ?wd_pos]. ?wd_pos rdfs:label ?wd_pos_label. filter(lang(?wd_pos_label) = "eu")} | ||
optional {?lemmanode mpq:P156 ?form_id. ?form_id ontolex:representation ?forma . | optional {?lemmanode mpq:P156 ?form_id. ?form_id ontolex:representation ?forma . | ||
optional {?form_id mdp:P172 ?morph. ?morph rdfs:label ?morph_label. filter(lang(?morph_label) = "eu")} | optional {?form_id mdp:P172 ?morph. ?morph rdfs:label ?morph_label. filter(lang(?morph_label) = "eu")} | ||
Line 41: | Line 42: | ||
select ?item ?wd_erref_label (sample(?class_l) as ?class_label) | select ?item ?wd_erref_label (sample(?class_l) as ?class_label) | ||
where {?item rdfs:label ?wd_erref_label. filter(lang(?wd_erref_label) = "eu") | where {?item rdfs:label ?wd_erref_label. filter(lang(?wd_erref_label) = "eu") | ||
?item wdt:P31/rdfs:label ?class_l. filter(lang(?class_l) = "eu")} | ?item wdt:P31/rdfs:label|wdt:P279/rdfs:label ?class_l. filter(lang(?class_l) = "eu")} | ||
group by ?item ?wd_erref_label ?class_label | group by ?item ?wd_erref_label ?class_label | ||
} | } | ||
} | } | ||
} group by ?token ?token_zbk ?token_forma ?mlv_lexema ? | } group by ?token ?token_zbk ?token_forma ?mlv_lexema ?sense_id ?wd_pos_label ?wikisource ?lemma ?sense ?forma ?morph_labels ?pos_label ?wd_erref ?wd_erref_label ?class_label | ||
order by xsd:integer(?token_zbk) | order by xsd:integer(?token_zbk) | ||
</sparql> | </sparql> |
Revision as of 23:28, 16 November 2023
Testu historikoen edukiak errepresentatzeko eta anotazioez aberasteko datu-eredu bat garatzeko asmotan, Larramendiren Azkoitiko Sermoia hartu dugu adibide. Wikitekan (euskarazko Wikisourcen), eskuizkribua eta transkribapena ditugu, eta hemen, MLV Wikibase honetan, transkribaketaren tokenak (hau da, hitzak eta interpuntzio ikurrak segmentu banatan jasotzen duen zatiketa, modu bertikalean errepresentatu daitekeena, aspalditik usadioa den legez; ikus, adibidez, CoNLL formatua). Galdeketak bistarazten duen taularen atzetik, Datu Lotuak daude, hau da, hirukote semantikoak. Corpus datuak Datu Lotu gisan jasotzeko proposatzen dugun eredu honetan, Linguistic Linked Data arloko azkenengo proposamenak hartzen ditugu aintzat (ikus Stanković et al. 2023).
SPARQL
Token
Erabili galdeketa hau Azkoitiko Sermoiaren tokenak eta anotazioak ikusteko.
# Token listing: one row per token (items with P5 = Q15) together with its
# order number, surface form, Wikisource paragraph, optional lemma / sense /
# form annotations and, when present, the Wikidata entity it refers to.
#
# The prefixes below are declared explicitly for this Wikibase instance.
# wd:, wdt:, wikibase:, skos:, ontolex:, rdfs: and xsd: are used without a
# declaration, so the query relies on the endpoint predefining them.
PREFIX mwb: <https://monumenta.wikibase.cloud/entity/>
PREFIX mdp: <https://monumenta.wikibase.cloud/prop/direct/>
PREFIX mp: <https://monumenta.wikibase.cloud/prop/>
PREFIX mps: <https://monumenta.wikibase.cloud/prop/statement/>
PREFIX mpq: <https://monumenta.wikibase.cloud/prop/qualifier/>
PREFIX mpr: <https://monumenta.wikibase.cloud/prop/reference/>
PREFIX mno: <https://monumenta.wikibase.cloud/prop/novalue/>
#title: Galdetzen du tokenak non dauden wikisourcen, eta zer lema-formei lotuta dauden
select
  ?token ?token_zbk ?token_forma ?mlv_lexema
  # CONCAT only accepts literals, and ?sense_id is an IRI (it is the subject
  # of skos:definition below), so the link is built from the string value of
  # the sense's P1 statement instead.
  # NOTE(review): assumes the P1 statement value on a sense is the Wikidata
  # identifier, like mdp:P1 on lexemes - confirm against the data model.
  (iri(concat('http://www.wikidata.org/entity/', ?wd_sense_qid)) as ?wikidata_sense)
  ?wd_pos_label
  (iri(concat('https://eu.wikisource.org/wiki/', ?wikisource)) as ?wikisource_paragraph)
  ?lemma ?sense ?forma
  # All morphological labels of the linked form, joined with "-".
  (group_concat(?morph_label; SEPARATOR="-") as ?morph_labels)
  ?pos_label
  (iri(concat('http://www.wikidata.org/entity/', ?wd_erref)) as ?wd_ent_erref)
  (concat(?wd_erref_label, " (", ?class_label, ")") as ?wd_erref_info)
where {
  # Mandatory token data: type, order number, surface form, Wikisource page.
  ?token mdp:P5 mwb:Q15 ;
         mdp:P148 ?token_zbk ;
         mdp:P147 ?token_forma ;
         mdp:P177 ?wikisource .

  # Lemma link (P7 statement) with its sense (P155) and form (P156) qualifiers.
  optional {
    ?token mp:P7 ?lemmanode .
    ?lemmanode mps:P7 ?mlv_lexema .
    ?mlv_lexema wikibase:lemma ?lemma .

    # Sense: definition plus the P1 statement, whose value feeds
    # ?wikidata_sense and whose P153 qualifier gives the part of speech.
    optional {
      ?lemmanode mpq:P155 ?sense_id .
      ?sense_id skos:definition ?sense ;
                mp:P1 [ mps:P1 ?wd_sense_qid ; mpq:P153 ?wd_pos ] .
      ?wd_pos rdfs:label ?wd_pos_label .
      filter(lang(?wd_pos_label) = "eu")
    }

    # Form: written representation plus Basque labels of its P172 and P173 values.
    optional {
      ?lemmanode mpq:P156 ?form_id .
      ?form_id ontolex:representation ?forma .
      optional {
        ?form_id mdp:P172 ?morph .
        ?morph rdfs:label ?morph_label .
        filter(lang(?morph_label) = "eu")
      }
      optional {
        ?form_id mdp:P173 ?pos .
        ?pos rdfs:label ?pos_label .
        filter(lang(?pos_label) = "eu")
      }
    }
  }

  # Referent: P178 holds an identifier string that is turned into a wd: IRI
  # and resolved against the Wikidata endpoint for its Basque label and the
  # Basque label of one of its P31 / P279 classes.
  optional {
    ?token mdp:P178 ?wd_erref .
    bind(iri(concat(str(wd:), ?wd_erref)) as ?item)
    SERVICE <https://query.wikidata.org/sparql> {
      select ?item ?wd_erref_label (sample(?class_l) as ?class_label)
      where {
        ?item rdfs:label ?wd_erref_label .
        filter(lang(?wd_erref_label) = "eu")
        ?item (wdt:P31|wdt:P279)/rdfs:label ?class_l .
        filter(lang(?class_l) = "eu")
      }
      # Only the grouping keys here; ?class_label is the aggregate's alias.
      group by ?item ?wd_erref_label
    }
  }
}
# Every non-aggregated projected variable; the aggregate alias ?morph_labels
# is deliberately not a grouping key.
group by ?token ?token_zbk ?token_forma ?mlv_lexema ?sense_id ?wd_sense_qid ?wd_pos_label
         ?wikisource ?lemma ?sense ?forma ?pos_label ?wd_erref ?wd_erref_label ?class_label
order by xsd:integer(?token_zbk)
Span
Erabili galdeketa hau Azkoitiko Sermoiaren spanak (anotazioa partekatzen duten token-multzoak) ikusteko.
# Span listing: one row per span (items with P5 = Q20) with the tokens it
# covers, their surface forms, a Wikisource paragraph link and, when present,
# the Wikidata entity the span refers to.
#
# The prefixes below are declared explicitly for this Wikibase instance.
# wd:, wdt: and rdfs: are used without a declaration, so the query relies on
# the endpoint predefining them.
PREFIX mwb: <https://monumenta.wikibase.cloud/entity/>
PREFIX mdp: <https://monumenta.wikibase.cloud/prop/direct/>
PREFIX mp: <https://monumenta.wikibase.cloud/prop/>
PREFIX mps: <https://monumenta.wikibase.cloud/prop/statement/>
PREFIX mpq: <https://monumenta.wikibase.cloud/prop/qualifier/>
PREFIX mpr: <https://monumenta.wikibase.cloud/prop/reference/>
PREFIX mno: <https://monumenta.wikibase.cloud/prop/novalue/>
#title: Spanak zerrendatzen ditu, zer token hartzen dituzten barne, eta zer anotazio duten
select
  ?span
  # Local ids of the member tokens (entity namespace stripped).
  (group_concat(strafter(str(?token), str(mwb:))) as ?span_tokenak)
  # "ord:form" pairs; GROUP_CONCAT does not guarantee an order, so the ord
  # prefix is what identifies each token's position within the span.
  (group_concat(?num_forma) as ?span_forma)
  (iri(concat('https://eu.wikisource.org/wiki/', sample(?wikisource))) as ?wikisource_paragraph)
  (iri(concat('http://www.wikidata.org/entity/', ?wd_erref)) as ?wd_ent_erref)
  (concat(?wd_erref_label, " (", ?class_label, ")") as ?wd_erref_info)
where {
  # Span membership: each P174 statement links a token, qualified by P32 (?ord).
  ?span mdp:P5 mwb:Q20 ;
        mp:P174 [ mps:P174 ?token ; mpq:P32 ?ord ] .
  ?token mdp:P147 ?token_forma ;
         mdp:P177 ?wikisource .
  # str() keeps CONCAT valid even if ?ord is stored as a non-string literal.
  bind(concat(str(?ord), ":", ?token_forma) as ?num_forma)

  # Referent: P178 holds an identifier string that is turned into a wd: IRI
  # and resolved against the Wikidata endpoint for its Basque label and the
  # Basque label of one of its P31 / P279 classes (same path as the token query).
  optional {
    ?span mdp:P178 ?wd_erref .
    bind(iri(concat(str(wd:), ?wd_erref)) as ?item)
    SERVICE <https://query.wikidata.org/sparql> {
      select ?item ?wd_erref_label (sample(?class_l) as ?class_label)
      where {
        ?item rdfs:label ?wd_erref_label .
        filter(lang(?wd_erref_label) = "eu")
        ?item (wdt:P31|wdt:P279)/rdfs:label ?class_l .
        filter(lang(?class_l) = "eu")
      }
      # Only the grouping keys here; ?class_label is the aggregate's alias.
      group by ?item ?wd_erref_label
    }
  }
}
# Aggregate aliases (?span_tokenak, ?span_forma) are deliberately not grouping keys.
group by ?span ?wd_erref ?wd_erref_label ?class_label