@comment{Conference paper on the Polish Discourse Corpus (LREC-COLING 2024); the url field points to the ACL Anthology page. Braces inside title/booktitle protect proper nouns and acronyms from style-driven recasing.}
@inproceedings{ogrodniczuk-etal-2024-polish-discourse,
  title     = {{Polish} Discourse Corpus ({PDC}): Corpus Design, {ISO}-Compliant Annotation, Data Highlights, and Parser Development},
  author    = {Ogrodniczuk, Maciej and
               Tomaszewska, Aleksandra and
               Ziembicki, Daniel and
               {\.Z}urowski, Sebastian and
               Tuora, Ryszard and
               Zwierzchowska, Aleksandra},
  editor    = {Calzolari, Nicoletta and
               Kan, Min-Yen and
               Hoste, Veronique and
               Lenci, Alessandro and
               Sakti, Sakriani and
               Xue, Nianwen},
  booktitle = {Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation ({LREC-COLING} 2024)},
  month     = may,
  year      = {2024},
  address   = {Torino, Italia},
  publisher = {ELRA and ICCL},
  url       = {https://aclanthology.org/2024.lrec-main.1123},
  pages     = {12829--12835},
  abstract  = {This paper presents the Polish Discourse Corpus, a pioneering resource of this kind for Polish and the first corpus in Poland to employ the ISO standard for discourse relation annotation. The Polish Discourse Corpus adopts ISO 24617-8, a segment of the Language Resource Management {--} Semantic Annotation Framework (SemAF), which outlines a set of core discourse relations adaptable for diverse languages and genres. The paper overviews the corpus architecture, annotation procedures, the challenges that the annotators have encountered, as well as key statistical data concerning discourse relations and connectives in the corpus. It further discusses the initial phases of the discourse parser tailored for the ISO 24617-8 framework. Evaluations on the efficacy and potential refinement areas of the corpus annotation and parsing strategies are also presented. The final part of the paper touches upon anticipated research plans to improve discourse analysis techniques in the project and to conduct discourse studies involving multiple languages.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 collection containing a single bibliographic record for one conference paper. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="ogrodniczuk-etal-2024-polish-discourse">
<titleInfo>
<title>Polish Discourse Corpus (PDC): Corpus Design, ISO-Compliant Annotation, Data Highlights, and Parser Development</title>
</titleInfo>
<!-- Paper authors: six personal names, each carrying the "author" role. -->
<name type="personal">
<namePart type="given">Maciej</namePart>
<namePart type="family">Ogrodniczuk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aleksandra</namePart>
<namePart type="family">Tomaszewska</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Ziembicki</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sebastian</namePart>
<namePart type="family">Żurowski</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ryszard</namePart>
<namePart type="family">Tuora</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aleksandra</namePart>
<namePart type="family">Zwierzchowska</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<!-- Host item: the proceedings volume this paper appears in, with its editors, publisher and place. -->
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Min-Yen</namePart>
<namePart type="family">Kan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Veronique</namePart>
<namePart type="family">Hoste</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alessandro</namePart>
<namePart type="family">Lenci</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sakriani</namePart>
<namePart type="family">Sakti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nianwen</namePart>
<namePart type="family">Xue</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>ELRA and ICCL</publisher>
<place>
<placeTerm type="text">Torino, Italia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper presents the Polish Discourse Corpus, a pioneering resource of this kind for Polish and the first corpus in Poland to employ the ISO standard for discourse relation annotation. The Polish Discourse Corpus adopts ISO 24617-8, a segment of the Language Resource Management – Semantic Annotation Framework (SemAF), which outlines a set of core discourse relations adaptable for diverse languages and genres. The paper overviews the corpus architecture, annotation procedures, the challenges that the annotators have encountered, as well as key statistical data concerning discourse relations and connectives in the corpus. It further discusses the initial phases of the discourse parser tailored for the ISO 24617-8 framework. Evaluations on the efficacy and potential refinement areas of the corpus annotation and parsing strategies are also presented. The final part of the paper touches upon anticipated research plans to improve discourse analysis techniques in the project and to conduct discourse studies involving multiple languages.</abstract>
<!-- Citation key; same value as the ID attribute on the enclosing mods element. -->
<identifier type="citekey">ogrodniczuk-etal-2024-polish-discourse</identifier>
<location>
<url>https://aclanthology.org/2024.lrec-main.1123</url>
</location>
<!-- Date and page extent (start/end page) for this paper. -->
<part>
<date>2024-05</date>
<extent unit="page">
<start>12829</start>
<end>12835</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Polish Discourse Corpus (PDC): Corpus Design, ISO-Compliant Annotation, Data Highlights, and Parser Development
%A Ogrodniczuk, Maciej
%A Tomaszewska, Aleksandra
%A Ziembicki, Daniel
%A Żurowski, Sebastian
%A Tuora, Ryszard
%A Zwierzchowska, Aleksandra
%Y Calzolari, Nicoletta
%Y Kan, Min-Yen
%Y Hoste, Veronique
%Y Lenci, Alessandro
%Y Sakti, Sakriani
%Y Xue, Nianwen
%S Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)
%D 2024
%8 May
%I ELRA and ICCL
%C Torino, Italia
%F ogrodniczuk-etal-2024-polish-discourse
%X This paper presents the Polish Discourse Corpus, a pioneering resource of this kind for Polish and the first corpus in Poland to employ the ISO standard for discourse relation annotation. The Polish Discourse Corpus adopts ISO 24617-8, a segment of the Language Resource Management – Semantic Annotation Framework (SemAF), which outlines a set of core discourse relations adaptable for diverse languages and genres. The paper overviews the corpus architecture, annotation procedures, the challenges that the annotators have encountered, as well as key statistical data concerning discourse relations and connectives in the corpus. It further discusses the initial phases of the discourse parser tailored for the ISO 24617-8 framework. Evaluations on the efficacy and potential refinement areas of the corpus annotation and parsing strategies are also presented. The final part of the paper touches upon anticipated research plans to improve discourse analysis techniques in the project and to conduct discourse studies involving multiple languages.
%U https://aclanthology.org/2024.lrec-main.1123
%P 12829-12835
Markdown (Informal)
[Polish Discourse Corpus (PDC): Corpus Design, ISO-Compliant Annotation, Data Highlights, and Parser Development](https://aclanthology.org/2024.lrec-main.1123) (Ogrodniczuk et al., LREC-COLING 2024)
ACL
- Maciej Ogrodniczuk, Aleksandra Tomaszewska, Daniel Ziembicki, Sebastian Żurowski, Ryszard Tuora, and Aleksandra Zwierzchowska. 2024. Polish Discourse Corpus (PDC): Corpus Design, ISO-Compliant Annotation, Data Highlights, and Parser Development. In Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024), pages 12829–12835, Torino, Italia. ELRA and ICCL.