% Paper on selecting good dictionary examples for Serbian, using the SASA dictionary.
% The original record had no citation key; one was added (lastnameYYYYkeyword scheme).
% NOTE(review): retyped from "article" to "inproceedings" because the record carries a
% publisher and a place of publication and the container title reads like a proceedings
% volume rather than a journal -- confirm against the handle record in the url field.
@inproceedings{stankovic2019sasa,
  author    = {Stanković, Ranka and Šandrih, Branislava and Stijović, Rada and Krstev, Cvetana and Vitas, Duško and Marković, Aleksandra},
  title     = {{SASA} Dictionary as the Gold Standard for Good Dictionary Examples for {Serbian}},
  booktitle = {Electronic lexicography in the 21st century: Smart lexicography},
  publisher = {Lexical Computing CZ s.r.o.},
  address   = {Brno},
  year      = {2019},
  pages     = {248--269},
  url       = {https://hdl.handle.net/21.15107/rcub_dais_7162},
  abstract  = {In this paper we present a model for selection of good dictionary examples for Serbian and the
    development of initial model components. The method used is based on a thorough analysis of
    various lexical and syntactic features in a corpus compiled of examples from the five digitized
    volumes of the Serbian Academy of Sciences and Arts (SASA) dictionary. The initial set of
    features was inspired by a similar approach for other languages. The feature distribution of
    examples from this corpus is compared with the feature distribution of sentence samples
    extracted from corpora comprising various texts. The analysis showed that there is a group of
    features which are strong indicators that a sentence should not be used as an example. The
    remaining features, including detection of non-standard and other marked lexis from the SASA
    dictionary, are used for ranking. The selected candidate examples, represented as feature vectors,
    are used with the GDEX ranking tool for Serbian candidate examples and a supervised
    machine learning model for classification on standard and non-standard Serbian sentences, for
    further integration into a solution for present and future dictionary production projects.},
}