skipgram model from Li et al. trained on text8
metadata▼
|
fasttext model from Li et al. trained on text8
metadata▼
|
subword model with CNN composition function from Li et al. trained on text8
metadata▼
|
Japanese word embeddings from Karpinska et al. trained on Mainichi+Wiki
metadata▼
|
subword model with LSTM composition function from Li et al. trained on text8
metadata▼
|
Japanese subword-level word embeddings (SG+kanji+bushu) from Karpinska et al. trained on Mainichi+Wiki
metadata▼
|
Japanese subword-level word embeddings (SG+Kanji) from Karpinska et al. trained on Mainichi+Wiki
metadata▼
|
Japanese Similarity dataset (subsets for tokenized Japanese corpora)
metadata▼
bibtex▼
the original dataset
% Sakaizawa & Komachi (LREC 2018): paper introducing the Japanese word similarity dataset.
@inproceedings{SakaizawaKomachi2018,
  author    = {Sakaizawa, Yuya and Komachi, Mamoru},
  title     = {Construction of a {Japanese} Word Similarity Dataset},
  booktitle = {Proceedings of the Eleventh International Conference on Language Resources and Evaluation ({LREC} 2018)},
  address   = {Miyazaki, Japan},
  publisher = {European Language Resources Association (ELRA)},
  year      = {2018},
  pages     = {948--951},
  url       = {http://www.lrec-conf.org/proceedings/lrec2018/pdf/96.pdf},
}
subsets for tokenized corpora
% Karpinska et al. (2018): cited here for the similarity-dataset subsets for tokenized corpora.
@inproceedings{KarpinskaLiEtAl_2018,
  author    = {Karpinska, Marzena and Li, Bofang and Rogers, Anna and Drozd, Aleksandr},
  title     = {Subcharacter Information in {Japanese} embeddings: when is it worth it?},
  booktitle = {Proceedings of the Workshop on the Relevance of Linguistic Structure in Neural Architectures for {NLP}},
  address   = {Melbourne, Australia},
  publisher = {Association for Computational Linguistics},
  year      = {2018},
  pages     = {28--37},
  url       = {http://aclweb.org/anthology/W18-2905},
}
|
Japanese Bigger Analogy Test Set
metadata▼
bibtex▼
the original dataset
% Karpinska et al. (2018): cited here as the source of the Japanese Bigger Analogy Test Set.
@inproceedings{KarpinskaLiEtAl_2018,
  author    = {Karpinska, Marzena and Li, Bofang and Rogers, Anna and Drozd, Aleksandr},
  title     = {Subcharacter Information in {Japanese} embeddings: when is it worth it?},
  booktitle = {Proceedings of the Workshop on the Relevance of Linguistic Structure in Neural Architectures for {NLP}},
  address   = {Melbourne, Australia},
  publisher = {Association for Computational Linguistics},
  year      = {2018},
  pages     = {28--37},
  url       = {http://aclweb.org/anthology/W18-2905},
}
|
Bigger Analogy Test Set
metadata▼
bibtex▼
the original dataset
% Gladkova et al. (2016): paper cited as the source of the Bigger Analogy Test Set.
@inproceedings{GladkovaDrozdEtAl_2016,
  author    = {Gladkova, Anna and Drozd, Aleksandr and Matsuoka, Satoshi},
  title     = {Analogy-based detection of morphological and semantic relations with word embeddings: what works and what doesn't},
  booktitle = {Proceedings of the {NAACL-HLT} {SRW}},
  publisher = {ACL},
  year      = {2016},
  pages     = {47--54},
  doi       = {10.18653/v1/N16-2002},
  url       = {https://www.aclweb.org/anthology/N/N16/N16-2002.pdf},
}
|