index

Vecto resource library

skipgram model from Li et al. trained on text8 metadata▼ { "class": "embeddings", "url": "https://drive.google.com/open?id=1KCQvn2zM1iO8tkGTTFzomvtsWC8TwLsg", "description": "skipgram model from Li et al. trained on text8", "batchsize": 1000, "context_representation": "word", "context_type": "linear", "dimensions": 300, "dropout": 0.0, "embeddings_type": "none", "epoch": 5, "execution_time": 6721.362618938088, "gpu": 0, "max_tokens_length": 20, "model": "skipgram", "negative_size": 5, "out_type": "ns", "path_corpus": "/home/users/bofang/work/data/NLP/corpora/raw_texts/Eng/text8/", "path_out": "/home/users/bofang/work/data/NLP/embeddings/Bofang/experiments/text7/_none/1/1/w3r/u300/e5/d0/", "path_vocab": "/home/users/bofang/work/data/NLP/vocabs/raw_texts/Eng/text8/normal/", "path_vocab_ngram_tokens": "/home/users/bofang/work/data/NLP/vocabs/raw_texts/Eng/text8/ngram_tokens/1/1/", "subword": "_none", "test": false, "verbose": false, "vocab_ngram_tokens": { "cnt_words": 28, "execution_time": 85.39216637611389, "max_gram": 1, "min_frequency": 5, "min_gram": 1, "path_source": "/work/data/NLP/corpora/raw_texts/Eng/text8/", "timestamp": "2017-12-05T15:55:47.055930", "vsmlib_version": "0.1.20" }, "vocabulary": { "cnt_words": 71290, "execution_time": 17.662477016448975, "min_frequency": 5, "path_source": "/work/data/NLP/corpora/raw_texts/Eng/text8/", "timestamp": "2017-12-05T15:51:28.036162", "vsmlib_version": "0.1.20" }, "vsmlib_version": "0.1.23", "window": 3 }
fasttext model from Li et al. trained on text8 metadata▼ { "class": "embeddings", "url": "https://drive.google.com/open?id=1PnndcDhyEtVhec-E0ULSOl2lQAHRSYJs", "description": "fasttext model from Li et al. trained on text8" }
subword model with CNN composition function from Li et al. trained on text8 metadata▼ { "class": "embeddings", "url": "https://drive.google.com/open?id=1iRz6jaP2FxaFHF0NnSZEeBZsKAfOT454", "description": "subword model with CNN composition function from Li et al. trained on text8" }
Japanese word embeddings from Karpinska et al. trained on Mainichi+Wiki metadata▼ { "class": "embeddings", "url": "https://drive.google.com/open?id=15d3wSBo2q8MHEnU7Q5oxXoUfeOGuz4f4", "description": "Japanese word embeddings from Karpinska et al. trained on Mainichi+Wiki" }
subword model with LSTM composition function from Li et al. trained on text8 metadata▼ { "class": "embeddings", "url": "https://drive.google.com/open?id=1dH8Pe1dMSr8TXg-d_MCw4iE3g0lm3xqe", "description": "subword model with LSTM composition function from Li et al. trained on text8" }
Japanese subword-level word embeddings (SG+kanji+bushu) from Karpinska et al. trained on Mainichi+Wiki metadata▼ { "class": "embeddings", "url": "https://drive.google.com/open?id=1vUmCXA9YFXIfUd6qLSuyplq0sSgGhFv8", "description": "Japanese subword-level word embeddings (SG+kanji+bushu) from Karpinska et al. trained on Mainichi+Wiki" }
Japanese subword-level word embeddings (SG+Kanji) from Karpinska et al. trained on Mainichi+Wiki metadata▼ { "class": "embeddings", "url": "https://drive.google.com/open?id=1N_P4CcbIfVz_n9QjflZYlXVUGfRpbBO_", "description": "Japanese subword-level word embeddings (SG+Kanji) from Karpinska et al. trained on Mainichi+Wiki" }
Japanese Similarity dataset (subsets for tokenized Japanese corpora) metadata▼ bibtex▼ the original dataset @inproceedings{SakaizawaKomachi2018, author = "Yuya Sakaizawa and Mamoru Komachi" title = "Construction of a Japanese Word Similarity Dataset" url = "http://www.lrec-conf.org/proceedings/lrec2018/pdf/96.pdf" booktitle = "Proceedings of the Eleventh International Conference on Language Resources and Evaluation (LREC 2018)" address = "Miyazaki, Japan" publisher = "European Language Resources Association (ELRA)" year = "2018" pages = "948-951" } subsets for tokenized corpora @inproceedings{KarpinskaLiEtAl_2018, author = "Marzena Karpinska and Bofang Li and Anna Rogers and Aleksandr Drozd" title = "Subcharacter Information in Japanese embeddings: when is it worth it?" year = "2018" booktitle = "Proceedings of the Workshop on the Relevance of Linguistic Structure in Neural Architectures for NLP" url = "http://aclweb.org/anthology/W18-2905" pages = "28-37" address = "Melbourne, Australia" publisher = "Association for Computational Linguistics" } { "class": "dataset", "task": "similarity", "language": [ "Japanese" ], "name": "jSIM", "description": "Japanese Similarity dataset (subsets for tokenized Japanese corpora)", "domain": "general", "date": "2018", "source": "based on Japanese Similarity Dataset (https://github.com/tmu-nlp/JapaneseWordSimilarityDataset)", "project_page": "http://vecto.space/projects/jSIM", "version": "2.0", "size": "1997-4429 word pairs", "url": "https://vecto-data.s3-us-west-1.amazonaws.com/jSIM.zip", "cite": [ { "contribution": "the original dataset", "bibtex": { "title": "Construction of a Japanese Word Similarity Dataset", "url": "http://www.lrec-conf.org/proceedings/lrec2018/pdf/96.pdf", "booktitle": "Proceedings of the Eleventh International Conference on Language Resources and Evaluation (LREC 2018)", "address": "Miyazaki, Japan", "publisher": "European Language Resources Association (ELRA)", "year": 2018, "pages": "948-951" } }, { 
"contribution": "subsets for tokenized corpora", "bibtex": { "title": "Subcharacter Information in Japanese embeddings: when is it worth it?", "year": 2018, "booktitle": "Proceedings of the Workshop on the Relevance of Linguistic Structure in Neural Architectures for NLP", "url": "http://aclweb.org/anthology/W18-2905", "pages": "28-37", "address": "Melbourne, Australia", "publisher": "Association for Computational Linguistics" } } ] }
Japanese Bigger Analogy Test Set metadata▼ bibtex▼ the original dataset @inproceedings{KarpinskaLiEtAl_2018, author = "Marzena Karpinska and Bofang Li and Anna Rogers and Aleksandr Drozd" title = "Subcharacter Information in Japanese embeddings: when is it worth it?" year = "2018" booktitle = "Proceedings of the Workshop on the Relevance of Linguistic Structure in Neural Architectures for NLP" url = "http://aclweb.org/anthology/W18-2905" pages = "28-37" address = "Melbourne, Australia" publisher = "Association for Computational Linguistics" } { "class": "dataset", "task": "analogy", "language": [ "Japanese" ], "name": "jBATS", "description": "Japanese Bigger Analogy Test Set", "domain": "general", "date": "2018", "source": "original, analogous to Bigger Analogy Test for English", "project_page": "http://vecto.space/projects/jBATS", "version": "1.0", "size": "97712 analogy questions (1,997 word pairs)", "url": "https://vecto-data.s3-us-west-1.amazonaws.com/JBATS.zip", "cite": [ { "contribution": "the original dataset", "bibtex": { "title": "Subcharacter Information in Japanese embeddings: when is it worth it?", "year": 2018, "booktitle": "Proceedings of the Workshop on the Relevance of Linguistic Structure in Neural Architectures for NLP", "url": "http://aclweb.org/anthology/W18-2905", "pages": "28-37", "address": "Melbourne, Australia", "publisher": "Association for Computational Linguistics" } } ] }
Bigger Analogy Test Set metadata▼ bibtex▼ the original dataset @inproceedings{GladkovaDrozdEtAl_2016, author = "Anna Gladkova and Aleksandr Drozd and Satoshi Matsuoka" title = "Analogy-based detection of morphological and semantic relations with word embeddings: what works and what doesn't" doi = "10.18653/v1/N16-2002" url = "https://www.aclweb.org/anthology/N/N16/N16-2002.pdf" booktitle = "Proceedings of the NAACL-HLT SRW" publisher = "ACL" year = "2016" pages = "47-54" } { "class": "dataset", "task": "analogy", "language": [ "English" ], "name": "BATS", "description": "Bigger Analogy Test Set", "domain": "general", "date": "2016", "source": "original", "project_page": "http://vecto.space/projects/BATS", "version": "3.0", "size": "98,200 analogy questions (2,000 word pairs)", "url": "https://vecto-data.s3-us-west-1.amazonaws.com/BATS_3.0.zip", "cite": [ { "contribution": "the original dataset", "bibtex": { "title": "Analogy-based detection of morphological and semantic relations with word embeddings: what works and what doesn't", "doi": "10.18653/v1/N16-2002", "url": "https://www.aclweb.org/anthology/N/N16/N16-2002.pdf", "booktitle": "Proceedings of the NAACL-HLT SRW", "publisher": "ACL", "year": 2016, "pages": "47-54" } } ] }