See n-gram on Wiktionary
{ "etymology_templates": [ { "args": { "1": "en", "2": "n", "3": "gram" }, "expansion": "n + -gram", "name": "suffix" } ], "etymology_text": "From n + -gram.", "forms": [ { "form": "n-grams", "tags": [ "plural" ] } ], "head_templates": [ { "args": { "head": "n-gram" }, "expansion": "n-gram (plural n-grams)", "name": "en-noun" } ], "lang": "English", "lang_code": "en", "pos": "noun", "senses": [ { "categories": [ { "kind": "other", "name": "English entries with incorrect language header", "parents": [ "Entries with incorrect language header", "Entry maintenance" ], "source": "w" }, { "kind": "other", "name": "English terms suffixed with -gram", "parents": [], "source": "w" }, { "kind": "other", "name": "Entries with translation boxes", "parents": [], "source": "w" }, { "kind": "other", "name": "Pages with 1 entry", "parents": [], "source": "w" }, { "kind": "other", "name": "Pages with entries", "parents": [], "source": "w" }, { "kind": "other", "name": "Terms with German translations", "parents": [], "source": "w" }, { "kind": "other", "name": "Terms with Icelandic translations", "parents": [], "source": "w" }, { "kind": "other", "name": "Terms with Swedish translations", "parents": [], "source": "w" }, { "kind": "topical", "langcode": "en", "name": "Computational linguistics", "orig": "en:Computational linguistics", "parents": [ "Computer science", "Linguistics", "Computing", "Sciences", "Language", "Social sciences", "Technology", "All topics", "Communication", "Society", "Fundamental" ], "source": "w" } ], "examples": [ { "ref": "2012, Winnie Cheng, Exploring Corpus Linguistics: Language in Action, Routledge, →ISBN:", "text": "Hyland's study demonstrates how n-grams can reveal similarities and differences both between genres and between different sets of writers.", "type": "quote" } ], "glosses": [ "A contiguous sequence of n items (usually characters or words) from a given sequence of text or speech, used in analysis." ], "hyponyms": [ { "word": "unigram" }, { "word": "bigram" }, { "word": "trigram" }, { "word": "fourgram" } ], "id": "en-n-gram-en-noun-xrq83lsQ", "links": [ [ "computational linguistics", "computational linguistics" ], [ "contiguous", "contiguous" ], [ "sequence", "sequence" ] ], "raw_glosses": [ "(computational linguistics) A contiguous sequence of n items (usually characters or words) from a given sequence of text or speech, used in analysis." ], "related": [ { "word": "shingle" }, { "word": "q-gram" } ], "synonyms": [ { "word": "ngram" } ], "topics": [ "computational", "computing", "engineering", "human-sciences", "linguistics", "mathematics", "natural-sciences", "physical-sciences", "sciences" ], "translations": [ { "code": "de", "lang": "German", "sense": "linguistics: contiguous sequence of n items", "tags": [ "neuter" ], "word": "N-Gramm" }, { "code": "is", "lang": "Icelandic", "sense": "linguistics: contiguous sequence of n items", "tags": [ "feminine" ], "word": "N-stæða" }, { "code": "sv", "lang": "Swedish", "sense": "linguistics: contiguous sequence of n items", "tags": [ "neuter" ], "word": "n-gram" } ], "wikipedia": [ "n-gram" ] } ], "sounds": [ { "ipa": "/ˈɛnˌɡɹæm/" }, { "ipa": "/ˈɛŋˌɡɹæm/" }, { "homophone": "engram" } ], "word": "n-gram" }
{ "etymology_templates": [ { "args": { "1": "en", "2": "n", "3": "gram" }, "expansion": "n + -gram", "name": "suffix" } ], "etymology_text": "From n + -gram.", "forms": [ { "form": "n-grams", "tags": [ "plural" ] } ], "head_templates": [ { "args": { "head": "n-gram" }, "expansion": "n-gram (plural n-grams)", "name": "en-noun" } ], "hyponyms": [ { "word": "unigram" }, { "word": "bigram" }, { "word": "trigram" }, { "word": "fourgram" } ], "lang": "English", "lang_code": "en", "pos": "noun", "related": [ { "word": "shingle" }, { "word": "q-gram" } ], "senses": [ { "categories": [ "English countable nouns", "English entries with incorrect language header", "English lemmas", "English multiword terms", "English nouns", "English terms suffixed with -gram", "English terms with homophones", "English terms with quotations", "Entries with translation boxes", "Pages with 1 entry", "Pages with entries", "Terms with German translations", "Terms with Icelandic translations", "Terms with Swedish translations", "en:Computational linguistics" ], "examples": [ { "ref": "2012, Winnie Cheng, Exploring Corpus Linguistics: Language in Action, Routledge, →ISBN:", "text": "Hyland's study demonstrates how n-grams can reveal similarities and differences both between genres and between different sets of writers.", "type": "quote" } ], "glosses": [ "A contiguous sequence of n items (usually characters or words) from a given sequence of text or speech, used in analysis." ], "links": [ [ "computational linguistics", "computational linguistics" ], [ "contiguous", "contiguous" ], [ "sequence", "sequence" ] ], "raw_glosses": [ "(computational linguistics) A contiguous sequence of n items (usually characters or words) from a given sequence of text or speech, used in analysis." ], "topics": [ "computational", "computing", "engineering", "human-sciences", "linguistics", "mathematics", "natural-sciences", "physical-sciences", "sciences" ], "wikipedia": [ "n-gram" ] } ], "sounds": [ { "ipa": "/ˈɛnˌɡɹæm/" }, { "ipa": "/ˈɛŋˌɡɹæm/" }, { "homophone": "engram" } ], "synonyms": [ { "word": "ngram" } ], "translations": [ { "code": "de", "lang": "German", "sense": "linguistics: contiguous sequence of n items", "tags": [ "neuter" ], "word": "N-Gramm" }, { "code": "is", "lang": "Icelandic", "sense": "linguistics: contiguous sequence of n items", "tags": [ "feminine" ], "word": "N-stæða" }, { "code": "sv", "lang": "Swedish", "sense": "linguistics: contiguous sequence of n items", "tags": [ "neuter" ], "word": "n-gram" } ], "word": "n-gram" }
Download raw JSONL data for n-gram meaning in All languages combined (2.4kB)
This page is a part of the kaikki.org machine-readable All languages combined dictionary. This dictionary is based on structured data extracted on 2024-11-06 from the enwiktionary dump dated 2024-10-02 using wiktextract (fbeafe8 and 7f03c9b). The data shown on this site has been post-processed and various details (e.g., extra categories) removed, some information disambiguated, and additional data merged from other sources. See the raw data download page for the unprocessed wiktextract data.
If you use this data in academic research, please cite Tatu Ylonen: Wiktextract: Wiktionary as Machine-Readable Structured Data, Proceedings of the 13th Conference on Language Resources and Evaluation (LREC), pp. 1317-1325, Marseille, 20-25 June 2022. Linking to the relevant page(s) under https://kaikki.org would also be greatly appreciated.