See corpus in All languages combined, or Wiktionary
{ "derived": [ { "_dis1": "0 0 0", "word": "aligned parallel corpus" }, { "_dis1": "0 0 0", "word": "corpus callosum" }, { "_dis1": "0 0 0", "word": "corpus cavernosum" }, { "_dis1": "0 0 0", "word": "corpus delicti" }, { "_dis1": "0 0 0", "word": "corpus language" }, { "_dis1": "0 0 0", "word": "corpus linguistics" }, { "_dis1": "0 0 0", "word": "corpus luteum" }, { "_dis1": "0 0 0", "word": "corpus manager" }, { "_dis1": "0 0 0", "word": "corpus spongiosum" }, { "_dis1": "0 0 0", "word": "corpus striatum" }, { "_dis1": "0 0 0", "word": "habeas corpus" }, { "_dis1": "0 0 0", "word": "metacorpus" }, { "_dis1": "0 0 0", "word": "noncorpus" }, { "_dis1": "0 0 0", "word": "procorpus" }, { "_dis1": "0 0 0", "word": "subcorpus" } ], "etymology_templates": [ { "args": { "1": "en", "2": "ine-pro", "3": "*krep-" }, "expansion": "", "name": "root" }, { "args": { "1": "en", "2": "la", "3": "corpus", "4": "", "5": "body" }, "expansion": "Borrowed from Latin corpus (“body”)", "name": "bor+" }, { "args": { "1": "en", "2": "corpse", "3": "corps", "4": "riff#Etymology 2" }, "expansion": "Doublet of corpse, corps, and riff", "name": "doublet" } ], "etymology_text": "Borrowed from Latin corpus (“body”). Doublet of corpse, corps, and riff.", "forms": [ { "form": "corpora", "tags": [ "plural" ] }, { "form": "corpuses", "tags": [ "plural" ] }, { "form": "corpusses", "tags": [ "plural" ] }, { "form": "corpi", "tags": [ "plural", "proscribed" ] } ], "head_templates": [ { "args": { "1": "corpora", "2": "+", "3": "corpusses", "4": "corpi", "pl4qual": "proscribed" }, "expansion": "corpus (plural corpora or corpuses or corpusses or (proscribed) corpi)", "name": "en-noun" } ], "hyphenation": [ "cor‧pus" ], "lang": "English", "lang_code": "en", "pos": "noun", "related": [ { "_dis1": "0 0 0", "word": "Wiktionary:Corpora" }, { "_dis1": "0 0 0", "word": "corpus allatum" }, { "_dis1": "0 0 0", "word": "corpus callosotomy" }, { "_dis1": "0 0 0", "word": "corpus fetishism" }, { "_dis1": "0 0 0", "word": "corpus fimbriatum" }, { "_dis1": "0 0 0", "word": "corpus juris" }, { "_dis1": "0 0 0", "word": "corpus separatum" }, { "_dis1": "0 0 0", "word": "corpus vile" } ], "senses": [ { "categories": [ { "_dis": "67 22 11", "kind": "other", "name": "English entries with incorrect language header", "parents": [ "Entries with incorrect language header", "Entry maintenance" ], "source": "w+disamb" }, { "_dis": "46 39 15", "kind": "other", "name": "English links with manual fragments", "parents": [ "Links with manual fragments", "Entry maintenance" ], "source": "w+disamb" }, { "_dis": "67 20 12", "kind": "other", "name": "Entries with translation boxes", "parents": [], "source": "w+disamb" }, { "_dis": "53 23 24", "kind": "other", "name": "Terms with Arabic translations", "parents": [], "source": "w+disamb" }, { "_dis": "63 24 13", "kind": "other", "name": "Terms with Belarusian translations", "parents": [], "source": "w+disamb" }, { "_dis": "63 22 15", "kind": "other", "name": "Terms with Bulgarian translations", "parents": [], "source": "w+disamb" }, { "_dis": "66 21 12", "kind": "other", "name": "Terms with Catalan translations", "parents": [], "source": "w+disamb" }, { "_dis": "66 21 13", "kind": "other", "name": "Terms with Czech translations", "parents": [], "source": "w+disamb" }, { "_dis": "74 16 10", "kind": "other", "name": "Terms with Danish translations", "parents": [], "source": "w+disamb" }, { "_dis": "62 24 14", "kind": "other", "name": "Terms with Dutch translations", "parents": [], "source": "w+disamb" }, { "_dis": "70 18 13", "kind": "other", "name": "Terms with Esperanto translations", "parents": [], "source": "w+disamb" }, { "_dis": "67 21 12", "kind": "other", "name": "Terms with Estonian translations", "parents": [], "source": "w+disamb" }, { "_dis": "67 21 12", "kind": "other", "name": "Terms with Finnish translations", "parents": [], "source": "w+disamb" }, { "_dis": "60 23 16", "kind": "other", "name": "Terms with French translations", "parents": [], "source": "w+disamb" }, { "_dis": "63 22 15", "kind": "other", "name": "Terms with German translations", "parents": [], "source": "w+disamb" }, { "_dis": "54 33 14", "kind": "other", "name": "Terms with Greek translations", "parents": [], "source": "w+disamb" }, { "_dis": "57 31 12", "kind": "other", "name": "Terms with Hungarian translations", "parents": [], "source": "w+disamb" }, { "_dis": "67 21 12", "kind": "other", "name": "Terms with Indonesian translations", "parents": [], "source": "w+disamb" }, { "_dis": "66 20 14", "kind": "other", "name": "Terms with Italian translations", "parents": [], "source": "w+disamb" }, { "_dis": "66 20 14", "kind": "other", "name": "Terms with Japanese translations", "parents": [], "source": "w+disamb" }, { "_dis": "67 21 12", "kind": "other", "name": "Terms with Korean translations", "parents": [], "source": "w+disamb" }, { "_dis": "67 21 12", "kind": "other", "name": "Terms with Macedonian translations", "parents": [], "source": "w+disamb" }, { "_dis": "54 21 26", "kind": "other", "name": "Terms with Mandarin translations", "parents": [], "source": "w+disamb" }, { "_dis": "67 21 12", "kind": "other", "name": "Terms with Maori translations", "parents": [], "source": "w+disamb" }, { "_dis": "67 21 12", "kind": "other", "name": "Terms with Norwegian translations", "parents": [], "source": "w+disamb" }, { "_dis": "66 21 13", "kind": "other", "name": "Terms with Persian translations", "parents": [], "source": "w+disamb" }, { "_dis": "65 22 14", "kind": "other", "name": "Terms with Portuguese translations", "parents": [], "source": "w+disamb" }, { "_dis": "66 20 14", "kind": "other", "name": "Terms with Russian translations", "parents": [], "source": "w+disamb" }, { "_dis": "67 21 12", "kind": "other", "name": "Terms with Slovak translations", "parents": [], "source": "w+disamb" }, { "_dis": "67 21 12", "kind": "other", "name": "Terms with Slovene translations", "parents": [], "source": "w+disamb" }, { "_dis": "69 19 12", "kind": "other", "name": "Terms with Spanish translations", "parents": [], "source": "w+disamb" }, { "_dis": "68 21 11", "kind": "other", "name": "Terms with Swedish translations", "parents": [], "source": "w+disamb" }, { "_dis": "68 21 11", "kind": "other", "name": "Terms with Turkish translations", "parents": [], "source": "w+disamb" }, { "_dis": "67 21 12", "kind": "other", "name": "Terms with Ukrainian translations", "parents": [], "source": "w+disamb" } ], "examples": [ { "ref": "2011, Patrick Spedding, James Lambert, “Fanny Hill, Lord Fanny, and the Myth of Metonymy”, in Studies in Philology, volume 108, number 1, page 113:", "text": "No one suggests that Browning intended to mean vagina when he wrote “owls and bats, / Cowls and twats,” because the context does not allow for it, nor does the greater context of the Browning corpus.", "type": "quote" } ], "glosses": [ "A collection of writings, often on a specific topic, of a specific genre, from a specific demographic or a particular author, etc." ], "id": "en-corpus-en-noun-vYHqX49g", "links": [ [ "collection", "collection" ], [ "writings", "writings" ], [ "topic", "topic" ], [ "genre", "genre" ], [ "demographic", "demographic" ], [ "author", "author" ] ], "synonyms": [ { "word": "collection" }, { "word": "compilation" }, { "word": "aggregation" }, { "word": "body" } ], "translations": [ { "_dis1": "51 22 28", "code": "ar", "lang": "Arabic", "roman": "matn", "sense": "linguistics: collection of writings", "tags": [ "masculine" ], "word": "مَتْن" }, { "_dis1": "51 22 28", "code": "ar", "lang": "Arabic", "roman": "maknaz luḡawiyy", "sense": "linguistics: collection of writings", "tags": [ "masculine" ], "word": "مَكْنَز لُغَوِيّ" }, { "_dis1": "51 22 28", "code": "be", "lang": "Belarusian", "roman": "kórpus", "sense": "linguistics: collection of writings", "tags": [ "masculine" ], "word": "ко́рпус" }, { "_dis1": "51 22 28", "code": "be", "lang": "Belarusian", "roman": "zbor", "sense": "linguistics: collection of writings", "tags": [ "masculine" ], "word": "збор" }, { "_dis1": "51 22 28", "code": "bg", "lang": "Bulgarian", "roman": "kórpus", "sense": "linguistics: collection of writings", "tags": [ "masculine" ], "word": "ко́рпус" }, { "_dis1": "51 22 28", "code": "ca", "lang": "Catalan", "sense": "linguistics: collection of writings", "tags": [ "masculine" ], "word": "corpus" }, { "_dis1": "51 22 28", "code": "cmn", "lang": "Chinese Mandarin", "roman": "yǔliàokù", "sense": "linguistics: collection of writings", "word": "語料庫 /语料库" }, { "_dis1": "51 22 28", "code": "cs", "lang": "Czech", "sense": "linguistics: collection of writings", "tags": [ "masculine" ], "word": "korpus" }, { "_dis1": "51 22 28", "code": "da", "lang": "Danish", "sense": "linguistics: collection of writings", "tags": [ "neuter" ], "word": "korpus" }, { "_dis1": "51 22 28", "code": "nl", "lang": "Dutch", "sense": "linguistics: collection of writings", "tags": [ "neuter" ], "word": "corpus" }, { "_dis1": "51 22 28", "code": "eo", "lang": "Esperanto", "sense": "linguistics: collection of writings", "word": "tekstaro" }, { "_dis1": "51 22 28", "code": "eo", "lang": "Esperanto", "sense": "linguistics: collection of writings", "word": "korpuso" }, { "_dis1": "51 22 28", "code": "et", "lang": "Estonian", "sense": "linguistics: collection of writings", "word": "korpus" }, { "_dis1": "51 22 28", "code": "fi", "lang": "Finnish", "sense": "linguistics: collection of writings", "word": "korpus" }, { "_dis1": "51 22 28", "code": "fr", "lang": "French", "sense": "linguistics: collection of writings", "tags": [ "masculine" ], "word": "corpus" }, { "_dis1": "51 22 28", "code": "de", "lang": "German", "sense": "linguistics: collection of writings", "tags": [ "neuter" ], "word": "Korpus" }, { "_dis1": "51 22 28", "code": "de", "lang": "German", "sense": "linguistics: collection of writings", "tags": [ "neuter" ], "word": "Textkorpus" }, { "_dis1": "51 22 28", "code": "el", "lang": "Greek", "roman": "sóma", "sense": "linguistics: collection of writings", "tags": [ "neuter" ], "word": "σώμα" }, { "_dis1": "51 22 28", "code": "el", "lang": "Greek", "roman": "syllogí", "sense": "linguistics: collection of writings", "tags": [ "feminine" ], "word": "συλλογή" }, { "_dis1": "51 22 28", "code": "hu", "lang": "Hungarian", "sense": "linguistics: collection of writings", "word": "korpusz" }, { "_dis1": "51 22 28", "code": "id", "lang": "Indonesian", "sense": "linguistics: collection of writings", "word": "korpus" }, { "_dis1": "51 22 28", "code": "it", "lang": "Italian", "sense": "linguistics: collection of writings", "tags": [ "masculine" ], "word": "corpus" }, { "_dis1": "51 22 28", "code": "ja", "lang": "Japanese", "roman": "kōpasu", "sense": "linguistics: collection of writings", "word": "コーパス" }, { "_dis1": "51 22 28", "code": "ko", "lang": "Korean", "roman": "malmungchi", "sense": "linguistics: collection of writings", "word": "말뭉치" }, { "_dis1": "51 22 28", "code": "ko", "lang": "Korean", "roman": "kopeoseu", "sense": "linguistics: collection of writings", "word": "코퍼스" }, { "_dis1": "51 22 28", "code": "mk", "lang": "Macedonian", "roman": "kórpus", "sense": "linguistics: collection of writings", "tags": [ "masculine" ], "word": "ко́рпус" }, { "_dis1": "51 22 28", "code": "mi", "lang": "Maori", "sense": "linguistics: collection of writings", "word": "putunga kōrero" }, { "_dis1": "51 22 28", "code": "mi", "lang": "Maori", "sense": "linguistics: collection of writings", "word": "whakaputunga" }, { "_dis1": "51 22 28", "code": "no", "lang": "Norwegian", "sense": "linguistics: collection of writings", "tags": [ "masculine" ], "word": "korpus" }, { "_dis1": "51 22 28", "code": "pt", "lang": "Portuguese", "sense": "linguistics: collection of writings", "tags": [ "masculine" ], "word": "corpus" }, { "_dis1": "51 22 28", "code": "ru", "lang": "Russian", "roman": "kórpus", "sense": "linguistics: collection of writings", "tags": [ "masculine" ], "word": "ко́рпус" }, { "_dis1": "51 22 28", "code": "ru", "lang": "Russian", "roman": "sobránije", "sense": "linguistics: collection of writings", "tags": [ "neuter" ], "word": "собра́ние" }, { "_dis1": "51 22 28", "code": "sk", "lang": "Slovak", "sense": "linguistics: collection of writings", "tags": [ "masculine" ], "word": "korpus" }, { "_dis1": "51 22 28", "code": "sl", "lang": "Slovene", "sense": "linguistics: collection of writings", "tags": [ "masculine" ], "word": "korpus" }, { "_dis1": "51 22 28", "code": "es", "lang": "Spanish", "sense": "linguistics: collection of writings", "tags": [ "masculine" ], "word": "corpus" }, { "_dis1": "51 22 28", "code": "sv", "lang": "Swedish", "sense": "linguistics: collection of writings", "tags": [ "common-gender" ], "word": "korpus" }, { "_dis1": "51 22 28", "code": "sv", "lang": "Swedish", "sense": "linguistics: collection of writings", "tags": [ "common-gender" ], "word": "språkbank" }, { "_dis1": "51 22 28", "code": "tr", "english": "all works of a single author", "lang": "Turkish", "sense": "linguistics: collection of writings", "word": "külliyat" }, { "_dis1": "51 22 28", "code": "uk", "lang": "Ukrainian", "roman": "kórpus", "sense": "linguistics: collection of writings", "tags": [ "masculine" ], "word": "ко́рпус" }, { "_dis1": "51 22 28", "code": "uk", "lang": "Ukrainian", "roman": "zbírnyk", "sense": "linguistics: collection of writings", "tags": [ "masculine" ], "word": "збі́рник" } ] }, { "categories": [ { "kind": "topical", "langcode": "en", "name": "Linguistics", "orig": "en:Linguistics", "parents": [ "Language", "Social sciences", "Communication", "Sciences", "Society", "All topics", "Fundamental" ], "source": "w" }, { "_dis": "46 39 15", "kind": "other", "name": "English links with manual fragments", "parents": [ "Links with manual fragments", "Entry maintenance" ], "source": "w+disamb" } ], "examples": [ { "ref": "2007, Mihail Mihailov, Hannu Tommola, “Compiling Parallel Text Corpora: Towards Automation of Routine Procedures”, in Wolfgang Teubert, editor, Text Corpora and Multilingual Lexicography (Benjamins Current Topics; 8), Amsterdam: John Benjamins Publishing Company, →ISBN, page 60:", "text": "Text corpora are being used in most current lexicographic projects. Applied linguistic research is another field where text corpora are welcome as an inexhaustible source of empirical information, a polygon for testing various linguistic tools – spell-checkers, OCRs, machine translation systems, NLP systems, etc.", "type": "quote" }, { "ref": "2008, Anabel Borja, “Corpora for Translators in Spain. The CDJ-GITRAD Corpus and the GENITT Project.”, in Gunilla [M.] Anderman, Margaret Rogers, editors, Incorporating Corpora: The Linguist and the Translator, Clevedon, North Somerset: Multilingual Matters, →ISBN, page 248:", "text": "Comparable corpora are made up of texts in different languages that may be related in various ways, but are not translations of each other. They may have nothing in common at all, or be on the same subject, of the same genre, or from the same chronological period, etc.", "type": "quote" }, { "ref": "2013, “Introduction”, in Gerry Knowles, Briony Williams, L[ita] Taylor, editors, A Corpus of Formal British English Speech: The Lancaster/IBM Spoken English Corpus, Abingdon, Oxon., New York, N.Y.: Routledge, →ISBN, page 1:", "text": "The Lancaster/IBM Spoken English Corpus began in September 1984 as part of a research project into the automatic assignment of intonation […] The original design of the corpus was determined by the need to provide data for research into speech synthesis. As a result, unlike most other corpora currently being used in the computational linguistics field, the SEC exists in several forms. […] However, whatever the original motivation for compiling a corpus, it quickly becomes an object of interest in its own right. New users find it valuable for applications for which it was not designed.", "type": "quote" }, { "ref": "2014, Giuseppina Balossi, “Corpus Approaches to the Study of Language and Literature”, in A Corpus Linguistic Approach to Literary Language and Characterization: Virginia Woolf's The Waves (Linguistic Approaches to Literature; 18), Amsterdam: John Benjamins Publishing Company, →ISBN, page 41:", "text": "A corpus approach is a useful methodology for observing, describing and interpreting the stylistic features of language in literary and non-literary texts.", "type": "quote" }, { "ref": "2018, James Lambert, “A multitude of ‘lishes’: The nomenclature of hybridity”, in English World-Wide, page 4:", "text": "Today, computer databases and corpora infinitely increase the ease of this type of research, but the collecting process remains essentially the same.", "type": "quote" } ], "glosses": [ "Such a collection in form of an electronic database used for linguistic analyses." ], "id": "en-corpus-en-noun-pIRAwBVD", "links": [ [ "linguistics", "linguistics" ], [ "collection", "collection" ], [ "electronic", "electronic" ], [ "database", "database" ], [ "linguistic", "linguistic" ] ], "raw_glosses": [ "(specifically, linguistics) Such a collection in form of an electronic database used for linguistic analyses." ], "synonyms": [ { "word": "digital corpus" }, { "word": "text corpus" } ], "tags": [ "specifically" ], "topics": [ "human-sciences", "linguistics", "sciences" ], "translations": [ { "_dis1": "22 75 3", "code": "fa", "lang": "Persian", "sense": "linguistics: electronic text database", "word": "پیکره متنی" }, { "_dis1": "22 75 3", "code": "fa", "lang": "Persian", "sense": "linguistics: electronic text database", "word": "پیکره" } ] }, { "categories": [ { "_dis": "46 39 15", "kind": "other", "name": "English links with manual fragments", "parents": [ "Links with manual fragments", "Entry maintenance" ], "source": "w+disamb" } ], "examples": [ { "ref": "1998, Dimitǎr Draganov, “New Coin Types of Hadrianopolis”, in Ulrike Peter, editor, Stephanos Nomismatikos: Edith Schönert-Geiss zum 65. Geburtstag (Griechisches Münzwerk), Berlin: Akademie Verlag, →ISBN, page 221:", "text": "About a hundred years ago in Germany, the publishing of corpuses of the ancient Greek coinages was started. […] The significance of those, and some other corpuses is exclusive, because they allowed an enormous amount of numismatic material kept in museum and private collections all over the world, to be studied and systematized.", "type": "quote" }, { "ref": "2014, Margaret Darling, Barbara Precious, “Introduction”, in A Corpus of Roman Pottery from Lincoln (Lincoln Archaeological Studies; 6), Oxford: Oxbow Books, →ISBN, page 1:", "text": "An assessment in 1991 proposed publication of the results of this work in three stages: […] secondly, a corpus of the Roman pottery to present the type series and to discuss the fabrics and forms recovered, […]", "type": "quote" } ], "glosses": [ "A body, a collection." ], "id": "en-corpus-en-noun-kqed46eE", "links": [ [ "body", "body" ], [ "collection", "collection" ] ], "raw_glosses": [ "(uncommon) A body, a collection." ], "synonyms": [ { "word": "collection" }, { "word": "body" } ], "tags": [ "uncommon" ] } ], "sounds": [ { "ipa": "/ˈkɔːpəs/", "tags": [ "Received-Pronunciation" ] }, { "ipa": "/ˈkɔɹpəs/", "tags": [ "General-American" ] }, { "audio": "en-au-corpus.ogg", "mp3_url": "https://upload.wikimedia.org/wikipedia/commons/transcoded/4/4b/En-au-corpus.ogg/En-au-corpus.ogg.mp3", "ogg_url": "https://upload.wikimedia.org/wikipedia/commons/4/4b/En-au-corpus.ogg" }, { "rhymes": "-ɔː(ɹ)pəs" } ], "word": "corpus" }
{ "categories": [ "English countable nouns", "English doublets", "English entries with incorrect language header", "English lemmas", "English links with manual fragments", "English nouns", "English nouns with irregular plurals", "English terms borrowed from Latin", "English terms derived from Latin", "English terms derived from Proto-Indo-European", "English terms derived from the Proto-Indo-European root *krep-", "Entries with translation boxes", "Pages with 10 entries", "Pages with entries", "Rhymes:English/ɔː(ɹ)pəs", "Rhymes:English/ɔː(ɹ)pəs/2 syllables", "Terms with Arabic translations", "Terms with Belarusian translations", "Terms with Bulgarian translations", "Terms with Catalan translations", "Terms with Czech translations", "Terms with Danish translations", "Terms with Dutch translations", "Terms with Esperanto translations", "Terms with Estonian translations", "Terms with Finnish translations", "Terms with French translations", "Terms with German translations", "Terms with Greek translations", "Terms with Hungarian translations", "Terms with Indonesian translations", "Terms with Italian translations", "Terms with Japanese translations", "Terms with Korean translations", "Terms with Macedonian translations", "Terms with Mandarin translations", "Terms with Maori translations", "Terms with Norwegian translations", "Terms with Persian translations", "Terms with Portuguese translations", "Terms with Russian translations", "Terms with Slovak translations", "Terms with Slovene translations", "Terms with Spanish translations", "Terms with Swedish translations", "Terms with Turkish translations", "Terms with Ukrainian translations" ], "derived": [ { "word": "aligned parallel corpus" }, { "word": "corpus callosum" }, { "word": "corpus cavernosum" }, { "word": "corpus delicti" }, { "word": "corpus language" }, { "word": "corpus linguistics" }, { "word": "corpus luteum" }, { "word": "corpus manager" }, { "word": "corpus spongiosum" }, { "word": "corpus striatum" }, { "word": "habeas corpus" }, { "word": "metacorpus" }, { "word": "noncorpus" }, { "word": "procorpus" }, { "word": "subcorpus" } ], "etymology_templates": [ { "args": { "1": "en", "2": "ine-pro", "3": "*krep-" }, "expansion": "", "name": "root" }, { "args": { "1": "en", "2": "la", "3": "corpus", "4": "", "5": "body" }, "expansion": "Borrowed from Latin corpus (“body”)", "name": "bor+" }, { "args": { "1": "en", "2": "corpse", "3": "corps", "4": "riff#Etymology 2" }, "expansion": "Doublet of corpse, corps, and riff", "name": "doublet" } ], "etymology_text": "Borrowed from Latin corpus (“body”). Doublet of corpse, corps, and riff.", "forms": [ { "form": "corpora", "tags": [ "plural" ] }, { "form": "corpuses", "tags": [ "plural" ] }, { "form": "corpusses", "tags": [ "plural" ] }, { "form": "corpi", "tags": [ "plural", "proscribed" ] } ], "head_templates": [ { "args": { "1": "corpora", "2": "+", "3": "corpusses", "4": "corpi", "pl4qual": "proscribed" }, "expansion": "corpus (plural corpora or corpuses or corpusses or (proscribed) corpi)", "name": "en-noun" } ], "hyphenation": [ "cor‧pus" ], "lang": "English", "lang_code": "en", "pos": "noun", "related": [ { "word": "Wiktionary:Corpora" }, { "word": "corpus allatum" }, { "word": "corpus callosotomy" }, { "word": "corpus fetishism" }, { "word": "corpus fimbriatum" }, { "word": "corpus juris" }, { "word": "corpus separatum" }, { "word": "corpus vile" } ], "senses": [ { "categories": [ "English terms with quotations" ], "examples": [ { "ref": "2011, Patrick Spedding, James Lambert, “Fanny Hill, Lord Fanny, and the Myth of Metonymy”, in Studies in Philology, volume 108, number 1, page 113:", "text": "No one suggests that Browning intended to mean vagina when he wrote “owls and bats, / Cowls and twats,” because the context does not allow for it, nor does the greater context of the Browning corpus.", "type": "quote" } ], "glosses": [ "A collection of writings, often on a specific topic, of a specific genre, from a specific demographic or a particular author, etc." ], "links": [ [ "collection", "collection" ], [ "writings", "writings" ], [ "topic", "topic" ], [ "genre", "genre" ], [ "demographic", "demographic" ], [ "author", "author" ] ], "synonyms": [ { "word": "collection" }, { "word": "compilation" }, { "word": "aggregation" }, { "word": "body" } ] }, { "categories": [ "English terms with quotations", "en:Linguistics" ], "examples": [ { "ref": "2007, Mihail Mihailov, Hannu Tommola, “Compiling Parallel Text Corpora: Towards Automation of Routine Procedures”, in Wolfgang Teubert, editor, Text Corpora and Multilingual Lexicography (Benjamins Current Topics; 8), Amsterdam: John Benjamins Publishing Company, →ISBN, page 60:", "text": "Text corpora are being used in most current lexicographic projects. Applied linguistic research is another field where text corpora are welcome as an inexhaustible source of empirical information, a polygon for testing various linguistic tools – spell-checkers, OCRs, machine translation systems, NLP systems, etc.", "type": "quote" }, { "ref": "2008, Anabel Borja, “Corpora for Translators in Spain. The CDJ-GITRAD Corpus and the GENITT Project.”, in Gunilla [M.] Anderman, Margaret Rogers, editors, Incorporating Corpora: The Linguist and the Translator, Clevedon, North Somerset: Multilingual Matters, →ISBN, page 248:", "text": "Comparable corpora are made up of texts in different languages that may be related in various ways, but are not translations of each other. They may have nothing in common at all, or be on the same subject, of the same genre, or from the same chronological period, etc.", "type": "quote" }, { "ref": "2013, “Introduction”, in Gerry Knowles, Briony Williams, L[ita] Taylor, editors, A Corpus of Formal British English Speech: The Lancaster/IBM Spoken English Corpus, Abingdon, Oxon., New York, N.Y.: Routledge, →ISBN, page 1:", "text": "The Lancaster/IBM Spoken English Corpus began in September 1984 as part of a research project into the automatic assignment of intonation […] The original design of the corpus was determined by the need to provide data for research into speech synthesis. As a result, unlike most other corpora currently being used in the computational linguistics field, the SEC exists in several forms. […] However, whatever the original motivation for compiling a corpus, it quickly becomes an object of interest in its own right. New users find it valuable for applications for which it was not designed.", "type": "quote" }, { "ref": "2014, Giuseppina Balossi, “Corpus Approaches to the Study of Language and Literature”, in A Corpus Linguistic Approach to Literary Language and Characterization: Virginia Woolf's The Waves (Linguistic Approaches to Literature; 18), Amsterdam: John Benjamins Publishing Company, →ISBN, page 41:", "text": "A corpus approach is a useful methodology for observing, describing and interpreting the stylistic features of language in literary and non-literary texts.", "type": "quote" }, { "ref": "2018, James Lambert, “A multitude of ‘lishes’: The nomenclature of hybridity”, in English World-Wide, page 4:", "text": "Today, computer databases and corpora infinitely increase the ease of this type of research, but the collecting process remains essentially the same.", "type": "quote" } ], "glosses": [ "Such a collection in form of an electronic database used for linguistic analyses." ], "links": [ [ "linguistics", "linguistics" ], [ "collection", "collection" ], [ "electronic", "electronic" ], [ "database", "database" ], [ "linguistic", "linguistic" ] ], "raw_glosses": [ "(specifically, linguistics) Such a collection in form of an electronic database used for linguistic analyses." ], "synonyms": [ { "word": "digital corpus" }, { "word": "text corpus" } ], "tags": [ "specifically" ], "topics": [ "human-sciences", "linguistics", "sciences" ] }, { "categories": [ "English terms with quotations", "English terms with uncommon senses" ], "examples": [ { "ref": "1998, Dimitǎr Draganov, “New Coin Types of Hadrianopolis”, in Ulrike Peter, editor, Stephanos Nomismatikos: Edith Schönert-Geiss zum 65. Geburtstag (Griechisches Münzwerk), Berlin: Akademie Verlag, →ISBN, page 221:", "text": "About a hundred years ago in Germany, the publishing of corpuses of the ancient Greek coinages was started. […] The significance of those, and some other corpuses is exclusive, because they allowed an enormous amount of numismatic material kept in museum and private collections all over the world, to be studied and systematized.", "type": "quote" }, { "ref": "2014, Margaret Darling, Barbara Precious, “Introduction”, in A Corpus of Roman Pottery from Lincoln (Lincoln Archaeological Studies; 6), Oxford: Oxbow Books, →ISBN, page 1:", "text": "An assessment in 1991 proposed publication of the results of this work in three stages: […] secondly, a corpus of the Roman pottery to present the type series and to discuss the fabrics and forms recovered, […]", "type": "quote" } ], "glosses": [ "A body, a collection." ], "links": [ [ "body", "body" ], [ "collection", "collection" ] ], "raw_glosses": [ "(uncommon) A body, a collection." ], "synonyms": [ { "word": "collection" }, { "word": "body" } ], "tags": [ "uncommon" ] } ], "sounds": [ { "ipa": "/ˈkɔːpəs/", "tags": [ "Received-Pronunciation" ] }, { "ipa": "/ˈkɔɹpəs/", "tags": [ "General-American" ] }, { "audio": "en-au-corpus.ogg", "mp3_url": "https://upload.wikimedia.org/wikipedia/commons/transcoded/4/4b/En-au-corpus.ogg/En-au-corpus.ogg.mp3", "ogg_url": "https://upload.wikimedia.org/wikipedia/commons/4/4b/En-au-corpus.ogg" }, { "rhymes": "-ɔː(ɹ)pəs" } ], "translations": [ { "code": "ar", "lang": "Arabic", "roman": "matn", "sense": "linguistics: collection of writings", "tags": [ "masculine" ], "word": "مَتْن" }, { "code": "ar", "lang": "Arabic", "roman": "maknaz luḡawiyy", "sense": "linguistics: collection of writings", "tags": [ "masculine" ], "word": "مَكْنَز لُغَوِيّ" }, { "code": "be", "lang": "Belarusian", "roman": "kórpus", "sense": "linguistics: collection of writings", "tags": [ "masculine" ], "word": "ко́рпус" }, { "code": "be", "lang": "Belarusian", "roman": "zbor", "sense": "linguistics: collection of writings", "tags": [ "masculine" ], "word": "збор" }, { "code": "bg", "lang": "Bulgarian", "roman": "kórpus", "sense": "linguistics: collection of writings", "tags": [ "masculine" ], "word": "ко́рпус" }, { "code": "ca", "lang": "Catalan", "sense": "linguistics: collection of writings", "tags": [ "masculine" ], "word": "corpus" }, { "code": "cmn", "lang": "Chinese Mandarin", "roman": "yǔliàokù", "sense": "linguistics: collection of writings", "word": "語料庫 /语料库" }, { "code": "cs", "lang": "Czech", "sense": "linguistics: collection of writings", "tags": [ "masculine" ], "word": "korpus" }, { "code": "da", "lang": "Danish", "sense": "linguistics: collection of writings", "tags": [ "neuter" ], "word": "korpus" }, { "code": "nl", "lang": "Dutch", "sense": "linguistics: collection of writings", "tags": [ "neuter" ], "word": "corpus" }, { "code": "eo", "lang": "Esperanto", "sense": "linguistics: collection of writings", "word": "tekstaro" }, { "code": "eo", "lang": "Esperanto", "sense": "linguistics: collection of writings", "word": "korpuso" }, { "code": "et", "lang": "Estonian", "sense": "linguistics: collection of writings", "word": "korpus" }, { "code": "fi", "lang": "Finnish", "sense": "linguistics: collection of writings", "word": "korpus" }, { "code": "fr", "lang": "French", "sense": "linguistics: collection of writings", "tags": [ "masculine" ], "word": "corpus" }, { "code": "de", "lang": "German", "sense": "linguistics: collection of writings", "tags": [ "neuter" ], "word": "Korpus" }, { "code": "de", "lang": "German", "sense": "linguistics: collection of writings", "tags": [ "neuter" ], "word": "Textkorpus" }, { "code": "el", "lang": "Greek", "roman": "sóma", "sense": "linguistics: collection of writings", "tags": [ "neuter" ], "word": "σώμα" }, { "code": "el", "lang": "Greek", "roman": "syllogí", "sense": "linguistics: collection of writings", "tags": [ "feminine" ], "word": "συλλογή" }, { "code": "hu", "lang": "Hungarian", "sense": "linguistics: collection of writings", "word": "korpusz" }, { "code": "id", "lang": "Indonesian", "sense": "linguistics: collection of writings", "word": "korpus" }, { "code": "it", "lang": "Italian", "sense": "linguistics: collection of writings", "tags": [ "masculine" ], "word": "corpus" }, { "code": "ja", "lang": "Japanese", "roman": "kōpasu", "sense": "linguistics: collection of writings", "word": "コーパス" }, { "code": "ko", "lang": "Korean", "roman": "malmungchi", "sense": "linguistics: collection of writings", "word": "말뭉치" }, { "code": "ko", "lang": "Korean", "roman": "kopeoseu", "sense": "linguistics: collection of writings", "word": "코퍼스" }, { "code": "mk", "lang": "Macedonian", "roman": "kórpus", "sense": "linguistics: collection of writings", "tags": [ "masculine" ], "word": "ко́рпус" }, { "code": "mi", "lang": "Maori", "sense": "linguistics: collection of writings", "word": "putunga kōrero" }, { "code": "mi", "lang": "Maori", "sense": "linguistics: collection of writings", "word": "whakaputunga" }, { "code": "no", "lang": "Norwegian", "sense": "linguistics: collection of writings", "tags": [ "masculine" ], "word": "korpus" }, { "code": "pt", "lang": "Portuguese", "sense": "linguistics: collection of writings", "tags": [ "masculine" ], "word": "corpus" }, { "code": "ru", "lang": "Russian", "roman": "kórpus", "sense": "linguistics: collection of writings", "tags": [ "masculine" ], "word": "ко́рпус" }, { "code": "ru", "lang": "Russian", "roman": "sobránije", "sense": "linguistics: collection of writings", "tags": [ "neuter" ], "word": "собра́ние" }, { "code": "sk", "lang": "Slovak", "sense": "linguistics: collection of writings", "tags": [ "masculine" ], "word": "korpus" }, { "code": "sl", "lang": "Slovene", "sense": "linguistics: collection of writings", "tags": [ "masculine" ], "word": "korpus" }, { "code": "es", "lang": "Spanish", "sense": "linguistics: collection of writings", "tags": [ "masculine" ], "word": "corpus" }, { "code": "sv", "lang": "Swedish", "sense": "linguistics: collection of writings", "tags": [ "common-gender" ], "word": "korpus" }, { "code": "sv", "lang": "Swedish", "sense": "linguistics: collection of writings", "tags": [ "common-gender" ], "word": "språkbank" }, { "code": "tr", "english": "all works of a single author", "lang": "Turkish", "sense": "linguistics: collection of writings", "word": "külliyat" }, { "code": "uk", "lang": "Ukrainian", "roman": "kórpus", "sense": "linguistics: collection of writings", "tags": [ "masculine" ], "word": "ко́рпус" }, { "code": "uk", "lang": "Ukrainian", "roman": "zbírnyk", "sense": "linguistics: collection of writings", "tags": [ "masculine" ], "word": "збі́рник" }, { "code": "fa", "lang": "Persian", "sense": "linguistics: electronic text database", "word": "پیکره متنی" }, { "code": "fa", "lang": "Persian", "sense": "linguistics: electronic text database", "word": "پیکره" } ], "word": "corpus" }
Download raw JSONL data for corpus meaning in English (14.6kB)
This page is a part of the kaikki.org machine-readable English dictionary. This dictionary is based on structured data extracted on 2025-02-03 from the enwiktionary dump dated 2025-01-20 using wiktextract (05fdf6b and 9dbd323). The data shown on this site has been post-processed and various details (e.g., extra categories) removed, some information disambiguated, and additional data merged from other sources. See the raw data download page for the unprocessed wiktextract data.
If you use this data in academic research, please cite Tatu Ylonen: Wiktextract: Wiktionary as Machine-Readable Structured Data, Proceedings of the 13th Conference on Language Resources and Evaluation (LREC), pp. 1317-1325, Marseille, 20-25 June 2022. Linking to the relevant page(s) under https://kaikki.org would also be greatly appreciated.