corpus linguistics in All languages combined

[Show JSON for postprocessed kaikki.org data shown on this page ▼] [Hide JSON for postprocessed kaikki.org data shown on this page ▲]

{
  "head_templates": [
    {
      "args": {
        "1": "-"
      },
      "expansion": "corpus linguistics (uncountable)",
      "name": "en-noun"
    }
  ],
  "lang": "English",
  "lang_code": "en",
  "pos": "noun",
  "senses": [
    {
      "categories": [
        {
          "kind": "other",
          "name": "English entries with incorrect language header",
          "parents": [],
          "source": "w"
        },
        {
          "kind": "other",
          "name": "Entries with translation boxes",
          "parents": [],
          "source": "w"
        },
        {
          "kind": "other",
          "name": "Mandarin terms with redundant transliterations",
          "parents": [],
          "source": "w"
        },
        {
          "kind": "other",
          "name": "Pages with 1 entry",
          "parents": [],
          "source": "w"
        },
        {
          "kind": "other",
          "name": "Pages with entries",
          "parents": [],
          "source": "w"
        },
        {
          "kind": "other",
          "name": "Terms with Afrikaans translations",
          "parents": [],
          "source": "w"
        },
        {
          "kind": "other",
          "name": "Terms with Catalan translations",
          "parents": [],
          "source": "w"
        },
        {
          "kind": "other",
          "name": "Terms with Finnish translations",
          "parents": [],
          "source": "w"
        },
        {
          "kind": "other",
          "name": "Terms with Hungarian translations",
          "parents": [],
          "source": "w"
        },
        {
          "kind": "other",
          "name": "Terms with Mandarin translations",
          "parents": [],
          "source": "w"
        },
        {
          "kind": "other",
          "name": "Terms with Swedish translations",
          "parents": [],
          "source": "w"
        },
        {
          "kind": "other",
          "langcode": "en",
          "name": "Computing",
          "orig": "en:Computing",
          "parents": [],
          "source": "w"
        },
        {
          "kind": "other",
          "langcode": "en",
          "name": "Linguistics",
          "orig": "en:Linguistics",
          "parents": [],
          "source": "w"
        }
      ],
      "examples": [
        {
          "bold_text_offsets": [
            [
              24,
              42
            ]
          ],
          "ref": "2018, Clarence Green, James Lambert, “Position vectors, homologous chromosomes and gamma rays: Promoting disciplinary literacy through Secondary Phrase Lists”, in English for Specific Purposes, →DOI, page 2:",
          "text": "ESP, using the tools of corpus linguistics, has advanced the methodologies for investigating discipline-specific language, yet there has been little cross-fertilization so far with disciplinary literacy in secondary education.",
          "type": "quote"
        }
      ],
      "glosses": [
        "A branch of linguistics that studies large samples (corpora) of real-world text, usually with the aid of computer software."
      ],
      "id": "en-corpus_linguistics-en-noun-frNw-xcN",
      "links": [
        [
          "computing",
          "computing#Noun"
        ],
        [
          "linguistics",
          "linguistics"
        ],
        [
          "corpora",
          "corpus"
        ],
        [
          "computer",
          "computer"
        ],
        [
          "software",
          "software"
        ]
      ],
      "raw_glosses": [
        "(computing, linguistics) A branch of linguistics that studies large samples (corpora) of real-world text, usually with the aid of computer software."
      ],
      "related": [
        {
          "word": "cotext"
        },
        {
          "word": "KWIC"
        }
      ],
      "tags": [
        "uncountable"
      ],
      "topics": [
        "computing",
        "engineering",
        "human-sciences",
        "linguistics",
        "mathematics",
        "natural-sciences",
        "physical-sciences",
        "sciences"
      ],
      "translations": [
        {
          "code": "af",
          "lang": "Afrikaans",
          "sense": "a branch of linguistics that studies large samples",
          "word": "korpuslinguistiek"
        },
        {
          "code": "ca",
          "lang": "Catalan",
          "sense": "a branch of linguistics that studies large samples",
          "word": "lingüística de corpus"
        },
        {
          "code": "cmn",
          "lang": "Chinese Mandarin",
          "roman": "yǔliàokù yǔyánxué",
          "sense": "a branch of linguistics that studies large samples",
          "word": "語料庫語言學 /语料库语言学"
        },
        {
          "code": "fi",
          "lang": "Finnish",
          "sense": "a branch of linguistics that studies large samples",
          "word": "korpustutkimus"
        },
        {
          "code": "fi",
          "lang": "Finnish",
          "sense": "a branch of linguistics that studies large samples",
          "word": "korpuslingvistiikka"
        },
        {
          "code": "hu",
          "lang": "Hungarian",
          "sense": "a branch of linguistics that studies large samples",
          "word": "korpusznyelvészet"
        },
        {
          "code": "sv",
          "lang": "Swedish",
          "sense": "a branch of linguistics that studies large samples",
          "tags": [
            "common-gender"
          ],
          "word": "korpuslingvistik"
        }
      ],
      "wikipedia": [
        "corpus linguistics"
      ]
    }
  ],
  "word": "corpus linguistics"
}

[Show JSON for raw wiktextract data ▼] [Hide JSON for raw wiktextract data ▲]

{
  "head_templates": [
    {
      "args": {
        "1": "-"
      },
      "expansion": "corpus linguistics (uncountable)",
      "name": "en-noun"
    }
  ],
  "lang": "English",
  "lang_code": "en",
  "pos": "noun",
  "related": [
    {
      "word": "cotext"
    },
    {
      "word": "KWIC"
    }
  ],
  "senses": [
    {
      "categories": [
        "English entries with incorrect language header",
        "English lemmas",
        "English multiword terms",
        "English nouns",
        "English terms with quotations",
        "English uncountable nouns",
        "Entries with translation boxes",
        "Mandarin terms with redundant transliterations",
        "Pages with 1 entry",
        "Pages with entries",
        "Terms with Afrikaans translations",
        "Terms with Catalan translations",
        "Terms with Finnish translations",
        "Terms with Hungarian translations",
        "Terms with Mandarin translations",
        "Terms with Swedish translations",
        "en:Computing",
        "en:Linguistics"
      ],
      "examples": [
        {
          "bold_text_offsets": [
            [
              24,
              42
            ]
          ],
          "ref": "2018, Clarence Green, James Lambert, “Position vectors, homologous chromosomes and gamma rays: Promoting disciplinary literacy through Secondary Phrase Lists”, in English for Specific Purposes, →DOI, page 2:",
          "text": "ESP, using the tools of corpus linguistics, has advanced the methodologies for investigating discipline-specific language, yet there has been little cross-fertilization so far with disciplinary literacy in secondary education.",
          "type": "quote"
        }
      ],
      "glosses": [
        "A branch of linguistics that studies large samples (corpora) of real-world text, usually with the aid of computer software."
      ],
      "links": [
        [
          "computing",
          "computing#Noun"
        ],
        [
          "linguistics",
          "linguistics"
        ],
        [
          "corpora",
          "corpus"
        ],
        [
          "computer",
          "computer"
        ],
        [
          "software",
          "software"
        ]
      ],
      "raw_glosses": [
        "(computing, linguistics) A branch of linguistics that studies large samples (corpora) of real-world text, usually with the aid of computer software."
      ],
      "tags": [
        "uncountable"
      ],
      "topics": [
        "computing",
        "engineering",
        "human-sciences",
        "linguistics",
        "mathematics",
        "natural-sciences",
        "physical-sciences",
        "sciences"
      ],
      "wikipedia": [
        "corpus linguistics"
      ]
    }
  ],
  "translations": [
    {
      "code": "af",
      "lang": "Afrikaans",
      "sense": "a branch of linguistics that studies large samples",
      "word": "korpuslinguistiek"
    },
    {
      "code": "ca",
      "lang": "Catalan",
      "sense": "a branch of linguistics that studies large samples",
      "word": "lingüística de corpus"
    },
    {
      "code": "cmn",
      "lang": "Chinese Mandarin",
      "roman": "yǔliàokù yǔyánxué",
      "sense": "a branch of linguistics that studies large samples",
      "word": "語料庫語言學 /语料库语言学"
    },
    {
      "code": "fi",
      "lang": "Finnish",
      "sense": "a branch of linguistics that studies large samples",
      "word": "korpustutkimus"
    },
    {
      "code": "fi",
      "lang": "Finnish",
      "sense": "a branch of linguistics that studies large samples",
      "word": "korpuslingvistiikka"
    },
    {
      "code": "hu",
      "lang": "Hungarian",
      "sense": "a branch of linguistics that studies large samples",
      "word": "korpusznyelvészet"
    },
    {
      "code": "sv",
      "lang": "Swedish",
      "sense": "a branch of linguistics that studies large samples",
      "tags": [
        "common-gender"
      ],
      "word": "korpuslingvistik"
    }
  ],
  "word": "corpus linguistics"
}

Download raw JSONL data for corpus linguistics meaning in All languages combined (3.0kB)

This page is a part of the kaikki.org machine-readable All languages combined dictionary. This dictionary is based on structured data extracted on 2025-06-01 from the enwiktionary dump dated 2025-05-20 using wiktextract (3dadd05 and f1c2b61). The data shown on this site has been post-processed and various details (e.g., extra categories) removed, some information disambiguated, and additional data merged from other sources. See the raw data download page for the unprocessed wiktextract data.

If you use this data in academic research, please cite Tatu Ylonen: Wiktextract: Wiktionary as Machine-Readable Structured Data, Proceedings of the 13th Conference on Language Resources and Evaluation (LREC), pp. 1317-1325, Marseille, 20-25 June 2022. Linking to the relevant page(s) under https://kaikki.org would also be greatly appreciated.

"corpus linguistics" meaning in All languages combined

Noun [English]