corpus linguistics in English

JSON for postprocessed kaikki.org data shown on this page:

{
  "head_templates": [
    {
      "args": {
        "1": "-"
      },
      "expansion": "corpus linguistics (uncountable)",
      "name": "en-noun"
    }
  ],
  "lang": "English",
  "lang_code": "en",
  "pos": "noun",
  "senses": [
    {
      "categories": [
        {
          "kind": "other",
          "name": "English entries with incorrect language header",
          "parents": [
            "Entries with incorrect language header",
            "Entry maintenance"
          ],
          "source": "w"
        },
        {
          "kind": "other",
          "name": "Entries with translation boxes",
          "parents": [],
          "source": "w"
        },
        {
          "kind": "other",
          "name": "Mandarin terms with redundant transliterations",
          "parents": [
            "Terms with redundant transliterations",
            "Entry maintenance"
          ],
          "source": "w"
        },
        {
          "kind": "other",
          "name": "Pages with 1 entry",
          "parents": [],
          "source": "w"
        },
        {
          "kind": "other",
          "name": "Pages with entries",
          "parents": [],
          "source": "w"
        },
        {
          "kind": "other",
          "name": "Terms with Afrikaans translations",
          "parents": [],
          "source": "w"
        },
        {
          "kind": "other",
          "name": "Terms with Catalan translations",
          "parents": [],
          "source": "w"
        },
        {
          "kind": "other",
          "name": "Terms with Finnish translations",
          "parents": [],
          "source": "w"
        },
        {
          "kind": "other",
          "name": "Terms with Hungarian translations",
          "parents": [],
          "source": "w"
        },
        {
          "kind": "other",
          "name": "Terms with Mandarin translations",
          "parents": [],
          "source": "w"
        },
        {
          "kind": "other",
          "name": "Terms with Swedish translations",
          "parents": [],
          "source": "w"
        },
        {
          "kind": "topical",
          "langcode": "en",
          "name": "Computing",
          "orig": "en:Computing",
          "parents": [
            "Technology",
            "All topics",
            "Fundamental"
          ],
          "source": "w"
        },
        {
          "kind": "topical",
          "langcode": "en",
          "name": "Linguistics",
          "orig": "en:Linguistics",
          "parents": [
            "Language",
            "Social sciences",
            "Communication",
            "Sciences",
            "Society",
            "All topics",
            "Fundamental"
          ],
          "source": "w"
        }
      ],
      "examples": [
        {
          "bold_text_offsets": [
            [
              24,
              42
            ]
          ],
          "ref": "2018, Clarence Green, James Lambert, “Position vectors, homologous chromosomes and gamma rays: Promoting disciplinary literacy through Secondary Phrase Lists”, in English for Specific Purposes, →DOI, page 2:",
          "text": "ESP, using the tools of corpus linguistics, has advanced the methodologies for investigating discipline-specific language, yet there has been little cross-fertilization so far with disciplinary literacy in secondary education.",
          "type": "quote"
        }
      ],
      "glosses": [
        "A branch of linguistics that studies large samples (corpora) of real-world text, usually with the aid of computer software."
      ],
      "id": "en-corpus_linguistics-en-noun-frNw-xcN",
      "links": [
        [
          "computing",
          "computing#Noun"
        ],
        [
          "linguistics",
          "linguistics"
        ],
        [
          "corpora",
          "corpus"
        ],
        [
          "computer",
          "computer"
        ],
        [
          "software",
          "software"
        ]
      ],
      "raw_glosses": [
        "(computing, linguistics) A branch of linguistics that studies large samples (corpora) of real-world text, usually with the aid of computer software."
      ],
      "related": [
        {
          "word": "cotext"
        },
        {
          "word": "KWIC"
        }
      ],
      "tags": [
        "uncountable"
      ],
      "topics": [
        "computing",
        "engineering",
        "human-sciences",
        "linguistics",
        "mathematics",
        "natural-sciences",
        "physical-sciences",
        "sciences"
      ],
      "translations": [
        {
          "code": "af",
          "lang": "Afrikaans",
          "sense": "a branch of linguistics that studies large samples",
          "word": "korpuslinguistiek"
        },
        {
          "code": "ca",
          "lang": "Catalan",
          "sense": "a branch of linguistics that studies large samples",
          "word": "lingüística de corpus"
        },
        {
          "code": "cmn",
          "lang": "Chinese Mandarin",
          "roman": "yǔliàokù yǔyánxué",
          "sense": "a branch of linguistics that studies large samples",
          "word": "語料庫語言學 /语料库语言学"
        },
        {
          "code": "fi",
          "lang": "Finnish",
          "sense": "a branch of linguistics that studies large samples",
          "word": "korpustutkimus"
        },
        {
          "code": "fi",
          "lang": "Finnish",
          "sense": "a branch of linguistics that studies large samples",
          "word": "korpuslingvistiikka"
        },
        {
          "code": "hu",
          "lang": "Hungarian",
          "sense": "a branch of linguistics that studies large samples",
          "word": "korpusznyelvészet"
        },
        {
          "code": "sv",
          "lang": "Swedish",
          "sense": "a branch of linguistics that studies large samples",
          "tags": [
            "common-gender"
          ],
          "word": "korpuslingvistik"
        }
      ],
      "wikipedia": [
        "corpus linguistics"
      ]
    }
  ],
  "word": "corpus linguistics"
}

JSON for raw wiktextract data:

{
  "head_templates": [
    {
      "args": {
        "1": "-"
      },
      "expansion": "corpus linguistics (uncountable)",
      "name": "en-noun"
    }
  ],
  "lang": "English",
  "lang_code": "en",
  "pos": "noun",
  "related": [
    {
      "word": "cotext"
    },
    {
      "word": "KWIC"
    }
  ],
  "senses": [
    {
      "categories": [
        "English entries with incorrect language header",
        "English lemmas",
        "English multiword terms",
        "English nouns",
        "English terms with quotations",
        "English uncountable nouns",
        "Entries with translation boxes",
        "Mandarin terms with redundant transliterations",
        "Pages with 1 entry",
        "Pages with entries",
        "Terms with Afrikaans translations",
        "Terms with Catalan translations",
        "Terms with Finnish translations",
        "Terms with Hungarian translations",
        "Terms with Mandarin translations",
        "Terms with Swedish translations",
        "en:Computing",
        "en:Linguistics"
      ],
      "examples": [
        {
          "bold_text_offsets": [
            [
              24,
              42
            ]
          ],
          "ref": "2018, Clarence Green, James Lambert, “Position vectors, homologous chromosomes and gamma rays: Promoting disciplinary literacy through Secondary Phrase Lists”, in English for Specific Purposes, →DOI, page 2:",
          "text": "ESP, using the tools of corpus linguistics, has advanced the methodologies for investigating discipline-specific language, yet there has been little cross-fertilization so far with disciplinary literacy in secondary education.",
          "type": "quote"
        }
      ],
      "glosses": [
        "A branch of linguistics that studies large samples (corpora) of real-world text, usually with the aid of computer software."
      ],
      "links": [
        [
          "computing",
          "computing#Noun"
        ],
        [
          "linguistics",
          "linguistics"
        ],
        [
          "corpora",
          "corpus"
        ],
        [
          "computer",
          "computer"
        ],
        [
          "software",
          "software"
        ]
      ],
      "raw_glosses": [
        "(computing, linguistics) A branch of linguistics that studies large samples (corpora) of real-world text, usually with the aid of computer software."
      ],
      "tags": [
        "uncountable"
      ],
      "topics": [
        "computing",
        "engineering",
        "human-sciences",
        "linguistics",
        "mathematics",
        "natural-sciences",
        "physical-sciences",
        "sciences"
      ],
      "wikipedia": [
        "corpus linguistics"
      ]
    }
  ],
  "translations": [
    {
      "code": "af",
      "lang": "Afrikaans",
      "sense": "a branch of linguistics that studies large samples",
      "word": "korpuslinguistiek"
    },
    {
      "code": "ca",
      "lang": "Catalan",
      "sense": "a branch of linguistics that studies large samples",
      "word": "lingüística de corpus"
    },
    {
      "code": "cmn",
      "lang": "Chinese Mandarin",
      "roman": "yǔliàokù yǔyánxué",
      "sense": "a branch of linguistics that studies large samples",
      "word": "語料庫語言學 /语料库语言学"
    },
    {
      "code": "fi",
      "lang": "Finnish",
      "sense": "a branch of linguistics that studies large samples",
      "word": "korpustutkimus"
    },
    {
      "code": "fi",
      "lang": "Finnish",
      "sense": "a branch of linguistics that studies large samples",
      "word": "korpuslingvistiikka"
    },
    {
      "code": "hu",
      "lang": "Hungarian",
      "sense": "a branch of linguistics that studies large samples",
      "word": "korpusznyelvészet"
    },
    {
      "code": "sv",
      "lang": "Swedish",
      "sense": "a branch of linguistics that studies large samples",
      "tags": [
        "common-gender"
      ],
      "word": "korpuslingvistik"
    }
  ],
  "word": "corpus linguistics"
}

This page is part of the kaikki.org machine-readable English dictionary. This dictionary is based on structured data extracted on 2025-04-13 from the enwiktionary dump dated 2025-04-03 using wiktextract (aeaf2a1 and fb63907). The data shown on this site has been post-processed: various details (e.g., extra categories) have been removed, some information has been disambiguated, and additional data has been merged from other sources. See the raw data download page for the unprocessed wiktextract data.

If you use this data in academic research, please cite Tatu Ylonen, "Wiktextract: Wiktionary as Machine-Readable Structured Data", Proceedings of the 13th Conference on Language Resources and Evaluation (LREC), pp. 1317–1325, Marseille, 20–25 June 2022. Linking to the relevant page(s) under https://kaikki.org would also be greatly appreciated.

"corpus linguistics" meaning in English

Noun