See tokenize on Wiktionary
Download JSON data for tokenize meaning in All languages combined (2.8kB)
{ "derived": [ { "_dis1": "0 0 0", "word": "tokenizable" }, { "_dis1": "0 0 0", "word": "tokenizer" } ], "etymology_templates": [ { "args": { "1": "en", "2": "token", "3": "ize" }, "expansion": "token + -ize", "name": "suffix" } ], "etymology_text": "token + -ize", "forms": [ { "form": "tokenizes", "tags": [ "present", "singular", "third-person" ] }, { "form": "tokenizing", "tags": [ "participle", "present" ] }, { "form": "tokenized", "tags": [ "participle", "past" ] }, { "form": "tokenized", "tags": [ "past" ] } ], "head_templates": [ { "args": {}, "expansion": "tokenize (third-person singular simple present tokenizes, present participle tokenizing, simple past and past participle tokenized)", "name": "en-verb" } ], "lang": "English", "lang_code": "en", "pos": "verb", "related": [ { "_dis1": "0 0 0", "word": "tokenization" } ], "senses": [ { "categories": [ { "kind": "topical", "langcode": "en", "name": "Computing", "orig": "en:Computing", "parents": [ "Technology", "All topics", "Fundamental" ], "source": "w" }, { "_dis": "62 32 6", "kind": "other", "name": "English entries with incorrect language header", "parents": [ "Entries with incorrect language header", "Entry maintenance" ], "source": "w+disamb" }, { "_dis": "59 32 9", "kind": "other", "name": "English entries with language name categories using raw markup", "parents": [ "Entries with language name categories using raw markup", "Entry maintenance" ], "source": "w+disamb" }, { "_dis": "54 25 21", "kind": "other", "name": "English terms suffixed with -ize", "parents": [], "source": "w+disamb" } ], "glosses": [ "To reduce to a set of tokens by lexical analysis." ], "id": "en-tokenize-en-verb-6AmbAw8e", "links": [ [ "computing", "computing#Noun" ], [ "token", "token" ], [ "lexical analysis", "lexical analysis" ] ], "raw_glosses": [ "(transitive, computing) To reduce to a set of tokens by lexical analysis." 
], "tags": [ "transitive" ], "topics": [ "computing", "engineering", "mathematics", "natural-sciences", "physical-sciences", "sciences" ] }, { "categories": [ { "kind": "topical", "langcode": "en", "name": "Computing", "orig": "en:Computing", "parents": [ "Technology", "All topics", "Fundamental" ], "source": "w" } ], "glosses": [ "To substitute sensitive data with meaningless placeholders." ], "id": "en-tokenize-en-verb--Kj3NOCS", "links": [ [ "computing", "computing#Noun" ] ], "raw_glosses": [ "(transitive, computing) To substitute sensitive data with meaningless placeholders." ], "tags": [ "transitive" ], "topics": [ "computing", "engineering", "mathematics", "natural-sciences", "physical-sciences", "sciences" ] }, { "categories": [], "glosses": [ "To treat as a token minority." ], "id": "en-tokenize-en-verb-bFvBu9Vi", "links": [ [ "token", "token" ], [ "minority", "minority" ] ], "raw_glosses": [ "(transitive) To treat as a token minority." ], "tags": [ "transitive" ] } ], "sounds": [ { "ipa": "/ˈtoʊ.kən.aɪz/", "tags": [ "General-American" ] } ], "wikipedia": [ "Tokenization" ], "word": "tokenize" }
{ "categories": [ "English 3-syllable words", "English entries with incorrect language header", "English entries with language name categories using raw markup", "English lemmas", "English terms suffixed with -ize", "English terms with IPA pronunciation", "English verbs" ], "derived": [ { "word": "tokenizable" }, { "word": "tokenizer" } ], "etymology_templates": [ { "args": { "1": "en", "2": "token", "3": "ize" }, "expansion": "token + -ize", "name": "suffix" } ], "etymology_text": "token + -ize", "forms": [ { "form": "tokenizes", "tags": [ "present", "singular", "third-person" ] }, { "form": "tokenizing", "tags": [ "participle", "present" ] }, { "form": "tokenized", "tags": [ "participle", "past" ] }, { "form": "tokenized", "tags": [ "past" ] } ], "head_templates": [ { "args": {}, "expansion": "tokenize (third-person singular simple present tokenizes, present participle tokenizing, simple past and past participle tokenized)", "name": "en-verb" } ], "lang": "English", "lang_code": "en", "pos": "verb", "related": [ { "word": "tokenization" } ], "senses": [ { "categories": [ "English transitive verbs", "en:Computing" ], "glosses": [ "To reduce to a set of tokens by lexical analysis." ], "links": [ [ "computing", "computing#Noun" ], [ "token", "token" ], [ "lexical analysis", "lexical analysis" ] ], "raw_glosses": [ "(transitive, computing) To reduce to a set of tokens by lexical analysis." ], "tags": [ "transitive" ], "topics": [ "computing", "engineering", "mathematics", "natural-sciences", "physical-sciences", "sciences" ] }, { "categories": [ "English transitive verbs", "en:Computing" ], "glosses": [ "To substitute sensitive data with meaningless placeholders." ], "links": [ [ "computing", "computing#Noun" ] ], "raw_glosses": [ "(transitive, computing) To substitute sensitive data with meaningless placeholders." 
], "tags": [ "transitive" ], "topics": [ "computing", "engineering", "mathematics", "natural-sciences", "physical-sciences", "sciences" ] }, { "categories": [ "English transitive verbs" ], "glosses": [ "To treat as a token minority." ], "links": [ [ "token", "token" ], [ "minority", "minority" ] ], "raw_glosses": [ "(transitive) To treat as a token minority." ], "tags": [ "transitive" ] } ], "sounds": [ { "ipa": "/ˈtoʊ.kən.aɪz/", "tags": [ "General-American" ] } ], "wikipedia": [ "Tokenization" ], "word": "tokenize" }
This page is part of the kaikki.org machine-readable All languages combined dictionary. This dictionary is based on structured data extracted on 2024-05-03 from the enwiktionary dump dated 2024-05-02 using wiktextract (f4fd8c9 and c9440ce). The data shown on this site has been post-processed: various details (e.g., extra categories) have been removed, some information has been disambiguated, and additional data has been merged from other sources. See the raw data download page for the unprocessed wiktextract data.
If you use this data in academic research, please cite: Tatu Ylonen, "Wiktextract: Wiktionary as Machine-Readable Structured Data", Proceedings of the 13th Conference on Language Resources and Evaluation (LREC), pp. 1317-1325, Marseille, 20-25 June 2022. Linking to the relevant page(s) under https://kaikki.org would also be greatly appreciated.