See tokenize in All languages combined, or Wiktionary
{ "categories": [ { "kind": "other", "name": "Anagramm sortiert (Englisch)", "parents": [], "source": "w" }, { "kind": "other", "name": "Englisch", "parents": [], "source": "w" }, { "kind": "other", "name": "Rückläufige Wörterliste (Englisch)", "parents": [], "source": "w" }, { "kind": "other", "name": "Verb (Englisch)", "parents": [], "source": "w" }, { "kind": "other", "name": "Siehe auch", "orig": "siehe auch", "parents": [], "source": "w" } ], "forms": [ { "form": "simple present I, you, they" }, { "form": "simple present tokenize" }, { "form": "he tokenizes" }, { "form": "she tokenizes" }, { "form": "it tokenizes" }, { "form": "simple past tokenized" }, { "form": "present participle tokenizing" }, { "form": "past participle tokenized" } ], "hyphenation": "to·ke·nize", "lang": "Englisch", "lang_code": "en", "pos": "verb", "pos_title": "Verb", "senses": [ { "examples": [ { "author": "Pierre M. Nugues", "italic_text_offsets": [ [ 32, 40 ] ], "pages": "127", "publisher": "Springer", "ref": "Pierre M. Nugues: Language Processing with Perl and Prolog. Springer, 2014, Seite 127 (Google Books)", "text": "„The previous program failed to tokenize the puctuation.“", "title": "Language Processing with Perl and Prolog", "url": "Google Books", "year": "2014" }, { "author": "Eric Rochester", "italic_text_offsets": [ [ 23, 31 ] ], "pages": "64", "publisher": "PACKT Publishing", "ref": "Eric Rochester: Clojure Data Analysis Cookbook. PACKT Publishing, 2015, Seite 64 (Google Books)", "text": "„We need a function to tokenize a string into words.“", "title": "Clojure Data Analysis Cookbook", "url": "Google Books", "year": "2015" }, { "author": "David Smiley,Eric Pugh,Kranti Parisa,Matt Mitchell", "italic_text_offsets": [ [ 14, 23 ] ], "pages": "59", "publisher": "PACKT Publishing", "ref": "David Smiley,Eric Pugh,Kranti Parisa,Matt Mitchell: Apache Solr Enterprise Search Server. \nPACKT Publishing, 2015, Seite 59 (Google Books)", "text": "„This example tokenizes a semi-colon separated list.“", "title": "Apache Solr Enterprise Search Server", "url": "Google Books", "year": "2015" } ], "glosses": [ "zerlegen eines Textes für die Weiterverarbeitung" ], "id": "de-tokenize-en-verb-DXd4qcWp", "raw_tags": [ "Computerlinguistik" ], "sense_index": "1" } ], "translations": [ { "lang": "Deutsch", "lang_code": "de", "sense": "Computerlinguistik: zerlegen eines Textes für die Weiterverarbeitung", "sense_index": "1", "word": "tokenisieren" } ], "word": "tokenize" }
{ "categories": [ "Anagramm sortiert (Englisch)", "Englisch", "Rückläufige Wörterliste (Englisch)", "Verb (Englisch)", "siehe auch" ], "forms": [ { "form": "simple present I, you, they" }, { "form": "simple present tokenize" }, { "form": "he tokenizes" }, { "form": "she tokenizes" }, { "form": "it tokenizes" }, { "form": "simple past tokenized" }, { "form": "present participle tokenizing" }, { "form": "past participle tokenized" } ], "hyphenation": "to·ke·nize", "lang": "Englisch", "lang_code": "en", "pos": "verb", "pos_title": "Verb", "senses": [ { "examples": [ { "author": "Pierre M. Nugues", "italic_text_offsets": [ [ 32, 40 ] ], "pages": "127", "publisher": "Springer", "ref": "Pierre M. Nugues: Language Processing with Perl and Prolog. Springer, 2014, Seite 127 (Google Books)", "text": "„The previous program failed to tokenize the puctuation.“", "title": "Language Processing with Perl and Prolog", "url": "Google Books", "year": "2014" }, { "author": "Eric Rochester", "italic_text_offsets": [ [ 23, 31 ] ], "pages": "64", "publisher": "PACKT Publishing", "ref": "Eric Rochester: Clojure Data Analysis Cookbook. PACKT Publishing, 2015, Seite 64 (Google Books)", "text": "„We need a function to tokenize a string into words.“", "title": "Clojure Data Analysis Cookbook", "url": "Google Books", "year": "2015" }, { "author": "David Smiley,Eric Pugh,Kranti Parisa,Matt Mitchell", "italic_text_offsets": [ [ 14, 23 ] ], "pages": "59", "publisher": "PACKT Publishing", "ref": "David Smiley,Eric Pugh,Kranti Parisa,Matt Mitchell: Apache Solr Enterprise Search Server. \nPACKT Publishing, 2015, Seite 59 (Google Books)", "text": "„This example tokenizes a semi-colon separated list.“", "title": "Apache Solr Enterprise Search Server", "url": "Google Books", "year": "2015" } ], "glosses": [ "zerlegen eines Textes für die Weiterverarbeitung" ], "raw_tags": [ "Computerlinguistik" ], "sense_index": "1" } ], "translations": [ { "lang": "Deutsch", "lang_code": "de", "sense": "Computerlinguistik: zerlegen eines Textes für die Weiterverarbeitung", "sense_index": "1", "word": "tokenisieren" } ], "word": "tokenize" }
Download raw JSONL data for tokenize meaning in Englisch (2.0kB)
This page is a part of the kaikki.org machine-readable Englisch dictionary. This dictionary is based on structured data extracted on 2025-04-12 from the dewiktionary dump dated 2025-04-03 using wiktextract (aeaf2a1 and fb63907). The data shown on this site has been post-processed and various details (e.g., extra categories) removed, some information disambiguated, and additional data merged from other sources. See the raw data download page for the unprocessed wiktextract data.
If you use this data in academic research, please cite Tatu Ylonen: Wiktextract: Wiktionary as Machine-Readable Structured Data, Proceedings of the 13th Conference on Language Resources and Evaluation (LREC), pp. 1317-1325, Marseille, 20-25 June 2022. Linking to the relevant page(s) under https://kaikki.org would also be greatly appreciated.