# Source code for tkseem.morphological_tokenizer
import os
import pickle
from ._base import BaseTokenizer
class MorphologicalTokenizer(BaseTokenizer):
    """Auto tokenization using a saved dictionary."""

    def train(self):
        """Use a default dictionary for training.

        Loads the pickled vocabulary stored at ``dictionaries/vocab.pl``
        (resolved relative to ``self.rel_path``), passes it through
        ``self._truncate_dict`` and stores the result in ``self.vocab``.

        ``rel_path`` and ``_truncate_dict`` are not defined in this class;
        presumably they are provided by ``BaseTokenizer`` -- confirm there.

        Raises:
            OSError: if the dictionary file cannot be opened.
            pickle.UnpicklingError: if the file is not a valid pickle.
        """
        print("Training MorphologicalTokenizer ...")
        vocab_path = os.path.join(self.rel_path, "dictionaries/vocab.pl")
        # Use a context manager so the file handle is closed deterministically,
        # even when unpickling fails, instead of leaking it until garbage
        # collection.
        with open(vocab_path, "rb") as vocab_file:
            # NOTE(security): pickle can execute arbitrary code while loading.
            # This is only acceptable because the dictionary is expected to be
            # a trusted file shipped with the package, never user input.
            raw_vocab = pickle.load(vocab_file)
        self.vocab = self._truncate_dict(raw_vocab)