trajdl.tokenizers.abstract module#

class trajdl.tokenizers.abstract.AbstractLocSeqTokenizer(vocab: Dict[str, int])[source]#

Bases: AbstractTokenizer

tokenizer for location sequences

class trajdl.tokenizers.abstract.AbstractTokenizer(vocab: Dict[str, int])[source]#

Bases: ABC

property bos: int#
abstract classmethod build(*args, **kwargs) AbstractTokenizer[source]#

类方法,用于构建Tokenizer实例,可以根据子类需求调整参数

check_vocab(vocab: Dict[str, int])[source]#

check the correctness of vocab

abstract classmethod construct_vocab(*args, **kwargs) Dict[str, int][source]#

类方法,根据输入数据构造词汇表

property eos: int#
static load_pretrained(path: str | Path) AbstractTokenizer[source]#

静态方法,从指定路径加载预训练的Tokenizer

abstract loc2idx(loc: str) int[source]#

将位置转换为下标

property mask: int#
property pad: int#
save_pretrained(path: str) None[source]#

将预训练的Tokenizer保存到指定路径

tokenize_loc_seq(loc_seq: Iterable[str] | LocSeq | ListScalar, add_bos: bool = False, add_eos: bool = False, return_as: str = 'py') List[int] | ndarray | LongTensor[source]#

公共接口,用于将位置序列转换为数字序列

property unk: int#
class trajdl.tokenizers.abstract.AbstractTrajTokenizer(vocab: Dict[str, int])[source]#

Bases: AbstractTokenizer

tokenizer for trajectories

abstract tokenize_traj(traj: Trajectory, add_start_end_token: bool = False, return_as: str = 'py') List[int][source]#

transform a trajectory into a sequence of integer token indices