31 lines
953 B
Python
31 lines
953 B
Python
from ctypes import c_uint64
|
|
from typing import Callable, List, Sequence, Tuple
|
|
|
|
from murmurhash import hash_unicode
|
|
|
|
from ..config import registry
|
|
from ..model import Model
|
|
from ..types import Ints2d
|
|
|
|
InT = Sequence[Sequence[str]]
|
|
OutT = List[Ints2d]
|
|
|
|
|
|
@registry.layers("strings2arrays.v1")
def strings2arrays() -> Model[InT, OutT]:
    """Construct the "strings2arrays" layer.

    The layer is registered in the layers registry as "strings2arrays.v1".
    It takes no configuration: the returned Model is named
    "strings2arrays" and uses the module-level `forward` function to turn a
    sequence of string sequences into a list of arrays.
    """
    layer = Model("strings2arrays", forward)
    return layer
|
|
|
|
|
|
def forward(model: Model[InT, OutT], Xs: InT, is_train: bool) -> Tuple[OutT, Callable]:
    """Hash every string in `Xs` and return one column array per sequence.

    model: The layer instance; only its `ops` are used, to build the arrays.
    Xs: A sequence of string sequences.
    is_train: Unused by this layer.
    RETURNS: A tuple of (arrays, backprop). `arrays` holds one entry per
        input sequence, created with dtype "uint64" and reshaped to
        (-1, 1). `backprop` ignores its argument and returns an empty list.
    """

    def unsigned_hash(word: str) -> int:
        # The hash is a signed 32-bit integer. Reinterpret it as an unsigned
        # 64-bit value up front, since letting NumPy do that cast is
        # deprecated.
        return c_uint64(hash_unicode(word)).value

    def as_column(keys):
        # Flat uint64 array first, then a single-column 2d view of it.
        flat = model.ops.asarray1i(keys, dtype="uint64")
        return model.ops.reshape2i(flat, -1, 1)

    arrays = [as_column([unsigned_hash(word) for word in words]) for words in Xs]

    def backprop(dX: OutT) -> InT:
        # Nothing is propagated back to the string inputs.
        return []

    return arrays, backprop
|