diff --git a/convert-pth-to-ggml.py b/convert-pth-to-ggml.py
index d255750..5c36e9c 100644
--- a/convert-pth-to-ggml.py
+++ b/convert-pth-to-ggml.py
@@ -99,7 +99,7 @@ for p in range(n_parts):
     fout.write(struct.pack("i", ftype))
 
     # Is this correct??
-    for i in range(32000):
+    for i in range(tokenizer.vocab_size()):
         if tokenizer.is_unknown(i):
             # "<unk>" token (translated as ??)
             text = " \u2047 ".encode("utf-8")