From c1c7026b470ced0b8a6c67e968c04bb47864def1 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Sun, 19 Mar 2023 19:33:18 +0200 Subject: [PATCH] Fix python stuff (#109) --- convert-pth-to-ggml.py | 37 ++++++++++++++----------------------- 1 file changed, 14 insertions(+), 23 deletions(-) diff --git a/convert-pth-to-ggml.py b/convert-pth-to-ggml.py index 8194876..c1941a8 100644 --- a/convert-pth-to-ggml.py +++ b/convert-pth-to-ggml.py @@ -32,7 +32,7 @@ def parse_args(): return parser.parse_args() def get_n_parts(dim): - + mappings = {4096: 1, 5120: 2, 6656: 4, 8192: 8} n_parts = mappings.get(dim) if n_parts is None: @@ -43,7 +43,7 @@ def get_n_parts(dim): return n_parts def load_hparams_and_tokenizer(dir_model): - + fname_hparams = f"{dir_model}/params.json" fname_tokenizer = f"{dir_model}/../tokenizer.model" @@ -57,7 +57,7 @@ def load_hparams_and_tokenizer(dir_model): return hparams, tokenizer def write_header(fout, hparams, ftype): - + keys = ["vocab_size", "dim", "multiple_of", "n_heads", "n_layers"] values = [ 0x67676d6c, # magic: ggml in hex @@ -88,26 +88,17 @@ def write_tokens(fout, tokenizer): def process_and_write_variables(fout, model, ftype): - for name, data in model.items(): - + for name, datao in model.items(): + if name.endswith("freqs"): continue - - shape = data.shape - - print(f"Processing variable: {name} with shape: {shape} and type: {data.dtype}\n") - - data = np.squeeze(data) - n_dims = len(shape) - # for efficiency - transpose some matrices - # "model/h.*/attn/c_attn/w" - # "model/h.*/attn/c_proj/w" - # "model/h.*/mlp/c_fc/w" - # "model/h.*/mlp/c_proj/w" - #if name.endswith(("/attn/c_attn/w", "/attn/c_proj/w", "/mlp/c_fc/w", "/mlp/c_proj/w")): - # print("Transposing") - # data = data.transpose() + shape = datao.shape + + print(f"Processing variable: {name} with shape: {shape} and type: {datao.dtype}") + + data = datao.numpy().squeeze() + n_dims = len(shape) # default type is fp16 ftype_cur = 1 @@ -122,8 +113,8 @@ def process_and_write_variables(fout, model, ftype): for dim in reversed(data.shape): fout.write(struct.pack("i", dim)) fout.write(sname) - - # data + + # data output to file data.tofile(fout) def main(): @@ -139,7 +130,7 @@ def main(): for p in range(n_parts): print(f"Processing part {p}\n") - + fname_model = f"{dir_model}/consolidated.0{p}.pth" fname_out = f"{dir_model}/ggml-model-{ftype_str[ftype]}.bin{'' if p == 0 else '.' + str(p)}"