diff --git a/main.cpp b/main.cpp index 32c3a68..77260bb 100644 --- a/main.cpp +++ b/main.cpp @@ -275,13 +275,16 @@ int main(int argc, char ** argv) { // determine newline token auto llama_token_newline = ::llama_tokenize(ctx, "\n", false); - fprintf(stderr, "\n"); - fprintf(stderr, "%s: prompt: '%s'\n", __func__, params.prompt.c_str()); - fprintf(stderr, "%s: number of tokens in prompt = %zu\n", __func__, embd_inp.size()); - for (int i = 0; i < (int) embd_inp.size(); i++) { - fprintf(stderr, "%6d -> '%s'\n", embd_inp[i], llama_token_to_str(ctx, embd_inp[i])); + if (params.verbose_prompt) { + fprintf(stderr, "\n"); + fprintf(stderr, "%s: prompt: '%s'\n", __func__, params.prompt.c_str()); + fprintf(stderr, "%s: number of tokens in prompt = %zu\n", __func__, embd_inp.size()); + for (int i = 0; i < (int) embd_inp.size(); i++) { + fprintf(stderr, "%6d -> '%s'\n", embd_inp[i], llama_token_to_str(ctx, embd_inp[i])); + } + fprintf(stderr, "\n"); } - fprintf(stderr, "\n"); + if (params.interactive) { #if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) struct sigaction sigint_action; diff --git a/utils.cpp b/utils.cpp index 319924c..cea3096 100644 --- a/utils.cpp +++ b/utils.cpp @@ -134,6 +134,8 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) { params.use_mlock = true; } else if (arg == "--mtest") { params.mem_test = true; + } else if (arg == "--verbose-prompt") { + params.verbose_prompt = true; } else if (arg == "-r" || arg == "--reverse-prompt") { if (++i >= argc) { invalid_param = true; @@ -212,6 +214,7 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) { fprintf(stderr, " --mlock force system to keep model in RAM rather than swapping or compressing\n"); } fprintf(stderr, " --mtest compute maximum memory usage\n"); + fprintf(stderr, " --verbose-prompt print prompt before generation\n"); fprintf(stderr, " -m FNAME, --model FNAME\n"); fprintf(stderr, " model path (default: %s)\n", params.model.c_str()); 
fprintf(stderr, "\n"); diff --git a/utils.h b/utils.h index 0690ef7..dede803 100644 --- a/utils.h +++ b/utils.h @@ -48,6 +48,7 @@ struct gpt_params { bool perplexity = false; // compute perplexity over the prompt bool use_mlock = false; // use mlock to keep model in memory bool mem_test = false; // compute maximum memory usage + bool verbose_prompt = false; // print prompt tokens before generation }; bool gpt_params_parse(int argc, char ** argv, gpt_params & params);