@ -9,7 +9,6 @@
# include <cstring>
# include <cstring>
# include <fstream>
# include <fstream>
# include <iostream>
# include <iostream>
# include <map>
# include <string>
# include <string>
# include <vector>
# include <vector>
@ -69,7 +68,7 @@ void set_console_state(console_state new_st)
static const int EOS_TOKEN_ID = 2 ;
static const int EOS_TOKEN_ID = 2 ;
// determine number of model parts based on the dimension
// determine number of model parts based on the dimension
static const std : : map< int , int > LLAMA_N_PARTS = {
static const std : : unordered_ map< int , int > LLAMA_N_PARTS = {
{ 4096 , 1 } ,
{ 4096 , 1 } ,
{ 5120 , 2 } ,
{ 5120 , 2 } ,
{ 6656 , 4 } ,
{ 6656 , 4 } ,
@ -123,7 +122,7 @@ struct llama_model {
//
//
struct ggml_context * ctx ;
struct ggml_context * ctx ;
std : : map< std : : string , struct ggml_tensor * > tensors ;
std : : unordered_ map< std : : string , struct ggml_tensor * > tensors ;
} ;
} ;
// load the model's weights from a file
// load the model's weights from a file
@ -208,6 +207,7 @@ bool llama_model_load(const std::string & fname, llama_model & model, llama_voca
// load vocab
// load vocab
{
{
std : : string word ;
std : : string word ;
vocab . id_to_token . resize ( model . hparams . n_vocab ) ;
std : : vector < char > tmp ( 64 ) ;
std : : vector < char > tmp ( 64 ) ;
for ( int i = 0 ; i < model . hparams . n_vocab ; i + + ) {
for ( int i = 0 ; i < model . hparams . n_vocab ; i + + ) {
@ -227,8 +227,10 @@ bool llama_model_load(const std::string & fname, llama_model & model, llama_voca
fin . read ( ( char * ) & score , sizeof ( score ) ) ;
fin . read ( ( char * ) & score , sizeof ( score ) ) ;
vocab . token_to_id [ word ] = i ;
vocab . token_to_id [ word ] = i ;
vocab . id_to_token [ i ] = word ;
vocab . score [ i ] = score ;
auto & tok_score = vocab . id_to_token [ i ] ;
tok_score . tok = word ;
tok_score . score = score ;
}
}
}
}
@ -1028,7 +1030,7 @@ int main(int argc, char ** argv) {
fprintf ( stderr , " %s: prompt: '%s' \n " , __func__ , params . prompt . c_str ( ) ) ;
fprintf ( stderr , " %s: prompt: '%s' \n " , __func__ , params . prompt . c_str ( ) ) ;
fprintf ( stderr , " %s: number of tokens in prompt = %zu \n " , __func__ , embd_inp . size ( ) ) ;
fprintf ( stderr , " %s: number of tokens in prompt = %zu \n " , __func__ , embd_inp . size ( ) ) ;
for ( int i = 0 ; i < ( int ) embd_inp . size ( ) ; i + + ) {
for ( int i = 0 ; i < ( int ) embd_inp . size ( ) ; i + + ) {
fprintf ( stderr , " %6d -> '%s' \n " , embd_inp [ i ] , vocab . id_to_token . at ( embd_inp [ i ] ) . c_str( ) ) ;
fprintf ( stderr , " %6d -> '%s' \n " , embd_inp [ i ] , vocab . id_to_token . at ( embd_inp [ i ] ) . tok. c_str( ) ) ;
}
}
fprintf ( stderr , " \n " ) ;
fprintf ( stderr , " \n " ) ;
if ( params . interactive ) {
if ( params . interactive ) {
@ -1154,7 +1156,7 @@ int main(int argc, char ** argv) {
// display text
// display text
if ( ! input_noecho ) {
if ( ! input_noecho ) {
for ( auto id : embd ) {
for ( auto id : embd ) {
printf ( " %s " , vocab . id_to_token [ id ] . c_str( ) ) ;
printf ( " %s " , vocab . id_to_token [ id ] . tok. c_str( ) ) ;
}
}
fflush ( stdout ) ;
fflush ( stdout ) ;
}
}
@ -1169,7 +1171,7 @@ int main(int argc, char ** argv) {
// check for reverse prompt
// check for reverse prompt
std : : string last_output ;
std : : string last_output ;
for ( auto id : last_n_tokens ) {
for ( auto id : last_n_tokens ) {
last_output + = vocab . id_to_token [ id ] ;
last_output + = vocab . id_to_token [ id ] .tok ;
}
}
// Check if each of the reverse prompts appears at the end of the output.
// Check if each of the reverse prompts appears at the end of the output.