|
|
@@ -8653,9 +8653,11 @@ static void ggml_compute_forward_rope_f32(
|
|
|
|
|
|
|
|
|
|
|
|
const float theta_scale = powf(10000.0, -2.0f/n_dims);
|
|
|
|
const float theta_scale = powf(10000.0, -2.0f/n_dims);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
const bool is_neox = mode & 2;
|
|
|
|
|
|
|
|
|
|
|
|
for (int64_t i3 = 0; i3 < ne3; i3++) {
|
|
|
|
for (int64_t i3 = 0; i3 < ne3; i3++) {
|
|
|
|
for (int64_t i2 = (mode == 0 ? 0 : n_past); i2 < ne2; i2++) {
|
|
|
|
for (int64_t i2 = ((mode & 1) == 0 ? 0 : n_past); i2 < ne2; i2++) {
|
|
|
|
const int p = (mode == 0 ? n_past + i2 : i2);
|
|
|
|
const int p = ((mode & 1) == 0 ? n_past + i2 : i2);
|
|
|
|
for (int64_t i1 = 0; i1 < ne1; i1++) {
|
|
|
|
for (int64_t i1 = 0; i1 < ne1; i1++) {
|
|
|
|
if (ir++ < ir0) continue;
|
|
|
|
if (ir++ < ir0) continue;
|
|
|
|
if (ir > ir1) break;
|
|
|
|
if (ir > ir1) break;
|
|
|
@@ -8668,14 +8670,25 @@ static void ggml_compute_forward_rope_f32(
|
|
|
|
|
|
|
|
|
|
|
|
theta *= theta_scale;
|
|
|
|
theta *= theta_scale;
|
|
|
|
|
|
|
|
|
|
|
|
const float * const src = (float *)((char *) src0->data + i3*nb3 + i2*nb2 + i1*nb1 + i0*nb0);
|
|
|
|
if (!is_neox) {
|
|
|
|
float * dst_data = (float *)((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 + i0*nb0);
|
|
|
|
const float * const src = (float *)((char *) src0->data + i3*nb3 + i2*nb2 + i1*nb1 + i0*nb0);
|
|
|
|
|
|
|
|
float * dst_data = (float *)((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 + i0*nb0);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
const float x0 = src[0];
|
|
|
|
|
|
|
|
const float x1 = src[1];
|
|
|
|
|
|
|
|
|
|
|
|
const float x0 = src[0];
|
|
|
|
dst_data[0] = x0*cos_theta - x1*sin_theta;
|
|
|
|
const float x1 = src[1];
|
|
|
|
dst_data[1] = x0*sin_theta + x1*cos_theta;
|
|
|
|
|
|
|
|
} else {
|
|
|
|
|
|
|
|
const float * const src = (float *)((char *) src0->data + i3*nb3 + i2*nb2 + i1*nb1 + (i0/2)*nb0);
|
|
|
|
|
|
|
|
float * dst_data = (float *)((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 + (i0/2)*nb0);
|
|
|
|
|
|
|
|
|
|
|
|
dst_data[0] = x0*cos_theta - x1*sin_theta;
|
|
|
|
const float x0 = src[0];
|
|
|
|
dst_data[1] = x0*sin_theta + x1*cos_theta;
|
|
|
|
const float x1 = src[n_dims/2];
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
dst_data[0] = x0*cos_theta - x1*sin_theta;
|
|
|
|
|
|
|
|
dst_data[n_dims/2] = x0*sin_theta + x1*cos_theta;
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
@@ -8730,9 +8743,11 @@ static void ggml_compute_forward_rope_f16(
|
|
|
|
|
|
|
|
|
|
|
|
const float theta_scale = powf(10000.0, -2.0f/n_dims);
|
|
|
|
const float theta_scale = powf(10000.0, -2.0f/n_dims);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
const bool is_neox = mode & 2;
|
|
|
|
|
|
|
|
|
|
|
|
for (int64_t i3 = 0; i3 < ne3; i3++) {
|
|
|
|
for (int64_t i3 = 0; i3 < ne3; i3++) {
|
|
|
|
for (int64_t i2 = (mode == 0 ? 0 : n_past); i2 < ne2; i2++) {
|
|
|
|
for (int64_t i2 = ((mode & 1) == 0 ? 0 : n_past); i2 < ne2; i2++) {
|
|
|
|
const int p = (mode == 0 ? n_past + i2 : i2);
|
|
|
|
const int p = ((mode & 1) == 0 ? n_past + i2 : i2);
|
|
|
|
for (int64_t i1 = 0; i1 < ne1; i1++) {
|
|
|
|
for (int64_t i1 = 0; i1 < ne1; i1++) {
|
|
|
|
if (ir++ < ir0) continue;
|
|
|
|
if (ir++ < ir0) continue;
|
|
|
|
if (ir > ir1) break;
|
|
|
|
if (ir > ir1) break;
|
|
|
@@ -8745,14 +8760,25 @@ static void ggml_compute_forward_rope_f16(
|
|
|
|
|
|
|
|
|
|
|
|
theta *= theta_scale;
|
|
|
|
theta *= theta_scale;
|
|
|
|
|
|
|
|
|
|
|
|
const ggml_fp16_t * const src = (ggml_fp16_t *)((char *) src0->data + i3*nb3 + i2*nb2 + i1*nb1 + i0*nb0);
|
|
|
|
if (!is_neox) {
|
|
|
|
ggml_fp16_t * dst_data = (ggml_fp16_t *)((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 + i0*nb0);
|
|
|
|
const ggml_fp16_t * const src = (ggml_fp16_t *)((char *) src0->data + i3*nb3 + i2*nb2 + i1*nb1 + i0*nb0);
|
|
|
|
|
|
|
|
ggml_fp16_t * dst_data = (ggml_fp16_t *)((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 + i0*nb0);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
const float x0 = GGML_FP16_TO_FP32(src[0]);
|
|
|
|
|
|
|
|
const float x1 = GGML_FP16_TO_FP32(src[1]);
|
|
|
|
|
|
|
|
|
|
|
|
const float x0 = GGML_FP16_TO_FP32(src[0]);
|
|
|
|
dst_data[0] = GGML_FP32_TO_FP16(x0*cos_theta - x1*sin_theta);
|
|
|
|
const float x1 = GGML_FP16_TO_FP32(src[1]);
|
|
|
|
dst_data[1] = GGML_FP32_TO_FP16(x0*sin_theta + x1*cos_theta);
|
|
|
|
|
|
|
|
} else {
|
|
|
|
|
|
|
|
const ggml_fp16_t * const src = (ggml_fp16_t *)((char *) src0->data + i3*nb3 + i2*nb2 + i1*nb1 + (i0/2)*nb0);
|
|
|
|
|
|
|
|
ggml_fp16_t * dst_data = (ggml_fp16_t *)((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 + (i0/2)*nb0);
|
|
|
|
|
|
|
|
|
|
|
|
dst_data[0] = GGML_FP32_TO_FP16(x0*cos_theta - x1*sin_theta);
|
|
|
|
const float x0 = GGML_FP16_TO_FP32(src[0]);
|
|
|
|
dst_data[1] = GGML_FP32_TO_FP16(x0*sin_theta + x1*cos_theta);
|
|
|
|
const float x1 = GGML_FP16_TO_FP32(src[n_dims/2]);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
dst_data[0] = GGML_FP32_TO_FP16(x0*cos_theta - x1*sin_theta);
|
|
|
|
|
|
|
|
dst_data[n_dims/2] = GGML_FP32_TO_FP16(x0*sin_theta + x1*cos_theta);
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|