
Commit 9383922

Author: John
Message: ggml perf bugfix
Parent: 1d6e234

File tree: 2 files changed (+11, -7 lines)


ggml.c
8 additions & 5 deletions
@@ -17234,8 +17234,8 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
                 if (GGML_OP_HAS_FINALIZE[node->op]) {
                     params.type = GGML_TASK_FINALIZE;
                     ggml_compute_forward(&params, node);
-                    ggml_graph_compute_perf_stats_node(node, state->shared);
-                }
+                }
+                ggml_graph_compute_perf_stats_node(node, state->shared);
             } else {
                 break;
             }
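The moved call is the actual bugfix: before this change, ggml_graph_compute_perf_stats_node() ran only for ops that have a finalize pass, so nodes without one never accumulated any timing data. Hoisting it out of the if block records stats for every completed node. For reference, in upstream ggml of this era the helper looks roughly like the sketch below (a paraphrase, not part of this diff; it accumulates the elapsed cycles and microseconds since the node started computing):

static void ggml_graph_compute_perf_stats_node(struct ggml_tensor * node,
                                               const struct ggml_compute_state_shared * st) {
    // elapsed cycles/time since this node began executing
    int64_t cycles_cur  = ggml_perf_cycles()  - st->perf_node_start_cycles;
    int64_t time_us_cur = ggml_perf_time_us() - st->perf_node_start_time_us;

    node->perf_runs++;                 // a node with 0 runs would break later averages
    node->perf_cycles  += cycles_cur;
    node->perf_time_us += time_us_cur;
}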
@@ -17269,10 +17269,11 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
         if (state->ith < node->n_tasks) {
             ggml_compute_forward(&params, node);
         }
-    }
+        ggml_graph_compute_perf_stats_node(node, state->shared);
+    }

-    return 0;
-}
+    return 0;
+}

 void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph) {
     const int n_threads = cgraph->n_threads;
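This second hunk applies the same fix on the other execution path of ggml_graph_compute_thread, so nodes finished by worker threads are counted as well. The return 0; and closing brace appear re-emitted unchanged, which suggests only indentation moved with the brace restructuring.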
@@ -18246,6 +18247,8 @@ void ggml_graph_print_impl(const struct ggml_cgraph * cgraph, bool print_nodes,
     GGML_PRINT("n_nodes = %d\n", cgraph->n_nodes);
     for (int i = 0; i < cgraph->n_nodes; i++) {
         struct ggml_tensor * node = cgraph->nodes[i];
+        node->perf_time_us = MAX(node->perf_time_us, 1); // should not happen anymore
+        node->perf_runs = MAX(node->perf_runs, 1);

         perf_total_per_op_us[node->op] += MAX(1, node->perf_time_us);
         perf_array[i] = node->perf_time_us / node->perf_runs;
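The two MAX() clamps are a belt-and-braces guard for the printing path: perf_runs is forced to at least 1 so the per-run average below cannot divide by zero, and perf_time_us to at least 1 us so a timed op never reports as free. A minimal standalone illustration of the guard pattern (MAX here is a local stand-in for ggml's internal macro; the zeroed values are invented):

#include <inttypes.h>
#include <stdio.h>

#define MAX(a, b) ((a) > (b) ? (a) : (b))

int main(void) {
    int64_t perf_time_us = 0;   // pretend this node was never timed
    int64_t perf_runs    = 0;

    perf_time_us = MAX(perf_time_us, 1);
    perf_runs    = MAX(perf_runs, 1);

    // safe: the clamp above guarantees the divisor is nonzero
    printf("avg = %" PRId64 " us/run\n", perf_time_us / perf_runs);
    return 0;
}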

libfalcon.cpp
3 additions & 2 deletions
@@ -3793,7 +3793,7 @@ struct falcon_context * falcon_init_from_file(
         }
     };
     }
-
+    int64_t t_start_us = ggml_time_us();
     ggml_type memory_type = params.f16_kv ? GGML_TYPE_F16 : GGML_TYPE_F32;
     falcon_model *model = falcon_model_load(path_model, params.n_ctx, params.n_batch, params.n_gpu_layers,
                                             params.main_gpu, memory_type, params.use_mmap, params.use_mlock,
@@ -3808,7 +3808,8 @@ struct falcon_context * falcon_init_from_file(
     params.i_gpu_start = model->i_gpu_start; // first layer that's GPU accelerated
     params.i_gpu_last = model->i_gpu_last; // last layer that's GPU accelerated
     falcon_context * f_ctx = falcon_context_prepare(params, model, "falcon_main",true);
-
+    f_ctx->t_load_us = ggml_time_us() - t_start_us;
+    f_ctx->t_start_us = t_start_us;
     //falcon_context_set_buffers(f_ctx,params.n_batch,params.n_ctx);
     //const size_t memory_size = ggml_nbytes(model->kv_self.k) + ggml_nbytes(model->kv_self.v);
     //fprintf(stderr, "%s: RAM buffers - key_val = %7.2f MB, Compute = %7.2f MB, Scratch 0 = %7.2f MB, Scratch 1 = %7.2f MB \n", __func__, memory_size / 1024.0 / 1024.0, f_ctx->buf_compute.size /1024.0/1024.0, (f_ctx->buf_scratch[0].size)/1024.0/1024.0, (f_ctx->buf_scratch[1].size)/1024.0/1024.0);
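The libfalcon.cpp change simply brackets model loading with wall-clock timestamps and stores them on the context so load time can be reported later. A minimal sketch of the same pattern, assuming only ggml's public timing helpers (timed_load_example and the load_model placeholder are hypothetical names, not from this diff):

#include <stdio.h>
#include "ggml.h"

void timed_load_example(void) {
    ggml_time_init();                             // initialize ggml's timer once per process
    const int64_t t_start_us = ggml_time_us();    // timestamp before loading

    // ... load_model(); stand-in for falcon_model_load() ...

    const int64_t t_load_us = ggml_time_us() - t_start_us;
    fprintf(stderr, "model load took %.2f ms\n", t_load_us / 1000.0);
}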
