OpenMP Example
Description
This example is a basic MatAdd parallelized on the 8 cluster cores in two ways: with OpenMP, and with the standard parallelization code used in many of the parallelized SDK libraries such as the Autotiler.
The OpenMP library is linked by the CMake utilities of the SDK thanks to the variable CONFIG_LIB_OPENMP=y set in the sdk.config.
How to run
cmake -B build
cmake --build build --target menuconfig # Select your board in the menu
cmake --build build --target run
Or use the gap command:
gap init
gap menuconfig
gap run
cmake -B build
cmake --build build --target menuconfig # Select GVSoC in the menu
cmake --build build --target run
Or use the gap command:
gap init
gap menuconfig
gap run
Results
You should have an output looking like this (order may vary):
*** OPENMP Basic Example ***
Running the Standard parallelization code (Autotiler-like)
Elapsed: 6137 Cyc (0.16 Op/Cyc)
Running the OpenMP Code
Elapsed: 6250 Cyc (0.16 Op/Cyc)
Test Succeded !
Code
/*
* Copyright (C) 2024 GreenWaves Technologies
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/* PMSIS includes */
#include <pmsis.h>
#include "Gap.h"
/*
 * Generic helper macros. Each one may evaluate its arguments more than once,
 * so do not pass expressions with side effects (e.g. i++).
 */
/* Absolute value. The sign test is done on (int)(a), not on (a) itself.
 * NOTE(review): for a floating-point value strictly between -1 and 0 the
 * cast yields 0, so the value is returned unchanged (still negative) --
 * confirm this macro is only meant for integer arguments. */
#define Abs(a) (((int)(a)<0)?(-(a)):(a))
/* Smaller of the two arguments. */
#define Min(a, b) (((a)<(b))?(a):(b))
/* Larger of the two arguments. */
#define Max(a, b) (((a)>(b))?(a):(b))
/* Number of float elements in each of the In1, In2 and Out buffers. */
#define MAT_SIZE 8000
/* Operand and result buffers, MAT_SIZE floats each. They are allocated with
 * pi_cl_l1_malloc() in main() and accessed by the cluster kernels below. */
float *In1;
float *In2;
float *Out;
/*************** OpenMP Code ******************/
/**
 * Cluster entry point for the OpenMP variant of the element-wise addition.
 *
 * Computes Out[i] = In1[i] + In2[i] for the MAT_SIZE elements inside an
 * "omp parallel for" loop, measures the loop with the cluster hardware timer
 * and prints the elapsed cycles and the operations-per-cycle ratio.
 *
 * @param arg  Task argument supplied by the caller; not used.
 */
void openmp_cluster_delegate(void *arg)
{
    printf("Running the OpenMP Code\n");

    /* Restart the cluster timer so that only the loop below is measured. */
    gap_cl_resethwtimer();
    gap_cl_starttimer();
    int t_begin = gap_cl_readhwtimer();

    #pragma omp parallel for
    for (unsigned int idx = 0; idx < MAT_SIZE; idx++) {
        Out[idx] = In1[idx] + In2[idx];
    }

    int cycles = gap_cl_readhwtimer() - t_begin;
    float ops_per_cycle = ((float) MAT_SIZE) / cycles;
    printf("Elapsed: %d Cyc (%.2f Op/Cyc)\n\n", cycles, ops_per_cycle);
}
/**************** Standard Code *****************/
/**
 * Number of elements each core must handle so that X elements are covered.
 *
 * Computes X >> Log2Core and adds one when X has any of the low bits selected
 * by (NCore - 1) set, i.e. a rounded-up division when the core count is a
 * power of two.
 * NOTE(review): assumes gap_fl1() returns log2 of the core count and that the
 * core count is a power of two -- confirm against the SDK.
 *
 * @param X  Total number of elements to distribute.
 * @return   Per-core chunk size.
 */
static inline unsigned int __attribute__((always_inline)) ChunkSize(unsigned int X)
{
    const unsigned int cores = gap_ncore();
    const unsigned int shift = gap_fl1(cores);
    const unsigned int leftover = X & (cores - 1);

    unsigned int per_core = X >> shift;
    if (leftover != 0) {
        /* Round up so the trailing elements are not dropped. */
        per_core += 1;
    }
    return per_core;
}
/**
 * Per-core worker of the manually parallelized addition.
 *
 * Each core derives its own [begin, end) slice from its core id and the
 * per-core chunk size, clamps both bounds to MAT_SIZE, and writes
 * Out[i] = In1[i] + In2[i] over that slice.
 *
 * @param arg  Task argument forwarded by the fork; not used.
 */
void matadd(void *arg)
{
    const unsigned int core = gap_coreid();
    const unsigned int chunk = ChunkSize(MAT_SIZE);

    /* Clamp so that cores past the end of the data get an empty range. */
    const unsigned int begin = Min(core * chunk, MAT_SIZE);
    const unsigned int end = Min(MAT_SIZE, begin + chunk);

    unsigned int idx = begin;
    while (idx < end) {
        Out[idx] = In1[idx] + In2[idx];
        idx++;
    }
}
/**
 * Cluster entry point for the manually parallelized (Autotiler-like) variant.
 *
 * Forks matadd() on pi_cl_cluster_nb_cores() cores, measures the fork with
 * the cluster hardware timer and prints the elapsed cycles and the
 * operations-per-cycle ratio.
 *
 * @param arg  Task argument, forwarded unchanged to matadd().
 */
void cluster_delegate(void *arg)
{
    printf("Running the Standard parallelization code (Autotiler-like)\n");

    /* Restart the cluster timer so that only the fork below is measured. */
    gap_cl_resethwtimer();
    gap_cl_starttimer();
    int t_begin = gap_cl_readhwtimer();

    /* Dispatch the worker to the cluster cores.
     * NOTE(review): the timing assumes the fork returns only once every core
     * has finished -- confirm against the PMSIS documentation. */
    pi_cl_team_fork(pi_cl_cluster_nb_cores(), matadd, arg);

    int cycles = gap_cl_readhwtimer() - t_begin;
    float ops_per_cycle = ((float) MAT_SIZE) / cycles;
    printf("Elapsed: %d Cyc (%.2f Op/Cyc)\n\n", cycles, ops_per_cycle);
}
/**
 * Fill the two operand buffers with a deterministic pattern.
 *
 * For every index i in [0, size): In1[i] receives the integer quotient
 * i / 10 and In2[i] receives the remainder i % 10, both converted to float.
 * Nothing is written when size is zero or negative.
 *
 * @param In1   First operand buffer, at least size elements.
 * @param In2   Second operand buffer, at least size elements.
 * @param size  Number of elements to initialize.
 */
void init_arrs(float* In1, float * In2, int size) {
    int idx = 0;
    while (idx < size) {
        /* Integer arithmetic first, then implicit conversion to float. */
        In1[idx] = idx / 10;
        In2[idx] = idx % 10;
        ++idx;
    }
}
/**
 * Verify the Out buffer against a recomputation of In1 + In2.
 *
 * Walks the MAT_SIZE elements, prints one line per index whose stored result
 * differs from In1[i] + In2[i], and counts those indices.
 *
 * @return Number of mismatching elements (0 when every element matches).
 */
int check_results() {
    int mismatches = 0;
    for (int idx = 0; idx < MAT_SIZE; ++idx) {
        if ((In1[idx] + In2[idx]) == Out[idx]) {
            continue;
        }
        printf("Error @ %d\n", idx);
        mismatches++;
    }
    return mismatches;
}
/**
 * Program entry.
 *
 * Opens the cluster, allocates the three MAT_SIZE-float buffers in cluster L1,
 * then runs the manually parallelized kernel followed by the OpenMP kernel,
 * re-initializing the inputs and checking the output after each run.
 *
 * @return 0 when both runs produce correct results, the number of mismatching
 *         elements otherwise, or -1 when the L1 allocation fails.
 */
int main(void)
{
    printf("\n\t *** OPENMP Basic Example ***\n\n");

    pi_device_t* cluster_dev;
    if(pi_open(PI_CORE_CLUSTER, &cluster_dev))
    {
        printf("Cluster open failed !\n");
        pmsis_exit(-1);
    }

    const size_t buf_bytes = MAT_SIZE * sizeof(float);

    In1 = (float *) pi_cl_l1_malloc(cluster_dev, buf_bytes);
    In2 = (float *) pi_cl_l1_malloc(cluster_dev, buf_bytes);
    Out = (float *) pi_cl_l1_malloc(cluster_dev, buf_bytes);
    if ((In1 == NULL) || (In2 == NULL) || (Out == NULL)) {
        printf("Error allocating the L1\n");
        /* Release whatever was obtained and close the cluster so that a
         * partial allocation failure does not leak L1 memory or leave the
         * cluster open. */
        if (Out != NULL) {
            pi_cl_l1_free(cluster_dev, Out, buf_bytes);
        }
        if (In2 != NULL) {
            pi_cl_l1_free(cluster_dev, In2, buf_bytes);
        }
        if (In1 != NULL) {
            pi_cl_l1_free(cluster_dev, In1, buf_bytes);
        }
        pi_cluster_close(cluster_dev);
        return -1;
    }

    /* Prepare cluster task and send it to cluster. The same task structure is
     * reused for both runs.
     * NOTE(review): the checks below assume pi_cluster_send_task_to_cl()
     * returns only after the cluster task has completed -- confirm. */
    struct pi_cluster_task cl_task;

    /* Run 1: manual fork-based parallelization. */
    init_arrs(In1, In2, MAT_SIZE);
    pi_cluster_send_task_to_cl(cluster_dev, pi_cluster_task(&cl_task, cluster_delegate, NULL));
    int err = check_results();

    /* Run 2: OpenMP parallelization, on freshly initialized inputs. */
    init_arrs(In1, In2, MAT_SIZE);
    pi_cluster_send_task_to_cl(cluster_dev, pi_cluster_task(&cl_task, openmp_cluster_delegate, NULL));
    err += check_results();

    pi_cl_l1_free(cluster_dev, Out, buf_bytes);
    pi_cl_l1_free(cluster_dev, In2, buf_bytes);
    pi_cl_l1_free(cluster_dev, In1, buf_bytes);
    pi_cluster_close(cluster_dev);

    if (err) {
        printf("Test Failed !\n");
    } else {
        printf("Test Succeded !\n");
    }
    return err;
}
# Copyright (c) 2022 GreenWaves Technologies SAS
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# 3. Neither the name of GreenWaves Technologies SAS nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
# Build script for the "openmp" example: a single executable built from main.c
# using the GAP SDK CMake helpers located through the GAP_SDK_HOME environment
# variable.
cmake_minimum_required(VERSION 3.19)
###############################################################################
# Panel Control
###############################################################################
# Name of the executable target and the list of sources compiled into it.
set(TARGET_NAME "openmp")
set(TARGET_SRCS main.c)
###############################################################################
# CMake pre initialization
###############################################################################
# SDK setup script; must be included before project().
# NOTE(review): presumably this configures the toolchain and defines
# setupos() -- confirm in the SDK.
include($ENV{GAP_SDK_HOME}/utils/cmake/setup.cmake)
project(${TARGET_NAME} C ASM)
add_executable(${TARGET_NAME} ${TARGET_SRCS})
# Header search path taken from the GAP_H_INC environment variable.
include_directories($ENV{GAP_H_INC})
target_compile_options(${TARGET_NAME} PUBLIC "-O3")
# When the CONFIG_LIB_OPENMP configuration variable is enabled, also expose it
# to the C code as a preprocessor definition.
if (${CONFIG_LIB_OPENMP})
target_compile_options(${TARGET_NAME} PUBLIC "-DCONFIG_LIB_OPENMP")
endif()
###############################################################################
# CMake post initialization
###############################################################################
# SDK-provided finalization step for the target.
# NOTE(review): presumably links the OS and the selected libraries -- confirm.
setupos(${TARGET_NAME})