Reentrant MNIST image recognition

Requirements

  • Autotiler

Description

This example is the same as the one presented in MnistGraph folder but with Reentrant mode activated. With this mode the model run can be interrupted by higher priority tasks and resumed afterwards. Look into the Autotiler documentation for more details.

Look into the MnistModel.c file for details on how to connect the layers and define the inputs and outputs of the NN.

To generate the code for the NN initialization and execution:

make clean_model model

It will generate MnistKernels.c, MnistKernels.h and Mnist_L3_Flash_Const.dat. They contain the NN code for initialization and run, and the constant tensors.

Mnist.c contains the main application code. After allocating the input and output buffers and reading the image from file, the application opens the cluster and initializes the graph. First, it needs to explicitly configure and initialize the flash used for the file system. The pointer is then passed, via the Descr variable, to the Construct function, which allocates the static L2 buffer used for the constant tensors of the graph. Then the AllocCtxt function is called; here all the dynamic buffers are allocated (if not provided by the user - as in the warm mode of the model). After these initializations the model can be executed. Two tasks are set up: one for the model, RunMnist, and another one for normal printfs, RunPrint. The latter has the higher priority: pi_cluster_task_priority(task_1, 1). They are enqueued one after the other asynchronously, so RunMnist will start and right after it the higher priority task is triggered. As you can see from the log, after the first layer execution the RunMnist task is interrupted, the second task is served and eventually RunMnist is resumed to finish the execution.

make clean all run
         *** PMSIS MnistGraph Reentrant Test ***

Entering main controller
Image ../../../../Mnist/test_img/6/1578.pgm:  [W: 28, H: 28] Bytes per pixel 1, HeaderSize: 13
Image ../../../../Mnist/test_img/6/1578.pgm, [W: 28, H: 28], Bytes per pixel 1, Size: 784 bytes, Loaded successfully
NN Construct
Flash is open
NN Constructed and ready to run
Calling Cluster

Waiting

Inside RunMnist for the First time
Node Conv5x5ReLUMaxPool2x2_0 starts
Suspending RunMnist
Higher priority cluster task execution 0/3
Higher priority cluster task execution 1/3
Higher priority cluster task execution 2/3
Higher priority cluster task execution 3/3
Resuming RunMnist
Node Conv5x5ReLUMaxPool2x2_1 starts
Node LinearLayerReLU_0 starts
Node SoftMax_0 starts
End RunMnist
Recognized number : 6

Test success with 0 error(s) !

Code

/*
 * Copyright (C) 2017 GreenWaves Technologies
 * All rights reserved.
 *
 * This software may be modified and distributed under the terms
 * of the BSD license.  See the LICENSE file for details.
 *
 */

/* PMSIS includes. */
#include "pmsis.h"

/* Autotiler includes. */
#include "Gap.h"
#include "MnistKernels.h"
#if defined(ENABLE_BRIDGE)
#include "gaplib/ImgIO.h"
#else
#include "golden.h"
#endif  /* USE_BRIDGE */

/* Test image selection: NUM_DIR is the digit directory (also used by main as
 * the expected result in ENABLE_BRIDGE builds), NUM_PIC the picture index. */
#define NUM_DIR 6
#define NUM_PIC 1578

/*
 * Build the image path string "<IMG_DIR>/<NUM_DIR>/<NUM_PIC>.pgm".
 * The extra macro levels let the arguments expand before `#` stringifies them.
 * NOTE(review): IMG_DIR is not defined in this file - presumably supplied by
 * the build system; confirm against the Makefile.
 */
#define __IMG_NAME(x)    #x
#define _IMG_NAME(x,y,z) __IMG_NAME(x/y/z.pgm)
#define IMG_NAME(x,y,z)  _IMG_NAME(x,y,z)
#define NAME             IMG_NAME(IMG_DIR, NUM_DIR, NUM_PIC)

/* Stack size per cluster core; main multiplies it by the number of PE cores
 * when reserving the stacks of each task (unit presumably bytes - confirm). */
#define STACK_SIZE 1024

/* Network input: main fills it with the 8-bit pixels multiplied by 16. */
int16_t *image_in = NULL;
/* Network output: main allocates room for 10 int16_t values. */
int16_t *output = NULL;
/* Raw 8-bit image, either read from file or taken from ImageIn (golden.h). */
uint8_t *image_in_real = NULL;
/* NOTE(review): not referenced in the visible code (Arg_T.rec_digit carries
 * the result instead) - possibly a leftover; confirm before removing. */
uint8_t rec_digit = 0xAD;

/* Graph context returned by MnistCNN_AllocCtxt and used by RunMnist. */
AT_CTXT_TYPE Ctxt;
/* Flash file-system handle, opened in main through the graph descriptor. */
AT_DEFAULTFLASH_FS_T Flash;

/* Argument block handed to the RunMnist cluster task. */
typedef struct {
    CNN_Graph_Descr_T *Descr;   /* Graph descriptor built in main. */
    int rec_digit;              /* Result slot: written by RunMnist, read back by main. */
} Arg_T;

/*
 * Cluster task entry point: runs, or resumes, the MNIST graph.
 *
 * The context layer index is read before the call to tell a first run
 * (index 0) from a resume, and again after MnistCNN() returns: a non-zero
 * index is reported as a suspension, an index of 0 as a finished run.
 *
 * On a finished run the index of the highest of the 10 output scores is
 * stored in Arg->rec_digit. The result is always written, including when
 * class 0 wins, so the caller never reads back its initial placeholder.
 * Ties keep the lowest index.
 *
 * Arg: task argument; only rec_digit is written here.
 */
static void RunMnist(Arg_T *Arg)
{
    enum { NUM_CLASSES = 10 };  /* main allocates 10 output scores */

    if (MnistCNN_CtxtGetLayerIndex(Ctxt) == 0) {
        printf("Inside RunMnist for the First time\n");
    } else {
        printf("Resuming RunMnist\n");
    }
    MnistCNN(Ctxt,image_in,output,0);

    if (MnistCNN_CtxtGetLayerIndex(Ctxt) != 0) {
        /* Graph stopped part-way: no result to extract yet. */
        printf("Suspending RunMnist\n");
        return;
    }

    /* Argmax starting from class 0 so that digit 0 is a reportable result. */
    int best = 0;
    for (int i = 1; i < NUM_CLASSES; i++)
    {
        if (output[best] < output[i])
        {
            best = i;
        }
    }
    Arg->rec_digit = best;
    printf("End RunMnist\n");
}



/* Progress counter of RunPrint; kept at file scope so its value persists
 * across calls, and reset to 0 once a full sequence has been printed. */
int runprint = 0;

/*
 * Cluster task enqueued by main with the higher priority.
 * Prints four progress lines (0/3 .. 3/3), calling pi_cl_task_yield() after
 * each one, then rewinds the counter.
 */
static void RunPrint()
{
    while (runprint <= 3)
    {
        /* Capture the value to print, then advance the counter first,
         * matching the post-increment ordering of the message argument. */
        const int step = runprint;
        runprint = step + 1;
        printf("Higher priority cluster task execution %d/3\n", step);
        pi_cl_task_yield();
    }

    runprint = 0;
}


int main(void)
{
    printf("\n\n\t *** PMSIS MnistGraph Reentrant Test ***\n\n");
    printf("Entering main controller\n");

    uint8_t CheckResults = 0;
    uint32_t errors = 0;
    char *image_name = NAME;

    //Input image size
    uint32_t img_w = 28, img_h = 28;
    uint32_t size_img_in = 0, size_img_in_real = 0;
    size_img_in = img_w * img_h * sizeof(uint16_t);
    size_img_in_real = img_w * img_h * sizeof(uint8_t);

    //Allocating input and output image buffers in L2 memory
    output = (int16_t *) pi_l2_malloc(10*sizeof(short int));
    image_in  = (int16_t *) pi_l2_malloc(size_img_in);
    if (image_in == NULL)
    {
        printf("Failed to allocate memory for image (%d bytes)\n", size_img_in);
        pmsis_exit(-1);
    }

    #if defined(ENABLE_BRIDGE)
    image_in_real = (uint8_t *) pi_l2_malloc(size_img_in_real);
    if (image_in_real == NULL)
    {
        printf("Failed to allocate memory for image (%d bytes)\n", size_img_in);
        pmsis_exit(-2);
    }

    //Reading Image from host
    uint8_t *read_status = ReadImageFromFile(image_name, img_w, img_h, 1, image_in_real, size_img_in_real, IMGIO_OUTPUT_CHAR, 0);
    if (read_status)
    {
        printf("Failed to load image %s or dimension mismatch Expects [%dx%d]\n", image_name, img_w, img_h);
        pmsis_exit(-3);
    }

    #else
    image_in_real = ImageIn;
    #endif  /* ENABLE_BRIDGE */

    //Convert in Mnist dataset format
    for (uint32_t i = 0; i < (img_w * img_h); i++)
    {
        image_in[i] = image_in_real[i] * 16;
    }

    /* Configure And open cluster. */
    struct pi_device cluster_dev;
    struct pi_cluster_conf cl_conf;

    int stacks_size = STACK_SIZE * pi_cl_cluster_nb_pe_cores();
    pi_cluster_conf_init(&cl_conf);
    cl_conf.id = 0;
    cl_conf.scratch_size = stacks_size + 0x8000;
    pi_open_from_conf(&cluster_dev, (void *) &cl_conf);
    if (pi_cluster_open(&cluster_dev))
    {
        printf("Cluster open failed !\n");
        pmsis_exit(-7);
    }

    printf("NN Construct\n");

    int Status;
    CNN_Graph_Descr_T _Descr = {0, &Flash, 0, 0, 0}, *Descr = &_Descr;
    {
        int Error;
        AT_DEFAULTFLASH_FS_CONF_T FlashConf;

        AT_DEFAULTFLASH_FS_CONF_INIT(&FlashConf, AT_MEM_L3_HFLASH, 0);
        AT_DEFAULTFLASH_FS_OPEN((AT_DEFAULTFLASH_FS_T *) Descr->Flash, &FlashConf, 0, 0, &Error);
        if (Error) {
            printf("Flash open failed\n"); return 1;
        } else printf("Flash is open\n");
    }
    MnistCNN_Construct(Descr);
    Ctxt = MnistCNN_AllocCtxt(Descr,
                              0,      /* Ctxt Mem */
                              0,      /* L1 */
                              0,      /* L2 Dyn */
                              0,      /* L3 Dyn */
                              &Status);
    if(Status){
        printf("Construct Error: %s\n", GetAtErrorName(Status));
        pmsis_exit(-1);
    }
    printf("NN Constructed and ready to run\n");

    Arg_T Arg = (Arg_T) {Descr, 0xAD};
    struct pi_cluster_task *task_0 = pi_l2_malloc(sizeof(struct pi_cluster_task));
    pi_cluster_task(task_0, (void (*)(void *))RunMnist, (void *) &Arg);
    /*
    pi_cluster_enqueue_task_async function is similar to pi_cluster_send_task but supports priority 0 and 1 and do not support automatic stack allocation.
    Stacks must always be allocated by the caller.
    */
    void *stacks = pi_cl_l1_scratch_alloc(&cluster_dev, task_0, stacks_size);
    pi_cluster_task_stacks(task_0, stacks, STACK_SIZE);

    struct pi_cluster_task *task_1 = pi_l2_malloc(sizeof(struct pi_cluster_task));
    pi_cluster_task(task_1, (void (*)(void *))RunPrint, NULL);
    stacks = pi_cl_l1_scratch_alloc(&cluster_dev, task_1, stacks_size);
    pi_cluster_task_stacks(task_1, stacks, STACK_SIZE);


    printf("Calling Cluster\n\n");
    static pi_evt_t end_task_mnist,end_task_print;

    pi_evt_sig_init(&end_task_mnist);
    pi_evt_sig_init(&end_task_print);

    pi_cluster_task_priority(task_1, 1);

    pi_cluster_enqueue_task_async(&cluster_dev, task_0,&end_task_mnist);
    pi_cluster_enqueue_task_async(&cluster_dev, task_1,&end_task_print);

    printf("Waiting\n\n");
    pi_evt_wait(&end_task_print);
    pi_evt_wait(&end_task_mnist);

    MnistCNN_DeAllocCtxt(Ctxt, 1, 1, 1, 1);
    MnistCNN_Destruct(Descr, 1, 1);
    pi_cluster_close(&cluster_dev);

    printf("Recognized number : %d\n", Arg.rec_digit);
    #if defined(ENABLE_BRIDGE)
    errors = (Arg.rec_digit != (uint8_t) NUM_DIR);
    #else
    errors = (Arg.rec_digit != (uint8_t) GoldenOutput);
    #endif /* defined(ENABLE_BRIDGE) */

    printf("\nTest %s with %d error(s) !\n", (errors) ? "failed" : "success", errors);

    if(errors) pmsis_exit(-9);
    else pmsis_exit(0);
}