MelSpectrogram Using NNTool

Requirements

No specific requirement. This example should run without issue on all chips/boards/OSes.

Description

In this example the Mel spectrogram is computed by a function generated with NNTool. In nntool_script.py a single-node graph is created in NNTool with the Mel spectrogram options specified in MelConfig.json, and the Autotiler model code is generated from it. The Autotiler model is then compiled and run to generate the GAP C code. All the rules and dependencies needed to generate these files are handled by CMakeLists.txt.

In the main application the Mel spectrogram function is then applied frame by frame to the wav file read from the PC. The output features are written to a single file, out_file.bin, and can be compared to the classical librosa implementation of the algorithm via test_librosa.py.

How to run

mkdir build
cd build
cmake ../
make run -j

To test the output you can use test_librosa.py:

python test_librosa.py --plot

Code

/*
 * Copyright (C) 2017 GreenWaves Technologies
 * All rights reserved.
 *
 * This software may be modified and distributed under the terms
 * of the BSD license.  See the LICENSE file for details.
 *
 */


/* Autotiler includes. */
#include "melspect.h"
#include "melspectKernels.h"
#include "gaplib/fs_switch.h"
#include "gaplib/wavIO.h"
/* Two-level stringification: __XSTR(X) first macro-expands X and then turns
 * the result into a string literal (used for WAV_FILE and OUT_FILE below). */
#define __XSTR(__s) __STR(__s)
#define __STR(__s) #__s

/* External (L3) RAM device used to stage the input audio and the output features. */
struct pi_device DefaultRam; 
/* Pointer alias to DefaultRam; not referenced in this part of the file
 * (presumably used by other code - TODO confirm). */
struct pi_device* ram = &DefaultRam;
/* TEMP_L2_SIZE: size in bytes of the temporary L2 buffer used to load the wav
 * file from the PC before it is copied to the external RAM.
 * AUDIO_BUFFER_SIZE: half of TEMP_L2_SIZE; it is multiplied by sizeof(short)
 * wherever it is used, i.e. it counts 16-bit samples. */
#define TEMP_L2_SIZE 1200000
#define AUDIO_BUFFER_SIZE (TEMP_L2_SIZE>>1)

/* Not referenced in this part of the file; presumably required by the
 * Autotiler generated code - TODO confirm. */
AT_DEFAULTFLASH_EXT_ADDR_TYPE melspect_L3_Flash = 0;

/* Input: address of the audio samples in external RAM (set by pi_ram_alloc). */
static uint32_t inSig;

/* Element type of the frame passed to, and the features produced by, melspectCNN. */
#define DATATYPE_SIGNAL     F16_DSP
/* L2 buffer holding the frame currently being processed. */
L2_MEM DATATYPE_SIGNAL Audio_Frame[FRAME_SIZE];
/* L2 buffer receiving the N_MELS outputs computed for the current frame. */
L2_MEM DATATYPE_SIGNAL MelSpectrogram[N_MELS];
/* Output: address of the feature buffer in external RAM (set by pi_ram_alloc). */
static uint32_t outSig;


/*
 * Cluster task entry point: runs the generated melspectCNN function on the
 * global Audio_Frame buffer and stores the result in MelSpectrogram.
 * It is registered with pi_cluster_task() and executed once per frame.
 */
static void cluster()
{
    melspectCNN(Audio_Frame, MelSpectrogram);
}

/* Terminate the platform with `code` and hand the same code back so callers
 * can write `return melspect_fail(code);` on every error path. */
static int melspect_fail(int code)
{
    pmsis_exit(code);
    return code;
}

/**
 * Run the Mel spectrogram example end to end.
 *
 * Steps:
 *   1. open the cluster, set the FC/CL/PERIPH frequencies and open the
 *      external RAM;
 *   2. read WAV_FILE into a temporary L2 buffer and copy it to external RAM;
 *   3. for every frame of FRAME_SIZE samples (hop FRAME_STEP) convert the
 *      16-bit samples to DATATYPE_SIGNAL, run melspectCNN on the cluster and
 *      append the N_MELS outputs to the output buffer in external RAM;
 *   4. read the whole output back into L2 and write it to OUT_FILE.
 *
 * Every failure prints a message and ends the run through pmsis_exit() with a
 * non-zero code, which is also returned:
 *   -1  wav file unreadable, invalid, too long or shorter than one frame
 *   -3  external RAM could not be opened
 *   -4  cluster open, frequency change or input RAM allocation failed
 *   -5  L2 input buffer or output RAM allocation failed
 *   -6  graph constructor failed
 *   -7  output file could not be opened
 *   -8  output file was only partially written
 *   18  L2 output buffer allocation failed
 *
 * @return 0 on success, one of the codes above otherwise.
 */
int test_melspect(void)
{
    printf("Entering main controller\n");

    /* Configure And open cluster. */
    struct pi_device cluster_dev;
    struct pi_cluster_conf cl_conf;
    pi_cluster_conf_init(&cl_conf);
    cl_conf.cc_stack_size = STACK_SIZE;

    cl_conf.id = 0; /* Set cluster ID. */
                    // Enable the special icache for the master core
    cl_conf.icache_conf = PI_CLUSTER_MASTER_CORE_ICACHE_ENABLE |
                    // Enable the prefetch for all the cores, it's a 9bits mask (from bit 2 to bit 10), each bit correspond to 1 core
                    PI_CLUSTER_ICACHE_PREFETCH_ENABLE |
                    // Enable the icache for all the cores
                    PI_CLUSTER_ICACHE_ENABLE;

    pi_open_from_conf(&cluster_dev, (void *) &cl_conf);
    if (pi_cluster_open(&cluster_dev))
    {
        printf("Cluster open failed !\n");
        return melspect_fail(-4);
    }

    /* Frequency Settings: defined in the Makefile */
    int cur_fc_freq = pi_freq_set(PI_FREQ_DOMAIN_FC, FREQ_FC*1000*1000);
    int cur_cl_freq = pi_freq_set(PI_FREQ_DOMAIN_CL, FREQ_CL*1000*1000);
    int cur_pe_freq = pi_freq_set(PI_FREQ_DOMAIN_PERIPH, FREQ_PE*1000*1000);
    if (cur_fc_freq == -1 || cur_cl_freq == -1 || cur_pe_freq == -1)
    {
        printf("Error changing frequency !\nTest failed...\n");
        return melspect_fail(-4);
    }
    printf("FC Frequency = %d Hz CL Frequency = %d Hz PERIPH Frequency = %d Hz\n", 
            pi_freq_get(PI_FREQ_DOMAIN_FC), pi_freq_get(PI_FREQ_DOMAIN_CL), pi_freq_get(PI_FREQ_DOMAIN_PERIPH));    

    /****
        Configure And Open the External Ram. 
    ****/
    struct pi_default_ram_conf ram_conf;
    pi_default_ram_conf_init(&ram_conf);
    ram_conf.baudrate = FREQ_FC*1000*1000;
    pi_open_from_conf(&DefaultRam, &ram_conf);
    if (pi_ram_open(&DefaultRam))
    {
        printf("Error ram open !\n");
        return melspect_fail(-3);
    }
    printf("RAM Opened\n");

    /****
        Load Audio Wav from file 
    ****/
    // The wav file is first read into a temporary L2 buffer and then moved
    // to the L3 external memory, from which it is consumed frame by frame.
    char *temp_L2_memory = pi_l2_malloc(TEMP_L2_SIZE);
    if (temp_L2_memory == NULL) {
        printf("Error when allocating L2 buffer\n");
        return melspect_fail(-5);
    }

    // Allocate the L3 buffer for the input audio
    if (pi_ram_alloc(&DefaultRam, &inSig, (uint32_t) AUDIO_BUFFER_SIZE*sizeof(short)))
    {
        printf("inSig Ram malloc failed !\n");
        return melspect_fail(-4);
    }

    // Read audio from file
    header_struct header_info;
    if (ReadWavFromFile(__XSTR(WAV_FILE), temp_L2_memory, AUDIO_BUFFER_SIZE*sizeof(short), &header_info)){
        printf("\nError reading wav file\n");
        return melspect_fail(-1);
    }
    // Both fields are divisors in the sample count below: reject a header
    // that would cause a division by zero.
    if (header_info.NumChannels == 0 || header_info.BitsPerSample == 0) {
        printf("Invalid wav header: NumChannels and BitsPerSample must be non-zero\n");
        return melspect_fail(-1);
    }
    int samplerate = header_info.SampleRate;
    int num_samples = header_info.DataSize * 8 / (header_info.NumChannels * header_info.BitsPerSample);
    printf("Num Samples: %d with BitsPerSample: %d SR: %dHz\n", num_samples, header_info.BitsPerSample, samplerate);

    if (num_samples*sizeof(short) > TEMP_L2_SIZE){
        printf("The size of the audio exceeds the available L2 memory space!\n");
        return melspect_fail(-1);
    }
    // At least one complete frame is needed; otherwise the frame count would
    // be zero or negative and end up in the unsigned allocation sizes below.
    if (num_samples < (int) FRAME_SIZE) {
        printf("The audio is shorter than one frame of %d samples, nothing to process!\n", (int) FRAME_SIZE);
        return melspect_fail(-1);
    }
    int tot_frames = (num_samples - FRAME_SIZE) / FRAME_STEP + 1;
    printf("Number of frames to be processed: %d\n", tot_frames);

    // copy input data to L3
    pi_ram_write(&DefaultRam, inSig, temp_L2_memory, num_samples * sizeof(short));
    // free the temporary input memory
    pi_l2_free(temp_L2_memory, TEMP_L2_SIZE);

    // Total size of the output features, reused for the L3 allocation, the
    // L2 staging buffer and the file write so the three always agree.
    uint32_t out_bytes = (uint32_t) tot_frames * N_MELS * sizeof(DATATYPE_SIGNAL);

    // Allocate output buffer in L3
    if (pi_ram_alloc(&DefaultRam, &outSig, out_bytes))
    {
        printf("outSig Ram malloc failed !\n");
        return melspect_fail(-5);
    }

    // IMPORTANT - MUST BE CALLED AFTER THE CLUSTER IS SWITCHED ON!!!!
    printf("Constructor\n");
    int ConstructorErr = melspectCNN_Construct();
    if (ConstructorErr)
    {
        printf("Graph constructor exited with error: %d\n(check the generated file melspectKernels.c to see which memory have failed to be allocated)\n", ConstructorErr);
        return melspect_fail(-6);
    }
    pi_perf_conf(1 << PI_PERF_CYCLES | 1 << PI_PERF_ACTIVE_CYCLES);
    gap_fc_starttimer();
    gap_fc_resethwtimer();
    int start = gap_fc_readhwtimer();
    struct pi_cluster_task task_ctor;
    pi_cluster_task(&task_ctor, (void (*)(void *)) melspectCNN_ConstructCluster, NULL);
    pi_cluster_send_task_to_cl(&cluster_dev, &task_ctor);
    int elapsed = gap_fc_readhwtimer() - start;
    printf("L1 Promotion copy took %d FC Cycles\n", elapsed);

    struct pi_cluster_task task;
    pi_cluster_task(&task, (void (*)(void *))cluster, NULL);
    pi_cluster_task_stacks(&task, NULL, SLAVE_STACK_SIZE);

    for (int frame_id=0; frame_id < tot_frames; frame_id++)
    {
        printf("Frame [%3d/%3d]", frame_id+1, tot_frames);
        // Copy Data from L3 to L2: the raw 16-bit samples are loaded directly
        // into the start of Audio_Frame and converted in place below.
        short * in_temp_buffer = (short *) Audio_Frame;
        pi_ram_read(
            &DefaultRam, 
            inSig + frame_id * FRAME_STEP * sizeof(short), 
            in_temp_buffer, 
            (uint32_t) FRAME_SIZE*sizeof(short)
        );
        // cast data from Q16.15 to DATATYPE_SIGNAL (may be float16).
        // The loop runs from the last sample to the first so that, should
        // DATATYPE_SIGNAL be wider than a short, a converted value never
        // overwrites a sample that has not been read yet.
        for (int i=(FRAME_SIZE-1) ; i>=0; i--){
            Audio_Frame[i] = ((DATATYPE_SIGNAL) in_temp_buffer[i])/(1<<15);
        }

        /******
            Compute the Mel Spectrogram
        ******/
        start = gap_fc_readhwtimer();
        pi_cluster_send_task_to_cl(&cluster_dev, &task);
        elapsed = gap_fc_readhwtimer() - start;
        printf(" --> %d (%.2fus) \n", elapsed, ( (float) elapsed ) / FREQ_FC);

        // Append the features of this frame to the output buffer in L3
        pi_ram_write(&DefaultRam, outSig + (uint32_t) frame_id * N_MELS * sizeof(DATATYPE_SIGNAL),
            MelSpectrogram, N_MELS * sizeof(DATATYPE_SIGNAL));

    }   // stop looping over frames

    // Bring the complete output back from L3 into an L2 staging buffer that
    // is sized for the output itself (not for the input audio).
    char *out_L2_memory = pi_l2_malloc(out_bytes);
    if (out_L2_memory == NULL) {
        printf("Error when allocating L2 buffer\n");
        return melspect_fail(18);
    }
    pi_ram_read(&DefaultRam, outSig, out_L2_memory, out_bytes);

    switch_fs_t fs;
    __FS_INIT(fs);

    void *File = __OPEN_WRITE(fs, __XSTR(OUT_FILE));
    if (File == NULL) {
        printf("Error opening %s for writing\n", __XSTR(OUT_FILE));
        __FS_DEINIT(fs);
        pi_l2_free(out_L2_memory, out_bytes);
        return melspect_fail(-7);
    }
    // __WRITE reports the number of bytes actually written
    int ret = __WRITE(File, out_L2_memory, out_bytes);
    __CLOSE(File);

    __FS_DEINIT(fs);
    pi_l2_free(out_L2_memory, out_bytes);

    if (ret != (int) out_bytes) {
        printf("Error writing %s: %d of %d Bytes written\n", __XSTR(OUT_FILE), ret, (int) out_bytes);
        return melspect_fail(-8);
    }
    printf("Writing %d Bytes to to %s completed successfully\n", (int) out_bytes, __XSTR(OUT_FILE));

    melspectCNN_Destruct();

    printf("Ended\n");
    pmsis_exit(0);
    return 0;
}

/**
 * Program entry point: prints the example banner and runs test_melspect().
 *
 * @param argc unused
 * @param argv unused
 * @return the status returned by test_melspect() (0 on success), so that a
 *         failure reported through a plain `return` is not turned into a
 *         successful exit code.
 */
int main(int argc, char *argv[])
{
    (void) argc;
    (void) argv;

    printf("\n\n\t *** NNTOOL melspect Example ***\n\n");
    return test_melspect();
}

import os
from nntool.api import NNGraph
from nntool.api.utils import model_settings
from nntool.graph.types import MFCCPreprocessingNode
from nntool.graph.dim import Dim
from nntool.graph.types.base import NNEdge
import argparse
import argcomplete
import json

def create_parser():
    """Build the command-line parser of the Mel spectrogram model generator.

    Returns:
        argparse.ArgumentParser: parser exposing ``--cfg_file`` (defaults to
        "MelConfig.json"), ``--mel_at_model_path`` and ``--mel_tensors_dir``
        (both defaulting to None).
    """
    # (flag, default value, help text) for every supported option
    option_table = (
        ('--cfg_file', "MelConfig.json",
         "File for settings configuration in json format"),
        ('--mel_at_model_path', None,
         "Path to the C autotiler model file to generate"),
        ('--mel_tensors_dir', None,
         "Path to the autotiler model constant files to generate"),
    )

    cli = argparse.ArgumentParser(prog='fft_at_generators')
    for flag, fallback, description in option_table:
        cli.add_argument(flag, default=fallback, help=description)
    return cli


if __name__ == '__main__':
    # Script entry point: builds a one-node NNTool graph holding the Mel
    # spectrogram preprocessing described by the JSON configuration and
    # generates the corresponding Autotiler model and constant tensors.
    parser = create_parser()
    argcomplete.autocomplete(parser)
    args = parser.parse_args()

    # The model file name is derived from this path further down; without it
    # os.path.split(None) would raise a TypeError only after the whole graph
    # has been built, so report the missing option right away instead.
    if args.mel_at_model_path is None:
        parser.error("--mel_at_model_path is required")

    # NOTE: in librosa the frame length is always n_fft. The window_size can be less and will be padded centered to n_fft before multiplying to the frame values
    with open(args.cfg_file, "r") as f:
        config = json.load(f)
        print(config)
    sample_rate = config["sample_rate"]
    frame_size = config["frame_size"]
    # NOTE(review): frame_step and window_size are read from the configuration
    # but never passed to the node below - confirm this is intended.
    frame_step = config["frame_step"]
    window_size = config["window_size"]
    win_func = config["win_func"]
    n_fft = config["n_fft"]
    n_mels = config["n_mels"]
    fmin = config["fmin"]
    fmax = config["fmax"]
    mel_norm = config["mel_norm"]
    power = config["power"]
    float_type = config["float_type"]

    # Graph: input (one frame of frame_size samples) -> melspect node -> output
    G_melspect = NNGraph(name='melspect')
    inp = G_melspect.add_input(Dim.unnamed([frame_size]))

    mfcc_param = MFCCPreprocessingNode(
            "melspect",
            sample_rate=sample_rate,
            n_fft=n_fft,
            frame_size=frame_size,
            # NOTE(review): the node receives n_fft as frame step, not the
            # configured frame_step; with n_frames=1 this presumably has no
            # effect - TODO confirm.
            frame_step=n_fft,
            n_frames=1,
            window=win_func,
            power=power,
            fmin=fmin,
            fmax=fmax,
            n_fbanks=n_mels,
            fbank_type="librosa",
            librosa_norm=mel_norm,
            log_type=None
    )
    mfcc_param.gen_fft_twiddles(G_melspect)
    mfcc_param.gen_melfilter(G_melspect)
    G_melspect.add_edge(NNEdge(inp, mfcc_param))
    out = G_melspect.add_output()
    G_melspect.add_edge(NNEdge(mfcc_param, out))
    G_melspect.add_dimensions()
    G_melspect.adjust_order()
    G_melspect.fake_quantize(graph_options={"scheme": "FLOAT", "float_type": float_type})

    # Only the file name of --mel_at_model_path is used: the model is always
    # written into the "BUILD_MODEL" directory.
    G_melspect.generate(
        write_constants=True,
        settings=model_settings(
            l1_size=128000,
            l2_size=1300000,
            tensor_directory=args.mel_tensors_dir,
            model_directory="BUILD_MODEL",
            model_file=os.path.split(args.mel_at_model_path)[1],
            graph_l1_promotion=1
        )
    )