I have Node.js code that uses a C++ addon. The code implements a matrix product algorithm in both a serial version and a parallel version.
When I run the code in Node.js for matrices of size 2000, the parallel version shows no speed-up over the serial version. But when I run the same code as a standalone C++ program, there is a speed-up.
The code is essentially the same, which leads me to think that the problem lies with Node.js, for example in the way Node.js works with threads.
I'm using Ubuntu 16.04, Node.js v6.9.1, GCC 5.4.0 and the pthread library.
Another important detail: when I run the Node.js code on my smartphone with Termux, there is a speed-up.
Serial code in C++
//g++ serial.cc -o serial
#include <cstdlib>
#include <iostream>
#include <cmath>
#include <stdio.h>
using namespace std;
// Nanoseconds per second; used to convert a tv_nsec difference into seconds.
// Defined as a typed constant (no trailing semicolon) so it can be used inside
// any expression, not only as the last token of a statement.
const double BILLION = 1E9;

// Left operand of the product; n rows of n floats, allocated by createMatrix().
float **A;
// Right operand of the product; n rows of n floats, allocated by createMatrix().
float **B;
// Output matrix A * B; allocated by main().
float **result;
// Order of the square matrices, read from the command line.
int n;

// Allocates A and B (n x n) and fills them with rand() % 10 values.
void createMatrix();
/**
 * Serial benchmark: multiplies two random n x n matrices with the classic
 * triple loop and prints the elapsed wall-clock time.
 *
 * @param argc number of command-line arguments; must be at least 2
 * @param argv argv[1] is the matrix order n (positive integer)
 * @return 0 on success, 1 on invalid usage or allocation failure
 */
int main(int argc, char *argv[]){
    // Without this guard argv[1] is NULL and atoi() would dereference it.
    if (argc < 2) {
        fprintf(stderr, "usage: %s <matrix-size>\n", argc > 0 ? argv[0] : "serial");
        return 1;
    }
    n = atoi(argv[1]);
    if (n <= 0) {
        fprintf(stderr, "matrix size must be a positive integer\n");
        return 1;
    }

    createMatrix();

    result = (float**) malloc(n * sizeof(float*));
    if (result == NULL) {
        fprintf(stderr, "out of memory\n");
        return 1;
    }
    for (int i = 0; i < n; i++) {
        result[i] = (float*) malloc(n * sizeof(float));
        if (result[i] == NULL) {
            fprintf(stderr, "out of memory\n");
            return 1;
        }
    }

    std::cout << "calculating product..." << std::endl;
    struct timespec requestStart, requestEnd;

    // CLOCK_MONOTONIC is not affected by wall-clock adjustments, so the
    // difference of two readings is a reliable interval.
    clock_gettime(CLOCK_MONOTONIC, &requestStart);
    for (int i = 0; i < n; ++i) {
        for (int j = 0; j < n; ++j) {
            result[i][j] = 0;
            for (int k = 0; k < n; k++) {
                result[i][j] += (A[i][k] * B[k][j]);
            }
        }
    }
    clock_gettime(CLOCK_MONOTONIC, &requestEnd);

    // Whole seconds plus the nanosecond remainder expressed in seconds.
    double accum = ( requestEnd.tv_sec - requestStart.tv_sec )
                 + ( requestEnd.tv_nsec - requestStart.tv_nsec ) / BILLION;
    printf( "Serial Time taken: %lf\n", accum );
    return 0;
}
// Allocates the two n x n input matrices A and B and fills every cell with
// rand() % 10. A is created (and announced on stdout) first, then B.
void createMatrix(){
    float ***slots[2] = { &A, &B };
    const char *banners[2] = { "creating matrix A ...", "creating matrix B ..." };

    for (int which = 0; which < 2; ++which) {
        std::cout << banners[which] << std::endl;
        *slots[which] = (float**) malloc(n * sizeof(float*));
        float **matrix = *slots[which];
        for (int row = 0; row < n; ++row) {
            matrix[row] = (float*) malloc(n * sizeof(float));
            float *cells = matrix[row];
            for (int col = 0; col < n; ++col) {
                cells[col] = rand() % 10;
            }
        }
    }
}
Parallel code in C++
//g++ -std=c++11 parallel.cc -o parallel -pthread
#include <cstdlib>
#include <iostream>
#include <cmath>
#include <stdio.h>
#include <pthread.h>
#include <thread>
using namespace std;
// Nanoseconds per second; used to convert a tv_nsec difference into seconds.
// Defined as a typed constant (no trailing semicolon) so it can be used inside
// any expression, not only as the last token of a statement.
const double BILLION = 1E9;

// Left operand of the product; n rows of n floats, allocated by createMatrix().
float **A;
// Right operand of the product; n rows of n floats, allocated by createMatrix().
float **B;
// Output matrix A * B; allocated by main(), written by the worker threads.
float **result;
// Order of the square matrices, read from the command line.
int n;
// Number of rows handed to each worker thread (n / thread count).
int task_per_thread;

// Allocates A and B (n x n) and fills them with rand() % 10 values.
void createMatrix();
// Number of worker threads; written by main() before any worker is started
// and only read afterwards, so the workers need no synchronization for it.
static int thread_count = 1;

/**
 * Worker thread body: computes one horizontal slice of result = A * B.
 *
 * @param pid slice index (0 .. thread_count - 1) smuggled through the void*
 * @return always a null pointer
 */
void *runner(void *pid) {
    const int slice = static_cast<int>(reinterpret_cast<long>(pid));
    const int first_row = task_per_thread * slice;
    // Only the last slice runs up to n. That keeps the slices disjoint (no two
    // threads write the same row) and lets the last thread pick up the
    // n % thread_count leftover rows.
    const int end_row = (slice == thread_count - 1)
                            ? n
                            : task_per_thread * (slice + 1);

    for (int i = first_row; i < end_row; ++i) {
        for (int j = 0; j < n; ++j) {
            result[i][j] = 0;
            for (int k = 0; k < n; k++) {
                result[i][j] += (A[i][k] * B[k][j]);
            }
        }
    }
    return nullptr;
}

/**
 * Parallel benchmark: multiplies two random n x n matrices using a fixed
 * number of pthreads, each computing a contiguous band of rows, and prints
 * the elapsed wall-clock time.
 *
 * @param argc number of command-line arguments; must be at least 2
 * @param argv argv[1] is the matrix order n (positive integer)
 * @return 0 on success, 1 on invalid usage or allocation failure
 */
int main(int argc, char *argv[]){
    // Without this guard argv[1] is NULL and atoi() would dereference it.
    if (argc < 2) {
        fprintf(stderr, "usage: %s <matrix-size>\n", argc > 0 ? argv[0] : "parallel");
        return 1;
    }
    n = atoi(argv[1]);
    if (n <= 0) {
        fprintf(stderr, "matrix size must be a positive integer\n");
        return 1;
    }

    createMatrix();

    result = (float**) malloc(n * sizeof(float*));
    if (result == nullptr) {
        fprintf(stderr, "out of memory\n");
        return 1;
    }
    for (int i = 0; i < n; i++) {
        result[i] = (float*) malloc(n * sizeof(float));
        if (result[i] == nullptr) {
            fprintf(stderr, "out of memory\n");
            return 1;
        }
    }

    // A compile-time constant, so `threads` is an ordinary array and not a
    // variable-length array (which is not standard C++).
    const unsigned cpus = 4;
    //std::cout << "cpus: "<< cpus << std::endl;
    pthread_t threads[cpus];
    thread_count = static_cast<int>(cpus);
    task_per_thread = n / thread_count;

    std::cout << "calculating product..." << std::endl;
    struct timespec requestStart, requestEnd;

    // CLOCK_MONOTONIC is not affected by wall-clock adjustments, so the
    // difference of two readings is a reliable interval.
    clock_gettime(CLOCK_MONOTONIC, &requestStart);
    for (unsigned i = 0; i < cpus; i++) {
        // Go through long so the integer has pointer width before the cast.
        void *slice = reinterpret_cast<void*>(static_cast<long>(i));
        const int rc = pthread_create(&threads[i], nullptr, runner, slice);
        if (rc) {
            std::cout << "Error:unable to create thread," << rc << std::endl;
            exit(-1);
        }
    }
    for (unsigned i = 0; i < cpus; i++) {
        pthread_join(threads[i], nullptr);
    }
    clock_gettime(CLOCK_MONOTONIC, &requestEnd);

    // Whole seconds plus the nanosecond remainder expressed in seconds.
    double accum = ( requestEnd.tv_sec - requestStart.tv_sec )
                 + ( requestEnd.tv_nsec - requestStart.tv_nsec ) / BILLION;
    printf( "Parallel Time taken: %lf\n", accum );
    return 0;
}
// Builds the two n x n operands. Each matrix is announced on stdout, then
// allocated row by row and filled with rand() % 10; A is built before B.
void createMatrix(){
    auto allocateRandom = [](const char *banner) -> float** {
        std::cout << banner << std::endl;
        float **matrix = static_cast<float**>(malloc(n * sizeof(float*)));
        for (int row = 0; row < n; ++row) {
            float *cells = static_cast<float*>(malloc(n * sizeof(float)));
            matrix[row] = cells;
            for (int col = 0; col < n; ++col) {
                cells[col] = rand() % 10;
            }
        }
        return matrix;
    };

    A = allocateRandom("creating matrix A ...");
    B = allocateRandom("creating matrix B ...");
}
Code of the Node.js addon
#include <cstdlib>
#include <iostream>
#include <stdio.h>
#include <pthread.h>
#include <node.h>
#include <v8.h>
using v8::Exception;
using v8::FunctionCallbackInfo;
using v8::Isolate;
using v8::Local;
using v8::Number;
using v8::Object;
using v8::String;
using v8::Value;
using v8::Array;
using v8::Integer;
using namespace std;
// Nanoseconds per second; used to convert a tv_nsec difference into seconds.
// Defined as a typed constant (no trailing semicolon) so it can be used inside
// any expression, not only as the last token of a statement.
const double BILLION = 1E9;

// Left operand of the product; n rows of n floats, allocated by createMatrix().
float **A;
// Right operand of the product; n rows of n floats, allocated by createMatrix().
float **B;
// Output matrix A * B; allocated by serialProduct() / parallelProduct().
float **result;
// Order of the square matrices, taken from the first JavaScript argument.
int n;
// Number of rows handed to each worker thread (n / thread count).
int task_per_thread;

// Allocates A and B (n x n) and fills them with rand() % 10 values.
void createMatrix();
void *runner(void *pid) {
int slice = (long) pid;
int to = task_per_thread*slice;
int from = (task_per_thread*(slice+2)<n) ? task_per_thread*(slice+1) : n;
for (unsigned int i = to; i < from ; ++i){
for (unsigned int j = 0; j < n ; ++j){
result[i][j] = 0;
for (unsigned int k = 0; k < n ; k++) {
result[i][j] += (A[i][k] * B[k][j]);
}
}
}
pthread_exit(NULL);
}
void parallelProduct(const FunctionCallbackInfo<Value>& args){
Isolate* isolate = args.GetIsolate();
if(args.Length() < 1){
isolate->ThrowException(Exception::TypeError(
String::NewFromUtf8(isolate, "Wrong numbers of arguments")));
return;
}
n = args[0]->NumberValue();
std::cout << "n: "<< n << std::endl;
createMatrix();
result = (float**) malloc(n *sizeof(float*));
for (unsigned int i = 0; i < n; i++) {
result[i] = (float*) malloc(n *sizeof(float));
}
unsigned cpus = 4;
//std::cout << "cpus: "<< cpus << std::endl;
pthread_t threads[cpus];
task_per_thread = n / cpus;
int rc;
std::cout << "calculating product..." << std::endl;
struct timespec requestStart, requestEnd;
//start execution time
clock_gettime(CLOCK_REALTIME, &requestStart);
for(int i=0; i < cpus; i++ ){
rc = pthread_create(&threads[i], NULL, runner, (void *) i);
if (rc){
cout << "Error:unable to create thread," << rc << endl;
exit(-1);
}
}
for(int i=0; i < cpus; i++ ){
pthread_join(threads[i],NULL);
}
//end execution time
clock_gettime(CLOCK_REALTIME, &requestEnd);
double accum = ( requestEnd.tv_sec - requestStart.tv_sec )
+ ( requestEnd.tv_nsec - requestStart.tv_nsec )
/ BILLION;
printf( "Parallel Time taken: %lf\n", accum );
}
// Allocates the n x n operands A and B and fills each cell with rand() % 10.
// Progress is announced on stdout before each matrix; A is built before B.
void createMatrix(){
    struct Operand {
        const char *banner;
        float ***slot;
    };
    const Operand operands[] = {
        { "creating matrix A ...", &A },
        { "creating matrix B ...", &B },
    };

    for (const Operand &operand : operands) {
        std::cout << operand.banner << std::endl;
        float **matrix = (float**) malloc(n * sizeof(float*));
        *operand.slot = matrix;
        int row = 0;
        while (row < n) {
            matrix[row] = (float*) malloc(n * sizeof(float));
            for (int col = 0; col < n; ++col) {
                matrix[row][col] = rand() % 10;
            }
            ++row;
        }
    }
}
void serialProduct(const FunctionCallbackInfo<Value>& args){
Isolate* isolate = args.GetIsolate();
if(args.Length() < 1){
isolate->ThrowException(Exception::TypeError(
String::NewFromUtf8(isolate, "Wrong numbers of arguments")));
return;
}
std::cout << "n: "<< args[0]->NumberValue() << std::endl;
n = args[0]->NumberValue();
createMatrix();
result = (float**) malloc(n *sizeof(float*));
for (unsigned int i = 0; i < n; i++) {
result[i] = (float*) malloc(n *sizeof(float));
}
std::cout << "calculating product..." << std::endl;
struct timespec requestStart, requestEnd;
//start execution time
clock_gettime(CLOCK_REALTIME, &requestStart);
for (unsigned int i = 0; i < n ; ++i){
for (unsigned int j = 0; j < n ; ++j){
result[i][j] = 0;
for (unsigned int k = 0; k < n ; k++) {
result[i][j] += (A[i][k] * B[k][j]);
}
}
}
//end execution time
clock_gettime(CLOCK_REALTIME, &requestEnd);
double accum = ( requestEnd.tv_sec - requestStart.tv_sec )
+ ( requestEnd.tv_nsec - requestStart.tv_nsec )
/ BILLION;
printf( "Serial Time taken: %lf\n", accum );
}
// Module initializer: attaches the two benchmark entry points to the addon's
// exports object under the names JavaScript will call them by.
void Init(Local<Object> exports) {
    struct Binding {
        const char *js_name;
        v8::FunctionCallback callback;
    };
    const Binding bindings[] = {
        { "serialProduct", serialProduct },
        { "parallelProduct", parallelProduct },
    };
    for (const Binding &binding : bindings) {
        NODE_SET_METHOD(exports, binding.js_name, binding.callback);
    }
}
// Registers Init as the entry point of the module named "addon".
NODE_MODULE(addon, Init)
binding.gyp
{
  "targets": [
    {
      # The native module built from addon.cc; index.js loads it from
      # ./build/Release/addon.
      "target_name": "addon",
      "sources": [ "addon.cc" ],
      # Both branches of the former OS condition set this same flag, so it is
      # given unconditionally. Note that -w silences every compiler warning.
      "cflags": [ "-w" ],
      # link_settings is a target-level key, so it belongs inside the target
      # it applies to rather than at the top level of the file.
      "link_settings": {
        "libraries": [
          "-pthread"
        ]
      }
    }
  ]
}
index.js
// Load the compiled native addon and run both benchmarks on the same matrix
// size: the parallel version first, then the serial one.
const { parallelProduct, serialProduct } = require('./build/Release/addon')
const matrixSize = 2000
parallelProduct(matrixSize)
serialProduct(matrixSize)