My own class type is not working correctly with MPI_Scatterv and MPI_Gatherv

I have my own class, Class2, defined in a .h file, and I am using it as a member of a struct, as in the code below. (All the code is verified and complete.)

#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>
#include <algorithm>
#include <iostream>
#include <fstream>
#include <sstream>
#include <vector>
#include <sys/time.h>
#include <string.h>
#include <numeric>
#include <iterator> 
#include "class2.h"
using namespace std;

struct Pcle
    Class2 Pot;
    Class2 Vy;
    float   ss;
    Pcle(){}; //default empty constructor
    Pcle(float M, int Px, int Py) // constructor
    :Pot(Px, Py)
    , Vy(0.f, 0.f )
    , ss (M)

I will provide the .h file as well if necessary. The problem is that the values passed through both MPI_Scatterv and MPI_Gatherv have changed by the end (or at least some of them have). When I change the member type from Class2 to a plain int (and adapt the rest of the code accordingly), the code works fine. To give an idea, here is my main file, which produces good results with int instead of Class2:

 // Driver from the question: describes Pcle to MPI as a struct datatype,
 // scatters chunks of particles from rank 0, gathers them back inside a loop
 // and prints the ss field of every gathered particle on rank 0.
 // NOTE(review): the excerpt is incomplete as posted -- several closing braces,
 // some loop bodies and the buffer arguments of the scatter/gather calls are
 // missing, so it does not compile in this form.
 int main(int argc, char *argv[]){

    int size, rank, chunk,j=0;
    int recvcount, part;

    vector<Pcle> in;
    vector<Pcle> c;
    vector<Pcle> f;

     MPI_Init(&argc, &argv);
     // NOTE(review): rank and size are read below, but no MPI_Comm_rank /
     // MPI_Comm_size call is visible anywhere in this excerpt.

        MPI_Datatype MPI_PART, oldtypes[2];
        int blockcounts[2];

       // MPI_Aint type used to be consistent with syntax of
       // MPI_Type_extent routine
        MPI_Aint offsets[2], extent;

        MPI_Status stat;

        // First block: two elements of MPI_CLASS2 starting at offset 0.
        // The question text states that MPI_CLASS2 is #defined as MPI_FLOAT,
        // so this block covers two floats, while Pcle starts with two Class2
        // members -- presumably the source of the layout mismatch.
        offsets[0] = 0;
        oldtypes[0] = MPI_CLASS2;
        blockcounts[0] = 2;

        // Setup description of the 1 MPI_FLOAT field, Mass
        // Need to first figure offset by getting size of MPI_Vector2
       MPI_Type_extent(MPI_CLASS2, &extent);
        // The float block is placed two MPI_CLASS2 extents after offset 0.
        offsets[1] = 2*extent;
       oldtypes[1] = MPI_FLOAT;
       blockcounts[1] = 1;

        // Define structured type and commit it
        // NOTE(review): no MPI_Type_commit(&MPI_PART) call is visible in this
        // excerpt even though the comment above mentions committing.
        MPI_Type_struct(2, blockcounts, offsets, oldtypes, &MPI_PART);

    if (rank ==0){ //initalizing stuff in rank ==0
        part = 64;
        // part/size is an integer division; floorf is applied to its result.
        chunk = floorf(part/size);

        // NOTE(review): the body of this loop is missing from the excerpt.
        for (int i=0; i< part; i++){

    //broadcasting needed variables
    // Only chunk is broadcast; part is assigned inside the rank==0 branch
    // above but is read in the sendcount loop below -- TODO confirm that the
    // other ranks obtain a value for part.
    MPI_Bcast(&chunk, 1, MPI_INT, 0, MPI_COMM_WORLD);

    int sendcount[size];
   //creating the arguments for scatterv
   for(size_t ct = 0; ct<size; ct++){
         // NOTE(review): no else is visible here, so as written the second
         // assignment overwrites chunk+1 with chunk for every ct.
         if (ct < part%size)
            sendcount[ct] = chunk+1;
           sendcount[ct] = chunk;

    int displs[size];


    // NOTE(review): displs[0] is never assigned in this excerpt, and the
    // running sum adds sendcount[j] rather than sendcount[j-1].
    for ( j =1; j< size; j++)
    displs[j] = displs[j-1] +sendcount[j];

    recvcount = sendcount[rank];


    // NOTE(review): the send-buffer and receive-buffer arguments (1st and 5th)
    // are empty as posted.
    MPI_Scatterv(, sendcount, displs, MPI_PART,, recvcount, MPI_PART, 0, MPI_COMM_WORLD);

    for(int iteration =0; iteration <10; ++iteration){

        //two functions which changes some of the data in Pot but leaves ss the same and can be left out
        // NOTE(review): the send-buffer and receive-buffer arguments (1st and
        // 4th) are empty as posted.
        MPI_Gatherv(, sendcount[rank], MPI_PART,, sendcount, displs, MPI_PART, 0, MPI_COMM_WORLD);

    // Rank 0 prints ss for 64 entries of f; 64 is the value given to part above.
    if(rank ==0)
        for (int k=0; k<64; k++)
            cout<<f[k].ss<<" ";

    return 0;


The output of this code is

0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 Program ended with exit code: 0

The last values remain zero, which is wrong. I then eliminated my class completely and ran the code with an int in the struct instead of Class2 (changing the rest of the code accordingly); the results were correct:

0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 Program ended with exit code: 0

My guess is that I am not describing the class correctly to MPI. I am writing #define MPI_CLASS2 MPI_FLOAT among the preprocessor directives; maybe there is something wrong with that.

Here is the header file. With it, the code should compile and the output above should be reproducible.

           #pragma once

 #include <math.h>
 #include <assert.h>

 /// Two-component float vector.
 ///
 /// The components can be read and written through three equivalent views
 /// that share the same storage: Element[0..1], (X, Y) and (U, V).
 /// Anonymous structs inside a union are a compiler extension supported by
 /// GCC, Clang and MSVC.
 /// std::max / std::min come from <algorithm>, which this header expects the
 /// including file to have included already.
 struct Class2
 {
     union
     {
         float Element[2];
         struct { float X, Y; };
         struct { float U, V; };
     };

     /// Leaves both components uninitialised.
     Class2() {}

     /// Sets both components to p_fValue.
     Class2(float p_fValue) { X = p_fValue; Y = p_fValue; }

     /// Sets the components to (p_x, p_y).
     Class2(float p_x, float p_y) { X = p_x; Y = p_y; }

     Class2(const Class2 &p_vector) { X = p_vector.X; Y = p_vector.Y; }

     /// Component access by index; p_nIndex must be 0 or 1 (not checked).
     float operator[](int p_nIndex) const { return Element[p_nIndex]; }
     float& operator[](int p_nIndex) { return Element[p_nIndex]; }

     void Set(float p_x, float p_y)
     {
         X = p_x;
         Y = p_y;
     }

     /// True when both components differ by at most p_epsilon.
     bool Equals(const Class2 &p_vector, const float p_epsilon = 1e-5f) const
     {
         if (fabs(X - p_vector.X) > p_epsilon) return false;
         if (fabs(Y - p_vector.Y) > p_epsilon) return false;

         return true;
     }

     Class2& operator=(const Class2 &p_vector)
     {
         X = p_vector.X;
         Y = p_vector.Y;

         return *this;
     }

     /// Approximate comparison: delegates to Equals with its default epsilon.
     bool operator==(const Class2 &p_vector) const { return Equals(p_vector); }
     bool operator!=(const Class2 &p_vector) const { return !(*this == p_vector); }

     /// Scales both components by p_fValue.
     Class2 operator*(float p_fValue) const
     {
         return Class2(p_fValue * X, p_fValue * Y);
     }

     /// Divides both components by p_fValue, which must be non-zero (asserted).
     Class2 operator/(float p_fValue) const
     {
         assert(p_fValue != 0.f);
         return Class2(*this * (1.0f / p_fValue));
     }

     /// Component-wise product.
     Class2 operator*(const Class2 &p_vector) const
     {
         return Class2(p_vector.X * X, p_vector.Y * Y);
     }

     Class2 operator+(const Class2 &p_vector) const
     {
         return Class2(X + p_vector.X, Y + p_vector.Y);
     }

     Class2 operator-(const Class2 &p_vector) const
     {
         return Class2(X - p_vector.X, Y - p_vector.Y);
     }

     Class2 operator-(void) const { return Class2(-X, -Y); }

     Class2& operator*=(float p_fValue) { return *this = *this * p_fValue; }
     Class2& operator*=(const Class2 &p_vector) { return *this = *this * p_vector; }
     Class2& operator/=(float p_fValue) { return *this = *this / p_fValue; }
     Class2& operator+=(const Class2 &p_vector) { return *this = *this + p_vector; }
     Class2& operator-=(const Class2 &p_vector) { return *this = *this - p_vector; }

     float MaxComponent() const { return std::max(X, Y); }
     float MinComponent() const { return std::min(X, Y); }
     float MaxAbsComponent() const { return std::max(fabs(X), fabs(Y)); }
     float MinAbsComponent() const { return std::min(fabs(X), fabs(Y)); }

     /// Component-wise maximum of the two vectors.
     static Class2 Max(const Class2 &p_vector1, const Class2 &p_vector2)
     {
         return Class2(std::max(p_vector1.X, p_vector2.X),
                       std::max(p_vector1.Y, p_vector2.Y));
     }

     /// Component-wise minimum of the two vectors.
     static Class2 Min(const Class2 &p_vector1, const Class2 &p_vector2)
     {
         return Class2(std::min(p_vector1.X, p_vector2.X),
                       std::min(p_vector1.Y, p_vector2.Y));
     }

     /// Euclidean length.
     float Length(void) const { return sqrt(X * X + Y * Y); }

     float LengthSquared(void) const { return X * X + Y * Y; }

     /// Rescales this vector in place to unit length.
     void Normalize(void) { *this = Class2::Normalize(*this); }

     float Dot(const Class2 &p_vector) const { return Class2::Dot(*this, p_vector); }

     float AbsDot(const Class2 &p_vector) const { return Class2::AbsDot(*this, p_vector); }

     static float Dot(const Class2 &p_vector1, const Class2 &p_vector2)
     {
         return p_vector1.X * p_vector2.X + p_vector1.Y * p_vector2.Y;
     }

     static float AbsDot(const Class2 &p_vector1, const Class2 &p_vector2)
     {
         return fabs(p_vector1.X * p_vector2.X +
                     p_vector1.Y * p_vector2.Y);
     }

     /// Returns p_vector scaled to unit length.
     /// The divisor is the length itself; dividing by sqrt(Length()) would not
     /// yield a unit vector. A zero-length input trips the assert in operator/.
     static Class2 Normalize(const Class2 &p_vector)
     {
         return p_vector / p_vector.Length();
     }

     static float DistanceSquared(const Class2 &p_point1, const Class2 &p_point2)
     {
         return (p_point2 - p_point1).LengthSquared();
     }

     static float Distance(const Class2 &p_point1, const Class2 &p_point2)
     {
         return (p_point2 - p_point1).Length();
     }
 };

/// Scalar * vector, so the scalar may appear on the left-hand side.
inline Class2 operator*(float p_fValue, const Class2 &p_vector) {
    return Class2(p_fValue * p_vector.X, p_fValue * p_vector.Y);
}

Hristo Iliev On BEST ANSWER

First of all, the code is very confusing. You should not use the MPI_ prefix for naming your own symbols as that prefix is reserved for the MPI implementation and doing so might result in name clashes with consequences very hard to debug. Then, you define MPI_CLASS2 to be MPI_FLOAT, which is really confusing given that Class2 is the name of a structure with multiple fields and not simply a scalar. Use MPI_Type_create_struct or MPI_Type_contiguous instead. Because MPI_CLASS2 is MPI_FLOAT, the type map of MPI_PART does not correspond to the actual data layout and MPI incorrectly computes the offsets in both the scatter and the gather operations.

The simplest solution is to replace the definition of MPI_CLASS2 with the following:

// Describe Class2 to MPI as two consecutive MPI_FLOAT elements, replacing the
// "#define MPI_CLASS2 MPI_FLOAT" from the question.
MPI_Datatype MPI_CLASS2;
MPI_Type_contiguous(2, MPI_FLOAT, &MPI_CLASS2);

The more elaborate solution is to notice that the extent of the MPI datatype might not always match the true size of the language type, therefore computations such as offsets[1] = 2*extent; might not always work. It is advisable to instantiate an object instead and fetch the address of each field separately (or use the C standard offsetof macro from the cstddef header):

// Datatype for one Class2: two consecutive floats.
MPI_Datatype dt_class2;
MPI_Type_contiguous(2, MPI_FLOAT, &dt_class2);

MPI_Datatype dt_temp, dt_pcle;
MPI_Aint offsets[3], base;
int blockcounts[3];
MPI_Datatype oldtypes[3];

// Measure the displacement of every field on a real object instead of
// deriving it from datatype extents, so compiler padding is accounted for.
Pcle dummy;
MPI_Get_address(&dummy, &base);
MPI_Get_address(&dummy.Pot, &offsets[0]);
MPI_Get_address(&dummy.Vy, &offsets[1]);
MPI_Get_address(&dummy.ss, &offsets[2]);

// Field 0: Pot
offsets[0] -= base;
blockcounts[0] = 1;
oldtypes[0] = dt_class2;

// Field 1: Vy
offsets[1] -= base;
blockcounts[1] = 1;
oldtypes[1] = dt_class2;

// Field 2: ss
offsets[2] -= base;
blockcounts[2] = 1;
oldtypes[2] = MPI_FLOAT;

MPI_Type_create_struct(3, blockcounts, offsets, oldtypes, &dt_temp);
// Force the extent to sizeof(Pcle) so that consecutive array elements are
// addressed correctly even if the structure has trailing padding.
MPI_Type_create_resized(dt_temp, 0, sizeof(Pcle), &dt_pcle);
// A datatype must be committed before it is used in communication.
MPI_Type_commit(&dt_pcle);
// The intermediate type is no longer needed; dt_pcle is unaffected by this.
MPI_Type_free(&dt_temp);

// Now dt_pcle is ready to use

MPI_Type_create_resized is used to make sure that the extent of the MPI datatype exactly matches the size of the structure Pcle, which takes care of any padding that the compiler might insert (none in your case, as there are only floats).