C++ Program for computing the Pearson Correlation Coefficient

This C++ program computes the linear correlation between two variables X and Y using the Pearson correlation coefficient. The X and Y data vectors can be read from files or they can be inserted by hand. The program is largely self-explanatory and easy to understand.
(see more details here: https://en.wikipedia.org/wiki/Pearson_correlation_coefficient)






    #include <algorithm>
    #include <cmath>
    #include <cstdio>
    #include <fstream>
    #include <iomanip>
    #include <iostream>
    #include <limits>
    #include <vector>
    using namespace std;

     /// Returns the sum of all elements of `a`; an empty vector sums to 0.
     /// The vector is taken by const reference so the caller's data is not copied.
     double suma(const std::vector<double>& a)
    {
        double s = 0;
        for (double value : a)
        {
            s += value;
        }
        return s;
    }

    /// Returns the arithmetic mean of the elements of `a` (sum divided by element count).
    /// For an empty vector this evaluates 0.0 / 0.0, which is NaN under IEEE-754 arithmetic.
    /// The vector is taken by const reference so it is not copied on the way in.
    double mean(const std::vector<double>& a)
    {
        const double count = static_cast<double>(a.size());
        return suma(a) / count;
    }
        
   
    
    /// Returns the sum of the squares of the elements of `a`; an empty vector gives 0.
    /// The vector is taken by const reference so the caller's data is not copied.
    double sqsum(const std::vector<double>& a)
    {
        double s = 0;
        for (double value : a)
        {
            // A plain multiply is exact enough and avoids the general-purpose pow() call.
            s += value * value;
        }
        return s;
    }

    /// Returns the population standard deviation of `nums` (divisor N, not N - 1).
    ///
    /// The variance is computed in two passes as the mean of the squared
    /// deviations from the average. This avoids the cancellation in
    /// E[x^2] - E[x]^2, which can round to a slightly negative variance and
    /// then produce NaN from the square root.
    ///
    /// For an empty vector the divisions are 0.0 / 0.0, so the result is NaN
    /// under IEEE-754 arithmetic.
    double stdev(const std::vector<double>& nums)
    {
        const double N = static_cast<double>(nums.size());

        // Pass 1: average of the samples.
        double total = 0;
        for (double value : nums)
        {
            total += value;
        }
        const double average = total / N;

        // Pass 2: sum of squared deviations; every term is >= 0, so the sum is too.
        double squaredDeviations = 0;
        for (double value : nums)
        {
            const double deviation = value - average;
            squaredDeviations += deviation * deviation;
        }

        return std::sqrt(squaredDeviations / N);
    }

   

    /// Element-wise subtraction of the scalar `b` from every element of `a`.
    /// Returns a new vector with the same number of elements as `a`; `a` itself
    /// is taken by const reference and is not modified or copied.
    std::vector<double> operator-(const std::vector<double>& a, double b)
    {
        std::vector<double> retvect;
        retvect.reserve(a.size()); // final size is known, so allocate once
        for (double value : a)
        {
            retvect.push_back(value - b);
        }
        return retvect;
    }

    /// Element-wise product of `a` and `b`: result[i] = a[i] * b[i].
    ///
    /// When the two vectors have different lengths only their common prefix is
    /// multiplied, so the result has min(a.size(), b.size()) elements and
    /// neither operand is ever indexed past its end.
    std::vector<double> operator*(const std::vector<double>& a, const std::vector<double>& b)
    {
        const std::size_t count = std::min(a.size(), b.size());

        std::vector<double> retvect;
        retvect.reserve(count); // final size is known, so allocate once
        for (std::size_t i = 0; i < count; i++)
        {
            retvect.push_back(a[i] * b[i]);
        }
        return retvect;
    }

    /// Pearson correlation coefficient of the paired samples X and Y.
    ///
    ///   r = sum((x - mx) * (y - my)) / (sqrt(sum((x - mx)^2)) * sqrt(sum((y - my)^2)))
    ///
    /// where mx and my are the sample means. This is algebraically the same as
    /// the covariance divided by the product of the population standard
    /// deviations; the common 1/N factors cancel.
    ///
    /// @param X first sample
    /// @param Y second sample; must have as many elements as X
    /// @return r, or NaN when the samples are empty or differ in length. If a
    ///         sample has exactly zero spread the expression is 0 / 0, which is
    ///         also NaN under IEEE-754 arithmetic.
    double pearsoncoeff(const std::vector<double>& X, const std::vector<double>& Y)
    {
        const std::size_t n = X.size();
        if (n == 0 || n != Y.size())
        {
            // Unpaired data has no defined correlation; also keeps Y[i] in range below.
            return std::nan("");
        }

        // Pass 1: means of both samples.
        double sumX = 0;
        double sumY = 0;
        for (std::size_t i = 0; i < n; i++)
        {
            sumX += X[i];
            sumY += Y[i];
        }
        const double meanX = sumX / static_cast<double>(n);
        const double meanY = sumY / static_cast<double>(n);

        // Pass 2: co-deviation and the two squared-deviation sums.
        double sxy = 0;
        double sxx = 0;
        double syy = 0;
        for (std::size_t i = 0; i < n; i++)
        {
            const double dx = X[i] - meanX;
            const double dy = Y[i] - meanY;
            sxy += dx * dy;
            sxx += dx * dx;
            syy += dy * dy;
        }

        // Separate square roots so sxx * syy cannot overflow before the root is taken.
        return sxy / (std::sqrt(sxx) * std::sqrt(syy));
    }

    /// Reads one integer menu answer from standard input.
    /// Any answer that does not parse as an integer is reported as 0, and the
    /// rest of that input line is discarded so later reads start clean.
    static int readMenuAnswer()
    {
        int answer = 0;
        if (!(std::cin >> answer))
        {
            // A letter or symbol puts the stream in a failed state; without the
            // reset every later extraction would fail as well.
            std::cin.clear();
            std::cin.ignore(std::numeric_limits<std::streamsize>::max(), '\n');
            answer = 0;
        }
        return answer;
    }

    /// Appends every whitespace-separated number found in the file `path` to `out`.
    /// Returns false when the file cannot be opened; reading stops at the first
    /// token that is not a number or at end of file.
    static bool readVectorFromFile(const char* path, std::vector<double>& out)
    {
        std::ifstream input(path); // closed automatically when `input` goes out of scope
        if (!input)
        {
            return false;
        }

        double value;
        while (input >> value)
        {
            out.push_back(value);
        }
        return true;
    }

    /// Fills every element of `out` from standard input, in order.
    /// Returns false as soon as a value cannot be read.
    static bool readVectorFromConsole(std::vector<double>& out)
    {
        for (std::size_t i = 0; i < out.size(); i++) // i runs up to the last element of the vector
        {
            if (!(std::cin >> out[i]))
            {
                return false;
            }
        }
        return true;
    }

    /// Prompts for (x, y) pairs and stores them in X and Y, one pair per index.
    /// Only indices valid in both vectors are filled. Returns false as soon as
    /// a value cannot be read.
    static bool readPairsFromConsole(std::vector<double>& X, std::vector<double>& Y)
    {
        const std::size_t count = std::min(X.size(), Y.size());
        for (std::size_t i = 0; i < count; i++)
        {
            std::cout<<"x= ";
            if (!(std::cin >> X[i]))
            {
                return false;
            }
            std::cout<<"y= ";
            if (!(std::cin >> Y[i]))
            {
                return false;
            }
        }
        return true;
    }

    /// Interactive entry point.
    ///
    /// Answer 1 reads X from "v1.txt" and Y from "v2.txt"; any other answer
    /// asks for a sample size and then reads the data from the console, either
    /// as (x, y) pairs or one whole vector at a time. The Pearson coefficient
    /// is printed with three decimals.
    ///
    /// Returns 0 on success and 1 when the input is unusable (missing file,
    /// empty or unequal-length samples, bad sample size, non-numeric data).
    int main() {

        std::cout<<"Press 1 to read vectors from files. Press any other key otherwise"<<std::endl;
        const int key = readMenuAnswer();

        if (key == 1)
        {
            // The vectors must start empty: push_back appends, so pre-sized
            // vectors would keep leading zeros in front of the file data.
            std::vector<double> X;
            std::vector<double> Y;

            if (!readVectorFromFile("v1.txt", X) || !readVectorFromFile("v2.txt", Y))
            {
                std::cerr << "Could not open v1.txt and/or v2.txt" << std::endl;
                return 1;
            }
            if (X.empty() || X.size() != Y.size())
            {
                std::cerr << "v1.txt and v2.txt must contain the same, non-zero number of values" << std::endl;
                return 1;
            }

            std::cout << std::fixed << std::setprecision(3) << pearsoncoeff(X, Y) << std::endl;
        }
        else
        {
            int nr = 0;
            std::cout<<"Sample size: "; // this is the size of both vectors
            if (!(std::cin >> nr) || nr <= 0)
            {
                std::cerr << "Sample size must be a positive integer" << std::endl;
                return 1;
            }
            std::vector<double> X(static_cast<std::size_t>(nr));
            std::vector<double> Y(static_cast<std::size_t>(nr));

            std::cout<<"Choose 1 to insert (x,y) data points or any other value to insert 1 data vector at a time: "<<std::endl;
            const int choice = readMenuAnswer();

            bool ok = true;
            if (choice == 1)
            {
                ok = readPairsFromConsole(X, Y);
            }
            else
            {
                std::cout<<"Insert vector X: "<<std::endl;
                ok = readVectorFromConsole(X);

                if (ok)
                {
                    std::cout<<"Insert vector Y: "<<std::endl;
                    ok = readVectorFromConsole(Y); // reads the second vector
                }
            }

            if (!ok)
            {
                std::cerr << "Invalid numeric input" << std::endl;
                return 1;
            }

            std::cout<<"Pearson correlation betweent the 2 variables is: ";
            std::cout << std::fixed << std::setprecision(3) << pearsoncoeff(X, Y) << std::endl;
        }

        return 0;
    }

Comments

  1. Thanks for the code. But it has an error: it returns different values when using the input files compared to when you enter them manually, from the console. I tried for these values: X=9 7 5 3 1 and Y = 10 6 1 5 3. If I enter them manually, it gives the correct result 0.699, but when using the input files, the result is 0.877.

    ReplyDelete
    Replies
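    1. Edit: the vectors are created with 5 zero elements (vector<double> X(5)), so push_back() appends the file values after those zeros. Declaring the vectors empty, or using a simple X[i] = value and i++, works.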

      Delete

Post a Comment

Popular posts from this blog

Object Oriented Programming C++ Simple Calculator