cosine similarity measure c#

3.9k views Asked by At

I have to calculate the cosine similarity between different documents and queries.

/// <summary>
/// Tokenizes the hard-coded sample query and document, builds their shared vocabulary,
/// and hands both to <c>GetWeights</c> to obtain the term-weight matrix.
/// </summary>
/// <returns>
/// Whatever <c>GetWeights</c> returns for the two token lists (query first, document
/// second) and the array of distinct terms found across both texts.
/// </returns>
private static double[,] getContent()
{
    string query = "access control policies game";
    string document1 = "The game of life. is a game of, everlasting learning";

    char[] separator = new char[] {' ', '.', ',', ':', ';'};

    // Adjacent separators (". " or ", ") would otherwise produce empty tokens, which
    // would enter the vocabulary as a bogus "" term and inflate the token counts.
    var splitedQuery = query.Split(separator, StringSplitOptions.RemoveEmptyEntries).ToList();
    var splitedDocument1 = document1.Split(separator, StringSplitOptions.RemoveEmptyEntries).ToList();

    // Vocabulary: each distinct term that occurs in the query or the document.
    // Reusing the token lists avoids splitting the concatenated text a second time.
    var unicalWords_D1 = splitedQuery.Concat(splitedDocument1).Distinct().ToArray();

    // Order matters to the caller: the query is added first, the document second.
    var documents = new List<List<string>> { splitedQuery, splitedDocument1 };

    return GetWeights(documents, unicalWords_D1);
}

/// <summary>
/// Builds a term-weight matrix for a set of tokenized texts.
/// </summary>
/// <param name="splitedDocuments">The tokenized texts, one inner list per text.</param>
/// <param name="unicalWords">The terms for which weights are computed.</param>
/// <returns>
/// Declared to return a matrix allocated with one row per entry of
/// <paramref name="unicalWords"/> and one column per entry of
/// <paramref name="splitedDocuments"/>.
/// </returns>
public static double[,] GetWeights(List<List<string>> splitedDocuments, string[] unicalWords)
{
    // Rows are indexed by term, columns by text.
    double[,] matrix = new double[unicalWords.Count(), splitedDocuments.Count];
    // some processing for term frequency (tf) and inverse term frequency (idf)
    // NOTE(review): the weighting loop is elided in this listing, so j, i and weight are
    // not declared here. Presumably the real code assigns matrix[j, i] = weight inside
    // nested loops over terms and texts -- confirm against the full implementation.
    // NOTE(review): as written, the statement below yields a single double rather than
    // the double[,] the signature promises; the intended final statement is presumably
    // "return matrix;" -- confirm.
    return matrix[j, i] = weight;
}

/// <summary>
/// Computes the cosine similarity of two vectors: dot(A, B) / (|A| * |B|).
/// </summary>
/// <param name="vecA">The first vector.</param>
/// <param name="vecB">The second vector; must have the same length as <paramref name="vecA"/>.</param>
/// <returns>
/// The cosine of the angle between the two vectors, or 0 when the product of their
/// magnitudes is zero (for example, when either vector contains only zeros).
/// </returns>
/// <exception cref="ArgumentNullException">Either vector is <c>null</c>.</exception>
/// <exception cref="ArgumentException">The vectors differ in length.</exception>
private static double CalculateCosineSimilarity(double[] vecA, double[] vecB)
{
    if (vecA == null)
    {
        throw new ArgumentNullException("vecA");
    }
    if (vecB == null)
    {
        throw new ArgumentNullException("vecB");
    }
    // Without this check a longer vecB would have its trailing components ignored by the
    // dot product while still contributing to its magnitude, giving a meaningless score.
    if (vecA.Length != vecB.Length)
    {
        throw new ArgumentException("Vectors must have the same length.", "vecB");
    }

    var denominator = Magnitude(vecA) * Magnitude(vecB);

    // A zero denominator would make the division produce NaN (0 / 0), which cannot be
    // ranked against other scores; treat "no direction" as "no similarity" instead.
    if (denominator == 0)
    {
        return 0;
    }

    return DotProduct(vecA, vecB) / denominator;
}
/// <summary>
/// Sums the pairwise products of two vectors, walking the elements of
/// <paramref name="vecA"/> in order and reading the element of
/// <paramref name="vecB"/> at the same position.
/// </summary>
/// <param name="vecA">The vector whose length drives the iteration.</param>
/// <param name="vecB">The vector indexed in step with <paramref name="vecA"/>.</param>
/// <returns>The accumulated sum of products; 0 when <paramref name="vecA"/> is empty.</returns>
private static double DotProduct(double[] vecA, double[] vecB)
{
    var sum = 0.0;
    var position = 0;
    foreach (var component in vecA)
    {
        sum += component * vecB[position];
        position++;
    }
    return sum;
}
/// <summary>
/// Computes the Euclidean length of a vector: the square root of the sum of the
/// squares of its components.
/// </summary>
/// <param name="vector">The vector to measure.</param>
/// <returns>The vector's length; 0 for an empty vector.</returns>
private static double Magnitude(double[] vector)
{
    double sumOfSquares = 0;
    foreach (var component in vector)
    {
        sumOfSquares += component * component;
    }
    return Math.Sqrt(sumOfSquares);
}

So, all the methods are there, and here is the main method of the class.

/// <summary>
/// Program entry point: fetches the weight matrix from <c>getContent</c>, copies its
/// first two columns into vectors, and prints their cosine similarity to the console.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    double[,] weights = getContent();

    // getContent adds the query first and document 1 second, so these are read
    // from the first and second columns respectively.
    double[] queryVector = ExtractColumn(weights, 0);
    double[] documentVector = ExtractColumn(weights, 1);

    double similarity = CalculateCosineSimilarity(queryVector, documentVector);

    Console.WriteLine("Similarity between Query and Doc1: ");
    Console.WriteLine(similarity);
    // Wait for a key press before the method returns.
    Console.ReadKey();
}

/// <summary>
/// Copies one column of a two-dimensional matrix into a new one-dimensional array.
/// </summary>
/// <param name="matrix">The source matrix.</param>
/// <param name="column">Zero-based index of the column to copy.</param>
/// <returns>An array with one element per matrix row.</returns>
private static double[] ExtractColumn(double[,] matrix, int column)
{
    int rowCount = matrix.GetLength(0);
    double[] values = new double[rowCount];
    for (int row = 0; row < rowCount; row++)
    {
        values[row] = matrix[row, column];
    }
    return values;
}

All of this calculates the similarity between the query and document1. I need to calculate the cosine similarity between several queries and documents, store the results in an array, and sort them in descending order. How can I accomplish these tasks?

Suggestions are highly appreciated. Thanks

0

There are 0 answers