Perl PDL, counting occurrences of two float values

126 views Asked by At

I want to count the occurrences of two float values at the same time.

my @Key_Str = swcols "%0.2f_%0.2f", $a_datas, $b_datas;
my %counts;
foreach my $key_str (@Key_Str) {
    $counts{$key_str}++;
}
my (@p_a, @p_b @cnts);
foreach (sort keys %counts){
    my ($z, $h) = split /_/, $_;
    push @p_a, $z; push @p_b, $h; push @cnts, $counts{$_};
}
my $a= pdl(@p_a); my $b = pdl(@p_b); my $count = pdl(@cnts);

Is it possible same thing with pure PDL?

It's taking time using hash and array.

In more detail,

#!/usr/bin/perl

use PDL;
use Data::Dumper;
use feature 'say';

my $a_data = pdl(1.5,  2.1,  2.1,  1.5,  -5.7);
my $b_data = pdl(12.2, 22.3, 22.3, 12.2, 15.3);

my @Key_Str = swcols "%0.2f_%0.2f", $a_data, $b_data;

my %counts;
foreach my $key_str (@Key_Str) {
    $counts{$key_str}++;
}
my (@p_a, @p_b, @cnts);
foreach (sort keys %counts){
    my ($z, $h) = split /_/, $_;
    push @p_a, $z; push @p_b, $h; push @cnts, $counts{$_};
}
my $a= pdl(@p_a); my $b = pdl(@p_b); my $count = pdl(@cnts);
say $a;
say $b;
say $count;

result is

[-5.7 1.5 2.1]
[15.3 12.2 22.3]
[1 2 2]
3

There are 3 answers

1
Håkon Hægland On BEST ANSWER

I am not sure how to get the counts of the unique elements with a simple PDL function, but you can do it in a loop. For example:

use v5.38;
use PDL;

# Sample values for $a_datas and $b_datas
my @a_datas = (1.2345, 2.3456, 1.2399, 3.4567, 2.3411, 1.2300, 3.4500, 4.5600, 2.3456, 1.2345);
my @b_datas = (3.4567, 4.5678, 3.4512, 5.6789, 4.5634, 3.4000, 5.6700, 6.7800, 4.5678, 3.4567);

my $a = pdl(@a_datas);
my $b = pdl(@b_datas);

# Round to two decimal places
$a = ($a * 100)->rint / 100;
$b = ($b * 100)->rint / 100;

# Combine into a 2D piddle
my $A = $a->cat($b);
$A = $A->xchg(0,1)->qsortvec;
my $B = $A->uniqvec;
say $B;

my $C = zeroes($B->dim(1));  # Initialize $C with zeros, with the same length as the number of rows in $B
my $j = 0;
for my $i (0 .. $B->dim(1) - 1) {
    my $Bval = $B->slice(":,($i)");
    my $count = 0;
    while (1) {
        my $Aval = $A->slice(":,($j)");
        if ($j < $A->dim(1) && all($Aval == $Bval)) {
            $count++;
            $j++;
        }
        else {
            last;
        }
    }
    $C->set($i, $count);
}
say $C;

Output:

[
 [1.23  3.4]
 [1.23 3.46]
 [1.24 3.45]
 [2.34 4.56]
 [2.35 4.57]
 [3.45 5.67]
 [3.46 5.68]
 [4.56 6.78]
]

[1 2 1 1 2 1 1 1]
    
2
doosoonk On

Thank you Hakon, You give me lot's hints(qsortvec, uniqvec and other)

My Final Code is

#!/usr/bin/perl

use PDL;
use PDL::NiceSlice;
use feature 'say';

# Sample values for $a_datas and $b_datas
my @a_datas = (1.2345, 2.3456, 1.2399, 3.4567, 2.3411, 1.2300, 3.4500, 4.5600, 2.3456, 1.2345);
my @b_datas = (3.4567, 4.5678, 3.4512, 5.6789, 4.5634, 3.4000, 5.6700, 6.7800, 4.5678, 3.4567);

my $a = pdl(@a_datas);
my $b = pdl(@b_datas);

# Round to two decimal places
$a = ($a * 100)->rint / 100;
$b = ($b * 100)->rint / 100;

# Combine into a 2D piddle
my $A = $a->cat($b)->transpose;
say 'after cat : $A : ', $A;
my $B = $A->qsortvec->uniqvec;
say 'after uniqvec : $B : ', $B;

my $C = zeroes($B->dim(1));

map { $C($_) .= which(($A == $B(,$_))->sumover == 2)->nelem } 0 .. $B(0,)->nelem - 1;

say '$C : ', $C;

after cat : $A :
[
 [1.23 3.46]
 [2.35 4.57]
 [1.24 3.45]
 [3.46 5.68]
 [2.34 4.56]
 [1.23  3.4]
 [3.45 5.67]
 [4.56 6.78]
 [2.35 4.57]
 [1.23 3.46]
]

after uniqvec : $B :
[
 [1.23  3.4]
 [1.23 3.46]
 [1.24 3.45]
 [2.34 4.56]
 [2.35 4.57]
 [3.45 5.67]
 [3.46 5.68]
 [4.56 6.78]
]

$C : [1 2 1 1 2 1 1 1]

I hope that is correct

2
Ed. On

This is a task for https://metacpan.org/pod/PDL::Slices#rlevec (contributed to main-PDL by Bryan Jurish), which operates (as shown in the code below) only on sorted lists of vectors.

Your code, with the use of rlevec at the end:

use PDL;
use PDL::NiceSlice;
use feature 'say';

# Sample values for $a_datas and $b_datas
my @a_datas = (1.2345, 2.3456, 1.2399, 3.4567, 2.3411, 1.2300, 3.4500, 4.5600, 2.3456, 1.2345);
my @b_datas = (3.4567, 4.5678, 3.4512, 5.6789, 4.5634, 3.4000, 5.6700, 6.7800, 4.5678, 3.4567);

my $a = pdl(@a_datas);
my $b = pdl(@b_datas);

# Round to two decimal places
$a = ($a * 100)->rint / 100;
$b = ($b * 100)->rint / 100;

# Combine into a 2D piddle
my $A = $a->cat($b)->transpose;
say 'after cat : $A : ', $A;
my $B = $A->qsortvec->uniqvec;
say 'after uniqvec : $B : ', $B;

my $C = zeroes($B->dim(1));

map { $C($_) .= which(($A == $B(,$_))->sumover == 2)->nelem } 0 .. $B(0,)->nelem - 1;

say '$C : ', $C;

say "rlevec: ", rlevec($A->qsortvec);

The output of the rlevec bit:

rlevec: [1 2 1 1 2 1 1 1 0 0]
[
 [1.23  3.4]
 [1.23 3.46]
 [1.24 3.45]
 [2.34 4.56]
 [2.35 4.57]
 [3.45 5.67]
 [3.46 5.68]
 [4.56 6.78]
 [   0    0]
 [   0    0]
]