Not getting output from HTML::TreeBuilder

340 views Asked by At

I'm trying to get a whole bunch of values from around 3,000 HTML files and save them to a spreadsheet.

I'm using HTML::TreeBuilder to process the HTML and creating a spreadsheet using Spreadsheet::WriteExcel.

But my script doesn't successfully get the values. I see

Use of uninitialized value $val in concatenation (.) or string at spreadsheet.pl line 63.

What might I be doing wrong?

Here's an example of my HTML files on pastebin.com. It was too large to post in the question.

My Perl code

use warnings 'all';
use strict;

use LWP::Simple 'get';
use Spreadsheet::WriteExcel;
use HTML::TreeBuilder;
use Path::Tiny;
use constant URL => 'http://pastebin.com/raw/qLwu80ZW';

my $teamNumber  = "";
my $teamName    = "";
my $schoolName  = "";
my $area        = "";
my $district    = "";
my $agDeptPhone = "";
my $schoolPhone = "";
my $fax         = "";
my $addressOne  = "";
my $addressTwo  = "";
my $city        = ""; 
my $state       = "";
my $zipCode     = "";
my $name        = "";
my $email       = "";
my $row         = "";
my $Ypos        = 0; 

my $path = "Z:\\_WEB_CLIENTS\\Morgan Livestock\\Judging Card";

my $workbook  = Spreadsheet::WriteExcel->new('perlOutput.xlsx');
my $worksheet = $workbook->add_worksheet();


sub getTeamNumber {
    my ($file) = @_;

    my $html = path($file);
    my $tree = HTML::TreeBuilder->new_from_content($html);
    my @nodes = $tree->look_down(_tag => 'input');

    my $val;

    foreach my $node (@nodes) {
        $val = $node->look_down('name', qr/\$txt_TeamNumber/)->attr('value');
    }

    print "Got Team Number $val\n";

    return $val;
}


sub getTeamName {
    my ($file) = @_;

    my $html = path($file);
    my $tree = HTML::TreeBuilder->new_from_content($html);
    my @nodes = $tree->look_down(_tag => 'input');

    my $val;

    foreach my $node (@nodes) {
        $val = $node->look_down('name', qr/\$txt_TeamName/)->attr('value');
    }

    print "Got Team Name $val\n";

    return $val;
}


sub getSchoolName {
    my ($file) = @_;

    my $html = path($file);
    my $tree = HTML::TreeBuilder->new_from_content($html);
    my @nodes = $tree->look_down(tag_ => 'input');

    my $val;

    foreach my $node (@nodes) {
        $val = $node->look_down('name', qr/\$txt_SchoolName/)->attr('value');
    }

    print "Got School Name $val\n";

    return $val;
}


sub getArea{
    my ($file) = @_;

    my $html = path($file);
    my $tree = HTML::TreeBuilder->new_from_content($html);
    my @nodes = $tree->look_down(tag_ => 'input');

    my $val;

    foreach my $node (@nodes) {
        $val = $node->look_down('name', qr/\$txt_Area/)->attr('value');
    }

    print "Got Area $val\n";

    return $val;
}


sub getDistrict{
    my ($file) = @_;

    my $html = path($file);
    my $tree = HTML::TreeBuilder->new_from_content($html);
    my @nodes = $tree->look_down(_tag => 'input');

    my $val;

    foreach my $node (@nodes) {
        $val = $node->look_down('name', qr/\$txt_District/)->attr('value');
    }

    print "Got District $val\n";

    return $val;
}


sub getDeptPhone {
    my ($file) = @_;

    my $html = path($file);
    my $tree = HTML::TreeBuilder->new_from_content($html);
    my @nodes = $tree->look_down(_tag => 'input');

    my $val;

    foreach my $node (@nodes) {
        $val = $node->look_down('name', qr/\$txt_Phone/)->attr('value');
    }

    print "Got Dept Phone $val\n";

    return $val;
}


sub getSchoolPhone{
    my ($file) = @_;

    my $html = path($file);
    my $tree = HTML::TreeBuilder->new_from_content($html);
    my @nodes = $tree->look_down(_tag => 'input');

    my $val;

    foreach my $node (@nodes) {
        $val = $node->look_down('name', qr/\$txt_Phone2/)->attr('value');
    }

    print "Got School Phone $val\n";

    return $val;
}


sub getFax{
    my ($file) = @_;

    my $html = path($file);
    my $tree = HTML::TreeBuilder->new_from_content($html);
    my @nodes = $tree->look_down(_tag => 'input');

    my $val;

    foreach my $node (@nodes) {
        $val = $node->look_down('name', qr/\$txt_Fax/)->attr('value');
    }

    print "Got Fax $val\n";

    return $val;
}


sub getAddress1 {
    my ($file) = @_;

    my $html = path($file);
    my $tree = HTML::TreeBuilder->new_from_content($html);
    my @nodes = $tree->look_down(_tag => 'input');

    my $val;

    foreach my $node (@nodes) {
        $val = $node->look_down('name', qr/\$txt_Address1/)->attr('value');
    }

    print "Got Address One $val\n";

    return $val;
}


sub getAddress2 {
    my ($file) = @_;

    my $html = path($file);
    my $tree = HTML::TreeBuilder->new_from_content($html);
    my @nodes = $tree->look_down(_tag => 'input');

    my $val;

    foreach my $node (@nodes) {
        $val = $node->look_down('name', qr/\$txt_Address2/)->attr('value');
    }

    print "Got Address Two $val\n";

    return $val;
}


sub getCity {
    my ($file) = @_;

    my $html = path($file);
    my $tree = HTML::TreeBuilder->new_from_content($html);
    my @nodes = $tree->look_down(_tag => 'input');

    my $val;

    foreach my $node (@nodes) {
        $val = $node->look_down('name', qr/\$txt_City/)->attr('value');
    }

    print "Got Address Two $val\n";

    return $val;
}


sub getState {
    my ($file) = @_;

    my $html = path($file);
    my $tree = HTML::TreeBuilder->new_from_content($html);
    my @nodes = $tree->look_down(_tag => 'input');

    my $val;

    foreach my $node (@nodes) {
        $val = $node->look_down('name', qr/\$txt_State/)->attr('value');
    }

    print "Got State $val\n";

    return $val;
}


sub getZip {
    my ($file) = @_;

    my $html = path($file);
    my $tree = HTML::TreeBuilder->new_from_content($html);
    my @nodes = $tree->look_down(_tag => 'input');

    my $val;

    foreach my $node (@nodes) {
        $val = $node->look_down('name', qr/\$txt_Zip/)->attr('value');
    }

    print "Got Zip $val\n";

    return $val;
}


sub getWebsite {
    my ($file) = @_;

    my $html = path($file);
    my $tree = HTML::TreeBuilder->new_from_content($html);
    my @nodes = $tree->look_down(_tag => 'input');

    my $val;

    foreach my $node (@nodes) {
        $val = $node->look_down('name', qr/\$txt_Website/)->attr('value');
    }

    print "Got Website $val\n";

    return $val;
}


sub getNameAndEmail {
    my ($file) = @_;

    my $tree = HTML::TreeBuilder->new_from_content(get URL);
    my ($table) = $tree->look_down(_tag => 'table', class => 'rgMasterTable');

    for my $tr ( $table->look_down(_tag => 'tr') ) {
        next unless my @td = $tr->look_down(_tag => 'td');
        my ($name, $email) = map { $_->as_trimmed_text } @td[0,1];
    }

    print "Got Name and Email $name and $email\n";

    return ($name, $email);
}

# FILLER: This fills the spreadsheet with all the variables we've acquired

sub fill {
    my ($name, $email, $teamNumber, $teamName, $schoolName,
        $area, $district, $agDeptPhone, $schoolPhone,
        $fax, $addressOne, $addressTwo, $city, $state, $zipCode) = (@_);

    $worksheet->write($Ypos, 1, $name);
    $worksheet->write($Ypos, 2, $email);
    $worksheet->write($Ypos, 3, $teamNumber);
    $worksheet->write($Ypos, 4, $teamName);
    $worksheet->write($Ypos, 5, $schoolName);
    $worksheet->write($Ypos, 6, $area);
    $worksheet->write($Ypos, 7, $district);
    $worksheet->write($Ypos, 8, $agDeptPhone);
    $worksheet->write($Ypos, 9, $schoolPhone);
    $worksheet->write($Ypos, 10, $fax);
    $worksheet->write($Ypos, 11, $addressOne);
    $worksheet->write($Ypos, 12, $addressTwo);
    $worksheet->write($Ypos, 13, $city);
    $worksheet->write($Ypos, 14, $state);
    $worksheet->write($Ypos, 15, $zipCode);
}

# Open judgingcard directory

opendir (DIR, $path) or die "Unable to open directory 'Judging Card': $!";

my @files = readdir(DIR);

# This fills out all top row info

$worksheet->write("A1", "Name");
$worksheet->write("B1", "Email");
$worksheet->write("C1", "Team Number");
$worksheet->write("D1", "Team Name");
$worksheet->write("E1", "School Name");
$worksheet->write("F1", "Area");
$worksheet->write("G1", "District");
$worksheet->write("H1", "Ag Dept Phone");
$worksheet->write("I1", "School Phone");
$worksheet->write("J1", "Fax");
$worksheet->write("K1", "Address One");
$worksheet->write("L1", "Address Two");
$worksheet->write("M1", "City");
$worksheet->write("N1", "State");
$worksheet->write("O1", "Zip Code");

###################################

foreach my $file (@files) { # run through all files in directory

    next if (-d $file); # Skip file if file is folder

    $Ypos = $Ypos + 1;

    my ($name1, $email1) = getNameAndEmail($file);

    $name        = $name1;
    $email       = $email1;
    $teamNumber  = getTeamNumber($file);
    $teamName    = getTeamName($file);
    $schoolName  = getSchoolName($file);
    $area        = getArea($file);
    $district    = getDistrict($file);
    $agDeptPhone = getDeptPhone($file);
    $schoolPhone = getSchoolPhone($file);
    $fax         = getFax($file);
    $addressOne  = getAddress1($file);
    $addressTwo  = getAddress2($file);
    $city        = getCity($file);
    $state       = getState($file);
    $zipCode     = getZip($file);

    fill($name, $email, $teamNumber, $teamName, $schoolName,
        $area, $district, $agDeptPhone, $schoolPhone, $fax,
        $addressOne, $addressTwo, $city, $state, $zipCode);

    print "Progressing                    $file                ($Ypos)\n"
}

closedir(DIR);


sub getTeamNumber {
    my ($file) = @_;

    my $html = path($file);
    my $tree = HTML::TreeBuilder->new_from_content($html);
    my @nodes = $tree->look_down(_tag => 'input');

    my $name;
    my $val;

    foreach my $node (@nodes) {
        $name = $node->look_down('name', qr/\$txt_TeamNumber/);
    }

    if ( ! defined $name ) {
        print "Couldn't get team number\n";
    }

    if ( $name ) {
        $val = $name->attr('value');
        print "Got Team number $val\n";
    }

    return $val;
}

New script:

use LWP::Simple 'get';
use Spreadsheet::WriteExcel;
use HTML::TreeBuilder;
use Path::Tiny;

my $path = "Z:\\_WEB_CLIENTS\\Morgan Livestock\\Judging Card";

my $workbook  = Spreadsheet::WriteExcel->new('perlOutput.xlsx');
my $worksheet = $workbook->add_worksheet();

opendir (DIR, $path) or die "Unable to open directory 'Judging Card': $!";
my @files = readdir(DIR);

# Specify spreadsheet headers in desired order and write  to file
my @headers = ('Name', 'Email', 'Team Number', 'Team Name', 'School Name', 'Area', 'District', 'Ag Dept Phone', 'School Phone', 'Fax', 'Address One'
    , 'Address Two', 'City', 'State', 'Zip Code');
$worksheet->write_row(0, 0, \@headers);               # first row

# Build ancillary data structures to later sort results by this order
# each header with its index from @headers (specifies columns' order)
my %ho = map { state $idx; $_ => ++$idx } @headers;
# each name (`TeamNumber` ...) with the index of its header
my %name_order = ( Name => $ho{Name}, Email => $ho{Email}, 
    TeamNumber => $ho{'Team Number'}, TeamName => $ho{'Team Name'}, SchoolName => $ho{'School Name'}, Area => $ho{'Area'}, District => $ho{'District'}, 
        AgDeptPhone => $ho{'Ag Dept Phone'}, SchoolPhone => $ho{'School Phone'}, Fax => $ho{'Fax'}, AddressOne => $ho{'Address One'},
        AddressTwo => $ho{'Address Two'}, City => $ho{'City'}, State => $ho{'State'}, Zip => $ho{'Zip Code'});
        
sub getNames {
    my ($file) = @_;
    my $tree = HTML::TreeBuilder->new_from_content( path($file) );
    my @nodes = $tree->look_down(_tag => 'input');

    # List phrases to find, and build hash with their derived names
    # Should probably be defined globally, once for the whole program
    my @patterns  = map { '$txt_' . $_ } 
        qw(TeamName TeamNumber SchoolName Area District 
           Phone Phone2 Fax Address1 Address2 City State Zip Website);

    # Name for each pattern: everything after first _ (so after $txt_) 
    my %patt_name = map { $_ => (/[^_]+_(.*)/)[0] } @patterns;
    my %name_val;
    foreach my $node (@nodes) {
        foreach my $patt (@patterns) {
            my $name = $node->look_down('name', qr/\Q$patt/);
            if ($name) {
                $name_val{$patt_name{$patt}} = $name->attr('value') || '';
            }
        }
    }

    # Name and Email are stored differently. Fetch those now
    my ($table) = $tree->look_down(_tag => 'table', class => 'rgMasterTable');
    for my $tr ( $table->look_down(_tag => 'tr') ) { 
        next unless my @td = $tr->look_down(_tag => 'td');
        # Discard incomplete Name-Email records -- either both or none
        @name_val{qw(Name Email)} = 
            map { (defined) ? $_->as_trimmed_text : '' } @td[0,1];
    }
    return \%name_val;
}

sub fill_row {
   my ($ws, $row, $rdata, $rorder) = @_;    
   my %name_val   = %$rdata;
   my %name_order = %$rorder;

   my @vals = map { $name_val{$_} } 
              sort { $name_order{$a} <=> $name_order{$b} } 
              keys %name_val;

   $ws->write_row($row, 0, \@vals);  # add check (returns 0 on success)

   return 1;

    my $row = 1;
}

foreach my $file (@files) {
    next if -d $file;

    my %name_val = %{ getNames($file) };

    foreach my $name (sort keys %name_val) {
        # Fill the spreadsheet with all info in one go
        if ($name_val{$name}) {
            print "$name => $name_val{$name}\n";
        } else {
            print "Not found $name in $file\n";
        }
    }

    
    my %name_val = %{ getNames($file) };
    fill_row($worksheet, $row++, \%name_val, \%name_order);

    foreach my $name (sort keys %name_val) {  # demo
        if ($name_val{$name}) { print "$name => $name_val{$name}\n" }
        else                  { print "Not found $name in $file\n" }
    }
    
    print "Progressing $Ypos \n"
}

                                                                                                  
2

There are 2 answers

10
zdim On BEST ANSWER

In short, some of those 'name' are probably just not found in (some of) HTML files. So test first to see whether it is there, then write to $val or print message about it not being found.

The most apparent thing to be improved: there is no need for separate functions. You can search and find all of them in one call, and store them in the hash name => value, which is returned.

sub getNames {
    my ($file) = @_;
    my $tree = HTML::TreeBuilder->new_from_content( path($file) );
    my @nodes = $tree->look_down(_tag => 'input');

    # List phrases to find, and build hash with their derived names
    # Should probably be defined globally, once for the whole program
    my @patterns  = map { '$txt_' . $_ } 
        qw(TeamName TeamNumber SchoolName Area District 
           Phone Phone2 Fax Address1 Address2 City State Zip Website);

    # Name for each pattern: everything after first _ (so after $txt_) 
    my %patt_name = map { $_ => (/[^_]+_(.*)/)[0] } @patterns;
    my %name_val;
    foreach my $node (@nodes) {
        foreach my $patt (@patterns) {
            my $name = $node->look_down('name', qr/\Q$patt/);
            if ($name) {
                $name_val{$patt_name{$patt}} = $name->attr('value') || '';
            }
        }
    }

    # Name and Email are stored differently. Fetch those now
    my ($table) = $tree->look_down(_tag => 'table', class => 'rgMasterTable');
    for my $tr ( $table->look_down(_tag => 'tr') ) { 
        next unless my @td = $tr->look_down(_tag => 'td');
        # Discard incomplete Name-Email records -- either both or none
        if (2 == grep { not ref $_ } @td) {
            @name_val{qw(Name Email)} = map { $_->as_trimmed_text } @td[0,1];
        }
        else { @name_val{qw(Name Email)} = ('', '') }
    }
    return \%name_val;
}

For Name and Email we require both to be there as text, or both are thrown out. (The sample source has There are no people ... inside a div for Name, and nothing for Email.)

To get whatever is there, instead of if-else above use

@name_val{qw(Name Email)} = 
    map { (defined) ? $_->as_trimmed_text : '' } @td[0,1];

and we get the note quoted above for Name and an empty string for Email, with this sample.

Then

# Specify spreadsheet headers in desired order and write  to file
my @headers = ('Name', 'Email', 'Team Number', 'Team Name', ...);
$worksheet->write_row(0, 0, \@headers);               # first row

# Build ancillary data structures to later sort results by this order
# each header with its index from @headers (specifies columns' order)
my %ho = map { state $idx; $_ => ++$idx } @headers;
# each name (`TeamNumber` ...) with the index of its header
my %name_order = ( Name => $ho{Name}, Email => $ho{Email}, 
    TeamNumber => $ho{'Team Number'}, TeamName => $ho{'Team Name'}, ... 
);

my $row = 1;
foreach my $file (@files) {
    next if -d $file;

    my %name_val = %{ getNames($file) };
    fill_row($worksheet, $row++, \%name_val, \%name_order);

    foreach my $name (sort keys %name_val) {  # demo
        if ($name_val{$name}) { print "$name => $name_val{$name}\n" }
        else                  { print "Not found $name in $file\n" }
    }
}

sub fill_row {
   my ($ws, $row, $rdata, $rorder) = @_;    
   my %name_val   = %$rdata;
   my %name_order = %$rorder;

   my @vals = map { $name_val{$_} } 
              sort { $name_order{$a} <=> $name_order{$b} } 
              keys %name_val;

   $ws->write_row($row, 0, \@vals);  # add check (returns 0 on success)

   return 1;
}

The write_row takes a reference to an array and writes out a row with its elements. Note that write can be used that way, too, when an arrayref is given.

The output on the linked HTML file

Area => 1
District => 1
State => NM
TeamName => Ruidoso
TeamNumber => 83

and Not found ... for others. The .xls file is correct (when the full list of names is used).


The whole program

use warnings;
use strict;
use feature qw(say state);

use Path::Tiny;
use HTML::TreeBuilder;
use Spreadsheet::WriteExcel;


my @src = qw(TeamName TeamNumber SchoolName Area District Phone Phone2 
    Fax Address1 Address2 City State Zip Website);
my @headers = ('Name', 'Email', 'Team Number', 'Team Name', 'School Name', 
    'Area', 'District', 'Ag Dept Phone', 'School Phone', 'Fax', 'Address One', 
    'Address Two', 'City', 'State', 'Zip Code', 'Web Site'
);
my @lens = map { length } @headers;  # for printing

# Numeric order of headers' fields (so, columns)
my %ho = map { state $idx; $_ => ++$idx } @headers;
# Translation: name from HTML source => column number (retrieved from %ho)
my %name_order = ( 
    Name => $ho{Name}, Email => $ho{Email}, 
    TeamNumber => $ho{'Team Number'},
    TeamName => $ho{'Team Name'}, 
    SchoolName => $ho{'School Name'}, Area => $ho{'Area'},
    District => $ho{'District'}, Phone2 => $ho{'Ag Dept Phone'},
    Phone => $ho{'School Phone'}, Fax => $ho{'Fax'}, 
    Address1 => $ho{'Address One'},
    Address2 => $ho{'Address Two'}, 
    City => $ho{'City'}, State => $ho{'State'}, 'Zip' => $ho{'Zip Code'},
    Website => $ho{'Web Site'}
);

say "Order (column) of names from HTML source to follow headers:";
printf("%-10s  ==>  %s\n", $_, $name_order{$_})
    for sort { $name_order{$a} <=> $name_order{$b} } keys %name_order;
say '';

my $workbook  = Spreadsheet::WriteExcel->new('data.xls');
my $worksheet = $workbook->add_worksheet();

# Print headers to .xls file (and to screen)
$worksheet->write_row(0, 0, \@headers);
say "Spreadsheet, header and rows:";
prn_row(\@headers);  # print to screen

my @files = ('fetch_names.html');
my $row = 1;
foreach my $file (@files) {
    next if -d $file;

    # Parse the file, print the row to spreadsheet
    my %name_val = %{ getNames($file) };
    fill_row($worksheet, $row++, \%name_val, \%name_order);
}


# Functions

sub fill_row {
    my ($ws, $row, $rdata, $rorder) = @_;

    my %name_val = %$rdata;
    my $name_order = %$rorder;

    my @vals =
        map { $name_val{$_} }
        sort { $name_order{$a} <=> $name_order{$b} }
        grep { exists $name_order{$_} }
        keys %name_val;

    prn_row(\@vals);  # print to screen

    $worksheet->write_row($row, 0, \@vals);  # test this (returns 0 on success)

    return 1;
}

sub prn_row {
    my @ary = @{ $_[0] };
    for (0..$#ary) {
        my $len = $lens[$_];
        printf("%${len}s  ", $ary[$_]);
    }
    say '';
}

sub getNames {
    my ($file) = @_;
    my $tree = HTML::TreeBuilder->new_from_content( path($file)->slurp );
    my @nodes = $tree->look_down(_tag => 'input');

    my @patterns = map { '$txt_' . $_ } @src;
    # List phrases to find, and build hash with their derived names
    # Name for each pattern: everything first _ (so after \$txt_)
    my %patt_name = map { $_ => (/[^_]+_(.*)/)[0] } @patterns;

    my %name_val;
    foreach my $node (@nodes) {
        foreach my $patt (@patterns) {
            my $name = $node->look_down('name', qr/\Q$patt/) or next;
            $name_val{$patt_name{$patt}} = $name->attr('value') // '';
        }
    }

    # Name and Email are stored differently, fetch those now
    my ($table) = $tree->look_down(_tag => 'table', class => 'rgMasterTable');
    for my $tr ( $table->look_down(_tag => 'tr') ) {
        next unless my @td = $tr->look_down(_tag => 'td');
        # Discard incomplete Name-Email records -- either both or none
        if (2 == grep { not ref } @td) {
            @name_val{qw(Name Email)} = map { $_->as_trimmed_text } @td[0,1];
        }
        else { @name_val{qw(Name Email)} = ('', '') }
    }
    return \%name_val;
}

This works as a complete program with the supplied sample HTML source.


Addition: an actual page may have multiple name-email pairs

use LWP::Simple qw(get);

sub getNames {
    my ($file) = @_; 
    my $url = 'https://www.judgingcard.com/Directory/Directory.aspx?ID=1643';
    my $page = get($url) or die "Can't get the page $url: $!";
    my $tree = HTML::TreeBuilder->new_from_content( $page );

    my @nodes = $tree->look_down(_tag => 'input');

    my @patterns = map { '$txt_' . $_ } @src;
    # List phrases to find, and build hash with their derived names
    # Name for each pattern: everything first _ (so after \$txt_)
    my %patt_name = map { $_ => (/[^_]+_(.*)/)[0] } @patterns;

    my %name_val;
    foreach my $node (@nodes) {
        foreach my $patt (@patterns) {
            my $name = $node->look_down('name', qr/\Q$patt/) or next;
            $name_val{$patt_name{$patt}} = $name->attr('value') // ''; 
        }   
    }   

    # Name and Email are stored differently, fetch those now
    my %name_email;
    my ($table) = $tree->look_down(_tag => 'table', class => 'rgMasterTable');
    for my $tr ( $table->look_down(_tag => 'tr') ) { 
        next unless my @td = $tr->look_down(_tag => 'td');
        # There may be more than one Name-Email pair
        # so enter key-value pair explicitely
        if (2 <= grep { ref } @td) {
            $name_email{$td[0]->as_trimmed_text} = $td[1]->as_trimmed_text;
        }
        else { %name_email = ('', '') }
    }
    return \%name_val, \%name_email;
}

Then in the main you need

foreach my $file (@files) {
    next if -d $file;

    # Parse the file, unpack name-value and name-email hashes
    my ($rname_val, $rname_email) = getNames($file);
    my %name_val   = %$rname_val;
    my %name_email = %$rname_email;

    # Print a row for each Name-Email, adding them to %name_val
    foreach my $name (keys %name_email) {
        $name_val{Name}  = $name;
        $name_val{Email} = $name_email{$name};
        fill_row($worksheet, $row++, \%name_val, \%name_order);
    }
}

The desired format with mutpliple Name-Email pairs is: same headers, and for each pair a separate row is printed to file, where all information other than Name-Email is the same.

The printed spreadsheet (URL used was provided in comments)

Printed spreadsheet. URL was provided in comments

0
clt60 On

You probably could reduce the code. Even if in the following example me not using HTML::TreeBuidler, the approach is similar. Using Mojo::DOM58,

use 5.014;
use warnings;

use Mojo::DOM58;
use Path::Tiny;
use Data::Dumper;

my @fields = qw( TeamName TeamNumber SchoolName Area District Phone Phone2 Fax Address1 Address2 City State Zip Website );

my $html = path('team.html')->slurp;
my $dom = Mojo::DOM58->new($html);

my $data;
for my $field( @fields ) {
    $data->{$field} = $dom->at(qq{input[name*="txt_$field"]})->attr('value') // "";
}

say Dumper $data;

prints:

$VAR1 = {
          'TeamName' => 'Ruidoso',
          'Zip' => '',
          'State' => 'NM',
          'City' => '',
          'District' => '1',
          'Phone2' => '',
          'Area' => '1',
          'SchoolName' => '',
          'Address2' => '',
          'Website' => '',
          'Address1' => '',
          'Phone' => '',
          'Fax' => '',
          'TeamNumber' => '83'
        };