Extracting links from a html table

Do you have a question? Post it now! No Registration Necessary.  Now with pictures!


Thought I would share my code with you as it might help to fix my
problem a bit quicker...

I thought I would be able to somehow use HTML::TableExtract to pull out
the links to each of the fund names at the URL in the variable $MORNINGSTAR.

For instance, "CAAM Funds Latin America Equities C Inc" has a link.
That link is what I would like to extract.

Does anyone know how I can do this with HTML::TableExtract or WWW::Mechanize?

Your help would be really appreciated!




use HTML::TableExtract;
use LWP::Simple;
use WWW::Mechanize;

## Main script: computes the daily and weekly NASDAQ percentage changes, then
## walks the pages of the Morningstar listing and prints the table of each page
## through printTable().
##
## NOTE(review): this excerpt is incomplete. Block braces are missing
## throughout, and several statements appear to have been lost; see the notes
## next to the affected lines below.

## Morningstar launch page
## NOTE(review): the URL string below is not terminated in this excerpt; the
## remainder of the address and the closing quote are missing.
$MORNINGSTAR="http://www.morningstar.co.uk/UK/ISAQuickrank /

## Variables
my @nasdaqPrices;                    ## array containing nasdaq close prices
my $mech = WWW::Mechanize->new();    ## perl agent
my $content;                        ## webpage content
my $link_name;
my $nasdaqWeekly;                    ## Weekly nasdaq change
my $nasdaqDaily;                    ## Daily nasdaq change

## Obtain latest NASDAQ values
## NOTE(review): $NASDAQ_PRICE_DATA is not defined anywhere in this excerpt;
## nasdaqToArray() opens it as a file name, so it presumably holds the path of
## the price file -- confirm.
@nasdaqPrices = &nasdaqToArray($NASDAQ_PRICE_DATA);

$nasdaqWeekly = &nasdaqPerformanceWeekly(@nasdaqPrices);
print "Nasdaq w     ".$nasdaqWeekly."\n";
$nasdaqDaily = &nasdaqPerformanceDaily(@nasdaqPrices);
print "Nasdaq d     ".$nasdaqDaily."\n";

#print scalar(@nasdaqPrices)."\n";

#print "@nasdaqPrices";


    ## Identify the agent with the "Windows IE 6" alias.
    $mech->agent_alias("Windows IE 6");

    #$content = get($MORNINGSTAR);

    ## Find the total number of pages
    ## NOTE(review): no $mech->get(...) call is visible before this lookup, so
    ## as shown the agent has not fetched a page yet -- confirm the fetch was
    ## not lost from the excerpt.
    $link_name = $mech->find_link( text => "last" );

    ## NOTE(review): $re is never assigned in this excerpt and this "if" has no
    ## visible body; presumably it captured $total_pages from the URL of the
    ## "last" link -- confirm.
    if ($link_name->url() =~ m/$re/is)

    ## Print every single page out.
    ## NOTE(review): $count and $total_pages are not declared or assigned in
    ## the visible code.
    for ($count=1;$count<=$total_pages;$count++)
        ## On the first pass, print the table of the page that is already loaded.
        if ($count == 1)
            ##&printTable($content, $nasdaqDaily, $nasdaqWeekly);
            &printTable($mech, $nasdaqDaily, $nasdaqWeekly);
            #&doPostBack($mech, 'ctl00$ctl00$MainContent$Layout_1MainContent
            #&printTable($mech->content(), $nasdaqDaily, $nasdaqWeekly);
            ## NOTE(review): the else branch that presumably separated the call
            ## above from the two below is not visible, and the string argument
            ## to doPostBack() is cut off in this excerpt.
            &doPostBack($mech, 'ctl00$ctl00$MainContent$Layout_1MainContent
            &printTable($mech, $nasdaqDaily, $nasdaqWeekly);


## Print every fund whose 1-day change is positive and greater than the
## supplied daily benchmark change.
##
## Arguments:
##   $contents     - agent object; must provide content() (the current page's
##                   HTML) and find_link() (as WWW::Mechanize does)
##   $dailychange  - benchmark daily percentage change to compare against
##   $weeklychange - benchmark weekly percentage change (accepted for
##                   interface compatibility; not used by the current filter)
##
## Output: one line per matching fund on STDOUT: fund name, seven tabs, the
##         fund's 1-day change, a tab, and the benchmark daily change.
sub printTable {
    my ($contents, $dailychange, $weeklychange) = @_;

    my $webpage = $contents->content();

    ## Select the table by its column headers. Each header is used as a
    ## pattern, so \s and \n match the whitespace inside the header cells.
    my $te = HTML::TableExtract->new( headers => [
       qw(Fund\sName Latest\nPrice 1\sDay 1\sWeek 1\sMonth 3\sMonth
          6\sMonth Date)
     ], );

    ## The extractor must be fed the HTML before tables() can return anything;
    ## without this call the loops below never run.
    $te->parse($webpage);

    foreach my $ts ($te->tables) {
        foreach my $row ($ts->rows) {
            ## Columns arrive in the order of the headers requested above;
            ## only the fund name and the 1-day change are needed here.
            my ($fund_name, undef, $p1_day) = @$row;

            ## A missing 1-day cell can never satisfy the filter below.
            next unless defined $p1_day;

            if (($p1_day > $dailychange) && ($p1_day > 0)) {
                print $fund_name."\t\t\t\t\t\t\t".$p1_day."\t".$dailychange."\n";

                ## Look up the page link whose text is the fund name. When a
                ## link is found its address is available as $link->url();
                ## find_link() returns undef when nothing matches.
                my $link = $contents->find_link( text => $fund_name );
                ##print $link->url()."\n\n" if $link;
            }
        }
    }

    return;
}




## Fill in the two hidden form fields that an ASP.NET __doPostBack() call
## would set on the page.
##
## Arguments:
##   $agent  - WWW::Mechanize agent object (anything providing field())
##   $target - first argument of the __doPostBack() call (event target)
##   $arg    - second argument of the __doPostBack() call (event argument);
##             optional, defaults to the empty string
##
## This sub only sets the fields; it does not submit the form.
## NOTE(review): presumably the form must still be submitted (for example
## with $agent->submit()) for the post-back to reach the server -- confirm
## that the submit step was not lost from this sub.
sub doPostBack {
    my $agent  = shift;    ## WWW::Mechanize agent-object
    my $target = shift;    ## first argument in the __doPostBack() call
    my $arg    = shift;    ## second argument in the __doPostBack() call

    ## A post-back without an event argument sends an empty value, not undef.
    $arg = '' unless defined $arg;

    $agent->field('__EVENTTARGET', $target);
    $agent->field('__EVENTARGUMENT', $arg);
}



## Read a price file and return all of its fields as one flat list.
##
## Arguments:
##   $nasdaq_price_data - path of the file to read
##
## Returns: the fields of every line, in file order. Each line is split on
##          the two-character separator ", " (comma followed by a space).
## Dies:    with a message starting "File not found" when the path is
##          missing or the file cannot be opened.
sub nasdaqToArray {
    my $nasdaq_price_data = shift;
    my @nasdaqPriceList;

    die "File not found: no file name given\n"
        unless defined $nasdaq_price_data;

    ## Three-argument open with a lexical handle: the file name is never
    ## interpreted as an open mode, and the handle is scoped to this sub.
    open(my $fh, '<', $nasdaq_price_data)
        or die "File not found: $nasdaq_price_data: $!\n";

    while (my $line = <$fh>) {
        chomp $line;                                  # removes the trailing newline
        push @nasdaqPriceList, split(/, /, $line);    # breaks the line up into fields
    }

    close $fh;

    return @nasdaqPriceList;
}

## Percentage change between the last two entries of a price list.
##
## Arguments: the list of prices (flattened into @_). The last element is
##            taken as the newest price and the one before it as the
##            previous price.
## Returns:   (new - old) / old * 100, formatted with "%.4f".
## Dies:      when fewer than two prices are given or the previous price is
##            zero, since the percentage is undefined in both cases.
sub nasdaqPerformanceDaily {
    my @nasdaqPriceList = @_;

    die "nasdaqPerformanceDaily: need at least 2 prices\n"
        if @nasdaqPriceList < 2;

    ## Single elements are read with the $ sigil; @list[-2] would be a
    ## one-element slice.
    my $old = $nasdaqPriceList[-2];
    my $new = $nasdaqPriceList[-1];

    die "nasdaqPerformanceDaily: previous price is zero\n"
        if $old == 0;

    return sprintf("%.4f", (($new - $old) / $old) * 100);
}

## Percentage change between the fifth-from-last and the last entry of a
## price list.
##
## Arguments: the list of prices (flattened into @_). The last element is
##            taken as the newest price and the element five positions from
##            the end as the base price.
## Returns:   (new - old) / old * 100, formatted with "%.4f".
## Dies:      when fewer than five prices are given or the base price is
##            zero, since the percentage is undefined in both cases.
sub nasdaqPerformanceWeekly {
    my @nasdaqPriceList = @_;

    die "nasdaqPerformanceWeekly: need at least 5 prices\n"
        if @nasdaqPriceList < 5;

    ## Single elements are read with the $ sigil; @list[-5] would be a
    ## one-element slice.
    my $old = $nasdaqPriceList[-5];
    my $new = $nasdaqPriceList[-1];

    die "nasdaqPerformanceWeekly: base price is zero\n"
        if $old == 0;

    return sprintf("%.4f", (($new - $old) / $old) * 100);
}

Site Timeline