use strict;
use warnings;
use vars qw(@day_names);

# Sodexo lunch-menu scraper.
#
# Each restaurant page is downloaded with wget into a per-restaurant cache
# file and then walked token by token.  The walk is a small state machine:
# $parse_func always holds a reference to the handler for the current state,
# and every handler may switch state by assigning a new handler to it.
#
# NOTE(review): HTML::TokeParser is used in parse_sodexo() but is not loaded
# anywhere in this file; presumably the script that includes this file loads
# it -- confirm.

# One entry per restaurant: [ title, menu URL, alignment ].
my @restaurant_info = (
    #[ "(TaY) Sodexo Linna", "http://www.sodexo.fi/fi-FI/linna/lounas/", "right" ],
    #[ "(TTY) Sodexo Erkkeri", "http://www.sodexo.fi/fi-FI/erkkeri/lounas/", "left" ]
    [ "(TaY) Sodexo Linna",   "http://www.sodexo.fi/linna",   "right" ],
    [ "(TTY) Sodexo Erkkeri", "http://www.sodexo.fi/erkkeri", "left" ]
);

# Parser state shared by all handlers below.
#   $cur_title     - title of the food currently being collected
#   $cur_text      - comma-separated extra texts collected for that food
#   $parse_func    - code reference to the handler for the current state
#   $day_id        - number of days finished so far
#   $week          - week number captured from the page ("" until found)
#   @cur_day_foods - foods collected for the day in progress
#   @week_foods    - one array reference of foods per finished day
my ($cur_text, $cur_title, $parse_func, $day_id, $week);
my (@cur_day_foods, @week_foods);

# Close the day in progress: store a copy of its foods and start a new day.
sub sodexo_finish_day {
    push @week_foods, [@cur_day_foods];
    @cur_day_foods = ();
    $day_id = $day_id + 1;
}

# Return a copy of the given string with the four substitutions applied.
# Works on a lexical copy so that the caller's $_ is left untouched.
#
# NOTE(review): as the substitutions appear in this file, pattern and
# replacement are the same characters, which makes them no-ops.  Presumably
# the patterns were meant to be the raw UTF-8 byte sequences and got
# normalised by a re-encoding of the file -- confirm against the original
# file encoding before changing them.
sub utf8_to_8859 {
    my $text = shift;
    $text =~ s/ä/ä/g;
    $text =~ s/ö/ö/g;
    $text =~ s/Ä/Ä/g;
    $text =~ s/Ö/Ö/g;
    return $text;
}

# Terminal state: every remaining token is ignored.
sub sodexo_parse_finish {
}

# State after a food has been stored.  Decides what comes next:
#   </tbody> - the day is finished and parsing stops,
#   <td>     - go back to looking for the next food cell,
#   <th>     - the day is finished, look for the next day's first food.
# Note that the <td>/<th> start tag seen here is consumed by this state.
sub sodexo_parse_after_food {
    my $token = shift;
    my ($type, $tag) = @{$token}[0, 1];

    if ($type eq 'E' && $tag eq 'tbody') {
        sodexo_finish_day();
        $parse_func = \&sodexo_parse_finish;
    }
    elsif ($type eq 'S' && $tag eq 'td') {
        $parse_func = \&sodexo_parse_to_food;
    }
    elsif ($type eq 'S' && $tag eq 'th') {
        sodexo_finish_day();
        $parse_func = \&sodexo_parse_to_food;
    }
}

# Store the collected food as "title (texts)", or just "title" when no
# extra text was collected, then switch to the after-food state.
sub sodexo_finish_food {
    if ($cur_text ne "") {
        push @cur_day_foods, "$cur_title ($cur_text)";
    }
    else {
        push @cur_day_foods, "$cur_title";
    }
    $parse_func = \&sodexo_parse_after_food;
}

# State inside a "food-properties" cell: collect every text token into
# $cur_text, joined with ", ", until the closing </td>.
sub sodexo_parse_allergy {
    my $token = shift;

    if ($token->[0] eq 'T') {
        # Text tokens containing a slash are ignored.
        return if $token->[1] =~ /\s*\/\s*/;

        my $text = $token->[1];
        $text =~ s/^\s+//;
        $text =~ s/\s+$//;

        # Whitespace-only tokens (e.g. line breaks between tags) must not
        # leave a dangling ", " behind.
        return if $text eq "";

        $cur_text .= ", " if $cur_text ne "";
        $cur_text .= $text;
    }
    elsif ($token->[0] eq 'E' && $token->[1] eq 'td') {
        sodexo_finish_food();
    }
}

# State after the food title: wait for <td class="food-properties">.
sub sodexo_parse_to_allergy {
    my $token = shift;

    if ($token->[0] eq 'S' && $token->[1] eq 'td') {
        # The class attribute may be absent, so guard the comparison.
        my $class = $token->[2]{'class'};
        if (defined $class && $class eq 'food-properties') {
            $parse_func = \&sodexo_parse_allergy;
        }
    }
}

# State inside a "food-desc" cell: the first text token containing a
# non-whitespace character becomes the food title.
sub sodexo_parse_food {
    my $token = shift;

    if ($token->[0] eq 'T' && $token->[1] =~ /\S/) {
        $cur_title = utf8_to_8859($token->[1]);
        $parse_func = \&sodexo_parse_to_allergy;
    }
}

# State between foods: wait for <td class="food-desc"> and reset the
# per-food buffers when it is found.
sub sodexo_parse_to_food {
    my $token = shift;

    if ($token->[0] eq 'S' && $token->[1] eq "td") {
        # The class attribute may be absent, so guard the comparison.
        my $class = $token->[2]{'class'};
        if (defined $class && $class eq "food-desc") {
            $cur_title = '';
            $cur_text = '';
            $parse_func = \&sodexo_parse_food;
        }
    }
}

# State inside the "list-date" div: capture the number following "Viikko".
sub sodexo_parse_week {
    my $token = shift;

    if ($token->[0] eq 'T' && $token->[1] =~ /Viikko (\d+)/) {
        $week = $1;
        $parse_func = \&sodexo_parse_to_food;
    }
}

# Initial state: wait for <div class="list-date">.
sub sodexo_parse_to_week {
    my $token = shift;

    if ($token->[0] eq 'S' && $token->[1] eq 'div') {
        # The class attribute may be absent, so guard the comparison.
        my $class = $token->[2]{'class'};
        if (defined $class && $class eq "list-date") {
            $parse_func = \&sodexo_parse_week;
        }
    }
}

# Parse one downloaded restaurant page.
#
# Parameters:
#   $fname    - name of the HTML file to parse
#   $info_ref - reference to a [ title, URL, alignment ] entry
#
# Returns an array reference:
#   [ title, "", week, [ one array ref of foods per day ],
#     [ title, URL, "M", alignment ] ]
#
# Dies if the parser object cannot be created for $fname.
sub parse_sodexo {
    my ($fname, $info_ref) = @_;
    my ($title, $url, $align) = @{$info_ref}[0 .. 2];

    my $p = HTML::TokeParser->new($fname)
        or die("Can't open file $fname");

    # Reset the shared state so consecutive calls do not leak into each other.
    $week = "";
    $day_id = 0;
    @cur_day_foods = ();
    @week_foods = ();
    $parse_func = \&sodexo_parse_to_week;

    while (my $token = $p->get_token) {
        $parse_func->($token);
    }

    return [ $title, "", $week, [ @week_foods ], [ $title, $url, "M", $align ] ];
}

# Download (when needed) and parse every restaurant in @restaurant_info.
#
# Parameters:
#   $use_old - when true, an already existing cache file is reused instead
#              of being downloaded again
#
# Returns the list of parse_sodexo() results, one per restaurant.  If a page
# could not be obtained and no cache file exists for it, returns undef
# (kept as an explicit undef for compatibility with existing callers).
sub get_sodexo_restaurants {
    my $use_old = shift;
    my $count = 0;
    my @restaurants = ();

    foreach my $info_ref (@restaurant_info) {
        my @info = @{$info_ref};

        # Every restaurant needs its own cache file; otherwise a reused
        # cache would hand one restaurant's page to all the others.
        my $temp_fname = "sodexo$count.temp.html";
        $count++;

        my $url = $info[1];
        if (!-f $temp_fname || !$use_old) {
            # Download to a scratch name first so that a failed download
            # never replaces a previously fetched page.
            my $download = "$temp_fname.tmp";
            if (system('wget', '-q', '--timeout=10', '-O', $download, $url) == 0) {
                rename $download, $temp_fname
                    or warn "Can't rename $download to $temp_fname: $!\n";
            }
            return undef if (!-f $temp_fname);
        }

        push @restaurants, parse_sodexo($temp_fname, \@info);
    }

    return @restaurants;
}

1;