use vars qw(@day_names); # erkkeri is gone - could remove a lot of this ugliness my $erkkeri_title = "(TTY) Amica Erkkeri"; my @restaurant_info = ( [ "(TaY) Amica Minerva", "http://www.amica.fi/minerva", "", "middle" ] #[ $erkkeri_title, "http://www.amica.fi/erkkeri", "", "left" ] ); my ($parse_func, $day_id, $week, $erkkeri); my (@cur_day_foods, @week_foods); sub amica_parse_to_eof { } sub amica_finish_day { push @week_foods, [@cur_day_foods]; @cur_day_foods = (); $day_id = $day_id + 1; } sub utf8_to_8859 { $_ = shift; s/ä/ä/g; s/ö/ö/g; s/Ä/Ä/g; s/Ö/Ö/g; return $_; } sub amica_parse_line { my $text = shift; $text =~ tr/\r\n\t/ /; $text =~ s/( | )*$//; $text =~ s/ *(\d+,\d+ *\/ *)?\d+,\d+ *$//; $text =~ s/^( | )*//; $text =~ s/( | )*$//; $text =~ s/valinnan mukaan$//; if ($text =~ /^(.*) \(([^\)]+)\)$/) { my ($name, $allergy) = ($1, $2); $name =~ s/( )*$//; $allergy =~ s/\*veg\./ eriks: Ve/g; $allergy =~ s/veg/Ve/gi; $allergy =~ s/\*([A-Z]+)/ eriks: $1/; return "$name ($allergy)\n"; } elsif ($text ne "") { return "$text\n"; } return ""; } sub amica_parse_split { my $text = shift; my $food = ""; for (;;) { $i = index($text, ")"); last if ($i == -1); my $line = substr($text, 0, $i + 1); $food .= amica_parse_line($line); $text = substr($text, $i + 1); } $food .= amica_parse_line($text); push @cur_day_foods, $food if ($food ne ""); } sub amica_parse_more_food { my $token = shift; if ($token->[0] eq 'T') { my $text = utf8_to_8859($token->[1]); my $next_day_name = $day_names[$day_id+1]; if ($day_id < 6 && $text =~ /^$next_day_name\b/i) { # day changed amica_finish_day(); } else { amica_parse_split($text); } } elsif ($token->[0] eq 'S' && $token->[1] eq 'h2') { amica_finish_day(); $parse_func = \&amica_parse_to_eof; } } sub amica_parse_first_day { my $token = shift; if ($token->[0] eq 'T') { for ($day_id = 0; $day_id < 7; $day_id++) { my $dayname = $day_names[$i]; last if ($token->[1] =~ /^$dayname\b/i); } $day_id = 0 if ($day_id == 7); $parse_func = \&amica_parse_more_food; } } sub amica_parse_to_start { my $token = shift; if ($token->[0] eq 'S' && $token->[1] eq 'p') { $parse_func = \&amica_parse_first_day; } } sub get_week { my ($mday, $mon) = @_; my @l = localtime; my @l2 = (0, 0, 0, $mday, $mon-1, $l[5], 0, 0, -1); @l = localtime(mktime(@l2)); return strftime("%V", @l); } sub amica_parse_date { my $token = shift; if ($token->[0] eq 'T') { my $text = $token->[1]; if ($text =~ /^(\d\d\d\d)-(\d?\d)-(\d?\d) /) { my ($mday, $mon) = ($3, $2); $week = get_week($mday, $mon); } } elsif ($token->[0] eq 'E' && $token->[1] eq 'h2') { $parse_func = \&amica_parse_to_start; } } sub amica_parse_to_date { my $token = shift; if ($token->[0] eq 'S' && $token->[1] eq 'h2') { my %attrs = %{$token->[2]}; if ($attrs{'id'} =~ /HeadingMenu/) { $parse_func = \&amica_parse_date; } } } sub parse_amica { my ($fname, $info_ref) = @_; my $p = HTML::TokeParser->new($fname) or die("Can't open file $fname"); my $title = @{$info_ref}[0]; $week = ""; $day_id = 0; @cur_day_foods = (); @week_foods = (); $parse_func = \&amica_parse_to_date; while (my $token = $p->get_token) { &$parse_func($token); } return [ $title, "", $week, [ @week_foods ], $info_ref ]; } sub parse_amica_get_finnish_url { my ($fname) = @_; my $p = HTML::TokeParser->new($fname) or die("Can't open file $fname"); my @l = localtime; my $this_week = strftime("%V", @l); my $state = 0; my $last_url = ""; my $week = ""; while (my $token = $p->get_token) { if ($token->[0] eq 'S' && $token->[1] eq 'meta') { my %attrs = %{$token->[2]}; if ($attrs{'name'} eq "TITLE") { if ($attrs{'content'} =~ /(\d+)\.(\d+)\.? *- *(\d+)\.(\d+)/) { my ($mday, $mon) = ($1, $2); $week = get_week($mday, $mon); } elsif ($attrs{'content'} =~ /(\d+)\.? *- *(\d+)\.(\d+)/) { my ($mday, $mon) = ($1, $3); $week = get_week($mday, $mon); } return "" if ($attrs{'content'} !~ /English/i && $week == $this_week); } } elsif ($token->[0] eq 'S' && $token->[1] eq 'a') { my %attrs = %{$token->[2]}; $last_url = $attrs{'href'}; } elsif ($token->[0] eq 'T' && $token->[1] =~ /^ruokalista (\d+)\.(\d+)\.? *- *(\d+)\.(\d+)/i) { my ($mday, $mon) = ($1, $2); $week = get_week($mday, $mon); return $last_url if ($week == $this_week); } elsif ($token->[0] eq 'T' && $token->[1] =~ /^ruokalista (\d+)\.? *- *(\d+)\.(\d+)/i) { my ($mday, $mon) = ($1, $3); $week = get_week($mday, $mon); return $last_url if ($week == $this_week); } elsif ($token->[0] eq 'T' && $token->[1] =~ /^ruokalista (viikko|vko) (\d+)/i) { $week = $2; return $last_url if ($week == $this_week); } } return ""; } sub parse_amica_url { my ($fname) = @_; my $p = HTML::TokeParser->new($fname) or die("Can't open file $fname"); my $state = 0; while (my $token = $p->get_token) { if ($token->[0] eq 'S') { my %attrs = %{$token->[2]}; if ($token->[1] eq 'td' && $attrs{'title'} eq 'Ruokalistat' && $state == 0) { $state = 1; } elsif ($token->[1] eq 'a' && $state == 1) { return $attrs{'href'}; } } } return ""; } sub get_amica_restaurant { my $use_old = shift; my $count = 0; my @restaurants = (); foreach my $i (@restaurant_info) { my @info = @{$i}; my $temp_fname = "amica$count.temp.html"; my $url = $info[1]; if (!-f $temp_fname || !$use_old) { system("wget -q --timeout=10 -O $temp_fname.tmp '$url' && mv $temp_fname.tmp $temp_fname") if ($url ne ""); } if (-f $temp_fname) { $info[1] = $url; push @restaurants, parse_amica($temp_fname, \@info); } $count++; } return @restaurants; } 1;