123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243 |
- use vars qw(@day_names);
-
# Erkkeri is gone, so its entry in @restaurant_info is commented out and
# the Erkkeri-specific variables below are candidates for removal.
my $erkkeri_title = "(TTY) Amica Erkkeri";

# One array ref per restaurant to fetch.  Element 0 is the display title
# (read by parse_amica) and element 1 is the page URL (read by
# get_amica_restaurant).  Elements 2 and 3 are not interpreted in this file.
my @restaurant_info = (
    [ "(TaY) Amica Minerva", "http://www.amica.fi/minerva", "", "middle" ],
    #[ $erkkeri_title, "http://www.amica.fi/erkkeri", "", "left" ],
);

# Shared state of the token-driven menu parser (reset by parse_amica).
my $parse_func;       # code ref to the handler for the current parser state
my $day_id;           # index into @day_names of the day being collected
my $week;             # week number string produced by get_week
my $erkkeri;          # not referenced by the code visible in this file
my @cur_day_foods;    # food entries collected for the current day
my @week_foods;       # one array ref of food entries per finished day
-
# Terminal parser state: every remaining token is ignored.
sub amica_parse_to_eof {
    return;
}
-
# Close the current day: store a copy of the collected foods as the next
# entry of @week_foods, empty the per-day list and advance $day_id.
sub amica_finish_day {
    my @finished_day = @cur_day_foods;
    push @week_foods, \@finished_day;
    @cur_day_foods = ();
    $day_id += 1;
}
-
# Convert the UTF-8 byte sequences of the Finnish letters a/o with
# diaeresis (both cases) into their single-byte ISO-8859-1 equivalents.
# All other bytes are passed through untouched.
#
# The sequences are written as hex escapes so that the conversion does not
# depend on the encoding this source file happens to be stored in, and the
# work is done on a lexical copy so the caller's $_ is left alone.
#
# Parameter: the text to convert (byte string).
# Returns:   the converted text.
sub utf8_to_8859 {
    my $text = shift;

    # second UTF-8 byte (after the 0xC3 lead byte) => ISO-8859-1 byte
    my %latin1_for = (
        "\xA4" => "\xE4",    # lowercase a with diaeresis
        "\xB6" => "\xF6",    # lowercase o with diaeresis
        "\x84" => "\xC4",    # uppercase A with diaeresis
        "\x96" => "\xD6",    # uppercase O with diaeresis
    );

    $text =~ s/\xC3([\xA4\xB6\x84\x96])/$latin1_for{$1}/g;
    return $text;
}
-
# Normalise one menu line.
#
# Steps: CR/LF/TAB become spaces; edge whitespace is stripped; a trailing
# price ("4,50" or "4,50 / 5,20") is removed; a trailing "valinnan mukaan"
# is removed together with the whitespace in front of it.  If the line ends
# in a parenthesised allergy list, the markers inside it are rewritten
# ("*veg." and "*XX" become " eriks: ..", "veg" becomes "Ve").
#
# Edge whitespace means a plain space or an ISO-8859-1 non-breaking space;
# the latter is written as \xA0 so the pattern does not depend on how this
# source file is encoded.
#
# Parameter: raw text of the line.
# Returns:   the cleaned line terminated by "\n", or "" if nothing is left.
sub amica_parse_line {
    my $text = shift;

    $text =~ tr/\r\n\t/ /;
    $text =~ s/[ \xA0]*$//;
    $text =~ s/ *(\d+,\d+ *\/ *)?\d+,\d+ *$//;    # trailing price(s)
    $text =~ s/^[ \xA0]*//;
    $text =~ s/[ \xA0]*$//;
    # also drop the separator before the phrase so no trailing blank remains
    $text =~ s/[ \xA0]*valinnan mukaan$//;

    if ($text =~ /^(.*) \(([^\)]+)\)$/) {
        my ($name, $allergy) = ($1, $2);
        $name =~ s/[ \xA0]*$//;
        $allergy =~ s/\*veg\./ eriks: Ve/g;
        $allergy =~ s/veg/Ve/gi;
        $allergy =~ s/\*([A-Z]+)/ eriks: $1/;
        return "$name ($allergy)\n";
    }

    return $text ne "" ? "$text\n" : "";
}
-
# Split a text chunk into menu lines and store them as one food entry.
#
# The text is cut after every ")" (the end of an allergy list); each piece,
# and whatever remains after the last ")", is normalised with
# amica_parse_line.  The concatenated result is appended to @cur_day_foods
# unless it is empty.
#
# The cut position is kept in a lexical: the previous version used the
# undeclared package variable $i and so overwrote it on every call.
#
# Parameter: text of one token.
sub amica_parse_split {
    my $text = shift;

    my $food = "";
    while ((my $close = index($text, ")")) != -1) {
        $food .= amica_parse_line(substr($text, 0, $close + 1));
        $text = substr($text, $close + 1);
    }
    $food .= amica_parse_line($text);
    push @cur_day_foods, $food if ($food ne "");
}
-
# Parser state: collect foods for the current day.
#
# A text token that starts with the name of the following day (only checked
# while $day_id is below 6) finishes the current day; any other text token
# is handed to amica_parse_split.  An <h2> start tag finishes the day and
# switches the parser to amica_parse_to_eof.
sub amica_parse_more_food {
    my ($token) = @_;
    my ($type, $value) = ($token->[0], $token->[1]);

    if ($type eq 'T') {
        my $text = utf8_to_8859($value);
        my $next_day_name = $day_names[$day_id+1];
        my $day_changed = ($day_id < 6 && $text =~ /^$next_day_name\b/i);

        if ($day_changed) {
            amica_finish_day();
        } else {
            amica_parse_split($text);
        }
    } elsif ($type eq 'S' && $value eq 'h2') {
        amica_finish_day();
        $parse_func = \&amica_parse_to_eof;
    }
}
-
# Parser state: determine which weekday the menu starts with.
#
# On the first text token, $day_id is set to the index of the entry of
# @day_names that the text starts with (case-insensitive), or to 0 when no
# name matches, and the parser switches to amica_parse_more_food.  Other
# token types are ignored.
#
# The lookup uses its own loop index; the previous version indexed
# @day_names with the unrelated package variable $i, so the comparison
# never looked at the day the loop was on.
sub amica_parse_first_day {
    my $token = shift;

    if ($token->[0] eq 'T') {
        my $first_day = 0;    # fallback when no day name matches
        for my $candidate (0 .. 6) {
            my $dayname = $day_names[$candidate];
            # an undefined or empty name would turn the pattern into /^\b/
            next unless defined $dayname && length $dayname;
            if ($token->[1] =~ /^$dayname\b/i) {
                $first_day = $candidate;
                last;
            }
        }
        $day_id = $first_day;
        $parse_func = \&amica_parse_more_food;
    }
}
-
# Parser state: wait for the first <p> start tag, then hand over to
# amica_parse_first_day.  Every other token is ignored.
sub amica_parse_to_start {
    my ($token) = @_;
    my ($type, $tag) = ($token->[0], $token->[1]);

    $parse_func = \&amica_parse_first_day if ($type eq 'S' && $tag eq 'p');
}
-
# Return the ISO 8601 week number (strftime "%V", two digits) of a date.
#
# Parameters:
#   $mday - day of month (1-31)
#   $mon  - month (1-12)
#   $year - optional four-digit year; defaults to the current year.
#           Callers that know the year should pass it, because the week
#           number of a given day/month differs from year to year.
sub get_week {
    my ($mday, $mon, $year) = @_;

    # struct tm years count from 1900; localtime already returns that form
    my $tm_year = defined $year ? $year - 1900 : (localtime)[5];

    # isdst = -1 lets mktime decide whether daylight saving time applies
    my $epoch = mktime(0, 0, 0, $mday, $mon - 1, $tm_year, 0, 0, -1);
    return strftime("%V", localtime($epoch));
}

# Parser state: read the menu date that follows the heading.
#
# A text token starting with "YYYY-M-D " sets $week to the ISO week of that
# date; the year found in the text is used rather than the current year.
# The closing </h2> tag switches the parser to amica_parse_to_start.
sub amica_parse_date {
    my $token = shift;

    if ($token->[0] eq 'T') {
        my $text = $token->[1];
        if ($text =~ /^(\d\d\d\d)-(\d?\d)-(\d?\d) /) {
            my ($year, $mon, $mday) = ($1, $2, $3);
            $week = get_week($mday, $mon, $year);
        }
    } elsif ($token->[0] eq 'E' && $token->[1] eq 'h2') {
        $parse_func = \&amica_parse_to_start;
    }
}
-
# Parser state: wait for the <h2> start tag whose id attribute contains
# "HeadingMenu", then hand over to amica_parse_date.
sub amica_parse_to_date {
    my ($token) = @_;
    my ($type, $tag, $attr_ref) = ($token->[0], $token->[1], $token->[2]);

    if ($type eq 'S' && $tag eq 'h2') {
        $parse_func = \&amica_parse_date if ($attr_ref->{'id'} =~ /HeadingMenu/);
    }
}
-
# Parse a downloaded Amica menu page.
#
# Parameters:
#   $fname    - path of the saved HTML file
#   $info_ref - array ref describing the restaurant; element 0 is its title
#
# Returns an array ref: [ title, "", week, [ per-day food lists ], $info_ref ].
# Dies if the file cannot be opened by HTML::TokeParser.
sub parse_amica {
    my ($fname, $info_ref) = @_;
    my $p = HTML::TokeParser->new($fname) or die("Can't open file $fname");

    # start every page from a clean parser state
    ($week, $day_id) = ("", 0);
    @cur_day_foods = ();
    @week_foods = ();
    $parse_func = \&amica_parse_to_date;

    # each handler may replace $parse_func to move to the next state
    while (my $token = $p->get_token) {
        $parse_func->($token);
    }

    my $title = $info_ref->[0];
    return [ $title, "", $week, [ @week_foods ], $info_ref ];
}
-
# Look through a saved page for a link to this week's Finnish menu.
#
# Returns "" when a <meta name="TITLE"> shows that the page is not the
# English one and is for the current week.  Otherwise returns the href of
# the most recent <a> seen before a text starting with "ruokalista" whose
# date range or week number is the current week.  Returns "" if nothing
# matches.  Dies if the file cannot be opened.
#
# Parameter: $fname - path of the saved HTML file.
sub parse_amica_get_finnish_url {
    my ($fname) = @_;
    my $p = HTML::TokeParser->new($fname) or die("Can't open file $fname");

    my $this_week = strftime("%V", localtime());
    my $last_url = "";
    my $week = "";

    while (my $token = $p->get_token) {
        my ($type, $value) = ($token->[0], $token->[1]);

        if ($type eq 'S') {
            my $attr_ref = $token->[2];

            if ($value eq 'meta') {
                if ($attr_ref->{'name'} eq "TITLE") {
                    my $content = $attr_ref->{'content'};
                    if ($content =~ /(\d+)\.(\d+)\.? *- *(\d+)\.(\d+)/) {
                        # "d.m. - d.m."
                        my ($mday, $mon) = ($1, $2);
                        $week = get_week($mday, $mon);
                    } elsif ($content =~ /(\d+)\.? *- *(\d+)\.(\d+)/) {
                        # "d. - d.m.": the month comes from the end date
                        my ($mday, $mon) = ($1, $3);
                        $week = get_week($mday, $mon);
                    }
                    return "" if ($content !~ /English/i && $week == $this_week);
                }
            } elsif ($value eq 'a') {
                # remember the link; the text describing it comes later
                $last_url = $attr_ref->{'href'};
            }
        } elsif ($type eq 'T') {
            my $has_week = 1;

            if ($value =~ /^ruokalista (\d+)\.(\d+)\.? *- *(\d+)\.(\d+)/i) {
                my ($mday, $mon) = ($1, $2);
                $week = get_week($mday, $mon);
            } elsif ($value =~ /^ruokalista (\d+)\.? *- *(\d+)\.(\d+)/i) {
                my ($mday, $mon) = ($1, $3);
                $week = get_week($mday, $mon);
            } elsif ($value =~ /^ruokalista (viikko|vko) (\d+)/i) {
                $week = $2;
            } else {
                $has_week = 0;
            }

            return $last_url if ($has_week && $week == $this_week);
        }
    }
    return "";
}
-
# Find the menu link on a saved restaurant page.
#
# Returns the href of the first <a> start tag that follows a
# <td title="Ruokalistat"> start tag, or "" if there is none.
# Dies if the file cannot be opened.
#
# Parameter: $fname - path of the saved HTML file.
sub parse_amica_url {
    my ($fname) = @_;
    my $p = HTML::TokeParser->new($fname) or die("Can't open file $fname");

    my $seen_menu_cell = 0;
    while (my $token = $p->get_token) {
        next if ($token->[0] ne 'S');

        my ($tag, $attr_ref) = ($token->[1], $token->[2]);
        if ($tag eq 'td' && $attr_ref->{'title'} eq 'Ruokalistat' && !$seen_menu_cell) {
            $seen_menu_cell = 1;
        } elsif ($tag eq 'a' && $seen_menu_cell) {
            return $attr_ref->{'href'};
        }
    }
    return "";
}
-
# Download (if needed) and parse the page of every entry in @restaurant_info.
#
# Parameter: $use_old - when true, an existing amicaN.temp.html file is
#            reused instead of being downloaded again.
# Returns:   a list with one parse_amica result per restaurant whose page
#            file exists.
sub get_amica_restaurant {
    my $use_old = shift;
    my @restaurants = ();

    for my $index (0 .. $#restaurant_info) {
        my @info = @{ $restaurant_info[$index] };
        my $url = $info[1];
        my $temp_fname = "amica$index.temp.html";

        my $need_fetch = (!-f $temp_fname || !$use_old);
        if ($need_fetch && $url ne "") {
            # download to a .tmp name first so a failed fetch never
            # replaces an existing page
            system("wget -q --timeout=10 -O $temp_fname.tmp '$url' && mv $temp_fname.tmp $temp_fname");
        }

        next unless -f $temp_fname;
        push @restaurants, parse_amica($temp_fname, \@info);
    }
    return @restaurants;
}
-
- 1;
|