use vars qw(@day_names);
# Restaurants whose menus are fetched and parsed.
# Every entry is an array ref with three fields, in this order:
#   0: display title of the restaurant
#   1: URL of the menu page to download
#   2: alignment value ("left"/"right"), passed through untouched as $align
#      by parse_sodexo
#
# Earlier menu URLs, kept for reference:
#   [ "(TaY) Sodexo Linna", "http://www.sodexo.fi/fi-FI/linna/lounas/", "right" ],
#   [ "(TTY) Sodexo Erkkeri", "http://www.sodexo.fi/fi-FI/erkkeri/lounas/", "left" ]
my @restaurant_info = (
  [
    "(TaY) Sodexo Linna",
    "http://www.sodexo.fi/linna",
    "right",
  ],
  [
    "(TTY) Sodexo Erkkeri",
    "http://www.sodexo.fi/erkkeri",
    "left",
  ],
);
# Parser state shared by all sodexo_* handlers in this file.
my $cur_text;       # allergy/property text collected for the current food
my $cur_title;      # title ("type: description") of the current food
my $parse_func;     # code ref of the handler that receives the next token
my $day_id;         # index of the day currently being collected
my $week;           # week number captured from the "Viikko N" heading
my @cur_day_foods;  # food strings collected for the current day
my @week_foods;     # one array ref of food strings per finished day
# Close the day that is currently being collected: a copy of its food list
# becomes the next entry of @week_foods, the working list is emptied and the
# day index moves forward by one.
sub sodexo_finish_day {
  # splice with no offset removes and returns every element, leaving the
  # working array empty for the next day.
  my @finished = splice @cur_day_foods;
  push @week_foods, \@finished;
  $day_id += 1;
}
# Convert the UTF-8 byte sequences of the Finnish letters a-umlaut, o-umlaut
# and their upper-case forms into the matching single ISO-8859-1 bytes.
# All other bytes are returned unchanged.
#
# Arguments: the string to convert.
# Returns:   the converted copy; the argument itself is not modified.
#
# The byte values are written as hex escapes so the conversion does not
# depend on the encoding this source file happens to be saved in, and a
# lexical variable is used so the caller's $_ is left alone.
sub utf8_to_8859 {
  my ($text) = @_;
  $text =~ s/\xC3\xA4/\xE4/g;  # a-umlaut, lower case
  $text =~ s/\xC3\xB6/\xF6/g;  # o-umlaut, lower case
  $text =~ s/\xC3\x84/\xC4/g;  # A-umlaut, upper case
  $text =~ s/\xC3\x96/\xD6/g;  # O-umlaut, upper case
  return $text;
}
# Terminal parser state: installed once the closing </tbody> has been seen.
# Every further token is ignored.
sub sodexo_parse_finish {
  return;
}
# Parser state entered after a food entry has been stored. Decides what the
# next structural tag means:
#   </tbody> - the menu table is over: close the day and stop parsing
#   <td>     - another cell follows: let sodexo_parse_to_food look at this
#              very token
#   <th>     - a header cell starts: close the day and wait for food cells
# Any other token is ignored.
sub sodexo_parse_after_food {
  my ($tok) = @_;
  my $kind = $tok->[0];

  if ($kind eq 'E') {
    return unless $tok->[1] eq 'tbody';
    sodexo_finish_day();
    $parse_func = \&sodexo_parse_finish;
    return;
  }

  return unless $kind eq 'S';

  my $tag = $tok->[1];
  if ($tag eq 'td') {
    # Switch state first; the handler may pick a more specific state itself.
    $parse_func = \&sodexo_parse_to_food;
    sodexo_parse_to_food($tok);
  } elsif ($tag eq 'th') {
    sodexo_finish_day();
    $parse_func = \&sodexo_parse_to_food;
  }
}
# Store the food that has just been parsed in the current day's list.
# The entry is "title (properties)" when property text was collected and the
# bare title otherwise. Afterwards the parser waits for the next table cell.
sub sodexo_finish_food {
  my $entry = $cur_text eq ""
    ? "$cur_title"
    : "$cur_title ($cur_text)";
  push @cur_day_foods, $entry;
  $parse_func = \&sodexo_parse_after_food;
}
# Parser state inside a "food-properties" cell. Collects the property texts
# of the current food into $cur_text as a comma-separated list and finishes
# the food when the cell closes.
#
# Text tokens:
#   - any text containing a "/" is treated as a separator and skipped
#   - other text is trimmed; pieces that are empty after trimming are
#     skipped so that whitespace between tags does not add dangling ", "
#     separators
# </td> ends the cell and stores the food via sodexo_finish_food.
sub sodexo_parse_allergy {
  my $token = shift;
  if ($token->[0] eq 'T') {
    my $text = $token->[1];
    return if $text =~ m{/};
    $text =~ s/^\s+//;
    $text =~ s/\s+$//;
    return if $text eq "";
    $cur_text .= ", " if $cur_text ne "";
    $cur_text .= $text;
  } elsif ($token->[0] eq 'E' && $token->[1] eq 'td') {
    sodexo_finish_food();
  }
}
# Parser state after the food description: skip tokens until the
# <td class="food-properties"> cell opens, then hand over to
# sodexo_parse_allergy.
sub sodexo_parse_to_allergy {
  my ($tok) = @_;
  return unless $tok->[0] eq 'S' && $tok->[1] eq 'td';

  # For start tags the third token field holds the attribute hash.
  my $class = $tok->[2]{'class'};
  $parse_func = \&sodexo_parse_allergy if $class eq 'food-properties';
}
# Parser state inside a "food-desc" cell: the first text token containing
# something other than whitespace is appended (after charset conversion) to
# the current title, then the parser looks for the properties cell.
sub sodexo_parse_food {
  my ($tok) = @_;
  my ($kind, $text) = @{$tok}[0, 1];
  return if $kind ne 'T';
  return if $text !~ /\S/;

  $cur_title .= utf8_to_8859($text);
  $parse_func = \&sodexo_parse_to_allergy;
}
# Parser state inside a "food-type" cell: the first text token containing
# something other than whitespace becomes the title prefix "<type>: ", then
# the parser moves on to the food description.
sub sodexo_parse_type {
  my ($tok) = @_;
  my ($kind, $text) = @{$tok}[0, 1];
  return unless $kind eq 'T' && $text =~ /\S/;

  # NOTE(review): if the character between ^ and $ is a plain space, this
  # test can never match here (the text is known to contain non-whitespace).
  # It may originally have been a non-breaking space - confirm.
  unless ($text =~ /^ $/) {
    $cur_title = utf8_to_8859($text) . ": ";
  }
  $parse_func = \&sodexo_parse_food;
}
# Parser state between foods: waits for the next food cell and keeps the day
# index in step with the weekday headings found on the page.
#
# Text tokens:
#   The text is compared with the names in @day_names from the current day
#   index up to index 6. When it names a later day, the days in between are
#   closed (each with whatever is in the working list, normally nothing) so
#   that the day index ends up at the named day. This lets the list skip
#   days, e.g. a public holiday such as Helatorstai.
#
# <td> start tags:
#   class "food-type" - reset title/properties, continue in sodexo_parse_type
#   class "food-desc" - reset title/properties, continue in sodexo_parse_food
#   anything else     - ignored
sub sodexo_parse_to_food {
  my $token = shift;

  if ($token->[0] eq 'T') {
    for my $day ($day_id .. 6) {
      next unless defined $day_names[$day]
        && $token->[1] eq $day_names[$day];
      # sodexo_finish_day advances $day_id by one on every call.
      sodexo_finish_day() while $day_id < $day;
      last;
    }
    return;
  }

  return unless $token->[0] eq 'S' && $token->[1] eq "td";

  # For start tags the third token field holds the attribute hash.
  my $class = $token->[2]{'class'};
  return unless defined $class;

  my %handler_for = (
    "food-type" => \&sodexo_parse_type,
    "food-desc" => \&sodexo_parse_food,
  );
  if (my $handler = $handler_for{$class}) {
    $cur_title = '';
    $cur_text = '';
    $parse_func = $handler;
  }
}
# Parser state inside the "list-date" element: waits for a text token of the
# form "Viikko <number>", stores the number in $week and then starts looking
# for food cells.
sub sodexo_parse_week {
  my ($tok) = @_;
  return unless $tok->[0] eq 'T';
  return unless $tok->[1] =~ /Viikko (\d+)/;

  $week = $1;
  $parse_func = \&sodexo_parse_to_food;
}
# Initial parser state: skip everything until <div class="list-date"> opens,
# then hand over to sodexo_parse_week to read the week number.
sub sodexo_parse_to_week {
  my ($tok) = @_;
  return unless $tok->[0] eq 'S' && $tok->[1] eq 'div';

  # For start tags the third token field holds the attribute hash.
  my $class = $tok->[2]{'class'};
  $parse_func = \&sodexo_parse_week if $class eq "list-date";
}
# Parse one saved Sodexo menu page into a restaurant record.
#
# Arguments:
#   $fname    - path of the saved HTML file, handed to HTML::TokeParser
#   $info_ref - array ref [ title, url, align ] (an entry of @restaurant_info)
#
# Returns an array ref:
#   [ $title, "", $week, [ one food list per day ], [ $title, $url, "M", $align ] ]
#
# Dies when the parser cannot be created for $fname.
sub parse_sodexo {
  my ($fname, $info_ref) = @_;
  my ($title, $url, $align) = @{$info_ref}[0 .. 2];

  my $p = HTML::TokeParser->new($fname)
    or die("Can't open file $fname: $!");

  # Start from a clean shared parser state for this page.
  $week = "";
  $day_id = 0;
  @cur_day_foods = ();
  @week_foods = ();

  # Weekday number as `date +%w` would print it: 0 = Sunday .. 6 = Saturday.
  my $week_day = (localtime)[6];

  # Add one empty day for every weekday before today (nothing on Sunday or
  # Monday), so the day index starts at today's position in the week.
  sodexo_finish_day() for 1 .. ($week_day - 1);

  # Feed every token to whichever handler the state machine currently selects.
  $parse_func = \&sodexo_parse_to_week;
  while (my $token = $p->get_token) {
    $parse_func->($token);
  }

  return [ $title, "", $week, [ @week_foods ], [ $title, $url, "M", $align ] ];
}
# Download (when needed) and parse the menu page of every restaurant in
# @restaurant_info.
#
# Arguments:
#   $use_old - when true, an already downloaded sodexoN.temp.html is reused;
#              when false, every page is fetched again
#
# Returns the list of records produced by parse_sodexo, one per restaurant.
# Returns undef as soon as a page is not available on disk after the
# download step (kept as-is for existing callers).
#
# The page is fetched into "<file>.tmp" and only renamed over the real file
# after wget succeeded, so a failed download never replaces an older copy.
sub get_sodexo_restaurants {
  my $use_old = shift;
  my $count = 0;
  my @restaurants = ();
  foreach my $info (@restaurant_info) {
    my $temp_fname = "sodexo$count.temp.html";
    my $download = "$temp_fname.tmp";
    my $url = $info->[1];
    if (!-f $temp_fname || !$use_old) {
      # List-form system: no shell is involved, so the URL needs no quoting.
      my $status = system('wget', '-q', '--timeout=10', '-O', $download, $url);
      if ($status == 0) {
        rename($download, $temp_fname)
          or warn "Can't rename $download to $temp_fname: $!\n";
      }
      return undef if (!-f $temp_fname);
    }
    # Hand parse_sodexo its own copy of the entry.
    push @restaurants, parse_sodexo($temp_fname, [ @{$info} ]);
    ++$count;
  }
  return @restaurants;
}
1;