|
- use vars qw(@day_names);
-
# Display titles that code further down in this file compares against by name.
my $pinni_title             = "(TaY) Café Pinni";
my $bio_title               = "(TAYS) Bio";
my $kliininen_title         = "(TAYS) Arvo";
my $kliininen_fusion_title  = "(TAYS) Arvo Fusion Kitchen";
my $zip_salaattibaari_title = "(TTY) Zip Salaattibaari";

# All menu pages share this URL prefix; only the trailing path differs.
my $juvenes_menu_root = "http://www.juvenes.fi/Suomeksi/Ravintolat_ja_kahvilat/Opiskelijaravintolat";

# One row per restaurant: [ title, menu page URL, flag, position ].
# The title and the URL are read in this file (parse_juvenes and
# get_juvenes_restaurants). NOTE(review): the last two columns ("M" or "",
# and left/middle/right) are not interpreted anywhere in this file --
# presumably the consumer of the parsed results uses them; confirm there.
my @restaurant_info = (
    [ "(TaY) Yliopiston Ravintola",                 "$juvenes_menu_root/_TaY__Yliopiston_Ravintola",                "M", "left" ],
    [ "(TaY) Yliopiston Ravintola / Salaattibaari", "$juvenes_menu_root/_TaY__Yliopiston_Ravintola/Salaattibaari",  "",  "left" ],
    [ "(TaY) Fusion Kitchen",                       "$juvenes_menu_root/_TaY__Yliopiston_Ravintola/Fusion_Kitchen", "",  "left" ],
    [ $pinni_title,                                 "$juvenes_menu_root/_TaY__Caf__Pinni",                          "M", "middle" ],
    [ $bio_title,                                   "$juvenes_menu_root/_TaY_Kauppi__Medica_Bio",                   "M", "left" ],
    [ $kliininen_title,                             "$juvenes_menu_root/_TaY_Kauppi__Medica_Arvo",                  "M", "left" ],
    [ $kliininen_fusion_title,                      "$juvenes_menu_root/_TaY_Kauppi__Medica_Arvo/Fusion_Kitchen",   "M", "left" ],
    [ "(TTY) Newton",                               "$juvenes_menu_root/_TTY__Newton",                              "",  "left" ],
    [ "(TTY) Zip",                                  "$juvenes_menu_root/_TTY__Zip",                                 "",  "right" ],
    [ "(TTY) Edison",                               "$juvenes_menu_root/_TTY__Edison",                              "",  "middle" ],
    [ $zip_salaattibaari_title,                     "$juvenes_menu_root/_TTY__Zip/Salaattibaari",                   "",  "right" ],
    [ "(TTY) Pastabaari",                           "$juvenes_menu_root/_TTY__Caf____Fast_Voltti/Pastabaari",       "",  "middle" ],
    [ "(TTY) Fast Voltti",                          "$juvenes_menu_root/_TTY__Caf____Fast_Voltti",                  "",  "middle" ],
    [ "(TTY) Fusion Kitchen",                       "$juvenes_menu_root/_TTY__Newton/Fusion_Kitchen",               "",  "left" ],
    [ "(TAMK) Dot",                                 "$juvenes_menu_root/_TAMK__Dot__Ziberia_",                      "",  "middle" ],
);
-
# Parsed results; parse_juvenes() appends one entry per restaurant page.
my @restaurants;

# State of the page currently being parsed (reset by parse_juvenes()):
#   $parse_func - code ref of the handler that receives the next token
#   $week       - week number captured from the "Viikko: N" text
#   $open_hours - opening-hours lines collected so far, newline-terminated
#   $day_id     - number of days already closed with finish_day()
#   $cur_title  - title of the restaurant being parsed
my ($parse_func, $week, $open_hours, $day_id, $cur_title);

# Food text being accumulated, the foods of the current day, and the
# per-day food lists collected for the whole week.
my ($cur_food, @cur_day_foods, @week_foods);

# True when a <br> tag ends the current food instead of adding a line break.
# Declared here with the rest of the parser state; it used to be an
# implicit, undeclared package global.
my $br_is_new_food;
-
# Terminal parser state: accepts any token and does nothing with it, so the
# rest of the page is read without changing any parser state.
sub parse_to_eof {
    return;
}
-
# Parser state used while inside a hidden <div>: every token is ignored
# until a closing </div> is seen, after which food parsing resumes.
#
# Argument: an HTML::TokeParser token (array ref; [0] is the token type,
# [1] the tag name for end tags).
sub parse_skip_to_end_of_div {
    my ($tok) = @_;

    return unless $tok->[0] eq 'E';
    return unless $tok->[1] eq 'div';

    $parse_func = \&parse_more_food;
}
-
# Parser state that waits for the opening-hours heading: once a text token
# starting with "Aukiolo" arrives, the following tokens are handed to
# parse_open_hours_end().
#
# Argument: an HTML::TokeParser token (array ref; [0] type, [1] text).
sub parse_open_hours_begin {
    my ($tok) = @_;

    return unless $tok->[0] eq 'T';

    $parse_func = \&parse_open_hours_end if $tok->[1] =~ /^Aukiolo/;
}
-
# Parser state that collects the opening-hours text. Each non-empty text
# token is appended to $open_hours as one newline-terminated line, with
# embedded newlines and trailing spaces removed. The text
# "Erityisruokavaliot" ends the section and switches to parse_to_eof().
#
# Argument: an HTML::TokeParser token (array ref; [0] type, [1] text).
sub parse_open_hours_end {
    my ($tok) = @_;

    return if $tok->[0] ne 'T';

    my $line = $tok->[1];
    if ($line eq 'Erityisruokavaliot') {
        $parse_func = \&parse_to_eof;
        return;
    }

    $line =~ s/\n//g;
    $line =~ s/ +$//;
    return if $line eq "";

    $open_hours .= "$line\n";
}
-
# Closes the food currently accumulated in $cur_food: strips one trailing
# newline, stores the text in @cur_day_foods unless it is empty, and resets
# $cur_food. For the Café Pinni restaurant the standing panini entry is
# dropped because it is offered every day.
sub finish_food {
    chomp $cur_food;

    my $is_daily_panini = $cur_title eq $pinni_title
        && $cur_food =~ /Liha paniini.*tai Kasvis paniini/i;

    if (!$is_daily_panini && $cur_food ne "") {
        push @cur_day_foods, $cur_food;
    }

    $cur_food = "";
}
-
# Closes the current day: stores a copy of @cur_day_foods as the next entry
# of @week_foods, empties the day list, and advances $day_id by one.
sub finish_day {
    my @day_snapshot = @cur_day_foods;
    push @week_foods, \@day_snapshot;

    @cur_day_foods = ();
    $day_id += 1;
}
-
# Main parser state for the weekly food list.
#
# Start tags:
#   <div style="display: none"> - hidden info box; skip to its closing tag
#   <div class="Column">        - end of the food list; close the current
#                                 food and day, then look for opening hours
#   <br>                        - ends the current food when
#                                 $br_is_new_food is set, otherwise adds a
#                                 single line break inside the food text
# Text tokens:
#   the name of the next day    - close the current food and day
#   a lone " "                  - close the current food
#   anything else               - whitespace-normalized and appended to the
#                                 food text
#
# Argument: an HTML::TokeParser token (array ref; [0] type, [1] tag name or
# text, [2] attribute hash ref for start tags).
sub parse_more_food {
    my $token = shift;
    my $type  = $token->[0];

    if ($type eq 'S') {
        my $tag = $token->[1];
        if ($tag eq 'div') {
            # Read the two attributes of interest without copying the whole
            # hash, and treat a missing attribute as an empty string so
            # plain <div> tags do not trigger uninitialized-value warnings.
            my $attrs = $token->[2] || {};
            my $style = defined $attrs->{'style'} ? $attrs->{'style'} : '';
            my $class = defined $attrs->{'class'} ? $attrs->{'class'} : '';

            if ($style =~ /display: *none/) {
                # infobox, skip
                $parse_func = \&parse_skip_to_end_of_div;
            } elsif ($class eq 'Column') {
                # end of food
                finish_food();
                finish_day();
                $parse_func = \&parse_open_hours_begin;
            }
        } elsif ($tag eq 'br') {
            if ($br_is_new_food) {
                finish_food();
            } else {
                # Never start a food with a newline or stack several.
                $cur_food .= "\n" if ($cur_food ne "" && substr($cur_food, -1) ne "\n");
            }
        }
    } elsif ($type eq 'T') {
        my $text = $token->[1];

        # Only look for a following day while one can still exist; an
        # undefined name must never compare equal to an empty text node.
        my $next_day = $day_id < 6 ? $day_names[$day_id + 1] : undef;

        if (defined $next_day && $text eq $next_day) {
            # day changed
            finish_food();
            finish_day();
        } elsif ($text eq " ") {
            # next food
            finish_food();
        } else {
            $text =~ tr/\r\n\t/ /;
            $text =~ s/ +/ /g;
            $text =~ s/^ +//;
            $text =~ s/^\.+//;
            $text =~ s/ +$//;
            # Shorten "sisältää X" to "sis.X".
            $text =~ s/sisältää ([^, \)]+)/sis.$1/ig;
            $cur_food .= $text;
        }
    }
}
-
# Parser state that waits for the first day heading of the week. Food
# parsing starts when the text equals $day_names[0]. If the text equals
# $day_names[1] instead, the first day is recorded as an empty day before
# food parsing starts.
#
# Argument: an HTML::TokeParser token (array ref; [0] type, [1] text).
sub parse_monday {
    my ($tok) = @_;

    return unless $tok->[0] eq 'T';

    my $label          = $tok->[1];
    my $is_first_day   = $label eq $day_names[0];
    my $is_second_day  = !$is_first_day && $label eq $day_names[1];
    return unless $is_first_day || $is_second_day;

    # The first day's heading never appeared: close it with no foods.
    finish_day() if $is_second_day;

    $parse_func = \&parse_more_food;
}
-
# Initial parser state: waits for a text token containing "Viikko: N",
# stores N in $week, and then starts looking for the first day heading.
#
# Argument: an HTML::TokeParser token (array ref; [0] type, [1] text).
sub parse_week {
    my ($tok) = @_;

    return unless $tok->[0] eq 'T';
    return unless $tok->[1] =~ /Viikko: (\d+)/;

    $week       = $1;
    $parse_func = \&parse_monday;
}
-
# Parses one downloaded menu page and appends the result to @restaurants.
#
# Arguments:
#   $fname    - path of the saved HTML page
#   $info_ref - array ref describing the restaurant; element 0 is its title
#
# The appended entry is
#   [ title, opening hours text, week number, [ per-day food lists ], $info_ref ].
#
# Dies when the page cannot be opened.
sub parse_juvenes {
    my ($fname, $info_ref) = @_;

    # The constructor returns undef and sets $! when the file can't be read,
    # so include the reason in the message.
    my $p = HTML::TokeParser->new($fname)
        or die("Can't open file $fname: $!");

    my $title = $info_ref->[0];

    # Reset all per-page parser state.
    $week = "";
    $open_hours = "";
    $day_id = 0;
    $cur_food = "";
    @cur_day_foods = ();
    @week_foods = ();
    # Only the Zip salad bar page separates foods with <br>.
    $br_is_new_food = $title eq $zip_salaattibaari_title;
    $cur_title = $title;

    # Run the state machine: each handler may replace $parse_func to move
    # to the next state.
    $parse_func = \&parse_week;
    while (my $token = $p->get_token) {
        $parse_func->($token);
    }

    push @restaurants, [ $title, $open_hours, $week, [ @week_foods ], $info_ref ];
}
-
# Returns true when the Bio and Arvo ("kliininen") restaurants list exactly
# the same foods, in the same order, for the given day.
#
# Argument: $day - index into each restaurant's per-day food lists.
#
# A restaurant that is absent from @restaurants, or has no list for that
# day, contributes an empty menu, so two missing menus compare as equal.
sub can_merge_bio_kliininen {
    my $day = shift;

    # Concatenated menu text per title of interest.
    my %menu_text = (
        $bio_title       => "",
        $kliininen_title => "",
    );

    foreach my $entry (@restaurants) {
        my ($name, $days_ref) = @{$entry}[0, 3];
        next unless exists $menu_text{$name};

        my $day_foods = $days_ref->[$day] || [];
        foreach my $dish (@{$day_foods}) {
            $menu_text{$name} .= "$dish\n";
        }
    }

    return $menu_text{$bio_title} eq $menu_text{$kliininen_title};
}
-
# Handles merging of the Bio and Arvo ("kliininen") entries for one day
# when can_merge_bio_kliininen() reports identical menus.
#
# Arguments:
#   $title_ref - reference to the restaurant title; " + Kliininen" is
#                appended in place for the Bio entry when the menus match
#   $day       - day index passed on to can_merge_bio_kliininen()
#
# Returns 1 only for the Arvo entry when the menus match, and 0 in every
# other case.
sub try_merge_bio_kliininen {
    my ($title_ref, $day) = @_;
    my $name = ${$title_ref};

    my $is_bio       = $name eq $bio_title;
    my $is_kliininen = !$is_bio && $name eq $kliininen_title;

    return 0 unless $is_bio || $is_kliininen;
    return 0 unless can_merge_bio_kliininen($day);
    return 1 if $is_kliininen;

    ${$title_ref} .= " + Kliininen";
    return 0;
}
-
# Downloads (or reuses) the menu page of every restaurant in
# @restaurant_info, parses each page that is available, and returns the
# accumulated @restaurants list.
#
# Argument: $use_old - when true, an existing juvenesN.temp.html file is
#                      reused instead of being downloaded again.
#
# A page is fetched into "<name>.tmp" and moved into place only when wget
# succeeds, so a failed download never overwrites an older good copy.
# Restaurants whose page is still missing afterwards are skipped.
sub get_juvenes_restaurants {
    my $use_old = shift;
    my $count = 0;

    foreach my $i (@restaurant_info) {
        # Work on a copy so the stored info is independent of the table.
        my @info = @{$i};
        my $temp_fname = "juvenes$count.temp.html";
        my $download_fname = "$temp_fname.tmp";
        my $url = $info[1];

        if (!-f $temp_fname || !$use_old) {
            # List-form system() runs wget directly, so neither the URL nor
            # the file names pass through a shell.
            my $status = system('wget', '-q', '--timeout=10',
                                '-O', $download_fname, $url);
            if ($status == 0) {
                rename($download_fname, $temp_fname)
                    or warn "Can't rename $download_fname to $temp_fname: $!\n";
            }
        }

        if (-f $temp_fname) {
            parse_juvenes($temp_fname, \@info);
        }
        $count++;
    }

    return @restaurants;
}
-
- 1;
|