Browse Source

Rewrite juvenes parser for the new site.

Toni Fadjukoff 11 years ago
parent
commit
59c80fb262
1 changed file with 56 additions and 157 deletions
  1. 56 157
      juvenes.pl

+ 56 - 157
juvenes.pl View File

@@ -1,4 +1,5 @@
1 1
 use vars qw(@day_names);
2
+use JSON;
2 3
 
3 4
 my $pinni_title = "(TaY) Café Pinni";
4 5
 my $bio_title = "(TAYS) Bio";
@@ -6,64 +7,26 @@ my $kliininen_title = "(TAYS) Arvo";
6 7
 my $kliininen_fusion_title = "(TAYS) Arvo Fusion Kitchen";
7 8
 my $zip_salaattibaari_title = "(TTY) Zip Salaattibaari";
8 9
 my @restaurant_info = (
9
-  [ "(TaY) Yliopiston Ravintola", "http://www.juvenes.fi/Suomeksi/Ravintolat_ja_kahvilat/Opiskelijaravintolat/_TaY__Yliopiston_Ravintola", "M", "left" ],
10
-  [ "(TaY) Yliopiston Ravintola / Salaattibaari", "http://www.juvenes.fi/Suomeksi/Ravintolat_ja_kahvilat/Opiskelijaravintolat/_TaY__Yliopiston_Ravintola/Salaattibaari", "", "left" ],
11
-  [ "(TaY) Fusion Kitchen", "http://www.juvenes.fi/Suomeksi/Ravintolat_ja_kahvilat/Opiskelijaravintolat/_TaY__Yliopiston_Ravintola/Fusion_Kitchen", "", "left" ],
12
-  [ $pinni_title, "http://www.juvenes.fi/Suomeksi/Ravintolat_ja_kahvilat/Opiskelijaravintolat/_TaY__Caf__Pinni", "M", "middle" ],
13
-  [ $bio_title, "http://www.juvenes.fi/Suomeksi/Ravintolat_ja_kahvilat/Opiskelijaravintolat/_TaY_Kauppi__Medica_Bio", "M", "left" ],
14
-  [ $kliininen_title, "http://www.juvenes.fi/Suomeksi/Ravintolat_ja_kahvilat/Opiskelijaravintolat/_TaY_Kauppi__Medica_Arvo", "M", "left" ],
15
-  [ $kliininen_fusion_title, "http://www.juvenes.fi/Suomeksi/Ravintolat_ja_kahvilat/Opiskelijaravintolat/_TaY_Kauppi__Medica_Arvo/Fusion_Kitchen", "M", "left" ],
16
-  [ "(TTY) Newton", "http://www.juvenes.fi/Suomeksi/Ravintolat_ja_kahvilat/Opiskelijaravintolat/_TTY__Newton", "", "left" ],
17
-  [ "(TTY) Zip", "http://www.juvenes.fi/Suomeksi/Ravintolat_ja_kahvilat/Opiskelijaravintolat/_TTY__Zip", "", "right" ],
18
-  [ "(TTY) Edison", "http://www.juvenes.fi/Suomeksi/Ravintolat_ja_kahvilat/Opiskelijaravintolat/_TTY__Edison", "", "middle" ],
19
-# There is no Salaattibaari anymore?
20
-#  [ $zip_salaattibaari_title, "http://www.juvenes.fi/Suomeksi/Ravintolat_ja_kahvilat/Opiskelijaravintolat/_TTY__Zip/Salaattibaari", "", "right" ],
21
-  [ "(TTY) Pastabaari", "http://www.juvenes.fi/Suomeksi/Ravintolat_ja_kahvilat/Opiskelijaravintolat/_TTY__Caf____Fast_Voltti/Pastabaari", "", "middle" ],
22
-  [ "(TTY) Fast Voltti", "http://www.juvenes.fi/Suomeksi/Ravintolat_ja_kahvilat/Opiskelijaravintolat/_TTY__Caf____Fast_Voltti", "", "middle" ],
23
-  [ "(TTY) Fusion Kitchen", "http://www.juvenes.fi/Suomeksi/Ravintolat_ja_kahvilat/Opiskelijaravintolat/_TTY__Newton/Fusion_Kitchen", "", "left" ],
24
-  [ "(TAMK) Dot", "http://www.juvenes.fi/Suomeksi/Ravintolat_ja_kahvilat/Opiskelijaravintolat/_TAMK__Dot__Ziberia_", "", "middle" ]
10
+  [ "(TaY) Yliopiston Ravintola", "http://www.juvenes.fi/fi-fi/ravintolatjakahvilat/opiskelijaravintolat/tayp%C3%A4%C3%A4kampus/yliopistonravintola.aspx", "M", "left", 13, 60 ],
11
+  [ "(TaY) Yliopiston Ravintola / Salaattibaari", "http://www.juvenes.fi/fi-fi/ravintolatjakahvilat/opiskelijaravintolat/tayp%C3%A4%C3%A4kampus/yliopistonravintola/salaattibaari.aspx", "", "left", 13, 5 ],
12
+  [ "(TaY) Fusion Kitchen", "http://www.juvenes.fi/fi-fi/ravintolatjakahvilat/opiskelijaravintolat/tayp%C3%A4%C3%A4kampus/yliopistonravintola/fusionkitchen.aspx", "", "left", 13, 3 ],
13
+  [ $pinni_title, "http://www.juvenes.fi/fi-fi/ravintolatjakahvilat/opiskelijaravintolat/tayp%C3%A4%C3%A4kampus/caf%C3%A9lunchpinni.aspx", "M", "middle", 31, 60 ],
14
+  [ $bio_title, "http://www.juvenes.fi/fi-fi/ravintolatjakahvilat/opiskelijaravintolat/taykaupinkampus/medicabio.aspx", "M", "left", 5, 60 ],
15
+  [ $kliininen_title, "http://www.juvenes.fi/fi-fi/ravintolatjakahvilat/opiskelijaravintolat/taykaupinkampus/medicaarvo.aspx", "M", "left", 27, 60 ],
16
+  [ $kliininen_fusion_title, "http://www.juvenes.fi/fi-fi/ravintolatjakahvilat/opiskelijaravintolat/taykaupinkampus/medicaarvo/fusionkitchen.aspx", "M", "left", 27, 3 ],
17
+  [ "(TTY) Newton", "http://www.juvenes.fi/fi-fi/ravintolatjakahvilat/opiskelijaravintolat/ttykampus/newton.aspx", "", "left", 6, 60],
18
+  [ "(TTY) Zip", "http://www.juvenes.fi/fi-fi/ravintolatjakahvilat/opiskelijaravintolat/ttykampus/zip.aspx", "", "right", 12, 60 ],
19
+  [ "(TTY) Edison", "http://www.juvenes.fi/fi-fi/ravintolatjakahvilat/opiskelijaravintolat/ttykampus/edison.aspx", "", "middle", 2, 60 ],
20
+  [ "(TTY) Pastabaari", "http://www.juvenes.fi/fi-fi/ravintolatjakahvilat/opiskelijaravintolat/ttykampus/caf%C3%A9pastabaarivoltti.aspx", "", "middle", 26, 11 ],
21
+  [ "(TTY) Fast Voltti", "http://www.juvenes.fi/fi-fi/ravintolatjakahvilat/opiskelijaravintolat/ttykampus/caf%C3%A9fastvoltti.aspx", "", "middle", 2, 4 ],
22
+  [ "(TTY) Fusion Kitchen", "http://www.juvenes.fi/fi-fi/ravintolatjakahvilat/opiskelijaravintolat/ttykampus/newton/fusionkitchen.aspx", "", "left", 6, 3 ],
23
+  [ "(TAMK) Dot", "http://www.juvenes.fi/fi-fi/ravintolatjakahvilat/opiskelijaravintolat/tamkdot.aspx", "", "middle", 15, 1 ]
25 24
 );
26 25
 
27 26
 my @restaurants;
28
-my ($parse_func, $week, $open_hours, $day_id, $cur_title);
27
+my ($open_hours, $day_id, $cur_title);
29 28
 my ($cur_food, @cur_day_foods, @week_foods);
30 29
 
31
-sub parse_to_eof {
32
-}
33
-
34
-sub parse_skip_to_end_of_div {
35
-  my $token = shift;
36
-  
37
-  if ($token->[0] eq 'E' && $token->[1] eq 'div') {
38
-    $parse_func = \&parse_more_food;
39
-  }
40
-}
41
-
42
-sub parse_open_hours_begin {
43
-  my $token = shift;
44
-
45
-  if ($token->[0] eq 'T') {
46
-    if ($token->[1] =~ /^Aukiolo/) {
47
-      $parse_func = \&parse_open_hours_end;
48
-    }
49
-  }
50
-}
51
-
52
-sub parse_open_hours_end {
53
-  my $token = shift;
54
-
55
-  if ($token->[0] eq 'T') {
56
-    my $text = $token->[1];
57
-    if ($text eq 'Erityisruokavaliot') {
58
-      $parse_func = \&parse_to_eof;
59
-    } else {
60
-      $text =~ s/\n//g;
61
-      $text =~ s/ +$//;
62
-      $open_hours .= "$text\n" if ($text ne "");
63
-    }
64
-  }
65
-}
66
-
67 30
 sub finish_food {
68 31
   chomp $cur_food;
69 32
   if ($cur_food =~ /Liha paniini.*tai Kasvis paniini/i && $cur_title eq $pinni_title) {
@@ -80,103 +43,6 @@ sub finish_day {
80 43
   $day_id = $day_id + 1;
81 44
 }
82 45
 
83
-sub parse_more_food {
84
-  my $token = shift;
85
-  
86
-  if ($token->[0] eq 'S') {
87
-    my %attrs = %{$token->[2]};
88
-    if ($token->[1] eq 'div') {
89
-      if ($attrs{'style'} =~ /display: *none/) {
90
-	# infobox, skip
91
-	$parse_func = \&parse_skip_to_end_of_div;
92
-      } elsif ($attrs{'class'} eq 'Column') {
93
-	# end of food
94
-	finish_food();
95
-	finish_day();
96
-	$parse_func = \&parse_open_hours_begin;
97
-      }
98
-    } elsif ($token->[1] eq 'br') {
99
-      if ($br_is_new_food) {
100
-	finish_food();
101
-      } else {
102
-	$cur_food .= "\n" if ($cur_food ne "" && substr($cur_food, -1) ne "\n");
103
-      }
104
-    }
105
-  } elsif ($token->[0] eq 'T') {
106
-    my $text = $token->[1];
107
-
108
-    for (my $day = $day_id+1; $day <= 6; $day++) {
109
-      if ($text eq $day_names[$day]) {
110
-        while ($day >= $day_id) {
111
-          finish_food();
112
-          finish_day();
113
-          $day = $day - 1;
114
-        }
115
-        return;
116
-      }
117
-    }
118
-    if ($text eq "&nbsp;") {
119
-      # next food
120
-      finish_food();
121
-    } else {
122
-      $text =~ tr/\r\n\t/   /;
123
-      $text =~ s/ +/ /g;
124
-      $text =~ s/^ +//;
125
-      $text =~ s/^\.+//;
126
-      $text =~ s/ +$//;
127
-      $text =~ s/sisältää ([^, \)]+)/sis.$1/ig;
128
-      $cur_food .= $text;
129
-    }
130
-  }
131
-}
132
-
133
-sub parse_monday {
134
-  my $token = shift;
135
-  
136
-  for (my $day = 0; $day <= 6; $day++) {
137
-    if ($token->[0] eq 'T' and $token->[1] eq $day_names[$day]) {
138
-      while ($day > 0) {
139
-        finish_day();
140
-        $day = $day - 1;
141
-      }
142
-      $parse_func = \&parse_more_food;
143
-      break;
144
-    }
145
-  }
146
-}
147
-
148
-sub parse_week {
149
-  my $token = shift;
150
-  
151
-  if ($token->[0] eq 'T') {
152
-    if ($token->[1] =~ /Viikko: (\d+)/) {
153
-      $week = $1;
154
-      $parse_func = \&parse_monday;
155
-    }
156
-  }
157
-}
158
-
159
-sub parse_juvenes {
160
-  my ($fname, $info_ref) = @_;
161
-  my $p = HTML::TokeParser->new($fname) or die("Can't open file $fname");
162
-
163
-  my $title = @{$info_ref}[0];
164
-  $week = "";
165
-  $open_hours = "";
166
-  $day_id = 0;
167
-  $cur_food = "";
168
-  @cur_day_foods = ();
169
-  @week_foods = ();
170
-  $br_is_new_food = $title eq $zip_salaattibaari_title;
171
-  $cur_title = $title;
172
-  
173
-  $parse_func = \&parse_week;
174
-  while (my $token = $p->get_token) {
175
-    &$parse_func($token);
176
-  }
177
-  push @restaurants, [ $title, $open_hours, $week, [ @week_foods ], $info_ref ];
178
-}
179
-
180 46
 sub can_merge_bio_kliininen {
181 47
   my $day = shift;
182 48
 
@@ -215,16 +81,49 @@ sub try_merge_bio_kliininen {
215 81
 sub get_juvenes_restaurants {
216 82
   my $use_old = shift;
217 83
   my $count = 0;
84
+  # Loop over the restaurants
218 85
   foreach my $i (@restaurant_info) {
219 86
     my @info = @{$i};
220
-    my $temp_fname = "juvenes$count.temp.html";
221
-    my $url = $info[1];
222
-    if (!-f $temp_fname || !$use_old) {
223
-      system("wget -q --timeout=10 -O $temp_fname.tmp '$url' && mv $temp_fname.tmp $temp_fname");
224
-    }
225
-    if (-f $temp_fname) {
226
-      parse_juvenes($temp_fname, \@info);
87
+    my $kitchen = $info[4];
88
+    my $menutype = $info[5];
89
+    $title = $info[0];
90
+    $cur_title = $title;
91
+    $open_hours = "";
92
+    @week_foods = ();
93
+    my $week = `date +%V`;
94
+    chomp($week);
95
+    # Loop weekdays
96
+    for (my $weekday = 1; $weekday < 7; $weekday++) {
97
+      my $temp_fname = "juvenes$count-$weekday.temp.js";
98
+      my $url = "http://www.juvenes.fi/DesktopModules/Talents.LunchMenu/LunchMenuServices.asmx/GetMenuByWeekday?KitchenId=$kitchen&MenuTypeId=$menutype&Week=$week&Weekday=$weekday&lang='fi'&format=json";
99
+      if (!-f $temp_fname || !$use_old) {
100
+        system("wget -q --timeout=10 -O $temp_fname.tmp \"$url\" && mv $temp_fname.tmp $temp_fname");
101
+      }
102
+      if (-f $temp_fname) {
103
+        open(FILE, $temp_fname);
104
+        my $jsonp = do { local $/; <FILE> };
105
+        close(FILE);
106
+        # The response is wrapped as ({"d": json}); so the wrapper is stripped and the payload is parsed twice
107
+        my $data = from_json(substr($jsonp, 1, -2));
108
+        unless ($data->{'d'} eq 'null') {
109
+          $data = from_json($data->{'d'});
110
+          my $mealoptions = $data->{'MealOptions'};
111
+          # Loop over the different meals
112
+          foreach my $meal_info (@$mealoptions) {
113
+            my $menuitems = $meal_info->{'MenuItems'};
114
+            # Loop over the different foods in a meal
115
+            foreach my $food_info (@$menuitems) {
116
+              $cur_food .= "\n" if $cur_food ne ""; 
117
+              $cur_food .= $food_info->{'Name'};
118
+              $cur_food .= " (" . $food_info->{'Diets'} . ")" if $food_info->{'Diets'};
119
+            }
120
+            finish_food();
121
+          }
122
+        }
123
+        finish_day();
124
+      }
227 125
     }
126
+    push @restaurants, [ $title, $open_hours, $week, [ @week_foods ], \@info ];
228 127
     $count++;
229 128
   }
230 129
   return @restaurants;