Browse Source

Fix Sodexo parsing.

Toni Fadjukoff 10 years ago
parent
commit
cc3db28f1f
1 changed files with 53 additions and 153 deletions
  1. 53 153
      sodexo.pl

+ 53 - 153
sodexo.pl View File

@@ -1,21 +1,30 @@
1 1
 use vars qw(@day_names);
2
+use POSIX qw(strftime);
3
+
2 4
 
3 5
 my @restaurant_info = (
4 6
   #[ "(TaY) Sodexo Linna", "http://www.sodexo.fi/fi-FI/linna/lounas/", "right" ],
5 7
   #[ "(TTY) Sodexo Erkkeri", "http://www.sodexo.fi/fi-FI/erkkeri/lounas/", "left" ]
6
-  [ "(TaY) Sodexo Linna", "http://www.sodexo.fi/linna", "right" ],
7
-  [ "(TTY) Sodexo Erkkeri", "http://www.sodexo.fi/erkkeri", "left" ]
8
+  [ "(TaY) Sodexo Linna", "http://www.sodexo.fi/linna", "right", 92],
9
+  [ "(TTY) Sodexo Erkkeri", "http://www.sodexo.fi/erkkeri", "left", 100]
8 10
 );
9 11
 
10 12
 my ($cur_text, $cur_title, $parse_func, $day_id, $week);
11 13
 my (@cur_day_foods, @week_foods);
12 14
 
15
+sub sodexo_finish_food {
16
+  chomp $cur_food;
17
+  push @cur_day_foods, utf8_to_8859($cur_food) if ($cur_food ne "");
18
+  $cur_food = "";
19
+}
20
+
13 21
 sub sodexo_finish_day {
14 22
   push @week_foods, [@cur_day_foods];
15 23
   @cur_day_foods = ();
16 24
   $day_id = $day_id + 1;
17 25
 }
18 26
 
27
+
19 28
 sub utf8_to_8859 {
20 29
   $_ = shift;
21 30
 
@@ -26,163 +35,54 @@ sub utf8_to_8859 {
26 35
   return $_;
27 36
 }
28 37
 
29
-sub sodexo_parse_finish {
30
-}
31
-
32
-sub sodexo_parse_after_food {
33
-  my $token = shift;
34
-
35
-  if ($token->[0] eq 'E' && $token->[1] eq 'tbody') {
36
-    sodexo_finish_day();
37
-    $parse_func = \&sodexo_parse_finish;
38
-  } elsif ($token->[0] eq 'S' && $token->[1] eq 'td') {
39
-    $parse_func = \&sodexo_parse_to_food;
40
-    sodexo_parse_to_food($token);
41
-  } elsif ($token->[0] eq 'S' && $token->[1] eq 'th') {
42
-    sodexo_finish_day();
43
-    $parse_func = \&sodexo_parse_to_food;
44
-  }
45
-}
46
-
47
-sub sodexo_finish_food {
48
-  if ($cur_text ne "") {
49
-    push @cur_day_foods, "$cur_title ($cur_text)";
50
-  } else {
51
-    push @cur_day_foods, "$cur_title";
52
-  }
53
-  $parse_func = \&sodexo_parse_after_food;
54
-}
55
-
56
-sub sodexo_parse_allergy {
57
-  my $token = shift;
58
-
59
-  if ($token->[0] eq 'T' && !($token->[1] =~ /\s*\/\s*/)) {
60
-    my $text = $token->[1];
61
-    $text =~ s/^\s+//;
62
-    $text =~ s/\s+$//;
63
-    $cur_text .= ", " if $cur_text ne "";
64
-    $cur_text .= $text;
65
-  } elsif ($token->[0] eq 'E' && $token->[1] eq 'td') {
66
-    sodexo_finish_food();
67
-  }
68
-}
69
-
70
-sub sodexo_parse_to_allergy {
71
-  my $token = shift;
72
-
73
-  if ($token->[0] eq 'S' && $token->[1] eq 'td') {
74
-    my %attrs = %{$token->[2]};
75
-    if ($attrs{'class'} eq 'food-properties') {
76
-      $parse_func = \&sodexo_parse_allergy;
77
-    }
78
-  }
79
-}
80
-
81
-sub sodexo_parse_food {
82
-  my $token = shift;
83
-
84
-  if ($token->[0] eq 'T' && $token->[1] =~ /\S/) {
85
-     $cur_title .= utf8_to_8859($token->[1]);
86
-     $parse_func = \&sodexo_parse_to_allergy;
87
-  }
88
-}
89
-
90
-sub sodexo_parse_type {
91
-  my $token = shift;
92
-
93
-  if ($token->[0] eq 'T' && $token->[1] =~ /\S/) {
94
-    if ($token->[1] !~ /^ $/) {
95
-        $cur_title = utf8_to_8859($token->[1]) . ": ";
96
-    }
97
-    $parse_func = \&sodexo_parse_food; 
98
-  }
99
-}
100
-
101
-sub sodexo_parse_to_food {
102
-  my $token = shift;
103
-  # Allow list to skip days, ie. Helatorstai
104
-  if ($token->[0] eq 'T') {
105
-    for (my $day = $day_id; $day <= 6; ++$day) {
106
-      if ($token->[1] eq $day_names[$day]) {
107
-        while ($day > $day_id) {
108
-          sodexo_finish_day();
109
-          $day = $day - 1;
110
-        }
111
-        break;
112
-      }
113
-    }
114
-  }
115
-  if ($token->[0] eq 'S' && $token->[1] eq "td") {
116
-    my %attrs = %{$token->[2]};
117
-    if ($attrs{'class'} eq "food-type") {
118
-      $cur_title = '';
119
-      $cur_text = '';
120
-      $parse_func = \&sodexo_parse_type;
121
-    }
122
-    if ($attrs{'class'} eq "food-desc") {
123
-      $cur_title = '';
124
-      $cur_text = '';
125
-      $parse_func = \&sodexo_parse_food;
126
-    }
127
-  }
128
-}
129
-
130
-sub sodexo_parse_week {
131
-  my $token = shift;
132
-  if ($token->[0] eq 'T' && $token->[1] =~ /Viikko (\d+)/) {
133
-    $week = $1;
134
-    $parse_func = \&sodexo_parse_to_food;
135
-  }
136
-}
137
-
138
-sub sodexo_parse_to_week {
139
-  my $token = shift;
140
-  if ($token->[0] eq 'S' && $token->[1] eq 'div') {
141
-    my %attrs = %{$token->[2]};
142
-    if ($attrs{'class'} eq "list-date") {
143
-      $parse_func = \&sodexo_parse_week;
144
-    }
145
-  }
146
-}
147
-
148
-sub parse_sodexo {
149
-  my ($fname, $info_ref) = @_;
150
-  my $title = @{$info_ref}[0];
151
-  my $url = @{$info_ref}[1];
152
-  my $align = @{$info_ref}[2];
153
-  my $p = HTML::TokeParser->new($fname) or die("Can't open file $fname");
154
-
155
-  $week = "";
156
-  $day_id = 0;
157
-  @cur_day_foods = ();
158
-  @week_foods = ();
159
-
160
-  my $week_day = `date +%w`;
161
-  for (my $i = 1; $i < $week_day; ++$i) {
162
-    sodexo_finish_day();
163
-  }
164
-
165
-  $parse_func = \&sodexo_parse_to_week;
166
-  while (my $token = $p->get_token) {
167
-    &$parse_func($token);
168
-  }
169
-  return [ $title, "", $week, [ @week_foods ], [ $title, $url, "M", $align ] ];
170
-}
171
-
172 38
 sub get_sodexo_restaurants {
173 39
   my $use_old = shift;
174 40
   my $count = 0;
175
-  my @restaurants = ();
41
+  # Loop over restaurants
176 42
   foreach my $i (@restaurant_info) {
177 43
     my @info = @{$i};
178
-    my $temp_fname = "sodexo$count.temp.html";
179
-    my $url = $info[1];
180
-    if (!-f $temp_fname || !$use_old) {
181
-      system("wget -q --timeout=10 -O $temp_fname.tmp '$url' && mv $temp_fname.tmp $temp_fname");
182
-      return undef if (!-f $temp_fname);
44
+    my $kitchen = $info[3];
45
+    $title = $info[0];
46
+    $cur_title = $title;
47
+    $open_hours = "";
48
+    @week_foods = ();
49
+    my $week = `date +%V`;
50
+    chomp($week);
51
+    # Loop weekdays
52
+    for (my $weekday = 1; $weekday < 7; $weekday++) {
53
+
54
+      # Get current unix timestamp and day of week
55
+      $s = strftime "%s", localtime;
56
+      $v = strftime "%w", localtime;
57
+      # Shift the timestamp to the weekday being fetched
58
+      $s = $s - ($v - 2 + $weekday)  * 84600;
59
+      $timestr = strftime "%Y/%m/%d", localtime($s);
60
+
61
+      my $url = "http://www.sodexo.fi/ruokalistat/output/daily_json/$kitchen/$timestr/fi";
62
+      my $temp_fname = "sodexo$count-$weekday.temp.js";  
63
+      if (!-f $temp_fname || !$use_old) {
64
+        system("wget -q --timeout=10 -O $temp_fname.tmp \"$url\" && mv $temp_fname.tmp $temp_fname");
65
+      }
66
+      if (-f $temp_fname) {
67
+        open(FILE, $temp_fname);
68
+        my $json = do { local $/; <FILE> };
69
+        close(FILE);
70
+        # the file is encapsulated in ({"d": json}); so we have to double parse it
71
+        my $data = from_json($json);
72
+        my $courses = $data->{'courses'};
73
+        # loop different meals
74
+        foreach my $course_info (@$courses) {
75
+          if ($course_info->{'category'} ne 'Aamupala') {
76
+            $cur_food = $course_info->{'title_fi'};
77
+            $cur_food .= " (" . $course_info->{'properties'} . ")" if $course_info->{'properties'};
78
+          }
79
+          sodexo_finish_food();
80
+        }
81
+        sodexo_finish_day();
82
+      }
183 83
     }
184
-    push @restaurants, parse_sodexo($temp_fname, \@info);
185
-    ++$count;
84
+    push @restaurants, [ $title, $open_hours, $week, [ @week_foods ], \@info ];
85
+    $count++;
186 86
   }
187 87
   return @restaurants;
188 88
 }