Add campusravita

Toni Fadjukoff, 8 years ago
commit 0f24186b2f
7 changed files with 235 additions and 48 deletions
  1. amica.py (+3 -13)
  2. campusravita.py (+173 -0)
  3. food.py (+6 -4)
  4. juvenes.py (+3 -16)
  5. pikante.py (+18 -1)
  6. pnalib.py (+29 -0)
  7. sodexo.py (+3 -14)

amica.py (+3 -13)

@@ -1,8 +1,5 @@
+import pnalib
 import datetime
-import os
-import urllib.request
-import urllib.error
-import json
 
 restaurant_info = [
   [ "(TaY) Amica Minerva", "http://www.amica.fi/minerva", "", "middle", "http://www.amica.fi/api/restaurant/menu/week?language=fi&restaurantPageId=7381" ],
@@ -21,15 +18,8 @@ def get_restaurants(use_old, week):
         url = info[4]
         temp_fname = "amica_{count}.temp.js".format(count = count)
         url = "{url}&weekDate={week_date}".format(url=url, week_date=week_date)
-        if not use_old or not os.path.isfile(temp_fname):
-            try:
-                urllib.request.urlretrieve(url, temp_fname)
-            except urllib.error.HTTPError as e:
-                print("Failed to download {url}".format(url=url))
-        try:
-            with open(temp_fname, "r", encoding="utf-8") as fin:
-                data = json.load(fin)
-        except OSError as e:
+        data = pnalib.get_json_file(url, temp_fname, use_old)
+        if not data:
             continue
 
         week_foods = {}

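This amica.py change is the template for the whole commit: the hand-rolled download/parse/except block is collapsed into a single pnalib call that returns the parsed JSON, or None when the download failed and no cached temp file exists. A minimal sketch of the new call pattern (the weekDate value is an assumption here; its computation lives outside this hunk):

    import pnalib

    url = "http://www.amica.fi/api/restaurant/menu/week?language=fi&restaurantPageId=7381"
    week_date = "2017-03-06"  # assumed ISO date; computed elsewhere in amica.py
    url = "{url}&weekDate={week_date}".format(url=url, week_date=week_date)

    # Parsed JSON on success; None if the HTTP request failed and no cached
    # temp file exists, so the caller can simply skip this restaurant.
    data = pnalib.get_json_file(url, "amica_0.temp.js", use_old=False)
    if not data:
        print("no menu available")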
campusravita.py (+173 -0)

@@ -0,0 +1,173 @@
+import pnalib
+import html.parser
+import re
+
+url = "http://www.campusravita.fi/ruokalista"
+
+restaurant_info = [
+  [ "(TAMK) Campus Food", url, "", "middle" ],
+]
+
+class Tracker(object):
+
+    def __init__(self, tag, attr_match=None, on_started=None, on_ended=None, on_data=None):
+        self.tag = tag
+        self.attr_match = attr_match
+        self.on_started = on_started
+        self.on_ended = on_ended
+        self.on_data = on_data
+        self.nesting = 0
+
+    def handle_starttag(self, tag, attrs):
+        if self.tag == tag:
+            if self.nesting:
+                self.nesting += 1
+            else:
+                attrs_matched = False
+                if self.attr_match is None:
+                    attrs_matched = True
+                else:
+                    for attr in attrs:
+                        if attr[0] == self.attr_match[0] and self.attr_match[1].match(attr[1]):
+                            attrs_matched = True
+                if attrs_matched:
+                    self.nesting = 1
+                    if self.on_started:
+                        self.on_started()
+
+
+    def handle_endtag(self, tag):
+        if self.nesting and self.tag == tag:
+            self.nesting -= 1
+            if self.nesting == 0 and self.on_ended:
+                self.on_ended()
+
+    def handle_data(self, data):
+        if self.nesting and self.on_data:
+            self.on_data(data)
+
+    def __bool__(self):
+        return self.nesting > 0
+
+class CampusravitaHTMLParser(html.parser.HTMLParser):
+    week_re = re.compile(r"Ruokalista - Viikko (\d+)")
+    lunch_re = re.compile("Lounas|Deli-lounas")
+
+    def __init__(self):
+        html.parser.HTMLParser.__init__(self)
+        self._trackers = []
+        self.in_h3 = self._register_tracker("h3", on_data=self.handle_h3)
+        # Everything is inside the menu section
+        self.in_menu = self._register_tracker("section", ("id", "block-system-main"),
+                on_started=self.handle_menu_start, on_ended=self.handle_menu_end)
+        # Date comes after menu
+        self.in_date_display = self._register_tracker("span", ("class", "date-display-single"),
+                on_data=self.handle_date_display)
+        # Lunch element contains one meal
+        self.in_lunch = self._register_tracker("div", ("about", r"/fi/field-collection/field-ruoka-annos/\d+"),
+                on_started=self.handle_lunch_start, on_ended=self.handle_lunch_end)
+        # Next element contains food name
+        self.in_lunch_food = self._register_tracker("div", ("class", ".*field-name-field-nimi.*"),
+                on_data=self.handle_lunch_food)
+        # Next element contains food allergies
+        self.in_allergy = self._register_tracker("div", ("class", ".*field-name-field-ruokavaliot.*"),
+                on_started=self.handle_allergy_start, on_ended=self.handle_allergy_end)
+        # Next element contains allergy short name
+        self.in_allergy_short = self._register_tracker("div", ("class", ".*field-name-title field-type-ds.*"),
+                on_data=self.handle_allergy)
+        # Next element contains lunch price
+        self.in_lunch_price = self._register_tracker("div", ("class", ".*field-name-field-annoksen-hinta.*"))
+        self.lunch_type_match = False
+        self.lunch = None
+
+        self.week_foods = {}
+
+    def _register_tracker(self, tag, attr_match=None, **kwargs):
+        tracker = Tracker(tag, (attr_match[0], re.compile(attr_match[1])) if attr_match else None, **kwargs)
+        self._trackers.append(tracker)
+        return tracker
+
+    def handle_date_display(self, data):
+        index = -1
+        if "Maanantai" in data:
+            index = 0
+        elif "Tiistai" in data:
+            index = 1
+        elif "Keskiviikko" in data:
+            index = 2
+        elif "Torstai" in data:
+            index = 3
+        elif "Perjantai" in data:
+            index = 4
+        elif "Lauantai" in data:
+            index = 5
+        elif "Sunnuntai" in data:
+            index = 6
+        if index >= 0:
+            self.current_day = []
+            self.week_foods[index] = self.current_day
+
+    def handle_h3(self, data):
+        if self.in_menu:
+            lunch_match = self.lunch_re.match(data)
+            self.lunch_type_match = bool(lunch_match)
+
+    def handle_menu_start(self):
+        print("*********** menu start")
+
+    def handle_menu_end(self):
+        print("*********** menu end")
+
+    def handle_allergy(self, data):
+        data = data.strip()
+        if self.in_allergy and self.in_allergy_short and self.lunch and data:
+            self.lunch["allergies"].append(data)
+
+    def handle_allergy_start(self):
+        pass
+
+    def handle_allergy_end(self):
+        pass
+
+    def handle_lunch_food(self, data):
+        data = data.strip()
+        if self.lunch and data:
+            self.lunch["food"].append(data)
+
+    def handle_lunch_start(self):
+        if self.lunch_type_match:
+            print("lunch start")
+            self.lunch = {"food": [], "allergies": []}
+
+    def handle_lunch_end(self):
+        if self.lunch:
+            print(repr(self.lunch).encode("cp1252", "ignore"))
+            menu = "{menu} ({allergies})".format(menu=self.lunch["food"][0], allergies=", ".join(self.lunch["allergies"]))
+            self.current_day.append(menu)
+            self.lunch = None
+
+    def handle_starttag(self, tag, attrs):
+        for tracker in self._trackers:
+            tracker.handle_starttag(tag, attrs)
+
+    def handle_endtag(self, tag):
+        for tracker in self._trackers:
+            tracker.handle_endtag(tag)
+
+    def handle_data(self, data):
+        for tracker in self._trackers:
+            tracker.handle_data(data)
+        week_match = self.week_re.match(data)
+        if week_match:
+            self.week = int(week_match.group(1))
+
+def get_restaurants(use_old, week):
+
+    data = pnalib.get_file(url, "campusravita.html", use_old)
+    parser = CampusravitaHTMLParser()
+    parser.feed(data)
+
+    restaurants = [[restaurant_info[0][0], "", parser.week, parser.week_foods, restaurant_info[0]]]
+
+    return restaurants
+

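campusravita.py is the one genuinely new scraper in this commit. The Tracker class is a small state machine: it counts how deeply the parser is nested inside a tag whose attributes match a regex, so callbacks fire only for content inside that element, even when the same tag type nests. Note that Tracker itself expects a pre-compiled regex; CampusravitaHTMLParser._register_tracker does the re.compile. A self-contained sketch of the pattern (the HTML is invented for illustration, not real Campusravita markup; assumes campusravita.py is importable):

    import html.parser
    import re
    from campusravita import Tracker

    class DemoParser(html.parser.HTMLParser):
        def __init__(self):
            html.parser.HTMLParser.__init__(self)
            # Fire on_data only while inside <div class="...menu...">,
            # surviving nested <div>s thanks to the nesting counter.
            self.in_menu = Tracker("div", ("class", re.compile(".*menu.*")),
                                   on_data=lambda d: print("menu text:", d))

        def handle_starttag(self, tag, attrs):
            self.in_menu.handle_starttag(tag, attrs)

        def handle_endtag(self, tag):
            self.in_menu.handle_endtag(tag)

        def handle_data(self, data):
            self.in_menu.handle_data(data)

    DemoParser().feed('<div class="menu"><div>Hernekeitto</div></div><p>skipped</p>')
    # prints: menu text: Hernekeitto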
food.py (+6 -4)

@@ -9,7 +9,6 @@
 # This is Public Domain
 
 import sys
-print(sys.version)
 
 day_names = [ "Maanantai", "Tiistai", "Keskiviikko", "Torstai",
 	       "Perjantai", "Lauantai", "Sunnuntai" ]
@@ -17,7 +16,7 @@ day_names = [ "Maanantai", "Tiistai", "Keskiviikko", "Torstai",
 import amica
 import sodexo
 import juvenes
-import pikante
+import campusravita
 
 allergies = [ "M", "L", "VL", "G", "K", "Ve" ]
 allergy_descriptions = {
@@ -43,9 +42,8 @@ this_week = datetime.datetime.now().isocalendar()[1]
 unordered += amica.get_restaurants(use_old, this_week)
 unordered += juvenes.get_restaurants(use_old, this_week)
 unordered += sodexo.get_restaurants(use_old, this_week)
-unordered += pikante.get_restaurants(use_old, this_week)
+unordered += campusravita.get_restaurants(use_old, this_week)
 
-print(unordered)
 
 max_week = 0;
 for r in unordered:
@@ -149,12 +147,16 @@ def write_prefix_header(fout, prefix, day, resources_prefix):
 
 def write_day(day, header, outfname, last_day, restaurants, prefix, resources_prefix):
     with open(outfname, "w", encoding="utf-8") as fout:
+
+
         import types
         def write(self, writable):
             #print("Writing {}: {}".format(type(writable), writable))
             self.write_orig(writable)
         fout.write_orig = fout.write
         fout.write = types.MethodType(write, fout)
+
+
         fout.write(file_header.format(resources_prefix=resources_prefix))
         fout.write("<h1>{header}</h1>\n".format(header=header))
         # print weekday links

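The blank lines added in write_day frame a debugging shim: it saves the file object's real write as write_orig and rebinds write to a wrapper, so every chunk can be inspected (the print is left commented out) before being written. The same technique in isolation, a minimal sketch using io.StringIO so it runs standalone:

    import io
    import types

    buf = io.StringIO()

    def write(self, writable):
        # Inspect or log each chunk, then delegate to the real write.
        print("Writing {}: {}".format(type(writable), writable))
        self.write_orig(writable)

    buf.write_orig = buf.write                # keep the original bound method
    buf.write = types.MethodType(write, buf)  # shadow it with the wrapper

    buf.write("<h1>menu</h1>\n")              # prints: Writing <class 'str'>: <h1>menu</h1>

This works because io objects accept new instance attributes, which is also why the same trick applies to the fout returned by open() in food.py.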
juvenes.py (+3 -16)

@@ -1,10 +1,8 @@
 # encoding: UTF-8
+import pnalib
 import datetime
-import urllib.request
-import urllib.error
-import json
 import re
-import os
+import json
 
 # last two in order are $kitchen_info_id, $menu_type_id
 restaurant_info = [
@@ -36,18 +34,7 @@ def get_restaurants(use_old, week):
         for weekday in range(1,7):
             url = "http://www.juvenes.fi/DesktopModules/Talents.LunchMenu/LunchMenuServices.asmx/GetMenuByWeekday?KitchenId={kitchen}&MenuTypeId={menutype}&Week={week}&Weekday={weekday}&lang='fi'&format=json".format(kitchen=kitchen, menutype=menutype, week=week, weekday=weekday)
             temp_fname = "juvenes_{count}-{weekday}.temp.js".format(count=count, weekday=weekday)
-            if not use_old or not os.path.isfile(temp_fname):
-                try:
-                    urllib.request.urlretrieve(url, temp_fname)
-                except urllib.error.HTTPError as e:
-                    print("Failed to download {url}".format(url=url))
-                    # Juvenes may fail with error code 500 if food is not available
-            try:
-                with open(temp_fname, "r", encoding="utf-8") as fin:
-                    jsonp = fin.read()
-                data = json.loads(jsonp[1:-2])
-            except OSError as e:
-                continue
+            data = pnalib.get_jsonp_file(url, temp_fname, use_old)
             if data and data["d"] != "null":
                 data = json.loads(data["d"])
                 cur_day_foods = []

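get_jsonp_file hides the quirk of the Juvenes endpoint: the response is JSONP, a JSON object wrapped in parentheses and terminated by ");", and the actual menu is a second JSON document stored as a string under the "d" key. A worked sketch of the double decode (the payload is invented to show the shape, not real Juvenes data):

    import json

    # On-disk response shape (assumed): (<json>);
    jsonp = '({"d": "[{\\"name\\": \\"Hernekeitto\\"}]"});'

    data = json.loads(jsonp[1:-2])    # strip the leading "(" and trailing ");"
    if data and data["d"] != "null":
        menu = json.loads(data["d"])  # "d" holds JSON encoded as a string
        print(menu[0]["name"])        # Hernekeitto

The service returning the string "null" in "d", and answering HTTP 500 when no food is available, is why the caller still checks data["d"] after the helper returns.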
pikante.py (+18 -1)

@@ -1,10 +1,27 @@
+import pnalib
+import html.parser
+# Pikante does not have any student restaurants at the moment
+
 pikante_url = "http://www.pikante.fi/lounaslistat-pdf";
 
 restaurant_info = [
-  [ "(TAYS) Finn-Medi", "$pky_url", "all", "middle" ],
+  [ "(TAYS) Finnmedin ravintola", "$pky_url", "all", "middle" ],
   [ "(TAYS) Café Olive", "$pky_url", "all", "middle" ],
   [ "(TAYS) Ellipsi", "$pky_url", "all", "middle" ]
 ]
 
+class PikanteHTMLParser(html.parser.HTMLParser):
+    def handle_starttag(self, tag, attrs):
+        pass
+    def handle_endtag(self, tag):
+        pass
+    def handle_data(self, data):
+        pass
+
 def get_restaurants(use_old, week):
+
+    #data = pnalib.get_file(pikante_url, "pikante.html", use_old)
+    #parser = PikanteHTMLParser()
+    #parser.feed(data)
+
     return []

pnalib.py (+29 -0)

@@ -0,0 +1,29 @@
+import os.path
+import urllib.request
+import urllib.error
+import json
+
+def get_jsonp_file(url, temp_fname, use_old):
+    return get_file(url, temp_fname, use_old, jsonp_load)
+
+def get_json_file(url, temp_fname, use_old):
+    return get_file(url, temp_fname, use_old, json.load)
+
+def jsonp_load(fp):
+    return json.loads(fp.read()[1:-2])
+
+def read_all(fp):
+    return fp.read()
+
+def get_file(url, temp_fname, use_old, consumer=read_all):
+    if not use_old or not os.path.isfile(temp_fname):
+        try:
+            urllib.request.urlretrieve(url, temp_fname)
+        except urllib.error.HTTPError as e:
+            print("Failed to download {url}".format(url=url))
+            # Juvenes may fail with error code 500 if food is not available
+    try:
+        with open(temp_fname, "r", encoding="utf-8") as fin:
+            return consumer(fin)
+    except OSError as e:
+        pass

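pnalib.py is the extracted common core: download the URL to a temp file (unless use_old says to reuse an existing one), then hand the opened file to a consumer callable. When the download fails and no cached file exists, open() raises OSError and get_file falls through to an implicit None, which is what the callers' "if not data" checks rely on. A usage sketch with a custom consumer (the URL and filename are placeholders, not part of this codebase):

    import csv
    import pnalib

    def csv_rows(fp):
        # Any callable that accepts an open text file works as a consumer.
        return list(csv.reader(fp))

    rows = pnalib.get_file("http://example.invalid/menu.csv", "menu.temp.csv",
                           use_old=True, consumer=csv_rows)
    if not rows:
        print("download failed and no cached copy exists")

One caveat: get_file only catches urllib.error.HTTPError, so a network-level failure (DNS, refused connection, like this unresolvable placeholder host) raises urllib.error.URLError and propagates to the caller.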
sodexo.py (+3 -14)

@@ -1,8 +1,5 @@
+import pnalib
 import datetime
-import os
-import urllib.request
-import urllib.error
-import json
 
 restaurant_info = [
   [ "(TaY) Sodexo Linna", "http://www.sodexo.fi/linna", "", "right", 92],
@@ -25,20 +22,12 @@ def get_restaurants(use_old, week):
             timestr = date.strftime("%Y/%m/%d")
             url = "http://www.sodexo.fi/ruokalistat/output/daily_json/{kitchen}/{timestr}/fi".format(kitchen=kitchen, timestr=timestr)
             temp_fname = "sodexo_{count}-{weekday}.temp.js".format(count=count, weekday=weekday)
-            if not use_old or not os.path.isfile(temp_fname):
-                try:
-                    urllib.request.urlretrieve(url, temp_fname)
-                except urllib.error.HTTPError as e:
-                    print("Failed to download {url}".format(url=url))
-            try:
-                with open(temp_fname, "r", encoding="utf-8") as fin:
-                    data = json.load(fin)
-            except OSError as e:
+            data = pnalib.get_json_file(url, temp_fname, use_old)
+            if not data:
                 continue
             current_day_foods = []
             courses = data["courses"]
             for course_info in courses:
-                print(course_info)
                 if course_info["category"] != "Aamupuuro":
                     food = course_info["title_fi"]
                     if "properties" in course_info:
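After the refactor, sodexo.py deals only with the parsed daily_json payload. A minimal sketch of the filtering step against an invented payload that mimics just the keys the code reads ("courses", "category", "title_fi", "properties"); how the properties end up formatted is an assumption, since the hunk cuts off right after the membership test:

    # Invented sample; real Sodexo payloads carry more fields.
    data = {
        "courses": [
            {"category": "Aamupuuro", "title_fi": "Kaurapuuro"},
            {"category": "Lounas", "title_fi": "Uunilohi", "properties": "L, G"},
        ]
    }

    current_day_foods = []
    for course_info in data["courses"]:
        if course_info["category"] != "Aamupuuro":  # skip the breakfast porridge
            food = course_info["title_fi"]
            if "properties" in course_info:
                # Assumed continuation: append the diet codes to the name.
                food = "{food} ({props})".format(food=food, props=course_info["properties"])
            current_day_foods.append(food)

    print(current_day_foods)  # ['Uunilohi (L, G)']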