|
@@ -0,0 +1,173 @@
|
|
1
|
+import pnalib
|
|
2
|
+import html.parser
|
|
3
|
+import re
|
|
4
|
+
|
|
5
|
# Address of the Campusravita menu page that this module downloads and parses.
url = "http://www.campusravita.fi/ruokalista"

# One row per restaurant served by this module. Each row starts with the
# display name followed by the menu URL; the meaning of the remaining two
# fields ("" and "middle") is defined by the consumer of this table --
# TODO confirm against the caller.
restaurant_info = [["(TAMK) Campus Food", url, "", "middle"]]
|
|
10
|
+
|
|
11
|
class Tracker(object):
    """Track whether parsing is currently inside a particular HTML element.

    A tracker watches for a start tag named ``tag``, optionally requiring an
    attribute whose value matches a compiled regular expression. Once such a
    start tag is seen the tracker is "open". While open, further start tags
    with the same name only increase the nesting depth, so the tracker closes
    on the end tag that balances the one that opened it.

    Parameters:
        tag: Name of the tag to watch.
        attr_match: ``None`` to accept any ``tag`` element, or a pair
            ``(attribute_name, compiled_regex)``; the element is accepted
            when it has an attribute with that name whose value satisfies
            ``compiled_regex.match``.
        on_started: Optional zero-argument callable, invoked when the
            tracker opens.
        on_ended: Optional zero-argument callable, invoked when the tracker
            closes (nesting depth returns to zero).
        on_data: Optional one-argument callable, invoked with every text
            chunk passed to ``handle_data`` while the tracker is open.

    The truth value of a tracker is ``True`` exactly while it is open.
    """

    def __init__(self, tag, attr_match=None, on_started=None, on_ended=None, on_data=None):
        self.tag = tag
        self.attr_match = attr_match
        self.on_started = on_started
        self.on_ended = on_ended
        self.on_data = on_data
        # Depth of currently open ``tag`` elements, counted from the element
        # that opened the tracker; 0 means the tracker is closed.
        self.nesting = 0

    def _attrs_match(self, attrs):
        """Return True when ``attrs`` satisfies this tracker's attribute filter."""
        if self.attr_match is None:
            return True
        wanted_name, value_re = self.attr_match
        # html.parser reports attributes written without a value (for
        # example ``<div class>``) as ``(name, None)``; those can never match
        # and must not be handed to the regex, which would raise TypeError.
        return any(
            name == wanted_name and value is not None and value_re.match(value)
            for name, value in attrs
        )

    def handle_starttag(self, tag, attrs):
        """Process a start tag; ``attrs`` is a list of ``(name, value)`` pairs."""
        if tag != self.tag:
            return
        if self.nesting:
            # Already open: any same-named child just deepens the nesting,
            # regardless of its attributes.
            self.nesting += 1
            return
        if not self._attrs_match(attrs):
            return
        self.nesting = 1
        if self.on_started:
            self.on_started()

    def handle_endtag(self, tag):
        """Process an end tag, closing the tracker when nesting reaches zero."""
        if self.nesting and self.tag == tag:
            self.nesting -= 1
            if self.nesting == 0 and self.on_ended:
                self.on_ended()

    def handle_data(self, data):
        """Forward ``data`` to ``on_data`` while the tracker is open."""
        if self.nesting and self.on_data:
            self.on_data(data)

    def __bool__(self):
        return self.nesting > 0
|
|
51
|
+
|
|
52
|
class CampusravitaHTMLParser(html.parser.HTMLParser):
    """Parse the Campusravita menu page into per-weekday lists of lunches.

    After the page has been fed to the parser:

    * ``week`` is the integer from the last text chunk that starts with
      "Ruokalista - Viikko <number>", or ``None`` when no such text was seen.
    * ``week_foods`` maps a weekday index (0 = Maanantai ... 6 = Sunnuntai)
      to a list of ``"<food> (<allergies>)"`` strings.

    Element tracking is delegated to ``Tracker`` objects; every start tag,
    end tag and text chunk is forwarded to all registered trackers.
    """

    # Raw string: "\d" in a plain string literal is an invalid escape sequence.
    week_re = re.compile(r"Ruokalista - Viikko (\d+)")
    lunch_re = re.compile("Lounas|Deli-lounas")

    # Finnish weekday names; the position in the tuple is the weekday index
    # used as key in ``week_foods``. The first name found in the text wins.
    _WEEKDAYS = (
        "Maanantai",
        "Tiistai",
        "Keskiviikko",
        "Torstai",
        "Perjantai",
        "Lauantai",
        "Sunnuntai",
    )

    def __init__(self):
        super().__init__()
        self._trackers = []
        self.in_h3 = self._register_tracker("h3", on_data=self.handle_h3)
        # Everything of interest is inside the menu section
        self.in_menu = self._register_tracker("section", ("id", "block-system-main"),
            on_started=self.handle_menu_start, on_ended=self.handle_menu_end)
        # Date comes after menu
        self.in_date_display = self._register_tracker("span", ("class", "date-display-single"),
            on_data=self.handle_date_display)
        # Lunch element contains one meal
        self.in_lunch = self._register_tracker("div", ("about", r"/fi/field-collection/field-ruoka-annos/\d+"),
            on_started=self.handle_lunch_start, on_ended=self.handle_lunch_end)
        # Next element contains food name
        self.in_lunch_food = self._register_tracker("div", ("class", ".*field-name-field-nimi.*"),
            on_data=self.handle_lunch_food)
        # Next element contains food allergies
        self.in_allergy = self._register_tracker("div", ("class", ".*field-name-field-ruokavaliot.*"),
            on_started=self.handle_allergy_start, on_ended=self.handle_allergy_end)
        # Next element contains allergy short name
        self.in_allergy_short = self._register_tracker("div", ("class", ".*field-name-title field-type-ds.*"),
            on_data=self.handle_allergy)
        # Next element contains lunch price (tracked only; no callbacks)
        self.in_lunch_price = self._register_tracker("div", ("class", ".*field-name-field-annoksen-hinta.*"))
        # True while the most recent <h3> text inside the menu named a lunch type
        self.lunch_type_match = False
        # Meal currently being collected: {"food": [...], "allergies": [...]}
        self.lunch = None

        # Initialised up front so that a page without a week header or
        # without any date element does not cause AttributeError later.
        self.week = None
        self.current_day = None
        self.week_foods = {}

    def _register_tracker(self, tag, attr_match=None, **kwargs):
        """Create a Tracker, compiling the attribute pattern, and register it.

        ``attr_match`` is ``None`` or ``(attribute_name, pattern_string)``;
        remaining keyword arguments are passed through to ``Tracker``.
        """
        tracker = Tracker(tag, (attr_match[0], re.compile(attr_match[1])) if attr_match else None, **kwargs)
        self._trackers.append(tracker)
        return tracker

    def handle_date_display(self, data):
        """Start a new day list when ``data`` contains a Finnish weekday name."""
        for index, day_name in enumerate(self._WEEKDAYS):
            if day_name in data:
                self.current_day = []
                self.week_foods[index] = self.current_day
                return

    def handle_h3(self, data):
        """Record whether an <h3> heading inside the menu names a lunch type."""
        if self.in_menu:
            lunch_match = self.lunch_re.match(data)
            self.lunch_type_match = bool(lunch_match)

    def handle_menu_start(self):
        """Emit a debug marker when the menu section opens."""
        print("*********** menu start")

    def handle_menu_end(self):
        """Emit a debug marker when the menu section closes."""
        print("*********** menu end")

    def handle_allergy(self, data):
        """Append a non-blank allergy short name to the meal being collected."""
        data = data.strip()
        if self.in_allergy and self.in_allergy_short and self.lunch and data:
            self.lunch["allergies"].append(data)

    def handle_allergy_start(self):
        """Hook for the start of the allergy element; intentionally a no-op."""
        pass

    def handle_allergy_end(self):
        """Hook for the end of the allergy element; intentionally a no-op."""
        pass

    def handle_lunch_food(self, data):
        """Append a non-blank food name chunk to the meal being collected."""
        data = data.strip()
        if self.lunch and data:
            self.lunch["food"].append(data)

    def handle_lunch_start(self):
        """Begin collecting a meal if the current heading is a lunch type."""
        if self.lunch_type_match:
            print("lunch start")
            self.lunch = {"food": [], "allergies": []}

    def handle_lunch_end(self):
        """Store the collected meal in the current day and reset the state."""
        if not self.lunch:
            return
        print(repr(self.lunch).encode("cp1252", "ignore"))
        foods = self.lunch["food"]
        # A meal without any food text, or one seen before any weekday
        # heading, cannot be stored; skip it instead of raising.
        if foods and self.current_day is not None:
            menu = "{menu} ({allergies})".format(menu=foods[0], allergies=", ".join(self.lunch["allergies"]))
            self.current_day.append(menu)
        self.lunch = None

    def handle_starttag(self, tag, attrs):
        """Forward a start tag to every registered tracker."""
        for tracker in self._trackers:
            tracker.handle_starttag(tag, attrs)

    def handle_endtag(self, tag):
        """Forward an end tag to every registered tracker."""
        for tracker in self._trackers:
            tracker.handle_endtag(tag)

    def handle_data(self, data):
        """Forward text to every tracker and pick up the week number."""
        for tracker in self._trackers:
            tracker.handle_data(data)
        week_match = self.week_re.match(data)
        if week_match:
            self.week = int(week_match.group(1))
|
|
163
|
+
|
|
164
|
def get_restaurants(use_old, week):
    """Download and parse the Campusravita menu page.

    Parameters:
        use_old: Passed unchanged to ``pnalib.get_file``; presumably selects
            a previously saved copy of the page -- verify against pnalib.
        week: Not used by this function; the week number in the result is
            the one found on the page itself.

    Returns:
        A one-element list holding
        ``[name, "", week, week_foods, restaurant_info_row]``, where ``week``
        and ``week_foods`` come from ``CampusravitaHTMLParser``.
    """
    data = pnalib.get_file(url, "campusravita.html", use_old)
    parser = CampusravitaHTMLParser()
    parser.feed(data)
    # feed() may keep trailing text buffered while waiting for more input;
    # close() forces it to be processed as if end-of-file had been reached.
    parser.close()

    info = restaurant_info[0]
    return [[info[0], "", parser.week, parser.week_foods, info]]
|
|
173
|
+
|