PNA.fi koodi

campusravita.py 6.5KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190
  1. # Copyright 2018 Toni Fadjukoff. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. import pnalib
  15. import html.parser
  16. import re
  # Page that is downloaded and parsed for the weekly lunch list.
  url = "http://www.campusravita.fi/ruokalista"

  # One row per restaurant. The first field is the display name and the second
  # the menu URL; the meaning of the remaining two fields ("" and "middle") is
  # not visible in this file -- presumably consumed by the caller; confirm there.
  restaurant_info = [
      ["(TAMK) Campus Food", url, "", "middle"],
  ]
  class Tracker(object):
      """Track whether a stream of HTML parser events is inside one element.

      A tracker watches for a start tag named ``tag`` (optionally carrying an
      attribute whose value matches a compiled regular expression) and counts
      nested same-named tags so that it knows which end tag closes the element
      it opened on.  The instance is truthy while inside the element.

      Args:
          tag: Tag name to watch for.
          attr_match: ``None`` to accept any start tag named ``tag``, or a
              ``(attribute_name, compiled_pattern)`` pair; the element is
              tracked when some attribute with that name has a value for
              which ``compiled_pattern.match`` succeeds.
          on_started: Optional zero-argument callable invoked when the
              tracked element opens.
          on_ended: Optional zero-argument callable invoked when the tracked
              element closes.
          on_data: Optional one-argument callable invoked with every text
              chunk seen while inside the tracked element.
      """

      def __init__(self, tag, attr_match=None, on_started=None, on_ended=None, on_data=None):
          self.tag = tag
          self.attr_match = attr_match
          self.on_started = on_started
          self.on_ended = on_ended
          self.on_data = on_data
          # Depth of same-named tags currently open; 0 means "not inside".
          self.nesting = 0

      def _attrs_match(self, attrs):
          """Return True when ``attrs`` satisfies this tracker's attribute filter."""
          if self.attr_match is None:
              return True
          wanted_name, pattern = self.attr_match
          # HTMLParser reports valueless attributes (e.g. <div class>) with a
          # value of None; those can never match and must not reach the regex.
          return any(
              name == wanted_name and value is not None and pattern.match(value)
              for name, value in attrs
          )

      def handle_starttag(self, tag, attrs):
          """Process a start tag; ``attrs`` is a list of ``(name, value)`` pairs."""
          if self.tag != tag:
              return
          if self.nesting:
              # Already inside: count the nested same-named tag so that its end
              # tag is not mistaken for the end of the tracked element.
              self.nesting += 1
              return
          if not self._attrs_match(attrs):
              return
          self.nesting = 1
          if self.on_started:
              self.on_started()

      def handle_endtag(self, tag):
          """Process an end tag, firing ``on_ended`` when the tracked element closes."""
          if not self.nesting or self.tag != tag:
              return
          self.nesting -= 1
          if self.nesting == 0 and self.on_ended:
              self.on_ended()

      def handle_data(self, data):
          """Forward ``data`` to ``on_data`` while inside the tracked element."""
          if self.nesting and self.on_data:
              self.on_data(data)

      def __bool__(self):
          """Return True while inside the tracked element."""
          return self.nesting > 0
  class CampusravitaHTMLParser(html.parser.HTMLParser):
      """Extract the week number and per-day lunch lists from the menu page.

      Every parser event is fanned out to a set of ``Tracker`` objects, each of
      which knows whether the parser is currently inside one particular
      element; the tracker callbacks assemble the result.

      After ``feed()``:
          week: Week number parsed from the "Ruokalista - Viikko N" text, or
              ``None`` when that text was never seen.
          week_foods: Dict mapping a weekday index (0 = Maanantai ...
              6 = Sunnuntai) to a list of "<food> (<allergies>)" strings.
      """

      # Raw string: "\d" in a plain literal is an invalid escape sequence.
      week_re = re.compile(r"Ruokalista - Viikko (\d+)")
      lunch_re = re.compile("Lounas|Deli-lounas")
      # Finnish weekday names; the position in the tuple is the week_foods key.
      _weekdays = (
          "Maanantai",
          "Tiistai",
          "Keskiviikko",
          "Torstai",
          "Perjantai",
          "Lauantai",
          "Sunnuntai",
      )
      week = None

      def __init__(self):
          html.parser.HTMLParser.__init__(self)
          self._trackers = []
          self.in_h3 = self._register_tracker("h3", on_data=self.handle_h3)
          # Everything is inside the menu section
          self.in_menu = self._register_tracker(
              "section", ("id", "block-system-main"),
              on_started=self.handle_menu_start, on_ended=self.handle_menu_end)
          # Date comes after menu
          self.in_date_display = self._register_tracker(
              "span", ("class", "date-display-single"),
              on_data=self.handle_date_display)
          # Lunch element contains one meal
          self.in_lunch = self._register_tracker(
              "div", ("about", r"/fi/field-collection/field-ruoka-annos/\d+"),
              on_started=self.handle_lunch_start, on_ended=self.handle_lunch_end)
          # Next element contains food name
          self.in_lunch_food = self._register_tracker(
              "div", ("class", ".*field-name-field-nimi.*"),
              on_data=self.handle_lunch_food)
          # Next element contains food allergies
          self.in_allergy = self._register_tracker(
              "div", ("class", ".*field-name-field-ruokavaliot.*"),
              on_started=self.handle_allergy_start, on_ended=self.handle_allergy_end)
          # Next element contains allergy short name
          self.in_allergy_short = self._register_tracker(
              "div", ("class", ".*field-name-title field-type-ds.*"),
              on_data=self.handle_allergy)
          # Next element contains lunch price (tracked, but no callback uses it)
          self.in_lunch_price = self._register_tracker(
              "div", ("class", ".*field-name-field-annoksen-hinta.*"))
          # True while the most recent <h3> inside the menu named a lunch type.
          self.lunch_type_match = False
          # Meal currently being collected, or None when outside a wanted meal.
          self.lunch = None
          # List receiving meals for the most recent date heading; None until
          # the first recognised weekday has been seen.
          self.current_day = None
          self.week_foods = {}

      def _register_tracker(self, tag, attr_match=None, **kwargs):
          """Create a Tracker, compiling the attribute pattern, and register it.

          ``attr_match`` is ``None`` or an ``(attribute_name, regex_source)``
          pair; remaining keyword arguments are passed to ``Tracker``.
          """
          compiled = None
          if attr_match:
              compiled = (attr_match[0], re.compile(attr_match[1]))
          tracker = Tracker(tag, compiled, **kwargs)
          self._trackers.append(tracker)
          return tracker

      def handle_date_display(self, data):
          """Start a new day when ``data`` contains a Finnish weekday name."""
          for index, name in enumerate(self._weekdays):
              if name in data:
                  self.current_day = []
                  self.week_foods[index] = self.current_day
                  return

      def handle_h3(self, data):
          """Record whether a heading inside the menu starts with a lunch type."""
          if self.in_menu:
              self.lunch_type_match = bool(self.lunch_re.match(data))

      def handle_menu_start(self):
          """Hook for the menu section opening; intentionally does nothing."""
          pass

      def handle_menu_end(self):
          """Hook for the menu section closing; intentionally does nothing."""
          pass

      def handle_allergy(self, data):
          """Append a non-empty allergy short name to the meal being collected."""
          data = data.strip()
          if self.in_allergy and self.in_allergy_short and self.lunch and data:
              self.lunch["allergies"].append(data)

      def handle_allergy_start(self):
          """Hook for the allergy element opening; intentionally does nothing."""
          pass

      def handle_allergy_end(self):
          """Hook for the allergy element closing; intentionally does nothing."""
          pass

      def handle_lunch_food(self, data):
          """Append a non-empty food-name text chunk to the meal being collected."""
          data = data.strip()
          if self.lunch and data:
              self.lunch["food"].append(data)

      def handle_lunch_start(self):
          """Begin collecting a meal, but only under a matching lunch heading."""
          if self.lunch_type_match:
              self.lunch = {"food": [], "allergies": []}

      def handle_lunch_end(self):
          """Store the collected meal in the current day and reset the collector.

          A meal with no food name, or one that closes before any date
          heading was seen, is dropped instead of raising.
          """
          if not self.lunch:
              return
          foods = self.lunch["food"]
          if foods and self.current_day is not None:
              menu = "{menu} ({allergies})".format(
                  menu=foods[0], allergies=", ".join(self.lunch["allergies"]))
              self.current_day.append(menu)
          self.lunch = None

      def handle_starttag(self, tag, attrs):
          """Forward a start tag to every registered tracker."""
          for tracker in self._trackers:
              tracker.handle_starttag(tag, attrs)

      def handle_endtag(self, tag):
          """Forward an end tag to every registered tracker."""
          for tracker in self._trackers:
              tracker.handle_endtag(tag)

      def handle_data(self, data):
          """Forward text to every tracker and pick up the week number."""
          for tracker in self._trackers:
              tracker.handle_data(data)
          week_match = self.week_re.match(data)
          if week_match:
              self.week = int(week_match.group(1))
  def get_restaurants(use_old, week):
      """Fetch and parse the Campusravita menu page.

      Args:
          use_old: Passed straight through to ``pnalib.get_file``; presumably
              selects a previously downloaded copy -- confirm in pnalib.
          week: Not used by this function.

      Returns:
          An empty list when no week number was found on the page; otherwise
          a one-element list holding
          ``[name, "", week_number, week_foods, restaurant_info_row]``.
      """
      page = pnalib.get_file(url, "campusravita.html", use_old)
      menu_parser = CampusravitaHTMLParser()
      menu_parser.feed(page)
      if menu_parser.week is None:
          return []
      info = restaurant_info[0]
      return [[info[0], "", menu_parser.week, menu_parser.week_foods, info]]