PNA.fi koodi

juvenes.pl 6.9KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234
  # Weekday names used to recognise day headings on the menu pages.  The
  # array is only declared here (as a package variable); its contents are
  # not assigned anywhere in this section.
  our @day_names;

  # Set by parse_juvenes() for each restaurant and read by parse_more_food():
  # when true, every <br> tag terminates the current food item.  Kept as a
  # package variable, which is what the previously undeclared name was.
  our $br_is_new_food;

  # Titles that other code in this file compares against by value.
  my $pinni_title             = "(TaY) Café Pinni";
  my $bio_title               = "(TAYS) Bio";
  my $kliininen_title         = "(TAYS) Arvo";
  my $kliininen_fusion_title  = "(TAYS) Arvo Fusion Kitchen";
  my $zip_salaattibaari_title = "(TTY) Zip Salaattibaari";

  # Common prefix of every menu page URL below.
  my $juvenes_base_url = "http://www.juvenes.fi/Suomeksi/Ravintolat_ja_kahvilat/Opiskelijaravintolat";

  # One row per restaurant: [ title, menu page URL, flag, position ].
  # Only the title and the URL are interpreted in this section; the flag
  # ("M" or empty) and the position ("left"/"middle"/"right") are passed
  # through untouched -- NOTE(review): presumably consumed by the page
  # renderer, confirm against the caller.
  my @restaurant_info = (
      [ "(TaY) Yliopiston Ravintola",                 "$juvenes_base_url/_TaY__Yliopiston_Ravintola",                "M", "left"   ],
      [ "(TaY) Yliopiston Ravintola / Salaattibaari", "$juvenes_base_url/_TaY__Yliopiston_Ravintola/Salaattibaari",  "",  "left"   ],
      [ "(TaY) Fusion Kitchen",                       "$juvenes_base_url/_TaY__Yliopiston_Ravintola/Fusion_Kitchen", "",  "left"   ],
      [ $pinni_title,                                 "$juvenes_base_url/_TaY__Caf__Pinni",                          "M", "middle" ],
      [ $bio_title,                                   "$juvenes_base_url/_TaY_Kauppi__Medica_Bio",                   "M", "left"   ],
      [ $kliininen_title,                             "$juvenes_base_url/_TaY_Kauppi__Medica_Arvo",                  "M", "left"   ],
      [ $kliininen_fusion_title,                      "$juvenes_base_url/_TaY_Kauppi__Medica_Arvo/Fusion_Kitchen",   "M", "left"   ],
      [ "(TTY) Newton",                               "$juvenes_base_url/_TTY__Newton",                              "",  "left"   ],
      [ "(TTY) Zip",                                  "$juvenes_base_url/_TTY__Zip",                                 "",  "right"  ],
      [ "(TTY) Edison",                               "$juvenes_base_url/_TTY__Edison",                              "",  "middle" ],
      # There is no Salaattibaari anymore?
      # [ $zip_salaattibaari_title, "$juvenes_base_url/_TTY__Zip/Salaattibaari", "", "right" ],
      [ "(TTY) Pastabaari",                           "$juvenes_base_url/_TTY__Caf____Fast_Voltti/Pastabaari",       "",  "middle" ],
      [ "(TTY) Fast Voltti",                          "$juvenes_base_url/_TTY__Caf____Fast_Voltti",                  "",  "middle" ],
      [ "(TTY) Fusion Kitchen",                       "$juvenes_base_url/_TTY__Newton/Fusion_Kitchen",               "",  "left"   ],
      [ "(TAMK) Dot",                                 "$juvenes_base_url/_TAMK__Dot__Ziberia_",                      "",  "middle" ],
  );

  # Accumulated result: one record per parsed restaurant page.
  my @restaurants;

  # Parser state shared by the parse_* subs; reset by parse_juvenes().
  my $parse_func;       # code ref of the handler for the next token
  my $week;             # week number captured from the page
  my $open_hours;       # opening-hours text, one line per text token
  my $day_id;           # index of the weekday currently being collected
  my $cur_title;        # title of the restaurant being parsed
  my $cur_food;         # text of the food item being assembled
  my @cur_day_foods;    # finished food items of the current day
  my @week_foods;       # one array ref of food items per finished day
  # Terminal parser state: accepts any token and deliberately does nothing,
  # so the rest of the document is consumed without further effect.
  sub parse_to_eof {
      return;
  }
  # Parser state used while inside a skipped <div>: ignores every token
  # until a closing </div> is seen, then hands control back to
  # parse_more_food.  Nesting is not tracked -- the first closing div of any
  # depth ends the skip.
  #
  # $token: array ref whose first element is the token kind ('E' for an end
  #         tag) and whose second element is the tag name for end tags.
  sub parse_skip_to_end_of_div {
      my ($token) = @_;
      my ($kind, $name) = @{$token}[0, 1];
      $parse_func = \&parse_more_food if $kind eq 'E' && $name eq 'div';
  }
  # Parser state that waits for the opening-hours heading: the first text
  # token starting with "Aukiolo" switches the parser to
  # parse_open_hours_end.  All other tokens are ignored.
  #
  # $token: array ref; [0] is the token kind ('T' for text), [1] the text.
  sub parse_open_hours_begin {
      my ($token) = @_;
      return unless $token->[0] eq 'T';
      $parse_func = \&parse_open_hours_end if $token->[1] =~ /^Aukiolo/;
  }
  # Parser state that collects the opening-hours text.  Every text token is
  # stripped of newlines and trailing spaces and, if anything remains,
  # appended to $open_hours as one line.  The text "Erityisruokavaliot"
  # marks the end of the section and switches the parser to parse_to_eof.
  #
  # $token: array ref; [0] is the token kind ('T' for text), [1] the text.
  sub parse_open_hours_end {
      my ($token) = @_;
      return if $token->[0] ne 'T';

      my $line = $token->[1];
      if ($line eq 'Erityisruokavaliot') {
          $parse_func = \&parse_to_eof;
          return;
      }

      $line =~ s/\n//g;
      $line =~ s/ +$//;
      return if $line eq "";
      $open_hours .= "$line\n";
  }
  # Closes the food item being assembled in $cur_food: removes one trailing
  # newline, stores the item in @cur_day_foods when it is non-empty, and
  # resets $cur_food.  The panini line of the restaurant titled
  # $pinni_title is dropped because it is offered every day.
  sub finish_food {
      chomp $cur_food;

      my $is_daily_panini = $cur_title eq $pinni_title
          && $cur_food =~ /Liha paniini.*tai Kasvis paniini/i;

      push @cur_day_foods, $cur_food
          unless $is_daily_panini || $cur_food eq "";

      $cur_food = "";
  }
  # Closes the current day: appends a copy of @cur_day_foods to @week_foods
  # as one array ref, empties @cur_day_foods, and advances $day_id by one.
  sub finish_day {
      my @day_snapshot = @cur_day_foods;
      push @week_foods, \@day_snapshot;
      @cur_day_foods = ();
      $day_id += 1;
  }
  # Main parser state while inside the weekly menu.
  #
  # $token is an array ref: [0] is the token kind ('S' start tag, 'T' text),
  # [1] the tag name or the text, and for start tags [2] a hash ref of
  # attributes.
  #
  # Start tags:
  #   * <div> whose style contains "display: none" is an infobox; the parser
  #     switches to parse_skip_to_end_of_div.
  #   * <div class="Column"> ends the menu: the pending food and day are
  #     closed and the parser moves on to parse_open_hours_begin.
  #   * <br> either ends the current food ($br_is_new_food) or adds a single
  #     line break inside it.
  # Text:
  #   * a later weekday name from @day_names closes the pending food and
  #     every day up to that weekday, so $day_id ends up equal to its index;
  #   * "&nbsp;" ends the current food;
  #   * anything else is whitespace-normalised and appended to $cur_food.
  sub parse_more_food {
      my $token = shift;
      my $kind  = $token->[0];

      if ($kind eq 'S') {
          my $tag   = $token->[1];
          my $attrs = $token->[2] || {};

          if ($tag eq 'div') {
              # Attributes may be absent; treat them as empty strings.
              my $style = defined $attrs->{'style'} ? $attrs->{'style'} : '';
              my $class = defined $attrs->{'class'} ? $attrs->{'class'} : '';

              if ($style =~ /display: *none/) {
                  # infobox, skip
                  $parse_func = \&parse_skip_to_end_of_div;
              } elsif ($class eq 'Column') {
                  # end of food
                  finish_food();
                  finish_day();
                  $parse_func = \&parse_open_hours_begin;
              }
          } elsif ($tag eq 'br') {
              if ($br_is_new_food) {
                  finish_food();
              } elsif ($cur_food ne "" && substr($cur_food, -1) ne "\n") {
                  # Never emit two consecutive line breaks.
                  $cur_food .= "\n";
              }
          }
          return;
      }

      return if $kind ne 'T';
      my $text = $token->[1];

      for my $day ($day_id + 1 .. 6) {
          next unless defined($day_names[$day]) && $text eq $day_names[$day];

          # Close every day between the current one and the named one.  The
          # count is fixed up front: the earlier loop compared a decreasing
          # $day with the increasing $day_id and closed too few days when
          # the page jumped three or more weekdays ahead.
          for (1 .. $day - $day_id) {
              finish_food();
              finish_day();
          }
          return;
      }

      if ($text eq "&nbsp;") {
          # next food
          finish_food();
          return;
      }

      $text =~ tr/\r\n\t/ /;
      $text =~ s/ +/ /g;
      $text =~ s/^ +//;
      $text =~ s/^\.+//;
      $text =~ s/ +$//;
      # Shorten "sisältää X" to "sis.X".
      $text =~ s/sisältää ([^, \)]+)/sis.$1/ig;
      $cur_food .= $text;
      return;
  }
  # Parser state that waits for the first weekday heading of the menu.
  #
  # When a text token equals one of the first seven entries of @day_names,
  # finish_day() is called once for every weekday before it (so those days
  # are stored as empty) and the parser switches to parse_more_food.  All
  # other tokens are ignored.
  #
  # $token: array ref; [0] is the token kind ('T' for text), [1] the text.
  sub parse_monday {
      my $token = shift;
      return unless $token->[0] eq 'T';
      my $text = $token->[1];

      for my $day (0 .. 6) {
          next unless defined($day_names[$day]) && $text eq $day_names[$day];

          # Store an empty day for each weekday missing before the first
          # heading.
          finish_day() for 1 .. $day;
          $parse_func = \&parse_more_food;

          # Stop at the first match.  The previous code used "break", which
          # is not a Perl loop-control statement, and also rewound the loop
          # variable, so a week starting on any day but Monday never left
          # the loop.
          last;
      }
      return;
  }
  # Initial parser state: waits for a text token containing
  # "Viikko: <digits>", stores the digits in $week and switches the parser
  # to parse_monday.  All other tokens are ignored.
  #
  # $token: array ref; [0] is the token kind ('T' for text), [1] the text.
  sub parse_week {
      my ($token) = @_;
      return unless $token->[0] eq 'T';

      if (my ($week_number) = $token->[1] =~ /Viikko: (\d+)/) {
          $week       = $week_number;
          $parse_func = \&parse_monday;
      }
  }
  # Parses one downloaded menu page and appends the result to @restaurants.
  #
  # $fname:    path of the HTML file to parse.
  # $info_ref: array ref describing the restaurant; element 0 is the title.
  #
  # Dies if the HTML parser cannot be created for $fname.  All parser state
  # is reset first, then every token of the document is fed to whichever
  # handler $parse_func currently points at, starting with parse_week.  The
  # stored record is [ title, opening hours, week, [ days ], $info_ref ].
  sub parse_juvenes {
      my ($fname, $info_ref) = @_;

      my $parser = HTML::TokeParser->new($fname) or die("Can't open file $fname");
      my $title  = $info_ref->[0];

      # Reset the shared parser state for this page.
      ($week, $open_hours, $cur_food) = ("", "", "");
      $day_id         = 0;
      @cur_day_foods  = ();
      @week_foods     = ();
      $cur_title      = $title;
      $br_is_new_food = $title eq $zip_salaattibaari_title;
      $parse_func     = \&parse_week;

      while (my $token = $parser->get_token) {
          $parse_func->($token);
      }

      my @days = @week_foods;
      push @restaurants, [ $title, $open_hours, $week, \@days, $info_ref ];
  }
  # Tells whether the restaurants titled $bio_title and $kliininen_title
  # serve the same food on the given day.
  #
  # $day: index into each restaurant's list of days.
  #
  # Returns true when the newline-joined food lists of both restaurants for
  # that day are identical strings (two empty lists also compare equal).
  sub can_merge_bio_kliininen {
      my $day = shift;

      my $bio_menu       = "";
      my $kliininen_menu = "";

      for my $restaurant (@restaurants) {
          my $name = $restaurant->[0];

          # Pick the accumulator for this restaurant; others are skipped.
          my $menu_ref;
          $menu_ref = \$kliininen_menu if $name eq $kliininen_title;
          $menu_ref = \$bio_menu       if $name eq $bio_title;
          next unless $menu_ref;

          my $day_foods = $restaurant->[3][$day] || [];
          ${$menu_ref} .= join "", map { "$_\n" } @{$day_foods};
      }

      return $bio_menu eq $kliininen_menu;
  }
  # Merges the listing of the two restaurants titled $bio_title and
  # $kliininen_title when can_merge_bio_kliininen() reports equal menus.
  #
  # $title_ref: reference to the title of the restaurant being handled; for
  #             the $bio_title restaurant " + Kliininen" is appended in place.
  # $day:       day index passed on to can_merge_bio_kliininen().
  #
  # Returns 1 only for the $kliininen_title restaurant when the menus are
  # equal, and 0 in every other case.
  sub try_merge_bio_kliininen {
      my ($title_ref, $day) = @_;
      my $name = ${$title_ref};

      my $is_bio       = $name eq $bio_title;
      my $is_kliininen = !$is_bio && $name eq $kliininen_title;

      return 0 unless $is_bio || $is_kliininen;
      return 0 unless can_merge_bio_kliininen($day);
      return 1 if $is_kliininen;

      ${$title_ref} .= " + Kliininen";
      return 0;
  }
  # Downloads (or reuses) the menu page of every entry in @restaurant_info,
  # parses it with parse_juvenes() and returns the accumulated @restaurants.
  #
  # $use_old: when true, an already existing "juvenes<N>.temp.html" file is
  #           reused instead of being downloaded again.
  #
  # Each page is fetched with wget into "<file>.tmp" and moved into place
  # only after a successful download, so a failed fetch leaves the previous
  # copy (if any) to be parsed.  Entries without any local copy are skipped.
  sub get_juvenes_restaurants {
      my $use_old = shift;

      for my $index (0 .. $#restaurant_info) {
          my @info        = @{ $restaurant_info[$index] };
          my $url         = $info[1];
          my $temp_fname  = "juvenes$index.temp.html";
          my $download_to = "$temp_fname.tmp";

          if (!-f $temp_fname || !$use_old) {
              # List form: the URL and file names are passed to wget as-is,
              # without going through a shell.
              my $status = system('wget', '-q', '--timeout=10',
                                  '-O', $download_to, $url);
              if ($status == 0) {
                  rename($download_to, $temp_fname)
                      or warn "Can't rename $download_to to $temp_fname: $!\n";
              } else {
                  # Do not leave a partial download behind.
                  unlink $download_to;
              }
          }

          parse_juvenes($temp_fname, \@info) if -f $temp_fname;
      }

      return @restaurants;
  }
  206. 1;