PNA.fi koodi

juvenes.pl 6.7KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221
  # @day_names is a package variable that is only declared here; nothing in
  # this file assigns to it, so it must be populated by the including code.
  # The parser compares text tokens against its entries to detect day headings.
  use vars qw(@day_names);

  # Titles that the code below compares against by value.
  my $pinni_title = "(TaY) Café Pinni";
  my $bio_title = "(TAYS) Bio";
  my $kliininen_title = "(TAYS) Arvo";
  my $kliininen_fusion_title = "(TAYS) Arvo Fusion Kitchen";
  my $zip_salaattibaari_title = "(TTY) Zip Salaattibaari";

  # Common prefix of every menu page URL listed below.
  my $juvenes_base_url = "http://www.juvenes.fi/Suomeksi/Ravintolat_ja_kahvilat/Opiskelijaravintolat/";

  # One row per restaurant: [ title, menu page URL, flag, position ].
  # Only the title and the URL are read in this file; the whole row is handed
  # on untouched as the last element of each parsed restaurant record.
  # NOTE(review): the third field ("M" or "") and the fourth field
  # ("left"/"middle"/"right") look like display hints for the consumer of
  # the records -- confirm against the code that renders them.
  my @restaurant_info = (
      [ "(TaY) Yliopiston Ravintola", $juvenes_base_url . "_TaY__Yliopiston_Ravintola", "M", "left" ],
      [ "(TaY) Yliopiston Ravintola / Salaattibaari", $juvenes_base_url . "_TaY__Yliopiston_Ravintola/Salaattibaari", "", "left" ],
      [ "(TaY) Fusion Kitchen", $juvenes_base_url . "_TaY__Yliopiston_Ravintola/Fusion_Kitchen", "", "left" ],
      [ $pinni_title, $juvenes_base_url . "_TaY__Caf__Pinni", "M", "middle" ],
      [ $bio_title, $juvenes_base_url . "_TaY_Kauppi__Medica_Bio", "M", "left" ],
      [ $kliininen_title, $juvenes_base_url . "_TaY_Kauppi__Medica_Arvo", "M", "left" ],
      [ $kliininen_fusion_title, $juvenes_base_url . "_TaY_Kauppi__Medica_Arvo/Fusion_Kitchen", "M", "left" ],
      [ "(TTY) Newton", $juvenes_base_url . "_TTY__Newton", "", "left" ],
      [ "(TTY) Zip", $juvenes_base_url . "_TTY__Zip", "", "right" ],
      [ "(TTY) Edison", $juvenes_base_url . "_TTY__Edison", "", "middle" ],
      [ $zip_salaattibaari_title, $juvenes_base_url . "_TTY__Zip/Salaattibaari", "", "right" ],
      [ "(TTY) Pastabaari", $juvenes_base_url . "_TTY__Caf____Fast_Voltti/Pastabaari", "", "middle" ],
      [ "(TTY) Fast Voltti", $juvenes_base_url . "_TTY__Caf____Fast_Voltti", "", "middle" ],
      [ "(TTY) Fusion Kitchen", $juvenes_base_url . "_TTY__Newton/Fusion_Kitchen", "", "left" ],
      [ "(TAMK) Dot", $juvenes_base_url . "_TAMK__Dot__Ziberia_", "", "middle" ],
  );

  # Parsed records, one per successfully parsed page:
  # [ title, open hours text, week number, [ foods per day ], info row ].
  my @restaurants;

  # Parser state shared by the parse_* / finish_* subs below. $parse_func is
  # the handler that receives the next token; the handlers switch it as the
  # page is walked.
  my ($parse_func, $week, $open_hours, $day_id, $cur_title);
  my ($cur_food, @cur_day_foods, @week_foods);

  # True when a <br> ends the current food instead of adding a line break to
  # it. It was previously used without any declaration (an implicit package
  # global); it is declared here alongside the rest of the parser state.
  my $br_is_new_food;
  # Terminal parser state: every remaining token is ignored.
  sub parse_to_eof {
      return;
  }
  # Parser state used while skipping a hidden info box: ignores tokens until
  # the first closing </div>, then hands control back to parse_more_food.
  #   $token - token array ref; element 0 is the token type, element 1 the tag.
  sub parse_skip_to_end_of_div {
      my ($tok) = @_;
      return unless $tok->[0] eq 'E';
      return unless $tok->[1] eq 'div';
      $parse_func = \&parse_more_food;
  }
  # Parser state that waits for the opening-hours heading: a text token that
  # starts with "Aukiolo" switches the parser to parse_open_hours_end.
  #   $token - token array ref; element 0 is the token type, element 1 the text.
  sub parse_open_hours_begin {
      my ($tok) = @_;
      my ($kind, $text) = @{$tok}[0, 1];
      $parse_func = \&parse_open_hours_end
          if $kind eq 'T' && $text =~ /^Aukiolo/;
  }
  # Parser state that collects the opening-hours text. Each non-empty text
  # token is appended to $open_hours as its own line (newlines removed,
  # trailing spaces trimmed). The text "Erityisruokavaliot" ends the section
  # and switches the parser to parse_to_eof.
  #   $token - token array ref; element 0 is the token type, element 1 the text.
  sub parse_open_hours_end {
      my ($tok) = @_;
      return if $tok->[0] ne 'T';

      my $line = $tok->[1];
      if ($line eq 'Erityisruokavaliot') {
          $parse_func = \&parse_to_eof;
          return;
      }

      $line =~ tr/\n//d;
      $line =~ s/ +$//;
      return if $line eq "";
      $open_hours .= "$line\n";
  }
  # Closes the food item accumulated in $cur_food: strips one trailing
  # newline, stores the item in @cur_day_foods unless it is empty, and resets
  # $cur_food. The panini line of Café Pinni is dropped because it is on
  # offer every day.
  sub finish_food {
      chomp $cur_food;

      my $is_daily_panini = $cur_food =~ /Liha paniini.*tai Kasvis paniini/i
          && $cur_title eq $pinni_title;
      unless ($is_daily_panini || $cur_food eq "") {
          push @cur_day_foods, $cur_food;
      }

      $cur_food = "";
  }
  # Closes the current day: appends a copy of @cur_day_foods to @week_foods,
  # empties @cur_day_foods and advances $day_id to the next day.
  sub finish_day {
      my @snapshot = @cur_day_foods;
      @cur_day_foods = ();
      push @week_foods, \@snapshot;
      $day_id += 1;
  }
  # Main parser state for the menu section.
  #   $token - token array ref: element 0 is the type ('S' start tag, 'T'
  #            text), element 1 the tag name or text, element 2 (start tags)
  #            the attribute hash ref.
  # Start tags:
  #   <div style="display: none"> - hidden info box, skipped up to its </div>
  #   <div class="Column">        - end of the menu; closes food and day and
  #                                 moves on to the opening hours
  #   <br>                        - ends the food when $br_is_new_food is set,
  #                                 otherwise adds a single line break to it
  # Text:
  #   next day's name - closes the current food and day
  #   "&nbsp;"        - closes the current food
  #   anything else   - normalised and appended to the current food
  sub parse_more_food {
      my ($tok) = @_;
      my $kind = $tok->[0];

      if ($kind eq 'S') {
          my %attrs = %{$tok->[2]};
          my $tag = $tok->[1];

          if ($tag eq 'br') {
              if ($br_is_new_food) {
                  finish_food();
              }
              elsif ($cur_food ne "" && $cur_food !~ /\n\z/) {
                  # never stack line breaks inside one food
                  $cur_food .= "\n";
              }
          }
          elsif ($tag eq 'div') {
              if ($attrs{'style'} =~ /display: *none/) {
                  $parse_func = \&parse_skip_to_end_of_div;
              }
              elsif ($attrs{'class'} eq 'Column') {
                  finish_food();
                  finish_day();
                  $parse_func = \&parse_open_hours_begin;
              }
          }
          return;
      }

      return if $kind ne 'T';

      my $text = $tok->[1];

      if ($day_id < 6 && $text eq $day_names[$day_id+1]) {
          finish_food();
          finish_day();
          return;
      }

      if ($text eq "&nbsp;") {
          finish_food();
          return;
      }

      # Turn CR/LF/TAB into spaces and squeeze every run of spaces to one.
      $text =~ tr/\r\n\t / /s;
      for ($text) {
          s/^ +//;
          s/^\.+//;
          s/ +$//;
          s/sisältää ([^, \)]+)/sis.$1/ig;
      }
      $cur_food .= $text;
  }
  # Parser state that waits for the first day heading: a text token equal to
  # $day_names[0] switches the parser to parse_more_food.
  #   $token - token array ref; element 0 is the token type, element 1 the text.
  sub parse_monday {
      my ($tok) = @_;
      return unless $tok->[0] eq 'T';
      $parse_func = \&parse_more_food if $tok->[1] eq $day_names[0];
  }
  # Initial parser state: looks for a text token containing "Viikko: <digits>",
  # stores the digits in $week and switches the parser to parse_monday.
  #   $token - token array ref; element 0 is the token type, element 1 the text.
  sub parse_week {
      my ($tok) = @_;
      return unless $tok->[0] eq 'T';

      if (my ($number) = $tok->[1] =~ /Viikko: (\d+)/) {
          $week = $number;
          $parse_func = \&parse_monday;
      }
  }
  # Parses one downloaded menu page and appends the result to @restaurants.
  #   $fname    - path of the saved HTML page
  #   $info_ref - array ref of the restaurant's info row; element 0 is its title
  # Dies when the parser object cannot be created for $fname.
  # The appended record is
  #   [ title, open hours text, week number, [ foods per day ], $info_ref ].
  sub parse_juvenes {
      my ($fname, $info_ref) = @_;
      my $parser = HTML::TokeParser->new($fname) or die("Can't open file $fname");
      my $title = $info_ref->[0];

      # Reset all shared parser state before walking the page.
      ($week, $open_hours, $cur_food) = ("", "", "");
      $day_id = 0;
      @cur_day_foods = ();
      @week_foods = ();
      $cur_title = $title;
      $br_is_new_food = $title eq $zip_salaattibaari_title;

      # Feed each token to whichever handler is current; the handlers switch
      # $parse_func themselves as the page is walked.
      $parse_func = \&parse_week;
      while (my $token = $parser->get_token) {
          $parse_func->($token);
      }

      push @restaurants, [ $title, $open_hours, $week, [ @week_foods ], $info_ref ];
  }
  # Tells whether the restaurants titled $bio_title and $kliininen_title
  # serve the same foods, in the same order, on the given day.
  #   $day - index into each restaurant's per-day food list
  # Returns true when the two menus are identical, which includes the case
  # where neither restaurant has any food for that day.
  sub can_merge_bio_kliininen {
      my $day = shift;
      my ($bio_menu, $kliininen_menu) = ("", "");

      for my $entry (@restaurants) {
          my $name = $entry->[0];
          my $menu_ref = $name eq $bio_title       ? \$bio_menu
                       : $name eq $kliininen_title ? \$kliininen_menu
                       :                             undef;
          next unless $menu_ref;

          # Work on a copy of the week so the stored record is never touched.
          my @days = @{$entry->[3]};
          my $day_foods = $days[$day];
          ${$menu_ref} .= join "", map { "$_\n" } @{ $day_foods || [] };
      }

      return $bio_menu eq $kliininen_menu;
  }
  # Merges the Bio and Arvo entries for one day when their menus match.
  #   $title_ref - reference to the title of the restaurant being handled
  #   $day       - day index, passed on to can_merge_bio_kliininen
  # For $bio_title with a matching menu, " + Kliininen" is appended to the
  # referenced title and 0 is returned. For $kliininen_title with a matching
  # menu, 1 is returned. In every other case 0 is returned and the title is
  # left alone.
  # NOTE(review): a return of 1 presumably tells the caller to leave that
  # entry out -- confirm at the call site.
  sub try_merge_bio_kliininen {
      my ($title_ref, $day) = @_;
      my $name = ${$title_ref};

      my $is_bio = $name eq $bio_title;
      return 0 unless $is_bio || $name eq $kliininen_title;
      return 0 unless can_merge_bio_kliininen($day);

      if ($is_bio) {
          ${$title_ref} .= " + Kliininen";
          return 0;
      }
      return 1;
  }
  # Downloads (or reuses) the menu page of every restaurant in
  # @restaurant_info, parses each one and returns the parsed records.
  #   $use_old - when true, an existing juvenesN.temp.html file is reused
  #              instead of being downloaded again
  # Returns the list of records built by parse_juvenes. A restaurant whose
  # page has never been fetched successfully is left out.
  sub get_juvenes_restaurants {
      my $use_old = shift;

      # Start from a clean slate so that calling this sub again does not
      # return every restaurant twice.
      @restaurants = ();

      my $count = 0;
      foreach my $info_ref (@restaurant_info) {
          my @info = @{$info_ref};
          my $temp_fname = "juvenes$count.temp.html";
          my $url = $info[1];

          if (!-f $temp_fname || !$use_old) {
              # Download to a scratch file and move it into place only on
              # success, so a failed fetch never clobbers the previous copy.
              # List-form system() bypasses the shell, so nothing in the URL
              # or file name needs quoting.
              my $download = "$temp_fname.tmp";
              my $status = system('wget', '-q', '--timeout=10', '-O', $download, $url);
              if ($status == 0) {
                  rename($download, $temp_fname)
                      or warn "Can't rename $download to $temp_fname: $!\n";
              }
          }

          # Falls back to a stale copy when the download failed.
          parse_juvenes($temp_fname, \@info) if -f $temp_fname;
          $count++;
      }

      return @restaurants;
  }
  194. 1;