ruby regex, parsing html

Posted by danwoods on Stack Overflow See other posts from Stack Overflow or by danwoods
Published on 2010-04-03T15:29:47Z Indexed on 2010/04/03 15:33 UTC
Read the original article Hit count: 480

Filed under:
|

Hello all, I'm trying to parse some returned html to look for currently playing movies. The pattern I'm trying to match looks like:
<span dir=ltr>Clash of the Titans</span>

Of which there are several in the returned html. (the html is huge, I've posted a sample at the bottom)

I'm trying get an array of the movie titles with the following command:
titles = listings_html.split(/(<span dir=ltr>).*(<\/span>)/)

But I'm not getting the results I'm expecting. Can anyone see a problem with my approach or regex?

Returned html (I believe the 'markdown'formating will render the some of the html, but this is just an example): <script>window.gbar={};(function(){function h(a,b,d){var c="on"+b;if(a.addEventListener)a.addEventListener(b,d,false);else if(a.attachEvent)a.attachEvent(c,d);else{var f=a[c];a[c]=function(){var e=f.apply(this,arguments),g=d.apply(this,arguments);return e==undefined?g:g==undefined?e:g&&e}}};var i=window.gbar,k,l,m;function n(a){var b=window.encodeURIComponent&&(document.forms[0].q||"").value;if(b)a.href=a.href.replace(/([?&])q=[^&]*|$/,function(d,c){return(c||"&")+"q="+encodeURIComponent(b)})}i.qs=n;function o(a,b,d,c,f,e){var g=document.getElementById(a);if(g){var j=g.style;j.left=c?"auto":b+"px";j.right=c?b+"px":"auto";j.top=d+"px";j.visibility=l?"hidden":"visible";if(f&&e){j.width=f+"px";j.height=e+"px"}else{o(k,b,d,c,g.offsetWidth,g.offsetHeight);l=l?"":a}}}i.tg=function(a){a=a||window.event;var b,d=a.target||a.srcElement;a.cancelBubble=true;if(k!=null)p(d);else{b=document.createElement(Array.every||window.createPopup?"iframe":"div");b.frameBorder="0";k=b.id="gbs";b.src="javascript:''";d.parentNode.appendChild(b);h(document,"click",i.close);p(d);i.alld&&i.alld(function(){var c=document.getElementById("gbli");if(c){var f=c.parentNode;q(f,c);var e=c.prevSibling;f.removeChild(c);i.removeExtraDelimiters(f,e);b.style.height=f.offsetHeight+"px"}})}};function r(a){var b,d=document.defaultView;if(d&&d.getComputedStyle){if(a=d.getComputedStyle(a,""))b=a.direction}else b=a.currentStyle?a.currentStyle.direction:a.style.direction;return b=="rtl"}function p(a){var b=0;if(a.className!="gb3")a=a.parentNode;var d=a.getAttribute("aria-owns")||"gbi",c=a.offsetWidth,f=a.offsetTop>20?46:24,e=false;do b+=a.offsetLeft||0;while(a=a.offsetParent);a=(document.documentElement.clientWidth||document.body.clientWidth)-b-c;c=r(document.body);if(d=="gbi"){var g=document.getElementById("gbi");q(g,document.getElementById("gbli")||g.firstChild);if(c){b=a;e=true}}else if(!c){b=a;e=true}l!=d&&i.close();o(d,b,f,e)}i.close=function(){l&&o(l,0,0)};function s(a,b,d){if(!m){m="gb2";if(i.alld){var c=i.findClassName(a);if(c)m=c}}a.insertBefore(b,d).className=m}function q(a,b){for(var d,c=window.navExtra;c&&(d=c.pop());)s(a,d,b)}i.addLink=function(a,b,d){if((b=document.getElementById(b))&&a){a.className="gb4";var c=document.createElement("span");c.appendChild(a);c.appendChild(document.createTextNode(" | "));c.id=d;b.appendChild(c)}}})();if(!window.google)window.google={};if(!window.google.movies)window.google.movies={};window.google.movies.registerFixdir=function(){var c="[\u0000- !-@[-{-\u00bf\u00d7\u00f7\u02b9-\u02ff\u2000-\u2bff]",g=new RegExp("^"+c+"([0-9]"+c+"$|[A-Za-z\u00c0-\u00d6\u00d8-\u00f6\u00f8-\u02b8\u0300-\u0590\u0800-\u1fff\u2c00-\ufb1c\ufdfe-\ufe6f\ufefd-\uffff])"),h=new RegExp("^"+c+"$");function e(d,a){if(!a)a=d&&d.target?d.target:window.event.srcElement;a.dir=g.test(a.value)?"ltr":(h.test(a.value)?"":"rtl")} var i=[document.getElementsByName("q")[0],document.getElementById("mtq")];for(var f=0,b;b=i[f];f++)if(b){b.onkeyup=e;e(null,b)}}; Movie Showtimes - Google Search.fl:link{}a:link,.w,a.w:link,.w a:link{color:#00c}a:visited{color:#551a8b}a:active{color:red}.t a:link,.t a:active,.t a:visited,.t{color:#000}.left{width:12em}.box{background:#fff}.nopadding{padding:0}.k{background:#36c}.z{display:none}.x{width:3em}.y{width:23em}.b{color:#00c;font-size:12pt;font-weight:bold}.i,.i:link{color:#a90a08}.n a{color:#000;font-size:10pt}.n .b a{color:#00c}.n .i{font-size:10pt;font-weight:bold}.h{cursor:pointer}body{background:#fff;font:82% Arial,Helvetica,sans-serif;margin:3px 0 0;padding:0}table{border-collapse:collapse;border-spacing:0}img{border:0}td,th{vertical-align:top}h1,h2{font-size:100%;margin:0}a{color:#00c}/ CSS for page */#title_bar{background:#f0f7f9;border-top:1px solid #6b90da;padding-bottom:4px;padding-left:8px;padding-top:4px}#google_bar{margin:3px 10px}#search_form{margin:3px 10px}#left_nav{border-right:1px solid #c9d7f1;margin-top:11px;position:absolute;left:9px;width:13.4em}#left_nav .section{margin-bottom:1.2em}.hidden{visibility:hidden}#results{height:auto !important;height:350px;margin-left:15em;min-height:350px;min-width:800px;width:expression(document.body.clientWidth<1000?"800px":"99.9%")}.name{font-size:124%;margin:0}.times{clear:both;margin:0}.address{margin:0}#movie_results{overflow:auto}.movie_results{margin-top:11px}.movie{clear:both;margin-bottom:40px}.movie .header{padding-left:8px}.movie .img{border:1px solid #ccc;float:left;margin-bottom:10px}.movie .desc{margin-bottom:15px;max-width:42em}.movie h2{font-size:124%;margin-bottom:2px}.movie .info{margin-bottom:10px}.movie .syn{margin-bottom:10px}.movie .section_title{background:#f0f7f9;clear:both;font-size:108%;margin-bottom:11px;margin-top:11px;padding-bottom:4px;padding-left:8px;padding-top:5px}.movie .showtimes{margin-bottom:8px;padding-left:8px}.movie .show_left{width:49%}.movie .show_right{width:49%}.movie .theater{padding-bottom:15px}.theater{clear:both;padding-bottom:1px}.theater_after_icon{padding-left:25px}.theater .show_left{width:49%}.theater .show_right{width:49%}.theater h2{font-size:124%;margin-bottom:2px}.theater .icon{float:left;height:3em;margin-right:5px}.theater .closure{font-size:100%}.theater .info{font-size:100%;padding-bottom:5px;padding-top:5px}.theater .movie{margin-bottom:8px;margin-right:8px;max-width:42em}.theater .movie .desc{margin-bottom:5px;margin-left:0}.theater .movie .info{margin-top:0}.theater .showtimes{margin-bottom:40px;margin-top:8px}#theater_map{right:0;left:0;position:relative;top:0}#theater_static_map{border:1px solid #c9d7f1;margin:10px}.map_marker .name{margin-top:10px}.photo{border:1px solid #ccc;margin-bottom:20px;margin-left:8px}.show_left{float:left;margin:0;width:49.999%}.show_right{float:right;margin:0;width:50%}.show_more{clear:both;font-size:124%;margin:0}.show_more a{color:#77c}.reviews{margin-bottom:8px;padding-left:8px}.review{margin-bottom:5px}.review .publisher{color:green}.review .date{color:#6f6f6f}.trailer{margin-bottom:8px;padding-left:8px}.clear{clear:both}.iconA{background:url(http://maps.gstatic.com/mapfiles/red_icons_A_J.png) repeat 0 0}.iconB{background:url(http://maps.gstatic.com/mapfiles/red_icons_A_J.png) repeat 0 -38px}.iconC{background:url(http://maps.gstatic.com/mapfiles/red_icons_A_J.png) repeat 0 -76px}.iconD{background:url(http://maps.gstatic.com/mapfiles/red_icons_A_J.png) repeat 0 -114px}.iconE{background:url(http://maps.gstatic.com/mapfiles/red_icons_A_J.png) repeat 0 -152px}.iconF{background:url(http://maps.gstatic.com/mapfiles/red_icons_A_J.png) repeat 0 -190px}.iconG{background:url(http://maps.gstatic.com/mapfiles/red_icons_A_J.png) repeat 0 -228px}.iconH{background:url(http://maps.gstatic.com/mapfiles/red_icons_A_J.png) repeat 0 -266px}.iconI{background:url(http://maps.gstatic.com/mapfiles/red_icons_A_J.png) repeat 0 -304px}.iconJ{background:url(http://maps.gstatic.com/mapfiles/red_icons_A_J.png) repeat 0 -342px}.iconK{background:url(http://maps.gstatic.com/mapfiles/red_icons_K_Z.png) repeat 0 0}.iconL{background:url(http://maps.gstatic.com/mapfiles/red_icons_K_Z.png) repeat 0 -38px}.iconM{background:url(http://maps.gstatic.com/mapfiles/red_icons_K_Z.png) repeat 0 -76px}.iconN{background:url(http://maps.gstatic.com/mapfiles/red_icons_K_Z.png) repeat 0 -114px}.iconO{background:url(http://maps.gstatic.com/mapfiles/red_icons_K_Z.png) repeat 0 -152px}.iconP{background:url(http://maps.gstatic.com/mapfiles/red_icons_K_Z.png) repeat 0 -190px}.iconQ{background:url(http://maps.gstatic.com/mapfiles/red_icons_K_Z.png) repeat 0 -228px}.iconR{background:url(http://maps.gstatic.com/mapfiles/red_icons_K_Z.png) repeat 0 -266px}.iconS{background:url(http://maps.gstatic.com/mapfiles/red_icons_K_Z.png) repeat 0 -304px}.iconT{background:url(http://maps.gstatic.com/mapfiles/red_icons_K_Z.png) repeat 0 -342px}.iconU{background:url(http://maps.gstatic.com/mapfiles/red_icons_K_Z.png) repeat 0 -380px}.iconV{background:url(http://maps.gstatic.com/mapfiles/red_icons_K_Z.png) repeat 0 -418px}.iconW{background:url(http://maps.gstatic.com/mapfiles/red_icons_K_Z.png) repeat 0 -456px}.iconX{background:url(http://maps.gstatic.com/mapfiles/red_icons_K_Z.png) repeat 0 -494px}.iconY{background:url(http://maps.gstatic.com/mapfiles/red_icons_K_Z.png) repeat 0 -532px}.iconZ{background:url(http://maps.gstatic.com/mapfiles/red_icons_K_Z.png) repeat 0 -570px}#gbar,#guser{font-size:13px;padding-top:1px !important}#gbar{float:left;height:22px}#guser{padding-bottom:7px !important;text-align:right}.gbh,.gbd{border-top:1px solid #c9d7f1;font-size:1px}.gbh{height:0;position:absolute;top:24px;width:100%}#gbs,.gbm{background:#fff;left:0;position:absolute;text-align:left;visibility:hidden;z-index:1000}.gbm{border:1px solid;border-color:#c9d7f1 #36c #36c #a2bae7;z-index:1001}.gb1{margin-right:.5em}.gb1,.gb3{zoom:1}.gb2{display:block;padding:.2em .5em;}.gb2,.gb3{text-decoration:none;border-bottom:none}a.gb1,a.gb2,a.gb3,a.gb4{color:#00c !important}.gbi .gb3,.gbi .gb2,.gbi .gb4{color:#dd8e27 !important}.gbf .gb3,.gbf .gb2,.gbf .gb4{color:#900 !important}a.gb2:hover{background:#36c;color:#fff !important}Web Images Videos Maps News Shopping Gmail more ▼Books Finance Translate Scholar Blogs YouTube Calendar Photos Documents Reader Sites Groups even more » [email protected] | Google Account settings | Sign out     Advanced Search
  PreferencesShowtimes for Murfreesboro, TN 37130Change Location

Today › Tomorrow › Monday › Tuesday› Theaters › Movies› Show list view › Show map viewPremiere 6 Theater810 Northwest Broad Street, Murfreesboro, TN - (615) 896-4100Clash of the Titans? - 1hr 50min?? - Rated PG-13?? - Action/Adventure? - Trailer - IMDb2:10  4:15  6:15  8:20  10:25pmDiary of a Wimpy Kid? - 1hr 33min?? - Rated PG?? - Comedy/Drama? - Trailer - IMDb2:00  3:50  6:00  7:50  9:40pmHow to Train Your Dragon?1hr 38min?? - Rated PG?? - Family/Animation? - IMDb2:00  3:55  6:00  7:55  9:50pmThe Bounty Hunter? - 1hr 46min?? - Rated PG-13?? - Action/Adventure/Comedy/Romance? - Trailer - IMDb2:15  4:15  6:25  8:25  10:30pmThe Last Song? - 1hr 47min?? - Rated PG?? - Drama? - Trailer - IMDb2:20  4:15  6:30  8:35  10:35pmTyler Perry's Why Did I Get Married Too?2hr 1min?? - Rated PG-13?? - Comedy?2:20  4:35  7:30  9:45pmContinental Cinema 5450 US Highway 231 N, Troy, AL - (334) 808-4225Clash of the Titans 3D? - 1hr 50min?? - Rated PG-13?? - Action/Adventure? - IMDb1:00  4:00  7:00  9:30pmHow to Train Your Dragon 3D? - 1hr 38min?? - Rated PG?? - Family/Animation? - IMDb1:05  4:05  7:05  9:25pmThe Bounty Hunter? - 1hr 46min?? - Rated PG-13?? - Action/Adventure/Comedy/Romance? - Trailer - IMDb1:00  4:00  7:00  9:30pmThe Last Song? - 1hr 47min?? - Rated PG?? - Drama? - Trailer - IMDb1:05  4:05  7:05  9:25pmTyler Perry's Why Did I Get Married Too?2hr 1min?? - Rated PG-13?? - Comedy?12:55  3:55  6:55  9:35pmMall Cinema - Hartford KYUS Hwy 231 South 62 East, Hartford, KY - (270) 298-3315Clash of the Titans? - 1hr 50min?? - Rated PG-13?? - Action/Adventure? - Trailer - IMDb5:00  7:00  9:00pmHow to Train Your Dragon?1hr 38min?? - Rated PG?? - Family/Animation? - IMDb5:00  7:00  9:00pmCarmike Wynnsong 16 - Murfreesboro2626 Cason Square Boulevard, Murfreesboro, TN - (615) 893-2253The Last Song? - 1hr 47min?? - Rated PG?? - Drama? - Trailer - IMDb12:15  1:00  2:45  4:00  5:15  7:00  7:45 

© Stack Overflow or respective owner

Related posts about ruby

Related posts about regex