Python regex on list
        Posted  
        
            by Peter Nielsen
        on Stack Overflow
        
        See other posts from Stack Overflow
        
            or by Peter Nielsen
        
        
        
        Published on 2010-05-22T10:48:29Z
        Indexed on 
            2010/05/22
            11:00 UTC
        
        
        Read the original article
        Hit count: 352
        
Hi there
I am trying to build a parser and save the results as an XML file, but I have run into problems. For instance, I get a `TypeError: expected string or buffer` when I try to run the code.
Would you experts please have a look at my code?
# Extract headings and article text from a saved HTML page and print them
# as a small XML document of the form:
#
#   <root><countries><title>..</title>..<artikel>..</artikel>..</countries></root>
import re
import urllib2
from xml.dom.minidom import Document

from BeautifulSoup import BeautifulSoup as bs

# Read the whole page up front; ``with`` guarantees the handle is closed
# even if reading fails.
with open('OSCTEST.html', 'r') as osc:
    oscread = osc.read()

# Parsed tree, kept available for tag-based lookups. The regex searches below
# must NOT be given this object (see the note on findtags1/findtags2).
soup = bs(oscread)

# Build the XML skeleton: <root><countries>...</countries></root>
doc = Document()
root = doc.createElement('root')
doc.appendChild(root)
countries = doc.createElement('countries')
root.appendChild(countries)

# Regex matching works on strings only. Passing the BeautifulSoup object
# raises "TypeError: expected string or buffer", so search the raw page
# text instead.
findtags1 = re.compile(r'<h1 class="title metadata_title content_perceived_text(.*?)</h1>', re.DOTALL | re.IGNORECASE).findall(oscread)
findtags2 = re.compile(r'<span class="content_text">(.*?)</span>', re.DOTALL | re.IGNORECASE).findall(oscread)

# One <title> element per captured heading fragment.
for header in findtags1:
    title_elem = doc.createElement('title')
    countries.appendChild(title_elem)
    header_elem = doc.createTextNode(header)
    title_elem.appendChild(header_elem)

# One <artikel> element per captured content span, with the literal
# paragraph markers "<P>" and "</P>" removed from the text.
for item in findtags2:
    art_elem = doc.createElement('artikel')
    countries.appendChild(art_elem)
    t = item.replace('<P>', '').replace('</P>', '')
    text_elem = doc.createTextNode(t)
    art_elem.appendChild(text_elem)

# Parenthesised form is valid on both Python 2 and Python 3.
print(doc.toprettyxml())
© Stack Overflow or respective owner