Parsing XHTML results from Bing
- by Nir
Hello, i am trying to parse received search queries from bing search engines which are received in xhtml in java. I am using sax XmlReader to read the results but i keep on getting errors.
here is my code-this one is for the hadler of the reader:
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
public class XHTMLHandler extends DefaultHandler{
    public XHTMLHandler()
    {
        super();
    }
    public void startDocument ()
    {
        System.out.println("Start document");
    }
    public void endDocument ()
    {
    System.out.println("End document");
    }
    public void startElement (String uri, String name,String qName, Attributes atts)
    {
        if ("".equals (uri))
                System.out.println("Start element: " + qName);
            else
                System.out.println("Start element: {" + uri + "}" + name);
    }
    public void endElement (String uri, String name, String qName)
    {
    if ("".equals (uri))
        System.out.println("End element: " + qName);
    else
        System.out.println("End element:   {" + uri + "}" + name);
    }
    public void startPrefixMapping (String prefix, String uri)
      throws SAXException {
    }
    public void endPrefixMapping (String prefix)
      throws SAXException {
    }
    public void characters (char ch[], int start, int length)
        {
        System.out.print("Characters:    \"");
        for (int i = start; i < start + length; i++) {
            switch (ch[i]) {
            case '\\':
            System.out.print("\\\\");
            break;
            case '"':
            System.out.print("\\\"");
            break;
            case '\n':
            System.out.print("\\n");
            break;
            case '\r':
            System.out.print("\\r");
            break;
            case '\t':
            System.out.print("\\t");
            break;
            default:
            System.out.print(ch[i]);
            break;
            }
        }
        System.out.print("\"\n");
        }
}
and this is the program itself:
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.net.HttpRetryException;
import java.net.HttpURLConnection;
import java.net.URL;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.XMLReaderFactory;
public class Searching {
    private String m_urlBingSearch  = "http://www.bing.com/search?q=";
    private HttpURLConnection m_httpCon;
    private OutputStreamWriter m_streamWriter;
    //private BufferedReader m_bufferReader;
    private URL m_serverAdress;
    private StringBuilder sb;
    private String m_line;
    private InputSource m_inputSrc;
    public Searching()
    {
        m_httpCon = null;
        m_streamWriter = null;
        //m_bufferReader = null;
        m_serverAdress = null;
        sb = null;
        m_line = new String();
    }
    public void SearchBing(String searchPrms) throws SAXException,IOException 
    {
            //set up connection
            sb = new StringBuilder();
            sb.append(m_urlBingSearch);
            sb.append(searchPrms);
            m_serverAdress = new URL(sb.toString());
            m_httpCon = (HttpURLConnection)m_serverAdress.openConnection();
            m_httpCon.setRequestMethod("GET");
            m_httpCon.setDoOutput(true);
            m_httpCon.setConnectTimeout(10000);
            m_httpCon.connect();
            //m_streamWriter = new OutputStreamWriter(m_httpCon.getOutputStream());
            //m_bufferReader = new BufferedReader(new InputStreamReader(m_httpCon.getInputStream()));
            XMLReader reader = XMLReaderFactory.createXMLReader();
            XHTMLHandler handle = new XHTMLHandler();
            reader.setContentHandler(handle);
            reader.setErrorHandler(handle);
            //reader.startPrefixMapping("html", "http://www.w3.org/1999/xhtml");
            handle.startPrefixMapping("html", "http://www.w3.org/1999/xhtml");
            m_inputSrc = new InputSource(m_httpCon.getInputStream());
            reader.parse(m_inputSrc);
            m_httpCon.disconnect();
    }
    public static void main(String [] args) throws SAXException,IOException
    {
        Searching s = new Searching();
        s.SearchBing("beatles");
    }
}
this is my error message:
Exception in thread "main" java.io.IOException: Server returned HTTP response code: 503 for URL: http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd
    at sun.net.www.protocol.http.HttpURLConnection.getInputStream(Unknown Source)
    at com.sun.org.apache.xerces.internal.impl.XMLEntityManager.setupCurrentEntity(Unknown Source)
    at com.sun.org.apache.xerces.internal.impl.XMLEntityManager.startEntity(Unknown Source)
    at com.sun.org.apache.xerces.internal.impl.XMLEntityManager.startDTDEntity(Unknown Source)
    at com.sun.org.apache.xerces.internal.impl.XMLDTDScannerImpl.setInputSource(Unknown Source)
    at com.sun.org.apache.xerces.internal.impl.XMLDocumentScannerImpl$DTDDriver.dispatch(Unknown Source)
    at com.sun.org.apache.xerces.internal.impl.XMLDocumentScannerImpl$DTDDriver.next(Unknown Source)
    at com.sun.org.apache.xerces.internal.impl.XMLDocumentScannerImpl$PrologDriver.next(Unknown Source)
    at com.sun.org.apache.xerces.internal.impl.XMLDocumentScannerImpl.next(Unknown Source)
    at com.sun.org.apache.xerces.internal.impl.XMLNSDocumentScannerImpl.next(Unknown Source)
    at com.sun.org.apache.xerces.internal.impl.XMLDocumentFragmentScannerImpl.scanDocument(Unknown Source)
    at com.sun.org.apache.xerces.internal.parsers.XML11Configuration.parse(Unknown Source)
    at com.sun.org.apache.xerces.internal.parsers.XML11Configuration.parse(Unknown Source)
    at com.sun.org.apache.xerces.internal.parsers.XMLParser.parse(Unknown Source)
    at com.sun.org.apache.xerces.internal.parsers.AbstractSAXParser.parse(Unknown Source)
    at Searching.SearchBing(Searching.java:57)
    at Searching.main(Searching.java:65)
can someone please help? i think it has something to do with dtd but i don't know hot to fix it