I need some help with my VB.NET code, please

Posted by akmalizhar on Stack Overflow See other posts from Stack Overflow or by akmalizhar
Published on 2010-06-01T08:38:35Z Indexed on 2010/06/01 8:43 UTC
Read the original article Hit count: 217

Filed under:
|

Currently I need to develop an application that can extract information from a few websites. This is what I have done up until now:

Imports System Imports System.Text.RegularExpressions Imports System.IO Imports System.Net Imports System.Web Imports System.Data.SqlClient Imports System.Threading Imports System.Data.DataSet Imports System.Data.OleDb

''' <summary>
''' Console tool that downloads one web page, pulls restaurant details and
''' visitor feedback out of its HTML with regular expressions, and stores them
''' in the knowledgeBase SQL Server database.
''' </summary>
Module module1

    ' Module-level fields from the original listing. Nothing in this module reads
    ' them (Main and extractor work on their own local/parameter values).
    Dim url As String

    Dim hotelName As String = ""

    ' Matches any HTML tag; every matched fragment has its tags replaced by a space.
    Private Const TagPattern As String = "<[^>]*>"

    ' Parameterized so scraped page text is never spliced into the SQL statement.
    Private Const InsertInfoSql As String = _
        "insert into infoRestaurant (nameRestaurant, cuisine, streetAddress, locality, postalCode, countryName, addressFull, tel, attractionType) " & _
        "values (@nameRestaurant, @cuisine, @streetAddress, @locality, @postalCode, @countryName, @addressFull, @tel, @attractionType)"

    Private Const InsertFeedbackSql As String = _
        "insert into feedBackRestaurant (feedBackView) values (@feedBackView)"

    ''' <summary>
    ''' Entry point: prompts for a URL on the console and runs the extractor on it.
    ''' </summary>
    Sub Main()

        Console.Write("enter url: ")
        Dim inputUrl As String = Console.ReadLine()

        ' ReadLine returns Nothing at end of input; an empty or missing URL would
        ' only make WebRequest.Create throw, so stop early instead.
        If String.IsNullOrEmpty(inputUrl) Then
            Console.WriteLine("no url entered")
            Return
        End If

        extractor(inputUrl)

    End Sub

    ''' <summary>
    ''' Downloads the page at <paramref name="url"/>, extracts the restaurant
    ''' fields and feedback paragraphs, and inserts them into the database.
    ''' </summary>
    ''' <param name="url">Address of the page to download and parse.</param>
    ''' <remarks>
    ''' One infoRestaurant row is inserted per matched address block and one
    ''' feedBackRestaurant row per matched feedback paragraph. For the other
    ''' fields the last match on the page is used; a field with no match (or an
    ''' empty match) is stored as "NA". Exceptions from the web request or the
    ''' database are not caught here and propagate to the caller.
    ''' </remarks>
    Public Sub extractor(ByVal url As String)

        Dim strConn As String = "Data Source = localhost; Initial Catalog = knowledgeBase; Integrated Security = True; Connection Timeout = 0;"

        ' Fetch the page before touching the database so no connection is held
        ' open while the download is in progress.
        Dim page As String = DownloadPage(url)
        Dim tagStripper As New Regex(TagPattern)

        ' All single-valued fields are extracted BEFORE the insert. The original
        ' code extracted tel and attraction type after the insert had already
        ' run, so those two columns were always written as "NA".
        Dim restaurantName As String = LastMatchText(page, "<h2 class=""name hotel""[^>]*>[\s\S]+?</h2>", tagStripper)
        Dim cuisine As String = LastMatchText(page, "<li class=""cuisine""[^>]*>[^>]+</li>", tagStripper)
        Dim streetAddress As String = LastMatchText(page, "<span class=""street-address""[^>]*>[^>]+</span>", tagStripper)
        Dim locality As String = LastMatchText(page, "<span class=""locality""[^>]*>[\s\S]+?</span>", tagStripper)
        Dim postalCode As String = LastMatchText(page, "<span property=""v:postal-code""[^>]*>[\s\S]+?</span>", tagStripper)
        Dim countryName As String = LastMatchText(page, "<span class=""country-name""[^>]*>[\s\S]+?</span>", tagStripper)
        Dim tel As String = LastMatchText(page, "<span class=""tel""[^>]*>[\s\S]+?</span>", tagStripper)
        Dim attractionType As String = LastMatchText(page, "<div><b>Attraction type:[^>]*>[\s\S]+?</div>", tagStripper)

        Dim addressMatches As MatchCollection = Regex.Matches(page, "<address class=""adr""[^>]*>[\s\S]+?</address>")
        Dim feedbackMatches As MatchCollection = Regex.Matches(page, "(?=<p id).*(?<=</p>)")

        ' Using blocks release the connection and commands even when an insert throws.
        Using conn As New SqlConnection(strConn)
            conn.Open()

            Using insertInfo As New SqlCommand(InsertInfoSql, conn)
                For Each addressMatch As Match In addressMatches
                    insertInfo.Parameters.Clear()
                    AddTextParam(insertInfo, "@nameRestaurant", restaurantName)
                    AddTextParam(insertInfo, "@cuisine", cuisine)
                    AddTextParam(insertInfo, "@streetAddress", streetAddress)
                    AddTextParam(insertInfo, "@locality", locality)
                    AddTextParam(insertInfo, "@postalCode", postalCode)
                    AddTextParam(insertInfo, "@countryName", countryName)
                    AddTextParam(insertInfo, "@addressFull", StripTags(addressMatch.Value, tagStripper))
                    AddTextParam(insertInfo, "@tel", tel)
                    AddTextParam(insertInfo, "@attractionType", attractionType)
                    insertInfo.ExecuteNonQuery()
                Next
            End Using

            Using insertFeedback As New SqlCommand(InsertFeedbackSql, conn)
                For Each feedbackMatch As Match In feedbackMatches
                    insertFeedback.Parameters.Clear()
                    AddTextParam(insertFeedback, "@feedBackView", StripTags(feedbackMatch.Value, tagStripper))
                    insertFeedback.ExecuteNonQuery()
                Next
            End Using
        End Using

    End Sub

    ''' <summary>
    ''' Formats a value as a quoted SQL string literal: empty input becomes 'NA',
    ''' anything else is trimmed and has its single quotes doubled.
    ''' </summary>
    ''' <param name="strParam">Raw text to format.</param>
    ''' <returns>The text wrapped in single quotes, ready to follow an N prefix.</returns>
    ''' <remarks>
    ''' extractor no longer calls this (it binds parameters instead); the function
    ''' is kept with its original behavior for any other caller.
    ''' </remarks>
    Public Function FormatSqlParam(ByVal strParam As String) As String

        Return "'" & ParamValueOrNA(strParam).Replace("'", "''") & "'"

    End Function

    ' Downloads the resource at url and returns its whole body as text,
    ' closing the response and reader when done.
    Private Function DownloadPage(ByVal url As String) As String

        Dim request As WebRequest = WebRequest.Create(url)

        Using response As WebResponse = request.GetResponse()
            Using reader As New StreamReader(response.GetResponseStream())
                Return reader.ReadToEnd()
            End Using
        End Using

    End Function

    ' Replaces every HTML tag in fragment with a space and trims the result.
    Private Function StripTags(ByVal fragment As String, ByVal tagStripper As Regex) As String

        Return tagStripper.Replace(fragment, " ").Trim()

    End Function

    ' Returns the tag-stripped text of the LAST match of pattern in source,
    ' or an empty string when the pattern does not match at all.
    Private Function LastMatchText(ByVal source As String, ByVal pattern As String, ByVal tagStripper As Regex) As String

        Dim found As MatchCollection = Regex.Matches(source, pattern)

        If found.Count = 0 Then
            Return ""
        End If

        Return StripTags(found(found.Count - 1).Value, tagStripper)

    End Function

    ' Value actually stored for a field: "NA" for empty/missing text, otherwise
    ' the trimmed text (same rule FormatSqlParam applies).
    Private Function ParamValueOrNA(ByVal text As String) As String

        If String.IsNullOrEmpty(text) Then
            Return "NA"
        End If

        Return text.Trim()

    End Function

    ' Binds text (or "NA" when empty) to the named parameter of cmd.
    Private Sub AddTextParam(ByVal cmd As SqlCommand, ByVal paramName As String, ByVal text As String)

        cmd.Parameters.AddWithValue(paramName, ParamValueOrNA(text))

    End Sub

End Module

---problems--

The problems that I face are: 1. The database foreign key is not working here. Someone told me that some code needs to be added, but I don't know how. 2. The data repeats each time I run the application. I guess it requires an update-database function, but I have no idea how to write one. 3. I have to add a multithreading function as well. And last, how can I make my application flexible even when the HTML code changes? Can anyone help me, please?

The website that I need to extract from is http://www.tripadvisor.com/Tourism-g293951-Malaysia-Vacations.html. I need the information about hotels, restaurants and attraction places. Please, I need some help here.

© Stack Overflow or respective owner

Related posts about vb.net

Related posts about multithreading