本文介绍了使用发布数据和xlmlhttp的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!
问题描述
我仍在尝试学习有关抓取的更多信息,我可以设计一个代码来使我获得所需的结果.
I am still trying to learn more about scraping and I could devise a code that enables me to get the desired results.
这是代码
Sub Test() Dim e As Variant Dim ie As Object Dim ulElem As Object Dim liElem As Object Dim anchElem As Object Dim dt As Date Dim lDay As Integer Dim lMnth As Integer Dim lYear As Integer Dim r As Long Set ie = CreateObject("InternetExplorer.Application") dt = Date - 2 lDay = Day(dt) lMnth = Month(dt) lYear = Year(dt) With ie .Visible = False .Navigate "www.handelsregisterbekanntmachungen.de/?aktion=suche#Ergebnis" Do: DoEvents: Loop Until .readyState = 4 For Each e In ie.document.getElementsByTagName("select") If Len(e.innerText) = 56 Then e.selectedIndex = lDay ElseIf Len(e.innerText) = 18 Then e.selectedIndex = lMnth ElseIf Left(e.innerText, 8) = "----2000" Then e.selectedIndex = lYear - 1999 ElseIf InStr(e.innerText, "Alle Bekanntmachungen") > 0 Then e.selectedIndex = 1 End If Next e For Each e In ie.document.getElementsByTagName("input") If e.Value = "Suche starten" Then e.Click: Exit For Next e Do: DoEvents: Loop Until .readyState = 4 Application.Wait Now() + TimeValue("00:00:05") If InStr(ie.document.body.innerHTML, "Es wurden 0 Treffer gefunden.") > 0 Then MsgBox "No Results Found", vbExclamation: Exit Sub Else For Each ulElem In ie.document.getElementsByTagName("b") For Each liElem In ulElem.getElementsByTagName("li") Set anchElem = liElem.getElementsByTagName("a") If anchElem.Length > 0 Then r = r + 1 Cells(r, 1) = Mid(anchElem.Item(0).innerText, 11) End If Next liElem Next ulElem End If End With End Sub但是,为了尝试更多地了解XMLHTTP请求,我正在寻找一种获得相同结果但不使用IE的方法.因此,我认为使用XMLHTTP会更加高效,特别是在为搜索过程设置所需的选项后,我可以看到发布数据.
But as a matter of trying to learn more about XMLHTTP requests I am seeking for a way to get the same results but without using IE. so I think using XMLHTTP will be more efficient specially I could see post data after setting up the desired choices for the search process.
推荐答案看看下面的例子:
Option Explicit Sub Test() Dim sState As String Dim sCourt As String Dim dtFrom As Date Dim dtTill As Date Dim sSubject As String Dim sOrder As String Dim oStates As Object Dim oCourts As Object Dim oSubjects As Object Dim oOrders As Object Dim sStateCode As String Dim sCourtId As String Dim sSubjectVal As String Dim sOrderVal As String Dim aData ' Set query data sState = "" sCourt = "" dtFrom = DateSerial(2018, 2, 11) dtTill = DateSerial(2018, 2, 11) sSubject = "" sOrder = "Aktenzeichen" ' Retrieve options GetOptions oStates, oCourts, oSubjects, oOrders ' Validate query parameters If Not oStates.Exists(sState) Then MsgBox "State valid values:" & vbCrLf & vbCrLf & """" & Join(oStates.Keys(), """" & vbCrLf & """") & """": Exit Sub If Not oCourts(oStates(sState)).Exists(sCourt) Then MsgBox "Court valid values:" & vbCrLf & vbCrLf & """" & Join(oCourts(oStates(sState)).Keys(), """" & vbCrLf & """") & """": Exit Sub If Not oSubjects.Exists(sSubject) Then MsgBox "Subject valid values:" & vbCrLf & vbCrLf & """" & Join(oSubjects.Keys(), """" & vbCrLf & """") & """": Exit Sub If Not oOrders.Exists(sOrder) Then MsgBox "Order valid values:" & vbCrLf & vbCrLf & """" & Join(oOrders.Keys(), """" & vbCrLf & """") & """": Exit Sub ' Request data sStateCode = oStates(sState) sCourtId = oCourts(sStateCode)(sCourt) sSubjectVal = oSubjects(sSubject) sOrderVal = oOrders(sOrder) GetData sStateCode, sCourt, sCourtId, dtFrom, dtTill, sSubjectVal, sOrderVal, aData ' Rebuild nested arrays to 2d array for output aData = Denestify(aData) ' Output With ThisWorkbook.Sheets(1) .Cells.Delete Output2DArray .Cells(1, 1), aData .Columns.AutoFit End With MsgBox "Completed" End Sub Sub GetOptions(oStates As Object, oCourts As Object, oSubjects As Object, oOrders As Object) Dim sContent As String Dim aTmp0 Dim aTmp1 Dim vItem Dim oTmp Dim i As Long ' Retrieve request options from search page XmlHttpRequest "GET", "www.handelsregisterbekanntmachungen.de/?aktion=suche", "", "", "", sContent ' Get each state and code ExtractOptions sContent, "land", oStates ' Get courts with courts ids for each federal state Set oCourts = CreateObject("Scripting.Dictionary") For Each vItem In oStates.Items() ' Put courts and ids into temp dictionary Set oTmp = CreateObject("Scripting.Dictionary") If vItem <> "" Then ' Extract the whole JS array statement with courts names ParseResponse "BundeslandArray\['" & vItem & "'\]=new Array\(('[^']*'(?:,'[^']*')*)\);", sContent, aTmp0, False ' Extract each court name into temp array ParseResponse "'([^']*)'", (aTmp0(0)), aTmp0, False ' Extract the whole JS array statement with courts ids ParseResponse "BundeslandArrayId\['" & vItem & "'\]=new Array\(('[^']*'(?:,'[^']*')*)\);", sContent, aTmp1, False ' Extract each court id into temp array ParseResponse "'([^']*)'", (aTmp1(0)), aTmp1, False For i = 0 To UBound(aTmp0) oTmp(DecodeHTMLEntities((aTmp0(i)))) = DecodeHTMLEntities((aTmp1(i))) Next End If ' Add dummy item oTmp("") = "" ' Put courts-ids for the state code into dictionary Set oCourts(vItem) = oTmp Next ' Add dummy item oStates("") = "" ' Get subjects ExtractOptions sContent, "gegenstand", oSubjects ' Add dummy item oSubjects("") = "0" ' Get sort order types ExtractOptions sContent, "order", oOrders End Sub Sub GetData(sStateCode As String, sCourt As String, sCourtId As String, dtFrom As Date, dtTill As Date, sSubjectVal As String, sOrderVal As String, aData) Dim i As Long Dim oQuery As Object Dim sQuery As String Dim sContent As String ' Set query parameters Set oQuery = CreateObject("Scripting.Dictionary") With oQuery .Add "suchart", "uneingeschr" .Add "button", "Start search" .Add "land", sStateCode .Add "gericht", sCourtId .Add "gericht_name", sCourt .Add "seite", "" .Add "l", "" .Add "r", "" .Add "all", "false" .Add "vt", Day(dtFrom) .Add "vm", Month(dtFrom) .Add "vj", Year(dtFrom) .Add "bt", Day(dtTill) .Add "bm", Month(dtTill) .Add "bj", Year(dtTill) .Add "fname", "" .Add "fsitz", "" .Add "rubrik", "" .Add "az", "" .Add "gegenstand", sSubjectVal .Add "anzv", "alle" .Add "order", sOrderVal End With sQuery = EncodeQueryParams(oQuery) ' Retrieve search results XmlHttpRequest "POST", _ "www.handelsregisterbekanntmachungen.de/de/index.php?aktion=suche", _ Array( _ Array("Content-Type", "application/x-www-form-urlencoded"), _ Array("Content-Length", Len(sQuery) _ ) _ ), _ sQuery, _ "", _ sContent ' Parse response sContent = Replace(sContent, "<br>", vbCrLf) ParseResponse "<li[^>]*><a[^>]*?href=""javascript:NeuFenster\('([^']*)'\)""[^>]*>([^<]*)<ul[^>]*>([\s\S]*?)</ul>", sContent, aData, False For i = 0 To UBound(aData, 1) aData(i)(0) = "www.handelsregisterbekanntmachungen.de/en/skripte/hrb.php?" & aData(i)(0) Next End Sub Sub ExtractOptions(sContent As String, sName As String, oOptions As Object) Dim aTmp0 Dim vItem ' Extract the whole <select> for parameter ParseResponse "<select[^>]* name=" & sName & "[^>]*>[^<]*((?:<option[^>]*>[^<]*</option>[^<]*)+)[^<]*</[^>]*>", sContent, aTmp0, False ' Extract each parameter <option> ParseResponse "<option[^>]*value=(""[^""]*""|[^\s>]*)[^>]*>([^<]*)</option>", (aTmp0(0)), aTmp0, False ' Put each parameter and value into dictionary Set oOptions = CreateObject("Scripting.Dictionary") For Each vItem In aTmp0 oOptions(DecodeHTMLEntities((vItem(1)))) = DecodeHTMLEntities(Replace(vItem(0), """", "")) Next End Sub Sub XmlHttpRequest(sMethod, sUrl, aSetHeaders, sFormData, sRespHeaders, sRespText) Dim aHeader ' With CreateObject("MSXML2.ServerXMLHTTP") ' .SetOption 2, 13056 ' SXH_SERVER_CERT_IGNORE_ALL_SERVER_ERRORS With CreateObject("MSXML2.XMLHTTP") .Open sMethod, sUrl, False If IsArray(aSetHeaders) Then For Each aHeader In aSetHeaders .SetRequestHeader aHeader(0), aHeader(1) Next End If .Send (sFormData) sRespHeaders = .GetAllResponseHeaders sRespText = .ResponseText End With End Sub Sub ParseResponse(sPattern, sResponse, aData, Optional bAppend As Boolean = True, Optional bGlobal = True, Optional bMultiLine = True, Optional bIgnoreCase = True) Dim oMatch Dim aTmp0() Dim sSubMatch If Not (IsArray(aData) And bAppend) Then aData = Array() With CreateObject("VBScript.RegExp") .Global = bGlobal .MultiLine = bMultiLine .IgnoreCase = bIgnoreCase .Pattern = sPattern For Each oMatch In .Execute(sResponse) If oMatch.SubMatches.Count = 1 Then PushItem aData, oMatch.SubMatches(0) Else aTmp0 = Array() For Each sSubMatch In oMatch.SubMatches PushItem aTmp0, sSubMatch Next PushItem aData, aTmp0 End If Next End With End Sub Sub PushItem(aData, vItem, Optional bAppend As Boolean = True) If Not (IsArray(aData) And bAppend) Then aData = Array() ReDim Preserve aData(UBound(aData) + 1) aData(UBound(aData)) = vItem End Sub Function DecodeHTMLEntities(sText As String) As String Static oHtmlfile As Object Static oDiv As Object If oHtmlfile Is Nothing Then Set oHtmlfile = CreateObject("htmlfile") oHtmlfile.Open Set oDiv = oHtmlfile.createElement("div") End If oDiv.innerHTML = sText DecodeHTMLEntities = oDiv.innerText End Function Function EncodeQueryParams(oParams As Object) As String Dim aParams Dim i As Long aParams = oParams.Keys() For i = 0 To UBound(aParams) aParams(i) = EncodeUriComponent((aParams(i))) & "=" & EncodeUriComponent((oParams(aParams(i)))) Next EncodeQueryParams = Join(aParams, "&") End Function Function EncodeUriComponent(strText As String) As String Static objHtmlfile As Object If objHtmlfile Is Nothing Then Set objHtmlfile = CreateObject("htmlfile") objHtmlfile.parentWindow.execScript "function encode(s) {return encodeURIComponent(s)}", "jscript" End If EncodeUriComponent = objHtmlfile.parentWindow.encode(strText) End Function Function Denestify(aRows) Dim aData() Dim aItems() Dim i As Long Dim j As Long If UBound(aRows) = -1 Then Exit Function ReDim aData(1 To UBound(aRows) + 1, 1 To 1) For j = 0 To UBound(aRows) If IsArray(aRows(j)) Then aItems = aRows(j) For i = 0 To UBound(aItems) If i + 1 > UBound(aData, 2) Then ReDim Preserve aData(1 To UBound(aRows) + 1, 1 To i + 1) aData(j + 1, i + 1) = aItems(i) Next Else aData(j + 1, 1) = aRows(j) End If Next Denestify = aData End Function Sub Output2DArray(oDstRng As Range, aCells As Variant) With oDstRng .Parent.Select With .Resize( _ UBound(aCells, 1) - LBound(aCells, 1) + 1, _ UBound(aCells, 2) - LBound(aCells, 2) + 1) .NumberFormat = "@" .Value = aCells End With End With End Sub更多推荐
使用发布数据和xlmlhttp
发布评论