使用发布数据和xlmlhttp

编程入门 行业动态 更新时间:2024-10-27 20:39:20
本文介绍了使用发布数据和xlmlhttp的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧! 问题描述

我仍在尝试学习有关抓取的更多信息,我可以设计一个代码来使我获得所需的结果.

I am still trying to learn more about scraping and I could devise a code that enables me to get the desired results.

这是代码

Sub Test() Dim e As Variant Dim ie As Object Dim ulElem As Object Dim liElem As Object Dim anchElem As Object Dim dt As Date Dim lDay As Integer Dim lMnth As Integer Dim lYear As Integer Dim r As Long Set ie = CreateObject("InternetExplorer.Application") dt = Date - 2 lDay = Day(dt) lMnth = Month(dt) lYear = Year(dt) With ie .Visible = False .Navigate "www.handelsregisterbekanntmachungen.de/?aktion=suche#Ergebnis" Do: DoEvents: Loop Until .readyState = 4 For Each e In ie.document.getElementsByTagName("select") If Len(e.innerText) = 56 Then e.selectedIndex = lDay ElseIf Len(e.innerText) = 18 Then e.selectedIndex = lMnth ElseIf Left(e.innerText, 8) = "----2000" Then e.selectedIndex = lYear - 1999 ElseIf InStr(e.innerText, "Alle Bekanntmachungen") > 0 Then e.selectedIndex = 1 End If Next e For Each e In ie.document.getElementsByTagName("input") If e.Value = "Suche starten" Then e.Click: Exit For Next e Do: DoEvents: Loop Until .readyState = 4 Application.Wait Now() + TimeValue("00:00:05") If InStr(ie.document.body.innerHTML, "Es wurden 0 Treffer gefunden.") > 0 Then MsgBox "No Results Found", vbExclamation: Exit Sub Else For Each ulElem In ie.document.getElementsByTagName("b") For Each liElem In ulElem.getElementsByTagName("li") Set anchElem = liElem.getElementsByTagName("a") If anchElem.Length > 0 Then r = r + 1 Cells(r, 1) = Mid(anchElem.Item(0).innerText, 11) End If Next liElem Next ulElem End If End With End Sub

但是,为了尝试更多地了解XMLHTTP请求,我正在寻找一种获得相同结果但不使用IE的方法.因此,我认为使用XMLHTTP会更加高效,特别是在为搜索过程设置所需的选项后,我可以看到发布数据.

But as a matter of trying to learn more about XMLHTTP requests I am seeking for a way to get the same results but without using IE. so I think using XMLHTTP will be more efficient specially I could see post data after setting up the desired choices for the search process.

推荐答案

看看下面的例子:

Option Explicit Sub Test() Dim sState As String Dim sCourt As String Dim dtFrom As Date Dim dtTill As Date Dim sSubject As String Dim sOrder As String Dim oStates As Object Dim oCourts As Object Dim oSubjects As Object Dim oOrders As Object Dim sStateCode As String Dim sCourtId As String Dim sSubjectVal As String Dim sOrderVal As String Dim aData ' Set query data sState = "" sCourt = "" dtFrom = DateSerial(2018, 2, 11) dtTill = DateSerial(2018, 2, 11) sSubject = "" sOrder = "Aktenzeichen" ' Retrieve options GetOptions oStates, oCourts, oSubjects, oOrders ' Validate query parameters If Not oStates.Exists(sState) Then MsgBox "State valid values:" & vbCrLf & vbCrLf & """" & Join(oStates.Keys(), """" & vbCrLf & """") & """": Exit Sub If Not oCourts(oStates(sState)).Exists(sCourt) Then MsgBox "Court valid values:" & vbCrLf & vbCrLf & """" & Join(oCourts(oStates(sState)).Keys(), """" & vbCrLf & """") & """": Exit Sub If Not oSubjects.Exists(sSubject) Then MsgBox "Subject valid values:" & vbCrLf & vbCrLf & """" & Join(oSubjects.Keys(), """" & vbCrLf & """") & """": Exit Sub If Not oOrders.Exists(sOrder) Then MsgBox "Order valid values:" & vbCrLf & vbCrLf & """" & Join(oOrders.Keys(), """" & vbCrLf & """") & """": Exit Sub ' Request data sStateCode = oStates(sState) sCourtId = oCourts(sStateCode)(sCourt) sSubjectVal = oSubjects(sSubject) sOrderVal = oOrders(sOrder) GetData sStateCode, sCourt, sCourtId, dtFrom, dtTill, sSubjectVal, sOrderVal, aData ' Rebuild nested arrays to 2d array for output aData = Denestify(aData) ' Output With ThisWorkbook.Sheets(1) .Cells.Delete Output2DArray .Cells(1, 1), aData .Columns.AutoFit End With MsgBox "Completed" End Sub Sub GetOptions(oStates As Object, oCourts As Object, oSubjects As Object, oOrders As Object) Dim sContent As String Dim aTmp0 Dim aTmp1 Dim vItem Dim oTmp Dim i As Long ' Retrieve request options from search page XmlHttpRequest "GET", "www.handelsregisterbekanntmachungen.de/?aktion=suche", "", "", "", sContent ' Get each state and code ExtractOptions sContent, "land", oStates ' Get courts with courts ids for each federal state Set oCourts = CreateObject("Scripting.Dictionary") For Each vItem In oStates.Items() ' Put courts and ids into temp dictionary Set oTmp = CreateObject("Scripting.Dictionary") If vItem <> "" Then ' Extract the whole JS array statement with courts names ParseResponse "BundeslandArray\['" & vItem & "'\]=new Array\(('[^']*'(?:,'[^']*')*)\);", sContent, aTmp0, False ' Extract each court name into temp array ParseResponse "'([^']*)'", (aTmp0(0)), aTmp0, False ' Extract the whole JS array statement with courts ids ParseResponse "BundeslandArrayId\['" & vItem & "'\]=new Array\(('[^']*'(?:,'[^']*')*)\);", sContent, aTmp1, False ' Extract each court id into temp array ParseResponse "'([^']*)'", (aTmp1(0)), aTmp1, False For i = 0 To UBound(aTmp0) oTmp(DecodeHTMLEntities((aTmp0(i)))) = DecodeHTMLEntities((aTmp1(i))) Next End If ' Add dummy item oTmp("") = "" ' Put courts-ids for the state code into dictionary Set oCourts(vItem) = oTmp Next ' Add dummy item oStates("") = "" ' Get subjects ExtractOptions sContent, "gegenstand", oSubjects ' Add dummy item oSubjects("") = "0" ' Get sort order types ExtractOptions sContent, "order", oOrders End Sub Sub GetData(sStateCode As String, sCourt As String, sCourtId As String, dtFrom As Date, dtTill As Date, sSubjectVal As String, sOrderVal As String, aData) Dim i As Long Dim oQuery As Object Dim sQuery As String Dim sContent As String ' Set query parameters Set oQuery = CreateObject("Scripting.Dictionary") With oQuery .Add "suchart", "uneingeschr" .Add "button", "Start search" .Add "land", sStateCode .Add "gericht", sCourtId .Add "gericht_name", sCourt .Add "seite", "" .Add "l", "" .Add "r", "" .Add "all", "false" .Add "vt", Day(dtFrom) .Add "vm", Month(dtFrom) .Add "vj", Year(dtFrom) .Add "bt", Day(dtTill) .Add "bm", Month(dtTill) .Add "bj", Year(dtTill) .Add "fname", "" .Add "fsitz", "" .Add "rubrik", "" .Add "az", "" .Add "gegenstand", sSubjectVal .Add "anzv", "alle" .Add "order", sOrderVal End With sQuery = EncodeQueryParams(oQuery) ' Retrieve search results XmlHttpRequest "POST", _ "www.handelsregisterbekanntmachungen.de/de/index.php?aktion=suche", _ Array( _ Array("Content-Type", "application/x-www-form-urlencoded"), _ Array("Content-Length", Len(sQuery) _ ) _ ), _ sQuery, _ "", _ sContent ' Parse response sContent = Replace(sContent, "<br>", vbCrLf) ParseResponse "<li[^>]*><a[^>]*?href=""javascript:NeuFenster\('([^']*)'\)""[^>]*>([^<]*)<ul[^>]*>([\s\S]*?)</ul>", sContent, aData, False For i = 0 To UBound(aData, 1) aData(i)(0) = "www.handelsregisterbekanntmachungen.de/en/skripte/hrb.php?" & aData(i)(0) Next End Sub Sub ExtractOptions(sContent As String, sName As String, oOptions As Object) Dim aTmp0 Dim vItem ' Extract the whole <select> for parameter ParseResponse "<select[^>]* name=" & sName & "[^>]*>[^<]*((?:<option[^>]*>[^<]*</option>[^<]*)+)[^<]*</[^>]*>", sContent, aTmp0, False ' Extract each parameter <option> ParseResponse "<option[^>]*value=(""[^""]*""|[^\s>]*)[^>]*>([^<]*)</option>", (aTmp0(0)), aTmp0, False ' Put each parameter and value into dictionary Set oOptions = CreateObject("Scripting.Dictionary") For Each vItem In aTmp0 oOptions(DecodeHTMLEntities((vItem(1)))) = DecodeHTMLEntities(Replace(vItem(0), """", "")) Next End Sub Sub XmlHttpRequest(sMethod, sUrl, aSetHeaders, sFormData, sRespHeaders, sRespText) Dim aHeader ' With CreateObject("MSXML2.ServerXMLHTTP") ' .SetOption 2, 13056 ' SXH_SERVER_CERT_IGNORE_ALL_SERVER_ERRORS With CreateObject("MSXML2.XMLHTTP") .Open sMethod, sUrl, False If IsArray(aSetHeaders) Then For Each aHeader In aSetHeaders .SetRequestHeader aHeader(0), aHeader(1) Next End If .Send (sFormData) sRespHeaders = .GetAllResponseHeaders sRespText = .ResponseText End With End Sub Sub ParseResponse(sPattern, sResponse, aData, Optional bAppend As Boolean = True, Optional bGlobal = True, Optional bMultiLine = True, Optional bIgnoreCase = True) Dim oMatch Dim aTmp0() Dim sSubMatch If Not (IsArray(aData) And bAppend) Then aData = Array() With CreateObject("VBScript.RegExp") .Global = bGlobal .MultiLine = bMultiLine .IgnoreCase = bIgnoreCase .Pattern = sPattern For Each oMatch In .Execute(sResponse) If oMatch.SubMatches.Count = 1 Then PushItem aData, oMatch.SubMatches(0) Else aTmp0 = Array() For Each sSubMatch In oMatch.SubMatches PushItem aTmp0, sSubMatch Next PushItem aData, aTmp0 End If Next End With End Sub Sub PushItem(aData, vItem, Optional bAppend As Boolean = True) If Not (IsArray(aData) And bAppend) Then aData = Array() ReDim Preserve aData(UBound(aData) + 1) aData(UBound(aData)) = vItem End Sub Function DecodeHTMLEntities(sText As String) As String Static oHtmlfile As Object Static oDiv As Object If oHtmlfile Is Nothing Then Set oHtmlfile = CreateObject("htmlfile") oHtmlfile.Open Set oDiv = oHtmlfile.createElement("div") End If oDiv.innerHTML = sText DecodeHTMLEntities = oDiv.innerText End Function Function EncodeQueryParams(oParams As Object) As String Dim aParams Dim i As Long aParams = oParams.Keys() For i = 0 To UBound(aParams) aParams(i) = EncodeUriComponent((aParams(i))) & "=" & EncodeUriComponent((oParams(aParams(i)))) Next EncodeQueryParams = Join(aParams, "&") End Function Function EncodeUriComponent(strText As String) As String Static objHtmlfile As Object If objHtmlfile Is Nothing Then Set objHtmlfile = CreateObject("htmlfile") objHtmlfile.parentWindow.execScript "function encode(s) {return encodeURIComponent(s)}", "jscript" End If EncodeUriComponent = objHtmlfile.parentWindow.encode(strText) End Function Function Denestify(aRows) Dim aData() Dim aItems() Dim i As Long Dim j As Long If UBound(aRows) = -1 Then Exit Function ReDim aData(1 To UBound(aRows) + 1, 1 To 1) For j = 0 To UBound(aRows) If IsArray(aRows(j)) Then aItems = aRows(j) For i = 0 To UBound(aItems) If i + 1 > UBound(aData, 2) Then ReDim Preserve aData(1 To UBound(aRows) + 1, 1 To i + 1) aData(j + 1, i + 1) = aItems(i) Next Else aData(j + 1, 1) = aRows(j) End If Next Denestify = aData End Function Sub Output2DArray(oDstRng As Range, aCells As Variant) With oDstRng .Parent.Select With .Resize( _ UBound(aCells, 1) - LBound(aCells, 1) + 1, _ UBound(aCells, 2) - LBound(aCells, 2) + 1) .NumberFormat = "@" .Value = aCells End With End With End Sub

更多推荐

使用发布数据和xlmlhttp

本文发布于:2023-10-11 04:44:30,感谢您对本站的认可!
本文链接:https://www.elefans.com/category/jswz/34/1480718.html
版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系,我们将在24小时内删除。
本文标签:数据   xlmlhttp

发布评论

评论列表 (有 0 条评论)
草根站长

>www.elefans.com

编程频道|电子爱好者 - 技术资讯及电子产品介绍!