Puppeteer 在无头模式下运行没有结果,但在浏览器控制台中可以正常工作
// Asker's original attempt: open a Booking.com search-results page in headless
// Chromium and print the hotel names found on it.
const puppeteer = require('puppeteer');

(async () => {
  const browser = await puppeteer.launch();
  const page = await browser.newPage();
  // The URL must be absolute (scheme + host) for page.goto to navigate to it.
  const url = 'https://www.booking.com/searchresults.en-us.html?label=gen173nr-1FCAEoggI46AdIM1gEaLUBiAEBmAExuAEXyAEP2AEB6AEB-AECiAIBqAIDuAL_6vKbBsACAdICJDA0MjU3ZGNjLTJlOWEtNDMyYi1hNDQ2LTg0MmIwMjczYjMzYtgCBeACAQ&sid=5727a604ee8453f41f52e9d9b2d7a5c8&aid=304142&ss=Mingora&ssne=Mingora&ssne_untouched=Mingora&efdco=1&lang=en-us&sb=1&src_elem=sb&src=searchresults&dest_id=-2769132&dest_type=city&checkin=2022-11-27&checkout=2022-11-30&group_adults=2&no_rooms=1&group_children=0&sb_travel_purpose=leisure';
  await page.goto(url);
  // Saved so the page that headless Chromium actually received can be inspected.
  await page.screenshot({path: 'example.png'});
  // NOTE: declared but never used; the query below uses a different selector.
  const selector = "#search_results_table div > h3 > a";
  // Runs inside the page: collect the text of every matching title link.
  // The class names look auto-generated, so they may differ between page
  // loads — TODO confirm against the live markup.
  const titles = await page.evaluate(() =>
    Array.from(document.querySelectorAll("h3.a4225678b2 a.e13098a59f"))
      .map((name) => name.textContent)
  );
  console.log(titles.length);
  console.log(titles);
  await browser.close();
})();
这是我想用来抓取特定位置酒店名称的简单代码。当我在 Google Chrome 浏览器的控制台中运行查询时,查询工作正常,但当我使用 Puppeteer 在 Node.js 中运行它时,它返回一个空数组。我做错了什么?
回答如下:看起来 Booking.com 正在屏蔽您的请求。我强烈建议您将 Puppeteer 与 `puppeteer-extra` 和 `puppeteer-extra-plugin-stealth` 这两个包一起使用,以防止网站检测到您正在使用无头 Chromium 或 WebDriver。另外,您的选择器似乎也不够稳定:
// Scrape hotel names from a Booking.com search-results page.
// puppeteer-extra with the stealth plugin is used instead of plain Puppeteer
// so the site is less likely to detect headless Chromium.
const puppeteer = require("puppeteer-extra");
const StealthPlugin = require("puppeteer-extra-plugin-stealth");
const { executablePath } = require("puppeteer");

puppeteer.use(StealthPlugin());

(async () => {
  const browser = await puppeteer.launch({
    headless: true,
    args: ["--no-sandbox", "--disable-setuid-sandbox", "--window-size=1600,900", "--single-process"],
    // Point puppeteer-extra at the Chromium binary resolved by the puppeteer package.
    executablePath: executablePath(),
  });

  try {
    const page = await browser.newPage();
    await page.setViewport({
      width: 1280,
      height: 720,
    });
    const url = 'https://www.booking.com/searchresults.en-us.html?label=gen173nr-1FCAEoggI46AdIM1gEaLUBiAEBmAExuAEXyAEP2AEB6AEB-AECiAIBqAIDuAL_6vKbBsACAdICJDA0MjU3ZGNjLTJlOWEtNDMyYi1hNDQ2LTg0MmIwMjczYjMzYtgCBeACAQ&sid=5727a604ee8453f41f52e9d9b2d7a5c8&aid=304142&ss=Mingora&ssne=Mingora&ssne_untouched=Mingora&efdco=1&lang=en-us&sb=1&src_elem=sb&src=searchresults&dest_id=-2769132&dest_type=city&checkin=2022-11-27&checkout=2022-11-30&group_adults=2&no_rooms=1&group_children=0&sb_travel_purpose=leisure';
    await page.goto(url);
    // Wait until at least one result card exists before querying the DOM.
    await page.waitForSelector('[data-testid="property-card"]');
    await page.screenshot({path: 'example.png'});
    // Runs inside the page: read the title text of every property card.
    // data-testid attributes are used rather than generated class names.
    const titles = await page.evaluate(() =>
      Array.from(document.querySelectorAll('[data-testid="property-card"]'))
        .map((el) => el.querySelector('h3 [data-testid="title"]').textContent)
    );
    console.log(titles.length);
    console.log(titles);
  } finally {
    // Always shut the browser down, even when navigation or scraping fails,
    // so no Chromium process is left behind.
    await browser.close();
  }
})().catch((err) => {
  // Report the failure and signal it through the exit code.
  console.error(err);
  process.exitCode = 1;
});
输出:
25
[
'Swat Continental Hotel',
'Swat Continental Hotel',
'Rock City Resort',
'SWAT GUEST HOUSE',
'Grand Holiday Hotel',
'Swat Hills Hotel',
'Sun Rise Hotel',
'GB Ghizer Hotel',
'Gilgit Hunza Hotel',
'Sarhad Hotel',
'Hotel Five Star & Restaurant',
'Hotel Swat Gateway',
'Pameer Hotel',
'Hotel Gulf And Restaurant',
'Tour De Swat',
'Hotel Tour De Swat',
'LOKAL Rooms x Swat River',
'River Palace Hotel',
'Hotel One Swat',
'Mingora Bypass Hotel',
'Lahore Hotel',
'Mingora Bypass Hotel',
'Swat Palace Hotel By Northin',
'Hotel Swat Regency',
'Hotel Hill City Mingora Swat'
]
作为替代方案,您也可以使用 hotels-scraper-js 库。这样您的代码就可以写成:
import { booking } from "hotels-scraper-js";

// Fetch Booking.com hotels for "Mingora" and print only their titles.
// The first three arguments are passed as undefined, exactly as before —
// presumably this selects the library defaults; confirm against the
// hotels-scraper-js documentation.
booking
  .getHotels(undefined, undefined, undefined, "Mingora")
  .then((results) => {
    console.log(results.map((el) => el.title));
  })
  .catch((err) => {
    // Report a failed request instead of leaving the rejection unhandled.
    console.error(err);
    process.exitCode = 1;
  });
输出:
[
'Swat Continental Hotel',
'Swat Continental Hotel',
'SWAT GUEST HOUSE',
'Rock City Resort',
'Grand Holiday Hotel',
'Swat Hills Hotel',
'Pameer Hotel',
'Gilgit Hunza Hotel',
'GB Ghizer Hotel',
'Hotel Five Star & Restaurant',
'Hotel Swat Gateway',
'Sarhad Hotel',
'Sun Rise Hotel',
'Hotel Gulf And Restaurant',
'Tour De Swat',
'Hotel Tour De Swat',
'LOKAL Rooms x Swat River',
'Mingora Bypass Hotel',
'Hotel One Swat',
'Mingora Bypass Hotel',
'Swat Palace Hotel By Northin',
'Hotel Swat Regency',
'Swat Hilton Hotel',
'Hotel Hill City Mingora Swat',
'Hotel Hilton Palace',
'The Rose Palace Swat',
'Punjab Hotel',
'Bypass Hotel',
'Galiyat Hills Hotel',
'Ruby Hotel',
'River Palace Hotel',
'Riverside Hotel',
'Rahat Luxury Guest House by LMC',
'Lahore Hotel',
'Dubai Hotel'
]
更多推荐
Puppeteer 无头运行没有结果,但在浏览器控制台中工作
发布评论