I’m trying to scrape some products from a website,
the function scrapes the products ncluded in the first page before resulting in an error (Error: Execution context was destroyed, most likely because of a navigation.)
I’ve tried few stuff like navigating back another time after the last loop , changing the waitForFunction to wait ForNavigation , and deleting the part that updates the productCards since i’m sure they never change, each one of these attemptes resulted in the same error but after onle one product.
Logic : the function takes a subCategory page link containing cards and click on each one to get to the subSubCategory page were the product cards are , navigate to each one , scrape the data and upload it to the database.
async function scrapeProductPages(categoryPageLinks) {
await mongoose.connect(process.env.MONGO_URI, {
dbName: DBName,
useNewUrlParser: true,
useUnifiedTopology: true,
});
const browser = await puppeteer.launch({ headless: 'new'});
for (const categoryPageLink of categoryPageLinks) {
const page = await browser.newPage(); // Create a new page instance for each category page
await page.goto(categoryPageLink);
await page.waitForSelector('.elementor-heading-title a')
const subCategoryCardSelector = '.elementor-heading-title a';
const subCategoryCards = await page.$$(subCategoryCardSelector);
for (let index = 0; index < subCategoryCards.length; index++) {
const subCategoryCard = subCategoryCards[index];
await subCategoryCard.click();
await page.waitForNavigation({ waitUntil: 'networkidle0' });
await page.waitForSelector('.o-hero__content__title')
const subSubCategoryNameArray = await page.$$eval('.o-hero__content__title', elements => elements.map(el => el.textContent));
const subSubCategoryName = replaceSpacesWithHyphen(subSubCategoryNameArray[0]);
const productCardSelector = '.product-holder';
// Get the product card elements on the page
const productCards = await page.$$(productCardSelector);
for (let i = 0; i < productCards.length; i++) {
const productCard = productCards[i];
await productCard.click();
// Wait for the product page to load
await page.waitForNavigation({ waitUntil: 'networkidle0' });
await scraper(page).then(async (data) => {
data.subSubCategory = subSubCategoryName;
await uploadProduct(data).then(() => {
console.log(data.name + ' uploaded');
}).catch(err => console.log(err));
})
// Go back to the category page
await page.goBack();
// Wait for the page to load after navigating back
await page.waitForFunction('document.readyState === "complete"');
// Refresh the product cards array after navigating back
const refreshedProductCards = await page.$$(productCardSelector);
// Reset the loop index if necessary
if (refreshedProductCards.length !== productCards.length) {
i -= refreshedProductCards.length - productCards.length;
}
// Update the product cards array reference
productCards = refreshedProductCards;
}
}
await page.close(); // Close the page after scraping all product pages within a category
}
await browser.close();
}