The function imgArray returns just one image and not an array. That is, the third column of the spreadsheet which is img lists just one image. Here is my code:
const rp = require('request-promise');
const otcsv = require('objects-to-csv');
const cheerio = require('cheerio');
const baseURL = 'https://www.example.com';
const getCategories = async () => {
const html = await rp(baseURL);
const imgArray = () => {
cheerio('td.productListing-data > a > img', html).each((i, image) => {
img = cheerio(image).attr('src');
})};
imgArray();
const businessMap = cheerio('.category', html).map(async (i, e) => {
const link = e.attribs.href;
const innerHtml = await rp(link);
const cat = e.children[0].data;
return {
link,
cat,
img,
}
}).get();
return Promise.all(businessMap);
};
getCategories()
.then(result => {
const transformed = new otcsv(result);
return transformed.toDisk('./spreadsheets/output.csv');
})
.then(() => console.log('SUCCESSFULLY COMPLETED THE WEB SCRAPING SAMPLE'));
Well, there isn’t too much context in the other thread, but if you want to merge them that’s fine I guess. I have never merged threads so I don’t really know how to do it.
Well, that didn’t really do what I wanted it to. @camperextraordinaire can you control how the merge happens?
A part of my view source looks like this:
<td align="center" valign="top" class="productListing-data" style="position:relative;padding-bottom: 5px;" width="25%">
<a style="position:relative;float:left;" href="product_info.php?products_id=474302&kind=2&cPath=172_93_96&description=
3PCS---Round-Metal-Link-Chain-Layered-Anklets-">
<img src="images/20200131/thumb/AK0065-@GDXX@3P-03H-75_3L@474302@350@01@200.jpg" title="
3PCS - Round Metal Link Chain Layered Anklets " width="200" border="0" height="200" alt="
3PCS - Round Metal Link Chain Layered Anklets ">
<td align="center" valign="top" class="productListing-data" style="position:relative;padding-bottom: 5px;" width="25%">
<a style="position:relative;float:left;" href="product_info.php?products_id=474303&kind=2&cPath=172_93_96&description=
3PCS---Round-Metal-Link-Chain-Layered-Anklets-">
<img src="images/20200131/thumb/AK0065-@RHXX@3P-03H-75_3L@474303@350@01@200.jpg" title="
3PCS - Round Metal Link Chain Layered Anklets " width="200" border="0" height="200" alt="
3PCS - Round Metal Link Chain Layered Anklets ">
<td align="center" valign="top" class="productListing-data" style="position:relative;padding-bottom: 5px;" width="25%">
<a style="position:relative;float:left;" href="product_info.php?products_id=479684&kind=2&cPath=172_93_96&description=Faceted-Bead-Pearl-Link-Anklet">
<img src="images/20200312/thumb/AK0015-@GD-NMLT2@02H-9_3L@479684@225@01@200.jpg" title="Faceted Bead Pearl Link Anklet" width="200" border="0" height="200" alt="Faceted Bead Pearl Link Anklet"><span class="small_cart" ></span></a><a href="product_info.php?products_id=479684&kind=2&cPath=172_93_96&description=Faceted-Bead-Pearl-Link-Anklet"><span style="display:-webkit-inline-box">479684</span><br /><a href="product_info.php?products_id=479684&kind=2&cPath=172_93_96&description=Faceted-Bead-Pearl-Link-Anklet"><font style="display: block;height:40px;text-transform: uppercase;" title="Faceted Bead Pearl Link Anklet">Faceted Bead Pearl Link Anklet</font></a> <a href="https://www.wonatrading.com/login">Login for Price</a> </td>
My node code looks like this:
const rp = require('request-promise');
const $ = require('cheerio');
const url = 'https://www.example.com';
rp(url)
.then(function(html) {
console.log($('td.productListing-data > a > img', html).attr('src'));
})
.catch(function(err) {
//handle error
});
When I execute the file, I get just one image when I should get all three:
C:\Users\Maureen\Desktop\scraper>node scraper.js
images/20200312/thumb/AK0015-@GD-NMLT2@02H-9_3L@479684@225@01@200.jpg
@makamo66 I merged your old thread into this one to give some context. Sorry for any confusion it may have caused.