Hello,
I am trying to scrape cracked.com with Cheerio. I don’t know why but when I try to place the text inside my a tags there are some links that will go inside the anchor tags correctly and some that will go after anchor tag.
Here is my Jquery code
$( document ).ready(function() {
// Grab the articles as a json
$.getJSON("/articles", function(data) {
console.log(data)
for (var i = 0; i < data.length; i++) {
// Display the apropos information on the page
$(".content-wrapper").append(
"<div class='content-item' ><a data-id= " + data[i]._id + ' href= '+ data[i].link + ">" + data[i].title + "</a></div>");
}
});
});
Server side code with Cheerio
app.get("/scrape", function (req, res) {
axios.get("https://www.cracked.com/").then(function (response) {
var $ = cheerio.load(response.data);
$('.content-cards-wrapper').each(function (i, element) {
var result = {};
// Grabs the title, image and link from cracked
var title = $(element).children().children().children().attr('title')
var link = $(element).children().attr('href')
var image = $(element).children().attr("data-original");
// Save these results in an object that we'll push into the results array we defined earlier
if (link !== undefined && image !== undefined && title !== undefined) {
result.title = title,
result.link = link,
result.image = image
}
db.Article.create(result)
.then(function (dbArticle) {
// View the added result in the console
console.log(dbArticle);
})
.catch(function (err) {
// If an error occurred, log it
console.log(err);
});
});
res.send("Scrape Complete");
})
})
The site I am trying to scrape https://www.cracked.com
Below is what I am seeing in the inspector.
/