Hello everyone, can you please help me? I’m trying to get into subpages with https.request in Node.js. To get data from the page I’m using cheerio. I’m not using any other modules …
const fs = require('fs');
const https = require('https');
const cheerio = require('cheerio');
// Requests the shirts listing page, collects the product links with cheerio,
// and then tries to request every linked product page.
let pageLink;
let siteProducts = ""; // raw text received from the product-page requests is appended here
const options = {
host: 'shirts4mike.com',
path: '/shirts.php',
}
let pageLinks = []; // href attributes collected from the first https.request
// Connect to the website
const request = https.request(options, function(response){
//console.log(options);
// The handler below runs for every received chunk, so both the parsing and
// the loop over pageLinks are repeated each time a chunk arrives.
response.on('data', function (chunk) {
const $ = cheerio.load(chunk);
$('.products li a').each( function(linkIndex) {
pageLink = $(this).attr('href');
pageLinks.push(pageLink);
});
// Loop over the links saved in pageLinks
for(var i = 0; i < pageLinks.length; i+= 1){
let pages = pageLinks[i];
/** This part does not work: */
// NOTE(review): the string passed to https.request is options.host + pages,
// with no protocol prefix and no separator, and requestPages.end() is never
// called in this snippet.
const requestPages = https.request((options.host + pages), function(response){
response.on('data', function (chunk) {
siteProducts += chunk; console.log(siteProducts);
});
})
/** end of the part that does not work */
}
});
response.on('end', function (){
console.log('no more data in response');
});
});
// NOTE(review): request.end() is not called anywhere in this snippet.
I just want to do another https request with the links I have saved in the variable pageLinks, looping over them to get the data I want, but it does not work. I don’t know if it is possible to do an https.request inside another https.request … 
Does your first request even work?
I think you may be missing a couple of cases where you should be calling request.end(); and requestPages.end();
After that, (options.host + pages) won’t be a valid URL, it’ll be something like shirts4mike.comshirts.php?shirt=101 or something (missing https:// at the start and then the intermediate '/')
Hi gebulmer! No, I’m not missing anything, and my first request works perfectly up until this block of code …
/** This part does not work: */
// NOTE(review): the string passed to https.request is options.host + pages,
// with no protocol prefix, and requestPages.end() is not called in this snippet.
const requestPages = https.request((options.host + pages), function(response){
// Append every received chunk to siteProducts and log the accumulated text
response.on('data', function (chunk) {
siteProducts += chunk; console.log(siteProducts);
});
})
/** end of the part that does not work */
You can console.log pages (let pages = pageLinks[i];) and see what I already got from https://www.shirts4mike.com/shirt.php
But now the question is: how can I do another https.request inside another https.request? How can I get this block of code to work? I added a slash (/):
// Second attempt: a slash is prepended to every collected link before the
// product page is requested.
for(var i = 0; i < pageLinks.length; i+= 1){
let pages = '/' + pageLinks[i];
/** This part does not work: */
// console.log(pages);
// NOTE(review): the string passed to https.request is options.host + pages,
// still without a protocol prefix, and requestPages.end() is not called in
// this snippet.
const requestPages = https.request(options.host + pages, function(response){
// Append every received chunk to siteProducts and log the accumulated text
response.on('data', function (chunk) {
siteProducts += chunk; console.log(siteProducts);
});
})
/** end of the part that does not work */
}
Here’s your code with the changes that I mentioned, does this work for you?
const https = require('https');
const cheerio = require('cheerio');
let siteProducts = ''; // HTML of every product page fetched so far, one whole page after another
const options = {
  host: 'shirts4mike.com',
  path: '/shirts.php',
};
let pageLinks = []; // href attributes collected from the product listing page

/**
 * Sends a GET request and hands the complete response body to `callback`.
 *
 * The body is buffered until the response ends, so the caller never sees a
 * partial document. `callback` is invoked at most once.
 *
 * @param {object|string|URL} target - Request target, passed to https.request unchanged.
 * @param {function(?Error, string=): void} callback - Receives an error, or null and the body text.
 */
function fetchBody(target, callback) {
  let isFinished = false;
  const finish = function (error, body) {
    if (isFinished) {
      return;
    }
    isFinished = true;
    callback(error, body);
  };

  const pendingRequest = https.request(target, function (response) {
    if (response.statusCode !== 200) {
      // Drain the unwanted body so the underlying socket is released.
      response.resume();
      finish(new Error(`Request failed with status code ${response.statusCode}`));
      return;
    }

    let body = '';
    response.setEncoding('utf8');
    response.on('data', function (chunk) {
      body += chunk;
    });
    response.on('end', function () {
      finish(null, body);
    });
    response.on('error', finish);
  });

  pendingRequest.on('error', finish);
  // Without end() the request is never completed and no response arrives.
  pendingRequest.end();
}

// First request: fetch the listing page and parse it only once it is
// complete, because a single 'data' chunk can stop in the middle of a tag.
fetchBody(options, function (listingError, listingHtml) {
  if (listingError) {
    console.error(listingError);
    return;
  }
  console.log('no more data in response');

  const $ = cheerio.load(listingHtml);
  $('.products li a').each(function () {
    const pageLink = $(this).attr('href');
    if (pageLink) {
      pageLinks.push(pageLink);
    }
  });
  console.log(pageLinks);

  // Second round: one request per product page, issued exactly once per link.
  for (const pageLink of pageLinks) {
    // A string or URL target must carry its own protocol; resolving against
    // the site root also copes with hrefs that are absolute or start with '/'.
    const pageUrl = new URL(pageLink, `https://${options.host}/`);
    fetchBody(pageUrl, function (pageError, pageHtml) {
      if (pageError) {
        console.error(pageError);
        return;
      }
      // Append whole pages so concurrent responses cannot interleave.
      siteProducts += pageHtml;
      console.log(siteProducts);
    });
  }
});
1 Like
I should probably clarify why I think your code wasn’t working
Running the code I posted with the changes works, so assuming I’m right:
-
requestPages didn’t have requestPages.end() called, so the requests didn’t work like you think they should,
-
request got away with not having end called by existing at the end of the file
The url part was just the problem after that
Thank you very much! I understand.
Do you know why the first request works without 'https://'?
I just passed the options variable like this: const request = https.request(options, function(response) and it worked. But for the second request I needed this: https.request(('https://' + options.host + '/' + pages), function(response) …
I am confused now…
Yes it does seem a bit odd to need the https there
The reason is basically in how the node https library handles the argument
If the argument is a string, it tries to parse the string as a URL to create an options object, and that’s where it needs the protocol: it assumes that you’re setting a protocol yourself in the string to be parsed
Otherwise if the first argument is the options object directly it adds the default protocol to the object
It’s a bit of a common pitfall, I think