I have 4 lists mainly fabrics len --> 24
, design_Name len --> 84
, creator_Name len --> 84
and results len --> 2106
.I want to map those values with each other in a specific manner. What I am looking for is something like this.
Expected Output:
{('Catching Fireflies', 'thestorysmith'): {'fabric_name_00': 'FABRIC_PETAL_SIGNATURE_COTTON', 'test_swatch_meter_00': 1.75, 'fat_quarter_meter_00': 10.58, 'meter_00': 18.22, 'fabric_name_01': 'FABRIC_SATIN', 'test_swatch_meter_01': 1.75, 'fat_quarter_meter_01': 11.85, 'meter_01': 19.71, 'fabric_name_02': 'FABRIC_COTTON_POPLIN_BRAVA', 'test_swatch_meter_02': 1.75, 'fat_quarter_meter_02': 11.85, 'meter_02': 19.71, 'fabric_name_03': 'FABRIC_PERFORMANCE_PIQUE', 'test_swatch_meter_03': 1.75, 'fat_quarter_meter_03': 12.9, 'meter_03': 22.65, 'fabric_name_04': 'FABRIC_CHIFFON', 'test_swatch_meter_04': 1.75, 'fat_quarter_meter_04': 12.9, 'meter_04': 22.65, 'fabric_name_05': 'FABRIC_ORGANIC_SWEET_PEA_GAUZE', 'test_swatch_meter_05': 1.75, 'fat_quarter_meter_05': 12.9, 'meter_05': 22.65, 'fabric_name_06': 'FABRIC_POLY_CREPE_DE_CHINE', 'test_swatch_meter_06': 1.75, 'fat_quarter_meter_06': 12.9, 'meter_06': 22.65, 'fabric_name_07': 'FABRIC_COTTON_LAWN_APPAREL', 'test_swatch_meter_07': 1.75, 'fat_quarter_meter_07': 11.75, 'meter_07': 21.66, 'fabric_name_08': 'FABRIC_LIGHTWEIGHT_COTTON_TWILL', 'test_swatch_meter_08': 1.75, 'fat_quarter_meter_08': 12.9, 'meter_08': 25.63, 'fabric_name_09': 'FABRIC_MODERN_JERSEY', 'test_swatch_meter_09': 1.75, 'fat_quarter_meter_09': 13.29, 'meter_09': 26.18, 'fabric_name_10': 'FABRIC_COTTON_SPANDEX_JERSEY', 'test_swatch_meter_10': 1.75, 'fat_quarter_meter_10': 13.57, 'meter_10': 26.42, 'fabric_name_11': 'FABRIC_ORGANIC_COTTON_SATEEN', 'test_swatch_meter_11': 1.75, 'fat_quarter_meter_11': 13.83, 'meter_11': 26.67, 'fabric_name_12': 'FABRIC_LINEN_COTTON_CANVAS', 'test_swatch_meter_12': 1.75, 'fat_quarter_meter_12': 14.79, 'meter_12': 27.59, 'fabric_name_13': 'FABRIC_ORGANIC_COTTON_KNIT_PRIMA', 'test_swatch_meter_13': 1.75, 'fat_quarter_meter_13': 13.83, 'meter_13': 26.67, 'fabric_name_14': 'FABRIC_FLEECE', 'test_swatch_meter_14': 1.75, 'fat_quarter_meter_14': 14.79, 'meter_14': 27.59, 'fabric_name_15': 'FABRIC_MINKY', 'test_swatch_meter_15': 1.75, 'fat_quarter_meter_15': 15.29, 'meter_15': 28.15, 'fabric_name_16': 'FABRIC_DOGWOOD_DENIM', 'test_swatch_meter_16': 1.75, 'fat_quarter_meter_16': 15.83, 'meter_16': 29.54, 'fabric_name_17': 'FABRIC_PERFORMANCE_LINEN', 'test_swatch_meter_17': 1.75, 'fat_quarter_meter_17': 16.8, 'meter_17': 31.58, 'fabric_name_18': 'FABRIC_RECYCLED_CANVAS', 'test_swatch_meter_18': 1.75, 'fat_quarter_meter_18': 16.8, 'meter_18': 31.58, 'fabric_name_19': 'FABRIC_SPORT_LYCRA', 'test_swatch_meter_19': 1.75, 'fat_quarter_meter_19': 16.8, 'meter_19': 31.58, 'fabric_name_20': 'FABRIC_CYPRESS_COTTON_BRAVA', 'test_swatch_meter_20': 1.75, 'fat_quarter_meter_20': 17.23, 'meter_20': 32.48, 'fabric_name_21': 'FABRIC_CELOSIA_VELVET', 'test_swatch_meter_21': 1.75, 'fat_quarter_meter_21': 18.7, 'meter_21': 35.46, 'fabric_name_22': 'FABRIC_PERFORMANCE_VELVET', 'test_swatch_meter_22': 1.75, 'fat_quarter_meter_22': 21.5, 'meter_22': 41.08, 'fabric_name_23': 'FABRIC_BELGIAN_LINEN', 'test_swatch_meter_23': 1.75, 'fat_quarter_meter_23': 29.54, 'meter_23': 58.17}}
I’ve tried so far:
import asyncio
import aiohttp
import json
import requests
from bs4 import BeautifulSoup
import pandas as pd
from CreateDict import CreateDict
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:89.0) Gecko/20100101 Firefox/89.0',
'Accept': '*/*',
'Accept-Language': 'en-US,en;q=0.5',
'X-Spoonflower-Window-UUID': 'a9bc37a2-9eb2-4a1e-8ea1-fcee89347364',
'Content-Type': 'application/json',
'Origin': 'https://www.spoonflower.com',
'Connection': 'keep-alive',
'Referer': 'https://www.spoonflower.com/',
'Sec-GPC': '1',
'If-None-Match': 'W/95d6572c326b81ce98c7ae27ac449d42',
'TE': 'Trailers',
}
def get_fabric_names():
res = requests.get('https://www.spoonflower.com/spoonflower_fabrics')
soup = BeautifulSoup(res.text, 'lxml')
fabrics = [fabric.find('h2').text.strip() for fabric in soup.find_all('div', {'class': 'product_detail medium_text'})]
fabric = [("_".join(fab.upper().replace(u"\u2122", '').split())) for fab in fabrics]
for index in range(len(fabric)):
if 'COTTON_LAWN_(BETA)' in fabric[index]:
fabric[index] = 'COTTON_LAWN_APPAREL'
elif 'COTTON_POPLIN' in fabric[index]:
fabric[index] = 'COTTON_POPLIN_BRAVA'
elif 'ORGANIC_COTTON_KNIT' in fabric[index]:
fabric[index] = 'ORGANIC_COTTON_KNIT_PRIMA'
elif 'PERFORMANCE_PIQUÉ' in fabric[index]:
fabric[index] = 'PERFORMANCE_PIQUE'
elif 'CYPRESS_COTTON' in fabric[index]:
fabric[index] = 'CYPRESS_COTTON_BRAVA'
return fabric
async def get_designEndpoint(session, url):
"""
Get Design End Point
:param url:
"""
async with session.get(url) as response:
response = await response.read()
# print(response)
json_response = json.loads(response.decode("utf-8"))
extracting_endpoint = json_response['page_results']
# extracting designId
design_Id = [item['designId'] for item in extracting_endpoint]
# extracting designName
design_Name = [item['name'] for item in extracting_endpoint]
# extracting creator_Name
creator_Name = [item['user']['screenName'] for item in extracting_endpoint]
return design_Id, design_Name, creator_Name
async def get_Fabric_Pricing_Data(session, url):
"""
Extract all the pricing data with respect to Fabric type
:param url: detail url
:Return: json data
"""
async with session.get(url) as response:
response = await response.read()
json_response = json.loads(response)
#print(json_response)
# Extracting Data
try:
fabric = json_response['data']['fabric_code']
except:
fabric = 'N/A'
try:
test_swatch_meter = json_response['data']['pricing']['TEST_SWATCH_METER']['price']
except:
test_swatch_meter = 'N/A'
try:
fat_quarter_meter = json_response['data']['pricing']['FAT_QUARTER_METER']['price']
except:
fat_quarter_meter = 'N/A'
try:
meter = json_response['data']['pricing']['METER']['price']
except:
meter = 'N/A'
# summary = fabric + "|" + str(test_swatch_meter) + "|" + str(fat_quarter_meter) + "|" + str(meter)
return fabric, test_swatch_meter, fat_quarter_meter, meter
async def main():
urls = []
tasks = []
async with aiohttp.ClientSession(headers=headers) as session:
fabrics = get_fabric_names()
design_Id, design_Name, creator_Name = await get_designEndpoint(session, 'https://pythias.spoonflower.com/search/v1/designs?lang=en&page_offset=0&sort=bestSelling&product=Fabric&forSale=true&showMatureContent=false&page_locale=en')
for item in design_Id:
for fab_type in fabrics[0:-3]:
price_url = 'https://api-gateway.spoonflower.com/alpenrose/pricing/fabrics/FABRIC_'+ fab_type +'?quantity=1&shipping_country=PK¤cy=EUR&measurement_system=METRIC&design_id='+str(item)+'&page_locale=en'
print(price_url)
urls.append(price_url)
for url in urls:
tasks.append(asyncio.create_task(get_Fabric_Pricing_Data(session, url)))
results = await asyncio.gather(*tasks)
print(len(design_Name))
print(len(creator_Name))
print(len(results))
return design_Name, creator_Name, results
if __name__ == '__main__':
loop = asyncio.get_event_loop()
fabrics = get_fabric_names()[0:-3]
design_Name, creator_Name, results = loop.run_until_complete(main())
#print(creator_Name)
for fab in fabrics:
print(fab)
for name, creator in zip(design_Name, creator_Name):
for fab_type in fabrics:
Design_Name = name
Creator_Name = creator
test_swatch_meter = results[1]
fat_quarter_meter = results[2]
meter = results[3]
if (name, creator) not in items_dict.keys():
items_dict[(name, creator)] = {}
itemCount = len(items_dict[(name, creator)].values()) / 4
items_dict[(name, creator)].update({'fabric_name_%02d' %itemCount: fab_type,
'test_swatch_meter_%02d' %itemCount: test_swatch_meter,
'fat_quarter_meter_%02d' %itemCount: fat_quarter_meter,
'meter_%02d' %itemCount: meter})
df = pd.DataFrame.from_dict(items_dict, orient='index').reset_index(drop=False)
df = df.rename(columns={'level_0':'designName','level_1':'screenName'})
df.to_csv('scraped_data.csv', index=False)
but cannot get it to format I am looking for like the given spreadsheet.
Any help would be much appreciated I am trying for almost a week but no luck.
Thanks in Advance.