Disclaimer: Real Data API only extracts publicly available data while maintaining a strict policy against collecting any personal or identity-related information.
Are you looking to scrape information from Shopify websites? Our Shopify Products Data Scraper is the ideal tool for you! This tool lets you quickly obtain product details such as price, images, and variants from any competitor. It works in various countries, including Australia, Canada, Germany, France, Singapore, the USA, the UK, UAE, and India.
Shopify Data Scraper lets you load product information from websites built using Shopify.
To get started with Web Scraper, you only need two things. First, tell the scraper which web pages it should load, and second, tell it how to extract data from each of the pages.
If you want to check if this is a Shopify website, open the https://
To avoid floating-point computations, it multiplies price values by 100 (for example, a price of 15.00 is output as 1500).
Supports tag categorization for collections.
This scraper accepts JSON input as below.
Field | Type | Description |
---|---|---|
startUrls | Array | Start links of the Shopify website to begin the API execution. It supports product group pages, category pages, and product page URLs. |
proxy | Object | Choose proxy servers to support your crawler. |
Check out the following example:
{
"startUrls": [
{ "url": "https://uk.gymshark.com/collections/crop-tops" }
],
"proxy": { "useRealdataAPIProxy": true }
}
Retrieve Shopify site collections.
{
"startUrls": [
{ "url": "https://uk.gymshark.com/collections" }
],
"proxy": { "useRealdataAPIProxy": true }
}
Crawl all Shopify site products.
{
"startUrls": [
{ "url": "https://uk.gymshark.com" }
],
"proxy": { "useRealdataAPIProxy": true }
}
Retrieve products of a single collection.
{
"startUrls": [
{ "url": "https://uk.gymshark.com/collections/crop-tops" }
],
"proxy": { "useRealdataAPIProxy": true }
}
Fetch product information.
{
"startUrls": [
{ "url": "https://uk.gymshark.com/products/gymshark-vision-long-sleeve-crop-top-black-aw21" }
],
"proxy": { "useRealdataAPIProxy": true }
}
The Shopify data scraper saves the extracted data in JSON format.
{
"source": {
"id": "4857453543626",
"canonicalUrl": "https://www.gymshark.com/products/gymshark-fraction-crop-top-light-green-white-logo",
"retailer": "Gymshark | Be a visionary.",
"language": "en",
"currency": "USD",
"createdUTC": 1613989955000,
"updatedUTC": 1640058905000,
"publishedUTC": 1614279501000
},
"title": "Gymshark Fraction Crop Top - Light Green",
"description": "<meta charset=\"utf-8\"><strong data-mce-fragment=\"1\">IN YOUR LOCKER</strong><span data-mce-fragment=\"1\"></span><br data-mce-fragment=\"1\"><br data-mce-fragment=\"1\"><span data-mce-fragment=\"1\">A slice of unique athleisure, the Fraction Crop Top is tasteful, charming and convenient. Available in a range of versatile colours, style any workout or rest day outfit with the finishing touch of this short sleeve crop top.</span><br data-mce-fragment=\"1\"><br data-mce-fragment=\"1\"><span data-mce-fragment=\"1\">- Cropped t-shirt</span><br data-mce-fragment=\"1\"><span data-mce-fragment=\"1\">- Slight dropped shoulder</span><br data-mce-fragment=\"1\"><span data-mce-fragment=\"1\">- Crew neck</span><br data-mce-fragment=\"1\"><span data-mce-fragment=\"1\">- Rolled hem to sleeve</span><br data-mce-fragment=\"1\"><span data-mce-fragment=\"1\">- Large cut off Gymshark logo at hem</span><br data-mce-fragment=\"1\"><span data-mce-fragment=\"1\">- 95% Cotton, 5% Elastane</span><br data-mce-fragment=\"1\"><span data-mce-fragment=\"1\">- We've cut down on our use of swing tags, so this product comes without one</span><br data-mce-fragment=\"1\"><span data-mce-fragment=\"1\">- Model is </span><meta charset=\"utf-8\"><span data-mce-fragment=\"1\">5'9\" and wears a size L<br></span>- Video model is 5'8\" and wears size XS<br data-mce-fragment=\"1\"><meta charset=\"utf-8\"><span data-mce-fragment=\"1\">- SKU: GLCT1844-CLM</span><br>",
"brand": "Gymshark | Be a visionary.",
"categories": [
"Womens Crop Top"
],
"tags": [
"25/02/21",
"all-products",
"crop-tops",
"essentials",
"filter-colour: Green",
"filter-size:l",
"filter-size:m",
"filter-size:s",
"filter-size:xl",
"filter-size:xs",
"filter-size:xxl",
"home-workout-clothes",
"instock:s",
"instock:xs",
"outlet",
"retention-collection",
"short-sleeve",
"sizeguide:top",
"SS21",
"t-shirts-tops",
"Womens"
],
"variants": [
{
"id": "32686177550538",
"title": "Extra Small",
"sku": "GLCT1844-CLM-XS",
"options": [
"Extra Small"
],
"price": {
"current": 1500,
"previous": 2500,
"stockStatus": "InStock"
}
},
{
"id": "32686177583306",
"title": "Small",
"sku": "GLCT1844-CLM-S",
"options": [
"Small"
],
"price": {
"current": 1500,
"previous": 2500,
"stockStatus": "InStock"
}
},
{
"id": "32686177616074",
"title": "Medium",
"sku": "GLCT1844-CLM-M",
"options": [
"Medium"
],
"price": {
"current": 1500,
"previous": 2500,
"stockStatus": "OutOfStock"
}
},
{
"id": "32686177648842",
"title": "Large",
"sku": "GLCT1844-CLM-L",
"options": [
"Large"
],
"price": {
"current": 1500,
"previous": 2500,
"stockStatus": "OutOfStock"
}
},
{
"id": "32686177681610",
"title": "Extra Large",
"sku": "GLCT1844-CLM-XL",
"options": [
"Extra Large"
],
"price": {
"current": 1500,
"previous": 2500,
"stockStatus": "OutOfStock"
}
},
{
"id": "39785973285066",
"title": "Extra Extra Large",
"sku": "GLCT1844-CLM-XXL",
"options": [
"Extra Extra Large"
],
"price": {
"current": 1500,
"previous": 2500,
"stockStatus": "OutOfStock"
}
}
],
"medias": [
{
"id": "15928233361610",
"type": "Image",
"url": "https://cdn.shopifycdn.net/s/files/1/0156/6146/products/TRAININGCROPTEECOOLMINT.A-Edit_AS.jpg?v=1613989955",
"variantIds": [],
"alt": ""
},
{
"id": "15928233394378",
"type": "Image",
"url": "https://cdn.shopifycdn.net/s/files/1/0156/6146/products/TRAININGCROPTEECOOLMINT.B-Edit_AS.jpg?v=1613989955",
"variantIds": [],
"alt": ""
},
{
"id": "15928233427146",
"type": "Image",
"url": "https://cdn.shopifycdn.net/s/files/1/0156/6146/products/TRAININGCROPTEECOOLMINT.C-Edit_AS.jpg?v=1613989955",
"variantIds": [],
"alt": ""
},
{
"id": "15928233459914",
"type": "Image",
"url": "https://cdn.shopifycdn.net/s/files/1/0156/6146/products/TRAININGCROPTEECOOLMINT.D1-Edit_AS.jpg?v=1613997217",
"variantIds": [],
"alt": ""
},
{
"id": "15928233492682",
"type": "Image",
"url": "https://cdn.shopifycdn.net/s/files/1/0156/6146/products/TRAININGCROPTEECOOLMINT.D2-Edit_AS.jpg?v=1613997217",
"variantIds": [],
"alt": ""
},
{
"id": "15928233525450",
"type": "Image",
"url": "https://cdn.shopifycdn.net/s/files/1/0156/6146/products/TRAININGCROPTEECOOLMINT.D3-Edit_AS.jpg?v=1613997217",
"variantIds": [],
"alt": ""
}
],
"options": [
{
"type": "Size",
"values": [
{
"id": "Extra Small",
"name": "Extra Small"
},
{
"id": "Small",
"name": "Small"
},
{
"id": "Medium",
"name": "Medium"
},
{
"id": "Large",
"name": "Large"
},
{
"id": "Extra Large",
"name": "Extra Large"
},
{
"id": "Extra Extra Large",
"name": "Extra Extra Large"
}
]
}
]
}
You need a Real Data API account to run the program examples. Replace <YOUR_API_TOKEN>
in the program with your own API token. See the Real Data API docs for more details about the live APIs.
import { RealdataAPIClient } from 'RealdataAPI-Client';
// Initialize the RealdataAPIClient with API token
const client = new RealdataAPIClient({
token: '<YOUR_API_TOKEN>',
});
// Prepare actor input
const input = {
"startUrls": [
{
"url": "https://www.gymshark.com/collections/crop-tops"
}
],
"proxy": {
"useRealdataAPIProxy": true
}
};
(async () => {
// Run the actor and wait for it to finish
const run = await client.actor("autofacts/shopify").call(input);
// Fetch and print actor results from the run's dataset (if any)
console.log('Results from dataset');
const { items } = await client.dataset(run.defaultDatasetId).listItems();
items.forEach((item) => {
console.dir(item);
});
})();
from RealdataAPI_client import RealdataAPIClient
# Initialize the RealdataAPIClient with your API token
client = RealdataAPIClient("<YOUR_API_TOKEN>")
# Prepare the actor input
run_input = {
"startUrls": [{ "url": "https://www.gymshark.com/collections/crop-tops" }],
"proxy": { "useRealdataAPIProxy": True },
}
# Run the actor and wait for it to finish
run = client.actor("autofacts/shopify").call(run_input=run_input)
# Fetch and print actor results from the run's dataset (if there are any)
for item in client.dataset(run["defaultDatasetId"]).iterate_items():
    print(item)
# Set API token
API_TOKEN=<YOUR_API_TOKEN>
# Prepare actor input
cat > input.json <<'EOF'
{
"startUrls": [
{
"url": "https://www.gymshark.com/collections/crop-tops"
}
],
"proxy": {
"useRealdataAPIProxy": true
}
}
EOF
# Run the actor
curl "https://api.RealdataAPI.com/v2/acts/autofacts~shopify/runs?token=$API_TOKEN" \
-X POST \
-d @input.json \
-H 'Content-Type: application/json'
startUrls
Required Array
Start URLs of the source website to begin the actor execution. It supports all URLs like category page links, site root links, product page URLs, etc.
proxy
Optional Object
Choose proxies to support your actor.
{
"startUrls": [
{
"url": "https://www.gymshark.com/collections/crop-tops"
}
],
"proxy": {
"useRealdataAPIProxy": true
}
}