Web Scraper Task
Dockerfile

# First, specify the base Docker image. You can read more about
# the available images at https://sdk.apify.com/docs/guides/docker-images
# You can also use any other image from Docker Hub.
FROM apify/actor-node:16

# Second, copy just package.json and package-lock.json since those are the only
# files that affect "npm install" in the next step, to speed up the build.
COPY package*.json ./

# Install NPM packages, skip optional and development dependencies to
# keep the image small. Avoid logging too much and print the dependency
# tree for debugging.
RUN npm --quiet set progress=false \
    && npm install --only=prod --no-optional \
    && echo "Installed NPM packages:" \
    && (npm list || true) \
    && echo "Node.js version:" \
    && node --version \
    && echo "NPM version:" \
    && npm --version

# Next, copy the remaining files and directories with the source code.
# Since we do this after NPM install, quick build will be really fast
# for most source file changes.
COPY . ./

# Optionally, specify how to launch the source code of your actor.
# By default, Apify's base Docker images define the CMD instruction
# that runs the Node.js source code using the command specified
# in the "scripts.start" section of the package.json file.
# In short, the instruction looks something like this:
#
# CMD npm start
main.js

// This is the main Node.js source code file of your actor.
// It is referenced from the "scripts" section of the package.json file.

const Apify = require('apify');

Apify.main(async () => {
    // Get the input of the actor. Input fields can be modified in the INPUT_SCHEMA.json file.
    // For more information, see https://docs.apify.com/actors/development/input-schema
    const input = await Apify.getInput();
    console.log('Input:');
    console.dir(input);

    // Here you can prepare your input for the actor apify/web-scraper. This input is based
    // on the actor task you used as the starting point.
    const metamorphInput = {
        "breakpointLocation": "NONE",
        "browserLog": false,
        "debugLog": false,
        "downloadCss": true,
        "downloadMedia": true,
        "ignoreCorsAndCsp": false,
        "ignoreSslErrors": false,
        "injectJQuery": true,
        "keepUrlFragments": false,
        "maxRequestRetries": input.maxRequestRetries,
        "pageFunction": input.pageFunction,
        "postNavigationHooks": `// We need to return array of (possibly async) functions here.
        // The functions accept a single argument: the "crawlingContext" object.
        [
            async (crawlingContext) => {
                // ...
            },
        ]`,
        "preNavigationHooks": `// We need to return array of (possibly async) functions here.
        // The functions accept two arguments: the "crawlingContext" object
        // and "gotoOptions".
        [
            async (crawlingContext, gotoOptions) => {
                // ...
            },
        ]`,
        "proxyConfiguration": {
            "useApifyProxy": true,
            "apifyProxyCountry": "US"
        },
        "runMode": "PRODUCTION",
        "startUrls": input.startUrls,
        "useChrome": true,
        "waitUntil": [
            "networkidle2"
        ]
    };

    // Now let's metamorph into the actor apify/web-scraper using the created input.
    await Apify.metamorph('apify/web-scraper', metamorphInput);
});
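main.js reads three fields from the actor input: startUrls, pageFunction, and maxRequestRetries. The INPUT_SCHEMA.json file it mentions is not shown on this page; the following is only a minimal sketch of what a schema covering those three fields could look like, written against Apify's input schema format (the schema actually shipped with this actor may define more fields and different titles or defaults):

INPUT_SCHEMA.json (illustrative sketch)

{
    "title": "Web Scraper Task input (illustrative sketch)",
    "type": "object",
    "schemaVersion": 1,
    "properties": {
        "startUrls": {
            "title": "Start URLs",
            "type": "array",
            "description": "URLs where the scraper should begin crawling.",
            "editor": "requestListSources"
        },
        "pageFunction": {
            "title": "Page function",
            "type": "string",
            "description": "JavaScript function executed on every loaded page.",
            "editor": "javascript"
        },
        "maxRequestRetries": {
            "title": "Max request retries",
            "type": "integer",
            "description": "How many times a failed request is retried before it is marked as failed.",
            "default": 3
        }
    },
    "required": ["startUrls", "pageFunction"]
}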
{ "name": "my-actor", "version": "0.0.1", "dependencies": { "apify": "^2.2.2" }, "scripts": { "start": "node main.js" }, "author": "Me!"}