Bzj Actor avatar

Bzj Actor

Under maintenance
Try for free

No credit card required

Go to Store
This Actor is under maintenance.

This Actor may be unreliable while under maintenance. Would you like to try a similar Actor instead?

See alternative Actors
Bzj Actor

Bzj Actor

fateful_orangerie/bzj-actor
Try for free

No credit card required

This is a test.

.dockerignore

1# configurations
2.idea
3.vscode
4
5# crawlee and apify storage folders
6apify_storage
7crawlee_storage
8storage
9
10# installed files
11node_modules
12
13# git folder
14.git

.editorconfig

1root = true
2
3[*]
4indent_style = space
5indent_size = 4
6charset = utf-8
7trim_trailing_whitespace = true
8insert_final_newline = true
9end_of_line = lf

.eslintrc

1{
2    "root": true,
3    "env": {
4        "browser": true,
5        "es2020": true,
6        "node": true
7    },
8    "extends": [
9        "@apify/eslint-config-ts"
10    ],
11    "parserOptions": {
12        "project": "./tsconfig.json",
13        "ecmaVersion": 2020
14    },
15    "ignorePatterns": [
16        "node_modules",
17        "dist",
18        "**/*.d.ts"
19    ]
20}

.gitignore

1# This file tells Git which files shouldn't be added to source control
2
3.DS_Store
4.idea
5.vscode
6dist
7node_modules
8apify_storage
9storage
10
11# Added by Apify CLI
12.venv

package.json

1{
2	"name": "bzj-actor",
3	"version": "0.0.1",
4	"type": "module",
5	"description": "This is an example of an Apify actor.",
6	"engines": {
7		"node": ">=18.0.0"
8	},
9	"dependencies": {
10		"apify": "^3.2.6",
11		"crawlee": "^3.11.5",
12		"puppeteer": "*",
13		"scrapeless-sdk-node": "^0.0.3"
14	},
15	"devDependencies": {
16		"@apify/eslint-config-ts": "^0.3.0",
17		"@apify/tsconfig": "^0.1.0",
18		"@typescript-eslint/eslint-plugin": "^7.18.0",
19		"@typescript-eslint/parser": "^7.18.0",
20		"eslint": "^8.50.0",
21		"tsx": "^4.6.2",
22		"typescript": "^5.3.3"
23	},
24	"scripts": {
25		"start": "npm run start:dev",
26		"start:prod": "node dist/main.js",
27		"start:dev": "tsx src/main.ts",
28		"build": "tsc",
29		"lint": "eslint ./src --ext .ts",
30		"lint:fix": "eslint ./src --ext .ts --fix",
31		"test": "echo \"Error: oops, the actor has no tests yet, sad!\" && exit 1"
32	},
33	"author": "It's not you it's me",
34	"license": "ISC"
35}

tsconfig.json

1{
2  "extends": "@apify/tsconfig",
3  "compilerOptions": {
4    "module": "NodeNext",
5    "moduleResolution": "NodeNext",
6    "target": "ES2022",
7    "outDir": "dist",
8    "noUnusedLocals": false,
9    "skipLibCheck": true,
10    "esModuleInterop": true,
11    "allowSyntheticDefaultImports": true,
12    "lib": [
13      "DOM"
14    ]
15  },
16  "include": [
17    "./src/**/*"
18  ]
19}

.actor/actor.json

1{
2	"actorSpecification": 1,
3	"name": "bzj-actor",
4	"title": "Project Puppeteer Crawler Typescript",
5	"description": "Crawlee and Puppeteer project in typescript.",
6	"version": "0.0",
7	"meta": {
8		"templateId": "ts-crawlee-puppeteer-chrome"
9	},
10	"input": "./input_schema.json",
11	"dockerfile": "./Dockerfile",
12  "storages": {
13    "dataset": "./dataset_schema.json"
14}
15}

.actor/dataset_schema.json

1{
2  "actorSpecification": 1,
3  "views": {
4    "overview": {
5      "title": "Overview",
6      "transformation": {
7        "fields": [
8          "count",
9          "data",
10          "code",
11          "message"
12        ]
13      },
14      "display": {
15        "component": "table",
16        "properties": {
17          "count": {
18            "label": "count",
19            "format": "text"
20          },
21          "data": {
22            "label": "data",
23            "format": "object"
24          },
25          "code": {
26            "label": "code",
27            "format": "text"
28          },
29          "message": {
30            "label": "message",
31            "format": "text"
32          }
33        }
34      }
35    }
36  }
37}

.actor/Dockerfile

1# Specify the base Docker image. You can read more about
2# the available images at https://crawlee.dev/docs/guides/docker-images
3# You can also use any other image from Docker Hub.
4FROM apify/actor-node-puppeteer-chrome:20 AS builder
5
6# Check preinstalled packages
7RUN npm ls crawlee apify puppeteer playwright
8
9# Copy just package.json and package-lock.json
10# to speed up the build using Docker layer cache.
11COPY --chown=myuser package*.json ./
12
13# Install all dependencies. Don't audit to speed up the installation.
14RUN npm install --include=dev --audit=false
15
16# Next, copy the source files using the user set
17# in the base image.
18COPY --chown=myuser . ./
19
20# Build the project (runs the `build` script, i.e. `tsc`).
21# Dependencies were already installed by the `npm install` step above.
22RUN npm run build
23
24# Create final image
25FROM apify/actor-node-puppeteer-chrome:20
26
27# Check preinstalled packages
28RUN npm ls crawlee apify puppeteer playwright
29
30# Copy just package.json and package-lock.json
31# to speed up the build using Docker layer cache.
32COPY --chown=myuser package*.json ./
33
34# Install NPM packages, skip optional and development dependencies to
35# keep the image small. Avoid logging too much and print the dependency
36# tree for debugging
37RUN npm --quiet set progress=false \
38    && npm install --omit=dev --omit=optional \
39    && echo "Installed NPM packages:" \
40    && (npm list --omit=dev --all || true) \
41    && echo "Node.js version:" \
42    && node --version \
43    && echo "NPM version:" \
44    && npm --version \
45    && rm -r ~/.npm
46
47# Copy built JS files from builder image
48COPY --from=builder --chown=myuser /home/myuser/dist ./dist
49
50# Next, copy the remaining files and directories with the source code.
51# Since we do this after NPM install, quick build will be really fast
52# for most source file changes.
53COPY --chown=myuser . ./
54
55
56# Run the image. If you know you won't need headful browsers,
57# you can remove the XVFB start script for a micro perf gain.
58CMD ./start_xvfb_and_run_cmd.sh && npm run start:prod --silent

.actor/input_schema.json

1{
2  "title": "Actor BZJ",
3  "type": "object",
4  "schemaVersion": 1,
5  "properties": {
6    "url": {
7      "title": "Shopee URL",
8      "type": "string",
9      "editor": "textfield",
10      "description": "Shopee URL that needs to be crawled",
11      "prefill": "https://shopee.sg/api/v4/search/search_items?keyword=hp%201020%20toner&limit=30&newest=0&order=desc&page_type=search&scenario=page_global_search&version=2"
12    },
13    "apiKey": {
14      "title": "API Key",
15      "type": "string",
16      "editor": "textfield",
17      "description": "Start using the API with your [API KEY](https://app.scrapeless.com/dashboard/account?tab=apiKey)"
18    },
19    "actor": {
20      "title": "Shopee Actor",
21      "type": "string",
22      "enum": [
23        "scraper.shopee",
24        "scraper.shopee.retry",
25        "scraper.shopee.stock",
26        "scraper.shopee.mobile",
27        "scraper.shopee.mobilev2",
28        "scraper.shopee.mobilev3",
29        "scraper.shopee.webv1"
30      ],
31      "description": "Shopee Actor to use for scraping",
32      "prefill": "scraper.shopee"
33    },
34    "action": {
35      "title": "Shopee Action",
36      "type": "string",
37      "enum": [
38        "shopee.product",
39        "shopee.search",
40        "shopee.live",
41        "shopee.rcmd"
42      ],
43      "description": "Shopee Action to use for scraping",
44      "prefill": "shopee.search"
45    },
46    "webhook": {
47      "title": "webhook",
48      "type": "string",
49      "editor": "textfield",
50      "description": "webhook URL to send the data to",
51      "default": ""
52    }
53  },
54  "required": ["url", "actor", "apiKey", "action"]
55}

src/main.ts

// Apify SDK - toolkit for building Apify Actors (Read more at https://docs.apify.com/sdk/js/).
import { Actor } from 'apify';
import Scrapeless from 'scrapeless-sdk-node';

// NOTE: the template's PuppeteerCrawler scaffolding (src/routes.ts) is not imported here;
// this Actor only forwards its input to the Scrapeless SDK and stores the response.

/**
 * Values accepted for the `actor` input field. The selected value is passed
 * unchanged as the `actor` argument of `scrapeless.scraper()`.
 */
enum ShopeeActorEnum {
    /** No retries allowed. */
    default = 'scraper.shopee',
    /** Retry is allowed, and the task timeout is 55 seconds. */
    retry = 'scraper.shopee.retry',
    /** Allow retry and get inventory information, only supported in "shopee.product". */
    stock = 'scraper.shopee.stock',
    /** Only supported in "shopee.product". */
    mobile = 'scraper.shopee.mobile',
    /** Only supported in "shopee.product". */
    mobilev2 = 'scraper.shopee.mobilev2',
    /** Only supported in "shopee.product". */
    mobilev3 = 'scraper.shopee.mobilev3',
    /** Only supported in "shopee.product". */
    webv1 = 'scraper.shopee.webv1',
}

/**
 * Values accepted for the `action` input field.
 */
enum ShopeeActionEnum {
    product = 'shopee.product',
    search = 'shopee.search',
    live = 'shopee.live',
    rcmd = 'shopee.rcmd',
    // NOTE(review): `shopee.ratings` is not listed in the `action` enum of
    // .actor/input_schema.json - confirm whether it should be offered there.
    ratings = 'shopee.ratings',
}

/** Shape of the Actor input; field names match .actor/input_schema.json. */
interface Input {
    apiKey: string;
    actor: ShopeeActorEnum;
    action: ShopeeActionEnum;
    webhook: string;
    url: string;
}

/**
 * Reports whether `value` is a non-blank string.
 *
 * The return type is a plain `boolean` (not a type predicate) on purpose, so
 * calling it does not narrow the static type of the argument. That keeps the
 * types seen by the `@ts-expect-error` line below exactly as they were.
 */
function hasText(value: unknown): boolean {
    return typeof value === 'string' && value.trim().length > 0;
}

// The init() call configures the Actor for its environment. It's recommended to start every Actor with an init().
await Actor.init();

// Read the Actor input, falling back to defaults for every optional field.
const {
    apiKey,
    actor = ShopeeActorEnum.default,
    action = ShopeeActionEnum.search,
    webhook = '',
    // NOTE(review): this fallback differs from the URL prefilled in
    // .actor/input_schema.json - confirm which one is intended.
    url = 'https://shopee.tw/2312312.10228173.24803858474',
} = await Actor.getInput<Input>() ?? {};

/**
 * Builds the `input` payload for `scrapeless.scraper()` from the module-level
 * `url` and `action` values.
 *
 * For the search action the action name is sent under the key `type`; for
 * every other action it is sent under the key `action`.
 * NOTE(review): confirm this key difference against the Scrapeless API docs.
 */
function getScrapelessInput() {
    const baseInput = { url };
    if (action === ShopeeActionEnum.search) {
        return { ...baseInput, type: ShopeeActionEnum.search };
    }
    return { ...baseInput, action };
}

try {
    // `apiKey` is required by the input schema, but local runs can start
    // without any input at all, so fail early with a clear message.
    if (!hasText(apiKey)) {
        throw new Error('Input field "apiKey" is required.');
    }

    // @ts-expect-error scrapeless-sdk-node
    const scrapeless = new Scrapeless({ apiKey });

    const response = await scrapeless.scraper({
        actor,
        webhook,
        input: getScrapelessInput(),
    });

    console.log('[response]===>', response);

    // Store the raw Scrapeless response as a single dataset item.
    await Actor.pushData(response as object);
} catch (error: unknown) {
    // Mark the run as failed with a readable status message instead of
    // letting the rejection escape the top-level await.
    const reason = error instanceof Error ? error.message : String(error);
    await Actor.fail(`Actor run failed: ${reason}`);
}

// Gracefully exit the Actor process. It's recommended to quit all Actors with an exit().
await Actor.exit();

src/routes.ts

import { Dataset, createPuppeteerRouter } from 'crawlee';

/** Label attached to enqueued links so they are routed to the detail handler. */
const DETAIL_LABEL = 'detail';

/** Router that dispatches crawled pages to the handlers registered below. */
export const router = createPuppeteerRouter();

// Default handler: enqueue every link matching the glob and tag it with the detail label.
router.addDefaultHandler(async (context) => {
    const { enqueueLinks, log } = context;

    log.info(`enqueueing new URLs`);

    const enqueueOptions = {
        globs: ['https://apify.com/*'],
        label: DETAIL_LABEL,
    };
    await enqueueLinks(enqueueOptions);
});

// Detail handler: log the page title and store it together with the loaded URL.
router.addHandler(DETAIL_LABEL, async (context) => {
    const { request, page, log } = context;

    const title = await page.title();
    const url = request.loadedUrl;

    log.info(`${title}`, { url });

    await Dataset.pushData({ url, title });
});
Developer
Maintained by Community

Actor Metrics

  • 2 monthly users

  • 0 No stars yet

  • >99% runs succeeded

  • Created in Jan 2025

  • Modified 12 days ago

Categories