Actor Costs avatar

Actor Costs

Try for free

No credit card required

Go to Store
Actor Costs

Actor Costs

lukaskrivka/actor-costs
Try for free

No credit card required

Get cost and usage stats for your Actor's runs, aggregated by day. The Actor also provides summary stats for the whole period.

.actor/Dockerfile

# Two-stage build: the "builder" stage installs dev dependencies and compiles
# the project; the final stage installs production dependencies only and
# receives the compiled output from the builder.
#
# Available base images: https://crawlee.dev/docs/guides/docker-images
# Any other image from Docker Hub can be used instead.
FROM apify/actor-node:20 AS builder

# Copy just package.json and package-lock.json first so the dependency layer
# can be served from the Docker layer cache when only source files change.
COPY package*.json ./

# Install all dependencies, including dev ones needed by the build.
# Don't audit to speed up the installation.
RUN npm install --include=dev --audit=false

# Next, copy the source files using the user set in the base image.
COPY . ./

# Build the project (runs the "build" npm script).
RUN npm run build

# Create final image
FROM apify/actor-node:20

# Copy just package.json and package-lock.json
# to speed up the build using Docker layer cache.
COPY package*.json ./

# Install NPM packages, skip optional and development dependencies to
# keep the image small. Avoid logging too much and print the dependency
# tree for debugging
RUN npm --quiet set progress=false \
    && npm install --omit=dev --omit=optional \
    && echo "Installed NPM packages:" \
    && (npm list --omit=dev --all || true) \
    && echo "Node.js version:" \
    && node --version \
    && echo "NPM version:" \
    && npm --version \
    && rm -r ~/.npm

# Copy the remaining files and directories with the source code.
# Since we do this after NPM install, quick build will be really fast
# for most source file changes.
COPY . ./

# Copy built JS files from the builder image LAST, so that a dist/ directory
# that happens to be present in the build context can never overwrite the
# freshly compiled output.
COPY --from=builder /usr/src/app/dist ./dist

# Run the image. Exec form is used so the command is started directly rather
# than through an intermediate `/bin/sh -c` wrapper.
CMD ["npm", "run", "start:prod", "--silent"]

.actor/actor.json

1{
2	"actorSpecification": 1,
3	"name": "actor-costs",
4	"title": "Actor Costs",
5	"description": "Get cost and usage stats for your Actor's runs, aggregated by day, plus summary stats for the whole period.",
6	"version": "0.0",
7	"meta": {
8		"templateId": "ts-crawlee-cheerio"
9	},
10	"input": "./input_schema.json",
11	"dockerfile": "./Dockerfile"
12}

.actor/input_schema.json

1{
2    "title": "Actor Costs input",
3    "type": "object",
4    "schemaVersion": 1,
5    "properties": {
6        "actorIdOrName": {
7            "title": "Actor ID or full name",
8            "type": "string",
9            "description": "Actor ID or full name",
10            "editor": "textfield",
11            "prefill": "apify/web-scraper"
12        },
13        "onlyRunsNewerThan": {
14            "title": "Only runs newer than date",
15            "type": "string",
16            "description": "Measured by when the run was started. Use JSON input to specify date with a time in ISO format, e.g. \"2024-01-01T12:00:00\"",
17            "editor": "datepicker"
18        },
19        "onlyRunsOlderThan": {
20            "title": "Only runs older than date",
21            "type": "string",
22            "description": "Measured by when the run was started. Use JSON input to specify date with a time in ISO format, e.g. \"2024-01-01T12:00:00\"",
23            "editor": "datepicker"
24        },
25        "getCostBreakdown": {
26            "title": "Get cost breakdown by usage type (1000x slower!)",
27            "type": "boolean",
28            "description": "Very slow since we need to request each run separately",
29            "default": false
30        },
31        "getDatasetItemCount": {
32            "title": "Get dataset item count (1000x slower!)",
33            "type": "boolean",
34            "description": "Very slow since we need to request each run separately",
35            "default": false
36        }
37    },
38    "required": ["actorIdOrName"]
39}

src/main.ts

1import { Actor, log } from 'apify';
2import { useState } from 'crawlee';
3import { processRuns } from './process-runs.js';
4
/** Shape of the Actor input as read through `Actor.getInput()`. */
interface Input {
    /** ID or full name of the Actor whose runs are analysed. */
    actorIdOrName: string;
    /** Lower bound on the run start time; parsed with `new Date()`. */
    onlyRunsNewerThan?: string;
    /** Upper bound on the run start time; parsed with `new Date()`. */
    onlyRunsOlderThan?: string;
    /** When true, every run is requested individually to obtain its usage breakdown. */
    getCostBreakdown?: boolean;
    /** When true, the default dataset of every run is requested to obtain its item count. */
    getDatasetItemCount?: boolean;
}

/** Occurrence count (or summed amount) per string key. */
type NumberByKey = Record<string, number>;

/** Statistics of all runs that started on a single day. */
interface DateAggregation {
    /** Day key in `YYYY-MM-DD` form. */
    date: string;
    runCount: number;
    /** Summed total usage cost in USD. */
    cost: number;
    /** Only when requested in input. */
    datasetItems?: number;
    /** Summed cost in USD per usage type. */
    costDetail: NumberByKey;
    /** ISO timestamp of the earliest run start in this aggregation. */
    firstRunDate: string;
    /** ISO timestamp of the latest run start in this aggregation. */
    lastRunDate: string;
    /** Number of runs per build number. */
    buildNumbers: NumberByKey;
    /** Number of runs per run status. */
    statuses: NumberByKey;
    /** Number of runs per run origin. */
    origins: NumberByKey;
}

/** Daily aggregations keyed by day: `{ date: stats }`. */
type DateAggregations = Record<string, DateAggregation>;

/** Persisted progress of the Actor, used to continue after a migration. */
export interface State {
    dateAggregations: DateAggregations;
    /** ID of the run to continue from after a migration, or `null` when there is none. */
    lastProcessedRunId: string | null;
    /** List offset of the last fully processed page of runs. */
    lastProcessedOffset: number;
}
35
// The init() call configures the Actor for its environment. It's recommended to start every Actor with an init()
await Actor.init();

/**
 * Fails the Actor run with the given status message.
 *
 * `Actor.fail()` is asynchronous, so it has to be awaited rather than thrown.
 * The trailing `throw` lets the type checker treat callers as terminated and
 * guards against the process still being alive after `Actor.fail()` resolves.
 */
const failWith = async (message: string): Promise<never> => {
    await Actor.fail(message);
    throw new Error(message);
};

/**
 * Parses an optional date string taken from the input.
 *
 * @param value Raw input value; empty or missing values yield `undefined`.
 * @param fieldName Input field name, used in the failure message.
 * @returns The parsed date, or `undefined` when no value was provided.
 */
const parseDateInput = async (value: string | undefined, fieldName: string): Promise<Date | undefined> => {
    if (!value) {
        return undefined;
    }
    const parsed = new Date(value);
    if (Number.isNaN(parsed.getTime())) {
        return failWith(`Invalid date format for ${fieldName}, use YYYY-MM-DD or with time YYYY-MM-DDTHH:mm:ss`);
    }
    return parsed;
};

/** Rounds a USD amount to 4 decimal places. */
const roundUsd = (amount: number): number => Number(amount.toFixed(4));

/** Adds every counter of `source` onto the matching key of `target`. */
const addCounts = (target: Record<string, number>, source: Record<string, number>): void => {
    for (const [key, count] of Object.entries(source)) {
        target[key] = (target[key] ?? 0) + count;
    }
};

const input = await Actor.getInput<Input>();
if (!input?.actorIdOrName) {
    // Without this guard a missing input would surface as an opaque destructuring TypeError.
    throw await failWith('Missing required input field actorIdOrName');
}

const {
    actorIdOrName,
    onlyRunsNewerThan,
    onlyRunsOlderThan,
    getCostBreakdown = false,
    getDatasetItemCount = false,
} = input;

const onlyRunsNewerThanDate = await parseDateInput(onlyRunsNewerThan, 'onlyRunsNewerThan');
const onlyRunsOlderThanDate = await parseDateInput(onlyRunsOlderThan, 'onlyRunsOlderThan');

const runsClient = Actor.apifyClient.actor(actorIdOrName).runs();

const state = await useState<State>(
    'STATE',
    { lastProcessedOffset: 0, lastProcessedRunId: null, dateAggregations: {} },
);

// Page size for listing runs; also used to detect the last (short) page.
const LIMIT = 1000;
let offset = state.lastProcessedOffset;
for (;;) {
    // Runs are listed newest first, one page at a time.
    const { items: runs } = await runsClient.list({ desc: true, limit: LIMIT, offset });

    log.info(`Loaded ${runs.length} runs (offset from now: ${offset}), newest: ${runs[0]?.startedAt}, `
        + `oldest: ${runs[runs.length - 1]?.startedAt} processing them now`);

    const { stopLoop } = await processRuns({
        runs,
        state,
        onlyRunsOlderThanDate,
        onlyRunsNewerThanDate,
        getCostBreakdown,
        getDatasetItemCount,
    });

    state.lastProcessedOffset = offset;

    if (stopLoop) {
        log.warning(`Reached onlyRunsNewerThanDate ${onlyRunsNewerThanDate}, stopping loading runs`);
        break;
    }

    if (runs.length < LIMIT) {
        log.warning('No more runs to process, stopping loading runs');
        break;
    }

    offset += LIMIT;
}

// Summary over all days; stored separately from the per-day dataset items.
const totalStats: Omit<DateAggregation, 'date'> = {
    runCount: 0,
    cost: 0,
    costDetail: {},
    firstRunDate: '',
    lastRunDate: '',
    buildNumbers: {},
    statuses: {},
    origins: {},
};

const dailyItems: DateAggregation[] = [];

for (const aggregation of Object.values(state.dateAggregations)) {
    totalStats.runCount += aggregation.runCount;
    totalStats.cost += aggregation.cost;

    // Compare against null/undefined so a day with 0 items still makes the total appear.
    if (aggregation.datasetItems != null) {
        totalStats.datasetItems = (totalStats.datasetItems ?? 0) + aggregation.datasetItems;
    }

    // ISO timestamps sort lexicographically, so min/max works regardless of iteration order.
    if (!totalStats.lastRunDate || aggregation.lastRunDate > totalStats.lastRunDate) {
        totalStats.lastRunDate = aggregation.lastRunDate;
    }
    if (!totalStats.firstRunDate || aggregation.firstRunDate < totalStats.firstRunDate) {
        totalStats.firstRunDate = aggregation.firstRunDate;
    }

    addCounts(totalStats.buildNumbers, aggregation.buildNumbers);
    addCounts(totalStats.statuses, aggregation.statuses);
    addCounts(totalStats.origins, aggregation.origins);

    const cleanedCostDetail: Record<string, number> = {};
    for (const [usageType, usageUsd] of Object.entries(aggregation.costDetail)) {
        cleanedCostDetail[usageType] = roundUsd(usageUsd);
        // Accumulate the raw amount; totals are rounded once at the end to avoid compounding rounding error.
        totalStats.costDetail[usageType] = (totalStats.costDetail[usageType] ?? 0) + usageUsd;
    }

    dailyItems.push({ ...aggregation, cost: roundUsd(aggregation.cost), costDetail: cleanedCostDetail });
}

// Round the totals the same way as the per-day values.
totalStats.cost = roundUsd(totalStats.cost);
for (const [usageType, usageUsd] of Object.entries(totalStats.costDetail)) {
    totalStats.costDetail[usageType] = roundUsd(usageUsd);
}

await Actor.pushData(dailyItems);

await Actor.setValue('STATE', state);
await Actor.setValue('TOTAL_STATS', totalStats);

const store = await Actor.openKeyValueStore();
const url = store.getPublicUrl('TOTAL_STATS');
await Actor.exit(`Total stats for whole period are available at ${url}`);

src/process-runs.ts

1import { Actor, log } from 'apify';
2
3import type { ActorRunListItem, ActorRun } from 'apify-client';
4import { sleep } from 'crawlee';
5import type { State } from './main.js';
6
/** Arguments accepted by `processRuns()`. */
interface ProcessRunsInputs {
    /** One page of listed runs to aggregate. */
    runs: ActorRunListItem[];
    /** Shared progress/aggregation state; it is mutated in place. */
    state: State;
    /** When set, runs that started after this date are ignored. */
    onlyRunsOlderThanDate?: Date;
    /** When set, the first run that started before this date stops the processing. */
    onlyRunsNewerThanDate?: Date;
    /** Request each run individually to read its usage breakdown. */
    getCostBreakdown: boolean;
    /** Request the default dataset of each run to read its clean item count. */
    getDatasetItemCount: boolean;
}
15
// Set to true once the 'migrating' event arrives; processRuns() checks it for every run.
let isMigrating = false;

// Becomes true once the run stored in state.lastProcessedRunId has been located again.
let foundLastProcessedRun = false;

const markMigrating = (): void => {
    isMigrating = true;
};
Actor.on('migrating', markMigrating);
22
/**
 * Aggregates one page of runs into `state.dateAggregations`, keyed by the
 * UTC day (`YYYY-MM-DD`) on which each run started.
 *
 * Cheap checks (migration pause, resume skip, date filters) run first, so the
 * optional per-run API requests are only made for runs that are actually
 * counted.
 *
 * @param runs Page of runs, expected newest first.
 * @param state Progress and aggregation state; mutated in place.
 * @param onlyRunsOlderThanDate Runs started after this date are skipped.
 * @param onlyRunsNewerThanDate The first run started before this date ends the processing.
 * @param getCostBreakdown Fetch each run's detail to add its per-usage-type costs.
 * @param getDatasetItemCount Fetch each run's default dataset to add its clean item count.
 * @returns `stopLoop: true` once a run older than `onlyRunsNewerThanDate` is reached, otherwise `stopLoop: false`.
 */
export const processRuns = async ({
    runs,
    state,
    onlyRunsOlderThanDate,
    onlyRunsNewerThanDate,
    getCostBreakdown,
    getDatasetItemCount,
}: ProcessRunsInputs): Promise<{ stopLoop: boolean }> => {
    // Runs are in descending order (newest first)
    for (const listedRun of runs) {
        if (isMigrating) {
            log.warning('Actor is migrating, pausing all processing and storing last state to continue where we left of');
            // While we are still skipping towards a previously stored resume point, keep that
            // pointer: overwriting it with an already-processed run would make the next start
            // count the runs in between a second time.
            const stillSeekingResumePoint = Boolean(state.lastProcessedRunId) && !foundLastProcessedRun;
            if (!stillSeekingResumePoint) {
                // This run has not been aggregated yet, so it is where the next start continues.
                state.lastProcessedRunId = listedRun.id;
            }
            // NOTE(review): nothing here persists `state` explicitly; presumably the caller's
            // auto-saved state stores it before the container stops — confirm.
            await sleep(999999);
        }

        // If we load after migration, we need to find the run where we stopped
        if (state.lastProcessedRunId && !foundLastProcessedRun) {
            if (state.lastProcessedRunId !== listedRun.id) {
                log.warning(`Skipping run we already processed before migration ${listedRun.id}`);
                continue;
            }
            foundLastProcessedRun = true;
            state.lastProcessedRunId = null;
        }

        if (onlyRunsOlderThanDate && listedRun.startedAt > onlyRunsOlderThanDate) {
            continue;
        }
        if (onlyRunsNewerThanDate && listedRun.startedAt < onlyRunsNewerThanDate) {
            // We are going from present to past so at this point we can exit
            return { stopLoop: true };
        }

        // Per-run requests happen only now, for runs that really get counted, and before any
        // mutation of the aggregation so a failing request cannot leave a half-counted run.
        const detailedRun = getCostBreakdown
            ? await Actor.apifyClient.run(listedRun.id).get()
            : undefined;
        // Fall back to the list item when the run detail is not available.
        const run: ActorRunListItem | ActorRun = detailedRun ?? listedRun;

        let cleanItemCount: number | null = null;
        if (getDatasetItemCount) {
            // The dataset may no longer exist; in that case nothing is added for this run.
            const dataset = await Actor.apifyClient.dataset(run.defaultDatasetId).get();
            cleanItemCount = dataset?.cleanItemCount ?? null;
        }

        const startedAtIso = run.startedAt.toISOString();
        const runDate = startedAtIso.slice(0, 10);
        const dayStats = (state.dateAggregations[runDate] ??= {
            date: runDate,
            runCount: 0,
            cost: 0,
            costDetail: {},
            firstRunDate: startedAtIso,
            lastRunDate: startedAtIso,
            buildNumbers: {},
            statuses: {},
            origins: {},
        });

        dayStats.runCount++;
        dayStats.cost += run.usageTotalUsd ?? 0;

        const usageUsd = detailedRun?.usageUsd;
        if (usageUsd) {
            for (const [usageType, amountUsd] of Object.entries(usageUsd as Record<string, number>)) {
                dayStats.costDetail[usageType] = (dayStats.costDetail[usageType] ?? 0) + amountUsd;
            }
        }

        // lastRunDate is always the first we encounter because we go desc so we don't have to update it
        dayStats.firstRunDate = startedAtIso;

        dayStats.buildNumbers[run.buildNumber] = (dayStats.buildNumbers[run.buildNumber] ?? 0) + 1;
        dayStats.statuses[run.status] = (dayStats.statuses[run.status] ?? 0) + 1;
        dayStats.origins[run.meta.origin] = (dayStats.origins[run.meta.origin] ?? 0) + 1;

        if (cleanItemCount !== null) {
            dayStats.datasetItems = (dayStats.datasetItems ?? 0) + cleanItemCount;
        }
    }

    return { stopLoop: false };
};

.dockerignore

1# configurations
2.idea
3
4# crawlee and apify storage folders
5apify_storage
6crawlee_storage
7storage
8
9# installed files
10node_modules
11
12# git folder
13.git

.editorconfig

1root = true
2
3[*]
4indent_style = space
5indent_size = 4
6charset = utf-8
7trim_trailing_whitespace = true
8insert_final_newline = true
9end_of_line = lf

.eslintrc

1{
2    "root": true,
3    "env": {
4        "browser": true,
5        "es2020": true,
6        "node": true
7    },
8    "extends": [
9        "@apify/eslint-config-ts"
10    ],
11    "parserOptions": {
12        "project": "./tsconfig.json",
13        "ecmaVersion": 2020
14    },
15    "ignorePatterns": [
16        "node_modules",
17        "dist",
18        "**/*.d.ts"
19    ]
20}

.gitignore

1# This file tells Git which files shouldn't be added to source control
2
3.DS_Store
4.idea
5dist
6node_modules
7apify_storage
8storage
9
10# Added by Apify CLI
11.venv

package.json

1{
2	"name": "actor-costs",
3	"version": "0.0.1",
4	"type": "module",
5	"description": "Apify Actor that aggregates the cost and usage stats of another Actor's runs by day.",
6	"engines": {
7		"node": ">=18.0.0"
8	},
9	"dependencies": {
10		"apify": "^3.1.10",
11		"crawlee": "^3.5.4"
12	},
13	"devDependencies": {
14		"@apify/eslint-config-ts": "^0.3.0",
15		"@apify/tsconfig": "^0.1.0",
16		"@typescript-eslint/eslint-plugin": "^6.7.2",
17		"@typescript-eslint/parser": "^6.7.2",
18		"eslint": "^8.50.0",
19		"tsx": "^4.6.2",
20		"typescript": "^5.5"
21	},
22	"scripts": {
23		"start": "npm run start:dev",
24		"start:prod": "node dist/main.js",
25		"start:dev": "tsx src/main.ts",
26		"build": "tsc",
27		"lint": "eslint ./src --ext .ts",
28		"lint:fix": "eslint ./src --ext .ts --fix",
29		"test": "echo \"Error: oops, the actor has no tests yet, sad!\" && exit 1"
30	},
31	"author": "It's not you it's me",
32	"license": "ISC"
33}

tsconfig.json

1{
2    "extends": "@apify/tsconfig",
3    "compilerOptions": {
4        "module": "NodeNext",
5        "moduleResolution": "NodeNext",
6        "target": "ES2022",
7        "outDir": "dist",
8        "noUnusedLocals": false,
9        "skipLibCheck": true,
10        "lib": ["DOM"]
11    },
12    "include": [
13        "./src/**/*"
14    ]
15}
Developer
Maintained by Community

Actor Metrics

  • 1 monthly user

  • 3 stars

  • >99% runs succeeded

  • Created in May 2024

  • Modified 4 months ago