Rename Dataset Fields avatar

Rename Dataset Fields

Try for free

No credit card required

Go to Store
Rename Dataset Fields

Rename Dataset Fields

lukaskrivka/rename-dataset-fields
Try for free

No credit card required

Rename fields/columns of any dataset with a simple mapping.

.dockerignore

1# configurations
2.idea
3
4# crawlee and apify storage folders
5apify_storage
6crawlee_storage
7storage
8
9# installed files
10node_modules
11
12# git folder
13.git

.editorconfig

1root = true
2
3[*]
4indent_style = space
5indent_size = 4
6charset = utf-8
7trim_trailing_whitespace = true
8insert_final_newline = true
9end_of_line = lf

.eslintrc

1{
2    "root": true,
3    "env": {
4        "browser": true,
5        "es2020": true,
6        "node": true
7    },
8    "extends": [
9        "@apify/eslint-config-ts"
10    ],
11    "parserOptions": {
12        "project": "./tsconfig.json",
13        "ecmaVersion": 2020
14    },
15    "ignorePatterns": [
16        "node_modules",
17        "dist",
18        "**/*.d.ts"
19    ]
20}

.gitignore

1# This file tells Git which files shouldn't be added to source control
2
3.DS_Store
4.idea
5dist
6node_modules
7apify_storage
8storage
9
10# Added by Apify CLI
11.venv

package.json

1{
2	"name": "rename-dataset-fields",
3	"version": "0.0.1",
4	"type": "module",
5	"description": "This is a boilerplate of an Apify actor.",
6	"engines": {
7		"node": ">=18.0.0"
8	},
9	"dependencies": {
10		"apify": "^3.1.10",
11		"crawlee": "^3.5.4"
12	},
13	"devDependencies": {
14		"@apify/eslint-config-ts": "^0.3.0",
15		"@apify/tsconfig": "^0.1.0",
16		"@typescript-eslint/eslint-plugin": "^6.7.2",
17		"@typescript-eslint/parser": "^6.7.2",
18		"eslint": "^8.50.0",
19		"tsx": "^4.6.2",
20		"typescript": "^5.3.3"
21	},
22	"scripts": {
23		"start": "npm run start:dev",
24		"start:prod": "node dist/main.js",
25		"start:dev": "tsx src/main.ts",
26		"build": "tsc",
27		"lint": "eslint ./src --ext .ts",
28		"lint:fix": "eslint ./src --ext .ts --fix",
29		"test": "echo \"Error: oops, the actor has no tests yet, sad!\" && exit 1"
30	},
31	"author": "It's not you it's me",
32	"license": "ISC"
33}

tsconfig.json

1{
2    "extends": "@apify/tsconfig",
3    "compilerOptions": {
4        "module": "NodeNext",
5        "moduleResolution": "NodeNext",
6        "target": "ES2022",
7        "outDir": "dist",
8        "noUnusedLocals": false,
9        "skipLibCheck": true,
10        "lib": ["DOM"]
11    },
12    "include": [
13        "./src/**/*"
14    ]
15}

.actor/Dockerfile

1# Specify the base Docker image. You can read more about
2# the available images at https://crawlee.dev/docs/guides/docker-images
3# You can also use any other image from Docker Hub.
4FROM apify/actor-node:20 AS builder
5
6# Copy just package.json and package-lock.json
7# to speed up the build using Docker layer cache.
8COPY package*.json ./
9
10# Install all dependencies. Don't audit to speed up the installation.
11RUN npm install --include=dev --audit=false
12
13# Next, copy the source files using the user set
14# in the base image.
15COPY . ./
16
17# Build the project (compile the TypeScript sources into the dist folder).
18# All dependencies, including dev ones, were already installed above.
19RUN npm run build
20
21# Create final image
22FROM apify/actor-node:20
23
24# Copy just package.json and package-lock.json
25# to speed up the build using Docker layer cache.
26COPY package*.json ./
27
28# Install NPM packages, skip optional and development dependencies to
29# keep the image small. Avoid logging too much and print the dependency
30# tree for debugging
31RUN npm --quiet set progress=false \
32    && npm install --omit=dev --omit=optional \
33    && echo "Installed NPM packages:" \
34    && (npm list --omit=dev --all || true) \
35    && echo "Node.js version:" \
36    && node --version \
37    && echo "NPM version:" \
38    && npm --version \
39    && rm -r ~/.npm
40
41# Copy built JS files from builder image
42COPY --from=builder /usr/src/app/dist ./dist
43
44# Next, copy the remaining files and directories with the source code.
45# Since we do this after NPM install, quick build will be really fast
46# for most source file changes.
47COPY . ./
48
49
50# Run the image.
51CMD npm run start:prod --silent

.actor/actor.json

1{
2	"actorSpecification": 1,
3	"name": "rename-dataset-fields",
4	"title": "Rename Dataset Fields",
5	"description": "Rename fields/columns of any dataset with a simple mapping.",
6	"version": "0.0",
7	"meta": {
8		"templateId": "ts-crawlee-cheerio"
9	},
10	"input": "./input_schema.json",
11	"dockerfile": "./Dockerfile"
12}

.actor/input_schema.json

1{
2    "title": "CheerioCrawler Template",
3    "type": "object",
4    "schemaVersion": 1,
5    "properties": {
6        "datasetId": {
7            "title": "Dataset ID",
8            "type": "string",
9            "description": "ID of the dataset you want to rename the columns in. Can also use run ID.",
10            "editor": "textfield"
11        },
12        "renameMapping": {
13            "title": "Rename fields mapping",
14            "type": "array",
15            "editor": "keyValue",
16            "placeholderKey": "old_field_name",
17            "placeholderValue": "new_field_name",
18            "description": "Mapping of old field names to new field names."
19        }
20    }
21}

src/main.ts

1// Apify SDK - toolkit for building Apify Actors (Read more at https://docs.apify.com/sdk/js/)
2import { Actor, ActorRun } from 'apify';
3
/**
 * Shape of the Actor input as read by `Actor.getInput()`.
 * The user-facing part (datasetId, renameMapping) is described in input_schema.json.
 */
interface Input {
    /** Rename pairs: `key` is the current field name, `value` is the name it should get. */
    renameMapping: { key: string; value: string }[];
    /** Explicit dataset ID; checked first when resolving which dataset to process. */
    datasetId?: string;

    // Maybe take datasetId from webhook
    /** Optional wrapper whose `resource.defaultDatasetId` is used when `datasetId` is not set. */
    payload?: { resource: ActorRun };
    /** Optional run object whose `defaultDatasetId` is the last fallback for the dataset ID. */
    resource?: ActorRun;
}
12
// The init() call configures the Actor for its environment. It's recommended to start every Actor with an init()
await Actor.init();

// Structure of input is defined in input_schema.json.
// getInput() may resolve to null (no input provided), so fall back to an empty object
// instead of crashing with an opaque TypeError while destructuring.
const input: Partial<Input> = (await Actor.getInput<Input>()) ?? {};
const {
    datasetId,
    renameMapping = [],
    payload,
    resource,
} = input;

// Prefer the explicit datasetId; otherwise take the default dataset of the run found in the
// payload (wrapped or unwrapped). `||` is intentional so that an empty string also falls through.
const realDatasetId = datasetId || payload?.resource?.defaultDatasetId || resource?.defaultDatasetId;

// Without a dataset ID there is nothing to process; fail early with a readable message
// rather than forwarding `[undefined]` as the list of dataset IDs.
if (!realDatasetId) {
    await Actor.fail('Missing dataset ID. Provide "datasetId" in the input or pass a run object (payload.resource / resource) that has a default dataset.');
}
25
/**
 * Renames fields of every item according to `customInputData.renameMapping`.
 *
 * For each `{ key, value }` pair, a field named `key` whose value is not `undefined` is emitted
 * under the name `value`. The input items are left untouched; new objects are returned.
 * When a rename target collides with a field that already exists on the item, the renamed
 * value wins, so the rename never silently discards the data it was asked to move.
 *
 * NOTE(review): this function is passed inside the input of another Actor, so it is presumably
 * serialized to its source text — keep it self-contained (no outer variables or helpers). TODO confirm.
 *
 * @param items Items to transform; they are not modified.
 * @param context Object carrying `customInputData.renameMapping`, the list of rename pairs.
 * @returns New items with the renamed fields first (in mapping order), then the remaining fields.
 */
const preDedupTransformFunction = (
    items: Record<string, unknown>[],
    { customInputData }: { customInputData: { renameMapping: { key: string, value: string }[] } },
) => {
    return items.map((item) => {
        const renamedFields: Record<string, unknown> = {};
        // Source fields that were moved to a new name and therefore must not be copied as-is
        const consumedKeys = new Set<string>();

        for (const { key: from, value: to } of customInputData.renameMapping) {
            // Skip fields that are missing or that an earlier pair has already renamed
            if (item[from] !== undefined && !consumedKeys.has(from)) {
                renamedFields[to] = item[from];
                consumedKeys.add(from);
            }
        }

        const untouchedFields: Record<string, unknown> = {};
        for (const [field, value] of Object.entries(item)) {
            const wasRenamedAway = consumedKeys.has(field);
            // An existing field must not overwrite a freshly renamed one of the same name
            const isShadowedByRename = Object.prototype.hasOwnProperty.call(renamedFields, field);
            if (!wasRenamedAway && !isShadowedByRename) {
                untouchedFields[field] = value;
            }
        }

        // We want the renamed fields to be the first in that order
        return {
            ...renamedFields,
            ...untouchedFields,
        };
    });
};
43
// Actor that receives the dataset together with the rename transform
const DEDUP_ACTOR_NAME = 'lukaskrivka/dedup-datasets';

// The mapping travels in customInputData, which the transform function reads from its second argument
const customInputData = { renameMapping };
const dedupActorInput = {
    datasetIds: [realDatasetId],
    preDedupTransformFunction,
    customInputData,
};

// When isAtHome() reports false, start the target Actor via call() with waitSecs: 0;
// otherwise metamorph this run into the target Actor.
if (!Actor.isAtHome()) {
    await Actor.call(DEDUP_ACTOR_NAME, dedupActorInput, { waitSecs: 0 });
} else {
    await Actor.metamorph(DEDUP_ACTOR_NAME, dedupActorInput);
}

// Gracefully exit the Actor process. It's recommended to quit all Actors with an exit()
await Actor.exit();
Developer
Maintained by Community

Actor Metrics

  • 2 monthly users

  • 1 star

  • Created in Aug 2024

  • Modified 5 months ago