OpenSearch Integration avatar
OpenSearch Integration

Pricing

Pay per usage

Go to Store
OpenSearch Integration

OpenSearch Integration

Developed by

Apify

Maintained by Apify

Transfer data from Apify Actors to Amazon OpenSearch Service. This Actor is a good starting point for building question-answering systems, search functionality, or Retrieval-Augmented Generation (RAG) use cases.

4.4 (5)

Pricing

Pay per usage

4

Monthly users

1

0

Last modified

5 days ago

You can access the OpenSearch Integration programmatically from your own applications by using the Apify API. You can also choose your preferred language below. To use the Apify API, you’ll need an Apify account and your API token, found in the Integrations settings in Apify Console.

1{
2  "openapi": "3.0.1",
3  "info": {
4    "version": "0.1",
5    "x-build-id": "DTJRpPDfctH4CjJKQ"
6  },
7  "servers": [
8    {
9      "url": "https://api.apify.com/v2"
10    }
11  ],
12  "paths": {
13    "/acts/apify~opensearch-integration/run-sync-get-dataset-items": {
14      "post": {
15        "operationId": "run-sync-get-dataset-items-apify-opensearch-integration",
16        "x-openai-isConsequential": false,
17        "summary": "Executes an Actor, waits for its completion, and returns Actor's dataset items in response.",
18        "tags": [
19          "Run Actor"
20        ],
21        "requestBody": {
22          "required": true,
23          "content": {
24            "application/json": {
25              "schema": {
26                "$ref": "#/components/schemas/inputSchema"
27              }
28            }
29          }
30        },
31        "parameters": [
32          {
33            "name": "token",
34            "in": "query",
35            "required": true,
36            "schema": {
37              "type": "string"
38            },
39            "description": "Enter your Apify token here"
40          }
41        ],
42        "responses": {
43          "200": {
44            "description": "OK"
45          }
46        }
47      }
48    },
49    "/acts/apify~opensearch-integration/runs": {
50      "post": {
51        "operationId": "runs-sync-apify-opensearch-integration",
52        "x-openai-isConsequential": false,
53        "summary": "Executes an Actor and returns information about the initiated run in response.",
54        "tags": [
55          "Run Actor"
56        ],
57        "requestBody": {
58          "required": true,
59          "content": {
60            "application/json": {
61              "schema": {
62                "$ref": "#/components/schemas/inputSchema"
63              }
64            }
65          }
66        },
67        "parameters": [
68          {
69            "name": "token",
70            "in": "query",
71            "required": true,
72            "schema": {
73              "type": "string"
74            },
75            "description": "Enter your Apify token here"
76          }
77        ],
78        "responses": {
79          "200": {
80            "description": "OK",
81            "content": {
82              "application/json": {
83                "schema": {
84                  "$ref": "#/components/schemas/runsResponseSchema"
85                }
86              }
87            }
88          }
89        }
90      }
91    },
92    "/acts/apify~opensearch-integration/run-sync": {
93      "post": {
94        "operationId": "run-sync-apify-opensearch-integration",
95        "x-openai-isConsequential": false,
96        "summary": "Executes an Actor, waits for completion, and returns the OUTPUT from Key-value store in response.",
97        "tags": [
98          "Run Actor"
99        ],
100        "requestBody": {
101          "required": true,
102          "content": {
103            "application/json": {
104              "schema": {
105                "$ref": "#/components/schemas/inputSchema"
106              }
107            }
108          }
109        },
110        "parameters": [
111          {
112            "name": "token",
113            "in": "query",
114            "required": true,
115            "schema": {
116              "type": "string"
117            },
118            "description": "Enter your Apify token here"
119          }
120        ],
121        "responses": {
122          "200": {
123            "description": "OK"
124          }
125        }
126      }
127    }
128  },
129  "components": {
130    "schemas": {
131      "inputSchema": {
132        "type": "object",
133        "required": [
134          "openSearchUrl",
135          "awsAccessKeyId",
136          "awsSecretAccessKey",
137          "openSearchIndexName",
138          "embeddingsProvider",
139          "embeddingsApiKey",
140          "datasetFields"
141        ],
142        "properties": {
143          "openSearchUrl": {
144            "title": "OpenSearch URL",
145            "type": "string",
146            "description": "The URL of the Amazon OpenSearch Service instance to connect to"
147          },
148          "awsAccessKeyId": {
149            "title": "AWS Access Key ID",
150            "type": "string",
151            "description": "The AWS access key ID for the Amazon OpenSearch Service"
152          },
153          "awsSecretAccessKey": {
154            "title": "AWS Secret Access Key",
155            "type": "string",
156            "description": "The AWS secret access key for the Amazon OpenSearch Service"
157          },
158          "openSearchIndexName": {
159            "title": "OpenSearch Index Name",
160            "type": "string",
161            "description": "The name of the index in the Amazon OpenSearch Service where the data will be stored"
162          },
163          "autoCreateIndex": {
164            "title": "Auto-create index",
165            "type": "boolean",
166            "description": "When set to true, the integration will automatically create the index if it does not exist in the Amazon OpenSearch Service instance",
167            "default": true
168          },
169          "awsRegion": {
170            "title": "AWS Region",
171            "type": "string",
172            "description": "The AWS region where the Amazon OpenSearch Service instance is located",
173            "default": "us-east-1"
174          },
175          "awsServiceName": {
176            "title": "AWS Service Name",
177            "enum": [
178              "aoss",
179              "es"
180            ],
181            "type": "string",
182            "description": "The AWS service name for the Amazon OpenSearch Service",
183            "default": "aoss"
184          },
185          "useSsl": {
186            "title": "Use SSL",
187            "type": "boolean",
188            "description": "When set to true, the integration will use SSL to connect to the Amazon OpenSearch Service instance",
189            "default": true
190          },
191          "verifyCerts": {
192            "title": "Verify SSL certificates",
193            "type": "boolean",
194            "description": "When set to true, the integration will verify SSL certificates when connecting to the Amazon OpenSearch Service instance",
195            "default": true
196          },
197          "useAWS4Auth": {
198            "title": "Use AWS4 authentication",
199            "type": "boolean",
200            "description": "When enabled, the integration will use AWS4 authentication to connect to the Amazon OpenSearch Service instance.\n\nNote: If you are connecting to an OpenSearch Service instance that is not hosted on AWS, set this to false. In this case, AWS credentials are not required and will be ignored. You can provide dummy values for awsAccessKeyId and awsSecretAccessKey.",
201            "default": true
202          },
203          "embeddingsProvider": {
204            "title": "Embeddings provider (as defined in the langchain API)",
205            "enum": [
206              "OpenAI",
207              "Cohere"
208            ],
209            "type": "string",
210            "description": "Choose the embeddings provider to use for generating embeddings",
211            "default": "OpenAI"
212          },
213          "embeddingsConfig": {
214            "title": "Configuration for embeddings provider",
215            "type": "object",
216            "description": "Configure the parameters for the LangChain embedding class. Key points to consider:\n\n1. Typically, you only need to specify the model name. For example, for OpenAI, set the model name as {\"model\": \"text-embedding-3-small\"}.\n\n2. It's required to ensure that the vector size of your embeddings matches the size of embeddings in the database.\n\n3. Here are examples of embedding models:\n   - [OpenAI](https://platform.openai.com/docs/guides/embeddings): `text-embedding-3-small`, `text-embedding-3-large`, etc.\n   - [Cohere](https://docs.cohere.com/docs/cohere-embed): `embed-english-v3.0`, `embed-multilingual-light-v3.0`, etc.\n\n4. For more details about other parameters, refer to the [LangChain documentation](https://python.langchain.com/docs/integrations/text_embedding/)."
217          },
218          "embeddingsApiKey": {
219            "title": "Embeddings API KEY (whenever applicable, depends on provider)",
220            "type": "string",
221            "description": "Value of the API KEY for the embeddings provider (if required).\n\nFor example, for OpenAI it is OPENAI_API_KEY, and for Cohere it is COHERE_API_KEY."
222          },
223          "datasetFields": {
224            "title": "Dataset fields to select from the dataset results and store in the database",
225            "type": "array",
226            "description": "This array specifies the dataset fields to be selected and stored in the vector store. Only the fields listed here will be included in the vector store.\n\nFor instance, when using the Website Content Crawler, you might choose to include fields such as `text`, `url`, and `metadata.title` in the vector store.",
227            "default": [
228              "text"
229            ],
230            "items": {
231              "type": "string"
232            }
233          },
234          "metadataDatasetFields": {
235            "title": "Dataset fields to select from the dataset and store as metadata in the database",
236            "type": "object",
237            "description": "An object specifying the dataset fields which should be selected from the dataset and stored as metadata in the vector store.\n\nFor example, when using the Website Content Crawler, you might want to store `url` in metadata. In this case, use the `metadataDatasetFields` parameter as follows: `{\"url\": \"url\"}`."
238          },
239          "metadataObject": {
240            "title": "Custom object to be stored as metadata in the vector store database",
241            "type": "object",
242            "description": "This object allows you to store custom metadata for every item in the vector store.\n\nFor example, if you want to store the `domain` as metadata, use the `metadataObject` like this: {\"domain\": \"apify.com\"}."
243          },
244          "datasetId": {
245            "title": "Dataset ID",
246            "type": "string",
247            "description": "Dataset ID (when running standalone without integration)"
248          },
249          "dataUpdatesStrategy": {
250            "title": "Update strategy (add, upsert, deltaUpdates (default))",
251            "enum": [
252              "add",
253              "upsert",
254              "deltaUpdates"
255            ],
256            "type": "string",
257            "description": "Choose the update strategy for the integration. The update strategy determines how the integration updates the data in the database.\n\nThe available options are:\n\n- **Add data** (`add`):\n  - Always adds new records to the database.\n  - No checks for existing records or updates are performed.\n  - Useful when appending data without concern for duplicates.\n\n- **Upsert data** (`upsert`):\n  - Updates existing records if they match a key or identifier.\n  - Inserts new records into the database if they don't already exist.\n  - Ideal for ensuring the database contains the most up-to-date data, avoiding duplicates.\n\n- **Update changed data based on deltas** (`deltaUpdates`):\n  - Performs incremental updates by identifying differences (deltas) between the new dataset and the existing records.\n  - Only adds new records and updates those that have changed.\n  - Unchanged records are left untouched.\n  - Maximizes efficiency by reducing unnecessary updates.\n\nSelect the strategy that best fits your use case.",
258            "default": "deltaUpdates"
259          },
260          "dataUpdatesPrimaryDatasetFields": {
261            "title": "Dataset fields to uniquely identify dataset items (only relevant when dataUpdatesStrategy is `upsert` or `deltaUpdates`)",
262            "type": "array",
263            "description": "This array contains fields that are used to uniquely identify dataset items, which helps to handle content changes across different runs.\n\nFor instance, in a web content crawling scenario, the `url` field could serve as a unique identifier for each item.",
264            "default": [
265              "url"
266            ],
267            "items": {
268              "type": "string"
269            }
270          },
271          "enableDeltaUpdates": {
272            "title": "Enable incremental updates for objects based on deltas (deprecated)",
273            "type": "boolean",
274            "description": "When set to true, this setting enables incremental updates for objects in the database by comparing the changes (deltas) between the crawled dataset items and the existing objects, uniquely identified by the `dataUpdatesPrimaryDatasetFields` field.\n\nThe integration will only add new objects and update those that have changed, reducing unnecessary updates. The `datasetFields`, `metadataDatasetFields`, and `metadataObject` fields are used to determine the changes.",
275            "default": true
276          },
277          "deleteExpiredObjects": {
278            "title": "Delete expired objects from the database",
279            "type": "boolean",
280            "description": "When set to true, delete objects from the database that have not been crawled for a specified period.",
281            "default": true
282          },
283          "expiredObjectDeletionPeriodDays": {
284            "title": "Delete expired objects from the database after a specified number of days",
285            "minimum": 0,
286            "type": "integer",
287            "description": "This setting allows the integration to manage the deletion of objects from the database that have not been crawled for a specified period. It is typically used in subsequent runs after the initial crawl.\n\nWhen the value is greater than 0, the integration checks if objects have been seen within the last X days (determined by the expiration period). If the objects are expired, they are deleted from the database. The specific value for `expiredObjectDeletionPeriodDays` depends on your use case and how frequently you crawl data.\n\nFor example, if you crawl data daily, you can set `expiredObjectDeletionPeriodDays` to 7 days. If you crawl data weekly, you can set `expiredObjectDeletionPeriodDays` to 30 days.",
288            "default": 30
289          },
290          "performChunking": {
291            "title": "Enable text chunking",
292            "type": "boolean",
293            "description": "When set to true, the text will be divided into smaller chunks based on the settings provided below. Proper chunking helps optimize retrieval and ensures accurate and efficient responses.",
294            "default": true
295          },
296          "chunkSize": {
297            "title": "Maximum chunk size",
298            "minimum": 1,
299            "type": "integer",
300            "description": "Defines the maximum number of characters in each text chunk. Choosing the right size balances between detailed context and system performance. Optimal sizes ensure high relevancy and minimal response time.",
301            "default": 2000
302          },
303          "chunkOverlap": {
304            "title": "Chunk overlap",
305            "minimum": 0,
306            "type": "integer",
307            "description": "Specifies the number of overlapping characters between consecutive text chunks. Adjusting this helps maintain context across chunks, which is crucial for accuracy in retrieval-augmented generation systems.",
308            "default": 0
309          }
310        }
311      },
312      "runsResponseSchema": {
313        "type": "object",
314        "properties": {
315          "data": {
316            "type": "object",
317            "properties": {
318              "id": {
319                "type": "string"
320              },
321              "actId": {
322                "type": "string"
323              },
324              "userId": {
325                "type": "string"
326              },
327              "startedAt": {
328                "type": "string",
329                "format": "date-time",
330                "example": "2025-01-08T00:00:00.000Z"
331              },
332              "finishedAt": {
333                "type": "string",
334                "format": "date-time",
335                "example": "2025-01-08T00:00:00.000Z"
336              },
337              "status": {
338                "type": "string",
339                "example": "READY"
340              },
341              "meta": {
342                "type": "object",
343                "properties": {
344                  "origin": {
345                    "type": "string",
346                    "example": "API"
347                  },
348                  "userAgent": {
349                    "type": "string"
350                  }
351                }
352              },
353              "stats": {
354                "type": "object",
355                "properties": {
356                  "inputBodyLen": {
357                    "type": "integer",
358                    "example": 2000
359                  },
360                  "rebootCount": {
361                    "type": "integer",
362                    "example": 0
363                  },
364                  "restartCount": {
365                    "type": "integer",
366                    "example": 0
367                  },
368                  "resurrectCount": {
369                    "type": "integer",
370                    "example": 0
371                  },
372                  "computeUnits": {
373                    "type": "integer",
374                    "example": 0
375                  }
376                }
377              },
378              "options": {
379                "type": "object",
380                "properties": {
381                  "build": {
382                    "type": "string",
383                    "example": "latest"
384                  },
385                  "timeoutSecs": {
386                    "type": "integer",
387                    "example": 300
388                  },
389                  "memoryMbytes": {
390                    "type": "integer",
391                    "example": 1024
392                  },
393                  "diskMbytes": {
394                    "type": "integer",
395                    "example": 2048
396                  }
397                }
398              },
399              "buildId": {
400                "type": "string"
401              },
402              "defaultKeyValueStoreId": {
403                "type": "string"
404              },
405              "defaultDatasetId": {
406                "type": "string"
407              },
408              "defaultRequestQueueId": {
409                "type": "string"
410              },
411              "buildNumber": {
412                "type": "string",
413                "example": "1.0.0"
414              },
415              "containerUrl": {
416                "type": "string"
417              },
418              "usage": {
419                "type": "object",
420                "properties": {
421                  "ACTOR_COMPUTE_UNITS": {
422                    "type": "integer",
423                    "example": 0
424                  },
425                  "DATASET_READS": {
426                    "type": "integer",
427                    "example": 0
428                  },
429                  "DATASET_WRITES": {
430                    "type": "integer",
431                    "example": 0
432                  },
433                  "KEY_VALUE_STORE_READS": {
434                    "type": "integer",
435                    "example": 0
436                  },
437                  "KEY_VALUE_STORE_WRITES": {
438                    "type": "integer",
439                    "example": 1
440                  },
441                  "KEY_VALUE_STORE_LISTS": {
442                    "type": "integer",
443                    "example": 0
444                  },
445                  "REQUEST_QUEUE_READS": {
446                    "type": "integer",
447                    "example": 0
448                  },
449                  "REQUEST_QUEUE_WRITES": {
450                    "type": "integer",
451                    "example": 0
452                  },
453                  "DATA_TRANSFER_INTERNAL_GBYTES": {
454                    "type": "integer",
455                    "example": 0
456                  },
457                  "DATA_TRANSFER_EXTERNAL_GBYTES": {
458                    "type": "integer",
459                    "example": 0
460                  },
461                  "PROXY_RESIDENTIAL_TRANSFER_GBYTES": {
462                    "type": "integer",
463                    "example": 0
464                  },
465                  "PROXY_SERPS": {
466                    "type": "integer",
467                    "example": 0
468                  }
469                }
470              },
471              "usageTotalUsd": {
472                "type": "number",
473                "example": 0.00005
474              },
475              "usageUsd": {
476                "type": "object",
477                "properties": {
478                  "ACTOR_COMPUTE_UNITS": {
479                    "type": "integer",
480                    "example": 0
481                  },
482                  "DATASET_READS": {
483                    "type": "integer",
484                    "example": 0
485                  },
486                  "DATASET_WRITES": {
487                    "type": "integer",
488                    "example": 0
489                  },
490                  "KEY_VALUE_STORE_READS": {
491                    "type": "integer",
492                    "example": 0
493                  },
494                  "KEY_VALUE_STORE_WRITES": {
495                    "type": "number",
496                    "example": 0.00005
497                  },
498                  "KEY_VALUE_STORE_LISTS": {
499                    "type": "integer",
500                    "example": 0
501                  },
502                  "REQUEST_QUEUE_READS": {
503                    "type": "integer",
504                    "example": 0
505                  },
506                  "REQUEST_QUEUE_WRITES": {
507                    "type": "integer",
508                    "example": 0
509                  },
510                  "DATA_TRANSFER_INTERNAL_GBYTES": {
511                    "type": "integer",
512                    "example": 0
513                  },
514                  "DATA_TRANSFER_EXTERNAL_GBYTES": {
515                    "type": "integer",
516                    "example": 0
517                  },
518                  "PROXY_RESIDENTIAL_TRANSFER_GBYTES": {
519                    "type": "integer",
520                    "example": 0
521                  },
522                  "PROXY_SERPS": {
523                    "type": "integer",
524                    "example": 0
525                  }
526                }
527              }
528            }
529          }
530        }
531      }
532    }
533  }
534}

OpenSearch Integration OpenAPI definition

OpenAPI is a standard for designing and describing RESTful APIs, allowing developers to define API structure, endpoints, and data formats in a machine-readable way. It simplifies API development, integration, and documentation.

OpenAPI is effective when used with AI agents and GPTs by standardizing how these systems interact with various APIs, for reliable integrations and efficient communication.

By defining machine-readable API specifications, OpenAPI allows AI models like GPTs to understand and use varied data sources, improving accuracy. This accelerates development, reduces errors, and provides context-aware responses, making OpenAPI a core component for AI applications.

You can download the OpenAPI definitions for OpenSearch Integration from the options below:

If you’d like to learn more about how OpenAPI powers GPTs, read our blog post.

You can also check out our other API clients:

Pricing

Pricing model

Pay per usage

This Actor is paid per platform usage. The Actor is free to use, and you only pay for the Apify platform usage.