> ## Documentation Index
> Fetch the complete documentation index at: https://intunedhq.com/docs/llms.txt
> Use this file to discover all available pages before exploring further.

# extractArrayFromPage

<Warning> **Deprecated:** This function is deprecated and will be removed in the future. </Warning>

Extracts an array of structured data from a web page in an optimized way. This function uses AI for the first few extractions until it collects multiple examples, then builds reliable selectors in the background for improved efficiency.

```typescript theme={null}
export declare function extractArrayFromPage(
  page: Page,
  options: {
    label: string;
    itemEntityName: string;
    itemEntitySchema: SimpleArrayItemSchema;
    strategy?: ImageStrategy | HtmlStrategy;
    prompt?: string;
    optionalPropertiesInvalidator?: (
      result: Record<string, string>[]
    ) => string[];
    variantKey?: string;
    apiKey?: string;
  }
): Promise<Record<string, string>[]>;
```

## Examples

<CodeGroup>
  ```typescript extractArrayFromPage theme={null}
  import { extractArrayFromPage } from "@intuned/browser/optimized-extractors";

  await page.goto("https://books.toscrape.com/");
  const books = await extractArrayFromPage(page, {
    strategy: {
      model: "gpt4-turbo",
      type: "HTML",
    },
    itemEntityName: "book",
    label: "books-extraction",
    itemEntitySchema: {
      type: "object",
      required: ["name"],
      properties: {
        name: {
          type: "string",
          description: "book name",
          primary: true,
        },
      },
    },
  });

  console.log(books);

  // output:
  // [
  // ...
  // { name: 'Olio' },
  // { name: 'Mesaerion: The Best Science Fiction Stories 1800-1849' },
  // { name: 'Libertarianism for Beginners' },
  // { name: "It's Only the Himalayas" }
  // ...
  // ]
  ```
</CodeGroup>

## Arguments

<ResponseField name="page" type="Page" required>
  The Playwright Page object from which to extract the data.
</ResponseField>

<ResponseField name="options" type="object" required>
  <Expandable title="properties" defaultOpen>
    <ResponseField name="options.label" type="string" required>
      A label for this extraction process, used for billing and monitoring.
    </ResponseField>

    <ResponseField name="options.itemEntityName" type="string" required>
      The name of the entity items being extracted. Must be 1–50 characters long and can only contain letters, digits, periods, underscores, and hyphens.
    </ResponseField>

    <ResponseField name="options.itemEntitySchema" type="SimpleArrayItemSchema" required>
      The schema of the entity items being extracted.
    </ResponseField>

    <ResponseField name="options.strategy" type="ImageStrategy | HtmlStrategy">
      Optional. The strategy to use for extraction. If not provided, the HTML strategy with Claude Haiku is used.
    </ResponseField>

    <ResponseField name="options.prompt" type="string">
      Optional. A prompt to guide the extraction process.
    </ResponseField>

    <ResponseField name="options.optionalPropertiesInvalidator" type="function">
      Optional. A function to invalidate optional properties. It receives the array of extracted results and returns an array of strings.
    </ResponseField>

    <ResponseField name="options.variantKey" type="string">
      Optional. A variant key for the extraction process. Use this when the page has multiple variants/shapes.
    </ResponseField>

    <ResponseField name="options.apiKey" type="string">
      Optional. An API key for AI extraction. Extractions made with your API key won't be billed to your account.
    </ResponseField>
  </Expandable>
</ResponseField>

## Returns: `Promise<Record<string, string>[]>`

A promise that resolves to an array of extracted items, each an object mapping property names to string values.
