From 3280699c84f9c11128dc84134ca0832f4347a879 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jind=C5=99ich=20B=C3=A4r?= Date: Fri, 19 Jun 2026 14:00:39 +0200 Subject: [PATCH 1/5] feat: make `impit` the default HTTP client --- docs/guides/custom-http-client/custom-http-client.mdx | 4 ++-- packages/basic-crawler/package.json | 2 +- packages/basic-crawler/src/internals/basic-crawler.ts | 6 +++--- pnpm-lock.yaml | 6 +++--- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/docs/guides/custom-http-client/custom-http-client.mdx b/docs/guides/custom-http-client/custom-http-client.mdx index 9eb0918dbb7b..4e1b9f04c010 100644 --- a/docs/guides/custom-http-client/custom-http-client.mdx +++ b/docs/guides/custom-http-client/custom-http-client.mdx @@ -16,8 +16,8 @@ The `BasicCrawler` clas Crawlee provides several HTTP client implementations out of the box: -- **`GotScrapingHttpClient`** (default) - Uses the `got-scraping` library for browser-like requests with support for custom headers, browser fingerprints, and proxies. -- **`ImpitHttpClient`** - Uses the `impit` library for making requests that closely mimic browser behavior. +- **`ImpitHttpClient`** (default) - Uses the `impit` library for making requests that closely mimic browser behavior. +- **`GotScrapingHttpClient`** - Uses the `got-scraping` library for browser-like requests with support for custom headers, browser fingerprints, and proxies. This was the default HTTP client in Crawlee v3. - **`FetchHttpClient`** - Simple implementation using the native `fetch` API (does not support proxies). ## Implementing a custom HTTP client diff --git a/packages/basic-crawler/package.json b/packages/basic-crawler/package.json index 3d4c15d3115a..f55559283d4f 100644 --- a/packages/basic-crawler/package.json +++ b/packages/basic-crawler/package.json @@ -42,7 +42,7 @@ "@apify/timeout": "^0.3.2", "@apify/utilities": "^2.15.5", "@crawlee/core": "workspace:*", - "@crawlee/got-scraping-client": "workspace:*", + "@crawlee/impit-client": "workspace:^", "@crawlee/types": "workspace:*", "@crawlee/utils": "workspace:*", "csv-stringify": "^6.5.2", diff --git a/packages/basic-crawler/src/internals/basic-crawler.ts b/packages/basic-crawler/src/internals/basic-crawler.ts index eeb4d4adfea4..e5d5e2b33252 100644 --- a/packages/basic-crawler/src/internals/basic-crawler.ts +++ b/packages/basic-crawler/src/internals/basic-crawler.ts @@ -61,7 +61,7 @@ import { Statistics, validators, } from '@crawlee/core'; -import { GotScrapingHttpClient } from '@crawlee/got-scraping-client'; +import { ImpitHttpClient } from '@crawlee/impit-client'; import type { Awaitable, BaseHttpClient, @@ -377,7 +377,7 @@ export interface BasicCrawlerOptions< /** * HTTP client implementation for the `sendRequest` context helper and for plain HTTP crawling. - * Defaults to a new instance of {@apilink GotScrapingHttpClient} + * Defaults to a new instance of {@apilink ImpitHttpClient} */ httpClient?: BaseHttpClient; @@ -807,7 +807,7 @@ export class BasicCrawler< this.requestManager = new RequestManagerTandem(requestList, () => this.openOwnedRequestQueue()); } - this.httpClient = httpClient ?? new GotScrapingHttpClient({ logger: this.log }); + this.httpClient = httpClient ?? new ImpitHttpClient({ logger: this.log }); this.proxyConfiguration = proxyConfiguration; this.statusMessageLoggingInterval = statusMessageLoggingInterval; this.statusMessageCallback = statusMessageCallback as StatusMessageCallback; diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 2c9cf535b126..e75ffc4cd002 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -256,9 +256,9 @@ importers: '@crawlee/core': specifier: workspace:* version: link:../core - '@crawlee/got-scraping-client': - specifier: workspace:* - version: link:../got-scraping-client + '@crawlee/impit-client': + specifier: workspace:^ + version: link:../impit-client '@crawlee/types': specifier: workspace:* version: link:../types From 3642235085a0edc3ffb58e79a7ed7a66dd891014 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jind=C5=99ich=20B=C3=A4r?= Date: Mon, 22 Jun 2026 14:33:30 +0200 Subject: [PATCH 2/5] chore: make `chrome` fingerprint default for `ImpitHttpClient` --- docs/package.json | 2 +- packages/impit-client/package.json | 2 +- packages/impit-client/src/index.ts | 7 +- pnpm-lock.yaml | 169 +++++++---------------------- 4 files changed, 46 insertions(+), 134 deletions(-) diff --git a/docs/package.json b/docs/package.json index 28eb417d0517..8f5150c62336 100644 --- a/docs/package.json +++ b/docs/package.json @@ -17,7 +17,7 @@ "@crawlee/stagehand": "workspace:*", "apify": "*", "crawlee": "workspace:*", - "impit": "^0.7.1", + "impit": "^0.14.2", "pino": "^9.6.0", "playwright-extra": "^4.3.6", "puppeteer-extra": "^3.3.6", diff --git a/packages/impit-client/package.json b/packages/impit-client/package.json index 3e362b1a3ca8..ff0cf69190c4 100644 --- a/packages/impit-client/package.json +++ b/packages/impit-client/package.json @@ -50,7 +50,7 @@ "@apify/datastructures": "^2.0.3", "@crawlee/http-client": "workspace:*", "@crawlee/types": "workspace:*", - "impit": "^0.14.1", + "impit": "^0.14.2", "tough-cookie": "^6.0.0" } } diff --git a/packages/impit-client/src/index.ts b/packages/impit-client/src/index.ts index 23c780146347..d9ae64839a02 100644 --- a/packages/impit-client/src/index.ts +++ b/packages/impit-client/src/index.ts @@ -97,12 +97,15 @@ export class ImpitHttpClient extends BaseHttpClient { private resolveImpitBrowser(fingerprint?: SessionFingerprint): ImpitBrowser | undefined { if (!fingerprint?.browser) return undefined; - const versions = IMPIT_VERSIONS_BY_BROWSER[fingerprint.browser]; - if (!versions?.length) return undefined; const cached = this.impitBrowserByFingerprint.get(fingerprint); if (cached) return cached; + // impit can only impersonate Chrome and Firefox. Map other (Chromium-based or + // unsupported) families like `edge`/`safari` onto Chrome so the request still + // carries realistic browser headers instead of impit's bare `*/*` defaults. + const versions = IMPIT_VERSIONS_BY_BROWSER[fingerprint.browser] ?? IMPIT_VERSIONS_BY_BROWSER.chrome!; + const picked = versions[Math.floor(Math.random() * versions.length)]; this.impitBrowserByFingerprint.set(fingerprint, picked); return picked; diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index e75ffc4cd002..5a41974882e8 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -223,8 +223,8 @@ importers: specifier: workspace:* version: link:../packages/crawlee impit: - specifier: ^0.7.1 - version: 0.7.6 + specifier: ^0.14.2 + version: 0.14.2 pino: specifier: ^9.6.0 version: 9.14.0 @@ -605,8 +605,8 @@ importers: specifier: workspace:* version: link:../types impit: - specifier: ^0.14.1 - version: 0.14.1 + specifier: ^0.14.2 + version: 0.14.2 tough-cookie: specifier: ^6.0.0 version: 6.0.1 @@ -7903,14 +7903,8 @@ packages: cpu: [arm64] os: [darwin] - impit-darwin-arm64@0.14.1: - resolution: {integrity: sha512-K3KXto3gzR60kOFFxRmuxPNl3sPhdTLLpjBIW3RKD6vJFiGV5YH/T218WBECygPxRaIhESfsxfcOYrP53bHgFw==} - engines: {node: '>= 10'} - cpu: [arm64] - os: [darwin] - - impit-darwin-arm64@0.7.6: - resolution: {integrity: sha512-M7NQXkttyzqilWfzVkNCp7hApT69m0etyJkVpHze4bR5z1kJnHhdsb8BSdDv2dzvZL4u1JyqZNxq+qoMn84eUw==} + impit-darwin-arm64@0.14.2: + resolution: {integrity: sha512-ChvxbJj893rWAhHXJ3kkGe8Pg4lsZwQ1Dt0w/noCkNe436gm0nIQ/eBBbwIJBsY4Ev7q6fgi/QerF2trSyUXmg==} engines: {node: '>= 10'} cpu: [arm64] os: [darwin] @@ -7921,14 +7915,8 @@ packages: cpu: [x64] os: [darwin] - impit-darwin-x64@0.14.1: - resolution: {integrity: sha512-k+w8SBHUtLX3KbzsCqSmcwitHfbOXXsLryL++KVahtFqU5D93T1buoTANnd63/mm5dNyadecIGG0MQ7SwvWx8g==} - engines: {node: '>= 10'} - cpu: [x64] - os: [darwin] - - impit-darwin-x64@0.7.6: - resolution: {integrity: sha512-kikTesWirAwJp9JPxzGLoGVc+heBlEabWS5AhTkQedACU153vmuL90OBQikVr3ul2N0LPImvnuB+51wV0zDE6g==} + impit-darwin-x64@0.14.2: + resolution: {integrity: sha512-r1g3WkwljcRgY1V0yBCUJ0/Sy6OHLc4RUrfz3mHi2X4WvkTk7aY17K4X+baBx+tcpOfE/ME/iWAY/PesOM9JVA==} engines: {node: '>= 10'} cpu: [x64] os: [darwin] @@ -7940,15 +7928,8 @@ packages: os: [linux] libc: [glibc] - impit-linux-arm64-gnu@0.14.1: - resolution: {integrity: sha512-yBT7h6cVHL/hefzI/qjJc22fKP386O0inReOpUzCnb4SGnwhEHvL9nJk3OzU2prO9vvKNKSb6vfRBcvQ8HHH1g==} - engines: {node: '>= 10'} - cpu: [arm64] - os: [linux] - libc: [glibc] - - impit-linux-arm64-gnu@0.7.6: - resolution: {integrity: sha512-H6GHjVr/0lG9VEJr6IHF8YLq+YkSIOF4k7Dfue2ygzUAj1+jZ5ZwnouhG/XrZHYW6EWsZmEAjjRfWE56Q0wDRQ==} + impit-linux-arm64-gnu@0.14.2: + resolution: {integrity: sha512-JZJqUnEqFiktcXVgg/AIKkSv8SGu5zoQ7lf9CSvS0eEiwSlHnLUPwBiFc9/mSHDIUQkM9qlvJyGUiGqGEt9K6A==} engines: {node: '>= 10'} cpu: [arm64] os: [linux] @@ -7961,15 +7942,8 @@ packages: os: [linux] libc: [musl] - impit-linux-arm64-musl@0.14.1: - resolution: {integrity: sha512-lFatihYntVIN814AnoNm/hSw5iONLKt6XtaAaJahZNdJOU+zsclcg9pbK0/61aENamkpIBpok/H8j5Sw/Xjuuw==} - engines: {node: '>= 10'} - cpu: [arm64] - os: [linux] - libc: [musl] - - impit-linux-arm64-musl@0.7.6: - resolution: {integrity: sha512-1sCB/UBVXLZTpGJsXRdNNSvhN9xmmQcYLMWAAB4Itb7w684RHX1pLoCb6ichv7bfAf6tgaupcFIFZNBp3ghmQA==} + impit-linux-arm64-musl@0.14.2: + resolution: {integrity: sha512-ZsL9JAFEZBP3tvT8h4pkcg/b/MtHP8mZSgsncD6z9+qKM/IjqiDYwQunKaAYMwGu0EEbjtaU4C1zTK6ipnvwbA==} engines: {node: '>= 10'} cpu: [arm64] os: [linux] @@ -7982,15 +7956,8 @@ packages: os: [linux] libc: [glibc] - impit-linux-x64-gnu@0.14.1: - resolution: {integrity: sha512-oioVQzHqSi68BzxRpVLbNv9HRgOxAA21QNlzCMyeEM3/c02MkURCQ1W66yAlEFOR1gaqPZDJi/ymnmIGk6+PjQ==} - engines: {node: '>= 10'} - cpu: [x64] - os: [linux] - libc: [glibc] - - impit-linux-x64-gnu@0.7.6: - resolution: {integrity: sha512-yYhlRnZ4fhKt8kuGe0JK2WSHc8TkR6BEH0wn+guevmu8EOn9Xu43OuRvkeOyVAkRqvFnlZtMyySUo/GuSLz9Gw==} + impit-linux-x64-gnu@0.14.2: + resolution: {integrity: sha512-3/keeSOCiByIfbOpmvixRXDlCdcyf112ru0fNl7AcpC1RtQz48ctsAn6R4+xEEnULO63By5VK9X0HNm1O2gUdw==} engines: {node: '>= 10'} cpu: [x64] os: [linux] @@ -8003,15 +7970,8 @@ packages: os: [linux] libc: [musl] - impit-linux-x64-musl@0.14.1: - resolution: {integrity: sha512-v7fBi405zZepQo0LqH51H+aXc/MYEtrQviIK+34kEqFBYOaSC9+wFZrs6A4WwrF9DaUuRZ3klisljwglN7rxDQ==} - engines: {node: '>= 10'} - cpu: [x64] - os: [linux] - libc: [musl] - - impit-linux-x64-musl@0.7.6: - resolution: {integrity: sha512-sdGWyu+PCLmaOXy7Mzo4WP61ZLl5qpZ1L+VeXW+Ycazgu0e7ox0NZLdiLRunIrEzD+h0S+e4CyzNwaiP3yIolg==} + impit-linux-x64-musl@0.14.2: + resolution: {integrity: sha512-BiUXZhj6lQOrZmeYB4HqYiBS5XvM/clrSVyZyae/CJshELUyYOLNxnTOxabd0tWR6J9ntC7bZwDrwskWD/TePA==} engines: {node: '>= 10'} cpu: [x64] os: [linux] @@ -8023,14 +7983,8 @@ packages: cpu: [arm64] os: [win32] - impit-win32-arm64-msvc@0.14.1: - resolution: {integrity: sha512-W4kPvvra8dXDdtJQbkxWPCCouUlZpOrLqHsJ7gc8CwAr69ljZqw6v+qB395nCD1rn3u32dV4d4fjBsyMdolQoA==} - engines: {node: '>= 10'} - cpu: [arm64] - os: [win32] - - impit-win32-arm64-msvc@0.7.6: - resolution: {integrity: sha512-sM5deBqo0EuXg5GACBUMKEua9jIau/i34bwNlfrf/Amnw1n0GB4/RkuUh+sKiUcbNAntrRq+YhCq8qDP8IW19w==} + impit-win32-arm64-msvc@0.14.2: + resolution: {integrity: sha512-GQT3ITrIbh5P6FLzoHGuBpjrP/rzZoC3T0510+yCtLdQ1qQQzEIi1iCUElpCokyL4J/D4BdCRMS3wv3TzhcM4w==} engines: {node: '>= 10'} cpu: [arm64] os: [win32] @@ -8041,14 +7995,8 @@ packages: cpu: [x64] os: [win32] - impit-win32-x64-msvc@0.14.1: - resolution: {integrity: sha512-gwzWsaORyzq294t8CCxfHAmqulnQNQZK8ad8YM6oQaGVXeUFa8r/ZnR+PglfYcrYF/GbYwGrf9U8StUU8KFLmQ==} - engines: {node: '>= 10'} - cpu: [x64] - os: [win32] - - impit-win32-x64-msvc@0.7.6: - resolution: {integrity: sha512-ry63ADGLCB/PU/vNB1VioRt2V+klDJ34frJUXUZBEv1kA96HEAg9AxUk+604o+UHS3ttGH2rkLmrbwHOdAct5Q==} + impit-win32-x64-msvc@0.14.2: + resolution: {integrity: sha512-J6MpD0GzoMN9ydb2iT2oagE0Y4rbkKf8fDxrMh3/txp2sfzYDDfbcC4sySMm2b8QB4ERhB6N5YzcpGBAHbx/0w==} engines: {node: '>= 10'} cpu: [x64] os: [win32] @@ -8057,12 +8005,8 @@ packages: resolution: {integrity: sha512-968YrfzZN5CCgHs/n/yAbPgetq+bOreQOI9UQXmHK3srRs24g+m9CNGL8tRWUIZCK0tnc+baBJ0nw+8saHz0qw==} engines: {node: '>= 20'} - impit@0.14.1: - resolution: {integrity: sha512-ooFuGjrJzV16S5y4t7ICpfaAapNRrrUCv3cd5xIPKUqbbgKtx7CbVB+w+YRDMntoHwjuZ9ijI2AM8hMKISbvcw==} - engines: {node: '>= 20'} - - impit@0.7.6: - resolution: {integrity: sha512-AkS6Gv63+E6GMvBrcRhMmOREKpq5oJ0J5m3xwfkHiEs97UIsbpEqFmW3sFw/sdyOTDGRF5q4EjaLxtb922Ta8g==} + impit@0.14.2: + resolution: {integrity: sha512-8JlirJDFdrZg7a7nV00Jn5WO8K+X1FJQSVFrChxRyzkSAFv6mmGgriXS33wbJFUxpiOGY/MJpHY8hhmLfDcJDw==} engines: {node: '>= 20'} import-fresh@3.3.1: @@ -21407,73 +21351,49 @@ snapshots: impit-darwin-arm64@0.11.0: optional: true - impit-darwin-arm64@0.14.1: - optional: true - - impit-darwin-arm64@0.7.6: + impit-darwin-arm64@0.14.2: optional: true impit-darwin-x64@0.11.0: optional: true - impit-darwin-x64@0.14.1: - optional: true - - impit-darwin-x64@0.7.6: + impit-darwin-x64@0.14.2: optional: true impit-linux-arm64-gnu@0.11.0: optional: true - impit-linux-arm64-gnu@0.14.1: - optional: true - - impit-linux-arm64-gnu@0.7.6: + impit-linux-arm64-gnu@0.14.2: optional: true impit-linux-arm64-musl@0.11.0: optional: true - impit-linux-arm64-musl@0.14.1: - optional: true - - impit-linux-arm64-musl@0.7.6: + impit-linux-arm64-musl@0.14.2: optional: true impit-linux-x64-gnu@0.11.0: optional: true - impit-linux-x64-gnu@0.14.1: - optional: true - - impit-linux-x64-gnu@0.7.6: + impit-linux-x64-gnu@0.14.2: optional: true impit-linux-x64-musl@0.11.0: optional: true - impit-linux-x64-musl@0.14.1: - optional: true - - impit-linux-x64-musl@0.7.6: + impit-linux-x64-musl@0.14.2: optional: true impit-win32-arm64-msvc@0.11.0: optional: true - impit-win32-arm64-msvc@0.14.1: - optional: true - - impit-win32-arm64-msvc@0.7.6: + impit-win32-arm64-msvc@0.14.2: optional: true impit-win32-x64-msvc@0.11.0: optional: true - impit-win32-x64-msvc@0.14.1: - optional: true - - impit-win32-x64-msvc@0.7.6: + impit-win32-x64-msvc@0.14.2: optional: true impit@0.11.0: @@ -21487,27 +21407,16 @@ snapshots: impit-win32-arm64-msvc: 0.11.0 impit-win32-x64-msvc: 0.11.0 - impit@0.14.1: - optionalDependencies: - impit-darwin-arm64: 0.14.1 - impit-darwin-x64: 0.14.1 - impit-linux-arm64-gnu: 0.14.1 - impit-linux-arm64-musl: 0.14.1 - impit-linux-x64-gnu: 0.14.1 - impit-linux-x64-musl: 0.14.1 - impit-win32-arm64-msvc: 0.14.1 - impit-win32-x64-msvc: 0.14.1 - - impit@0.7.6: + impit@0.14.2: optionalDependencies: - impit-darwin-arm64: 0.7.6 - impit-darwin-x64: 0.7.6 - impit-linux-arm64-gnu: 0.7.6 - impit-linux-arm64-musl: 0.7.6 - impit-linux-x64-gnu: 0.7.6 - impit-linux-x64-musl: 0.7.6 - impit-win32-arm64-msvc: 0.7.6 - impit-win32-x64-msvc: 0.7.6 + impit-darwin-arm64: 0.14.2 + impit-darwin-x64: 0.14.2 + impit-linux-arm64-gnu: 0.14.2 + impit-linux-arm64-musl: 0.14.2 + impit-linux-x64-gnu: 0.14.2 + impit-linux-x64-musl: 0.14.2 + impit-win32-arm64-msvc: 0.14.2 + impit-win32-x64-msvc: 0.14.2 import-fresh@3.3.1: dependencies: From b65bdcdd5984983eee7c115f76556e95c4dd2d9e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jind=C5=99ich=20B=C3=A4r?= Date: Mon, 22 Jun 2026 15:01:36 +0200 Subject: [PATCH 3/5] feat: make `impit-client` optional, switch to `FetchHttpClient` as a fallback --- packages/basic-crawler/package.json | 5 ++++- .../src/internals/basic-crawler.ts | 22 +++++++++++++++---- packages/crawlee/package.json | 1 + pnpm-lock.yaml | 11 ++++++++-- 4 files changed, 32 insertions(+), 7 deletions(-) diff --git a/packages/basic-crawler/package.json b/packages/basic-crawler/package.json index f55559283d4f..4ad24601472f 100644 --- a/packages/basic-crawler/package.json +++ b/packages/basic-crawler/package.json @@ -42,7 +42,7 @@ "@apify/timeout": "^0.3.2", "@apify/utilities": "^2.15.5", "@crawlee/core": "workspace:*", - "@crawlee/impit-client": "workspace:^", + "@crawlee/http-client": "workspace:^", "@crawlee/types": "workspace:*", "@crawlee/utils": "workspace:*", "csv-stringify": "^6.5.2", @@ -51,5 +51,8 @@ "tldts": "^7.0.6", "tslib": "^2.8.1", "type-fest": "^4.41.0" + }, + "optionalDependencies": { + "@crawlee/impit-client": "workspace:^" } } diff --git a/packages/basic-crawler/src/internals/basic-crawler.ts b/packages/basic-crawler/src/internals/basic-crawler.ts index e5d5e2b33252..4e9fbabb20ea 100644 --- a/packages/basic-crawler/src/internals/basic-crawler.ts +++ b/packages/basic-crawler/src/internals/basic-crawler.ts @@ -61,7 +61,7 @@ import { Statistics, validators, } from '@crawlee/core'; -import { ImpitHttpClient } from '@crawlee/impit-client'; +import { FetchHttpClient } from '@crawlee/http-client'; import type { Awaitable, BaseHttpClient, @@ -377,7 +377,7 @@ export interface BasicCrawlerOptions< /** * HTTP client implementation for the `sendRequest` context helper and for plain HTTP crawling. - * Defaults to a new instance of {@apilink ImpitHttpClient} + * Defaults to {@apilink ImpitHttpClient} when `@crawlee/impit-client` is installed, otherwise {@apilink FetchHttpClient}. */ httpClient?: BaseHttpClient; @@ -637,7 +637,7 @@ export class BasicCrawler< protected additionalHttpErrorStatusCodes: Set; protected ignoreHttpErrorStatusCodes: Set; protected autoscaledPoolOptions: AutoscaledPoolOptions; - protected httpClient: BaseHttpClient; + protected httpClient!: BaseHttpClient; protected retryOnBlocked: boolean; protected respectRobotsTxtFile: boolean | { userAgent?: string }; protected onSkippedRequest?: SkippedRequestCallback; @@ -807,7 +807,9 @@ export class BasicCrawler< this.requestManager = new RequestManagerTandem(requestList, () => this.openOwnedRequestQueue()); } - this.httpClient = httpClient ?? new ImpitHttpClient({ logger: this.log }); + if (httpClient) { + this.httpClient = httpClient; + } this.proxyConfiguration = proxyConfiguration; this.statusMessageLoggingInterval = statusMessageLoggingInterval; this.statusMessageCallback = statusMessageCallback as StatusMessageCallback; @@ -1707,6 +1709,18 @@ export class BasicCrawler< this._closeEvents = true; } + if (!this.httpClient) { + try { + const { ImpitHttpClient } = await import('@crawlee/impit-client'); + this.httpClient = new ImpitHttpClient({ logger: this.log }); + } catch { + this.log.warning( + 'Optional dependency @crawlee/impit-client is not installed. Falling back to native fetch — proxy support and browser fingerprinting are unavailable.', + ); + this.httpClient = new FetchHttpClient(); + } + } + // Initialize AutoscaledPool before awaiting _loadHandledRequestCount(), // so that the caller can get a reference to it before awaiting the promise returned from run() // (otherwise there would be no way) diff --git a/packages/crawlee/package.json b/packages/crawlee/package.json index 66e52eaa2749..a718f5ad185b 100644 --- a/packages/crawlee/package.json +++ b/packages/crawlee/package.json @@ -49,6 +49,7 @@ }, "dependencies": { "@crawlee/basic": "workspace:*", + "@crawlee/impit-client": "workspace:*", "@crawlee/browser": "workspace:*", "@crawlee/browser-pool": "workspace:*", "@crawlee/cheerio": "workspace:*", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 5a41974882e8..c0798e5831cf 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -256,9 +256,9 @@ importers: '@crawlee/core': specifier: workspace:* version: link:../core - '@crawlee/impit-client': + '@crawlee/http-client': specifier: workspace:^ - version: link:../impit-client + version: link:../http-client '@crawlee/types': specifier: workspace:* version: link:../types @@ -283,6 +283,10 @@ importers: type-fest: specifier: ^4.41.0 version: 4.41.0 + optionalDependencies: + '@crawlee/impit-client': + specifier: workspace:^ + version: link:../impit-client packages/browser-crawler: dependencies: @@ -496,6 +500,9 @@ importers: '@crawlee/http': specifier: workspace:* version: link:../http-crawler + '@crawlee/impit-client': + specifier: workspace:* + version: link:../impit-client '@crawlee/jsdom': specifier: workspace:* version: link:../jsdom-crawler From 6d6b0306d29b40570cc8627c53a183b52fad6e3c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jind=C5=99ich=20B=C3=A4r?= Date: Mon, 22 Jun 2026 15:41:24 +0200 Subject: [PATCH 4/5] feat: use lazy-initialized HTTP client in `BasicCrawler` --- .../src/internals/basic-crawler.ts | 41 +++++++++++-------- 1 file changed, 25 insertions(+), 16 deletions(-) diff --git a/packages/basic-crawler/src/internals/basic-crawler.ts b/packages/basic-crawler/src/internals/basic-crawler.ts index 4e9fbabb20ea..537425c93282 100644 --- a/packages/basic-crawler/src/internals/basic-crawler.ts +++ b/packages/basic-crawler/src/internals/basic-crawler.ts @@ -41,6 +41,7 @@ import { EnqueueStrategy, EventType, KeyValueStore, + log, LogLevel, mergeCookies, MissingSessionError, @@ -86,6 +87,28 @@ import { cryptoRandomObjectId } from '@apify/utilities'; import { createSendRequest } from './send-request.js'; +class LazyDefaultHttpClient implements BaseHttpClient { + private _delegate?: BaseHttpClient; + private readonly _delegatePromise: Promise; + + constructor(options?: { logger?: CrawleeLogger }) { + this._delegatePromise = import('@crawlee/impit-client') + .then(({ ImpitHttpClient }) => new ImpitHttpClient(options)) + .catch(() => { + (options?.logger ?? log).warning( + 'Optional dependency @crawlee/impit-client is not installed. ' + + 'Falling back to native fetch — proxy support and browser fingerprinting are unavailable.', + ); + return new FetchHttpClient(options); + }); + } + + async sendRequest(...args: Parameters): Promise { + this._delegate ??= await this._delegatePromise; + return this._delegate.sendRequest(...args); + } +} + export interface BasicCrawlingContext extends CrawlingContext {} /** @@ -637,7 +660,7 @@ export class BasicCrawler< protected additionalHttpErrorStatusCodes: Set; protected ignoreHttpErrorStatusCodes: Set; protected autoscaledPoolOptions: AutoscaledPoolOptions; - protected httpClient!: BaseHttpClient; + protected httpClient: BaseHttpClient; protected retryOnBlocked: boolean; protected respectRobotsTxtFile: boolean | { userAgent?: string }; protected onSkippedRequest?: SkippedRequestCallback; @@ -807,9 +830,7 @@ export class BasicCrawler< this.requestManager = new RequestManagerTandem(requestList, () => this.openOwnedRequestQueue()); } - if (httpClient) { - this.httpClient = httpClient; - } + this.httpClient = httpClient ?? new LazyDefaultHttpClient({ logger: this.log }); this.proxyConfiguration = proxyConfiguration; this.statusMessageLoggingInterval = statusMessageLoggingInterval; this.statusMessageCallback = statusMessageCallback as StatusMessageCallback; @@ -1709,18 +1730,6 @@ export class BasicCrawler< this._closeEvents = true; } - if (!this.httpClient) { - try { - const { ImpitHttpClient } = await import('@crawlee/impit-client'); - this.httpClient = new ImpitHttpClient({ logger: this.log }); - } catch { - this.log.warning( - 'Optional dependency @crawlee/impit-client is not installed. Falling back to native fetch — proxy support and browser fingerprinting are unavailable.', - ); - this.httpClient = new FetchHttpClient(); - } - } - // Initialize AutoscaledPool before awaiting _loadHandledRequestCount(), // so that the caller can get a reference to it before awaiting the promise returned from run() // (otherwise there would be no way) From 4990c5e8c086d6335dba3375ff03220361dcd0d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jind=C5=99ich=20B=C3=A4r?= Date: Tue, 23 Jun 2026 10:12:32 +0200 Subject: [PATCH 5/5] chore: simplify promise resolution --- packages/basic-crawler/src/internals/basic-crawler.ts | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/packages/basic-crawler/src/internals/basic-crawler.ts b/packages/basic-crawler/src/internals/basic-crawler.ts index 537425c93282..a48883e72ce6 100644 --- a/packages/basic-crawler/src/internals/basic-crawler.ts +++ b/packages/basic-crawler/src/internals/basic-crawler.ts @@ -88,7 +88,6 @@ import { cryptoRandomObjectId } from '@apify/utilities'; import { createSendRequest } from './send-request.js'; class LazyDefaultHttpClient implements BaseHttpClient { - private _delegate?: BaseHttpClient; private readonly _delegatePromise: Promise; constructor(options?: { logger?: CrawleeLogger }) { @@ -104,8 +103,7 @@ class LazyDefaultHttpClient implements BaseHttpClient { } async sendRequest(...args: Parameters): Promise { - this._delegate ??= await this._delegatePromise; - return this._delegate.sendRequest(...args); + return (await this._delegatePromise).sendRequest(...args); } }