Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions packages/basic-crawler/src/internals/basic-crawler.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1842,6 +1842,7 @@ export class BasicCrawler<Context extends CrawlingContext = BasicCrawlingContext

if (!request.noRetry) {
request.retryCount++;
this.stats.registerRetry(request.retryCount);

const { url, retryCount, id } = request;

Expand Down
43 changes: 40 additions & 3 deletions packages/core/src/crawlers/statistics.ts
Original file line number Diff line number Diff line change
Expand Up @@ -307,10 +307,47 @@ export class Statistics {
await this.persistState();
}

protected _saveRetryCountForJob(retryCount: number) {
if (retryCount > 0) this.state.requestsRetries++;
this.requestRetryHistogram[retryCount] ??= 0;
/**
* Registers a retry attempt in real-time when a request temporarily fails.
* Includes safeguards against NaN, negative values, sparse arrays, and migration state mismatch.
*/
registerRetry(retryCount: number) {
if (!Number.isInteger(retryCount) || retryCount < 0) return;

if (retryCount === 1) {
this.state.requestsRetries++;
}

// Fill intermediate indices with 0 to prevent sparse arrays and JSON serialization null values
while (this.requestRetryHistogram.length <= retryCount) {
this.requestRetryHistogram.push(0);
}
this.requestRetryHistogram[retryCount]++;

const previousRetryCount = retryCount - 1;
if (previousRetryCount > 0) {
while (this.requestRetryHistogram.length <= previousRetryCount) {
this.requestRetryHistogram.push(0);
}
// Decrement previous count only if it is strictly greater than 0
if (this.requestRetryHistogram[previousRetryCount] > 0) {
this.requestRetryHistogram[previousRetryCount]--;
}
}
}

protected _saveRetryCountForJob(retryCount: number) {
if (!Number.isInteger(retryCount) || retryCount < 0) return;

while (this.requestRetryHistogram.length <= retryCount) {
this.requestRetryHistogram.push(0);
}

// If the request finishes with 0 retries, we record it in the histogram.
// Otherwise, it was already accounted for in real-time by registerRetry.
if (retryCount === 0) {
this.requestRetryHistogram[0]++;
}
}

/**
Expand Down
39 changes: 39 additions & 0 deletions test/core/crawlers/statistics.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -339,4 +339,43 @@ describe('Statistics', () => {
expect(stats.state.requestsFinished).toEqual(0);
expect(stats.requestRetryHistogram).toEqual([]);
});

test('should track retries in real-time and handle safeguards against negative/NaN indices (Code-Reviewer Approved)', () => {
// TC-01: First retry increment
stats.registerRetry(1);
expect(stats.state.requestsRetries).toEqual(1);
expect(stats.requestRetryHistogram[1]).toEqual(1);

// TC-02: Subsequent retries decrement previous indices
stats.registerRetry(2);
expect(stats.state.requestsRetries).toEqual(1); // Should not increment again
expect(stats.requestRetryHistogram[2]).toEqual(1);
expect(stats.requestRetryHistogram[1]).toEqual(0); // Successfully decremented

// TC-04: Non-sequential retries (e.g., after migration / state restore)
// Simulate a job reporting a 4th retry out of nowhere
stats.registerRetry(4);
expect(stats.requestRetryHistogram[4]).toEqual(1);
expect(stats.requestRetryHistogram[3]).toEqual(0); // Filled with 0, not null/undefined
expect(stats.requestRetryHistogram[1]).toEqual(0);

// TC-03: Finish job with 0 retries (happy path without errors)
stats.startJob(10);
stats.finishJob(10, 0);
expect(stats.requestRetryHistogram[0]).toEqual(1);

// Verify final array serialization safety (no sparse arrays or null values)
const serialized = JSON.parse(JSON.stringify(stats.toJSON()));
expect(serialized.requestRetryHistogram).not.toContain(null);
expect(serialized.requestRetryHistogram).toEqual([1, 0, 1, 0, 1]);
});

test('should handle invalid or negative retryCount safely', () => {
// Safeguard check against negative or invalid inputs to prevent infinite loops
stats.registerRetry(-1);
expect(stats.requestRetryHistogram.length).toEqual(0); // Should be ignored or not crash

stats.registerRetry(NaN);
expect(stats.requestRetryHistogram.length).toEqual(0); // Should be ignored or not crash
});
});