-
-
Notifications
You must be signed in to change notification settings - Fork 734
fix: preserve CRLF line endings in TextFileHandler.read() #438
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -23,6 +23,7 @@ import { | |
| FileResult, | ||
| FileInfo | ||
| } from './base.js'; | ||
| import { detectLineEnding, normalizeLineEndings, type LineEndingStyle } from '../lineEndingHandler.js'; | ||
|
|
||
| // TODO: Centralize these constants with filesystem.ts to avoid silent drift | ||
| // These duplicate concepts from filesystem.ts and should be moved to a shared | ||
|
|
@@ -55,8 +56,31 @@ export class TextFileHandler implements FileHandler { | |
| const length = options?.length ?? 1000; // Default from config | ||
| const includeStatusMessage = options?.includeStatusMessage ?? true; | ||
|
|
||
| // Binary detection is done at factory level - just read as text | ||
| return this.readFileWithSmartPositioning(filePath, offset, length, 'text/plain', includeStatusMessage); | ||
| // Detect original line ending before reading. | ||
| // readline strips \r\n → \n; we need to restore the original style | ||
| // so that downstream consumers (e.g. write_file) preserve line endings. | ||
| const lineEnding = await this.detectFileLineEnding(filePath); | ||
| const endsWithNewline = await this.fileEndsWithNewline(filePath); | ||
|
|
||
| const result = await this.readFileWithSmartPositioning(filePath, offset, length, 'text/plain', includeStatusMessage); | ||
|
|
||
| // Restore original line endings if the file uses CRLF or CR | ||
| if (lineEnding !== '\n' && typeof result.content === 'string') { | ||
| result.content = normalizeLineEndings(result.content, lineEnding); | ||
| } | ||
|
|
||
| // Restore trailing newline stripped by readline. | ||
| // When we haven't reached EOF, the last line we read definitely had | ||
| // a newline after it (there's more content following in the file). | ||
| // When we have reached EOF, only restore if the original file ended with one. | ||
| if (typeof result.content === 'string' && !result.content.endsWith(lineEnding)) { | ||
| const reachedEOF = result.metadata?.reachedEOF !== false; | ||
| if (!reachedEOF || endsWithNewline) { | ||
| result.content += lineEnding; | ||
| } | ||
| } | ||
|
coderabbitai[bot] marked this conversation as resolved.
|
||
|
|
||
| return result; | ||
| } | ||
|
coderabbitai[bot] marked this conversation as resolved.
|
||
|
|
||
| async write(path: string, content: string, mode: 'rewrite' | 'append' = 'rewrite'): Promise<void> { | ||
|
|
@@ -131,6 +155,67 @@ export class TextFileHandler implements FileHandler { | |
| return undefined; | ||
| } | ||
|
|
||
| /** | ||
| * Detect line ending style by scanning file chunks until a newline is found. | ||
| * Reads in 8KB chunks up to 64KB to handle files whose first line exceeds 8KB. | ||
| */ | ||
| private async detectFileLineEnding(filePath: string): Promise<LineEndingStyle> { | ||
| const CHUNK_SIZE = 8192; | ||
| const MAX_SCAN = 65536; // 64KB cap | ||
| const fd = await fs.open(filePath, 'r'); | ||
| try { | ||
| const buffer = Buffer.alloc(CHUNK_SIZE); | ||
| let position = 0; | ||
| let prevEndedWithCR = false; | ||
| while (position < MAX_SCAN) { | ||
| const { bytesRead } = await fd.read(buffer, 0, CHUNK_SIZE, position); | ||
| if (bytesRead === 0) { | ||
| return prevEndedWithCR ? '\r' : '\n'; | ||
| } | ||
| const chunk = buffer.toString('utf-8', 0, bytesRead); | ||
| // Handle \r at previous chunk boundary followed by \n here | ||
| if (prevEndedWithCR) { | ||
| return chunk[0] === '\n' ? '\r\n' : '\r'; | ||
| } | ||
| for (let i = 0; i < chunk.length; i++) { | ||
| if (chunk[i] === '\r') { | ||
| if (i + 1 < chunk.length) { | ||
| return chunk[i + 1] === '\n' ? '\r\n' : '\r'; | ||
| } | ||
| // \r at end of chunk — check next chunk for \n | ||
| prevEndedWithCR = true; | ||
| break; | ||
| } | ||
| if (chunk[i] === '\n') { | ||
| return '\n'; | ||
| } | ||
| } | ||
| position += bytesRead; | ||
| } | ||
| // No newline found within cap — default to LF | ||
| return '\n'; | ||
|
coderabbitai[bot] marked this conversation as resolved.
Comment on lines
+164
to
+196
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Suggestion: The line-ending detector stops scanning after 64KB and defaults to LF if no newline is found in that window. For files whose first newline appears after that limit (for example, very long first lines), CRLF/CR files will be misdetected as LF, so returned content will still be normalized to LF and the original endings will be lost. Continue scanning until the first newline or EOF (or make the cap large/adaptive and fall back to a safer strategy) instead of forcing LF at 64KB. [logic error] Severity Level: Major
|
||
| } finally { | ||
| await fd.close(); | ||
| } | ||
| } | ||
|
|
||
| /** | ||
| * Check whether the file ends with a newline character (\n or \r). | ||
| * Used to restore trailing newlines stripped by readline. | ||
| */ | ||
| private async fileEndsWithNewline(filePath: string): Promise<boolean> { | ||
| const stats = await fs.stat(filePath); | ||
| if (stats.size === 0) return false; | ||
| const fd = await fs.open(filePath, 'r'); | ||
| try { | ||
| const buf = Buffer.alloc(1); | ||
| await fd.read(buf, 0, 1, stats.size - 1); | ||
| return buf[0] === 0x0A || buf[0] === 0x0D; // \n or \r | ||
| } finally { | ||
| await fd.close(); | ||
| } | ||
| } | ||
|
coderabbitai[bot] marked this conversation as resolved.
|
||
|
|
||
| /** | ||
| * Generate enhanced status message | ||
| */ | ||
|
|
@@ -283,7 +368,7 @@ export class TextFileHandler implements FileHandler { | |
| ? `${this.generateEnhancedStatusMessage(result.length, -n, fileTotalLines, true)}\n\n${result.join('\n')}` | ||
| : result.join('\n'); | ||
|
|
||
| return { content, mimeType, metadata: {} }; | ||
| return { content, mimeType, metadata: { reachedEOF: true } }; | ||
| } finally { | ||
| await fd.close(); | ||
| } | ||
|
|
@@ -330,7 +415,7 @@ export class TextFileHandler implements FileHandler { | |
| ? `${this.generateEnhancedStatusMessage(result.length, -requestedLines, fileTotalLines, true)}\n\n${result.join('\n')}` | ||
| : result.join('\n'); | ||
|
|
||
| return { content, mimeType, metadata: {} }; | ||
| return { content, mimeType, metadata: { reachedEOF: true } }; | ||
| } | ||
|
|
||
| /** | ||
|
|
@@ -351,12 +436,15 @@ export class TextFileHandler implements FileHandler { | |
|
|
||
| const result: string[] = []; | ||
| let lineNumber = 0; | ||
| let reachedEOF = true; | ||
|
|
||
| for await (const line of rl) { | ||
| if (lineNumber >= offset && result.length < length) { | ||
| result.push(line); | ||
| } else if (result.length >= length) { | ||
| reachedEOF = false; | ||
| break; | ||
| } | ||
| if (result.length >= length) break; | ||
| lineNumber++; | ||
| } | ||
|
|
||
|
|
@@ -365,10 +453,10 @@ export class TextFileHandler implements FileHandler { | |
| if (includeStatusMessage) { | ||
| const statusMessage = this.generateEnhancedStatusMessage(result.length, offset, fileTotalLines, false); | ||
| const content = `${statusMessage}\n\n${result.join('\n')}`; | ||
| return { content, mimeType, metadata: {} }; | ||
| return { content, mimeType, metadata: { reachedEOF } }; | ||
| } else { | ||
| const content = result.join('\n'); | ||
| return { content, mimeType, metadata: {} }; | ||
| return { content, mimeType, metadata: { reachedEOF } }; | ||
| } | ||
| } | ||
|
|
||
|
|
@@ -421,6 +509,7 @@ export class TextFileHandler implements FileHandler { | |
|
|
||
| const result: string[] = []; | ||
| let firstLineSkipped = false; | ||
| let reachedEOF = true; | ||
|
|
||
| for await (const line of rl2) { | ||
| if (!firstLineSkipped && startPosition > 0) { | ||
|
|
@@ -431,6 +520,7 @@ export class TextFileHandler implements FileHandler { | |
| if (result.length < length) { | ||
| result.push(line); | ||
| } else { | ||
| reachedEOF = false; | ||
| break; | ||
| } | ||
| } | ||
|
|
@@ -441,7 +531,7 @@ export class TextFileHandler implements FileHandler { | |
| ? `${this.generateEnhancedStatusMessage(result.length, offset, fileTotalLines, false)}\n\n${result.join('\n')}` | ||
| : result.join('\n'); | ||
|
|
||
| return { content, mimeType, metadata: {} }; | ||
| return { content, mimeType, metadata: { reachedEOF } }; | ||
| } finally { | ||
| await fd.close(); | ||
| } | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Suggestion: This normalizes the entire returned payload to a single line-ending style, which rewrites files that contain mixed line endings. In a read→write roundtrip, mixed-ending files will be silently homogenized instead of preserved, causing unintended file diffs and content drift. Preserve per-line endings (or skip normalization when mixed endings are detected) instead of forcing one style. [logic error]
Severity Level: Major⚠️
Steps of Reproduction ✅
Fix in Cursor | Fix in VSCode Claude
(Use Cmd/Ctrl + Click for best experience)
Prompt for AI Agent 🤖