diff --git a/docs/9.0/connections/instantiation.md b/docs/9.0/connections/instantiation.md
index 72a55b6b..160cbc12 100644
--- a/docs/9.0/connections/instantiation.md
+++ b/docs/9.0/connections/instantiation.md
@@ -61,6 +61,7 @@ Alternatively, you can use the fromStream method.
```php
public static AbstractCsv::fromStream(SplFileObject|resource $stream): self
```
+
Creates a new object from a stream resource or a streaming object.
```php
diff --git a/docs/9.0/reader/record-mapping.md b/docs/9.0/reader/record-mapping.md
index 4381c0c2..368a8c70 100644
--- a/docs/9.0/reader/record-mapping.md
+++ b/docs/9.0/reader/record-mapping.md
@@ -8,7 +8,7 @@ description: Converts your CSV records into PHP objects using PHP's powerful Ref
New in version 9.12.0
-If you are working with a class which implements the `TabularDataReader` interface you can now deserialize
+If you are working with a class which implements the `TabularData` interface you can now deserialize
your data using the `TabularDataReader::getRecordsAsObject` method. The method will convert your document records
into objects using PHP's powerful Reflection API.
diff --git a/phpstan-build.neon b/phpstan-build.neon
index 563573c0..b30d4878 100644
--- a/phpstan-build.neon
+++ b/phpstan-build.neon
@@ -19,4 +19,6 @@ parameters:
treatPhpDocTypesAsCertain: false
parallel:
processTimeout: 300.0
+ bootstrapFiles:
+ - vendor/autoload.php
diff --git a/phpstan.neon b/phpstan.neon
index 84028aac..81f12199 100644
--- a/phpstan.neon
+++ b/phpstan.neon
@@ -22,4 +22,3 @@ parameters:
treatPhpDocTypesAsCertain: false
parallel:
processTimeout: 300.0
-
diff --git a/src/Buffer.php b/src/Buffer.php
index b25c5df0..cb40d7ba 100644
--- a/src/Buffer.php
+++ b/src/Buffer.php
@@ -18,6 +18,8 @@
use Iterator;
use League\Csv\Query\Constraint\Criteria;
use League\Csv\Query\Predicate;
+use League\Csv\Schema\Inspector;
+use League\Csv\Schema\Schema;
use League\Csv\Serializer\Denormalizer;
use League\Csv\Serializer\MappingFailed;
use League\Csv\Serializer\TypeCastingFailed;
@@ -203,6 +205,35 @@ public function map(callable $callback): Iterator
return MapIterator::fromIterable($this->getRecords(), $callback);
}
+    /**
+     * Folds every record into a single value.
+     *
+     * The callback is invoked once per record, in iteration order, with the value
+     * carried so far, the current record and the record offset; whatever it returns
+     * is carried over to the next invocation. When there is nothing to iterate
+     * over, the initial value is returned untouched.
+     *
+     * @template TInitial
+     *
+     * @param callable(TInitial|null, array, array-key=): TInitial $callback
+     * @param TInitial|null $initial
+     *
+     * @throws SyntaxError
+     *
+     * @return TInitial|null
+     */
+    public function reduce(callable $callback, mixed $initial = null): mixed
+    {
+        $carry = $initial;
+        foreach ($this->getRecords() as $offset => $record) {
+            $carry = $callback($carry, $record, $offset);
+        }
+
+        return $carry;
+    }
+
+    /**
+     * Builds the schema of this instance by delegating to an inspector.
+     *
+     * The inspector returned by Inspector::default() is used when none is given;
+     * the header is forwarded as-is to Inspector::schema().
+     */
+    public function inferSchema(?Inspector $inspector = null, array $header = []): Schema
+    {
+        $inspector ??= Inspector::default();
+
+        return $inspector->schema($this, $header);
+    }
+
+    /**
+     * Infers the schema of this instance, then returns the iterator produced by
+     * Schema::parse() for it.
+     *
+     * Both arguments are forwarded untouched to inferSchema().
+     */
+    public function inferRecords(?Inspector $inspector = null, array $header = []): Iterator
+    {
+        $schema = $this->inferSchema($inspector, $header);
+
+        return $schema->parse($this);
+    }
+
/**
* @param non-negative-int $nth
*
diff --git a/src/Reader.php b/src/Reader.php
index 678f2348..7e41ae2d 100644
--- a/src/Reader.php
+++ b/src/Reader.php
@@ -18,6 +18,8 @@
use Deprecated;
use Iterator;
use JsonSerializable;
+use League\Csv\Schema\Inspector;
+use League\Csv\Schema\Schema;
use League\Csv\Serializer\Denormalizer;
use League\Csv\Serializer\MappingFailed;
use League\Csv\Serializer\TypeCastingFailed;
@@ -416,6 +418,16 @@ public function map(callable $callback): Iterator
return MapIterator::fromIterable($this, $callback);
}
+    /**
+     * Builds the schema of this instance by delegating to an inspector.
+     *
+     * The inspector returned by Inspector::default() is used when none is given;
+     * the header is forwarded as-is to Inspector::schema().
+     */
+    public function inferSchema(?Inspector $inspector = null, array $header = []): Schema
+    {
+        $inspector ??= Inspector::default();
+
+        return $inspector->schema($this, $header);
+    }
+
+    /**
+     * Infers the schema of this instance, then returns the iterator produced by
+     * Schema::parse() for it.
+     *
+     * Both arguments are forwarded untouched to inferSchema().
+     */
+    public function inferRecords(?Inspector $inspector = null, array $header = []): Iterator
+    {
+        $schema = $this->inferSchema($inspector, $header);
+
+        return $schema->parse($this);
+    }
+
/**
* @param positive-int $recordsCount
*
diff --git a/src/ResultSet.php b/src/ResultSet.php
index fecc439e..6b4a73e2 100644
--- a/src/ResultSet.php
+++ b/src/ResultSet.php
@@ -20,6 +20,8 @@
use Generator;
use Iterator;
use JsonSerializable;
+use League\Csv\Schema\Inspector;
+use League\Csv\Schema\Schema;
use League\Csv\Serializer\Denormalizer;
use League\Csv\Serializer\MappingFailed;
use League\Csv\Serializer\TypeCastingFailed;
@@ -206,6 +208,16 @@ public function map(callable $callback): Iterator
return MapIterator::fromIterable($this, $callback);
}
+    /**
+     * Builds the schema of this instance by delegating to an inspector.
+     *
+     * The inspector returned by Inspector::default() is used when none is given;
+     * the header is forwarded as-is to Inspector::schema().
+     */
+    public function inferSchema(?Inspector $inspector = null, array $header = []): Schema
+    {
+        $inspector ??= Inspector::default();
+
+        return $inspector->schema($this, $header);
+    }
+
+    /**
+     * Infers the schema of this instance, then returns the iterator produced by
+     * Schema::parse() for it.
+     *
+     * Both arguments are forwarded untouched to inferSchema().
+     */
+    public function inferRecords(?Inspector $inspector = null, array $header = []): Iterator
+    {
+        $schema = $this->inferSchema($inspector, $header);
+
+        return $schema->parse($this);
+    }
+
/**
* @param positive-int $recordsCount
*
diff --git a/src/Schema/BooleanField.php b/src/Schema/BooleanField.php
new file mode 100644
index 00000000..d6e71557
--- /dev/null
+++ b/src/Schema/BooleanField.php
@@ -0,0 +1,62 @@
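+<?php
+
+/**
+ * League.Csv (https://csv.thephpleague.com)
+ *
+ * (c) Ignace Nyamagana Butera <nyamsprod@gmail.com>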
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */
+
+declare(strict_types=1);
+
+namespace League\Csv\Schema;
+
+use PHPUnit\Framework\Attributes\CoversClass;
+
+use function filter_var;
+use function in_array;
+use function is_bool;
+use function is_string;
+use function trim;
+
+use const FILTER_NULL_ON_FAILURE;
+use const FILTER_VALIDATE_BOOLEAN;
+
+/**
+ * Field definition recognising boolean values.
+ */
+final class BooleanField extends FieldEvaluator implements Field
+{
+    public function type(): FieldType
+    {
+        return FieldType::Boolean;
+    }
+
+    /**
+     * The field name is the backing value of the Boolean field type.
+     */
+    public function name(): string
+    {
+        return FieldType::Boolean->value;
+    }
+
+    /**
+     * Converts a value into a boolean.
+     *
+     * Booleans are returned as-is. Strings and the integers 0 and 1 are
+     * trimmed (once cast to string) and handed to filter_var(); any other
+     * input, a blank string, or a string filter_var() does not recognise
+     * yields null.
+     */
+    public function parse(mixed $value): ?bool
+    {
+        if (is_bool($value)) {
+            return $value;
+        }
+
+        // Only strings and the two integers with an obvious boolean meaning are candidates.
+        if (!is_string($value) && !in_array($value, [0, 1], true)) {
+            return null;
+        }
+
+        $value = trim((string) $value);
+        if ('' === $value) {
+            return null;
+        }
+
+        // FILTER_NULL_ON_FAILURE keeps "not a boolean" (null) distinct from a genuine false.
+        return filter_var($value, FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
+    }
+
+    /**
+     * A boolean field carries no extra metadata.
+     */
+    public function metadata(): FieldMetadata
+    {
+        return new FieldMetadata();
+    }
+}
diff --git a/src/Schema/BooleanFieldTest.php b/src/Schema/BooleanFieldTest.php
new file mode 100644
index 00000000..5f3265b5
--- /dev/null
+++ b/src/Schema/BooleanFieldTest.php
@@ -0,0 +1,64 @@
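+<?php
+
+/**
+ * League.Csv (https://csv.thephpleague.com)
+ *
+ * (c) Ignace Nyamagana Butera <nyamsprod@gmail.com>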
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */
+
+declare(strict_types=1);
+
+namespace League\Csv\Schema;
+
+use PHPUnit\Framework\Attributes\CoversClass;
+use PHPUnit\Framework\Attributes\DataProvider;
+use PHPUnit\Framework\TestCase;
+
+#[CoversClass(BooleanField::class)]
+final class BooleanFieldTest extends TestCase
+{
+    private BooleanField $field;
+
+    protected function setUp(): void
+    {
+        $this->field = new BooleanField();
+    }
+
+    /**
+     * Each dataset is an input followed by the value parse() must return for it.
+     *
+     * @return array<string, array{0: mixed, 1: ?bool}>
+     */
+    public static function provideBooleanValues(): array
+    {
+        return [
+            'boolean true' => [true, true],
+            'boolean false' => [false, false],
+            'string true' => ['true', true],
+            'string false' => ['false', false],
+            'string 1' => ['1', true],
+            'string 0' => ['0', false],
+            // the integers 0 and 1 are the only non-string scalars parse() accepts
+            'integer 1' => [1, true],
+            'integer 0' => [0, false],
+            'padded string' => [' true ', true],
+            'empty string' => ['', null],
+            'blank string' => [' ', null],
+            'unknown word' => ['foo', null],
+            'array' => [[], null],
+            'integer other than 0 and 1' => [123, null],
+        ];
+    }
+
+    #[DataProvider('provideBooleanValues')]
+    public function testParse(mixed $input, ?bool $expected): void
+    {
+        // assertSame() is strict, so it also covers the null expectation.
+        self::assertSame($expected, $this->field->parse($input));
+    }
+
+    public function test_metadata_contains_expected_structure(): void
+    {
+        self::assertTrue($this->field->metadata()->isEmpty());
+    }
+}
diff --git a/src/Schema/CallbackFieldParser.php b/src/Schema/CallbackFieldParser.php
new file mode 100644
index 00000000..78b420c4
--- /dev/null
+++ b/src/Schema/CallbackFieldParser.php
@@ -0,0 +1,45 @@
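+<?php
+
+/**
+ * League.Csv (https://csv.thephpleague.com)
+ *
+ * (c) Ignace Nyamagana Butera <nyamsprod@gmail.com>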
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */
+
+declare(strict_types=1);
+
+namespace League\Csv\Schema;
+
+use Closure;
+
+/**
+ * Field parser delegating the parsing to a user supplied callback.
+ *
+ * @template T
+ */
+final class CallbackFieldParser implements FieldParser
+{
+    /** @var Closure(mixed): ?T */
+    private Closure $callback;
+
+    /**
+     * @param (Closure(mixed): ?T)|(callable(mixed): ?T) $callback
+     */
+    public function __construct(Closure|callable $callback)
+    {
+        // Any callable that is not already a Closure is converted once, so the property type always holds.
+        $this->callback = $callback instanceof Closure ? $callback : $callback(...);
+    }
+
+    /**
+     * Returns whatever the callback returns for the given value.
+     *
+     * @return ?T
+     */
+    public function parse(mixed $value): mixed
+    {
+        return ($this->callback)($value);
+    }
+}
diff --git a/src/Schema/CustomField.php b/src/Schema/CustomField.php
new file mode 100644
index 00000000..ebbbc023
--- /dev/null
+++ b/src/Schema/CustomField.php
@@ -0,0 +1,70 @@
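+<?php
+
+/**
+ * League.Csv (https://csv.thephpleague.com)
+ *
+ * (c) Ignace Nyamagana Butera <nyamsprod@gmail.com>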
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */
+
+declare(strict_types=1);
+
+namespace League\Csv\Schema;
+
+use Closure;
+use ValueError;
+
+use function preg_match;
+
+/**
+ * Field definition whose parsing is delegated to a user supplied parser.
+ *
+ * @template T
+ */
+final class CustomField extends FieldEvaluator implements Field
+{
+    private readonly FieldParser $fieldParser;
+    /** @var non-empty-string */
+    private readonly string $fieldTypeName;
+
+    /**
+     * @param FieldParser|Closure|callable $fieldParser a parser instance, or a callable wrapped into a CallbackFieldParser
+     * @param string $fieldTypeName lowercase snake case label shown by name()
+     *
+     * @throws ValueError if the type name is not lowercase snake case
+     */
+    public function __construct(
+        FieldParser|Closure|callable $fieldParser,
+        string $fieldTypeName,
+        float $confidenceThreshold = 0.8
+    ) {
+        // The D modifier makes `$` match only at the very end of the subject: without it
+        // a name ending with a line feed ("custom\n") would be accepted. The pattern
+        // requires at least one letter, so the empty string is rejected too.
+        1 === preg_match('/^[a-z]+(?:_[a-z0-9]+)*$/D', $fieldTypeName) || throw new ValueError('The name "'.$fieldTypeName.'" is not a valid snake case variable name.');
+        $fieldParser = self::resolveFieldParser($fieldParser);
+        parent::__construct($confidenceThreshold);
+
+        $this->fieldParser = $fieldParser;
+        $this->fieldTypeName = $fieldTypeName;
+    }
+
+    /**
+     * Returns the parser untouched when it already is a FieldParser, otherwise wraps the callable.
+     */
+    private static function resolveFieldParser(FieldParser|Closure|callable $parser): FieldParser
+    {
+        return $parser instanceof FieldParser ? $parser : new CallbackFieldParser($parser);
+    }
+
+    public function type(): FieldType
+    {
+        return FieldType::Custom;
+    }
+
+    /**
+     * The name is the backing value of the Custom field type followed by the type name between parentheses.
+     */
+    public function name(): string
+    {
+        return FieldType::Custom->value.'('.$this->fieldTypeName.')';
+    }
+
+    /**
+     * @return ?T
+     */
+    public function parse(mixed $value): mixed
+    {
+        return $this->fieldParser->parse($value);
+    }
+
+    /**
+     * A custom field carries no extra metadata.
+     */
+    public function metadata(): FieldMetadata
+    {
+        return new FieldMetadata();
+    }
+}
diff --git a/src/Schema/CustomFieldTest.php b/src/Schema/CustomFieldTest.php
new file mode 100644
index 00000000..9bae5f94
--- /dev/null
+++ b/src/Schema/CustomFieldTest.php
@@ -0,0 +1,99 @@
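+<?php
+
+/**
+ * League.Csv (https://csv.thephpleague.com)
+ *
+ * (c) Ignace Nyamagana Butera <nyamsprod@gmail.com>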
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */
+
+declare(strict_types=1);
+
+namespace League\Csv\Schema;
+
+use PHPUnit\Framework\Attributes\CoversClass;
+use PHPUnit\Framework\TestCase;
+
+#[CoversClass(CallbackFieldParser::class)]
+#[CoversClass(CustomField::class)]
+final class CustomFieldTest extends TestCase
+{
+    // --------------------------------------------------------
+    // parse()
+    // --------------------------------------------------------
+
+    public function testParseUsesClosure(): void
+    {
+        $field = new CustomField(
+            fn ($value) => 'ok' === $value ? 'parsed' : null,
+            'custom',
+        );
+
+        self::assertSame('parsed', $field->parse('ok'));
+        self::assertNull($field->parse('nope'));
+    }
+
+    public function testParseUsesCallable(): void
+    {
+        $callable = function ($value) {
+            return is_int($value) ? $value * 2 : null;
+        };
+
+        $field = new CustomField($callable, 'custom');
+
+        self::assertSame(4, $field->parse(2));
+        self::assertNull($field->parse('2'));
+    }
+
+    // --------------------------------------------------------
+    // evaluate() (inherited behavior)
+    // --------------------------------------------------------
+
+    public function testEvaluateUsesParse(): void
+    {
+        $field = new CustomField(
+            fn ($value) => 'valid' === $value ? true : null,
+            'custom'
+        );
+
+        self::assertSame(1, $field->evaluate('valid'));
+        self::assertSame(-1, $field->evaluate('invalid'));
+        self::assertSame(0, $field->evaluate(null));
+        self::assertSame(0, $field->evaluate(''));
+    }
+
+    // --------------------------------------------------------
+    // score()
+    // --------------------------------------------------------
+
+    // --------------------------------------------------------
+    // type()
+    // --------------------------------------------------------
+
+    public function testTypeIsCustom(): void
+    {
+        $field = new CustomField(fn () => null, 'custom');
+
+        self::assertSame(FieldType::Custom, $field->type());
+    }
+
+    // --------------------------------------------------------
+    // confidenceThreshold()
+    // --------------------------------------------------------
+
+    public function testConfidenceThresholdIsInherited(): void
+    {
+        $field = new CustomField(fn () => null, 'custom', 0.8);
+
+        self::assertSame(0.8, $field->confidenceThreshold());
+    }
+
+    // --------------------------------------------------------
+    // metadata()
+    // --------------------------------------------------------
+
+    public function test_metadata_contains_expected_structure(): void
+    {
+        // The class under test is CustomField: building any other field here would leave its metadata() unverified.
+        $field = new CustomField(fn () => null, 'custom');
+
+        self::assertTrue($field->metadata()->isEmpty());
+    }
+}
diff --git a/src/Schema/DateTimeField.php b/src/Schema/DateTimeField.php
new file mode 100644
index 00000000..48f7fc6e
--- /dev/null
+++ b/src/Schema/DateTimeField.php
@@ -0,0 +1,221 @@
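+<?php
+
+/**
+ * League.Csv (https://csv.thephpleague.com)
+ *
+ * (c) Ignace Nyamagana Butera <nyamsprod@gmail.com>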
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */
+
+declare(strict_types=1);
+
+namespace League\Csv\Schema;
+
+use DateTime;
+use DateTimeImmutable;
+use DateTimeInterface;
+use DateTimeZone;
+use Exception;
+use Throwable;
+use ValueError;
+
+use function class_exists;
+use function is_string;
+use function is_subclass_of;
+use function trim;
+
+/**
+ * Field definition recognising date and time values written in one fixed format.
+ *
+ * Strings are parsed with the configured format and timezone; the parsed value
+ * is an instance of the configured output class.
+ */
+final class DateTimeField extends FieldEvaluator implements Field
+{
+    /** @var non-empty-string */
+    public readonly string $format;
+    public readonly DateTimeZone $timezone;
+    /** @var class-string<DateTimeInterface> */
+    public readonly string $outputClass;
+
+    /** @var list<non-empty-string> */
+    private const FORMAT_MACHINES = [
+        'Y-m-d',
+        'Y-m-d H:i:s',
+        'Y-m-d\TH:i:s',
+        DateTimeInterface::RFC3339,
+        DateTimeInterface::RFC3339_EXTENDED,
+        DateTimeInterface::ISO8601_EXPANDED,
+        'U',
+    ];
+
+    /** @var list<non-empty-string> */
+    private const FORMAT_LOCALIZED = [
+        // Europe Dates
+        'd/m/Y',
+        'd-m-Y',
+        'd.m.Y',
+        // American Dates
+        'm/d/Y',
+        'm-d-Y',
+        'm.d.Y',
+    ];
+
+    /**
+     * @param non-empty-string $format format understood by createFromFormat(); it is trimmed before use
+     * @param DateTimeZone|string|null $timezone timezone object or identifier; null means UTC
+     * @param class-string<DateTimeInterface> $outputClass class of the objects returned by parse()
+     *
+     * @throws ValueError if the format is blank, the timezone is unknown, the class
+     *                    is not a DateTimeInterface class or the threshold is out of range
+     */
+    public function __construct(
+        string $format,
+        DateTimeZone|string|null $timezone = null,
+        string $outputClass = DateTimeImmutable::class,
+        float $confidenceThreshold = 0.8,
+    ) {
+        $format = trim($format);
+        '' !== $format || throw new ValueError('The date field format can not be empty.');
+        $timezone = self::filterTimezone($timezone);
+        self::filterDateTimeInterfaceClass($outputClass);
+
+        parent::__construct($confidenceThreshold);
+        $this->format = $format;
+        $this->timezone = $timezone;
+        $this->outputClass = $outputClass;
+    }
+
+    /**
+     * Returns the machine formats followed by the localized formats.
+     *
+     * @param class-string<DateTimeInterface> $outputClass
+     */
+    public static function common(
+        DateTimeZone|string|null $timezone = null,
+        string $outputClass = DateTimeImmutable::class,
+    ): FieldList {
+        return self::machine($timezone, $outputClass)->append(self::localized($timezone, $outputClass));
+    }
+
+    /**
+     * Returns one field per entry of FORMAT_MACHINES, each with a 0.8 confidence threshold.
+     *
+     * @param class-string<DateTimeInterface> $outputClass
+     */
+    public static function machine(
+        DateTimeZone|string|null $timezone = null,
+        string $outputClass = DateTimeImmutable::class,
+    ): FieldList {
+        return self::fromFormat(self::FORMAT_MACHINES, $timezone, $outputClass, 0.8);
+    }
+
+    /**
+     * Returns one field per entry of FORMAT_LOCALIZED, each with a 0.7 confidence threshold.
+     *
+     * @param class-string<DateTimeInterface> $outputClass
+     */
+    public static function localized(
+        DateTimeZone|string|null $timezone = null,
+        string $outputClass = DateTimeImmutable::class,
+    ): FieldList {
+        return self::fromFormat(self::FORMAT_LOCALIZED, $timezone, $outputClass, 0.7);
+    }
+
+    /**
+     * Returns a field using the `U` format and the UTC timezone.
+     *
+     * @param class-string<DateTimeInterface> $outputClass
+     */
+    public static function timestamp(
+        string $outputClass = DateTimeImmutable::class,
+        float $confidenceThreshold = 0.8
+    ): self {
+        return new self(
+            format: 'U',
+            timezone: 'UTC',
+            outputClass: $outputClass,
+            confidenceThreshold: $confidenceThreshold,
+        );
+    }
+
+    /**
+     * Returns one field per given format, all sharing the other settings.
+     *
+     * @param iterable<non-empty-string> $formats
+     * @param class-string<DateTimeInterface> $outputClass
+     */
+    public static function fromFormat(
+        iterable $formats,
+        DateTimeZone|string|null $timezone = null,
+        string $outputClass = DateTimeImmutable::class,
+        float $confidenceThreshold = 0.8,
+    ): FieldList {
+        $res = [];
+        foreach ($formats as $format) {
+            $res[] = new self($format, $timezone, $outputClass, $confidenceThreshold);
+        }
+
+        return new FieldList(...$res);
+    }
+
+    /**
+     * @throws ValueError if the name does not designate a class implementing DateTimeInterface
+     */
+    private static function filterDateTimeInterfaceClass(string $className): void
+    {
+        // class_exists() is false for interfaces. An interface extending DateTimeInterface
+        // satisfies is_subclass_of() but can not be used to build the parsed value.
+        (class_exists($className) && is_subclass_of($className, DateTimeInterface::class))
+            || throw new ValueError('The date field class '.$className.' does not implement the DateTimeInterface interface.');
+    }
+
+    /**
+     * @throws ValueError if the timezone identifier is rejected by DateTimeZone
+     */
+    private static function filterTimezone(DateTimeZone|string|null $timeZone): DateTimeZone
+    {
+        if (null === $timeZone) {
+            return new DateTimeZone('UTC');
+        }
+
+        if ($timeZone instanceof DateTimeZone) {
+            return $timeZone;
+        }
+
+        try {
+            return new DateTimeZone($timeZone);
+        } catch (Exception $exception) {
+            throw new ValueError('The date field timezone value `'.$timeZone.'` is invalid.', previous: $exception);
+        }
+    }
+
+    public function type(): FieldType
+    {
+        return FieldType::Datetime;
+    }
+
+    /**
+     * The `U` format is reported as `timestamp`; any other format is reported verbatim.
+     */
+    public function name(): string
+    {
+        $format = ('U' === $this->format) ? 'timestamp' : $this->format;
+
+        return FieldType::Datetime->value.'(format='.$format.',timezone='.$this->timezone->getName().')';
+    }
+
+    /**
+     * Converts a value into an instance of the output class.
+     *
+     * Date objects are returned as-is when they already are of the exact output
+     * class and converted otherwise. Strings are trimmed and parsed with the
+     * configured format. Null is returned for any other type, for a blank string
+     * and for a string the format does not match cleanly.
+     */
+    public function parse(mixed $value): ?DateTimeInterface
+    {
+        if ($value instanceof DateTimeInterface) {
+            return $value::class === $this->outputClass ? $value : $this->outputClass::createFromInterface($value);
+        }
+
+        if (!is_string($value)) {
+            return null;
+        }
+
+        $value = trim($value);
+        if ('' === $value) {
+            return null;
+        }
+
+        try {
+            $date = $this->outputClass::createFromFormat($this->format, $value, $this->timezone);
+            if (false === $date) {
+                return null;
+            }
+
+            // A date may be built even though the parser reported warnings or errors;
+            // such a value is treated as not matching the format.
+            $errors = $this->outputClass::getLastErrors();
+            if (
+                (isset($errors['warning_count']) && 0 < $errors['warning_count']) ||
+                (isset($errors['error_count']) && 0 < $errors['error_count'])
+            ) {
+                return null;
+            }
+
+            return $date;
+        } catch (Throwable) {
+            // Deliberate best effort: a value that can not be parsed is reported as null, never as a failure.
+            return null;
+        }
+    }
+
+    /**
+     * Exposes the format, the timezone name and the output class.
+     */
+    public function metadata(): FieldMetadata
+    {
+        return new FieldMetadata([
+            'format' => $this->format,
+            'timezone' => $this->timezone->getName(),
+            'class' => $this->outputClass,
+        ]);
+    }
+}
diff --git a/src/Schema/DateTimeFieldTest.php b/src/Schema/DateTimeFieldTest.php
new file mode 100644
index 00000000..b583593b
--- /dev/null
+++ b/src/Schema/DateTimeFieldTest.php
@@ -0,0 +1,93 @@
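+<?php
+
+/**
+ * League.Csv (https://csv.thephpleague.com)
+ *
+ * (c) Ignace Nyamagana Butera <nyamsprod@gmail.com>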
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */
+
+declare(strict_types=1);
+
+namespace League\Csv\Schema;
+
+use DateTime;
+use DateTimeImmutable;
+use DateTimeInterface;
+use PHPUnit\Framework\Attributes\CoversClass;
+use PHPUnit\Framework\TestCase;
+
+#[CoversClass(DateTimeField::class)]
+final class DateTimeFieldTest extends TestCase
+{
+    private DateTimeField $field;
+
+    protected function setUp(): void
+    {
+        $this->field = new DateTimeField('Y-m-d');
+    }
+
+    public function testItRejectsABlankFormat(): void
+    {
+        // The format is trimmed before being checked, so whitespace alone is refused too.
+        $this->expectException(\ValueError::class);
+
+        new DateTimeField('   ');
+    }
+
+    public function testParseUsesTheConfiguredFormat(): void
+    {
+        $result = $this->field->parse('2024-01-01');
+
+        self::assertInstanceOf(DateTimeImmutable::class, $result);
+        self::assertSame('2024-01-01', $result->format('Y-m-d'));
+    }
+
+    public function testParseUsesCreateFromFormatWhenFormatIsProvided(): void
+    {
+        $field = new DateTimeField('d-m-Y');
+        $result = $field->parse('01-01-2024');
+
+        self::assertInstanceOf(DateTimeImmutable::class, $result);
+        self::assertSame('2024-01-01', $result->format('Y-m-d'));
+    }
+
+    public function testItAcceptsDateTimeInterfaceAndNormalizesToImmutable(): void
+    {
+        $input = new DateTime('2024-01-01');
+
+        $result = $this->field->parse($input);
+
+        self::assertInstanceOf(DateTimeImmutable::class, $result);
+        self::assertSame('2024-01-01', $result->format('Y-m-d'));
+    }
+
+    public function testItReturnsNullForInvalidValues(): void
+    {
+        self::assertNull($this->field->parse(''));
+        self::assertNull($this->field->parse(' '));
+        self::assertNull($this->field->parse('invalid-date'));
+        self::assertNull($this->field->parse([]));
+        self::assertNull($this->field->parse(123));
+    }
+
+    public function test_it_can_return_another_implementing_datetime_interface(): void
+    {
+        $field = new DateTimeField('Y-m-d', outputClass: MyDate::class);
+        $result = $field->parse('2024-01-01');
+
+        self::assertInstanceOf(MyDate::class, $result);
+        self::assertSame('2024-01-01', $result->format('Y-m-d'));
+        self::assertSame(MyDate::class, $field->metadata()->get('class'));
+        self::assertSame('Y-m-d', $field->metadata()->get('format'));
+        self::assertSame('UTC', $field->metadata()->get('timezone'));
+        self::assertSame('datetime(format=Y-m-d,timezone=UTC)', $field->name());
+    }
+
+    public function test_it_uses_a_simpler_representation_for_timestamp(): void
+    {
+        self::assertSame('datetime(format=timestamp,timezone=UTC)', DateTimeField::timestamp()->name());
+    }
+}
+
+/**
+ * Test fixture: an empty interface extending DateTimeInterface, implemented by MyDate.
+ */
+interface MyDateInterface extends DateTimeInterface
+{
+}
+
+/**
+ * Test fixture: a DateTimeImmutable subclass used as a custom output class.
+ */
+class MyDate extends DateTimeImmutable implements MyDateInterface
+{
+}
diff --git a/src/Schema/EnumField.php b/src/Schema/EnumField.php
new file mode 100644
index 00000000..2aea302d
--- /dev/null
+++ b/src/Schema/EnumField.php
@@ -0,0 +1,124 @@
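+<?php
+
+/**
+ * League.Csv (https://csv.thephpleague.com)
+ *
+ * (c) Ignace Nyamagana Butera <nyamsprod@gmail.com>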
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */
+
+declare(strict_types=1);
+
+namespace League\Csv\Schema;
+
+use BackedEnum;
+use ReflectionEnum;
+use ReflectionEnumUnitCase;
+use Throwable;
+use UnitEnum;
+use ValueError;
+
+use function array_map;
+use function filter_var;
+use function is_int;
+use function is_string;
+use function trim;
+
+use const FILTER_VALIDATE_INT;
+
+/**
+ * Field definition recognising the cases of one enum.
+ *
+ * Cases of a backed enum are looked up by backing value; cases of a pure enum
+ * are looked up by name.
+ */
+final class EnumField extends FieldEvaluator implements Field
+{
+    /** Name of the backing type, or null for a pure enum. */
+    private readonly ?string $backedEnumType;
+    /** @var list<UnitEnum> */
+    private readonly array $cases;
+    /** @var class-string<UnitEnum> */
+    public readonly string $enumClass;
+    /** @var array<string, UnitEnum> cases indexed by case name */
+    private readonly array $byNames;
+
+    /**
+     * @param class-string<UnitEnum> $enumClass
+     *
+     * @throws ValueError if the class can not be reflected as an enum or the threshold is out of range
+     */
+    public function __construct(
+        string $enumClass,
+        float $confidenceThreshold = 0.8
+    ) {
+        try {
+            $ref = new ReflectionEnum($enumClass);
+        } catch (Throwable $exception) {
+            throw new ValueError('Enum "'.$enumClass.'" can not be use: '.$exception->getMessage(), previous: $exception);
+        }
+
+        parent::__construct($confidenceThreshold);
+
+        $this->enumClass = $enumClass;
+        $this->backedEnumType = !$ref->isBacked() ? null : $ref->getBackingType()->getName();
+        $this->cases = array_map(fn (ReflectionEnumUnitCase $case) => $case->getValue(), $ref->getCases());
+
+        $byNames = [];
+        foreach ($this->cases as $case) {
+            $byNames[$case->name] = $case;
+        }
+        $this->byNames = $byNames;
+    }
+
+    public function type(): FieldType
+    {
+        return FieldType::Enum;
+    }
+
+    /**
+     * The name is the backing value of the Enum field type followed by the enum class between parentheses.
+     */
+    public function name(): string
+    {
+        return FieldType::Enum->value.'('.$this->enumClass.')';
+    }
+
+    /**
+     * Converts a value into a case of the configured enum.
+     *
+     * A case of the configured enum is returned as-is. Strings are trimmed
+     * first. For a pure enum the string must be a case name; for a backed enum
+     * the value is matched against the backing values once converted to the
+     * backing type. Null is returned whenever no case matches.
+     */
+    public function parse(mixed $value): ?UnitEnum
+    {
+        if ($value instanceof UnitEnum && $value::class === $this->enumClass) {
+            return $value;
+        }
+
+        if (!is_string($value) && !is_int($value)) {
+            return null;
+        }
+
+        if (is_string($value)) {
+            $value = trim($value);
+            if ('' === $value) {
+                return null;
+            }
+        }
+
+        if (null === $this->backedEnumType) {
+            return !is_string($value) ? null : ($this->byNames[$value] ?? null);
+        }
+
+        // tryFrom() is called from strict-typed code: handing it a scalar of the
+        // wrong type raises a TypeError instead of returning null, so the value
+        // is aligned with the backing type beforehand.
+        if ('int' === $this->backedEnumType && is_string($value)) {
+            $value = filter_var($value, FILTER_VALIDATE_INT);
+            if (false === $value) {
+                return null;
+            }
+        } elseif ('string' === $this->backedEnumType && is_int($value)) {
+            $value = (string) $value;
+        }
+
+        /** @var BackedEnum $enumClass */
+        $enumClass = $this->enumClass;
+
+        return $enumClass::tryFrom($value);
+    }
+
+    /**
+     * Exposes the enum class, its backing type and, for every case, its name
+     * and value (the name doubles as the value for a pure enum).
+     */
+    public function metadata(): FieldMetadata
+    {
+        return new FieldMetadata([
+            'class' => $this->enumClass,
+            'backedType' => $this->backedEnumType,
+            'cases' => array_map(
+                fn (UnitEnum $case): array => [
+                    'name' => $case->name,
+                    'value' => $case instanceof BackedEnum ? $case->value : $case->name,
+                ],
+                $this->cases
+            ),
+        ]);
+    }
+}
diff --git a/src/Schema/EnumFieldTest.php b/src/Schema/EnumFieldTest.php
new file mode 100644
index 00000000..5189c183
--- /dev/null
+++ b/src/Schema/EnumFieldTest.php
@@ -0,0 +1,165 @@
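+<?php
+
+/**
+ * League.Csv (https://csv.thephpleague.com)
+ *
+ * (c) Ignace Nyamagana Butera <nyamsprod@gmail.com>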
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */
+
+declare(strict_types=1);
+
+namespace League\Csv\Schema;
+
+use PHPUnit\Framework\Attributes\CoversClass;
+use PHPUnit\Framework\TestCase;
+use stdClass;
+use ValueError;
+
+#[CoversClass(EnumField::class)]
+final class EnumFieldTest extends TestCase
+{
+    private EnumField $field;
+
+    protected function setUp(): void
+    {
+        $this->field = new EnumField(TestEnum::class);
+    }
+
+    // --------------------------------------------------------
+    // Construction
+    // --------------------------------------------------------
+
+    public function testItThrowsWhenClassIsNotAnEnum(): void
+    {
+        $this->expectException(ValueError::class);
+
+        new EnumField(stdClass::class); /* @phpstan-ignore-line */
+    }
+
+    // --------------------------------------------------------
+    // Pure enum (no backing value)
+    // --------------------------------------------------------
+
+    public function testItParsesEnumByInstance(): void
+    {
+        self::assertSame(TestEnum::A, $this->field->parse(TestEnum::A));
+        self::assertSame(FieldType::Enum, $this->field->type());
+        self::assertSame(TestEnum::class, $this->field->enumClass);
+        self::assertSame('enum(League\Csv\Schema\TestEnum)', $this->field->name());
+    }
+
+    public function testItParsesEnumByName(): void
+    {
+        self::assertSame(TestEnum::A, $this->field->parse('A'));
+    }
+
+    public function testItTrimsStringInput(): void
+    {
+        self::assertSame(TestEnum::A, $this->field->parse(' A '));
+    }
+
+    public function testItReturnsNullForInvalidEnumName(): void
+    {
+        self::assertNull($this->field->parse('INVALID'));
+    }
+
+    // --------------------------------------------------------
+    // Backed enum (string or int backing value)
+    // --------------------------------------------------------
+
+    public function testItParsesBackedEnumFromStringValue(): void
+    {
+        $stringBacked = new EnumField(TestBackedEnum::class);
+
+        self::assertSame(TestBackedEnum::A, $stringBacked->parse('a'));
+    }
+
+    public function testItParsesBackedEnumFromIntValue(): void
+    {
+        $intBacked = new EnumField(TestIntBackedEnum::class);
+
+        self::assertSame(TestIntBackedEnum::A, $intBacked->parse(1));
+    }
+
+    public function testItParsesNumericStringForIntBackedEnum(): void
+    {
+        $intBacked = new EnumField(TestIntBackedEnum::class);
+
+        self::assertSame(TestIntBackedEnum::A, $intBacked->parse('1'));
+    }
+
+    public function testItReturnsNullForInvalidBackedValue(): void
+    {
+        $stringBacked = new EnumField(TestBackedEnum::class);
+
+        self::assertNull($stringBacked->parse('invalid'));
+        self::assertNull($stringBacked->parse([]));
+        self::assertNull($stringBacked->parse(''));
+    }
+
+    // --------------------------------------------------------
+    // Enum instances
+    // --------------------------------------------------------
+
+    public function testItRejectsEnumFromDifferentClass(): void
+    {
+        self::assertNull($this->field->parse(OtherEnum::A));
+    }
+
+    public function test_metadata_contains_expected_structure(): void
+    {
+        $metadata = (new EnumField(TestBackedEnum::class))->metadata();
+        $expectedCases = [
+            ['name' => 'A', 'value' => 'a'],
+            ['name' => 'B', 'value' => 'b'],
+        ];
+
+        self::assertSame(TestBackedEnum::class, $metadata->get('class'));
+        self::assertSame('string', $metadata->get('backedType'));
+        self::assertSame($expectedCases, $metadata->get('cases'));
+    }
+}
+
+/**
+ * Test fixture: a pure enum (no backing value) with two cases.
+ */
+enum TestEnum
+{
+    case A;
+    case B;
+}
+
+/**
+ * Test fixture: a string-backed enum with two cases.
+ */
+enum TestBackedEnum: string
+{
+    case A = 'a';
+    case B = 'b';
+}
+
+/**
+ * Test fixture: an int-backed enum with two cases.
+ */
+enum TestIntBackedEnum: int
+{
+    case A = 1;
+    case B = 2;
+}
+
+/**
+ * Test fixture: a second pure enum sharing its case names with TestEnum.
+ */
+enum OtherEnum
+{
+    case A;
+    case B;
+}
diff --git a/src/Schema/Field.php b/src/Schema/Field.php
new file mode 100644
index 00000000..f131a469
--- /dev/null
+++ b/src/Schema/Field.php
@@ -0,0 +1,44 @@
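+<?php
+
+/**
+ * League.Csv (https://csv.thephpleague.com)
+ *
+ * (c) Ignace Nyamagana Butera <nyamsprod@gmail.com>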
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */
+
+declare(strict_types=1);
+
+namespace League\Csv\Schema;
+
+/**
+ * Contract of a field definition: a parser that also exposes a type, a name,
+ * a confidence threshold, a per-value evaluation and metadata.
+ */
+interface Field extends FieldParser
+{
+    /**
+     * Returns the type of the field.
+     */
+    public function type(): FieldType;
+
+    /**
+     * Returns the name of the field.
+     *
+     * @return non-empty-string
+     */
+    public function name(): string;
+
+    /**
+     * Returns the confidence threshold of the field.
+     *
+     * The range of valid values is from 0.0 up to and including 1.0
+     */
+    public function confidenceThreshold(): float;
+
+    /**
+     * Score a single value to estimate its type.
+     *
+     * returns -1 if the value is invalid
+     * returns 0 if the value is skipped
+     * returns 1 if the value is valid
+     *
+     * @return int<-1, 1>
+     */
+    public function evaluate(mixed $value): int;
+
+    /**
+     * Returns the metadata describing the field.
+     */
+    public function metadata(): FieldMetadata;
+}
diff --git a/src/Schema/FieldEvaluator.php b/src/Schema/FieldEvaluator.php
new file mode 100644
index 00000000..7a129a3a
--- /dev/null
+++ b/src/Schema/FieldEvaluator.php
@@ -0,0 +1,62 @@
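+<?php
+
+/**
+ * League.Csv (https://csv.thephpleague.com)
+ *
+ * (c) Ignace Nyamagana Butera <nyamsprod@gmail.com>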
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */
+
+declare(strict_types=1);
+
+namespace League\Csv\Schema;
+
+use ValueError;
+
+use function is_string;
+use function trim;
+
+/**
+ * Base class holding the confidence threshold of a field and the logic
+ * scoring a single value through the parse() method of the concrete class.
+ */
+abstract class FieldEvaluator
+{
+    protected readonly float $confidenceThreshold;
+
+    /**
+     * @throws ValueError if the threshold is not between 0 and 1
+     */
+    public function __construct(float $confidenceThreshold = 0.8)
+    {
+        $this->confidenceThreshold = self::filterConfidenceThreshold($confidenceThreshold);
+    }
+
+    public function confidenceThreshold(): float
+    {
+        return $this->confidenceThreshold;
+    }
+
+    /**
+     * Returns the threshold unchanged once it is known to lie between 0 and 1, both included.
+     *
+     * @throws ValueError if the threshold is out of range
+     */
+    final protected static function filterConfidenceThreshold(float $confidenceThreshold): float
+    {
+        // Written as the negation of the valid range (not as "< 0 || > 1") so that NAN is rejected as well.
+        if (!($confidenceThreshold >= 0 && $confidenceThreshold <= 1)) {
+            throw new ValueError('the confidence threshold must be between 0 and 1.');
+        }
+
+        return $confidenceThreshold;
+    }
+
+    /**
+     * Scores a single value.
+     *
+     * Null and blank strings are skipped (0). Any other value is handed to
+     * parse(), strings being trimmed first: the score is 1 when parse()
+     * returns something other than null, -1 otherwise.
+     *
+     * @return int<-1, 1>
+     */
+    public function evaluate(mixed $value): int
+    {
+        $candidate = is_string($value) ? trim($value) : $value;
+        if (null === $candidate || '' === $candidate) {
+            return 0;
+        }
+
+        return null === $this->parse($candidate) ? -1 : 1;
+    }
+
+    abstract public function parse(mixed $value): mixed;
+}
diff --git a/src/Schema/FieldEvaluatorTest.php b/src/Schema/FieldEvaluatorTest.php
new file mode 100644
index 00000000..edd30e0a
--- /dev/null
+++ b/src/Schema/FieldEvaluatorTest.php
@@ -0,0 +1,98 @@
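+<?php
+
+/**
+ * League.Csv (https://csv.thephpleague.com)
+ *
+ * (c) Ignace Nyamagana Butera <nyamsprod@gmail.com>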
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */
+
+declare(strict_types=1);
+
+namespace League\Csv\Schema;
+
+use PHPUnit\Framework\Attributes\CoversClass;
+use PHPUnit\Framework\TestCase;
+use ValueError;
+
+#[CoversClass(FieldEvaluator::class)]
+final class FieldEvaluatorTest extends TestCase
+{
+    // --------------------------------------------------------
+    // confidence threshold
+    // --------------------------------------------------------
+
+    public function testItAcceptsValidConfidenceThreshold(): void
+    {
+        self::assertSame(0.5, (new DummyField(0.5))->confidenceThreshold());
+    }
+
+    public function testItThrowsForInvalidConfidenceThreshold(): void
+    {
+        $this->expectException(ValueError::class);
+
+        new DummyField(1.5);
+    }
+
+    // --------------------------------------------------------
+    // evaluate()
+    // --------------------------------------------------------
+
+    public function testEvaluateReturnsZeroForNull(): void
+    {
+        self::assertSame(0, (new DummyField())->evaluate(null));
+    }
+
+    public function testEvaluateReturnsZeroForEmptyString(): void
+    {
+        $evaluator = new DummyField();
+
+        self::assertSame(0, $evaluator->evaluate(''));
+        self::assertSame(0, $evaluator->evaluate(' '));
+    }
+
+    public function testEvaluateReturnsOneForValidValue(): void
+    {
+        self::assertSame(1, (new DummyField())->evaluate('valid-value'));
+    }
+
+    public function testEvaluateReturnsMinusOneForInvalidValue(): void
+    {
+        // DummyField looks for a lowercase "valid": the capital A makes the value invalid.
+        self::assertSame(-1, (new DummyField())->evaluate('invAlid'));
+    }
+}
+
+/**
+ * Test fixture: accepts any string containing the word "valid".
+ */
+final class DummyField extends FieldEvaluator
+{
+    public function type(): FieldType
+    {
+        return FieldType::String;
+    }
+
+    public function name(): string
+    {
+        return 'dummy';
+    }
+
+    public function parse(mixed $value): ?string
+    {
+        if (!is_string($value) || !str_contains($value, 'valid')) {
+            return null;
+        }
+
+        return $value;
+    }
+
+    public function metadata(): FieldMetadata
+    {
+        return new FieldMetadata();
+    }
+}
diff --git a/src/Schema/FieldList.php b/src/Schema/FieldList.php
new file mode 100644
index 00000000..b9d996b3
--- /dev/null
+++ b/src/Schema/FieldList.php
@@ -0,0 +1,197 @@
+
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */
+
+declare(strict_types=1);
+
+namespace League\Csv\Schema;
+
+use Countable;
+use Iterator;
+use IteratorAggregate;
+use ValueError;
+
+use function array_filter;
+use function array_flip;
+use function array_key_exists;
+use function array_values;
+use function count;
+
+/**
+ * Immutable, ordered list of Field instances.
+ *
+ * Offsets are zero-based; a negative offset counts from the end of the
+ * list (-1 is the last field). Methods that would change the content
+ * return a new instance and leave the current one untouched; when
+ * nothing changes the current instance is returned.
+ *
+ * @implements IteratorAggregate<int, Field>
+ */
+final class FieldList implements Countable, IteratorAggregate
+{
+    /** @var list<Field> */
+    private array $fields;
+
+    public function __construct(Field ...$fields)
+    {
+        // Re-index so that offsets are always consecutive integers starting at 0.
+        $this->fields = array_values($fields);
+    }
+
+    /**
+     * Returns a list made of a BooleanField, a NumericField and a JsonField,
+     * in that order, each created with its default arguments.
+     */
+    public static function default(): self
+    {
+        return new self(
+            new BooleanField(),
+            new NumericField(),
+            new JsonField(),
+        );
+    }
+
+    public function isEmpty(): bool
+    {
+        return [] === $this->fields;
+    }
+
+    public function count(): int
+    {
+        return count($this->fields);
+    }
+
+    /**
+     * @return Iterator<int, Field>
+     */
+    public function getIterator(): Iterator
+    {
+        yield from $this->fields;
+    }
+
+    /**
+     * @return list<Field>
+     */
+    public function all(): array
+    {
+        return $this->fields;
+    }
+
+    /**
+     * Returns the first field, or null when the list is empty.
+     */
+    public function first(): ?Field
+    {
+        return $this->nth(0);
+    }
+
+    /**
+     * Returns the last field, or null when the list is empty.
+     */
+    public function last(): ?Field
+    {
+        return $this->nth(-1);
+    }
+
+    /**
+     * Returns the field at the given offset, or null when the offset does not exist.
+     */
+    public function nth(int $offset): ?Field
+    {
+        $index = $this->offset($offset);
+
+        // Explicit check: never use null as an array offset.
+        return null === $index ? null : $this->fields[$index];
+    }
+
+    /**
+     * Returns the field at the given offset.
+     *
+     * @throws ValueError if the offset does not exist
+     */
+    public function get(int $offset): Field
+    {
+        return $this->nth($offset) ?? throw new ValueError('Invalid field offset: '.$offset);
+    }
+
+    /**
+     * Resolves a possibly negative offset into an existing array index,
+     * or null when no field lives at that position.
+     */
+    private function offset(int $offset): ?int
+    {
+        if ($offset < 0) {
+            $offset += count($this->fields);
+        }
+
+        return array_key_exists($offset, $this->fields) ? $offset : null;
+    }
+
+    /**
+     * Returns a list with the given fields (or the fields of the given lists)
+     * added after the current ones.
+     */
+    public function append(Field|self ...$items): self
+    {
+        $fields = self::flatten(...$items);
+
+        return [] === $fields ? $this : new self(...$this->fields, ...$fields);
+    }
+
+    /**
+     * Returns a list with the given fields (or the fields of the given lists)
+     * added before the current ones.
+     */
+    public function prepend(Field|self ...$items): self
+    {
+        $fields = self::flatten(...$items);
+
+        return [] === $fields ? $this : new self(...$fields, ...$this->fields);
+    }
+
+    /**
+     * Expands every FieldList argument into its fields, preserving order.
+     *
+     * @return list<Field>
+     */
+    private static function flatten(Field|self ...$items): array
+    {
+        $fields = [];
+        foreach ($items as $item) {
+            if ($item instanceof Field) {
+                $fields[] = $item;
+                continue;
+            }
+
+            foreach ($item->fields as $field) {
+                $fields[] = $field;
+            }
+        }
+
+        return $fields;
+    }
+
+    /**
+     * Returns a list where the field at the given offset is replaced.
+     *
+     * @throws ValueError if the offset does not exist
+     */
+    public function replace(int $offset, Field $field): self
+    {
+        $found = $this->offset($offset);
+        if (null === $found) {
+            throw new ValueError('the offset: '.$offset.' does not exist.');
+        }
+
+        $fields = $this->fields;
+        $fields[$found] = $field;
+
+        return new self(...$fields);
+    }
+
+    /**
+     * Returns a list without the fields located at the given offsets.
+     *
+     * Unknown offsets are ignored; when none of the offsets exists the
+     * current instance is returned. Removing every field yields an empty list.
+     */
+    public function removeByOffset(int ...$offsets): self
+    {
+        $validOffsets = [];
+        foreach ($offsets as $offset) {
+            $index = $this->offset($offset);
+            if (null !== $index) {
+                $validOffsets[] = $index;
+            }
+        }
+
+        if ([] === $validOffsets) {
+            return $this;
+        }
+
+        // Flip to get O(1) membership checks on the indexes to drop.
+        $validOffsets = array_flip($validOffsets);
+        $fields = [];
+        foreach ($this->fields as $offset => $field) {
+            if (!isset($validOffsets[$offset])) {
+                $fields[] = $field;
+            }
+        }
+
+        // At least one field was removed at this point, so an empty result
+        // is legitimate and must not fall back to the current instance.
+        return new self(...$fields);
+    }
+
+    /**
+     * Returns a list without the fields of the given type; the current
+     * instance is returned when no field matches.
+     */
+    public function removeByType(FieldType $fieldType): self
+    {
+        // array_filter preserves keys, so an unchanged result is identical to the source array.
+        $fields = array_filter(
+            $this->fields,
+            fn (Field $field): bool => $field->type() !== $fieldType
+        );
+
+        return $this->fields === $fields ? $this : new self(...$fields);
+    }
+
+    /**
+     * Returns a list without the fields having the given name (or the name
+     * of the given field); the current instance is returned when no field matches.
+     */
+    public function removeByName(Field|string $name): self
+    {
+        if ($name instanceof Field) {
+            $name = $name->name();
+        }
+
+        $fields = array_filter(
+            $this->fields,
+            fn (Field $field): bool => $field->name() !== $name
+        );
+
+        return $this->fields === $fields ? $this : new self(...$fields);
+    }
+}
diff --git a/src/Schema/FieldListTest.php b/src/Schema/FieldListTest.php
new file mode 100644
index 00000000..15315548
--- /dev/null
+++ b/src/Schema/FieldListTest.php
@@ -0,0 +1,249 @@
+
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */
+
+declare(strict_types=1);
+
+namespace League\Csv\Schema;
+
+use PHPUnit\Framework\TestCase;
+use ValueError;
+
+use function iterator_to_array;
+
+/**
+ * Behavioural tests for FieldList: access by offset, immutability of the
+ * modifying methods and the "same instance when nothing changes" rule.
+ */
+final class FieldListTest extends TestCase
+{
+    private Field $stringField;
+    private Field $numericField;
+    private Field $booleanField;
+
+    protected function setUp(): void
+    {
+        $this->stringField = $this->createField(FieldType::String);
+        $this->numericField = $this->createField(FieldType::Numeric);
+        $this->booleanField = $this->createField(FieldType::Boolean);
+    }
+
+    /**
+     * Builds a stub Field whose type is the given one and whose name is the enum case name.
+     */
+    private function createField(FieldType $type): Field
+    {
+        return new class ($type) implements Field {
+            public function __construct(private FieldType $type)
+            {
+            }
+
+            public function type(): FieldType
+            {
+                return $this->type;
+            }
+
+            public function name(): string
+            {
+                return $this->type->name;
+            }
+
+            public function metadata(): FieldMetadata
+            {
+                return new FieldMetadata();
+            }
+
+            public function confidenceThreshold(): float
+            {
+                return 0.5;
+            }
+
+            public function parse(mixed $value): mixed
+            {
+                return $value;
+            }
+
+            public function evaluate(mixed $value): int
+            {
+                return 1;
+            }
+        };
+    }
+
+    public function testConstructAndAll(): void
+    {
+        $fields = new FieldList($this->stringField, $this->numericField);
+
+        self::assertSame([$this->stringField, $this->numericField], $fields->all());
+    }
+
+    public function testIsEmpty(): void
+    {
+        $empty = new FieldList();
+        $filled = new FieldList($this->stringField);
+
+        self::assertTrue($empty->isEmpty());
+        self::assertFalse($filled->isEmpty());
+    }
+
+    public function testCount(): void
+    {
+        self::assertCount(2, new FieldList($this->stringField, $this->numericField));
+    }
+
+    public function testIterator(): void
+    {
+        $fields = new FieldList($this->stringField, $this->numericField);
+
+        self::assertSame([$this->stringField, $this->numericField], iterator_to_array($fields));
+    }
+
+    public function testFirstAndLast(): void
+    {
+        $fields = new FieldList($this->stringField, $this->numericField, $this->booleanField);
+
+        self::assertSame($this->stringField, $fields->first());
+        self::assertSame($this->booleanField, $fields->last());
+    }
+
+    public function testNthWithPositiveOffset(): void
+    {
+        $fields = new FieldList($this->stringField, $this->numericField);
+
+        self::assertSame($this->numericField, $fields->nth(1));
+    }
+
+    public function testNthWithNegativeOffset(): void
+    {
+        $fields = new FieldList($this->stringField, $this->numericField, $this->booleanField);
+
+        self::assertSame($this->booleanField, $fields->nth(-1));
+        self::assertSame($this->numericField, $fields->nth(-2));
+    }
+
+    public function testNthOutOfBounds(): void
+    {
+        $fields = new FieldList($this->stringField);
+
+        foreach ([10, -10] as $unknownOffset) {
+            self::assertNull($fields->nth($unknownOffset));
+        }
+    }
+
+    public function testGet(): void
+    {
+        $fields = new FieldList($this->stringField);
+
+        self::assertSame($this->stringField, $fields->get(0));
+    }
+
+    public function testGetThrows(): void
+    {
+        $fields = new FieldList();
+
+        $this->expectException(ValueError::class);
+        $this->expectExceptionMessage('Invalid field offset: 0');
+
+        $fields->get(0);
+    }
+
+    public function testAppend(): void
+    {
+        $original = new FieldList($this->stringField);
+
+        $result = $original->append(new FieldList($this->numericField));
+
+        self::assertSame([$this->stringField, $this->numericField], $result->all());
+        // the source list must be left untouched
+        self::assertSame([$this->stringField], $original->all());
+    }
+
+    public function testPrepend(): void
+    {
+        $original = new FieldList($this->stringField);
+
+        $result = $original->prepend($this->numericField);
+
+        self::assertSame([$this->numericField, $this->stringField], $result->all());
+        // the source list must be left untouched
+        self::assertSame([$this->stringField], $original->all());
+    }
+
+    public function testReplace(): void
+    {
+        $original = new FieldList($this->stringField, $this->numericField);
+
+        $result = $original->replace(0, $this->booleanField);
+
+        self::assertSame([$this->booleanField, $this->numericField], $result->all());
+        // the source list must be left untouched
+        self::assertSame([$this->stringField, $this->numericField], $original->all());
+    }
+
+    public function testReplaceThrows(): void
+    {
+        $fields = new FieldList();
+
+        $this->expectException(ValueError::class);
+
+        $fields->replace(0, $this->stringField);
+    }
+
+    public function testRemoveByOffset(): void
+    {
+        $original = new FieldList($this->stringField, $this->numericField, $this->booleanField);
+
+        $result = $original->removeByOffset(1);
+
+        self::assertSame([$this->stringField, $this->booleanField], $result->all());
+    }
+
+    public function testRemoveByOffsetMultiple(): void
+    {
+        $original = new FieldList($this->stringField, $this->numericField, $this->booleanField);
+
+        $result = $original->removeByOffset(0, 2);
+
+        self::assertSame([$this->numericField], $result->all());
+    }
+
+    public function testRemoveByOffsetInvalidReturnsSameInstance(): void
+    {
+        $original = new FieldList($this->stringField);
+
+        self::assertSame($original, $original->removeByOffset(10));
+    }
+
+    public function testRemoveByType(): void
+    {
+        $original = new FieldList($this->stringField, $this->numericField);
+
+        $result = $original->removeByType(FieldType::String);
+
+        self::assertSame([$this->numericField], $result->all());
+    }
+
+    public function testRemoveByTypeNoMatchReturnsSameInstance(): void
+    {
+        $original = new FieldList($this->stringField);
+
+        self::assertSame($original, $original->removeByType(FieldType::Numeric));
+    }
+
+    public function testRemoveByName(): void
+    {
+        $original = new FieldList($this->stringField, $this->numericField);
+
+        $result = $original->removeByName($this->stringField);
+
+        self::assertSame([$this->numericField], $result->all());
+    }
+
+    public function testRemoveByNameNoMatchReturnsSameInstance(): void
+    {
+        $original = new FieldList($this->stringField);
+
+        self::assertSame($original, $original->removeByName('enum'));
+    }
+}
diff --git a/src/Schema/FieldMetadata.php b/src/Schema/FieldMetadata.php
new file mode 100644
index 00000000..db45aad3
--- /dev/null
+++ b/src/Schema/FieldMetadata.php
@@ -0,0 +1,105 @@
+
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */
+
+declare(strict_types=1);
+
+namespace League\Csv\Schema;
+
+use Countable;
+use Iterator;
+use IteratorAggregate;
+use ValueError;
+
+use function array_key_exists;
+use function array_keys;
+use function count;
+
+/**
+ * Immutable key/value bag describing a field.
+ *
+ * Keys are unique: building or merging metadata with a key that is
+ * already present throws a ValueError.
+ */
+final class FieldMetadata implements Countable, IteratorAggregate
+{
+    private readonly array $data;
+
+    /**
+     * @throws ValueError if the iterable yields the same key more than once
+     */
+    public function __construct(iterable $data = [])
+    {
+        $collected = [];
+        foreach ($data as $name => $item) {
+            self::assertNoDuplicate($collected, $name);
+            $collected[$name] = $item;
+        }
+
+        $this->data = $collected;
+    }
+
+    /**
+     * @throws ValueError if the key is already present in the given data
+     */
+    private static function assertNoDuplicate(array $data, string|int $key): void
+    {
+        if (array_key_exists($key, $data)) {
+            throw new ValueError('The key already exists: '.$key);
+        }
+    }
+
+    public function count(): int
+    {
+        return count($this->data);
+    }
+
+    /**
+     * @return Iterator
+     */
+    public function getIterator(): Iterator
+    {
+        yield from $this->data;
+    }
+
+    /**
+     * Returns the metadata as a plain array; nested FieldMetadata
+     * values are converted to arrays recursively.
+     */
+    public function all(): array
+    {
+        $plain = [];
+        foreach ($this->data as $name => $item) {
+            $plain[$name] = $item instanceof self ? $item->all() : $item;
+        }
+
+        return $plain;
+    }
+
+    public function isEmpty(): bool
+    {
+        return [] === $this->data;
+    }
+
+    /**
+     * @return list
+     */
+    public function keys(): array
+    {
+        return array_keys($this->data);
+    }
+
+    public function has(int|string $offset): bool
+    {
+        return array_key_exists($offset, $this->data);
+    }
+
+    /**
+     * @throws ValueError if the key does not exist
+     */
+    public function get(int|string $offset): mixed
+    {
+        if (!$this->has($offset)) {
+            throw new ValueError('The key does not exist: '.$offset);
+        }
+
+        return $this->data[$offset];
+    }
+
+    /**
+     * Returns a new instance holding the current entries followed by the
+     * entries of every given metadata; called without argument it returns
+     * the current instance.
+     *
+     * @throws ValueError if a key is present more than once
+     */
+    public function union(FieldMetadata ...$metadatas): self
+    {
+        if ([] === $metadatas) {
+            return $this;
+        }
+
+        $merged = $this->data;
+        foreach ($metadatas as $other) {
+            foreach ($other->data as $name => $item) {
+                self::assertNoDuplicate($merged, $name);
+                $merged[$name] = $item;
+            }
+        }
+
+        return new self($merged);
+    }
+}
diff --git a/src/Schema/FieldMetadataTest.php b/src/Schema/FieldMetadataTest.php
new file mode 100644
index 00000000..cefa4d89
--- /dev/null
+++ b/src/Schema/FieldMetadataTest.php
@@ -0,0 +1,145 @@
+
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */
+
+declare(strict_types=1);
+
+namespace League\Csv\Schema;
+
+use Iterator;
+use IteratorAggregate;
+use PHPUnit\Framework\Attributes\CoversClass;
+use PHPUnit\Framework\TestCase;
+use ValueError;
+
+use function iterator_to_array;
+
+/**
+ * Behavioural tests for FieldMetadata: read access, iteration,
+ * duplicate-key detection and union().
+ */
+#[CoversClass(FieldMetadata::class)]
+final class FieldMetadataTest extends TestCase
+{
+    public function testConstructAndAll(): void
+    {
+        $input = ['a' => 1, 'b' => 2];
+
+        self::assertSame(['a' => 1, 'b' => 2], (new FieldMetadata($input))->all());
+    }
+
+    public function testCount(): void
+    {
+        self::assertCount(2, new FieldMetadata(['a' => 1, 'b' => 2]));
+    }
+
+    public function testIsEmpty(): void
+    {
+        $empty = new FieldMetadata([]);
+        $filled = new FieldMetadata(['a' => 1]);
+
+        self::assertTrue($empty->isEmpty());
+        self::assertFalse($filled->isEmpty());
+    }
+
+    public function testKeys(): void
+    {
+        $subject = new FieldMetadata(['a' => 1, 'b' => 2]);
+
+        self::assertSame(['a', 'b'], $subject->keys());
+    }
+
+    public function testHas(): void
+    {
+        $subject = new FieldMetadata(['a' => 1]);
+
+        self::assertTrue($subject->has('a'));
+        self::assertFalse($subject->has('b'));
+    }
+
+    public function testGet(): void
+    {
+        $subject = new FieldMetadata(['a' => 42]);
+
+        self::assertSame(42, $subject->get('a'));
+    }
+
+    public function testGetThrowsOnMissingKey(): void
+    {
+        $subject = new FieldMetadata();
+
+        $this->expectException(ValueError::class);
+        $this->expectExceptionMessage('The key does not exist: a');
+
+        $subject->get('a');
+    }
+
+    public function testIterator(): void
+    {
+        $input = ['a' => 1, 'b' => 2];
+
+        self::assertSame($input, iterator_to_array(new FieldMetadata($input)));
+    }
+
+    public function testConstructWithDuplicateKeysThrows(): void
+    {
+        // An array cannot hold the same key twice, so a generator-backed
+        // aggregate is used to feed the constructor with a duplicate.
+        $duplicated = new class () implements IteratorAggregate {
+            public function getIterator(): Iterator
+            {
+                yield 'a' => 1;
+                yield 'a' => 2;
+            }
+        };
+
+        $this->expectException(ValueError::class);
+
+        new FieldMetadata($duplicated);
+    }
+
+    public function testMergeSingle(): void
+    {
+        $left = new FieldMetadata(['a' => 1]);
+        $right = new FieldMetadata(['b' => 2]);
+
+        self::assertSame(['a' => 1, 'b' => 2], $left->union($right)->all());
+    }
+
+    public function testMergeMultiple(): void
+    {
+        $first = new FieldMetadata(['a' => 1]);
+        $second = new FieldMetadata(['b' => 2]);
+        $third = new FieldMetadata(['c' => 3]);
+
+        $expected = [
+            'a' => 1,
+            'b' => 2,
+            'c' => 3,
+        ];
+
+        self::assertSame($expected, $first->union($second, $third)->all());
+    }
+
+    public function testMergeDuplicateKeysThrows(): void
+    {
+        $left = new FieldMetadata(['a' => 1]);
+        $right = new FieldMetadata(['a' => 2]);
+
+        $this->expectException(ValueError::class);
+
+        $left->union($right);
+    }
+
+    public function testMergeWithNoArgumentsReturnsSameInstance(): void
+    {
+        $subject = new FieldMetadata(['a' => 1]);
+
+        self::assertSame($subject, $subject->union());
+    }
+}
diff --git a/src/Schema/FieldParser.php b/src/Schema/FieldParser.php
new file mode 100644
index 00000000..25bdeca6
--- /dev/null
+++ b/src/Schema/FieldParser.php
@@ -0,0 +1,29 @@
+
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */
+
+declare(strict_types=1);
+
+namespace League\Csv\Schema;
+
+/**
+ * Contract for objects able to turn a raw value into a normalized value of type T.
+ *
+ * @template T
+ */
+interface FieldParser
+{
+ /**
+  * Tries to parse and normalize the value according to the type
+  * handled by the implementation. If the value cannot be parsed,
+  * null is returned.
+  *
+  * @return ?T
+  */
+ public function parse(mixed $value): mixed;
+}
diff --git a/src/Schema/FieldType.php b/src/Schema/FieldType.php
new file mode 100644
index 00000000..ad602a49
--- /dev/null
+++ b/src/Schema/FieldType.php
@@ -0,0 +1,27 @@
+
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */
+
+declare(strict_types=1);
+
+namespace League\Csv\Schema;
+
+/**
+ * Types a schema field can report through its type() method.
+ *
+ * The enum is string-backed: each case maps to its lowercase name.
+ */
+enum FieldType: string
+{
+ case Boolean = 'boolean';
+ case Custom = 'custom';
+ case Datetime = 'datetime';
+ case Enum = 'enum';
+ case Json = 'json';
+ case Numeric = 'numeric';
+ case String = 'string';
+ case Set = 'set';
+ case Time = 'time';
+}
diff --git a/src/Schema/Inspector.php b/src/Schema/Inspector.php
new file mode 100644
index 00000000..509a9ada
--- /dev/null
+++ b/src/Schema/Inspector.php
@@ -0,0 +1,118 @@
+
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */
+
+declare(strict_types=1);
+
+namespace League\Csv\Schema;
+
+use League\Csv\InvalidArgument;
+use League\Csv\Statement;
+use League\Csv\SyntaxError;
+use League\Csv\TabularData;
+use ValueError;
+
+use function arsort;
+use function is_string;
+use function trim;
+
+use const SORT_NUMERIC;
+
+/**
+ * Infers a Schema from a sample of tabular data records.
+ *
+ * For every column, each candidate field of the field list is scored by
+ * the share of non-empty sampled values it evaluates to 1; the best
+ * scoring field reaching its own confidence threshold wins, otherwise
+ * the column falls back to a StringField.
+ */
+final readonly class Inspector
+{
+    /**
+     * @param positive-int $sampleLimit
+     *
+     * @throws ValueError if the sample limit is lower than 1
+     */
+    public function __construct(
+        public int $sampleLimit = 10,
+        public FieldList $fieldList = new FieldList(),
+    ) {
+        if ($this->sampleLimit < 1) {
+            throw new ValueError('A sample size must be greater or equal to 1.');
+        }
+    }
+
+    /**
+     * Returns an instance using the given sample limit; the current
+     * instance is returned when the limit is unchanged.
+     *
+     * @param positive-int $sampleLimit
+     */
+    public function withSampleLimit(int $sampleLimit): self
+    {
+        if ($sampleLimit === $this->sampleLimit) {
+            return $this;
+        }
+
+        return new self($sampleLimit, $this->fieldList);
+    }
+
+    /**
+     * Returns a new instance using the given candidate fields.
+     */
+    public function withFields(FieldList $fieldList): self
+    {
+        return new self($this->sampleLimit, $fieldList);
+    }
+
+    /**
+     * Returns an instance configured with FieldList::default().
+     *
+     * @param positive-int $sampleLimit
+     */
+    public static function default(int $sampleLimit = 10): self
+    {
+        return new self($sampleLimit, FieldList::default());
+    }
+
+    /**
+     * Builds the schema of the given tabular data from its first records.
+     *
+     * @throws InvalidArgument
+     * @throws SyntaxError
+     * @throws \League\Csv\Exception
+     */
+    public function schema(TabularData $tabularData, array $header = []): Schema
+    {
+        $sample = (new Statement())->limit($this->sampleLimit)->process($tabularData, $header);
+
+        $hits = [];
+        $totals = [];
+        foreach ($sample as $record) {
+            foreach ($record as $column => $value) {
+                // Register the column even when all its values turn out to be empty.
+                $totals[$column] ??= 0;
+                $hits[$column] ??= [];
+
+                $candidate = is_string($value) ? trim($value) : $value;
+                if (null === $candidate || '' === $candidate) {
+                    continue;
+                }
+
+                ++$totals[$column];
+                foreach ($this->fieldList as $offset => $field) {
+                    $matched = 1 === $field->evaluate($candidate) ? 1 : 0;
+                    $hits[$column][$offset] = ($hits[$column][$offset] ?? 0) + $matched;
+                }
+            }
+        }
+
+        $fields = [];
+        foreach ($hits as $column => $columnHits) {
+            $fields[$column] = $this->selectField($columnHits, $totals[$column] ?? 0);
+        }
+
+        return new Schema($fields);
+    }
+
+    /**
+     * Picks the field for one column from its per-field hit counts.
+     *
+     * @param array<int, int> $columnHits hit count indexed by field list offset
+     * @param int $total number of non-empty values sampled for the column
+     */
+    private function selectField(array $columnHits, int $total): Field
+    {
+        if (0 === $total) {
+            return new StringField();
+        }
+
+        $ratios = [];
+        foreach ($columnHits as $offset => $count) {
+            $ratios[$offset] = $count / $total;
+        }
+
+        // Highest ratio first; keys (field offsets) are preserved.
+        arsort($ratios, SORT_NUMERIC);
+
+        foreach ($ratios as $offset => $ratio) {
+            $field = $this->fieldList->get($offset);
+            if ($ratio >= $field->confidenceThreshold()) {
+                return $field;
+            }
+        }
+
+        return new StringField();
+    }
+}
diff --git a/src/Schema/InspectorTest.php b/src/Schema/InspectorTest.php
new file mode 100644
index 00000000..9af4b263
--- /dev/null
+++ b/src/Schema/InspectorTest.php
@@ -0,0 +1,304 @@
+
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */
+
+declare(strict_types=1);
+
+namespace League\Csv\Schema;
+
+use League\Csv\Reader;
+use PHPUnit\Framework\TestCase;
+use ValueError;
+
+use function array_map;
+use function implode;
+use function rand;
+use function random_int;
+use function str_repeat;
+use function str_shuffle;
+use function substr;
+
+/**
+ * Tests for Inspector: constructor validation, the with*() modifiers,
+ * schema inference on fixed documents and randomized ("fuzz") inputs.
+ */
+final class InspectorTest extends TestCase
+{
+ /**
+  * Builds a Reader from the given content, using ';' as delimiter
+  * and the first record as header.
+  */
+ private function csv(string $content): Reader
+ {
+ $reader = Reader::fromString($content);
+ $reader->setHeaderOffset(0);
+ $reader->setDelimiter(';');
+
+ return $reader;
+ }
+
+ public function testConstructorRejectsInvalidSampleLimit(): void
+ {
+ $this->expectException(ValueError::class);
+
+ new Inspector(0); /* @phpstan-ignore-line */
+ }
+
+ public function testWithSampleLimitReturnsSameInstanceIfUnchanged(): void
+ {
+ $inspector = new Inspector(10);
+
+ self::assertSame($inspector, $inspector->withSampleLimit(10));
+ }
+
+ public function testWithSampleLimitReturnsNewInstanceIfChanged(): void
+ {
+ $inspector = new Inspector(10);
+ $new = $inspector->withSampleLimit(5);
+
+ self::assertNotSame($inspector, $new);
+ self::assertSame(5, $new->sampleLimit);
+ }
+
+ public function testWithFieldsReturnsNewInstance(): void
+ {
+ $inspector = new Inspector(10);
+ $fieldList = new FieldList();
+
+ $new = $inspector->withFields($fieldList);
+
+ self::assertNotSame($inspector, $new);
+ self::assertSame($fieldList, $new->fieldList);
+ }
+
+ public function testDefaultFactory(): void
+ {
+ $inspector = Inspector::default(20);
+
+ self::assertSame(20, $inspector->sampleLimit);
+ self::assertCount(3, $inspector->fieldList);
+ }
+
+ public function testSchemaFallsBackToStringFieldWhenNoMatch(): void
+ {
+ $csv = $this->csv(<<inferSchema(Inspector::default());
+
+ self::assertSame(['name' => 'string', 'value' => 'string'], $schema->types());
+ }
+
+ public function testSchemaDetectsNumericField(): void
+ {
+ $csv = $this->csv(<<inferSchema(new Inspector(10, new FieldList(new NumericField())));
+
+ self::assertSame(['age' => 'numeric'], $schema->types());
+ }
+
+ public function testSchemaIgnoresEmptyValues(): void
+ {
+ $csv = $this->csv(<<inferSchema(new Inspector(10, new FieldList(new NumericField())));
+
+ self::assertSame('numeric', $schema->get('age')->name());
+ }
+
+ public function testSchemaRespectsSampleLimit(): void
+ {
+ $csv = $this->csv(<<inferSchema(new Inspector(2, new FieldList(new NumericField())));
+
+ self::assertSame('numeric', $schema->get('value')->name());
+ }
+
+ public function testSchemaChoosesBestScoringField(): void
+ {
+ $csv = $this->csv(<<inferSchema(new Inspector(10, $fieldList));
+
+ self::assertSame('string', $schema->get('value')->name());
+ }
+
+ /*******************
+ * FUZZY Tests
+ *******************/
+
+ /**
+  * Joins the cells of each row with ';' and the rows with a newline,
+  * then hands the resulting document to csv().
+  */
+ private function csvFromRows(array $rows): Reader
+ {
+ $content = implode(
+ "\n",
+ array_map(
+ fn (array $row): string => implode(';', $row),
+ $rows
+ )
+ );
+
+ return $this->csv($content);
+ }
+
+ /**
+  * Returns between 1 and 10 lowercase ASCII letters picked at random.
+  */
+ private function randomString(): string
+ {
+ return substr(str_shuffle(str_repeat('abcdefghijklmnopqrstuvwxyz', 5)), 0, random_int(1, 10));
+ }
+
+ /**
+  * Returns a random cell value: an int, a float, a numeric string,
+  * a random string, an empty string or null.
+  */
+ private function randomValue(): mixed
+ {
+ return match (rand(0, 5)) {
+ 0 => random_int(0, 1000), // int
+ 1 => random_int(0, 1000) / 10, // float
+ 2 => (string) random_int(0, 1000), // numeric string
+ 3 => $this->randomString(), // random string
+ 4 => '', // empty string
+ default => null,
+ };
+ }
+
+ public function testFuzzSchemaDoesNotCrash(): void
+ {
+ $inspector = Inspector::default();
+ $columns = ['col1', 'col2', 'col3'];
+ // $rows is not reset between iterations, so the document grows on every pass.
+ $rows = [$columns];
+ for ($i = 0; $i < 50; $i++) {
+ // rand() is re-evaluated on every check of the loop condition.
+ for ($r = 0; $r < rand(1, 20); $r++) {
+ $rows[] = [
+ $this->randomValue(),
+ $this->randomValue(),
+ $this->randomValue(),
+ ];
+ }
+
+ $csv = $this->csvFromRows($rows);
+
+ self::assertSame($columns, $inspector->schema($csv)->names());
+ }
+
+ }
+
+ public function testFuzzTypesAreAlwaysNonEmptyStrings(): void
+ {
+ $inspector = Inspector::default();
+
+ for ($i = 0; $i < 50; $i++) {
+ $columns = ['a', 'b'];
+
+ $rows = [$columns];
+
+ for ($r = 0; $r < rand(1, 20); $r++) {
+ $rows[] = [
+ $this->randomValue(),
+ $this->randomValue(),
+ ];
+ }
+
+ $schema = $inspector->schema($this->csvFromRows($rows));
+
+ foreach ($schema->types() as $type) {
+ self::assertIsString($type);
+ self::assertNotSame('', $type);
+ }
+ }
+ }
+
+ public function testFuzzEmptyColumnsFallbackToString(): void
+ {
+ $inspector = Inspector::default();
+
+ $rows = [
+ ['col'],
+ ['', null, '', null],
+ ];
+
+ $schema = $inspector->schema($this->csvFromRows($rows));
+
+ self::assertSame('string', $schema->get('col')->name());
+ }
+
+ public function testFuzzNumericColumnsDetected(): void
+ {
+ $inspector = new Inspector(50, new FieldList(new NumericField(), new StringField()));
+
+ for ($i = 0; $i < 30; $i++) {
+ $rows = [
+ ['num'],
+ ];
+
+ for ($r = 0; $r < rand(5, 20); $r++) {
+ $rows[] = [rand(0, 1000)];
+ }
+
+ $schema = $inspector->schema($this->csvFromRows($rows));
+
+ self::assertSame('numeric', $schema->get('num')->name());
+ }
+ }
+
+ public function testFuzzMixedDataPrefersString(): void
+ {
+ $inspector = new Inspector(50, new FieldList(new NumericField(), new StringField()));
+
+ for ($i = 0; $i < 30; $i++) {
+ $rows = [
+ ['mixed'],
+ ];
+
+ // Each of the 20 cells is either an integer or a random string, chosen at random.
+ for ($r = 0; $r < 20; $r++) {
+ $rows[] = [
+ 1 === rand(0, 1)
+ ? rand(0, 100)
+ : $this->randomString(),
+ ];
+ }
+
+ $schema = $inspector->schema($this->csvFromRows($rows));
+
+ self::assertSame('string', $schema->get('mixed')->name());
+ }
+ }
+
+ public function testFuzzSampleLimitDoesNotBreakInference(): void
+ {
+ $fieldList = new FieldList(new NumericField(), new StringField());
+ for ($limit = 1; $limit <= 10; $limit++) {
+ $inspector = new Inspector($limit, $fieldList);
+
+ $rows = [
+ ['value'],
+ ];
+
+ for ($i = 0; $i < 50; $i++) {
+ $rows[] = [random_int(0, 100)];
+ }
+
+ $schema = $inspector->schema($this->csvFromRows($rows));
+
+ self::assertSame('numeric', $schema->get('value')->name());
+ }
+ }
+}
diff --git a/src/Schema/JsonField.php b/src/Schema/JsonField.php
new file mode 100644
index 00000000..dc7d3aef
--- /dev/null
+++ b/src/Schema/JsonField.php
@@ -0,0 +1,83 @@
+
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */
+
+declare(strict_types=1);
+
+namespace League\Csv\Schema;
+
+use ValueError;
+
+use function is_string;
+use function json_decode;
+use function json_encode;
+use function json_last_error;
+use function json_last_error_msg;
+use function trim;
+
+use const JSON_ERROR_NONE;
+use const JSON_THROW_ON_ERROR;
+
+/**
+ * Field recognising string values that hold a JSON document.
+ *
+ * The flags and depth are forwarded to json_decode(); the
+ * JSON_THROW_ON_ERROR flag is always masked out so that failures are
+ * reported through json_last_error() instead of exceptions.
+ */
+final class JsonField extends FieldEvaluator implements Field
+{
+    public readonly int $flags;
+    /** @var int<1, max> */
+    public readonly int $depth;
+
+    /**
+     * @param int<1, max> $depth
+     *
+     * @throws ValueError if encoding an empty array with the given flags and depth fails
+     */
+    public function __construct(
+        int $flags = 0,
+        int $depth = 512,
+        float $confidenceThreshold = 0.8
+    ) {
+        // Probe the arguments by encoding an empty array with them.
+        json_encode([], flags: $flags & ~JSON_THROW_ON_ERROR, depth: $depth);
+        $errorCode = json_last_error();
+        if (JSON_ERROR_NONE !== $errorCode) {
+            throw new ValueError('The flags or the depth given are not valid JSON encoding parameters in PHP; '.json_last_error_msg(), $errorCode);
+        }
+
+        parent::__construct($confidenceThreshold);
+        $this->flags = $flags;
+        $this->depth = $depth;
+    }
+
+    public function type(): FieldType
+    {
+        return FieldType::Json;
+    }
+
+    public function name(): string
+    {
+        return FieldType::Json->value;
+    }
+
+    /**
+     * Decodes the trimmed string as JSON (objects become associative arrays).
+     *
+     * Null is returned for non-string values, blank strings and documents
+     * that fail to decode; the JSON literal "null" also decodes to null.
+     */
+    public function parse(mixed $value): mixed
+    {
+        if (!is_string($value)) {
+            return null;
+        }
+
+        $json = trim($value);
+        if ('' === $json) {
+            return null;
+        }
+
+        $decoded = json_decode(
+            json: $json,
+            associative: true,
+            depth: $this->depth,
+            flags: $this->flags & ~JSON_THROW_ON_ERROR,
+        );
+
+        if (JSON_ERROR_NONE !== json_last_error()) {
+            return null;
+        }
+
+        return $decoded;
+    }
+
+    /**
+     * Exposes the configured flags and depth under the "flags" and "depth" keys.
+     */
+    public function metadata(): FieldMetadata
+    {
+        return new FieldMetadata([
+            'flags' => $this->flags,
+            'depth' => $this->depth,
+        ]);
+    }
+}
diff --git a/src/Schema/JsonFieldTest.php b/src/Schema/JsonFieldTest.php
new file mode 100644
index 00000000..fe0a592a
--- /dev/null
+++ b/src/Schema/JsonFieldTest.php
@@ -0,0 +1,140 @@
+
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */
+
+declare(strict_types=1);
+
+namespace League\Csv\Schema;
+
+use PHPUnit\Framework\Attributes\CoversClass;
+use PHPUnit\Framework\Attributes\DataProvider;
+use PHPUnit\Framework\TestCase;
+use stdClass;
+use ValueError;
+
+/**
+ * Behavioural tests for JsonField: identity, parsing of valid and
+ * invalid documents, depth/flags handling and exposed metadata.
+ */
+#[CoversClass(JsonField::class)]
+final class JsonFieldTest extends TestCase
+{
+    private JsonField $field;
+
+    protected function setUp(): void
+    {
+        $this->field = new JsonField();
+    }
+
+    public function testTypeAndName(): void
+    {
+        self::assertSame(FieldType::Json, $this->field->type());
+        self::assertSame(FieldType::Json->value, $this->field->name());
+    }
+
+    public function testDetailsExposeFlagsAndDepth(): void
+    {
+        $metadata = (new JsonField(flags: JSON_BIGINT_AS_STRING, depth: 256))->metadata();
+
+        self::assertSame(JSON_BIGINT_AS_STRING, $metadata->get('flags'));
+        self::assertSame(256, $metadata->get('depth'));
+    }
+
+    /**
+     * Each dataset holds the JSON input and the expected decoded value.
+     */
+    public static function provideValidJson(): array
+    {
+        return [
+            ['{"a":1}', ['a' => 1]],
+            ['{"a":1,"b":2}', ['a' => 1, 'b' => 2]],
+            ['[1,2,3]', [1, 2, 3]],
+            [' {"foo":"bar"} ', ['foo' => 'bar']],
+            ['{"nested":{"x":1}}', ['nested' => ['x' => 1]]],
+            ['true', true],
+            ['false', false],
+            ['null', null],
+            ['123', 123],
+        ];
+    }
+
+    #[DataProvider('provideValidJson')]
+    public function testParseValidJson(string $input, mixed $expected): void
+    {
+        self::assertSame($expected, $this->field->parse($input));
+    }
+
+    /**
+     * Each dataset holds a single input that must not decode.
+     */
+    public static function provideInvalidJson(): array
+    {
+        return [
+            [''],
+            [' '],
+            ['{invalid}'],
+            ['{"a":1'], // missing closing brace
+            ['[1,2,]'], // trailing comma
+            ['foo'],
+        ];
+    }
+
+    #[DataProvider('provideInvalidJson')]
+    public function testParseInvalidJsonReturnsNull(string $input): void
+    {
+        self::assertNull($this->field->parse($input));
+    }
+
+    public function testParseRejectsNonStringValues(): void
+    {
+        foreach ([null, 123, [], new stdClass()] as $nonString) {
+            self::assertNull($this->field->parse($nonString));
+        }
+    }
+
+    public function testDepthLimitIsRespected(): void
+    {
+        $shallow = new JsonField(depth: 2);
+
+        // the document below nests three levels deep
+        self::assertNull($shallow->parse('{"a":{"b":{"c":1}}}'));
+    }
+
+    public function testFlagsAffectDecoding(): void
+    {
+        $document = '{"big":12345678901234567890}';
+
+        $withoutFlag = (new JsonField())->parse($document);
+        $withFlag = (new JsonField(flags: JSON_BIGINT_AS_STRING))->parse($document);
+
+        // default: bigint becomes float
+        self::assertIsArray($withoutFlag);
+        self::assertIsFloat($withoutFlag['big']);
+
+        // with flag: bigint preserved as string
+        self::assertIsArray($withFlag);
+        self::assertIsString($withFlag['big']);
+    }
+
+    public function testInvalidConstructorArgumentsThrow(): void
+    {
+        $this->expectException(ValueError::class);
+
+        new JsonField(depth: 0); /* @phpstan-ignore-line */
+    }
+
+    public function test_metadata_contains_expected_structure(): void
+    {
+        $expected = [
+            'flags' => JSON_BIGINT_AS_STRING,
+            'depth' => 2,
+        ];
+
+        $subject = new JsonField(depth: 2, flags: JSON_BIGINT_AS_STRING);
+
+        self::assertSame($expected, $subject->metadata()->all());
+    }
+}
diff --git a/src/Schema/NumericField.php b/src/Schema/NumericField.php
new file mode 100644
index 00000000..fa0512ca
--- /dev/null
+++ b/src/Schema/NumericField.php
@@ -0,0 +1,125 @@
+
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */
+
+declare(strict_types=1);
+
+namespace League\Csv\Schema;
+
+use ValueError;
+
+use function filter_var;
+use function is_float;
+use function is_int;
+use function is_numeric;
+use function is_string;
+use function trim;
+
+use const FILTER_VALIDATE_INT;
+
+/**
+ * Field definition matching integers and floating point numbers, optionally
+ * restricted to an inclusive [min, max] range.
+ *
+ * Numeric strings are accepted after trimming; every other non int/float
+ * input is rejected.
+ */
+final class NumericField extends FieldEvaluator implements Field
+{
+    /**
+     * @param int|float|null $min inclusive lower bound, null for no lower bound
+     * @param int|float|null $max inclusive upper bound, null for no upper bound
+     * @param float $confidenceThreshold forwarded unchanged to the parent evaluator
+     *
+     * @throws ValueError if both bounds are set and $min is greater than $max
+     */
+    public function __construct(
+        public readonly int|float|null $min = null,
+        public readonly int|float|null $max = null,
+        float $confidenceThreshold = 0.8
+    ) {
+        if (null !== $min && null !== $max && $min > $max) {
+            // The bounds are numeric values, not string lengths.
+            throw new ValueError('Minimum value can not be greater than maximum value.');
+        }
+
+        parent::__construct($confidenceThreshold);
+    }
+
+    /**
+     * Accepts numbers greater than or equal to $value.
+     */
+    public static function min(int|float $value, float $confidenceThreshold = 0.8): self
+    {
+        return new self(min: $value, max: null, confidenceThreshold: $confidenceThreshold);
+    }
+
+    /**
+     * Accepts numbers lower than or equal to $value.
+     */
+    public static function max(int|float $value, float $confidenceThreshold = 0.8): self
+    {
+        return new self(min: null, max: $value, confidenceThreshold: $confidenceThreshold);
+    }
+
+    /**
+     * Accepts only numbers equal to $value (both bounds set to $value).
+     */
+    public static function fixed(int|float $value, float $confidenceThreshold = 0.8): self
+    {
+        return new self(min: $value, max: $value, confidenceThreshold: $confidenceThreshold);
+    }
+
+    /**
+     * Accepts numbers within the inclusive range [$min, $max].
+     *
+     * @throws ValueError if $min is greater than $max
+     */
+    public static function between(int|float $min, int|float $max, float $confidenceThreshold = 0.8): self
+    {
+        return new self(min: $min, max: $max, confidenceThreshold: $confidenceThreshold);
+    }
+
+    /**
+     * Accepts numbers greater than or equal to 0 (zero is included).
+     */
+    public static function positive(float $confidenceThreshold = 0.8): self
+    {
+        return new self(min: 0, confidenceThreshold: $confidenceThreshold);
+    }
+
+    /**
+     * Accepts numbers lower than or equal to 0 (zero is included).
+     */
+    public static function negative(float $confidenceThreshold = 0.8): self
+    {
+        return new self(max: 0, confidenceThreshold: $confidenceThreshold);
+    }
+
+    public function type(): FieldType
+    {
+        return FieldType::Numeric;
+    }
+
+    /**
+     * Returns the type name followed by the range, when one is set.
+     *
+     * No bounds gives the bare type value, equal bounds give "[n]" and any
+     * other combination gives "[min,max]" with a missing bound left empty.
+     */
+    public function name(): string
+    {
+        $range = match (true) {
+            null === $this->min && null === $this->max => '',
+            $this->min === $this->max => '['.$this->min.']',
+            default => '['.$this->min.','.$this->max.']',
+        };
+
+        return FieldType::Numeric->value.$range;
+    }
+
+    /**
+     * Converts the input into an int or a float that satisfies the bounds.
+     *
+     * @return int|float|null the number, or null when the input is not numeric,
+     *                        is NAN, or lies outside the configured range
+     */
+    public function parse(mixed $value): int|float|null
+    {
+        if (is_string($value)) {
+            $value = trim($value);
+            if ('' === $value || !is_numeric($value)) {
+                return null;
+            }
+
+            // Prefer an int when the string is a plain integer, otherwise fall back to float.
+            $filterValue = filter_var($value, FILTER_VALIDATE_INT);
+            $value = false === $filterValue ? (float) $value : $filterValue;
+        }
+
+        if (!is_float($value) && !is_int($value)) {
+            return null;
+        }
+
+        // NAN compares false against every bound and would otherwise slip through the range checks.
+        if (is_float($value) && \is_nan($value)) {
+            return null;
+        }
+
+        if (null !== $this->min && $value < $this->min) {
+            return null;
+        }
+
+        if (null !== $this->max && $value > $this->max) {
+            return null;
+        }
+
+        return $value;
+    }
+
+    /**
+     * Exposes the bounds under the "constraints" key as "min_value" and "max_value".
+     */
+    public function metadata(): FieldMetadata
+    {
+        return new FieldMetadata([
+            'constraints' => [
+                'min_value' => $this->min,
+                'max_value' => $this->max,
+            ],
+        ]);
+    }
+}
diff --git a/src/Schema/NumericFieldTest.php b/src/Schema/NumericFieldTest.php
new file mode 100644
index 00000000..0d64efce
--- /dev/null
+++ b/src/Schema/NumericFieldTest.php
@@ -0,0 +1,166 @@
+
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */
+
+declare(strict_types=1);
+
+namespace League\Csv\Schema;
+
+use PHPUnit\Framework\Attributes\CoversClass;
+use PHPUnit\Framework\Attributes\DataProvider;
+use PHPUnit\Framework\TestCase;
+use stdClass;
+use ValueError;
+
+/**
+ * Unit tests for NumericField: parsing of native and string numbers,
+ * rejection of non-numeric input and the range-based named constructors.
+ */
+#[CoversClass(NumericField::class)]
+final class NumericFieldTest extends TestCase
+{
+ // Unbounded field shared by the parsing tests.
+ private NumericField $field;
+
+ protected function setUp(): void
+ {
+ $this->field = new NumericField();
+ }
+
+ // --------------------------------------------------------
+ // VALID VALUES → int or float
+ // --------------------------------------------------------
+
+ /**
+  * Datasets are [input, expected]; integer-like strings are expected back
+  * as int, every other numeric string as float.
+  */
+ public static function provideValidNumericValues(): array
+ {
+ return [
+ 'positive int' => [10, 10],
+ 'negative int' => [-5, -5],
+ 'zero' => [0, 0],
+ 'positive float' => [10.5, 10.5],
+ 'negative float' => [-3.14, -3.14],
+ 'string positive int' => ['10', 10],
+ 'string positive float' => ['10.5', 10.5],
+ 'string negative int' => ['-2', -2],
+ 'string positive int with extra spaces' => [' 12 ', 12],
+ 'string positive float with extra spaces' => [' 3.14 ', 3.14],
+ 'string positive power float with extra spaces' => [' 3e14 ', 3e14],
+ ];
+ }
+
+ // assertSame is strict, so the PHP type (int vs float) is part of the expectation.
+ #[DataProvider('provideValidNumericValues')]
+ public function testParseValidValues(mixed $input, int|float $expected): void
+ {
+ self::assertSame($expected, $this->field->parse($input));
+ }
+
+ // --------------------------------------------------------
+ // INVALID VALUES → null
+ // --------------------------------------------------------
+
+ /**
+  * Blank strings, non-numeric strings, booleans, null, arrays and objects.
+  */
+ public static function provideInvalidNumericValues(): array
+ {
+ return [
+ [''],
+ [' '],
+ ['abc'],
+ ['12abc'],
+ ['abc12'],
+ [true],
+ [false],
+ [null],
+ [[]],
+ [new stdClass()],
+ ];
+ }
+
+ #[DataProvider('provideInvalidNumericValues')]
+ public function testParseInvalidValues(mixed $input): void
+ {
+ self::assertNull($this->field->parse($input));
+ }
+
+ // The metadata always carries the constraints entry, even for an unbounded field.
+ public function test_metadata_contains_expected_structure(): void
+ {
+ self::assertFalse($this->field->metadata()->isEmpty());
+ }
+
+ // --------------------------------------------------------
+ // Factory constructors
+ // --------------------------------------------------------
+
+ // Lower bound only: values below 4 are rejected.
+ public function testMinFactory(): void
+ {
+ $field = NumericField::min(4);
+
+ self::assertSame(FieldType::Numeric, $field->type());
+ self::assertSame('numeric[4,]', $field->name());
+ self::assertSame(0.8, $field->confidenceThreshold());
+ self::assertSame(5, $field->parse(5));
+ self::assertNull($field->parse(-4.1));
+ self::assertNull($field->parse('0'));
+ }
+
+ // Upper bound only: values above 4 are rejected.
+ public function testMaxFactory(): void
+ {
+ $field = NumericField::max(4);
+
+ self::assertSame(FieldType::Numeric, $field->type());
+ self::assertSame('numeric[,4]', $field->name());
+ self::assertSame(0.8, $field->confidenceThreshold());
+ self::assertNull($field->parse(5));
+ self::assertSame(-4.1, $field->parse(-4.1));
+ self::assertSame(0, $field->parse('0'));
+ }
+
+ // Both bounds equal: only the exact value is accepted.
+ public function testFixedFactory(): void
+ {
+ $field = NumericField::fixed(4);
+
+ self::assertSame(FieldType::Numeric, $field->type());
+ self::assertSame('numeric[4]', $field->name());
+ self::assertSame(0.8, $field->confidenceThreshold());
+ self::assertNull($field->parse(5));
+ self::assertNull($field->parse(-4.1));
+ self::assertSame(4, $field->parse('4'));
+ }
+
+ // Inclusive range [-4, 4].
+ public function testBetweenFactory(): void
+ {
+ $field = NumericField::between(-4, 4);
+
+ self::assertSame(FieldType::Numeric, $field->type());
+ self::assertSame('numeric[-4,4]', $field->name());
+ self::assertSame(0.8, $field->confidenceThreshold());
+ self::assertNull($field->parse(5));
+ self::assertNull($field->parse(-4.1));
+ self::assertSame(0, $field->parse('0'));
+ }
+
+ // positive() is a lower bound of 0 and forwards the custom threshold.
+ public function testPositiveFactory(): void
+ {
+ $field = NumericField::positive(.5);
+
+ self::assertSame(FieldType::Numeric, $field->type());
+ self::assertSame('numeric[0,]', $field->name());
+ self::assertSame(0.5, $field->confidenceThreshold());
+ }
+
+ // negative() is an upper bound of 0; the int threshold is reported back as float 1.0.
+ public function testNegativeFactory(): void
+ {
+ $field = NumericField::negative(1);
+
+ self::assertSame(FieldType::Numeric, $field->type());
+ self::assertSame('numeric[,0]', $field->name());
+ self::assertSame(1.0, $field->confidenceThreshold());
+ }
+
+ // A minimum greater than the maximum is refused at construction time.
+ public function testItFailsToInstantiateBetweenFactoryWithInvalidValues(): void
+ {
+ $this->expectException(ValueError::class);
+
+ NumericField::between(4, -4);
+ }
+}
diff --git a/src/Schema/Schema.php b/src/Schema/Schema.php
new file mode 100644
index 00000000..bfd1c5bc
--- /dev/null
+++ b/src/Schema/Schema.php
@@ -0,0 +1,161 @@
+
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */
+
+declare(strict_types=1);
+
+namespace League\Csv\Schema;
+
+use Countable;
+use Iterator;
+use IteratorAggregate;
+use League\Csv\MapIterator;
+use League\Csv\TabularData;
+use ValueError;
+
+use function array_diff_key;
+use function array_flip;
+use function array_key_exists;
+use function array_keys;
+use function array_map;
+use function count;
+
+/**
+ * Immutable, ordered collection of Field definitions indexed by column name
+ * or column offset.
+ *
+ * Every modifier (append, replace, remove) returns a new instance and leaves
+ * the current one untouched.
+ *
+ * @implements IteratorAggregate<array-key, Field>
+ */
+final class Schema implements Countable, IteratorAggregate
+{
+    /** @var array<array-key, Field> */
+    private readonly array $fields;
+
+    /**
+     * @param iterable<array-key, Field> $fields
+     *
+     * @throws ValueError if the iterable yields the same key more than once
+     */
+    public function __construct(iterable $fields = [])
+    {
+        $newFields = [];
+        foreach ($fields as $key => $value) {
+            // Generators may yield a key twice; an array would silently overwrite it.
+            self::assertNoDuplicate($newFields, $key);
+            $newFields[$key] = $value;
+        }
+
+        $this->fields = $newFields;
+    }
+
+    /**
+     * Returns a new schema with the field added at the end.
+     *
+     * @throws ValueError if the name is already used
+     */
+    public function append(int|string $name, Field $field): self
+    {
+        self::assertNoDuplicate($this->fields, $name);
+
+        // Keyed assignment is required here: array unpacking ([...$a, ...$b])
+        // renumbers integer keys, which would drop the given integer name and
+        // silently rename the existing integer-indexed fields.
+        $fields = $this->fields;
+        $fields[$name] = $field;
+
+        return new self($fields);
+    }
+
+    /**
+     * Returns a new schema where the field stored under $name is swapped for $field.
+     *
+     * @throws ValueError if the name is unknown
+     */
+    public function replace(int|string $name, Field $field): self
+    {
+        $this->has($name) || throw new ValueError('Field "'.$name.'" does not exist.');
+
+        $fields = $this->fields;
+        $fields[$name] = $field;
+
+        return new self($fields);
+    }
+
+    /**
+     * Returns a schema without the given names; unknown names are ignored.
+     *
+     * The current instance is returned as is when no name is given.
+     */
+    public function remove(int|string ...$names): self
+    {
+        return [] === $names
+            ? $this
+            : new self(array_diff_key($this->fields, array_flip($names)));
+    }
+
+    /**
+     * @throws ValueError if $key is already present in $data
+     */
+    private static function assertNoDuplicate(array $data, string|int $key): void
+    {
+        ! array_key_exists($key, $data) || throw new ValueError('The key already exists: '.$key);
+    }
+
+    public function count(): int
+    {
+        return count($this->fields);
+    }
+
+    /**
+     * @return Iterator<array-key, Field>
+     */
+    public function getIterator(): Iterator
+    {
+        yield from $this->fields;
+    }
+
+    /**
+     * @return array<array-key, Field>
+     */
+    public function all(): array
+    {
+        return $this->fields;
+    }
+
+    public function isEmpty(): bool
+    {
+        return [] === $this->fields;
+    }
+
+    /**
+     * Returns the name() of every field, indexed like the schema itself.
+     *
+     * @return array<array-key, string>
+     */
+    public function types(): array
+    {
+        return array_map(fn (Field $field) => $field->name(), $this->fields);
+    }
+
+    /**
+     * @return list<array-key>
+     */
+    public function names(): array
+    {
+        return array_keys($this->fields);
+    }
+
+    public function has(int|string $offset): bool
+    {
+        return array_key_exists($offset, $this->fields);
+    }
+
+    /**
+     * @throws ValueError if no field is registered under $offset
+     */
+    public function get(int|string $offset): Field
+    {
+        return $this->has($offset) ? $this->fields[$offset] : throw new ValueError('The key does not exist: '.$offset);
+    }
+
+    /**
+     * Lazily applies the callback to each field, keeping the schema keys.
+     *
+     * @template TValue
+     *
+     * @param callable(Field, array-key): TValue $callback
+     *
+     * @return Iterator<array-key, TValue>
+     */
+    public function map(callable $callback): Iterator
+    {
+        foreach ($this->fields as $name => $field) {
+            yield $name => $callback($field, $name);
+        }
+    }
+
+    /**
+     * Runs format() on every record returned by the tabular data.
+     *
+     * @return Iterator<array-key, array<array-key, mixed>>
+     */
+    public function parse(TabularData $tabularData): Iterator
+    {
+        return MapIterator::fromIterable($tabularData->getRecords($this->names()), $this->format(...));
+    }
+
+    /**
+     * Parses a single record with the schema fields.
+     *
+     * Only the columns declared in the schema are kept; a column missing from
+     * the record is handed to its field as null.
+     *
+     * @return array<array-key, mixed>
+     */
+    public function format(array $row): array
+    {
+        $result = [];
+        foreach ($this->fields as $column => $field) {
+            $result[$column] = $field->parse($row[$column] ?? null);
+        }
+
+        return $result;
+    }
+}
diff --git a/src/Schema/SchemaTest.php b/src/Schema/SchemaTest.php
new file mode 100644
index 00000000..0be0ba6d
--- /dev/null
+++ b/src/Schema/SchemaTest.php
@@ -0,0 +1,181 @@
+
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */
+
+declare(strict_types=1);
+
+namespace League\Csv\Schema;
+
+use Iterator;
+use IteratorAggregate;
+use PHPUnit\Framework\TestCase;
+use ValueError;
+
+/**
+ * Unit tests for the read side of Schema: construction, counting, lookup,
+ * iteration and mapping, with both named and integer-indexed fields.
+ */
+final class SchemaTest extends TestCase
+{
+ // Builds a pass-through CustomField whose name() is reported as "custom($name)".
+ private function field(string $name): Field
+ {
+ return new CustomField(fn (mixed $value): mixed => $value, $name, 0.95);
+ }
+
+ public function testConstructAndCount(): void
+ {
+ $schema = new Schema([
+ 'name' => $this->field('string'),
+ 'age' => $this->field('numeric'),
+ ]);
+
+ self::assertCount(2, $schema);
+ }
+
+ // An array cannot hold the same key twice, so a generator is used to yield a duplicate.
+ public function testConstructThrowsOnDuplicateKey(): void
+ {
+ $test = new class () implements IteratorAggregate {
+ public function getIterator(): Iterator
+ {
+ yield 'a' => new StringField();
+ yield 'a' => new BooleanField();
+ }
+ };
+
+ $this->expectException(ValueError::class);
+ new Schema($test);
+ }
+
+ public function testIsEmpty(): void
+ {
+ $schema = new Schema();
+ self::assertTrue($schema->isEmpty());
+
+ $schema = new Schema(['name' => $this->field('string')]);
+ self::assertFalse($schema->isEmpty());
+ }
+
+ // all() hands back the very same field instances, under the same keys.
+ public function testAllReturnsFields(): void
+ {
+ $fields = [
+ 'name' => $this->field('string'),
+ ];
+
+ $schema = new Schema($fields);
+
+ self::assertSame($fields, $schema->all());
+ }
+
+ // names() keeps the declaration order.
+ public function testNames(): void
+ {
+ $schema = new Schema([
+ 'name' => $this->field('string'),
+ 'age' => $this->field('numeric'),
+ ]);
+
+ self::assertSame(['name', 'age'], $schema->names());
+ }
+
+ // types() maps every key to the name() of its field.
+ public function testTypes(): void
+ {
+ $schema = new Schema([
+ 'name' => $this->field('string'),
+ 'age' => $this->field('numeric'),
+ ]);
+
+ self::assertSame([
+ 'name' => 'custom(string)',
+ 'age' => 'custom(numeric)',
+ ], $schema->types());
+ }
+
+ public function testHas(): void
+ {
+ $schema = new Schema([
+ 'name' => $this->field('string'),
+ ]);
+
+ self::assertTrue($schema->has('name'));
+ self::assertFalse($schema->has('age'));
+ }
+
+ public function testGetReturnsField(): void
+ {
+ $field = $this->field('string');
+
+ $schema = new Schema([
+ 'name' => $field,
+ ]);
+
+ self::assertSame($field, $schema->get('name'));
+ }
+
+ public function testGetThrowsOnMissingKey(): void
+ {
+ $schema = new Schema();
+
+ $this->expectException(ValueError::class);
+
+ $schema->get('missing');
+ }
+
+ // Iterating the schema yields key => field pairs in declaration order.
+ public function testIterator(): void
+ {
+ $fields = [
+ 'name' => $this->field('string'),
+ 'age' => $this->field('numeric'),
+ ];
+
+ $schema = new Schema($fields);
+
+ $result = [];
+ foreach ($schema as $key => $field) {
+ $result[$key] = $field;
+ }
+
+ self::assertSame($fields, $result);
+ }
+
+ // map() keeps the schema keys and replaces each field with the callback result.
+ public function testMap(): void
+ {
+ $schema = new Schema([
+ 'name' => $this->field('string'),
+ 'age' => $this->field('numeric'),
+ ]);
+
+ $result = iterator_to_array(
+ $schema->map(fn (Field $field, $key) => $field->name())
+ );
+
+ self::assertSame([
+ 'name' => 'custom(string)',
+ 'age' => 'custom(numeric)',
+ ], $result);
+ }
+
+ // A list of fields is addressed through its integer offsets.
+ public function testGetByNumericIndex(): void
+ {
+ $fields = [
+ $this->field('string'),
+ $this->field('numeric'),
+ ];
+
+ $schema = new Schema($fields);
+
+ self::assertSame('custom(string)', $schema->get(0)->name());
+ self::assertSame('custom(numeric)', $schema->get(1)->name());
+ }
+
+ public function testHasWithNumericIndex(): void
+ {
+ $schema = new Schema([
+ $this->field('string'),
+ ]);
+
+ self::assertTrue($schema->has(0));
+ self::assertFalse($schema->has(1));
+ }
+}
diff --git a/src/Schema/SetField.php b/src/Schema/SetField.php
new file mode 100644
index 00000000..35908fa5
--- /dev/null
+++ b/src/Schema/SetField.php
@@ -0,0 +1,113 @@
+
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */
+
+declare(strict_types=1);
+
+namespace League\Csv\Schema;
+
+use UnitEnum;
+use ValueError;
+
+use function explode;
+use function is_string;
+use function trim;
+
+use const PHP_INT_MAX;
+
+/**
+ * Field definition for a delimited list of enum members stored in a single
+ * cell, e.g. "read,write,delete".
+ *
+ * Each member is resolved through the wrapped EnumField, whose confidence
+ * threshold is reused for this field.
+ */
+final class SetField extends FieldEvaluator implements Field
+{
+    /** @var non-empty-string */
+    public readonly string $separator;
+    public readonly int $limit;
+    public readonly EnumField $enumField;
+
+    /**
+     * @param non-empty-string $separator surrounding whitespace is trimmed before use
+     *
+     * @throws ValueError if the separator is empty once trimmed
+     */
+    public function __construct(EnumField $enumField, string $separator = ',', int $limit = PHP_INT_MAX)
+    {
+        $separator = trim($separator);
+        if ('' === $separator) {
+            throw new ValueError('The set field separator can not be an empty string.');
+        }
+
+        parent::__construct($enumField->confidenceThreshold());
+        $this->enumField = $enumField;
+        $this->separator = $separator;
+        $this->limit = $limit;
+    }
+
+    /**
+     * Builds the field straight from an enum class name.
+     *
+     * @param class-string<UnitEnum> $enumClass
+     * @param non-empty-string $separator
+     */
+    public static function fromEnum(
+        string $enumClass,
+        string $separator = ',',
+        int $limit = PHP_INT_MAX,
+        float $confidenceThreshold = 0.8
+    ): self {
+        $enumField = new EnumField($enumClass, $confidenceThreshold);
+
+        return new self($enumField, $separator, $limit);
+    }
+
+    public function type(): FieldType
+    {
+        return FieldType::Set;
+    }
+
+    public function name(): string
+    {
+        return FieldType::Set->value.'('.$this->enumField->name().')';
+    }
+
+    /**
+     * Splits the value on the separator and resolves every member.
+     *
+     * Empty members, repeated members and members the enum field cannot
+     * resolve are skipped, so the returned list may be empty.
+     *
+     * NOTE(review): $limit is stored and reported by metadata() but is not
+     * consulted here - confirm whether the split is meant to be capped.
+     *
+     * @return list<UnitEnum>|null null for non-string or blank input
+     */
+    public function parse(mixed $value): mixed
+    {
+        if (!is_string($value)) {
+            return null;
+        }
+
+        $value = trim($value);
+        if ('' === $value) {
+            return null;
+        }
+
+        // Indexed by the trimmed raw member so a repeated member is resolved only once.
+        $members = [];
+        foreach (explode($this->separator, $value) as $token) {
+            $token = trim($token);
+            if ('' !== $token && !isset($members[$token])) {
+                $member = $this->enumField->parse($token);
+                if (null !== $member) {
+                    $members[$token] = $member;
+                }
+            }
+        }
+
+        return array_values($members);
+    }
+
+    /**
+     * Reports the separator, the limit and the metadata of the wrapped enum field.
+     */
+    public function metadata(): FieldMetadata
+    {
+        return new FieldMetadata([
+            'separator' => $this->separator,
+            'limit' => $this->limit,
+            'enum' => $this->enumField->metadata(),
+        ]);
+    }
+}
diff --git a/src/Schema/SetFieldTest.php b/src/Schema/SetFieldTest.php
new file mode 100644
index 00000000..274c2e78
--- /dev/null
+++ b/src/Schema/SetFieldTest.php
@@ -0,0 +1,144 @@
+
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */
+
+declare(strict_types=1);
+
+namespace League\Csv\Schema\Tests;
+
+use League\Csv\Schema\FieldType;
+use League\Csv\Schema\SetField;
+use PHPUnit\Framework\TestCase;
+use stdClass;
+use ValueError;
+
+use function array_is_list;
+
+use const PHP_INT_MAX;
+
+/**
+ * Unit tests for SetField, using the TestSetEnum string-backed enum declared
+ * in this file.
+ */
+final class SetFieldTest extends TestCase
+{
+ // Defaults: comma separator and PHP_INT_MAX limit.
+ public function test_it_can_be_instantiated(): void
+ {
+ $field = SetField::fromEnum(TestSetEnum::class);
+
+ self::assertSame(',', $field->separator);
+ self::assertSame(PHP_INT_MAX, $field->limit);
+ }
+
+ public function test_it_trims_the_separator(): void
+ {
+ $field = SetField::fromEnum(TestSetEnum::class, ' | ');
+
+ self::assertSame('|', $field->separator);
+ }
+
+ // A whitespace-only separator is empty once trimmed.
+ public function test_it_throws_when_separator_is_empty(): void
+ {
+ $this->expectException(ValueError::class);
+ $this->expectExceptionMessage('The set field separator can not be an empty string.');
+
+ SetField::fromEnum(TestSetEnum::class, ' ');
+ }
+
+ public function test_it_returns_the_correct_type(): void
+ {
+ $field = SetField::fromEnum(TestSetEnum::class);
+
+ self::assertSame(FieldType::Set, $field->type());
+ }
+
+ // The name wraps the enum field name, which embeds the enum class name.
+ public function test_it_returns_the_correct_name(): void
+ {
+ $field = SetField::fromEnum(TestSetEnum::class);
+
+ self::assertSame('set(enum(League\Csv\Schema\Tests\TestSetEnum))', $field->name());
+ }
+
+ public function test_it_returns_null_for_non_string_values(): void
+ {
+ $field = SetField::fromEnum(TestSetEnum::class);
+
+ self::assertNull($field->parse(null));
+ self::assertNull($field->parse(1));
+ self::assertNull($field->parse(true));
+ self::assertNull($field->parse([]));
+ self::assertNull($field->parse(new stdClass()));
+ }
+
+ public function test_it_returns_null_for_empty_strings(): void
+ {
+ $field = SetField::fromEnum(TestSetEnum::class);
+
+ self::assertNull($field->parse(''));
+ self::assertNull($field->parse(' '));
+ }
+
+ public function test_it_parses_a_set_value(): void
+ {
+ $field = SetField::fromEnum(TestSetEnum::class);
+
+ self::assertSame([TestSetEnum::Read, TestSetEnum::Write, TestSetEnum::Delete], $field->parse('read,write,delete'));
+ }
+
+ // NOTE(review): despite its name, this test expects all three members back
+ // with limit: 2, i.e. it pins that the limit does not truncate the parsed
+ // set - confirm this is the intended contract.
+ public function test_it_respects_the_limit(): void
+ {
+ $field = SetField::fromEnum(TestSetEnum::class, limit: 2);
+
+ self::assertSame([TestSetEnum::Read, TestSetEnum::Write, TestSetEnum::Delete], $field->parse('read,write,delete'));
+ }
+
+ public function test_it_can_use_custom_separator(): void
+ {
+ $field = SetField::fromEnum(TestSetEnum::class, '|');
+
+ self::assertSame([TestSetEnum::Read, TestSetEnum::Write], $field->parse('read|write'));
+ }
+
+ // The metadata nests the enum description under the "enum" key.
+ public function test_it_returns_metadata(): void
+ {
+ $field = SetField::fromEnum(TestSetEnum::class, '|', 3);
+
+ self::assertSame(
+ [
+ 'separator' => '|',
+ 'limit' => 3,
+ 'enum' => [
+ 'class' => TestSetEnum::class,
+ 'backedType' => 'string',
+ 'cases' => [
+ ['name' => 'Read', 'value' => 'read'],
+ ['name' => 'Write', 'value' => 'write'],
+ ['name' => 'Delete', 'value' => 'delete'],
+ ],
+ ],
+ ],
+ $field->metadata()->all()
+ );
+ }
+
+ // Repeated members, empty members and padding are dropped; the result is a re-indexed list.
+ public function test_it_handles_set_with_duplicate_values(): void
+ {
+ $field = SetField::fromEnum(TestSetEnum::class);
+ $value = $field->parse('read, write,read,,delete');
+
+ self::assertIsArray($value);
+ self::assertTrue(array_is_list($value));
+ self::assertCount(3, $value);
+ self::assertSame([TestSetEnum::Read, TestSetEnum::Write, TestSetEnum::Delete], $value);
+ }
+}
+
+/**
+ * String-backed enum used as the member type of the sets under test.
+ */
+enum TestSetEnum: string
+{
+ case Read = 'read';
+ case Write = 'write';
+ case Delete = 'delete';
+}
diff --git a/src/Schema/StringConstraint.php b/src/Schema/StringConstraint.php
new file mode 100644
index 00000000..321572e3
--- /dev/null
+++ b/src/Schema/StringConstraint.php
@@ -0,0 +1,23 @@
+
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */
+
+declare(strict_types=1);
+
+namespace League\Csv\Schema;
+
+/**
+ * Extra rule a string must satisfy to be accepted by a string field.
+ */
+interface StringConstraint
+{
+ /**
+  * Checks the value against the constraint.
+  *
+  * @return ?string the accepted value, or null when the value is rejected
+  */
+ public function apply(string $value): ?string;
+ /**
+  * Name reported by the field using this constraint.
+  *
+  * @return non-empty-string
+  */
+ public function fieldTypeName(): string;
+}
diff --git a/src/Schema/StringField.php b/src/Schema/StringField.php
new file mode 100644
index 00000000..12e6d4f0
--- /dev/null
+++ b/src/Schema/StringField.php
@@ -0,0 +1,140 @@
+
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */
+
+declare(strict_types=1);
+
+namespace League\Csv\Schema;
+
+use function is_string;
+use function trim;
+
+/**
+ * Field definition for textual values, optionally narrowed by a
+ * StringConstraint (length range or structured format).
+ *
+ * Without a constraint every string matches and the confidence threshold
+ * is always reported as 0.0.
+ */
+final class StringField extends FieldEvaluator implements Field
+{
+    public function __construct(
+        public readonly ?StringConstraint $constraint = null,
+        float $confidenceThreshold = 0.0
+    ) {
+        parent::__construct($confidenceThreshold);
+    }
+
+    /**
+     * String of at most $length characters.
+     *
+     * @param positive-int $length
+     */
+    public static function max(int $length, float $confidenceThreshold = 0.8): self
+    {
+        $constraint = StringLengthConstraint::max($length);
+
+        return new self($constraint, $confidenceThreshold);
+    }
+
+    /**
+     * String of at least $length characters.
+     *
+     * @param positive-int $length
+     */
+    public static function min(int $length, float $confidenceThreshold = 0.8): self
+    {
+        $constraint = StringLengthConstraint::min($length);
+
+        return new self($constraint, $confidenceThreshold);
+    }
+
+    /**
+     * String of exactly $length characters.
+     *
+     * @param positive-int $length
+     */
+    public static function fixed(int $length, float $confidenceThreshold = 0.8): self
+    {
+        $constraint = StringLengthConstraint::fixed($length);
+
+        return new self($constraint, $confidenceThreshold);
+    }
+
+    public static function uuid(float $confidenceThreshold = 0.8): self
+    {
+        $constraint = StructuredStringConstraint::uuid();
+
+        return new self($constraint, $confidenceThreshold);
+    }
+
+    public static function ulid(float $confidenceThreshold = 0.8): self
+    {
+        $constraint = StructuredStringConstraint::ulid();
+
+        return new self($constraint, $confidenceThreshold);
+    }
+
+    public static function hexColor(float $confidenceThreshold = 0.8): self
+    {
+        $constraint = StructuredStringConstraint::hexColor();
+
+        return new self($constraint, $confidenceThreshold);
+    }
+
+    public static function jwtToken(float $confidenceThreshold = 0.8): self
+    {
+        $constraint = StructuredStringConstraint::jwtToken();
+
+        return new self($constraint, $confidenceThreshold);
+    }
+
+    public static function md5(float $confidenceThreshold = 0.8): self
+    {
+        $constraint = StructuredStringConstraint::md5();
+
+        return new self($constraint, $confidenceThreshold);
+    }
+
+    public static function sha1(float $confidenceThreshold = 0.8): self
+    {
+        $constraint = StructuredStringConstraint::sha1();
+
+        return new self($constraint, $confidenceThreshold);
+    }
+
+    /**
+     * Returns every structured string field (uuid, ulid, hex color, JWT,
+     * md5, sha1) sharing the same confidence threshold.
+     */
+    public static function cases(float $confidenceThreshold = 0.8): FieldList
+    {
+        return new FieldList(
+            self::uuid($confidenceThreshold),
+            self::ulid($confidenceThreshold),
+            self::hexColor($confidenceThreshold),
+            self::jwtToken($confidenceThreshold),
+            self::md5($confidenceThreshold),
+            self::sha1($confidenceThreshold),
+        );
+    }
+
+    public function type(): FieldType
+    {
+        return FieldType::String;
+    }
+
+    /**
+     * Name of the constraint when one is set, the plain string type value otherwise.
+     */
+    public function name(): string
+    {
+        if (null === $this->constraint) {
+            return FieldType::String->value;
+        }
+
+        return $this->constraint->fieldTypeName();
+    }
+
+    /**
+     * Returns the trimmed string, or null for non-string input, blank
+     * strings and values refused by the constraint.
+     */
+    public function parse(mixed $value): ?string
+    {
+        if (!is_string($value)) {
+            return null;
+        }
+
+        $trimmed = trim($value);
+        if ('' === $trimmed) {
+            return null;
+        }
+
+        if (null === $this->constraint) {
+            return $trimmed;
+        }
+
+        return $this->constraint->apply($trimmed);
+    }
+
+    public function metadata(): FieldMetadata
+    {
+        return new FieldMetadata();
+    }
+
+    /**
+     * Without a constraint any string scores 1 (blank ones included) and
+     * anything else scores 0; otherwise the parent evaluation applies.
+     *
+     * @return int<-1, 1>
+     */
+    public function evaluate(mixed $value): int
+    {
+        if (null !== $this->constraint) {
+            return parent::evaluate($value);
+        }
+
+        return is_string($value) ? 1 : 0;
+    }
+
+    /**
+     * The configured threshold only applies to constrained fields.
+     */
+    public function confidenceThreshold(): float
+    {
+        if (null !== $this->constraint) {
+            return parent::confidenceThreshold();
+        }
+
+        return 0.0;
+    }
+}
diff --git a/src/Schema/StringFieldTest.php b/src/Schema/StringFieldTest.php
new file mode 100644
index 00000000..617e1041
--- /dev/null
+++ b/src/Schema/StringFieldTest.php
@@ -0,0 +1,192 @@
+
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */
+
+declare(strict_types=1);
+
+namespace League\Csv\Schema;
+
+use PHPUnit\Framework\Attributes\CoversClass;
+use PHPUnit\Framework\Attributes\DataProvider;
+use PHPUnit\Framework\TestCase;
+use stdClass;
+
+/**
+ * Unit tests for StringField, both unconstrained and combined with the
+ * length and structured string constraints.
+ */
+#[CoversClass(StructuredStringConstraint::class)]
+#[CoversClass(StringLengthConstraint::class)]
+#[CoversClass(StringField::class)]
+final class StringFieldTest extends TestCase
+{
+ // Unconstrained field shared by the parse/evaluate tests.
+ private StringField $field;
+
+ protected function setUp(): void
+ {
+ $this->field = new StringField();
+ }
+
+ // --------------------------------------------------------
+ // parse()
+ // --------------------------------------------------------
+
+ /**
+  * Datasets are [input, expected]: strings come back trimmed and the
+  * empty string is turned into null.
+  */
+ public static function provideParseValues(): array
+ {
+ return [
+ ['hello', 'hello'],
+ [' world ', 'world'],
+ ['', null],
+ ['123', '123'],
+ ];
+ }
+
+ #[DataProvider('provideParseValues')]
+ public function testParse(mixed $input, ?string $expected): void
+ {
+ self::assertSame($expected, $this->field->parse($input));
+ }
+
+ /**
+  * Every non-string PHP type: numbers, booleans, null, arrays and objects.
+  */
+ public static function provideInvalidParseValues(): array
+ {
+ return [
+ [123],
+ [12.5],
+ [true],
+ [false],
+ [null],
+ [[]],
+ [new stdClass()],
+ ];
+ }
+
+ #[DataProvider('provideInvalidParseValues')]
+ public function testParseReturnsNullForNonStrings(mixed $input): void
+ {
+ self::assertNull($this->field->parse($input));
+ }
+
+ // --------------------------------------------------------
+ // evaluate()
+ // --------------------------------------------------------
+
+ /**
+  * Datasets are [input, expected score]. Note that the empty string scores
+  * 1 here although parse() turns it into null.
+  */
+ public static function provideEvaluateValues(): array
+ {
+ return [
+ ['hello', 1],
+ ['', 1],
+ ['123', 1],
+
+ [123, 0],
+ [12.5, 0],
+ [true, 0],
+ [false, 0],
+ [null, 0],
+ [[], 0],
+ ];
+ }
+
+ #[DataProvider('provideEvaluateValues')]
+ public function testEvaluate(mixed $input, int $expected): void
+ {
+ self::assertSame($expected, $this->field->evaluate($input));
+ }
+
+ // --------------------------------------------------------
+ // type()
+ // --------------------------------------------------------
+
+ public function testTypeIsString(): void
+ {
+ self::assertSame(FieldType::String, $this->field->type());
+ }
+
+ // --------------------------------------------------------
+ // confidenceThreshold()
+ // --------------------------------------------------------
+
+ public function testConfidenceThresholdIsZero(): void
+ {
+ self::assertSame(0.0, $this->field->confidenceThreshold());
+ }
+
+ // An unconstrained string field exposes no metadata.
+ public function test_metadata_contains_expected_structure(): void
+ {
+ $field = new StringField();
+
+ self::assertTrue($field->metadata()->isEmpty());
+ }
+
+ // Upper bound only: up to 3 characters are accepted.
+ public function test_max_length_constraint_applied(): void
+ {
+ $field = StringField::max(3);
+
+ self::assertSame('string[,3]', $field->name());
+ self::assertNull($field->parse(null));
+ self::assertNull($field->parse('abcdef'));
+ self::assertSame('a', $field->parse('a'));
+ self::assertSame('ab', $field->parse('ab'));
+ self::assertSame('abc', $field->parse('abc'));
+ }
+
+ // Both bounds equal: exactly 3 characters are required.
+ public function test_fixed_length_constraint_applied(): void
+ {
+ $field = StringField::fixed(3);
+
+ self::assertSame('string[3]', $field->name());
+ self::assertNull($field->parse(null));
+ self::assertNull($field->parse('abcdef'));
+ self::assertNull($field->parse('a'));
+ self::assertNull($field->parse('ab'));
+ self::assertSame('abc', $field->parse('abc'));
+ }
+
+ // Lower bound only: at least 3 characters are required.
+ public function test_min_length_constraint_applied(): void
+ {
+ $field = StringField::min(3);
+
+ self::assertSame('string[3,]', $field->name());
+ self::assertNull($field->parse(null));
+ self::assertNull($field->parse('a'));
+ self::assertNull($field->parse('ab'));
+ self::assertSame('abc', $field->parse('abc'));
+ self::assertSame('abcdef', $field->parse('abcdef'));
+ }
+
+ // --------------------------------------------------------
+ // Factory constructors
+ // --------------------------------------------------------
+
+ // Constrained fields keep the String type and report the factory default threshold.
+ public function testUuidFactoryCreatesValidStrategy(): void
+ {
+ $field = StringField::uuid();
+
+ self::assertSame(FieldType::String, $field->type());
+ self::assertSame('string(uuid)', $field->name());
+ self::assertSame(0.8, $field->confidenceThreshold());
+ }
+
+ public function testUlidFactoryCreatesValidStrategy(): void
+ {
+ $field = StringField::ulid();
+
+ self::assertSame('string(ulid)', $field->name());
+ }
+
+ public function testHexColorFactoryCreatesValidStrategy(): void
+ {
+ $field = StringField::hexColor();
+
+ self::assertSame('string(hex_color)', $field->name());
+ }
+
+ public function testJwtTokenFactoryCreatesValidStrategy(): void
+ {
+ $field = StringField::jwtToken();
+
+ self::assertSame('string(jwt_token)', $field->name());
+ }
+}
diff --git a/src/Schema/StringLengthConstraint.php b/src/Schema/StringLengthConstraint.php
new file mode 100644
index 00000000..290df6dc
--- /dev/null
+++ b/src/Schema/StringLengthConstraint.php
@@ -0,0 +1,107 @@
+
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */
+
+declare(strict_types=1);
+
+namespace League\Csv\Schema;
+
+use ValueError;
+
+/**
+ * Constraint accepting strings whose character count lies within an
+ * inclusive [min, max] range; either bound may be left open.
+ *
+ * Instances are created through the named constructors only.
+ */
+final readonly class StringLengthConstraint implements StringConstraint
+{
+    /**
+     * @param ?positive-int $min
+     * @param ?positive-int $max
+     *
+     * @throws ValueError if a bound is not strictly positive or if $min exceeds $max
+     */
+    private function __construct(
+        public ?int $min,
+        public ?int $max,
+    ) {
+        if (null !== $min && $min <= 0) {
+            throw new ValueError('Min length must be greater than 0');
+        }
+
+        if (null !== $max && $max <= 0) {
+            throw new ValueError('Max length must be greater than 0');
+        }
+
+        if (null !== $min && null !== $max && $min > $max) {
+            throw new ValueError('Minimum length can not be greater than maximum length.');
+        }
+    }
+
+    /**
+     * At least $length characters.
+     *
+     * @param positive-int $length
+     */
+    public static function min(int $length): self
+    {
+        return new self($length, null);
+    }
+
+    /**
+     * At most $length characters.
+     *
+     * @param positive-int $length
+     */
+    public static function max(int $length): self
+    {
+        return new self(null, $length);
+    }
+
+    /**
+     * Exactly $length characters.
+     *
+     * @param positive-int $length
+     */
+    public static function fixed(int $length): self
+    {
+        return new self($length, $length);
+    }
+
+    /**
+     * Between $min and $max characters, both included.
+     *
+     * @param positive-int $min
+     * @param positive-int $max
+     */
+    public static function between(int $min, int $max): self
+    {
+        return new self($min, $max);
+    }
+
+    /**
+     * Returns the trimmed value when its length fits the range, null
+     * otherwise; blank input is always rejected.
+     */
+    public function apply(string $value): ?string
+    {
+        $trimmed = trim($value);
+        // Measured in characters rather than bytes.
+        $length = mb_strlen($trimmed);
+
+        return match (true) {
+            '' === $trimmed,
+            null !== $this->min && $length < $this->min,
+            null !== $this->max && $length > $this->max => null,
+            default => $trimmed,
+        };
+    }
+
+    /**
+     * Returns the string type value followed by "[n]" for a fixed length or
+     * "[min,max]" otherwise, an open bound being left empty.
+     */
+    public function fieldTypeName(): string
+    {
+        $suffix = match (true) {
+            null === $this->min && null === $this->max => '',
+            $this->min === $this->max => '['.$this->min.']',
+            default => '['.$this->min.','.$this->max.']',
+        };
+
+        return FieldType::String->value.$suffix;
+    }
+
+    /**
+     * Exposes the bounds as "min_length" and "max_length".
+     */
+    public function constraint(): FieldMetadata
+    {
+        return new FieldMetadata([
+            'min_length' => $this->min,
+            'max_length' => $this->max,
+        ]);
+    }
+}
diff --git a/src/Schema/StructuredStringConstraint.php b/src/Schema/StructuredStringConstraint.php
new file mode 100644
index 00000000..52c1815c
--- /dev/null
+++ b/src/Schema/StructuredStringConstraint.php
@@ -0,0 +1,94 @@
+
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */
+
+declare(strict_types=1);
+
+namespace League\Csv\Schema;
+
+use ValueError;
+
+use function preg_match;
+use function trim;
+
+/**
+ * String constraint accepting only values that match a regular expression.
+ *
+ * Named constructors are provided for a few well-known formats.
+ */
+final readonly class StructuredStringConstraint implements StringConstraint
+{
+    /**
+     * @param non-empty-string $fieldTypeName snake case label reported by fieldTypeName()
+     * @param non-empty-string $pattern       delimited regular expression the trimmed value must match
+     *
+     * @throws ValueError if the pattern is empty or can not be compiled, or if the name is not snake case
+     */
+    public function __construct(
+        public string $fieldTypeName,
+        public string $pattern,
+    ) {
+        // preg_match() returns false when the pattern can not be compiled; the warning is silenced on purpose.
+        ('' !== $pattern && false !== @preg_match($pattern, '')) || throw new ValueError('the regular expression pattern "'.$pattern.'" is not valid. Did you forget the delimiter?');
+        ('' !== $fieldTypeName && 1 === preg_match('/^[a-z][a-z0-9]*(?:_[a-z0-9]+)*$/', $fieldTypeName)) || throw new ValueError('The name "'.$fieldTypeName.'" is not a valid snake case variable name.');
+    }
+
+    /**
+     * Hyphenated hexadecimal UUID (8-4-4-4-12), case-insensitive.
+     *
+     * The version digit accepts 1 to 8, the versions defined by RFC 9562,
+     * and the variant digit must be one of 8, 9, a or b.
+     */
+    public static function uuid(): self
+    {
+        return new self(
+            fieldTypeName: 'uuid',
+            pattern: '/^[0-9a-f]{8}-[0-9a-f]{4}-[1-8][0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$/i',
+        );
+    }
+
+    /**
+     * 26 characters made of digits and letters, excluding I, L, O and U, case-insensitive.
+     */
+    public static function ulid(): self
+    {
+        return new self(
+            fieldTypeName: 'ulid',
+            pattern: '/^[0-9A-HJKMNP-TV-Z]{26}$/i',
+        );
+    }
+
+    /**
+     * Three non-empty, dot-separated segments made of letters, digits, `-` and `_`.
+     */
+    public static function jwtToken(): self
+    {
+        return new self(
+            fieldTypeName: 'jwt_token',
+            pattern: '/^[A-Za-z0-9-_]+\.[A-Za-z0-9-_]+\.[A-Za-z0-9-_]+$/i',
+        );
+    }
+
+    /**
+     * A `#` followed by 3 or 6 hexadecimal digits, case-insensitive.
+     */
+    public static function hexColor(): self
+    {
+        return new self(
+            fieldTypeName: 'hex_color',
+            pattern: '/^#(?:[0-9a-fA-F]{3}){1,2}$/i',
+        );
+    }
+
+    /**
+     * Exactly 32 hexadecimal digits.
+     */
+    public static function md5(): self
+    {
+        return new self(
+            fieldTypeName: 'md5',
+            pattern: '/^[a-fA-F0-9]{32}$/',
+        );
+    }
+
+    /**
+     * Exactly 40 hexadecimal digits.
+     */
+    public static function sha1(): self
+    {
+        return new self(
+            fieldTypeName: 'sha1',
+            pattern: '/^[a-fA-F0-9]{40}$/',
+        );
+    }
+
+    /**
+     * Trims the value and matches it against the pattern.
+     *
+     * @return ?string the trimmed value, or null when it is empty or does not match
+     */
+    public function apply(string $value): ?string
+    {
+        $value = trim($value);
+
+        return ('' === $value || 1 !== preg_match($this->pattern, $value)) ? null : $value;
+    }
+
+    /**
+     * Returns the string field type value followed by the label between parentheses.
+     */
+    public function fieldTypeName(): string
+    {
+        return FieldType::String->value.'('.$this->fieldTypeName.')';
+    }
+}
diff --git a/src/Schema/TimeField.php b/src/Schema/TimeField.php
new file mode 100644
index 00000000..181c239c
--- /dev/null
+++ b/src/Schema/TimeField.php
@@ -0,0 +1,137 @@
+
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */
+
+declare(strict_types=1);
+
+namespace League\Csv\Schema;
+
+use DateTimeInterface;
+use ValueError;
+
+use function array_map;
+use function ctype_digit;
+use function implode;
+use function is_string;
+use function preg_match;
+use function preg_quote;
+use function strlen;
+use function trim;
+
+/**
+ * Field recognising time values written as one to three numeric components
+ * (hours, minutes, seconds) joined by a single byte separator.
+ */
+final class TimeField extends FieldEvaluator implements Field
+{
+    /** @var non-empty-string anchored regular expression built once in the constructor */
+    private readonly string $pattern;
+
+    /**
+     * @param string $separator           single byte, non-digit string placed between the components
+     * @param float  $confidenceThreshold forwarded to the parent constructor
+     *
+     * @throws ValueError if the separator is not exactly one byte long or is a digit
+     */
+    private function __construct(
+        public readonly string $separator,
+        public readonly TimePrecision $precision,
+        public readonly TimePadding $padding,
+        float $confidenceThreshold = 0.8
+    ) {
+        (1 === strlen($separator) && !ctype_digit($this->separator)) || throw new ValueError('The separator character must be a non-empty single byte string.');
+
+        parent::__construct($confidenceThreshold);
+
+        $this->pattern = $this->generatePattern();
+    }
+
+    /**
+     * Expects hours, minutes and seconds.
+     */
+    public static function seconds(string $separator = ':', TimePadding $padding = TimePadding::Padded, float $confidenceThreshold = 0.8): self
+    {
+        return new self($separator, TimePrecision::HoursMinutesSeconds, $padding, $confidenceThreshold);
+    }
+
+    /**
+     * Expects hours and minutes.
+     */
+    public static function minutes(string $separator = ':', TimePadding $padding = TimePadding::Padded, float $confidenceThreshold = 0.8): self
+    {
+        return new self($separator, TimePrecision::HoursMinutes, $padding, $confidenceThreshold);
+    }
+
+    /**
+     * Expects hours only.
+     */
+    public static function hours(string $separator = ':', TimePadding $padding = TimePadding::Padded, float $confidenceThreshold = 0.8): self
+    {
+        return new self($separator, TimePrecision::Hours, $padding, $confidenceThreshold);
+    }
+
+    public function type(): FieldType
+    {
+        return FieldType::Time;
+    }
+
+    /**
+     * Returns a metadata object created without any argument.
+     */
+    public function metadata(): FieldMetadata
+    {
+        return new FieldMetadata();
+    }
+
+    /**
+     * Returns the time type value followed by the precision, padding and separator in use.
+     */
+    public function name(): string
+    {
+        $precision = match ($this->precision) {
+            TimePrecision::Hours => 'hours',
+            TimePrecision::HoursMinutes => 'hours_minutes',
+            TimePrecision::HoursMinutesSeconds => 'hours_minutes_seconds',
+        };
+
+        $paddingMode = match ($this->padding) {
+            TimePadding::Unpadded => 'un_padded',
+            TimePadding::Padded => 'padded',
+        };
+
+        return FieldType::Time->value.'(precision='.$precision.',padding='.$paddingMode.',separator='.$this->separator.')';
+    }
+
+    /**
+     * Normalises a value to a zero-padded, three component time string.
+     *
+     * Strings are trimmed and must match the configured precision, padding and
+     * separator; components missing from the precision are output as `00`.
+     *
+     * @return ?string null for non-string values, non-matching strings, hours above 23
+     *                 or minutes/seconds above 59
+     */
+    public function parse(mixed $value): ?string
+    {
+        // NOTE(review): date objects are always rendered with ":" regardless of the
+        // configured separator, unlike parsed strings - confirm this is intended.
+        if ($value instanceof DateTimeInterface) {
+            return $value->format('H:i:s');
+        }
+
+        if (!is_string($value)) {
+            return null;
+        }
+
+        $value = trim($value);
+        if (1 !== preg_match($this->pattern, $value, $found)) {
+            return null;
+        }
+
+        $hour = (int) $found['hour'];
+        // The minute and second groups only exist for the finer precisions.
+        $minute = (int) ($found['minute'] ?? 0);
+        $second = (int) ($found['second'] ?? 0);
+
+        return ($hour > 23 || $minute > 59 || $second > 59)
+            ? null
+            : $this->formatTimePart($hour)
+                .$this->separator
+                .$this->formatTimePart($minute)
+                .$this->separator
+                .$this->formatTimePart($second);
+    }
+
+    /**
+     * Left-pads a single digit component with a zero.
+     */
+    private function formatTimePart(int $value): string
+    {
+        return ($value < 10 ? '0' : '').$value;
+    }
+
+    /**
+     * Builds the anchored expression with one named group per expected component.
+     *
+     * @return non-empty-string
+     */
+    private function generatePattern(): string
+    {
+        $digits = TimePadding::Padded === $this->padding ? '\d{2}' : '\d{1,2}';
+
+        $patternParts = array_map(
+            static fn (string $part): string => '(?<'.$part.'>'.$digits.')',
+            match ($this->precision) {
+                TimePrecision::Hours => ['hour'],
+                TimePrecision::HoursMinutes => ['hour', 'minute'],
+                TimePrecision::HoursMinutesSeconds => ['hour', 'minute', 'second'],
+            }
+        );
+
+        // The separator is caller supplied: quote it so that characters such as "."
+        // or "/" are matched literally instead of acting as regex syntax or as the
+        // pattern delimiter.
+        return '/^'.implode(preg_quote($this->separator, '/'), $patternParts).'$/';
+    }
+}
diff --git a/src/Schema/TimeFieldTest.php b/src/Schema/TimeFieldTest.php
new file mode 100644
index 00000000..36de040c
--- /dev/null
+++ b/src/Schema/TimeFieldTest.php
@@ -0,0 +1,125 @@
+
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */
+
+declare(strict_types=1);
+
+namespace League\Csv\Schema;
+
+use PHPUnit\Framework\Attributes\CoversClass;
+use PHPUnit\Framework\TestCase;
+
+#[CoversClass(TimeField::class)]
+final class TimeFieldTest extends TestCase
+{
+    public function test_hours_constructor_parses_correctly(): void
+    {
+        $hours = TimeField::hours();
+
+        self::assertSame('time(precision=hours,padding=padded,separator=:)', $hours->name());
+
+        // Pairs are used instead of keys so that numeric strings stay strings.
+        foreach ([['10', '10:00:00'], ['23', '23:00:00']] as [$input, $expected]) {
+            self::assertSame($expected, $hours->parse($input));
+        }
+    }
+
+    public function test_minutes_constructor_parses_correctly(): void
+    {
+        $minutes = TimeField::minutes(separator: '.');
+
+        self::assertSame('time(precision=hours_minutes,padding=padded,separator=.)', $minutes->name());
+
+        foreach ([['10.30', '10.30.00'], ['23.59', '23.59.00']] as [$input, $expected]) {
+            self::assertSame($expected, $minutes->parse($input));
+        }
+    }
+
+    public function test_seconds_constructor_parses_correctly(): void
+    {
+        $seconds = TimeField::seconds();
+
+        self::assertSame('time(precision=hours_minutes_seconds,padding=padded,separator=:)', $seconds->name());
+
+        foreach ([['10:30:45', '10:30:45'], ['00:00:00', '00:00:00']] as [$input, $expected]) {
+            self::assertSame($expected, $seconds->parse($input));
+        }
+    }
+
+    public function test_invalid_string_returns_null(): void
+    {
+        $seconds = TimeField::seconds();
+
+        foreach (['', ' ', 'invalid'] as $input) {
+            self::assertNull($seconds->parse($input));
+        }
+    }
+
+    public function test_non_string_returns_null(): void
+    {
+        $seconds = TimeField::seconds();
+
+        foreach ([null, 123, []] as $input) {
+            self::assertNull($seconds->parse($input));
+        }
+    }
+
+    public function test_seconds_precision_rejects_invalid_time(): void
+    {
+        $seconds = TimeField::seconds();
+        $outOfRange = [
+            '25:00:00', // invalid hour
+            '10:70:00', // invalid minute
+            '10:00:90', // invalid second
+        ];
+
+        foreach ($outOfRange as $input) {
+            self::assertNull($seconds->parse($input));
+        }
+    }
+
+    public function test_minutes_precision_rejects_seconds_input(): void
+    {
+        // The input carries one more component than the precision allows.
+        self::assertNull(TimeField::minutes()->parse('10:30:45'));
+    }
+
+    public function test_hours_precision_rejects_minutes_input(): void
+    {
+        $hours = TimeField::hours();
+
+        // Both inputs carry more components than the precision allows.
+        foreach (['10:30', '10:30:45'] as $input) {
+            self::assertNull($hours->parse($input));
+        }
+    }
+
+    public function test_output_is_always_normalized_to_his(): void
+    {
+        $unpadded = TimeField::seconds(padding: TimePadding::Unpadded);
+
+        self::assertSame('01:02:03', $unpadded->parse('1:2:3'));
+    }
+
+    public function test_metadata_contains_format(): void
+    {
+        $metadata = TimeField::seconds()->metadata();
+
+        self::assertSame([], $metadata->all());
+    }
+
+    public function test_name_contains_format(): void
+    {
+        $expectations = [
+            ['time(precision=hours_minutes_seconds,padding=padded,separator=:)', TimeField::seconds()],
+            ['time(precision=hours_minutes,padding=padded,separator=:)', TimeField::minutes()],
+            ['time(precision=hours,padding=padded,separator=:)', TimeField::hours()],
+        ];
+
+        foreach ($expectations as [$expected, $field]) {
+            self::assertSame($expected, $field->name());
+        }
+    }
+}
diff --git a/src/Schema/TimePadding.php b/src/Schema/TimePadding.php
new file mode 100644
index 00000000..3d028fa1
--- /dev/null
+++ b/src/Schema/TimePadding.php
@@ -0,0 +1,20 @@
+
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */
+
+declare(strict_types=1);
+
+namespace League\Csv\Schema;
+
+/**
+ * Tells TimeField how many digits each time component is written with.
+ */
+enum TimePadding
+{
+ /** Each component is matched with exactly two digits. */
+ case Padded;
+ /** Each component is matched with one or two digits. */
+ case Unpadded;
+}
diff --git a/src/Schema/TimePrecision.php b/src/Schema/TimePrecision.php
new file mode 100644
index 00000000..6f378a4a
--- /dev/null
+++ b/src/Schema/TimePrecision.php
@@ -0,0 +1,21 @@
+
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */
+
+declare(strict_types=1);
+
+namespace League\Csv\Schema;
+
+/**
+ * Tells TimeField which time components an input is expected to contain.
+ */
+enum TimePrecision
+{
+ /** Only the hour component is expected. */
+ case Hours;
+ /** The hour and minute components are expected. */
+ case HoursMinutes;
+ /** The hour, minute and second components are expected. */
+ case HoursMinutesSeconds;
+}
diff --git a/src/TabularData.php b/src/TabularData.php
index f886eecf..44c8aca9 100644
--- a/src/TabularData.php
+++ b/src/TabularData.php
@@ -13,6 +13,7 @@
namespace League\Csv;
+use Closure;
use Iterator;
/**
@@ -24,6 +25,7 @@
* @method object|null lastAsObject(string $className, array $header = []) returns the last record from the tabular data as an instance of the defined class name.
* @method Iterator map(callable $callback) Run a map over each container record.
* @method Iterator getRecordsAsObject(string $className, array $header = []) Returns the tabular data records as an iterator object containing instance of the defined class name.
+ * @method mixed reduce(Closure $callback, mixed $initial = null) reduces the collection to a single value, passing the result of each iteration into the subsequent iteration
*/
interface TabularData
{
diff --git a/src/TabularDataReader.php b/src/TabularDataReader.php
index 3dacc1c1..72e81d69 100644
--- a/src/TabularDataReader.php
+++ b/src/TabularDataReader.php
@@ -30,7 +30,6 @@
* @method mixed value(int|string $column = 0) returns a given value from the first element of the tabular data.
* @method bool each(Closure $callback) iterates over each record and passes it to a closure. Iteration is interrupted if the closure returns false
* @method bool exists(Closure $callback) tells whether at least one record satisfies the predicate.
- * @method mixed reduce(Closure $callback, mixed $initial = null) reduces the collection to a single value, passing the result of each iteration into the subsequent iteration
* @method Iterator getObjects(string $className, array $header = []) Returns the tabular data records as an iterator object containing instance of the defined class name.
* @method TabularDataReader filter(Query\Predicate|Closure $predicate) returns all the elements of this collection for which your callback function returns `true`
* @method TabularDataReader slice(int $offset, ?int $length = null) extracts a slice of $length elements starting at position $offset from the Collection.