diff --git a/docs/9.0/connections/instantiation.md b/docs/9.0/connections/instantiation.md index 72a55b6b..160cbc12 100644 --- a/docs/9.0/connections/instantiation.md +++ b/docs/9.0/connections/instantiation.md @@ -61,6 +61,7 @@ Alternatively, you can use the fromStream method.

```php public static AbstractCsv::fromStream(SplFileObject|resource $stream): self ``` + Creates a new object from a stream resource or a streaming object. ```php diff --git a/docs/9.0/reader/record-mapping.md b/docs/9.0/reader/record-mapping.md index 4381c0c2..368a8c70 100644 --- a/docs/9.0/reader/record-mapping.md +++ b/docs/9.0/reader/record-mapping.md @@ -8,7 +8,7 @@ description: Converts your CSV records into PHP objects using PHP's powerful Ref

New in version 9.12.0

-If you are working with a class which implements the `TabularDataReader` interface you can now deserialize +If you are working with a class which implements the `TabularData` interface you can now deserialize your data using the `TabularDataReader::getRecordsAsObject` method. The method will convert your document records into objects using PHP's powerful Reflection API. diff --git a/phpstan-build.neon b/phpstan-build.neon index 563573c0..b30d4878 100644 --- a/phpstan-build.neon +++ b/phpstan-build.neon @@ -19,4 +19,6 @@ parameters: treatPhpDocTypesAsCertain: false parallel: processTimeout: 300.0 + bootstrapFiles: + - vendor/autoload.php diff --git a/phpstan.neon b/phpstan.neon index 84028aac..81f12199 100644 --- a/phpstan.neon +++ b/phpstan.neon @@ -22,4 +22,3 @@ parameters: treatPhpDocTypesAsCertain: false parallel: processTimeout: 300.0 - diff --git a/src/Buffer.php b/src/Buffer.php index b25c5df0..cb40d7ba 100644 --- a/src/Buffer.php +++ b/src/Buffer.php @@ -18,6 +18,8 @@ use Iterator; use League\Csv\Query\Constraint\Criteria; use League\Csv\Query\Predicate; +use League\Csv\Schema\Inspector; +use League\Csv\Schema\Schema; use League\Csv\Serializer\Denormalizer; use League\Csv\Serializer\MappingFailed; use League\Csv\Serializer\TypeCastingFailed; @@ -203,6 +205,35 @@ public function map(callable $callback): Iterator return MapIterator::fromIterable($this->getRecords(), $callback); } + /** + * @param callable(TInitial|null, array, array-key=): TInitial $callback + * @param TInitial|null $initial + * + * @template TInitial + * + * @throws SyntaxError + * + * @return TInitial|null + */ + public function reduce(callable $callback, mixed $initial = null): mixed + { + foreach ($this->getRecords() as $offset => $record) { + $initial = $callback($initial, $record, $offset); + } + + return $initial; + } + + public function inferSchema(?Inspector $inspector = null, array $header = []): Schema + { + return ($inspector ?? 
Inspector::default())->schema($this, $header); + } + + public function inferRecords(?Inspector $inspector = null, array $header = []): Iterator + { + return $this->inferSchema($inspector, $header)->parse($this); + } + /** * @param non-negative-int $nth * diff --git a/src/Reader.php b/src/Reader.php index 678f2348..7e41ae2d 100644 --- a/src/Reader.php +++ b/src/Reader.php @@ -18,6 +18,8 @@ use Deprecated; use Iterator; use JsonSerializable; +use League\Csv\Schema\Inspector; +use League\Csv\Schema\Schema; use League\Csv\Serializer\Denormalizer; use League\Csv\Serializer\MappingFailed; use League\Csv\Serializer\TypeCastingFailed; @@ -416,6 +418,16 @@ public function map(callable $callback): Iterator return MapIterator::fromIterable($this, $callback); } + public function inferSchema(?Inspector $inspector = null, array $header = []): Schema + { + return ($inspector ?? Inspector::default())->schema($this, $header); + } + + public function inferRecords(?Inspector $inspector = null, array $header = []): Iterator + { + return $this->inferSchema($inspector, $header)->parse($this); + } + /** * @param positive-int $recordsCount * diff --git a/src/ResultSet.php b/src/ResultSet.php index fecc439e..6b4a73e2 100644 --- a/src/ResultSet.php +++ b/src/ResultSet.php @@ -20,6 +20,8 @@ use Generator; use Iterator; use JsonSerializable; +use League\Csv\Schema\Inspector; +use League\Csv\Schema\Schema; use League\Csv\Serializer\Denormalizer; use League\Csv\Serializer\MappingFailed; use League\Csv\Serializer\TypeCastingFailed; @@ -206,6 +208,16 @@ public function map(callable $callback): Iterator return MapIterator::fromIterable($this, $callback); } + public function inferSchema(?Inspector $inspector = null, array $header = []): Schema + { + return ($inspector ?? 
Inspector::default())->schema($this, $header); + } + + public function inferRecords(?Inspector $inspector = null, array $header = []): Iterator + { + return $this->inferSchema($inspector, $header)->parse($this); + } + /** * @param positive-int $recordsCount * diff --git a/src/Schema/BooleanField.php b/src/Schema/BooleanField.php new file mode 100644 index 00000000..d6e71557 --- /dev/null +++ b/src/Schema/BooleanField.php @@ -0,0 +1,62 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. + */ + +declare(strict_types=1); + +namespace League\Csv\Schema; + +use PHPUnit\Framework\Attributes\CoversClass; + +use function filter_var; +use function in_array; +use function is_bool; +use function is_string; +use function trim; + +use const FILTER_NULL_ON_FAILURE; +use const FILTER_VALIDATE_BOOLEAN; + +#[CoversClass(BooleanField::class)] +final class BooleanField extends FieldEvaluator implements Field +{ + public function type(): FieldType + { + return FieldType::Boolean; + } + + public function name(): string + { + return FieldType::Boolean->value; + } + + public function parse(mixed $value): ?bool + { + if (is_bool($value)) { + return $value; + } + + if (!is_string($value) && !in_array($value, [0, 1], true)) { + return null; + } + + $value = trim((string) $value); + if ('' === $value) { + return null; + } + + return filter_var($value, FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE); + } + + public function metadata(): FieldMetadata + { + return new FieldMetadata(); + } +} diff --git a/src/Schema/BooleanFieldTest.php b/src/Schema/BooleanFieldTest.php new file mode 100644 index 00000000..5f3265b5 --- /dev/null +++ b/src/Schema/BooleanFieldTest.php @@ -0,0 +1,64 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. 
+ */ + +declare(strict_types=1); + +namespace League\Csv\Schema; + +use PHPUnit\Framework\Attributes\CoversClass; +use PHPUnit\Framework\Attributes\DataProvider; +use PHPUnit\Framework\TestCase; + +#[CoversClass(BooleanField::class)] +final class BooleanFieldTest extends TestCase +{ + private BooleanField $field; + + protected function setUp(): void + { + $this->field = new BooleanField(); + } + + public static function provideBooleanValues(): array + { + return [ + [true, true], + [false, false], + ['true', true], + ['false', false], + ['1', true], + ['0', false], + [' true ', true], + ['', null], + [' ', null], + ['foo', null], + [[], null], + [123, null], + ]; + } + + #[DataProvider('provideBooleanValues')] + public function testParse(mixed $input, ?bool $expected): void + { + $result = $this->field->parse($input); + + null === $expected + ? self::assertNull($result) + : self::assertSame($expected, $result); + } + + public function test_metadata_contains_expected_structure(): void + { + $field = new BooleanField(); + + self::assertTrue($field->metadata()->isEmpty()); + } +} diff --git a/src/Schema/CallbackFieldParser.php b/src/Schema/CallbackFieldParser.php new file mode 100644 index 00000000..78b420c4 --- /dev/null +++ b/src/Schema/CallbackFieldParser.php @@ -0,0 +1,45 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. 
+ */ + +declare(strict_types=1); + +namespace League\Csv\Schema; + +use Closure; + +/** + * @template T + */ +final class CallbackFieldParser implements FieldParser +{ + /** @var Closure(mixed): ?T */ + private Closure $callback; + + /** + * @param (Closure(mixed): ?T)|(callable(mixed): ?T) $callback + */ + public function __construct(Closure|callable $callback) + { + if (!$callback instanceof Closure) { + $callback = $callback(...); + } + + $this->callback = $callback; + } + + /** + * @return ?T + */ + public function parse(mixed $value): mixed + { + return ($this->callback)($value); + } +} diff --git a/src/Schema/CustomField.php b/src/Schema/CustomField.php new file mode 100644 index 00000000..ebbbc023 --- /dev/null +++ b/src/Schema/CustomField.php @@ -0,0 +1,70 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. + */ + +declare(strict_types=1); + +namespace League\Csv\Schema; + +use Closure; +use ValueError; + +use function preg_match; + +/** + * @template T + */ +final class CustomField extends FieldEvaluator implements Field +{ + private readonly FieldParser $fieldParser; + /** @var non-empty-string */ + private readonly string $fieldTypeName; + + public function __construct( + FieldParser|Closure|callable $fieldParser, + string $fieldTypeName, + float $confidenceThreshold = 0.8 + ) { + ('' !== $fieldTypeName && 1 === preg_match('/^[a-z]+(?:_[a-z0-9]+)*$/', $fieldTypeName)) || throw new ValueError('The name "'.$fieldTypeName.'" is not a valid snake case variable name.'); + $fieldParser = self::resolveFieldParser($fieldParser); + parent::__construct($confidenceThreshold); + + $this->fieldParser = $fieldParser; + $this->fieldTypeName = $fieldTypeName; + } + + private static function resolveFieldParser(FieldParser|Closure|callable $parser): FieldParser + { + return $parser instanceof FieldParser ? 
$parser : new CallbackFieldParser($parser); + } + + public function type(): FieldType + { + return FieldType::Custom; + } + + public function name(): string + { + return FieldType::Custom->value.'('.$this->fieldTypeName.')'; + } + + /** + * @return ?T + */ + public function parse(mixed $value): mixed + { + return $this->fieldParser->parse($value); + } + + public function metadata(): FieldMetadata + { + return new FieldMetadata(); + } +} diff --git a/src/Schema/CustomFieldTest.php b/src/Schema/CustomFieldTest.php new file mode 100644 index 00000000..9bae5f94 --- /dev/null +++ b/src/Schema/CustomFieldTest.php @@ -0,0 +1,99 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. + */ + +declare(strict_types=1); + +namespace League\Csv\Schema; + +use PHPUnit\Framework\Attributes\CoversClass; +use PHPUnit\Framework\TestCase; + +#[CoversClass(CallbackFieldParser::class)] +#[CoversClass(CustomField::class)] +final class CustomFieldTest extends TestCase +{ + // -------------------------------------------------------- + // parse() + // -------------------------------------------------------- + + public function testParseUsesClosure(): void + { + $field = new CustomField( + fn ($value) => 'ok' === $value ? 'parsed' : null, + 'custom', + ); + + self::assertSame('parsed', $field->parse('ok')); + self::assertNull($field->parse('nope')); + } + + public function testParseUsesCallable(): void + { + $callable = function ($value) { + return is_int($value) ? $value * 2 : null; + }; + + $field = new CustomField($callable, 'custom'); + + self::assertSame(4, $field->parse(2)); + self::assertNull($field->parse('2')); + } + + // -------------------------------------------------------- + // evaluate() (inherited behavior) + // -------------------------------------------------------- + + public function testEvaluateUsesParse(): void + { + $field = new CustomField( + fn ($value) => 'valid' === $value ? 
true : null, + 'custom' + ); + + self::assertSame(1, $field->evaluate('valid')); + self::assertSame(-1, $field->evaluate('invalid')); + self::assertSame(0, $field->evaluate(null)); + self::assertSame(0, $field->evaluate('')); + } + + // -------------------------------------------------------- + // score() + // -------------------------------------------------------- + + // -------------------------------------------------------- + // type() + // -------------------------------------------------------- + + public function testTypeIsCustom(): void + { + $field = new CustomField(fn () => null, 'custom'); + + self::assertSame(FieldType::Custom, $field->type()); + } + + // -------------------------------------------------------- + // confidenceThreshold() + // -------------------------------------------------------- + + public function testConfidenceThresholdIsInherited(): void + { + $field = new CustomField(fn () => null, 'custom', 0.8); + + self::assertSame(0.8, $field->confidenceThreshold()); + } + + public function test_metadata_contains_expected_structure(): void + { + $field = new CustomField(fn () => null, 'custom'); + + self::assertTrue($field->metadata()->isEmpty()); + } +} diff --git a/src/Schema/DateTimeField.php b/src/Schema/DateTimeField.php new file mode 100644 index 00000000..48f7fc6e --- /dev/null +++ b/src/Schema/DateTimeField.php @@ -0,0 +1,221 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. 
+ */ + +declare(strict_types=1); + +namespace League\Csv\Schema; + +use DateTime; +use DateTimeImmutable; +use DateTimeInterface; +use DateTimeZone; +use Exception; +use Throwable; +use ValueError; + +use function is_string; +use function is_subclass_of; +use function trim; + +final class DateTimeField extends FieldEvaluator implements Field +{ + /** @var non-empty-string */ + public readonly string $format; + public readonly DateTimeZone $timezone; + /** @var class-string */ + public readonly string $outputClass; + + /** @var list */ + private const FORMAT_MACHINES = [ + 'Y-m-d', + 'Y-m-d H:i:s', + 'Y-m-d\TH:i:s', + DateTimeInterface::RFC3339, + DateTimeInterface::RFC3339_EXTENDED, + DateTimeInterface::ISO8601_EXPANDED, + 'U', + ]; + + /** @var list */ + private const FORMAT_LOCALIZED = [ + // Europe Dates + 'd/m/Y', + 'd-m-Y', + 'd.m.Y', + // American Dates + 'm/d/Y', + 'm-d-Y', + 'm.d.Y', + ]; + + /** + * @param non-empty-string $format + * @param class-string $outputClass + */ + public function __construct( + string $format, + DateTimeZone|string|null $timezone = null, + string $outputClass = DateTimeImmutable::class, + float $confidenceThreshold = 0.8, + ) { + $format = trim($format); + '' !== $format || throw new ValueError('The date field format can not be empty.'); + $timezone = self::filterTimezone($timezone); + self::filterDateTimeInterfaceClass($outputClass); + + parent::__construct($confidenceThreshold); + $this->format = $format; + $this->timezone = $timezone; + $this->outputClass = $outputClass; + } + + /** + * @param class-string $outputClass + */ + public static function common( + DateTimeZone|string|null $timezone = null, + string $outputClass = DateTimeImmutable::class, + ): FieldList { + return self::machine($timezone, $outputClass)->append(self::localized($timezone, $outputClass)); + } + + /** + * @param class-string $outputClass + */ + public static function machine( + DateTimeZone|string|null $timezone = null, + string $outputClass = 
DateTimeImmutable::class, + ): FieldList { + return self::fromFormat(self::FORMAT_MACHINES, $timezone, $outputClass, .8); + } + + /** + * @param class-string $outputClass + */ + public static function localized( + DateTimeZone|string|null $timezone = null, + string $outputClass = DateTimeImmutable::class, + ): FieldList { + return self::fromFormat(self::FORMAT_LOCALIZED, $timezone, $outputClass, .7); + } + + /** + * @param class-string $outputClass + */ + public static function timestamp( + string $outputClass = DateTimeImmutable::class, + float $confidenceThreshold = .8 + ): self { + return new self( + format: 'U', + timezone: 'UTC', + outputClass: $outputClass, + confidenceThreshold: $confidenceThreshold, + ); + } + + /** + * @param iterable $formats + * @param class-string $outputClass + */ + public static function fromFormat( + iterable $formats, + DateTimeZone|string|null $timezone = null, + string $outputClass = DateTimeImmutable::class, + float $confidenceThreshold = 0.8, + ): FieldList { + $res = []; + foreach ($formats as $format) { + $res[] = new self($format, $timezone, $outputClass, $confidenceThreshold); + } + + return new FieldList(...$res); + } + + private static function filterDateTimeInterfaceClass(string $className): void + { + is_subclass_of($className, DateTimeInterface::class) + || throw new ValueError('The date field class '.$className.' 
does not implement the DateTimeInterface interface.'); + } + + private static function filterTimezone(DateTimeZone|string|null $timeZone): DateTimeZone + { + if (null === $timeZone) { + return new DateTimeZone('UTC'); + } + + if ($timeZone instanceof DateTimeZone) { + return $timeZone; + } + + try { + return new DateTimeZone($timeZone); + } catch (Exception $exception) { + throw new ValueError('The date field timezone value `'.$timeZone.'` is invalid.', previous: $exception); + } + } + + public function type(): FieldType + { + return FieldType::Datetime; + } + + public function name(): string + { + $format = ('U' === $this->format) ? 'timestamp' : $this->format; + + return FieldType::Datetime->value.'(format='.$format.',timezone='.$this->timezone->getName().')'; + } + + public function parse(mixed $value): ?DateTimeInterface + { + if ($value instanceof DateTimeInterface) { + return $value::class === $this->outputClass ? $value : $this->outputClass::createFromInterface($value); + } + + if (!is_string($value)) { + return null; + } + + $value = trim($value); + if ('' === $value) { + return null; + } + + try { + $value = $this->outputClass::createFromFormat($this->format, $value, $this->timezone); + if (false === $value) { + return null; + } + + $errors = $this->outputClass::getLastErrors(); + if ( + (isset($errors['warning_count']) && 0 < $errors['warning_count']) || + (isset($errors['error_count']) && 0 < $errors['error_count']) + ) { + return null; + } + + return $value; + } catch (Throwable) { + return null; + } + } + + public function metadata(): FieldMetadata + { + return new FieldMetadata([ + 'format' => $this->format, + 'timezone' => $this->timezone->getName(), + 'class' => $this->outputClass, + ]); + } +} diff --git a/src/Schema/DateTimeFieldTest.php b/src/Schema/DateTimeFieldTest.php new file mode 100644 index 00000000..b583593b --- /dev/null +++ b/src/Schema/DateTimeFieldTest.php @@ -0,0 +1,93 @@ + + * + * For the full copyright and license information, 
please view the LICENSE + * file that was distributed with this source code. + */ + +declare(strict_types=1); + +namespace League\Csv\Schema; + +use DateTime; +use DateTimeImmutable; +use DateTimeInterface; +use PHPUnit\Framework\Attributes\CoversClass; +use PHPUnit\Framework\TestCase; + +#[CoversClass(DateTimeField::class)] +final class DateTimeFieldTest extends TestCase +{ + private DateTimeField $field; + + protected function setUp(): void + { + $this->field = new DateTimeField('Y-m-d'); + } + + public function testParseUsesNativeConstructorWhenFormatIsEmpty(): void + { + $result = $this->field->parse('2024-01-01'); + + self::assertInstanceOf(DateTimeImmutable::class, $result); + self::assertSame('2024-01-01', $result->format('Y-m-d')); + } + + public function testParseUsesCreateFromFormatWhenFormatIsProvided(): void + { + $field = new DateTimeField('d-m-Y'); + $result = $field->parse('01-01-2024'); + + self::assertInstanceOf(DateTimeImmutable::class, $result); + self::assertSame('2024-01-01', $result->format('Y-m-d')); + } + + public function testItAcceptsDateTimeInterfaceAndNormalizesToImmutable(): void + { + $input = new DateTime('2024-01-01'); + + $result = $this->field->parse($input); + + self::assertInstanceOf(DateTimeImmutable::class, $result); + self::assertSame('2024-01-01', $result->format('Y-m-d')); + } + + public function testItReturnsNullForInvalidValues(): void + { + self::assertNull($this->field->parse('')); + self::assertNull($this->field->parse(' ')); + self::assertNull($this->field->parse('invalid-date')); + self::assertNull($this->field->parse([])); + self::assertNull($this->field->parse(123)); + } + + public function test_it_can_return_another_implementing_datetime_interface(): void + { + $field = new DateTimeField('Y-m-d', outputClass: MyDate::class); + $result = $field->parse('2024-01-01'); + + self::assertInstanceOf(MyDate::class, $result); + self::assertSame('2024-01-01', $result->format('Y-m-d')); + self::assertSame(MyDate::class, 
$field->metadata()->get('class')); + self::assertSame('Y-m-d', $field->metadata()->get('format')); + self::assertSame('UTC', $field->metadata()->get('timezone')); + self::assertSame('datetime(format=Y-m-d,timezone=UTC)', $field->name()); + } + + public function test_it_uses_a_simpler_representation_for_timestamp(): void + { + self::assertSame('datetime(format=timestamp,timezone=UTC)', DateTimeField::timestamp()->name()); + } +} + +interface MyDateInterface extends DateTimeInterface +{ +} + +class MyDate extends DateTimeImmutable implements MyDateInterface +{ +} diff --git a/src/Schema/EnumField.php b/src/Schema/EnumField.php new file mode 100644 index 00000000..2aea302d --- /dev/null +++ b/src/Schema/EnumField.php @@ -0,0 +1,124 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. + */ + +declare(strict_types=1); + +namespace League\Csv\Schema; + +use BackedEnum; +use ReflectionEnum; +use ReflectionEnumUnitCase; +use Throwable; +use UnitEnum; +use ValueError; + +use function array_map; +use function filter_var; +use function is_int; +use function is_string; +use function trim; + +use const FILTER_VALIDATE_INT; + +final class EnumField extends FieldEvaluator implements Field +{ + private readonly ?string $backedEnumType; + /** @var list */ + private readonly array $cases; + /** @var class-string */ + public readonly string $enumClass; + private readonly array $byNames; + + /** + * @param class-string $enumClass + */ + public function __construct( + string $enumClass, + float $confidenceThreshold = 0.8 + ) { + try { + $ref = new ReflectionEnum($enumClass); + } catch (Throwable $exception) { + throw new ValueError('Enum "'.$enumClass.'" can not be use: '.$exception->getMessage(), previous: $exception); + } + + parent::__construct($confidenceThreshold); + + $this->enumClass = $enumClass; + $this->backedEnumType = !$ref->isBacked() ? 
null : $ref->getBackingType()->getName(); + $this->cases = array_map(fn (ReflectionEnumUnitCase $case) => $case->getValue(), $ref->getCases()); + + $byNames = []; + foreach ($this->cases as $case) { + $byNames[$case->name] = $case; + } + $this->byNames = $byNames; + } + + public function type(): FieldType + { + return FieldType::Enum; + } + + public function name(): string + { + return FieldType::Enum->value.'('.$this->enumClass.')'; + } + + public function parse(mixed $value): ?UnitEnum + { + if ($value instanceof UnitEnum && $value::class === $this->enumClass) { + return $value; + } + + if (!is_string($value) && !is_int($value)) { + return null; + } + + if (is_string($value)) { + $value = trim($value); + if ('' === $value) { + return null; + } + } + + if (null === $this->backedEnumType) { + return !is_string($value) ? null : ($this->byNames[$value] ?? null); + } + + if ('int' === $this->backedEnumType && is_string($value)) { + $value = filter_var($value, FILTER_VALIDATE_INT); + if (false === $value) { + return null; + } + } + + /** @var BackedEnum $enumClass */ + $enumClass = $this->enumClass; + // Under strict_types, tryFrom() throws a TypeError when an int is given to a string-backed enum, so cast first. + return $enumClass::tryFrom('string' === $this->backedEnumType ? (string) $value : $value); + } + + public function metadata(): FieldMetadata + { + return new FieldMetadata([ + 'class' => $this->enumClass, + 'backedType' => $this->backedEnumType, + 'cases' => array_map( + fn (UnitEnum $case): array => [ + 'name' => $case->name, + 'value' => $case instanceof BackedEnum ? $case->value : $case->name, + ], + $this->cases + ), + ]); + } +} diff --git a/src/Schema/EnumFieldTest.php b/src/Schema/EnumFieldTest.php new file mode 100644 index 00000000..5189c183 --- /dev/null +++ b/src/Schema/EnumFieldTest.php @@ -0,0 +1,165 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. 
+ */ + +declare(strict_types=1); + +namespace League\Csv\Schema; + +use PHPUnit\Framework\Attributes\CoversClass; +use PHPUnit\Framework\TestCase; +use stdClass; +use ValueError; + +#[CoversClass(EnumField::class)] +final class EnumFieldTest extends TestCase +{ + private EnumField $field; + + protected function setUp(): void + { + $this->field = new EnumField(TestEnum::class); + } + + // -------------------------------------------------------- + // Construction + // -------------------------------------------------------- + + public function testItThrowsWhenClassIsNotAnEnum(): void + { + $this->expectException(ValueError::class); + + new EnumField(stdClass::class); /* @phpstan-ignore-line */ + } + + // -------------------------------------------------------- + // UnitEnum (non-backed enum) + // -------------------------------------------------------- + + public function testItParsesEnumByInstance(): void + { + $value = TestEnum::A; + + $result = $this->field->parse($value); + + self::assertSame($value, $result); + self::assertSame(FieldType::Enum, $this->field->type()); + self::assertSame(TestEnum::class, $this->field->enumClass); + self::assertSame('enum(League\Csv\Schema\TestEnum)', $this->field->name()); + } + + public function testItParsesEnumByName(): void + { + $result = $this->field->parse('A'); + + self::assertSame(TestEnum::A, $result); + } + + public function testItTrimsStringInput(): void + { + $result = $this->field->parse(' A '); + + self::assertSame(TestEnum::A, $result); + } + + public function testItReturnsNullForInvalidEnumName(): void + { + self::assertNull($this->field->parse('INVALID')); + } + + // -------------------------------------------------------- + // BackedEnum (string/int) + // -------------------------------------------------------- + + public function testItParsesBackedEnumFromStringValue(): void + { + $field = new EnumField(TestBackedEnum::class); + + $result = $field->parse('a'); + + self::assertSame(TestBackedEnum::A, $result); + 
} + + public function testItParsesBackedEnumFromIntValue(): void + { + $field = new EnumField(TestIntBackedEnum::class); + + $result = $field->parse(1); + + self::assertSame(TestIntBackedEnum::A, $result); + } + + public function testItParsesNumericStringForIntBackedEnum(): void + { + $field = new EnumField(TestIntBackedEnum::class); + + $result = $field->parse('1'); + + self::assertSame(TestIntBackedEnum::A, $result); + } + + public function testItReturnsNullForInvalidBackedValue(): void + { + $field = new EnumField(TestBackedEnum::class); + + self::assertNull($field->parse('invalid')); + self::assertNull($field->parse([])); + self::assertNull($field->parse('')); + } + + // -------------------------------------------------------- + // Direct enum instance handling + // -------------------------------------------------------- + + public function testItRejectsEnumFromDifferentClass(): void + { + $result = $this->field->parse(OtherEnum::A); + + self::assertNull($result); + } + + public function test_metadata_contains_expected_structure(): void + { + $field = new EnumField(TestBackedEnum::class); + + $metadata = $field->metadata(); + + self::assertSame(TestBackedEnum::class, $metadata->get('class')); + self::assertSame('string', $metadata->get('backedType')); + self::assertSame([ + ['name' => 'A', 'value' => 'a'], + ['name' => 'B', 'value' => 'b'], + ], $metadata->get('cases')); + } +} + +enum TestEnum +{ + case A; + case B; +} + +enum TestBackedEnum: string +{ + case A = 'a'; + case B = 'b'; +} + +enum TestIntBackedEnum: int +{ + case A = 1; + case B = 2; +} + +enum OtherEnum +{ + case A; + case B; +} diff --git a/src/Schema/Field.php b/src/Schema/Field.php new file mode 100644 index 00000000..f131a469 --- /dev/null +++ b/src/Schema/Field.php @@ -0,0 +1,44 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. 
+ */ + +declare(strict_types=1); + +namespace League\Csv\Schema; + +interface Field extends FieldParser +{ + public function type(): FieldType; + + /** + * @return non-empty-string + */ + public function name(): string; + + /** + * Returns the confidence on the field value. + * + * The range of valid values is from 0.0 up to and including 1.0. + */ + public function confidenceThreshold(): float; + + /** + * Score a single value to estimate its type. + * + * returns -1 if the value is invalid + * returns 0 if the value is skipped + * returns 1 if the value is valid + * + * @return int<-1, 1> + */ + public function evaluate(mixed $value): int; + + public function metadata(): FieldMetadata; +} diff --git a/src/Schema/FieldEvaluator.php b/src/Schema/FieldEvaluator.php new file mode 100644 index 00000000..7a129a3a --- /dev/null +++ b/src/Schema/FieldEvaluator.php @@ -0,0 +1,62 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. 
+ */ + +declare(strict_types=1); + +namespace League\Csv\Schema; + +use ValueError; + +use function is_string; +use function trim; + +abstract class FieldEvaluator +{ + protected readonly float $confidenceThreshold; + + public function __construct(float $confidenceThreshold = 0.8) + { + $this->confidenceThreshold = self::filterConfidenceThreshold($confidenceThreshold); + } + + public function confidenceThreshold(): float + { + return $this->confidenceThreshold; + } + + final protected static function filterConfidenceThreshold(float $confidenceThreshold): float + { + ($confidenceThreshold >= 0 && $confidenceThreshold <= 1) || throw new ValueError('the confidence threshold must be between 0 and 1.'); + + return $confidenceThreshold; + } + + /** + * @return int<-1, 1> + */ + public function evaluate(mixed $value): int + { + if (null === $value) { + return 0; + } + + if (is_string($value)) { + $value = trim($value); + if ('' === $value) { + return 0; + } + } + + return null !== $this->parse($value) ? 1 : -1; + } + + abstract public function parse(mixed $value): mixed; +} diff --git a/src/Schema/FieldEvaluatorTest.php b/src/Schema/FieldEvaluatorTest.php new file mode 100644 index 00000000..edd30e0a --- /dev/null +++ b/src/Schema/FieldEvaluatorTest.php @@ -0,0 +1,98 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. 
+ */ + +declare(strict_types=1); + +namespace League\Csv\Schema; + +use PHPUnit\Framework\Attributes\CoversClass; +use PHPUnit\Framework\TestCase; +use ValueError; + +#[CoversClass(FieldEvaluator::class)] +final class FieldEvaluatorTest extends TestCase +{ + // -------------------------------------------------------- + // confidence threshold + // -------------------------------------------------------- + + public function testItAcceptsValidConfidenceThreshold(): void + { + $field = new DummyField(0.5); + + self::assertSame(0.5, $field->confidenceThreshold()); + } + + public function testItThrowsForInvalidConfidenceThreshold(): void + { + $this->expectException(ValueError::class); + + new DummyField(1.5); + } + + // -------------------------------------------------------- + // evaluate() + // -------------------------------------------------------- + + public function testEvaluateReturnsZeroForNull(): void + { + $field = new DummyField(); + + self::assertSame(0, $field->evaluate(null)); + } + + public function testEvaluateReturnsZeroForEmptyString(): void + { + $field = new DummyField(); + + self::assertSame(0, $field->evaluate('')); + self::assertSame(0, $field->evaluate(' ')); + } + + public function testEvaluateReturnsOneForValidValue(): void + { + $field = new DummyField(); + + self::assertSame(1, $field->evaluate('valid-value')); + } + + public function testEvaluateReturnsMinusOneForInvalidValue(): void + { + $field = new DummyField(); + + self::assertSame(-1, $field->evaluate('invAlid')); + } +} + +final class DummyField extends FieldEvaluator +{ + public function type(): FieldType + { + return FieldType::String; + } + + public function name(): string + { + return 'dummy'; + } + + public function parse(mixed $value): ?string + { + return is_string($value) && str_contains($value, 'valid') + ? 
$value
+            : null;
+    }
+
+    public function metadata(): FieldMetadata
+    {
+        return new FieldMetadata();
+    }
+}
diff --git a/src/Schema/FieldList.php b/src/Schema/FieldList.php
new file mode 100644
index 00000000..b9d996b3
--- /dev/null
+++ b/src/Schema/FieldList.php
@@ -0,0 +1,197 @@
+
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */
+
+declare(strict_types=1);
+
+namespace League\Csv\Schema;
+
+use Countable;
+use Iterator;
+use IteratorAggregate;
+use ValueError;
+
+use function array_filter;
+use function array_flip;
+use function array_key_exists;
+use function array_values;
+use function count;
+
+/**
+ * An ordered, zero-indexed collection of Field instances.
+ *
+ * @implements IteratorAggregate<int, Field>
+ */
+final class FieldList implements Countable, IteratorAggregate
+{
+    /** @var list<Field> */
+    private array $fields;
+
+    public function __construct(Field ...$fields)
+    {
+        // array_values() re-indexes the variadic input so offsets always start at 0
+        $this->fields = array_values($fields);
+    }
+
+    /**
+     * Returns a list made of a BooleanField, a NumericField and a JsonField, in that order.
+     */
+    public static function default(): self
+    {
+        return new self(
+            new BooleanField(),
+            new NumericField(),
+            new JsonField(),
+        );
+    }
+
+    /**
+     * Tells whether the list contains no field.
+     */
+    public function isEmpty(): bool
+    {
+        return [] === $this->fields;
+    }
+
+    /**
+     * Returns the number of fields in the list.
+     */
+    public function count(): int
+    {
+        return count($this->fields);
+    }
+
+    /**
+     * @return Iterator<int, Field>
+     */
+    public function getIterator(): Iterator
+    {
+        yield from $this->fields;
+    }
+
+    /**
+     * @return list<Field>
+     */
+    public function all(): array
+    {
+        return $this->fields;
+    }
+
+    /**
+     * Returns the field at offset 0 or null when the list is empty.
+     */
+    public function first(): ?Field
+    {
+        return $this->nth(0);
+    }
+
+    /**
+     * Returns the field at offset -1 (the last one) or null when the list is empty.
+     */
+    public function last(): ?Field
+    {
+        return $this->nth(-1);
+    }
+
+    /**
+     * Returns the field at the given offset or null when the offset does not exist.
+     *
+     * Negative offsets are resolved from the end of the list.
+     */
+    public function nth(int $offset): ?Field
+    {
+        return $this->fields[$this->offset($offset)] ?? null;
+    }
+
+    /**
+     * Same lookup as nth() but failing loudly on a missing offset.
+     *
+     * @throws ValueError if the offset does not exist
+     */
+    public function get(int $offset): Field
+    {
+        return $this->nth($offset) ?? throw new ValueError('Invalid field offset: '.$offset);
+    }
+
+    /**
+     * Normalizes an offset: a negative offset is shifted by the list size,
+     * and null is returned when the resulting key does not exist.
+     */
+    private function offset(int $offset): ?int
+    {
+        if ($offset < 0) {
+            $offset += count($this->fields);
+        }
+
+        return array_key_exists($offset, $this->fields) ? 
$offset : null;
+    }
+
+    /**
+     * Returns a new list with the given fields (or the fields of the given lists)
+     * added after the current ones; returns the same instance when nothing is added.
+     */
+    public function append(Field|self ...$items): self
+    {
+        $fields = self::flatten(...$items);
+
+        return [] === $fields ? $this : new self(...$this->fields, ...$fields);
+    }
+
+    /**
+     * Returns a new list with the given fields (or the fields of the given lists)
+     * added before the current ones; returns the same instance when nothing is added.
+     */
+    public function prepend(Field|self ...$items): self
+    {
+        $fields = self::flatten(...$items);
+
+        return [] === $fields ? $this : new self(...$fields, ...$this->fields);
+    }
+
+    /**
+     * Expands every FieldList argument into its fields, keeping the argument order.
+     *
+     * @return list<Field>
+     */
+    private static function flatten(Field|self ...$items): array
+    {
+        $fields = [];
+        foreach ($items as $item) {
+            if ($item instanceof Field) {
+                $fields[] = $item;
+                continue;
+            }
+
+            foreach ($item->fields as $field) {
+                $fields[] = $field;
+            }
+        }
+
+        return $fields;
+    }
+
+    /**
+     * Returns a new list where the field at the given offset is swapped for the given one.
+     *
+     * Negative offsets are resolved from the end of the list.
+     *
+     * @throws ValueError if the offset does not exist
+     */
+    public function replace(int $offset, Field $field): self
+    {
+        $found = $this->offset($offset);
+        null !== $found || throw new ValueError('the offset: '.$offset.' does not exist.');
+
+        $fields = $this->fields;
+        $fields[$found] = $field;
+
+        return new self(...$fields);
+    }
+
+    /**
+     * Returns a new list without the fields located at the given offsets.
+     *
+     * Unknown offsets are ignored; the same instance is returned when none
+     * of the offsets exists. Removing every field yields an empty list.
+     */
+    public function removeByOffset(int ...$offsets): self
+    {
+        $validOffsets = [];
+        foreach ($offsets as $offset) {
+            $index = $this->offset($offset);
+            if (null !== $index) {
+                $validOffsets[] = $index;
+            }
+        }
+
+        if ([] === $validOffsets) {
+            return $this;
+        }
+
+        // flipped so that membership is checked with isset() on the keys
+        $validOffsets = array_flip($validOffsets);
+        $fields = [];
+        foreach ($this->fields as $offset => $field) {
+            if (!isset($validOffsets[$offset])) {
+                $fields[] = $field;
+            }
+        }
+
+        // at least one field was removed at this point: an empty result is a
+        // legitimate empty list and must not fall back to the current instance
+        return new self(...$fields);
+    }
+
+    /**
+     * Returns a new list without the fields whose type() is the given one;
+     * returns the same instance when no field matches.
+     */
+    public function removeByType(FieldType $fieldType): self
+    {
+        $fields = array_filter(
+            $this->fields,
+            fn (Field $field): bool => $field->type() !== $fieldType
+        );
+
+        return $this->fields === $fields ? $this : new self(...$fields);
+    }
+
+    public function removeByName(Field|string $name): self
+    {
+        if ($name instanceof Field) {
+            $name = $name->name();
+        }
+
+        $fields = array_filter(
+            $this->fields,
+            fn (Field $field): bool => $field->name() !== $name
+        );
+
+        return $this->fields === $fields ? 
$this : new self(...$fields); + } +} diff --git a/src/Schema/FieldListTest.php b/src/Schema/FieldListTest.php new file mode 100644 index 00000000..15315548 --- /dev/null +++ b/src/Schema/FieldListTest.php @@ -0,0 +1,249 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. + */ + +declare(strict_types=1); + +namespace League\Csv\Schema; + +use PHPUnit\Framework\TestCase; +use ValueError; + +use function iterator_to_array; + +final class FieldListTest extends TestCase +{ + private Field $s1; + private Field $s2; + private Field $s3; + + protected function setUp(): void + { + $this->s1 = $this->createField(FieldType::String); + $this->s2 = $this->createField(FieldType::Numeric); + $this->s3 = $this->createField(FieldType::Boolean); + } + + private function createField(FieldType $type): Field + { + return new class ($type) implements Field { + public function __construct(private FieldType $type) + { + } + + public function type(): FieldType + { + return $this->type; + } + + public function name(): string + { + return $this->type->name; + } + + public function metadata(): FieldMetadata + { + return new FieldMetadata(); + } + + public function confidenceThreshold(): float + { + return 0.5; + } + + public function parse(mixed $value): mixed + { + return $value; + } + + public function evaluate(mixed $value): int + { + return 1; + } + }; + } + + public function testConstructAndAll(): void + { + $list = new FieldList($this->s1, $this->s2); + + self::assertSame([$this->s1, $this->s2], $list->all()); + } + + public function testIsEmpty(): void + { + self::assertTrue((new FieldList())->isEmpty()); + self::assertFalse((new FieldList($this->s1))->isEmpty()); + } + + public function testCount(): void + { + $list = new FieldList($this->s1, $this->s2); + + self::assertCount(2, $list); + } + + public function testIterator(): void + { + $list = new FieldList($this->s1, $this->s2); + + 
self::assertSame([$this->s1, $this->s2], iterator_to_array($list)); + } + + public function testFirstAndLast(): void + { + $list = new FieldList($this->s1, $this->s2, $this->s3); + + self::assertSame($this->s1, $list->first()); + self::assertSame($this->s3, $list->last()); + } + + public function testNthWithPositiveOffset(): void + { + $list = new FieldList($this->s1, $this->s2); + + self::assertSame($this->s2, $list->nth(1)); + } + + public function testNthWithNegativeOffset(): void + { + $list = new FieldList($this->s1, $this->s2, $this->s3); + + self::assertSame($this->s3, $list->nth(-1)); + self::assertSame($this->s2, $list->nth(-2)); + } + + public function testNthOutOfBounds(): void + { + $list = new FieldList($this->s1); + + self::assertNull($list->nth(10)); + self::assertNull($list->nth(-10)); + } + + public function testGet(): void + { + $list = new FieldList($this->s1); + + self::assertSame($this->s1, $list->get(0)); + } + + public function testGetThrows(): void + { + $list = new FieldList(); + + $this->expectException(ValueError::class); + $this->expectExceptionMessage('Invalid field offset: 0'); + + $list->get(0); + } + + public function testAppend(): void + { + $list = new FieldList($this->s1); + + $new = $list->append(new FieldList($this->s2)); + + self::assertSame([$this->s1, $this->s2], $new->all()); + self::assertSame([$this->s1], $list->all()); // immutability + } + + public function testPrepend(): void + { + $list = new FieldList($this->s1); + + $new = $list->prepend($this->s2); + + self::assertSame([$this->s2, $this->s1], $new->all()); + self::assertSame([$this->s1], $list->all()); // immutability + } + + public function testReplace(): void + { + $list = new FieldList($this->s1, $this->s2); + + $new = $list->replace(0, $this->s3); + + self::assertSame([$this->s3, $this->s2], $new->all()); + self::assertSame([$this->s1, $this->s2], $list->all()); // immutability + } + + public function testReplaceThrows(): void + { + $list = new FieldList(); + + 
$this->expectException(ValueError::class); + + $list->replace(0, $this->s1); + } + + public function testRemoveByOffset(): void + { + $list = new FieldList($this->s1, $this->s2, $this->s3); + + $new = $list->removeByOffset(1); + + self::assertSame([$this->s1, $this->s3], $new->all()); + } + + public function testRemoveByOffsetMultiple(): void + { + $list = new FieldList($this->s1, $this->s2, $this->s3); + + $new = $list->removeByOffset(0, 2); + + self::assertSame([$this->s2], $new->all()); + } + + public function testRemoveByOffsetInvalidReturnsSameInstance(): void + { + $list = new FieldList($this->s1); + + $new = $list->removeByOffset(10); + + self::assertSame($list, $new); + } + + public function testRemoveByType(): void + { + $list = new FieldList($this->s1, $this->s2); + + $new = $list->removeByType(FieldType::String); + + self::assertSame([$this->s2], $new->all()); + } + + public function testRemoveByTypeNoMatchReturnsSameInstance(): void + { + $list = new FieldList($this->s1); + + $new = $list->removeByType(FieldType::Numeric); + + self::assertSame($list, $new); + } + + public function testRemoveByName(): void + { + $list = new FieldList($this->s1, $this->s2); + + $new = $list->removeByName($this->s1); + + self::assertSame([$this->s2], $new->all()); + } + + public function testRemoveByNameNoMatchReturnsSameInstance(): void + { + $list = new FieldList($this->s1); + + $new = $list->removeByName('enum'); + + self::assertSame($list, $new); + } +} diff --git a/src/Schema/FieldMetadata.php b/src/Schema/FieldMetadata.php new file mode 100644 index 00000000..db45aad3 --- /dev/null +++ b/src/Schema/FieldMetadata.php @@ -0,0 +1,105 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. 
+ */
+
+declare(strict_types=1);
+
+namespace League\Csv\Schema;
+
+use Countable;
+use Iterator;
+use IteratorAggregate;
+use ValueError;
+
+use function array_key_exists;
+use function array_keys;
+use function array_map;
+use function count;
+
+/**
+ * Read-only key/value bag describing a field.
+ *
+ * @implements IteratorAggregate<array-key, mixed>
+ */
+final class FieldMetadata implements Countable, IteratorAggregate
+{
+    /** @var array<array-key, mixed> */
+    private readonly array $data;
+
+    /**
+     * @param iterable<array-key, mixed> $data
+     *
+     * @throws ValueError if the iterable yields the same key more than once
+     */
+    public function __construct(iterable $data = [])
+    {
+        $newData = [];
+        foreach ($data as $key => $value) {
+            // an iterator, unlike an array, can yield the same key several times
+            self::assertNoDuplicate($newData, $key);
+            $newData[$key] = $value;
+        }
+
+        $this->data = $newData;
+    }
+
+    /**
+     * @throws ValueError if the key is already present in the given array
+     */
+    private static function assertNoDuplicate(array $data, string|int $key): void
+    {
+        ! array_key_exists($key, $data) || throw new ValueError('The key already exists: '.$key);
+    }
+
+    /**
+     * Returns the number of top-level entries.
+     */
+    public function count(): int
+    {
+        return count($this->data);
+    }
+
+    /**
+     * @return Iterator<array-key, mixed>
+     */
+    public function getIterator(): Iterator
+    {
+        yield from $this->data;
+    }
+
+    /**
+     * Returns the entries as an array; values that are themselves
+     * FieldMetadata instances are converted with their own all() method.
+     *
+     * @return array<array-key, mixed>
+     */
+    public function all(): array
+    {
+        return array_map(
+            fn (mixed $value): mixed => $value instanceof self ? $value->all() : $value,
+            $this->data
+        );
+    }
+
+    /**
+     * Tells whether the bag contains no entry.
+     */
+    public function isEmpty(): bool
+    {
+        return [] === $this->data;
+    }
+
+    /**
+     * @return list<array-key>
+     */
+    public function keys(): array
+    {
+        return array_keys($this->data);
+    }
+
+    /**
+     * Tells whether the key exists, even when its value is null.
+     */
+    public function has(int|string $offset): bool
+    {
+        return array_key_exists($offset, $this->data);
+    }
+
+    /**
+     * Returns the value stored under the key.
+     *
+     * @throws ValueError if the key does not exist
+     */
+    public function get(int|string $offset): mixed
+    {
+        return $this->has($offset) ? 
$this->data[$offset] : throw new ValueError('The key does not exist: '.$offset); + } + + public function union(FieldMetadata ...$metadatas): self + { + if ([] === $metadatas) { + return $this; + } + + $newData = $this->data; + foreach ($metadatas as $metadata) { + foreach ($metadata->data as $key => $value) { + self::assertNoDuplicate($newData, $key); + $newData[$key] = $value; + } + } + + return new self($newData); + } +} diff --git a/src/Schema/FieldMetadataTest.php b/src/Schema/FieldMetadataTest.php new file mode 100644 index 00000000..cefa4d89 --- /dev/null +++ b/src/Schema/FieldMetadataTest.php @@ -0,0 +1,145 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. + */ + +declare(strict_types=1); + +namespace League\Csv\Schema; + +use Iterator; +use IteratorAggregate; +use PHPUnit\Framework\Attributes\CoversClass; +use PHPUnit\Framework\TestCase; +use ValueError; + +use function iterator_to_array; + +#[CoversClass(FieldMetadata::class)] +final class FieldMetadataTest extends TestCase +{ + public function testConstructAndAll(): void + { + $metadata = new FieldMetadata(['a' => 1, 'b' => 2]); + + self::assertSame(['a' => 1, 'b' => 2], $metadata->all()); + } + + public function testCount(): void + { + $metadata = new FieldMetadata(['a' => 1, 'b' => 2]); + + self::assertCount(2, $metadata); + } + + public function testIsEmpty(): void + { + self::assertTrue((new FieldMetadata([]))->isEmpty()); + self::assertFalse((new FieldMetadata(['a' => 1]))->isEmpty()); + } + + public function testKeys(): void + { + $metadata = new FieldMetadata(['a' => 1, 'b' => 2]); + + self::assertSame(['a', 'b'], $metadata->keys()); + } + + public function testHas(): void + { + $metadata = new FieldMetadata(['a' => 1]); + + self::assertTrue($metadata->has('a')); + self::assertFalse($metadata->has('b')); + } + + public function testGet(): void + { + $metadata = new FieldMetadata(['a' => 42]); + + 
self::assertSame(42, $metadata->get('a')); + } + + public function testGetThrowsOnMissingKey(): void + { + $metadata = new FieldMetadata(); + + $this->expectException(ValueError::class); + $this->expectExceptionMessage('The key does not exist: a'); + + $metadata->get('a'); + } + + public function testIterator(): void + { + $data = ['a' => 1, 'b' => 2]; + $metadata = new FieldMetadata($data); + + self::assertSame($data, iterator_to_array($metadata)); + } + + public function testConstructWithDuplicateKeysThrows(): void + { + $test = new class () implements IteratorAggregate { + public function getIterator(): Iterator + { + yield 'a' => 1; + yield 'a' => 2; + } + }; + + $this->expectException(ValueError::class); + + new FieldMetadata($test); + } + + public function testMergeSingle(): void + { + $m1 = new FieldMetadata(['a' => 1]); + $m2 = new FieldMetadata(['b' => 2]); + + $merged = $m1->union($m2); + + self::assertSame(['a' => 1, 'b' => 2], $merged->all()); + } + + public function testMergeMultiple(): void + { + $m1 = new FieldMetadata(['a' => 1]); + $m2 = new FieldMetadata(['b' => 2]); + $m3 = new FieldMetadata(['c' => 3]); + + $merged = $m1->union($m2, $m3); + + self::assertSame([ + 'a' => 1, + 'b' => 2, + 'c' => 3, + ], $merged->all()); + } + + public function testMergeDuplicateKeysThrows(): void + { + $m1 = new FieldMetadata(['a' => 1]); + $m2 = new FieldMetadata(['a' => 2]); + + $this->expectException(ValueError::class); + + $m1->union($m2); + } + + public function testMergeWithNoArgumentsReturnsSameInstance(): void + { + $m1 = new FieldMetadata(['a' => 1]); + + $merged = $m1->union(); + + self::assertSame($m1, $merged); + } +} diff --git a/src/Schema/FieldParser.php b/src/Schema/FieldParser.php new file mode 100644 index 00000000..25bdeca6 --- /dev/null +++ b/src/Schema/FieldParser.php @@ -0,0 +1,29 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. 
+ */
+
+declare(strict_types=1);
+
+namespace League\Csv\Schema;
+
+/**
+ * Contract for objects able to turn a raw value into a normalized one.
+ *
+ * @template T
+ */
+interface FieldParser
+{
+    /**
+     * Tries to parse and normalize the value according
+     * to the type handled by the parser. If the value
+     * cannot be parsed, null is returned.
+     *
+     * @return ?T
+     */
+    public function parse(mixed $value): mixed;
+}
diff --git a/src/Schema/FieldType.php b/src/Schema/FieldType.php
new file mode 100644
index 00000000..ad602a49
--- /dev/null
+++ b/src/Schema/FieldType.php
@@ -0,0 +1,27 @@
+
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */
+
+declare(strict_types=1);
+
+namespace League\Csv\Schema;
+
+/**
+ * String-backed list of the field types a Field can report through its type() method.
+ */
+enum FieldType: string
+{
+    case Boolean = 'boolean';
+    case Custom = 'custom';
+    case Datetime = 'datetime';
+    case Enum = 'enum';
+    case Json = 'json';
+    case Numeric = 'numeric';
+    case String = 'string';
+    case Set = 'set';
+    case Time = 'time';
+}
diff --git a/src/Schema/Inspector.php b/src/Schema/Inspector.php
new file mode 100644
index 00000000..509a9ada
--- /dev/null
+++ b/src/Schema/Inspector.php
@@ -0,0 +1,118 @@
+
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */
+
+declare(strict_types=1);
+
+namespace League\Csv\Schema;
+
+use League\Csv\InvalidArgument;
+use League\Csv\Statement;
+use League\Csv\SyntaxError;
+use League\Csv\TabularData;
+use ValueError;
+
+use function arsort;
+use function is_string;
+use function trim;
+
+use const SORT_NUMERIC;
+
+/**
+ * Infers a Schema from a sample of tabular records by scoring
+ * every column against the fields of a FieldList.
+ */
+final readonly class Inspector
+{
+    /**
+     * @param positive-int $sampleLimit maximum number of records passed to Statement::limit()
+     *
+     * @throws ValueError if the sample limit is lower than 1
+     */
+    public function __construct(
+        public int $sampleLimit = 10,
+        public FieldList $fieldList = new FieldList(),
+    ) {
+        1 <= $this->sampleLimit || throw new ValueError('A sample size must be greater or equal to 1.');
+    }
+
+    /**
+     * Returns the same instance when the limit is unchanged, a new one otherwise.
+     *
+     * @param positive-int $sampleLimit
+     */
+    public function withSampleLimit(int $sampleLimit): self
+    {
+        return $sampleLimit === $this->sampleLimit ? $this : new self($sampleLimit, $this->fieldList);
+    }
+
+    /**
+     * Always returns a new instance, even when the same list is given.
+     */
+    public function withFields(FieldList $fieldList): self
+    {
+        return new self($this->sampleLimit, $fieldList);
+    }
+
+    /**
+     * Returns an instance configured with FieldList::default().
+     *
+     * @param positive-int $sampleLimit
+     */
+    public static function default(int $sampleLimit = 10): self
+    {
+        return new self($sampleLimit, FieldList::default());
+    }
+
+    /**
+     * Builds a Schema holding one field per column found in the sampled records.
+     *
+     * For each column the share of non-blank values that every candidate field
+     * evaluates to 1 is computed; the best scoring field whose share reaches its
+     * own confidence threshold is selected. A column without any selected field
+     * is described by a StringField.
+     *
+     * @throws InvalidArgument
+     * @throws SyntaxError
+     * @throws \League\Csv\Exception
+     */
+    public function schema(TabularData $tabularData, array $header = []): Schema
+    {
+        $score = [];
+        $counted = [];
+        foreach ((new Statement())->limit($this->sampleLimit)->process($tabularData, $header) as $record) {
+            foreach ($record as $column => $value) {
+                // registered before the blank check so that fully blank columns still reach the result
+                $counted[$column] ??= 0;
+                $score[$column] ??= [];
+                if (is_string($value)) {
+                    $value = trim($value);
+                }
+
+                // null and blank values are ignored: they do not count in the column total
+                if (null === $value || '' === $value) {
+                    continue;
+                }
+
+                $counted[$column]++;
+                foreach ($this->fieldList as $offset => $field) {
+                    $score[$column][$offset] ??= 0;
+                    if (1 === $field->evaluate($value)) {
+                        $score[$column][$offset]++;
+                    }
+                }
+            }
+        }
+
+        $result = [];
+        foreach ($score as $column => $fields) {
+            // fallback used when no candidate field is selected for the column
+            $result[$column] = new StringField();
+            $total = $counted[$column] ?? 
0; + if (0 === $total) { + continue; + } + + $normalized = []; + foreach ($fields as $offset => $validCount) { + $normalized[$offset] = $validCount / $total; + } + + arsort($normalized, SORT_NUMERIC); + + foreach ($normalized as $offset => $scoreValue) { + $field = $this->fieldList->get($offset); + if ($scoreValue >= $field->confidenceThreshold()) { + $result[$column] = $field; + break; + } + } + } + + return new Schema($result); + } +} diff --git a/src/Schema/InspectorTest.php b/src/Schema/InspectorTest.php new file mode 100644 index 00000000..9af4b263 --- /dev/null +++ b/src/Schema/InspectorTest.php @@ -0,0 +1,304 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. + */ + +declare(strict_types=1); + +namespace League\Csv\Schema; + +use League\Csv\Reader; +use PHPUnit\Framework\TestCase; +use ValueError; + +use function array_map; +use function implode; +use function rand; +use function random_int; +use function str_repeat; +use function str_shuffle; +use function substr; + +final class InspectorTest extends TestCase +{ + private function csv(string $content): Reader + { + $reader = Reader::fromString($content); + $reader->setHeaderOffset(0); + $reader->setDelimiter(';'); + + return $reader; + } + + public function testConstructorRejectsInvalidSampleLimit(): void + { + $this->expectException(ValueError::class); + + new Inspector(0); /* @phpstan-ignore-line */ + } + + public function testWithSampleLimitReturnsSameInstanceIfUnchanged(): void + { + $inspector = new Inspector(10); + + self::assertSame($inspector, $inspector->withSampleLimit(10)); + } + + public function testWithSampleLimitReturnsNewInstanceIfChanged(): void + { + $inspector = new Inspector(10); + $new = $inspector->withSampleLimit(5); + + self::assertNotSame($inspector, $new); + self::assertSame(5, $new->sampleLimit); + } + + public function testWithFieldsReturnsNewInstance(): void + { + $inspector = new 
Inspector(10); + $fieldList = new FieldList(); + + $new = $inspector->withFields($fieldList); + + self::assertNotSame($inspector, $new); + self::assertSame($fieldList, $new->fieldList); + } + + public function testDefaultFactory(): void + { + $inspector = Inspector::default(20); + + self::assertSame(20, $inspector->sampleLimit); + self::assertCount(3, $inspector->fieldList); + } + + public function testSchemaFallsBackToStringFieldWhenNoMatch(): void + { + $csv = $this->csv(<<inferSchema(Inspector::default()); + + self::assertSame(['name' => 'string', 'value' => 'string'], $schema->types()); + } + + public function testSchemaDetectsNumericField(): void + { + $csv = $this->csv(<<inferSchema(new Inspector(10, new FieldList(new NumericField()))); + + self::assertSame(['age' => 'numeric'], $schema->types()); + } + + public function testSchemaIgnoresEmptyValues(): void + { + $csv = $this->csv(<<inferSchema(new Inspector(10, new FieldList(new NumericField()))); + + self::assertSame('numeric', $schema->get('age')->name()); + } + + public function testSchemaRespectsSampleLimit(): void + { + $csv = $this->csv(<<inferSchema(new Inspector(2, new FieldList(new NumericField()))); + + self::assertSame('numeric', $schema->get('value')->name()); + } + + public function testSchemaChoosesBestScoringField(): void + { + $csv = $this->csv(<<inferSchema(new Inspector(10, $fieldList)); + + self::assertSame('string', $schema->get('value')->name()); + } + + /******************* + * FUZZY Tests + *******************/ + + private function csvFromRows(array $rows): Reader + { + $content = implode( + "\n", + array_map( + fn (array $row): string => implode(';', $row), + $rows + ) + ); + + return $this->csv($content); + } + + private function randomString(): string + { + return substr(str_shuffle(str_repeat('abcdefghijklmnopqrstuvwxyz', 5)), 0, random_int(1, 10)); + } + + private function randomValue(): mixed + { + return match (rand(0, 5)) { + 0 => random_int(0, 1000), // int + 1 => 
random_int(0, 1000) / 10, // float + 2 => (string) random_int(0, 1000), // numeric string + 3 => $this->randomString(), // random string + 4 => '', // empty string + default => null, + }; + } + + public function testFuzzSchemaDoesNotCrash(): void + { + $inspector = Inspector::default(); + $columns = ['col1', 'col2', 'col3']; + $rows = [$columns]; + for ($i = 0; $i < 50; $i++) { + for ($r = 0; $r < rand(1, 20); $r++) { + $rows[] = [ + $this->randomValue(), + $this->randomValue(), + $this->randomValue(), + ]; + } + + $csv = $this->csvFromRows($rows); + + self::assertSame($columns, $inspector->schema($csv)->names()); + } + + } + + public function testFuzzTypesAreAlwaysNonEmptyStrings(): void + { + $inspector = Inspector::default(); + + for ($i = 0; $i < 50; $i++) { + $columns = ['a', 'b']; + + $rows = [$columns]; + + for ($r = 0; $r < rand(1, 20); $r++) { + $rows[] = [ + $this->randomValue(), + $this->randomValue(), + ]; + } + + $schema = $inspector->schema($this->csvFromRows($rows)); + + foreach ($schema->types() as $type) { + self::assertIsString($type); + self::assertNotSame('', $type); + } + } + } + + public function testFuzzEmptyColumnsFallbackToString(): void + { + $inspector = Inspector::default(); + + $rows = [ + ['col'], + ['', null, '', null], + ]; + + $schema = $inspector->schema($this->csvFromRows($rows)); + + self::assertSame('string', $schema->get('col')->name()); + } + + public function testFuzzNumericColumnsDetected(): void + { + $inspector = new Inspector(50, new FieldList(new NumericField(), new StringField())); + + for ($i = 0; $i < 30; $i++) { + $rows = [ + ['num'], + ]; + + for ($r = 0; $r < rand(5, 20); $r++) { + $rows[] = [rand(0, 1000)]; + } + + $schema = $inspector->schema($this->csvFromRows($rows)); + + self::assertSame('numeric', $schema->get('num')->name()); + } + } + + public function testFuzzMixedDataPrefersString(): void + { + $inspector = new Inspector(50, new FieldList(new NumericField(), new StringField())); + + for ($i = 0; $i < 30; 
$i++) { + $rows = [ + ['mixed'], + ]; + + for ($r = 0; $r < 20; $r++) { + $rows[] = [ + 1 === rand(0, 1) + ? rand(0, 100) + : $this->randomString(), + ]; + } + + $schema = $inspector->schema($this->csvFromRows($rows)); + + self::assertSame('string', $schema->get('mixed')->name()); + } + } + + public function testFuzzSampleLimitDoesNotBreakInference(): void + { + $fieldList = new FieldList(new NumericField(), new StringField()); + for ($limit = 1; $limit <= 10; $limit++) { + $inspector = new Inspector($limit, $fieldList); + + $rows = [ + ['value'], + ]; + + for ($i = 0; $i < 50; $i++) { + $rows[] = [random_int(0, 100)]; + } + + $schema = $inspector->schema($this->csvFromRows($rows)); + + self::assertSame('numeric', $schema->get('value')->name()); + } + } +} diff --git a/src/Schema/JsonField.php b/src/Schema/JsonField.php new file mode 100644 index 00000000..dc7d3aef --- /dev/null +++ b/src/Schema/JsonField.php @@ -0,0 +1,83 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. 
+ */
+
+declare(strict_types=1);
+
+namespace League\Csv\Schema;
+
+use ValueError;
+
+use function is_string;
+use function json_decode;
+use function json_encode;
+use function json_last_error;
+use function json_last_error_msg;
+use function trim;
+
+use const JSON_ERROR_NONE;
+use const JSON_THROW_ON_ERROR;
+
+/**
+ * Field recognizing strings that json_decode() accepts
+ * with the configured flags and depth.
+ */
+final class JsonField extends FieldEvaluator implements Field
+{
+    public readonly int $flags;
+    /** @var int<1, max> */
+    public readonly int $depth;
+
+    /**
+     * @param int<1, max> $depth
+     *
+     * @throws ValueError if the flags/depth probe fails or the threshold is rejected by the parent
+     */
+    public function __construct(
+        int $flags = 0,
+        int $depth = 512,
+        float $confidenceThreshold = 0.8
+    ) {
+        // probe call: JSON_THROW_ON_ERROR is masked out so failures surface via json_last_error()
+        json_encode([], flags: $flags & ~JSON_THROW_ON_ERROR, depth: $depth);
+        $errorCode = json_last_error();
+        if (JSON_ERROR_NONE !== $errorCode) {
+            throw new ValueError('The flags or the depth given are not valid JSON encoding parameters in PHP; '.json_last_error_msg(), $errorCode);
+        }
+
+        parent::__construct($confidenceThreshold);
+        $this->flags = $flags;
+        $this->depth = $depth;
+    }
+
+    public function type(): FieldType
+    {
+        return FieldType::Json;
+    }
+
+    public function name(): string
+    {
+        return $this->type()->value;
+    }
+
+    /**
+     * Decodes a JSON string, objects being returned as associative arrays.
+     *
+     * Null is returned for non-string values, blank strings and strings that
+     * fail to decode; the JSON literal `null` also decodes to null.
+     */
+    public function parse(mixed $value): mixed
+    {
+        $json = is_string($value) ? trim($value) : '';
+        if ('' === $json) {
+            return null;
+        }
+
+        $decoded = json_decode(
+            json: $json,
+            associative: true,
+            depth: $this->depth,
+            flags: $this->flags & ~JSON_THROW_ON_ERROR
+        );
+        if (JSON_ERROR_NONE !== json_last_error()) {
+            return null;
+        }
+
+        return $decoded;
+    }
+
+    /**
+     * Exposes the configured flags and depth.
+     */
+    public function metadata(): FieldMetadata
+    {
+        $settings = [
+            'flags' => $this->flags,
+            'depth' => $this->depth,
+        ];
+
+        return new FieldMetadata($settings);
+    }
+}
diff --git a/src/Schema/JsonFieldTest.php b/src/Schema/JsonFieldTest.php
new file mode 100644
index 00000000..fe0a592a
--- /dev/null
+++ b/src/Schema/JsonFieldTest.php
@@ -0,0 +1,140 @@
+
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */ + +declare(strict_types=1); + +namespace League\Csv\Schema; + +use PHPUnit\Framework\Attributes\CoversClass; +use PHPUnit\Framework\Attributes\DataProvider; +use PHPUnit\Framework\TestCase; +use stdClass; +use ValueError; + +#[CoversClass(JsonField::class)] +final class JsonFieldTest extends TestCase +{ + private JsonField $field; + + protected function setUp(): void + { + $this->field = new JsonField(); + } + + public function testTypeAndName(): void + { + self::assertSame(FieldType::Json, $this->field->type()); + self::assertSame(FieldType::Json->value, $this->field->name()); + } + + public function testDetailsExposeFlagsAndDepth(): void + { + $field = new JsonField(flags: JSON_BIGINT_AS_STRING, depth: 256); + $details = $field->metadata(); + + self::assertSame(JSON_BIGINT_AS_STRING, $details->get('flags')); + self::assertSame(256, $details->get('depth')); + } + + public static function provideValidJson(): array + { + return [ + ['{"a":1}', ['a' => 1]], + ['{"a":1,"b":2}', ['a' => 1, 'b' => 2]], + ['[1,2,3]', [1, 2, 3]], + [' {"foo":"bar"} ', ['foo' => 'bar']], + ['{"nested":{"x":1}}', ['nested' => ['x' => 1]]], + ['true', true], + ['false', false], + ['null', null], + ['123', 123], + ]; + } + + #[DataProvider('provideValidJson')] + public function testParseValidJson(string $input, mixed $expected): void + { + $result = $this->field->parse($input); + + self::assertSame($expected, $result); + } + + public static function provideInvalidJson(): array + { + return [ + [''], + [' '], + ['{invalid}'], + ['{"a":1'], // missing closing brace + ['[1,2,]'], // trailing comma + ['foo'], + ]; + } + + #[DataProvider('provideInvalidJson')] + public function testParseInvalidJsonReturnsNull(string $input): void + { + self::assertNull($this->field->parse($input)); + } + + public function testParseRejectsNonStringValues(): void + { + self::assertNull($this->field->parse(null)); + self::assertNull($this->field->parse(123)); + self::assertNull($this->field->parse([])); + 
self::assertNull($this->field->parse(new stdClass())); + } + + public function testDepthLimitIsRespected(): void + { + $field = new JsonField(depth: 2); + + $json = '{"a":{"b":{"c":1}}}'; // depth 3 + + self::assertNull($field->parse($json)); + } + + public function testFlagsAffectDecoding(): void + { + $json = '{"big":12345678901234567890}'; + + $default = new JsonField(); + $withFlag = new JsonField(flags: JSON_BIGINT_AS_STRING); + + $defaultResult = $default->parse($json); + $flagResult = $withFlag->parse($json); + + // default: bigint becomes float + self::assertIsArray($defaultResult); + self::assertIsFloat($defaultResult['big']); + + // with flag: bigint preserved as string + self::assertIsArray($flagResult); + self::assertIsString($flagResult['big']); + } + + public function testInvalidConstructorArgumentsThrow(): void + { + $this->expectException(ValueError::class); + + new JsonField(depth: 0); /* @phpstan-ignore-line */ + } + + public function test_metadata_contains_expected_structure(): void + { + $field = new JsonField(depth: 2, flags: JSON_BIGINT_AS_STRING); + + self::assertSame([ + 'flags' => JSON_BIGINT_AS_STRING, + 'depth' => 2, + ], $field->metadata()->all()); + } +} diff --git a/src/Schema/NumericField.php b/src/Schema/NumericField.php new file mode 100644 index 00000000..fa0512ca --- /dev/null +++ b/src/Schema/NumericField.php @@ -0,0 +1,125 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. 
+ */
+
+declare(strict_types=1);
+
+namespace League\Csv\Schema;
+
+use ValueError;
+
+use function filter_var;
+use function is_float;
+use function is_int;
+use function is_numeric;
+use function is_string;
+use function trim;
+
+use const FILTER_VALIDATE_INT;
+
+/**
+ * Field recognizing integers, floats and numeric strings,
+ * optionally restricted to an inclusive [min, max] range.
+ */
+final class NumericField extends FieldEvaluator implements Field
+{
+    /**
+     * @param int|float|null $min inclusive lower bound, null for no lower bound
+     * @param int|float|null $max inclusive upper bound, null for no upper bound
+     *
+     * @throws ValueError if both bounds are given and the minimum exceeds the maximum,
+     *                    or if the threshold is rejected by the parent constructor
+     */
+    public function __construct(
+        public readonly int|float|null $min = null,
+        public readonly int|float|null $max = null,
+        float $confidenceThreshold = 0.8
+    ) {
+        if (null !== $min && null !== $max && $min > $max) {
+            // the bounds are numeric values, not lengths
+            throw new ValueError('The minimum value can not be greater than the maximum value.');
+        }
+
+        parent::__construct($confidenceThreshold);
+    }
+
+    /**
+     * Accepts numbers greater than or equal to the given value.
+     */
+    public static function min(int|float $value, float $confidenceThreshold = 0.8): self
+    {
+        return new self(min: $value, max: null, confidenceThreshold: $confidenceThreshold);
+    }
+
+    /**
+     * Accepts numbers lower than or equal to the given value.
+     */
+    public static function max(int|float $value, float $confidenceThreshold = 0.8): self
+    {
+        return new self(min: null, max: $value, confidenceThreshold: $confidenceThreshold);
+    }
+
+    /**
+     * Accepts only numbers equal to the given value (both bounds set to it).
+     */
+    public static function fixed(int|float $value, float $confidenceThreshold = 0.8): self
+    {
+        return new self(min: $value, max: $value, confidenceThreshold: $confidenceThreshold);
+    }
+
+    /**
+     * Accepts numbers within the inclusive range.
+     *
+     * @throws ValueError if $min is greater than $max
+     */
+    public static function between(int|float $min, int|float $max, float $confidenceThreshold = 0.8): self
+    {
+        return new self(min: $min, max: $max, confidenceThreshold: $confidenceThreshold);
+    }
+
+    /**
+     * Accepts numbers greater than or equal to 0 (zero included, the bound is inclusive).
+     */
+    public static function positive(float $confidenceThreshold = 0.8): self
+    {
+        return new self(min: 0, confidenceThreshold: $confidenceThreshold);
+    }
+
+    /**
+     * Accepts numbers lower than or equal to 0 (zero included, the bound is inclusive).
+     */
+    public static function negative(float $confidenceThreshold = 0.8): self
+    {
+        return new self(max: 0, confidenceThreshold: $confidenceThreshold);
+    }
+
+    public function type(): FieldType
+    {
+        return FieldType::Numeric;
+    }
+
+    /**
+     * Returns "numeric", suffixed with "[value]" when both bounds are identical
+     * or "[min,max]" when at least one bound is set.
+     */
+    public function name(): string
+    {
+        $range = match (true) {
+            null === $this->min && null === $this->max => '',
+            $this->min === $this->max => '['.$this->min.']',
+            default => '['.$this->min.','.$this->max.']',
+        };
+
+        return FieldType::Numeric->value.$range;
+    }
+
+    /**
+     * Returns the value as int or float when it is numeric and within the bounds, null otherwise.
+     *
+     * Numeric strings are trimmed first; those accepted by FILTER_VALIDATE_INT
+     * become integers, all other numeric strings become floats.
+     */
+    public function parse(mixed $value): int|float|null
+    {
+        if (is_string($value)) {
+            $value = trim($value);
+            if ('' === $value || !is_numeric($value)) {
+                return null;
+            }
+
+            $filterValue = filter_var($value, FILTER_VALIDATE_INT);
+            $value = false === $filterValue ? (float) $value : $filterValue;
+        }
+
+        // booleans, arrays, objects and null are not numbers
+        if (!is_float($value) && !is_int($value)) {
+            return null;
+        }
+
+        if (null !== $this->min && $value < $this->min) {
+            return null;
+        }
+
+        if (null !== $this->max && $value > $this->max) {
+            return null;
+        }
+
+        return $value;
+    }
+
+    /**
+     * Exposes the configured bounds under the "constraints" key.
+     */
+    public function metadata(): FieldMetadata
+    {
+        return new FieldMetadata([
+            'constraints' => [
+                'min_value' => $this->min,
+                'max_value' => $this->max,
+            ],
+        ]);
+    }
+}
diff --git a/src/Schema/NumericFieldTest.php b/src/Schema/NumericFieldTest.php
new file mode 100644
index 00000000..0d64efce
--- /dev/null
+++ b/src/Schema/NumericFieldTest.php
@@ -0,0 +1,166 @@
+
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */ + +declare(strict_types=1); + +namespace League\Csv\Schema; + +use PHPUnit\Framework\Attributes\CoversClass; +use PHPUnit\Framework\Attributes\DataProvider; +use PHPUnit\Framework\TestCase; +use stdClass; +use ValueError; + +#[CoversClass(NumericField::class)] +final class NumericFieldTest extends TestCase +{ + private NumericField $field; + + protected function setUp(): void + { + $this->field = new NumericField(); + } + + // -------------------------------------------------------- + // VALID VALUES → float + // -------------------------------------------------------- + + public static function provideValidNumericValues(): array + { + return [ + 'positive int' => [10, 10], + 'negative int' => [-5, -5], + 'zero' => [0, 0], + 'positive float' => [10.5, 10.5], + 'negative float' => [-3.14, -3.14], + 'string positive int' => ['10', 10], + 'string positive float' => ['10.5', 10.5], + 'string negative int' => ['-2', -2], + 'string positive int with extra spaces' => [' 12 ', 12], + 'string positive float with extra spaces' => [' 3.14 ', 3.14], + 'string positive power float with extra spaces' => [' 3e14 ', 3e14], + ]; + } + + #[DataProvider('provideValidNumericValues')] + public function testParseValidValues(mixed $input, int|float $expected): void + { + self::assertSame($expected, $this->field->parse($input)); + } + + // -------------------------------------------------------- + // INVALID VALUES → null + // -------------------------------------------------------- + + public static function provideInvalidNumericValues(): array + { + return [ + [''], + [' '], + ['abc'], + ['12abc'], + ['abc12'], + [true], + [false], + [null], + [[]], + [new stdClass()], + ]; + } + + #[DataProvider('provideInvalidNumericValues')] + public function testParseInvalidValues(mixed $input): void + { + self::assertNull($this->field->parse($input)); + } + + public function test_metadata_contains_expected_structure(): void + { + self::assertFalse($this->field->metadata()->isEmpty()); + } + + 
// -------------------------------------------------------- + // Factory constructors + // -------------------------------------------------------- + + public function testMinFactory(): void + { + $field = NumericField::min(4); + + self::assertSame(FieldType::Numeric, $field->type()); + self::assertSame('numeric[4,]', $field->name()); + self::assertSame(0.8, $field->confidenceThreshold()); + self::assertSame(5, $field->parse(5)); + self::assertNull($field->parse(-4.1)); + self::assertNull($field->parse('0')); + } + + public function testMaxFactory(): void + { + $field = NumericField::max(4); + + self::assertSame(FieldType::Numeric, $field->type()); + self::assertSame('numeric[,4]', $field->name()); + self::assertSame(0.8, $field->confidenceThreshold()); + self::assertNull($field->parse(5)); + self::assertSame(-4.1, $field->parse(-4.1)); + self::assertSame(0, $field->parse('0')); + } + + public function testFixedFactory(): void + { + $field = NumericField::fixed(4); + + self::assertSame(FieldType::Numeric, $field->type()); + self::assertSame('numeric[4]', $field->name()); + self::assertSame(0.8, $field->confidenceThreshold()); + self::assertNull($field->parse(5)); + self::assertNull($field->parse(-4.1)); + self::assertSame(4, $field->parse('4')); + } + + public function testBetweenFactory(): void + { + $field = NumericField::between(-4, 4); + + self::assertSame(FieldType::Numeric, $field->type()); + self::assertSame('numeric[-4,4]', $field->name()); + self::assertSame(0.8, $field->confidenceThreshold()); + self::assertNull($field->parse(5)); + self::assertNull($field->parse(-4.1)); + self::assertSame(0, $field->parse('0')); + } + + public function testPositiveFactory(): void + { + $field = NumericField::positive(.5); + + self::assertSame(FieldType::Numeric, $field->type()); + self::assertSame('numeric[0,]', $field->name()); + self::assertSame(0.5, $field->confidenceThreshold()); + } + + public function testNegativeFactory(): void + { + $field = 
/**
 * Immutable, ordered collection of Field definitions indexed by column name or offset.
 *
 * Every modifier (append, replace, remove) returns a new instance.
 *
 * @implements IteratorAggregate<array-key, Field>
 */
final class Schema implements Countable, IteratorAggregate
{
    /** @var array<array-key, Field> */
    private readonly array $fields;

    /**
     * @param iterable<array-key, Field> $fields
     *
     * @throws ValueError if the iterable yields the same key more than once
     */
    public function __construct(iterable $fields = [])
    {
        $newFields = [];
        foreach ($fields as $key => $value) {
            self::assertNoDuplicate($newFields, $key);
            $newFields[$key] = $value;
        }

        $this->fields = $newFields;
    }

    /**
     * Returns a new schema with the field added at the end under the given name.
     *
     * @throws ValueError if the name is already used
     */
    public function append(int|string $name, Field $field): self
    {
        self::assertNoDuplicate($this->fields, $name);

        // Use a keyed assignment rather than array unpacking: unpacking renumbers
        // integer keys, which would discard an integer $name and re-index any
        // existing integer-keyed field.
        $fields = $this->fields;
        $fields[$name] = $field;

        return new self($fields);
    }

    /**
     * Returns a new schema where the field stored under the given name is replaced.
     *
     * @throws ValueError if the name does not exist
     */
    public function replace(int|string $name, Field $field): self
    {
        $this->has($name) || throw new ValueError('Field "'.$name.'" does not exist.');

        $fields = $this->fields;
        $fields[$name] = $field;

        return new self($fields);
    }

    /**
     * Returns a schema without the given names; unknown names are ignored.
     * The current instance is returned when no name is provided.
     */
    public function remove(int|string ...$names): self
    {
        return [] === $names
            ? $this
            : new self(array_diff_key($this->fields, array_flip($names)));
    }

    /**
     * @throws ValueError if the key is already present in the data
     */
    private static function assertNoDuplicate(array $data, string|int $key): void
    {
        ! array_key_exists($key, $data) || throw new ValueError('The key already exists: '.$key);
    }

    public function count(): int
    {
        return count($this->fields);
    }

    /**
     * @return Iterator<array-key, Field>
     */
    public function getIterator(): Iterator
    {
        yield from $this->fields;
    }

    /**
     * @return array<array-key, Field>
     */
    public function all(): array
    {
        return $this->fields;
    }

    public function isEmpty(): bool
    {
        return [] === $this->fields;
    }

    /**
     * Returns the name of each field definition, indexed like the schema.
     *
     * @return array<array-key, string>
     */
    public function types(): array
    {
        return array_map(fn (Field $field) => $field->name(), $this->fields);
    }

    /**
     * Returns the schema keys in their insertion order.
     *
     * @return list<array-key>
     */
    public function names(): array
    {
        return array_keys($this->fields);
    }

    public function has(int|string $offset): bool
    {
        return array_key_exists($offset, $this->fields);
    }

    /**
     * @throws ValueError if no field is stored under the given offset
     */
    public function get(int|string $offset): Field
    {
        return $this->has($offset) ? $this->fields[$offset] : throw new ValueError('The key does not exist: '.$offset);
    }

    /**
     * Lazily applies the callback to each field, preserving the schema keys.
     *
     * @template TValue
     *
     * @param callable(Field, array-key): TValue $callback
     *
     * @return Iterator<array-key, TValue>
     */
    public function map(callable $callback): Iterator
    {
        foreach ($this->fields as $name => $field) {
            yield $name => $callback($field, $name);
        }
    }

    /**
     * Formats every record of the tabular data with the schema fields.
     * The schema names are passed to getRecords() as the record header.
     *
     * @return Iterator<array-key, array<array-key, mixed>>
     */
    public function parse(TabularData $tabularData): Iterator
    {
        return MapIterator::fromIterable($tabularData->getRecords($this->names()), $this->format(...));
    }

    /**
     * Parses a single record: each schema column is parsed by its field, a column
     * missing from the row is parsed as null, and extra row columns are dropped.
     *
     * @return array<array-key, mixed>
     */
    public function format(array $row): array
    {
        $result = [];
        foreach ($this->fields as $column => $field) {
            $result[$column] = $field->parse($row[$column] ?? null);
        }

        return $result;
    }
}
'custom(string)', + 'age' => 'custom(numeric)', + ], $schema->types()); + } + + public function testHas(): void + { + $schema = new Schema([ + 'name' => $this->field('string'), + ]); + + self::assertTrue($schema->has('name')); + self::assertFalse($schema->has('age')); + } + + public function testGetReturnsField(): void + { + $field = $this->field('string'); + + $schema = new Schema([ + 'name' => $field, + ]); + + self::assertSame($field, $schema->get('name')); + } + + public function testGetThrowsOnMissingKey(): void + { + $schema = new Schema(); + + $this->expectException(ValueError::class); + + $schema->get('missing'); + } + + public function testIterator(): void + { + $fields = [ + 'name' => $this->field('string'), + 'age' => $this->field('numeric'), + ]; + + $schema = new Schema($fields); + + $result = []; + foreach ($schema as $key => $field) { + $result[$key] = $field; + } + + self::assertSame($fields, $result); + } + + public function testMap(): void + { + $schema = new Schema([ + 'name' => $this->field('string'), + 'age' => $this->field('numeric'), + ]); + + $result = iterator_to_array( + $schema->map(fn (Field $field, $key) => $field->name()) + ); + + self::assertSame([ + 'name' => 'custom(string)', + 'age' => 'custom(numeric)', + ], $result); + } + + public function testGetByNumericIndex(): void + { + $fields = [ + $this->field('string'), + $this->field('numeric'), + ]; + + $schema = new Schema($fields); + + self::assertSame('custom(string)', $schema->get(0)->name()); + self::assertSame('custom(numeric)', $schema->get(1)->name()); + } + + public function testHasWithNumericIndex(): void + { + $schema = new Schema([ + $this->field('string'), + ]); + + self::assertTrue($schema->has(0)); + self::assertFalse($schema->has(1)); + } +} diff --git a/src/Schema/SetField.php b/src/Schema/SetField.php new file mode 100644 index 00000000..35908fa5 --- /dev/null +++ b/src/Schema/SetField.php @@ -0,0 +1,113 @@ + + * + * For the full copyright and license information, 
/**
 * Field definition for a delimited list of enum cases stored in a single cell.
 *
 * The confidence threshold is taken from the wrapped EnumField.
 */
final class SetField extends FieldEvaluator implements Field
{
    /** @var non-empty-string */
    public readonly string $separator;
    public readonly int $limit;
    public readonly EnumField $enumField;

    /**
     * The separator is trimmed before being validated and stored.
     *
     * NOTE(review): $limit is stored and exposed through metadata() but is not
     * consulted by parse() in this class.
     *
     * @param non-empty-string $separator
     *
     * @throws ValueError if the separator is empty once trimmed
     */
    public function __construct(EnumField $enumField, string $separator = ',', int $limit = PHP_INT_MAX)
    {
        $separator = trim($separator);
        if ('' === $separator) {
            throw new ValueError('The set field separator can not be an empty string.');
        }

        parent::__construct($enumField->confidenceThreshold());

        $this->enumField = $enumField;
        $this->separator = $separator;
        $this->limit = $limit;
    }

    /**
     * Builds the field from an enum class name instead of an EnumField instance.
     *
     * @param class-string<UnitEnum> $enumClass
     * @param non-empty-string $separator
     */
    public static function fromEnum(
        string $enumClass,
        string $separator = ',',
        int $limit = PHP_INT_MAX,
        float $confidenceThreshold = 0.8
    ): self {
        $enumField = new EnumField($enumClass, $confidenceThreshold);

        return new self($enumField, $separator, $limit);
    }

    public function type(): FieldType
    {
        return FieldType::Set;
    }

    public function name(): string
    {
        return FieldType::Set->value.'('.$this->enumField->name().')';
    }

    /**
     * Splits the string on the separator and converts each part with the enum field.
     *
     * Empty parts, repeated parts and parts the enum field can not parse are
     * skipped; non-string and blank inputs yield null.
     *
     * @return list<UnitEnum>|null
     */
    public function parse(mixed $value): mixed
    {
        if (!is_string($value)) {
            return null;
        }

        $input = trim($value);
        if ('' === $input) {
            return null;
        }

        // Indexed by the raw token so that repeated tokens are only kept once.
        $members = [];
        foreach (explode($this->separator, $input) as $token) {
            $token = trim($token);
            if ('' !== $token && !isset($members[$token])) {
                $case = $this->enumField->parse($token);
                if (null !== $case) {
                    $members[$token] = $case;
                }
            }
        }

        return array_values($members);
    }

    public function metadata(): FieldMetadata
    {
        $data = [
            'separator' => $this->separator,
            'limit' => $this->limit,
            'enum' => $this->enumField->metadata(),
        ];

        return new FieldMetadata($data);
    }
}
self::assertNull($field->parse(1)); + self::assertNull($field->parse(true)); + self::assertNull($field->parse([])); + self::assertNull($field->parse(new stdClass())); + } + + public function test_it_returns_null_for_empty_strings(): void + { + $field = SetField::fromEnum(TestSetEnum::class); + + self::assertNull($field->parse('')); + self::assertNull($field->parse(' ')); + } + + public function test_it_parses_a_set_value(): void + { + $field = SetField::fromEnum(TestSetEnum::class); + + self::assertSame([TestSetEnum::Read, TestSetEnum::Write, TestSetEnum::Delete], $field->parse('read,write,delete')); + } + + public function test_it_respects_the_limit(): void + { + $field = SetField::fromEnum(TestSetEnum::class, limit: 2); + + self::assertSame([TestSetEnum::Read, TestSetEnum::Write, TestSetEnum::Delete], $field->parse('read,write,delete')); + } + + public function test_it_can_use_custom_separator(): void + { + $field = SetField::fromEnum(TestSetEnum::class, '|'); + + self::assertSame([TestSetEnum::Read, TestSetEnum::Write], $field->parse('read|write')); + } + + public function test_it_returns_metadata(): void + { + $field = SetField::fromEnum(TestSetEnum::class, '|', 3); + + self::assertSame( + [ + 'separator' => '|', + 'limit' => 3, + 'enum' => [ + 'class' => TestSetEnum::class, + 'backedType' => 'string', + 'cases' => [ + ['name' => 'Read', 'value' => 'read'], + ['name' => 'Write', 'value' => 'write'], + ['name' => 'Delete', 'value' => 'delete'], + ], + ], + ], + $field->metadata()->all() + ); + } + + public function test_it_handles_set_with_duplicate_values(): void + { + $field = SetField::fromEnum(TestSetEnum::class); + $value = $field->parse('read, write,read,,delete'); + + self::assertIsArray($value); + self::assertTrue(array_is_list($value)); + self::assertCount(3, $value); + self::assertSame([TestSetEnum::Read, TestSetEnum::Write, TestSetEnum::Delete], $value); + } +} + +enum TestSetEnum: string +{ + case Read = 'read'; + case Write = 'write'; + case Delete 
/**
 * Field definition for textual values, optionally narrowed by a StringConstraint.
 *
 * Without a constraint the field accepts every string and reports a confidence
 * threshold of 0.0, whatever value was given to the constructor.
 */
final class StringField extends FieldEvaluator implements Field
{
    public function __construct(
        public readonly ?StringConstraint $constraint = null,
        float $confidenceThreshold = 0.0
    ) {
        parent::__construct($confidenceThreshold);
    }

    /**
     * String whose length does not exceed the given length.
     *
     * @param positive-int $length
     */
    public static function max(int $length, float $confidenceThreshold = 0.8): self
    {
        $constraint = StringLengthConstraint::max($length);

        return new self($constraint, $confidenceThreshold);
    }

    /**
     * String whose length is at least the given length.
     *
     * @param positive-int $length
     */
    public static function min(int $length, float $confidenceThreshold = 0.8): self
    {
        $constraint = StringLengthConstraint::min($length);

        return new self($constraint, $confidenceThreshold);
    }

    /**
     * String whose length is exactly the given length.
     *
     * @param positive-int $length
     */
    public static function fixed(int $length, float $confidenceThreshold = 0.8): self
    {
        $constraint = StringLengthConstraint::fixed($length);

        return new self($constraint, $confidenceThreshold);
    }

    public static function uuid(float $confidenceThreshold = 0.8): self
    {
        $constraint = StructuredStringConstraint::uuid();

        return new self($constraint, $confidenceThreshold);
    }

    public static function ulid(float $confidenceThreshold = 0.8): self
    {
        $constraint = StructuredStringConstraint::ulid();

        return new self($constraint, $confidenceThreshold);
    }

    public static function hexColor(float $confidenceThreshold = 0.8): self
    {
        $constraint = StructuredStringConstraint::hexColor();

        return new self($constraint, $confidenceThreshold);
    }

    public static function jwtToken(float $confidenceThreshold = 0.8): self
    {
        $constraint = StructuredStringConstraint::jwtToken();

        return new self($constraint, $confidenceThreshold);
    }

    public static function md5(float $confidenceThreshold = 0.8): self
    {
        $constraint = StructuredStringConstraint::md5();

        return new self($constraint, $confidenceThreshold);
    }

    public static function sha1(float $confidenceThreshold = 0.8): self
    {
        $constraint = StructuredStringConstraint::sha1();

        return new self($constraint, $confidenceThreshold);
    }

    /**
     * Returns one field per structured string format, all sharing the same threshold.
     */
    public static function cases(float $confidenceThreshold = 0.8): FieldList
    {
        $uuid = self::uuid($confidenceThreshold);
        $ulid = self::ulid($confidenceThreshold);
        $hexColor = self::hexColor($confidenceThreshold);
        $jwtToken = self::jwtToken($confidenceThreshold);
        $md5 = self::md5($confidenceThreshold);
        $sha1 = self::sha1($confidenceThreshold);

        return new FieldList($uuid, $ulid, $hexColor, $jwtToken, $md5, $sha1);
    }

    public function type(): FieldType
    {
        return FieldType::String;
    }

    /**
     * The constraint label when a constraint is set, the plain string type label otherwise.
     */
    public function name(): string
    {
        if (null === $this->constraint) {
            return FieldType::String->value;
        }

        return $this->constraint->fieldTypeName() ?? FieldType::String->value;
    }

    /**
     * Returns the trimmed string, or null for non-strings, blank strings and
     * strings rejected by the constraint.
     */
    public function parse(mixed $value): ?string
    {
        if (!is_string($value)) {
            return null;
        }

        $trimmed = trim($value);
        if ('' === $trimmed) {
            return null;
        }

        if (null === $this->constraint) {
            return $trimmed;
        }

        return $this->constraint->apply($trimmed);
    }

    public function metadata(): FieldMetadata
    {
        return new FieldMetadata();
    }

    /**
     * Without a constraint any string (even an empty one) scores 1 and anything
     * else scores 0; with a constraint the parent evaluation is used.
     *
     * @return int<-1, 1>
     */
    public function evaluate(mixed $value): int
    {
        if (null !== $this->constraint) {
            return parent::evaluate($value);
        }

        return is_string($value) ? 1 : 0;
    }

    public function confidenceThreshold(): float
    {
        if (null !== $this->constraint) {
            return parent::confidenceThreshold();
        }

        return 0.0;
    }
}
provideEvaluateValues(): array + { + return [ + ['hello', 1], + ['', 1], + ['123', 1], + + [123, 0], + [12.5, 0], + [true, 0], + [false, 0], + [null, 0], + [[], 0], + ]; + } + + #[DataProvider('provideEvaluateValues')] + public function testEvaluate(mixed $input, int $expected): void + { + self::assertSame($expected, $this->field->evaluate($input)); + } + + // -------------------------------------------------------- + // type() + // -------------------------------------------------------- + + public function testTypeIsString(): void + { + self::assertSame(FieldType::String, $this->field->type()); + } + + // -------------------------------------------------------- + // confidenceThreshold() + // -------------------------------------------------------- + + public function testConfidenceThresholdIsZero(): void + { + self::assertSame(0.0, $this->field->confidenceThreshold()); + } + + public function test_metadata_contains_expected_structure(): void + { + $field = new StringField(); + + self::assertTrue($field->metadata()->isEmpty()); + } + + public function test_max_length_constraint_applied(): void + { + $field = StringField::max(3); + + self::assertSame('string[,3]', $field->name()); + self::assertNull($field->parse(null)); + self::assertNull($field->parse('abcdef')); + self::assertSame('a', $field->parse('a')); + self::assertSame('ab', $field->parse('ab')); + self::assertSame('abc', $field->parse('abc')); + } + + public function test_fixed_length_constraint_applied(): void + { + $field = StringField::fixed(3); + + self::assertSame('string[3]', $field->name()); + self::assertNull($field->parse(null)); + self::assertNull($field->parse('abcdef')); + self::assertNull($field->parse('a')); + self::assertNull($field->parse('ab')); + self::assertSame('abc', $field->parse('abc')); + } + + public function test_min_length_constraint_applied(): void + { + $field = StringField::min(3); + + self::assertSame('string[3,]', $field->name()); + self::assertNull($field->parse(null)); + 
self::assertNull($field->parse('a')); + self::assertNull($field->parse('ab')); + self::assertSame('abc', $field->parse('abc')); + self::assertSame('abcdef', $field->parse('abcdef')); + } + + // -------------------------------------------------------- + // Factory constructors + // -------------------------------------------------------- + + public function testUuidFactoryCreatesValidStrategy(): void + { + $field = StringField::uuid(); + + self::assertSame(FieldType::String, $field->type()); + self::assertSame('string(uuid)', $field->name()); + self::assertSame(0.8, $field->confidenceThreshold()); + } + + public function testUlidFactoryCreatesValidStrategy(): void + { + $field = StringField::ulid(); + + self::assertSame('string(ulid)', $field->name()); + } + + public function testHexColorFactoryCreatesValidStrategy(): void + { + $field = StringField::hexColor(); + + self::assertSame('string(hex_color)', $field->name()); + } + + public function testJwtTokenFactoryCreatesValidStrategy(): void + { + $field = StringField::jwtToken(); + + self::assertSame('string(jwt_token)', $field->name()); + } +} diff --git a/src/Schema/StringLengthConstraint.php b/src/Schema/StringLengthConstraint.php new file mode 100644 index 00000000..290df6dc --- /dev/null +++ b/src/Schema/StringLengthConstraint.php @@ -0,0 +1,107 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. 
/**
 * String constraint based on the number of characters (as counted by mb_strlen)
 * of the trimmed value, with inclusive optional bounds.
 */
final readonly class StringLengthConstraint implements StringConstraint
{
    /**
     * @param ?positive-int $min
     * @param ?positive-int $max
     *
     * @throws ValueError if a bound is lower than 1 or if $min is greater than $max
     */
    private function __construct(
        public ?int $min,
        public ?int $max,
    ) {
        if (null !== $min && $min <= 0) {
            throw new ValueError('Min length must be greater than 0');
        }

        if (null !== $max && $max <= 0) {
            throw new ValueError('Max length must be greater than 0');
        }

        if (null !== $min && null !== $max && $min > $max) {
            throw new ValueError('Minimum length can not be greater than maximum length.');
        }
    }

    /**
     * @param positive-int $length
     */
    public static function min(int $length): self
    {
        return new self(min: $length, max: null);
    }

    /**
     * @param positive-int $length
     */
    public static function max(int $length): self
    {
        return new self(min: null, max: $length);
    }

    /**
     * @param positive-int $length
     */
    public static function fixed(int $length): self
    {
        return new self(min: $length, max: $length);
    }

    /**
     * @param positive-int $min
     * @param positive-int $max
     */
    public static function between(int $min, int $max): self
    {
        return new self(min: $min, max: $max);
    }

    /**
     * Returns the trimmed value when its length is within the bounds, null otherwise.
     * A blank value is always rejected.
     */
    public function apply(string $value): ?string
    {
        $trimmed = trim($value);
        if ('' === $trimmed) {
            return null;
        }

        $size = mb_strlen($trimmed);
        $tooShort = null !== $this->min && $size < $this->min;
        $tooLong = null !== $this->max && $size > $this->max;

        return ($tooShort || $tooLong) ? null : $trimmed;
    }

    /**
     * String type label followed by "[n]" for a fixed length or "[min,max]"
     * otherwise (a missing bound is rendered as an empty string).
     */
    public function fieldTypeName(): string
    {
        $suffix = match (true) {
            null === $this->min && null === $this->max => '',
            $this->min === $this->max => '['.$this->min.']',
            default => '['.$this->min.','.$this->max.']',
        };

        return FieldType::String->value.$suffix;
    }

    /**
     * Exposes both bounds as metadata.
     */
    public function constraint(): FieldMetadata
    {
        $bounds = [
            'min_length' => $this->min,
            'max_length' => $this->max,
        ];

        return new FieldMetadata($bounds);
    }
}
Did you forget the delimiter?'); + ('' !== $fieldTypeName && 1 === preg_match('/^[a-z][a-z0-9]*(?:_[a-z0-9]+)*$/', $fieldTypeName)) || throw new ValueError('The name "'.$fieldTypeName.'" is not a valid snake case variable name.'); + } + + public static function uuid(): self + { + return new self( + fieldTypeName: 'uuid', + pattern: '/^[0-9a-f]{8}-[0-9a-f]{4}-[1-5][0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$/i', + ); + } + + public static function ulid(): self + { + return new self( + fieldTypeName: 'ulid', + pattern: '/^[0-9A-HJKMNP-TV-Z]{26}$/i', + ); + } + + public static function jwtToken(): self + { + return new self( + fieldTypeName: 'jwt_token', + pattern: '/^[A-Za-z0-9-_]+\.[A-Za-z0-9-_]+\.[A-Za-z0-9-_]+$/i', + ); + } + + public static function hexColor(): self + { + return new self( + fieldTypeName: 'hex_color', + pattern: '/^#(?:[0-9a-fA-F]{3}){1,2}$/i', + ); + } + + public static function md5(): self + { + return new self( + fieldTypeName: 'md5', + pattern: '/^[a-fA-F0-9]{32}$/', + ); + } + + public static function sha1(): self + { + return new self( + fieldTypeName: 'sha1', + pattern: '/^[a-fA-F0-9]{40}$/', + ); + } + + public function apply(string $value): ?string + { + $value = trim($value); + + return ('' === $value || 1 !== preg_match($this->pattern, $value)) ? null : $value; + } + + public function fieldTypeName(): string + { + return FieldType::String->value.'('.$this->fieldTypeName.')'; + } +} diff --git a/src/Schema/TimeField.php b/src/Schema/TimeField.php new file mode 100644 index 00000000..181c239c --- /dev/null +++ b/src/Schema/TimeField.php @@ -0,0 +1,137 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. 
/**
 * Field describing time-of-day values written as hour, hour/minute or
 * hour/minute/second components joined by a one-byte separator.
 *
 * Instances are created through the named constructors hours(), minutes()
 * and seconds(). Whatever the configured precision, parse() normalises a
 * matching value to three zero-padded components joined by the separator;
 * components missing from the input are reported as `00`.
 */
final class TimeField extends FieldEvaluator implements Field
{
    /** @var non-empty-string regular expression built once from separator, precision and padding */
    private readonly string $pattern;

    /**
     * @param string $separator one byte, not a digit, placed between the time components
     * @param TimePrecision $precision which components a string value must contain
     * @param TimePadding $padding whether each component must be written with two digits
     * @param float $confidenceThreshold forwarded unchanged to the FieldEvaluator constructor
     *
     * @throws ValueError if the separator is not exactly one byte long or is a digit
     */
    private function __construct(
        public readonly string $separator,
        public readonly TimePrecision $precision,
        public readonly TimePadding $padding,
        float $confidenceThreshold = 0.8
    ) {
        // A digit separator is refused: it could not be told apart from the components.
        (1 === strlen($separator) && !ctype_digit($separator)) || throw new ValueError('The separator character must be a non-empty single byte string.');

        parent::__construct($confidenceThreshold);

        $this->pattern = $this->generatePattern();
    }

    /**
     * Creates a field expecting hour, minute and second components.
     *
     * @throws ValueError if the separator is invalid
     */
    public static function seconds(string $separator = ':', TimePadding $padding = TimePadding::Padded, float $confidenceThreshold = 0.8): self
    {
        return new self($separator, TimePrecision::HoursMinutesSeconds, $padding, $confidenceThreshold);
    }

    /**
     * Creates a field expecting hour and minute components.
     *
     * @throws ValueError if the separator is invalid
     */
    public static function minutes(string $separator = ':', TimePadding $padding = TimePadding::Padded, float $confidenceThreshold = 0.8): self
    {
        return new self($separator, TimePrecision::HoursMinutes, $padding, $confidenceThreshold);
    }

    /**
     * Creates a field expecting the hour component only.
     *
     * @throws ValueError if the separator is invalid
     */
    public static function hours(string $separator = ':', TimePadding $padding = TimePadding::Padded, float $confidenceThreshold = 0.8): self
    {
        return new self($separator, TimePrecision::Hours, $padding, $confidenceThreshold);
    }

    public function type(): FieldType
    {
        return FieldType::Time;
    }

    /**
     * Returns a FieldMetadata built without arguments.
     */
    public function metadata(): FieldMetadata
    {
        return new FieldMetadata();
    }

    /**
     * Returns the time field type value followed by the precision, padding
     * and separator settings, e.g. `time(precision=hours,padding=padded,separator=:)`.
     */
    public function name(): string
    {
        $precision = match ($this->precision) {
            TimePrecision::Hours => 'hours',
            TimePrecision::HoursMinutes => 'hours_minutes',
            TimePrecision::HoursMinutesSeconds => 'hours_minutes_seconds',
        };

        $paddingMode = match ($this->padding) {
            TimePadding::Unpadded => 'un_padded',
            TimePadding::Padded => 'padded',
        };

        return FieldType::Time->value.'(precision='.$precision.',padding='.$paddingMode.',separator='.$this->separator.')';
    }

    /**
     * Normalises a value to `HH<sep>MM<sep>SS`.
     *
     * @return string|null the normalised time; null when the value is neither a
     *                     DateTimeInterface nor a string matching the configured
     *                     layout, or when the hour exceeds 23 or the minute or
     *                     second exceeds 59
     */
    public function parse(mixed $value): ?string
    {
        if ($value instanceof DateTimeInterface) {
            // Rendered with the configured separator so the result has the same
            // shape as a parsed string. With the default ':' this is 'H:i:s'.
            return implode($this->separator, [
                $value->format('H'),
                $value->format('i'),
                $value->format('s'),
            ]);
        }

        if (!is_string($value)) {
            return null;
        }

        if (1 !== preg_match($this->pattern, trim($value), $found)) {
            return null;
        }

        // Components absent from the configured precision default to zero.
        $hour = (int) $found['hour'];
        $minute = (int) ($found['minute'] ?? 0);
        $second = (int) ($found['second'] ?? 0);

        if ($hour > 23 || $minute > 59 || $second > 59) {
            return null;
        }

        return implode($this->separator, [
            $this->formatTimePart($hour),
            $this->formatTimePart($minute),
            $this->formatTimePart($second),
        ]);
    }

    /**
     * Left-pads a single-digit component with one zero.
     */
    private function formatTimePart(int $value): string
    {
        return ($value < 10 ? '0' : '').$value;
    }

    /**
     * Builds the anchored expression with one named group per expected component.
     *
     * @return non-empty-string
     */
    private function generatePattern(): string
    {
        $digits = TimePadding::Padded === $this->padding ? '\d{2}' : '\d{1,2}';

        $patternParts = array_map(
            static fn (string $part): string => "(?<{$part}>{$digits})",
            match ($this->precision) {
                TimePrecision::Hours => ['hour'],
                TimePrecision::HoursMinutes => ['hour', 'minute'],
                TimePrecision::HoursMinutesSeconds => ['hour', 'minute', 'second'],
            }
        );

        // The separator is caller supplied and may be a regex metacharacter
        // ('.', '|', '(', '*', ...) or the '/' delimiter itself. It is quoted
        // so that it only ever matches itself and cannot break the pattern.
        return '/^'.implode(preg_quote($this->separator, '/'), $patternParts).'$/';
    }
}
/**
 * Behavioural tests for TimeField: naming, parsing at each precision,
 * rejection of malformed or out-of-range input, and output normalisation.
 */
#[CoversClass(TimeField::class)]
final class TimeFieldTest extends TestCase
{
    public function test_hours_constructor_parses_correctly(): void
    {
        $hoursField = TimeField::hours();

        self::assertSame('time(precision=hours,padding=padded,separator=:)', $hoursField->name());

        // Pairs are used instead of string keys: numeric string keys would be cast to int.
        foreach ([['10', '10:00:00'], ['23', '23:00:00']] as [$input, $expected]) {
            self::assertSame($expected, $hoursField->parse($input));
        }
    }

    public function test_minutes_constructor_parses_correctly(): void
    {
        $minutesField = TimeField::minutes(separator: '.');

        self::assertSame('time(precision=hours_minutes,padding=padded,separator=.)', $minutesField->name());

        foreach ([['10.30', '10.30.00'], ['23.59', '23.59.00']] as [$input, $expected]) {
            self::assertSame($expected, $minutesField->parse($input));
        }
    }

    public function test_seconds_constructor_parses_correctly(): void
    {
        $secondsField = TimeField::seconds();

        self::assertSame('time(precision=hours_minutes_seconds,padding=padded,separator=:)', $secondsField->name());

        foreach ([['10:30:45', '10:30:45'], ['00:00:00', '00:00:00']] as [$input, $expected]) {
            self::assertSame($expected, $secondsField->parse($input));
        }
    }

    public function test_invalid_string_returns_null(): void
    {
        $secondsField = TimeField::seconds();

        foreach (['', ' ', 'invalid'] as $input) {
            self::assertNull($secondsField->parse($input));
        }
    }

    public function test_non_string_returns_null(): void
    {
        $secondsField = TimeField::seconds();

        foreach ([null, 123, []] as $input) {
            self::assertNull($secondsField->parse($input));
        }
    }

    public function test_seconds_precision_rejects_invalid_time(): void
    {
        $secondsField = TimeField::seconds();

        $outOfRange = [
            '25:00:00', // invalid hour
            '10:70:00', // invalid minute
            '10:00:90', // invalid second
        ];

        foreach ($outOfRange as $input) {
            self::assertNull($secondsField->parse($input));
        }
    }

    public function test_minutes_precision_rejects_seconds_input(): void
    {
        // too precise
        self::assertNull(TimeField::minutes()->parse('10:30:45'));
    }

    public function test_hours_precision_rejects_minutes_input(): void
    {
        $hoursField = TimeField::hours();

        // both inputs are too precise for an hours-only field
        foreach (['10:30', '10:30:45'] as $input) {
            self::assertNull($hoursField->parse($input));
        }
    }

    public function test_output_is_always_normalized_to_his(): void
    {
        $unpaddedField = TimeField::seconds(padding: TimePadding::Unpadded);

        self::assertSame('01:02:03', $unpaddedField->parse('1:2:3'));
    }

    public function test_metadata_contains_format(): void
    {
        $metadata = TimeField::seconds()->metadata();

        self::assertSame([], $metadata->all());
    }

    public function test_name_contains_format(): void
    {
        $expectedNames = [
            ['time(precision=hours_minutes_seconds,padding=padded,separator=:)', TimeField::seconds()],
            ['time(precision=hours_minutes,padding=padded,separator=:)', TimeField::minutes()],
            ['time(precision=hours,padding=padded,separator=:)', TimeField::hours()],
        ];

        foreach ($expectedNames as [$expected, $timeField]) {
            self::assertSame($expected, $timeField->name());
        }
    }
}

/**
 * Tells whether each component of a time value must be written with two
 * digits (Padded) or may be written with one or two digits (Unpadded).
 */
enum TimePadding
{
    case Padded;
    case Unpadded;
}
+ */ + +declare(strict_types=1); + +namespace League\Csv\Schema; + +/** + * Lists the levels of detail a time value can carry: the hour only, the hour and minute, or the hour, minute and second. + */ +enum TimePrecision +{ + case Hours; + case HoursMinutes; + case HoursMinutesSeconds; +} diff --git a/src/TabularData.php b/src/TabularData.php index f886eecf..44c8aca9 100644 --- a/src/TabularData.php +++ b/src/TabularData.php @@ -13,6 +13,7 @@ namespace League\Csv; +use Closure; use Iterator; /** @@ -24,6 +25,7 @@ * @method object|null lastAsObject(string $className, array $header = []) returns the last record from the tabular data as an instance of the defined class name. * @method Iterator map(callable $callback) Run a map over each container record. * @method Iterator getRecordsAsObject(string $className, array $header = []) Returns the tabular data records as an iterator object containing instance of the defined class name. + * @method mixed reduce(Closure $callback, mixed $initial = null) reduces the collection to a single value, passing the result of each iteration into the subsequent iteration */ interface TabularData { diff --git a/src/TabularDataReader.php b/src/TabularDataReader.php index 3dacc1c1..72e81d69 100644 --- a/src/TabularDataReader.php +++ b/src/TabularDataReader.php @@ -30,7 +30,6 @@ * @method mixed value(int|string $column = 0) returns a given value from the first element of the tabular data. * @method bool each(Closure $callback) iterates over each record and passes it to a closure. Iteration is interrupted if the closure returns false * @method bool exists(Closure $callback) tells whether at least one record satisfies the predicate. - * @method mixed reduce(Closure $callback, mixed $initial = null) reduces the collection to a single value, passing the result of each iteration into the subsequent iteration * @method Iterator getObjects(string $className, array $header = []) Returns the tabular data records as an iterator object containing instance of the defined class name. 
* @method TabularDataReader filter(Query\Predicate|Closure $predicate) returns all the elements of this collection for which your callback function returns `true` * @method TabularDataReader slice(int $offset, ?int $length = null) extracts a slice of $length elements starting at position $offset from the Collection.