Skip to content
Draft
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion gokart/file_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
from logging import getLogger

import luigi
import luigi.contrib.s3
import luigi.format
import numpy as np
import pandas as pd
Expand Down
6 changes: 6 additions & 0 deletions gokart/gcs_config.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
try:
import googleapiclient # noqa: F401
except ImportError:
# sentinal: this file should not be imported if [gcs] extra is not installed.
raise

import json
import os

Expand Down
50 changes: 43 additions & 7 deletions gokart/object_storage.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,47 @@
from datetime import datetime

import luigi
import luigi.contrib.gcs
import luigi.contrib.s3
from luigi.format import Format

from gokart.gcs_config import GCSConfig
from gokart.gcs_zip_client import GCSZipClient
from gokart.s3_config import S3Config
from gokart.s3_zip_client import S3ZipClient
from gokart.zip_client import ZipClient

try:
from gokart.gcs_config import GCSConfig
from gokart.gcs_zip_client import GCSZipClient

# to avoid warning, import here which means gcs dependencies are exist
import luigi.contrib.gcs # isort: skip
GCS_AVAILABLE = True
except ImportError:
GCS_AVAILABLE = False

try:
from gokart.s3_config import S3Config
from gokart.s3_zip_client import S3ZipClient

# to avoid warning, import here which means s3 dependencies are exist
import luigi.contrib.s3 # isort: skip
S3_AVAILABLE = True
except ImportError:
S3_AVAILABLE = False

object_storage_path_prefix = ['s3://', 'gs://']


def assert_gcs_available():
if GCS_AVAILABLE:
return

raise ImportError('gs:// is not available. Please install gokart[gcs]')
Comment thread
yokomotod marked this conversation as resolved.
Outdated


def assert_s3_available():
if S3_AVAILABLE:
return

raise ImportError('s3:// is not available. Please install gokart[s3]')


class ObjectStorage(object):

@staticmethod
Expand All @@ -26,26 +54,32 @@ def if_object_storage_path(path: str) -> bool:
@staticmethod
def get_object_storage_target(path: str, format: Format) -> luigi.Target:
if path.startswith('s3://'):
assert_s3_available()
return luigi.contrib.s3.S3Target(path, client=S3Config().get_s3_client(), format=format)
elif path.startswith('gs://'):
assert_gcs_available()
return luigi.contrib.gcs.GCSTarget(path, client=GCSConfig().get_gcs_client(), format=format)
else:
raise

@staticmethod
def exists(path: str) -> bool:
if path.startswith('s3://'):
assert_s3_available()
return S3Config().get_s3_client().exists(path)
elif path.startswith('gs://'):
assert_gcs_available()
return GCSConfig().get_gcs_client().exists(path)
else:
raise

@staticmethod
def get_timestamp(path: str) -> datetime:
if path.startswith('s3://'):
assert_s3_available()
return S3Config().get_s3_client().get_key(path).last_modified
elif path.startswith('gs://'):
assert_gcs_available()
# for gcs object
# should PR to luigi
bucket, obj = GCSConfig().get_gcs_client()._path_to_bucket_and_key(path)
Expand All @@ -57,12 +91,14 @@ def get_timestamp(path: str) -> datetime:
@staticmethod
def get_zip_client(file_path: str, temporary_directory: str) -> ZipClient:
if file_path.startswith('s3://'):
assert_s3_available()
return S3ZipClient(file_path=file_path, temporary_directory=temporary_directory)
elif file_path.startswith('gs://'):
assert_gcs_available()
return GCSZipClient(file_path=file_path, temporary_directory=temporary_directory)
else:
raise

@staticmethod
def is_buffered_reader(file: object):
return not isinstance(file, luigi.contrib.s3.ReadableS3File)
return not (S3_AVAILABLE and isinstance(file, luigi.contrib.s3.ReadableS3File))
6 changes: 6 additions & 0 deletions gokart/s3_config.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
try:
import boto3 # noqa: F401
except ImportError:
# sentinal: this file should not be imported if [s3] extra is not installed.
raise

import os

import luigi
Expand Down
53 changes: 52 additions & 1 deletion poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

26 changes: 22 additions & 4 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,23 +18,22 @@ pattern = "^(?P<base>\\d+\\.\\d+\\.\\d+)"
[tool.poetry.dependencies]
python = ">=3.8,<3.12"
luigi = "*"
boto3 = "*"
boto3 = {version = "*", optional = true}
slack-sdk = "^3"
pandas = "*"
numpy = "*"
tqdm = "*"
google-auth = "*"
pyarrow = "*"
uritemplate = "*"
google-api-python-client = "*"
google-api-python-client = {version = "*", optional = true}
APScheduler = "*"
redis = "*"
matplotlib = "*"
pytest = "*"
Comment thread
yokomotod marked this conversation as resolved.
Outdated

[tool.poetry.group.dev.dependencies]
pyproject-flake8 = "5.0.4"
tox = "*"
moto = "*"
testfixtures = "*"
coverage = "*"
isort = "^5.7"
Expand All @@ -45,6 +44,25 @@ fakeredis = "*"
mypy = "*"
types-redis = "*"

# for dev, extra is installed by default
[tool.poetry.group.dev-extra.dependencies]
google-api-python-client = "*"
moto = "*"

[tool.poetry.extras]
s3 = ["boto3"]
gcs = ["google-api-python-client"]

[tool.pytest.ini_options]
addopts = "--strict-markers -m 'not no_gcs and not no_s3'"
testpaths = "test"
markers = [
"gcs",
"s3",
"no_gcs",
"no_s3"
]

[tool.flake8]
# B006: Do not use mutable data structures for argument defaults. They are created during function definition time. All calls to the function reuse this one instance of that data structure, persisting changes between them.
# B008 Do not perform function calls in argument defaults. The call is performed only once at function definition time. All calls to your function will reuse the result of that definition-time function call. If this is intended, assign the function call to a module-level variable and use that variable as a default value.
Expand Down
19 changes: 19 additions & 0 deletions test/helpers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
def safe_mock_s3(func):
"""
Annotations will be evaluated even if `pytes -m "not s3"` is specified. So
```
@pytest.mark.s3
class TestS3(unittest.TestCase):
@mock_s3
def test_foo():
```
will raise an error if moto is not installed.
This decorator is used to avoid this error.
"""

def wrapper(*args, **kwargs):
from moto import mock_s3
with mock_s3():
return func(*args, **kwargs)

return wrapper
8 changes: 7 additions & 1 deletion test/test_gcs_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,15 @@
import unittest
from unittest.mock import MagicMock, patch

from gokart.gcs_config import GCSConfig
import pytest

try:
from gokart.gcs_config import GCSConfig
except ImportError:
pass


@pytest.mark.gcs
class TestGCSConfig(unittest.TestCase):

def test_get_gcs_client_without_gcs_credential_name(self):
Expand Down
36 changes: 36 additions & 0 deletions test/test_pyproject_extra.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import unittest

import pytest


class TestPyprojectExtra(unittest.TestCase):

@pytest.mark.gcs
def test_gcs_installed(self):
try:
import googleapiclient # noqa: F401
except ImportError:
raise Exception('googleapiclient should be installed')

@pytest.mark.no_gcs
def test_no_gcs(self):
try:
import googleapiclient # noqa: F401
raise Exception('googleapiclient should not be installed')
except ImportError:
pass

@pytest.mark.s3
def test_s3_installed(self):
try:
import boto3 # noqa: F401
except ImportError:
raise Exception('boto3 should be installed')

@pytest.mark.no_s3
def test_no_s3(self):
try:
import boto3 # noqa: F401
raise Exception('boto3 should not be installed')
except ImportError:
pass
8 changes: 7 additions & 1 deletion test/test_s3_config.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,14 @@
import unittest

from gokart.s3_config import S3Config
import pytest

try:
from gokart.s3_config import S3Config
except ImportError:
pass


@pytest.mark.s3
class TestS3Config(unittest.TestCase):

def test_get_same_s3_client(self):
Expand Down
17 changes: 12 additions & 5 deletions test/test_s3_zip_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,22 +2,29 @@
import shutil
import unittest

import boto3
from moto import mock_s3
import pytest

from gokart.s3_zip_client import S3ZipClient
from .helpers import safe_mock_s3

try:
import boto3

from gokart.s3_zip_client import S3ZipClient
except ImportError:
pass


def _get_temporary_directory():
return os.path.abspath(os.path.join(os.path.dirname(__name__), 'temporary'))


@pytest.mark.s3
class TestS3ZipClient(unittest.TestCase):

def tearDown(self):
shutil.rmtree(_get_temporary_directory(), ignore_errors=True)

@mock_s3
@safe_mock_s3
def test_make_archive(self):
conn = boto3.resource('s3', region_name='us-east-1')
conn.create_bucket(Bucket='test')
Expand All @@ -34,7 +41,7 @@ def test_make_archive(self):
os.makedirs(temporary_directory, exist_ok=True)
zip_client.make_archive()

@mock_s3
@safe_mock_s3
def test_unpack_archive(self):
conn = boto3.resource('s3', region_name='us-east-1')
conn.create_bucket(Bucket='test')
Expand Down
Loading