I am trying to write Parquet files to lakeFS, which is configured to use S3 as its blockstore. All of my services run in Docker.
Here is my Docker Compose file:
# Compose stack: the application ("web"), Redis, PostgreSQL and a lakeFS
# server configured with an S3 blockstore.
#
# On the Compose network every service is reachable from the others by its
# service name (e.g. "redis_db", "postgres_db", "laksefs"), not by "localhost".
version: "3.7"

services:
  # Application container, built from the Dockerfile in this directory.
  web:
    build: .
    depends_on:
      - postgres_db
      - redis_db
      - laksefs
    env_file:
      - path: .env
        required: true
    ports:
      - "3000:3000"
    environment:
      # Service name of the Redis container below.
      REDIS_HOST: "redis_db"

  redis_db:
    image: redis
    ports:
      - "6379:6379"
    volumes:
      - redis_data:/data

  postgres_db:
    image: postgres
    ports:
      - "5432:5432"
    volumes:
      - postgres_data:/var/lib/postgresql/data/
    environment:
      POSTGRES_USER: ${POSTGRES_USER}
      POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
      POSTGRES_DB: ${POSTGRES_DB}

  # NOTE(review): "laksefs" looks like a typo of "lakefs". It is kept as-is
  # because the service name is also the hostname other containers use, so
  # renaming it would require updating every client endpoint as well.
  laksefs:
    image: treeverse/lakefs:latest
    ports:
      - "8000:8000"
    depends_on:
      - postgres_db
    environment:
      LAKEFS_BLOCKSTORE_TYPE: "s3"
      LAKEFS_LOGGING_FORMAT: "text"
      LAKEFS_LOGGING_LEVEL: "TRACE"
      # Quoted: values in a Compose environment mapping must be strings, not
      # YAML booleans.
      LAKEFS_BLOCKSTORE_S3_FORCE_PATH_STYLE: "true"
      LAKEFS_BLOCKSTORE_S3_CREDENTIALS_ACCESS_KEY_ID: ${AWS_ACCESS_KEY_ID}
      LAKEFS_BLOCKSTORE_S3_CREDENTIALS_SECRET_ACCESS_KEY: ${AWS_SECRET_ACCESS_KEY}
      LAKEFS_BLOCKSTORE_S3_REGION: "us-east-2"
      # NOTE(review): this reuses the AWS secret key as the lakeFS auth
      # encryption secret; a dedicated random secret is probably intended.
      LAKEFS_AUTH_ENCRYPT_SECRET_KEY: ${AWS_SECRET_ACCESS_KEY}
      LAKEFS_DATABASE_TYPE: ${LAKEFS_DATABASE_TYPE}
      # Connects to the postgres_db service above by its service name.
      LAKEFS_DATABASE_POSTGRES_CONNECTION_STRING: "host=postgres_db user=${POSTGRES_USER} password=${POSTGRES_PASSWORD} dbname=${POSTGRES_DB} sslmode=disable"
      LAKEFS_INSTALLATION_USER_NAME: "admin"
      LAKEFS_INSTALLATION_ACCESS_KEY_ID: ${LAKEFS_ACCESS_KEY}
      LAKEFS_INSTALLATION_SECRET_ACCESS_KEY: ${LAKEFS_SECRET_KEY}
      LAKEFS_GATEWAYS_S3_REGION: "us-east-2"
      # "localhost" here is the lakeFS container itself. Other services in
      # this stack (e.g. web) cannot reach lakeFS via localhost; they must use
      # http://laksefs:8000 as their endpoint.
      LAKECTL_SERVER_ENDPOINT_URL: "http://localhost:8000"

volumes:
  postgres_data:
    driver: local
  redis_data:
    driver: local
I tried multiple variations, but it is not working and I am getting an error.
I checked the endpoint, the lakeFS access key, and the secret; all are correctly configured.
I am using AWS Wrangler to write the data to lakeFS:
wr.config.s3_endpoint_url = lfs_endpoint
await loop.run_in_executor(None, lambda: wr.s3.to_parquet(
df=batch.to_pandas() ,
path=lfs_s3_path,
dataset=True,
max_rows_by_file=max_rows_per_file,
use_threads=True,
partition_cols = partitioning,
mode='append',
boto3_session=s3_session
))
What could be going wrong here?
When I run it without Docker, it works fine.