I am trying to get started with a "local" data processing ecosystem which includes Presto, Spark, Hive, lakeFS, and a few others.
My docker-compose.yml looks like this:
version: "3.5"
services:
lakefs:
image: treeverse/lakefs:latest
container_name: lakefs
depends_on:
- minio-setup
ports:
- "8000:8000"
environment:
- LAKEFS_DATABASE_TYPE=local
- LAKEFS_BLOCKSTORE_TYPE=s3
- LAKEFS_BLOCKSTORE_S3_FORCE_PATH_STYLE=true
- LAKEFS_BLOCKSTORE_S3_ENDPOINT=http://minio:9000
- LAKEFS_BLOCKSTORE_S3_CREDENTIALS_ACCESS_KEY_ID=minioadmin
- LAKEFS_BLOCKSTORE_S3_CREDENTIALS_SECRET_ACCESS_KEY=minioadmin
- LAKEFS_AUTH_ENCRYPT_SECRET_KEY=some random secret string
- LAKEFS_STATS_ENABLED
- LAKEFS_LOGGING_LEVEL
- LAKECTL_CREDENTIALS_ACCESS_KEY_ID=AKIAIOSFODNN7EXAMPLE
- LAKECTL_CREDENTIALS_SECRET_ACCESS_KEY=wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY
- LAKECTL_SERVER_ENDPOINT_URL=http://localhost:8000
entrypoint: ["/bin/sh", "-c"]
command:
- |
lakefs setup --local-settings --user-name docker --access-key-id AKIAIOSFODNN7EXAMPLE --secret-access-key wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY || true
lakefs run --local-settings &
wait-for -t 60 lakefs:8000 -- lakectl repo create lakefs://example s3://example || true
wait
minio-setup:
image: minio/mc
container_name: minio-setup
environment:
- MC_HOST_lakefs=http://minioadmin:minioadmin@minio:9000
depends_on:
- minio
command: ["mb", "lakefs/example"]
minio:
image: minio/minio
container_name: minio
ports:
- "9000:9000"
- "9001:9001"
entrypoint: ["minio", "server", "/data", "--console-address", ":9001"]
mariadb:
image: mariadb:10
container_name: mariadb
environment:
MYSQL_ROOT_PASSWORD: admin
MYSQL_USER: admin
MYSQL_PASSWORD: admin
MYSQL_DATABASE: metastore_db
hive-metastore:
build: hive
container_name: hive
depends_on:
- mariadb
ports:
- "9083:9083"
environment:
- DB_URI=mariadb:3306
volumes:
- ./etc/hive-site.xml:/opt/apache-hive-bin/conf/hive-site.xml
ulimits:
nofile:
soft: 65536
hard: 65536
hive-server:
build: hive
container_name: hiveserver2
ports:
- "10001:10000"
depends_on:
- hive-metastore
environment:
- DB_URI=mariadb:3306
volumes:
- ./etc/hive-site.xml:/opt/apache-hive-bin/conf/hive-site.xml
ulimits:
nofile:
soft: 65536
hard: 65536
entrypoint: [
"wait-for-it", "-t", "60", "hive:9083", "--",
"hive", "--service", "hiveserver2", "--hiveconf", "hive.root.logger=INFO,console"]
hive-client:
build: hive
profiles: ["client"]
entrypoint: ["beeline", "-u", "jdbc:hive2://hiveserver2:10000"]
trino:
image: trinodb/trino:358
container_name: trino
volumes:
- ./etc/s3.properties:/etc/trino/catalog/s3.properties
ports:
- "48080:8080"
trino-client:
image: trinodb/trino:358
profiles: ["client"]
entrypoint: ["trino", "--server", "trino:8080", "--catalog", "s3", "--schema", "default"]
spark:
image: docker.io/bitnami/spark:3
container_name: spark
environment:
- SPARK_MODE=master
- SPARK_MASTER_HOST=spark
- SPARK_RPC_AUTHENTICATION_ENABLED=no
- SPARK_RPC_ENCRYPTION_ENABLED=no
- SPARK_LOCAL_STORAGE_ENCRYPTION_ENABLED=no
- SPARK_SSL_ENABLED=no
ports:
- "18080:8080"
volumes:
- ./etc/hive-site.xml:/opt/bitnami/spark/conf/hive-site.xml
spark-worker:
image: docker.io/bitnami/spark:3
ports:
- "8081"
environment:
- SPARK_MODE=worker
- SPARK_MASTER_URL=spark://spark:7077
- SPARK_WORKER_MEMORY=1G
- SPARK_WORKER_CORES=1
- SPARK_RPC_AUTHENTICATION_ENABLED=no
- SPARK_RPC_ENCRYPTION_ENABLED=no
- SPARK_LOCAL_STORAGE_ENCRYPTION_ENABLED=no
- SPARK_SSL_ENABLED=no
deploy:
replicas: 3
volumes:
- ./etc/hive-site.xml:/opt/bitnami/spark/conf/hive-site.xml
spark-submit:
image: docker.io/bitnami/spark:3
profiles: ["client"]
entrypoint: /opt/bitnami/spark/bin/spark-submit
environment:
- SPARK_MODE=worker
- SPARK_MASTER_URL=spark://spark:7077
- SPARK_WORKER_MEMORY=1G
- SPARK_WORKER_CORES=1
- SPARK_RPC_AUTHENTICATION_ENABLED=no
- SPARK_RPC_ENCRYPTION_ENABLED=no
- SPARK_LOCAL_STORAGE_ENCRYPTION_ENABLED=no
- SPARK_SSL_ENABLED=no
volumes:
- ./:/local
- ./etc/hive-site.xml:/opt/bitnami/spark/conf/hive-site.xml
spark-sql:
image: docker.io/bitnami/spark:3
profiles: ["client"]
environment:
- SPARK_MODE=worker
- SPARK_MASTER_URL=spark://spark:7077
- SPARK_WORKER_MEMORY=1G
- SPARK_WORKER_CORES=1
- SPARK_RPC_AUTHENTICATION_ENABLED=no
- SPARK_RPC_ENCRYPTION_ENABLED=no
- SPARK_LOCAL_STORAGE_ENCRYPTION_ENABLED=no
- SPARK_SSL_ENABLED=no
volumes:
- ./:/local
- ./etc/hive-site.xml:/opt/bitnami/spark/conf/hive-site.xml
command: ["spark-sql", "--master", "spark://spark:7077"]
spark-thrift:
image: docker.io/bitnami/spark:3
container_name: spark-thrift
command: ["bash","-c", "/opt/bitnami/entrypoint.sh"]
depends_on:
- spark
environment:
- SPARK_MODE=master
- SPARK_MASTER_URL=spark://spark:7077
- SPARK_RPC_AUTHENTICATION_ENABLED=no
- SPARK_RPC_ENCRYPTION_ENABLED=no
- SPARK_LOCAL_STORAGE_ENCRYPTION_ENABLED=no
- SPARK_MODE=worker
volumes:
- ./etc/spark-thrift-entrypoint.sh:/opt/bitnami/entrypoint.sh
- ./etc/hive-site.xml:/opt/bitnami/spark/conf/hive-site.xml
create-dbt-schema-main:
image: trinodb/trino:358
profiles: ["client"]
entrypoint: ["trino", "--server", "trino:8080", "--catalog", "s3", "--execute", "drop schema if exists dbt_main ;create schema dbt_main with (location = 's3://example/main/dbt' )"]
dbt:
build: dbt
profiles: ["client"]
volumes:
- ./dbt/dbt-project:/usr/app
- ./dbt/profiles.yml:/root/.dbt/profiles.yml
entrypoint: dbt
notebook:
# To login to jupyter notebook, use password:lakefs
build: jupyter
container_name: notebook
ports:
- 8888:8888
volumes:
- ./etc/jupyter_notebook_config.py:/home/jovyan/.jupyter/jupyter_notebook_config.py
- ./etc/hive-site.xml:/usr/local/spark/conf/hive-site.xml
networks:
default:
name: bagel
when I run "docker compose up" I get this error:
=> ERROR [build 7/8] RUN --mount=type=cache,target=/root/.cache/go-build --mount=type=cache,target=/go/pkg GOOS=linux GOARCH=amd64 0.4s
=> CACHED [lakefs 2/8] RUN apk add -U --no-cache ca-certificates 0.0s
=> CACHED [lakefs 3/8] RUN apk add netcat-openbsd 0.0s
=> CACHED [lakefs 4/8] WORKDIR /app 0.0s
=> CACHED [lakefs 5/8] COPY ./scripts/wait-for ./ 0.0s
------
> [build 7/8] RUN --mount=type=cache,target=/root/.cache/go-build --mount=type=cache,target=/go/pkg GOOS=linux GOARCH=amd64 go build -ldflags "-X github.com/treeverse/lakefs/pkg/version.Version=dev" -o lakefs ./cmd/lakefs:
#0 0.407 webui/content.go:7:12: pattern dist: no matching files found
------
failed to solve: executor failed running [/bin/sh -c GOOS=$TARGETOS GOARCH=$TARGETARCH go build -ldflags "-X github.com/treeverse/lakefs/pkg/version.Version=${VERSION}" -o lakefs ./cmd/lakefs]: exit code: 1
My OS is:
Linux B460MDS3HACY1 5.15.0-58-generic #64~20.04.1-Ubuntu SMP Fri Jan 6 16:42:31 UTC 2023 x86_64 x86_64 x86_64 GNU/Linux
My go is:
go version go1.16.7 linux/amd64
What should I do to overcome this error?
Strange — the docker-compose file uses an image, so it should just pull it and not try to build a Docker image. Can you verify that the working directory holds your docker-compose file? You can also verify that you are using the latest images by running
docker-compose pull
before calling docker-compose up
.