diff --git a/docker/Dockerfile b/docker/Dockerfile
index cddc136..b397d8f 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -34,7 +34,8 @@ RUN apt-get update && \
         sudo \
         curl \
         mysql-client=${MYSQL_CLIENT_VERSION} && \
-    apt-get clean -y
+    apt-get clean -y && \
+    rm -rf /var/lib/apt/lists/* /var/cache/apt/archives
 
 WORKDIR /opt
 
@@ -45,8 +46,24 @@ RUN if echo $METASTORE_VERSION | grep -E '^3\.' > /dev/null; then \
     fi
 
 # download and install hadoop and fix the (>= ubuntu jammy) distribution executable bug
-RUN curl -L https://dlcdn.apache.org/hadoop/common/hadoop-${HADOOP_VERSION}/hadoop-${HADOOP_VERSION}.tar.gz | tar zxf - && \
-    sed -i 's|if \[\[ ! -x "\$JAVA" \]\]; then|if [ \$("$JAVA" -version) ]; then|' ${HADOOP_HOME}/libexec/hadoop-functions.sh
+RUN set -eux; \
+    curl -L https://dlcdn.apache.org/hadoop/common/hadoop-${HADOOP_VERSION}/hadoop-${HADOOP_VERSION}.tar.gz | tar zxf - && \
+    sed -i 's|if \[\[ ! -x "\$JAVA" \]\]; then|if [ \$("$JAVA" -version) ]; then|' ${HADOOP_HOME}/libexec/hadoop-functions.sh && \
+    echo "Cleaning up unnecessary Hadoop components for Hive Metastore..." && \
+    rm -rf ${HADOOP_HOME}/share/hadoop/yarn/* && \
+    rm -rf ${HADOOP_HOME}/share/hadoop/mapreduce/* && \
+    rm -rf ${HADOOP_HOME}/share/hadoop/client/* && \
+    find ${HADOOP_HOME}/share/hadoop -type d \( \
+        -name "jdiff" -o \
+        -name "test" -o \
+        -name "examples" \
+    \) -exec rm -rf {} + && \
+    rm -rf ${HADOOP_HOME}/share/doc && \
+    rm -rf ${HADOOP_HOME}/share/hadoop/hdfs/webapps && \
+    find ${HADOOP_HOME}/share/hadoop -type d -name "webapps" -exec rm -rf {} + && \
+    echo "Verify what remains" && \
+    du -sh ${HADOOP_HOME}/share/hadoop/* || true && \
+    echo "Hadoop cleanup completed."
 
 RUN rm -f ${HIVE_HOME}/lib/postgresql-*.jar && \
     curl -sL https://jdbc.postgresql.org/download/postgresql-${JDBC_VERSION}.jar -o /opt/apache-hive-metastore-${METASTORE_VERSION}-bin/lib/postgresql-${JDBC_VERSION}.jar
@@ -71,55 +88,43 @@ RUN curl -sLO https://repo1.maven.org/maven2/io/prometheus/jmx/jmx_prometheus_ja
 
 # Download and install GCS connector and Google Cloud Storage dependencies
 RUN mkdir -p ${HIVE_HOME}/lib/ && \
-
     curl -sLO "https://repo1.maven.org/maven2/com/google/cloud/bigdataoss/gcs-connector/${GCS_CONNECTOR_VERSION}/gcs-connector-${GCS_CONNECTOR_VERSION}-sources.jar" && \
     mv gcs-connector-${GCS_CONNECTOR_VERSION}-sources.jar ${HIVE_HOME}/lib/ && \
-
     curl -sLO "https://repo1.maven.org/maven2/com/google/cloud/google-cloud-storage/${GOOGLE_CLOUD_STORAGE_VERSION}/google-cloud-storage-${GOOGLE_CLOUD_STORAGE_VERSION}.jar" && \
     mv google-cloud-storage-${GOOGLE_CLOUD_STORAGE_VERSION}.jar ${HIVE_HOME}/lib/ && \
-
     curl -sLO "https://repo1.maven.org/maven2/com/google/auth/google-auth-library-oauth2-http/${GOOGLE_AUTH_LIBRARY_VERSION}/google-auth-library-oauth2-http-${GOOGLE_AUTH_LIBRARY_VERSION}.jar" && \
     mv google-auth-library-oauth2-http-${GOOGLE_AUTH_LIBRARY_VERSION}.jar ${HIVE_HOME}/lib/ && \
-
     curl -sLO "https://repo1.maven.org/maven2/com/google/http-client/google-http-client-jackson2/${GOOGLE_HTTP_CLIENT_VERSION}/google-http-client-jackson2-${GOOGLE_HTTP_CLIENT_VERSION}.jar" && \
     mv google-http-client-jackson2-${GOOGLE_HTTP_CLIENT_VERSION}.jar ${HIVE_HOME}/lib/ && \
-
     curl -sLO "https://storage.googleapis.com/hadoop-lib/gcs/gcs-connector-latest-hadoop3.jar" && \
     mv gcs-connector-latest-hadoop3.jar ${HIVE_HOME}/lib/ && \
-
     chmod 644 ${HIVE_HOME}/lib/*.jar && \
-
     export GCS_CONNECTOR_JAR_HIVE="${HIVE_HOME}/lib/gcs-connector-${GCS_CONNECTOR_VERSION}-sources.jar" && \
-    export GCS_CONNECTOR_JAR_HADOOP="${HADOOP_HOME}/share/hadoop/common/lib/gcs-connector-${GCS_CONNECTOR_VERSION}-sources.jar" &&\
-
+    export GCS_CONNECTOR_JAR_HADOOP="${HADOOP_HOME}/share/hadoop/common/lib/gcs-connector-${GCS_CONNECTOR_VERSION}-sources.jar" && \
     export GCS_CLIENT_HTTP_JAR_HIVE="${HIVE_HOME}/lib/google-http-client-jackson2-${GOOGLE_HTTP_CLIENT_VERSION}.jar" && \
-    export GCS_CLIENT_HTTP_JAR_HADOOP="${HADOOP_HOME}/share/hadoop/common/lib/google-http-client-jackson2-${GOOGLE_HTTP_CLIENT_VERSION}.jar" &&\
-
+    export GCS_CLIENT_HTTP_JAR_HADOOP="${HADOOP_HOME}/share/hadoop/common/lib/google-http-client-jackson2-${GOOGLE_HTTP_CLIENT_VERSION}.jar" && \
     export GCS_CLOUD_STORAGE_JAR_HIVE="${HIVE_HOME}/lib/google-cloud-storage-${GOOGLE_CLOUD_STORAGE_VERSION}.jar" && \
-    export GCS_CLOUD_STORAGE_JAR_HADOOP="${HADOOP_HOME}/share/hadoop/common/lib/google-cloud-storage-${GOOGLE_CLOUD_STORAGE_VERSION}.jar" &&\
-
+    export GCS_CLOUD_STORAGE_JAR_HADOOP="${HADOOP_HOME}/share/hadoop/common/lib/google-cloud-storage-${GOOGLE_CLOUD_STORAGE_VERSION}.jar" && \
     export GCS_OAUTH_JAR_HIVE="${HIVE_HOME}/lib/google-auth-library-oauth2-http-${GOOGLE_AUTH_LIBRARY_VERSION}.jar" && \
-    export GCS_OAUTH_JAR_HADOOP="${HADOOP_HOME}/share/hadoop/common/lib/google-auth-library-oauth2-http-${GOOGLE_AUTH_LIBRARY_VERSION}.jar" &&\
-
+    export GCS_OAUTH_JAR_HADOOP="${HADOOP_HOME}/share/hadoop/common/lib/google-auth-library-oauth2-http-${GOOGLE_AUTH_LIBRARY_VERSION}.jar" && \
     export GCS_HADOOP_CONNECTOR_JAR_HIVE="${HIVE_HOME}/lib/gcs-connector-latest-hadoop3.jar" && \
-    export GCS_HADOOP_CONNECTOR_JAR_HADOOP="${HADOOP_HOME}/share/hadoop/common/lib/gcs-connector-latest-hadoop3.jar" &&\
-
-    cp "${GCS_CONNECTOR_JAR_HIVE}" "${GCS_CONNECTOR_JAR_HADOOP}" &&\
-    cp "${GCS_CLIENT_HTTP_JAR_HIVE}" "${GCS_CLIENT_HTTP_JAR_HADOOP}" &&\
-    cp "${GCS_CLOUD_STORAGE_JAR_HIVE}" "${GCS_CLOUD_STORAGE_JAR_HADOOP}" &&\
-    cp "${GCS_OAUTH_JAR_HIVE}" "${GCS_OAUTH_JAR_HADOOP}" &&\
-    cp "${GCS_HADOOP_CONNECTOR_JAR_HIVE}" "${GCS_HADOOP_CONNECTOR_JAR_HADOOP}" &&\
-
-    chown ubuntu:ubuntu "${GCS_CONNECTOR_JAR_HADOOP}" &&\
-    chown ubuntu:ubuntu "${GCS_CLIENT_HTTP_JAR_HADOOP}" &&\
-    chown ubuntu:ubuntu "${GCS_CLOUD_STORAGE_JAR_HADOOP}" &&\
-    chown ubuntu:ubuntu "${GCS_OAUTH_JAR_HADOOP}" &&\
-    chown ubuntu:ubuntu "${GCS_HADOOP_CONNECTOR_JAR_HADOOP}" &&\
-
+    export GCS_HADOOP_CONNECTOR_JAR_HADOOP="${HADOOP_HOME}/share/hadoop/common/lib/gcs-connector-latest-hadoop3.jar" && \
+    cp "${GCS_CONNECTOR_JAR_HIVE}" "${GCS_CONNECTOR_JAR_HADOOP}" && \
+    cp "${GCS_CLIENT_HTTP_JAR_HIVE}" "${GCS_CLIENT_HTTP_JAR_HADOOP}" && \
+    cp "${GCS_CLOUD_STORAGE_JAR_HIVE}" "${GCS_CLOUD_STORAGE_JAR_HADOOP}" && \
+    cp "${GCS_OAUTH_JAR_HIVE}" "${GCS_OAUTH_JAR_HADOOP}" && \
+    cp "${GCS_HADOOP_CONNECTOR_JAR_HIVE}" "${GCS_HADOOP_CONNECTOR_JAR_HADOOP}" && \
+    chown ubuntu:ubuntu "${GCS_CONNECTOR_JAR_HADOOP}" && \
+    chown ubuntu:ubuntu "${GCS_CLIENT_HTTP_JAR_HADOOP}" && \
+    chown ubuntu:ubuntu "${GCS_CLOUD_STORAGE_JAR_HADOOP}" && \
+    chown ubuntu:ubuntu "${GCS_OAUTH_JAR_HADOOP}" && \
+    chown ubuntu:ubuntu "${GCS_HADOOP_CONNECTOR_JAR_HADOOP}" && \
     export GCS_CONNECTOR_JAR="${GCS_CONNECTOR_JAR_HADOOP}"
 
-# Ensure GCS connector is in the classpath
+# Ensure both GCS and AWS connectors are on the Hadoop classpath
 RUN echo "export HADOOP_CLASSPATH=\${HADOOP_CLASSPATH}:${HADOOP_HOME}/share/hadoop/common/lib/gcs-connector-${GCS_CONNECTOR_VERSION}-sources.jar" \
+    >> ${HADOOP_HOME}/etc/hadoop/hadoop-env.sh && \
+    echo "export HADOOP_CLASSPATH=\${HADOOP_CLASSPATH}:${HADOOP_HOME}/share/hadoop/tools/lib/aws-java-sdk-bundle-*.jar:${HADOOP_HOME}/share/hadoop/tools/lib/hadoop-aws-${HADOOP_VERSION}.jar" \
     >> ${HADOOP_HOME}/etc/hadoop/hadoop-env.sh
 
 RUN sed -i '/<\/configuration>/i \
diff --git a/docker/metastore.sh b/docker/metastore.sh
index 6276b16..a52a5ba 100755
--- a/docker/metastore.sh
+++ b/docker/metastore.sh
@@ -293,7 +293,7 @@ if [ "$MODE" = "init" ]; then
     if [ $? -ne 0 ]; then echo "Will initialize the DB"; ${BASEDIR}/apache-hive-metastore-${METASTORE_VERSION}-bin/bin/schematool -initSchema -dbType ${DB_DRIVER_NAME} -userName ${HIVEMS_USER} -passWord ${HIVEMS_PASSWORD} -url "jdbc:mysql://${DB_HOST}:${DB_PORT}/${HIVEMS_DB}?createDatabaseIfNotExist=true&connectTimeout=1000"; fi
   else
     psql --host=${DB_HOST} --port=${DB_PORT} -U ${HIVEMS_USER} -d ${HIVEMS_DB} -c 'SELECT "DB_ID" FROM "DBS"' >/dev/null 2>&1;
-    if [ $? -ne 0 ]; then echo "Will initialize the DB"; ${BASEDIR}/apache-hive-metastore-${METASTORE_VERSION}-bin/bin/schematool -initSchema -dbType ${DB_DRIVER_NAME}; fi
+    if [ $? -ne 0 ]; then echo "Will initialize the DB"; ${BASEDIR}/apache-hive-metastore-${METASTORE_VERSION}-bin/bin/schematool -initSchema -dbType postgres; fi
   fi
   echo "DATABASE SCHEMA SHOULD BE OK NOW!!"
   exit 0
diff --git a/helm/hive-metastore/Chart.yaml b/helm/hive-metastore/Chart.yaml
index 73a55ed..5284b3b 100644
--- a/helm/hive-metastore/Chart.yaml
+++ b/helm/hive-metastore/Chart.yaml
@@ -1,7 +1,7 @@
 # File generated by Makefile
 apiVersion: v2
 name: hive-metastore
-version: 3.1.3-1.3.0
+version: 3.1.3-1.3.1
 sources:
   - https://github.com/okdp/hive-metastore
 appVersion: 3.1.3
diff --git a/helm/hive-metastore/README.md b/helm/hive-metastore/README.md
index 51fd62c..fd5d369 100644
--- a/helm/hive-metastore/README.md
+++ b/helm/hive-metastore/README.md
@@ -35,13 +35,13 @@ This chart bootstraps a [Hive Metastore](https://cwiki.apache.org/confluence/dis
 To install the chart with the release name `my-release`:
 
 ```shell
-$ helm install my-release oci://quay.io/okdp/charts/hive-metastore --version 3.1.3-1.3.0
+$ helm install my-release oci://quay.io/okdp/charts/hive-metastore --version 3.1.3-1.3.1
 ```
 
 This will create a release of `my-release` in the default namespace. To install in a different namespace:
 
 ```shell
-$ helm install my-release oci://quay.io/okdp/charts/hive-metastore --version 3.1.3-1.3.0 \
+$ helm install my-release oci://quay.io/okdp/charts/hive-metastore --version 3.1.3-1.3.1 \
   --namespace hive-metastore
 ```
 
@@ -62,7 +62,7 @@ The command removes all the Kubernetes components associated with the chart and
 To download the chart locally, use the following command:
 
 ```shell
-$ helm pull oci://quay.io/okdp/charts/hive-metastore --version 3.1.3-1.3.0
+$ helm pull oci://quay.io/okdp/charts/hive-metastore --version 3.1.3-1.3.1
 ```
 
 ## Values
@@ -76,7 +76,7 @@ $ helm pull oci://quay.io/okdp/charts/hive-metastore --version 3.1.3-1.3.0
+[]
+
+