71 changes: 38 additions & 33 deletions docker/Dockerfile
@@ -34,7 +34,8 @@ RUN apt-get update && \
sudo \
curl \
mysql-client=${MYSQL_CLIENT_VERSION} && \
apt-get clean -y
apt-get clean -y && \
rm -rf /var/lib/apt/lists/* /var/cache/apt/archives

WORKDIR /opt

@@ -45,8 +46,24 @@ RUN if echo $METASTORE_VERSION | grep -E '^3\.' > /dev/null; then \
fi

# download and install hadoop and fix the (>= ubuntu jammy) distribution executable bug
RUN curl -L https://dlcdn.apache.org/hadoop/common/hadoop-${HADOOP_VERSION}/hadoop-${HADOOP_VERSION}.tar.gz | tar zxf - && \
sed -i 's|if \[\[ ! -x "\$JAVA" \]\]; then|if [ \$("$JAVA" -version) ]; then|' ${HADOOP_HOME}/libexec/hadoop-functions.sh
RUN set -eux; \
curl -L https://dlcdn.apache.org/hadoop/common/hadoop-${HADOOP_VERSION}/hadoop-${HADOOP_VERSION}.tar.gz | tar zxf - && \
sed -i 's|if \[\[ ! -x "\$JAVA" \]\]; then|if [ \$("$JAVA" -version) ]; then|' ${HADOOP_HOME}/libexec/hadoop-functions.sh && \
echo "Cleaning up unnecessary Hadoop components for Hive Metastore..." && \
rm -rf ${HADOOP_HOME}/share/hadoop/yarn/* && \
rm -rf ${HADOOP_HOME}/share/hadoop/mapreduce/* && \
rm -rf ${HADOOP_HOME}/share/hadoop/client/* && \
find ${HADOOP_HOME}/share/hadoop -type d \( \
-name "jdiff" -o \
-name "test" -o \
-name "examples" \
\) -exec rm -rf {} + && \
rm -rf ${HADOOP_HOME}/share/doc && \
rm -rf ${HADOOP_HOME}/share/hadoop/hdfs/webapps && \
find ${HADOOP_HOME}/share/hadoop -type d -name "webapps" -exec rm -rf {} + && \
echo "Verify what remains" && \
du -sh ${HADOOP_HOME}/share/hadoop/* || true && \
echo "Hadoop cleanup completed."

RUN rm -f ${HIVE_HOME}/lib/postgresql-*.jar && \
curl -sL https://jdbc.postgresql.org/download/postgresql-${JDBC_VERSION}.jar -o /opt/apache-hive-metastore-${METASTORE_VERSION}-bin/lib/postgresql-${JDBC_VERSION}.jar
@@ -71,55 +88,43 @@ RUN curl -sLO https://repo1.maven.org/maven2/io/prometheus/jmx/jmx_prometheus_ja

# Download and install GCS connector and Google Cloud Storage dependencies
RUN mkdir -p ${HIVE_HOME}/lib/ && \

curl -sLO "https://repo1.maven.org/maven2/com/google/cloud/bigdataoss/gcs-connector/${GCS_CONNECTOR_VERSION}/gcs-connector-${GCS_CONNECTOR_VERSION}-sources.jar" && \
mv gcs-connector-${GCS_CONNECTOR_VERSION}-sources.jar ${HIVE_HOME}/lib/ && \

curl -sLO "https://repo1.maven.org/maven2/com/google/cloud/google-cloud-storage/${GOOGLE_CLOUD_STORAGE_VERSION}/google-cloud-storage-${GOOGLE_CLOUD_STORAGE_VERSION}.jar" && \
mv google-cloud-storage-${GOOGLE_CLOUD_STORAGE_VERSION}.jar ${HIVE_HOME}/lib/ && \

curl -sLO "https://repo1.maven.org/maven2/com/google/auth/google-auth-library-oauth2-http/${GOOGLE_AUTH_LIBRARY_VERSION}/google-auth-library-oauth2-http-${GOOGLE_AUTH_LIBRARY_VERSION}.jar" && \
mv google-auth-library-oauth2-http-${GOOGLE_AUTH_LIBRARY_VERSION}.jar ${HIVE_HOME}/lib/ && \

curl -sLO "https://repo1.maven.org/maven2/com/google/http-client/google-http-client-jackson2/${GOOGLE_HTTP_CLIENT_VERSION}/google-http-client-jackson2-${GOOGLE_HTTP_CLIENT_VERSION}.jar" && \
mv google-http-client-jackson2-${GOOGLE_HTTP_CLIENT_VERSION}.jar ${HIVE_HOME}/lib/ && \

curl -sLO "https://storage.googleapis.com/hadoop-lib/gcs/gcs-connector-latest-hadoop3.jar" && \
mv gcs-connector-latest-hadoop3.jar ${HIVE_HOME}/lib/ && \

chmod 644 ${HIVE_HOME}/lib/*.jar && \

export GCS_CONNECTOR_JAR_HIVE="${HIVE_HOME}/lib/gcs-connector-${GCS_CONNECTOR_VERSION}-sources.jar" && \
export GCS_CONNECTOR_JAR_HADOOP="${HADOOP_HOME}/share/hadoop/common/lib/gcs-connector-${GCS_CONNECTOR_VERSION}-sources.jar" &&\

export GCS_CONNECTOR_JAR_HADOOP="${HADOOP_HOME}/share/hadoop/common/lib/gcs-connector-${GCS_CONNECTOR_VERSION}-sources.jar" && \
export GCS_CLIENT_HTTP_JAR_HIVE="${HIVE_HOME}/lib/google-http-client-jackson2-${GOOGLE_HTTP_CLIENT_VERSION}.jar" && \
export GCS_CLIENT_HTTP_JAR_HADOOP="${HADOOP_HOME}/share/hadoop/common/lib/google-http-client-jackson2-${GOOGLE_HTTP_CLIENT_VERSION}.jar" &&\

export GCS_CLIENT_HTTP_JAR_HADOOP="${HADOOP_HOME}/share/hadoop/common/lib/google-http-client-jackson2-${GOOGLE_HTTP_CLIENT_VERSION}.jar" && \
export GCS_CLOUD_STORAGE_JAR_HIVE="${HIVE_HOME}/lib/google-cloud-storage-${GOOGLE_CLOUD_STORAGE_VERSION}.jar" && \
export GCS_CLOUD_STORAGE_JAR_HADOOP="${HADOOP_HOME}/share/hadoop/common/lib/google-cloud-storage-${GOOGLE_CLOUD_STORAGE_VERSION}.jar" &&\

export GCS_CLOUD_STORAGE_JAR_HADOOP="${HADOOP_HOME}/share/hadoop/common/lib/google-cloud-storage-${GOOGLE_CLOUD_STORAGE_VERSION}.jar" && \
export GCS_OAUTH_JAR_HIVE="${HIVE_HOME}/lib/google-auth-library-oauth2-http-${GOOGLE_AUTH_LIBRARY_VERSION}.jar" && \
export GCS_OAUTH_JAR_HADOOP="${HADOOP_HOME}/share/hadoop/common/lib/google-auth-library-oauth2-http-${GOOGLE_AUTH_LIBRARY_VERSION}.jar" &&\

export GCS_OAUTH_JAR_HADOOP="${HADOOP_HOME}/share/hadoop/common/lib/google-auth-library-oauth2-http-${GOOGLE_AUTH_LIBRARY_VERSION}.jar" && \
export GCS_HADOOP_CONNECTOR_JAR_HIVE="${HIVE_HOME}/lib/gcs-connector-latest-hadoop3.jar" && \
export GCS_HADOOP_CONNECTOR_JAR_HADOOP="${HADOOP_HOME}/share/hadoop/common/lib/gcs-connector-latest-hadoop3.jar" &&\

cp "${GCS_CONNECTOR_JAR_HIVE}" "${GCS_CONNECTOR_JAR_HADOOP}" &&\
cp "${GCS_CLIENT_HTTP_JAR_HIVE}" "${GCS_CLIENT_HTTP_JAR_HADOOP}" &&\
cp "${GCS_CLOUD_STORAGE_JAR_HIVE}" "${GCS_CLOUD_STORAGE_JAR_HADOOP}" &&\
cp "${GCS_OAUTH_JAR_HIVE}" "${GCS_OAUTH_JAR_HADOOP}" &&\
cp "${GCS_HADOOP_CONNECTOR_JAR_HIVE}" "${GCS_HADOOP_CONNECTOR_JAR_HADOOP}" &&\

chown ubuntu:ubuntu "${GCS_CONNECTOR_JAR_HADOOP}" &&\
chown ubuntu:ubuntu "${GCS_CLIENT_HTTP_JAR_HADOOP}" &&\
chown ubuntu:ubuntu "${GCS_CLOUD_STORAGE_JAR_HADOOP}" &&\
chown ubuntu:ubuntu "${GCS_OAUTH_JAR_HADOOP}" &&\
chown ubuntu:ubuntu "${GCS_HADOOP_CONNECTOR_JAR_HADOOP}" &&\

export GCS_HADOOP_CONNECTOR_JAR_HADOOP="${HADOOP_HOME}/share/hadoop/common/lib/gcs-connector-latest-hadoop3.jar" && \
cp "${GCS_CONNECTOR_JAR_HIVE}" "${GCS_CONNECTOR_JAR_HADOOP}" && \
cp "${GCS_CLIENT_HTTP_JAR_HIVE}" "${GCS_CLIENT_HTTP_JAR_HADOOP}" && \
cp "${GCS_CLOUD_STORAGE_JAR_HIVE}" "${GCS_CLOUD_STORAGE_JAR_HADOOP}" && \
cp "${GCS_OAUTH_JAR_HIVE}" "${GCS_OAUTH_JAR_HADOOP}" && \
cp "${GCS_HADOOP_CONNECTOR_JAR_HIVE}" "${GCS_HADOOP_CONNECTOR_JAR_HADOOP}" && \
chown ubuntu:ubuntu "${GCS_CONNECTOR_JAR_HADOOP}" && \
chown ubuntu:ubuntu "${GCS_CLIENT_HTTP_JAR_HADOOP}" && \
chown ubuntu:ubuntu "${GCS_CLOUD_STORAGE_JAR_HADOOP}" && \
chown ubuntu:ubuntu "${GCS_OAUTH_JAR_HADOOP}" && \
chown ubuntu:ubuntu "${GCS_HADOOP_CONNECTOR_JAR_HADOOP}" && \
export GCS_CONNECTOR_JAR="${GCS_CONNECTOR_JAR_HADOOP}"

# Ensure GCS connector is in the classpath
# Ensure both GCS and AWS connectors are on the Hadoop classpath
RUN echo "export HADOOP_CLASSPATH=\${HADOOP_CLASSPATH}:${HADOOP_HOME}/share/hadoop/common/lib/gcs-connector-${GCS_CONNECTOR_VERSION}-sources.jar" \
>> ${HADOOP_HOME}/etc/hadoop/hadoop-env.sh && \
echo "export HADOOP_CLASSPATH=\${HADOOP_CLASSPATH}:${HADOOP_HOME}/share/hadoop/tools/lib/aws-java-sdk-bundle-*.jar:${HADOOP_HOME}/share/hadoop/tools/lib/hadoop-aws-${HADOOP_VERSION}.jar" \
>> ${HADOOP_HOME}/etc/hadoop/hadoop-env.sh

RUN sed -i '/<\/configuration>/i \
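A quick way to sanity-check the trimmed Hadoop tree and the classpath entries appended to hadoop-env.sh is to run the checks inside the built image. This is a minimal sketch, not part of the change itself; the tag hive-metastore:local is a placeholder for whatever tag your local build produces.

# Placeholder tag; substitute the image built from this Dockerfile.
docker run --rm hive-metastore:local bash -c '
  du -sh "${HADOOP_HOME}"/share/hadoop/*                                           # yarn, mapreduce and client should now be empty
  ls "${HADOOP_HOME}"/share/hadoop/common/lib/ | grep -E "gcs-connector|google-"   # connector jars copied from ${HIVE_HOME}/lib
  tail -n 2 "${HADOOP_HOME}"/etc/hadoop/hadoop-env.sh                              # HADOOP_CLASSPATH exports appended by this change
'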
2 changes: 1 addition & 1 deletion docker/metastore.sh
@@ -293,7 +293,7 @@ if [ "$MODE" = "init" ]; then
if [ $? -ne 0 ]; then echo "Will initialize the DB"; ${BASEDIR}/apache-hive-metastore-${METASTORE_VERSION}-bin/bin/schematool -initSchema -dbType ${DB_DRIVER_NAME} -userName ${HIVEMS_USER} -passWord ${HIVEMS_PASSWORD} -url "jdbc:mysql://${DB_HOST}:${DB_PORT}/${HIVEMS_DB}?createDatabaseIfNotExist=true&connectTimeout=1000"; fi
else
psql --host=${DB_HOST} --port=${DB_PORT} -U ${HIVEMS_USER} -d ${HIVEMS_DB} -c 'SELECT "DB_ID" FROM "DBS"' >/dev/null 2>&1;
if [ $? -ne 0 ]; then echo "Will initialize the DB"; ${BASEDIR}/apache-hive-metastore-${METASTORE_VERSION}-bin/bin/schematool -initSchema -dbType ${DB_DRIVER_NAME}; fi
if [ $? -ne 0 ]; then echo "Will initialize the DB"; ${BASEDIR}/apache-hive-metastore-${METASTORE_VERSION}-bin/bin/schematool -initSchema -dbType postgres; fi
fi
echo "DATABASE SCHEMA SHOULD BE OK NOW!!"
exit 0
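The PostgreSQL branch now passes the literal dbType postgres to schematool instead of ${DB_DRIVER_NAME}. To confirm the result after initialization, schematool's -info mode prints the installed schema version; the sketch below assumes the same metastore layout and connection settings used elsewhere in metastore.sh (connection details come from the site configuration, as in the -initSchema call above).

# Hedged sketch: print the metastore schema version once -initSchema has run.
${BASEDIR}/apache-hive-metastore-${METASTORE_VERSION}-bin/bin/schematool -info -dbType postgres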
2 changes: 1 addition & 1 deletion helm/hive-metastore/Chart.yaml
@@ -1,7 +1,7 @@
# File generated by Makefile
apiVersion: v2
name: hive-metastore
version: 3.1.3-1.3.0
version: 3.1.3-1.3.1
sources:
- https://github.com/okdp/hive-metastore
appVersion: 3.1.3
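The chart version bump to 3.1.3-1.3.1 is what Helm users pick up on their next upgrade. A minimal sketch, assuming a repo alias of okdp pointing at the published chart repository and a release named hive-metastore; both names are placeholders, not taken from this diff.

helm repo update
helm upgrade --install hive-metastore okdp/hive-metastore --version 3.1.3-1.3.1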