Skip to content

ATLAS-5021: Extract Metadata from Trino periodically #336

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
442 changes: 442 additions & 0 deletions addons/models/6000-Trino/6000-trino_model.json

Large diffs are not rendered by default.

93 changes: 93 additions & 0 deletions addons/trino-extractor/pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
~ Licensed to the Apache Software Foundation (ASF) under one
~ or more contributor license agreements. See the NOTICE file
~ distributed with this work for additional information
~ regarding copyright ownership. The ASF licenses this file
~ to you under the Apache License, Version 2.0 (the
~ "License"); you may not use this file except in compliance
~ with the License. You may obtain a copy of the License at
~
~ http://www.apache.org/licenses/LICENSE-2.0
~
~ Unless required by applicable law or agreed to in writing, software
~ distributed under the License is distributed on an "AS IS" BASIS,
~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
~ See the License for the specific language governing permissions and
~ limitations under the License.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <!-- Inherits build configuration from the Atlas root POM two directories up. -->
    <parent>
        <groupId>org.apache.atlas</groupId>
        <artifactId>apache-atlas</artifactId>
        <version>3.0.0-SNAPSHOT</version>
        <relativePath>../../</relativePath>
    </parent>

    <artifactId>atlas-trino-extractor</artifactId>
    <packaging>jar</packaging>

    <name>Apache Atlas Trino Bridge</name>
    <description>Apache Atlas Trino Bridge Module</description>

    <dependencies>
        <!-- Dependencies with an explicit version are pinned in this module. -->
        <dependency>
            <groupId>com.sun.jersey</groupId>
            <artifactId>jersey-client</artifactId>
            <version>1.9</version>
        </dependency>

        <!-- Trino JDBC driver; this release is pinned because it is a Java 8 supported version. -->
        <dependency>
            <groupId>io.trino</groupId>
            <artifactId>trino-jdbc</artifactId>
            <version>403</version>
        </dependency>

        <!-- No version is declared for the Atlas artifacts, so it has to be supplied by the parent POM. -->
        <dependency>
            <groupId>org.apache.atlas</groupId>
            <artifactId>atlas-client-v2</artifactId>
        </dependency>
        <dependency>
            <groupId>org.apache.atlas</groupId>
            <artifactId>atlas-intg</artifactId>
        </dependency>

        <dependency>
            <groupId>org.quartz-scheduler</groupId>
            <artifactId>quartz</artifactId>
            <version>2.3.2</version>
        </dependency>
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-log4j12</artifactId>
            <version>1.7.30</version>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <!-- During the package phase, copy the module's dependencies into target/dependency/trino. -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-dependency-plugin</artifactId>
                <executions>
                    <execution>
                        <id>copy-dependencies</id>
                        <goals>
                            <goal>copy-dependencies</goal>
                        </goals>
                        <phase>package</phase>
                        <configuration>
                            <!-- NOTE(review): includeScope and excludeScope are both set; confirm the plugin honours both. -->
                            <excludeScope>test</excludeScope>
                            <includeScope>compile</includeScope>
                            <outputDirectory>${project.build.directory}/dependency/trino</outputDirectory>
                            <overWriteReleases>false</overWriteReleases>
                            <overWriteSnapshots>false</overWriteSnapshots>
                            <overWriteIfNewer>true</overWriteIfNewer>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
</project>
138 changes: 138 additions & 0 deletions addons/trino-extractor/src/main/bin/run-trino-extractor.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
#!/bin/bash
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License. See accompanying LICENSE file.
#
# Resolve the real location of this script: $0 may be a symlink (or a chain
# of symlinks), so follow each link until PRG no longer names one.
PRG="${0}"

# Record whether the script is running under Cygwin.
[[ "$(uname -s)" == *"CYGWIN"* ]] && CYGWIN=true

while [ -h "${PRG}" ]; do
  listing=$(ls -ld "${PRG}")
  # Extract the link target: everything after "-> " in the long listing.
  link=$(expr "$listing" : '.*-> \(.*\)$')
  if expr "$link" : '/.*' > /dev/null; then
    # Absolute target: use it as-is.
    PRG="$link"
  else
    # Relative target: resolve it against the directory that holds the link.
    PRG="$(dirname "${PRG}")/$link"
  fi
done

# BASEDIR is the parent of the directory containing the script. All path
# expansions are quoted so an install location containing spaces still works.
BASEDIR=$(dirname "${PRG}")
BASEDIR=$(cd "${BASEDIR}/.." && pwd)

# Locate the java and jar executables: use JAVA_HOME when it is set,
# otherwise fall back to whatever is found on PATH.
if [ -n "${JAVA_HOME}" ]; then
  JAVA_BIN="${JAVA_HOME}/bin/java"
  JAR_BIN="${JAVA_HOME}/bin/jar"
else
  JAVA_BIN=$(which java)
  JAR_BIN=$(which jar)
fi
export JAVA_BIN

# Abort early unless both executables exist.
if ! { [ -e "${JAVA_BIN}" ] && [ -e "${JAR_BIN}" ]; }; then
  echo "$JAVA_BIN and/or $JAR_BIN not found on the system. Please make sure java and jar commands are available."
  exit 1
fi

# Construct the Atlas classpath from every jar under ${BASEDIR}/lib/.
for i in "${BASEDIR}/lib/"*.jar; do
  ATLASCPPATH="${ATLASCPPATH}:$i"
done

# Default ATLAS_CONF_DIR to the bundled conf directory when the caller has
# not set it and that directory exists.
if [ -z "${ATLAS_CONF_DIR}" ] && [ -e "${BASEDIR}/conf/" ]; then
  ATLAS_CONF_DIR="${BASEDIR}/conf/"
fi
# The conf directory is appended to the classpath as well.
ATLASCPPATH=${ATLASCPPATH}:${ATLAS_CONF_DIR}

# log dir for applications
ATLAS_LOG_DIR="${BASEDIR}/log"
export ATLAS_LOG_DIR
LOGFILE="$ATLAS_LOG_DIR/atlas-trino-extractor.log"

# Timestamp as YYYYmmddHHMMSS. %S is seconds of the minute; the lowercase %s
# used previously expands to seconds since the epoch instead.
TIME=$(date +%Y%m%d%H%M%S)

CP="${ATLASCPPATH}"

# If running in cygwin, convert pathnames and classpath to Windows format.
# Only the variables this script actually uses are converted; arguments are
# quoted so paths containing spaces reach cygpath as a single argument.
if [ "${CYGWIN}" == "true" ]
then
  ATLAS_LOG_DIR=$(cygpath -w "${ATLAS_LOG_DIR}")
  LOGFILE=$(cygpath -w "${LOGFILE}")
  # -p treats the argument as a path list (the classpath).
  CP=$(cygpath -w -p "${CP}")
fi

# JVM options for the extractor: caller-supplied ATLAS_OPTS first, then the
# log directory/file and log4j configuration, then HTTP client request logging.
# No remote-debug agent is enabled by default, because a fixed JDWP port would
# expose a debugger on every run and make concurrent runs collide on the port.
# To debug, pass the agent explicitly, e.g.
#   ATLAS_OPTS="-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=5006"
JAVA_PROPERTIES="$ATLAS_OPTS -Datlas.log.dir=$ATLAS_LOG_DIR -Datlas.log.file=atlas-trino-extractor.log
-Dlog4j.configuration=atlas-log4j.xml -Djdk.httpclient.HttpClient.log=requests"

# Arguments forwarded to the Java entry point, and extra JVM arguments.
IMPORT_ARGS=()
JVM_ARGS=

# Turn off pathname expansion while arguments are handled; presumably so that
# cron fields such as "*" are never expanded into file names.
set -f
while :; do
  option=${1}
  shift

  case "${option}" in
    # Options that take exactly one value: forward the option and its value.
    -c|-s|-t|--catalog|--table|--schema)
      IMPORT_ARGS+=("${option}" "$1")
      shift
      ;;
    # A cron expression may arrive as several words: collect every following
    # word up to the next option (or the end of the arguments) into one value.
    -cx|--cronExpression)
      CRON_EXPR="$1"
      shift
      until [[ "$1" == "" || "$1" == -* ]]; do
        CRON_EXPR="$CRON_EXPR $1"
        shift
      done
      IMPORT_ARGS+=("${option}" "$CRON_EXPR")
      ;;
    -h|--help)
      export HELP_OPTION="true"
      IMPORT_ARGS+=("${option}")
      ;;
    # Any other dash-prefixed word is rejected; fall back to showing help.
    -*)
      echo "Invalid argument found"
      export HELP_OPTION="true"
      IMPORT_ARGS+=("--help")
      break
      ;;
    # An empty word means the arguments are exhausted.
    "")
      break
      ;;
  esac
done

JAVA_PROPERTIES="${JAVA_PROPERTIES} ${JVM_ARGS}"

if [ -z "${HELP_OPTION}" ]; then
  echo "Log file for import is $LOGFILE"
fi

# JAVA_PROPERTIES is deliberately left unquoted so it splits into separate JVM options.
"${JAVA_BIN}" ${JAVA_PROPERTIES} -cp "${CP}" org.apache.atlas.trino.cli.TrinoExtractor "${IMPORT_ARGS[@]}"
# Capture the JVM exit status immediately: $? is overwritten by every
# subsequent command, including the `set +f` below.
RETVAL=$?

# Re-enable pathname expansion.
set +f

# Report the outcome unless help was requested; any non-zero status is a failure.
if [ -z "${HELP_OPTION}" ]; then
  if [ "${RETVAL}" -eq 0 ]; then
    echo "Trino Meta Data imported successfully!"
  else
    echo "Failed to import Trino Meta Data! Check logs at: $LOGFILE for details."
  fi
fi

exit $RETVAL
32 changes: 32 additions & 0 deletions addons/trino-extractor/src/main/conf/atlas-application.properties
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
######## Atlas connection ############
# NOTE(review): presumably the base URL of the Atlas REST endpoint the extractor connects to; confirm against the extractor code.
atlas.rest.address=http://localhost:21000/

######## Trino connection ############
# <host>, <port> and <username> are placeholders; replace them with real values before running.
atlas.trino.jdbc.address=jdbc:trino://<host>:<port>/
atlas.trino.jdbc.user=<username>

######## Trino environment name ######
atlas.trino.namespace=cm
# Optional setting, left unset by default.
#atlas.trino.catalogs.registered=

######## Datasource for which ########
######## Atlas hook is enabled #######
# Commented-out examples for a catalog named hive_catalog; uncomment and adapt the catalog name as needed.
#atlas.trino.catalog.hook.enabled.hive_catalog=true
#atlas.trino.catalog.hook.enabled.hive_catalog.namespace=cm
42 changes: 42 additions & 0 deletions addons/trino-extractor/src/main/conf/atlas-log4j.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
<?xml version="1.0" encoding="UTF-8" ?>
<!--
~ Licensed to the Apache Software Foundation (ASF) under one
~ or more contributor license agreements. See the NOTICE file
~ distributed with this work for additional information
~ regarding copyright ownership. The ASF licenses this file
~ to you under the Apache License, Version 2.0 (the
~ "License"); you may not use this file except in compliance
~ with the License. You may obtain a copy of the License at
~
~ http://www.apache.org/licenses/LICENSE-2.0
~
~ Unless required by applicable law or agreed to in writing, software
~ distributed under the License is distributed on an "AS IS" BASIS,
~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
~ See the License for the specific language governing permissions and
~ limitations under the License.
-->

<!DOCTYPE log4j:configuration SYSTEM "log4j.dtd">

<log4j:configuration xmlns:log4j="http://jakarta.apache.org/log4j/">
    <!-- Rolling file appender; the target path is built from the atlas.log.dir and atlas.log.file properties. -->
    <appender name="FILE" class="org.apache.log4j.RollingFileAppender">
        <param name="File" value="${atlas.log.dir}/${atlas.log.file}"/>
        <param name="Append" value="true"/>
        <param name="maxFileSize" value="100MB"/>
        <param name="maxBackupIndex" value="20"/>
        <layout class="org.apache.log4j.PatternLayout">
            <param name="ConversionPattern" value="%d %-5p - [%t:%x] ~ %m (%C{1}:%L)%n"/>
        </layout>
    </appender>

    <!-- Loggers under org.apache.atlas.trino log at info; additivity is off so events are not also passed to the root logger. -->
    <logger name="org.apache.atlas.trino" additivity="false">
        <level value="info"/>
        <appender-ref ref="FILE"/>
    </logger>

    <!-- Everything else logs at warn to the same file. -->
    <root>
        <priority value="warn"/>
        <appender-ref ref="FILE"/>
    </root>
</log4j:configuration>
Loading
Loading