Commit d113a60

Merge pull request #1198 from romainx/fix_spark_java11

Fix spark installation for Java 11 and Arrow

2 parents a0a544e + 1dd95ba

File tree: 3 files changed (+79 −1)
all-spark-notebook/test/data/issue_1168.ipynb (new file)

@@ -0,0 +1,72 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "This is a test for the issue [#1168](https://github.com/jupyter/docker-stacks/issues/1168)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from pyspark.sql import SparkSession\n",
+    "from pyspark.sql.functions import pandas_udf\n",
+    "\n",
+    "# Spark session & context\n",
+    "spark = SparkSession.builder.master('local').getOrCreate()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "+---+---+\n",
+      "| id|age|\n",
+      "+---+---+\n",
+      "|  1| 21|\n",
+      "+---+---+\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "df = spark.createDataFrame([(1, 21), (2, 30)], (\"id\", \"age\"))\n",
+    "def filter_func(iterator):\n",
+    "    for pdf in iterator:\n",
+    "        yield pdf[pdf.id == 1]\n",
+    "\n",
+    "df.mapInPandas(filter_func, df.schema).show()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
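The notebook exercises mapInPandas, which ships row batches to Python as pandas DataFrames over Apache Arrow; on Java 11 that Arrow path fails (a java.lang.UnsupportedOperationException raised from Netty's reflective buffer access) unless the JVM option added below in the Dockerfile is set. The notebook also imports pandas_udf without using it; for context, a minimal sketch of that API, which crosses the same Arrow boundary and trips the same Java 11 failure (the plus_one function and the reuse of the notebook's session/DataFrame are illustrative, not part of the commit):

import pandas as pd
from pyspark.sql import SparkSession
from pyspark.sql.functions import pandas_udf

spark = SparkSession.builder.master("local").getOrCreate()
df = spark.createDataFrame([(1, 21), (2, 30)], ("id", "age"))

# Spark 3 pandas UDF: each batch of the age column arrives as a
# pandas Series, serialized through the same Arrow code path that
# mapInPandas uses.
@pandas_udf("long")
def plus_one(age: pd.Series) -> pd.Series:
    return age + 1

df.select(plus_one(df.age)).show()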

all-spark-notebook/test/test_spark_notebooks.py (+1 −1)
@@ -12,7 +12,7 @@
 @pytest.mark.parametrize(
     "test_file",
     # TODO: add local_sparklyr
-    ["local_pyspark", "local_spylon", "local_sparkR"],
+    ["local_pyspark", "local_spylon", "local_sparkR", "issue_1168"],
 )
 def test_nbconvert(container, test_file):
     """Check if Spark notebooks can be executed"""

pyspark-notebook/Dockerfile (+6)
@@ -49,6 +49,12 @@ RUN ln -s "spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}" spark && \
     mkdir -p /usr/local/bin/before-notebook.d && \
     ln -s "${SPARK_HOME}/sbin/spark-config.sh" /usr/local/bin/before-notebook.d/spark-config.sh
 
+# Fix Spark installation for Java 11 and Apache Arrow library
+# see: https://github.com/apache/spark/pull/27356, https://spark.apache.org/docs/latest/#downloading
+RUN cp -p "$SPARK_HOME/conf/spark-defaults.conf.template" "$SPARK_HOME/conf/spark-defaults.conf" && \
+    echo 'spark.driver.extraJavaOptions="-Dio.netty.tryReflectionSetAccessible=true"' >> $SPARK_HOME/conf/spark-defaults.conf && \
+    echo 'spark.executor.extraJavaOptions="-Dio.netty.tryReflectionSetAccessible=true"' >> $SPARK_HOME/conf/spark-defaults.conf
+
 USER $NB_UID
 
 # Install pyarrow
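Background: Java 11's module system blocks the reflective java.nio.DirectByteBuffer access that Netty, and therefore Arrow, relies on, so Spark's Arrow-based features need -Dio.netty.tryReflectionSetAccessible=true on both the driver and executor JVMs (see the linked apache/spark#27356). Writing the options into spark-defaults.conf applies them to every session started in the image. For comparison, a sketch of setting the same two standard Spark properties per session instead; since driver JVM options must be known before the driver JVM launches, this only works when the session is created from a fresh Python process, which is why baking them into the image defaults is the more robust choice:

from pyspark.sql import SparkSession

# The same properties the Dockerfile writes to spark-defaults.conf,
# applied to a single session at build time instead.
spark = (
    SparkSession.builder
    .master("local")
    .config("spark.driver.extraJavaOptions", "-Dio.netty.tryReflectionSetAccessible=true")
    .config("spark.executor.extraJavaOptions", "-Dio.netty.tryReflectionSetAccessible=true")
    .getOrCreate()
)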
