0 0 spark windows jupyter - smart1004/ReadTheDocs GitHub Wiki

Apache Spark๋กœ ์‹œ์ž‘ํ•˜๋Š” ๋จธ์‹ ๋Ÿฌ๋‹ ์ž…๋ฌธ
https://www.youtube.com/watch?v=PRLz11vv7VA

pip install toree
https://toree.apache.org/docs/current/user/installation/

pip install toree

jupyter toree install --spark_home=C:\java\spark\spark-2.2.3-bin-hadoop2.7
(On Linux/macOS the Toree documentation uses a path such as `--spark_home=/usr/local/bin/apache-spark/` instead.)

[ToreeInstall] Creating kernel Scala
[ToreeInstall] Removing existing kernelspec in C:\ProgramData\jupyter\kernels\apache_toree_scala
[ToreeInstall] Installed kernelspec apache_toree_scala in C:\ProgramData\jupyter\kernels\apache_toree_scala


C:\ProgramData\jupyter\kernels\apache_toree_scala\kernel.json

"argv": [ "C:\ProgramData\jupyter\kernels\apache_toree_scala\bin\run.cmd",

#run.cmd
#아래 사이트를 참고했다 (adapted from the Stack Overflow answer below)
#https://stackoverflow.com/questions/39486351/work-with-jupyter-on-windows-and-apache-toree-kernel-for-spark-compatibility
@echo off
REM run.cmd - Windows launcher for the Apache Toree (Scala) Jupyter kernel.
REM
REM Builds a java command line that runs org.apache.spark.deploy.SparkSubmit
REM with org.apache.toree.Main as the application class, echoes that command,
REM and then executes it.
REM
REM Environment read:
REM   SPARK_HOME            (required) root of the Spark distribution
REM   JAVA_HOME             (required) Java installation whose bin\java is used
REM   SPARK_OPTS            (optional) runtime options passed to SparkSubmit
REM   TOREE_OPTS            (optional) runtime options passed to Toree
REM   __TOREE_SPARK_OPTS__  install-time default used when SPARK_OPTS is unset
REM   __TOREE_OPTS__        install-time default used when TOREE_OPTS is unset
REM Arguments: everything in %* is forwarded unchanged after TOREE_OPTS.

REM This script lives in <kernel dir>\bin, so the kernel root is one level up.
set "PROG_HOME=%~dp0.."
REM Single definition of the assembly jar so the classpath entry and the
REM application-jar argument cannot drift apart.
set "TOREE_JAR=%PROG_HOME%\lib\toree-assembly-0.3.0-incubating.jar"

if not defined SPARK_HOME (
  echo SPARK_HOME must be set to the location of a Spark distribution!
  exit /b 1
)

REM The launch command below is built from JAVA_HOME; fail early with a clear
REM message instead of trying to run "\bin\java".
if not defined JAVA_HOME (
  echo JAVA_HOME must be set to the location of a Java installation!
  exit /b 1
)

REM disable randomized hash for string in Python 3.3+
set PYTHONHASHSEED=0

REM The SPARK_OPTS values during installation are stored in __TOREE_SPARK_OPTS__.
REM This allows values to be specified during install, but also during runtime.
REM The runtime options take precedence over the install options.
REM A defined batch variable is never empty, so "not defined" alone covers the
REM unset-or-empty case; the one-line form also avoids expanding option strings
REM inside a parenthesized block.
if not defined SPARK_OPTS set "SPARK_OPTS=%__TOREE_SPARK_OPTS__%"
if not defined TOREE_OPTS set "TOREE_OPTS=%__TOREE_OPTS__%"

echo Starting Spark Kernel with SPARK_HOME=%SPARK_HOME%

REM This doesn't work because the classpath doesn't get set properly,
REM unless you hardcode it in SPARK_SUBMIT_OPTS using forward slashes or double backslashes,
REM but then you can't use the SPARK_HOME and PROG_HOME variables.
REM set SPARK_SUBMIT_OPTS=-cp "%SPARK_HOME%\conf;%SPARK_HOME%\jars\*;%PROG_HOME%\lib\toree-assembly-0.3.0-incubating.jar" -Dscala.usejavacp=true
REM set TOREE_COMMAND="%SPARK_HOME%\bin\spark-submit.cmd" %SPARK_OPTS% --class org.apache.toree.Main %PROG_HOME%\lib\toree-assembly-0.3.0-incubating.jar %TOREE_OPTS% %*

REM The two important things that we must do differently on Windows are that
REM we must add the Toree assembly jar to the classpath, and we must define
REM the java property scala.usejavacp=true.
REM "jars\*" is the Java classpath wildcard for every jar in %SPARK_HOME%\jars.
set TOREE_COMMAND="%JAVA_HOME%\bin\java" -cp "%SPARK_HOME%\conf;%SPARK_HOME%\jars\*;%TOREE_JAR%" -Dscala.usejavacp=true -Xmx1g org.apache.spark.deploy.SparkSubmit %SPARK_OPTS% --class org.apache.toree.Main "%TOREE_JAR%" %TOREE_OPTS% %*

REM Show the exact command being launched, framed by blank lines.
echo.
echo %TOREE_COMMAND%
echo.

%TOREE_COMMAND%