PySpark in Practice: Installing Jupyter Notebook
Install Jupyter
pip3 install jupyter -i https://pypi.mirrors.ustc.edu.cn/simple/
[root@localhost ~]# find / -name jupyter
/usr/local/Python3/bin/jupyter
/usr/local/Python3/share/jupyter
/usr/local/Python3/etc/jupyter
cd /usr/local/Python3/bin
./jupyter notebook --allow-root
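To reach the notebook from another machine, Jupyter can also be told which interface and port to listen on; the address and port below are assumptions, adjust them to your environment:

./jupyter notebook --allow-root --ip=0.0.0.0 --port=8888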
Install findspark
pip3 install findspark
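findspark locates a Spark installation and puts its Python libraries on sys.path, which is what fixes the "No module named 'pyspark'" error in the script below. A minimal standalone check, assuming the Spark path used later in this guide:

import findspark
# The path argument is optional once SPARK_HOME is exported (next step);
# this path matches the one used elsewhere in this guide
findspark.init("/root/wmtools/spark-2.4.8-bin-hadoop2.7")

import pyspark
print(pyspark.__version__)  # expected to print 2.4.8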
Set environment variables
vi /etc/profile
export SPARK_HOME=/root/wmtools/spark-2.4.8-bin-hadoop2.7
source /etc/profile
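Verify that the variable is visible in the current shell:

echo $SPARK_HOME
# /root/wmtools/spark-2.4.8-bin-hadoop2.7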
Run Spark code
python3 demo20.py
demo20.py:

#pip install findspark  # fix: ModuleNotFoundError: No module named 'pyspark'
import findspark
findspark.init()
#############################
from pyspark import SparkConf, SparkContext

# Create the SparkContext
conf = SparkConf().setAppName("WordCount").setMaster("local[*]")
sc = SparkContext(conf=conf)

rdd = sc.parallelize(["hello world", "hello spark"])
rdd2 = rdd.flatMap(lambda line: line.split(" "))
rdd3 = rdd2.map(lambda word: (word, 1))
rdd5 = rdd3.reduceByKey(lambda a, b: a + b)

# print, otherwise the result is not shown:
# [('spark', 1), ('hello', 2), ('world', 1)]
print(rdd5.collect())

# Stop the context so repeated runs do not create multiple SparkContexts
sc.stop()
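Since the point of installing Jupyter was to run Spark interactively, the same word count can be pasted into a notebook cell. A minimal sketch, assuming the notebook was started after SPARK_HOME was exported so findspark.init() needs no arguments:

# In a Jupyter cell
import findspark
findspark.init()  # picks up SPARK_HOME from the environment

from pyspark import SparkConf, SparkContext

conf = SparkConf().setAppName("WordCount").setMaster("local[*]")
sc = SparkContext(conf=conf)

counts = (sc.parallelize(["hello world", "hello spark"])
            .flatMap(lambda line: line.split(" "))
            .map(lambda word: (word, 1))
            .reduceByKey(lambda a, b: a + b)
            .collect())
print(counts)  # [('spark', 1), ('hello', 2), ('world', 1)] -- order may vary

sc.stop()  # stop the context so re-running the cell does not create a second one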