|
|
第11行: |
第11行: |
| | | |
| ./jupyter notebook --allow-root | | ./jupyter notebook --allow-root |
− |
| |
− | ===安装findspark===
| |
− | pip3 install findspark
| |
− |
| |
− | ===设置环境变量===
| |
− | vi /etc/profile
| |
− |
| |
− | export SPARK_HOME=/root/wmtools/spark-2.4.8-bin-hadoop2.7
| |
− |
| |
− | source /etc/profile
| |
− |
| |
− | ===运行Spark代码===
| |
− | python3 demo20.py
| |
− |
| |
− | <nowiki>#pip install findspark
| |
− | #fix:ModuleNotFoundError: No module named 'pyspark'
| |
− | import findspark
| |
− | findspark.init()
| |
− |
| |
− | #############################
| |
− | from pyspark import SparkConf, SparkContext
| |
− |
| |
− | # 创建SparkContext
| |
− | conf = SparkConf().setAppName("WordCount").setMaster("local[*]")
| |
− | sc = SparkContext(conf=conf)
| |
− |
| |
− | rdd = sc.parallelize(["hello world","hello spark"]);
| |
− | rdd2 = rdd.flatMap(lambda line:line.split(" "));
| |
− | rdd3 = rdd2.map(lambda word:(word,1));
| |
− | rdd5 = rdd3.reduceByKey(lambda a, b : a + b);
| |
− | #print,否则无法显示结果
| |
− | #[('spark', 1), ('hello', 2), ('world', 1)]
| |
− | print(rdd5.collect());
| |
− | #防止多次创建SparkContexts
| |
− | sc.stop()
| |
− | </nowiki>
| |
2021年6月30日 (三) 03:24的最新版本
安装jupyter
pip3 install jupyter -i https://pypi.mirrors.ustc.edu.cn/simple/
[root@localhost ~]# find / -name jupyter
/usr/local/Python3/bin/jupyter
/usr/local/Python3/share/jupyter
/usr/local/Python3/etc/jupyter
cd /usr/local/Python3/bin
./jupyter notebook --allow-root