mirror of
https://github.com/Vale54321/BigData.git
synced 2025-12-11 09:59:33 +01:00
Merge branch 'main' of https://github.com/Vale54321/BigData
This commit is contained in:
@@ -1,3 +1,5 @@
|
||||
from sparkstart import scon, spark
|
||||
|
||||
def a(scon, spark, path):
    """Load the text file at *path* through the given Spark context.

    Args:
        scon: Object exposing ``textFile(path)``; call sites in this file
            pass the ``scon`` imported from ``sparkstart``.
        spark: Not used by this function.
        path: Location passed unchanged to ``scon.textFile``.

    Returns:
        Whatever ``scon.textFile(path)`` returns.
    """
    return scon.textFile(path)
|
||||
@@ -49,11 +51,11 @@ def e(scon,spark, path, top_n=20):
|
||||
return d(rdd, top_n)
|
||||
|
||||
def main(scon, spark):
    """Run the exercise steps on the sample texts.

    Loads ``robinsonCrusoe.txt`` through ``a``, passes the resulting RDD to
    ``b``, ``c`` and ``d`` in that order, then calls ``e`` on
    ``DonQuijote.txt``. The return values of all steps are discarded.

    Args:
        scon: Spark context forwarded to ``a`` and ``e``.
        spark: Spark session forwarded to ``a`` and ``e``.
    """
    rdd = a(scon, spark, "/data/texte/test/robinsonCrusoe.txt")
    b(rdd)
    c(rdd)
    d(rdd)
    # NOTE(review): the call below appears twice with identical arguments.
    # This may be an old/new line pair left over from the diff rendering
    # rather than an intentional repeat -- confirm against the repository
    # before removing either call.
    e(scon, spark, "/data/texte/test/DonQuijote.txt")
    e(scon, spark, "/data/texte/test/DonQuijote.txt")
|
||||
|
||||
# Script entry point: run the exercise with the context and session imported
# from sparkstart at the top of the file.
if __name__ == "__main__":
    main(scon, spark)
|
||||
22
Aufgabe 6/sparkstart.py
Normal file
22
Aufgabe 6/sparkstart.py
Normal file
@@ -0,0 +1,22 @@
|
||||
# -*- coding: utf-8 -*-

"""
Create the Spark configuration, context and session.

Importing this module has side effects: it builds a SparkConf, creates the
SparkContext ``scon`` from it, and obtains the SparkSession ``spark``.
"""

from pyspark import SparkConf, SparkContext
from pyspark.sql import SparkSession

# connect to cluster
conf = SparkConf().setMaster("spark://193.174.205.250:7077").setAppName("HeisererValentin")
conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
conf.set("spark.executor.memory", '32g')
conf.set("spark.driver.memory", '8g')
conf.set("spark.cores.max", "40")
scon = SparkContext(conf=conf)

# NOTE(review): presumably getOrCreate() attaches to the context created
# above instead of starting a second one -- confirm against the PySpark docs.
spark = (
    SparkSession.builder
    .appName("Python Spark SQL")
    .getOrCreate()
)
|
||||
Reference in New Issue
Block a user