This commit is contained in:
2025-11-14 10:01:38 +01:00
6 changed files with 273 additions and 124 deletions

View File

@@ -1,3 +1,5 @@
from sparkstart import scon, spark
def a(scon, spark, path):
    """Open a text file through ``scon`` and return the result.

    Args:
        scon: Object exposing ``textFile(path)``; ``main`` passes the
            ``scon`` imported from ``sparkstart``.
        spark: Not used by this function; accepted so that the call
            signature matches the other helpers that take
            ``(scon, spark, ...)``.
        path: Location of the text file to read.

    Returns:
        Whatever ``scon.textFile(path)`` returns (presumably an RDD of
        lines when ``scon`` is a SparkContext).
    """
    return scon.textFile(path)
@@ -49,11 +51,11 @@ def e(scon,spark, path, top_n=20):
return d(rdd, top_n)
def main(scon, spark):
    """Run the exercise steps in order.

    Reads ``robinsonCrusoe.txt`` through ``a``, passes the resulting
    RDD to ``b``, ``c`` and ``d`` in turn, and finally calls ``e`` for
    ``DonQuijote.txt``. Return values of the steps are discarded.

    Args:
        scon: Forwarded unchanged to ``a`` and ``e``.
        spark: Forwarded unchanged to ``a`` and ``e``.
    """
    rdd = a(scon, spark, "/data/texte/test/robinsonCrusoe.txt")
    b(rdd)
    c(rdd)
    d(rdd)
    # Single call, relying on e's default top_n.
    e(scon, spark, "/data/texte/test/DonQuijote.txt")
if __name__ == "__main__":
    # Run the exercise only when executed as a script, not on import.
    main(scon, spark)

22
Aufgabe 6/sparkstart.py Normal file
View File

@@ -0,0 +1,22 @@
# -*- coding: utf-8 -*-
"""
Create the Spark configuration
"""
from pyspark import SparkConf, SparkContext
from pyspark.sql import SparkSession
# Build the configuration in one fluent chain: cluster master URL,
# application name, then the serializer and resource settings.
conf = (
    SparkConf()
    .setMaster("spark://193.174.205.250:7077")
    .setAppName("HeisererValentin")
    .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
    .set("spark.executor.memory", "32g")
    .set("spark.driver.memory", "8g")
    .set("spark.cores.max", "40")
)

# Context created from the configuration above; imported by other modules.
scon = SparkContext(conf=conf)

# Session obtained via the builder's get-or-create entry point; imported by
# other modules alongside scon.
spark = SparkSession.builder.appName("Python Spark SQL").getOrCreate()