diff --git a/Aufgabe 10/Aufgabe10.py b/Aufgabe 10/Aufgabe10.py
index 6e1d636..2dd9ced 100644
--- a/Aufgabe 10/Aufgabe10.py
+++ b/Aufgabe 10/Aufgabe10.py
@@ -60,8 +60,9 @@ def duration_circle_size(spark: SparkSession):
 
 def compute_daily_and_yearly_frosts(spark: SparkSession):
     q_daily_max = (
-        "SELECT stationId, date, SUBSTR(date,1,4) AS year, MAX(TT_TU) AS max_temp "
+        "SELECT stationId, date, SUBSTR(CAST(date AS STRING),1,4) AS year, MAX(TT_TU) AS max_temp "
         "FROM german_stations_data "
+        "WHERE TT_TU IS NOT NULL "
         "GROUP BY stationId, date"
     )
     daily_max = spark.sql(q_daily_max)
@@ -87,6 +88,9 @@ def compute_daily_and_yearly_frosts(spark: SparkSession):
 def frost_analysis(spark: SparkSession, year=2024, station_name_matches=('kempten',)):
     compute_daily_and_yearly_frosts(spark)
 
+    # Debug: check available years and data
+    spark.sql("SELECT year, COUNT(*) as cnt FROM station_year_frost GROUP BY year ORDER BY year").show(50)
+
     q_hist = (
         f"SELECT frost_days, COUNT(*) AS station_count "
         f"FROM station_year_frost WHERE year = '{year}' GROUP BY frost_days ORDER BY frost_days"
@@ -94,6 +98,17 @@ def frost_analysis(spark: SparkSession, year=2024, station_name_matches=('kempte
     hist_df = spark.sql(q_hist)
 
     hist_pdf = hist_df.toPandas()
+
+    if hist_pdf.empty:
+        print(f"No frost data found for year {year}. Trying to find available years...")
+        # Try without year filter to see if data exists
+        q_all = "SELECT frost_days, COUNT(*) AS station_count FROM station_year_frost GROUP BY frost_days ORDER BY frost_days"
+        hist_pdf = spark.sql(q_all).toPandas()
+        if hist_pdf.empty:
+            print("No frost data available at all. Check if TT_TU column contains valid temperature data.")
+            return
+        print(f"Found {len(hist_pdf)} frost day categories across all years")
+
     plt.figure(figsize=(8, 5))
     plt.bar(hist_pdf.frost_days, hist_pdf.station_count, color='steelblue')
     plt.xlabel('Number of Frost Days in year ' + str(year))
diff --git a/Aufgabe 9/Aufgabe9.py b/Aufgabe 9/Aufgabe9.py
index 8b65214..fd0f217 100644
--- a/Aufgabe 9/Aufgabe9.py
+++ b/Aufgabe 9/Aufgabe9.py
@@ -145,7 +145,6 @@ def import_produkt_files(spark: SparkSession, scon: SparkContext, path='/data/cd
     )
 
 
-
 def read_product_data_from_parquet(spark):
     """
     read_product_data_from_parquet(spark)
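
Reviewer note on the Aufgabe 10 hunk (not part of the diff): a minimal, self-contained sketch of why the explicit CAST helps. Depending on the Spark version and ANSI settings, SUBSTR on a DATE-typed column can fail or rely on an implicit cast, so the patch casts to STRING first. The rows below are invented stand-ins for german_stations_data; only the column names and the patched query come from this change.

    from pyspark.sql import SparkSession, functions as F

    spark = SparkSession.builder.appName("frost-sketch").getOrCreate()

    # Toy data: station 44, two days, one NULL reading (invented values)
    rows = [(44, "2024-01-15", -3.2), (44, "2024-01-15", 1.4),
            (44, "2023-07-01", 21.0), (44, "2023-07-01", None)]
    df = (spark.createDataFrame(rows, ["stationId", "date", "TT_TU"])
              .withColumn("date", F.to_date("date")))  # DATE type, not STRING
    df.createOrReplaceTempView("german_stations_data")

    # The patched query: cast before SUBSTR, drop NULL readings before MAX
    spark.sql(
        "SELECT stationId, date, SUBSTR(CAST(date AS STRING),1,4) AS year, "
        "MAX(TT_TU) AS max_temp "
        "FROM german_stations_data "
        "WHERE TT_TU IS NOT NULL "
        "GROUP BY stationId, date"
    ).show()

An alternative would be year(date), which avoids string slicing entirely; keeping SUBSTR presumably preserves the original query shape.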
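
Likewise a reviewer-side sketch, not from the PR: the new empty-DataFrame guard in frost_analysis matters because matplotlib does not raise on empty input, it silently renders a blank chart. The frame below is fabricated to show the behavior the early return avoids.

    import pandas as pd
    import matplotlib.pyplot as plt

    hist_pdf = pd.DataFrame({"frost_days": [], "station_count": []})

    if hist_pdf.empty:
        # Mirrors the PR's fallback: report instead of plotting a blank figure
        print("No frost data for the requested year")
    else:
        plt.bar(hist_pdf.frost_days, hist_pdf.station_count, color="steelblue")
        plt.show()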