commit de3782d570 (parent c072850289)
Date: 2025-12-04 17:42:03 +01:00

2 changed files with 16 additions and 2 deletions


@@ -60,8 +60,9 @@ def duration_circle_size(spark: SparkSession):
 def compute_daily_and_yearly_frosts(spark: SparkSession):
     q_daily_max = (
-        "SELECT stationId, date, SUBSTR(date,1,4) AS year, MAX(TT_TU) AS max_temp "
+        "SELECT stationId, date, SUBSTR(CAST(date AS STRING),1,4) AS year, MAX(TT_TU) AS max_temp "
         "FROM german_stations_data "
+        "WHERE TT_TU IS NOT NULL "
         "GROUP BY stationId, date"
     )
     daily_max = spark.sql(q_daily_max)
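
The CAST matters because SUBSTR is only well-defined on strings: making the DATE-to-STRING conversion explicit keeps the year extraction stable across Spark versions and ANSI settings, and the added WHERE keeps NULL temperatures out of the MAX aggregate. A minimal, self-contained sketch of the same extraction (the local SparkSession and toy rows are assumptions; year(date) is Spark's built-in alternative, not part of this commit):

    from pyspark.sql import SparkSession
    import datetime

    spark = SparkSession.builder.master("local[1]").appName("year-demo").getOrCreate()

    # Toy stand-in for german_stations_data (schema assumed from the diff above).
    df = spark.createDataFrame(
        [(44, datetime.date(2024, 1, 15), -3.2), (44, datetime.date(2024, 1, 15), None)],
        "stationId INT, date DATE, TT_TU DOUBLE",
    )
    df.createOrReplaceTempView("german_stations_data")

    # Explicit CAST makes SUBSTR well-defined on a DATE column; year(date)
    # is the equivalent built-in. The WHERE keeps NULLs out of MAX.
    spark.sql(
        "SELECT stationId, date, SUBSTR(CAST(date AS STRING),1,4) AS year, "
        "year(date) AS year_builtin, MAX(TT_TU) AS max_temp "
        "FROM german_stations_data "
        "WHERE TT_TU IS NOT NULL "
        "GROUP BY stationId, date"
    ).show()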
@@ -87,6 +88,9 @@ def compute_daily_and_yearly_frosts(spark: SparkSession):
 def frost_analysis(spark: SparkSession, year=2024, station_name_matches=('kempten',)):
     compute_daily_and_yearly_frosts(spark)
+    # Debug: check available years and data
+    spark.sql("SELECT year, COUNT(*) as cnt FROM station_year_frost GROUP BY year ORDER BY year").show(50)
     q_hist = (
         f"SELECT frost_days, COUNT(*) AS station_count "
         f"FROM station_year_frost WHERE year = '{year}' GROUP BY frost_days ORDER BY frost_days"
@@ -94,6 +98,17 @@ def frost_analysis(spark: SparkSession, year=2024, station_name_matches=('kempten',)):
     hist_df = spark.sql(q_hist)
     hist_pdf = hist_df.toPandas()
+    if hist_pdf.empty:
+        print(f"No frost data found for year {year}. Trying to find available years...")
+        # Try without year filter to see if data exists
+        q_all = "SELECT frost_days, COUNT(*) AS station_count FROM station_year_frost GROUP BY frost_days ORDER BY frost_days"
+        hist_pdf = spark.sql(q_all).toPandas()
+        if hist_pdf.empty:
+            print("No frost data available at all. Check if TT_TU column contains valid temperature data.")
+            return
+        print(f"Found {len(hist_pdf)} frost day categories across all years")
     plt.figure(figsize=(8, 5))
     plt.bar(hist_pdf.frost_days, hist_pdf.station_count, color='steelblue')
     plt.xlabel('Number of Frost Days in year ' + str(year))
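
One caveat of this fallback: after dropping the year filter, the histogram pools all years while the x-axis label still names the requested year. A possible refinement (a sketch under the same table and schema assumptions, not part of this commit) is to fall back to the most recent year that has data, keeping the label truthful:

    # Sketch: pick the latest year with data rather than pooling all years.
    latest = spark.sql("SELECT MAX(year) AS y FROM station_year_frost").first()["y"]
    if latest is not None:
        print(f"Falling back to latest available year: {latest}")
        year = latest
        hist_pdf = spark.sql(
            f"SELECT frost_days, COUNT(*) AS station_count "
            f"FROM station_year_frost WHERE year = '{year}' "
            f"GROUP BY frost_days ORDER BY frost_days"
        ).toPandas()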


@@ -145,7 +145,6 @@ def import_produkt_files(spark: SparkSession, scon: SparkContext, path='/data/cd
     )
 def read_product_data_from_parquet(spark):
     """
-    read_product_data_from_parquet(spark)