mirror of
https://github.com/Vale54321/BigData.git
synced 2025-12-15 11:29:32 +01:00
Aufgabe 10
This commit is contained in:
@@ -62,7 +62,7 @@ def compute_daily_and_yearly_frosts(spark: SparkSession):
|
||||
q_daily_max = (
|
||||
"SELECT stationId, date, SUBSTR(CAST(date AS STRING),1,4) AS year, MAX(TT_TU) AS max_temp "
|
||||
"FROM german_stations_data "
|
||||
"WHERE TT_TU IS NOT NULL "
|
||||
"WHERE TT_TU IS NOT NULL AND TT_TU > -50 AND TT_TU < 60 "
|
||||
"GROUP BY stationId, date"
|
||||
)
|
||||
daily_max = spark.sql(q_daily_max)
|
||||
@@ -88,9 +88,6 @@ def compute_daily_and_yearly_frosts(spark: SparkSession):
|
||||
def frost_analysis(spark: SparkSession, year=2024, station_name_matches=('kempten',)):
|
||||
compute_daily_and_yearly_frosts(spark)
|
||||
|
||||
# Debug: check available years and data
|
||||
spark.sql("SELECT year, COUNT(*) as cnt FROM station_year_frost GROUP BY year ORDER BY year").show(50)
|
||||
|
||||
q_hist = (
|
||||
f"SELECT frost_days, COUNT(*) AS station_count "
|
||||
f"FROM station_year_frost WHERE year = '{year}' GROUP BY frost_days ORDER BY frost_days"
|
||||
@@ -101,7 +98,6 @@ def frost_analysis(spark: SparkSession, year=2024, station_name_matches=('kempte
|
||||
|
||||
if hist_pdf.empty:
|
||||
print(f"No frost data found for year {year}. Trying to find available years...")
|
||||
# Try without year filter to see if data exists
|
||||
q_all = "SELECT frost_days, COUNT(*) AS station_count FROM station_year_frost GROUP BY frost_days ORDER BY frost_days"
|
||||
hist_pdf = spark.sql(q_all).toPandas()
|
||||
if hist_pdf.empty:
|
||||
@@ -129,11 +125,10 @@ def frost_analysis(spark: SparkSession, year=2024, station_name_matches=('kempte
|
||||
sname = r['station_name']
|
||||
print(f"Analyzing stationId={sid} name={sname}")
|
||||
|
||||
# compute frostdays + 5-yr and 20-yr rolling averages using window frame
|
||||
q_ts = (
|
||||
"SELECT year, frost_days, "
|
||||
"AVG(frost_days) OVER (PARTITION BY stationId ORDER BY CAST(year AS INT) ROWS BETWEEN 4 PRECEDING AND CURRENT ROW) AS avg_5, "
|
||||
"AVG(frost_days) OVER (PARTITION BY stationId ORDER BY CAST(year AS INT) ROWS BETWEEN 19 PRECEDING AND CURRENT ROW) AS avg_20 "
|
||||
"AVG(frost_days) OVER (PARTITION BY stationId ORDER BY CAST(year AS INT) RANGE BETWEEN 4 PRECEDING AND CURRENT ROW) AS avg_5, "
|
||||
"AVG(frost_days) OVER (PARTITION BY stationId ORDER BY CAST(year AS INT) RANGE BETWEEN 19 PRECEDING AND CURRENT ROW) AS avg_20 "
|
||||
f"FROM station_year_frost WHERE stationId = {sid} ORDER BY CAST(year AS INT)"
|
||||
)
|
||||
ts_df = spark.sql(q_ts)
|
||||
|
||||
Reference in New Issue
Block a user