mirror of
https://github.com/Vale54321/BigData.git
synced 2025-12-11 09:59:33 +01:00
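fix: cast date to STRING before extracting the year in the frost query; add debug output and an all-years fallback to frost_analysis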
This commit is contained in:
@@ -60,8 +60,9 @@ def duration_circle_size(spark: SparkSession):
|
||||
|
||||
def compute_daily_and_yearly_frosts(spark: SparkSession):
|
||||
q_daily_max = (
|
||||
"SELECT stationId, date, SUBSTR(date,1,4) AS year, MAX(TT_TU) AS max_temp "
|
||||
"SELECT stationId, date, SUBSTR(CAST(date AS STRING),1,4) AS year, MAX(TT_TU) AS max_temp "
|
||||
"FROM german_stations_data "
|
||||
"WHERE TT_TU IS NOT NULL "
|
||||
"GROUP BY stationId, date"
|
||||
)
|
||||
daily_max = spark.sql(q_daily_max)
|
||||
@@ -87,6 +88,9 @@ def compute_daily_and_yearly_frosts(spark: SparkSession):
|
||||
def frost_analysis(spark: SparkSession, year=2024, station_name_matches=('kempten',)):
|
||||
compute_daily_and_yearly_frosts(spark)
|
||||
|
||||
# Debug: check available years and data
|
||||
spark.sql("SELECT year, COUNT(*) as cnt FROM station_year_frost GROUP BY year ORDER BY year").show(50)
|
||||
|
||||
q_hist = (
|
||||
f"SELECT frost_days, COUNT(*) AS station_count "
|
||||
f"FROM station_year_frost WHERE year = '{year}' GROUP BY frost_days ORDER BY frost_days"
|
||||
@@ -94,6 +98,17 @@ def frost_analysis(spark: SparkSession, year=2024, station_name_matches=('kempte
|
||||
hist_df = spark.sql(q_hist)
|
||||
|
||||
hist_pdf = hist_df.toPandas()
|
||||
|
||||
if hist_pdf.empty:
|
||||
print(f"No frost data found for year {year}. Trying to find available years...")
|
||||
# Try without year filter to see if data exists
|
||||
q_all = "SELECT frost_days, COUNT(*) AS station_count FROM station_year_frost GROUP BY frost_days ORDER BY frost_days"
|
||||
hist_pdf = spark.sql(q_all).toPandas()
|
||||
if hist_pdf.empty:
|
||||
print("No frost data available at all. Check if TT_TU column contains valid temperature data.")
|
||||
return
|
||||
print(f"Found {len(hist_pdf)} frost day categories across all years")
|
||||
|
||||
plt.figure(figsize=(8, 5))
|
||||
plt.bar(hist_pdf.frost_days, hist_pdf.station_count, color='steelblue')
|
||||
plt.xlabel('Number of Frost Days in year ' + str(year))
|
||||
|
||||
@@ -145,7 +145,6 @@ def import_produkt_files(spark: SparkSession, scon: SparkContext, path='/data/cd
|
||||
)
|
||||
|
||||
|
||||
|
||||
def read_product_data_from_parquet(spark):
|
||||
"""
|
||||
read_product_data_from_parquet(spark)
|
||||
|
||||
Reference in New Issue
Block a user