updateing csv file

2026-01-20 12:06:45 +01:00 · 2026-01-20 12:06:45 +01:00 · 4143bdf4d7
parent c11b10a73f
commit 4143bdf4d7
3 changed files with 117 additions and 102 deletions
--- a/r_app/80_calculate_kpis.R
+++ b/r_app/80_calculate_kpis.R
@ -66,17 +66,15 @@ CV_TREND_THRESHOLD_SIGNIFICANT <- 0.05
 #   Negative slope = CV decreasing = field becoming MORE uniform = GOOD
 #   Positive slope = CV increasing = field becoming MORE patchy = BAD
 #   Near zero = Homogenous growth (all crops progressing equally)
-CV_SLOPE_STRONG_IMPROVEMENT_MIN <- -0.05  # CV decreasing rapidly
-CV_SLOPE_IMPROVEMENT_MIN <- -0.02         # Gradual synchronization
-CV_SLOPE_IMPROVEMENT_MAX <- -0.005        # Becoming uniform
-CV_SLOPE_HOMOGENOUS_MIN <- -0.005         # Stable, uniform growth
-CV_SLOPE_HOMOGENOUS_MAX <- 0.005          # No change in uniformity
-CV_SLOPE_PATCHINESS_MIN <- 0.005          # Minor divergence
-CV_SLOPE_PATCHINESS_MAX <- 0.02           # Growing patchiness
-CV_SLOPE_SEVERE_MIN <- 0.02               # Field fragmentation beginning
+CV_SLOPE_STRONG_IMPROVEMENT_MIN <- -0.03   # CV decreasing rapidly (>3% drop over 8 weeks)
+CV_SLOPE_IMPROVEMENT_MIN <- -0.02         # CV decreasing (2-3% drop over 8 weeks)
+CV_SLOPE_IMPROVEMENT_MAX <- -0.01         # Gradual improvement (1-2% drop over 8 weeks)
+CV_SLOPE_HOMOGENOUS_MIN <- -0.01          # Essentially stable (small natural variation)
+CV_SLOPE_HOMOGENOUS_MAX <- 0.01           # No change in uniformity (within ±1% over 8 weeks)
+CV_SLOPE_PATCHINESS_MIN <- 0.01           # Minor divergence (1-2% increase over 8 weeks)
+CV_SLOPE_PATCHINESS_MAX <- 0.02           # Growing patchiness (2-3% increase over 8 weeks)
+CV_SLOPE_SEVERE_MIN <- 0.02               # Severe fragmentation (>3% increase over 8 weeks)

-# CLOUD COVER ROUNDING INTERVALS
-CLOUD_INTERVALS <- c(0, 50, 60, 70, 80, 90, 100)

 # PERCENTILE CALCULATIONS
 CI_PERCENTILE_LOW <- 0.10
@ -391,7 +389,7 @@ main <- function() {
                                        current_week = current_week,
                                        year = year)
  
-  message(paste("  ✓ Added Weekly_ci_change, CV_Trend_Short_Term, Four_week_trend, CV_Trend_Long_Term, nmr_weeks_in_this_phase"))
+  message(paste("  ✓ Added Weekly_ci_change, CV_Trend_Short_Term, Four_week_trend, CV_Trend_Long_Term, nmr_of_weeks_analysed"))
  
  # Load weekly harvest probabilities from script 31 (if available)
  message("\n4. Loading harvest probabilities from script 31...")
@ -479,7 +477,7 @@ main <- function() {
          if (is.na(planting_dt)) {
            return(NA_real_)
          }
-          round(as.numeric(difftime(end_date, planting_dt, units = "weeks")), 1)
+          round(as.numeric(difftime(end_date, planting_dt, units = "weeks")), 0)
        })
      },
      # Column 10: Phase (recalculate based on updated Age_week)
@ -493,31 +491,23 @@ main <- function() {
          NA_character_
        })
      },
-      # Column 11: nmr_weeks_in_this_phase (already in current_stats from calculate_kpi_trends)
+      # Column 11: nmr_of_weeks_analysed (already in current_stats from calculate_kpi_trends)
      # Column 12: Germination_progress (calculated here from CI values)
-      Germination_progress = {
-        sapply(seq_len(nrow(current_stats)), function(idx) {
-          age_w <- Age_week[idx]
-          mean_ci_val <- Mean_CI[idx]
-          
-          # Only relevant for germination phase (0-4 weeks)
-          if (is.na(age_w) || age_w < 0 || age_w >= 4) {
-            return(NA_character_)
-          }
-          
-          # Estimate % of field with CI >= germination threshold
-          # Based on mean CI, estimate germination percentage
-          if (mean_ci_val >= 0.4) {
-            return(">80%")
-          } else if (mean_ci_val >= 0.25) {
-            return("50-80%")
-          } else if (mean_ci_val >= 0.1) {
-            return("20-50%")
-          } else {
-            return("<20%")
-          }
-        })
-      },
+      # Bin Pct_pixels_CI_gte_2 into 10% intervals: 0-10%, 10-20%, ..., 80-90%, 90-95%, 95-100%
+      Germination_progress = sapply(Pct_pixels_CI_gte_2, function(pct) {
+        if (is.na(pct)) return(NA_character_)
+        if (pct >= 95) return("95-100%")
+        else if (pct >= 90) return("90-95%")
+        else if (pct >= 80) return("80-90%")
+        else if (pct >= 70) return("70-80%")
+        else if (pct >= 60) return("60-70%")
+        else if (pct >= 50) return("50-60%")
+        else if (pct >= 40) return("40-50%")
+        else if (pct >= 30) return("30-40%")
+        else if (pct >= 20) return("20-30%")
+        else if (pct >= 10) return("10-20%")
+        else return("0-10%")
+      }),
      # Column 13: Imminent_prob (from script 31 or NA if not available)
      Imminent_prob = {
        if (!is.null(imminent_prob_data)) {
@ -526,59 +516,84 @@ main <- function() {
          rep(NA_real_, nrow(current_stats))
        }
      },
-      # Column 14: Status_trigger (based on harvest probability + growth status)
-      Status_trigger = {
-        triggers <- sapply(seq_len(nrow(current_stats)), function(idx) {
+      # Column 14: Status_Alert (based on harvest probability + crop health status)
+      # Priority order: Ready for harvest-check → Strong decline → Harvested/bare → NA
+      Status_Alert = {
+        sapply(seq_len(nrow(current_stats)), function(idx) {
          imminent_prob <- Imminent_prob[idx]
          age_w <- Age_week[idx]
-          ci_change <- Weekly_ci_change[idx]
-          phase <- Phase[idx]
+          weekly_ci_chg <- Weekly_ci_change[idx]
+          mean_ci_val <- Mean_CI[idx]
          
-          # Priority 1: Harvest imminent (high probability)
-          if (!is.na(imminent_prob) && imminent_prob > 0.5) {
-            return("harvest_imminent")
+          # Priority 1: Ready for harvest-check (imminent + mature cane ≥12 months)
+          if (!is.na(imminent_prob) && imminent_prob > 0.5 && !is.na(age_w) && age_w >= 52) {
+            return("Ready for harvest-check")
          }
          
-          # Priority 2: Age-based triggers
-          if (!is.na(age_w)) {
-            if (age_w >= 45) return("harvest_ready")
-            if (age_w >= 39) return("maturation_progressing")
-            if (age_w >= 4 & age_w < 39) return("growth_on_track")
-            if (age_w < 4) return("germination_started")
+          # Priority 2: Strong decline in crop health (drop ≥2 points but still >1.5)
+          if (!is.na(weekly_ci_chg) && weekly_ci_chg <= -2.0 && !is.na(mean_ci_val) && mean_ci_val > 1.5) {
+            return("Strong decline in crop health")
          }
          
-          # Fallback
+          # Priority 3: Harvested/bare (Mean CI < 1.5)
+          if (!is.na(mean_ci_val) && mean_ci_val < 1.5) {
+            return("Harvested/bare")
+          }
+          
+          # Fallback: no alert
          NA_character_
        })
-        triggers
      },
      # Columns 15-16: CI-based columns already in current_stats (CI_range, CI_Percentiles)
      # Column 17: Already in current_stats (CV)
      # Column 18: Already in current_stats (CV_Trend_Short_Term)
      # Column 19: CV_Trend_Long_Term (from current_stats - raw slope value)
      # Column 19b: CV_Trend_Long_Term_Category (categorical interpretation of slope)
+      # 3 classes: More uniform (slope < -0.01), Stable uniformity (-0.01 to 0.01), Less uniform (slope > 0.01)
      CV_Trend_Long_Term_Category = {
-        sapply(current_stats$CV_Trend_Long_Term, categorize_cv_slope)
+        sapply(current_stats$CV_Trend_Long_Term, function(slope) {
+          if (is.na(slope)) {
+            return(NA_character_)
+          } else if (slope < -0.01) {
+            return("More uniform")
+          } else if (slope > 0.01) {
+            return("Less uniform")
+          } else {
+            return("Stable uniformity")
+          }
+        })
      },
      # Columns 20-21: Already in current_stats (Cloud_pct_clear, Cloud_category)
-      .keep = "all"  # Keep all existing columns
+      # Bin Cloud_pct_clear into 10% intervals: 0-10%, 10-20%, ..., 80-90%, 90-95%, 95-100%
+      Cloud_pct_clear = sapply(Cloud_pct_clear, function(pct) {
+        if (is.na(pct)) return(NA_character_)
+        if (pct >= 95) return("95-100%")
+        else if (pct >= 90) return("90-95%")
+        else if (pct >= 80) return("80-90%")
+        else if (pct >= 70) return("70-80%")
+        else if (pct >= 60) return("60-70%")
+        else if (pct >= 50) return("50-60%")
+        else if (pct >= 40) return("40-50%")
+        else if (pct >= 30) return("30-40%")
+        else if (pct >= 20) return("20-30%")
+        else if (pct >= 10) return("10-20%")
+        else return("0-10%")
+      }),
    ) %>%
    select(
-      all_of(c("Field_id", "Farm_Section", "Field_name", "Acreage", "Mean_CI", "Weekly_ci_change",
-               "Four_week_trend", "Last_harvest_or_planting_date", "Age_week", "Phase",
-               "nmr_weeks_in_this_phase", "Germination_progress", "Imminent_prob", "Status_trigger",
+      all_of(c("Field_id", "Farm_Section", "Field_name", "Acreage", "Status_Alert",
+               "Last_harvest_or_planting_date", "Age_week", "Phase", 
+               "Germination_progress",
+               "Mean_CI", "Weekly_ci_change", "Four_week_trend", "CI_range", "CI_Percentiles",
               "CV", "CV_Trend_Short_Term", "CV_Trend_Long_Term", "CV_Trend_Long_Term_Category",
-               "Cloud_pct_clear", "Cloud_category")),
-      any_of(c("CI_range", "CI_Percentiles"))
+               "Imminent_prob", "Cloud_pct_clear", "Cloud_category"))
    )
  
  message(paste("✓ Built final output with", nrow(field_analysis_df), "fields and 21 columns"))
  
-  summary_statistics_df <- generate_field_analysis_summary(field_analysis_df)
-  
  export_paths <- export_field_analysis_excel(
    field_analysis_df,
-    summary_statistics_df,
+    NULL,
    project_dir,
    current_week,
    year,
@ -586,15 +601,12 @@ main <- function() {
  )
  
  cat("\n--- Per-field Results (first 10) ---\n")
-  available_cols <- c("Field_id", "Acreage", "Age_week", "Mean_CI", "Four_week_trend", "Status_trigger", "Cloud_category")
+  available_cols <- c("Field_id", "Acreage", "Age_week", "Mean_CI", "Four_week_trend", "Status_Alert", "Cloud_category")
  available_cols <- available_cols[available_cols %in% names(field_analysis_df)]
  if (length(available_cols) > 0) {
    print(head(field_analysis_df[, available_cols], 10))
  }
  
-  cat("\n--- Summary Statistics ---\n")
-  print(summary_statistics_df)
-  
  # ========== FARM-LEVEL KPI AGGREGATION ==========
  # Aggregate the per-field analysis into farm-level summary statistics
  
@ -623,15 +635,15 @@ main <- function() {
      
      farm_summary$phase_distribution <- phase_dist
      
-      # 2. STATUS TRIGGER DISTRIBUTION
+      # 2. STATUS ALERT DISTRIBUTION
      status_dist <- field_data %>%
-        group_by(Status_trigger) %>%
+        group_by(Status_Alert) %>%
        summarise(
          num_fields = n(),
          acreage = sum(Acreage, na.rm = TRUE),
          .groups = 'drop'
        ) %>%
-        rename(Category = Status_trigger)
+        rename(Category = Status_Alert)
      
      farm_summary$status_distribution <- status_dist
      
--- a/r_app/80_report_building_utils.R
+++ b/r_app/80_report_building_utils.R
@ -112,8 +112,13 @@ export_field_analysis_excel <- function(field_df, summary_df, project_dir, curre
  field_df_rounded <- field_df %>%
    mutate(across(where(is.numeric), ~ round(., 2)))
  
-  summary_df_rounded <- summary_df %>%
-    mutate(across(where(is.numeric), ~ round(., 2)))
+  # Handle NULL summary_df
+  summary_df_rounded <- if (!is.null(summary_df)) {
+    summary_df %>%
+      mutate(across(where(is.numeric), ~ round(., 2)))
+  } else {
+    NULL
+  }
  
  output_subdir <- file.path(reports_dir, "kpis", "field_analysis")
  if (!dir.exists(output_subdir)) {
@ -124,10 +129,13 @@ export_field_analysis_excel <- function(field_df, summary_df, project_dir, curre
  excel_path <- file.path(output_subdir, excel_filename)
  excel_path <- normalizePath(excel_path, winslash = "\\", mustWork = FALSE)
  
+  # Build sheets list dynamically
  sheets <- list(
-    "Field Data" = field_df_rounded,
-    "Summary" = summary_df_rounded
+    "Field Data" = field_df_rounded
  )
+  if (!is.null(summary_df_rounded)) {
+    sheets[["Summary"]] <- summary_df_rounded
+  }
  
  write_xlsx(sheets, excel_path)
  message(paste("✓ Field analysis Excel exported to:", excel_path))
--- a/r_app/80_weekly_stats_utils.R
+++ b/r_app/80_weekly_stats_utils.R
@ -184,12 +184,17 @@ round_cloud_to_intervals <- function(cloud_pct_clear) {
    return(NA_character_)
  }
  
-  if (cloud_pct_clear < 50) return("<50%")
+  if (cloud_pct_clear < 10) return("0-10%")
+  if (cloud_pct_clear < 20) return("10-20%")
+  if (cloud_pct_clear < 30) return("20-30%")
+  if (cloud_pct_clear < 40) return("30-40%")
+  if (cloud_pct_clear < 50) return("40-50%")
  if (cloud_pct_clear < 60) return("50-60%")
  if (cloud_pct_clear < 70) return("60-70%")
  if (cloud_pct_clear < 80) return("70-80%")
  if (cloud_pct_clear < 90) return("80-90%")
-  return(">90%")
+  if (cloud_pct_clear < 95) return("90-95%")
+  return("95-100%")
 }

 get_ci_percentiles <- function(ci_values) {
@ -420,12 +425,18 @@ calculate_field_statistics <- function(field_boundaries_sf, week_num, year,
        range_str <- sprintf("%.1f-%.1f", range_min, range_max)
        ci_percentiles_str <- get_ci_percentiles(ci_vals)
        
+        # Count pixels with CI >= 2 (germination threshold)
+        GERMINATION_CI_THRESHOLD <- 2.0
+        num_pixels_gte_2 <- sum(ci_vals >= GERMINATION_CI_THRESHOLD, na.rm = TRUE)
+        num_pixels_total <- length(ci_vals)
+        pct_pixels_gte_2 <- if (num_pixels_total > 0) round((num_pixels_gte_2 / num_pixels_total) * 100, 1) else 0
+        
        field_rows <- extracted[extracted$ID == field_poly_idx, ]
        num_total <- nrow(field_rows)
        num_data <- sum(!is.na(field_rows$CI))
        pct_clear <- if (num_total > 0) round((num_data / num_total) * 100, 1) else 0
        cloud_cat <- if (num_data == 0) "No image available"
-                     else if (pct_clear >= 99.5) "Clear view"
+                     else if (pct_clear >= 95) "Clear view"
                     else "Partial coverage"
        
        # Age_week and Phase are now calculated in main script using actual planting dates
@ -440,9 +451,10 @@ calculate_field_statistics <- function(field_boundaries_sf, week_num, year,
        results_list[[length(results_list) + 1]] <- data.frame(
          Field_id = field_id,
          Mean_CI = round(mean_ci, 2),
-          CV = round(cv, 4),
+          CV = round(cv * 100, 2),
          CI_range = range_str,
          CI_Percentiles = ci_percentiles_str,
+          Pct_pixels_CI_gte_2 = pct_pixels_gte_2,
          Cloud_pct_clear = pct_clear,
          Cloud_category = cloud_cat,
          stringsAsFactors = FALSE
@ -482,7 +494,7 @@ calculate_kpi_trends <- function(current_stats, prev_stats = NULL,
  current_stats$CV_Trend_Short_Term <- NA_real_
  current_stats$Four_week_trend <- NA_real_
  current_stats$CV_Trend_Long_Term <- NA_real_
-  current_stats$nmr_weeks_in_this_phase <- 1L
+  current_stats$nmr_of_weeks_analysed <- 1L
  
  if (is.null(prev_stats) || nrow(prev_stats) == 0) {
    message("  No previous week data available - using defaults")
@ -502,7 +514,7 @@ calculate_kpi_trends <- function(current_stats, prev_stats = NULL,
      if (length(analysis_files) > 0) {
        recent_file <- analysis_files[which.max(file.info(analysis_files)$mtime)]
        prev_field_analysis <- readr::read_csv(recent_file, show_col_types = FALSE, 
-                                               col_select = c(Field_id, nmr_weeks_in_this_phase, Phase))
+                                               col_select = c(Field_id, nmr_of_weeks_analysed, Phase))
      }
    }
  }, error = function(e) {
@ -510,7 +522,7 @@ calculate_kpi_trends <- function(current_stats, prev_stats = NULL,
  })
  
  if (!is.null(prev_field_analysis) && nrow(prev_field_analysis) > 0) {
-    message(paste("  Using previous field_analysis to track nmr_weeks_in_this_phase"))
+    message(paste("  Using previous field_analysis to track nmr_of_weeks_analysed"))
  }
  
  historical_4weeks <- list()
@ -643,30 +655,13 @@ calculate_kpi_trends <- function(current_stats, prev_stats = NULL,
          dplyr::filter(Field_id == field_id)
        
        if (nrow(prev_analysis_row) > 0) {
-          prev_phase_analysis <- prev_analysis_row$Phase[1]
-          prev_nmr_weeks_analysis <- prev_analysis_row$nmr_weeks_in_this_phase[1]
+          prev_nmr_weeks_analysis <- prev_analysis_row$nmr_of_weeks_analysed[1]
          
-          if (!is.na(current_stats$Phase[i]) && !is.na(prev_phase_analysis)) {
-            if (current_stats$Phase[i] == prev_phase_analysis) {
-              current_stats$nmr_weeks_in_this_phase[i] <- 
-                if (!is.na(prev_nmr_weeks_analysis)) prev_nmr_weeks_analysis + 1L else 2L
-            } else {
-              current_stats$nmr_weeks_in_this_phase[i] <- 1L
-            }
-          }
-        } else if (!is.na(current_stats$Phase[i]) && !is.na(prev_row$Phase[1])) {
-          if (current_stats$Phase[i] == prev_row$Phase[1]) {
-            current_stats$nmr_weeks_in_this_phase[i] <- 2L
+          # Only increment nmr_of_weeks_analysed if we have previous data
+          if (!is.na(prev_nmr_weeks_analysis)) {
+            current_stats$nmr_of_weeks_analysed[i] <- prev_nmr_weeks_analysis + 1L
          } else {
-            current_stats$nmr_weeks_in_this_phase[i] <- 1L
-          }
-        }
-      } else {
-        if (!is.na(current_stats$Phase[i]) && !is.na(prev_row$Phase[1])) {
-          if (current_stats$Phase[i] == prev_row$Phase[1]) {
-            current_stats$nmr_weeks_in_this_phase[i] <- 2L
-          } else {
-            current_stats$nmr_weeks_in_this_phase[i] <- 1L
+            current_stats$nmr_of_weeks_analysed[i] <- 1L
          }
        }
      }