diff --git a/laravel_app/tests/__fixtures__/Muhoroni/harvest.xlsx b/laravel_app/tests/__fixtures__/Muhoroni/harvest.xlsx index 6309f20..fda9d9e 100644 Binary files a/laravel_app/tests/__fixtures__/Muhoroni/harvest.xlsx and b/laravel_app/tests/__fixtures__/Muhoroni/harvest.xlsx differ diff --git a/r_app/CI_report_dashboard_planet.Rmd b/r_app/CI_report_dashboard_planet.Rmd index f466b04..1b28888 100644 --- a/r_app/CI_report_dashboard_planet.Rmd +++ b/r_app/CI_report_dashboard_planet.Rmd @@ -53,6 +53,7 @@ library(randomForest) library(CAST) source("report_utils.R") +# source(here("r_app", "report_utils.R")) ``` @@ -269,87 +270,24 @@ print(" PRINT") } ``` -```{r eval=FALSE, fig.height=10, fig.width=14, include=FALSE} -CI_all2 <- readRDS(here(cumulative_CI_vals_dir, "All_pivots_Cumulative_CI_whole_pivot_year.rds")) %>% - mutate(#line = substr(pivot, 1 , 1), - season = as.factor(season)) - -pivots_dates <- readRDS(here(harvest_dir, "harvest_data_new")) #%>% - -pvt_age <- pivots_dates %>% ungroup() %>% select(pivot, Age) %>% unique() -CI_all2 <- left_join(CI_all2, pvt_age , by = "pivot") %>% mutate(month = plyr::round_any((Age/4),2)) %>% - mutate(month = case_when(month > 16 ~ 18, - TRUE ~month)) %>% - group_by(pivot) %>% filter(Age == max(Age)) %>% ungroup() - -CI_all2$season <- ordered(CI_all2$season, levels = c("Data_2021", "Data_2022")) -# CI_all2 <- CI_all2 %>% mutate(season_order = as.integer(season)) -latest_model <- CI_all2 %>% group_by(pivot) %>% filter(season =="Data_2022") -# latest_model <- CI_all2 %>% group_by(pivot) %>% arrange(season, desc(DOY)) %>% slice(1) - -# CI_all2 <- CI_all %>% group_by(pivot, DOY ) %>% mutate(pivot_cumulative_CI = mean(cumulative_CI)) -# label_data <- CI_all2 %>% group_by(pivot) %>% arrange(season, desc(cumulative_CI)) %>% slice(1) %>% mutate(label = paste(pivot, " - ", round(cumulative_CI))) -label_data <- latest_model %>% arrange(season, desc(cumulative_CI)) %>% slice(1) %>% mutate(label = paste(pivot, " - ", round(cumulative_CI))) - -max_day <- label_data %>% group_by(pivot) %>% summarise(max_day = max(DOY)) - -ggplot(data= CI_all2%>% filter(season =="Data_2022"), aes(DOY, cumulative_CI, col = pivot)) + - facet_wrap(~month) + - geom_line() + - # scale_y_continuous(breaks = seq(0, max(label_data$cumulative_CI) + 100, by = 100)) + - labs(title = "Cumulative CI values over the year per pivot, split per 2-month age (rounded down)", x = "Days after harvest/planting (scale is per 2 weeks)", y = "Cumulative CI value", - color = "Line") + - geom_label_repel(data = label_data %>% filter(model %in% latest_model$model), aes(DOY, cumulative_CI, label = label), box.padding = 1, - # ylim = c(1300, NA), - max.overlaps = Inf - # segment.linetype = 4, - - # segment.curvature = -1e-20, - # arrow = arrow(length = unit(0.015, "npc")) - ) + - theme(legend.position="right", legend.text = element_text(size=8), legend.title = element_text(size = 8), - plot.title = element_text(size=19)) + - # geom_smooth()+ - guides(fill = guide_legend(nrow=2,byrow=TRUE)) + - scale_y_continuous(breaks=seq(0,max(label_data$cumulative_CI),100)) + - scale_x_continuous(breaks=seq(0,max(max_day$max_day),30)) + theme(axis.text.x = element_text(angle = 90)) + - labs(x = "Weeks")+ - theme(legend.position = "none") - -``` # Yield prediction The below table shows estimates of the biomass if you would harvest them now. -```{r eval=FALSE, message=FALSE, warning=FALSE, include=FALSE} -CI_quadrant <- readRDS(here(cumulative_CI_vals_dir,"All_pivots_Cumulative_CI_quadrant_year_v2.rds")) %>% - rename( pivot_quadrant = field)#All_pivots_Cumulative_CI.rds -ggplot(CI_quadrant %>% filter(pivot %in% "1.11")) + - geom_line(aes(DOY, cumulative_CI, col = as.factor(season))) + - facet_wrap(~pivot_quadrant) +```{r eval=FALSE, message=FALSE, warning=FALSE, include=TRUE} +CI_quadrant <- readRDS(here(cumulative_CI_vals_dir,"All_pivots_Cumulative_CI_quadrant_year_v2.rds")) -pivots_dates0 <- pivots_dates0 %>% ungroup() %>% unique() %>% - dplyr::select(field, sub_field, Tcha_2021, Tcha_2022 ) %>% - pivot_longer(cols = c("Tcha_2021", "Tcha_2022"), names_to = "Tcha_Year", values_to = "Tcha") %>% - filter(Tcha > 50) %>% - mutate(season = as.integer(str_extract(Tcha_Year, "\\d+"))) +harvesting_data <- harvesting_data %>% rename(season = year) -CI_and_yield <- left_join(CI_quadrant , pivots_dates0, by = c("pivot", "pivot_quadrant", "season")) %>% filter(!is.na(Tcha)) %>% - group_by(pivot_quadrant, season) %>% slice(which.max(DOY)) %>% - dplyr::select(pivot, pivot_quadrant, Tcha_Year, Tcha, cumulative_CI, DOY, season) %>% +CI_and_yield <- left_join(CI_quadrant , harvesting_data, by = c("field", "sub_field", "season")) %>% #filter(!is.na(tonnage_ha)) %>% + group_by(sub_field, season) %>% slice(which.max(DOY)) %>% + dplyr::select(field, sub_field, tonnage_ha, cumulative_CI, DOY, season, sub_area) %>% mutate(CI_per_day = cumulative_CI/ DOY) -ggplot(CI_and_yield) + - geom_point(aes(Tcha, CI_per_day, col = Tcha_Year )) - - -set.seed(20) -CI_and_yield_split <- initial_split(CI_and_yield, prop = 0.75, strata = pivot_quadrant) -CI_and_yield_test <- training(CI_and_yield_split) -CI_and_yield_validation <- testing(CI_and_yield_split) - - predictors <- c( "cumulative_CI" , "DOY" ,"CI_per_day" ) -response <- "Tcha" -CI_and_yield_test <- as.data.frame(CI_and_yield_test) +response <- "tonnage_ha" +# CI_and_yield_test <- as.data.frame(CI_and_yield_test) +CI_and_yield_test <- CI_and_yield %>% as.data.frame() %>% filter(!is.na(tonnage_ha)) +CI_and_yield_validation <- CI_and_yield_test +prediction_yields <- CI_and_yield %>% as.data.frame() %>% filter(is.na(tonnage_ha)) ctrl <- trainControl(method="cv", savePredictions = TRUE, @@ -368,46 +306,52 @@ model_ffs_rf <- ffs( CI_and_yield_test[,predictors], na.rm = TRUE ) -pred_ffs_rf <- - predict(model_ffs_rf, newdata = CI_and_yield_validation) %>% as.data.frame() %>% rename(predicted_Tcha = ".") %>% mutate( - pivot_quadrant = CI_and_yield_validation$pivot_quadrant, - pivot = CI_and_yield_validation$pivot, - Age_days = CI_and_yield_validation$DOY, - total_CI = round(CI_and_yield_validation$cumulative_CI, 0), - predicted_Tcha = round(predicted_Tcha, 0), - season = CI_and_yield_validation$season - ) %>% dplyr::select(pivot , pivot_quadrant, Age_days, total_CI, predicted_Tcha, season) %>% - left_join(., CI_and_yield_validation, by = c("pivot", "pivot_quadrant", "season")) %>% - filter(Age_days > 250) +# Function to prepare predictions +prepare_predictions <- function(predictions, newdata) { + return(predictions %>% + as.data.frame() %>% + rename(predicted_Tcha = ".") %>% + mutate(sub_field = newdata$sub_field, + field = newdata$field, + Age_days = newdata$DOY, + total_CI = round(newdata$cumulative_CI, 0), + predicted_Tcha = round(predicted_Tcha, 0), + season = newdata$season) %>% + dplyr::select(field, sub_field, Age_days, total_CI, predicted_Tcha, season) %>% + left_join(., newdata, by = c("field", "sub_field", "season"))) +} +# Predict yields for the validation dataset +pred_ffs_rf <- prepare_predictions(predict(model_ffs_rf, newdata = CI_and_yield_validation), CI_and_yield_validation) - -prediction_2023 <- CI_quadrant %>% filter(season == "2023") %>% group_by(pivot_quadrant) %>% slice(which.max(DOY))%>% - mutate(CI_per_day = cumulative_CI/ DOY) - -pred_rf_2023 <- predict(model_ffs_rf, newdata=prediction_2023) %>% - as.data.frame() %>% rename(predicted_Tcha_2023 = ".") %>% mutate(pivot_quadrant = prediction_2023$pivot_quadrant, - pivot = prediction_2023$pivot, - Age_days = prediction_2023$DOY, - total_CI = round(prediction_2023$cumulative_CI,0), - predicted_Tcha_2023 = round(predicted_Tcha_2023, 0)) %>% +# Predict yields for the current season +pred_rf_current_season <- prepare_predictions(predict(model_ffs_rf, newdata = prediction_yields), prediction_yields) %>% filter(Age_days > 300) %>% - dplyr::select(pivot ,pivot_quadrant, Age_days, total_CI, predicted_Tcha_2023)%>% - mutate(CI_per_day = round(total_CI/ Age_days, 1)) - - - + mutate(CI_per_day = round(total_CI / Age_days, 1)) ``` -```{r yield_plaatjes, eval=FALSE, include=FALSE} -ggplot(pred_ffs_rf, aes(y = predicted_Tcha , x = Tcha , col = pivot )) + - geom_point() +geom_abline() + - scale_x_continuous(limits = c(50, 160))+ - scale_y_continuous(limits = c(50, 160)) + - labs(title = "Model trained and tested on historical results - RF") +```{r yield_plaatjes, eval=FALSE, include=TRUE} +ggplot(pred_ffs_rf, aes(y = predicted_Tcha, x = tonnage_ha, col = sub_area)) + + geom_point(size = 2, alpha = 0.6) + # Adjust point size and transparency + geom_abline(intercept = 0, slope = 1, linetype = "dashed", color = "red") + # Reference line + scale_x_continuous(limits = c(0, 200)) + + scale_y_continuous(limits = c(0, 200)) + + labs(title = "Model Performance: \nPredicted vs Actual Tonnage/ha", + x = "Actual tonnage/ha (Tcha)", + y = "Predicted tonnage/ha (Tcha)") + + theme_minimal() -ggplot(pred_rf_2023, aes(total_CI , predicted_Tcha_2023 , col = pivot )) + - geom_point() + labs(title = "2023 data (still to be harvested) - fields over 300 days old") +ggplot(pred_rf_current_season, aes(x = Age_days, y = predicted_Tcha, col = field)) + + geom_point(size = 2, alpha = 0.6) + # Adjust point size and transparency + labs(title = "Predicted Yields for Fields Over 300 Days \nOld Yet to Be Harvested", + x = "Age (days)", + y = "Predicted tonnage/ha (Tcha)") + + facet_wrap(~sub_area) + + scale_y_continuous(limits = c(0, 200)) + # Optional: Set limits for y-axis + theme_minimal() -knitr::kable(pred_rf_2023) +knitr::kable(pred_rf_current_season, + digits = 0, + caption = "Predicted Tonnage/ha for Fields Over 300 Days Old") ``` + diff --git a/r_app/counting_clouds.R b/r_app/counting_clouds.R new file mode 100644 index 0000000..e4cd942 --- /dev/null +++ b/r_app/counting_clouds.R @@ -0,0 +1,91 @@ +# Required packages +# library(ggplot2) +# library(dplyr) +raster_files_NEW <- list.files(merged_final,full.names = T, pattern = ".tif") + +# Extracting the dates from vrt_list (assuming the format "YYYY-MM-DD.vrt" at the end) +no_cloud_dates <- as.Date(sapply(raster_files_NEW, function(x) { + sub(".*/([0-9]{4}-[0-9]{2}-[0-9]{2})\\.tif", "\\1", x) +})) + +# Generate a full sequence of dates in the range +start_date <- min(no_cloud_dates) +end_date <- max(no_cloud_dates) +all_dates <- seq(start_date, end_date, by = "day") + +# Create a data frame marking no clouds (1) and clouds (0) +cloud_data <- data.frame( + date = all_dates, + cloud_status = ifelse(all_dates %in% no_cloud_dates, 1, 0) +) + +# Plot the data +ggplot(cloud_data, aes(x = date, y = cloud_status)) + + geom_point() + + labs(x = "Date", y = "Cloud Status (1 = No Cloud, 0 = Cloud)") + + scale_y_continuous(breaks = c(0, 1)) + + theme_minimal() + +# Updated ggplot code to display months on the x-axis +ggplot(cloud_data, aes(x = date, y = cloud_status)) + + geom_point() + + scale_x_date(date_labels = "%b", date_breaks = "1 month") + + labs(x = "Month", y = "Cloud Status (1 = No Cloud, 0 = Cloud)") + + scale_y_continuous(breaks = c(0, 1)) + + theme_minimal() + +# Group data by year and week +cloud_data <- cloud_data %>% + mutate(week = isoweek(date), year = year(date)) %>% + group_by(year, week) %>% + summarise(no_cloud_days = sum(cloud_status == 1), + cloud_days = sum(cloud_status == 0)) + +# 1. Show how many weeks per year have no images (clouds for all 7 days) +weeks_no_images <- cloud_data %>% + filter(cloud_days == 7) + +# Plot weeks with no images +ggplot(weeks_no_images, aes(x = week, y = year)) + + geom_tile(fill = "red") + + labs(x = "Week", y = "Year", title = "Weeks with No Images (Full Cloud Cover)") + + theme_minimal() + + +# 2. Determine when most clouds are present (cloud_days > no_cloud_days) +weeks_most_clouds <- cloud_data %>% + filter(cloud_days > no_cloud_days) + +# Plot when most clouds are present +ggplot(weeks_most_clouds, aes(x = week, y = year)) + + geom_tile(fill = "blue") + + labs(x = "Week", y = "Year", title = "Weeks with Most Clouds") + + theme_minimal() + +# Group weeks by number of cloud days and count how many weeks had 0-7 cloud days +weeks_by_cloud_days <- cloud_data %>% + group_by(cloud_days) %>% + summarise(week_count = n()) + +# Display the summary +print(weeks_by_cloud_days) + +# Optional: Plot the results to visualise how many weeks had 0-7 cloud days +ggplot(weeks_by_cloud_days, aes(x = cloud_days, y = week_count)) + + geom_bar(stat = "identity", fill = "skyblue") + + labs(x = "Number of Cloud Days (per week)", y = "Number of Weeks", + title = "Distribution of Cloud Days per Week") + + theme_minimal() + +weeks_by_no_cloud_days <- cloud_data %>% + mutate(no_cloud_days = 7 - cloud_days) %>% + group_by(no_cloud_days) %>% + summarise(week_count = n()) + +# Plot the results to visualise how many weeks had 0-7 cloud-free days +ggplot(weeks_by_no_cloud_days, aes(x = no_cloud_days, y = week_count)) + + geom_bar(stat = "identity", fill = "#00A799") + + geom_text(aes(label = week_count), vjust = -0.5, size = 4) + # Add the count of weeks on top of bars + labs(x = "Number of Cloud-Free Days (per week)", y = "Number of Weeks", + title = "Distribution of Cloud-Free Days per Week") + + theme_minimal() diff --git a/r_app/report_utils.R b/r_app/report_utils.R index 5aff7f2..3759e21 100644 --- a/r_app/report_utils.R +++ b/r_app/report_utils.R @@ -90,7 +90,7 @@ ci_plot <- function(pivotName){ cum_ci_plot <- function(pivotName){ - # pivotName = "1.1" + # pivotName = "3a13" data_ci <- CI_quadrant %>% filter(field == pivotName) if (nrow(data_ci) == 0) { @@ -125,6 +125,75 @@ cum_ci_plot <- function(pivotName){ } +cum_ci_plot <- function(pivotName, plot_type = "value", facet_on = TRUE) { + + # pivotName = "3a13" + data_ci <- CI_quadrant %>% filter(field == pivotName) + + if (nrow(data_ci) == 0) { + return(cum_ci_plot2(pivotName)) # Return an empty data frame if no data is found + } + + data_ci2 <- data_ci %>% + mutate(CI_rate = cumulative_CI / DOY, + week = week(Date)) %>% + group_by(field) %>% + mutate(mean_CIrate_rolling_10_days = rollapplyr(CI_rate, width = 10, FUN = mean, partial = TRUE), + mean_rolling_10_days = rollapplyr(value, width = 10, FUN = mean, partial = TRUE)) + + data_ci2 <- data_ci2 %>% mutate(season = as.factor(season)) + + date_preperation_perfect_pivot <- data_ci2 %>% + group_by(season) %>% + summarise(min_date = min(Date), + max_date = max(Date), + days = max_date - min_date) + + unique_seasons <- sort(unique(date_preperation_perfect_pivot$season), decreasing = TRUE)[1:3] + + # Determine the y aesthetic based on the plot type + y_aesthetic <- switch(plot_type, + "CI_rate" = "mean_CIrate_rolling_10_days", + "cumulative_CI" = "cumulative_CI", + "value" = "mean_rolling_10_days") + + y_label <- switch(plot_type, + "CI_rate" = "10-Day Rolling Mean CI Rate (cumulative CI / age)", + "cumulative_CI" = "Cumulative CI", + "value" = "10-Day Rolling Mean CI") + + if (facet_on) { + g <- ggplot(data = data_ci2 %>% filter(season %in% unique_seasons)) + + facet_wrap(~season, scales = "free_x") + + geom_line(aes_string(x = "Date", y = y_aesthetic, col = "sub_field", group = "sub_field")) + + labs(title = paste("Plot of", y_label, "for Pivot", pivotName), + color = "Field Name", + y = y_label) + + scale_x_date(date_breaks = "1 month", date_labels = "%m-%Y") + + theme_minimal() + + theme(axis.text.x = element_text(angle = 60, hjust = 1), + legend.justification = c(1, 0), legend.position = c(1, 0), + legend.title = element_text(size = 8), + legend.text = element_text(size = 8)) + + guides(color = guide_legend(nrow = 2, byrow = TRUE)) + } else { + g <- ggplot(data = data_ci2 %>% filter(season %in% unique_seasons)) + + geom_line(aes_string(x = "DOY", y = y_aesthetic, col = "season", group = "season")) + + labs(title = paste("Plot of", y_label, "for Pivot", pivotName), + color = "Season", + y = y_label, + x = "Age of Crop (Days)") + + theme_minimal() + + theme(axis.text.x = element_text(angle = 60, hjust = 1), + legend.justification = c(1, 0), legend.position = c(1, 0), + legend.title = element_text(size = 8), + legend.text = element_text(size = 8)) + + guides(color = guide_legend(nrow = 2, byrow = TRUE)) + } + + subchunkify(g, 3.2, 10) +} + cum_ci_plot2 <- function(pivotName){ end_date <- Sys.Date() start_date <- end_date %m-% months(11) # 11 months ago from end_date