Refactor cum_ci_plot function for enhanced plotting options; added parameters for plot type and season filtering; improved x-axis labeling and updated ggplot aesthetics. Also, continued work on yield prediction in R.
This commit is contained in:
parent
ddb4fdbc02
commit
c1a32bf6ad
Binary file not shown.
|
|
@ -53,6 +53,7 @@ library(randomForest)
|
|||
library(CAST)
|
||||
|
||||
source("report_utils.R")
|
||||
# source(here("r_app", "report_utils.R"))
|
||||
|
||||
```
|
||||
|
||||
|
|
@ -269,87 +270,24 @@ print(" PRINT")
|
|||
}
|
||||
```
|
||||
|
||||
```{r eval=FALSE, fig.height=10, fig.width=14, include=FALSE}
|
||||
CI_all2 <- readRDS(here(cumulative_CI_vals_dir, "All_pivots_Cumulative_CI_whole_pivot_year.rds")) %>%
|
||||
mutate(#line = substr(pivot, 1 , 1),
|
||||
season = as.factor(season))
|
||||
|
||||
pivots_dates <- readRDS(here(harvest_dir, "harvest_data_new")) #%>%
|
||||
|
||||
pvt_age <- pivots_dates %>% ungroup() %>% select(pivot, Age) %>% unique()
|
||||
CI_all2 <- left_join(CI_all2, pvt_age , by = "pivot") %>% mutate(month = plyr::round_any((Age/4),2)) %>%
|
||||
mutate(month = case_when(month > 16 ~ 18,
|
||||
TRUE ~month)) %>%
|
||||
group_by(pivot) %>% filter(Age == max(Age)) %>% ungroup()
|
||||
|
||||
CI_all2$season <- ordered(CI_all2$season, levels = c("Data_2021", "Data_2022"))
|
||||
# CI_all2 <- CI_all2 %>% mutate(season_order = as.integer(season))
|
||||
latest_model <- CI_all2 %>% group_by(pivot) %>% filter(season =="Data_2022")
|
||||
# latest_model <- CI_all2 %>% group_by(pivot) %>% arrange(season, desc(DOY)) %>% slice(1)
|
||||
|
||||
# CI_all2 <- CI_all %>% group_by(pivot, DOY ) %>% mutate(pivot_cumulative_CI = mean(cumulative_CI))
|
||||
# label_data <- CI_all2 %>% group_by(pivot) %>% arrange(season, desc(cumulative_CI)) %>% slice(1) %>% mutate(label = paste(pivot, " - ", round(cumulative_CI)))
|
||||
label_data <- latest_model %>% arrange(season, desc(cumulative_CI)) %>% slice(1) %>% mutate(label = paste(pivot, " - ", round(cumulative_CI)))
|
||||
|
||||
max_day <- label_data %>% group_by(pivot) %>% summarise(max_day = max(DOY))
|
||||
|
||||
ggplot(data= CI_all2%>% filter(season =="Data_2022"), aes(DOY, cumulative_CI, col = pivot)) +
|
||||
facet_wrap(~month) +
|
||||
geom_line() +
|
||||
# scale_y_continuous(breaks = seq(0, max(label_data$cumulative_CI) + 100, by = 100)) +
|
||||
labs(title = "Cumulative CI values over the year per pivot, split per 2-month age (rounded down)", x = "Days after harvest/planting (scale is per 2 weeks)", y = "Cumulative CI value",
|
||||
color = "Line") +
|
||||
geom_label_repel(data = label_data %>% filter(model %in% latest_model$model), aes(DOY, cumulative_CI, label = label), box.padding = 1,
|
||||
# ylim = c(1300, NA),
|
||||
max.overlaps = Inf
|
||||
# segment.linetype = 4,
|
||||
|
||||
# segment.curvature = -1e-20,
|
||||
# arrow = arrow(length = unit(0.015, "npc"))
|
||||
) +
|
||||
theme(legend.position="right", legend.text = element_text(size=8), legend.title = element_text(size = 8),
|
||||
plot.title = element_text(size=19)) +
|
||||
# geom_smooth()+
|
||||
guides(fill = guide_legend(nrow=2,byrow=TRUE)) +
|
||||
scale_y_continuous(breaks=seq(0,max(label_data$cumulative_CI),100)) +
|
||||
scale_x_continuous(breaks=seq(0,max(max_day$max_day),30)) + theme(axis.text.x = element_text(angle = 90)) +
|
||||
labs(x = "Weeks")+
|
||||
theme(legend.position = "none")
|
||||
|
||||
```
|
||||
# Yield prediction
|
||||
The below table shows estimates of the biomass if you would harvest them now.
|
||||
```{r eval=FALSE, message=FALSE, warning=FALSE, include=FALSE}
|
||||
CI_quadrant <- readRDS(here(cumulative_CI_vals_dir,"All_pivots_Cumulative_CI_quadrant_year_v2.rds")) %>%
|
||||
rename( pivot_quadrant = field)#All_pivots_Cumulative_CI.rds
|
||||
ggplot(CI_quadrant %>% filter(pivot %in% "1.11")) +
|
||||
geom_line(aes(DOY, cumulative_CI, col = as.factor(season))) +
|
||||
facet_wrap(~pivot_quadrant)
|
||||
```{r eval=FALSE, message=FALSE, warning=FALSE, include=TRUE}
|
||||
CI_quadrant <- readRDS(here(cumulative_CI_vals_dir,"All_pivots_Cumulative_CI_quadrant_year_v2.rds"))
|
||||
|
||||
pivots_dates0 <- pivots_dates0 %>% ungroup() %>% unique() %>%
|
||||
dplyr::select(field, sub_field, Tcha_2021, Tcha_2022 ) %>%
|
||||
pivot_longer(cols = c("Tcha_2021", "Tcha_2022"), names_to = "Tcha_Year", values_to = "Tcha") %>%
|
||||
filter(Tcha > 50) %>%
|
||||
mutate(season = as.integer(str_extract(Tcha_Year, "\\d+")))
|
||||
harvesting_data <- harvesting_data %>% rename(season = year)
|
||||
|
||||
CI_and_yield <- left_join(CI_quadrant , pivots_dates0, by = c("pivot", "pivot_quadrant", "season")) %>% filter(!is.na(Tcha)) %>%
|
||||
group_by(pivot_quadrant, season) %>% slice(which.max(DOY)) %>%
|
||||
dplyr::select(pivot, pivot_quadrant, Tcha_Year, Tcha, cumulative_CI, DOY, season) %>%
|
||||
CI_and_yield <- left_join(CI_quadrant , harvesting_data, by = c("field", "sub_field", "season")) %>% #filter(!is.na(tonnage_ha)) %>%
|
||||
group_by(sub_field, season) %>% slice(which.max(DOY)) %>%
|
||||
dplyr::select(field, sub_field, tonnage_ha, cumulative_CI, DOY, season, sub_area) %>%
|
||||
mutate(CI_per_day = cumulative_CI/ DOY)
|
||||
|
||||
ggplot(CI_and_yield) +
|
||||
geom_point(aes(Tcha, CI_per_day, col = Tcha_Year ))
|
||||
|
||||
|
||||
set.seed(20)
|
||||
CI_and_yield_split <- initial_split(CI_and_yield, prop = 0.75, strata = pivot_quadrant)
|
||||
CI_and_yield_test <- training(CI_and_yield_split)
|
||||
CI_and_yield_validation <- testing(CI_and_yield_split)
|
||||
|
||||
|
||||
predictors <- c( "cumulative_CI" , "DOY" ,"CI_per_day" )
|
||||
response <- "Tcha"
|
||||
CI_and_yield_test <- as.data.frame(CI_and_yield_test)
|
||||
response <- "tonnage_ha"
|
||||
# CI_and_yield_test <- as.data.frame(CI_and_yield_test)
|
||||
CI_and_yield_test <- CI_and_yield %>% as.data.frame() %>% filter(!is.na(tonnage_ha))
|
||||
CI_and_yield_validation <- CI_and_yield_test
|
||||
prediction_yields <- CI_and_yield %>% as.data.frame() %>% filter(is.na(tonnage_ha))
|
||||
|
||||
ctrl <- trainControl(method="cv",
|
||||
savePredictions = TRUE,
|
||||
|
|
@ -368,46 +306,52 @@ model_ffs_rf <- ffs( CI_and_yield_test[,predictors],
|
|||
na.rm = TRUE
|
||||
)
|
||||
|
||||
pred_ffs_rf <-
|
||||
predict(model_ffs_rf, newdata = CI_and_yield_validation) %>% as.data.frame() %>% rename(predicted_Tcha = ".") %>% mutate(
|
||||
pivot_quadrant = CI_and_yield_validation$pivot_quadrant,
|
||||
pivot = CI_and_yield_validation$pivot,
|
||||
Age_days = CI_and_yield_validation$DOY,
|
||||
total_CI = round(CI_and_yield_validation$cumulative_CI, 0),
|
||||
|
||||
# Function to prepare predictions
|
||||
prepare_predictions <- function(predictions, newdata) {
|
||||
return(predictions %>%
|
||||
as.data.frame() %>%
|
||||
rename(predicted_Tcha = ".") %>%
|
||||
mutate(sub_field = newdata$sub_field,
|
||||
field = newdata$field,
|
||||
Age_days = newdata$DOY,
|
||||
total_CI = round(newdata$cumulative_CI, 0),
|
||||
predicted_Tcha = round(predicted_Tcha, 0),
|
||||
season = CI_and_yield_validation$season
|
||||
) %>% dplyr::select(pivot , pivot_quadrant, Age_days, total_CI, predicted_Tcha, season) %>%
|
||||
left_join(., CI_and_yield_validation, by = c("pivot", "pivot_quadrant", "season")) %>%
|
||||
filter(Age_days > 250)
|
||||
season = newdata$season) %>%
|
||||
dplyr::select(field, sub_field, Age_days, total_CI, predicted_Tcha, season) %>%
|
||||
left_join(., newdata, by = c("field", "sub_field", "season")))
|
||||
}
|
||||
# Predict yields for the validation dataset
|
||||
pred_ffs_rf <- prepare_predictions(predict(model_ffs_rf, newdata = CI_and_yield_validation), CI_and_yield_validation)
|
||||
|
||||
|
||||
|
||||
prediction_2023 <- CI_quadrant %>% filter(season == "2023") %>% group_by(pivot_quadrant) %>% slice(which.max(DOY))%>%
|
||||
mutate(CI_per_day = cumulative_CI/ DOY)
|
||||
|
||||
pred_rf_2023 <- predict(model_ffs_rf, newdata=prediction_2023) %>%
|
||||
as.data.frame() %>% rename(predicted_Tcha_2023 = ".") %>% mutate(pivot_quadrant = prediction_2023$pivot_quadrant,
|
||||
pivot = prediction_2023$pivot,
|
||||
Age_days = prediction_2023$DOY,
|
||||
total_CI = round(prediction_2023$cumulative_CI,0),
|
||||
predicted_Tcha_2023 = round(predicted_Tcha_2023, 0)) %>%
|
||||
# Predict yields for the current season
|
||||
pred_rf_current_season <- prepare_predictions(predict(model_ffs_rf, newdata = prediction_yields), prediction_yields) %>%
|
||||
filter(Age_days > 300) %>%
|
||||
dplyr::select(pivot ,pivot_quadrant, Age_days, total_CI, predicted_Tcha_2023)%>%
|
||||
mutate(CI_per_day = round(total_CI/ Age_days, 1))
|
||||
|
||||
|
||||
|
||||
mutate(CI_per_day = round(total_CI / Age_days, 1))
|
||||
```
|
||||
|
||||
```{r yield_plaatjes, eval=FALSE, include=FALSE}
|
||||
ggplot(pred_ffs_rf, aes(y = predicted_Tcha , x = Tcha , col = pivot )) +
|
||||
geom_point() +geom_abline() +
|
||||
scale_x_continuous(limits = c(50, 160))+
|
||||
scale_y_continuous(limits = c(50, 160)) +
|
||||
labs(title = "Model trained and tested on historical results - RF")
|
||||
```{r yield_plaatjes, eval=FALSE, include=TRUE}
|
||||
ggplot(pred_ffs_rf, aes(y = predicted_Tcha, x = tonnage_ha, col = sub_area)) +
|
||||
geom_point(size = 2, alpha = 0.6) + # Adjust point size and transparency
|
||||
geom_abline(intercept = 0, slope = 1, linetype = "dashed", color = "red") + # Reference line
|
||||
scale_x_continuous(limits = c(0, 200)) +
|
||||
scale_y_continuous(limits = c(0, 200)) +
|
||||
labs(title = "Model Performance: \nPredicted vs Actual Tonnage/ha",
|
||||
x = "Actual tonnage/ha (Tcha)",
|
||||
y = "Predicted tonnage/ha (Tcha)") +
|
||||
theme_minimal()
|
||||
|
||||
ggplot(pred_rf_2023, aes(total_CI , predicted_Tcha_2023 , col = pivot )) +
|
||||
geom_point() + labs(title = "2023 data (still to be harvested) - fields over 300 days old")
|
||||
ggplot(pred_rf_current_season, aes(x = Age_days, y = predicted_Tcha, col = field)) +
|
||||
geom_point(size = 2, alpha = 0.6) + # Adjust point size and transparency
|
||||
labs(title = "Predicted Yields for Fields Over 300 Days \nOld Yet to Be Harvested",
|
||||
x = "Age (days)",
|
||||
y = "Predicted tonnage/ha (Tcha)") +
|
||||
facet_wrap(~sub_area) +
|
||||
scale_y_continuous(limits = c(0, 200)) + # Optional: Set limits for y-axis
|
||||
theme_minimal()
|
||||
|
||||
knitr::kable(pred_rf_2023)
|
||||
knitr::kable(pred_rf_current_season,
|
||||
digits = 0,
|
||||
caption = "Predicted Tonnage/ha for Fields Over 300 Days Old")
|
||||
```
|
||||
|
||||
|
|
|
|||
91
r_app/counting_clouds.R
Normal file
91
r_app/counting_clouds.R
Normal file
|
|
@ -0,0 +1,91 @@
|
|||
# Required packages
|
||||
# library(ggplot2)
|
||||
# library(dplyr)
|
||||
raster_files_NEW <- list.files(merged_final,full.names = T, pattern = ".tif")
|
||||
|
||||
# Extracting the dates from vrt_list (assuming the format "YYYY-MM-DD.vrt" at the end)
|
||||
no_cloud_dates <- as.Date(sapply(raster_files_NEW, function(x) {
|
||||
sub(".*/([0-9]{4}-[0-9]{2}-[0-9]{2})\\.tif", "\\1", x)
|
||||
}))
|
||||
|
||||
# Generate a full sequence of dates in the range
|
||||
start_date <- min(no_cloud_dates)
|
||||
end_date <- max(no_cloud_dates)
|
||||
all_dates <- seq(start_date, end_date, by = "day")
|
||||
|
||||
# Create a data frame marking no clouds (1) and clouds (0)
|
||||
cloud_data <- data.frame(
|
||||
date = all_dates,
|
||||
cloud_status = ifelse(all_dates %in% no_cloud_dates, 1, 0)
|
||||
)
|
||||
|
||||
# Plot the data
|
||||
ggplot(cloud_data, aes(x = date, y = cloud_status)) +
|
||||
geom_point() +
|
||||
labs(x = "Date", y = "Cloud Status (1 = No Cloud, 0 = Cloud)") +
|
||||
scale_y_continuous(breaks = c(0, 1)) +
|
||||
theme_minimal()
|
||||
|
||||
# Updated ggplot code to display months on the x-axis
|
||||
ggplot(cloud_data, aes(x = date, y = cloud_status)) +
|
||||
geom_point() +
|
||||
scale_x_date(date_labels = "%b", date_breaks = "1 month") +
|
||||
labs(x = "Month", y = "Cloud Status (1 = No Cloud, 0 = Cloud)") +
|
||||
scale_y_continuous(breaks = c(0, 1)) +
|
||||
theme_minimal()
|
||||
|
||||
# Group data by year and week
|
||||
cloud_data <- cloud_data %>%
|
||||
mutate(week = isoweek(date), year = year(date)) %>%
|
||||
group_by(year, week) %>%
|
||||
summarise(no_cloud_days = sum(cloud_status == 1),
|
||||
cloud_days = sum(cloud_status == 0))
|
||||
|
||||
# 1. Show how many weeks per year have no images (clouds for all 7 days)
|
||||
weeks_no_images <- cloud_data %>%
|
||||
filter(cloud_days == 7)
|
||||
|
||||
# Plot weeks with no images
|
||||
ggplot(weeks_no_images, aes(x = week, y = year)) +
|
||||
geom_tile(fill = "red") +
|
||||
labs(x = "Week", y = "Year", title = "Weeks with No Images (Full Cloud Cover)") +
|
||||
theme_minimal()
|
||||
|
||||
|
||||
# 2. Determine when most clouds are present (cloud_days > no_cloud_days)
|
||||
weeks_most_clouds <- cloud_data %>%
|
||||
filter(cloud_days > no_cloud_days)
|
||||
|
||||
# Plot when most clouds are present
|
||||
ggplot(weeks_most_clouds, aes(x = week, y = year)) +
|
||||
geom_tile(fill = "blue") +
|
||||
labs(x = "Week", y = "Year", title = "Weeks with Most Clouds") +
|
||||
theme_minimal()
|
||||
|
||||
# Group weeks by number of cloud days and count how many weeks had 0-7 cloud days
|
||||
weeks_by_cloud_days <- cloud_data %>%
|
||||
group_by(cloud_days) %>%
|
||||
summarise(week_count = n())
|
||||
|
||||
# Display the summary
|
||||
print(weeks_by_cloud_days)
|
||||
|
||||
# Optional: Plot the results to visualise how many weeks had 0-7 cloud days
|
||||
ggplot(weeks_by_cloud_days, aes(x = cloud_days, y = week_count)) +
|
||||
geom_bar(stat = "identity", fill = "skyblue") +
|
||||
labs(x = "Number of Cloud Days (per week)", y = "Number of Weeks",
|
||||
title = "Distribution of Cloud Days per Week") +
|
||||
theme_minimal()
|
||||
|
||||
weeks_by_no_cloud_days <- cloud_data %>%
|
||||
mutate(no_cloud_days = 7 - cloud_days) %>%
|
||||
group_by(no_cloud_days) %>%
|
||||
summarise(week_count = n())
|
||||
|
||||
# Plot the results to visualise how many weeks had 0-7 cloud-free days
|
||||
ggplot(weeks_by_no_cloud_days, aes(x = no_cloud_days, y = week_count)) +
|
||||
geom_bar(stat = "identity", fill = "#00A799") +
|
||||
geom_text(aes(label = week_count), vjust = -0.5, size = 4) + # Add the count of weeks on top of bars
|
||||
labs(x = "Number of Cloud-Free Days (per week)", y = "Number of Weeks",
|
||||
title = "Distribution of Cloud-Free Days per Week") +
|
||||
theme_minimal()
|
||||
|
|
@ -90,7 +90,7 @@ ci_plot <- function(pivotName){
|
|||
|
||||
cum_ci_plot <- function(pivotName){
|
||||
|
||||
# pivotName = "1.1"
|
||||
# pivotName = "3a13"
|
||||
data_ci <- CI_quadrant %>% filter(field == pivotName)
|
||||
|
||||
if (nrow(data_ci) == 0) {
|
||||
|
|
@ -125,6 +125,75 @@ cum_ci_plot <- function(pivotName){
|
|||
|
||||
}
|
||||
|
||||
cum_ci_plot <- function(pivotName, plot_type = "value", facet_on = TRUE) {
|
||||
|
||||
# pivotName = "3a13"
|
||||
data_ci <- CI_quadrant %>% filter(field == pivotName)
|
||||
|
||||
if (nrow(data_ci) == 0) {
|
||||
return(cum_ci_plot2(pivotName)) # Return an empty data frame if no data is found
|
||||
}
|
||||
|
||||
data_ci2 <- data_ci %>%
|
||||
mutate(CI_rate = cumulative_CI / DOY,
|
||||
week = week(Date)) %>%
|
||||
group_by(field) %>%
|
||||
mutate(mean_CIrate_rolling_10_days = rollapplyr(CI_rate, width = 10, FUN = mean, partial = TRUE),
|
||||
mean_rolling_10_days = rollapplyr(value, width = 10, FUN = mean, partial = TRUE))
|
||||
|
||||
data_ci2 <- data_ci2 %>% mutate(season = as.factor(season))
|
||||
|
||||
date_preperation_perfect_pivot <- data_ci2 %>%
|
||||
group_by(season) %>%
|
||||
summarise(min_date = min(Date),
|
||||
max_date = max(Date),
|
||||
days = max_date - min_date)
|
||||
|
||||
unique_seasons <- sort(unique(date_preperation_perfect_pivot$season), decreasing = TRUE)[1:3]
|
||||
|
||||
# Determine the y aesthetic based on the plot type
|
||||
y_aesthetic <- switch(plot_type,
|
||||
"CI_rate" = "mean_CIrate_rolling_10_days",
|
||||
"cumulative_CI" = "cumulative_CI",
|
||||
"value" = "mean_rolling_10_days")
|
||||
|
||||
y_label <- switch(plot_type,
|
||||
"CI_rate" = "10-Day Rolling Mean CI Rate (cumulative CI / age)",
|
||||
"cumulative_CI" = "Cumulative CI",
|
||||
"value" = "10-Day Rolling Mean CI")
|
||||
|
||||
if (facet_on) {
|
||||
g <- ggplot(data = data_ci2 %>% filter(season %in% unique_seasons)) +
|
||||
facet_wrap(~season, scales = "free_x") +
|
||||
geom_line(aes_string(x = "Date", y = y_aesthetic, col = "sub_field", group = "sub_field")) +
|
||||
labs(title = paste("Plot of", y_label, "for Pivot", pivotName),
|
||||
color = "Field Name",
|
||||
y = y_label) +
|
||||
scale_x_date(date_breaks = "1 month", date_labels = "%m-%Y") +
|
||||
theme_minimal() +
|
||||
theme(axis.text.x = element_text(angle = 60, hjust = 1),
|
||||
legend.justification = c(1, 0), legend.position = c(1, 0),
|
||||
legend.title = element_text(size = 8),
|
||||
legend.text = element_text(size = 8)) +
|
||||
guides(color = guide_legend(nrow = 2, byrow = TRUE))
|
||||
} else {
|
||||
g <- ggplot(data = data_ci2 %>% filter(season %in% unique_seasons)) +
|
||||
geom_line(aes_string(x = "DOY", y = y_aesthetic, col = "season", group = "season")) +
|
||||
labs(title = paste("Plot of", y_label, "for Pivot", pivotName),
|
||||
color = "Season",
|
||||
y = y_label,
|
||||
x = "Age of Crop (Days)") +
|
||||
theme_minimal() +
|
||||
theme(axis.text.x = element_text(angle = 60, hjust = 1),
|
||||
legend.justification = c(1, 0), legend.position = c(1, 0),
|
||||
legend.title = element_text(size = 8),
|
||||
legend.text = element_text(size = 8)) +
|
||||
guides(color = guide_legend(nrow = 2, byrow = TRUE))
|
||||
}
|
||||
|
||||
subchunkify(g, 3.2, 10)
|
||||
}
|
||||
|
||||
cum_ci_plot2 <- function(pivotName){
|
||||
end_date <- Sys.Date()
|
||||
start_date <- end_date %m-% months(11) # 11 months ago from end_date
|
||||
|
|
|
|||
Loading…
Reference in a new issue