From c1a32bf6ad3c38be07bf89968e1a474ef8789aba Mon Sep 17 00:00:00 2001 From: Timon Date: Tue, 29 Oct 2024 10:01:37 +0100 Subject: [PATCH] Refactor cum_ci_plot function for enhanced plotting options; added parameters for plot type and season filtering; improved x-axis labeling and updated ggplot aesthetics. Also, continued work on yield prediction in R. --- .../tests/__fixtures__/Muhoroni/harvest.xlsx | Bin 24105 -> 27033 bytes r_app/CI_report_dashboard_planet.Rmd | 162 ++++++------------ r_app/counting_clouds.R | 91 ++++++++++ r_app/report_utils.R | 71 +++++++- 4 files changed, 214 insertions(+), 110 deletions(-) create mode 100644 r_app/counting_clouds.R diff --git a/laravel_app/tests/__fixtures__/Muhoroni/harvest.xlsx b/laravel_app/tests/__fixtures__/Muhoroni/harvest.xlsx index 6309f204bc71b96a9e8d653f7fc3aac1d13a2519..fda9d9e12840713d17571ba2b6ef4091e4be0eb5 100644 GIT binary patch delta 4219 zcmZ`+byO5yx1Aw%2uJA<80khp!9l>Gn<1o2$)QUcMq-d2VCa@skRCulL6DYiq?GRN zdiwp|TJOE}zHgtk*ShE2b^f?#?R(eVADV!vBftt1C?5#$4GU=eGmXo(I+YFpH=rCQUiE_ zm|YoK98Mqx4P~i>;{%bz(W*qP0$`clpj*h+7q;U(IYfdIc@wu6$+`|vUzxB5-hOlX z$exv%$(P#@Sd&YXI401w@CB2%qi+LUy@?u%;65gAoJ7UuE%iy`dcNag?VIlQ@61_b z1aXlPt5dXRW!rKhoq49%DQ!me}*aE?a>fk)ammf>oWKHAZTq zjD0dr^h5Nwf~ZAf*NI~W-b>vVO0H$vtk>L|ZM?)xo=5-&rcKUP+N1Oi1_S_M8L5R=G!JaAi5he!8jFFP(Ku#C@G~9Wnt6M&sQRH$=05RBw z%2d;TYT+#ZB<8-BLR>af=(K7@8W0-G^L;GdxiVEuIA2&KMCM5gi}_Q13cn*Z%O3Ub zbmkPXZ?cjtf>^LB@qQYEByKS$WbV++DzaWbQKQ!hO*;(P<@WZ*D#M)hMOMc&7h{f@ z`_(FKD5P#2J579W#W5egq}*uTbjczf-*!HRu-?dDj7uvqloEy0ht=`Tii{9+B-V=h z)F;1B`dA}TmXms1mRF$2HxyOMx`NWD(-LA!G$H2)7yh)m9Z59`_gB8*YN%iK=El%6 zbnMST=cN_h_$qRQJB^a?DKp0OB5A$8aRISL6gn}rE*@KOko*gaxa%77^Bc-*v40J+ z8m(}SgTs7)iBJwjA_}D&SM6Clf9D6FeIC?0h}o@5(|th7WluCSglf1s_?EN8+J0b> zk0#Mp_JI~^&3w7S4>$J#X~GD=Yf{G}u;gu2A9;xV_9N1_=GaqzBIC`Vh;WzttYJYM z!6zvJZhpTW#f?UX)vL{4euVk&el(!7u4W&sr|7uXbW4719Sl5aFpb^+rj@^Bwmu0h z=1Ki7uZ&JCI-f;fWNBM!n5?!x;@9h=<}W{=IL{6!i@f}TFmI)YUL~CIT1TcOHd$m) zSbt~Ep*_)NvNu}mI9Vny=+3WB@mK@m?~n!rq(DuNHW>doH%N5}=Mn!O1wHteg7ASB zPUdh|CucYQf28z}f`HXvI1Uh*AJ=Q!CIFT_Mco7s%?;_rU^pg816X2Z-w)RKzUqyE z#7E`V-z~T`Nfk!FX~d)qrMk)QZ?KTgwP4p1RNw-@B^Uh<4H$=@IZrxd;B)iT(kYl? zR*sN}O`?Qkd%3}}(a#vV0Z6{$;M{aTc;$x^BLN{7%XO~k;UIm_Fd4C`?J*5! z#A9H}uML~h2LZ^o_e4(5CYvAGy^Qu)G56fbbfmp)9WY_G*JWQTiHfL-VmjzfqYMDR zc1B|R{q+E`8RT?npPJ()(0zE`@pq}dz=xsR_ZvPvd{faYeTyhn`-K++6v$rHprOnU zA?cY3sb1(9CYSR>O8w)>(DJhUZ8ZFsZ2b#qBp;7dvWXadX_#Mk?i+-KEJx? zVw^mn^3Uz8R@b~kfKk*Y2AME3jZFP{!5~jDKzVXZwjn+y63T@%bowbu8Ng_E6TINB z!=ry7D>n;EtEv8W6dG>rF;76Q*goA8FQ5B>F!FlWS0UoTn@Z;HDcQYe@;&xeJHFkV8H<)z^;^Nd)5fJqfs%9N6Bb*Fi=+787ZHa}_y2z}6=?r37K zvra3)$0x;Cy1wg3q3MynrKjJ{8M?k3PS<(XrH8+%u|IR*P2OFp>+jndzYu6N`CKbf z&9gb}+GICzkkQ`Aw!K?WbGV{gC)|ecGJ7qwi?II=>B}b8Q~>4%LFL69Mu(aoV=E*q|d$A_QBCsZdgAK)=mX@5{w|PBAq4 z${&bBhY5SmK3fomylQJhm5o0zD^SGowMMA5cQWPQ?-q4;hh?<5WfaKYO%rny*^9** z;f>E^^MeDC6N~n2Shjxp^I$x(C0=}AAD;#v*RuGdKyz#L+ne<~ZE^g`$D6l1@BQAi zK-Mo44+3Y>!xwp}4pFNq7GDokCQcrXNOM~>9GX$$ad*P%M@c6ciyPFbYL|Tqt$!x1 z=eExr8ChFCaFU&U_6~vSSp=QFW3iModZ=A~T)0t>{CV^xiGLj-^SK)Yd`d=wg9WS| zL2~7HK%;q4H>tt%9Fh~>9G90NuaB5glGC>rP`L_~FYPsH?b(bhB{Ux7`ld+)!Th;Y z?z;0wHJfD=<&ci&W6^5A?{S#p(ui(eK2+j7H|+9gwpRZYDPc}xnC-v==lD3wd479xJ-t&P5_t0v8%5f zWbhr!?EElb`F=>gq#HR39HYd4#Yn_$VcMRPjH~?fz8Z~}hA0V*F=@SN0%(!x8B^_| zi_5d?2u*@Q7hD>Hw6|^=^|iRL-7uu);BaG%d4S1Y{O5zq=LC*y>=_aU?;gMJ?6bgM zN6JvkcA*!R1?eEKJi-R6e@LrWO`<~JU4F_{;NclzxgjEVPT3`8dU2`*&;hYocrgQw zRvJty^z%#y z7;!p+QJR>Cv4KIFT{gbO6Et6`T0a-aBRvAEjuq&+1ZLF)XrOE^Rl4J?${h`v#|F|a z=PjF2wf=M4kk2!@gbY6i8?0_70@VWvA`A+@mJ5);6A<`69@0-puF6(f9N2d+t;csC zvTCWP+^~*oNcjh~kl7MuS`zjnupn?%&eN8pdj&ygqmg^|w!agXrJtz`4hpj8BOC1C%uTrCM_6^_h>{sPg4~W?93pF5XTUPQrxB z-?E~)p472BS@t+1K=1kIV!);{NFRN~Qmz)W-Nyb+mu3hJYp4K4jFz$Z@!tD0T@Xu@ zAy9+^hvU%;#TDxg<_OoY`_<=0)q1?4&JmH)a@?4*L4&VRNu+XwMWpOrT)7@_)36Ox zncBQTwN9}BZHiM34awVL)vIf!{0S)vDDGhl6hmrpAwLK$*(EHB`B)x;v?CyyfOOlj zmy?dFr5dqcqu^g&3zm%4QplY2g2&dv=I|*s)X72OhZVFXL~-6Eo}+v-+!hywWya4h zI8>=-QL$H)$Vg0)UZmr3*c4BIMaAYA|JvBf6|#8&`PFpLaU_Y-{M5NlQmZu*A6M=P z>ToaeU>bMvqU^}^x{ z|3~%(?=ALcH7W|-Rno18B{MpziQ(T2b<@K+NOIcmOA04R4E25Ppw53QCI5Kg8VnIH zhuc<(;4FHFHm&FCWhE;%kUPAFFbG$yok*+Z9xU}VHTk#t*Vic{)0752rosw3#HDfz zI|w*CNi$^YpqsrJ@U@1b6+yei@WRGN5O~kEX|clwSvaWRyf+*-HljZSB1cQ`LpUm( zrRTJ%89n}qng&G^eDYRVH>)lMzi^j_GwK?gz4{7=5!aIjjJ&I5KgCd-H%l)3lT|%8 zy)v~p4#ZNaP4dnvYaO|DzUrFF<<;d8nL&}yN=4d4%?Cf=u*NCJ+IG2g zjVLfF?$xY;EVV9svUGNXs)yxxpn+$FenNq9vYaZXEOJx7h4)LqUam3k`Nv{at8`IP zpSilPIYN4cI4OL&LQ-1Gs@JnCq^ZY6wR$rRsyV;unuQ78U7?&(24R0q=$Y(&?1eKv z-g8(BR-!>}In^SCzj6dut@uZj;BF_sRN)AXYNev6BaxwsTX+&WiMzOM#RcDFPO{^Q zBYIuQ*&T?!+31$xC*1@W!lKZmgi1`9is^zZpys=}j zk7mZ5_IuB~b{}5)@tRL!6GgNhDQk?dgywg`=ls1tgUeYnJq-R$#;)Sb^QtOEe zt6fd+w5rsWk=S5E=yQLViP?TXpDX$hhbI@x@Jm0QR;g>NXR?)U%{fdISRU~9E=$5Y zJX;7dWtkRi8-4X+q45k@pQxMzAfC;+v#VsKD|s`>nr5FU;t9#lopwxL|GJa5q$qpU z0XB+SngGoi`&~RiWBDhu#Osm-Cye^V>5U|MB)<2XHXK8wW3(e9cgV7u0ehei@jo7$ ztUi*Bt2k<~(lg7;sCu4J?yOAwqw-8GG8TkX{FL?FYB|axf5`c_5E3K%&p8g7xo_2^LCJMY$O!E=|kHll5kjvLiix-7+oj zl73r`JB@`_tPXXOG=WHqg6qfIemSbV!+L}F9P|y|=7rED2;?Uc`RRBnFDumgl#3#> z+}yt??qmzeoqEci3?y%ntoV1Khe-8%s zDd?xck1e!LwGvk-#2l#w=I6Y1S>ry@9C+7htUF4JB(`2D;ieskwMQ3w^)rrQuVP-L z+n18#S#jqfGKp-wt;#%M@3uT|9ynNy$RAbZHk7$%968r=lS@f48w{u0vU!~ZKdwNU(XEhFA|{5o$uCc1pPsz= z;dBV3ClV)q6s~3cMw2BH;>h*G$F-bNPqE}5-oC+fg`<#?knUcN;r)=ruZ|7y$SyZP4G%(mKx*{x;j_Ds{XTg|X< zP0cnI@0zr~7Sfu{S6hy>N3e&Z`r}VggQq_d3CAm5HIF6FYqc=D@sFCOS zn5eL)g?b#6y1x9JBhU(l0(fG~gM0<7iAk>?A5O&s-%rjz2&TXR{u{+v2prCT zD!|@y;py`^Ca&qa*n!Zx)D6?~3~U%PZ}4t*=(Ir;?{9-(lIlNgex5}4R}zvKvB19b z4=m!0KlS%dVAy{{$$k^L(>QD)pX|^V5>8tGfuI=gKNk^u5|fV+=33|?R$08y9EktD z;a~h&=2gte`0u3Ryu*ojs=Jh8hDFMMOrQ@@eAp5Xr2%A>uPF+A$S6f1{nIjodnnV$ zq122;xJ3EFYY-dj&m{D26aU zFL+ND>51ED<%cdgtz`=;0zU|l6F^;36cnzX90T+s>d0Oed*5{lRIUY?4`ph99HQU? z9n^;Ir!aWQC<{}D{27pHa*IgYbf)DMHG+Nn!=tamb;jL=|JK+mgrhh zi1$&x#~GL zOZ`$uBWsN|qY)}<6Ms7zshyfA+nH6~K34XCTQKb)NP$;jS%X!0DFqnOR2jE8o zzN(g*Y3M{L00Zthn7E2swM)uv2_48118CMVAB8|8A3k1!-5B(~mTxfzDWIg!8L+OO zOV4WYD)`t;0X-mpNXC#s3&Ui(0-b9@deA?LlPfCF_6ZNc{@R)@?W_d1OEVr2{3~WG z)me0#bM>45vMRm~otIjgUrv!g%KLALWA74x@6sj;(gOpc(W+HK1bzY_>K`VtY z8y;&m`v@jLAEW3?dK?&cer2s$)s-{q?!Ys^!MnZlwH$3kInh9E8AbqDv?9@C-C8lM@;yhvZ03xyH3=vutOM07^-Hf+Dvuv4TSu1 zs5(fBe8aD?Jm*!Hp$??8ZB!>}BH3?tzc*eKNx2e#eX<#M`{TUe>6=Bnc!k;Bk|;h? z#^^n>Xcw?5uoc094Y=yXL=AK6o@T@HKFmBQ8<35ZK-4z1+b3t1?<$>&v;`$<6Q#}S zMTyrqy|PoE2F>wn9cmO99v=+}th9y&?>K!g`y}O)@@aalbf4oG2*rL8cjsOXt*;PJ1e?XsXXsgZGurO9tYx5xBZQ(BvDB-Rj%p=7D#dXz8yTx~zDV?JmO!DxOP;$7-%73=x zVA5QrkQDmp2ANs84`C_I-4|)Pe&GFo>S(el5vOq+I&?EpmZqu(vZ=?Z((rW5X9VeY5 z!NbF5?@>=?#m;&@RMZ^{oGEO_+a=db;7UFUiqgA!*|3!NHF94H4R1^<5pzqeY0RRAf z1ONaS04S1Y8Iy)sD1Wq+Uu)Yi6vf{M`wm9$vMm3LRO*zF5GbrLwhsEL$k&OOB^gOx zHpagDWVd5?@eMkJFkBGgP*RTSr8k?f+W&1lxm?_6Gm}m@ER#S-D1X!r-QIG~xw?Ao<~)t7Hwe*s*CCqa zXa!DrZQY?m$7FDgHhnucbMP~SWFfAWgp1AGp&_NYAtb5+CKd}U+nVv^(7q>ibWm3b#INn?5 zdnjqn1s0U%n9H)jg0mWHQ3EenC7_ne$`jEx&W;^Q;RuKXV8~hJWczhjGu@N{+=z{T}rsY}(*q!!Qtr_d@@J(C+AL$4L}y2Y6J{dKLd=otX0#?F2T?H z%3EP%ERzj8D#;Uk;RbKg`@8s2tAT6VbAud0ngk!L9oB@H`oIM% - mutate(#line = substr(pivot, 1 , 1), - season = as.factor(season)) - -pivots_dates <- readRDS(here(harvest_dir, "harvest_data_new")) #%>% - -pvt_age <- pivots_dates %>% ungroup() %>% select(pivot, Age) %>% unique() -CI_all2 <- left_join(CI_all2, pvt_age , by = "pivot") %>% mutate(month = plyr::round_any((Age/4),2)) %>% - mutate(month = case_when(month > 16 ~ 18, - TRUE ~month)) %>% - group_by(pivot) %>% filter(Age == max(Age)) %>% ungroup() - -CI_all2$season <- ordered(CI_all2$season, levels = c("Data_2021", "Data_2022")) -# CI_all2 <- CI_all2 %>% mutate(season_order = as.integer(season)) -latest_model <- CI_all2 %>% group_by(pivot) %>% filter(season =="Data_2022") -# latest_model <- CI_all2 %>% group_by(pivot) %>% arrange(season, desc(DOY)) %>% slice(1) - -# CI_all2 <- CI_all %>% group_by(pivot, DOY ) %>% mutate(pivot_cumulative_CI = mean(cumulative_CI)) -# label_data <- CI_all2 %>% group_by(pivot) %>% arrange(season, desc(cumulative_CI)) %>% slice(1) %>% mutate(label = paste(pivot, " - ", round(cumulative_CI))) -label_data <- latest_model %>% arrange(season, desc(cumulative_CI)) %>% slice(1) %>% mutate(label = paste(pivot, " - ", round(cumulative_CI))) - -max_day <- label_data %>% group_by(pivot) %>% summarise(max_day = max(DOY)) - -ggplot(data= CI_all2%>% filter(season =="Data_2022"), aes(DOY, cumulative_CI, col = pivot)) + - facet_wrap(~month) + - geom_line() + - # scale_y_continuous(breaks = seq(0, max(label_data$cumulative_CI) + 100, by = 100)) + - labs(title = "Cumulative CI values over the year per pivot, split per 2-month age (rounded down)", x = "Days after harvest/planting (scale is per 2 weeks)", y = "Cumulative CI value", - color = "Line") + - geom_label_repel(data = label_data %>% filter(model %in% latest_model$model), aes(DOY, cumulative_CI, label = label), box.padding = 1, - # ylim = c(1300, NA), - max.overlaps = Inf - # segment.linetype = 4, - - # segment.curvature = -1e-20, - # arrow = arrow(length = unit(0.015, "npc")) - ) + - theme(legend.position="right", legend.text = element_text(size=8), legend.title = element_text(size = 8), - plot.title = element_text(size=19)) + - # geom_smooth()+ - guides(fill = guide_legend(nrow=2,byrow=TRUE)) + - scale_y_continuous(breaks=seq(0,max(label_data$cumulative_CI),100)) + - scale_x_continuous(breaks=seq(0,max(max_day$max_day),30)) + theme(axis.text.x = element_text(angle = 90)) + - labs(x = "Weeks")+ - theme(legend.position = "none") - -``` # Yield prediction The below table shows estimates of the biomass if you would harvest them now. -```{r eval=FALSE, message=FALSE, warning=FALSE, include=FALSE} -CI_quadrant <- readRDS(here(cumulative_CI_vals_dir,"All_pivots_Cumulative_CI_quadrant_year_v2.rds")) %>% - rename( pivot_quadrant = field)#All_pivots_Cumulative_CI.rds -ggplot(CI_quadrant %>% filter(pivot %in% "1.11")) + - geom_line(aes(DOY, cumulative_CI, col = as.factor(season))) + - facet_wrap(~pivot_quadrant) +```{r eval=FALSE, message=FALSE, warning=FALSE, include=TRUE} +CI_quadrant <- readRDS(here(cumulative_CI_vals_dir,"All_pivots_Cumulative_CI_quadrant_year_v2.rds")) -pivots_dates0 <- pivots_dates0 %>% ungroup() %>% unique() %>% - dplyr::select(field, sub_field, Tcha_2021, Tcha_2022 ) %>% - pivot_longer(cols = c("Tcha_2021", "Tcha_2022"), names_to = "Tcha_Year", values_to = "Tcha") %>% - filter(Tcha > 50) %>% - mutate(season = as.integer(str_extract(Tcha_Year, "\\d+"))) +harvesting_data <- harvesting_data %>% rename(season = year) -CI_and_yield <- left_join(CI_quadrant , pivots_dates0, by = c("pivot", "pivot_quadrant", "season")) %>% filter(!is.na(Tcha)) %>% - group_by(pivot_quadrant, season) %>% slice(which.max(DOY)) %>% - dplyr::select(pivot, pivot_quadrant, Tcha_Year, Tcha, cumulative_CI, DOY, season) %>% +CI_and_yield <- left_join(CI_quadrant , harvesting_data, by = c("field", "sub_field", "season")) %>% #filter(!is.na(tonnage_ha)) %>% + group_by(sub_field, season) %>% slice(which.max(DOY)) %>% + dplyr::select(field, sub_field, tonnage_ha, cumulative_CI, DOY, season, sub_area) %>% mutate(CI_per_day = cumulative_CI/ DOY) -ggplot(CI_and_yield) + - geom_point(aes(Tcha, CI_per_day, col = Tcha_Year )) - - -set.seed(20) -CI_and_yield_split <- initial_split(CI_and_yield, prop = 0.75, strata = pivot_quadrant) -CI_and_yield_test <- training(CI_and_yield_split) -CI_and_yield_validation <- testing(CI_and_yield_split) - - predictors <- c( "cumulative_CI" , "DOY" ,"CI_per_day" ) -response <- "Tcha" -CI_and_yield_test <- as.data.frame(CI_and_yield_test) +response <- "tonnage_ha" +# CI_and_yield_test <- as.data.frame(CI_and_yield_test) +CI_and_yield_test <- CI_and_yield %>% as.data.frame() %>% filter(!is.na(tonnage_ha)) +CI_and_yield_validation <- CI_and_yield_test +prediction_yields <- CI_and_yield %>% as.data.frame() %>% filter(is.na(tonnage_ha)) ctrl <- trainControl(method="cv", savePredictions = TRUE, @@ -368,46 +306,52 @@ model_ffs_rf <- ffs( CI_and_yield_test[,predictors], na.rm = TRUE ) -pred_ffs_rf <- - predict(model_ffs_rf, newdata = CI_and_yield_validation) %>% as.data.frame() %>% rename(predicted_Tcha = ".") %>% mutate( - pivot_quadrant = CI_and_yield_validation$pivot_quadrant, - pivot = CI_and_yield_validation$pivot, - Age_days = CI_and_yield_validation$DOY, - total_CI = round(CI_and_yield_validation$cumulative_CI, 0), - predicted_Tcha = round(predicted_Tcha, 0), - season = CI_and_yield_validation$season - ) %>% dplyr::select(pivot , pivot_quadrant, Age_days, total_CI, predicted_Tcha, season) %>% - left_join(., CI_and_yield_validation, by = c("pivot", "pivot_quadrant", "season")) %>% - filter(Age_days > 250) +# Function to prepare predictions +prepare_predictions <- function(predictions, newdata) { + return(predictions %>% + as.data.frame() %>% + rename(predicted_Tcha = ".") %>% + mutate(sub_field = newdata$sub_field, + field = newdata$field, + Age_days = newdata$DOY, + total_CI = round(newdata$cumulative_CI, 0), + predicted_Tcha = round(predicted_Tcha, 0), + season = newdata$season) %>% + dplyr::select(field, sub_field, Age_days, total_CI, predicted_Tcha, season) %>% + left_join(., newdata, by = c("field", "sub_field", "season"))) +} +# Predict yields for the validation dataset +pred_ffs_rf <- prepare_predictions(predict(model_ffs_rf, newdata = CI_and_yield_validation), CI_and_yield_validation) - -prediction_2023 <- CI_quadrant %>% filter(season == "2023") %>% group_by(pivot_quadrant) %>% slice(which.max(DOY))%>% - mutate(CI_per_day = cumulative_CI/ DOY) - -pred_rf_2023 <- predict(model_ffs_rf, newdata=prediction_2023) %>% - as.data.frame() %>% rename(predicted_Tcha_2023 = ".") %>% mutate(pivot_quadrant = prediction_2023$pivot_quadrant, - pivot = prediction_2023$pivot, - Age_days = prediction_2023$DOY, - total_CI = round(prediction_2023$cumulative_CI,0), - predicted_Tcha_2023 = round(predicted_Tcha_2023, 0)) %>% +# Predict yields for the current season +pred_rf_current_season <- prepare_predictions(predict(model_ffs_rf, newdata = prediction_yields), prediction_yields) %>% filter(Age_days > 300) %>% - dplyr::select(pivot ,pivot_quadrant, Age_days, total_CI, predicted_Tcha_2023)%>% - mutate(CI_per_day = round(total_CI/ Age_days, 1)) - - - + mutate(CI_per_day = round(total_CI / Age_days, 1)) ``` -```{r yield_plaatjes, eval=FALSE, include=FALSE} -ggplot(pred_ffs_rf, aes(y = predicted_Tcha , x = Tcha , col = pivot )) + - geom_point() +geom_abline() + - scale_x_continuous(limits = c(50, 160))+ - scale_y_continuous(limits = c(50, 160)) + - labs(title = "Model trained and tested on historical results - RF") +```{r yield_plaatjes, eval=FALSE, include=TRUE} +ggplot(pred_ffs_rf, aes(y = predicted_Tcha, x = tonnage_ha, col = sub_area)) + + geom_point(size = 2, alpha = 0.6) + # Adjust point size and transparency + geom_abline(intercept = 0, slope = 1, linetype = "dashed", color = "red") + # Reference line + scale_x_continuous(limits = c(0, 200)) + + scale_y_continuous(limits = c(0, 200)) + + labs(title = "Model Performance: \nPredicted vs Actual Tonnage/ha", + x = "Actual tonnage/ha (Tcha)", + y = "Predicted tonnage/ha (Tcha)") + + theme_minimal() -ggplot(pred_rf_2023, aes(total_CI , predicted_Tcha_2023 , col = pivot )) + - geom_point() + labs(title = "2023 data (still to be harvested) - fields over 300 days old") +ggplot(pred_rf_current_season, aes(x = Age_days, y = predicted_Tcha, col = field)) + + geom_point(size = 2, alpha = 0.6) + # Adjust point size and transparency + labs(title = "Predicted Yields for Fields Over 300 Days \nOld Yet to Be Harvested", + x = "Age (days)", + y = "Predicted tonnage/ha (Tcha)") + + facet_wrap(~sub_area) + + scale_y_continuous(limits = c(0, 200)) + # Optional: Set limits for y-axis + theme_minimal() -knitr::kable(pred_rf_2023) +knitr::kable(pred_rf_current_season, + digits = 0, + caption = "Predicted Tonnage/ha for Fields Over 300 Days Old") ``` + diff --git a/r_app/counting_clouds.R b/r_app/counting_clouds.R new file mode 100644 index 0000000..e4cd942 --- /dev/null +++ b/r_app/counting_clouds.R @@ -0,0 +1,91 @@ +# Required packages +# library(ggplot2) +# library(dplyr) +raster_files_NEW <- list.files(merged_final,full.names = T, pattern = ".tif") + +# Extracting the dates from vrt_list (assuming the format "YYYY-MM-DD.vrt" at the end) +no_cloud_dates <- as.Date(sapply(raster_files_NEW, function(x) { + sub(".*/([0-9]{4}-[0-9]{2}-[0-9]{2})\\.tif", "\\1", x) +})) + +# Generate a full sequence of dates in the range +start_date <- min(no_cloud_dates) +end_date <- max(no_cloud_dates) +all_dates <- seq(start_date, end_date, by = "day") + +# Create a data frame marking no clouds (1) and clouds (0) +cloud_data <- data.frame( + date = all_dates, + cloud_status = ifelse(all_dates %in% no_cloud_dates, 1, 0) +) + +# Plot the data +ggplot(cloud_data, aes(x = date, y = cloud_status)) + + geom_point() + + labs(x = "Date", y = "Cloud Status (1 = No Cloud, 0 = Cloud)") + + scale_y_continuous(breaks = c(0, 1)) + + theme_minimal() + +# Updated ggplot code to display months on the x-axis +ggplot(cloud_data, aes(x = date, y = cloud_status)) + + geom_point() + + scale_x_date(date_labels = "%b", date_breaks = "1 month") + + labs(x = "Month", y = "Cloud Status (1 = No Cloud, 0 = Cloud)") + + scale_y_continuous(breaks = c(0, 1)) + + theme_minimal() + +# Group data by year and week +cloud_data <- cloud_data %>% + mutate(week = isoweek(date), year = year(date)) %>% + group_by(year, week) %>% + summarise(no_cloud_days = sum(cloud_status == 1), + cloud_days = sum(cloud_status == 0)) + +# 1. Show how many weeks per year have no images (clouds for all 7 days) +weeks_no_images <- cloud_data %>% + filter(cloud_days == 7) + +# Plot weeks with no images +ggplot(weeks_no_images, aes(x = week, y = year)) + + geom_tile(fill = "red") + + labs(x = "Week", y = "Year", title = "Weeks with No Images (Full Cloud Cover)") + + theme_minimal() + + +# 2. Determine when most clouds are present (cloud_days > no_cloud_days) +weeks_most_clouds <- cloud_data %>% + filter(cloud_days > no_cloud_days) + +# Plot when most clouds are present +ggplot(weeks_most_clouds, aes(x = week, y = year)) + + geom_tile(fill = "blue") + + labs(x = "Week", y = "Year", title = "Weeks with Most Clouds") + + theme_minimal() + +# Group weeks by number of cloud days and count how many weeks had 0-7 cloud days +weeks_by_cloud_days <- cloud_data %>% + group_by(cloud_days) %>% + summarise(week_count = n()) + +# Display the summary +print(weeks_by_cloud_days) + +# Optional: Plot the results to visualise how many weeks had 0-7 cloud days +ggplot(weeks_by_cloud_days, aes(x = cloud_days, y = week_count)) + + geom_bar(stat = "identity", fill = "skyblue") + + labs(x = "Number of Cloud Days (per week)", y = "Number of Weeks", + title = "Distribution of Cloud Days per Week") + + theme_minimal() + +weeks_by_no_cloud_days <- cloud_data %>% + mutate(no_cloud_days = 7 - cloud_days) %>% + group_by(no_cloud_days) %>% + summarise(week_count = n()) + +# Plot the results to visualise how many weeks had 0-7 cloud-free days +ggplot(weeks_by_no_cloud_days, aes(x = no_cloud_days, y = week_count)) + + geom_bar(stat = "identity", fill = "#00A799") + + geom_text(aes(label = week_count), vjust = -0.5, size = 4) + # Add the count of weeks on top of bars + labs(x = "Number of Cloud-Free Days (per week)", y = "Number of Weeks", + title = "Distribution of Cloud-Free Days per Week") + + theme_minimal() diff --git a/r_app/report_utils.R b/r_app/report_utils.R index 5aff7f2..3759e21 100644 --- a/r_app/report_utils.R +++ b/r_app/report_utils.R @@ -90,7 +90,7 @@ ci_plot <- function(pivotName){ cum_ci_plot <- function(pivotName){ - # pivotName = "1.1" + # pivotName = "3a13" data_ci <- CI_quadrant %>% filter(field == pivotName) if (nrow(data_ci) == 0) { @@ -125,6 +125,75 @@ cum_ci_plot <- function(pivotName){ } +cum_ci_plot <- function(pivotName, plot_type = "value", facet_on = TRUE) { + + # pivotName = "3a13" + data_ci <- CI_quadrant %>% filter(field == pivotName) + + if (nrow(data_ci) == 0) { + return(cum_ci_plot2(pivotName)) # Return an empty data frame if no data is found + } + + data_ci2 <- data_ci %>% + mutate(CI_rate = cumulative_CI / DOY, + week = week(Date)) %>% + group_by(field) %>% + mutate(mean_CIrate_rolling_10_days = rollapplyr(CI_rate, width = 10, FUN = mean, partial = TRUE), + mean_rolling_10_days = rollapplyr(value, width = 10, FUN = mean, partial = TRUE)) + + data_ci2 <- data_ci2 %>% mutate(season = as.factor(season)) + + date_preperation_perfect_pivot <- data_ci2 %>% + group_by(season) %>% + summarise(min_date = min(Date), + max_date = max(Date), + days = max_date - min_date) + + unique_seasons <- sort(unique(date_preperation_perfect_pivot$season), decreasing = TRUE)[1:3] + + # Determine the y aesthetic based on the plot type + y_aesthetic <- switch(plot_type, + "CI_rate" = "mean_CIrate_rolling_10_days", + "cumulative_CI" = "cumulative_CI", + "value" = "mean_rolling_10_days") + + y_label <- switch(plot_type, + "CI_rate" = "10-Day Rolling Mean CI Rate (cumulative CI / age)", + "cumulative_CI" = "Cumulative CI", + "value" = "10-Day Rolling Mean CI") + + if (facet_on) { + g <- ggplot(data = data_ci2 %>% filter(season %in% unique_seasons)) + + facet_wrap(~season, scales = "free_x") + + geom_line(aes_string(x = "Date", y = y_aesthetic, col = "sub_field", group = "sub_field")) + + labs(title = paste("Plot of", y_label, "for Pivot", pivotName), + color = "Field Name", + y = y_label) + + scale_x_date(date_breaks = "1 month", date_labels = "%m-%Y") + + theme_minimal() + + theme(axis.text.x = element_text(angle = 60, hjust = 1), + legend.justification = c(1, 0), legend.position = c(1, 0), + legend.title = element_text(size = 8), + legend.text = element_text(size = 8)) + + guides(color = guide_legend(nrow = 2, byrow = TRUE)) + } else { + g <- ggplot(data = data_ci2 %>% filter(season %in% unique_seasons)) + + geom_line(aes_string(x = "DOY", y = y_aesthetic, col = "season", group = "season")) + + labs(title = paste("Plot of", y_label, "for Pivot", pivotName), + color = "Season", + y = y_label, + x = "Age of Crop (Days)") + + theme_minimal() + + theme(axis.text.x = element_text(angle = 60, hjust = 1), + legend.justification = c(1, 0), legend.position = c(1, 0), + legend.title = element_text(size = 8), + legend.text = element_text(size = 8)) + + guides(color = guide_legend(nrow = 2, byrow = TRUE)) + } + + subchunkify(g, 3.2, 10) +} + cum_ci_plot2 <- function(pivotName){ end_date <- Sys.Date() start_date <- end_date %m-% months(11) # 11 months ago from end_date