# Source: SmartCane/python_app/python_scripts/generate_ci_graphs_dashboard.py
# (repository viewer metadata at time of capture: 916 lines, 31 KiB, Python)

"""
Generate Interactive CI Graphs Dashboard
=========================================
This script creates an interactive HTML dashboard with:
1. Historic CI trends by field/season (from RDS file)
2. Current and last week statistics (box plots, heatmaps, scatter)
3. Field and season selection dropdowns
Data Sources:
- Historic: laravel_app/storage/app/esa/Data/extracted_ci/cumulative_vals/All_pivots_Cumulative_CI_quadrant_year_v2.rds
- Current: laravel_app/storage/app/esa/weekly_mosaic/week_*.tif
Usage:
python generate_ci_graphs_dashboard.py [project] [--current-week W] [--previous-week W] [--output-dir DIR]
Example:
python generate_ci_graphs_dashboard.py esa --current-week 43 --previous-week 42 --output-dir output
"""
import argparse
import json
import warnings
from pathlib import Path
from datetime import datetime
import numpy as np
import pandas as pd
import rasterio
from rasterio.mask import mask
import geopandas as gpd
from shapely.geometry import box, shape
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
warnings.filterwarnings('ignore')
class CIGraphsDashboard:
    """Create interactive CI graphs dashboard with historic and current data.

    The generator loads field boundaries (GeoJSON), an optional historic CI
    table (RDS, with CSV fallback) and per-week CI rasters (GeoTIFF), builds a
    set of Plotly figures from them and writes one self-contained HTML page.

    Attributes:
        project: Project folder name below the storage directory.
        current_week / previous_week: Week numbers to analyse (may be None).
        output_dir: Directory the HTML file is written to (created on init).
        field_gdf: GeoDataFrame of field boundaries, set by
            ``load_field_boundaries``.
        historic_data: DataFrame of historic CI values, set by
            ``load_rds_file``.
        current_week_data / previous_week_data: ``{field: stats}`` mappings
            produced by ``load_weekly_ci_data`` (or None when unavailable).
    """

    # Raster band (1-based) that holds the CI values in the weekly mosaics.
    CI_BAND_INDEX = 5

    def __init__(self, project='esa', current_week=None, previous_week=None, output_dir='output'):
        """Initialize dashboard generator.

        Creates ``output_dir`` if needed and derives all input paths from the
        location of this file; no data is loaded here.
        """
        self.project = project
        self.current_week = current_week
        self.previous_week = previous_week
        self.output_dir = Path(output_dir)
        self.output_dir.mkdir(parents=True, exist_ok=True)

        # Data paths
        # NOTE(review): this resolves to <parent of this file's dir>/laravel_app;
        # confirm that matches the repository layout.
        self.base_dir = Path(__file__).parent.parent / 'laravel_app' / 'storage' / 'app' / project
        self.weekly_mosaic_dir = self.base_dir / 'weekly_mosaic'
        self.rds_file = (
            self.base_dir / 'Data' / 'extracted_ci' / 'cumulative_vals'
            / 'All_pivots_Cumulative_CI_quadrant_year_v2.rds'
        )
        self.pivot_geojson = self.base_dir / 'Data' / 'pivot.geojson'

        # Populated lazily by the load_* / extract_* methods.
        self.field_gdf = None
        self.historic_data = None
        self.current_week_data = None
        self.previous_week_data = None

        print(f"Initialized CIGraphsDashboard for project: {project}")
        print(f"Data directory: {self.base_dir}")
        print(f"RDS file: {self.rds_file}")

    def load_rds_file(self):
        """Load the historic CI table and store it in ``self.historic_data``.

        Readers are tried in order: ``pyreadr``, then ``rpy2``, then a CSV file
        with the same stem next to the RDS file. A reader is skipped only when
        its package is not importable; errors raised while reading propagate.

        Returns:
            The loaded DataFrame.

        Raises:
            ValueError: If no reader is installed and no CSV fallback exists,
                or if pyreadr returns no data frame.
        """
        try:
            import pyreadr
        except ImportError:
            pyreadr = None
            print("pyreadr not installed. Attempting alternative approach...")

        if pyreadr is not None:
            print(f"Loading RDS file: {self.rds_file}")
            # read_r returns a mapping of data frames; use the first entry.
            result = pyreadr.read_r(str(self.rds_file))
            if not result:
                raise ValueError(f"No data frame found in RDS file: {self.rds_file}")
            self.historic_data = next(iter(result.values()))
            print(f"Loaded historic data shape: {self.historic_data.shape}")
            print(f"Columns: {self.historic_data.columns.tolist()}")
            print(f"First few rows:\n{self.historic_data.head()}")
            return self.historic_data

        try:
            import rpy2.robjects as robjects
            from rpy2.robjects import pandas2ri
        except ImportError:
            robjects = None
            print("rpy2 not installed either. Trying CSV fallback...")

        if robjects is not None:
            pandas2ri.activate()
            # Call readRDS as a function with the path as an argument instead of
            # splicing it into R source text: backslashes or quotes in the path
            # (e.g. on Windows) would otherwise corrupt the R string literal.
            r_data = robjects.r['readRDS'](self.rds_file.as_posix())
            self.historic_data = pandas2ri.rpy2py(r_data)
            print(f"Loaded historic data shape: {self.historic_data.shape}")
            print(f"Columns: {self.historic_data.columns.tolist()}")
            return self.historic_data

        # Last resort: a CSV export of the same data.
        csv_file = self.rds_file.with_suffix('.csv')
        if csv_file.exists():
            self.historic_data = pd.read_csv(csv_file)
            print(f"Loaded historic data from CSV: {csv_file}")
            return self.historic_data

        raise ValueError(f"Could not load RDS file: {self.rds_file}\n"
                         "Install pyreadr or rpy2 to read RDS files.")

    def load_field_boundaries(self):
        """Load field boundaries GeoJSON into ``self.field_gdf`` and return it."""
        print(f"Loading field boundaries: {self.pivot_geojson}")
        self.field_gdf = gpd.read_file(self.pivot_geojson)
        print(f"Loaded {len(self.field_gdf)} fields")
        print(f"Columns: {self.field_gdf.columns.tolist()}")
        return self.field_gdf

    def load_weekly_ci_data(self, week_num):
        """Load per-field CI statistics from a weekly mosaic GeoTIFF.

        Args:
            week_num: Week number used to locate the raster file. ``None``
                yields ``None`` instead of failing on number formatting.

        Returns:
            ``{field_name: stats}`` where ``stats`` holds ``mean``, ``median``,
            ``std``, ``min``, ``max``, ``q25``, ``q75``, ``count`` and the raw
            ``values`` list; or ``None`` when the file is missing or cannot be
            read. Fields without any positive pixel are omitted.
        """
        if week_num is None:
            print("Warning: No week number given; skipping weekly CI extraction")
            return None

        # Try multiple file naming patterns (de-duplicated, order preserved).
        # NOTE(review): the year suffix is hard-coded to 2025.
        candidate_names = dict.fromkeys([
            f"week_{week_num}.tif",
            f"week_{week_num}_2025.tif",
            f"week_{week_num:02d}.tif",
            f"week_{week_num:02d}_2025.tif",
        ])
        possible_files = [self.weekly_mosaic_dir / name for name in candidate_names]
        week_file = next((path for path in possible_files if path.exists()), None)
        if week_file is None:
            print(f"Warning: Week file not found for week {week_num}. Tried: {possible_files}")
            return None

        print(f"Loading week {week_num} data: {week_file}")
        try:
            with rasterio.open(week_file) as src:
                fields = self.field_gdf
                # mask() expects shapes in the raster's CRS; reproject when both
                # CRSs are known and differ.
                if fields.crs is not None and src.crs is not None and fields.crs != src.crs:
                    fields = fields.to_crs(src.crs)

                field_ci_stats = {}
                for idx, row in fields.iterrows():
                    field_name = row.get('pivot_name', row.get('PIVOT', f'field_{idx}'))
                    try:
                        # Read only the pixels inside the field boundary.
                        masked_array, _ = mask(
                            src, [row.geometry], crop=True, indexes=self.CI_BAND_INDEX
                        )
                        # Keep strictly positive pixels only; this drops the
                        # fill value outside the polygon (and NaN), but also any
                        # genuine CI value <= 0.
                        valid_values = masked_array[masked_array > 0]
                        if valid_values.size == 0:
                            continue
                        field_ci_stats[field_name] = {
                            'mean': float(np.mean(valid_values)),
                            'median': float(np.median(valid_values)),
                            'std': float(np.std(valid_values)),
                            'min': float(np.min(valid_values)),
                            'max': float(np.max(valid_values)),
                            'q25': float(np.percentile(valid_values, 25)),
                            'q75': float(np.percentile(valid_values, 75)),
                            'count': int(valid_values.size),
                            'values': valid_values.tolist(),  # All values, for the histogram
                        }
                    except Exception as e:
                        # Best effort: one bad geometry must not abort the week.
                        print(f"Could not extract CI for field {field_name}: {e}")
                        continue
            return field_ci_stats
        except Exception as e:
            print(f"Error loading week {week_num}: {e}")
            return None

    def extract_current_week_data(self):
        """Extract CI statistics from current and previous week GeoTIFFs.

        The previous week is only loaded when ``self.previous_week`` is truthy.

        Returns:
            Tuple ``(current_week_data, previous_week_data)``.
        """
        print(f"\nExtracting current week data (week {self.current_week})...")
        self.current_week_data = self.load_weekly_ci_data(self.current_week)
        if self.previous_week:
            print(f"Extracting previous week data (week {self.previous_week})...")
            self.previous_week_data = self.load_weekly_ci_data(self.previous_week)
        return self.current_week_data, self.previous_week_data

    def create_historic_trend_chart(self):
        """Create line chart for historic CI trends by field.

        Column names are detected heuristically: the field column is the first
        of ``pivot_name`` / ``PIVOT`` / ``field`` present, the value column is
        the first whose name contains ``ci`` or ``mean`` (else the last
        column), and the x axis is ``year`` when present (else the first
        column). Only the first 10 fields are plotted.

        Returns:
            A Plotly figure, or ``None`` when no historic data or no field
            column is available.
        """
        data = self.historic_data
        if data is None:
            print("No historic data loaded")
            return None

        print("Creating historic trend chart...")
        print(f"Historic data columns: {data.columns.tolist()}")

        field_col = next(
            (col for col in ('pivot_name', 'PIVOT', 'field') if col in data.columns),
            None,
        )
        if field_col is None:
            print("Warning: Could not find field column in historic data")
            return None

        # Column detection does not depend on the field, so do it once.
        ci_col = next(
            (col for col in data.columns
             if 'ci' in str(col).lower() or 'mean' in str(col).lower()),
            data.columns[-1],
        )
        x_label = 'year' if 'year' in data.columns else data.columns[0]

        fig = go.Figure()
        for field in data[field_col].unique()[:10]:  # Limit to first 10 for clarity
            field_data = data[data[field_col] == field]
            if field_data.empty:
                continue
            fig.add_trace(go.Scatter(
                x=field_data[x_label].astype(str),
                y=field_data[ci_col],
                mode='lines+markers',
                name=str(field),
                hovertemplate=f"<b>{field}</b><br>Value: %{{y:.3f}}<extra></extra>"
            ))

        fig.update_layout(
            title="Historic CI Trends by Field",
            xaxis_title="Time Period",
            yaxis_title="Chlorophyll Index",
            hovermode='x unified',
            height=500,
            template='plotly_white'
        )
        return fig

    def create_current_boxplot(self):
        """Create box plots for current and previous week.

        One trace per week with one box per field. Boxes are drawn from the
        pre-computed per-field statistics (quartiles, median, min/max as
        fences, mean and standard deviation) rather than from a single mean
        value, which would collapse every box to a line.

        Returns:
            A Plotly figure, or ``None`` when no current week data exists.
        """
        if not self.current_week_data:
            print("No current week data available")
            return None

        print("Creating box plots...")
        weekly = [(f'Week {self.current_week}', self.current_week_data)]
        if self.previous_week_data:
            weekly.append((f'Week {self.previous_week}', self.previous_week_data))

        fig = go.Figure()
        for label, week_stats in weekly:
            field_names = list(week_stats)
            stats = list(week_stats.values())
            fig.add_trace(go.Box(
                name=label,
                x=field_names,
                q1=[s['q25'] for s in stats],
                median=[s['median'] for s in stats],
                q3=[s['q75'] for s in stats],
                lowerfence=[s['min'] for s in stats],
                upperfence=[s['max'] for s in stats],
                mean=[s['mean'] for s in stats],
                sd=[s['std'] for s in stats],
                boxmean='sd'
            ))

        fig.update_layout(
            title="CI Distribution by Field and Week",
            xaxis_title="Field",
            yaxis_title="Chlorophyll Index",
            hovermode='x',
            height=500,
            template='plotly_white',
            boxmode='group'
        )
        return fig

    def create_scatter_plot(self):
        """Create scatter plot comparing current vs previous week.

        Only fields present in both weeks are plotted.

        Returns:
            A Plotly figure, or ``None`` when either week is missing or the
            two weeks have no field in common.
        """
        if not (self.current_week_data and self.previous_week_data):
            print("Cannot create scatter plot without both weeks")
            return None

        print("Creating scatter plot...")
        shared_fields = [f for f in self.current_week_data if f in self.previous_week_data]
        if not shared_fields:
            # Without this guard the min()/max() below would fail on empty data.
            print("Cannot create scatter plot: no fields common to both weeks")
            return None

        current = [self.current_week_data[f]['mean'] for f in shared_fields]
        previous = [self.previous_week_data[f]['mean'] for f in shared_fields]
        change = [cur - prev for cur, prev in zip(current, previous)]

        fig = go.Figure()
        fig.add_trace(go.Scatter(
            x=previous,
            y=current,
            mode='markers+text',
            text=shared_fields,
            textposition='top center',
            marker=dict(
                size=10,
                color=change,
                colorscale='RdBu_r',
                showscale=True,
                colorbar=dict(title="Change")
            ),
            hovertemplate="<b>%{text}</b><br>Previous: %{x:.3f}<br>Current: %{y:.3f}<extra></extra>"
        ))

        # Add diagonal reference line (points on it did not change).
        min_val = min(min(previous), min(current))
        max_val = max(max(previous), max(current))
        fig.add_trace(go.Scatter(
            x=[min_val, max_val],
            y=[min_val, max_val],
            mode='lines',
            name='No change',
            line=dict(dash='dash', color='gray'),
            hoverinfo='skip'
        ))

        fig.update_layout(
            title=f"CI Comparison: Week {self.previous_week} vs Week {self.current_week}",
            xaxis_title=f"Week {self.previous_week} Mean CI",
            yaxis_title=f"Week {self.current_week} Mean CI",
            hovermode='closest',
            height=500,
            template='plotly_white'
        )
        return fig

    def create_distribution_histogram(self):
        """Create histogram showing CI distribution for all fields in current week.

        Returns:
            A Plotly figure, or ``None`` when no current week data exists.
        """
        if not self.current_week_data:
            print("No current week data available")
            return None

        print("Creating histogram...")
        # Pool the raw pixel values of every field.
        all_values = [
            value
            for stats in self.current_week_data.values()
            for value in stats['values']
        ]

        fig = go.Figure()
        fig.add_trace(go.Histogram(
            x=all_values,
            nbinsx=50,
            name='CI Values',
            marker_color='rgba(0,100,200,0.7)'
        ))
        fig.update_layout(
            title=f"CI Value Distribution (Week {self.current_week})",
            xaxis_title="Chlorophyll Index",
            yaxis_title="Frequency",
            height=500,
            template='plotly_white',
            hovermode='x'
        )
        return fig

    def create_heatmap(self):
        """Create heatmap showing mean CI by field for the loaded weeks.

        Rows are the fields of the current week (sorted); columns are the
        previous week (when its data is available) followed by the current
        week. Fields missing from the previous week get NaN.

        Returns:
            A Plotly figure, or ``None`` when no current week data exists.
        """
        if not self.current_week_data:
            print("No current week data available")
            return None

        print("Creating heatmap...")
        fields = sorted(self.current_week_data.keys())
        # One flag drives both the column labels and the matrix rows so the
        # two can never disagree on the number of columns.
        include_previous = bool(self.previous_week_data)

        weeks = [self.current_week]
        if include_previous:
            weeks.insert(0, self.previous_week)

        z_values = []
        for field in fields:
            row = []
            if include_previous:
                row.append(self.previous_week_data.get(field, {}).get('mean', np.nan))
            row.append(self.current_week_data.get(field, {}).get('mean', np.nan))
            z_values.append(row)

        fig = go.Figure(data=go.Heatmap(
            z=z_values,
            x=[f'Week {w}' for w in weeks],
            y=fields,
            colorscale='Viridis',
            hovertemplate='Field: %{y}<br>Week: %{x}<br>Mean CI: %{z:.3f}<extra></extra>'
        ))
        fig.update_layout(
            title="Mean CI by Field and Week (Heatmap)",
            xaxis_title="Week",
            yaxis_title="Field",
            height=600,
            template='plotly_white'
        )
        return fig

    def create_summary_statistics(self):
        """Create summary statistics table for the current week.

        Returns:
            A Plotly table figure with one row per field, or ``None`` when no
            current week data exists.
        """
        if not self.current_week_data:
            return None

        print("Creating summary statistics...")
        summary_data = [
            {
                'Field': field,
                'Mean CI': f"{stats['mean']:.3f}",
                'Median CI': f"{stats['median']:.3f}",
                'Std Dev': f"{stats['std']:.3f}",
                'Min': f"{stats['min']:.3f}",
                'Max': f"{stats['max']:.3f}",
                'Pixels': stats['count']
            }
            for field, stats in self.current_week_data.items()
        ]
        df_summary = pd.DataFrame(summary_data)

        fig = go.Figure(data=[go.Table(
            header=dict(
                values=list(df_summary.columns),
                fill_color='paleturquoise',
                align='left',
                font=dict(size=12)
            ),
            cells=dict(
                values=[df_summary[col] for col in df_summary.columns],
                fill_color='lavender',
                align='left',
                font=dict(size=11)
            )
        )])
        fig.update_layout(
            title=f"Week {self.current_week} - Field Statistics",
            height=400
        )
        return fig

    def generate_html(self):
        """Generate complete HTML dashboard with all graphs.

        Loads field boundaries, the historic table (failure is tolerated and
        only disables the historic chart) and the weekly rasters, builds all
        figures and writes ``ci_graphs_dashboard_<current_week>.html`` into
        ``self.output_dir``.

        Returns:
            Path of the written HTML file.
        """
        print("\nGenerating HTML dashboard...")

        # Load all data
        self.load_field_boundaries()
        try:
            print("Attempting to load RDS file...")
            self.load_rds_file()
        except Exception as e:
            # Historic data is optional; continue with the weekly figures.
            print(f"Warning: Could not load RDS file: {e}")
            self.historic_data = None

        print("Extracting current week data...")
        self.extract_current_week_data()

        # Create all figures (each entry may be None)
        figs = {
            'historic_trend': self.create_historic_trend_chart(),
            'summary_table': self.create_summary_statistics(),
            'boxplot': self.create_current_boxplot(),
            'histogram': self.create_distribution_histogram(),
            'heatmap': self.create_heatmap(),
            'scatter': self.create_scatter_plot()
        }

        # Generate HTML
        html_content = self._build_html(figs)
        output_file = self.output_dir / f'ci_graphs_dashboard_{self.current_week}.html'
        with open(output_file, 'w', encoding='utf-8') as f:
            f.write(html_content)
        print(f"Dashboard saved to: {output_file}")
        return output_file

    def _build_html(self, figs):
        """Build complete HTML document.

        Args:
            figs: Mapping with the keys ``summary_table``, ``historic_trend``,
                ``boxplot``, ``histogram``, ``heatmap`` and ``scatter``; each
                value is a Plotly figure or ``None`` (section left empty).

        Returns:
            The page as a single string.
        """
        plotlyjs_pending = True

        def embed(key, div_id):
            """Return the HTML fragment for figure ``key`` ('' when absent)."""
            nonlocal plotlyjs_pending
            fig = figs.get(key)
            if fig is None:
                return ""
            # The first embedded figure pulls plotly.js from the CDN in the
            # version matching the installed plotly package; later figures
            # reuse it. (The "plotly-latest" CDN alias is frozen at an old
            # release and may not match the generated figure JSON.)
            include_js = 'cdn' if plotlyjs_pending else False
            plotlyjs_pending = False
            # full_html=False: emit only the <div>/<script> fragment instead of
            # a complete nested <html> document.
            fragment = fig.to_html(full_html=False, include_plotlyjs=include_js, div_id=div_id)
            return f" {fragment}\n"

        parts = ["""
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>CI Graphs Dashboard</title>
<style>
* {
margin: 0;
padding: 0;
box-sizing: border-box;
}
body {
font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
padding: 20px;
min-height: 100vh;
}
.container {
max-width: 1400px;
margin: 0 auto;
background: white;
border-radius: 10px;
box-shadow: 0 8px 32px rgba(0,0,0,0.1);
padding: 30px;
}
.header {
text-align: center;
margin-bottom: 30px;
border-bottom: 3px solid #667eea;
padding-bottom: 20px;
}
h1 {
color: #333;
font-size: 2.5em;
margin-bottom: 10px;
}
.subtitle {
color: #666;
font-size: 1.1em;
}
.controls {
background: #f8f9fa;
padding: 20px;
border-radius: 8px;
margin-bottom: 30px;
display: flex;
gap: 20px;
flex-wrap: wrap;
}
.control-group {
display: flex;
flex-direction: column;
gap: 5px;
}
.control-group label {
font-weight: 600;
color: #333;
font-size: 0.9em;
}
.control-group select {
padding: 8px 12px;
border: 2px solid #ddd;
border-radius: 4px;
font-size: 1em;
cursor: pointer;
transition: border-color 0.3s;
}
.control-group select:hover {
border-color: #667eea;
}
.control-group select:focus {
outline: none;
border-color: #667eea;
box-shadow: 0 0 5px rgba(102, 126, 234, 0.3);
}
.tabs {
display: flex;
gap: 10px;
margin-bottom: 20px;
border-bottom: 2px solid #eee;
}
.tab-button {
padding: 12px 20px;
border: none;
background: #f8f9fa;
color: #666;
cursor: pointer;
font-size: 0.95em;
font-weight: 500;
border-bottom: 3px solid transparent;
transition: all 0.3s;
}
.tab-button:hover {
background: #e9ecef;
color: #333;
}
.tab-button.active {
color: #667eea;
border-bottom-color: #667eea;
}
.tab-content {
display: none;
}
.tab-content.active {
display: block;
animation: fadeIn 0.3s;
}
@keyframes fadeIn {
from { opacity: 0; }
to { opacity: 1; }
}
.graph-container {
background: white;
border-radius: 8px;
box-shadow: 0 2px 8px rgba(0,0,0,0.08);
margin-bottom: 30px;
padding: 20px;
overflow: auto;
}
.graph-container h3 {
color: #333;
margin-bottom: 15px;
font-size: 1.3em;
border-left: 4px solid #667eea;
padding-left: 10px;
}
.grid-2 {
display: grid;
grid-template-columns: 1fr 1fr;
gap: 20px;
}
@media (max-width: 1200px) {
.grid-2 {
grid-template-columns: 1fr;
}
}
.info-box {
background: #f0f4ff;
border-left: 4px solid #667eea;
padding: 15px;
border-radius: 4px;
margin-bottom: 20px;
color: #333;
font-size: 0.95em;
}
.info-box strong {
color: #667eea;
}
footer {
text-align: center;
margin-top: 40px;
padding-top: 20px;
border-top: 2px solid #eee;
color: #999;
font-size: 0.9em;
}
</style>
</head>
<body>
<div class="container">
<div class="header">
<h1>🌾 Chlorophyll Index (CI) Analysis Dashboard</h1>
<p class="subtitle">Historic Trends &amp; Current Week Statistics</p>
</div>
<div class="controls">
<div class="control-group">
<label for="field-select">Select Field:</label>
<select id="field-select">
<option value="all">All Fields</option>
</select>
</div>
<div class="control-group">
<label for="season-select">Select Season:</label>
<select id="season-select">
<option value="all">All Seasons</option>
</select>
</div>
<div class="control-group">
<label for="year-select">Select Year:</label>
<select id="year-select">
<option value="all">All Years</option>
</select>
</div>
</div>
<div class="tabs">
<button class="tab-button active" onclick="showTab(event, 'overview')">📊 Overview</button>
<button class="tab-button" onclick="showTab(event, 'historic')">📈 Historic Trends</button>
<button class="tab-button" onclick="showTab(event, 'current')">⏱️ Current Week Analysis</button>
<button class="tab-button" onclick="showTab(event, 'comparison')">🔄 Week Comparison</button>
<button class="tab-button" onclick="showTab(event, 'distribution')">📉 Distribution Analysis</button>
</div>
"""]

        # Overview tab
        parts.append("""
<div id="overview" class="tab-content active">
<div class="info-box">
<strong>📌 Overview:</strong> Summary statistics and key metrics for the current week.
</div>
""")
        parts.append(embed('summary_table', 'summary-table'))

        # Historic tab
        parts.append("""
</div>
<div id="historic" class="tab-content">
<div class="info-box">
<strong>📈 Historic Trends:</strong> Chlorophyll Index values over time for each field by season.
This data comes from the cumulative CI extraction (RDS file) and shows long-term patterns.
</div>
""")
        parts.append(embed('historic_trend', 'historic-trend'))

        # Current week tab
        parts.append("""
</div>
<div id="current" class="tab-content">
<div class="info-box">
<strong>⏱️ Current Week Analysis:</strong> Box plots and histograms showing CI distribution
across all fields in the current week.
</div>
<div class="grid-2">
""")
        parts.append(embed('boxplot', 'boxplot'))
        parts.append(embed('histogram', 'histogram'))
        parts.append("""
</div>
<div class="graph-container">
<h3>Heatmap View</h3>
""")
        parts.append(embed('heatmap', 'heatmap'))

        # Comparison tab
        parts.append("""
</div>
</div>
<div id="comparison" class="tab-content">
<div class="info-box">
<strong>🔄 Week Comparison:</strong> Scatter plot comparing mean CI values between
week """ + str(self.previous_week) + """ and week """ + str(self.current_week) + """.
Points colored by change magnitude.
</div>
""")
        parts.append(embed('scatter', 'scatter'))

        # Distribution tab (the histogram is embedded a second time, so it
        # needs its own div id).
        parts.append("""
</div>
<div id="distribution" class="tab-content">
<div class="info-box">
<strong>📉 Distribution Analysis:</strong> Statistical distribution of CI values
across all pixels in all fields for the current week.
</div>
""")
        parts.append(embed('histogram', 'histogram-dist'))

        # Footer and tab-switching script
        parts.append("""
</div>
<footer>
<p>Generated on """ + datetime.now().strftime("%Y-%m-%d %H:%M:%S") + """</p>
<p>Data source: ESA Weekly CI Mosaics | Historic data from RDS extraction</p>
</footer>
</div>
<script>
function showTab(evt, tabName) {
// Hide all tab contents
const contents = document.querySelectorAll('.tab-content');
contents.forEach(content => content.classList.remove('active'));
// Remove active class from all buttons
const buttons = document.querySelectorAll('.tab-button');
buttons.forEach(button => button.classList.remove('active'));
// Show selected tab
document.getElementById(tabName).classList.add('active');
// Add active class to clicked button
evt.currentTarget.classList.add('active');
// Trigger resize to redraw plotly charts
setTimeout(() => window.dispatchEvent(new Event('resize')), 100);
}
// Populate dropdowns from data
function initializeControls() {
// This would be populated from the data
// For now, just showing the structure
}
document.addEventListener('DOMContentLoaded', initializeControls);
</script>
</body>
</html>
""")
        return "".join(parts)
def main(argv=None):
    """Main entry point.

    Parses the command line, builds a ``CIGraphsDashboard`` and writes the
    HTML dashboard.

    Args:
        argv: Optional argument list (without the program name). ``None``
            makes argparse read ``sys.argv``.
    """
    parser = argparse.ArgumentParser(
        description='Generate interactive CI graphs dashboard'
    )
    parser.add_argument(
        'project',
        nargs='?',
        default='esa',
        help='Project name (default: esa)'
    )
    parser.add_argument(
        '--current-week',
        type=int,
        default=43,
        help='Current week number (default: 43)'
    )
    parser.add_argument(
        '--previous-week',
        type=int,
        default=None,
        help='Previous week number (default: the week before --current-week)'
    )
    parser.add_argument(
        '--output-dir',
        default='output',
        help='Output directory (default: output)'
    )
    args = parser.parse_args(argv)

    # Derive the comparison week from the current week when it is not given;
    # a fixed default would compare against an unrelated week as soon as
    # --current-week is overridden. With the default current week (43) this
    # still yields 42. Week 1 has no predecessor in the same year, so the
    # comparison is skipped there.
    previous_week = args.previous_week
    if previous_week is None and args.current_week > 1:
        previous_week = args.current_week - 1

    # Create dashboard
    dashboard = CIGraphsDashboard(
        project=args.project,
        current_week=args.current_week,
        previous_week=previous_week,
        output_dir=args.output_dir
    )

    # Generate HTML
    output_file = dashboard.generate_html()
    print(f"\n✅ Dashboard successfully generated: {output_file}")
# Run the generator only when executed as a script, not when imported.
if __name__ == "__main__":
    main()