# Source code for operational_analysis.methods.electrical_losses

# This class defines key analytical routines for calculating electrical losses for
# a wind plant using operational data. Electrical loss is calculated per month and on
# an average annual basis by comparing monthly energy production from the turbines
# and the revenue meter

import numpy as np
import pandas as pd
from tqdm import tqdm

from operational_analysis import logging, logged_method_call


logger = logging.getLogger(__name__)


class ElectricalLosses(object):
    """
    A serial (Pandas-driven) implementation of calculating the average monthly and annual
    electrical losses at a wind plant, and their uncertainty.
    Energy output from the turbine SCADA meter and the wind plant revenue
    meter are used to estimate electrical losses.

    The approach is to first calculate daily sums of turbine and revenue meter energy over the
    plant period of record. Only those days where all turbines and the revenue meter were
    reporting for all timesteps are considered. Electrical loss is then the difference in
    total turbine energy production and meter production over those concurrent days.

    A Monte Carlo approach is applied to sample revenue meter data and SCADA data
    with a 0.5% imposed uncertainty, and one filtering parameter is sampled too.
    The uncertainty in estimated electrical losses is quantified as standard deviation
    of the distribution of losses obtained from the MC sampling.

    In the case that meter data is not provided on a daily or sub-daily basis (e.g. monthly),
    a different approach is implemented. The sum of daily turbine energy is corrected for any
    missing reported energy data from the turbines based on the ratio of expected number of
    data counts per day to the actual. Daily corrected sum of turbine energy is then summed
    on a monthly basis. Electrical loss is then the difference between total corrected
    turbine energy production and meter production over those concurrent months.
    """

    @logged_method_call
    def __init__(self, plant, UQ=False, num_sim=20000):
        """
        Initialize electrical losses class with input parameters

        Args:
         plant(:obj:`PlantData object`): PlantData object from which ElectricalLosses should draw data.
         UQ:(:obj:`bool`): choice whether to perform (True) or not (False, default) uncertainty
            quantification. The value is interpreted by truthiness.
         num_sim:(:obj:`int`): number of Monte Carlo simulations; only used when UQ is enabled,
            otherwise a single deterministic calculation is run.
        """
        logger.info("Initializing Electrical Losses Object")

        # Without UQ a single deterministic pass is enough, so num_sim is forced to 1
        if UQ:
            logger.info("Note: uncertainty quantification will be performed in the calculation")
            self.num_sim = num_sim
        else:
            logger.info("Note: uncertainty quantification will NOT be performed in the calculation")
            self.num_sim = 1

        self.UQ = UQ
        self._plant = plant
        self._min_per_hour = 60  # Minutes per hour converter
        self._hours_per_day = 24  # Hours per day converter

    @logged_method_call
    def run(
        self, uncertainty_meter=0.005, uncertainty_scada=0.005, uncertainty_correction_thresh=0.95
    ):
        """
        Run the electrical loss calculation in order by calling this function.

        Args:
            uncertainty_meter(:obj:`float`): uncertainty imposed to revenue meter data (for UQ = True case)
            uncertainty_scada(:obj:`float`): uncertainty imposed to scada data (for UQ = True case)
            uncertainty_correction_thresh(:obj:`float` or :obj:`tuple`): Data availability thresholds
                (fractions) under which months should be eliminated. This should be a (low, high)
                tuple in the UQ = True case, a single value when UQ = False.

        Raises:
            TypeError: if uncertainty_correction_thresh is not a tuple/list when UQ = True, or
                not a single real number when UQ = False.
            ValueError: if UQ = True and fewer than two thresholds are supplied.

        Returns:
            (None)
        """
        # Validate explicitly instead of with `assert`, which is stripped under `python -O`
        if self.UQ:
            if not isinstance(uncertainty_correction_thresh, (tuple, list)):
                raise TypeError(
                    f"uncertainty_correction_thresh must be a (low, high) tuple for UQ={self.UQ}"
                )
            if len(uncertainty_correction_thresh) < 2:
                raise ValueError(
                    "uncertainty_correction_thresh must provide both a lower and an upper bound "
                    f"for UQ={self.UQ}"
                )
        elif isinstance(uncertainty_correction_thresh, bool) or not isinstance(
            uncertainty_correction_thresh, (int, float, np.integer, np.floating)
        ):
            raise TypeError(
                f"uncertainty_correction_thresh must be a single number for UQ={self.UQ}"
            )

        self.uncertainty_correction_thresh = np.array(
            uncertainty_correction_thresh, dtype=np.float64
        )

        # The imposed measurement uncertainties are only meaningful when sampling
        if self.UQ:
            self.uncertainty_meter = uncertainty_meter
            self.uncertainty_scada = uncertainty_scada

        # Process SCADA data to daily sums
        self.process_scada()

        # Process meter data to daily sums (if time frequency is less than monthly)
        self._monthly_meter = True  # Keep track of reported meter data frequency
        if self._plant._meter_freq not in ("MS", "M", "1MS"):
            self.process_meter()
            self._monthly_meter = False  # Set to false if sub-monthly frequency

        # Setup Monte Carlo approach
        self.setup_inputs()

        # Calculate electrical losses, Monte Carlo approach
        self.calculate_electrical_losses()

    def setup_inputs(self):
        """
        Create and populate the data frame defining the simulation parameters.
        This data frame is stored as self._inputs, with one row per simulation.
        Also allocates self._electrical_losses with shape (num_sim, 1).

        Args:
            (None)

        Returns:
            (None)
        """
        if self.UQ:
            # NOTE: the order of the random draws is kept fixed so that seeded runs
            # remain reproducible.
            inputs = {
                "meter_data_fraction": np.random.normal(1, self.uncertainty_meter, self.num_sim),
                "scada_data_fraction": np.random.normal(1, self.uncertainty_scada, self.num_sim),
                # Thresholds are drawn on a 0.001 grid between the two bounds
                "correction_threshold": np.random.randint(
                    self.uncertainty_correction_thresh[0] * 1000,
                    self.uncertainty_correction_thresh[1] * 1000,
                    self.num_sim,
                )
                / 1000.0,
            }
            self._inputs = pd.DataFrame(inputs)
        else:
            # Single deterministic run: no perturbation of meter or SCADA energy
            inputs = {
                "meter_data_fraction": 1,
                "scada_data_fraction": 1,
                "correction_threshold": self.uncertainty_correction_thresh,
            }
            self._inputs = pd.DataFrame(inputs, index=[0])

        self._electrical_losses = np.empty([self.num_sim, 1])

    @logged_method_call
    def process_scada(self):
        """
        Calculate daily sum of turbine energy only for days when all turbines are reporting
        at all time steps.

        Populates self._scada_sum (per-timestamp sums and counts), self._scada_daily (daily
        sums, counts, availability-corrected energy and availability fraction) and
        self._scada_sub (the subset of days with complete reporting).

        Args:
            (None)

        Returns:
            (None)
        """
        logger.info("Processing SCADA data")

        scada_df = self._plant._scada.df

        # Sum up SCADA data energy per timestamp and count number of entries
        per_timestamp = scada_df.groupby(scada_df.index)[["energy_kwh"]]
        scada_sum = per_timestamp.sum()
        scada_sum["count"] = per_timestamp.count()
        self._scada_sum = scada_sum

        # Calculate daily sum of all turbine energy production and count number of entries
        per_day = scada_sum.resample("D")
        self._scada_daily = per_day["energy_kwh"].sum().to_frame()
        self._scada_daily.columns = ["turbine_energy_kwh"]
        self._scada_daily["count"] = per_day["count"].sum()

        # Specify expected count provided all turbines reporting
        expected_count = (
            self._hours_per_day
            * self._min_per_hour
            / (pd.to_timedelta(self._plant._scada_freq).total_seconds() / 60)
            * self._plant._num_turbines
        )

        # Correct sum of turbine energy for cases with missing reported data
        self._scada_daily["corrected_energy"] = (
            self._scada_daily["turbine_energy_kwh"] * expected_count / self._scada_daily["count"]
        )
        self._scada_daily["perc"] = self._scada_daily["count"] / expected_count

        # Store daily SCADA data where all turbines reporting for every time step during the day
        self._scada_sub = self._scada_daily[self._scada_daily["count"] == expected_count]

    @logged_method_call
    def process_meter(self):
        """
        Calculate daily sum of meter energy only for days when meter data is reporting at all
        time steps. The result is stored as self._meter_daily.

        Args:
            (None)

        Returns:
            (None)
        """
        logger.info("Processing meter data")

        meter_df = self._plant._meter.df

        # Sum up meter data to daily
        per_day = meter_df.resample("D")
        self._meter_daily = per_day.sum()
        self._meter_daily["mcount"] = per_day["energy_kwh"].count()

        # Specify expected count provided all timestamps reporting
        expected_mcount = (
            self._hours_per_day
            * self._min_per_hour
            / (pd.to_timedelta(self._plant._meter_freq).total_seconds() / 60)
        )

        # Keep only days on which the meter reported for every time step
        self._meter_daily = self._meter_daily[self._meter_daily["mcount"] == expected_mcount]

    @logged_method_call
    def calculate_electrical_losses(self):
        """
        Apply Monte Carlo approach to calculate electrical losses and their uncertainty based on
        the difference in the sum of turbine and metered energy over the compiled days.

        The loss of simulation n is stored in self._electrical_losses[n] as
        1 - (total meter energy / total turbine energy).

        Args:
            (None)

        Returns:
            (None)
        """
        logger.info("Calculating electrical losses")

        meter_df = self._plant._meter.df

        # Everything that does not depend on the sampled inputs is computed once up front
        # rather than once per simulation.
        if self._monthly_meter:
            # Sum the corrected daily turbine energy to monthly
            scada_monthly_all = (
                self._scada_daily.resample("MS")["corrected_energy"].sum().to_frame()
            )
            scada_monthly_all.columns = ["turbine_energy_kwh"]

            # Determine availability for each month represented
            scada_monthly_all["count"] = self._scada_sum.resample("MS")["count"].sum()
            scada_monthly_all["expected_count_monthly"] = (
                scada_monthly_all.index.daysinmonth
                * self._hours_per_day
                * self._min_per_hour
                / (pd.to_timedelta(self._plant._scada_freq).total_seconds() / 60)
                * self._plant._num_turbines
            )
            scada_monthly_all["perc"] = (
                scada_monthly_all["count"] / scada_monthly_all["expected_count_monthly"]
            )
        else:
            # Sub-monthly meter data: merge the daily data for which all turbines are
            # reporting at all timestamps ('self._scada_sub' only contains full reported data).
            # Drop non-concurrent timestamps and total over the concurrent period of record.
            merge_df = self._meter_daily.join(self._scada_sub).dropna()
            merge_sum = merge_df.sum(axis=0)

        # Loop through number of simulations, calculate losses each time, store results
        for n in tqdm(np.arange(self.num_sim)):
            self._run = self._inputs.loc[n]

            if self._monthly_meter:
                # Filter out months in which there was less than x% of total running
                # (all turbines at all timesteps); the threshold is sampled per simulation
                scada_monthly = scada_monthly_all.loc[
                    scada_monthly_all["perc"] >= self._run.correction_threshold, :
                ]
                # Drop non-concurrent timestamps and get total sums over concurrent months
                merge_df = meter_df.join(scada_monthly).dropna()
                merge_sum = merge_df.sum(axis=0)

            self._merge_df = merge_df

            # Calculate electrical loss from difference of sum of turbine and meter energy
            self._total_turbine_energy = (
                merge_sum["turbine_energy_kwh"] * self._run.scada_data_fraction
            )
            self._total_meter_energy = merge_sum["energy_kwh"] * self._run.meter_data_fraction

            self._electrical_losses[n] = 1 - self._total_meter_energy / self._total_turbine_energy