Datasets¶
CceeOpenDataEnergyGc(ccee)
¶
Class used for handling CCEE Energy at Gravity Center data. It can be accessed via ccee.opendata.energygc.
Source code in echo_ons/ccee_root.py
def __init__(self, ccee: e_o.Ccee) -> None:
    """Store the top level Ccee object this handler works through.

    Parameters
    ----------
    ccee : Ccee
        Top level object carrying all functionality and the connection handler.

    Raises
    ------
    ValueError
        If `ccee` is not an instance of Ccee.
    """
    # refuse anything that is not the expected top level object
    is_ccee = isinstance(ccee, e_o.Ccee)
    if not is_ccee:
        raise ValueError(f"ccee must be of type Ccee, not {type(ccee)}")

    # kept as a private attribute; the other methods reach the API and database through it
    self._ccee: e_o.Ccee = ccee
get(period, spes=None, cegs=None, columns='spe')
¶
Get the Energy at Gravity Center df.
Values are returned in MWh per month.
Parameters:
-
period
¶DateTimeRange
) –Period to get the data for. As the data is monthly, the start and end dates will be adjusted to the first and last day of the month.
-
spes
¶list[str] | None
, default:None
) –List of SPEs to filter by. It is the name as registered in performance database. If set to None will get all SPEs, by default None
If both
spes
and cegs
are None, will get all SPEs. This cannot be used with
cegs
at the same time. -
cegs
¶list[str] | None
, default:None
) –List of CEGs to filter by. This will search for the CEG number in the data, allowing to get data for SPEs that are not in the performance database. If set to None will get all CEGs, by default None
This cannot be used with
spes
at the same time. -
columns
¶Literal['spe', 'ceg', 'cod_ativo', 'sigla_ativo']
, default:'spe'
) –What value will be used as columns names in the DataFrame. Options are: - "spe": SPE name as in the performance database. Only available if
spes
is not None. - "ceg": CEG number. - "cod_ativo": Site code. - "sigla_ativo": Site acronym.
Returns:
-
DataFrame
–DataFrame with the data.
Source code in echo_ons/ccee_opendata_energy_gc.py
def get(
    self,
    period: DateTimeRange,
    spes: list[str] | None = None,
    cegs: list[str] | None = None,
    columns: Literal["spe", "ceg", "cod_ativo", "sigla_ativo"] = "spe",
) -> DataFrame:
    """Get the Energy at Gravity Center df.

    Values are returned in MWh per month.

    Parameters
    ----------
    period : DateTimeRange
        Period to get the data for. As the data is monthly, the start and end dates are widened to the first and
        last day of the month. The object passed in is not modified.
    spes : list[str] | None, optional
        List of SPEs to filter by. It is the name as registered in performance database. By default None.

        If both `spes` and `cegs` are None, will get all SPEs that have the `ons_spe_key` attribute.

        This cannot be used with `cegs` at the same time.
    cegs : list[str] | None, optional
        List of CEGs to filter by. This will search for the CEG number in the data, allowing to get data for SPEs that
        are not in the performance database. By default None.

        This cannot be used with `spes` at the same time.
    columns : Literal["spe", "ceg", "cod_ativo", "sigla_ativo"], optional
        What value will be used as columns names in the DataFrame. Options are:

        - "spe": SPE name as in the performance database. Not available when only `cegs` is given.
        - "ceg": CEG number.
        - "cod_ativo": Site code.
        - "sigla_ativo": Site acronym.

        By default "spe".

    Returns
    -------
    DataFrame
        DataFrame with the data, indexed by month start date with one column per requested identifier.

    Raises
    ------
    ValueError
        If an argument has the wrong type or value, if `spes` and `cegs` are both given, if `columns` is "spe"
        while only `cegs` is given, or if a requested SPE is missing from the performance database or lacks the
        `ons_spe_key` attribute.
    """
    # validating input
    if not isinstance(period, DateTimeRange):
        raise ValueError(f"period must be a DateTimeRange object, not {type(period)}.")
    if spes is not None and not isinstance(spes, list):
        raise ValueError(f"spes must be a list, not {type(spes)}.")
    if cegs is not None and not isinstance(cegs, list):
        raise ValueError(f"cegs must be a list, not {type(cegs)}.")
    if columns not in ["spe", "ceg", "cod_ativo", "sigla_ativo"]:
        raise ValueError(f"columns must be 'spe', 'ceg', 'cod_ativo' or 'sigla_ativo', not {columns}.")
    # the two filters are documented as mutually exclusive; without this check cegs would be silently discarded
    if spes is not None and cegs is not None:
        raise ValueError("spes and cegs cannot be used at the same time.")

    # adjusting period to the first day of the month at 00:00:00 and the last day of the month at 23:59:59
    # local variables are used so the caller's period object is left untouched
    # microsecond must be zeroed as well, otherwise the first month (stamped exactly at 00:00:00) is filtered out below
    start = period.start.replace(day=1, hour=0, minute=0, second=0, microsecond=0)
    end = period.end.replace(day=1) + relativedelta(months=1, days=-1, hour=23, minute=59, second=59)

    # getting SPEs in case spes and cegs are None
    if spes is None and cegs is None:
        all_objs = self._ccee._perfdb.objects.instances.get(  # noqa: SLF001
            object_models=["wind_farm", "solar_farm"],
            output_type="DataFrame",
            get_attributes=True,
        )
        # SPEs without ons_spe_key cannot be matched to a CEG, so they are only reported and then skipped
        has_key = all_objs["ons_spe_key"].notna()
        if not has_key.all():
            wrong_spes = all_objs[~has_key].index.to_list()
            logger.warning(f"The following SPEs do not have the ons_spe_key attribute: {wrong_spes}")
        spes = all_objs[has_key].index.to_list()

    if columns == "spe" and spes is None:
        raise ValueError("Cannot use 'spe' as columns if spes is None.")

    # getting the ceg numbers from the database in case spes is not None
    if spes is not None:
        objs = self._ccee._perfdb.objects.instances.get(  # noqa: SLF001
            object_names=spes,
            object_models=["wind_farm", "solar_farm"],
            get_attributes=True,
            output_type="DataFrame",
        )
        # validating that the SPEs exist
        wrong_spes = set(spes) - set(objs.index)
        if wrong_spes:
            raise ValueError(f"The following SPEs do not exist in the performance database: {wrong_spes}")
        # validating that all SPEs have ons_spe_key attribute
        if "ons_spe_key" not in objs.columns:
            raise ValueError("The SPEs do not have the ons_spe_key attribute.")
        if objs["ons_spe_key"].isna().any():
            wrong_spes = objs[objs["ons_spe_key"].isna()].index.to_list()
            raise ValueError(f"The following SPEs do not have the ons_spe_key attribute: {wrong_spes}")
        # adding .01 to the ons_spe_key to get the ceg numbers
        objs["ons_spe_key"] = objs["ons_spe_key"] + ".01"
        # getting the ceg numbers
        cegs = objs["ons_spe_key"].to_list()

    # getting list of resource names that will be used based on period
    # the last four characters of each resource name are parsed as its year
    resource_names = list(self._ccee.opendata.datasets.resources.get(dataset_name="parcela_usina_montante_mensal").keys())
    wanted_years = list(range(start.year, end.year + 1))
    resource_names = [resource_name for resource_name in resource_names if int(resource_name[-4:]) in wanted_years]

    # getting the df
    df = self._ccee.opendata.datasets.resources.values.get(
        resource_names=resource_names,
        dataset_name="parcela_usina_montante_mensal",
        filters={"CEG": cegs},
    )

    # checking if found data for all the cegs
    missing_cegs = set(cegs) - set(df["CEG"].unique())
    if missing_cegs:
        # getting the SPEs that are missing
        if spes is not None:
            spes_with_missing_cegs = objs[objs["ons_spe_key"].isin(missing_cegs)]
            logger.warning(f"The following SPEs were not found in the data: {spes_with_missing_cegs.index.to_list()} - {missing_cegs}")
        else:
            logger.warning(f"The following CEGs were not found in the data: {missing_cegs}")

    # source column that will provide the output column labels ("spe" starts from CEG and is remapped below)
    columns_mapping = {
        "spe": "CEG",
        "ceg": "CEG",
        "cod_ativo": "COD_ATIVO",
        "sigla_ativo": "SIGLA_ATIVO",
    }
    id_col = columns_mapping[columns]

    # explicit copy so the column assignments below act on an independent frame and not on a slice of the raw data
    df = df[[id_col, "MES_REFERENCIA", "GERACAO_CENTRO_GRAVIDADE"]].copy()

    # in case spes is not None, changing the columns to the SPE names
    if columns == "spe":
        ceg_remap = {ceg: spe for spe, ceg in objs["ons_spe_key"].items()}
        df[id_col] = df[id_col].map(ceg_remap)

    # converting MES_REFERENCIA (YYYYMM) to datetime
    df["MES_REFERENCIA"] = to_datetime(df["MES_REFERENCIA"], format="%Y%m")

    # renaming columns
    df = df.rename(columns={id_col: "id", "MES_REFERENCIA": "date", "GERACAO_CENTRO_GRAVIDADE": "value"})

    # dropping unwanted dates
    df = df[(df["date"] >= start) & (df["date"] <= end)]

    # pivoting the df
    df = df.pivot(index="date", columns="id", values="value")

    # converting from MWavg to MWh by multiplying by the number of hours in the month
    df = df.mul(df.index.to_series().dt.days_in_month * 24, axis=0)

    # removing column index name
    df.columns.name = None

    # checking if found the correct amount of spes/cegs
    wanted_cols = len(cegs) if spes is None else len(spes)
    if len(df.columns) != wanted_cols:
        logger.warning(f"Found data for {len(df.columns)} SPEs/CEGs, but was expecting data for {wanted_cols}.")

    # returning the df
    return df
import_database(period, spes=None, on_conflict='ignore')
¶
Imports the CCEE Energy at Gravity Center data for a given period to the database.
The values acquired from the CCEE API are in MWavg. They are converted to kWh in daily resolution. As a result, all daily values within a month are the same, and their sum equals the total energy generated in that month in kWh.
Parameters:
-
period
¶DateTimeRange
) –Desired period to import the data for. As data is in monthly resolution, the start and end dates will be adjusted to the first and last day of the month.
-
spes
¶list[str] | None
, default:None
) –List of SPEs to import the data. If set to None all will be imported. By default None
-
on_conflict
¶Literal['ignore', 'update']
, default:'ignore'
) –What to do in case of conflict. Can be one of ["ignore", "update"]. By default "ignore"
Source code in echo_ons/ccee_opendata_energy_gc.py
def import_database(
    self,
    period: DateTimeRange,
    spes: list[str] | None = None,
    on_conflict: Literal["ignore", "update"] = "ignore",
) -> None:
    """Imports the CCEE Energy at Gravity Center data for a given period to the database.

    The values acquired from the CCEE API are in MWavg. They are converted to kWh in daily resolution. As a result,
    all daily values within a month are the same, and their sum equals the total energy generated in that month in kWh.

    Parameters
    ----------
    period : DateTimeRange
        Desired period to import the data for. As data is in monthly resolution, the start and end dates will be
        adjusted to the first and last day of the month.
    spes : list[str] | None, optional
        List of SPEs to import the data. If set to None all will be imported. By default None
    on_conflict : Literal["ignore", "update"], optional
        What to do in case of conflict. Can be one of ["ignore", "update"].
        By default "ignore"

    Raises
    ------
    ValueError
        If an argument has the wrong type or value, or if any of the requested SPEs does not exist in the
        performance database.
    """
    # checking inputs
    if not isinstance(period, DateTimeRange):
        raise ValueError(f"period must be a DateTimeRange object, not {type(period)}.")
    if not isinstance(spes, list | type(None)):
        raise ValueError(f"spes must be a list or None, not {type(spes)}.")
    if on_conflict not in ["ignore", "update"]:
        raise ValueError(f"on_conflict must be 'ignore' or 'update', not {on_conflict}.")

    # getting the possible SPEs to import
    all_spes = list(self._ccee._perfdb.objects.instances.get(object_models=["wind_farm", "solar_farm"]).keys())  # noqa: SLF001
    if spes is None:
        # only fall back to every SPE when the caller did not ask for specific ones
        spes = all_spes
    else:
        wrong_spes = set(spes) - set(all_spes)
        if wrong_spes:
            raise ValueError(f"The following SPEs do not exist in the performance database: {wrong_spes}")

    # getting the data
    df = self.get(period, spes=spes)

    # skipping if no data was found
    if df.empty:
        logger.warning(f"No data found for the period {period}.")
        return

    # converting values to MWavg by dividing by the number of hours in the month
    df = df.div(df.index.to_series().dt.days_in_month * 24, axis=0)
    # converting to kWavg
    df = df.mul(1e3)

    # resampling to daily and filling all values in the month with the same value
    # creating new index with all days of the month
    imported_period = DateTimeRange(df.index.min(), df.index.max())
    imported_period.end = imported_period.end + relativedelta(months=1, days=-1)
    new_index = date_range(start=imported_period.start, end=imported_period.end, freq="D")
    # every day looks up the row of its own month start, so a month without data stays NaN
    # (a forward fill with a fixed limit would leak the previous month's value into the first days of such a month)
    df: DataFrame = df.reindex(new_index.to_period("M").to_timestamp())
    df.index = new_index

    # converting the values to kWh
    df = df.mul(24)

    # converting the DataFrame to have columns object_name, date, measurement_point (Gravity Center), energy
    df.index.name = "date"
    df = df.reset_index()
    df = df.melt(id_vars="date", var_name="object_name", value_name="energy")
    df["measurement_point"] = "Gravity Center"

    # dropping rows with NaN values in the energy column
    df = df.dropna(subset=["energy"])

    # saving the data to the database
    self._ccee._perfdb.kpis.energy.values.insert(df=df, on_conflict=on_conflict)  # noqa: SLF001

    logger.info(f"Imported CCEE Energy at Gravity Center data for the period {imported_period} to the database. SPEs: {spes}")