Skip to content

Datasets

CceeOpenDataEnergyGc(ccee)

Class used for handling CCEE Energy at Gravity Center. Can be accessed via ccee.opendata.energygc.

Source code in echo_ons/ccee_root.py
def __init__(self, ccee: e_o.Ccee) -> None:
    """Validate and store the top level Ccee object on the instance.

    Parameters
    ----------
    ccee : Ccee
        Top level object carrying all functionality and the connection handler.

    Raises
    ------
    ValueError
        If `ccee` is not an instance of Ccee.
    """
    # accept only a real Ccee handler; anything else is rejected below
    if isinstance(ccee, e_o.Ccee):
        self._ccee: e_o.Ccee = ccee
        return

    raise ValueError(f"ccee must be of type Ccee, not {type(ccee)}")

get(period, spes=None, cegs=None, columns='spe')

Get the Energy at Gravity Center data as a DataFrame.

Values are returned in MWh per month.

Parameters:

  • period

    (DateTimeRange) –

    Period to get the data for. As the data is monthly, the start and end dates will be adjusted to the first and last day of the month.

  • spes

    (list[str] | None, default: None ) –

    List of SPEs to filter by, using the names registered in the performance database. If set to None, all SPEs are retrieved. Defaults to None.

    If both spes and cegs are None, will get all SPEs.

    This cannot be used with cegs at the same time.

  • cegs

    (list[str] | None, default: None ) –

    List of CEGs to filter by. The CEG numbers are searched for directly in the data, which makes it possible to get data for SPEs that are not in the performance database. If set to None, all CEGs are retrieved. Defaults to None.

    This cannot be used with spes at the same time.

  • columns

    (Literal['spe', 'ceg', 'cod_ativo', 'sigla_ativo'], default: 'spe' ) –

    Which value will be used as the column names of the DataFrame. Options are:

    - "spe": SPE name as in the performance database. Only available if spes is not None.
    - "ceg": CEG number.
    - "cod_ativo": Site code.
    - "sigla_ativo": Site acronym.

Returns:

  • DataFrame

    DataFrame with the data.

Source code in echo_ons/ccee_opendata_energy_gc.py
def get(
    self,
    period: DateTimeRange,
    spes: list[str] | None = None,
    cegs: list[str] | None = None,
    columns: Literal["spe", "ceg", "cod_ativo", "sigla_ativo"] = "spe",
) -> DataFrame:
    """Get the Energy at Gravity Center data as a DataFrame.

    Values are returned in MWh per month.

    Parameters
    ----------
    period : DateTimeRange
        Period to get the data for. As the data is monthly, the start and end dates are adjusted to the first and
        last day of the month. The adjustment is done on local copies; the object passed in is not modified.
    spes : list[str] | None, optional
        List of SPEs to filter by. It is the name as registered in performance database. If set to None will get all SPEs, by default None

        If both `spes` and `cegs` are None, will get all SPEs that have the `ons_spe_key` attribute.

        This cannot be used with `cegs` at the same time.
    cegs : list[str] | None, optional
        List of CEGs to filter by. This will search for the CEG number in the data, allowing to get data for SPEs that are not in the performance database. If set to None will get all CEGs, by default None

        This cannot be used with `spes` at the same time.
    columns : Literal["spe", "ceg", "cod_ativo", "sigla_ativo"], optional
        What value will be used as columns names in the DataFrame. Options are:
        - "spe": SPE name as in the performance database. Only available if `spes` is not None.
        - "ceg": CEG number.
        - "cod_ativo": Site code.
        - "sigla_ativo": Site acronym.

        By default "spe"

    Returns
    -------
    DataFrame
        DataFrame with the data, indexed by month with one column per requested identifier.

    Raises
    ------
    ValueError
        If any argument has the wrong type or value, if `spes` and `cegs` are both provided, if
        ``columns="spe"`` is requested without SPEs, or if a requested SPE does not exist in the
        performance database or lacks the `ons_spe_key` attribute.
    """
    # validating input
    if not isinstance(period, DateTimeRange):
        raise ValueError(f"period must be a DateTimeRange object, not {type(period)}.")
    if spes is not None and not isinstance(spes, list):
        raise ValueError(f"spes must be a list, not {type(spes)}.")
    if cegs is not None and not isinstance(cegs, list):
        raise ValueError(f"cegs must be a list, not {type(cegs)}.")
    if columns not in ["spe", "ceg", "cod_ativo", "sigla_ativo"]:
        raise ValueError(f"columns must be 'spe', 'ceg', 'cod_ativo' or 'sigla_ativo', not {columns}.")
    # the two filters are documented as mutually exclusive; previously cegs was silently overwritten
    if spes is not None and cegs is not None:
        raise ValueError("spes and cegs cannot be used at the same time.")

    # adjusting period to the first day of the month at 00:00:00 and the last day of the month at 23:59:59
    # local variables are used so the caller's DateTimeRange is not modified; microseconds are zeroed so that
    # the first month (stamped at exactly 00:00:00) is never excluded by the >= comparison below
    start = period.start.replace(day=1, hour=0, minute=0, second=0, microsecond=0)
    end = period.end.replace(day=1) + relativedelta(months=1, days=-1, hour=23, minute=59, second=59)

    # getting SPEs in case spes and cegs are None
    if spes is None and cegs is None:
        all_objs = self._ccee._perfdb.objects.instances.get(  # noqa: SLF001
            object_models=["wind_farm", "solar_farm"],
            output_type="DataFrame",
            get_attributes=True,
        )
        # checking if any SPE does not have ons_spe_key; those are skipped with a warning
        without_key = all_objs["ons_spe_key"].isna()
        if without_key.any():
            wrong_spes = all_objs[without_key].index.to_list()
            logger.warning(f"The following SPEs do not have the ons_spe_key attribute: {wrong_spes}")
        spes = all_objs[~without_key].index.to_list()

    if columns == "spe" and spes is None:
        raise ValueError("Cannot use 'spe' as columns if spes is None.")

    # getting the ceg numbers from the database in case spes is not None
    if spes is not None:
        objs = self._ccee._perfdb.objects.instances.get(  # noqa: SLF001
            object_names=spes,
            object_models=["wind_farm", "solar_farm"],
            get_attributes=True,
            output_type="DataFrame",
        )
        # validating that the SPEs exist
        wrong_spes = set(spes) - set(objs.index)
        if wrong_spes:
            raise ValueError(f"The following SPEs do not exist in the performance database: {wrong_spes}")
        # validating that all SPEs have ons_spe_key attribute
        if "ons_spe_key" not in objs.columns:
            raise ValueError("The SPEs do not have the ons_spe_key attribute.")
        if objs["ons_spe_key"].isna().any():
            wrong_spes = objs[objs["ons_spe_key"].isna()].index.to_list()
            raise ValueError(f"The following SPEs do not have the ons_spe_key attribute: {wrong_spes}")

        # adding .01 to the ons_spe_key to get the ceg numbers
        objs["ons_spe_key"] = objs["ons_spe_key"] + ".01"
        # getting the ceg numbers
        cegs = objs["ons_spe_key"].to_list()

    # getting list of resource names that will be used based on period
    # the last four characters of each resource name are parsed as the year it covers
    resource_names = list(self._ccee.opendata.datasets.resources.get(dataset_name="parcela_usina_montante_mensal").keys())
    wanted_years = set(range(start.year, end.year + 1))
    resource_names = [resource_name for resource_name in resource_names if int(resource_name[-4:]) in wanted_years]

    # getting the df
    df = self._ccee.opendata.datasets.resources.values.get(
        resource_names=resource_names,
        dataset_name="parcela_usina_montante_mensal",
        filters={"CEG": cegs},
    )

    # checking if found data for all the cegs
    missing_cegs = set(cegs) - set(df["CEG"].unique())
    if missing_cegs:
        # getting the SPEs that are missing
        if spes is not None:
            spes_with_missing_cegs = objs[objs["ons_spe_key"].isin(missing_cegs)]
            logger.warning(f"The following SPEs were not found in the data: {spes_with_missing_cegs.index.to_list()} - {missing_cegs}")
        else:
            logger.warning(f"The following CEGs were not found in the data: {missing_cegs}")

    columns_mapping = {
        "spe": "CEG",
        "ceg": "CEG",
        "cod_ativo": "COD_ATIVO",
        "sigla_ativo": "SIGLA_ATIVO",
    }
    id_column = columns_mapping[columns]
    # explicit copy so the column assignments below act on an independent frame instead of a slice of the raw data
    df = df[[id_column, "MES_REFERENCIA", "GERACAO_CENTRO_GRAVIDADE"]].copy()

    # in case columns is "spe", replacing the CEG numbers with the SPE names
    if columns == "spe":
        ceg_remap = {ceg: spe_name for spe_name, ceg in objs["ons_spe_key"].to_dict().items()}
        df[id_column] = df[id_column].map(ceg_remap)

    # converting MES_REFERENCIA (YYYYMM) to datetime
    df["MES_REFERENCIA"] = to_datetime(df["MES_REFERENCIA"], format="%Y%m")

    # renaming columns
    df = df.rename(columns={id_column: "id", "MES_REFERENCIA": "date", "GERACAO_CENTRO_GRAVIDADE": "value"})

    # dropping unwanted dates
    df = df[(df["date"] >= start) & (df["date"] <= end)]

    # pivoting the df
    df = df.pivot(index="date", columns="id", values="value")

    # converting from MWavg to MWh by multiplying by the number of hours in the month
    df = df.mul(df.index.to_series().dt.days_in_month * 24, axis=0)

    # removing column index name
    df.columns.name = None

    # checking if found the correct amount of spes/cegs
    wanted_cols = len(cegs) if spes is None else len(spes)
    if len(df.columns) != wanted_cols:
        logger.warning(f"Found data for {len(df.columns)} SPEs/CEGs, but was expecting data for {wanted_cols}.")

    # returning the df
    return df

import_database(period, spes=None, on_conflict='ignore')

Imports the CCEE Energy at Gravity Center data for a given period to the database.

The values acquired from the CCEE API are in MWavg. They are converted to kWh at daily resolution. As a result, every day of a given month has the same value, and the sum of the daily values of a month equals the total energy generated in that month in kWh.

Parameters:

  • period

    (DateTimeRange) –

    Desired period to import the data for. As data is in monthly resolution, the start and end dates will be adjusted to the first and last day of the month.

  • spes

    (list[str] | None, default: None ) –

    List of SPEs to import data for. If set to None, all SPEs are imported. Defaults to None.

  • on_conflict

    (Literal['ignore', 'update'], default: 'ignore' ) –

    What to do in case of conflict. Can be one of ["ignore", "update"]. By default "ignore"

Source code in echo_ons/ccee_opendata_energy_gc.py
def import_database(
    self,
    period: DateTimeRange,
    spes: list[str] | None = None,
    on_conflict: Literal["ignore", "update"] = "ignore",
) -> None:
    """Imports the CCEE Energy at Gravity Center data for a given period to the database.

    The values acquired from the CCEE API are in MWavg. They are converted to kWh in daily resolution. As a result, all daily values of a month are the same, and the sum of the daily values of the month equals the total energy generated in the month in kWh.

    Parameters
    ----------
    period : DateTimeRange
        Desired period to import the data for. As data is in monthly resolution, the start and end dates will be adjusted to the first and last day of the month.
    spes : list[str] | None, optional
        List of SPEs to import the data. If set to None all will be imported. By default None
    on_conflict : Literal["ignore", "update"], optional
        What to do in case of conflict. Can be one of ["ignore", "update"].
        By default "ignore"

    Raises
    ------
    ValueError
        If any argument has the wrong type or value, or if any of the requested SPEs does not exist in the
        performance database.
    """
    # checking inputs
    if not isinstance(period, DateTimeRange):
        raise ValueError(f"period must be a DateTimeRange object, not {type(period)}.")
    if not isinstance(spes, list | type(None)):
        raise ValueError(f"spes must be a list or None, not {type(spes)}.")
    if on_conflict not in ["ignore", "update"]:
        raise ValueError(f"on_conflict must be 'ignore' or 'update', not {on_conflict}.")

    # getting the possible SPEs to import
    all_spes = list(self._ccee._perfdb.objects.instances.get(object_models=["wind_farm", "solar_farm"]).keys())  # noqa: SLF001
    if spes is None:
        # only fall back to every SPE when the caller did not ask for a subset
        # (previously the requested subset was always overwritten)
        spes = all_spes
    else:
        wrong_spes = set(spes) - set(all_spes)
        if wrong_spes:
            raise ValueError(f"The following SPEs do not exist in the performance database: {wrong_spes}")

    # getting the data (MWh per month, one column per SPE)
    df = self.get(period, spes=spes)

    # skipping if no data was found
    if df.empty:
        logger.warning(f"No data found for the period {period}.")
        return

    # converting values to MWavg by dividing by the number of hours in the month
    df = df.div(df.index.to_series().dt.days_in_month * 24, axis=0)
    # converting to kWavg
    df = df.mul(1e3)

    # resampling to daily and filling all values in the month with the same value
    # creating new index with all days of the month
    imported_period = DateTimeRange(df.index.min(), df.index.max())
    imported_period.end = imported_period.end + relativedelta(months=1, days=-1)

    new_index = date_range(start=imported_period.start, end=imported_period.end, freq="D")
    df = df.reindex(new_index)
    # forward filling inside each calendar month only, so a month without data stays empty instead of
    # inheriting the first days from a shorter previous month (a fixed ffill limit of 30 allowed that)
    df = df.groupby(df.index.to_period("M")).ffill()

    # converting the values to kWh
    df = df.mul(24)

    # converting the DataFrame to have columns object_name, date, measurement_point (Gravity Center), energy
    df.index.name = "date"
    df = df.reset_index()
    df = df.melt(id_vars="date", var_name="object_name", value_name="energy")
    df["measurement_point"] = "Gravity Center"

    # dropping rows with NaN values in the energy column
    df = df.dropna(subset=["energy"])

    # saving the data to the database
    self._ccee._perfdb.kpis.energy.values.insert(df=df, on_conflict=on_conflict)  # noqa: SLF001

    logger.info(f"Imported CCEE Energy at Gravity Center data for the period {imported_period} to the database. SPEs: {spes}")