Environment API Reference

General sequence environment

`AutoregressiveSequenceEnvironment`

Bases: SequenceEnvironment

Class for sequence environments with a non-fixed length and autoregressive generation.

Source code in gfnx/environment/sequence.py

class AutoregressiveSequenceEnvironment(SequenceEnvironment):
    """
    Class for sequence environments with a non-fixed length and
    autoregressive generation.

    The forward action space has ``nchar + 1`` entries: one per character
    plus a stop action that writes ``eos_token``. The backward action space
    has a single entry that removes the most recently written token.
    """

    def __init__(
        self,
        reward_module: TRewardModule,
        max_length: int,
        nchar: int,
        ntoken: int,
        *,
        bos_token: int,
        eos_token: int,
        pad_token: int,
    ) -> None:
        """Forward all arguments to the parent class and register the stop action."""
        super().__init__(
            reward_module,
            max_length,
            nchar,
            ntoken,
            bos_token=bos_token,
            eos_token=eos_token,
            pad_token=pad_token,
        )
        self.stop_action = nchar  # Action corresponding to generating EOS token

    def _single_transition(
        self,
        state: EnvState,
        action: TAction,
        env_params: EnvParams,
    ) -> tuple[EnvState, TDone, dict[Any, Any]]:
        """
        Apply one forward action to a single state.

        Returns the next state, its ``is_terminal`` flag and an empty info
        dict. A state that is already terminal keeps its tokens and is only
        flagged with ``is_pad=True``.
        """
        is_terminal = state.is_terminal

        def get_next_state_terminal(state: EnvState, action: TAction) -> EnvState:
            # Nothing left to generate: keep the tokens, mark the step as padding.
            return state.replace(is_pad=True)

        def get_next_state_not_terminal(state: EnvState, action: TAction) -> EnvState:
            # The write position is the number of non-pad tokens
            # (assumes padding only ever sits at the end of the sequence).
            num_pad = jnp.sum(state.tokens == self.pad_token, axis=-1)
            pos_to_update = self.max_length - num_pad
            # The stop action is stored as EOS; every other action is its own token id.
            action_to_token = jnp.where(action != self.stop_action, action, self.eos_token)
            next_tokens = state.tokens.at[pos_to_update].set(action_to_token)
            is_done = jnp.logical_or(
                jnp.all(next_tokens != self.pad_token),  # All pad tokens are replaced by characters
                action == self.stop_action,  # EOS token is generated
            )
            return EnvState(
                tokens=next_tokens,
                is_terminal=is_done,
                is_initial=False,
                is_pad=False,
            )

        next_state: EnvState = jax.lax.cond(
            is_terminal, get_next_state_terminal, get_next_state_not_terminal, state, action
        )

        return next_state, next_state.is_terminal, {}

    def _single_backward_transition(
        self, state: EnvState, backward_action: TBackwardAction, env_params: EnvParams
    ) -> tuple[EnvState, TDone, dict[Any, Any]]:
        """
        Environment-specific step backward transition. Rewards always zero!

        Returns the previous state, its ``is_initial`` flag and an empty info
        dict. A state that is already initial keeps its tokens and is only
        flagged with ``is_pad=True``.
        """
        is_initial = state.is_initial

        def get_prev_state_init_state(
            state: EnvState, backward_action: TBackwardAction
        ) -> EnvState:
            # Nothing left to remove: keep the tokens, mark the step as padding.
            return state.replace(is_pad=True)

        def get_prev_state_not_init_state(
            state: EnvState, backward_action: TBackwardAction
        ) -> EnvState:
            # Overwrite the last non-pad token with padding.
            num_pad = jnp.sum(state.tokens == self.pad_token, axis=-1)
            last_pos = self.max_length - num_pad
            prev_tokens = state.tokens.at[last_pos - 1].set(self.pad_token)
            is_initial = jnp.all(prev_tokens == self.pad_token)
            return state.replace(
                tokens=prev_tokens,
                is_terminal=False,
                is_initial=is_initial,
            )

        prev_state: EnvState = jax.lax.cond(
            is_initial,
            get_prev_state_init_state,
            get_prev_state_not_init_state,
            state,
            backward_action,
        )
        return prev_state, prev_state.is_initial, {}

    def get_backward_action(
        self,
        state: EnvState,
        forward_action: chex.Array,
        next_state: EnvState,
        env_params: EnvParams,
    ) -> chex.Array:
        """Returns backward action given the complete characterization
        of the forward transition.

        There is only one backward action, so the result is a vector of
        zeros with one entry per environment and the dtype of
        ``forward_action``.
        """
        num_envs = state.is_pad.shape[0]
        return jnp.zeros((num_envs,), dtype=forward_action.dtype)

    def get_forward_action(
        self,
        state: EnvState,
        backward_action: chex.Array,
        prev_state: EnvState,
        env_params: EnvParams,
    ) -> chex.Array:
        """Returns forward action given the complete characterization
        of the backward transition.

        The forward action is read off the last non-pad token of ``state``,
        with ``eos_token`` mapped back to the stop action. The result keeps
        the leading (batch) axes of ``state.tokens``, including a batch of
        size one.
        """
        num_pad = jnp.sum(state.tokens == self.pad_token, axis=-1)
        last_pos = self.max_length - num_pad
        all_actions = jnp.where(state.tokens != self.eos_token, state.tokens, self.stop_action)
        # Squeeze only the gathered axis; a bare squeeze() would also drop a
        # batch axis of size one and return a scalar.
        actions = jnp.take_along_axis(
            all_actions, jnp.expand_dims(last_pos - 1, axis=-1), axis=-1, mode="clip"
        ).squeeze(axis=-1)
        # Clip actions to be in valid range even for invalid states, i.e., an initial state
        return jnp.clip(actions, min=0, max=self.action_space.n - 1)

    def get_invalid_mask(self, state: EnvState, env_params: EnvParams) -> chex.Array:
        """Return mask of invalid actions.

        Always all ``False`` with shape ``(num_envs, nchar + 1)``.
        """
        num_envs = state.is_pad.shape[0]
        return jnp.zeros((num_envs, self.nchar + 1), dtype=jnp.bool)

    def get_invalid_backward_mask(self, state: EnvState, env_params: EnvParams) -> chex.Array:
        """Returns mask of invalid backward actions.

        Always all ``False`` with shape ``(num_envs, 1)``.
        """
        num_envs = state.is_pad.shape[0]
        return jnp.zeros((num_envs, 1), dtype=jnp.bool)

    @property
    def action_space(self) -> spaces.Discrete:
        """Action space of the environment, consists of characters
        and a stop action corresponding to appending EOS token"""
        return spaces.Discrete(self.nchar + 1)

    @property
    def backward_action_space(self) -> spaces.Discrete:
        """Backward action space of the environment,
        only about removing the last character."""
        return spaces.Discrete(1)

`action_space` `property`

Action space of the environment, consists of characters and a stop action corresponding to appending EOS token

`backward_action_space` `property`

Backward action space of the environment, only about removing the last character.

`get_backward_action(state, forward_action, next_state, env_params)`

Returns backward action given the complete characterization of the forward transition.

Source code in gfnx/environment/sequence.py

def get_backward_action(
    self,
    state: EnvState,
    forward_action: chex.Array,
    next_state: EnvState,
    env_params: EnvParams,
) -> chex.Array:
    """Return the backward action matching a forward transition.

    The result is a vector of zeros with one entry per environment (taken
    from the length of ``state.is_pad``) and the dtype of ``forward_action``.
    """
    batch_size = state.is_pad.shape[0]
    return jnp.zeros(shape=(batch_size,), dtype=forward_action.dtype)

`get_forward_action(state, backward_action, prev_state, env_params)`

Returns forward action given the complete characterization of the backward transition.

Source code in gfnx/environment/sequence.py

def get_forward_action(
    self,
    state: EnvState,
    backward_action: chex.Array,
    prev_state: EnvState,
    env_params: EnvParams,
) -> chex.Array:
    """Returns forward action given the complete characterization
    of the backward transition.

    The forward action is read off the last non-pad token of ``state``, with
    ``eos_token`` mapped back to the stop action. The result keeps the
    leading (batch) axes of ``state.tokens``, including a batch of size one.
    """
    num_pad = jnp.sum(state.tokens == self.pad_token, axis=-1)
    last_pos = self.max_length - num_pad
    all_actions = jnp.where(state.tokens != self.eos_token, state.tokens, self.stop_action)
    # Squeeze only the gathered axis; a bare squeeze() would also drop a
    # batch axis of size one and return a scalar.
    actions = jnp.take_along_axis(
        all_actions, jnp.expand_dims(last_pos - 1, axis=-1), axis=-1, mode="clip"
    ).squeeze(axis=-1)
    # Clip actions to be in valid range even for invalid states, i.e., an initial state
    return jnp.clip(actions, min=0, max=self.action_space.n - 1)

`get_invalid_backward_mask(state, env_params)`

Returns mask of invalid backward actions.

Source code in gfnx/environment/sequence.py

def get_invalid_backward_mask(self, state: EnvState, env_params: EnvParams) -> chex.Array:
    """Return the invalid-backward-action mask, which is all ``False``.

    The mask has one row per environment and a single column.
    """
    batch_size = state.is_pad.shape[0]
    mask_shape = (batch_size, 1)
    return jnp.zeros(mask_shape, dtype=jnp.bool)

`get_invalid_mask(state, env_params)`

Return mask of invalid actions

Source code in gfnx/environment/sequence.py

def get_invalid_mask(self, state: EnvState, env_params: EnvParams) -> chex.Array:
    """Return the invalid-forward-action mask, which is all ``False``.

    The mask has one row per environment and ``nchar + 1`` columns.
    """
    batch_size = state.is_pad.shape[0]
    mask_shape = (batch_size, self.nchar + 1)
    return jnp.zeros(mask_shape, dtype=jnp.bool)

`EnvState`

Bases: BaseEnvState

Source code in gfnx/environment/sequence.py

@chex.dataclass(frozen=True)
class EnvState(BaseEnvState):
    """Batched state of a sequence environment: tokens plus per-sequence flags."""

    # Token ids, one row per sequence.
    tokens: Int[Array, "batch_size max_length"]
    # Per-sequence boolean flags.
    is_terminal: Bool[Array, " batch_size"]
    is_initial: Bool[Array, " batch_size"]
    is_pad: Bool[Array, " batch_size"]

    @classmethod
    def from_tokens(cls, tokens: Int[Array, "batch_size max_length"]) -> "EnvState":
        """Build a state around ``tokens`` with every flag set to ``False``."""
        num_sequences = tokens.shape[0]

        def cleared_flags() -> Bool[Array, " batch_size"]:
            # A fresh array per field, so the three flags never share a buffer.
            return jnp.zeros((num_sequences,), dtype=jnp.bool_)

        return cls(
            tokens=tokens,
            is_terminal=cleared_flags(),
            is_initial=cleared_flags(),
            is_pad=cleared_flags(),
        )

`from_tokens(tokens)` `classmethod`

Create an EnvState from a batch of tokens.

Source code in gfnx/environment/sequence.py

@classmethod
def from_tokens(cls, tokens: Int[Array, "batch_size max_length"]) -> "EnvState":
    """Build a state around ``tokens`` with every flag set to ``False``."""
    num_sequences = tokens.shape[0]

    def cleared_flags():
        # A fresh array per field, so the three flags never share a buffer.
        return jnp.zeros((num_sequences,), dtype=jnp.bool_)

    return cls(
        tokens=tokens,
        is_terminal=cleared_flags(),
        is_initial=cleared_flags(),
        is_pad=cleared_flags(),
    )

`FixedAutoregressiveSequenceEnvironment`

Bases: SequenceEnvironment

Class for sequence environments with a fixed length and autoregressive generation.

Source code in gfnx/environment/sequence.py

class FixedAutoregressiveSequenceEnvironment(SequenceEnvironment):
    """
    Class for sequence environments with a fixed length and
    autoregressive generation.

    The forward action space has ``nchar`` entries, one per character, and a
    sequence is terminal once no ``pad_token`` is left. The backward action
    space has a single entry that removes the most recently written token.
    """

    def _single_transition(
        self,
        state: EnvState,
        action: TAction,
        env_params: EnvParams,
    ) -> tuple[EnvState, TDone, dict[Any, Any]]:
        """
        Apply one forward action to a single state.

        Returns the next state, its ``is_terminal`` flag and an empty info
        dict. A state that is already terminal keeps its tokens and is only
        flagged with ``is_pad=True``.
        """
        is_terminal = state.is_terminal

        def get_next_state_terminal(state: EnvState, action: TAction) -> EnvState:
            # Nothing left to generate: keep the tokens, mark the step as padding.
            return state.replace(is_pad=True)

        def get_next_state_not_terminal(state: EnvState, action: TAction) -> EnvState:
            # The write position is the number of non-pad tokens
            # (assumes padding only ever sits at the end of the sequence).
            num_pad = jnp.sum(state.tokens == self.pad_token, axis=-1)
            pos_to_update = self.max_length - num_pad
            next_tokens = state.tokens.at[pos_to_update].set(action)
            # Done as soon as every slot holds a character.
            is_done = jnp.all(next_tokens != self.pad_token)
            return EnvState(
                tokens=next_tokens,
                is_terminal=is_done,
                is_initial=False,
                is_pad=False,
            )

        next_state: EnvState = jax.lax.cond(
            is_terminal, get_next_state_terminal, get_next_state_not_terminal, state, action
        )

        return next_state, next_state.is_terminal, {}

    def _single_backward_transition(
        self, state: EnvState, backward_action: TBackwardAction, env_params: EnvParams
    ) -> tuple[EnvState, TDone, dict[Any, Any]]:
        """
        Environment-specific step backward transition. Rewards always zero!

        Returns the previous state, its ``is_initial`` flag and an empty info
        dict. A state that is already initial keeps its tokens and is only
        flagged with ``is_pad=True``.
        """
        is_initial = state.is_initial

        def get_prev_state_init_state(
            state: EnvState, backward_action: TBackwardAction
        ) -> EnvState:
            # Nothing left to remove: keep the tokens, mark the step as padding.
            return state.replace(is_pad=True)

        def get_prev_state_not_init_state(
            state: EnvState, backward_action: TBackwardAction
        ) -> EnvState:
            # Overwrite the last non-pad token with padding.
            num_pad = jnp.sum(state.tokens == self.pad_token, axis=-1)
            last_position = self.max_length - num_pad
            prev_tokens = state.tokens.at[last_position - 1].set(self.pad_token)
            is_initial = jnp.all(prev_tokens == self.pad_token)
            return state.replace(
                tokens=prev_tokens,
                is_terminal=False,
                is_initial=is_initial,
            )

        prev_state: EnvState = jax.lax.cond(
            is_initial,
            get_prev_state_init_state,
            get_prev_state_not_init_state,
            state,
            backward_action,
        )
        return prev_state, prev_state.is_initial, {}

    def get_backward_action(
        self,
        state: EnvState,
        forward_action: chex.Array,
        next_state: EnvState,
        env_params: EnvParams,
    ) -> chex.Array:
        """Returns backward action given the complete characterization
        of the forward transition.

        There is only one backward action, so the result is a vector of
        zeros with one entry per environment and the dtype of
        ``forward_action``.
        """
        num_envs = state.is_pad.shape[0]
        return jnp.zeros((num_envs,), dtype=forward_action.dtype)

    def get_forward_action(
        self,
        state: EnvState,
        backward_action: chex.Array,
        prev_state: EnvState,
        env_params: EnvParams,
    ) -> chex.Array:
        """Returns forward action given the complete characterization
        of the backward transition.

        The forward action is the last non-pad token of ``state``. The result
        keeps the leading (batch) axes of ``state.tokens``, including a batch
        of size one.
        """
        num_pad = jnp.sum(state.tokens == self.pad_token, axis=-1)
        last_position = self.max_length - num_pad
        # Squeeze only the gathered axis; a bare squeeze() would also drop a
        # batch axis of size one and return a scalar.
        actions = jnp.take_along_axis(
            state.tokens, jnp.expand_dims(last_position - 1, axis=-1), axis=-1, mode="clip"
        ).squeeze(axis=-1)
        # Clip actions to be in valid range even for invalid states, i.e., an initial state
        return jnp.clip(actions, min=0, max=self.action_space.n - 1)

    def get_invalid_mask(self, state: EnvState, env_params: EnvParams) -> chex.Array:
        """Return mask of invalid actions.

        Always all ``False`` with shape ``(num_envs, nchar)``.
        """
        num_envs = state.is_pad.shape[0]
        return jnp.zeros((num_envs, self.nchar), dtype=jnp.bool)

    def get_invalid_backward_mask(self, state: EnvState, env_params: EnvParams) -> chex.Array:
        """Returns mask of invalid backward actions.

        Always all ``False`` with shape ``(num_envs, 1)``.
        """
        num_envs = state.is_pad.shape[0]
        return jnp.zeros((num_envs, 1), dtype=jnp.bool)

    @property
    def action_space(self) -> spaces.Discrete:
        """Action space of the environment, consists of characters."""
        return spaces.Discrete(self.nchar)

    @property
    def backward_action_space(self) -> spaces.Discrete:
        """Backward action space of the environment,
        only about removing the last character."""
        return spaces.Discrete(1)

`action_space` `property`

Action space of the environment, consists of characters.

`backward_action_space` `property`

Backward action space of the environment, only about removing the last character.

`get_backward_action(state, forward_action, next_state, env_params)`

Returns backward action given the complete characterization of the forward transition.

Source code in gfnx/environment/sequence.py

def get_backward_action(
    self,
    state: EnvState,
    forward_action: chex.Array,
    next_state: EnvState,
    env_params: EnvParams,
) -> chex.Array:
    """Return the backward action matching a forward transition.

    The result is a vector of zeros with one entry per environment (taken
    from the length of ``state.is_pad``) and the dtype of ``forward_action``.
    """
    batch_size = state.is_pad.shape[0]
    return jnp.zeros(shape=(batch_size,), dtype=forward_action.dtype)

`get_forward_action(state, backward_action, prev_state, env_params)`

Returns forward action given the complete characterization of the backward transition.

Source code in gfnx/environment/sequence.py

def get_forward_action(
    self,
    state: EnvState,
    backward_action: chex.Array,
    prev_state: EnvState,
    env_params: EnvParams,
) -> chex.Array:
    """Returns forward action given the complete characterization
    of the backward transition.

    The forward action is the last non-pad token of ``state``. The result
    keeps the leading (batch) axes of ``state.tokens``, including a batch of
    size one.
    """
    num_pad = jnp.sum(state.tokens == self.pad_token, axis=-1)
    last_position = self.max_length - num_pad
    # Squeeze only the gathered axis; a bare squeeze() would also drop a
    # batch axis of size one and return a scalar.
    actions = jnp.take_along_axis(
        state.tokens, jnp.expand_dims(last_position - 1, axis=-1), axis=-1, mode="clip"
    ).squeeze(axis=-1)
    # Clip actions to be in valid range even for invalid states, i.e., an initial state
    return jnp.clip(actions, min=0, max=self.action_space.n - 1)

`get_invalid_backward_mask(state, env_params)`

Returns mask of invalid backward actions.

Source code in gfnx/environment/sequence.py

def get_invalid_backward_mask(self, state: EnvState, env_params: EnvParams) -> chex.Array:
    """Return the invalid-backward-action mask, which is all ``False``.

    The mask has one row per environment and a single column.
    """
    batch_size = state.is_pad.shape[0]
    mask_shape = (batch_size, 1)
    return jnp.zeros(mask_shape, dtype=jnp.bool)

`get_invalid_mask(state, env_params)`

Return mask of invalid actions

Source code in gfnx/environment/sequence.py

def get_invalid_mask(self, state: EnvState, env_params: EnvParams) -> chex.Array:
    """Return the invalid-forward-action mask, which is all ``False``.

    The mask has one row per environment and ``nchar`` columns.
    """
    batch_size = state.is_pad.shape[0]
    mask_shape = (batch_size, self.nchar)
    return jnp.zeros(mask_shape, dtype=jnp.bool)

`FixedPrependAppendSequenceEnvironment`

Bases: SequenceEnvironment

Class for sequence environments with a fixed length and prepend-append generation.

Source code in gfnx/environment/sequence.py

class FixedPrependAppendSequenceEnvironment(SequenceEnvironment):
    """
    Class for sequence environments with a fixed length and
    prepend-append generation.

    Forward actions ``0 .. nchar - 1`` prepend that character; actions
    ``nchar .. 2 * nchar - 1`` append character ``action - nchar``. Backward
    action 0 removes the first token, any other value removes the last one.
    """

    def _single_transition(
        self,
        state: EnvState,
        action: TAction,
        env_params: EnvParams,
    ) -> tuple[EnvState, TDone, dict[Any, Any]]:
        """
        Apply one forward action to a single state.

        Returns the next state, its ``is_terminal`` flag and an empty info
        dict. A state that is already terminal keeps its tokens and is only
        flagged with ``is_pad=True``.
        """
        is_terminal = state.is_terminal

        def get_next_state_terminal(state: EnvState, action: TAction) -> EnvState:
            # Nothing left to generate: keep the tokens, mark the step as padding.
            return state.replace(is_pad=True)

        def get_next_state_not_terminal(state: EnvState, action: TAction) -> EnvState:
            def get_next_tokens_prepend(state: EnvState, action: TAction) -> chex.Array:
                # Shift everything one slot to the right, then write the new first token.
                next_tokens = jax.lax.dynamic_update_slice(state.tokens, state.tokens[:-1], (1,))
                return next_tokens.at[0].set(action)

            def get_next_tokens_append(state: EnvState, action: TAction) -> chex.Array:
                # Write into the slot right after the last non-pad token.
                num_pad = jnp.sum(state.tokens == self.pad_token, axis=-1)
                last_position = self.max_length - num_pad
                return state.tokens.at[last_position].set(action - self.nchar)

            next_tokens = jax.lax.cond(
                action < self.nchar,
                get_next_tokens_prepend,
                get_next_tokens_append,
                state,
                action,
            )
            # Done as soon as every slot holds a character.
            is_done = jnp.all(next_tokens != self.pad_token)
            return EnvState(
                tokens=next_tokens,
                is_terminal=is_done,
                is_initial=False,
                is_pad=False,
            )

        next_state: EnvState = jax.lax.cond(
            is_terminal, get_next_state_terminal, get_next_state_not_terminal, state, action
        )

        return next_state, next_state.is_terminal, {}

    def _single_backward_transition(
        self,
        state: EnvState,
        backward_action: TBackwardAction,
        env_params: EnvParams,
    ) -> tuple[EnvState, TDone, dict[Any, Any]]:
        """
        Environment-specific step backward transition. Rewards are always zero!

        Returns the previous state, its ``is_initial`` flag and an empty info
        dict. A state that is already initial keeps its tokens and is only
        flagged with ``is_pad=True``.
        """
        is_initial = state.is_initial

        def get_prev_state_init_state(
            state: EnvState, backward_action: TBackwardAction
        ) -> EnvState:
            # Nothing left to remove: keep the tokens, mark the step as padding.
            return state.replace(is_pad=True)

        def get_prev_state_not_init_state(
            state: EnvState, backward_action: TBackwardAction
        ) -> EnvState:
            num_pad = jnp.sum(state.tokens == self.pad_token, axis=-1)
            last_position = self.max_length - num_pad

            def get_prev_tokens_prepend(state: EnvState) -> chex.Array:
                # Shift everything one slot to the left, then pad the slot
                # that used to hold the last token.
                prev_tokens = jax.lax.dynamic_update_slice(state.tokens, state.tokens[1:], (0,))
                return prev_tokens.at[last_position - 1].set(self.pad_token)

            def get_prev_tokens_append(state: EnvState) -> chex.Array:
                # Overwrite the last non-pad token with padding.
                return state.tokens.at[last_position - 1].set(self.pad_token)

            prev_tokens = jax.lax.cond(
                backward_action == 0,
                get_prev_tokens_prepend,
                get_prev_tokens_append,
                state,
            )

            is_initial = jnp.all(prev_tokens == self.pad_token)
            return state.replace(
                tokens=prev_tokens,
                is_terminal=False,
                is_initial=is_initial,
            )

        prev_state: EnvState = jax.lax.cond(
            is_initial,
            get_prev_state_init_state,
            get_prev_state_not_init_state,
            state,
            backward_action,
        )
        return prev_state, prev_state.is_initial, {}

    def get_backward_action(
        self,
        state: EnvState,
        forward_action: chex.Array,
        next_state: EnvState,
        env_params: EnvParams,
    ) -> chex.Array:
        """Returns backward action given the forward transition.

        Prepend actions (``forward_action < nchar``) map to 0, append actions
        map to 1.
        """
        return jnp.where(forward_action < self.nchar, 0, 1)

    def get_forward_action(
        self,
        state: EnvState,
        backward_action: chex.Array,
        prev_state: EnvState,
        env_params: EnvParams,
    ) -> chex.Array:
        """Returns forward action given the backward transition.

        For backward action 0 the forward action is the first token of
        ``state``; otherwise it is ``nchar`` plus the last non-pad token.
        The result has one entry per environment.
        """
        num_pad = jnp.sum(state.tokens == self.pad_token, axis=-1)
        last_position = self.max_length - num_pad
        # Squeeze only the gathered axis so a batch of size one is preserved;
        # mode="clip" matches the other sequence environments.
        removed_tokens = jnp.take_along_axis(
            state.tokens, jnp.expand_dims(last_position - 1, axis=-1), axis=-1, mode="clip"
        ).squeeze(axis=-1)
        actions = jnp.where(backward_action == 0, state.tokens[:, 0], self.nchar + removed_tokens)
        # Clip actions to be in valid range even for invalid states, i.e., an initial state
        return jnp.clip(actions, min=0, max=self.action_space.n - 1)

    def get_invalid_mask(self, state: EnvState, env_params: EnvParams) -> chex.Array:
        """Return mask of invalid actions.

        Always all ``False`` with shape ``(num_envs, 2 * nchar)``.
        """
        num_envs = state.is_pad.shape[0]
        return jnp.zeros((num_envs, 2 * self.nchar), dtype=jnp.bool)

    def get_invalid_backward_mask(self, state: EnvState, env_params: EnvParams) -> chex.Array:
        """Returns mask of invalid backward actions.

        Always all ``False`` with shape ``(num_envs, 2)``.
        """
        num_envs = state.is_pad.shape[0]
        return jnp.zeros((num_envs, 2), dtype=jnp.bool)

    @property
    def action_space(self) -> spaces.Discrete:
        """Action space of the environment, consists of prepending
        or appending any character."""
        return spaces.Discrete(2 * self.nchar)

    @property
    def backward_action_space(self) -> spaces.Discrete:
        """Backward action space of the environment,
        removing the first or the last character."""
        return spaces.Discrete(2)

`action_space` `property`

Action space of the environment, consists of prepending or appending any character.

`backward_action_space` `property`

Backward action space of the environment, removing the first or the last character.

`get_backward_action(state, forward_action, next_state, env_params)`

Returns backward action given the forward transition.

Source code in gfnx/environment/sequence.py

def get_backward_action(
    self,
    state: EnvState,
    forward_action: chex.Array,
    next_state: EnvState,
    env_params: EnvParams,
) -> chex.Array:
    """Return the backward action matching a forward transition.

    Forward actions below ``nchar`` map to backward action 0; all other
    forward actions map to backward action 1.
    """
    below_nchar = forward_action < self.nchar
    return jnp.where(below_nchar, 0, 1)

`get_forward_action(state, backward_action, prev_state, env_params)`

Returns forward action given the backward transition.

Source code in gfnx/environment/sequence.py

def get_forward_action(
    self,
    state: EnvState,
    backward_action: chex.Array,
    prev_state: EnvState,
    env_params: EnvParams,
) -> chex.Array:
    """Returns forward action given the backward transition.

    For backward action 0 the forward action is the first token of ``state``;
    otherwise it is ``nchar`` plus the last non-pad token. The result has one
    entry per environment.
    """
    num_pad = jnp.sum(state.tokens == self.pad_token, axis=-1)
    last_position = self.max_length - num_pad
    # Squeeze only the gathered axis so a batch of size one is preserved;
    # mode="clip" matches the other sequence environments.
    removed_tokens = jnp.take_along_axis(
        state.tokens, jnp.expand_dims(last_position - 1, axis=-1), axis=-1, mode="clip"
    ).squeeze(axis=-1)
    actions = jnp.where(backward_action == 0, state.tokens[:, 0], self.nchar + removed_tokens)
    # Clip actions to be in valid range even for invalid states, i.e., an initial state
    return jnp.clip(actions, min=0, max=self.action_space.n - 1)

`get_invalid_backward_mask(state, env_params)`

Returns mask of invalid backward actions.

Source code in gfnx/environment/sequence.py

def get_invalid_backward_mask(self, state: EnvState, env_params: EnvParams) -> chex.Array:
    """Return the invalid-backward-action mask, which is all ``False``.

    The mask has one row per environment and two columns.
    """
    batch_size = state.is_pad.shape[0]
    mask_shape = (batch_size, 2)
    return jnp.zeros(mask_shape, dtype=jnp.bool)

`get_invalid_mask(state, env_params)`

Return mask of invalid actions

Source code in gfnx/environment/sequence.py

def get_invalid_mask(self, state: EnvState, env_params: EnvParams) -> chex.Array:
    """Return the invalid-forward-action mask, which is all ``False``.

    The mask has one row per environment and ``2 * nchar`` columns.
    """
    batch_size = state.is_pad.shape[0]
    mask_shape = (batch_size, 2 * self.nchar)
    return jnp.zeros(mask_shape, dtype=jnp.bool)

`NonAutoregressiveSequenceEnvironment`

Bases: SequenceEnvironment

Class for sequence environments with a fixed length and non-autoregressive generation.

Source code in gfnx/environment/sequence.py

class NonAutoregressiveSequenceEnvironment(SequenceEnvironment):
    """
    Class for sequence environments with a fixed length and
    non-autoregressive generation.

    A forward action is the raveled index of a ``(position, character)``
    pair over ``(max_length, nchar)``. A backward action is the position
    whose token is reset to ``pad_token``.
    """

    def _single_transition(
        self,
        state: EnvState,
        action: TAction,
        env_params: EnvParams,
    ) -> tuple[EnvState, TDone, dict[Any, Any]]:
        """
        Apply one forward action to a single state.

        Returns the next state, its ``is_terminal`` flag and an empty info
        dict. A state that is already terminal keeps its tokens and is only
        flagged with ``is_pad=True``.
        """
        is_terminal = state.is_terminal

        def get_next_state_terminal(state: EnvState, action: TAction) -> EnvState:
            # Nothing left to generate: keep the tokens, mark the step as padding.
            return state.replace(is_pad=True)

        def get_next_state_non_terminal(state: EnvState, action: TAction) -> EnvState:
            # action is a raveled multi-index of a pair (pos, char)
            pos, word = jnp.unravel_index(action, (self.max_length, self.nchar))
            next_tokens = state.tokens.at[pos].set(word)
            # Done as soon as every slot holds a character.
            is_done = jnp.all(next_tokens != self.pad_token)
            return EnvState(
                tokens=next_tokens,
                is_terminal=is_done,
                is_initial=False,
                is_pad=False,
            )

        next_state: EnvState = jax.lax.cond(
            is_terminal,
            get_next_state_terminal,
            get_next_state_non_terminal,
            state,
            action,
        )

        return next_state, next_state.is_terminal, {}

    def _single_backward_transition(
        self,
        state: EnvState,
        backward_action: TBackwardAction,
        env_params: EnvParams,
    ) -> tuple[EnvState, TDone, dict[Any, Any]]:
        """
        Environment-specific step backward transition. Rewards always zero!

        Returns the previous state, its ``is_initial`` flag and an empty info
        dict. A state that is already initial keeps its tokens and is only
        flagged with ``is_pad=True``.
        """
        is_initial = state.is_initial

        def get_prev_state_init_state(
            state: EnvState, backward_action: TBackwardAction
        ) -> EnvState:
            # Nothing left to remove: keep the tokens, mark the step as padding.
            return state.replace(is_pad=True)

        def get_prev_state_not_init_state(
            state: EnvState, backward_action: TBackwardAction
        ) -> EnvState:
            # The backward action is the position to clear.
            prev_tokens = state.tokens.at[backward_action].set(self.pad_token)
            is_initial = jnp.all(prev_tokens == self.pad_token)
            return state.replace(
                tokens=prev_tokens,
                is_terminal=False,
                is_initial=is_initial,
            )

        prev_state: EnvState = jax.lax.cond(
            is_initial,
            get_prev_state_init_state,
            get_prev_state_not_init_state,
            state,
            backward_action,
        )
        return prev_state, prev_state.is_initial, {}

    def get_backward_action(
        self,
        state: EnvState,
        forward_action: chex.Array,
        next_state: EnvState,
        env_params: EnvParams,
    ) -> chex.Array:
        """Returns backward action given the forward transition.

        This is the position component of the raveled forward action.
        """
        pos, _ = jnp.unravel_index(forward_action, (self.max_length, self.nchar))
        return pos

    def get_forward_action(
        self,
        state: EnvState,
        backward_action: chex.Array,
        prev_state: EnvState,
        env_params: EnvParams,
    ) -> chex.Array:
        """Returns forward action given the backward transition.

        The token stored in ``state`` at the backward-action position is
        combined with that position into a raveled ``(position, character)``
        index. The result keeps the shape of ``backward_action``, including a
        batch of size one.
        """
        # Squeeze only the gathered axis; a bare squeeze() would also drop a
        # batch axis of size one and return a scalar.
        word = jnp.take_along_axis(
            state.tokens, jnp.expand_dims(backward_action, axis=-1), axis=-1
        ).squeeze(axis=-1)
        return jnp.ravel_multi_index(
            (backward_action, word), (self.max_length, self.nchar), mode="clip"
        )

    def get_invalid_mask(self, state: EnvState, env_params: EnvParams) -> chex.Array:
        """Return mask of invalid actions.

        Every ``(position, character)`` action whose position already holds a
        non-pad token is marked invalid. Rows with no pad token left are
        returned as all ``False`` instead. The result has shape
        ``(batch, max_length * nchar)``.
        """
        pos_mask = state.tokens != self.pad_token  # [B, token_len]
        chex.assert_shape(pos_mask, (state.tokens.shape[0], self.max_length))
        # Broadcast the per-position flag over all characters of that position.
        invalid_mask_2d = jnp.repeat(jnp.expand_dims(pos_mask, axis=2), repeats=self.nchar, axis=2)
        chex.assert_shape(
            invalid_mask_2d,
            (state.tokens.shape[0], self.max_length, self.nchar),
        )
        invalid_mask_flat = invalid_mask_2d.reshape(state.tokens.shape[0], -1)
        # If all positions are already filled (all True), return all zeros mask
        all_filled = jnp.all(pos_mask, axis=-1, keepdims=True)
        return jnp.where(all_filled, jnp.zeros_like(invalid_mask_flat), invalid_mask_flat)

    def get_invalid_backward_mask(self, state: EnvState, env_params: EnvParams) -> chex.Array:
        """Returns mask of invalid backward actions.

        Positions that hold ``pad_token`` are marked invalid. Rows that
        consist only of pad tokens are returned as all ``False`` instead.
        """
        pos_mask = state.tokens == self.pad_token
        # True for rows where nothing has been written yet.
        all_empty = jnp.all(pos_mask, axis=-1, keepdims=True)
        return jnp.where(all_empty, jnp.zeros_like(pos_mask), pos_mask)

    @property
    def action_space(self) -> spaces.Discrete:
        """Action space of the environment, consists of pairs
        (position, word)"""
        return spaces.Discrete(self.max_length * self.nchar)

    @property
    def backward_action_space(self) -> spaces.Discrete:
        """Backward action space of the environment, consists of position"""
        return spaces.Discrete(self.max_length)

`action_space` `property`

Action space of the environment, consists of pairs (position, word)

`backward_action_space` `property`

Backward action space of the environment, consists of position

`get_backward_action(state, forward_action, next_state, env_params)`

Returns backward action given the forward transition.

Source code in gfnx/environment/sequence.py

def get_backward_action(
    self,
    state: EnvState,
    forward_action: chex.Array,
    next_state: EnvState,
    env_params: EnvParams,
) -> chex.Array:
    """Return the backward action matching a forward transition.

    The forward action is unraveled over ``(max_length, nchar)`` and its
    first (position) component is returned.
    """
    grid_shape = (self.max_length, self.nchar)
    return jnp.unravel_index(forward_action, grid_shape)[0]

`get_forward_action(state, backward_action, prev_state, env_params)`

Returns forward action given the backward transition.

Source code in gfnx/environment/sequence.py

def get_forward_action(
    self,
    state: EnvState,
    backward_action: chex.Array,
    prev_state: EnvState,
    env_params: EnvParams,
) -> chex.Array:
    """Returns forward action given the backward transition.

    The token stored in ``state`` at the backward-action position is combined
    with that position into a raveled ``(position, character)`` index over
    ``(max_length, nchar)``. The result keeps the shape of
    ``backward_action``, including a batch of size one.
    """
    # Squeeze only the gathered axis; a bare squeeze() would also drop a
    # batch axis of size one and return a scalar.
    word = jnp.take_along_axis(
        state.tokens, jnp.expand_dims(backward_action, axis=-1), axis=-1
    ).squeeze(axis=-1)
    return jnp.ravel_multi_index(
        (backward_action, word), (self.max_length, self.nchar), mode="clip"
    )

`get_invalid_backward_mask(state, env_params)`

Returns mask of invalid backward actions.

Source code in gfnx/environment/sequence.py

def get_invalid_backward_mask(self, state: EnvState, env_params: EnvParams) -> chex.Array:
    """Return the mask of invalid backward actions.

    A position is invalid when it holds ``pad_token``. Rows that consist
    only of pad tokens are returned as all ``False`` instead.
    """
    is_empty = state.tokens == self.pad_token
    # True for rows where every position is still padding.
    nothing_written = jnp.all(is_empty, axis=-1, keepdims=True)
    return jnp.logical_and(is_empty, jnp.logical_not(nothing_written))

`get_invalid_mask(state, env_params)`

Return mask of invalid actions

Source code in gfnx/environment/sequence.py

def get_invalid_mask(self, state: EnvState, env_params: EnvParams) -> chex.Array:
    """Return mask of invalid actions

    The mask is flat over (position, word) pairs: every word at an already
    occupied (non-PAD) position is invalid. Rows with no PAD left are
    returned as all-False.
    """
    batch = state.tokens.shape[0]
    occupied = state.tokens != self.pad_token  # [B, token_len]
    chex.assert_shape(occupied, (batch, self.max_length))

    # Copy the per-position flag across all nchar words of that position.
    per_action_shape = (batch, self.max_length, self.nchar)
    per_action = jnp.broadcast_to(occupied[:, :, None], per_action_shape)
    chex.assert_shape(per_action, per_action_shape)

    flat_mask = per_action.reshape(batch, -1)
    # A fully filled sequence gets an all-False mask instead of all-True.
    sequence_full = jnp.all(occupied, axis=-1, keepdims=True)
    return jnp.logical_and(flat_mask, jnp.logical_not(sequence_full))

`SequenceEnvironment`

Bases: BaseVecEnvironment[EnvState, EnvParams]

Class for sequence environments with a fixed length.

Source code in gfnx/environment/sequence.py

class SequenceEnvironment(BaseVecEnvironment[EnvState, EnvParams]):
    """
    Class for sequence environments with a fixed length.

    Stores the vocabulary layout (active characters plus BOS/EOS/PAD ids),
    builds the all-PAD initial state and the BOS-prefixed observation.
    Transitions, action conversions, masks, action spaces and the name raise
    ``NotImplementedError`` here.
    """

    def __init__(
        self,
        reward_module: TRewardModule,
        max_length: int,  # Maximal length of the sequence
        nchar: int,  # Number of active characters in the vocabulary
        ntoken: int,  # Size of the vocabulary including special tokens
        *,
        bos_token: int,  # id of beginning of sentence token, must be >= nchar
        eos_token: int,  # id of end of sentence token, must be >= nchar
        pad_token: int,  # id of padding token, must be >= nchar
    ) -> None:
        """Store the reward module and the vocabulary configuration."""
        super().__init__(reward_module)
        self.max_length = max_length
        self.nchar = nchar
        self.ntoken = ntoken

        self.bos_token = bos_token
        self.eos_token = eos_token
        self.pad_token = pad_token

    def get_init_state(self, num_envs: int) -> EnvState:
        """Return ``num_envs`` initial states whose tokens are all PAD."""
        # Fill empty tokens with [PAD] token
        tokens = jnp.full(
            shape=(num_envs, self.max_length),
            fill_value=self.pad_token,
            dtype=jnp.int32,
        )
        return EnvState.from_tokens(tokens).replace(
            is_initial=jnp.ones((num_envs,), dtype=jnp.bool_),
        )

    def init(self, rng_key: chex.PRNGKey) -> EnvParams:
        """Initialise the reward module and bundle the environment parameters."""
        # A single dummy state is enough for the reward module to set itself up.
        dummy_state = self.get_init_state(1)
        reward_params = self.reward_module.init(rng_key, dummy_state)
        return EnvParams(
            reward_params=reward_params,
            max_length=self.max_length,
            nchar=self.nchar,
            ntoken=self.ntoken,
            bos_token=self.bos_token,
            eos_token=self.eos_token,
            pad_token=self.pad_token,
        )

    @property
    def max_steps_in_episode(self) -> int:
        """Maximal number of steps in an episode, equal to ``max_length``."""
        return self.max_length

    def _single_transition(
        self,
        state: EnvState,
        action: TAction,
        env_params: EnvParams,
    ) -> tuple[EnvState, TDone, dict[Any, Any]]:
        """Environment-specific forward transition; not implemented here."""
        raise NotImplementedError

    def _single_backward_transition(
        self,
        state: EnvState,
        backward_action: TBackwardAction,
        env_params: EnvParams,
    ) -> tuple[EnvState, TDone, dict[Any, Any]]:
        """
        Environment-specific step backward transition. Rewards always zero!
        """
        raise NotImplementedError

    def get_obs(self, state: EnvState, env_params: EnvParams) -> chex.Array:
        """Applies observation function to state."""
        # Add BOS token to the beginning of the sentence
        num_envs = state.is_pad.shape[0]
        return jnp.concat(
            [
                jnp.full(
                    shape=(num_envs, 1),
                    fill_value=self.bos_token,
                    dtype=state.tokens.dtype,
                ),
                state.tokens,
            ],
            axis=-1,
        )

    def get_backward_action(
        self,
        state: EnvState,
        forward_action: chex.Array,
        next_state: EnvState,
        env_params: EnvParams,
    ) -> chex.Array:
        """Returns backward action given the forward transition; not implemented here."""
        raise NotImplementedError

    def get_forward_action(
        self,
        state: EnvState,
        backward_action: chex.Array,
        prev_state: EnvState,
        env_params: EnvParams,
    ) -> chex.Array:
        """Returns forward action given the backward transition; not implemented here."""
        raise NotImplementedError

    def get_invalid_mask(self, state: EnvState, env_params: EnvParams) -> chex.Array:
        """Return mask of invalid actions"""
        raise NotImplementedError

    def get_invalid_backward_mask(self, state: EnvState, env_params: EnvParams) -> chex.Array:
        """Return mask of invalid backward actions; not implemented here."""
        raise NotImplementedError

    @property
    def name(self) -> str:
        """Environment name."""
        raise NotImplementedError

    @property
    def action_space(self) -> spaces.Discrete:
        """Action space of the environment; not implemented here."""
        raise NotImplementedError

    @property
    def backward_action_space(self) -> spaces.Discrete:
        """Backward action space of the environment; not implemented here."""
        raise NotImplementedError

    @property
    def observation_space(self) -> spaces.Box:
        """Observation space of the environment."""
        return spaces.Box(
            low=0,
            high=self.ntoken,  # Includes all special tokens
            shape=(self.max_length + 1,),  # +1 because of BOS token
            dtype=jnp.int32,
        )

    @property
    def state_space(self) -> spaces.Dict:
        """State space of the environment."""
        return spaces.Dict({
            "token": spaces.Box(
                low=0,
                high=self.ntoken,  # Includes special tokens
                # (e.g. PAD and EOS)
                shape=(self.max_length,),
                dtype=jnp.int32,
            ),
            # jnp.bool_ matches the boolean dtype used by get_init_state.
            "is_done": spaces.Box(low=0, high=1, shape=(), dtype=jnp.bool_),
        })

`name` `property`

Environment name.

`observation_space` `property`

Observation space of the environment.

`state_space` `property`

State space of the environment.

`get_invalid_mask(state, env_params)`

Return mask of invalid actions

Source code in gfnx/environment/sequence.py

def get_invalid_mask(self, state: EnvState, env_params: EnvParams) -> chex.Array:
    """Return mask of invalid actions

    This base implementation always raises ``NotImplementedError``.
    """
    raise NotImplementedError

`get_obs(state, env_params)`

Applies observation function to state.

Source code in gfnx/environment/sequence.py

def get_obs(self, state: EnvState, env_params: EnvParams) -> chex.Array:
    """Applies observation function to state.

    The observation is the token sequence with one BOS column prepended.
    """
    batch = state.is_pad.shape[0]
    # One BOS token per environment, in the same dtype as the tokens.
    bos_column = jnp.full((batch, 1), self.bos_token, dtype=state.tokens.dtype)
    return jnp.concat([bos_column, state.tokens], axis=-1)

Bit Sequence environment

`BitseqEnvironment`

Bases: NonAutoregressiveSequenceEnvironment

Source code in gfnx/environment/bitseq.py

class BitseqEnvironment(NonAutoregressiveSequenceEnvironment):
    """
    Non-autoregressive sequence environment with ``n // k`` positions and a
    vocabulary of ``2**k`` words (presumably one k-bit chunk per position).
    """

    def __init__(self, reward_module: TRewardModule, n: int = 120, k: int = 8) -> None:
        self.n, self.k = n, k
        assert n % k == 0, "n should be divisible by k"

        # Special tokens are placed directly after the 2**k regular words:
        # PAD, then BOS, then EOS.
        num_words = 2**k
        super().__init__(
            reward_module,
            max_length=n // k,
            nchar=num_words,
            ntoken=num_words + 3,
            bos_token=num_words + 1,
            eos_token=num_words + 2,
            pad_token=num_words,
        )

    @property
    def name(self) -> str:
        """Environment name."""
        return f"Bitseq-{self.n}-{self.k}-v0"

`name` `property`

Environment name.

TFBind-8 environment

`TFBind8Environment`

Bases: FixedAutoregressiveSequenceEnvironment

Source code in gfnx/environment/tfbind.py

class TFBind8Environment(FixedAutoregressiveSequenceEnvironment):
    """
    Fixed-length autoregressive sequence environment with ``max_length=8``
    over the ``NUCLEOTIDES`` characters. All full-length sequences are
    enumerated to compute exact reward statistics.
    """

    def __init__(self, reward_module: TRewardModule) -> None:
        # Token ids follow the order of the full alphabet (characters + specials).
        self.char_to_id = {char: i for i, char in enumerate(NUCLEOTIDES_FULL_ALPHABET)}

        super().__init__(
            reward_module,
            max_length=8,
            nchar=len(NUCLEOTIDES),
            ntoken=len(NUCLEOTIDES_FULL_ALPHABET),
            bos_token=self.char_to_id["[BOS]"],
            eos_token=self.char_to_id["[EOS]"],
            pad_token=self.char_to_id["[PAD]"],
        )

    @property
    def is_enumerable(self) -> bool:
        """Whether the environment is enumerable."""
        return True

    @property
    def name(self) -> str:
        """Environment name."""
        return "TFBind8-v0"

    def _get_states_rewards(self, env_params: EnvParams) -> chex.Array:
        """
        Returns the reward of every full-length sequence as an array of shape
        ``(nchar,) * max_length``, indexed by the sequence's tokens.
        """
        rewards = jnp.zeros((self.nchar,) * self.max_length, dtype=jnp.float32)

        def update_rewards(idx: int, rewards: chex.Array):
            state = jnp.unravel_index(idx, shape=rewards.shape)  # Unpack index to state
            env_state = EnvState(
                tokens=jnp.array(state),
                is_terminal=True,
                is_initial=False,
                is_pad=False,
            )
            # The reward module is called on a batch, so add a leading axis of 1.
            batched_env_state = jax.tree.map(lambda x: jnp.expand_dims(x, 0), env_state)
            reward = self.reward_module.reward(batched_env_state, env_params)
            return rewards.at[state].set(reward[0])

        # One iteration per sequence: nchar**max_length reward evaluations.
        return jax.lax.fori_loop(0, self.nchar**self.max_length, update_rewards, rewards)

    def get_true_distribution(self, env_params: EnvParams) -> chex.Array:
        """
        Returns the reward-proportional distribution over all full-length
        sequences (rewards divided by their sum).
        """
        rewards = self._get_states_rewards(env_params)
        return rewards / rewards.sum()

    def get_empirical_distribution(self, states: EnvState, env_params: EnvParams) -> chex.Array:
        """
        Extracts the empirical distribution from the given states.

        Only states flagged ``is_terminal`` are counted; the counts are
        normalized by their total.
        """
        dist_shape = (self.nchar,) * self.max_length
        # mode="clip" clamps out-of-range token ids instead of raising.
        sample_idx = jax.vmap(lambda x: jnp.ravel_multi_index(x, dims=dist_shape, mode="clip"))(
            states.tokens
        )

        # Non-terminal states contribute weight 0 to the histogram.
        valid_mask = states.is_terminal.astype(jnp.float32)
        empirical_dist = jax.ops.segment_sum(valid_mask, sample_idx, num_segments=prod(dist_shape))
        empirical_dist = empirical_dist.reshape(dist_shape)
        # NOTE(review): the total is 0 when no state is terminal — confirm callers avoid that.
        empirical_dist /= empirical_dist.sum()
        return empirical_dist

    @property
    def is_mean_reward_tractable(self) -> bool:
        """Whether this environment supports mean reward tractability."""
        return True

    def get_mean_reward(self, env_params: EnvParams) -> float:
        """
        Returns the expected reward under the reward-proportional distribution.
        It is computed as the sum of squared rewards divided by the sum of rewards.
        """
        rewards = self._get_states_rewards(env_params)
        return jnp.pow(rewards, 2).sum() / rewards.sum()

    @property
    def is_normalizing_constant_tractable(self) -> bool:
        """Whether this environment supports tractable normalizing constant."""
        return True

    def get_normalizing_constant(self, env_params: EnvParams) -> float:
        """
        Returns the normalizing constant of the environment.
        The normalizing constant is computed as the sum of rewards over all
        full-length sequences.
        """
        rewards = self._get_states_rewards(env_params)
        return rewards.sum()

    @property
    def is_ground_truth_sampling_tractable(self) -> bool:
        """Whether this environment supports tractable sampling from the GT distribution."""
        return True

    def get_ground_truth_sampling(
        self, rng_key: chex.PRNGKey, batch_size: int, env_params: EnvParams
    ) -> EnvState:
        """
        Returns a batch of terminal states sampled from the ground-truth distribution
        proportional to rewards over all sequences of length `max_length`.

        Args:
            rng_key: JAX random key for sampling.
            batch_size: Number of samples to generate.
            env_params: Environment parameters.

        Returns:
            EnvState with shape [batch_size, max_length].
        """
        true_distribution = self.get_true_distribution(env_params)
        flat_distribution = true_distribution.flatten()

        # Draw flat indices with probability given by the true distribution.
        sampled_indices = jax.random.choice(
            rng_key,
            a=flat_distribution.size,
            shape=(batch_size,),
            p=flat_distribution,
        )

        # Convert flat indices back to one token per position.
        sampled_coords_unstacked = jnp.unravel_index(
            sampled_indices, shape=true_distribution.shape
        )
        sampled_tokens = jnp.stack(sampled_coords_unstacked, axis=1)

        return EnvState(
            tokens=sampled_tokens.astype(jnp.int32),
            is_terminal=jnp.ones((batch_size,), dtype=jnp.bool_),
            is_initial=jnp.zeros((batch_size,), dtype=jnp.bool_),
            is_pad=jnp.zeros((batch_size,), dtype=jnp.bool_),
        )

`is_enumerable` `property`

Whether the environment is enumerable.

`is_ground_truth_sampling_tractable` `property`

Whether this environment supports tractable sampling from the GT distribution.

`is_mean_reward_tractable` `property`

Whether this environment supports mean reward tractability.

`is_normalizing_constant_tractable` `property`

Whether this environment supports tractable normalizing constant.

`name` `property`

Environment name.

`get_empirical_distribution(states, env_params)`

Extracts the empirical distribution from the given states.

Source code in gfnx/environment/tfbind.py

def get_empirical_distribution(self, states: EnvState, env_params: EnvParams) -> chex.Array:
    """
    Extracts the empirical distribution from the given states.

    Terminal states are counted per token sequence and the counts are
    normalized by their total; non-terminal states carry weight zero.
    """
    grid_shape = (self.nchar,) * self.max_length

    def to_flat_index(sequence):
        # mode="clip" clamps out-of-range token ids instead of raising.
        return jnp.ravel_multi_index(sequence, dims=grid_shape, mode="clip")

    flat_ids = jax.vmap(to_flat_index)(states.tokens)
    weights = states.is_terminal.astype(jnp.float32)
    counts = jax.ops.segment_sum(weights, flat_ids, num_segments=prod(grid_shape))
    histogram = counts.reshape(grid_shape)
    return histogram / histogram.sum()

`get_ground_truth_sampling(rng_key, batch_size, env_params)`

Returns a batch of terminal states sampled from the ground-truth distribution proportional to rewards over all sequences of length max_length.

Parameters:

Name	Type	Description	Default
`rng_key`	`PRNGKey`	JAX random key for sampling.	required
`batch_size`	`int`	Number of samples to generate.	required
`env_params`	`EnvParams`	Environment parameters.	required

Returns:

Type	Description
`EnvState`	EnvState with shape [batch_size, max_length].

Source code in gfnx/environment/tfbind.py

def get_ground_truth_sampling(
    self, rng_key: chex.PRNGKey, batch_size: int, env_params: EnvParams
) -> EnvState:
    """
    Returns a batch of terminal states sampled from the ground-truth distribution
    proportional to rewards over all sequences of length `max_length`.

    Args:
        rng_key: JAX random key for sampling.
        batch_size: Number of samples to generate.
        env_params: Environment parameters.

    Returns:
        EnvState with shape [batch_size, max_length].
    """
    target = self.get_true_distribution(env_params)
    probabilities = target.flatten()

    # Sample flat sequence indices according to the target probabilities.
    flat_ids = jax.random.choice(
        rng_key, a=probabilities.size, shape=(batch_size,), p=probabilities
    )

    # Turn each flat index into its per-position tokens: [batch_size, max_length].
    per_position = jnp.unravel_index(flat_ids, shape=target.shape)
    tokens = jnp.stack(per_position, axis=1).astype(jnp.int32)

    flag_shape = (batch_size,)
    return EnvState(
        tokens=tokens,
        is_terminal=jnp.ones(flag_shape, dtype=jnp.bool_),
        is_initial=jnp.zeros(flag_shape, dtype=jnp.bool_),
        is_pad=jnp.zeros(flag_shape, dtype=jnp.bool_),
    )

`get_mean_reward(env_params)`

Returns the expected reward under the reward-proportional distribution. It is computed as the sum of squared rewards divided by the sum of rewards.

Source code in gfnx/environment/tfbind.py

def get_mean_reward(self, env_params: EnvParams) -> float:
    """
    Returns the expected reward under the reward-proportional distribution.
    It is computed as the sum of squared rewards divided by the sum of rewards.
    """
    rewards = self._get_states_rewards(env_params)
    squared_total = jnp.sum(rewards * rewards)
    return squared_total / jnp.sum(rewards)

`get_normalizing_constant(env_params)`

Returns the normalizing constant of the environment. The normalizing constant is computed as the sum of rewards over all full-length sequences.

Source code in gfnx/environment/tfbind.py

def get_normalizing_constant(self, env_params: EnvParams) -> float:
    """
    Returns the normalizing constant of the environment, computed as the sum
    of the rewards returned by ``_get_states_rewards``.
    """
    return jnp.sum(self._get_states_rewards(env_params))

`get_true_distribution(env_params)`

Returns the reward-proportional distribution over all full-length sequences (rewards divided by their sum).

Source code in gfnx/environment/tfbind.py

def get_true_distribution(self, env_params: EnvParams) -> chex.Array:
    """
    Returns the reward-proportional distribution: the rewards from
    ``_get_states_rewards`` divided by their sum.
    """
    all_rewards = self._get_states_rewards(env_params)
    total = jnp.sum(all_rewards)
    return jnp.divide(all_rewards, total)

QM9 Small environment

`QM9SmallEnvironment`

Bases: FixedPrependAppendSequenceEnvironment

Source code in gfnx/environment/qm9_small.py

class QM9SmallEnvironment(FixedPrependAppendSequenceEnvironment):
    """
    Fixed-length prepend/append sequence environment with ``max_length=5``
    over the ``QM9_SMALL_BLOCKS`` characters. All full-length sequences are
    enumerated to compute exact reward statistics.
    """

    def __init__(self, reward_module: TRewardModule) -> None:
        # Token ids follow the order of the full alphabet (blocks + specials).
        self.char_to_id = {char: i for i, char in enumerate(QM9_SMALL_FULL_ALPHABET)}

        super().__init__(
            reward_module,
            max_length=5,
            nchar=len(QM9_SMALL_BLOCKS),
            ntoken=len(QM9_SMALL_FULL_ALPHABET),
            bos_token=self.char_to_id["[BOS]"],
            eos_token=self.char_to_id["[EOS]"],
            pad_token=self.char_to_id["[PAD]"],
        )

    @property
    def name(self) -> str:
        """Environment name."""
        return "QM9Small-v0"

    @property
    def is_enumerable(self) -> bool:
        """Whether the environment is enumerable."""
        return True

    def _get_states_rewards(self, env_params: EnvParams) -> chex.Array:
        """
        Returns the reward of every full-length sequence as an array of shape
        ``(nchar,) * max_length``, indexed by the sequence's tokens.
        """
        rewards = jnp.zeros((self.nchar,) * self.max_length, dtype=jnp.float32)

        def update_rewards(idx: int, rewards: chex.Array):
            state = jnp.unravel_index(idx, shape=rewards.shape)  # Unpack index to state
            env_state = EnvState(
                tokens=jnp.array(state),
                is_terminal=True,
                is_initial=False,
                is_pad=False,
            )
            # The reward module is called on a batch, so add a leading axis of 1.
            batched_env_state = jax.tree.map(lambda x: jnp.expand_dims(x, 0), env_state)
            reward = self.reward_module.reward(batched_env_state, env_params)
            return rewards.at[state].set(reward[0])

        # One iteration per sequence: nchar**max_length reward evaluations.
        return jax.lax.fori_loop(0, self.nchar**self.max_length, update_rewards, rewards)

    def get_true_distribution(self, env_params: EnvParams) -> chex.Array:
        """
        Returns the reward-proportional distribution over all full-length
        sequences (rewards divided by their sum).
        """
        rewards = self._get_states_rewards(env_params)
        return rewards / rewards.sum()

    def get_empirical_distribution(self, states: EnvState, env_params: EnvParams) -> chex.Array:
        """
        Extracts the empirical distribution from the given states.

        Only states flagged ``is_terminal`` are counted; the counts are
        normalized by their total.
        """
        dist_shape = (self.nchar,) * self.max_length
        # mode="clip" clamps out-of-range token ids instead of raising.
        sample_idx = jax.vmap(lambda x: jnp.ravel_multi_index(x, dims=dist_shape, mode="clip"))(
            states.tokens
        )

        # Non-terminal states contribute weight 0 to the histogram.
        valid_mask = states.is_terminal.astype(jnp.float32)
        empirical_dist = jax.ops.segment_sum(valid_mask, sample_idx, num_segments=prod(dist_shape))
        empirical_dist = empirical_dist.reshape(dist_shape)
        # NOTE(review): the total is 0 when no state is terminal — confirm callers avoid that.
        empirical_dist /= empirical_dist.sum()
        return empirical_dist

    @property
    def is_mean_reward_tractable(self) -> bool:
        """Whether this environment supports mean reward tractability."""
        return True

    def get_mean_reward(self, env_params: EnvParams) -> float:
        """
        Returns the expected reward under the reward-proportional distribution.
        It is computed as the sum of squared rewards divided by the sum of rewards.
        """
        rewards = self._get_states_rewards(env_params)
        return jnp.pow(rewards, 2).sum() / rewards.sum()

    @property
    def is_normalizing_constant_tractable(self) -> bool:
        """Whether this environment supports tractable normalizing constant."""
        return True

    def get_normalizing_constant(self, env_params: EnvParams) -> float:
        """
        Returns the normalizing constant of the environment.
        The normalizing constant is computed as the sum of rewards over all
        full-length sequences.
        """
        rewards = self._get_states_rewards(env_params)
        return rewards.sum()

`is_enumerable` `property`

Whether the environment is enumerable.

`is_mean_reward_tractable` `property`

Whether this environment supports mean reward tractability.

`is_normalizing_constant_tractable` `property`

Whether this environment supports tractable normalizing constant.

`name` `property`

Environment name.

`get_empirical_distribution(states, env_params)`

Extracts the empirical distribution from the given states.

Source code in gfnx/environment/qm9_small.py

def get_empirical_distribution(self, states: EnvState, env_params: EnvParams) -> chex.Array:
    """
    Extracts the empirical distribution from the given states.

    Terminal states are counted per token sequence and the counts are
    normalized by their total; non-terminal states carry weight zero.
    """
    grid_shape = (self.nchar,) * self.max_length

    def to_flat_index(sequence):
        # mode="clip" clamps out-of-range token ids instead of raising.
        return jnp.ravel_multi_index(sequence, dims=grid_shape, mode="clip")

    flat_ids = jax.vmap(to_flat_index)(states.tokens)
    weights = states.is_terminal.astype(jnp.float32)
    counts = jax.ops.segment_sum(weights, flat_ids, num_segments=prod(grid_shape))
    histogram = counts.reshape(grid_shape)
    return histogram / histogram.sum()

`get_mean_reward(env_params)`

Returns the expected reward under the reward-proportional distribution. It is computed as the sum of squared rewards divided by the sum of rewards.

Source code in gfnx/environment/qm9_small.py

def get_mean_reward(self, env_params: EnvParams) -> float:
    """
    Returns the expected reward under the reward-proportional distribution.
    It is computed as the sum of squared rewards divided by the sum of rewards.
    """
    rewards = self._get_states_rewards(env_params)
    squared_total = jnp.sum(rewards * rewards)
    return squared_total / jnp.sum(rewards)

`get_normalizing_constant(env_params)`

Returns the normalizing constant of the environment. The normalizing constant is computed as the sum of rewards over all full-length sequences.

Source code in gfnx/environment/qm9_small.py

def get_normalizing_constant(self, env_params: EnvParams) -> float:
    """
    Returns the normalizing constant of the environment, computed as the sum
    of the rewards returned by ``_get_states_rewards``.
    """
    return jnp.sum(self._get_states_rewards(env_params))

`get_true_distribution(env_params)`

Returns the reward-proportional distribution over all full-length sequences (rewards divided by their sum).

Source code in gfnx/environment/qm9_small.py

def get_true_distribution(self, env_params: EnvParams) -> chex.Array:
    """
    Returns the reward-proportional distribution: the rewards from
    ``_get_states_rewards`` divided by their sum.
    """
    all_rewards = self._get_states_rewards(env_params)
    total = jnp.sum(all_rewards)
    return jnp.divide(all_rewards, total)

AMP environment

`AMPEnvironment`

Bases: AutoregressiveSequenceEnvironment

Source code in gfnx/environment/amp.py

class AMPEnvironment(AutoregressiveSequenceEnvironment):
    """
    Autoregressive sequence environment with ``max_length=60`` over the
    ``AMINO_ACIDS`` characters. The observation appends one trailing token
    to the state tokens.
    """

    def __init__(self, reward_module: TRewardModule) -> None:
        # Token ids follow the order of the full alphabet (characters + specials).
        self.char_to_id = {
            symbol: index for index, symbol in enumerate(PROTEINS_FULL_ALPHABET)
        }
        ids = self.char_to_id

        super().__init__(
            reward_module,
            max_length=60,
            nchar=len(AMINO_ACIDS),
            ntoken=len(PROTEINS_FULL_ALPHABET),
            bos_token=ids["[BOS]"],
            eos_token=ids["[EOS]"],
            pad_token=ids["[PAD]"],
        )

    @property
    def name(self) -> str:
        """Environment name."""
        return "AMP-v0"

    def get_obs(self, state: EnvState, env_params: EnvParams) -> chex.Array:
        """Applies observation function to state."""
        final_token = state.tokens[:, -1]
        # A sequence that already ends in PAD or EOS is extended with PAD;
        # any other sequence is closed with EOS.
        ends_in_special = (final_token == self.pad_token) | (final_token == self.eos_token)
        suffix = jnp.where(ends_in_special, self.pad_token, self.eos_token)
        # Add a trailing axis so the suffix concatenates as one extra column.
        return jnp.concat([state.tokens, suffix[:, None]], axis=-1)

`name` `property`

Environment name.

`get_obs(state, env_params)`

Applies observation function to state.

Source code in gfnx/environment/amp.py

def get_obs(self, state: EnvState, env_params: EnvParams) -> chex.Array:
    """Applies observation function to state.

    The observation is the token sequence with one extra trailing column.
    """
    final_token = state.tokens[:, -1]
    # A sequence that already ends in PAD or EOS is extended with PAD;
    # any other sequence is closed with EOS.
    ends_in_special = (final_token == self.pad_token) | (final_token == self.eos_token)
    suffix = jnp.where(ends_in_special, self.pad_token, self.eos_token)
    # Add a trailing axis so the suffix concatenates as one extra column.
    return jnp.concat([state.tokens, suffix[:, None]], axis=-1)

GFP environment

`GFPEnvironment`

Bases: FixedAutoregressiveSequenceEnvironment

Source code in gfnx/environment/gfp.py

class GFPEnvironment(FixedAutoregressiveSequenceEnvironment):
    """
    Fixed-length autoregressive sequence environment with ``max_length=237``
    over the ``AMINO_ACIDS`` characters.
    """

    def __init__(self, reward_module: TRewardModule) -> None:
        # Token ids follow the order of the full alphabet (characters + specials).
        self.char_to_id = {
            symbol: index for index, symbol in enumerate(PROTEINS_FULL_ALPHABET)
        }
        ids = self.char_to_id

        super().__init__(
            reward_module,
            max_length=237,
            nchar=len(AMINO_ACIDS),
            ntoken=len(PROTEINS_FULL_ALPHABET),
            bos_token=ids["[BOS]"],
            eos_token=ids["[EOS]"],
            pad_token=ids["[PAD]"],
        )

    @property
    def name(self) -> str:
        """Environment name."""
        return "GFP-v0"

`name` `property`

Environment name.

Environment API Reference

General sequence environment

AutoregressiveSequenceEnvironment

action_space property

backward_action_space property

get_backward_action(state, forward_action, next_state, env_params)

get_forward_action(state, backward_action, prev_state, env_params)

get_invalid_backward_mask(state, env_params)

get_invalid_mask(state, env_params)

EnvState

from_tokens(tokens) classmethod

FixedAutoregressiveSequenceEnvironment

action_space property

backward_action_space property

get_backward_action(state, forward_action, next_state, env_params)

get_forward_action(state, backward_action, prev_state, env_params)

get_invalid_backward_mask(state, env_params)

get_invalid_mask(state, env_params)

FixedPrependAppendSequenceEnvironment

action_space property

backward_action_space property

get_backward_action(state, forward_action, next_state, env_params)

get_forward_action(state, backward_action, prev_state, env_params)

get_invalid_backward_mask(state, env_params)

get_invalid_mask(state, env_params)

NonAutoregressiveSequenceEnvironment

action_space property

backward_action_space property

get_backward_action(state, forward_action, next_state, env_params)

get_forward_action(state, backward_action, prev_state, env_params)

get_invalid_backward_mask(state, env_params)

get_invalid_mask(state, env_params)

SequenceEnvironment

name property

observation_space property

state_space property

get_invalid_mask(state, env_params)

get_obs(state, env_params)

Bit Sequence environment

BitseqEnvironment

name property

TFBind-8 environment

TFBind8Environment

is_enumerable property

is_ground_truth_sampling_tractable property

is_mean_reward_tractable property

is_normalizing_constant_tractable property

name property

get_empirical_distribution(states, env_params)

get_ground_truth_sampling(rng_key, batch_size, env_params)

get_mean_reward(env_params)

get_normalizing_constant(env_params)

get_true_distribution(env_params)

QM9 Small environment

QM9SmallEnvironment

is_enumerable property

is_mean_reward_tractable property

is_normalizing_constant_tractable property

name property

get_empirical_distribution(states, env_params)

get_mean_reward(env_params)

get_normalizing_constant(env_params)

get_true_distribution(env_params)

AMP environment

AMPEnvironment

name property

get_obs(state, env_params)

GFP environment

GFPEnvironment

name property

`AutoregressiveSequenceEnvironment`

`action_space` `property`

`backward_action_space` `property`

`get_backward_action(state, forward_action, next_state, env_params)`

`get_forward_action(state, backward_action, prev_state, env_params)`

`get_invalid_backward_mask(state, env_params)`

`get_invalid_mask(state, env_params)`

`EnvState`

`from_tokens(tokens)` `classmethod`

`FixedAutoregressiveSequenceEnvironment`

`action_space` `property`

`backward_action_space` `property`

`get_backward_action(state, forward_action, next_state, env_params)`

`get_forward_action(state, backward_action, prev_state, env_params)`

`get_invalid_backward_mask(state, env_params)`

`get_invalid_mask(state, env_params)`

`FixedPrependAppendSequenceEnvironment`

`action_space` `property`

`backward_action_space` `property`

`get_backward_action(state, forward_action, next_state, env_params)`

`get_forward_action(state, backward_action, prev_state, env_params)`

`get_invalid_backward_mask(state, env_params)`

`get_invalid_mask(state, env_params)`

`NonAutoregressiveSequenceEnvironment`

`action_space` `property`

`backward_action_space` `property`

`get_backward_action(state, forward_action, next_state, env_params)`

`get_forward_action(state, backward_action, prev_state, env_params)`

`get_invalid_backward_mask(state, env_params)`

`get_invalid_mask(state, env_params)`

`SequenceEnvironment`

`name` `property`

`observation_space` `property`

`state_space` `property`

`get_invalid_mask(state, env_params)`

`get_obs(state, env_params)`

`BitseqEnvironment`

`name` `property`

`TFBind8Environment`

`is_enumerable` `property`

`is_ground_truth_sampling_tractable` `property`

`is_mean_reward_tractable` `property`

`is_normalizing_constant_tractable` `property`

`name` `property`

`get_empirical_distribution(states, env_params)`

`get_ground_truth_sampling(rng_key, batch_size, env_params)`

`get_mean_reward(env_params)`

`get_normalizing_constant(env_params)`

`get_true_distribution(env_params)`

`QM9SmallEnvironment`

`is_enumerable` `property`

`is_mean_reward_tractable` `property`

`is_normalizing_constant_tractable` `property`

`name` `property`

`get_empirical_distribution(states, env_params)`

`get_mean_reward(env_params)`

`get_normalizing_constant(env_params)`

`get_true_distribution(env_params)`

`AMPEnvironment`

`name` `property`

`get_obs(state, env_params)`

`GFPEnvironment`

`name` `property`