Weighted loss function in EdgeFeatureGraphCRF by al13n321 · Pull Request #95 · pystruct/pystruct · GitHub
Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions pystruct/learners/frankwolfe_ssvm.py
6 changes: 3 additions & 3 deletions pystruct/learners/latent_structured_svm.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,9 +146,9 @@ def score(self, X, Y):
score : float
Average of 1 - loss over training examples.
"""
losses = [self.model.base_loss(y, y_pred)
for y, y_pred in zip(Y, self.predict(X))]
max_losses = [self.model.max_loss(y) for y in Y]
losses = [self.model.base_loss(x, y, y_pred)
for x, y, y_pred in zip(X, Y, self.predict(X))]
max_losses = [self.model.max_loss(x, y) for y in Y]
return 1. - np.sum(losses) / float(np.sum(max_losses))

@property
Expand Down
4 changes: 2 additions & 2 deletions pystruct/learners/one_slack_ssvm.py
Original file line number Diff line number Diff line change
Expand Up @@ -301,7 +301,7 @@ def constraint_equal(y_1, y_2):
# the idea is that if we cache, inference is way more expensive
# and this doesn't matter much.
sample.append((self.model.psi(x, y_hat),
self.model.loss(y, y_hat), y_hat))
self.model.loss(x, y, y_hat), y_hat))

def _constraint_from_cache(self, X, Y, psi_gt, constraints):
if (not getattr(self, 'inference_cache_', False) or
Expand Down Expand Up @@ -358,7 +358,7 @@ def _find_new_constraint(self, X, Y, psi_gt, constraints, check=True):
else:
dpsi = (psi_gt - self.model.batch_psi(X, Y_hat)) / len(X)

loss_mean = np.mean(self.model.batch_loss(Y, Y_hat))
loss_mean = np.mean(self.model.batch_loss(X, Y, Y_hat))

violation = loss_mean - np.dot(self.w, dpsi)
if check and self._check_bad_constraint(
Expand Down
8 changes: 4 additions & 4 deletions pystruct/learners/ssvm.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,11 +62,11 @@ def score(self, X, Y):
Average of 1 - loss over training examples.
"""
if hasattr(self.model, 'batch_loss'):
losses = self.model.batch_loss(Y, self.predict(X))
losses = self.model.batch_loss(X, Y, self.predict(X))
else:
losses = [self.model.loss(y, y_pred)
for y, y_pred in zip(Y, self.predict(X))]
max_losses = [self.model.max_loss(y) for y in Y]
losses = [self.model.loss(x, y, y_pred)
for x, y, y_pred in zip(X, Y, self.predict(X))]
max_losses = [self.model.max_loss(x, y) for x, y in zip(X, Y)]
return 1. - np.sum(losses) / float(np.sum(max_losses))

def _compute_training_loss(self, X, Y, iteration):
Expand Down
6 changes: 3 additions & 3 deletions pystruct/learners/structured_perceptron.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ def fit(self, X, Y, initialize=True):
w_bar = np.zeros(size_psi)
n_obs = 0
self.loss_curve_ = []
max_losses = np.sum([self.model.max_loss(y) for y in Y])
max_losses = np.sum([self.model.max_loss(x, y) for x, y in zip(X, Y)])
try:
for iteration in xrange(self.max_iter):
if self.average == -1:
Expand All @@ -129,7 +129,7 @@ def fit(self, X, Y, initialize=True):
delayed(inference)(self.model, x, self.w) for x, y in
zip(X, Y)))
for x, y, y_hat in zip(X, Y, Y_hat):
current_loss = self.model.loss(y, y_hat)
current_loss = self.model.loss(x, y, y_hat)
losses += current_loss
if current_loss:
self.w += effective_lr * (self.model.psi(x, y) -
Expand All @@ -142,7 +142,7 @@ def fit(self, X, Y, initialize=True):
# standard online update
for x, y in zip(X, Y):
y_hat = self.model.inference(x, self.w)
current_loss = self.model.loss(y, y_hat)
current_loss = self.model.loss(x, y, y_hat)
losses += current_loss
if current_loss:
self.w += effective_lr * (self.model.psi(x, y) -
Expand Down
10 changes: 5 additions & 5 deletions pystruct/learners/subgradient_latent_ssvm.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ def fit(self, X, Y, H_init=None, warm_start=False, initialize=True):
delta_psi = (self.model.psi(x, h)
- self.model.psi(x, h_hat))
slack = (-np.dot(delta_psi, w)
+ self.model.loss(h, h_hat))
+ self.model.loss(x, h, h_hat))
objective += np.maximum(slack, 0)
if slack > 0:
positive_slacks += 1
Expand Down Expand Up @@ -263,11 +263,11 @@ def score(self, X, Y):
"""
if hasattr(self.model, 'batch_loss'):
losses = self.model.batch_loss(
Y, self.model.batch_inference(X, self.w))
X, Y, self.model.batch_inference(X, self.w))
else:
losses = [self.model.loss(y, self.model.inference(y, self.w))
for y, y_pred in zip(Y, self.predict(X))]
max_losses = [self.model.max_loss(y) for y in Y]
losses = [self.model.loss(x, y, self.model.inference(y, self.w))
for x, y, y_pred in zip(X, Y, self.predict(X))]
max_losses = [self.model.max_loss(x, y) for x, y in zip(X, Y)]
return 1. - np.sum(losses) / float(np.sum(max_losses))

def _objective(self, X, Y):
Expand Down
2 changes: 1 addition & 1 deletion pystruct/learners/subgradient_ssvm.py
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,7 @@ def _sequential_learning(self, X, Y, w):
X_b, Y_b, w, relaxed=True)
delta_psi = (self.model.batch_psi(X_b, Y_b)
- self.model.batch_psi(X_b, Y_hat))
loss = np.sum(self.model.batch_loss(Y_b, Y_hat))
loss = np.sum(self.model.batch_loss(X_b, Y_b, Y_hat))

violation = np.maximum(0, loss - np.dot(w, delta_psi))
objective += violation
Expand Down
37 changes: 24 additions & 13 deletions pystruct/models/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,25 +54,34 @@ def batch_inference(self, X, w, relaxed=None):
return [self.inference(x, w, relaxed=relaxed)
for x in X]

def loss(self, x, y, y_hat, node_weights=None):
    """Return the (optionally node-weighted) Hamming loss of y_hat.

    Parameters
    ----------
    x : object
        Input sample. Unused by this implementation; only forwarded to
        ``continuous_loss``.

    y : ndarray
        Ground truth labels.

    y_hat : ndarray or tuple
        Predicted labels. If a tuple is given, its first element is
        handed to ``continuous_loss`` instead.

    node_weights : None or ndarray
        Per-node factors multiplied into the mismatch indicator.
        ``None`` means every node counts with weight 1.

    Returns
    -------
    loss : number
        Sum over nodes of (class weight, if the model has a
        ``class_weight`` attribute) * (y != y_hat) * node weight.
    """
    if isinstance(y_hat, tuple):
        # Only forward the weights when they were given, so that
        # continuous_loss overrides without a node_weights parameter
        # keep working in the unweighted case.
        if node_weights is None:
            return self.continuous_loss(x, y, y_hat[0])
        return self.continuous_loss(x, y, y_hat[0], node_weights)
    # ``is None`` rather than ``== None``: comparing an ndarray with ``==``
    # is elementwise and cannot be used as an ``if`` condition.
    if node_weights is None:
        node_weights = 1
    if hasattr(self, 'class_weight'):
        return np.sum(self.class_weight[y] * (y != y_hat) * node_weights)
    return np.sum((y != y_hat) * node_weights)

def batch_loss(self, X, Y, Y_hat):
    """Return a list with ``self.loss(x, y, y_hat)`` for each sample.

    Default implementation: the three sequences are walked in lockstep
    and the per-sample losses are collected in order.
    """
    losses = []
    for x, y, y_hat in zip(X, Y, Y_hat):
        losses.append(self.loss(x, y, y_hat))
    return losses

def max_loss(self, x, y, node_weights=None):
    """Return the maximum possible loss on y (used for macro averages).

    Parameters
    ----------
    x : object
        Input sample. Unused by this implementation.

    y : ndarray
        Ground truth labels.

    node_weights : None or ndarray
        Per-node weights; ``None`` means every node has weight 1.

    Returns
    -------
    max_loss : number
        With a ``class_weight`` attribute: sum of
        ``class_weight[y] * node_weights``. Otherwise the sum of the node
        weights, i.e. ``y.size`` when no weights are given.
    """
    if hasattr(self, 'class_weight'):
        # ``is None`` rather than ``== None``: comparing an ndarray with
        # ``==`` is elementwise and cannot be used as an ``if`` condition.
        if node_weights is None:
            node_weights = 1
        return np.sum(self.class_weight[y] * node_weights)
    if node_weights is None:
        return y.size
    # np.sum reduces over all axes, so multi-dimensional weights also
    # yield a scalar.
    return np.sum(node_weights)

def continuous_loss(self, x, y, y_hat, node_weights=None):
# continuous version of the loss
# y_hat is the result of linear programming
if y.ndim == 2:
Expand All @@ -81,9 +90,11 @@ def continuous_loss(self, y, y_hat):

# all entries minus correct ones
result = 1 - y_hat[gx, y]
if node_weights == None:
node_weights = 1
if hasattr(self, 'class_weight'):
return np.sum(self.class_weight[y] * result)
return np.sum(result)
return np.sum(self.class_weight[y] * result * node_weights)
return np.sum(result * node_weights)

def loss_augmented_inference(self, x, y, w, relaxed=None):
print("FALLBACK no loss augmented inference found")
Expand Down
17 changes: 13 additions & 4 deletions pystruct/models/crf.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from .base import StructuredModel
from ..inference import inference_dispatch, get_installed
from .utils import loss_augment_unaries
from .utils import loss_augment_unaries, loss_augment_unaries_weighted


class CRF(StructuredModel):
Expand Down Expand Up @@ -53,11 +53,12 @@ def _check_size_x(self, x):
% (self.n_features, features.shape[1]))

def loss_augmented_inference(self, x, y, w, relaxed=False,
return_energy=False):
return_energy=False,
loss_weights=None):
"""Loss-augmented Inference for x relative to y using parameters w.

Finds (approximately)
armin_y_hat np.dot(w, psi(x, y_hat)) + loss(y, y_hat)
argmax_y_hat np.dot(w, psi(x, y_hat)) + loss(x, y, y_hat)
using self.inference_method.


Expand Down Expand Up @@ -85,6 +86,9 @@ def loss_augmented_inference(self, x, y, w, relaxed=False,
return_energy : bool, default=False
Whether to return the energy of the solution (x, y) that was found.

loss_weights : None or ndarray with shape=(n_nodes,)
Node weights for weighted hamming loss.

Returns
-------
y_pred : ndarray or tuple
Expand All @@ -103,7 +107,12 @@ def loss_augmented_inference(self, x, y, w, relaxed=False,
unary_potentials = self._get_unary_potentials(x, w)
pairwise_potentials = self._get_pairwise_potentials(x, w)
edges = self._get_edges(x)
loss_augment_unaries(unary_potentials, np.asarray(y), self.class_weight)
if loss_weights == None:
loss_augment_unaries(unary_potentials, np.asarray(y),
self.class_weight)
else:
loss_augment_unaries_weighted(unary_potentials, np.asarray(y),
self.class_weight, loss_weights)

return inference_dispatch(unary_potentials, pairwise_potentials, edges,
self.inference_method, relaxed=relaxed,
Expand Down
36 changes: 34 additions & 2 deletions pystruct/models/edge_feature_graph_crf.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from .graph_crf import GraphCRF
from .crf import CRF
from .base import StructuredModel


class EdgeFeatureGraphCRF(GraphCRF):
Expand Down Expand Up @@ -54,12 +55,17 @@ class EdgeFeatureGraphCRF(GraphCRF):
antisymmetric_edge_features : None or list
Indices of edge features that are forced to be anti-symmetric.

weighted_loss : bool
If True, ``x`` is represented as a tuple ``(node_features, edges,
edge_features, node_weights)``, and loss is calculated as sum(node_weights[y != y_hat]) instead of just sum(y != y_hat).
"""
def __init__(self, n_states=None, n_features=None, n_edge_features=None,
inference_method=None, class_weight=None,
symmetric_edge_features=None,
antisymmetric_edge_features=None):
antisymmetric_edge_features=None,
weighted_loss=False):
self.n_edge_features = n_edge_features
self.weighted_loss = weighted_loss

if symmetric_edge_features is None:
symmetric_edge_features = []
Expand Down Expand Up @@ -108,17 +114,27 @@ def __repr__(self):
def _check_size_x(self, x):
GraphCRF._check_size_x(self, x)

_, edges, edge_features = x
edges = x[1]
edge_features = x[2]
if edges.shape[0] != edge_features.shape[0]:
raise ValueError("Got %d edges but %d edge features."
% (edges.shape[0], edge_features.shape[0]))
if edge_features.shape[1] != self.n_edge_features:
raise ValueError("Got edge features of size %d, but expected %d."
% (edge_features.shape[1], self.n_edge_features))
if self.weighted_loss:
node_features = x[0]
node_weights = x[3]
if (node_features.shape[0],) != node_weights.shape:
raise ValueError("Got node weights of shape %s, but expected %d."
% (node_weights.shape, node_features.shape[0]))

def _get_edge_features(self, x):
return x[2]

def _get_node_weights(self, x):
return x[3] if self.weighted_loss else None

def _get_pairwise_potentials(self, x, w):
"""Computes pairwise potentials for x and w.

Expand Down Expand Up @@ -204,3 +220,19 @@ def psi(self, x, y):

psi_vector = np.hstack([unaries_acc.ravel(), pw.ravel()])
return psi_vector

def loss_augmented_inference(self, x, y, w, relaxed=False,
                             return_energy=False):
    """Run CRF loss-augmented inference with this sample's node weights.

    The node weights obtained from ``self._get_node_weights(x)`` are
    handed to ``CRF.loss_augmented_inference`` as its last positional
    argument; all other arguments are passed through unchanged.
    """
    node_weights = self._get_node_weights(x)
    return CRF.loss_augmented_inference(
        self, x, y, w, relaxed, return_energy, node_weights)

def loss(self, x, y, y_hat):
    """Return ``StructuredModel.loss`` using the node weights from ``x``."""
    weights = self._get_node_weights(x)
    return StructuredModel.loss(self, x, y, y_hat, node_weights=weights)

def max_loss(self, x, y):
    """Return ``StructuredModel.max_loss`` using the node weights from ``x``."""
    weights = self._get_node_weights(x)
    return StructuredModel.max_loss(self, x, y, node_weights=weights)

def continuous_loss(self, x, y, y_hat, node_weights=None):
    """Return ``StructuredModel.continuous_loss`` with node weights applied.

    Parameters
    ----------
    x : tuple
        Input sample; the node weights are read from it via
        ``self._get_node_weights(x)`` when ``node_weights`` is not given.

    y : ndarray
        Ground truth labels.

    y_hat : ndarray
        Relaxed prediction, passed through to the base implementation.

    node_weights : None or ndarray
        Explicit node weights. ``StructuredModel.loss`` passes the weights
        as a fourth positional argument when it receives a relaxed (tuple)
        prediction, so this override has to accept them; without the
        parameter that call raises a TypeError.
    """
    if node_weights is None:
        node_weights = self._get_node_weights(x)
    return StructuredModel.continuous_loss(self, x, y, y_hat,
                                           node_weights=node_weights)
4 changes: 2 additions & 2 deletions pystruct/models/grid_crf.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,11 +70,11 @@ def loss_augmented_inference(self, x, y, w, relaxed=False,
return_energy=return_energy)
return self._reshape_y(y_hat, x.shape, return_energy)

def continuous_loss(self, y, y_hat):
def continuous_loss(self, x, y, y_hat):
# continuous version of the loss
# y_hat is the result of linear programming
return GraphCRF.continuous_loss(
self, y.ravel(), y_hat.reshape(-1, y_hat.shape[-1]))
self, x, y.ravel(), y_hat.reshape(-1, y_hat.shape[-1]))


class DirectionalGridCRF(GridCRF, EdgeFeatureGraphCRF):
Expand Down
16 changes: 8 additions & 8 deletions pystruct/models/latent_graph_crf.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,22 +184,22 @@ def latent(self, x, y, w):
h = np.hstack([0, np.cumsum(self.n_states_per_label)])[y]
return h

def loss(self, x, h, h_hat):
    """Return the loss between latent states ``h`` and ``h_hat``.

    A tuple ``h_hat`` is treated as a relaxed result and its first element
    goes to ``continuous_loss``. Otherwise both arguments are converted
    with ``label_from_latent`` and compared using ``GraphCRF.loss``.
    """
    if not isinstance(h_hat, tuple):
        y = self.label_from_latent(h)
        y_hat = self.label_from_latent(h_hat)
        return GraphCRF.loss(self, x, y, y_hat)
    return self.continuous_loss(x, h, h_hat[0])

def continuous_loss(self, x, y, y_hat):
    """Return the continuous loss after collapsing states onto labels.

    ``y_hat`` is the result of linear programming. Its per-state columns
    are accumulated into the column given by ``self._states_map`` for
    that state, and ``y`` is converted with ``label_from_latent``, before
    delegating to ``GraphCRF.continuous_loss``.
    """
    label_scores = np.zeros((y_hat.shape[0], self.n_labels))
    for state in xrange(self.n_states):
        label_scores[:, self._states_map[state]] += y_hat[:, state]
    labels = self.label_from_latent(y)
    return GraphCRF.continuous_loss(self, x, labels, label_scores)

def base_loss(self, x, y, y_hat):
    """Return the GraphCRF loss on ``y`` and ``y_hat`` as given.

    A tuple ``y_hat`` selects ``GraphCRF.continuous_loss``; anything else
    selects ``GraphCRF.loss``. The arguments are forwarded unchanged.
    """
    if isinstance(y_hat, tuple):
        loss_function = GraphCRF.continuous_loss
    else:
        loss_function = GraphCRF.loss
    return loss_function(self, x, y, y_hat)
5 changes: 3 additions & 2 deletions pystruct/models/latent_grid_crf.py
Loading