pystruct · al13n321 · Oct 28, 2013 · Oct 28, 2013 · Oct 28, 2013 · Oct 29, 2013
diff --git a/pystruct/learners/frankwolfe_ssvm.py b/pystruct/learners/frankwolfe_ssvm.py
@@ -130,7 +130,7 @@ def _calc_dual_gap(self, X, Y):
         Y_hat = self.model.batch_loss_augmented_inference(X, Y, self.w,
                                                           relaxed=True)
         dpsi = psi_gt - self.model.batch_psi(X, Y_hat)
-        ls = np.sum(self.model.batch_loss(Y, Y_hat))
+        ls = np.sum(self.model.batch_loss(X, Y, Y_hat))
         ws = dpsi * self.C
         l_rescaled = self.l * n_samples * self.C
 
@@ -156,7 +156,7 @@ def _frank_wolfe_batch(self, X, Y):
             Y_hat = self.model.batch_loss_augmented_inference(X, Y, self.w,
                                                               relaxed=True)
             dpsi = psi_gt - self.model.batch_psi(X, Y_hat)
-            ls = np.mean(self.model.batch_loss(Y, Y_hat))
+            ls = np.mean(self.model.batch_loss(X, Y, Y_hat))
             ws = dpsi * self.C
 
             w_diff = self.w - ws

diff --git a/pystruct/learners/latent_structured_svm.py b/pystruct/learners/latent_structured_svm.py
@@ -146,9 +146,9 @@ def score(self, X, Y):
         score : float
             Average of 1 - loss over training examples.
         """
-        losses = [self.model.base_loss(y, y_pred)
-                  for y, y_pred in zip(Y, self.predict(X))]
-        max_losses = [self.model.max_loss(y) for y in Y]
+        losses = [self.model.base_loss(x, y, y_pred)
+                  for x, y, y_pred in zip(X, Y, self.predict(X))]
+        max_losses = [self.model.max_loss(x, y) for x, y in zip(X, Y)]
         return 1. - np.sum(losses) / float(np.sum(max_losses))
 
     @property

diff --git a/pystruct/learners/one_slack_ssvm.py b/pystruct/learners/one_slack_ssvm.py
@@ -301,7 +301,7 @@ def constraint_equal(y_1, y_2):
             # the idea is that if we cache, inference is way more expensive
             # and this doesn't matter much.
             sample.append((self.model.psi(x, y_hat),
-                           self.model.loss(y, y_hat), y_hat))
+                           self.model.loss(x, y, y_hat), y_hat))
 
     def _constraint_from_cache(self, X, Y, psi_gt, constraints):
         if (not getattr(self, 'inference_cache_', False) or
@@ -358,7 +358,7 @@ def _find_new_constraint(self, X, Y, psi_gt, constraints, check=True):
         else:
             dpsi = (psi_gt - self.model.batch_psi(X, Y_hat)) / len(X)
 
-        loss_mean = np.mean(self.model.batch_loss(Y, Y_hat))
+        loss_mean = np.mean(self.model.batch_loss(X, Y, Y_hat))
 
         violation = loss_mean - np.dot(self.w, dpsi)
         if check and self._check_bad_constraint(

diff --git a/pystruct/learners/ssvm.py b/pystruct/learners/ssvm.py
@@ -62,11 +62,11 @@ def score(self, X, Y):
             Average of 1 - loss over training examples.
         """
         if hasattr(self.model, 'batch_loss'):
-            losses = self.model.batch_loss(Y, self.predict(X))
+            losses = self.model.batch_loss(X, Y, self.predict(X))
         else:
-            losses = [self.model.loss(y, y_pred)
-                      for y, y_pred in zip(Y, self.predict(X))]
-        max_losses = [self.model.max_loss(y) for y in Y]
+            losses = [self.model.loss(x, y, y_pred)
+                      for x, y, y_pred in zip(X, Y, self.predict(X))]
+        max_losses = [self.model.max_loss(x, y) for x, y in zip(X, Y)]
         return 1. - np.sum(losses) / float(np.sum(max_losses))
 
     def _compute_training_loss(self, X, Y, iteration):

diff --git a/pystruct/learners/structured_perceptron.py b/pystruct/learners/structured_perceptron.py
@@ -111,7 +111,7 @@ def fit(self, X, Y, initialize=True):
             w_bar = np.zeros(size_psi)
             n_obs = 0
         self.loss_curve_ = []
-        max_losses = np.sum([self.model.max_loss(y) for y in Y])
+        max_losses = np.sum([self.model.max_loss(x, y) for x, y in zip(X, Y)])
         try:
             for iteration in xrange(self.max_iter):
                 if self.average == -1:
@@ -129,7 +129,7 @@ def fit(self, X, Y, initialize=True):
                         delayed(inference)(self.model, x, self.w) for x, y in
                         zip(X, Y)))
                     for x, y, y_hat in zip(X, Y, Y_hat):
-                        current_loss = self.model.loss(y, y_hat)
+                        current_loss = self.model.loss(x, y, y_hat)
                         losses += current_loss
                         if current_loss:
                             self.w += effective_lr * (self.model.psi(x, y) -
@@ -142,7 +142,7 @@ def fit(self, X, Y, initialize=True):
                     # standard online update
                     for x, y in zip(X, Y):
                         y_hat = self.model.inference(x, self.w)
-                        current_loss = self.model.loss(y, y_hat)
+                        current_loss = self.model.loss(x, y, y_hat)
                         losses += current_loss
                         if current_loss:
                             self.w += effective_lr * (self.model.psi(x, y) -

diff --git a/pystruct/learners/subgradient_latent_ssvm.py b/pystruct/learners/subgradient_latent_ssvm.py
@@ -164,7 +164,7 @@ def fit(self, X, Y, H_init=None, warm_start=False, initialize=True):
                         delta_psi = (self.model.psi(x, h)
                                      - self.model.psi(x, h_hat))
                         slack = (-np.dot(delta_psi, w)
-                                 + self.model.loss(h, h_hat))
+                                 + self.model.loss(x, h, h_hat))
                         objective += np.maximum(slack, 0)
                         if slack > 0:
                             positive_slacks += 1
@@ -263,11 +263,11 @@ def score(self, X, Y):
         """
         if hasattr(self.model, 'batch_loss'):
             losses = self.model.batch_loss(
-                Y, self.model.batch_inference(X, self.w))
+                X, Y, self.model.batch_inference(X, self.w))
         else:
-            losses = [self.model.loss(y, self.model.inference(y, self.w))
-                      for y, y_pred in zip(Y, self.predict(X))]
-        max_losses = [self.model.max_loss(y) for y in Y]
+            losses = [self.model.loss(x, y, self.model.inference(x, self.w))
+                      for x, y in zip(X, Y)]
+        max_losses = [self.model.max_loss(x, y) for x, y in zip(X, Y)]
         return 1. - np.sum(losses) / float(np.sum(max_losses))
 
     def _objective(self, X, Y):

diff --git a/pystruct/learners/subgradient_ssvm.py b/pystruct/learners/subgradient_ssvm.py
@@ -290,7 +290,7 @@ def _sequential_learning(self, X, Y, w):
                     X_b, Y_b, w, relaxed=True)
                 delta_psi = (self.model.batch_psi(X_b, Y_b)
                              - self.model.batch_psi(X_b, Y_hat))
-                loss = np.sum(self.model.batch_loss(Y_b, Y_hat))
+                loss = np.sum(self.model.batch_loss(X_b, Y_b, Y_hat))
 
                 violation = np.maximum(0, loss - np.dot(w, delta_psi))
                 objective += violation

diff --git a/pystruct/models/base.py b/pystruct/models/base.py
@@ -54,25 +54,34 @@ def batch_inference(self, X, w, relaxed=None):
         return [self.inference(x, w, relaxed=relaxed)
                 for x in X]
 
-    def loss(self, y, y_hat):
+    def loss(self, x, y, y_hat, node_weights=None):
         # hamming loss:
         if isinstance(y_hat, tuple):
-            return self.continuous_loss(y, y_hat[0])
+            if node_weights is None:
+                return self.continuous_loss(x, y, y_hat[0])
+            else:
+                return self.continuous_loss(x, y, y_hat[0], node_weights)
+        if node_weights is None:
+            node_weights = 1  # neutral factor: unweighted loss
         if hasattr(self, 'class_weight'):
-            return np.sum(self.class_weight[y] * (y != y_hat))
-        return np.sum(y != y_hat)
+            return np.sum(self.class_weight[y] * (y != y_hat) * node_weights)
+        return np.sum((y != y_hat) * node_weights)
 
-    def batch_loss(self, Y, Y_hat):
+    def batch_loss(self, X, Y, Y_hat):
         # default implementation of batch loss
-        return [self.loss(y, y_hat) for y, y_hat in zip(Y, Y_hat)]
+        return [self.loss(x, y, y_hat) for x, y, y_hat in zip(X, Y, Y_hat)]
 
-    def max_loss(self, y):
+    def max_loss(self, x, y, node_weights=None):
         # maximum possible los on y for macro averages
         if hasattr(self, 'class_weight'):
-            return np.sum(self.class_weight[y])
-        return y.size
-
-    def continuous_loss(self, y, y_hat):
+            if node_weights is None:
+                node_weights = 1  # neutral factor: unweighted loss
+            return np.sum(self.class_weight[y] * node_weights)
+        if node_weights is None:
+            return y.size
+        return np.sum(node_weights)
+
+    def continuous_loss(self, x, y, y_hat, node_weights=None):
         # continuous version of the loss
         # y is the result of linear programming
         if y.ndim == 2:
@@ -81,9 +90,11 @@ def continuous_loss(self, y, y_hat):
 
         # all entries minus correct ones
         result = 1 - y_hat[gx, y]
+        if node_weights is None:
+            node_weights = 1  # neutral factor: unweighted loss
         if hasattr(self, 'class_weight'):
-            return np.sum(self.class_weight[y] * result)
-        return np.sum(result)
+            return np.sum(self.class_weight[y] * result * node_weights)
+        return np.sum(result * node_weights)
 
     def loss_augmented_inference(self, x, y, w, relaxed=None):
         print("FALLBACK no loss augmented inference found")

diff --git a/pystruct/models/crf.py b/pystruct/models/crf.py
@@ -2,7 +2,7 @@
 
 from .base import StructuredModel
 from ..inference import inference_dispatch, get_installed
-from .utils import loss_augment_unaries
+from .utils import loss_augment_unaries, loss_augment_unaries_weighted
 
 
 class CRF(StructuredModel):
@@ -53,11 +53,12 @@ def _check_size_x(self, x):
                              % (self.n_features, features.shape[1]))
 
     def loss_augmented_inference(self, x, y, w, relaxed=False,
-                                 return_energy=False):
+                                 return_energy=False,
+                                 loss_weights=None):
         """Loss-augmented Inference for x relative to y using parameters w.
 
         Finds (approximately)
-        armin_y_hat np.dot(w, psi(x, y_hat)) + loss(y, y_hat)
+        argmax_y_hat np.dot(w, psi(x, y_hat)) + loss(x, y, y_hat)
         using self.inference_method.
 
 
@@ -85,6 +86,9 @@ def loss_augmented_inference(self, x, y, w, relaxed=False,
         return_energy : bool, default=False
             Whether to return the energy of the solution (x, y) that was found.
 
+        loss_weights : None or ndarray with shape=(n_nodes,)
+            Node weights for weighted hamming loss.
+
         Returns
         -------
         y_pred : ndarray or tuple
@@ -103,7 +107,12 @@ def loss_augmented_inference(self, x, y, w, relaxed=False,
         unary_potentials = self._get_unary_potentials(x, w)
         pairwise_potentials = self._get_pairwise_potentials(x, w)
         edges = self._get_edges(x)
-        loss_augment_unaries(unary_potentials, np.asarray(y), self.class_weight)
+        if loss_weights is None:
+            loss_augment_unaries(unary_potentials, np.asarray(y),
+                                 self.class_weight)
+        else:
+            loss_augment_unaries_weighted(unary_potentials, np.asarray(y),
+                                          self.class_weight, loss_weights)
 
         return inference_dispatch(unary_potentials, pairwise_potentials, edges,
                                   self.inference_method, relaxed=relaxed,

diff --git a/pystruct/models/edge_feature_graph_crf.py b/pystruct/models/edge_feature_graph_crf.py
@@ -2,6 +2,7 @@
 
 from .graph_crf import GraphCRF
 from .crf import CRF
+from .base import StructuredModel
 
 
 class EdgeFeatureGraphCRF(GraphCRF):
@@ -54,12 +55,17 @@ class EdgeFeatureGraphCRF(GraphCRF):
     antisymmetric_edge_features : None or list
         Indices of edge features that are forced to be anti-symmetric.
 
+    weighted_loss : bool
+        If True, ``x`` is a tuple ``(node_features, edges, edge_features,
+        node_weights)``; loss is sum(node_weights[y != y_hat]), not sum(y != y_hat).
     """
     def __init__(self, n_states=None, n_features=None, n_edge_features=None,
                  inference_method=None, class_weight=None,
                  symmetric_edge_features=None,
-                 antisymmetric_edge_features=None):
+                 antisymmetric_edge_features=None,
+                 weighted_loss=False):
         self.n_edge_features = n_edge_features
+        self.weighted_loss = weighted_loss
 
         if symmetric_edge_features is None:
             symmetric_edge_features = []
@@ -108,17 +114,27 @@ def __repr__(self):
     def _check_size_x(self, x):
         GraphCRF._check_size_x(self, x)
 
-        _, edges, edge_features = x
+        edges = x[1]
+        edge_features = x[2]
         if edges.shape[0] != edge_features.shape[0]:
             raise ValueError("Got %d edges but %d edge features."
                              % (edges.shape[0], edge_features.shape[0]))
         if edge_features.shape[1] != self.n_edge_features:
             raise ValueError("Got edge features of size %d, but expected %d."
                              % (edge_features.shape[1], self.n_edge_features))
+        if self.weighted_loss:
+            node_features = x[0]
+            node_weights = x[3]
+            if (node_features.shape[0],) != node_weights.shape:
+                raise ValueError("Got node weights of shape %s, but expected %d."
+                                 % (node_weights.shape, node_features.shape[0]))
 
     def _get_edge_features(self, x):
         return x[2]
 
+    def _get_node_weights(self, x):  # per-node loss weights, or None
+        return x[3] if self.weighted_loss else None
+
     def _get_pairwise_potentials(self, x, w):
         """Computes pairwise potentials for x and w.
 
@@ -204,3 +220,19 @@ def psi(self, x, y):
 
         psi_vector = np.hstack([unaries_acc.ravel(), pw.ravel()])
         return psi_vector
+
+    def loss_augmented_inference(self, x, y, w, relaxed=False,
+                                 return_energy=False):
+        return CRF.loss_augmented_inference(self, x, y, w, relaxed, return_energy, self._get_node_weights(x))
+
+    def loss(self, x, y, y_hat):  # Hamming loss, node-weighted if enabled
+        return StructuredModel.loss(self, x, y, y_hat,
+                                    node_weights=self._get_node_weights(x))
+
+    def max_loss(self, x, y):  # largest possible (weighted) loss on y
+        return StructuredModel.max_loss(self, x, y,
+                                        node_weights=self._get_node_weights(x))
+
+    def continuous_loss(self, x, y, y_hat, node_weights=None):
+        return StructuredModel.continuous_loss(  # weights always come from x
+            self, x, y, y_hat, node_weights=self._get_node_weights(x))
diff --git a/pystruct/models/grid_crf.py b/pystruct/models/grid_crf.py
@@ -70,11 +70,11 @@ def loss_augmented_inference(self, x, y, w, relaxed=False,
                                                   return_energy=return_energy)
         return self._reshape_y(y_hat, x.shape, return_energy)
 
-    def continuous_loss(self, y, y_hat):
+    def continuous_loss(self, x, y, y_hat):
         # continuous version of the loss
         # y_hat is the result of linear programming
         return GraphCRF.continuous_loss(
-            self, y.ravel(), y_hat.reshape(-1, y_hat.shape[-1]))
+            self, x, y.ravel(), y_hat.reshape(-1, y_hat.shape[-1]))
 
 
 class DirectionalGridCRF(GridCRF, EdgeFeatureGraphCRF):

diff --git a/pystruct/models/latent_graph_crf.py b/pystruct/models/latent_graph_crf.py
@@ -184,22 +184,22 @@ def latent(self, x, y, w):
             h = np.hstack([0, np.cumsum(self.n_states_per_label)])[y]
         return h
 
-    def loss(self, h, h_hat):
+    def loss(self, x, h, h_hat):
         if isinstance(h_hat, tuple):
-            return self.continuous_loss(h, h_hat[0])
-        return GraphCRF.loss(self, self.label_from_latent(h),
+            return self.continuous_loss(x, h, h_hat[0])
+        return GraphCRF.loss(self, x, self.label_from_latent(h),
                              self.label_from_latent(h_hat))
 
-    def continuous_loss(self, y, y_hat):
+    def continuous_loss(self, x, y, y_hat):
         # continuous version of the loss
         # y_hat is the result of linear programming
         y_hat_org = np.zeros((y_hat.shape[0], self.n_labels))
         for s in xrange(self.n_states):
             y_hat_org[:, self._states_map[s]] += y_hat[:, s]
         y_org = self.label_from_latent(y)
-        return GraphCRF.continuous_loss(self, y_org, y_hat_org)
+        return GraphCRF.continuous_loss(self, x, y_org, y_hat_org)
 
-    def base_loss(self, y, y_hat):
+    def base_loss(self, x, y, y_hat):
         if isinstance(y_hat, tuple):
-            return GraphCRF.continuous_loss(self, y, y_hat)
-        return GraphCRF.loss(self, y, y_hat)
+            return GraphCRF.continuous_loss(self, x, y, y_hat)
+        return GraphCRF.loss(self, x, y, y_hat)
diff --git a/pystruct/models/latent_grid_crf.py b/pystruct/models/latent_grid_crf.py
@@ -42,11 +42,11 @@ def latent(self, x, y, w):
         res = LatentGraphCRF.latent(self, x, y.ravel(), w)
         return res.reshape(y.shape)
 
-    def continuous_loss(self, y, y_hat):
+    def continuous_loss(self, x, y, y_hat):
         # continuous version of the loss
         # y_hat is the result of linear programming
         return LatentGraphCRF.continuous_loss(
-            self, y.ravel(), y_hat.reshape(-1, y_hat.shape[-1]))
+            self, x, y.ravel(), y_hat.reshape(-1, y_hat.shape[-1]))
 
 
 class LatentDirectionalGridCRF(DirectionalGridCRF, LatentGridCRF):
@@ -65,6 +65,7 @@ def __init__(self, n_labels=None, n_features=None, n_states_per_label=2,
         self.symmetric_edge_features = []
         self.antisymmetric_edge_features = []
         self.n_edge_features = 2 if neighborhood == 4 else 4
+        self.weighted_loss = False
         LatentGridCRF.__init__(self, n_labels, n_features, n_states_per_label,
                                inference_method=inference_method)