diff --git a/examples/plot_cv_vs_cp_path_lowrank.py b/examples/plot_cv_vs_cp_path_lowrank.py
new file mode 100644
index 0000000..be59c5d
--- /dev/null
+++ b/examples/plot_cv_vs_cp_path_lowrank.py
@@ -0,0 +1,112 @@
+"""
+=========================================
+Compare CV and CP for low rank completion
+=========================================
+
+Compare explicit regularization (cross-validation over the nuclear norm
+penalty path) with implicit regularization (early stopping of Chambolle-Pock
+iterations) for low rank matrix completion.
+"""
+
+import numpy as np
+import matplotlib.pyplot as plt
+from numpy.linalg import norm
+from celer.plot_utils import configure_plt
+from sklearn.model_selection import train_test_split
+
+from iterreg.low_rank.solvers import dual_primal_low_rank
+
+configure_plt()
+
+
+# Simulate a noisy rank 5 matrix, observed on 60% of its entries:
+d = 50
+np.random.seed(0)
+observed = np.zeros([d, d], dtype=bool)
+idx = np.random.choice(d ** 2, int(0.6 * d ** 2), replace=False)
+observed.flat[idx] = True
+
+rank = 5
+Y_true = np.random.randn(d, rank) @ np.random.randn(rank, d)
+Y_true /= norm(Y_true, ord="fro")
+noise = np.random.randn(*Y_true.shape)
+noise /= norm(noise)
+Y_true += 0.3 * noise
+
+Y = Y_true.copy()
+Y[~observed] = 0
+
+# Split the observed entries into a train and a test set:
+idx_train, idx_test = train_test_split(idx, test_size=0.25)
+
+observed_train = np.zeros_like(observed)
+observed_train.flat[idx_train] = True
+Y_train = Y.copy()
+Y_train[~observed_train] = 0
+observed_test = np.zeros_like(observed)
+observed_test.flat[idx_test] = True
+Y_test = Y.copy()
+Y_test[~observed_test] = 0
+
+
+def pgd_nucnorm(Y, alpha, max_iter=10_000, tol=1e-6, X_init=None):
+    """Proximal gradient descent for nuclear norm penalized completion.
+
+    Fits only the training entries (uses the global observed_train).
+    """
+    norm_Y = norm(Y)
+    if X_init is None:
+        X = Y.copy()
+    else:
+        X = X_init.copy()
+    for it in range(max_iter):
+        X_old = X.copy()
+        X -= observed_train * (X - Y)  # gradient step on training entries
+        U, s, VT = np.linalg.svd(X)
+        s = np.maximum(s - alpha, 0)  # soft-threshold the singular values
+        X = U @ (s[:, np.newaxis] * VT)
+        delta = norm(X - X_old)
+        if it % 100 == 0:
+            print(f"Iter {it}, delta={delta:.2e}")
+        if delta <= tol * norm_Y:
+            print(f"Early exit at iter {it}.")
+            break
+    return X
+
+
+# Implicit regularization path: store every Chambolle-Pock iterate.
+W, _, _, all_W = dual_primal_low_rank(
+    observed_train, Y_train, max_iter=100, f_store=1, retall=True)
+
+losses_cp = [norm((Y_test - W_k)[observed_test]) / norm(Y_test)
+             for W_k in all_W]
+
+
+# Explicit regularization path: decreasing penalties, warm started.
+alpha_max = np.linalg.norm(Y_train, ord=2)
+X = Y_train.copy()
+rhos = np.geomspace(1, 1e-3, 20)
+all_X_cv = []
+for rho in rhos:
+    X = pgd_nucnorm(Y_train, alpha=rho * alpha_max, X_init=X)
+    all_X_cv.append(X.copy())
+all_X_cv = np.array(all_X_cv)
+
+losses_cv = [norm((Y_test - X)[observed_test]) / norm(Y_test)
+             for X in all_X_cv]
+
+
+plt.close('all')
+fig, axarr = plt.subplots(
+    1, 2, constrained_layout=True, sharey=True, figsize=(7.4, 2))
+ax = axarr[0]
+ax.semilogx(rhos, losses_cv)
+ax.invert_xaxis()
+ax.set_xlabel(r"$\lambda / \lambda_{\mathrm{max}}$")
+ax.set_ylabel("Loss on left-out indices")
+
+ax = axarr[1]
+ax.plot(np.arange(len(losses_cp)), losses_cp)
+ax.set_xlabel("Chambolle-Pock iteration")
+
+plt.show(block=False)
diff --git a/examples/plot_sparse_recovery.py b/examples/plot_sparse_recovery.py
index 0a4e326..c6306a8 100644
--- a/examples/plot_sparse_recovery.py
+++ b/examples/plot_sparse_recovery.py
@@ -31,7 +31,7 @@ def plot_varying_sigma(corr, density, snr, max_iter=100):
     supp = np.random.choice(n_features, size=int(density * n_features),
                             replace=False)
     w_true = np.zeros(n_features)
-    w_true[supp] = 1
+    w_true[supp] = np.random.uniform(low=0.5, high=1.5, size=len(supp))
     X_, y_, w_true = make_correlated_data(
         n_samples=int(n_samples * 4 / 3.), n_features=n_features,
         w_true=w_true,
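Note: the key step in pgd_nucnorm above is the proximal operator of the
nuclear norm, i.e. soft-thresholding of the singular values. A minimal
standalone sketch of that building block (the helper name prox_nucnorm is
illustrative, not part of iterreg):

import numpy as np

def prox_nucnorm(X, alpha):
    # prox of alpha * nuclear norm: soft-threshold the singular values.
    U, s, VT = np.linalg.svd(X, full_matrices=False)
    return U @ (np.maximum(s - alpha, 0)[:, np.newaxis] * VT)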
diff --git a/examples/plot_stopping_lowrank.py b/examples/plot_stopping_lowrank.py
index b147570..749401e 100644
--- a/examples/plot_stopping_lowrank.py
+++ b/examples/plot_stopping_lowrank.py
@@ -16,7 +16,7 @@
 configure_plt()
 
 
-d = 100
+d = 200
 np.random.seed(0)
 mask = np.zeros([d, d], dtype=bool)
 idx = np.random.choice(d ** 2, d ** 2 // 5, replace=False)
@@ -55,7 +55,7 @@
     distances[delta] = dist
 
 plt.close('all')
-fig1, ax = plt.subplots(1, 1, constrained_layout=True, figsize=(3.8, 2.2))
+fig1, ax = plt.subplots(1, 1, constrained_layout=True, figsize=(8., 4))
 n_points = 100
 for delta in deltas:
     x_plt = f_store * np.arange(len(distances[delta]))
diff --git a/iterreg/low_rank/solvers.py b/iterreg/low_rank/solvers.py
index 141db66..8a59adb 100644
--- a/iterreg/low_rank/solvers.py
+++ b/iterreg/low_rank/solvers.py
@@ -6,7 +6,7 @@
 
 def dual_primal_low_rank(
         mask, Y, max_iter=1000, f_store=10, sigma=None, limit=None,
-        stop_crit=1e-10, verbose=False):
+        stop_crit=1e-10, verbose=False, retall=False):
     """Lowest nuclear norm matrix equal to Y on mask.
     mask and Y are np.arrays, shape (d, d).
     """
@@ -17,9 +17,11 @@
         tau = 1.
     else:
         tau = 0.99 / sigma
     Theta = np.zeros((d, d))
     Theta_old = Theta.copy()
     distances = np.zeros(max_iter // f_store)
+    if retall:
+        all_W = []  # store the primal iterate every f_store iterations
     W = Theta.copy()
 
     for k in range(max_iter):
@@ -32,6 +34,8 @@
         if k % f_store == 0:
             if limit is not None:
                 distances[k // f_store] = norm(W - limit)
+            if retall:
+                all_W.append(W.copy())
             feasability = norm((W - Y)[mask])
             if verbose:
                 print(f"Iter {k}, feasability: {feasability:.1e}")
@@ -40,4 +44,6 @@
                     f"Feasability {feasability:.1e} < {stop_crit:.1e}, exit.")
                 break
 
+    if retall:
+        return W, Theta, distances, np.array(all_W)
     return W, Theta, distances
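Note: a minimal usage sketch of the new retall flag. dual_primal_low_rank and
its signature are as defined in the diff above; the random data setup here is
illustrative only:

import numpy as np
from numpy.linalg import norm
from iterreg.low_rank.solvers import dual_primal_low_rank

rng = np.random.RandomState(0)
d = 20
mask = rng.rand(d, d) < 0.5  # observed entries (illustrative)
Y_full = rng.randn(d, 2) @ rng.randn(2, d)  # rank 2 ground truth
Y = np.where(mask, Y_full, 0)

# With retall=True, the solver additionally returns every f_store-th iterate.
W, Theta, _, all_W = dual_primal_low_rank(
    mask, Y, max_iter=50, f_store=1, retall=True)

# Early stopping: pick the iterate closest to the (here known) ground truth.
errors = [norm(W_k - Y_full) for W_k in all_W]
print("best iterate:", int(np.argmin(errors)))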