Skip to content

Commit 7dacda6

Browse files
committed
sync: do a deep dive and I think finally genuinely fix the last sync bug -- fix #5823
1 parent 1761583 commit 7dacda6

File tree

6 files changed

+113
-39
lines changed

6 files changed

+113
-39
lines changed

src/packages/project/sync/server.ts

+6-4
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,14 @@ silently swallowed in persistent mode...
1414
*/
1515

1616
// How long to wait from when we hit 0 clients until closing this channel.
17-
// Making this short can save some memory and cpu.
17+
// Making this short saves memory and cpu.
1818
// Making it longer reduces the potential time to open a file, e.g., if you
19-
// disconnect then reconnect.
19+
// disconnect then reconnect, e.g., by refreshing your browser.
2020
// Related to https://github.com/sagemathinc/cocalc/issues/5627
21-
// I tried 0 and that made "won't save" much works.
22-
const CLOSE_DELAY_MS = 5 * 60 * 1000;
21+
// and https://github.com/sagemathinc/cocalc/issues/5823
22+
// and https://github.com/sagemathinc/cocalc/issues/5617
23+
24+
const CLOSE_DELAY_MS = 15 * 1000; // 15 seconds -- longer enough to refresh your browser without it closing.
2325

2426
// This is a hard upper bound on the number of browser sessions that could
2527
// have the same file open at once. We put some limit on it, to at least

src/packages/project/sync/sync-doc.ts

+93-32
Original file line numberDiff line numberDiff line change
@@ -21,11 +21,95 @@ import { Client } from "@cocalc/sync/editor/generic/types";
2121
import { once } from "@cocalc/util/async-utils";
2222
import { filename_extension } from "@cocalc/util/misc";
2323
import { jupyter_backend } from "../jupyter/jupyter";
24+
import { EventEmitter } from "events";
2425

2526
const COCALC_EPHEMERAL_STATE: boolean =
2627
process.env.COCALC_EPHEMERAL_STATE === "yes";
2728

28-
const syncdocs: { [path: string]: SyncDoc } = {};
29+
class SyncDocs extends EventEmitter {
30+
private syncdocs: { [path: string]: SyncDoc } = {};
31+
private closing: Set<string> = new Set();
32+
33+
async close(path: string, log?): Promise<void> {
34+
const doc = this.get(path);
35+
if (doc == null) {
36+
log?.(`close ${path} -- no need, as it is not opened`);
37+
return;
38+
}
39+
try {
40+
log?.(`close ${path} -- starting close`);
41+
this.closing.add(path);
42+
// As soon as this close starts, doc is in an undefined state.
43+
// Also, this can take an **unbounded** amount of time to finish,
44+
// since it tries to save the patches table (among other things)
45+
// to the database, and if there is no connection from the hub
46+
// to this project, then it will simply wait however long it takes
47+
// until we get a connection (and there is no timeout). That is
48+
// perfectly fine! E.g., a user closes their browser connected
49+
// to a project, then comes back 8 hours later and tries to open
50+
// this document when they resume their browser. During those entire
51+
// 8 hours, the project might have been waiting to reconnect, just
52+
// so it could send the patches from patches_list to the database.
53+
// It does that, then finishes this async doc.close(), releases
54+
// the lock, and finally the user gets to open their file. See
55+
// https://github.com/sagemathinc/cocalc/issues/5823 for how not being
56+
// careful with locking like this resulted in a very difficult to
57+
// track down heisenbug. See also
58+
// https://github.com/sagemathinc/cocalc/issues/5617
59+
await doc.close();
60+
log?.(`close ${path} -- successfully closed`);
61+
} finally {
62+
// No matter what happens above when it finishes, we clear it
63+
// and consider it closed.
64+
// There is perhaps a chance closing fails above (no idea how),
65+
// but we don't want it to be impossible to attempt to open
66+
// the path again I.e., we don't want to leave around a lock.
67+
log?.(`close ${path} -- recording that close succeeded`);
68+
delete this.syncdocs[path];
69+
this.closing.delete(path);
70+
this.emit(`close-${path}`);
71+
}
72+
}
73+
74+
get(path: string): SyncDoc | undefined {
75+
return this.syncdocs[path];
76+
}
77+
78+
async create(type, opts, log): Promise<SyncDoc> {
79+
const path = opts.path;
80+
if (this.closing.has(path)) {
81+
log(
82+
`create ${path} -- waiting for previous version to completely finish closing...`
83+
);
84+
await once(this, `close-${path}`);
85+
log(`create ${path} -- successfully closed.`);
86+
}
87+
let doc;
88+
switch (type) {
89+
case "string":
90+
doc = new SyncString(opts);
91+
break;
92+
case "db":
93+
doc = new SyncDB(opts);
94+
break;
95+
default:
96+
throw Error(`unknown syncdoc type ${type}`);
97+
}
98+
this.syncdocs[path] = doc;
99+
log(`create ${path} -- successfully created.`);
100+
return doc;
101+
}
102+
103+
async closeAll(filename: string): Promise<void> {
104+
for (const path in this.syncdocs) {
105+
if (path == filename || path.startsWith(filename + "/")) {
106+
await this.close(path);
107+
}
108+
}
109+
}
110+
}
111+
112+
const syncDocs = new SyncDocs();
29113

30114
export function init_syncdoc(
31115
client: Client,
@@ -47,14 +131,7 @@ export function init_syncdoc(
47131
// return it; otherwise, return undefined. This is useful
48132
// for getting a reference to a syncdoc, e.g., for prettier.
49133
export function get_syncdoc(path: string): SyncDoc | undefined {
50-
return syncdocs[path];
51-
}
52-
53-
async function close_syncdoc(path: string): Promise<void> {
54-
const doc = get_syncdoc(path);
55-
if (doc == null) return;
56-
delete syncdocs[path];
57-
await doc.close();
134+
return syncDocs.get(path);
58135
}
59136

60137
async function init_syncdoc_async(
@@ -74,24 +151,23 @@ async function init_syncdoc_async(
74151
log("type = ", type);
75152
log("opts = ", JSON.stringify(opts));
76153
opts.client = client;
77-
log("now creating syncdoc...");
154+
log(`now creating syncdoc ${opts.path}...`);
78155
let syncdoc;
79156
try {
80-
syncdoc = create_syncdoc(type, opts);
81-
syncdocs[opts.path] = syncdoc;
157+
syncdoc = await syncDocs.create(type, opts, log);
82158
} catch (err) {
83159
log(`ERROR creating syncdoc -- ${err.toString()}`, err.stack);
84160
// TODO: how to properly inform clients and deal with this?!
85161
return;
86162
}
87163
synctable.on("closed", function () {
88164
log("syncstring table closed, so closing syncdoc", opts.path);
89-
close_syncdoc(opts.path);
165+
syncDocs.close(opts.path, log);
90166
});
91167

92168
syncdoc.on("error", function (err) {
93169
log(`syncdoc error -- ${err}`);
94-
close_syncdoc(opts.path);
170+
syncDocs.close(opts.path, log);
95171
});
96172

97173
// Extra backend support in some cases, e.g., Jupyter, Sage, etc.
@@ -168,32 +244,21 @@ function get_type_and_opts(synctable: SyncTable): { type: string; opts: any } {
168244
return { type, opts };
169245
}
170246

171-
function create_syncdoc(type, opts): SyncDoc {
172-
switch (type) {
173-
case "string":
174-
return new SyncString(opts);
175-
case "db":
176-
return new SyncDB(opts);
177-
default:
178-
throw Error(`unknown syncdoc type ${type}`);
179-
}
180-
}
181-
182247
export async function syncdoc_call(
183248
path: string,
184249
logger: any,
185250
mesg: any
186251
): Promise<string> {
187252
logger.debug("syncdoc_call", path, mesg);
188-
const doc = get_syncdoc(path);
253+
const doc = syncDocs.get(path);
189254
if (doc == null) {
190255
logger.debug("syncdoc_call -- not open: ", path);
191256
return "not open";
192257
}
193258
switch (mesg.cmd) {
194259
case "close":
195260
logger.debug("syncdoc_call -- now closing: ", path);
196-
await close_syncdoc(path);
261+
await syncDocs.close(path, logger.debug);
197262
logger.debug("syncdoc_call -- closed: ", path);
198263
return "successfully closed";
199264
default:
@@ -206,9 +271,5 @@ export async function syncdoc_call(
206271
export async function close_all_syncdocs_in_tree(
207272
filename: string
208273
): Promise<void> {
209-
for (const path in syncdocs) {
210-
if (path == filename || path.indexOf(filename + "/") != -1) {
211-
await close_syncdoc(path);
212-
}
213-
}
274+
return await syncDocs.closeAll(filename);
214275
}

src/packages/sync/editor/generic/sync-doc.ts

+7
Original file line numberDiff line numberDiff line change
@@ -796,7 +796,14 @@ export class SyncDoc extends EventEmitter {
796796
// Do nothing here.
797797
}
798798
}
799+
// WARNING: that 'closed' is emitted at the beginning of the
800+
// close function (before anything async) for the project is
801+
// assumed in src/packages/project/sync/sync-doc.ts, because
802+
// that ensures that the moment close is called we lock trying
803+
// try create the syncdoc again until closing is finished.
804+
// (This set_state call emits "closed"):
799805
this.set_state("closed");
806+
800807
this.emit("close");
801808

802809
// must be after the emits above, so clients know

src/packages/sync/table/synctable.ts

+2-1
Original file line numberDiff line numberDiff line change
@@ -304,7 +304,8 @@ export class SyncTable extends EventEmitter {
304304
}
305305
//console.log("SAVE -- has uncommitted changes, so trying again.");
306306
if (this.state !== "connected") {
307-
// wait for state change
307+
// wait for state change.
308+
// This could take a long time, and that is fine.
308309
await once(this, "state");
309310
}
310311
if (this.state === "connected") {

src/packages/util/async-utils.ts

+4-1
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,10 @@ export async function retry_until_success<T>(
116116
import { EventEmitter } from "events";
117117

118118
/* Wait for an event emitter to emit any event at all once.
119-
Returns array of args emitted by that event. */
119+
Returns array of args emitted by that event.
120+
If timeout_ms is 0 (the default) this can wait an unbounded
121+
amount of time. That's intentional and does make sense
122+
in our applications. */
120123
export async function once(
121124
obj: EventEmitter,
122125
event: string,

src/packages/util/smc-version.js

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
/* autogenerated by the update_version script */
2-
exports.version=1649172642;
2+
exports.version=1649308753;

0 commit comments

Comments
 (0)