
Commit bfbc8ec

Scott Cruwys (scruwys) authored and committed
fix: Bugs related to revisioner and definitions
1 parent a091264 commit bfbc8ec

File tree

11 files changed: +91 additions, -60 deletions


app/definitions/models.py

Lines changed: 2 additions & 0 deletions
@@ -384,6 +384,7 @@ def to_doc(self):
             'datastore_engine': self.schema.datastore.engine,
             'schema': self.schema.name,
             'name': self.name,
+            'exact_name': self.search_label,
             'description': self.short_desc,
             'tags': self.tags,
         }
@@ -551,6 +552,7 @@ def to_doc(self):
             'schema': self.table.schema.name,
             'table': self.table.name,
             'name': self.name,
+            'exact_name': self.search_label,
             'description': self.short_desc,
             'tags': self.table.tags,
         }
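
For context, a minimal sketch of the document shape after this change, using hypothetical values and assuming search_label resolves to the fully qualified object name; 'name' is tokenized by the index analyzer while 'exact_name' stays whole so exact matches can be boosted at query time:

# Hypothetical example of the document produced by to_doc() after the change.
# The 'exact_name' value is an assumption about what search_label returns.
doc = {
    'datastore_engine': 'postgresql',
    'schema': 'app',
    'name': 'user_accounts',
    'exact_name': 'app.user_accounts',
    'description': 'Registered user accounts.',
    'tags': ['core'],
}
print(doc['exact_name'])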

app/omnisearch/backends/elastic_backend.py

Lines changed: 9 additions & 7 deletions
@@ -28,7 +28,7 @@ class ElasticBackend(base.BaseSearchBackend):
 
     ALLOWED_FACET_MAP = {
         'datastores': 'datastore_id',
-        'datastore_engines': 'datastore_engine',
+        'engines': 'datastore_engine',
         'schemas': 'schema.keyword',
         'tags': 'tags.keyword',
     }
@@ -91,14 +91,16 @@ def execute(self, query, types=None, datastores=None, start=0, size=100, **facet
         t = time.time()
         s = Search(index=index, using=self.client)
         s = s.query(
-            'multi_match',
-            type='phrase_prefix',
+            'simple_query_string',
             fields=[
-                'schema',
-                'table',
-                'description',
-                'name^1.1',
+                'exact_name^100',
+                'name.raw^30',
+                'name^10',
+                'table^5',
+                'schema^3',
+                'description^3',
                 'text^1.1',
+                'tags',
             ],
             query=query,
         ).filter(
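
As an illustration only (not part of the commit), a standalone elasticsearch_dsl sketch of the new query shape; the index name and sample query are assumptions, while the field boosts mirror the diff so exact-name hits dominate the ranking:

from elasticsearch import Elasticsearch
from elasticsearch_dsl import Search

client = Elasticsearch('http://localhost:9200')  # assumed local cluster

# simple_query_string with per-field boosts: matches on exact_name score far above
# partial matches on name, table, schema, or description.
s = Search(index='table_search', using=client).query(  # 'table_search' is a hypothetical index name
    'simple_query_string',
    query='user accounts',
    fields=[
        'exact_name^100',
        'name.raw^30',
        'name^10',
        'table^5',
        'schema^3',
        'description^3',
        'text^1.1',
        'tags',
    ],
)

response = s.execute()
for hit in response:
    print(hit.meta.score, hit.name)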

app/omnisearch/constants.py

Lines changed: 30 additions & 17 deletions
@@ -128,24 +128,34 @@
     'now'
 ]
 
+SNAKE_CASE_SPLIT_TOKENIZER = {
+    'type': 'simple_pattern_split',
+    'pattern': '_',
+}
+
+DBO_NAME_SPLIT_TOKENIZER = {
+    'type': 'simple_pattern_split',
+    'pattern': '|'.join([
+        '\\.',
+        ' ',
+        '_',
+    ])
+}
 
 TABLE_INDEX_SETTINGS = {
     'settings': {
         'analysis': {
             'analyzer': {
-                'slug_case_split': {
-                    'tokenizer': 'slug_case_split',
+                'dbo_name_split': {
+                    'tokenizer': 'dbo_name_split',
                 },
                 'custom_english_stop': {
                     'type': 'stop',
                     'stopwords': ENGLISH_STOPWORDS,
                 }
             },
             'tokenizer': {
-                'slug_case_split': {
-                    'type': 'simple_pattern_split',
-                    'pattern': '_',
-                }
+                'dbo_name_split': DBO_NAME_SPLIT_TOKENIZER,
             }
         }
     },
@@ -165,7 +175,10 @@
             },
             'name': {
                 'type': 'text',
-                'analyzer': 'slug_case_split',
+                'analyzer': 'dbo_name_split',
+            },
+            'exact_name': {
+                'type': 'text',
             },
             'description': {
                 'type': 'text',
@@ -179,7 +192,7 @@
             },
             'text': {
                 'type': 'text',
-                'analyzer': 'slug_case_split',
+                'analyzer': 'dbo_name_split',
             }
         }
     },
@@ -203,19 +216,16 @@
     'settings': {
         'analysis': {
             'analyzer': {
-                'slug_case_split': {
-                    'tokenizer': 'slug_case_split',
+                'dbo_name_split': {
+                    'tokenizer': 'dbo_name_split',
                 },
                 'custom_english_stop': {
                     'type': 'stop',
                     'stopwords': ENGLISH_STOPWORDS
                 }
             },
             'tokenizer': {
-                'slug_case_split': {
-                    'type': 'simple_pattern_split',
-                    'pattern': '_',
-                }
+                'dbo_name_split': DBO_NAME_SPLIT_TOKENIZER,
             }
         }
    },
@@ -235,11 +245,14 @@
             },
             'table': {
                 'type': 'text',
-                'analyzer': 'slug_case_split',
+                'analyzer': 'dbo_name_split',
             },
             'name': {
                 'type': 'text',
-                'analyzer': 'slug_case_split',
+                'analyzer': 'dbo_name_split',
+            },
+            'exact_name': {
+                'type': 'text',
             },
             'description': {
                 'type': 'text',
@@ -253,7 +266,7 @@
             },
             'text': {
                 'type': 'text',
-                'analyzer': 'slug_case_split',
+                'analyzer': 'dbo_name_split',
             }
         }
     },
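
Roughly, the new dbo_name_split tokenizer uses simple_pattern_split on the pattern '\.| |_', splitting database object names on periods, spaces, and underscores. A Python approximation of that behavior (Lucene regex semantics differ slightly from Python's re module, so this is only a sketch):

import re

# Approximate the dbo_name_split tokenizer from the diff: split on '.', ' ', and '_'.
DBO_NAME_PATTERN = r'\.| |_'

def dbo_name_tokens(value):
    """Return the non-empty tokens a dbo_name_split-style analyzer would emit."""
    return [token for token in re.split(DBO_NAME_PATTERN, value) if token]

print(dbo_name_tokens('app.user_accounts'))   # ['app', 'user', 'accounts']
print(dbo_name_tokens('dbo.Sales Orders'))    # ['dbo', 'Sales', 'Orders']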

app/revisioner/actions/created.py

Lines changed: 10 additions & 3 deletions
@@ -63,9 +63,12 @@ def apply(self, batch_size=2000):
 
         for page_num in paginator.page_range:
             page = paginator.get_page(page_num)
-            data = [
-                self.get_attributes(revision) for revision in page.object_list
-            ]
+            data = []
+
+            for revision in page.object_list:
+                attributes = self.get_attributes(revision)
+                if attributes is not None:
+                    data.append(attributes)
 
             if len(data):
                 self.bulk_insert(data)
@@ -135,6 +138,8 @@ def get_attributes(self, revision):
         """Get the instance attributes from the Revision.
         """
         table_id = revision.parent_instance_id
+        if table_id is None:
+            return None
         defaults = {
             'workspace_id': self.workspace_id,
             'table_id': table_id,
@@ -158,6 +163,8 @@ def get_attributes(self, revision):
         """
         metadata = revision.metadata.copy()
         table_id = revision.parent_instance_id
+        if table_id is None:
+            return None
         defaults = {
             'workspace_id': self.workspace_id,
             'table_id': table_id,
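
Stripped of the Django specifics, the fix boils down to filtering out revisions whose attributes cannot be resolved before the bulk insert; a minimal sketch of that pattern with hypothetical helper names:

# Minimal sketch of the pattern applied in apply(): get_attributes may now return None
# (e.g. a revision without a parent_instance_id), so such rows are skipped instead of
# being passed straight to the bulk insert.
def collect_attributes(revisions, get_attributes):
    data = []
    for revision in revisions:
        attributes = get_attributes(revision)
        if attributes is not None:
            data.append(attributes)
    return data


rows = collect_attributes(
    [{'parent_instance_id': 1}, {'parent_instance_id': None}],
    lambda r: r if r['parent_instance_id'] is not None else None,
)
print(rows)  # [{'parent_instance_id': 1}]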

app/revisioner/definition.py

Lines changed: 28 additions & 25 deletions
@@ -2,7 +2,7 @@
 from app.inspector import service as inspector
 
 
-def make(collector, *args, **kwargs):  # noqa: C901
+def make(collector, logger=None, *args, **kwargs):  # noqa: C901
     """We take an initial pass at making the definition based on the OID, if supported. The collector
     marks the raw database object as "processed" if we are able to match it to a Django model instance.
     """
@@ -14,6 +14,9 @@ def make(collector, *args, **kwargs):  # noqa: C901
     for number, row in enumerate(inspector.tables_and_views(collector.datastore)):
         schema_name = row.pop('table_schema')
 
+        if logger and schema_name != last_schema_name:
+            logger.info('Starting to process schema: %s', schema_name)
+
         if schema_name not in definition:
             schema_instance = collector.schemas.find_by_oid(row['schema_object_id'])
 
@@ -74,42 +77,42 @@ def make(collector, *args, **kwargs):  # noqa: C901
             'indexes': indexes,
         })
 
-        schema = definition[schema_name]['schema']
+        if number > 0 and last_schema_name != schema_name:
+            schema = definition[last_schema_name]['schema']
 
-        if not schema['instance']:
-            schema['instance'] = collector.schemas.search_unassigned(name=schema['name'])
+            if not schema['instance']:
+                schema['instance'] = collector.schemas.search_unassigned(name=schema['name'])
 
-        if schema['instance']:
-            collector.schemas.mark_as_processed(schema['instance'].pk)
+            if schema['instance']:
+                collector.schemas.mark_as_processed(schema['instance'].pk)
 
-        for table in definition[schema_name]['tables']:
-            if not table['instance'] and schema['instance']:
-                table['instance'] = collector.tables.search_unassigned(name=table['name'], schema_id=schema['instance'].pk)
+            for table in definition[last_schema_name]['tables']:
+                if not table['instance'] and schema['instance']:
+                    table['instance'] = collector.tables.search_unassigned(name=table['name'], schema_id=schema['instance'].pk)
 
-            if not table['instance']:
-                continue
+                if not table['instance']:
+                    continue
 
-            collector.tables.mark_as_processed(table['instance'].pk)
+                collector.tables.mark_as_processed(table['instance'].pk)
 
-            for column in table['columns']:
-                if column['instance']:
-                    continue
+                for column in table['columns']:
+                    if column['instance']:
+                        continue
 
-                column['instance'] = collector.columns.search_unassigned(name=column['name'], table_id=table['instance'].pk)
+                    column['instance'] = collector.columns.search_unassigned(name=column['name'], table_id=table['instance'].pk)
 
-                if column['instance']:
-                    collector.columns.mark_as_processed(column['instance'].pk)
+                    if column['instance']:
+                        collector.columns.mark_as_processed(column['instance'].pk)
 
-            for index in table['indexes']:
-                if index['instance']:
-                    continue
+                for index in table['indexes']:
+                    if index['instance']:
+                        continue
 
-                index['instance'] = collector.indexes.search_unassigned(name=index['name'], table_id=table['instance'].pk)
+                    index['instance'] = collector.indexes.search_unassigned(name=index['name'], table_id=table['instance'].pk)
 
-                if index['instance']:
-                    collector.indexes.mark_as_processed(index['instance'].pk)
+                    if index['instance']:
+                        collector.indexes.mark_as_processed(index['instance'].pk)
 
-        if number > 0 and last_schema_name != schema_name:
             yield (last_schema_name, definition.pop(last_schema_name))
 
         last_schema_name = schema_name
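
The heart of this change is a group-by-schema streaming pattern: instance matching and the yield now run only when the schema name changes, instead of once per row. A simplified sketch of that pattern, with a hypothetical row shape and without the collector matching logic:

def group_rows_by_schema(rows):
    """Yield (schema_name, rows) per schema, flushing a schema only once its rows are
    exhausted, i.e. when the next row belongs to a different schema."""
    definition = {}
    last_schema_name = None

    for number, row in enumerate(rows):
        schema_name = row['table_schema']

        if number > 0 and last_schema_name != schema_name:
            yield last_schema_name, definition.pop(last_schema_name)

        definition.setdefault(schema_name, []).append(row)
        last_schema_name = schema_name

    # Flush the final schema once the input is exhausted.
    if last_schema_name is not None:
        yield last_schema_name, definition.pop(last_schema_name)


rows = [
    {'table_schema': 'public', 'table_name': 'orders'},
    {'table_schema': 'public', 'table_name': 'users'},
    {'table_schema': 'audit', 'table_name': 'events'},
]
for schema, tables in group_rows_by_schema(rows):
    print(schema, [t['table_name'] for t in tables])
# public ['orders', 'users']
# audit ['events']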

app/revisioner/tasks/core.py

Lines changed: 2 additions & 2 deletions
@@ -55,13 +55,13 @@ def start_revisioner_run(self, run_id, *arg, **kwargs):
     collector = DefinitionCollector(self._run.datastore)
     run_tasks = []
 
-    for schema, schema_definition in definition.make(collector):
-        self.log.info(f'Uploading schema: {schema}')
+    for schema, schema_definition in definition.make(collector, logger=self.log):
         storage_path = f'revisioner/{self._run.datastore_id}/run_id={self._run.id}/{schema}.json.gz'
         blob.put_object(storage_path, schema_definition)
         run_tasks.append(
             RunTask(run=self._run, storage_path=storage_path, status=RunTask.PENDING),
         )
+        self.log.info(f'Finished processing: {schema}')
 
     RunTask.objects.bulk_create(run_tasks, ignore_conflicts=True)
 
app/revisioner/tasks/scheduler.py

Lines changed: 6 additions & 2 deletions
@@ -84,13 +84,17 @@ def queue_runs(self, datastore_slug=None, *args, **kwargs):
 
 @app.task(bind=True)
 @logging.task_logger(__name__)
-def detect_run_timeout(self, minutes=60, *args, **kwargs):
-    """Garbage collection. Clears out runs if they haven't finished running after 60 minutes.
+def detect_run_timeout(self, minutes=60 * 2, *args, **kwargs):
+    """Garbage collection. Clears out runs if they haven't finished running after 120 minutes.
     """
     date_from = timezone.now() - timedelta(minutes=minutes)
     runs = Run.objects.filter(created_at__lte=date_from, finished_at=None)
 
     for run in runs:
+        self.log.info(
+            f'(run: {run.id}) Marking run as timed out'
+        )
+
         run.mark_as_finished()
 
         RevisionerError.objects.create(
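
For reference, the timeout arithmetic is just a timezone-aware cutoff: any unfinished run created before now minus the given number of minutes is marked as timed out. A minimal standard-library sketch (the actual task uses django.utils.timezone):

from datetime import datetime, timedelta, timezone

def run_timeout_cutoff(minutes=60 * 2):
    """Return the UTC timestamp before which unfinished runs count as timed out."""
    return datetime.now(timezone.utc) - timedelta(minutes=minutes)

print(run_timeout_cutoff())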

docker-development.yml

Lines changed: 0 additions & 2 deletions
@@ -49,8 +49,6 @@ services:
       - metamapper
   webserver:
     << : *metamapper_defaults
-    build:
-      context: ./
     command: webserver
     ports:
       - 5050:5050

www/src/app/Common/CodeMirror/CodeMirrorEditor.js

Lines changed: 1 addition & 1 deletion
@@ -68,7 +68,7 @@ class CodeMirrorEditor extends React.Component {
       readOnly: this.props.readOnly,
       defaultValue: this.props.defaultValue,
       onChange: this.props.onChange,
-      className: this.props.textAreaClassName
+      className: this.props.textAreaClassName,
     })
 
     return React.createElement('div', null, editor);

www/src/app/Datastores/Readme/ReadmeEditor.js

Lines changed: 2 additions & 0 deletions
@@ -11,6 +11,8 @@ const ReadmeEditor = (props) => {
       value={props.value}
       onChange={props.onChange}
       placeholder="read"
+      lineWrapping={true}
+      lineNumbers={true}
     />
   </form>
 )

0 commit comments
