From 0527a14cbb1d6dfac244392160fdf5a448760598 Mon Sep 17 00:00:00 2001 From: Patrick Huck Date: Fri, 15 Nov 2024 17:03:39 -0800 Subject: [PATCH 1/5] linting and hybrid search --- .../mpcontribs/api/contributions/document.py | 62 +++++++++---------- .../mpcontribs/api/contributions/views.py | 50 --------------- mpcontribs-api/mpcontribs/api/core.py | 35 ++++++++--- 3 files changed, 53 insertions(+), 94 deletions(-) diff --git a/mpcontribs-api/mpcontribs/api/contributions/document.py b/mpcontribs-api/mpcontribs/api/contributions/document.py index 55c9e204b..c502d673c 100644 --- a/mpcontribs-api/mpcontribs/api/contributions/document.py +++ b/mpcontribs-api/mpcontribs/api/contributions/document.py @@ -163,7 +163,32 @@ class Contributions(DynamicDocument): ReferenceField("Attachments", null=True), default=list, max_length=10 ) notebook = ReferenceField("Notebooks") - atlas = AtlasManager("formula_autocomplete") + atlas = AtlasManager( + "formula_autocomplete", + definition={ + "analyzer": "lucene.whitespace", + "searchAnalyzer": "lucene.whitespace", + "mappings": { + "dynamic": False, + "fields": { + "formula": {"type": "string"}, + "identifier": {"type": "string"}, + "is_public": {"type": "boolean"}, + "project": [{"type": "stringFacet"}, {"type": "string"}], + }, + }, + "storedSource": { + "include": [ + "formula", + "identifier", + "is_public", + "last_modified", + "needs_build", + "project", + ] + }, + }, + ) meta = { "collection": "contributions", "indexes": [ @@ -183,39 +208,8 @@ class Contributions(DynamicDocument): @queryset_manager def objects(doc_cls, queryset): - return queryset.no_dereference().only( - "project", - "identifier", - "formula", - "is_public", - "last_modified", - "needs_build", - ) - - @classmethod - def atlas_filter(cls, term): - try: - comp = Composition(term) - except Exception: - raise ValueError(f"{term} is not a valid composition") - - try: - for element in comp.elements: - Element(element) - except Exception: - raise ValueError(f"{element} not a valid element") - - ind_str = [] - - if len(comp) == 1: - d = comp.get_integer_formula_and_factor() - ind_str.append(d[0] + str(int(d[1])) if d[1] != 1 else d[0]) - else: - for i, j in comp.reduced_composition.items(): - ind_str.append(i.name + str(int(j)) if j != 1 else i.name) - - final_terms = ["".join(entry) for entry in permutations(ind_str)] - return AtlasQ(formula=final_terms[0]) # TODO formula__in=final_terms + only = doc_cls.atlas.definition["storedSource"]["include"] + return queryset.no_dereference().only(*only) @classmethod def post_init(cls, sender, document, **kwargs): diff --git a/mpcontribs-api/mpcontribs/api/contributions/views.py b/mpcontribs-api/mpcontribs/api/contributions/views.py index fe4e941b5..2d113228c 100644 --- a/mpcontribs-api/mpcontribs/api/contributions/views.py +++ b/mpcontribs-api/mpcontribs/api/contributions/views.py @@ -169,53 +169,3 @@ def has_add_permission(self, req, obj): raise Unauthorized(f"{obj.identifier} already added for {obj.project.id}") return True - - -@contributions.route("/search") -def search(): - formula = request.args.get("formula") - if not formula: - abort(404, description="Missing formula param.") - - try: - comp = Composition(formula) - except (CompositionError, ValueError): - abort(400, description="Invalid formula provided.") - - ind_str = [] - - if len(comp) == 1: - d = comp.get_integer_formula_and_factor() - ind_str.append(d[0] + str(int(d[1])) if d[1] != 1 else d[0]) - else: - for i, j in comp.reduced_composition.items(): - ind_str.append(i.name + str(int(j)) if j != 1 else i.name) - - final_terms = ["".join(entry) for entry in permutations(ind_str)] - limit = request.args.get("limit", ContributionsResource.default_limit) - - pipeline = [ - { - "$search": { - "index": "formula_autocomplete", - "text": {"path": "formula", "query": final_terms}, - } - }, - {"$project": {"formula": 1, "length": {"$strLenCP": "$formula"}, "project": 1}}, - {"$match": {"length": {"$gte": len(final_terms[0])}}}, - {"$limit": limit}, - {"$sort": {"length": 1}}, - ] - - results = [] - - for contrib in Contributions.objects().aggregate(pipeline): - results.append( - { - "id": str(contrib["_id"]), - "formula": contrib["formula"], - "project": contrib["project"], - } - ) - - return jsonify(results) diff --git a/mpcontribs-api/mpcontribs/api/core.py b/mpcontribs-api/mpcontribs/api/core.py index 6524d099f..1808b7900 100644 --- a/mpcontribs-api/mpcontribs/api/core.py +++ b/mpcontribs-api/mpcontribs/api/core.py @@ -581,8 +581,18 @@ def has_read_permission(self, request, qs): return qs.none() else: names = None - if q and "project" in q and "$in" in q["project"]: - names = q.pop("project").pop("$in") + if hasattr(qs._query_obj, "children"): + children = deepcopy(qs._query_obj.children) + else: + children = [deepcopy(qs._query_obj)] + + qs._query_obj = Q() + for node in children: + for field, value in node.query.items(): + if field == "project__in": + names = value + else: + qs = qs.filter(**{field: value}) qfilter = self.get_projects_filter( username, groups, filter_names=names @@ -610,15 +620,20 @@ def has_read_permission(self, request, qs): qfilter = self.get_projects_filter(username, groups) component = component[:-1] if component == "notebooks" else component qfilter &= Q(**{f"{component}__in": ids}) - contribs = Contributions.objects(qfilter).only(component).limit(len(ids)) + contribs = ( + Contributions.objects(qfilter).only(component).limit(len(ids)) + ) # return new queryset using "ids__in" - readable_ids = [ - getattr(contrib, component).id for contrib in contribs - ] if component == "notebook" else [ - dbref.id for contrib in contribs - for dbref in getattr(contrib, component) - if dbref.id in ids - ] + readable_ids = ( + [getattr(contrib, component).id for contrib in contribs] + if component == "notebook" + else [ + dbref.id + for contrib in contribs + for dbref in getattr(contrib, component) + if dbref.id in ids + ] + ) if not readable_ids: return qs.none() From ce77f9c8b8d4bffc115d102afde3749667a2a95a Mon Sep 17 00:00:00 2001 From: Patrick Huck Date: Wed, 4 Dec 2024 17:08:57 -0800 Subject: [PATCH 2/5] update atlas index def --- .../mpcontribs/api/contributions/document.py | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/mpcontribs-api/mpcontribs/api/contributions/document.py b/mpcontribs-api/mpcontribs/api/contributions/document.py index c502d673c..42d3c41ea 100644 --- a/mpcontribs-api/mpcontribs/api/contributions/document.py +++ b/mpcontribs-api/mpcontribs/api/contributions/document.py @@ -171,22 +171,16 @@ class Contributions(DynamicDocument): "mappings": { "dynamic": False, "fields": { + "data": {"dynamic": True, "type": "document"}, "formula": {"type": "string"}, "identifier": {"type": "string"}, "is_public": {"type": "boolean"}, + "last_modified": {"type": "date"}, + "needs_build": {"type": "boolean"}, "project": [{"type": "stringFacet"}, {"type": "string"}], }, }, - "storedSource": { - "include": [ - "formula", - "identifier", - "is_public", - "last_modified", - "needs_build", - "project", - ] - }, + "storedSource": True, }, ) meta = { @@ -208,8 +202,14 @@ class Contributions(DynamicDocument): @queryset_manager def objects(doc_cls, queryset): - only = doc_cls.atlas.definition["storedSource"]["include"] - return queryset.no_dereference().only(*only) + return queryset.no_dereference().only( + "project", + "identifier", + "formula", + "is_public", + "last_modified", + "needs_build", + ) @classmethod def post_init(cls, sender, document, **kwargs): From 660016932b964f1521e0b797b5020136a5816b79 Mon Sep 17 00:00:00 2001 From: Patrick Huck Date: Wed, 4 Dec 2024 17:09:09 -0800 Subject: [PATCH 3/5] remove unused import --- mpcontribs-api/mpcontribs/api/core.py | 1 - 1 file changed, 1 deletion(-) diff --git a/mpcontribs-api/mpcontribs/api/core.py b/mpcontribs-api/mpcontribs/api/core.py index 1808b7900..cf27b578f 100644 --- a/mpcontribs-api/mpcontribs/api/core.py +++ b/mpcontribs-api/mpcontribs/api/core.py @@ -2,7 +2,6 @@ """Custom meta-class and MethodView for Swagger""" import os -import logging import yaml from copy import deepcopy From 55f5f26d2a25c1fd8c07650a0026af95035afa88 Mon Sep 17 00:00:00 2001 From: Patrick Huck Date: Thu, 5 Dec 2024 13:14:33 -0800 Subject: [PATCH 4/5] start testing contributions --- .../tests/test_contributions.tavern.yaml | 54 +++++++++++++++++++ 1 file changed, 54 insertions(+) create mode 100644 mpcontribs-api/tests/test_contributions.tavern.yaml diff --git a/mpcontribs-api/tests/test_contributions.tavern.yaml b/mpcontribs-api/tests/test_contributions.tavern.yaml new file mode 100644 index 000000000..4a0907107 --- /dev/null +++ b/mpcontribs-api/tests/test_contributions.tavern.yaml @@ -0,0 +1,54 @@ +test_name: retrieving contributions + +# TODO test public response without API key +# TODO test public vs private projects + +stages: + - name: get a random contribution with all fields + request: + url: "{tavern.env_vars.MP_CONTRIBS_API_URL}/contributions/" + method: GET + headers: + accept: application/json + X-API-KEY: "{tavern.env_vars.MP_API_KEY}" + params: + per_page: 1 + _limit: 1 + _fields: _all + response: + strict: false + status_code: 200 + save: + json: + contrib: "data[0]" + + - name: retrieve single contribution with default fields + request: + url: "{tavern.env_vars.MP_CONTRIBS_API_URL}/contributions/{contrib.id:s}/" + method: GET + headers: + accept: application/json + X-API-KEY: "{tavern.env_vars.MP_API_KEY}" + response: + status_code: 200 + json: + id: "{contrib.id}" + project: "{contrib.project}" + identifier: "{contrib.identifier}" + formula: "{contrib.formula}" + is_public: !bool "{contrib.is_public}" + last_modified: "{contrib.last_modified}" + needs_build: !bool "{contrib.needs_build}" + + - name: retrieve single contribution with all fields + request: + url: "{tavern.env_vars.MP_CONTRIBS_API_URL}/contributions/{contrib.id:s}/" + params: + _fields: _all + method: GET + headers: + accept: application/json + X-API-KEY: "{tavern.env_vars.MP_API_KEY}" + response: + status_code: 200 + json: !force_original_structure "{contrib}" From ecc7f302d91e5c4ecbb502b380e02743a0972d84 Mon Sep 17 00:00:00 2001 From: Patrick Huck Date: Tue, 10 Dec 2024 14:26:22 -0800 Subject: [PATCH 5/5] separate auth/anon tests --- .../tests/test_contributions_anon.tavern.yaml | 71 +++++++++++++++++++ ...ml => test_contributions_auth.tavern.yaml} | 29 ++++---- 2 files changed, 84 insertions(+), 16 deletions(-) create mode 100644 mpcontribs-api/tests/test_contributions_anon.tavern.yaml rename mpcontribs-api/tests/{test_contributions.tavern.yaml => test_contributions_auth.tavern.yaml} (63%) diff --git a/mpcontribs-api/tests/test_contributions_anon.tavern.yaml b/mpcontribs-api/tests/test_contributions_anon.tavern.yaml new file mode 100644 index 000000000..a79561938 --- /dev/null +++ b/mpcontribs-api/tests/test_contributions_anon.tavern.yaml @@ -0,0 +1,71 @@ +test_name: anonymous retrieval of contributions + +stages: + - name: get a contribution from a private project with all fields + skip: True + request: + url: "{tavern.env_vars.MP_CONTRIBS_API_URL}/contributions/" + method: GET + headers: + accept: application/json + params: + project: periodic_band_structures + per_page: 1 + _limit: 1 + _fields: _all + response: + status_code: 200 + json: + data: [] + has_more: false + total_count: 0 + total_pages: 0 + + - name: get a contribution with all fields + skip: True + request: + url: "{tavern.env_vars.MP_CONTRIBS_API_URL}/contributions/" + method: GET + headers: + accept: application/json + params: + per_page: 1 + _limit: 1 + _fields: _all + response: + strict: false + status_code: 200 + save: + json: + contrib_anon: "data[0]" + + - name: retrieve single contribution with default fields + skip: True + request: + url: "{tavern.env_vars.MP_CONTRIBS_API_URL}/contributions/{contrib_anon.id:s}/" + method: GET + headers: + accept: application/json + response: + status_code: 200 + json: + id: "{contrib_anon.id}" + project: "{contrib_anon.project}" + identifier: "{contrib_anon.identifier}" + formula: "{contrib_anon.formula}" + is_public: !bool "{contrib_anon.is_public}" + last_modified: "{contrib_anon.last_modified}" + needs_build: !bool "{contrib_anon.needs_build}" + + - name: retrieve single contribution with all fields + skip: True + request: + url: "{tavern.env_vars.MP_CONTRIBS_API_URL}/contributions/{contrib_anon.id:s}/" + params: + _fields: _all + method: GET + headers: + accept: application/json + response: + status_code: 200 + json: !force_original_structure "{contrib_anon}" diff --git a/mpcontribs-api/tests/test_contributions.tavern.yaml b/mpcontribs-api/tests/test_contributions_auth.tavern.yaml similarity index 63% rename from mpcontribs-api/tests/test_contributions.tavern.yaml rename to mpcontribs-api/tests/test_contributions_auth.tavern.yaml index 4a0907107..c33d7b2c5 100644 --- a/mpcontribs-api/tests/test_contributions.tavern.yaml +++ b/mpcontribs-api/tests/test_contributions_auth.tavern.yaml @@ -1,10 +1,7 @@ -test_name: retrieving contributions - -# TODO test public response without API key -# TODO test public vs private projects +test_name: authenticated retrieval of contributions stages: - - name: get a random contribution with all fields + - name: get a contribution with all fields request: url: "{tavern.env_vars.MP_CONTRIBS_API_URL}/contributions/" method: GET @@ -20,11 +17,11 @@ stages: status_code: 200 save: json: - contrib: "data[0]" + contrib_auth: "data[0]" - name: retrieve single contribution with default fields request: - url: "{tavern.env_vars.MP_CONTRIBS_API_URL}/contributions/{contrib.id:s}/" + url: "{tavern.env_vars.MP_CONTRIBS_API_URL}/contributions/{contrib_auth.id:s}/" method: GET headers: accept: application/json @@ -32,17 +29,17 @@ stages: response: status_code: 200 json: - id: "{contrib.id}" - project: "{contrib.project}" - identifier: "{contrib.identifier}" - formula: "{contrib.formula}" - is_public: !bool "{contrib.is_public}" - last_modified: "{contrib.last_modified}" - needs_build: !bool "{contrib.needs_build}" + id: "{contrib_auth.id}" + project: "{contrib_auth.project}" + identifier: "{contrib_auth.identifier}" + formula: "{contrib_auth.formula}" + is_public: !bool "{contrib_auth.is_public}" + last_modified: "{contrib_auth.last_modified}" + needs_build: !bool "{contrib_auth.needs_build}" - name: retrieve single contribution with all fields request: - url: "{tavern.env_vars.MP_CONTRIBS_API_URL}/contributions/{contrib.id:s}/" + url: "{tavern.env_vars.MP_CONTRIBS_API_URL}/contributions/{contrib_auth.id:s}/" params: _fields: _all method: GET @@ -51,4 +48,4 @@ stages: X-API-KEY: "{tavern.env_vars.MP_API_KEY}" response: status_code: 200 - json: !force_original_structure "{contrib}" + json: !force_original_structure "{contrib_auth}"