From 0e2e91e02c1c9d0c849e7c4fa43298719ba6108e Mon Sep 17 00:00:00 2001 From: Nils Herrmann Date: Mon, 18 Aug 2025 18:05:09 +0200 Subject: [PATCH 01/14] #403 Implementation of AuthorMetrics --- docs/reference.rst | 1 + docs/reference/scival/AuthorMetrics.rst | 309 ++++++++++++++++++ pybliometrics/scival/__init__.py | 1 + pybliometrics/scival/author_metrics.py | 224 +++++++++++++ .../scival/tests/test_AuthorMetrics.py | 251 ++++++++++++++ pybliometrics/superclasses/retrieval.py | 17 +- pybliometrics/utils/__init__.py | 1 + pybliometrics/utils/constants.py | 28 ++ pybliometrics/utils/parse_metrics.py | 152 +++++++++ 9 files changed, 978 insertions(+), 6 deletions(-) create mode 100644 docs/reference/scival/AuthorMetrics.rst create mode 100644 pybliometrics/scival/author_metrics.py create mode 100644 pybliometrics/scival/tests/test_AuthorMetrics.py create mode 100644 pybliometrics/utils/parse_metrics.py diff --git a/docs/reference.rst b/docs/reference.rst index 2e754428..459d5ec8 100644 --- a/docs/reference.rst +++ b/docs/reference.rst @@ -80,6 +80,7 @@ SciVal .. toctree:: :maxdepth: 1 + reference/scival/AuthorMetrics.rst reference/scival/PublicationLookup.rst diff --git a/docs/reference/scival/AuthorMetrics.rst b/docs/reference/scival/AuthorMetrics.rst new file mode 100644 index 00000000..c4fbaba4 --- /dev/null +++ b/docs/reference/scival/AuthorMetrics.rst @@ -0,0 +1,309 @@ +pybliometrics.scival.AuthorMetrics +================================== + +`AuthorMetrics()` implements the `SciVal Author Metrics API `_. + +It accepts one or more Scopus Author IDs as the main argument and retrieves various performance metrics for the specified authors. + +.. currentmodule:: pybliometrics.scival +.. contents:: Table of Contents + :local: + +Documentation +------------- + +.. autoclass:: AuthorMetrics + :members: + :inherited-members: + +Examples +-------- + +You initialize the class with one or more Scopus Author IDs. 
The argument can be a single ID, a list of IDs, or a comma-separated string of IDs. + +.. code-block:: python + + >>> import pybliometrics + >>> from pybliometrics.scival import AuthorMetrics + >>> pybliometrics.scival.init() + >>> author_metrics = AuthorMetrics("6602819806") + +You can obtain basic information just by printing the object: + +.. code-block:: python + + >>> print(author_metrics) + AuthorMetrics for 1 author(s): + - Algül, Hana (ID: 6602819806) + +There are many properties available that provide different types of metrics. You can explore the available authors: + +.. code-block:: python + + >>> author_metrics.authors + [Author(id=6602819806, name='Algül, Hana', uri='Author/6602819806')] + +**Individual Metric Properties** + +Each metric property returns a list of `MetricData` namedtuples with the structure: `(author_id, author_name, metric, metric_type, year, value, percentage, threshold)`. + +.. code-block:: python + + >>> author_metrics.CitationCount + [MetricData(author_id=6602819806, author_name='Algül, Hana', metric='CitationCount', + metric_type='Citation count', year='all', value=1234, percentage=85.5, threshold=None)] + + >>> author_metrics.HIndices + [MetricData(author_id=6602819806, author_name='Algül, Hana', metric='HIndices', + metric_type='h-index', year='all', value=46.0, percentage=None, threshold=None)] + +**Available Metric Properties**: + +- `AcademicCorporateCollaboration` +- `AcademicCorporateCollaborationImpact` +- `CitationCount` +- `CitationsPerPublication` +- `CitedPublications` +- `Collaboration` +- `CollaborationImpact` +- `FieldWeightedCitationImpact` +- `HIndices` (only available when `by_year=False`) +- `OutputsInTopCitationPercentiles` +- `PublicationsInTopJournalPercentiles` +- `ScholarlyOutput` + +**Getting All Metrics at Once** + +You can retrieve all available metrics in a single list using the `all_metrics` property: + +.. 
code-block:: python + + >>> all_data = author_metrics.all_metrics + >>> len(all_data) + 29 + >>> # Convert to pandas DataFrame for analysis + >>> import pandas as pd + >>> df = pd.DataFrame(all_data) + >>> df.head() + + +.. raw:: html + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
author_idauthor_namemetricmetric_typeyearvaluepercentagethreshold
06602819806Algül, HanaAcademicCorporateCollaborationAcademic-corporate collaborationall12.00000022.64151NaN
16602819806Algül, HanaAcademicCorporateCollaborationNo academic-corporate collaborationall41.00000077.35849NaN
26602819806Algül, HanaAcademicCorporateCollaborationImpactAcademic-corporate collaborationall43.166668NaNNaN
36602819806Algül, HanaAcademicCorporateCollaborationImpactNo academic-corporate collaborationall14.682927NaNNaN
46602819806Algül, HanaCollaborationInstitutional collaborationall6.00000011.32000NaN
+
+ + +**Multiple Authors** + +You can analyze multiple authors simultaneously. Furthermore, you can specify whether you want metrics broken down by year or not. If `by_year=True`, each metric will be returned for each year separately. + +.. code-block:: python + + >>> multi_authors = AuthorMetrics([7201667143, 6603480302], by_year=True) + >>> print(multi_authors) + AuthorMetrics for 2 author(s): + - Wolff, Klaus Dietrich (ID: 7201667143) + - Vogel-Heuser, Birgit (ID: 6603480302) + >>> df = pd.DataFrame(multi_authors.all_metrics) + >>> df.tail(5) + + +.. raw:: html + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
author_idauthor_namemetricmetric_typeyearvaluepercentagethreshold
2756603480302Vogel-Heuser, BirgitOutputsInTopCitationPercentilesNone20245.013.15789525.0
2766603480302Vogel-Heuser, BirgitOutputsInTopCitationPercentilesNone20205.010.00000025.0
2776603480302Vogel-Heuser, BirgitOutputsInTopCitationPercentilesNone202114.027.45098125.0
2786603480302Vogel-Heuser, BirgitOutputsInTopCitationPercentilesNone20228.024.24242425.0
2796603480302Vogel-Heuser, BirgitOutputsInTopCitationPercentilesNone20231.02.17391325.0
+
+ + +**Filtering Specific Metrics** + +You can request only specific metrics to reduce API response size: + +.. code-block:: python + + >>> h_index_only = AuthorMetrics("6602819806", metric_types=["HIndices"]) + >>> h_index_only.HIndices + [MetricData(author_id=6602819806, author_name='Algül, Hana', metric='HIndices', + metric_type='h-index', year='all', value=46.0, percentage=None, threshold=None)] + + >>> # Multiple specific metrics + >>> selected_metrics = AuthorMetrics("6602819806", + ... metric_types=["CitationCount", "ScholarlyOutput"]) + + +Downloaded results are cached to expedite subsequent analyses. This information may become outdated. To refresh the cached results if they exist, set `refresh=True`, or provide an integer that will be interpreted as the maximum allowed number of days since the last modification date. For example, if you want to refresh all cached results older than 100 days, set `refresh=100`. Use `author_metrics.get_cache_file_mdate()` to obtain the date of last modification, and `author_metrics.get_cache_file_age()` to determine the number of days since the last modification. 
diff --git a/pybliometrics/scival/__init__.py b/pybliometrics/scival/__init__.py index a9aefbc9..65fc6eac 100644 --- a/pybliometrics/scival/__init__.py +++ b/pybliometrics/scival/__init__.py @@ -1,3 +1,4 @@ from pybliometrics.utils import * +from pybliometrics.scival.author_metrics import * from pybliometrics.scival.publication_lookup import * diff --git a/pybliometrics/scival/author_metrics.py b/pybliometrics/scival/author_metrics.py new file mode 100644 index 00000000..823fdfd9 --- /dev/null +++ b/pybliometrics/scival/author_metrics.py @@ -0,0 +1,224 @@ +from collections import namedtuple +from typing import Union, Optional + +from pybliometrics.superclasses import Retrieval +from pybliometrics.utils import make_int_if_possible +from pybliometrics.utils.constants import SCIVAL_METRICS +from pybliometrics.utils.parse_metrics import extract_metric_data + + +class AuthorMetrics(Retrieval): + @property + def AcademicCorporateCollaboration(self) -> Optional[list]: + """Academic corporate collaboration metrics for each author. + Returns list of MetricData namedtuples with unified structure: + (author_id, author_name, metric, metric_type, year, value, percentage, threshold) + """ + return extract_metric_data(self._json, 'AcademicCorporateCollaboration', self._by_year) + + @property + def AcademicCorporateCollaborationImpact(self) -> Optional[list]: + """Academic corporate collaboration impact metrics for each author. + Returns list of MetricData namedtuples with unified structure: + (author_id, author_name, metric, metric_type, year, value, percentage, threshold) + """ + return extract_metric_data(self._json, 'AcademicCorporateCollaborationImpact', self._by_year) + + @property + def all_metrics(self) -> Optional[list]: + """Get all available metrics concatenated into a single list. 
+ Returns list of MetricData namedtuples with unified structure: + (author_id, author_name, metric, metric_type, year, value, percentage, threshold) + """ + all_metrics = [] + + # List of all metric properties + metric_properties = self._metric_types.split(",") + + for prop_name in metric_properties: + metrics = getattr(self, prop_name) + if metrics: + all_metrics.extend(metrics) + + return all_metrics or None + + @property + def authors(self) -> Optional[list]: + """A list of namedtuples representing authors and their basic info + in the form `(id, name, uri)`. + """ + out = [] + Author = namedtuple('Author', 'id name uri') + + # Handle both dict and direct access to results + if isinstance(self._json, dict): + results = self._json.get('results', []) + else: + results = [] + + for result in results: + author_data = result.get('author', {}) + new = Author( + id=make_int_if_possible(author_data.get('id')), + name=author_data.get('name'), + uri=author_data.get('uri') + ) + out.append(new) + return out or None + + @property + def CitationCount(self) -> Optional[list]: + """Citation count metrics for each author. + Returns list of MetricData namedtuples with unified structure: + (author_id, author_name, metric, metric_type, year, value, percentage, threshold) + """ + return extract_metric_data(self._json, 'CitationCount', self._by_year) + + @property + def CitationsPerPublication(self) -> Optional[list]: + """Citations per publication metrics for each author. + Returns list of MetricData namedtuples with unified structure: + (author_id, author_name, metric, metric_type, year, value, percentage, threshold) + """ + return extract_metric_data(self._json, 'CitationsPerPublication', self._by_year) + + @property + def CitedPublications(self) -> Optional[list]: + """Cited publications metrics for each author. 
+ Returns list of MetricData namedtuples with unified structure: + (author_id, author_name, metric, metric_type, year, value, percentage, threshold) + """ + return extract_metric_data(self._json, 'CitedPublications', self._by_year) + + @property + def Collaboration(self) -> Optional[list]: + """Collaboration metrics for each author. + Returns list of MetricData namedtuples with unified structure: + (author_id, author_name, metric, metric_type, year, value, percentage, threshold) + """ + return extract_metric_data(self._json, 'Collaboration', self._by_year) + + @property + def CollaborationImpact(self) -> Optional[list]: + """Collaboration impact metrics for each author. + Returns list of MetricData namedtuples with unified structure: + (author_id, author_name, metric, metric_type, year, value, percentage, threshold) + """ + return extract_metric_data(self._json, 'CollaborationImpact', self._by_year) + + @property + def FieldWeightedCitationImpact(self) -> Optional[list]: + """Field weighted citation impact metrics for each author. + Returns list of MetricData namedtuples with unified structure: + (author_id, author_name, metric, metric_type, year, value, percentage, threshold) + """ + return extract_metric_data(self._json, 'FieldWeightedCitationImpact', self._by_year) + + @property + def HIndices(self) -> Optional[list]: + """H-indices metrics for each author (only available when by_year=False). + Returns list of MetricData namedtuples with unified structure: + (author_id, author_name, metric, metric_type, year, value, percentage, threshold) + """ + return extract_metric_data(self._json, 'HIndices', self._by_year) + + @property + def OutputsInTopCitationPercentiles(self) -> Optional[list]: + """Outputs in top citation percentiles metrics for each author. 
+ Returns list of MetricData namedtuples with unified structure: + (author_id, author_name, metric, metric_type, year, value, percentage, threshold) + """ + return extract_metric_data(self._json, 'OutputsInTopCitationPercentiles', self._by_year) + + @property + def PublicationsInTopJournalPercentiles(self) -> Optional[list]: + """Publications in top journal percentiles metrics for each author. + Returns list of MetricData namedtuples with unified structure: + (author_id, author_name, metric, metric_type, year, value, percentage, threshold) + """ + return extract_metric_data(self._json, 'PublicationsInTopJournalPercentiles', self._by_year) + + @property + def ScholarlyOutput(self) -> Optional[list]: + """Scholarly output metrics for each author. + Returns list of MetricData namedtuples with unified structure: + (author_id, author_name, metric, metric_type, year, value, percentage, threshold) + """ + return extract_metric_data(self._json, 'ScholarlyOutput', self._by_year) + + def __init__(self, + author_ids: Union[str, list], + metric_types: Optional[Union[str, list]] = None, + by_year: bool = False, + refresh: Union[bool, int] = False, + **kwds: str + ) -> None: + """Interaction with the SciVal Author Metrics API. + + :param author_ids: Scopus Author ID(s). Can be a single ID or comma-separated + string of IDs, or a list of IDs (e.g. `[55586732900, 57215631099]`). + :param metric_types: Metric type(s) to retrieve. Can be a single metric + or comma-separated string, or a list. Available metrics are: + AcademicCorporateCollaboration, AcademicCorporateCollaborationImpact, + CitationCount, CitedPublications, Collaboration, CollaborationImpact, + FieldWeightedCitationImpact, ScholarlyOutput, + PublicationsInTopJournalPercentiles, OutputsInTopCitationPercentiles, + HIndices. + If not provided, all metrics are retrieved. + :param by_year: Whether to retrieve metrics broken down by year. + :param refresh: Whether to refresh the cached file if it exists or not. 
+ If int is passed, cached file will be refreshed if the + number of days since last modification exceeds that value. + :param kwds: Keywords passed on as query parameters. Must contain + fields and values mentioned in the API specification at + https://dev.elsevier.com/documentation/SciValAuthorAPI.wadl. + + Note: + All metric properties return lists of MetricData namedtuples with + unified structure: `(author_id, author_name, metric, metric_type, + year, value, percentage, threshold)`. Use the `all_metrics` property + to get all metrics concatenated into a single list for easy data + manipulation and analysis. + """ + self._view = '' + self._refresh = refresh + self._by_year = by_year + + # Handle authors parameter + if isinstance(author_ids, list): + author_ids = ",".join(str(a) for a in author_ids) + + # Handle metric_types parameter - use all metrics by default + if metric_types is None: + if not by_year: + metric_types = SCIVAL_METRICS["AuthorMetrics"]["byYear"] + SCIVAL_METRICS["AuthorMetrics"]["notByYear"] + if by_year: + metric_types = SCIVAL_METRICS["AuthorMetrics"]["byYear"] + + if isinstance(metric_types, list): + metric_types = ",".join(metric_types) + + self._metric_types = metric_types + + # Set up parameters for the API call + params = { + 'authors': author_ids, + 'metricTypes': metric_types, + 'byYear': str(by_year).lower(), + **kwds + } + + Retrieval.__init__(self, **params, **kwds) + + def __str__(self): + """Return pretty text version of the author metrics.""" + authors = self.authors or [] + author_count = len(authors) + + if author_count == 0: + return "No authors found" + else: + s = f"AuthorMetrics for {author_count} author(s):" + for author in authors: + s += f"\n- {author.name} (ID: {author.id})" + return s diff --git a/pybliometrics/scival/tests/test_AuthorMetrics.py b/pybliometrics/scival/tests/test_AuthorMetrics.py new file mode 100644 index 00000000..d09d7ab4 --- /dev/null +++ b/pybliometrics/scival/tests/test_AuthorMetrics.py @@ -0,0 
+1,251 @@ +from collections import namedtuple + +from pybliometrics.scival.author_metrics import AuthorMetrics +from pybliometrics.utils.startup import init + +init() + +# Test cases as specified +single_author_all = AuthorMetrics("6602819806", by_year=False, refresh=30) +single_author_h_index = AuthorMetrics("6602819806", metric_types=["HIndices"], by_year=False, refresh=30) +multiple_authors_all = AuthorMetrics([7201667143, 6603480302], by_year=True, refresh=30) +empty_metrics = AuthorMetrics("0000000000") + + +def test_academic_corporate_collaboration(): + """Test AcademicCorporateCollaboration property for all test cases.""" + result = single_author_all.AcademicCorporateCollaboration + + assert has_all_fields(result[0]) + assert single_author_h_index.AcademicCorporateCollaboration is None + + result_multi = multiple_authors_all.AcademicCorporateCollaboration + assert has_all_fields(result_multi[0]) + + +def test_academic_corporate_collaboration_impact(): + """Test AcademicCorporateCollaborationImpact property for all test cases.""" + result = single_author_all.AcademicCorporateCollaborationImpact + assert has_all_fields(result[0]) + assert single_author_h_index.AcademicCorporateCollaborationImpact is None + + result_multi = multiple_authors_all.AcademicCorporateCollaborationImpact + assert has_all_fields(result_multi[0]) + + +def test_all_metrics(): + """Test all_metrics property for all test cases.""" + MetricData = namedtuple('MetricData', + 'author_id author_name metric metric_type year value percentage threshold', + defaults=(None, None, None, None, "all", None, None, None)) + + single_author_all_metrics = single_author_all.all_metrics + assert len(single_author_all_metrics) == 29 + expected_first_metric = MetricData(author_id=6602819806, + author_name='Algül, Hana', + metric='AcademicCorporateCollaboration', + metric_type='Academic-corporate collaboration', + year='all', + value=12, + percentage=22.64151, + threshold=None) + assert expected_first_metric == 
single_author_all_metrics[0] + + + single_author_h_index_all_metrics = single_author_h_index.all_metrics + assert len(single_author_h_index_all_metrics) == 1 + expected_h_index = MetricData(author_id=6602819806, + author_name='Algül, Hana', + metric='HIndices', + metric_type='h-index', + year='all', value=46.0, + percentage=None, + threshold=None) + assert expected_h_index == single_author_h_index_all_metrics[0] + + + multiple_authors_all_metrics = multiple_authors_all.all_metrics + assert len(multiple_authors_all_metrics) == 280 + expected_multi_metric = MetricData(author_id=6603480302, + author_name='Vogel-Heuser, Birgit', + metric='AcademicCorporateCollaboration', + metric_type='Academic-corporate collaboration', + year='2024', + value=8, + percentage=21.052631, + threshold=None) + assert expected_multi_metric == multiple_authors_all_metrics[10] + + +def test_authors(): + """Test the authors property for all test cases with actual values.""" + # Test single author with all metrics + authors = single_author_all.authors + assert len(authors) == 1 + assert authors[0].id == 6602819806 + assert authors[0].name == "Algül, Hana" + assert authors[0].uri == "Author/6602819806" + + # Test single author with H-indices only + authors_h = single_author_h_index.authors + assert len(authors_h) == 1 + assert authors_h[0].id == 6602819806 + assert authors_h[0].name == "Algül, Hana" + assert authors_h[0].uri == "Author/6602819806" + + # Test multiple authors with actual names and IDs + authors_multi = multiple_authors_all.authors + assert len(authors_multi) == 2 + + # Sort by ID and test + authors_sorted = sorted(authors_multi, key=lambda x: x.id) + + assert authors_sorted[0].id == 6603480302 + assert authors_sorted[0].name == "Vogel-Heuser, Birgit" + assert authors_sorted[0].uri == "Author/6603480302" + + assert authors_sorted[1].id == 7201667143 + assert authors_sorted[1].name == "Wolff, Klaus Dietrich" + assert authors_sorted[1].uri == "Author/7201667143" + + +def 
has_all_fields(metric_data): + """Check if the metric data has all required fields.""" + required_fields = ['author_id', 'author_name', 'metric', 'metric_type', 'year', 'value', 'percentage', 'threshold'] + return all(hasattr(metric_data, field) for field in required_fields) + + +def test_citation_count(): + """Test CitationCount property for all test cases.""" + result = single_author_all.CitationCount + assert has_all_fields(result[0]) + assert single_author_h_index.CitationCount is None + + result_multi = multiple_authors_all.CitationCount + assert has_all_fields(result_multi[0]) + + +def test_citations_per_publication(): + """Test CitationsPerPublication property for all test cases.""" + result = single_author_all.CitationsPerPublication + assert has_all_fields(result[0]) + assert single_author_h_index.CitationsPerPublication is None + + result_multi = multiple_authors_all.CitationsPerPublication + assert has_all_fields(result_multi[0]) + + +def test_cited_publications(): + """Test CitedPublications property for all test cases.""" + result = single_author_all.CitedPublications + assert has_all_fields(result[0]) + assert single_author_h_index.CitedPublications is None + + result_multi = multiple_authors_all.CitedPublications + assert has_all_fields(result_multi[0]) + + +def test_collaboration(): + """Test Collaboration property for all test cases.""" + result = single_author_all.Collaboration + assert has_all_fields(result[0]) + assert single_author_h_index.Collaboration is None + + result_multi = multiple_authors_all.Collaboration + assert has_all_fields(result_multi[0]) + + +def test_collaboration_impact(): + """Test CollaborationImpact property for all test cases.""" + result = single_author_all.CollaborationImpact + assert has_all_fields(result[0]) + assert single_author_h_index.CollaborationImpact is None + + result_multi = multiple_authors_all.CollaborationImpact + assert has_all_fields(result_multi[0]) + + +def test_empty_metrics(): + """Test handling of 
empty metric_types.""" + assert empty_metrics.all_metrics is None + assert empty_metrics.authors is None + assert empty_metrics.CitationCount is None + assert empty_metrics.CitationsPerPublication is None + assert empty_metrics.CitedPublications is None + assert empty_metrics.Collaboration is None + assert empty_metrics.CollaborationImpact is None + + +def test_field_weighted_citation_impact(): + """Test FieldWeightedCitationImpact property for all test cases.""" + result = single_author_all.FieldWeightedCitationImpact + assert has_all_fields(result[0]) + assert single_author_h_index.FieldWeightedCitationImpact is None + + result_multi = multiple_authors_all.FieldWeightedCitationImpact + assert has_all_fields(result_multi[0]) + + +def test_h_indices(): + """Test HIndices property for all test cases.""" + result = single_author_all.HIndices + assert has_all_fields(result[0]) + + result_h = single_author_h_index.HIndices + assert has_all_fields(result_h[0]) + + # HIndices are not available by year + assert multiple_authors_all.HIndices is None + + +def test_outputs_in_top_citation_percentiles(): + """Test OutputsInTopCitationPercentiles property for all test cases.""" + result = single_author_all.OutputsInTopCitationPercentiles + assert has_all_fields(result[0]) + assert single_author_h_index.OutputsInTopCitationPercentiles is None + + result_multi = multiple_authors_all.OutputsInTopCitationPercentiles + assert has_all_fields(result_multi[0]) + + +def test_publications_in_top_journal_percentiles(): + """Test PublicationsInTopJournalPercentiles property for all test cases.""" + result = single_author_all.PublicationsInTopJournalPercentiles + assert has_all_fields(result[0]) + assert single_author_h_index.PublicationsInTopJournalPercentiles is None + + result_multi = multiple_authors_all.PublicationsInTopJournalPercentiles + assert has_all_fields(result_multi[0]) + + +def test_scholarly_output(): + """Test ScholarlyOutput property for all test cases.""" + result = 
single_author_all.ScholarlyOutput + assert has_all_fields(result[0]) + assert single_author_h_index.ScholarlyOutput is None + + result_multi = multiple_authors_all.ScholarlyOutput + assert has_all_fields(result_multi[0]) + + +def test_str_representation(): + """Test the string representation of AuthorMetrics objects using actual results.""" + # Test single author with all metrics + str_single = str(single_author_all) + expected_single = "AuthorMetrics for 1 author(s):\n- Algül, Hana (ID: 6602819806)" + assert str_single == expected_single + + # Test single author with H-indices only + str_h_index = str(single_author_h_index) + expected_h_index = "AuthorMetrics for 1 author(s):\n- Algül, Hana (ID: 6602819806)" + assert str_h_index == expected_h_index + + # Test multiple authors + str_multiple = str(multiple_authors_all) + expected_multiple = "AuthorMetrics for 2 author(s):\n- Wolff, Klaus Dietrich (ID: 7201667143)\n- Vogel-Heuser, Birgit (ID: 6603480302)" + assert str_multiple == expected_multiple + + # Test empty metrics + str_empty = str(empty_metrics) + expected_empty = "No authors found" + assert str_empty == expected_empty diff --git a/pybliometrics/superclasses/retrieval.py b/pybliometrics/superclasses/retrieval.py index 3ae3c2c8..620c9a29 100644 --- a/pybliometrics/superclasses/retrieval.py +++ b/pybliometrics/superclasses/retrieval.py @@ -1,16 +1,17 @@ """Superclass to access all Scopus retrieval APIs and dump the results.""" +import hashlib from pathlib import Path -from typing import Union +from typing import Optional, Union from pybliometrics.superclasses import Base -from pybliometrics.utils import APIS_WITH_ID_TYPE, get_config, URLS +from pybliometrics.utils import APIS_NO_ID_IN_ULR, APIS_WITH_ID_TYPE, get_config, URLS class Retrieval(Base): def __init__(self, - identifier: Union[int, str], - id_type: str = None, + identifier: Optional[Union[int, str]] = None, + id_type: Optional[str] = None, **kwds: str ) -> None: """Class intended as superclass to 
perform retrievals. @@ -38,9 +39,13 @@ def __init__(self, stem += "-" + self._citation if self._date: stem += "-" + self._date + # For APIs that don't use ID in URL, hash the parameters for unique cache filename + elif api in APIS_NO_ID_IN_ULR: + params_str = str(sorted(kwds.items())) + stem = hashlib.md5(params_str.encode()).hexdigest() else: - url += identifier - stem = identifier.replace('/', '_') + url += str(identifier) + stem = str(identifier).replace('/', '_') # Get cache file path config = get_config() parent = Path(config.get('Directories', api)) diff --git a/pybliometrics/utils/__init__.py b/pybliometrics/utils/__init__.py index c3acf5b3..819ee276 100644 --- a/pybliometrics/utils/__init__.py +++ b/pybliometrics/utils/__init__.py @@ -3,4 +3,5 @@ from pybliometrics.utils.create_config import * from pybliometrics.utils.get_content import * from pybliometrics.utils.parse_content import * +from pybliometrics.utils.parse_metrics import * from pybliometrics.utils.startup import * diff --git a/pybliometrics/utils/constants.py b/pybliometrics/utils/constants.py index 4cc76320..9643c31a 100644 --- a/pybliometrics/utils/constants.py +++ b/pybliometrics/utils/constants.py @@ -33,6 +33,7 @@ 'ObjectRetrieval': CACHE_PATH / "ScienceDirect" / 'object_retrieval', 'PlumXMetrics': CACHE_PATH / "Scopus" / 'plumx', 'PublicationLookup': CACHE_PATH / "Scival" / "publication_lookup", + 'AuthorMetrics': CACHE_PATH / "Scival" / "author_metrics", 'ScDirSubjectClassifications': CACHE_PATH / "ScienceDirect" / 'subject_classification', 'ScienceDirectSearch': CACHE_PATH / "ScienceDirect" / 'science_direct_search', 'ScopusSearch': CACHE_PATH / "Scopus" / 'scopus_search', @@ -59,6 +60,7 @@ 'ObjectMetadata': RETRIEVAL_BASE + 'object/', 'ObjectRetrieval': RETRIEVAL_BASE + 'object/', 'PublicationLookup': SCIVAL_BASE + 'publication/', + 'AuthorMetrics': SCIVAL_BASE + 'author/metrics/', 'PlumXMetrics': 'https://api.elsevier.com/analytics/plumx/', 'ScDirSubjectClassifications': RETRIEVAL_BASE 
+ 'subject/scidir/', 'ScienceDirectSearch': SEARCH_BASE + 'sciencedirect/', @@ -91,6 +93,28 @@ "ObjectRetrieval": [""] } +# SciVal Metrics +SCIVAL_METRICS = { + "AuthorMetrics": { + "byYear": [ + "AcademicCorporateCollaboration", + "AcademicCorporateCollaborationImpact", + "Collaboration", + "CitationCount", + "CitationsPerPublication", + "CollaborationImpact", + "CitedPublications", + "FieldWeightedCitationImpact", + "ScholarlyOutput", + "PublicationsInTopJournalPercentiles", + "OutputsInTopCitationPercentiles" + ], + "notByYear": [ + "HIndices" + ] + } +} + # APIs whose URL needs an id_type APIS_WITH_ID_TYPE = {"AbstractRetrieval", "PlumXMetrics", @@ -99,6 +123,9 @@ "ObjectMetadata", "ObjectRetrieval"} +# APIs that do not require an ID in the URL +APIS_NO_ID_IN_ULR = {"AuthorMetrics"} + # Item per page limits for all classes COUNTS = { "AffiliationSearch": {"STANDARD": 200}, @@ -119,6 +146,7 @@ 'ArticleEntitlement': 0, 'ArticleMetadata': 6, 'ArticleRetrieval': 10, + 'AuthorMetrics': 6, 'AuthorRetrieval': 3, 'AuthorSearch': 2, 'CitationOverview': 4, diff --git a/pybliometrics/utils/parse_metrics.py b/pybliometrics/utils/parse_metrics.py new file mode 100644 index 00000000..63432bf3 --- /dev/null +++ b/pybliometrics/utils/parse_metrics.py @@ -0,0 +1,152 @@ +"""Utility functions to parse and extract metrics data from JSON responses.""" +from collections import namedtuple + +from pybliometrics.utils import make_int_if_possible + +# Global namedtuple for all metric data with default values +MetricData = namedtuple('MetricData', + 'author_id author_name metric metric_type year value percentage threshold', + defaults=(None, None, None, None, "all", None, None, None)) + + +def extract_metric_data(json_data, metric_type: str, by_year: bool = False): + """Helper function to extract metric data for a specific metric type. 
+ + Parameters + ---------- + json_data : dict + The JSON response from the API + metric_type : str + The metric type to extract + by_year : bool, optional + Whether the data is broken down by year + + Returns + ------- + list or None + List of MetricData namedtuples or None if no data found + """ + out = [] + + # Get results from JSON data + if isinstance(json_data, dict): + results = json_data.get('results', []) + else: + results = [] + + for result in results: + author_id, author_name = extract_author_info(result) + metric_data = find_metric_data(result, metric_type) + + if not metric_data: + continue + + # Process metric data using unified approach + metric_items = process_metric(metric_data, author_id, author_name, metric_type, by_year) + if metric_items: + out.extend(metric_items) + + return out or None + + +def extract_author_info(result: dict) -> tuple: + """Extract author ID and name from a result. + + Parameters + ---------- + result : dict + Author result from API response + + Returns + ------- + tuple + (author_id, author_name) + """ + author_data = result.get('author', {}) + author_id = make_int_if_possible(author_data.get('id')) + author_name = author_data.get('name') + return author_id, author_name + + +def find_metric_data(result: dict, metric_type: str): + """Find specific metric data in the metrics list. + + Parameters + ---------- + result : dict + Author result from API response + metric_type : str + The metric type to find + + Returns + ------- + dict or None + The metric data or None if not found + """ + metrics = result.get('metrics', []) + for metric in metrics: + if metric.get('metricType') == metric_type: + return metric + return None + + +def process_metric(metric_data: dict, author_id: int, author_name: str, metric_type: str, by_year: bool = False): + """Unified function to process all metric types. 
+ + Parameters + ---------- + metric_data : dict + The metric data from API response + author_id : int + Author ID + author_name : str + Author name + metric_type : str + The metric type + by_year : bool, optional + Whether the data is broken down by year + + Returns + ------- + list or None + List of MetricData namedtuples or None if no data + """ + out = [] + + # Normalize all metrics to have a 'values' structure + if 'values' in metric_data: + # Already has multiple values (collaboration/threshold metrics) + values_list = metric_data['values'] + else: + # Simple metric - wrap in a list to make it uniform + values_list = [metric_data] + + # Process all value items uniformly + for value_item in values_list: + # Extract type-specific information - just try to get them, default to None + collab_type = value_item.get('collabType') + threshold = value_item.get('threshold') + + # Normalize data structure: convert single values to year-keyed dictionaries + if by_year: + value_data = value_item.get('valueByYear', {}) + percentage_data = value_item.get('percentageByYear', {}) + else: + value_data = {"all": value_item.get('value')} + percentage_data = {"all": value_item.get('percentage')} + + # Process all years uniformly + for year in value_data.keys(): + new = MetricData( + author_id=author_id, + author_name=author_name, + metric=metric_type, + metric_type=collab_type or value_item.get('indexType') or value_item.get('impactType'), + year=str(year), + value=value_data.get(year), + percentage=percentage_data.get(year), + threshold=threshold + ) + out.append(new) + + return out if out else None From b3f20fc7cc462d843e8187659abb2cb8de1aef9a Mon Sep 17 00:00:00 2001 From: Nils Herrmann Date: Tue, 19 Aug 2025 10:40:00 +0200 Subject: [PATCH 02/14] Adapt metric parsing for other entities (not only authors) --- docs/reference/scival/AuthorMetrics.rst | 19 +++--- pybliometrics/scival/author_metrics.py | 65 ++++++++++--------- .../scival/tests/test_AuthorMetrics.py | 16 ++--- 
pybliometrics/utils/parse_metrics.py | 42 ++++++------ 4 files changed, 75 insertions(+), 67 deletions(-) diff --git a/docs/reference/scival/AuthorMetrics.rst b/docs/reference/scival/AuthorMetrics.rst index c4fbaba4..eb3a9052 100644 --- a/docs/reference/scival/AuthorMetrics.rst +++ b/docs/reference/scival/AuthorMetrics.rst @@ -45,16 +45,16 @@ There are many properties available that provide different types of metrics. You **Individual Metric Properties** -Each metric property returns a list of `MetricData` namedtuples with the structure: `(author_id, author_name, metric, metric_type, year, value, percentage, threshold)`. +Each metric property returns a list of `MetricData` namedtuples with the structure: `(entity_id, entity_name, metric, metric_type, year, value, percentage, threshold)` where `entity_id` and `entity_name` refer to the author. .. code-block:: python >>> author_metrics.CitationCount - [MetricData(author_id=6602819806, author_name='Algül, Hana', metric='CitationCount', + [MetricData(entity_id=6602819806, entity_name='Algül, Hana', metric='CitationCount', metric_type='Citation count', year='all', value=1234, percentage=85.5, threshold=None)] >>> author_metrics.HIndices - [MetricData(author_id=6602819806, author_name='Algül, Hana', metric='HIndices', + [MetricData(entity_id=6602819806, entity_name='Algül, Hana', metric='HIndices', metric_type='h-index', year='all', value=46.0, percentage=None, threshold=None)] **Available Metric Properties**: @@ -72,6 +72,9 @@ Each metric property returns a list of `MetricData` namedtuples with the structu - `PublicationsInTopJournalPercentiles` - `ScholarlyOutput` +.. note:: + **Unified Data Structure**: AuthorMetrics uses a unified `MetricData` structure with `entity_id` and `entity_name` fields. For authors, these fields contain the author ID and author name respectively. 
This structure is compatible with `InstitutionMetrics` and other SciVal metric classes, enabling consistent data analysis across different entity types. + **Getting All Metrics at Once** You can retrieve all available metrics in a single list using the `all_metrics` property: @@ -110,8 +113,8 @@ You can retrieve all available metrics in a single list using the `all_metrics` - author_id - author_name + entity_id + entity_name metric metric_type year @@ -219,8 +222,8 @@ You can analyze multiple authors simultaneously. Furthermore, you can specify wh - author_id - author_name + entity_id + entity_name metric metric_type year @@ -298,7 +301,7 @@ You can request only specific metrics to reduce API response size: >>> h_index_only = AuthorMetrics("6602819806", metric_types=["HIndices"]) >>> h_index_only.HIndices - [MetricData(author_id=6602819806, author_name='Algül, Hana', metric='HIndices', + [MetricData(entity_id=6602819806, entity_name='Algül, Hana', metric='HIndices', metric_type='h-index', year='all', value=46.0, percentage=None, threshold=None)] >>> # Multiple specific metrics diff --git a/pybliometrics/scival/author_metrics.py b/pybliometrics/scival/author_metrics.py index 823fdfd9..ae4d0908 100644 --- a/pybliometrics/scival/author_metrics.py +++ b/pybliometrics/scival/author_metrics.py @@ -12,33 +12,34 @@ class AuthorMetrics(Retrieval): def AcademicCorporateCollaboration(self) -> Optional[list]: """Academic corporate collaboration metrics for each author. 
Returns list of MetricData namedtuples with unified structure: - (author_id, author_name, metric, metric_type, year, value, percentage, threshold) + (entity_id, entity_name, metric, metric_type, year, value, percentage, threshold) """ - return extract_metric_data(self._json, 'AcademicCorporateCollaboration', self._by_year) + return extract_metric_data(self._json, 'AcademicCorporateCollaboration', self._by_year, "author") @property def AcademicCorporateCollaborationImpact(self) -> Optional[list]: """Academic corporate collaboration impact metrics for each author. Returns list of MetricData namedtuples with unified structure: - (author_id, author_name, metric, metric_type, year, value, percentage, threshold) + (entity_id, entity_name, metric, metric_type, year, value, percentage, threshold) """ - return extract_metric_data(self._json, 'AcademicCorporateCollaborationImpact', self._by_year) + return extract_metric_data(self._json, 'AcademicCorporateCollaborationImpact', self._by_year, "author") @property def all_metrics(self) -> Optional[list]: """Get all available metrics concatenated into a single list. Returns list of MetricData namedtuples with unified structure: - (author_id, author_name, metric, metric_type, year, value, percentage, threshold) + (entity_id, entity_name, metric, metric_type, year, value, percentage, threshold) """ all_metrics = [] # List of all metric properties - metric_properties = self._metric_types.split(",") - - for prop_name in metric_properties: - metrics = getattr(self, prop_name) - if metrics: - all_metrics.extend(metrics) + if self._metric_types: + metric_properties = self._metric_types.split(",") + + for prop_name in metric_properties: + metrics = getattr(self, prop_name) + if metrics: + all_metrics.extend(metrics) return all_metrics or None @@ -70,81 +71,81 @@ def authors(self) -> Optional[list]: def CitationCount(self) -> Optional[list]: """Citation count metrics for each author. 
Returns list of MetricData namedtuples with unified structure: - (author_id, author_name, metric, metric_type, year, value, percentage, threshold) + (entity_id, entity_name, metric, metric_type, year, value, percentage, threshold) """ - return extract_metric_data(self._json, 'CitationCount', self._by_year) + return extract_metric_data(self._json, 'CitationCount', self._by_year, "author") @property def CitationsPerPublication(self) -> Optional[list]: """Citations per publication metrics for each author. Returns list of MetricData namedtuples with unified structure: - (author_id, author_name, metric, metric_type, year, value, percentage, threshold) + (entity_id, entity_name, metric, metric_type, year, value, percentage, threshold) """ - return extract_metric_data(self._json, 'CitationsPerPublication', self._by_year) + return extract_metric_data(self._json, 'CitationsPerPublication', self._by_year, "author") @property def CitedPublications(self) -> Optional[list]: """Cited publications metrics for each author. Returns list of MetricData namedtuples with unified structure: - (author_id, author_name, metric, metric_type, year, value, percentage, threshold) + (entity_id, entity_name, metric, metric_type, year, value, percentage, threshold) """ - return extract_metric_data(self._json, 'CitedPublications', self._by_year) + return extract_metric_data(self._json, 'CitedPublications', self._by_year, "author") @property def Collaboration(self) -> Optional[list]: """Collaboration metrics for each author. 
Returns list of MetricData namedtuples with unified structure: - (author_id, author_name, metric, metric_type, year, value, percentage, threshold) + (entity_id, entity_name, metric, metric_type, year, value, percentage, threshold) """ - return extract_metric_data(self._json, 'Collaboration', self._by_year) + return extract_metric_data(self._json, 'Collaboration', self._by_year, "author") @property def CollaborationImpact(self) -> Optional[list]: """Collaboration impact metrics for each author. Returns list of MetricData namedtuples with unified structure: - (author_id, author_name, metric, metric_type, year, value, percentage, threshold) + (entity_id, entity_name, metric, metric_type, year, value, percentage, threshold) """ - return extract_metric_data(self._json, 'CollaborationImpact', self._by_year) + return extract_metric_data(self._json, 'CollaborationImpact', self._by_year, "author") @property def FieldWeightedCitationImpact(self) -> Optional[list]: """Field weighted citation impact metrics for each author. Returns list of MetricData namedtuples with unified structure: - (author_id, author_name, metric, metric_type, year, value, percentage, threshold) + (entity_id, entity_name, metric, metric_type, year, value, percentage, threshold) """ - return extract_metric_data(self._json, 'FieldWeightedCitationImpact', self._by_year) + return extract_metric_data(self._json, 'FieldWeightedCitationImpact', self._by_year, "author") @property def HIndices(self) -> Optional[list]: """H-indices metrics for each author (only available when by_year=False). 
Returns list of MetricData namedtuples with unified structure: - (author_id, author_name, metric, metric_type, year, value, percentage, threshold) + (entity_id, entity_name, metric, metric_type, year, value, percentage, threshold) """ - return extract_metric_data(self._json, 'HIndices', self._by_year) + return extract_metric_data(self._json, 'HIndices', self._by_year, "author") @property def OutputsInTopCitationPercentiles(self) -> Optional[list]: """Outputs in top citation percentiles metrics for each author. Returns list of MetricData namedtuples with unified structure: - (author_id, author_name, metric, metric_type, year, value, percentage, threshold) + (entity_id, entity_name, metric, metric_type, year, value, percentage, threshold) """ - return extract_metric_data(self._json, 'OutputsInTopCitationPercentiles', self._by_year) + return extract_metric_data(self._json, 'OutputsInTopCitationPercentiles', self._by_year, "author") @property def PublicationsInTopJournalPercentiles(self) -> Optional[list]: """Publications in top journal percentiles metrics for each author. Returns list of MetricData namedtuples with unified structure: - (author_id, author_name, metric, metric_type, year, value, percentage, threshold) + (entity_id, entity_name, metric, metric_type, year, value, percentage, threshold) """ - return extract_metric_data(self._json, 'PublicationsInTopJournalPercentiles', self._by_year) + return extract_metric_data(self._json, 'PublicationsInTopJournalPercentiles', self._by_year, "author") @property def ScholarlyOutput(self) -> Optional[list]: """Scholarly output metrics for each author. 
Returns list of MetricData namedtuples with unified structure: - (author_id, author_name, metric, metric_type, year, value, percentage, threshold) + (entity_id, entity_name, metric, metric_type, year, value, percentage, threshold) """ - return extract_metric_data(self._json, 'ScholarlyOutput', self._by_year) + return extract_metric_data(self._json, 'ScholarlyOutput', self._by_year, "author") def __init__(self, author_ids: Union[str, list], @@ -175,7 +176,7 @@ def __init__(self, Note: All metric properties return lists of MetricData namedtuples with - unified structure: `(author_id, author_name, metric, metric_type, + unified structure: `(entity_id, entity_name, metric, metric_type, year, value, percentage, threshold)`. Use the `all_metrics` property to get all metrics concatenated into a single list for easy data manipulation and analysis. diff --git a/pybliometrics/scival/tests/test_AuthorMetrics.py b/pybliometrics/scival/tests/test_AuthorMetrics.py index d09d7ab4..ba73150e 100644 --- a/pybliometrics/scival/tests/test_AuthorMetrics.py +++ b/pybliometrics/scival/tests/test_AuthorMetrics.py @@ -36,13 +36,13 @@ def test_academic_corporate_collaboration_impact(): def test_all_metrics(): """Test all_metrics property for all test cases.""" MetricData = namedtuple('MetricData', - 'author_id author_name metric metric_type year value percentage threshold', + 'entity_id entity_name metric metric_type year value percentage threshold', defaults=(None, None, None, None, "all", None, None, None)) single_author_all_metrics = single_author_all.all_metrics assert len(single_author_all_metrics) == 29 - expected_first_metric = MetricData(author_id=6602819806, - author_name='Algül, Hana', + expected_first_metric = MetricData(entity_id=6602819806, + entity_name='Algül, Hana', metric='AcademicCorporateCollaboration', metric_type='Academic-corporate collaboration', year='all', @@ -54,8 +54,8 @@ def test_all_metrics(): single_author_h_index_all_metrics = single_author_h_index.all_metrics 
assert len(single_author_h_index_all_metrics) == 1 - expected_h_index = MetricData(author_id=6602819806, - author_name='Algül, Hana', + expected_h_index = MetricData(entity_id=6602819806, + entity_name='Algül, Hana', metric='HIndices', metric_type='h-index', year='all', value=46.0, @@ -66,8 +66,8 @@ def test_all_metrics(): multiple_authors_all_metrics = multiple_authors_all.all_metrics assert len(multiple_authors_all_metrics) == 280 - expected_multi_metric = MetricData(author_id=6603480302, - author_name='Vogel-Heuser, Birgit', + expected_multi_metric = MetricData(entity_id=6603480302, + entity_name='Vogel-Heuser, Birgit', metric='AcademicCorporateCollaboration', metric_type='Academic-corporate collaboration', year='2024', @@ -111,7 +111,7 @@ def test_authors(): def has_all_fields(metric_data): """Check if the metric data has all required fields.""" - required_fields = ['author_id', 'author_name', 'metric', 'metric_type', 'year', 'value', 'percentage', 'threshold'] + required_fields = ['entity_id', 'entity_name', 'metric', 'metric_type', 'year', 'value', 'percentage', 'threshold'] return all(hasattr(metric_data, field) for field in required_fields) diff --git a/pybliometrics/utils/parse_metrics.py b/pybliometrics/utils/parse_metrics.py index 63432bf3..0b75ddb6 100644 --- a/pybliometrics/utils/parse_metrics.py +++ b/pybliometrics/utils/parse_metrics.py @@ -5,11 +5,11 @@ # Global namedtuple for all metric data with default values MetricData = namedtuple('MetricData', - 'author_id author_name metric metric_type year value percentage threshold', + 'entity_id entity_name metric metric_type year value percentage threshold', defaults=(None, None, None, None, "all", None, None, None)) -def extract_metric_data(json_data, metric_type: str, by_year: bool = False): +def extract_metric_data(json_data, metric_type: str, by_year: bool, entity_type: str): """Helper function to extract metric data for a specific metric type. 
Parameters @@ -20,6 +20,8 @@ def extract_metric_data(json_data, metric_type: str, by_year: bool = False): The metric type to extract by_year : bool, optional Whether the data is broken down by year + entity_type : str + The type of entity ("author" or "institution") Returns ------- @@ -35,37 +37,39 @@ def extract_metric_data(json_data, metric_type: str, by_year: bool = False): results = [] for result in results: - author_id, author_name = extract_author_info(result) + entity_id, entity_name = extract_entity_info(result, entity_type) metric_data = find_metric_data(result, metric_type) if not metric_data: continue # Process metric data using unified approach - metric_items = process_metric(metric_data, author_id, author_name, metric_type, by_year) + metric_items = process_metric(metric_data, entity_id, entity_name, metric_type, by_year) if metric_items: out.extend(metric_items) return out or None -def extract_author_info(result: dict) -> tuple: - """Extract author ID and name from a result. +def extract_entity_info(result: dict, entity_type: str) -> tuple: + """Extract entity ID and name from a result. 
Parameters ---------- result : dict - Author result from API response + Entity result from API response + entity_type : str + The type of entity ("author" or "institution") Returns ------- tuple - (author_id, author_name) + (entity_id, entity_name) """ - author_data = result.get('author', {}) - author_id = make_int_if_possible(author_data.get('id')) - author_name = author_data.get('name') - return author_id, author_name + entity_data = result.get(entity_type, {}) + entity_id = make_int_if_possible(entity_data.get('id')) + entity_name = entity_data.get('name') + return entity_id, entity_name def find_metric_data(result: dict, metric_type: str): @@ -90,17 +94,17 @@ def find_metric_data(result: dict, metric_type: str): return None -def process_metric(metric_data: dict, author_id: int, author_name: str, metric_type: str, by_year: bool = False): +def process_metric(metric_data: dict, entity_id: int, entity_name: str, metric_type: str, by_year: bool = False): """Unified function to process all metric types. 
Parameters ---------- metric_data : dict The metric data from API response - author_id : int - Author ID - author_name : str - Author name + entity_id : int + Entity ID (author or institution) + entity_name : str + Entity name (author or institution) metric_type : str The metric type by_year : bool, optional @@ -138,8 +142,8 @@ def process_metric(metric_data: dict, author_id: int, author_name: str, metric_t # Process all years uniformly for year in value_data.keys(): new = MetricData( - author_id=author_id, - author_name=author_name, + entity_id=entity_id, + entity_name=entity_name, metric=metric_type, metric_type=collab_type or value_item.get('indexType') or value_item.get('impactType'), year=str(year), From f121f45f598a4892b0855afb2c6cdbee5a956d8c Mon Sep 17 00:00:00 2001 From: Nils Herrmann Date: Tue, 19 Aug 2025 11:09:14 +0200 Subject: [PATCH 03/14] Correct typo: Pass only kwds once --- pybliometrics/scival/author_metrics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pybliometrics/scival/author_metrics.py b/pybliometrics/scival/author_metrics.py index ae4d0908..1ab54775 100644 --- a/pybliometrics/scival/author_metrics.py +++ b/pybliometrics/scival/author_metrics.py @@ -209,7 +209,7 @@ def __init__(self, **kwds } - Retrieval.__init__(self, **params, **kwds) + Retrieval.__init__(self, **params) def __str__(self): """Return pretty text version of the author metrics.""" From 712cb91d4ed5d019763ac61415052102f5115adb Mon Sep 17 00:00:00 2001 From: Nils Herrmann Date: Wed, 20 Aug 2025 10:08:40 +0200 Subject: [PATCH 04/14] Always fill metric --- docs/reference/scival/AuthorMetrics.rst | 41 +++++++++---------- .../scival/tests/test_AuthorMetrics.py | 12 +++--- pybliometrics/utils/parse_metrics.py | 8 +++- 3 files changed, 31 insertions(+), 30 deletions(-) diff --git a/docs/reference/scival/AuthorMetrics.rst b/docs/reference/scival/AuthorMetrics.rst index eb3a9052..b71f68a2 100644 --- a/docs/reference/scival/AuthorMetrics.rst +++ 
b/docs/reference/scival/AuthorMetrics.rst @@ -23,9 +23,8 @@ You initialize the class with one or more Scopus Author IDs. The argument can be .. code-block:: python - >>> import pybliometrics - >>> from pybliometrics.scival import AuthorMetrics - >>> pybliometrics.scival.init() + >>> from pybliometrics.scival import AuthorMetrics, init + >>> init() >>> author_metrics = AuthorMetrics("6602819806") You can obtain basic information just by printing the object: @@ -50,12 +49,10 @@ Each metric property returns a list of `MetricData` namedtuples with the structu .. code-block:: python >>> author_metrics.CitationCount - [MetricData(entity_id=6602819806, entity_name='Algül, Hana', metric='CitationCount', - metric_type='Citation count', year='all', value=1234, percentage=85.5, threshold=None)] + [MetricData(entity_id=6602819806, entity_name='Algül, Hana', metric='CitationCount', metric_type='CitationCount', year='all', value=1120, percentage=None, threshold=None)] >>> author_metrics.HIndices - [MetricData(entity_id=6602819806, entity_name='Algül, Hana', metric='HIndices', - metric_type='h-index', year='all', value=46.0, percentage=None, threshold=None)] + [MetricData(entity_id=6602819806, entity_name='Algül, Hana', metric='h-index', metric_type='HIndices', year='all', value=46.0, percentage=None, threshold=None)] **Available Metric Properties**: @@ -128,8 +125,8 @@ You can retrieve all available metrics in a single list using the `all_metrics` 0 6602819806 Algül, Hana - AcademicCorporateCollaboration Academic-corporate collaboration + AcademicCorporateCollaboration all 12.000000 22.64151 @@ -139,8 +136,8 @@ You can retrieve all available metrics in a single list using the `all_metrics` 1 6602819806 Algül, Hana - AcademicCorporateCollaboration No academic-corporate collaboration + AcademicCorporateCollaboration all 41.000000 77.35849 @@ -150,8 +147,8 @@ You can retrieve all available metrics in a single list using the `all_metrics` 2 6602819806 Algül, Hana - 
AcademicCorporateCollaborationImpact Academic-corporate collaboration + AcademicCorporateCollaborationImpact all 43.166668 NaN @@ -161,8 +158,8 @@ You can retrieve all available metrics in a single list using the `all_metrics` 3 6602819806 Algül, Hana - AcademicCorporateCollaborationImpact No academic-corporate collaboration + AcademicCorporateCollaborationImpact all 14.682927 NaN @@ -172,8 +169,8 @@ You can retrieve all available metrics in a single list using the `all_metrics` 4 6602819806 Algül, Hana - Collaboration Institutional collaboration + Collaboration all 6.000000 11.32000 @@ -196,7 +193,7 @@ You can analyze multiple authors simultaneously. Furthermore, you can specify wh - Wolff, Klaus Dietrich (ID: 7201667143) - Vogel-Heuser, Birgit (ID: 6603480302) >>> df = pd.DataFrame(multi_authors.all_metrics) - >>> df.tail(5) + >>> df.tail() .. raw:: html @@ -238,7 +235,7 @@ You can analyze multiple authors simultaneously. Furthermore, you can specify wh 6603480302 Vogel-Heuser, Birgit OutputsInTopCitationPercentiles - None + OutputsInTopCitationPercentiles 2024 5.0 13.157895 @@ -249,7 +246,7 @@ You can analyze multiple authors simultaneously. Furthermore, you can specify wh 6603480302 Vogel-Heuser, Birgit OutputsInTopCitationPercentiles - None + OutputsInTopCitationPercentiles 2020 5.0 10.000000 @@ -260,7 +257,7 @@ You can analyze multiple authors simultaneously. Furthermore, you can specify wh 6603480302 Vogel-Heuser, Birgit OutputsInTopCitationPercentiles - None + OutputsInTopCitationPercentiles 2021 14.0 27.450981 @@ -271,7 +268,7 @@ You can analyze multiple authors simultaneously. Furthermore, you can specify wh 6603480302 Vogel-Heuser, Birgit OutputsInTopCitationPercentiles - None + OutputsInTopCitationPercentiles 2022 8.0 24.242424 @@ -282,7 +279,7 @@ You can analyze multiple authors simultaneously. 
Furthermore, you can specify wh 6603480302 Vogel-Heuser, Birgit OutputsInTopCitationPercentiles - None + OutputsInTopCitationPercentiles 2023 1.0 2.173913 @@ -301,12 +298,12 @@ You can request only specific metrics to reduce API response size: >>> h_index_only = AuthorMetrics("6602819806", metric_types=["HIndices"]) >>> h_index_only.HIndices - [MetricData(entity_id=6602819806, entity_name='Algül, Hana', metric='HIndices', - metric_type='h-index', year='all', value=46.0, percentage=None, threshold=None)] + [MetricData(entity_id=6602819806, entity_name='Algül, Hana', metric='h-index', metric_type='HIndices', year='all', value=46.0, percentage=None, threshold=None)] >>> # Multiple specific metrics - >>> selected_metrics = AuthorMetrics("6602819806", - ... metric_types=["CitationCount", "ScholarlyOutput"]) + >>> selected_metrics = AuthorMetrics("6602819806", metric_types=["CitationCount", "ScholarlyOutput"]) + >>> selected_metrics.CitationCount + [MetricData(entity_id=6602819806, entity_name='Algül, Hana', metric='CitationCount', metric_type='CitationCount', year='all', value=1126, percentage=None, threshold=None)] Downloaded results are cached to expedite subsequent analyses. This information may become outdated. To refresh the cached results if they exist, set `refresh=True`, or provide an integer that will be interpreted as the maximum allowed number of days since the last modification date. For example, if you want to refresh all cached results older than 100 days, set `refresh=100`. Use `author_metrics.get_cache_file_mdate()` to obtain the date of last modification, and `author_metrics.get_cache_file_age()` to determine the number of days since the last modification. 
diff --git a/pybliometrics/scival/tests/test_AuthorMetrics.py b/pybliometrics/scival/tests/test_AuthorMetrics.py index ba73150e..e158aa41 100644 --- a/pybliometrics/scival/tests/test_AuthorMetrics.py +++ b/pybliometrics/scival/tests/test_AuthorMetrics.py @@ -43,8 +43,8 @@ def test_all_metrics(): assert len(single_author_all_metrics) == 29 expected_first_metric = MetricData(entity_id=6602819806, entity_name='Algül, Hana', - metric='AcademicCorporateCollaboration', - metric_type='Academic-corporate collaboration', + metric='Academic-corporate collaboration', + metric_type='AcademicCorporateCollaboration', year='all', value=12, percentage=22.64151, @@ -56,8 +56,8 @@ def test_all_metrics(): assert len(single_author_h_index_all_metrics) == 1 expected_h_index = MetricData(entity_id=6602819806, entity_name='Algül, Hana', - metric='HIndices', - metric_type='h-index', + metric='h-index', + metric_type='HIndices', year='all', value=46.0, percentage=None, threshold=None) @@ -68,8 +68,8 @@ def test_all_metrics(): assert len(multiple_authors_all_metrics) == 280 expected_multi_metric = MetricData(entity_id=6603480302, entity_name='Vogel-Heuser, Birgit', - metric='AcademicCorporateCollaboration', - metric_type='Academic-corporate collaboration', + metric='Academic-corporate collaboration', + metric_type='AcademicCorporateCollaboration', year='2024', value=8, percentage=21.052631, diff --git a/pybliometrics/utils/parse_metrics.py b/pybliometrics/utils/parse_metrics.py index 0b75ddb6..5a2d4095 100644 --- a/pybliometrics/utils/parse_metrics.py +++ b/pybliometrics/utils/parse_metrics.py @@ -141,11 +141,15 @@ def process_metric(metric_data: dict, entity_id: int, entity_name: str, metric_t # Process all years uniformly for year in value_data.keys(): + # For nested metrics (like Collaboration), metric is the specific type (collabType) + # For simple metrics (like CitationCount), metric is the metric_type itself + metric_name = collab_type or value_item.get('indexType') or 
value_item.get('impactType') or metric_type + new = MetricData( entity_id=entity_id, entity_name=entity_name, - metric=metric_type, - metric_type=collab_type or value_item.get('indexType') or value_item.get('impactType'), + metric=metric_name, + metric_type=metric_type, year=str(year), value=value_data.get(year), percentage=percentage_data.get(year), From 1191954c70e9dd9c8a393a78404f818b504a835c Mon Sep 17 00:00:00 2001 From: Nils Herrmann Date: Wed, 20 Aug 2025 13:49:21 +0200 Subject: [PATCH 05/14] Remove all_metrics --- docs/reference/scival/AuthorMetrics.rst | 179 +++++++++++------- pybliometrics/scival/author_metrics.py | 24 +-- .../scival/tests/test_AuthorMetrics.py | 65 ++----- 3 files changed, 129 insertions(+), 139 deletions(-) diff --git a/docs/reference/scival/AuthorMetrics.rst b/docs/reference/scival/AuthorMetrics.rst index b71f68a2..cfb46a7e 100644 --- a/docs/reference/scival/AuthorMetrics.rst +++ b/docs/reference/scival/AuthorMetrics.rst @@ -72,18 +72,17 @@ Each metric property returns a list of `MetricData` namedtuples with the structu .. note:: **Unified Data Structure**: AuthorMetrics uses a unified `MetricData` structure with `entity_id` and `entity_name` fields. For authors, these fields contain the author ID and author name respectively. This structure is compatible with `InstitutionMetrics` and other SciVal metric classes, enabling consistent data analysis across different entity types. -**Getting All Metrics at Once** +**Concatenating Metrics** -You can retrieve all available metrics in a single list using the `all_metrics` property: +Metrics can be concatenated and converted into a pandas DataFrame for easier analysis. .. 
code-block:: python - >>> all_data = author_metrics.all_metrics - >>> len(all_data) - 29 - >>> # Convert to pandas DataFrame for analysis >>> import pandas as pd - >>> df = pd.DataFrame(all_data) + >>> collab_data = [] + >>> collab_data.extend(author_metrics.Collaboration) + >>> collab_data.extend(author_metrics.CollaborationImpact) + >>> df = pd.DataFrame(collab_data) >>> df.head() @@ -125,56 +124,89 @@ You can retrieve all available metrics in a single list using the `all_metrics` 0 6602819806 Algül, Hana - Academic-corporate collaboration - AcademicCorporateCollaboration + Institutional collaboration + Collaboration all - 12.000000 - 22.64151 - NaN + 6.000000 + 11.32 + None 1 6602819806 Algül, Hana - No academic-corporate collaboration - AcademicCorporateCollaboration + International collaboration + Collaboration all - 41.000000 - 77.35849 - NaN + 26.000000 + 49.06 + None 2 6602819806 Algül, Hana - Academic-corporate collaboration - AcademicCorporateCollaborationImpact + National collaboration + Collaboration all - 43.166668 - NaN - NaN + 21.000000 + 39.62 + None 3 6602819806 Algül, Hana - No academic-corporate collaboration - AcademicCorporateCollaborationImpact + Single authorship + Collaboration all - 14.682927 - NaN - NaN + 0.000000 + 0.00 + None 4 6602819806 Algül, Hana Institutional collaboration - Collaboration + CollaborationImpact all - 6.000000 - 11.32000 + 3.500000 + NaN + None + + + 5 + 6602819806 + Algül, Hana + International collaboration + CollaborationImpact + all + 28.461538 + NaN + None + + + 6 + 6602819806 + Algül, Hana + National collaboration + CollaborationImpact + all + 17.095238 + NaN + None + + + 7 + 6602819806 + Algül, Hana + Single authorship + CollaborationImpact + all + 0.000000 NaN + None @@ -192,8 +224,9 @@ You can analyze multiple authors simultaneously. 
Furthermore, you can specify wh AuthorMetrics for 2 author(s): - Wolff, Klaus Dietrich (ID: 7201667143) - Vogel-Heuser, Birgit (ID: 6603480302) - >>> df = pd.DataFrame(multi_authors.all_metrics) - >>> df.tail() + >>> # Create a DataFrame from the Collaboration metrics + >>> df_multi = pd.DataFrame(multi_authors.Collaboration) + >>> df_multi.head() .. raw:: html @@ -215,6 +248,7 @@ You can analyze multiple authors simultaneously. Furthermore, you can specify wh font-size: 12px; } +
@@ -231,63 +265,64 @@ You can analyze multiple authors simultaneously. Furthermore, you can specify wh - - - - - + + + + + - - - + + + - - - - - + + + + + - - - + + + - - - - - + + + + + - - - + + + - - - - - + + + + + - - - + + + - - - - - + + + + + - - - + + +
2756603480302Vogel-Heuser, BirgitOutputsInTopCitationPercentilesOutputsInTopCitationPercentiles07201667143Wolff, Klaus DietrichInstitutional collaborationCollaboration 20245.013.15789525.0330.000002None
2766603480302Vogel-Heuser, BirgitOutputsInTopCitationPercentilesOutputsInTopCitationPercentiles17201667143Wolff, Klaus DietrichInstitutional collaborationCollaboration 20205.010.00000025.0316.666668None
2776603480302Vogel-Heuser, BirgitOutputsInTopCitationPercentilesOutputsInTopCitationPercentiles27201667143Wolff, Klaus DietrichInstitutional collaborationCollaboration 202114.027.45098125.0225.000000None
2786603480302Vogel-Heuser, BirgitOutputsInTopCitationPercentilesOutputsInTopCitationPercentiles37201667143Wolff, Klaus DietrichInstitutional collaborationCollaboration 20228.024.24242425.000.000000None
2796603480302Vogel-Heuser, BirgitOutputsInTopCitationPercentilesOutputsInTopCitationPercentiles47201667143Wolff, Klaus DietrichInstitutional collaborationCollaboration 20231.02.17391325.0430.769232None
+ **Filtering Specific Metrics** diff --git a/pybliometrics/scival/author_metrics.py b/pybliometrics/scival/author_metrics.py index 1ab54775..f5e1b687 100644 --- a/pybliometrics/scival/author_metrics.py +++ b/pybliometrics/scival/author_metrics.py @@ -24,25 +24,6 @@ def AcademicCorporateCollaborationImpact(self) -> Optional[list]: """ return extract_metric_data(self._json, 'AcademicCorporateCollaborationImpact', self._by_year, "author") - @property - def all_metrics(self) -> Optional[list]: - """Get all available metrics concatenated into a single list. - Returns list of MetricData namedtuples with unified structure: - (entity_id, entity_name, metric, metric_type, year, value, percentage, threshold) - """ - all_metrics = [] - - # List of all metric properties - if self._metric_types: - metric_properties = self._metric_types.split(",") - - for prop_name in metric_properties: - metrics = getattr(self, prop_name) - if metrics: - all_metrics.extend(metrics) - - return all_metrics or None - @property def authors(self) -> Optional[list]: """A list of namedtuples representing authors and their basic info @@ -177,9 +158,8 @@ def __init__(self, Note: All metric properties return lists of MetricData namedtuples with unified structure: `(entity_id, entity_name, metric, metric_type, - year, value, percentage, threshold)`. Use the `all_metrics` property - to get all metrics concatenated into a single list for easy data - manipulation and analysis. + year, value, percentage, threshold)` which enable concatenation + of results from different metrics. 
""" self._view = '' self._refresh = refresh diff --git a/pybliometrics/scival/tests/test_AuthorMetrics.py b/pybliometrics/scival/tests/test_AuthorMetrics.py index e158aa41..38f41331 100644 --- a/pybliometrics/scival/tests/test_AuthorMetrics.py +++ b/pybliometrics/scival/tests/test_AuthorMetrics.py @@ -11,6 +11,10 @@ multiple_authors_all = AuthorMetrics([7201667143, 6603480302], by_year=True, refresh=30) empty_metrics = AuthorMetrics("0000000000") +MetricData = namedtuple('MetricData', + 'entity_id entity_name metric metric_type year value percentage threshold', + defaults=(None, None, None, None, "all", None, None, None)) + def test_academic_corporate_collaboration(): """Test AcademicCorporateCollaboration property for all test cases.""" @@ -18,9 +22,25 @@ def test_academic_corporate_collaboration(): assert has_all_fields(result[0]) assert single_author_h_index.AcademicCorporateCollaboration is None + assert result[0].entity_id == 6602819806 + assert result[0].entity_name == 'Algül, Hana' + assert result[0].metric == 'Academic-corporate collaboration' + assert result[0].metric_type == 'AcademicCorporateCollaboration' + assert result[0].year == 'all' + assert result[0].value >= 12 + assert result[0].percentage >= 22 + assert result[0].threshold is None result_multi = multiple_authors_all.AcademicCorporateCollaboration assert has_all_fields(result_multi[0]) + assert result_multi[0].entity_id == 7201667143 + assert result_multi[0].entity_name == 'Wolff, Klaus Dietrich' + assert result_multi[0].metric == 'Academic-corporate collaboration' + assert result_multi[0].metric_type == 'AcademicCorporateCollaboration' + assert result_multi[0].year >= '2024' + assert result_multi[0].value >= 0 + assert result_multi[0].percentage >= 0 + assert result_multi[0].threshold is None def test_academic_corporate_collaboration_impact(): @@ -33,50 +53,6 @@ def test_academic_corporate_collaboration_impact(): assert has_all_fields(result_multi[0]) -def test_all_metrics(): - """Test 
all_metrics property for all test cases.""" - MetricData = namedtuple('MetricData', - 'entity_id entity_name metric metric_type year value percentage threshold', - defaults=(None, None, None, None, "all", None, None, None)) - - single_author_all_metrics = single_author_all.all_metrics - assert len(single_author_all_metrics) == 29 - expected_first_metric = MetricData(entity_id=6602819806, - entity_name='Algül, Hana', - metric='Academic-corporate collaboration', - metric_type='AcademicCorporateCollaboration', - year='all', - value=12, - percentage=22.64151, - threshold=None) - assert expected_first_metric == single_author_all_metrics[0] - - - single_author_h_index_all_metrics = single_author_h_index.all_metrics - assert len(single_author_h_index_all_metrics) == 1 - expected_h_index = MetricData(entity_id=6602819806, - entity_name='Algül, Hana', - metric='h-index', - metric_type='HIndices', - year='all', value=46.0, - percentage=None, - threshold=None) - assert expected_h_index == single_author_h_index_all_metrics[0] - - - multiple_authors_all_metrics = multiple_authors_all.all_metrics - assert len(multiple_authors_all_metrics) == 280 - expected_multi_metric = MetricData(entity_id=6603480302, - entity_name='Vogel-Heuser, Birgit', - metric='Academic-corporate collaboration', - metric_type='AcademicCorporateCollaboration', - year='2024', - value=8, - percentage=21.052631, - threshold=None) - assert expected_multi_metric == multiple_authors_all_metrics[10] - - def test_authors(): """Test the authors property for all test cases with actual values.""" # Test single author with all metrics @@ -167,7 +143,6 @@ def test_collaboration_impact(): def test_empty_metrics(): """Test handling of empty metric_types.""" - assert empty_metrics.all_metrics is None assert empty_metrics.authors is None assert empty_metrics.CitationCount is None assert empty_metrics.CitationsPerPublication is None From 22fd32e2ea8330160cae99091dfdd5d400b46cc8 Mon Sep 17 00:00:00 2001 From: Nils Herrmann 
Date: Thu, 21 Aug 2025 09:16:24 +0200 Subject: [PATCH 06/14] Change examples in documentation --- docs/reference/scival/AuthorMetrics.rst | 120 ++++++++++++------------ 1 file changed, 60 insertions(+), 60 deletions(-) diff --git a/docs/reference/scival/AuthorMetrics.rst b/docs/reference/scival/AuthorMetrics.rst index cfb46a7e..f5469519 100644 --- a/docs/reference/scival/AuthorMetrics.rst +++ b/docs/reference/scival/AuthorMetrics.rst @@ -25,7 +25,7 @@ You initialize the class with one or more Scopus Author IDs. The argument can be >>> from pybliometrics.scival import AuthorMetrics, init >>> init() - >>> author_metrics = AuthorMetrics("6602819806") + >>> author_metrics = AuthorMetrics("57209617104") You can obtain basic information just by printing the object: @@ -33,14 +33,14 @@ You can obtain basic information just by printing the object: >>> print(author_metrics) AuthorMetrics for 1 author(s): - - Algül, Hana (ID: 6602819806) + - Rose, Michael E. (ID: 57209617104) There are many properties available that provide different types of metrics. You can explore the available authors: .. code-block:: python >>> author_metrics.authors - [Author(id=6602819806, name='Algül, Hana', uri='Author/6602819806')] + [Author(id=57209617104, name='Rose, Michael E.', uri='Author/57209617104')] **Individual Metric Properties** @@ -49,10 +49,10 @@ Each metric property returns a list of `MetricData` namedtuples with the structu .. 
code-block:: python >>> author_metrics.CitationCount - [MetricData(entity_id=6602819806, entity_name='Algül, Hana', metric='CitationCount', metric_type='CitationCount', year='all', value=1120, percentage=None, threshold=None)] + [MetricData(entity_id=57209617104, entity_name='Rose, Michael E.', metric='CitationCount', metric_type='CitationCount', year='all', value=92, percentage=None, threshold=None)] >>> author_metrics.HIndices - [MetricData(entity_id=6602819806, entity_name='Algül, Hana', metric='h-index', metric_type='HIndices', year='all', value=46.0, percentage=None, threshold=None)] + [MetricData(entity_id=57209617104, entity_name='Rose, Michael E.', metric='h-index', metric_type='HIndices', year='all', value=5.0, percentage=None, threshold=None)] **Available Metric Properties**: @@ -122,89 +122,89 @@ Metrics can be concatenated and converted into a pandas DataFrame for easier ana 0 - 6602819806 - Algül, Hana + 57209617104 + Rose, Michael E. Institutional collaboration Collaboration all - 6.000000 - 11.32 + 1.000000 + 11.11 None 1 - 6602819806 - Algül, Hana + 57209617104 + Rose, Michael E. International collaboration Collaboration all - 26.000000 - 49.06 + 7.000000 + 77.78 None 2 - 6602819806 - Algül, Hana + 57209617104 + Rose, Michael E. National collaboration Collaboration all - 21.000000 - 39.62 + 0.000000 + 0.00 None 3 - 6602819806 - Algül, Hana + 57209617104 + Rose, Michael E. Single authorship Collaboration all - 0.000000 - 0.00 + 1.000000 + 11.11 None 4 - 6602819806 - Algül, Hana + 57209617104 + Rose, Michael E. Institutional collaboration CollaborationImpact all - 3.500000 + 0.000000 NaN None 5 - 6602819806 - Algül, Hana + 57209617104 + Rose, Michael E. International collaboration CollaborationImpact all - 28.461538 + 12.571428 NaN None 6 - 6602819806 - Algül, Hana + 57209617104 + Rose, Michael E. National collaboration CollaborationImpact all - 17.095238 + 0.000000 NaN None 7 - 6602819806 - Algül, Hana + 57209617104 + Rose, Michael E. 
Single authorship CollaborationImpact all - 0.000000 + 4.000000 NaN None @@ -219,11 +219,11 @@ You can analyze multiple authors simultaneously. Furthermore, you can specify wh .. code-block:: python - >>> multi_authors = AuthorMetrics([7201667143, 6603480302], by_year=True) + >>> multi_authors = AuthorMetrics([57209617104, 7004212771], by_year=True) >>> print(multi_authors) AuthorMetrics for 2 author(s): - - Wolff, Klaus Dietrich (ID: 7201667143) - - Vogel-Heuser, Birgit (ID: 6603480302) + - Kitchin, John R. (ID: 7004212771) + - Rose, Michael E. (ID: 57209617104) >>> # Create a DataFrame from the Collaboration metrics >>> df_multi = pd.DataFrame(multi_authors.Collaboration) >>> df_multi.head() @@ -266,57 +266,57 @@ You can analyze multiple authors simultaneously. Furthermore, you can specify wh 0 - 7201667143 - Wolff, Klaus Dietrich + 7004212771 + Kitchin, John R. Institutional collaboration Collaboration 2024 - 3 - 30.000002 + 6 + 37.500000 None 1 - 7201667143 - Wolff, Klaus Dietrich + 7004212771 + Kitchin, John R. Institutional collaboration Collaboration 2020 - 3 - 16.666668 + 1 + 50.000000 None 2 - 7201667143 - Wolff, Klaus Dietrich + 7004212771 + Kitchin, John R. Institutional collaboration Collaboration 2021 - 2 - 25.000000 + 1 + 33.333336 None 3 - 7201667143 - Wolff, Klaus Dietrich + 7004212771 + Kitchin, John R. Institutional collaboration Collaboration 2022 - 0 - 0.000000 + 6 + 66.666670 None 4 - 7201667143 - Wolff, Klaus Dietrich + 7004212771 + Kitchin, John R. Institutional collaboration Collaboration 2023 - 4 - 30.769232 + 5 + 55.555557 None @@ -331,14 +331,14 @@ You can request only specific metrics to reduce API response size: .. 
code-block:: python - >>> h_index_only = AuthorMetrics("6602819806", metric_types=["HIndices"]) + >>> h_index_only = AuthorMetrics("57209617104", metric_types=["HIndices"]) >>> h_index_only.HIndices - [MetricData(entity_id=6602819806, entity_name='Algül, Hana', metric='h-index', metric_type='HIndices', year='all', value=46.0, percentage=None, threshold=None)] + [MetricData(entity_id=57209617104, entity_name='Rose, Michael E.', metric='h-index', metric_type='HIndices', year='all', value=5.0, percentage=None, threshold=None)] >>> # Multiple specific metrics - >>> selected_metrics = AuthorMetrics("6602819806", metric_types=["CitationCount", "ScholarlyOutput"]) + >>> selected_metrics = AuthorMetrics("57209617104", metric_types=["CitationCount", "ScholarlyOutput"]) >>> selected_metrics.CitationCount - [MetricData(entity_id=6602819806, entity_name='Algül, Hana', metric='CitationCount', metric_type='CitationCount', year='all', value=1126, percentage=None, threshold=None)] + [MetricData(entity_id=57209617104, entity_name='Rose, Michael E.', metric='CitationCount', metric_type='CitationCount', year='all', value=92, percentage=None, threshold=None)] Downloaded results are cached to expedite subsequent analyses. This information may become outdated. To refresh the cached results if they exist, set `refresh=True`, or provide an integer that will be interpreted as the maximum allowed number of days since the last modification date. For example, if you want to refresh all cached results older than 100 days, set `refresh=100`. Use `author_metrics.get_cache_file_mdate()` to obtain the date of last modification, and `author_metrics.get_cache_file_age()` to determine the number of days since the last modification. 
From eb238bfc01facecd24525461b6cbb53a6a69fc09 Mon Sep 17 00:00:00 2001 From: Nils Herrmann Date: Tue, 26 Aug 2025 10:45:29 +0200 Subject: [PATCH 07/14] Requested changes: Typo in constant, update docstrings, remove metric_type --- docs/reference/scival/AuthorMetrics.rst | 25 ++---- pybliometrics/scival/author_metrics.py | 77 +++++++++---------- .../scival/tests/test_AuthorMetrics.py | 8 +- pybliometrics/superclasses/retrieval.py | 4 +- pybliometrics/utils/constants.py | 2 +- pybliometrics/utils/parse_metrics.py | 5 +- 6 files changed, 51 insertions(+), 70 deletions(-) diff --git a/docs/reference/scival/AuthorMetrics.rst b/docs/reference/scival/AuthorMetrics.rst index f5469519..676744d6 100644 --- a/docs/reference/scival/AuthorMetrics.rst +++ b/docs/reference/scival/AuthorMetrics.rst @@ -44,15 +44,15 @@ There are many properties available that provide different types of metrics. You **Individual Metric Properties** -Each metric property returns a list of `MetricData` namedtuples with the structure: `(entity_id, entity_name, metric, metric_type, year, value, percentage, threshold)` where `entity_id` and `entity_name` refer to the author. +Each metric property returns a list of `MetricData` namedtuples with the structure: `(entity_id, entity_name, metric, year, value, percentage, threshold)` where `entity_id` and `entity_name` refer to the author. .. 
code-block:: python >>> author_metrics.CitationCount - [MetricData(entity_id=57209617104, entity_name='Rose, Michael E.', metric='CitationCount', metric_type='CitationCount', year='all', value=92, percentage=None, threshold=None)] + [MetricData(entity_id=57209617104, entity_name='Rose, Michael E.', metric='CitationCount', year='all', value=92, percentage=None, threshold=None)] >>> author_metrics.HIndices - [MetricData(entity_id=57209617104, entity_name='Rose, Michael E.', metric='h-index', metric_type='HIndices', year='all', value=5.0, percentage=None, threshold=None)] + [MetricData(entity_id=57209617104, entity_name='Rose, Michael E.', metric='h-index', year='all', value=5.0, percentage=None, threshold=None)] **Available Metric Properties**: @@ -112,7 +112,6 @@ Metrics can be concatenated and converted into a pandas DataFrame for easier ana entity_id entity_name metric - metric_type year value percentage @@ -125,7 +124,6 @@ Metrics can be concatenated and converted into a pandas DataFrame for easier ana 57209617104 Rose, Michael E. Institutional collaboration - Collaboration all 1.000000 11.11 @@ -136,7 +134,6 @@ Metrics can be concatenated and converted into a pandas DataFrame for easier ana 57209617104 Rose, Michael E. International collaboration - Collaboration all 7.000000 77.78 @@ -147,7 +144,6 @@ Metrics can be concatenated and converted into a pandas DataFrame for easier ana 57209617104 Rose, Michael E. National collaboration - Collaboration all 0.000000 0.00 @@ -158,7 +154,6 @@ Metrics can be concatenated and converted into a pandas DataFrame for easier ana 57209617104 Rose, Michael E. Single authorship - Collaboration all 1.000000 11.11 @@ -169,7 +164,6 @@ Metrics can be concatenated and converted into a pandas DataFrame for easier ana 57209617104 Rose, Michael E. 
Institutional collaboration - CollaborationImpact all 0.000000 NaN @@ -180,7 +174,6 @@ Metrics can be concatenated and converted into a pandas DataFrame for easier ana 57209617104 Rose, Michael E. International collaboration - CollaborationImpact all 12.571428 NaN @@ -191,7 +184,6 @@ Metrics can be concatenated and converted into a pandas DataFrame for easier ana 57209617104 Rose, Michael E. National collaboration - CollaborationImpact all 0.000000 NaN @@ -202,7 +194,6 @@ Metrics can be concatenated and converted into a pandas DataFrame for easier ana 57209617104 Rose, Michael E. Single authorship - CollaborationImpact all 4.000000 NaN @@ -256,7 +247,6 @@ You can analyze multiple authors simultaneously. Furthermore, you can specify wh entity_id entity_name metric - metric_type year value percentage @@ -269,7 +259,6 @@ You can analyze multiple authors simultaneously. Furthermore, you can specify wh 7004212771 Kitchin, John R. Institutional collaboration - Collaboration 2024 6 37.500000 @@ -280,7 +269,6 @@ You can analyze multiple authors simultaneously. Furthermore, you can specify wh 7004212771 Kitchin, John R. Institutional collaboration - Collaboration 2020 1 50.000000 @@ -291,7 +279,6 @@ You can analyze multiple authors simultaneously. Furthermore, you can specify wh 7004212771 Kitchin, John R. Institutional collaboration - Collaboration 2021 1 33.333336 @@ -302,7 +289,6 @@ You can analyze multiple authors simultaneously. Furthermore, you can specify wh 7004212771 Kitchin, John R. Institutional collaboration - Collaboration 2022 6 66.666670 @@ -313,7 +299,6 @@ You can analyze multiple authors simultaneously. Furthermore, you can specify wh 7004212771 Kitchin, John R. 
Institutional collaboration - Collaboration 2023 5 55.555557 @@ -333,12 +318,12 @@ You can request only specific metrics to reduce API response size: >>> h_index_only = AuthorMetrics("57209617104", metric_types=["HIndices"]) >>> h_index_only.HIndices - [MetricData(entity_id=57209617104, entity_name='Rose, Michael E.', metric='h-index', metric_type='HIndices', year='all', value=5.0, percentage=None, threshold=None)] + [MetricData(entity_id=57209617104, entity_name='Rose, Michael E.', metric='h-index', year='all', value=5.0, percentage=None, threshold=None)] >>> # Multiple specific metrics >>> selected_metrics = AuthorMetrics("57209617104", metric_types=["CitationCount", "ScholarlyOutput"]) >>> selected_metrics.CitationCount - [MetricData(entity_id=57209617104, entity_name='Rose, Michael E.', metric='CitationCount', metric_type='CitationCount', year='all', value=92, percentage=None, threshold=None)] + [MetricData(entity_id=57209617104, entity_name='Rose, Michael E.', metric='CitationCount', year='all', value=92, percentage=None, threshold=None)] Downloaded results are cached to expedite subsequent analyses. This information may become outdated. To refresh the cached results if they exist, set `refresh=True`, or provide an integer that will be interpreted as the maximum allowed number of days since the last modification date. For example, if you want to refresh all cached results older than 100 days, set `refresh=100`. Use `author_metrics.get_cache_file_mdate()` to obtain the date of last modification, and `author_metrics.get_cache_file_age()` to determine the number of days since the last modification. 
diff --git a/pybliometrics/scival/author_metrics.py b/pybliometrics/scival/author_metrics.py index f5e1b687..2c12123f 100644 --- a/pybliometrics/scival/author_metrics.py +++ b/pybliometrics/scival/author_metrics.py @@ -4,28 +4,27 @@ from pybliometrics.superclasses import Retrieval from pybliometrics.utils import make_int_if_possible from pybliometrics.utils.constants import SCIVAL_METRICS -from pybliometrics.utils.parse_metrics import extract_metric_data - +from pybliometrics.utils.parse_metrics import extract_metric_data, MetricData class AuthorMetrics(Retrieval): @property - def AcademicCorporateCollaboration(self) -> Optional[list]: + def AcademicCorporateCollaboration(self) -> Optional[list[MetricData]]: """Academic corporate collaboration metrics for each author. Returns list of MetricData namedtuples with unified structure: - (entity_id, entity_name, metric, metric_type, year, value, percentage, threshold) + (entity_id, entity_name, metric, year, value, percentage, threshold) """ return extract_metric_data(self._json, 'AcademicCorporateCollaboration', self._by_year, "author") @property - def AcademicCorporateCollaborationImpact(self) -> Optional[list]: + def AcademicCorporateCollaborationImpact(self) -> Optional[list[MetricData]]: """Academic corporate collaboration impact metrics for each author. - Returns list of MetricData namedtuples with unified structure: - (entity_id, entity_name, metric, metric_type, year, value, percentage, threshold) + Returns list of MetricData namedtuples with structure: + (entity_id, entity_name, metric, year, value, percentage, threshold) """ return extract_metric_data(self._json, 'AcademicCorporateCollaborationImpact', self._by_year, "author") @property - def authors(self) -> Optional[list]: + def authors(self) -> Optional[list]: """A list of namedtuples representing authors and their basic info in the form `(id, name, uri)`. 
""" @@ -49,82 +48,82 @@ def authors(self) -> Optional[list]: return out or None @property - def CitationCount(self) -> Optional[list]: + def CitationCount(self) -> Optional[list[MetricData]]: """Citation count metrics for each author. - Returns list of MetricData namedtuples with unified structure: - (entity_id, entity_name, metric, metric_type, year, value, percentage, threshold) + Returns list of MetricData namedtuples with structure: + (entity_id, entity_name, metric, year, value, percentage, threshold) """ return extract_metric_data(self._json, 'CitationCount', self._by_year, "author") @property - def CitationsPerPublication(self) -> Optional[list]: + def CitationsPerPublication(self) -> Optional[list[MetricData]]: """Citations per publication metrics for each author. - Returns list of MetricData namedtuples with unified structure: - (entity_id, entity_name, metric, metric_type, year, value, percentage, threshold) + Returns list of MetricData namedtuples with structure: + (entity_id, entity_name, metric, year, value, percentage, threshold) """ return extract_metric_data(self._json, 'CitationsPerPublication', self._by_year, "author") @property - def CitedPublications(self) -> Optional[list]: + def CitedPublications(self) -> Optional[list[MetricData]]: """Cited publications metrics for each author. - Returns list of MetricData namedtuples with unified structure: - (entity_id, entity_name, metric, metric_type, year, value, percentage, threshold) + Returns list of MetricData namedtuples with structure: + (entity_id, entity_name, metric, year, value, percentage, threshold) """ return extract_metric_data(self._json, 'CitedPublications', self._by_year, "author") @property - def Collaboration(self) -> Optional[list]: + def Collaboration(self) -> Optional[list[MetricData]]: """Collaboration metrics for each author. 
- Returns list of MetricData namedtuples with unified structure: - (entity_id, entity_name, metric, metric_type, year, value, percentage, threshold) + Returns list of MetricData namedtuples with structure: + (entity_id, entity_name, metric, year, value, percentage, threshold) """ return extract_metric_data(self._json, 'Collaboration', self._by_year, "author") @property - def CollaborationImpact(self) -> Optional[list]: + def CollaborationImpact(self) -> Optional[list[MetricData]]: """Collaboration impact metrics for each author. - Returns list of MetricData namedtuples with unified structure: - (entity_id, entity_name, metric, metric_type, year, value, percentage, threshold) + Returns list of MetricData namedtuples with structure: + (entity_id, entity_name, metric, year, value, percentage, threshold) """ return extract_metric_data(self._json, 'CollaborationImpact', self._by_year, "author") @property - def FieldWeightedCitationImpact(self) -> Optional[list]: + def FieldWeightedCitationImpact(self) -> Optional[list[MetricData]]: """Field weighted citation impact metrics for each author. - Returns list of MetricData namedtuples with unified structure: - (entity_id, entity_name, metric, metric_type, year, value, percentage, threshold) + Returns list of MetricData namedtuples with structure: + (entity_id, entity_name, metric, year, value, percentage, threshold) """ return extract_metric_data(self._json, 'FieldWeightedCitationImpact', self._by_year, "author") @property - def HIndices(self) -> Optional[list]: + def HIndices(self) -> Optional[list[MetricData]]: """H-indices metrics for each author (only available when by_year=False). 
- Returns list of MetricData namedtuples with unified structure: - (entity_id, entity_name, metric, metric_type, year, value, percentage, threshold) + Returns list of MetricData namedtuples with structure: + (entity_id, entity_name, metric, year, value, percentage, threshold) """ return extract_metric_data(self._json, 'HIndices', self._by_year, "author") @property - def OutputsInTopCitationPercentiles(self) -> Optional[list]: + def OutputsInTopCitationPercentiles(self) -> Optional[list[MetricData]]: """Outputs in top citation percentiles metrics for each author. - Returns list of MetricData namedtuples with unified structure: - (entity_id, entity_name, metric, metric_type, year, value, percentage, threshold) + Returns list of MetricData namedtuples with structure: + (entity_id, entity_name, metric, year, value, percentage, threshold) """ return extract_metric_data(self._json, 'OutputsInTopCitationPercentiles', self._by_year, "author") @property - def PublicationsInTopJournalPercentiles(self) -> Optional[list]: + def PublicationsInTopJournalPercentiles(self) -> Optional[list[MetricData]]: """Publications in top journal percentiles metrics for each author. - Returns list of MetricData namedtuples with unified structure: - (entity_id, entity_name, metric, metric_type, year, value, percentage, threshold) + Returns list of MetricData namedtuples with structure: + (entity_id, entity_name, metric, year, value, percentage, threshold) """ return extract_metric_data(self._json, 'PublicationsInTopJournalPercentiles', self._by_year, "author") @property - def ScholarlyOutput(self) -> Optional[list]: + def ScholarlyOutput(self) -> Optional[list[MetricData]]: """Scholarly output metrics for each author. 
- Returns list of MetricData namedtuples with unified structure: - (entity_id, entity_name, metric, metric_type, year, value, percentage, threshold) + Returns list of MetricData namedtuples with structure: + (entity_id, entity_name, metric, year, value, percentage, threshold) """ return extract_metric_data(self._json, 'ScholarlyOutput', self._by_year, "author") @@ -157,7 +156,7 @@ def __init__(self, Note: All metric properties return lists of MetricData namedtuples with - unified structure: `(entity_id, entity_name, metric, metric_type, + structure: `(entity_id, entity_name, metric, year, value, percentage, threshold)` which enable concatenation of results from different metrics. """ diff --git a/pybliometrics/scival/tests/test_AuthorMetrics.py b/pybliometrics/scival/tests/test_AuthorMetrics.py index 38f41331..ff7ffff0 100644 --- a/pybliometrics/scival/tests/test_AuthorMetrics.py +++ b/pybliometrics/scival/tests/test_AuthorMetrics.py @@ -12,8 +12,8 @@ empty_metrics = AuthorMetrics("0000000000") MetricData = namedtuple('MetricData', - 'entity_id entity_name metric metric_type year value percentage threshold', - defaults=(None, None, None, None, "all", None, None, None)) + 'entity_id entity_name metric year value percentage threshold', + defaults=(None, None, None, "all", None, None, None)) def test_academic_corporate_collaboration(): @@ -25,7 +25,6 @@ def test_academic_corporate_collaboration(): assert result[0].entity_id == 6602819806 assert result[0].entity_name == 'Algül, Hana' assert result[0].metric == 'Academic-corporate collaboration' - assert result[0].metric_type == 'AcademicCorporateCollaboration' assert result[0].year == 'all' assert result[0].value >= 12 assert result[0].percentage >= 22 @@ -36,7 +35,6 @@ def test_academic_corporate_collaboration(): assert result_multi[0].entity_id == 7201667143 assert result_multi[0].entity_name == 'Wolff, Klaus Dietrich' assert result_multi[0].metric == 'Academic-corporate collaboration' - assert 
result_multi[0].metric_type == 'AcademicCorporateCollaboration' assert result_multi[0].year >= '2024' assert result_multi[0].value >= 0 assert result_multi[0].percentage >= 0 @@ -87,7 +85,7 @@ def test_authors(): def has_all_fields(metric_data): """Check if the metric data has all required fields.""" - required_fields = ['entity_id', 'entity_name', 'metric', 'metric_type', 'year', 'value', 'percentage', 'threshold'] + required_fields = ['entity_id', 'entity_name', 'metric', 'year', 'value', 'percentage', 'threshold'] return all(hasattr(metric_data, field) for field in required_fields) diff --git a/pybliometrics/superclasses/retrieval.py b/pybliometrics/superclasses/retrieval.py index 620c9a29..fe8479c2 100644 --- a/pybliometrics/superclasses/retrieval.py +++ b/pybliometrics/superclasses/retrieval.py @@ -5,7 +5,7 @@ from typing import Optional, Union from pybliometrics.superclasses import Base -from pybliometrics.utils import APIS_NO_ID_IN_ULR, APIS_WITH_ID_TYPE, get_config, URLS +from pybliometrics.utils import APIS_NO_ID_IN_URL, APIS_WITH_ID_TYPE, get_config, URLS class Retrieval(Base): @@ -40,7 +40,7 @@ def __init__(self, if self._date: stem += "-" + self._date # For APIs that don't use ID in URL, hash the parameters for unique cache filename - elif api in APIS_NO_ID_IN_ULR: + elif api in APIS_NO_ID_IN_URL: params_str = str(sorted(kwds.items())) stem = hashlib.md5(params_str.encode()).hexdigest() else: diff --git a/pybliometrics/utils/constants.py b/pybliometrics/utils/constants.py index 9643c31a..5d2364e5 100644 --- a/pybliometrics/utils/constants.py +++ b/pybliometrics/utils/constants.py @@ -124,7 +124,7 @@ "ObjectRetrieval"} # APIs that do not require an ID in the URL -APIS_NO_ID_IN_ULR = {"AuthorMetrics"} +APIS_NO_ID_IN_URL = {"AuthorMetrics"} # Item per page limits for all classes COUNTS = { diff --git a/pybliometrics/utils/parse_metrics.py b/pybliometrics/utils/parse_metrics.py index 5a2d4095..77663334 100644 --- a/pybliometrics/utils/parse_metrics.py +++ 
b/pybliometrics/utils/parse_metrics.py @@ -5,8 +5,8 @@ # Global namedtuple for all metric data with default values MetricData = namedtuple('MetricData', - 'entity_id entity_name metric metric_type year value percentage threshold', - defaults=(None, None, None, None, "all", None, None, None)) + 'entity_id entity_name metric year value percentage threshold', + defaults=(None, None, None, "all", None, None, None)) def extract_metric_data(json_data, metric_type: str, by_year: bool, entity_type: str): @@ -149,7 +149,6 @@ def process_metric(metric_data: dict, entity_id: int, entity_name: str, metric_t entity_id=entity_id, entity_name=entity_name, metric=metric_name, - metric_type=metric_type, year=str(year), value=value_data.get(year), percentage=percentage_data.get(year), From 2703e92ac59adf8f0bb7249d138fa2fbd8e8abb8 Mon Sep 17 00:00:00 2001 From: Nils Herrmann Date: Tue, 26 Aug 2025 10:47:52 +0200 Subject: [PATCH 08/14] Docstring update --- pybliometrics/scival/author_metrics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pybliometrics/scival/author_metrics.py b/pybliometrics/scival/author_metrics.py index 2c12123f..02761afe 100644 --- a/pybliometrics/scival/author_metrics.py +++ b/pybliometrics/scival/author_metrics.py @@ -10,7 +10,7 @@ class AuthorMetrics(Retrieval): @property def AcademicCorporateCollaboration(self) -> Optional[list[MetricData]]: """Academic corporate collaboration metrics for each author. 
- Returns list of MetricData namedtuples with unified structure: + Returns list of MetricData namedtuples with structure: (entity_id, entity_name, metric, year, value, percentage, threshold) """ return extract_metric_data(self._json, 'AcademicCorporateCollaboration', self._by_year, "author") From 8e9f09c2c1c6c3520b960256460ed1c825fd800a Mon Sep 17 00:00:00 2001 From: Nils Herrmann Date: Tue, 19 Aug 2025 11:53:56 +0200 Subject: [PATCH 09/14] Implementation of InstitutionMetrics --- docs/reference.rst | 1 + docs/reference/scival/InstitutionMetrics.rst | 301 ++++++++++++++++++ pybliometrics/scival/__init__.py | 1 + pybliometrics/scival/institution_metrics.py | 213 +++++++++++++ .../scival/tests/test_InstitutionMetrics.py | 221 +++++++++++++ pybliometrics/utils/constants.py | 21 +- 6 files changed, 757 insertions(+), 1 deletion(-) create mode 100644 docs/reference/scival/InstitutionMetrics.rst create mode 100644 pybliometrics/scival/institution_metrics.py create mode 100644 pybliometrics/scival/tests/test_InstitutionMetrics.py diff --git a/docs/reference.rst b/docs/reference.rst index 459d5ec8..e28a1f41 100644 --- a/docs/reference.rst +++ b/docs/reference.rst @@ -81,6 +81,7 @@ SciVal :maxdepth: 1 reference/scival/AuthorMetrics.rst + reference/scival/InstitutionMetrics.rst reference/scival/PublicationLookup.rst diff --git a/docs/reference/scival/InstitutionMetrics.rst b/docs/reference/scival/InstitutionMetrics.rst new file mode 100644 index 00000000..f646c1b8 --- /dev/null +++ b/docs/reference/scival/InstitutionMetrics.rst @@ -0,0 +1,301 @@ +pybliometrics.scival.InstitutionMetrics +======================================= + +`InstitutionMetrics()` implements the `SciVal Institution Metrics API `_. + +It accepts one or more SciVal Institution IDs as the main argument and retrieves various performance metrics for the specified institutions. + +.. currentmodule:: pybliometrics.scival +.. contents:: Table of Contents + :local: + +Documentation +------------- + +.. 
autoclass:: InstitutionMetrics + :members: + :inherited-members: + +Examples +-------- + +You initialize the class with one or more SciVal Institution IDs. The argument can be a single ID, a list of IDs, or a comma-separated string of IDs. + +.. code-block:: python + + >>> import pybliometrics + >>> from pybliometrics.scival import InstitutionMetrics + >>> pybliometrics.scival.init() + >>> institution_metrics = InstitutionMetrics("309021") + +You can obtain basic information just by printing the object: + +.. code-block:: python + + >>> print(institution_metrics) + InstitutionMetrics for 1 institution(s): + - Humboldt University of Berlin (ID: 309021) + +There are many properties available that provide different types of metrics. You can explore the available institutions: + +.. code-block:: python + + >>> institution_metrics.institutions + [Institution(id=309021, name='Humboldt University of Berlin', uri='Institution/309021')] + +**Individual Metric Properties** + +Each metric property returns a list of `MetricData` namedtuples with the structure: `(entity_id, entity_name, metric, metric_type, year, value, percentage, threshold)` where `entity_id` and `entity_name` refer to the institution. + +.. 
code-block:: python + + >>> institution_metrics.CitationCount + [MetricData(entity_id=309021, entity_name='Humboldt University of Berlin', metric='CitationCount', + metric_type=None, year='all', value=368527, percentage=None, threshold=None)] + + >>> institution_metrics.CollaborationImpact + [MetricData(entity_id=309021, entity_name='Humboldt University of Berlin', metric='CollaborationImpact', + metric_type='Institutional collaboration', year='all', value=8.610204, percentage=None, threshold=None), + MetricData(entity_id=309021, entity_name='Humboldt University of Berlin', metric='CollaborationImpact', + metric_type='International collaboration', year='all', value=22.430689, percentage=None, threshold=None), + MetricData(entity_id=309021, entity_name='Humboldt University of Berlin', metric='CollaborationImpact', + metric_type='National collaboration', year='all', value=9.935493, percentage=None, threshold=None), + MetricData(entity_id=309021, entity_name='Humboldt University of Berlin', metric='CollaborationImpact', + metric_type='Single authorship', year='all', value=3.187361, percentage=None, threshold=None)] + +**Available Metric Properties**: + +- `AcademicCorporateCollaboration` +- `AcademicCorporateCollaborationImpact` +- `CitationCount` +- `CitationsPerPublication` +- `CitedPublications` +- `Collaboration` +- `CollaborationImpact` +- `FieldWeightedCitationImpact` +- `OutputsInTopCitationPercentiles` +- `PublicationsInTopJournalPercentiles` +- `ScholarlyOutput` + +.. note:: + **Unified Data Structure**: InstitutionMetrics uses a unified `MetricData` structure with `entity_id` and `entity_name` fields. For institutions, these fields contain the institution ID and institution name respectively. This structure is compatible with `AuthorMetrics` and other SciVal metric classes, enabling consistent data analysis across different entity types. 
+ +**Getting All Metrics at Once** + +You can retrieve all available metrics in a single list using the `all_metrics` property: + +.. code-block:: python + + >>> all_data = institution_metrics.all_metrics + >>> len(all_data) + 28 + >>> # Convert to pandas DataFrame for analysis + >>> import pandas as pd + >>> df = pd.DataFrame(all_data) + >>> df.head() + + +.. raw:: html + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
entity_identity_namemetricmetric_typeyearvaluepercentagethreshold
0309021Humboldt University of BerlinAcademicCorporateCollaborationAcademic-corporate collaborationall1015.0000004.469594NaN
1309021Humboldt University of BerlinAcademicCorporateCollaborationNo academic-corporate collaborationall21694.00000095.530410NaN
2309021Humboldt University of BerlinAcademicCorporateCollaborationImpactAcademic-corporate collaborationall59.104435NaNNaN
3309021Humboldt University of BerlinAcademicCorporateCollaborationImpactNo academic-corporate collaborationall14.222181NaNNaN
4309021Humboldt University of BerlinCollaborationInstitutional collaborationall980.0000004.320000NaN
+
+ + +**Multiple Institutions** + +You can analyze multiple institutions simultaneously and retrieve metrics `by_year`: + +.. code-block:: python + + >>> multi_institutions = InstitutionMetrics([309050, 309076], by_year=True) + >>> print(multi_institutions) + InstitutionMetrics for 2 institution(s): + - Technical University of Berlin (ID: 309050) + - Heidelberg University  (ID: 309076) + >>> # Get all collaboration metrics for all institutions + >>> df = pd.DataFrame(multi_institutions.all_metrics) + >>> df.head() + +.. raw:: html + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
entity_identity_namemetricmetric_typeyearvaluepercentagethreshold
0309050Technical University of BerlinAcademicCorporateCollaborationAcademic-corporate collaboration2024282.07.770736NaN
1309050Technical University of BerlinAcademicCorporateCollaborationAcademic-corporate collaboration2020285.07.740358NaN
2309050Technical University of BerlinAcademicCorporateCollaborationAcademic-corporate collaboration2021250.06.529120NaN
3309050Technical University of BerlinAcademicCorporateCollaborationAcademic-corporate collaboration2022249.06.709782NaN
4309050Technical University of BerlinAcademicCorporateCollaborationAcademic-corporate collaboration2023253.06.693122NaN
+
+ + +Downloaded results are cached to expedite subsequent analyses. This information may become outdated. To refresh the cached results if they exist, set `refresh=True`, or provide an integer that will be interpreted as the maximum allowed number of days since the last modification date. For example, if you want to refresh all cached results older than 100 days, set `refresh=100`. Use `institution_metrics.get_cache_file_mdate()` to obtain the date of last modification, and `institution_metrics.get_cache_file_age()` to determine the number of days since the last modification. diff --git a/pybliometrics/scival/__init__.py b/pybliometrics/scival/__init__.py index 65fc6eac..ca18b715 100644 --- a/pybliometrics/scival/__init__.py +++ b/pybliometrics/scival/__init__.py @@ -1,4 +1,5 @@ from pybliometrics.utils import * from pybliometrics.scival.author_metrics import * +from pybliometrics.scival.institution_metrics import * from pybliometrics.scival.publication_lookup import * diff --git a/pybliometrics/scival/institution_metrics.py b/pybliometrics/scival/institution_metrics.py new file mode 100644 index 00000000..2ea34eb2 --- /dev/null +++ b/pybliometrics/scival/institution_metrics.py @@ -0,0 +1,213 @@ +from collections import namedtuple +from typing import Union, Optional + +from pybliometrics.superclasses import Retrieval +from pybliometrics.utils import make_int_if_possible +from pybliometrics.utils.constants import SCIVAL_METRICS +from pybliometrics.utils.parse_metrics import extract_metric_data + + +class InstitutionMetrics(Retrieval): + @property + def AcademicCorporateCollaboration(self) -> Optional[list]: + """Academic corporate collaboration metrics for each institution. 
+ Returns list of MetricData namedtuples with unified structure: + (entity_id, entity_name, metric, metric_type, year, value, percentage, threshold) + """ + return extract_metric_data(self._json, 'AcademicCorporateCollaboration', self._by_year, "institution") + + @property + def AcademicCorporateCollaborationImpact(self) -> Optional[list]: + """Academic corporate collaboration impact metrics for each institution. + Returns list of MetricData namedtuples with unified structure: + (entity_id, entity_name, metric, metric_type, year, value, percentage, threshold) + """ + return extract_metric_data(self._json, 'AcademicCorporateCollaborationImpact', self._by_year, "institution") + + @property + def all_metrics(self) -> Optional[list]: + """Get all available metrics concatenated into a single list. + Returns list of MetricData namedtuples with unified structure: + (entity_id, entity_name, metric, metric_type, year, value, percentage, threshold) + """ + all_metrics = [] + + # List of all metric properties + if self._metric_types: + metric_properties = self._metric_types.split(",") + + for prop_name in metric_properties: + metrics = getattr(self, prop_name) + if metrics: + all_metrics.extend(metrics) + + return all_metrics or None + + @property + def institutions(self) -> Optional[list]: + """A list of namedtuples representing institutions and their basic info + in the form `(id, name, uri)`. 
+ """ + out = [] + Institution = namedtuple('Institution', 'id name uri') + + # Handle both dict and direct access to results + if isinstance(self._json, dict): + results = self._json.get('results', []) + else: + results = [] + + for result in results: + institution_data = result.get('institution', {}) + new = Institution( + id=make_int_if_possible(institution_data.get('id')), + name=institution_data.get('name'), + uri=institution_data.get('uri') + ) + out.append(new) + return out or None + + @property + def CitationCount(self) -> Optional[list]: + """Citation count metrics for each institution. + Returns list of MetricData namedtuples with unified structure: + (entity_id, entity_name, metric, metric_type, year, value, percentage, threshold) + """ + return extract_metric_data(self._json, 'CitationCount', self._by_year, "institution") + + @property + def CitationsPerPublication(self) -> Optional[list]: + """Citations per publication metrics for each institution. + Returns list of MetricData namedtuples with unified structure: + (entity_id, entity_name, metric, metric_type, year, value, percentage, threshold) + """ + return extract_metric_data(self._json, 'CitationsPerPublication', self._by_year, "institution") + + @property + def CitedPublications(self) -> Optional[list]: + """Cited publications metrics for each institution. + Returns list of MetricData namedtuples with unified structure: + (entity_id, entity_name, metric, metric_type, year, value, percentage, threshold) + """ + return extract_metric_data(self._json, 'CitedPublications', self._by_year, "institution") + + @property + def Collaboration(self) -> Optional[list]: + """Collaboration metrics for each institution. 
+ Returns list of MetricData namedtuples with unified structure: + (entity_id, entity_name, metric, metric_type, year, value, percentage, threshold) + """ + return extract_metric_data(self._json, 'Collaboration', self._by_year, "institution") + + @property + def CollaborationImpact(self) -> Optional[list]: + """Collaboration impact metrics for each institution. + Returns list of MetricData namedtuples with unified structure: + (entity_id, entity_name, metric, metric_type, year, value, percentage, threshold) + """ + return extract_metric_data(self._json, 'CollaborationImpact', self._by_year, "institution") + + @property + def FieldWeightedCitationImpact(self) -> Optional[list]: + """Field weighted citation impact metrics for each institution. + Returns list of MetricData namedtuples with unified structure: + (entity_id, entity_name, metric, metric_type, year, value, percentage, threshold) + """ + return extract_metric_data(self._json, 'FieldWeightedCitationImpact', self._by_year, "institution") + + @property + def OutputsInTopCitationPercentiles(self) -> Optional[list]: + """Outputs in top citation percentiles metrics for each institution. + Returns list of MetricData namedtuples with unified structure: + (entity_id, entity_name, metric, metric_type, year, value, percentage, threshold) + """ + return extract_metric_data(self._json, 'OutputsInTopCitationPercentiles', self._by_year, "institution") + + @property + def PublicationsInTopJournalPercentiles(self) -> Optional[list]: + """Publications in top journal percentiles metrics for each institution. + Returns list of MetricData namedtuples with unified structure: + (entity_id, entity_name, metric, metric_type, year, value, percentage, threshold) + """ + return extract_metric_data(self._json, 'PublicationsInTopJournalPercentiles', self._by_year, "institution") + + @property + def ScholarlyOutput(self) -> Optional[list]: + """Scholarly output metrics for each institution. 
+ Returns list of MetricData namedtuples with unified structure: + (entity_id, entity_name, metric, metric_type, year, value, percentage, threshold) + """ + return extract_metric_data(self._json, 'ScholarlyOutput', self._by_year, "institution") + + def __init__(self, + institution_ids: Union[str, list], + metric_types: Optional[Union[str, list]] = None, + by_year: bool = False, + refresh: Union[bool, int] = False, + **kwds: str + ) -> None: + """Interaction with the SciVal Institution Metrics API. + + :param institution_ids: SciVal Institution ID(s). Can be a single ID or comma-separated + string of IDs, or a list of IDs (e.g. `[309054, 309086]`). + :param metric_types: Metric type(s) to retrieve. Can be a single metric + or comma-separated string, or a list. Available metrics are: + AcademicCorporateCollaboration, AcademicCorporateCollaborationImpact, + CitationCount, CitedPublications, Collaboration, CollaborationImpact, + FieldWeightedCitationImpact, ScholarlyOutput, + PublicationsInTopJournalPercentiles, OutputsInTopCitationPercentiles. + If not provided, all metrics are retrieved. + :param by_year: Whether to retrieve metrics broken down by year. + :param refresh: Whether to refresh the cached file if it exists or not. + If int is passed, cached file will be refreshed if the + number of days since last modification exceeds that value. + :param kwds: Keywords passed on as query parameters. Must contain + fields and values mentioned in the API specification at + https://dev.elsevier.com/documentation/SciValInstitutionAPI.wadl. + + Note: + All metric properties return lists of MetricData namedtuples with + unified structure: `(entity_id, entity_name, metric, metric_type, + year, value, percentage, threshold)`. Use the `all_metrics` property + to get all metrics concatenated into a single list for easy data + manipulation and analysis. 
+ """ + self._view = '' + self._refresh = refresh + self._by_year = by_year + + # Handle institutions parameter + if isinstance(institution_ids, list): + institution_ids = ",".join(str(i) for i in institution_ids) + + # Handle metric_types parameter - use all metrics by default + if metric_types is None: + metric_types = SCIVAL_METRICS["InstitutionMetrics"]["byYear"] + + if isinstance(metric_types, list): + metric_types = ",".join(metric_types) + + self._metric_types = metric_types + + # Set up parameters for the API call + params = { + 'institutionIds': institution_ids, + 'metricTypes': metric_types, + 'byYear': str(by_year).lower(), + **kwds + } + + Retrieval.__init__(self, **params, **kwds) + + def __str__(self): + """Return pretty text version of the institution metrics.""" + institutions = self.institutions or [] + institution_count = len(institutions) + + if institution_count == 0: + return "No institutions found" + else: + s = f"InstitutionMetrics for {institution_count} institution(s):" + for institution in institutions: + s += f"\n- {institution.name} (ID: {institution.id})" + return s diff --git a/pybliometrics/scival/tests/test_InstitutionMetrics.py b/pybliometrics/scival/tests/test_InstitutionMetrics.py new file mode 100644 index 00000000..d477840a --- /dev/null +++ b/pybliometrics/scival/tests/test_InstitutionMetrics.py @@ -0,0 +1,221 @@ +from collections import namedtuple + +from pybliometrics.scival.institution_metrics import InstitutionMetrics +from pybliometrics.utils.startup import init + +init() + +# Test cases with actual institution IDs from the response examples +single_institution_all = InstitutionMetrics("505023", by_year=False, refresh=30) +multiple_institutions_all = InstitutionMetrics([309054, 309086], by_year=True, refresh=30) +empty_metrics = InstitutionMetrics("0000000") + + +def test_academic_corporate_collaboration(): + """Test AcademicCorporateCollaboration property for all test cases.""" + result = 
single_institution_all.AcademicCorporateCollaboration + + if result and len(result) > 0: + assert has_all_fields(result[0]) + + result_multi = multiple_institutions_all.AcademicCorporateCollaboration + if result_multi and len(result_multi) > 0: + assert has_all_fields(result_multi[0]) + + +def test_academic_corporate_collaboration_impact(): + """Test AcademicCorporateCollaborationImpact property for all test cases.""" + result = single_institution_all.AcademicCorporateCollaborationImpact + if result and len(result) > 0: + assert has_all_fields(result[0]) + + +def test_all_metrics(): + """Test all_metrics property for all test cases.""" + MetricData = namedtuple('MetricData', + 'entity_id entity_name metric metric_type year value percentage threshold', + defaults=(None, None, None, None, "all", None, None, None)) + + result = single_institution_all.all_metrics + expected_result_0 =MetricData(entity_id=505023, + entity_name='Universidad Nacional Autónoma de México', + metric='AcademicCorporateCollaboration', + metric_type='Academic-corporate collaboration', + year='all', + value=951, + percentage=2.31415, + threshold=None) + assert result[0] == expected_result_0 + assert len(result) >= 28 + + result_multi = multiple_institutions_all.all_metrics + expected_result_multi_last = MetricData(entity_id=309086, + entity_name='Ludwig Maximilian University of Munich', + metric='OutputsInTopCitationPercentiles', + metric_type=None, + year='2023', + value=3792, + percentage=38.50528, + threshold=25) + assert result_multi[-1] == expected_result_multi_last + assert len(result_multi) >= 280 + + +def test_institutions(): + """Test the institutions property for all test cases.""" + Institution = namedtuple('Institution', 'id name uri') + + # Test single institution + institutions = single_institution_all.institutions + if institutions and len(institutions) > 0: + assert len(institutions) == 1 + expected_institution = Institution(id=505023, + name='Universidad Nacional Autónoma de 
México', + uri='Institution/505023') + assert institutions[0] == expected_institution + + # Test multiple institutions + institutions_multi = multiple_institutions_all.institutions + if institutions_multi and len(institutions_multi) > 0: + assert len(institutions_multi) == 2 + + expected_institution_1 = Institution(id=309054, + name='Technical University of Munich', + uri='Institution/309054') + assert institutions_multi[0] == expected_institution_1 + + # Test empty metrics + assert empty_metrics.institutions is None + + +def has_all_fields(metric_data): + """Check if the metric data has all required fields.""" + required_fields = ['entity_id', 'entity_name', 'metric', 'metric_type', 'year', 'value', 'percentage', 'threshold'] + return all(hasattr(metric_data, field) for field in required_fields) + + +def test_citation_count(): + """Test CitationCount property for all test cases.""" + result = single_institution_all.CitationCount + if result and len(result) > 0: + assert has_all_fields(result[0]) + + result_multi = multiple_institutions_all.CitationCount + if result_multi and len(result_multi) > 0: + assert has_all_fields(result_multi[0]) + + +def test_citations_per_publication(): + """Test CitationsPerPublication property for all test cases.""" + result = single_institution_all.CitationsPerPublication + if result and len(result) > 0: + assert has_all_fields(result[0]) + + result_multi = multiple_institutions_all.CitationsPerPublication + if result_multi and len(result_multi) > 0: + assert has_all_fields(result_multi[0]) + + +def test_cited_publications(): + """Test CitedPublications property for all test cases.""" + result = single_institution_all.CitedPublications + if result and len(result) > 0: + assert has_all_fields(result[0]) + + result_multi = multiple_institutions_all.CitedPublications + if result_multi and len(result_multi) > 0: + assert has_all_fields(result_multi[0]) + + +def test_collaboration(): + """Test Collaboration property for all test cases.""" 
+ result = single_institution_all.Collaboration + if result and len(result) > 0: + assert has_all_fields(result[0]) + + result_multi = multiple_institutions_all.Collaboration + if result_multi and len(result_multi) > 0: + assert has_all_fields(result_multi[0]) + + +def test_collaboration_impact(): + """Test CollaborationImpact property for all test cases.""" + result = single_institution_all.CollaborationImpact + if result and len(result) > 0: + assert has_all_fields(result[0]) + + result_multi = multiple_institutions_all.CollaborationImpact + if result_multi and len(result_multi) > 0: + assert has_all_fields(result_multi[0]) + + +def test_empty_metrics(): + """Test handling of empty metrics.""" + assert empty_metrics.all_metrics is None + assert empty_metrics.institutions is None + assert empty_metrics.CitationCount is None + assert empty_metrics.CitationsPerPublication is None + assert empty_metrics.CitedPublications is None + assert empty_metrics.Collaboration is None + assert empty_metrics.CollaborationImpact is None + + +def test_field_weighted_citation_impact(): + """Test FieldWeightedCitationImpact property for all test cases.""" + result = single_institution_all.FieldWeightedCitationImpact + if result and len(result) > 0: + assert has_all_fields(result[0]) + + result_multi = multiple_institutions_all.FieldWeightedCitationImpact + if result_multi and len(result_multi) > 0: + assert has_all_fields(result_multi[0]) + + +def test_outputs_in_top_citation_percentiles(): + """Test OutputsInTopCitationPercentiles property for all test cases.""" + result = single_institution_all.OutputsInTopCitationPercentiles + if result and len(result) > 0: + assert has_all_fields(result[0]) + + result_multi = multiple_institutions_all.OutputsInTopCitationPercentiles + if result_multi and len(result_multi) > 0: + assert has_all_fields(result_multi[0]) + + +def test_publications_in_top_journal_percentiles(): + """Test PublicationsInTopJournalPercentiles property for all test 
cases.""" + result = single_institution_all.PublicationsInTopJournalPercentiles + if result and len(result) > 0: + assert has_all_fields(result[0]) + + result_multi = multiple_institutions_all.PublicationsInTopJournalPercentiles + if result_multi and len(result_multi) > 0: + assert has_all_fields(result_multi[0]) + + +def test_scholarly_output(): + """Test ScholarlyOutput property for all test cases.""" + result = single_institution_all.ScholarlyOutput + if result and len(result) > 0: + assert has_all_fields(result[0]) + + result_multi = multiple_institutions_all.ScholarlyOutput + if result_multi and len(result_multi) > 0: + assert has_all_fields(result_multi[0]) + + +def test_str_representation(): + """Test the string representation of InstitutionMetrics objects.""" + # Test single institution + str_single = str(single_institution_all) + expected_str = "InstitutionMetrics for 1 institution(s):\n- Universidad Nacional Autónoma de México (ID: 505023)" + assert str_single == expected_str + + # Test multiple institutions + str_multi = str(multiple_institutions_all) + expected_str_multi = "InstitutionMetrics for 2 institution(s):\n- Technical University of Munich (ID: 309054)\n- Ludwig Maximilian University of Munich (ID: 309086)" + assert str_multi == expected_str_multi + + # Test empty metrics + str_empty = str(empty_metrics) + assert str_empty == "No institutions found" diff --git a/pybliometrics/utils/constants.py b/pybliometrics/utils/constants.py index 5d2364e5..4f183045 100644 --- a/pybliometrics/utils/constants.py +++ b/pybliometrics/utils/constants.py @@ -34,6 +34,7 @@ 'PlumXMetrics': CACHE_PATH / "Scopus" / 'plumx', 'PublicationLookup': CACHE_PATH / "Scival" / "publication_lookup", 'AuthorMetrics': CACHE_PATH / "Scival" / "author_metrics", + 'InstitutionMetrics': CACHE_PATH / "Scival" / "institution_metrics", 'ScDirSubjectClassifications': CACHE_PATH / "ScienceDirect" / 'subject_classification', 'ScienceDirectSearch': CACHE_PATH / "ScienceDirect" / 
'science_direct_search', 'ScopusSearch': CACHE_PATH / "Scopus" / 'scopus_search', @@ -61,6 +62,7 @@ 'ObjectRetrieval': RETRIEVAL_BASE + 'object/', 'PublicationLookup': SCIVAL_BASE + 'publication/', 'AuthorMetrics': SCIVAL_BASE + 'author/metrics/', + 'InstitutionMetrics': SCIVAL_BASE + 'institution/metrics/', 'PlumXMetrics': 'https://api.elsevier.com/analytics/plumx/', 'ScDirSubjectClassifications': RETRIEVAL_BASE + 'subject/scidir/', 'ScienceDirectSearch': SEARCH_BASE + 'sciencedirect/', @@ -112,6 +114,22 @@ "notByYear": [ "HIndices" ] + }, + "InstitutionMetrics": { + "byYear": [ + "AcademicCorporateCollaboration", + "AcademicCorporateCollaborationImpact", + "Collaboration", + "CitationCount", + "CitationsPerPublication", + "CollaborationImpact", + "CitedPublications", + "FieldWeightedCitationImpact", + "ScholarlyOutput", + "PublicationsInTopJournalPercentiles", + "OutputsInTopCitationPercentiles" + ], + "notByYear": [] } } @@ -124,7 +142,7 @@ "ObjectRetrieval"} # APIs that do not require an ID in the URL -APIS_NO_ID_IN_URL = {"AuthorMetrics"} +APIS_NO_ID_IN_URL = {"AuthorMetrics", "InstitutionMetrics"} # Item per page limits for all classes COUNTS = { @@ -147,6 +165,7 @@ 'ArticleMetadata': 6, 'ArticleRetrieval': 10, 'AuthorMetrics': 6, + 'InstitutionMetrics': 6, 'AuthorRetrieval': 3, 'AuthorSearch': 2, 'CitationOverview': 4, From 0c77d5f1c98be55fa931e85d47c4b3429947f801 Mon Sep 17 00:00:00 2001 From: Nils Herrmann Date: Wed, 20 Aug 2025 10:13:36 +0200 Subject: [PATCH 10/14] Swap metric and metric type --- docs/reference/scival/InstitutionMetrics.rst | 30 +++++++++++-------- .../scival/tests/test_InstitutionMetrics.py | 6 ++-- 2 files changed, 20 insertions(+), 16 deletions(-) diff --git a/docs/reference/scival/InstitutionMetrics.rst b/docs/reference/scival/InstitutionMetrics.rst index f646c1b8..164284f0 100644 --- a/docs/reference/scival/InstitutionMetrics.rst +++ b/docs/reference/scival/InstitutionMetrics.rst @@ -51,17 +51,21 @@ Each metric property returns a 
list of `MetricData` namedtuples with the structu >>> institution_metrics.CitationCount [MetricData(entity_id=309021, entity_name='Humboldt University of Berlin', metric='CitationCount', - metric_type=None, year='all', value=368527, percentage=None, threshold=None)] + metric_type='CitationCount', year='all', value=368527, percentage=None, threshold=None)] + +For **nested metrics** (like CollaborationImpact), `metric_type` contains the main category and `metric` contains the specific sub-type: + +.. code-block:: python >>> institution_metrics.CollaborationImpact - [MetricData(entity_id=309021, entity_name='Humboldt University of Berlin', metric='CollaborationImpact', - metric_type='Institutional collaboration', year='all', value=8.610204, percentage=None, threshold=None), - MetricData(entity_id=309021, entity_name='Humboldt University of Berlin', metric='CollaborationImpact', - metric_type='International collaboration', year='all', value=22.430689, percentage=None, threshold=None), - MetricData(entity_id=309021, entity_name='Humboldt University of Berlin', metric='CollaborationImpact', - metric_type='National collaboration', year='all', value=9.935493, percentage=None, threshold=None), - MetricData(entity_id=309021, entity_name='Humboldt University of Berlin', metric='CollaborationImpact', - metric_type='Single authorship', year='all', value=3.187361, percentage=None, threshold=None)] + [MetricData(entity_id=309021, entity_name='Humboldt University of Berlin', metric='Institutional collaboration', + metric_type='CollaborationImpact', year='all', value=8.610204, percentage=None, threshold=None), + MetricData(entity_id=309021, entity_name='Humboldt University of Berlin', metric='International collaboration', + metric_type='CollaborationImpact', year='all', value=22.430689, percentage=None, threshold=None), + MetricData(entity_id=309021, entity_name='Humboldt University of Berlin', metric='National collaboration', + metric_type='CollaborationImpact', year='all', 
value=9.935493, percentage=None, threshold=None), + MetricData(entity_id=309021, entity_name='Humboldt University of Berlin', metric='Single authorship', + metric_type='CollaborationImpact', year='all', value=3.187361, percentage=None, threshold=None)] **Available Metric Properties**: @@ -133,8 +137,8 @@ You can retrieve all available metrics in a single list using the `all_metrics` 0 309021 Humboldt University of Berlin - AcademicCorporateCollaboration Academic-corporate collaboration + AcademicCorporateCollaboration all 1015.000000 4.469594 @@ -144,8 +148,8 @@ You can retrieve all available metrics in a single list using the `all_metrics` 1 309021 Humboldt University of Berlin - AcademicCorporateCollaboration No academic-corporate collaboration + AcademicCorporateCollaboration all 21694.000000 95.530410 @@ -155,8 +159,8 @@ You can retrieve all available metrics in a single list using the `all_metrics` 2 309021 Humboldt University of Berlin - AcademicCorporateCollaborationImpact Academic-corporate collaboration + AcademicCorporateCollaborationImpact all 59.104435 NaN @@ -166,8 +170,8 @@ You can retrieve all available metrics in a single list using the `all_metrics` 3 309021 Humboldt University of Berlin - AcademicCorporateCollaborationImpact No academic-corporate collaboration + AcademicCorporateCollaborationImpact all 14.222181 NaN diff --git a/pybliometrics/scival/tests/test_InstitutionMetrics.py b/pybliometrics/scival/tests/test_InstitutionMetrics.py index d477840a..410718ef 100644 --- a/pybliometrics/scival/tests/test_InstitutionMetrics.py +++ b/pybliometrics/scival/tests/test_InstitutionMetrics.py @@ -39,8 +39,8 @@ def test_all_metrics(): result = single_institution_all.all_metrics expected_result_0 =MetricData(entity_id=505023, entity_name='Universidad Nacional Autónoma de México', - metric='AcademicCorporateCollaboration', - metric_type='Academic-corporate collaboration', + metric='Academic-corporate collaboration', + 
metric_type='AcademicCorporateCollaboration', year='all', value=951, percentage=2.31415, @@ -52,7 +52,7 @@ def test_all_metrics(): expected_result_multi_last = MetricData(entity_id=309086, entity_name='Ludwig Maximilian University of Munich', metric='OutputsInTopCitationPercentiles', - metric_type=None, + metric_type='OutputsInTopCitationPercentiles', year='2023', value=3792, percentage=38.50528, From 04829778fb4b4bf443ed9a6948876211fcee3255 Mon Sep 17 00:00:00 2001 From: Nils Herrmann Date: Wed, 20 Aug 2025 14:17:09 +0200 Subject: [PATCH 11/14] Remove all_metrics --- docs/reference/scival/InstitutionMetrics.rst | 170 ++++++++++++------ pybliometrics/scival/institution_metrics.py | 24 +-- .../scival/tests/test_InstitutionMetrics.py | 153 ++++++---------- 3 files changed, 173 insertions(+), 174 deletions(-) diff --git a/docs/reference/scival/InstitutionMetrics.rst b/docs/reference/scival/InstitutionMetrics.rst index 164284f0..74994480 100644 --- a/docs/reference/scival/InstitutionMetrics.rst +++ b/docs/reference/scival/InstitutionMetrics.rst @@ -84,18 +84,18 @@ For **nested metrics** (like CollaborationImpact), `metric_type` contains the ma .. note:: **Unified Data Structure**: InstitutionMetrics uses a unified `MetricData` structure with `entity_id` and `entity_name` fields. For institutions, these fields contain the institution ID and institution name respectively. This structure is compatible with `AuthorMetrics` and other SciVal metric classes, enabling consistent data analysis across different entity types. -**Getting All Metrics at Once** +**Concatenating Metrics** + -You can retrieve all available metrics in a single list using the `all_metrics` property: .. 
code-block:: python - >>> all_data = institution_metrics.all_metrics - >>> len(all_data) - 28 - >>> # Convert to pandas DataFrame for analysis >>> import pandas as pd - >>> df = pd.DataFrame(all_data) + >>> + >>> collab_data = [] + >>> collab_data.extend(institution_metrics.Collaboration) + >>> collab_data.extend(institution_metrics.CollaborationImpact) + >>> df = pd.DataFrame(collab_data) >>> df.head() @@ -137,56 +137,56 @@ You can retrieve all available metrics in a single list using the `all_metrics` 0 309021 Humboldt University of Berlin - Academic-corporate collaboration - AcademicCorporateCollaboration + Institutional collaboration + Collaboration all - 1015.000000 - 4.469594 - NaN + 980.000000 + 4.32 + None 1 309021 Humboldt University of Berlin - No academic-corporate collaboration - AcademicCorporateCollaboration + International collaboration + Collaboration all - 21694.000000 - 95.530410 - NaN + 12754.000000 + 56.16 + None 2 309021 Humboldt University of Berlin - Academic-corporate collaboration - AcademicCorporateCollaborationImpact + National collaboration + Collaboration all - 59.104435 - NaN - NaN + 6728.000000 + 29.63 + None 3 309021 Humboldt University of Berlin - No academic-corporate collaboration - AcademicCorporateCollaborationImpact + Single authorship + Collaboration all - 14.222181 - NaN - NaN + 2247.000000 + 9.89 + None 4 309021 Humboldt University of Berlin - Collaboration Institutional collaboration + CollaborationImpact all - 980.000000 - 4.320000 + 8.610204 NaN + None @@ -204,8 +204,8 @@ You can analyze multiple institutions simultaneously and retrieve metrics `by_ye InstitutionMetrics for 2 institution(s): - Technical University of Berlin (ID: 309050) - Heidelberg University  (ID: 309076) - >>> # Get all collaboration metrics for all institutions - >>> df = pd.DataFrame(multi_institutions.all_metrics) + >>> # Get CitedPublications metrics + >>> df = pd.DataFrame(multi_institutions.CitedPublications) >>> df.head() .. 
raw:: html @@ -227,6 +227,7 @@ You can analyze multiple institutions simultaneously and retrieve metrics `by_ye font-size: 12px; } +
@@ -246,56 +247,111 @@ You can analyze multiple institutions simultaneously and retrieve metrics `by_ye - - + + - - - + + + - - + + - - - + + + - - + + - - - + + + - - + + - - - + + + - - + + - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
0 309050 Technical University of BerlinAcademicCorporateCollaborationAcademic-corporate collaborationCitedPublicationsCitedPublications 2024282.07.770736NaN240066.133920None
1 309050 Technical University of BerlinAcademicCorporateCollaborationAcademic-corporate collaborationCitedPublicationsCitedPublications 2020285.07.740358NaN329489.462250None
2 309050 Technical University of BerlinAcademicCorporateCollaborationAcademic-corporate collaborationCitedPublicationsCitedPublications 2021250.06.529120NaN338588.404290None
3 309050 Technical University of BerlinAcademicCorporateCollaborationAcademic-corporate collaborationCitedPublicationsCitedPublications 2022249.06.709782NaN320986.472650None
4 309050 Technical University of BerlinAcademicCorporateCollaborationAcademic-corporate collaborationCitedPublicationsCitedPublications 2023253.06.693122NaN304480.529100None
5309076Heidelberg UniversityCitedPublicationsCitedPublications2024593772.517410None
6309076Heidelberg UniversityCitedPublicationsCitedPublications2020742392.005455None
7309076Heidelberg UniversityCitedPublicationsCitedPublications2021782890.864770None
8309076Heidelberg UniversityCitedPublicationsCitedPublications2022735488.166885None
9309076Heidelberg UniversityCitedPublicationsCitedPublications2023692185.150100None
diff --git a/pybliometrics/scival/institution_metrics.py b/pybliometrics/scival/institution_metrics.py index 2ea34eb2..7f2d8337 100644 --- a/pybliometrics/scival/institution_metrics.py +++ b/pybliometrics/scival/institution_metrics.py @@ -24,25 +24,6 @@ def AcademicCorporateCollaborationImpact(self) -> Optional[list]: """ return extract_metric_data(self._json, 'AcademicCorporateCollaborationImpact', self._by_year, "institution") - @property - def all_metrics(self) -> Optional[list]: - """Get all available metrics concatenated into a single list. - Returns list of MetricData namedtuples with unified structure: - (entity_id, entity_name, metric, metric_type, year, value, percentage, threshold) - """ - all_metrics = [] - - # List of all metric properties - if self._metric_types: - metric_properties = self._metric_types.split(",") - - for prop_name in metric_properties: - metrics = getattr(self, prop_name) - if metrics: - all_metrics.extend(metrics) - - return all_metrics or None - @property def institutions(self) -> Optional[list]: """A list of namedtuples representing institutions and their basic info @@ -168,9 +149,8 @@ def __init__(self, Note: All metric properties return lists of MetricData namedtuples with unified structure: `(entity_id, entity_name, metric, metric_type, - year, value, percentage, threshold)`. Use the `all_metrics` property - to get all metrics concatenated into a single list for easy data - manipulation and analysis. + year, value, percentage, threshold)`. 
+ """ self._view = '' self._refresh = refresh diff --git a/pybliometrics/scival/tests/test_InstitutionMetrics.py b/pybliometrics/scival/tests/test_InstitutionMetrics.py index 410718ef..f4f43ad2 100644 --- a/pybliometrics/scival/tests/test_InstitutionMetrics.py +++ b/pybliometrics/scival/tests/test_InstitutionMetrics.py @@ -1,7 +1,6 @@ from collections import namedtuple -from pybliometrics.scival.institution_metrics import InstitutionMetrics -from pybliometrics.utils.startup import init +from pybliometrics.scival import init, InstitutionMetrics init() @@ -11,54 +10,44 @@ empty_metrics = InstitutionMetrics("0000000") +# Auxiliary function to check if a MetricData namedtuple has all required fields +def has_all_fields(metric_data): + """Check if the metric data has all required fields.""" + required_fields = ['entity_id', 'entity_name', 'metric', + 'metric_type', 'year', 'value', 'percentage', + 'threshold'] + return all(hasattr(metric_data, field) for field in required_fields) + + def test_academic_corporate_collaboration(): """Test AcademicCorporateCollaboration property for all test cases.""" result = single_institution_all.AcademicCorporateCollaboration - - if result and len(result) > 0: - assert has_all_fields(result[0]) + assert has_all_fields(result[0]) + assert result[0].entity_id == 505023 + assert result[0].entity_name == 'Universidad Nacional Autónoma de México' + assert result[0].metric == 'Academic-corporate collaboration' + assert result[0].metric_type == 'AcademicCorporateCollaboration' + assert result[0].year == 'all' + assert result[0].value >= 900 + assert result[0].percentage > 2 + assert result[0].threshold is None result_multi = multiple_institutions_all.AcademicCorporateCollaboration - if result_multi and len(result_multi) > 0: - assert has_all_fields(result_multi[0]) + assert has_all_fields(result_multi[0]) + assert result_multi[0].entity_id == 309054 + assert result_multi[0].entity_name == 'Technical University of Munich' + assert 
result_multi[0].metric == 'Academic-corporate collaboration' + assert result_multi[0].metric_type == 'AcademicCorporateCollaboration' + assert result_multi[0].year == '2024' + assert result_multi[0].value >= 1000 + assert result_multi[0].percentage > 9 + assert result_multi[0].threshold is None def test_academic_corporate_collaboration_impact(): """Test AcademicCorporateCollaborationImpact property for all test cases.""" result = single_institution_all.AcademicCorporateCollaborationImpact - if result and len(result) > 0: - assert has_all_fields(result[0]) - - -def test_all_metrics(): - """Test all_metrics property for all test cases.""" - MetricData = namedtuple('MetricData', - 'entity_id entity_name metric metric_type year value percentage threshold', - defaults=(None, None, None, None, "all", None, None, None)) - - result = single_institution_all.all_metrics - expected_result_0 =MetricData(entity_id=505023, - entity_name='Universidad Nacional Autónoma de México', - metric='Academic-corporate collaboration', - metric_type='AcademicCorporateCollaboration', - year='all', - value=951, - percentage=2.31415, - threshold=None) - assert result[0] == expected_result_0 - assert len(result) >= 28 - - result_multi = multiple_institutions_all.all_metrics - expected_result_multi_last = MetricData(entity_id=309086, - entity_name='Ludwig Maximilian University of Munich', - metric='OutputsInTopCitationPercentiles', - metric_type='OutputsInTopCitationPercentiles', - year='2023', - value=3792, - percentage=38.50528, - threshold=25) - assert result_multi[-1] == expected_result_multi_last - assert len(result_multi) >= 280 + assert has_all_fields(result[0]) def test_institutions(): @@ -67,91 +56,72 @@ def test_institutions(): # Test single institution institutions = single_institution_all.institutions - if institutions and len(institutions) > 0: - assert len(institutions) == 1 - expected_institution = Institution(id=505023, - name='Universidad Nacional Autónoma de México', - 
uri='Institution/505023') - assert institutions[0] == expected_institution + assert len(institutions) == 1 + expected_institution = Institution(id=505023, + name='Universidad Nacional Autónoma de México', + uri='Institution/505023') + assert institutions[0] == expected_institution # Test multiple institutions institutions_multi = multiple_institutions_all.institutions - if institutions_multi and len(institutions_multi) > 0: - assert len(institutions_multi) == 2 + assert len(institutions_multi) == 2 - expected_institution_1 = Institution(id=309054, - name='Technical University of Munich', - uri='Institution/309054') - assert institutions_multi[0] == expected_institution_1 + expected_institution_1 = Institution(id=309054, + name='Technical University of Munich', + uri='Institution/309054') + assert institutions_multi[0] == expected_institution_1 # Test empty metrics assert empty_metrics.institutions is None -def has_all_fields(metric_data): - """Check if the metric data has all required fields.""" - required_fields = ['entity_id', 'entity_name', 'metric', 'metric_type', 'year', 'value', 'percentage', 'threshold'] - return all(hasattr(metric_data, field) for field in required_fields) - - def test_citation_count(): """Test CitationCount property for all test cases.""" result = single_institution_all.CitationCount - if result and len(result) > 0: - assert has_all_fields(result[0]) + assert has_all_fields(result[0]) result_multi = multiple_institutions_all.CitationCount - if result_multi and len(result_multi) > 0: - assert has_all_fields(result_multi[0]) + assert has_all_fields(result_multi[0]) def test_citations_per_publication(): """Test CitationsPerPublication property for all test cases.""" result = single_institution_all.CitationsPerPublication - if result and len(result) > 0: - assert has_all_fields(result[0]) + assert has_all_fields(result[0]) result_multi = multiple_institutions_all.CitationsPerPublication - if result_multi and len(result_multi) > 0: - assert 
has_all_fields(result_multi[0]) + assert has_all_fields(result_multi[0]) def test_cited_publications(): """Test CitedPublications property for all test cases.""" result = single_institution_all.CitedPublications - if result and len(result) > 0: - assert has_all_fields(result[0]) + assert has_all_fields(result[0]) result_multi = multiple_institutions_all.CitedPublications - if result_multi and len(result_multi) > 0: - assert has_all_fields(result_multi[0]) + assert has_all_fields(result_multi[0]) def test_collaboration(): """Test Collaboration property for all test cases.""" result = single_institution_all.Collaboration - if result and len(result) > 0: - assert has_all_fields(result[0]) + assert has_all_fields(result[0]) result_multi = multiple_institutions_all.Collaboration - if result_multi and len(result_multi) > 0: - assert has_all_fields(result_multi[0]) + assert has_all_fields(result_multi[0]) def test_collaboration_impact(): """Test CollaborationImpact property for all test cases.""" result = single_institution_all.CollaborationImpact - if result and len(result) > 0: - assert has_all_fields(result[0]) + assert has_all_fields(result[0]) result_multi = multiple_institutions_all.CollaborationImpact - if result_multi and len(result_multi) > 0: - assert has_all_fields(result_multi[0]) + assert has_all_fields(result_multi[0]) def test_empty_metrics(): """Test handling of empty metrics.""" - assert empty_metrics.all_metrics is None assert empty_metrics.institutions is None assert empty_metrics.CitationCount is None assert empty_metrics.CitationsPerPublication is None @@ -163,45 +133,38 @@ def test_empty_metrics(): def test_field_weighted_citation_impact(): """Test FieldWeightedCitationImpact property for all test cases.""" result = single_institution_all.FieldWeightedCitationImpact - if result and len(result) > 0: - assert has_all_fields(result[0]) + assert has_all_fields(result[0]) result_multi = multiple_institutions_all.FieldWeightedCitationImpact - if 
result_multi and len(result_multi) > 0: - assert has_all_fields(result_multi[0]) + assert has_all_fields(result_multi[0]) def test_outputs_in_top_citation_percentiles(): """Test OutputsInTopCitationPercentiles property for all test cases.""" result = single_institution_all.OutputsInTopCitationPercentiles - if result and len(result) > 0: - assert has_all_fields(result[0]) + assert has_all_fields(result[0]) result_multi = multiple_institutions_all.OutputsInTopCitationPercentiles - if result_multi and len(result_multi) > 0: - assert has_all_fields(result_multi[0]) + + assert has_all_fields(result_multi[0]) def test_publications_in_top_journal_percentiles(): """Test PublicationsInTopJournalPercentiles property for all test cases.""" result = single_institution_all.PublicationsInTopJournalPercentiles - if result and len(result) > 0: - assert has_all_fields(result[0]) + assert has_all_fields(result[0]) result_multi = multiple_institutions_all.PublicationsInTopJournalPercentiles - if result_multi and len(result_multi) > 0: - assert has_all_fields(result_multi[0]) + assert has_all_fields(result_multi[0]) def test_scholarly_output(): """Test ScholarlyOutput property for all test cases.""" result = single_institution_all.ScholarlyOutput - if result and len(result) > 0: - assert has_all_fields(result[0]) + assert has_all_fields(result[0]) result_multi = multiple_institutions_all.ScholarlyOutput - if result_multi and len(result_multi) > 0: - assert has_all_fields(result_multi[0]) + assert has_all_fields(result_multi[0]) def test_str_representation(): From 6663b79e9f9828e53fa0fb93949a73c8977e012c Mon Sep 17 00:00:00 2001 From: Nils Herrmann Date: Tue, 26 Aug 2025 11:03:15 +0200 Subject: [PATCH 12/14] Rebase and adapt --- docs/reference/scival/InstitutionMetrics.rst | 36 +++------- pybliometrics/scival/institution_metrics.py | 72 +++++++++---------- .../scival/tests/test_InstitutionMetrics.py | 4 +- 3 files changed, 46 insertions(+), 66 deletions(-) diff --git 
a/docs/reference/scival/InstitutionMetrics.rst b/docs/reference/scival/InstitutionMetrics.rst index 74994480..f322c9ee 100644 --- a/docs/reference/scival/InstitutionMetrics.rst +++ b/docs/reference/scival/InstitutionMetrics.rst @@ -23,9 +23,8 @@ You initialize the class with one or more SciVal Institution IDs. The argument c .. code-block:: python - >>> import pybliometrics - >>> from pybliometrics.scival import InstitutionMetrics - >>> pybliometrics.scival.init() + >>> from pybliometrics.scival import InstitutionMetrics, init + >>> init() >>> institution_metrics = InstitutionMetrics("309021") You can obtain basic information just by printing the object: @@ -45,27 +44,27 @@ There are many properties available that provide different types of metrics. You **Individual Metric Properties** -Each metric property returns a list of `MetricData` namedtuples with the structure: `(entity_id, entity_name, metric, metric_type, year, value, percentage, threshold)` where `entity_id` and `entity_name` refer to the institution. +Each metric property returns a list of `MetricData` namedtuples with the structure: `(entity_id, entity_name, metric, year, value, percentage, threshold)` where `entity_id` and `entity_name` refer to the institution. .. code-block:: python >>> institution_metrics.CitationCount [MetricData(entity_id=309021, entity_name='Humboldt University of Berlin', metric='CitationCount', - metric_type='CitationCount', year='all', value=368527, percentage=None, threshold=None)] + year='all', value=368527, percentage=None, threshold=None)] -For **nested metrics** (like CollaborationImpact), `metric_type` contains the main category and `metric` contains the specific sub-type: +For **nested metrics** (like CollaborationImpact), `metric` contains the specific sub-type: .. 
code-block:: python >>> institution_metrics.CollaborationImpact [MetricData(entity_id=309021, entity_name='Humboldt University of Berlin', metric='Institutional collaboration', - metric_type='CollaborationImpact', year='all', value=8.610204, percentage=None, threshold=None), + year='all', value=8.610204, percentage=None, threshold=None), MetricData(entity_id=309021, entity_name='Humboldt University of Berlin', metric='International collaboration', - metric_type='CollaborationImpact', year='all', value=22.430689, percentage=None, threshold=None), + year='all', value=22.430689, percentage=None, threshold=None), MetricData(entity_id=309021, entity_name='Humboldt University of Berlin', metric='National collaboration', - metric_type='CollaborationImpact', year='all', value=9.935493, percentage=None, threshold=None), + year='all', value=9.935493, percentage=None, threshold=None), MetricData(entity_id=309021, entity_name='Humboldt University of Berlin', metric='Single authorship', - metric_type='CollaborationImpact', year='all', value=3.187361, percentage=None, threshold=None)] + year='all', value=3.187361, percentage=None, threshold=None)] **Available Metric Properties**: @@ -125,7 +124,6 @@ For **nested metrics** (like CollaborationImpact), `metric_type` contains the ma entity_id entity_name metric - metric_type year value percentage @@ -138,7 +136,6 @@ For **nested metrics** (like CollaborationImpact), `metric_type` contains the ma 309021 Humboldt University of Berlin Institutional collaboration - Collaboration all 980.000000 4.32 @@ -149,7 +146,6 @@ For **nested metrics** (like CollaborationImpact), `metric_type` contains the ma 309021 Humboldt University of Berlin International collaboration - Collaboration all 12754.000000 56.16 @@ -160,7 +156,6 @@ For **nested metrics** (like CollaborationImpact), `metric_type` contains the ma 309021 Humboldt University of Berlin National collaboration - Collaboration all 6728.000000 29.63 @@ -171,7 +166,6 @@ For **nested metrics** 
(like CollaborationImpact), `metric_type` contains the ma 309021 Humboldt University of Berlin Single authorship - Collaboration all 2247.000000 9.89 @@ -182,7 +176,6 @@ For **nested metrics** (like CollaborationImpact), `metric_type` contains the ma 309021 Humboldt University of Berlin Institutional collaboration - CollaborationImpact all 8.610204 NaN @@ -235,7 +228,6 @@ You can analyze multiple institutions simultaneously and retrieve metrics `by_ye entity_id entity_name metric - metric_type year value percentage @@ -248,7 +240,6 @@ You can analyze multiple institutions simultaneously and retrieve metrics `by_ye 309050 Technical University of Berlin CitedPublications - CitedPublications 2024 2400 66.133920 @@ -259,7 +250,6 @@ You can analyze multiple institutions simultaneously and retrieve metrics `by_ye 309050 Technical University of Berlin CitedPublications - CitedPublications 2020 3294 89.462250 @@ -270,7 +260,6 @@ You can analyze multiple institutions simultaneously and retrieve metrics `by_ye 309050 Technical University of Berlin CitedPublications - CitedPublications 2021 3385 88.404290 @@ -281,7 +270,6 @@ You can analyze multiple institutions simultaneously and retrieve metrics `by_ye 309050 Technical University of Berlin CitedPublications - CitedPublications 2022 3209 86.472650 @@ -292,7 +280,6 @@ You can analyze multiple institutions simultaneously and retrieve metrics `by_ye 309050 Technical University of Berlin CitedPublications - CitedPublications 2023 3044 80.529100 @@ -303,7 +290,6 @@ You can analyze multiple institutions simultaneously and retrieve metrics `by_ye 309076 Heidelberg University CitedPublications - CitedPublications 2024 5937 72.517410 @@ -314,7 +300,6 @@ You can analyze multiple institutions simultaneously and retrieve metrics `by_ye 309076 Heidelberg University CitedPublications - CitedPublications 2020 7423 92.005455 @@ -325,7 +310,6 @@ You can analyze multiple institutions simultaneously and retrieve metrics `by_ye 309076 
Heidelberg University CitedPublications - CitedPublications 2021 7828 90.864770 @@ -336,7 +320,6 @@ You can analyze multiple institutions simultaneously and retrieve metrics `by_ye 309076 Heidelberg University CitedPublications - CitedPublications 2022 7354 88.166885 @@ -347,7 +330,6 @@ You can analyze multiple institutions simultaneously and retrieve metrics `by_ye 309076 Heidelberg University CitedPublications - CitedPublications 2023 6921 85.150100 diff --git a/pybliometrics/scival/institution_metrics.py b/pybliometrics/scival/institution_metrics.py index 7f2d8337..eb9b9b49 100644 --- a/pybliometrics/scival/institution_metrics.py +++ b/pybliometrics/scival/institution_metrics.py @@ -4,28 +4,28 @@ from pybliometrics.superclasses import Retrieval from pybliometrics.utils import make_int_if_possible from pybliometrics.utils.constants import SCIVAL_METRICS -from pybliometrics.utils.parse_metrics import extract_metric_data +from pybliometrics.utils.parse_metrics import extract_metric_data, MetricData class InstitutionMetrics(Retrieval): @property - def AcademicCorporateCollaboration(self) -> Optional[list]: + def AcademicCorporateCollaboration(self) -> Optional[list[MetricData]]: """Academic corporate collaboration metrics for each institution. - Returns list of MetricData namedtuples with unified structure: - (entity_id, entity_name, metric, metric_type, year, value, percentage, threshold) + Returns list of MetricData namedtuples with structure: + (entity_id, entity_name, metric, year, value, percentage, threshold) """ return extract_metric_data(self._json, 'AcademicCorporateCollaboration', self._by_year, "institution") @property - def AcademicCorporateCollaborationImpact(self) -> Optional[list]: + def AcademicCorporateCollaborationImpact(self) -> Optional[list[MetricData]]: """Academic corporate collaboration impact metrics for each institution. 
- Returns list of MetricData namedtuples with unified structure: - (entity_id, entity_name, metric, metric_type, year, value, percentage, threshold) + Returns list of MetricData namedtuples with structure: + (entity_id, entity_name, metric, year, value, percentage, threshold) """ return extract_metric_data(self._json, 'AcademicCorporateCollaborationImpact', self._by_year, "institution") @property - def institutions(self) -> Optional[list]: + def institutions(self) -> Optional[list]: """A list of namedtuples representing institutions and their basic info in the form `(id, name, uri)`. """ @@ -49,74 +49,74 @@ def institutions(self) -> Optional[list]: return out or None @property - def CitationCount(self) -> Optional[list]: + def CitationCount(self) -> Optional[list[MetricData]]: """Citation count metrics for each institution. - Returns list of MetricData namedtuples with unified structure: - (entity_id, entity_name, metric, metric_type, year, value, percentage, threshold) + Returns list of MetricData namedtuples with structure: + (entity_id, entity_name, metric, year, value, percentage, threshold) """ return extract_metric_data(self._json, 'CitationCount', self._by_year, "institution") @property - def CitationsPerPublication(self) -> Optional[list]: + def CitationsPerPublication(self) -> Optional[list[MetricData]]: """Citations per publication metrics for each institution. - Returns list of MetricData namedtuples with unified structure: - (entity_id, entity_name, metric, metric_type, year, value, percentage, threshold) + Returns list of MetricData namedtuples with structure: + (entity_id, entity_name, metric, year, value, percentage, threshold) """ return extract_metric_data(self._json, 'CitationsPerPublication', self._by_year, "institution") @property - def CitedPublications(self) -> Optional[list]: + def CitedPublications(self) -> Optional[list[MetricData]]: """Cited publications metrics for each institution. 
- Returns list of MetricData namedtuples with unified structure: - (entity_id, entity_name, metric, metric_type, year, value, percentage, threshold) + Returns list of MetricData namedtuples with structure: + (entity_id, entity_name, metric, year, value, percentage, threshold) """ return extract_metric_data(self._json, 'CitedPublications', self._by_year, "institution") @property - def Collaboration(self) -> Optional[list]: + def Collaboration(self) -> Optional[list[MetricData]]: """Collaboration metrics for each institution. - Returns list of MetricData namedtuples with unified structure: - (entity_id, entity_name, metric, metric_type, year, value, percentage, threshold) + Returns list of MetricData namedtuples with structure: + (entity_id, entity_name, metric, year, value, percentage, threshold) """ return extract_metric_data(self._json, 'Collaboration', self._by_year, "institution") @property - def CollaborationImpact(self) -> Optional[list]: + def CollaborationImpact(self) -> Optional[list[MetricData]]: """Collaboration impact metrics for each institution. - Returns list of MetricData namedtuples with unified structure: - (entity_id, entity_name, metric, metric_type, year, value, percentage, threshold) + Returns list of MetricData namedtuples with structure: + (entity_id, entity_name, metric, year, value, percentage, threshold) """ return extract_metric_data(self._json, 'CollaborationImpact', self._by_year, "institution") @property - def FieldWeightedCitationImpact(self) -> Optional[list]: + def FieldWeightedCitationImpact(self) -> Optional[list[MetricData]]: """Field weighted citation impact metrics for each institution. 
- Returns list of MetricData namedtuples with unified structure: - (entity_id, entity_name, metric, metric_type, year, value, percentage, threshold) + Returns list of MetricData namedtuples with structure: + (entity_id, entity_name, metric, year, value, percentage, threshold) """ return extract_metric_data(self._json, 'FieldWeightedCitationImpact', self._by_year, "institution") @property - def OutputsInTopCitationPercentiles(self) -> Optional[list]: + def OutputsInTopCitationPercentiles(self) -> Optional[list[MetricData]]: """Outputs in top citation percentiles metrics for each institution. - Returns list of MetricData namedtuples with unified structure: - (entity_id, entity_name, metric, metric_type, year, value, percentage, threshold) + Returns list of MetricData namedtuples with structure: + (entity_id, entity_name, metric, year, value, percentage, threshold) """ return extract_metric_data(self._json, 'OutputsInTopCitationPercentiles', self._by_year, "institution") @property - def PublicationsInTopJournalPercentiles(self) -> Optional[list]: + def PublicationsInTopJournalPercentiles(self) -> Optional[list[MetricData]]: """Publications in top journal percentiles metrics for each institution. - Returns list of MetricData namedtuples with unified structure: - (entity_id, entity_name, metric, metric_type, year, value, percentage, threshold) + Returns list of MetricData namedtuples with structure: + (entity_id, entity_name, metric, year, value, percentage, threshold) """ return extract_metric_data(self._json, 'PublicationsInTopJournalPercentiles', self._by_year, "institution") @property - def ScholarlyOutput(self) -> Optional[list]: + def ScholarlyOutput(self) -> Optional[list[MetricData]]: """Scholarly output metrics for each institution. 
- Returns list of MetricData namedtuples with unified structure: - (entity_id, entity_name, metric, metric_type, year, value, percentage, threshold) + Returns list of MetricData namedtuples with structure: + (entity_id, entity_name, metric, year, value, percentage, threshold) """ return extract_metric_data(self._json, 'ScholarlyOutput', self._by_year, "institution") @@ -148,7 +148,7 @@ def __init__(self, Note: All metric properties return lists of MetricData namedtuples with - unified structure: `(entity_id, entity_name, metric, metric_type, + structure: `(entity_id, entity_name, metric, year, value, percentage, threshold)`. """ diff --git a/pybliometrics/scival/tests/test_InstitutionMetrics.py b/pybliometrics/scival/tests/test_InstitutionMetrics.py index f4f43ad2..eea972db 100644 --- a/pybliometrics/scival/tests/test_InstitutionMetrics.py +++ b/pybliometrics/scival/tests/test_InstitutionMetrics.py @@ -14,7 +14,7 @@ def has_all_fields(metric_data): """Check if the metric data has all required fields.""" required_fields = ['entity_id', 'entity_name', 'metric', - 'metric_type', 'year', 'value', 'percentage', + 'year', 'value', 'percentage', 'threshold'] return all(hasattr(metric_data, field) for field in required_fields) @@ -26,7 +26,6 @@ def test_academic_corporate_collaboration(): assert result[0].entity_id == 505023 assert result[0].entity_name == 'Universidad Nacional Autónoma de México' assert result[0].metric == 'Academic-corporate collaboration' - assert result[0].metric_type == 'AcademicCorporateCollaboration' assert result[0].year == 'all' assert result[0].value >= 900 assert result[0].percentage > 2 @@ -37,7 +36,6 @@ def test_academic_corporate_collaboration(): assert result_multi[0].entity_id == 309054 assert result_multi[0].entity_name == 'Technical University of Munich' assert result_multi[0].metric == 'Academic-corporate collaboration' - assert result_multi[0].metric_type == 'AcademicCorporateCollaboration' assert result_multi[0].year == '2024' assert 
result_multi[0].value >= 1000 assert result_multi[0].percentage > 9 From 3e50af0f38bb0e82a1ae7c0c3bacac2ceda66619 Mon Sep 17 00:00:00 2001 From: Nils Herrmann Date: Tue, 26 Aug 2025 11:27:43 +0200 Subject: [PATCH 13/14] Missing changes in constants --- pybliometrics/utils/constants.py | 40 ++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/pybliometrics/utils/constants.py b/pybliometrics/utils/constants.py index e79f89cb..63d13d42 100644 --- a/pybliometrics/utils/constants.py +++ b/pybliometrics/utils/constants.py @@ -61,6 +61,8 @@ 'ObjectMetadata': RETRIEVAL_BASE + 'object/', 'ObjectRetrieval': RETRIEVAL_BASE + 'object/', 'PublicationLookup': SCIVAL_BASE + 'publication/', + 'AuthorMetrics': SCIVAL_BASE + 'author/metrics/', + 'InstitutionMetrics': SCIVAL_BASE + 'institution/metrics/', 'PlumXMetrics': 'https://api.elsevier.com/analytics/plumx/', 'ScDirSubjectClassifications': RETRIEVAL_BASE + 'subject/scidir/', 'ScienceDirectSearch': SEARCH_BASE + 'sciencedirect/', @@ -93,6 +95,44 @@ "ObjectRetrieval": [""] } +# SciVal Metrics +SCIVAL_METRICS = { + "AuthorMetrics": { + "byYear": [ + "AcademicCorporateCollaboration", + "AcademicCorporateCollaborationImpact", + "Collaboration", + "CitationCount", + "CitationsPerPublication", + "CollaborationImpact", + "CitedPublications", + "FieldWeightedCitationImpact", + "ScholarlyOutput", + "PublicationsInTopJournalPercentiles", + "OutputsInTopCitationPercentiles" + ], + "notByYear": [ + "HIndices" + ] + }, + "InstitutionMetrics": { + "byYear": [ + "AcademicCorporateCollaboration", + "AcademicCorporateCollaborationImpact", + "Collaboration", + "CitationCount", + "CitationsPerPublication", + "CollaborationImpact", + "CitedPublications", + "FieldWeightedCitationImpact", + "ScholarlyOutput", + "PublicationsInTopJournalPercentiles", + "OutputsInTopCitationPercentiles" + ], + "notByYear": [] + } +} + # APIs whose URL needs an id_type APIS_WITH_ID_TYPE = {"AbstractRetrieval", "PlumXMetrics", From 
7ba7979fa2821140c0a8d3dcab166fdcd5725453 Mon Sep 17 00:00:00 2001 From: Nils Herrmann <nils_herrmann@outlook.de> Date: Tue, 26 Aug 2025 11:30:21 +0200 Subject: [PATCH 14/14] Constants --- pybliometrics/utils/constants.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pybliometrics/utils/constants.py b/pybliometrics/utils/constants.py index 63d13d42..a404c101 100644 --- a/pybliometrics/utils/constants.py +++ b/pybliometrics/utils/constants.py @@ -164,9 +164,11 @@ 'ArticleEntitlement': 0, 'ArticleMetadata': 6, 'ArticleRetrieval': 10, + 'AuthorMetrics': 6, 'AuthorRetrieval': 3, 'AuthorSearch': 2, 'CitationOverview': 4, + 'InstitutionMetrics': 6, 'NonserialTitle': 6, 'ObjectMetadata': 0, 'ObjectRetrieval': 0,