Skip to content

Commit 5bf55ba

Browse files
committed
Added limit to queries
1 parent 8de3b9a commit 5bf55ba

File tree

5 files changed

+19
-14
lines changed

5 files changed

+19
-14
lines changed

Diff for: .gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -16,3 +16,4 @@ go-scholar
1616
# vendor/
1717

1818
.idea
19+
scholar-example

Diff for: README.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,9 @@ Working:
2222
* Queries each of the articles listed (up to 80) and parses the results for extra information
2323
* Caches the profile for a day, and articles for a week (need to confirm this is working)
2424
* This is in memory, so if the program is restarted, the cache is lost
25+
* Configurable limit to number of articles to query in one go
2526

2627
## TODO:
27-
* Configurable limit to number of articles to query in one go
2828
* Pagination of articles
2929
* Add throttling to avoid hitting the rate limit (figure out what the limit is)
3030
* Cache the results of queries so we aren't hitting Google Scholar's servers every time (if we do too much we get a 429)

Diff for: scholar-example/.gitignore

-1
This file was deleted.

Diff for: scholar-example/main.go

+10-5
Original file line numberDiff line numberDiff line change
@@ -8,20 +8,25 @@ import (
88

99
func main() {
1010
userPtr := flag.String("user", "", "user profile to retrieve")
11+
limitPtr := flag.Int("limit", 1, "limit the number of articles to retrieve")
1112
flag.Parse()
1213

1314
if *userPtr == "" {
1415
flag.Usage()
1516
return
1617
}
18+
if *limitPtr < 1 {
19+
*limitPtr = 1
20+
}
1721

18-
fmt.Println("Searching for user: " + *userPtr)
22+
fmt.Println("Searching for user: " + *userPtr + " with limit: " + fmt.Sprint(*limitPtr))
1923
user := *userPtr
24+
limit := *limitPtr
2025

2126
sch := scholar.New()
22-
//articles := sch.QueryProfileDumpResponse(user, true)
23-
//articles := sch.QueryProfile(user)
24-
articles := sch.QueryProfileWithCache(user)
27+
//articles := sch.QueryProfileDumpResponse(user, limit, true)
28+
//articles := sch.QueryProfile(user, limit)
29+
articles := sch.QueryProfileWithCache(user, limit)
2530

2631
if len(articles) == 0 {
2732
fmt.Println("Not found")
@@ -32,7 +37,7 @@ func main() {
3237
fmt.Println(article)
3338
}
3439

35-
cachedArticles := sch.QueryProfileWithCache(user)
40+
cachedArticles := sch.QueryProfileWithCache(user, limit)
3641
if len(articles) == 0 {
3742
fmt.Println("Not found")
3843
return

Diff for: scholar/scholar.go

+7-7
Original file line numberDiff line numberDiff line change
@@ -61,18 +61,18 @@ func (a Article) String() string {
6161
return "Article(\n title=" + a.title + "\n authors=" + a.authors + "\n scholarURL=" + a.scholarURL + "\n year=" + strconv.Itoa(a.year) + "\n month=" + strconv.Itoa(a.month) + "\n day=" + strconv.Itoa(a.day) + "\n numCitations=" + strconv.Itoa(a.numCitations) + "\n articles=" + strconv.Itoa(a.articles) + "\n description=" + a.description + "\n pdfURL=" + a.pdfURL + "\n journal=" + a.journal + "\n volume=" + a.volume + "\n pages=" + a.pages + "\n publisher=" + a.publisher + "\n scholarCitedByURL=" + strings.Join(a.scholarCitedByURLs, ", ") + "\n scholarVersionsURL=" + strings.Join(a.scholarVersionsURLs, ", ") + "\n scholarRelatedURL=" + strings.Join(a.scholarRelatedURLs, ", ") + "\n lastRetrieved=" + a.lastRetrieved.String() + "\n)"
6262
}
6363

64-
func (sch Scholar) QueryProfile(user string) []Article {
65-
return sch.QueryProfileDumpResponse(user, true, false)
64+
func (sch Scholar) QueryProfile(user string, limit int) []Article {
65+
return sch.QueryProfileDumpResponse(user, true, limit, false)
6666
}
6767

68-
func (sch Scholar) QueryProfileWithCache(user string) []Article {
68+
func (sch Scholar) QueryProfileWithCache(user string, limit int) []Article {
6969
if sch.profile.Has(user) {
7070
p, _ := sch.profile.Get(user)
7171
lastAccess := p.lastRetrieved
7272
if (time.Now().Sub(lastAccess)).Seconds() > MAX_TIME_PROFILE.Seconds() {
7373
println("Profile cache expired for user: " + user)
7474
sch.profile.Remove(user)
75-
articles := sch.QueryProfileDumpResponse(user, true, false)
75+
articles := sch.QueryProfileDumpResponse(user, true, limit, false)
7676
var articleList []string
7777
for _, article := range articles {
7878
articleList = append(articleList, article.scholarURL)
@@ -107,7 +107,7 @@ func (sch Scholar) QueryProfileWithCache(user string) []Article {
107107

108108
} else {
109109
println("Profile cache miss for user: " + user)
110-
articles := sch.QueryProfileDumpResponse(user, true, false)
110+
articles := sch.QueryProfileDumpResponse(user, true, limit, false)
111111
var articleList []string
112112
for _, article := range articles {
113113
articleList = append(articleList, article.scholarURL)
@@ -127,12 +127,12 @@ func (sch Scholar) QueryProfileWithCache(user string) []Article {
127127
// want to get updated information from the profile page only to save requests
128128
//
129129
// if dumpResponse is true, it will print the response to stdout (useful for debugging)
130-
func (sch Scholar) QueryProfileDumpResponse(user string, queryArticles bool, dumpResponse bool) []Article {
130+
func (sch Scholar) QueryProfileDumpResponse(user string, queryArticles bool, limit int, dumpResponse bool) []Article {
131131
var articles []Article
132132
client := &http.Client{}
133133

134134
// todo: make page size configurable, also support getting more than one page of citations
135-
req, err := http.NewRequest("GET", BaseURL+"/citations?user="+user+"&cstart=0&pagesize=1", nil)
135+
req, err := http.NewRequest("GET", BaseURL+"/citations?user="+user+"&cstart=0&pagesize="+strconv.Itoa(limit), nil)
136136
if err != nil {
137137
log.Fatalln(err)
138138
}

0 commit comments

Comments
 (0)