diff --git a/build.sh b/build.sh index 66175e5..0297224 100755 --- a/build.sh +++ b/build.sh @@ -3,4 +3,6 @@ commit=$(git show --no-patch --format='%H') buildTime=$(date -u) +printf 'Building migration-verifier …\n\tcommit: %s\n\tbuildTime: %s\n' "$commit" "$buildTime" + go build -ldflags="-X 'main.Revision=$commit' -X 'main.BuildTime=$buildTime'" main/migration_verifier.go diff --git a/internal/verifier/compare.go b/internal/verifier/compare.go index dc275c6..ceeee33 100644 --- a/internal/verifier/compare.go +++ b/internal/verifier/compare.go @@ -512,3 +512,36 @@ func (verifier *Verifier) getDocumentsCursor(ctx mongo.SessionContext, collectio return collection.Database().RunCommandCursor(ctx, findCmd, runCommandOptions) } + +func (verifier *Verifier) compareOneDocument(srcClientDoc, dstClientDoc bson.Raw, namespace string) ([]VerificationResult, error) { + match := bytes.Equal(srcClientDoc, dstClientDoc) + if match { + return nil, nil + } + //verifier.logger.Info().Msg("Byte comparison failed for id %s, falling back to field comparison", id) + + mismatch, err := BsonUnorderedCompareRawDocumentWithDetails(srcClientDoc, dstClientDoc) + if err != nil { + return nil, err + } + if mismatch == nil { + if verifier.ignoreBSONFieldOrder { + return nil, nil + } + dataSize := len(srcClientDoc) + if dataSize < len(dstClientDoc) { + dataSize = len(dstClientDoc) + } + + // If we're respecting field order we have just done a binary compare so we have fields in different order. + return []VerificationResult{{ + ID: srcClientDoc.Lookup("_id"), + Details: Mismatch + fmt.Sprintf(" : Document %s has fields in different order", srcClientDoc.Lookup("_id")), + Cluster: ClusterTarget, + NameSpace: namespace, + dataSize: dataSize, + }}, nil + } + results := mismatchResultsToVerificationResults(mismatch, srcClientDoc, dstClientDoc, namespace, srcClientDoc.Lookup("_id"), "" /* fieldPrefix */) + return results, nil +} diff --git a/internal/verifier/migration_verifier.go b/internal/verifier/migration_verifier.go index d63e241..2ca5f84 100644 --- a/internal/verifier/migration_verifier.go +++ b/internal/verifier/migration_verifier.go @@ -511,39 +511,6 @@ func mismatchResultsToVerificationResults(mismatch *MismatchDetails, srcClientDo return } -func (verifier *Verifier) compareOneDocument(srcClientDoc, dstClientDoc bson.Raw, namespace string) ([]VerificationResult, error) { - match := bytes.Equal(srcClientDoc, dstClientDoc) - if match { - return nil, nil - } - //verifier.logger.Info().Msg("Byte comparison failed for id %s, falling back to field comparison", id) - - mismatch, err := BsonUnorderedCompareRawDocumentWithDetails(srcClientDoc, dstClientDoc) - if err != nil { - return nil, err - } - if mismatch == nil { - if verifier.ignoreBSONFieldOrder { - return nil, nil - } - dataSize := len(srcClientDoc) - if dataSize < len(dstClientDoc) { - dataSize = len(dstClientDoc) - } - - // If we're respecting field order we have just done a binary compare so we have fields in different order. - return []VerificationResult{{ - ID: srcClientDoc.Lookup("_id"), - Details: Mismatch + fmt.Sprintf(" : Document %s has fields in different order", srcClientDoc.Lookup("_id")), - Cluster: ClusterTarget, - NameSpace: namespace, - dataSize: dataSize, - }}, nil - } - results := mismatchResultsToVerificationResults(mismatch, srcClientDoc, dstClientDoc, namespace, srcClientDoc.Lookup("_id"), "" /* fieldPrefix */) - return results, nil -} - func (verifier *Verifier) ProcessVerifyTask(ctx context.Context, workerNum int, task *VerificationTask) error { start := time.Now() diff --git a/internal/verifier/migration_verifier_test.go b/internal/verifier/migration_verifier_test.go index 4a741c5..9a5d00d 100644 --- a/internal/verifier/migration_verifier_test.go +++ b/internal/verifier/migration_verifier_test.go @@ -15,6 +15,7 @@ import ( "regexp" "slices" "sort" + "strings" "testing" "time" @@ -27,6 +28,7 @@ import ( "github.com/10gen/migration-verifier/mslices" "github.com/cespare/permute/v2" "github.com/rs/zerolog" + "github.com/samber/lo" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" @@ -1240,6 +1242,88 @@ func (suite *IntegrationTestSuite) TestVerifierCompareIndexes() { } } +func (suite *IntegrationTestSuite) TestVerifierDocMismatches() { + ctx := suite.Context() + + suite.Require().NoError( + suite.srcMongoClient. + Database("test"). + Collection("coll").Drop(ctx), + ) + suite.Require().NoError( + suite.dstMongoClient. + Database("test"). + Collection("coll").Drop(ctx), + ) + + _, err := suite.srcMongoClient. + Database("test"). + Collection("coll"). + InsertMany( + ctx, + lo.RepeatBy( + 20, + func(index int) any { + return bson.D{ + {"_id", 100000 + index}, + {"foo", 3}, + } + }, + ), + ) + suite.Require().NoError(err) + + // The first has a mismatched `foo` value, + // and the 2nd lacks `foo` entirely. + _, err = suite.dstMongoClient. + Database("test"). + Collection("coll"). + InsertMany(ctx, lo.ToAnySlice([]bson.D{ + {{"_id", 100000}, {"foo", 1}}, + {{"_id", 100001}}, + })) + suite.Require().NoError(err) + + verifier := suite.BuildVerifier() + verifier.failureDisplaySize = 10 + + ns := "test.coll" + verifier.SetSrcNamespaces([]string{ns}) + verifier.SetDstNamespaces([]string{ns}) + verifier.SetNamespaceMap() + + runner := RunVerifierCheck(ctx, suite.T(), verifier) + suite.Require().NoError(runner.AwaitGenerationEnd()) + + builder := &strings.Builder{} + _, _, err = verifier.reportDocumentMismatches(ctx, builder) + suite.Require().NoError(err) + + suite.Assert().Contains( + builder.String(), + "100009", + "summary should show an early mismatch", + ) + + suite.Assert().Contains( + builder.String(), + " 10 ", + "summary should show the # of missing docs shown", + ) + + suite.Assert().Contains( + builder.String(), + " 18 ", + "summary should show the total # of missing/changed documents", + ) + + suite.Assert().NotContains( + builder.String(), + "100019", + "summary should NOT show a late mismatch", + ) +} + func (suite *IntegrationTestSuite) TestVerifierCompareIndexSpecs() { ctx := suite.Context() verifier := suite.BuildVerifier() diff --git a/internal/verifier/mismatches.go b/internal/verifier/mismatches.go index fa7bf35..9a8f2af 100644 --- a/internal/verifier/mismatches.go +++ b/internal/verifier/mismatches.go @@ -9,6 +9,7 @@ import ( "go.mongodb.org/mongo-driver/bson" "go.mongodb.org/mongo-driver/bson/primitive" "go.mongodb.org/mongo-driver/mongo" + "go.mongodb.org/mongo-driver/mongo/options" ) const ( @@ -49,6 +50,11 @@ func getMismatchesForTasks( bson.D{ {"task", bson.D{{"$in", taskIDs}}}, }, + options.Find().SetSort( + bson.D{ + {"detail.id", 1}, + }, + ), ) if err != nil { diff --git a/internal/verifier/result.go b/internal/verifier/result.go index 0cf60f4..bd75c36 100644 --- a/internal/verifier/result.go +++ b/internal/verifier/result.go @@ -36,6 +36,12 @@ type VerificationResult struct { DstTimestamp option.Option[primitive.Timestamp] } -func (vr VerificationResult) IsMissing() bool { - return vr.Details == Missing +// DocumentIsMissing returns a boolean that indicates whether the +// VerificationResult indicates a document that is missing on either +// source or destination. +func (vr VerificationResult) DocumentIsMissing() bool { + // NB: Missing gets set as the Details value when a field is missing + // but the document exists. To ascertain that the document is entirely + // absent we have to check Field as well. + return vr.Details == Missing && vr.Field == "" } diff --git a/internal/verifier/summary.go b/internal/verifier/summary.go index f5fa420..e2340c3 100644 --- a/internal/verifier/summary.go +++ b/internal/verifier/summary.go @@ -144,7 +144,7 @@ func (verifier *Verifier) reportDocumentMismatches(ctx context.Context, strBuild missingCount := lo.CountBy( discrepancies, func(d VerificationResult) bool { - return d.IsMissing() + return d.DocumentIsMissing() }, ) @@ -171,7 +171,7 @@ func (verifier *Verifier) reportDocumentMismatches(ctx context.Context, strBuild OUTA: for _, task := range failedTasks { for _, d := range taskDiscrepancies[task.PrimaryKey] { - if d.IsMissing() { + if d.DocumentIsMissing() { continue } @@ -207,14 +207,18 @@ OUTA: printAll = int64(missingOrChangedCount) < (verifier.failureDisplaySize + int64(0.25*float32(verifier.failureDisplaySize))) OUTB: for _, task := range failedTasks { - for _, _id := range task.Ids { + for _, d := range taskDiscrepancies[task.PrimaryKey] { + if !d.DocumentIsMissing() { + continue + } + if !printAll && missingOrChangedDocsTableRows >= verifier.failureDisplaySize { break OUTB } missingOrChangedDocsTableRows++ missingOrChangedDocsTable.Append([]string{ - fmt.Sprintf("%v", _id), + fmt.Sprintf("%v", d.ID), fmt.Sprintf("%v", task.QueryFilter.Namespace), fmt.Sprintf("%v", task.QueryFilter.To), })