批量插入时使用 vector,但一批数据的大小限制与单个文档相同,一次放入太多文档会出问题。函数原型:

virtual void  insert (const string &ns, const vector< BSONObj > &v, int flags=0) 

insert a vector of objects into the database

demo:

#include "client/dbclient.h"
#include <iostream>
#include <vector>
#include <sys/time.h>

// Total number of documents generated by the benchmark loop in main().
#define RECORDS 5000000

int main(int argc, char **argv) {
    std::vector<mongo::BSONObj> bulk_data;
    mongo::DBClientConnection mongo;
    mongo.connect("localhost");
    mongo.dropCollection("insert_test.col1");

    struct timeval start;
    gettimeofday(&start, NULL);

    for (int i=0; i<RECORDS; i++) {
        mongo::BSONObj record = BSON (
                "_id" << i <<
                "mystring" << "hello world" );
        bulk_data.push_back(record);

        if (i % 10000 == 0) {
            mongo.insert("insert_test.col1", bulk_data);
            bulk_data.clear();
        }
    }

    struct timeval end;
    gettimeofday(&end, NULL);
    int now = (end.tv_sec * 1000) + (int)(end.tv_usec/1000);
    int elapsed_time = now - ((start.tv_sec * 1000) +
        (int)(start.tv_usec/1000));

    std::cout << "rate: " << RECORDS/(elapsed_time/1000) <<
        "/sec" << std::endl;

    return 0;
}

注意,以上代码有个坑:bulk_data.push_back(record); 最好改为 bulk_data.push_back(record.getOwned());。在 mongosync 中就遇到过一个相关的 bug,抽象出的 demo 如下:

#include "pch.h"
#include "mongo/client/dbclientcursor.h"
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
#include <set>


using namespace mongo;


const long long insertBatchSize = 200;  //  number of documents per batch insert; must not be too large


/**
 * Connects p to hostname and then authenticates against dbname.
 *
 * Progress and failures are written to the log; nothing is returned, so the
 * caller can inspect errmsg afterwards to see whether a step failed.
 *
 * @param p        connection object to connect and authenticate
 * @param hostname server address passed to connect()
 * @param dbname   database to authenticate against
 * @param username user name passed to auth()
 * @param pwd      password passed to auth()
 * @param errmsg   out-parameter handed to connect()/auth() for their error text
 */
void connectAuth(boost::shared_ptr<DBClientConnection> p,
                   const string &hostname, const string &dbname,
                   const string &username, const string &pwd, string &errmsg) {
    try {
        // connect() and auth() signal failure through their return value and
        // errmsg, not only by throwing, so check them before logging success.
        if (!p->connect(hostname, errmsg)) {
            log() << hostname << " connect failed: " << errmsg << endl;
            return;  // authenticating on a dead connection is pointless
        }
        log() << hostname << " connected ok" << endl;

        if (!p->auth(dbname, username, pwd, errmsg)) {
            log() << dbname << " auth failed: " << errmsg << endl;
            return;
        }
        log() << dbname << " auth ok" << endl;
    } catch (const DBException &e) {
        cout << "caught " << e.what() << endl;
    }
}


/**
 * Copies every document of collection sns (read through _s) into collection
 * tns (written through _t), inserting in batches of insertBatchSize.
 *
 * This is the reduced demo of the ownership bug: documents returned by the
 * cursor are buffered without taking ownership, and the diagnostic loop prints
 * whether each buffered document is still valid just before the batch insert.
 *
 * @param _s  connection the source collection is queried on
 * @param _t  connection the documents are inserted through
 * @param sns source namespace
 * @param tns target namespace
 */
void cloneCollection(boost::shared_ptr<DBClientConnection> _s,
                            boost::shared_ptr<DBClientConnection> _t,
                            string sns, string tns) {
    log() << "cloning " << sns << " -> " << tns << endl;
    // NOTE: q is never used below; the query call builds its own empty Query().
    Query q = BSONObj();
    // Documents buffered for the next batch insert.
    vector<BSONObj> v;
    // Number of documents read from the cursor so far.
    long long cnt = 0;
    // Running total of objsize() over all documents read; it is never reset per batch.
    int size=0;
    auto_ptr<DBClientCursor> c = _s->query(sns, Query(), 0, 0, 0, QueryOption_SlaveOk  | QueryOption_NoCursorTimeout );
    while ( c->more() ) {
     BSONObj b = c->next();
     ++cnt;
     // BUG (the point of this demo): b is stored without taking ownership of
     // its buffer.  An unowned BSONObj is only usable while the original data
     // stays alive; v.push_back(b.getOwned()) stores a private copy instead.
     v.push_back(b);
     size += b.objsize();
     //cout<<b.toString()<<" cnt:"<<cnt<<" objsize:"<<b.objsize()<<endl;
     // A full batch has been read: dump diagnostics, then insert and reset the buffer.
     if ( cnt % insertBatchSize == 0 ) {  
     cout<<"size:"<<size<<endl;
    // Report, for every buffered document, whether isValid() holds and what objsize() returns.
    for (vector< BSONObj >::const_iterator i = v.begin(); i != v.end(); ++i ) {
     if((*i).isValid() )   
      cout<<"valid, objsize:"<<(*i).objsize()<<endl;
     else
      cout<<"is not valid ,objsize:"<<(*i).objsize()<<endl;
    //cout<<(*i).toString()<<endl;
   } 
   cout<<"vsize:"<<v.size()<<endl;
   cout<<"cnt:"<<cnt<<endl;
     _t->insert(tns, v);
     v.clear();
     }
    }
    // Insert whatever remains when the document count is not a multiple of the batch size.
    if( !v.empty() ) {
     cout<<"last"<<endl;
    _t->insert(tns, v);// last batch
    }
}


int main(int argc , char** argv) {
  boost::shared_ptr<DBClientConnection> _s(new DBClientConnection());
  boost::shared_ptr<DBClientConnection> _t(new DBClientConnection());
  string errmsg;
  string ns="test.t";
  connectAuth(_s, "10.0.0.91:27017", "admin", "testuser", "123", errmsg); 
  connectAuth(_t, "10.0.0.91:27020", "admin", "testuser", "123", errmsg); 
  cloneCollection(_s, _t, ns, ns);
}

插入测试数据

use test
for(var i=0;i<1000;i++)  db.t.insert({v:i})

输出:

Wed Sep 11 11:29:53 cloning test.t -> test.t
size:6600
valid, objsize:32595
valid, objsize:127
is not valid ,objsize:0
is not valid ,objsize:0
is not valid ,objsize:0
is not valid ,objsize:0
is not valid ,objsize:0
is not valid ,objsize:0
is not valid ,objsize:0
is not valid ,objsize:-654311424
is not valid ,objsize:-484769792
is not valid ,objsize:-941400832
is not valid ,objsize:365421471
is not valid ,objsize:1393936355
is not valid ,objsize:2136151495
valid, objsize:8344341
valid, objsize:32595
valid, objsize:127
is not valid ,objsize:0
is not valid ,objsize:0
is not valid ,objsize:0
is not valid ,objsize:0
is not valid ,objsize:0
is not valid ,objsize:0
is not valid ,objsize:0
is not valid ,objsize:436207616
is not valid ,objsize:-413401088
is not valid ,objsize:-941122048
is not valid ,objsize:365422560
is not valid ,objsize:1393936360
is not valid ,objsize:2136151495
valid, objsize:8344341
valid, objsize:32595
valid, objsize:127
is not valid ,objsize:0
is not valid ,objsize:0
is not valid ,objsize:0
is not valid ,objsize:0
is not valid ,objsize:0
is not valid ,objsize:0
is not valid ,objsize:0
is not valid ,objsize:1526726656
is not valid ,objsize:-342032384
is not valid ,objsize:-940843264
is not valid ,objsize:365423649
is not valid ,objsize:1393936364
is not valid ,objsize:2136151495
valid, objsize:8344341
valid, objsize:32595
valid, objsize:127
is not valid ,objsize:0
is not valid ,objsize:0
is not valid ,objsize:0
is not valid ,objsize:0
is not valid ,objsize:0
is not valid ,objsize:0
is not valid ,objsize:0
is not valid ,objsize:-1761607680
is not valid ,objsize:1658388480
is not valid ,objsize:-933029120
is not valid ,objsize:365454173
is not valid ,objsize:1393936483
is not valid ,objsize:2136151496
valid, objsize:8344341
valid, objsize:32595
valid, objsize:127
is not valid ,objsize:0
is not valid ,objsize:0
is not valid ,objsize:0
is not valid ,objsize:0
is not valid ,objsize:0
is not valid ,objsize:0
is not valid ,objsize:0
is not valid ,objsize:-671088640
is not valid ,objsize:1729757184
is not valid ,objsize:-932750336
is not valid ,objsize:365455262
is not valid ,objsize:1393936487
is not valid ,objsize:2136151496
valid, objsize:8344341
valid, objsize:32595
valid, objsize:127
is not valid ,objsize:0
is not valid ,objsize:0
is not valid ,objsize:0
is not valid ,objsize:0
is not valid ,objsize:0
is not valid ,objsize:0
is not valid ,objsize:0
is not valid ,objsize:419430400
is not valid ,objsize:1801125888
is not valid ,objsize:-932471552
is not valid ,objsize:365456351
is not valid ,objsize:1393936492
is not valid ,objsize:2136151496
valid, objsize:8344341
valid, objsize:33
valid, objsize:33
valid, objsize:33
valid, objsize:33
...
valid, objsize:33
vsize:200
cnt:200
Wed Sep 11 11:29:53   Assertion failure objsize() src/mongo/db/../bson/bsonobj.h 456
0x9a13f6 0x9716d1 0x58d20b 0x578d0f 0x579874 0x314621ecdd 0x578379 
 /export/mongodb-r2.2.6/bin/mongodemo(_ZN5mongo15printStackTraceERSo+0x26) [0x9a13f6]
 /export/mongodb-r2.2.6/bin/mongodemo(_ZN5mongo12verifyFailedEPKcS1_j+0xc1) [0x9716d1]
 /export/mongodb-r2.2.6/bin/mongodemo(_ZN5mongo12DBClientBase6insertERKSsRKSt6vectorINS_7BSONObjESaIS4_EEi+0x28b) [0x58d20b]
 /export/mongodb-r2.2.6/bin/mongodemo(_Z15cloneCollectionN5boost10shared_ptrIN5mongo18DBClientConnectionEEES3_SsSs+0x4df) [0x578d0f]
 /export/mongodb-r2.2.6/bin/mongodemo(main+0x6e4) [0x579874]
 /lib64/libc.so.6(__libc_start_main+0xfd) [0x314621ecdd]
 /export/mongodb-r2.2.6/bin/mongodemo() [0x578379]
terminate called after throwing an instance of 'mongo::AssertionException'
  what():  assertion src/mongo/db/../bson/bsonobj.h:456

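将 v.push_back(b); 改为 v.push_back(b.getOwned()); 即可。原因是游标 next() 返回的 BSONObj 并不拥有自己的 buffer,而是指向游标内部的批次缓冲区;游标取下一批数据或被销毁后,之前放进 vector 的对象就指向了已失效的内存,所以上面的输出中出现了 is not valid。getOwned() 会为对象复制一份自己拥有的 buffer。有关 BSONObj 是否拥有 buffer 的相关说明: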

       A BSONObj can use a buffer it "owns" or one it does not.

       OWNED CASE
       If the BSONObj owns the buffer, the buffer can be shared among several BSONObj's (by assignment).
       In this case the buffer is basically implemented as a shared_ptr.
       Since BSONObj's are typically immutable, this works well.

       UNOWNED CASE
       A BSONObj can also point to BSON data in some other data structure it does not "own" or free later.
       For example, in a memory mapped file.  In this case, it is important the original data stays in
       scope for as long as the BSONObj is in use.  If you think the original data may go out of scope,
       call BSONObj::getOwned() to promote your BSONObj to having its own copy.

       On a BSONObj assignment, if the source is unowned, both the source and dest will have unowned
       pointers to the original buffer after the assignment.

       If you are not sure about ownership but need the buffer to last as long as the BSONObj, call
       getOwned().  getOwned() is a no-op if the buffer is already owned.  If not already owned, a malloc
       and memcpy will result.

       Most ways to create BSONObj's create 'owned' variants.  Unowned versions can be created with:
       (1) specifying true for the ifree parameter in the constructor
       (2) calling BSONObjBuilder::done().  Use BSONObjBuilder::obj() to get an owned copy
       (3) retrieving a subobject retrieves an unowned pointer into the parent BSON object

相关: http://tebros.com/2010/11/mongodb-bulk-inserts-with-the-c-driver/ https://jira.mongodb.org/browse/SERVER-6326

0 回复
需要 登录 后方可回复, 如果你还没有账号你可以 注册 一个帐号。